diff options
590 files changed, 16671 insertions, 7654 deletions
@@ -3203,7 +3203,7 @@ N: Eugene Surovegin E: ebs@ebshome.net W: http://kernel.ebshome.net/ P: 1024D/AE5467F1 FF22 39F1 6728 89F6 6E6C 2365 7602 F33D AE54 67F1 -D: Embedded PowerPC 4xx: I2C, PIC and random hacks/fixes +D: Embedded PowerPC 4xx: EMAC, I2C, PIC and random hacks/fixes S: Sunnyvale, California 94085 S: USA diff --git a/Documentation/Changes b/Documentation/Changes index 86b86399d61d..fe5ae0f55020 100644 --- a/Documentation/Changes +++ b/Documentation/Changes @@ -31,8 +31,6 @@ al español de este documento en varios formatos. Eine deutsche Version dieser Datei finden Sie unter <http://www.stefan-winter.de/Changes-2.4.0.txt>. -Last updated: October 29th, 2002 - Chris Ricker (kaboom@gatech.edu or chris.ricker@genetics.utah.edu). Current Minimal Requirements @@ -48,7 +46,7 @@ necessary on all systems; obviously, if you don't have any ISDN hardware, for example, you probably needn't concern yourself with isdn4k-utils. -o Gnu C 2.95.3 # gcc --version +o Gnu C 3.2 # gcc --version o Gnu make 3.79.1 # make --version o binutils 2.12 # ld -v o util-linux 2.10o # fdformat --version @@ -74,26 +72,7 @@ GCC --- The gcc version requirements may vary depending on the type of CPU in your -computer. The next paragraph applies to users of x86 CPUs, but not -necessarily to users of other CPUs. Users of other CPUs should obtain -information about their gcc version requirements from another source. - -The recommended compiler for the kernel is gcc 2.95.x (x >= 3), and it -should be used when you need absolute stability. You may use gcc 3.0.x -instead if you wish, although it may cause problems. Later versions of gcc -have not received much testing for Linux kernel compilation, and there are -almost certainly bugs (mainly, but not exclusively, in the kernel) that -will need to be fixed in order to use these compilers. In any case, using -pgcc instead of plain gcc is just asking for trouble. - -The Red Hat gcc 2.96 compiler subtree can also be used to build this tree. -You should ensure you use gcc-2.96-74 or later. gcc-2.96-54 will not build -the kernel correctly. - -In addition, please pay attention to compiler optimization. Anything -greater than -O2 may not be wise. Similarly, if you choose to use gcc-2.95.x -or derivatives, be sure not to use -fstrict-aliasing (which, depending on -your version of gcc 2.95.x, may necessitate using -fno-strict-aliasing). +computer. Make ---- @@ -322,9 +301,9 @@ Getting updated software Kernel compilation ****************** -gcc 2.95.3 ----------- -o <ftp://ftp.gnu.org/gnu/gcc/gcc-2.95.3.tar.gz> +gcc +--- +o <ftp://ftp.gnu.org/gnu/gcc/> Make ---- diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index eb7db3c19227..ce780ef648f1 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle @@ -344,7 +344,7 @@ Remember: if another thread can find your data structure, and you don't have a reference count on it, you almost certainly have a bug. - Chapter 11: Macros, Enums, Inline functions and RTL + Chapter 11: Macros, Enums and RTL Names of macros defining constants and labels in enums are capitalized. @@ -429,7 +429,35 @@ from void pointer to any other pointer type is guaranteed by the C programming language. - Chapter 14: References + Chapter 14: The inline disease + +There appears to be a common misperception that gcc has a magic "make me +faster" speedup option called "inline". While the use of inlines can be +appropriate (for example as a means of replacing macros, see Chapter 11), it +very often is not. Abundant use of the inline keyword leads to a much bigger +kernel, which in turn slows the system as a whole down, due to a bigger +icache footprint for the CPU and simply because there is less memory +available for the pagecache. Just think about it; a pagecache miss causes a +disk seek, which easily takes 5 miliseconds. There are a LOT of cpu cycles +that can go into these 5 miliseconds. + +A reasonable rule of thumb is to not put inline at functions that have more +than 3 lines of code in them. An exception to this rule are the cases where +a parameter is known to be a compiletime constant, and as a result of this +constantness you *know* the compiler will be able to optimize most of your +function away at compile time. For a good example of this later case, see +the kmalloc() inline function. + +Often people argue that adding inline to functions that are static and used +only once is always a win since there is no space tradeoff. While this is +technically correct, gcc is capable of inlining these automatically without +help, and the maintenance issue of removing the inline when a second user +appears outweighs the potential value of the hint that tells gcc to do +something it would have done anyway. + + + + Chapter 15: References The C Programming Language, Second Edition by Brian W. Kernighan and Dennis M. Ritchie. @@ -444,10 +472,13 @@ ISBN 0-201-61586-X. URL: http://cm.bell-labs.com/cm/cs/tpop/ GNU manuals - where in compliance with K&R and this text - for cpp, gcc, -gcc internals and indent, all available from http://www.gnu.org +gcc internals and indent, all available from http://www.gnu.org/manual/ WG14 is the international standardization working group for the programming -language C, URL: http://std.dkuug.dk/JTC1/SC22/WG14/ +language C, URL: http://www.open-std.org/JTC1/SC22/WG14/ + +Kernel CodingStyle, by greg@kroah.com at OLS 2002: +http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/ -- -Last updated on 16 February 2004 by a community effort on LKML. +Last updated on 30 December 2005 by a community effort on LKML. diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt index a23fee66064d..3f60db41b2f0 100644 --- a/Documentation/RCU/rcuref.txt +++ b/Documentation/RCU/rcuref.txt @@ -1,74 +1,67 @@ -Refcounter framework for elements of lists/arrays protected by -RCU. +Refcounter design for elements of lists/arrays protected by RCU. Refcounting on elements of lists which are protected by traditional reader/writer spinlocks or semaphores are straight forward as in: -1. 2. -add() search_and_reference() -{ { - alloc_object read_lock(&list_lock); - ... search_for_element - atomic_set(&el->rc, 1); atomic_inc(&el->rc); - write_lock(&list_lock); ... - add_element read_unlock(&list_lock); - ... ... - write_unlock(&list_lock); } +1. 2. +add() search_and_reference() +{ { + alloc_object read_lock(&list_lock); + ... search_for_element + atomic_set(&el->rc, 1); atomic_inc(&el->rc); + write_lock(&list_lock); ... + add_element read_unlock(&list_lock); + ... ... + write_unlock(&list_lock); } } 3. 4. release_referenced() delete() { { - ... write_lock(&list_lock); - atomic_dec(&el->rc, relfunc) ... - ... delete_element -} write_unlock(&list_lock); - ... - if (atomic_dec_and_test(&el->rc)) - kfree(el); - ... + ... write_lock(&list_lock); + atomic_dec(&el->rc, relfunc) ... + ... delete_element +} write_unlock(&list_lock); + ... + if (atomic_dec_and_test(&el->rc)) + kfree(el); + ... } If this list/array is made lock free using rcu as in changing the write_lock in add() and delete() to spin_lock and changing read_lock -in search_and_reference to rcu_read_lock(), the rcuref_get in +in search_and_reference to rcu_read_lock(), the atomic_get in search_and_reference could potentially hold reference to an element which -has already been deleted from the list/array. rcuref_lf_get_rcu takes +has already been deleted from the list/array. atomic_inc_not_zero takes care of this scenario. search_and_reference should look as; 1. 2. add() search_and_reference() { { - alloc_object rcu_read_lock(); - ... search_for_element - atomic_set(&el->rc, 1); if (rcuref_inc_lf(&el->rc)) { - write_lock(&list_lock); rcu_read_unlock(); - return FAIL; - add_element } - ... ... - write_unlock(&list_lock); rcu_read_unlock(); + alloc_object rcu_read_lock(); + ... search_for_element + atomic_set(&el->rc, 1); if (atomic_inc_not_zero(&el->rc)) { + write_lock(&list_lock); rcu_read_unlock(); + return FAIL; + add_element } + ... ... + write_unlock(&list_lock); rcu_read_unlock(); } } 3. 4. release_referenced() delete() { { - ... write_lock(&list_lock); - rcuref_dec(&el->rc, relfunc) ... - ... delete_element -} write_unlock(&list_lock); - ... - if (rcuref_dec_and_test(&el->rc)) - call_rcu(&el->head, el_free); - ... + ... write_lock(&list_lock); + atomic_dec(&el->rc, relfunc) ... + ... delete_element +} write_unlock(&list_lock); + ... + if (atomic_dec_and_test(&el->rc)) + call_rcu(&el->head, el_free); + ... } Sometimes, reference to the element need to be obtained in the -update (write) stream. In such cases, rcuref_inc_lf might be an overkill -since the spinlock serialising list updates are held. rcuref_inc +update (write) stream. In such cases, atomic_inc_not_zero might be an +overkill since the spinlock serialising list updates are held. atomic_inc is to be used in such cases. -For arches which do not have cmpxchg rcuref_inc_lf -api uses a hashed spinlock implementation and the same hashed spinlock -is acquired in all rcuref_xxx primitives to preserve atomicity. -Note: Use rcuref_inc api only if you need to use rcuref_inc_lf on the -refcounter atleast at one place. Mixing rcuref_inc and atomic_xxx api -might lead to races. rcuref_inc_lf() must be used in lockfree -RCU critical sections only. + diff --git a/Documentation/SubmittingDrivers b/Documentation/SubmittingDrivers index c3cca924e94b..dd311cff1cc3 100644 --- a/Documentation/SubmittingDrivers +++ b/Documentation/SubmittingDrivers @@ -27,18 +27,17 @@ Who To Submit Drivers To ------------------------ Linux 2.0: - No new drivers are accepted for this kernel tree + No new drivers are accepted for this kernel tree. Linux 2.2: + No new drivers are accepted for this kernel tree. + +Linux 2.4: If the code area has a general maintainer then please submit it to the maintainer listed in MAINTAINERS in the kernel file. If the maintainer does not respond or you cannot find the appropriate - maintainer then please contact the 2.2 kernel maintainer: - Marc-Christian Petersen <m.c.p@wolk-project.de>. - -Linux 2.4: - The same rules apply as 2.2. The final contact point for Linux 2.4 - submissions is Marcelo Tosatti <marcelo.tosatti@cyclades.com>. + maintainer then please contact Marcelo Tosatti + <marcelo.tosatti@cyclades.com>. Linux 2.6: The same rules apply as 2.4 except that you should follow linux-kernel @@ -53,6 +52,7 @@ Licensing: The code must be released to us under the of exclusive GPL licensing, and if you wish the driver to be useful to other communities such as BSD you may well wish to release under multiple licenses. + See accepted licenses at include/linux/module.h Copyright: The copyright owner must agree to use of GPL. It's best if the submitter and copyright owner @@ -143,5 +143,13 @@ KernelNewbies: http://kernelnewbies.org/ Linux USB project: - http://sourceforge.net/projects/linux-usb/ + http://linux-usb.sourceforge.net/ + +How to NOT write kernel driver by arjanv@redhat.com + http://people.redhat.com/arjanv/olspaper.pdf + +Kernel Janitor: + http://janitor.kernelnewbies.org/ +-- +Last updated on 17 Nov 2005. diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 1d47e6c09dc6..6198e5ebcf65 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -78,7 +78,9 @@ Randy Dunlap's patch scripts: http://www.xenotime.net/linux/scripts/patching-scripts-002.tar.gz Andrew Morton's patch scripts: -http://www.zip.com.au/~akpm/linux/patches/patch-scripts-0.20 +http://www.zip.com.au/~akpm/linux/patches/ +Instead of these scripts, quilt is the recommended patch management +tool (see above). @@ -97,7 +99,7 @@ need to split up your patch. See #3, next. 3) Separate your changes. -Separate each logical change into its own patch. +Separate _logical changes_ into a single patch file. For example, if your changes include both bug fixes and performance enhancements for a single driver, separate those changes into two @@ -112,6 +114,10 @@ If one patch depends on another patch in order for a change to be complete, that is OK. Simply note "this patch depends on patch X" in your patch description. +If you cannot condense your patch set into a smaller set of patches, +then only post say 15 or so at a time and wait for review and integration. + + 4) Select e-mail destination. @@ -124,6 +130,10 @@ your patch to the primary Linux kernel developer's mailing list, linux-kernel@vger.kernel.org. Most kernel developers monitor this e-mail list, and can comment on your changes. + +Do not send more than 15 patches at once to the vger mailing lists!!! + + Linus Torvalds is the final arbiter of all changes accepted into the Linux kernel. His e-mail address is <torvalds@osdl.org>. He gets a lot of e-mail, so typically you should do your best to -avoid- sending @@ -149,6 +159,9 @@ USB, framebuffer devices, the VFS, the SCSI subsystem, etc. See the MAINTAINERS file for a mailing list that relates specifically to your change. +Majordomo lists of VGER.KERNEL.ORG at: + <http://vger.kernel.org/vger-lists.html> + If changes affect userland-kernel interfaces, please send the MAN-PAGES maintainer (as listed in the MAINTAINERS file) a man-pages patch, or at least a notification of the change, @@ -373,27 +386,14 @@ a diffstat, to show what files have changed, and the number of inserted and deleted lines per file. A diffstat is especially useful on bigger patches. Other comments relevant only to the moment or the maintainer, not suitable for the permanent changelog, should also go here. +Use diffstat options "-p 1 -w 70" so that filenames are listed from the +top of the kernel source tree and don't use too much horizontal space +(easily fit in 80 columns, maybe with some indentation). See more details on the proper patch format in the following references. -13) More references for submitting patches - -Andrew Morton, "The perfect patch" (tpp). - <http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt> - -Jeff Garzik, "Linux kernel patch submission format." - <http://linux.yyz.us/patch-format.html> - -Greg KH, "How to piss off a kernel subsystem maintainer" - <http://www.kroah.com/log/2005/03/31/> - -Kernel Documentation/CodingStyle - <http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle> - -Linus Torvald's mail on the canonical patch format: - <http://lkml.org/lkml/2005/4/7/183> ----------------------------------- @@ -466,3 +466,30 @@ and 'extern __inline__'. Don't try to anticipate nebulous future cases which may or may not be useful: "Make it as simple as you can, and no simpler." + + +---------------------- +SECTION 3 - REFERENCES +---------------------- + +Andrew Morton, "The perfect patch" (tpp). + <http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt> + +Jeff Garzik, "Linux kernel patch submission format." + <http://linux.yyz.us/patch-format.html> + +Greg Kroah, "How to piss off a kernel subsystem maintainer". + <http://www.kroah.com/log/2005/03/31/> + <http://www.kroah.com/log/2005/07/08/> + <http://www.kroah.com/log/2005/10/19/> + +NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!. + <http://marc.theaimsgroup.com/?l=linux-kernel&m=112112749912944&w=2> + +Kernel Documentation/CodingStyle + <http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle> + +Linus Torvald's mail on the canonical patch format: + <http://lkml.org/lkml/2005/4/7/183> +-- +Last updated on 17 Nov 2005. diff --git a/Documentation/applying-patches.txt b/Documentation/applying-patches.txt index 681e426e2482..05a08c2c1889 100644 --- a/Documentation/applying-patches.txt +++ b/Documentation/applying-patches.txt @@ -2,7 +2,8 @@ Applying Patches To The Linux Kernel ------------------------------------ - (Written by Jesper Juhl, August 2005) + Original by: Jesper Juhl, August 2005 + Last update: 2005-12-02 @@ -118,7 +119,7 @@ wrong. When patch encounters a change that it can't fix up with fuzz it rejects it outright and leaves a file with a .rej extension (a reject file). You can -read this file to see exactely what change couldn't be applied, so you can +read this file to see exactly what change couldn't be applied, so you can go fix it up by hand if you wish. If you don't have any third party patches applied to your kernel source, but @@ -127,7 +128,7 @@ and have made no modifications yourself to the source files, then you should never see a fuzz or reject message from patch. If you do see such messages anyway, then there's a high risk that either your local source tree or the patch file is corrupted in some way. In that case you should probably try -redownloading the patch and if things are still not OK then you'd be advised +re-downloading the patch and if things are still not OK then you'd be advised to start with a fresh tree downloaded in full from kernel.org. Let's look a bit more at some of the messages patch can produce. @@ -180,9 +181,11 @@ wish to apply. Are there any alternatives to `patch'? --- - Yes there are alternatives. You can use the `interdiff' program -(http://cyberelk.net/tim/patchutils/) to generate a patch representing the -differences between two patches and then apply the result. + Yes there are alternatives. + + You can use the `interdiff' program (http://cyberelk.net/tim/patchutils/) to +generate a patch representing the differences between two patches and then +apply the result. This will let you move from something like 2.6.12.2 to 2.6.12.3 in a single step. The -z flag to interdiff will even let you feed it patches in gzip or bzip2 compressed form directly without the use of zcat or bzcat or manual @@ -197,7 +200,7 @@ do the additional steps since interdiff can get things wrong in some cases. Another alternative is `ketchup', which is a python script for automatic downloading and applying of patches (http://www.selenic.com/ketchup/). -Other nice tools are diffstat which shows a summary of changes made by a + Other nice tools are diffstat which shows a summary of changes made by a patch, lsdiff which displays a short listing of affected files in a patch file, along with (optionally) the line numbers of the start of each patch and grepdiff which displays a list of the files modified by a patch where @@ -258,7 +261,7 @@ $ patch -p1 -R < ../patch-2.6.11.1 # revert the 2.6.11.1 patch # source dir is now 2.6.11 $ patch -p1 < ../patch-2.6.12 # apply new 2.6.12 patch $ cd .. -$ mv linux-2.6.11.1 inux-2.6.12 # rename source dir +$ mv linux-2.6.11.1 linux-2.6.12 # rename source dir The 2.6.x.y kernels @@ -433,7 +436,11 @@ $ cd .. $ mv linux-2.6.12-mm1 linux-2.6.13-rc3-mm3 # rename the source dir -This concludes this list of explanations of the various kernel trees and I -hope you are now crystal clear on how to apply the various patches and help -testing the kernel. +This concludes this list of explanations of the various kernel trees. +I hope you are now clear on how to apply the various patches and help testing +the kernel. + +Thank you's to Randy Dunlap, Rolf Eike Beer, Linus Torvalds, Bodo Eggert, +Johannes Stezenbach, Grant Coady, Pavel Machek and others that I may have +forgotten for their reviews and contributions to this document. diff --git a/Documentation/block/stat.txt b/Documentation/block/stat.txt new file mode 100644 index 000000000000..0dbc946de2ea --- /dev/null +++ b/Documentation/block/stat.txt @@ -0,0 +1,82 @@ +Block layer statistics in /sys/block/<dev>/stat +=============================================== + +This file documents the contents of the /sys/block/<dev>/stat file. + +The stat file provides several statistics about the state of block +device <dev>. + +Q. Why are there multiple statistics in a single file? Doesn't sysfs + normally contain a single value per file? +A. By having a single file, the kernel can guarantee that the statistics + represent a consistent snapshot of the state of the device. If the + statistics were exported as multiple files containing one statistic + each, it would be impossible to guarantee that a set of readings + represent a single point in time. + +The stat file consists of a single line of text containing 11 decimal +values separated by whitespace. The fields are summarized in the +following table, and described in more detail below. + +Name units description +---- ----- ----------- +read I/Os requests number of read I/Os processed +read merges requests number of read I/Os merged with in-queue I/O +read sectors sectors number of sectors read +read ticks milliseconds total wait time for read requests +write I/Os requests number of write I/Os processed +write merges requests number of write I/Os merged with in-queue I/O +write sectors sectors number of sectors written +write ticks milliseconds total wait time for write requests +in_flight requests number of I/Os currently in flight +io_ticks milliseconds total time this block device has been active +time_in_queue milliseconds total wait time for all requests + +read I/Os, write I/Os +===================== + +These values increment when an I/O request completes. + +read merges, write merges +========================= + +These values increment when an I/O request is merged with an +already-queued I/O request. + +read sectors, write sectors +=========================== + +These values count the number of sectors read from or written to this +block device. The "sectors" in question are the standard UNIX 512-byte +sectors, not any device- or filesystem-specific block size. The +counters are incremented when the I/O completes. + +read ticks, write ticks +======================= + +These values count the number of milliseconds that I/O requests have +waited on this block device. If there are multiple I/O requests waiting, +these values will increase at a rate greater than 1000/second; for +example, if 60 read requests wait for an average of 30 ms, the read_ticks +field will increase by 60*30 = 1800. + +in_flight +========= + +This value counts the number of I/O requests that have been issued to +the device driver but have not yet completed. It does not include I/O +requests that are in the queue but not yet issued to the device driver. + +io_ticks +======== + +This value counts the number of milliseconds during which the device has +had I/O requests queued. + +time_in_queue +============= + +This value counts the number of milliseconds that I/O requests have waited +on this block device. If there are multiple I/O requests waiting, this +value will increase as the product of the number of milliseconds times the +number of requests waiting (see "read ticks" above for an example). diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt new file mode 100644 index 000000000000..08c5d04f3086 --- /dev/null +++ b/Documentation/cpu-hotplug.txt @@ -0,0 +1,357 @@ + CPU hotplug Support in Linux(tm) Kernel + + Maintainers: + CPU Hotplug Core: + Rusty Russell <rusty@rustycorp.com.au> + Srivatsa Vaddagiri <vatsa@in.ibm.com> + i386: + Zwane Mwaikambo <zwane@arm.linux.org.uk> + ppc64: + Nathan Lynch <nathanl@austin.ibm.com> + Joel Schopp <jschopp@austin.ibm.com> + ia64/x86_64: + Ashok Raj <ashok.raj@intel.com> + +Authors: Ashok Raj <ashok.raj@intel.com> +Lots of feedback: Nathan Lynch <nathanl@austin.ibm.com>, + Joel Schopp <jschopp@austin.ibm.com> + +Introduction + +Modern advances in system architectures have introduced advanced error +reporting and correction capabilities in processors. CPU architectures permit +partitioning support, where compute resources of a single CPU could be made +available to virtual machine environments. There are couple OEMS that +support NUMA hardware which are hot pluggable as well, where physical +node insertion and removal require support for CPU hotplug. + +Such advances require CPUs available to a kernel to be removed either for +provisioning reasons, or for RAS purposes to keep an offending CPU off +system execution path. Hence the need for CPU hotplug support in the +Linux kernel. + +A more novel use of CPU-hotplug support is its use today in suspend +resume support for SMP. Dual-core and HT support makes even +a laptop run SMP kernels which didn't support these methods. SMP support +for suspend/resume is a work in progress. + +General Stuff about CPU Hotplug +-------------------------------- + +Command Line Switches +--------------------- +maxcpus=n Restrict boot time cpus to n. Say if you have 4 cpus, using + maxcpus=2 will only boot 2. You can choose to bring the + other cpus later online, read FAQ's for more info. + +additional_cpus=n [x86_64 only] use this to limit hotpluggable cpus. + This option sets + cpu_possible_map = cpu_present_map + additional_cpus + +CPU maps and such +----------------- +[More on cpumaps and primitive to manipulate, please check +include/linux/cpumask.h that has more descriptive text.] + +cpu_possible_map: Bitmap of possible CPUs that can ever be available in the +system. This is used to allocate some boot time memory for per_cpu variables +that aren't designed to grow/shrink as CPUs are made available or removed. +Once set during boot time discovery phase, the map is static, i.e no bits +are added or removed anytime. Trimming it accurately for your system needs +upfront can save some boot time memory. See below for how we use heuristics +in x86_64 case to keep this under check. + +cpu_online_map: Bitmap of all CPUs currently online. Its set in __cpu_up() +after a cpu is available for kernel scheduling and ready to receive +interrupts from devices. Its cleared when a cpu is brought down using +__cpu_disable(), before which all OS services including interrupts are +migrated to another target CPU. + +cpu_present_map: Bitmap of CPUs currently present in the system. Not all +of them may be online. When physical hotplug is processed by the relevant +subsystem (e.g ACPI) can change and new bit either be added or removed +from the map depending on the event is hot-add/hot-remove. There are currently +no locking rules as of now. Typical usage is to init topology during boot, +at which time hotplug is disabled. + +You really dont need to manipulate any of the system cpu maps. They should +be read-only for most use. When setting up per-cpu resources almost always use +cpu_possible_map/for_each_cpu() to iterate. + +Never use anything other than cpumask_t to represent bitmap of CPUs. + +#include <linux/cpumask.h> + +for_each_cpu - Iterate over cpu_possible_map +for_each_online_cpu - Iterate over cpu_online_map +for_each_present_cpu - Iterate over cpu_present_map +for_each_cpu_mask(x,mask) - Iterate over some random collection of cpu mask. + +#include <linux/cpu.h> +lock_cpu_hotplug() and unlock_cpu_hotplug(): + +The above calls are used to inhibit cpu hotplug operations. While holding the +cpucontrol mutex, cpu_online_map will not change. If you merely need to avoid +cpus going away, you could also use preempt_disable() and preempt_enable() +for those sections. Just remember the critical section cannot call any +function that can sleep or schedule this process away. The preempt_disable() +will work as long as stop_machine_run() is used to take a cpu down. + +CPU Hotplug - Frequently Asked Questions. + +Q: How to i enable my kernel to support CPU hotplug? +A: When doing make defconfig, Enable CPU hotplug support + + "Processor type and Features" -> Support for Hotpluggable CPUs + +Make sure that you have CONFIG_HOTPLUG, and CONFIG_SMP turned on as well. + +You would need to enable CONFIG_HOTPLUG_CPU for SMP suspend/resume support +as well. + +Q: What architectures support CPU hotplug? +A: As of 2.6.14, the following architectures support CPU hotplug. + +i386 (Intel), ppc, ppc64, parisc, s390, ia64 and x86_64 + +Q: How to test if hotplug is supported on the newly built kernel? +A: You should now notice an entry in sysfs. + +Check if sysfs is mounted, using the "mount" command. You should notice +an entry as shown below in the output. + +.... +none on /sys type sysfs (rw) +.... + +if this is not mounted, do the following. + +#mkdir /sysfs +#mount -t sysfs sys /sys + +now you should see entries for all present cpu, the following is an example +in a 8-way system. + +#pwd +#/sys/devices/system/cpu +#ls -l +total 0 +drwxr-xr-x 10 root root 0 Sep 19 07:44 . +drwxr-xr-x 13 root root 0 Sep 19 07:45 .. +drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu0 +drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu1 +drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu2 +drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu3 +drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu4 +drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu5 +drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu6 +drwxr-xr-x 3 root root 0 Sep 19 07:48 cpu7 + +Under each directory you would find an "online" file which is the control +file to logically online/offline a processor. + +Q: Does hot-add/hot-remove refer to physical add/remove of cpus? +A: The usage of hot-add/remove may not be very consistently used in the code. +CONFIG_CPU_HOTPLUG enables logical online/offline capability in the kernel. +To support physical addition/removal, one would need some BIOS hooks and +the platform should have something like an attention button in PCI hotplug. +CONFIG_ACPI_HOTPLUG_CPU enables ACPI support for physical add/remove of CPUs. + +Q: How do i logically offline a CPU? +A: Do the following. + +#echo 0 > /sys/devices/system/cpu/cpuX/online + +once the logical offline is successful, check + +#cat /proc/interrupts + +you should now not see the CPU that you removed. Also online file will report +the state as 0 when a cpu if offline and 1 when its online. + +#To display the current cpu state. +#cat /sys/devices/system/cpu/cpuX/online + +Q: Why cant i remove CPU0 on some systems? +A: Some architectures may have some special dependency on a certain CPU. + +For e.g in IA64 platforms we have ability to sent platform interrupts to the +OS. a.k.a Corrected Platform Error Interrupts (CPEI). In current ACPI +specifications, we didn't have a way to change the target CPU. Hence if the +current ACPI version doesn't support such re-direction, we disable that CPU +by making it not-removable. + +In such cases you will also notice that the online file is missing under cpu0. + +Q: How do i find out if a particular CPU is not removable? +A: Depending on the implementation, some architectures may show this by the +absence of the "online" file. This is done if it can be determined ahead of +time that this CPU cannot be removed. + +In some situations, this can be a run time check, i.e if you try to remove the +last CPU, this will not be permitted. You can find such failures by +investigating the return value of the "echo" command. + +Q: What happens when a CPU is being logically offlined? +A: The following happen, listed in no particular order :-) + +- A notification is sent to in-kernel registered modules by sending an event + CPU_DOWN_PREPARE +- All process is migrated away from this outgoing CPU to a new CPU +- All interrupts targeted to this CPU is migrated to a new CPU +- timers/bottom half/task lets are also migrated to a new CPU +- Once all services are migrated, kernel calls an arch specific routine + __cpu_disable() to perform arch specific cleanup. +- Once this is successful, an event for successful cleanup is sent by an event + CPU_DEAD. + + "It is expected that each service cleans up when the CPU_DOWN_PREPARE + notifier is called, when CPU_DEAD is called its expected there is nothing + running on behalf of this CPU that was offlined" + +Q: If i have some kernel code that needs to be aware of CPU arrival and + departure, how to i arrange for proper notification? +A: This is what you would need in your kernel code to receive notifications. + + #include <linux/cpu.h> + static int __cpuinit foobar_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) + { + unsigned int cpu = (unsigned long)hcpu; + + switch (action) { + case CPU_ONLINE: + foobar_online_action(cpu); + break; + case CPU_DEAD: + foobar_dead_action(cpu); + break; + } + return NOTIFY_OK; + } + + static struct notifier_block foobar_cpu_notifer = + { + .notifier_call = foobar_cpu_callback, + }; + + +In your init function, + + register_cpu_notifier(&foobar_cpu_notifier); + +You can fail PREPARE notifiers if something doesn't work to prepare resources. +This will stop the activity and send a following CANCELED event back. + +CPU_DEAD should not be failed, its just a goodness indication, but bad +things will happen if a notifier in path sent a BAD notify code. + +Q: I don't see my action being called for all CPUs already up and running? +A: Yes, CPU notifiers are called only when new CPUs are on-lined or offlined. + If you need to perform some action for each cpu already in the system, then + + for_each_online_cpu(i) { + foobar_cpu_callback(&foobar_cpu_notifier, CPU_UP_PREPARE, i); + foobar_cpu_callback(&foobar-cpu_notifier, CPU_ONLINE, i); + } + +Q: If i would like to develop cpu hotplug support for a new architecture, + what do i need at a minimum? +A: The following are what is required for CPU hotplug infrastructure to work + correctly. + + - Make sure you have an entry in Kconfig to enable CONFIG_HOTPLUG_CPU + - __cpu_up() - Arch interface to bring up a CPU + - __cpu_disable() - Arch interface to shutdown a CPU, no more interrupts + can be handled by the kernel after the routine + returns. Including local APIC timers etc are + shutdown. + - __cpu_die() - This actually supposed to ensure death of the CPU. + Actually look at some example code in other arch + that implement CPU hotplug. The processor is taken + down from the idle() loop for that specific + architecture. __cpu_die() typically waits for some + per_cpu state to be set, to ensure the processor + dead routine is called to be sure positively. + +Q: I need to ensure that a particular cpu is not removed when there is some + work specific to this cpu is in progress. +A: First switch the current thread context to preferred cpu + + int my_func_on_cpu(int cpu) + { + cpumask_t saved_mask, new_mask = CPU_MASK_NONE; + int curr_cpu, err = 0; + + saved_mask = current->cpus_allowed; + cpu_set(cpu, new_mask); + err = set_cpus_allowed(current, new_mask); + + if (err) + return err; + + /* + * If we got scheduled out just after the return from + * set_cpus_allowed() before running the work, this ensures + * we stay locked. + */ + curr_cpu = get_cpu(); + + if (curr_cpu != cpu) { + err = -EAGAIN; + goto ret; + } else { + /* + * Do work : But cant sleep, since get_cpu() disables preempt + */ + } + ret: + put_cpu(); + set_cpus_allowed(current, saved_mask); + return err; + } + + +Q: How do we determine how many CPUs are available for hotplug. +A: There is no clear spec defined way from ACPI that can give us that + information today. Based on some input from Natalie of Unisys, + that the ACPI MADT (Multiple APIC Description Tables) marks those possible + CPUs in a system with disabled status. + + Andi implemented some simple heuristics that count the number of disabled + CPUs in MADT as hotpluggable CPUS. In the case there are no disabled CPUS + we assume 1/2 the number of CPUs currently present can be hotplugged. + + Caveat: Today's ACPI MADT can only provide 256 entries since the apicid field + in MADT is only 8 bits. + +User Space Notification + +Hotplug support for devices is common in Linux today. Its being used today to +support automatic configuration of network, usb and pci devices. A hotplug +event can be used to invoke an agent script to perform the configuration task. + +You can add /etc/hotplug/cpu.agent to handle hotplug notification user space +scripts. + + #!/bin/bash + # $Id: cpu.agent + # Kernel hotplug params include: + #ACTION=%s [online or offline] + #DEVPATH=%s + # + cd /etc/hotplug + . ./hotplug.functions + + case $ACTION in + online) + echo `date` ":cpu.agent" add cpu >> /tmp/hotplug.txt + ;; + offline) + echo `date` ":cpu.agent" remove cpu >>/tmp/hotplug.txt + ;; + *) + debug_mesg CPU $ACTION event not supported + exit 1 + ;; + esac diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index a09a8eb80665..9e49b1c35729 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt @@ -14,7 +14,10 @@ CONTENTS: 1.1 What are cpusets ? 1.2 Why are cpusets needed ? 1.3 How are cpusets implemented ? - 1.4 How do I use cpusets ? + 1.4 What are exclusive cpusets ? + 1.5 What does notify_on_release do ? + 1.6 What is memory_pressure ? + 1.7 How do I use cpusets ? 2. Usage Examples and Syntax 2.1 Basic Usage 2.2 Adding/removing cpus @@ -49,29 +52,6 @@ its cpus_allowed vector, and the kernel page allocator will not allocate a page on a node that is not allowed in the requesting tasks mems_allowed vector. -If a cpuset is cpu or mem exclusive, no other cpuset, other than a direct -ancestor or descendent, may share any of the same CPUs or Memory Nodes. -A cpuset that is cpu exclusive has a sched domain associated with it. -The sched domain consists of all cpus in the current cpuset that are not -part of any exclusive child cpusets. -This ensures that the scheduler load balacing code only balances -against the cpus that are in the sched domain as defined above and not -all of the cpus in the system. This removes any overhead due to -load balancing code trying to pull tasks outside of the cpu exclusive -cpuset only to be prevented by the tasks' cpus_allowed mask. - -A cpuset that is mem_exclusive restricts kernel allocations for -page, buffer and other data commonly shared by the kernel across -multiple users. All cpusets, whether mem_exclusive or not, restrict -allocations of memory for user space. This enables configuring a -system so that several independent jobs can share common kernel -data, such as file system pages, while isolating each jobs user -allocation in its own cpuset. To do this, construct a large -mem_exclusive cpuset to hold all the jobs, and construct child, -non-mem_exclusive cpusets for each individual job. Only a small -amount of typical kernel memory, such as requests from interrupt -handlers, is allowed to be taken outside even a mem_exclusive cpuset. - User level code may create and destroy cpusets by name in the cpuset virtual file system, manage the attributes and permissions of these cpusets and which CPUs and Memory Nodes are assigned to each cpuset, @@ -192,9 +172,15 @@ containing the following files describing that cpuset: - cpus: list of CPUs in that cpuset - mems: list of Memory Nodes in that cpuset + - memory_migrate flag: if set, move pages to cpusets nodes - cpu_exclusive flag: is cpu placement exclusive? - mem_exclusive flag: is memory placement exclusive? - tasks: list of tasks (by pid) attached to that cpuset + - notify_on_release flag: run /sbin/cpuset_release_agent on exit? + - memory_pressure: measure of how much paging pressure in cpuset + +In addition, the root cpuset only has the following file: + - memory_pressure_enabled flag: compute memory_pressure? New cpusets are created using the mkdir system call or shell command. The properties of a cpuset, such as its flags, allowed @@ -228,7 +214,108 @@ exclusive cpuset. Also, the use of a Linux virtual file system (vfs) to represent the cpuset hierarchy provides for a familiar permission and name space for cpusets, with a minimum of additional kernel code. -1.4 How do I use cpusets ? + +1.4 What are exclusive cpusets ? +-------------------------------- + +If a cpuset is cpu or mem exclusive, no other cpuset, other than +a direct ancestor or descendent, may share any of the same CPUs or +Memory Nodes. + +A cpuset that is cpu_exclusive has a scheduler (sched) domain +associated with it. The sched domain consists of all CPUs in the +current cpuset that are not part of any exclusive child cpusets. +This ensures that the scheduler load balancing code only balances +against the CPUs that are in the sched domain as defined above and +not all of the CPUs in the system. This removes any overhead due to +load balancing code trying to pull tasks outside of the cpu_exclusive +cpuset only to be prevented by the tasks' cpus_allowed mask. + +A cpuset that is mem_exclusive restricts kernel allocations for +page, buffer and other data commonly shared by the kernel across +multiple users. All cpusets, whether mem_exclusive or not, restrict +allocations of memory for user space. This enables configuring a +system so that several independent jobs can share common kernel data, +such as file system pages, while isolating each jobs user allocation in +its own cpuset. To do this, construct a large mem_exclusive cpuset to +hold all the jobs, and construct child, non-mem_exclusive cpusets for +each individual job. Only a small amount of typical kernel memory, +such as requests from interrupt handlers, is allowed to be taken +outside even a mem_exclusive cpuset. + + +1.5 What does notify_on_release do ? +------------------------------------ + +If the notify_on_release flag is enabled (1) in a cpuset, then whenever +the last task in the cpuset leaves (exits or attaches to some other +cpuset) and the last child cpuset of that cpuset is removed, then +the kernel runs the command /sbin/cpuset_release_agent, supplying the +pathname (relative to the mount point of the cpuset file system) of the +abandoned cpuset. This enables automatic removal of abandoned cpusets. +The default value of notify_on_release in the root cpuset at system +boot is disabled (0). The default value of other cpusets at creation +is the current value of their parents notify_on_release setting. + + +1.6 What is memory_pressure ? +----------------------------- +The memory_pressure of a cpuset provides a simple per-cpuset metric +of the rate that the tasks in a cpuset are attempting to free up in +use memory on the nodes of the cpuset to satisfy additional memory +requests. + +This enables batch managers monitoring jobs running in dedicated +cpusets to efficiently detect what level of memory pressure that job +is causing. + +This is useful both on tightly managed systems running a wide mix of +submitted jobs, which may choose to terminate or re-prioritize jobs that +are trying to use more memory than allowed on the nodes assigned them, +and with tightly coupled, long running, massively parallel scientific +computing jobs that will dramatically fail to meet required performance +goals if they start to use more memory than allowed to them. + +This mechanism provides a very economical way for the batch manager +to monitor a cpuset for signs of memory pressure. It's up to the +batch manager or other user code to decide what to do about it and +take action. + +==> Unless this feature is enabled by writing "1" to the special file + /dev/cpuset/memory_pressure_enabled, the hook in the rebalance + code of __alloc_pages() for this metric reduces to simply noticing + that the cpuset_memory_pressure_enabled flag is zero. So only + systems that enable this feature will compute the metric. + +Why a per-cpuset, running average: + + Because this meter is per-cpuset, rather than per-task or mm, + the system load imposed by a batch scheduler monitoring this + metric is sharply reduced on large systems, because a scan of + the tasklist can be avoided on each set of queries. + + Because this meter is a running average, instead of an accumulating + counter, a batch scheduler can detect memory pressure with a + single read, instead of having to read and accumulate results + for a period of time. + + Because this meter is per-cpuset rather than per-task or mm, + the batch scheduler can obtain the key information, memory + pressure in a cpuset, with a single read, rather than having to + query and accumulate results over all the (dynamically changing) + set of tasks in the cpuset. + +A per-cpuset simple digital filter (requires a spinlock and 3 words +of data per-cpuset) is kept, and updated by any task attached to that +cpuset, if it enters the synchronous (direct) page reclaim code. + +A per-cpuset file provides an integer number representing the recent +(half-life of 10 seconds) rate of direct page reclaims caused by +the tasks in the cpuset, in units of reclaims attempted per second, +times 1000. + + +1.7 How do I use cpusets ? -------------------------- In order to minimize the impact of cpusets on critical kernel @@ -277,6 +364,30 @@ rewritten to the 'tasks' file of its cpuset. This is done to avoid impacting the scheduler code in the kernel with a check for changes in a tasks processor placement. +Normally, once a page is allocated (given a physical page +of main memory) then that page stays on whatever node it +was allocated, so long as it remains allocated, even if the +cpusets memory placement policy 'mems' subsequently changes. +If the cpuset flag file 'memory_migrate' is set true, then when +tasks are attached to that cpuset, any pages that task had +allocated to it on nodes in its previous cpuset are migrated +to the tasks new cpuset. Depending on the implementation, +this migration may either be done by swapping the page out, +so that the next time the page is referenced, it will be paged +into the tasks new cpuset, usually on the node where it was +referenced, or this migration may be done by directly copying +the pages from the tasks previous cpuset to the new cpuset, +where possible to the same node, relative to the new cpuset, +as the node that held the page, relative to the old cpuset. +Also if 'memory_migrate' is set true, then if that cpusets +'mems' file is modified, pages allocated to tasks in that +cpuset, that were on nodes in the previous setting of 'mems', +will be moved to nodes in the new setting of 'mems.' Again, +depending on the implementation, this might be done by swapping, +or by direct copying. In either case, pages that were not in +the tasks prior cpuset, or in the cpusets prior 'mems' setting, +will not be moved. + There is an exception to the above. If hotplug functionality is used to remove all the CPUs that are currently assigned to a cpuset, then the kernel will automatically update the cpus_allowed of all diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 9840d5b8d5b9..22e4040564d5 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt @@ -22,6 +22,11 @@ journal=inum When a journal already exists, this option is the inode which will represent the ext3 file system's journal file. +journal_dev=devnum When the external journal device's major/minor numbers + have changed, this option allows to specify the new + journal location. The journal device is identified + through its new major/minor numbers encoded in devnum. + noload Don't load the journal on mounting. data=journal All data are committed into the journal prior diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index d4773565ea2f..a4dcf42c2fd9 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1302,6 +1302,23 @@ VM has token based thrashing control mechanism and uses the token to prevent unnecessary page faults in thrashing situation. The unit of the value is second. The value would be useful to tune thrashing behavior. +drop_caches +----------- + +Writing to this will cause the kernel to drop clean caches, dentries and +inodes from memory, causing that memory to become free. + +To free pagecache: + echo 1 > /proc/sys/vm/drop_caches +To free dentries and inodes: + echo 2 > /proc/sys/vm/drop_caches +To free pagecache, dentries and inodes: + echo 3 > /proc/sys/vm/drop_caches + +As this is a non-destructive operation and dirty objects are not freeable, the +user should run `sync' first. + + 2.5 /proc/sys/dev - Device specific parameters ---------------------------------------------- diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.txt index b3404a032596..60ab61e54e8a 100644 --- a/Documentation/filesystems/ramfs-rootfs-initramfs.txt +++ b/Documentation/filesystems/ramfs-rootfs-initramfs.txt @@ -143,12 +143,26 @@ as the following example: dir /mnt 755 0 0 file /init initramfs/init.sh 755 0 0 +Run "usr/gen_init_cpio" (after the kernel build) to get a usage message +documenting the above file format. + One advantage of the text file is that root access is not required to set permissions or create device nodes in the new archive. (Note that those two example "file" entries expect to find files named "init.sh" and "busybox" in a directory called "initramfs", under the linux-2.6.* directory. See Documentation/early-userspace/README for more details.) +The kernel does not depend on external cpio tools, gen_init_cpio is created +from usr/gen_init_cpio.c which is entirely self-contained, and the kernel's +boot-time extractor is also (obviously) self-contained. However, if you _do_ +happen to have cpio installed, the following command line can extract the +generated cpio image back into its component files: + + cpio -i -d -H newc -F initramfs_data.cpio --no-absolute-filenames + +Contents of initramfs: +---------------------- + If you don't already understand what shared libraries, devices, and paths you need to get a minimal root filesystem up and running, here are some references: @@ -161,13 +175,69 @@ designed to be a tiny C library to statically link early userspace code against, along with some related utilities. It is BSD licensed. I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net) -myself. These are LGPL and GPL, respectively. +myself. These are LGPL and GPL, respectively. (A self-contained initramfs +package is planned for the busybox 1.2 release.) In theory you could use glibc, but that's not well suited for small embedded uses like this. (A "hello world" program statically linked against glibc is over 400k. With uClibc it's 7k. Also note that glibc dlopens libnss to do name lookups, even when otherwise statically linked.) +Why cpio rather than tar? +------------------------- + +This decision was made back in December, 2001. The discussion started here: + + http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1538.html + +And spawned a second thread (specifically on tar vs cpio), starting here: + + http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1587.html + +The quick and dirty summary version (which is no substitute for reading +the above threads) is: + +1) cpio is a standard. It's decades old (from the AT&T days), and already + widely used on Linux (inside RPM, Red Hat's device driver disks). Here's + a Linux Journal article about it from 1996: + + http://www.linuxjournal.com/article/1213 + + It's not as popular as tar because the traditional cpio command line tools + require _truly_hideous_ command line arguments. But that says nothing + either way about the archive format, and there are alternative tools, + such as: + + http://freshmeat.net/projects/afio/ + +2) The cpio archive format chosen by the kernel is simpler and cleaner (and + thus easier to create and parse) than any of the (literally dozens of) + various tar archive formats. The complete initramfs archive format is + explained in buffer-format.txt, created in usr/gen_init_cpio.c, and + extracted in init/initramfs.c. All three together come to less than 26k + total of human-readable text. + +3) The GNU project standardizing on tar is approximately as relevant as + Windows standardizing on zip. Linux is not part of either, and is free + to make its own technical decisions. + +4) Since this is a kernel internal format, it could easily have been + something brand new. The kernel provides its own tools to create and + extract this format anyway. Using an existing standard was preferable, + but not essential. + +5) Al Viro made the decision (quote: "tar is ugly as hell and not going to be + supported on the kernel side"): + + http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1540.html + + explained his reasoning: + + http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1550.html + http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1638.html + + and, most importantly, designed and implemented the initramfs code. + Future directions: ------------------ diff --git a/Documentation/filesystems/relayfs.txt b/Documentation/filesystems/relayfs.txt index d803abed29f0..5832377b7340 100644 --- a/Documentation/filesystems/relayfs.txt +++ b/Documentation/filesystems/relayfs.txt @@ -44,30 +44,41 @@ relayfs can operate in a mode where it will overwrite data not yet collected by userspace, and not wait for it to consume it. relayfs itself does not provide for communication of such data between -userspace and kernel, allowing the kernel side to remain simple and not -impose a single interface on userspace. It does provide a separate -helper though, described below. +userspace and kernel, allowing the kernel side to remain simple and +not impose a single interface on userspace. It does provide a set of +examples and a separate helper though, described below. + +klog and relay-apps example code +================================ + +relayfs itself is ready to use, but to make things easier, a couple +simple utility functions and a set of examples are provided. + +The relay-apps example tarball, available on the relayfs sourceforge +site, contains a set of self-contained examples, each consisting of a +pair of .c files containing boilerplate code for each of the user and +kernel sides of a relayfs application; combined these two sets of +boilerplate code provide glue to easily stream data to disk, without +having to bother with mundane housekeeping chores. + +The 'klog debugging functions' patch (klog.patch in the relay-apps +tarball) provides a couple of high-level logging functions to the +kernel which allow writing formatted text or raw data to a channel, +regardless of whether a channel to write into exists or not, or +whether relayfs is compiled into the kernel or is configured as a +module. These functions allow you to put unconditional 'trace' +statements anywhere in the kernel or kernel modules; only when there +is a 'klog handler' registered will data actually be logged (see the +klog and kleak examples for details). + +It is of course possible to use relayfs from scratch i.e. without +using any of the relay-apps example code or klog, but you'll have to +implement communication between userspace and kernel, allowing both to +convey the state of buffers (full, empty, amount of padding). + +klog and the relay-apps examples can be found in the relay-apps +tarball on http://relayfs.sourceforge.net -klog, relay-app & librelay -========================== - -relayfs itself is ready to use, but to make things easier, two -additional systems are provided. klog is a simple wrapper to make -writing formatted text or raw data to a channel simpler, regardless of -whether a channel to write into exists or not, or whether relayfs is -compiled into the kernel or is configured as a module. relay-app is -the kernel counterpart of userspace librelay.c, combined these two -files provide glue to easily stream data to disk, without having to -bother with housekeeping. klog and relay-app can be used together, -with klog providing high-level logging functions to the kernel and -relay-app taking care of kernel-user control and disk-logging chores. - -It is possible to use relayfs without relay-app & librelay, but you'll -have to implement communication between userspace and kernel, allowing -both to convey the state of buffers (full, empty, amount of padding). - -klog, relay-app and librelay can be found in the relay-apps tarball on -http://relayfs.sourceforge.net The relayfs user space API ========================== @@ -125,6 +136,8 @@ Here's a summary of the API relayfs provides to in-kernel clients: relay_reset(chan) relayfs_create_dir(name, parent) relayfs_remove_dir(dentry) + relayfs_create_file(name, parent, mode, fops, data) + relayfs_remove_file(dentry) channel management typically called on instigation of userspace: @@ -141,6 +154,8 @@ Here's a summary of the API relayfs provides to in-kernel clients: subbuf_start(buf, subbuf, prev_subbuf, prev_padding) buf_mapped(buf, filp) buf_unmapped(buf, filp) + create_buf_file(filename, parent, mode, buf, is_global) + remove_buf_file(dentry) helper functions: @@ -320,6 +335,71 @@ forces a sub-buffer switch on all the channel buffers, and can be used to finalize and process the last sub-buffers before the channel is closed. +Creating non-relay files +------------------------ + +relay_open() automatically creates files in the relayfs filesystem to +represent the per-cpu kernel buffers; it's often useful for +applications to be able to create their own files alongside the relay +files in the relayfs filesystem as well e.g. 'control' files much like +those created in /proc or debugfs for similar purposes, used to +communicate control information between the kernel and user sides of a +relayfs application. For this purpose the relayfs_create_file() and +relayfs_remove_file() API functions exist. For relayfs_create_file(), +the caller passes in a set of user-defined file operations to be used +for the file and an optional void * to a user-specified data item, +which will be accessible via inode->u.generic_ip (see the relay-apps +tarball for examples). The file_operations are a required parameter +to relayfs_create_file() and thus the semantics of these files are +completely defined by the caller. + +See the relay-apps tarball at http://relayfs.sourceforge.net for +examples of how these non-relay files are meant to be used. + +Creating relay files in other filesystems +----------------------------------------- + +By default of course, relay_open() creates relay files in the relayfs +filesystem. Because relay_file_operations is exported, however, it's +also possible to create and use relay files in other pseudo-filesytems +such as debugfs. + +For this purpose, two callback functions are provided, +create_buf_file() and remove_buf_file(). create_buf_file() is called +once for each per-cpu buffer from relay_open() to allow the client to +create a file to be used to represent the corresponding buffer; if +this callback is not defined, the default implementation will create +and return a file in the relayfs filesystem to represent the buffer. +The callback should return the dentry of the file created to represent +the relay buffer. Note that the parent directory passed to +relay_open() (and passed along to the callback), if specified, must +exist in the same filesystem the new relay file is created in. If +create_buf_file() is defined, remove_buf_file() must also be defined; +it's responsible for deleting the file(s) created in create_buf_file() +and is called during relay_close(). + +The create_buf_file() implementation can also be defined in such a way +as to allow the creation of a single 'global' buffer instead of the +default per-cpu set. This can be useful for applications interested +mainly in seeing the relative ordering of system-wide events without +the need to bother with saving explicit timestamps for the purpose of +merging/sorting per-cpu files in a postprocessing step. + +To have relay_open() create a global buffer, the create_buf_file() +implementation should set the value of the is_global outparam to a +non-zero value in addition to creating the file that will be used to +represent the single buffer. In the case of a global buffer, +create_buf_file() and remove_buf_file() will be called only once. The +normal channel-writing functions e.g. relay_write() can still be used +- writes from any cpu will transparently end up in the global buffer - +but since it is a global buffer, callers should make sure they use the +proper locking for such a buffer, either by wrapping writes in a +spinlock, or by copying a write function from relayfs_fs.h and +creating a local version that internally does the proper locking. + +See the 'exported-relayfile' examples in the relay-apps tarball for +examples of creating and using relay files in debugfs. + Misc ---- diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt index 5f2b9c5edbb5..22488d791168 100644 --- a/Documentation/keys-request-key.txt +++ b/Documentation/keys-request-key.txt @@ -56,10 +56,12 @@ A request proceeds in the following manner: (4) request_key() then forks and executes /sbin/request-key with a new session keyring that contains a link to auth key V. - (5) /sbin/request-key execs an appropriate program to perform the actual + (5) /sbin/request-key assumes the authority associated with key U. + + (6) /sbin/request-key execs an appropriate program to perform the actual instantiation. - (6) The program may want to access another key from A's context (say a + (7) The program may want to access another key from A's context (say a Kerberos TGT key). It just requests the appropriate key, and the keyring search notes that the session keyring has auth key V in its bottom level. @@ -67,19 +69,19 @@ A request proceeds in the following manner: UID, GID, groups and security info of process A as if it was process A, and come up with key W. - (7) The program then does what it must to get the data with which to + (8) The program then does what it must to get the data with which to instantiate key U, using key W as a reference (perhaps it contacts a Kerberos server using the TGT) and then instantiates key U. - (8) Upon instantiating key U, auth key V is automatically revoked so that it + (9) Upon instantiating key U, auth key V is automatically revoked so that it may not be used again. - (9) The program then exits 0 and request_key() deletes key V and returns key +(10) The program then exits 0 and request_key() deletes key V and returns key U to the caller. -This also extends further. If key W (step 5 above) didn't exist, key W would be -created uninstantiated, another auth key (X) would be created [as per step 3] -and another copy of /sbin/request-key spawned [as per step 4]; but the context +This also extends further. If key W (step 7 above) didn't exist, key W would be +created uninstantiated, another auth key (X) would be created (as per step 3) +and another copy of /sbin/request-key spawned (as per step 4); but the context specified by auth key X will still be process A, as it was in auth key V. This is because process A's keyrings can't simply be attached to @@ -138,8 +140,8 @@ until one succeeds: (3) The process's session keyring is searched. - (4) If the process has a request_key() authorisation key in its session - keyring then: + (4) If the process has assumed the authority associated with a request_key() + authorisation key then: (a) If extant, the calling process's thread keyring is searched. diff --git a/Documentation/keys.txt b/Documentation/keys.txt index 6304db59bfe4..aaa01b0e3ee9 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt @@ -308,6 +308,8 @@ process making the call: KEY_SPEC_USER_KEYRING -4 UID-specific keyring KEY_SPEC_USER_SESSION_KEYRING -5 UID-session keyring KEY_SPEC_GROUP_KEYRING -6 GID-specific keyring + KEY_SPEC_REQKEY_AUTH_KEY -7 assumed request_key() + authorisation key The main syscalls are: @@ -498,7 +500,11 @@ The keyctl syscall functions are: keyring is full, error ENFILE will result. The link procedure checks the nesting of the keyrings, returning ELOOP if - it appears to deep or EDEADLK if the link would introduce a cycle. + it appears too deep or EDEADLK if the link would introduce a cycle. + + Any links within the keyring to keys that match the new key in terms of + type and description will be discarded from the keyring as the new one is + added. (*) Unlink a key or keyring from another keyring: @@ -628,6 +634,41 @@ The keyctl syscall functions are: there is one, otherwise the user default session keyring. + (*) Set the timeout on a key. + + long keyctl(KEYCTL_SET_TIMEOUT, key_serial_t key, unsigned timeout); + + This sets or clears the timeout on a key. The timeout can be 0 to clear + the timeout or a number of seconds to set the expiry time that far into + the future. + + The process must have attribute modification access on a key to set its + timeout. Timeouts may not be set with this function on negative, revoked + or expired keys. + + + (*) Assume the authority granted to instantiate a key + + long keyctl(KEYCTL_ASSUME_AUTHORITY, key_serial_t key); + + This assumes or divests the authority required to instantiate the + specified key. Authority can only be assumed if the thread has the + authorisation key associated with the specified key in its keyrings + somewhere. + + Once authority is assumed, searches for keys will also search the + requester's keyrings using the requester's security label, UID, GID and + groups. + + If the requested authority is unavailable, error EPERM will be returned, + likewise if the authority has been revoked because the target key is + already instantiated. + + If the specified key is 0, then any assumed authority will be divested. + + The assumed authorititive key is inherited across fork and exec. + + =============== KERNEL SERVICES =============== diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 2f1aae32a5d9..6910c0136f8d 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -26,12 +26,13 @@ Currently, these files are in /proc/sys/vm: - min_free_kbytes - laptop_mode - block_dump +- drop-caches ============================================================== dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, -block_dump, swap_token_timeout: +block_dump, swap_token_timeout, drop-caches: See Documentation/filesystems/proc.txt @@ -102,3 +103,20 @@ This is used to force the Linux VM to keep a minimum number of kilobytes free. The VM uses this number to compute a pages_min value for each lowmem zone in the system. Each lowmem zone gets a number of reserved free pages based proportionally on its size. + +============================================================== + +percpu_pagelist_fraction + +This is the fraction of pages at most (high mark pcp->high) in each zone that +are allocated for each per cpu page list. The min value for this is 8. It +means that we don't allow more than 1/8th of pages in each zone to be +allocated in any single per_cpu_pagelist. This entry only changes the value +of hot per cpu pagelists. User can specify a number like 100 to allocate +1/100th of each zone to each per cpu page list. + +The batch value of each per cpu pagelist is also updated as a result. It is +set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8) + +The initial value is zero. Kernel does not use this value at boot time to set +the high water marks for each per cpu page list. diff --git a/MAINTAINERS b/MAINTAINERS index 7e780906d34c..76dc820bc889 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -927,7 +927,6 @@ S: Maintained FARSYNC SYNCHRONOUS DRIVER P: Kevin Curtis M: kevin.curtis@farsite.co.uk -M: kevin.curtis@farsite.co.uk W: http://www.farsite.co.uk/ S: Supported @@ -183,11 +183,8 @@ CONFIGURING the kernel: COMPILING the kernel: - - Make sure you have gcc 2.95.3 available. - gcc 2.91.66 (egcs-1.1.2), and gcc 2.7.2.3 are known to miscompile - some parts of the kernel, and are *no longer supported*. - Also remember to upgrade your binutils package (for as/ld/nm and company) - if necessary. For more information, refer to Documentation/Changes. + - Make sure you have at least gcc 3.2 available. + For more information, refer to Documentation/Changes. Please note that you can still run a.out user programs with this kernel. diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 153337ff1d7b..eedf41bf7057 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -18,9 +18,6 @@ config MMU bool default y -config UID16 - bool - config RWSEM_GENERIC_SPINLOCK bool diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index a8682612abc0..abb739b88ed1 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -43,6 +43,11 @@ #include "proto.h" #include "pci_impl.h" +/* + * Power off function, if any + */ +void (*pm_power_off)(void) = machine_power_off; + void cpu_idle(void) { diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index bbd37536d14e..9969d212e94d 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -265,30 +265,16 @@ do_sys_ptrace(long request, long pid, long addr, long data, lock_kernel(); DBG(DBG_MEM, ("request=%ld pid=%ld addr=0x%lx data=0x%lx\n", request, pid, addr, data)); - ret = -EPERM; if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out_notsk; - ret = security_ptrace(current->parent, current); - if (ret) - goto out_notsk; - /* set the ptrace bit in the process ptrace flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; + ret = ptrace_traceme(); goto out_notsk; } - if (pid == 1) /* you may not mess with init */ - goto out_notsk; - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); goto out_notsk; + } if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e149f152e70b..50b9afa8ae6d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -46,10 +46,6 @@ config MCA <file:Documentation/mca.txt> (and especially the web page given there) before attempting to build an MCA bus kernel. -config UID16 - bool - default y - config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c index b6de43e73699..a2dfe0b0f1ec 100644 --- a/arch/arm/common/scoop.c +++ b/arch/arm/common/scoop.c @@ -13,6 +13,7 @@ #include <linux/device.h> #include <linux/string.h> +#include <linux/slab.h> #include <linux/platform_device.h> #include <asm/io.h> #include <asm/hardware/scoop.h> diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 04d3082a7b94..0abbce8c70bc 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -23,20 +23,15 @@ #error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32 #endif /* - * GCC 2.95.1, 2.95.2: ignores register clobber list in asm(). * GCC 3.0, 3.1: general bad code generation. * GCC 3.2.0: incorrect function argument offset calculation. * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c * (http://gcc.gnu.org/PR8896) and incorrect structure * initialisation in fs/jffs2/erase.c */ -#if __GNUC__ < 2 || \ - (__GNUC__ == 2 && __GNUC_MINOR__ < 95) || \ - (__GNUC__ == 2 && __GNUC_MINOR__ == 95 && __GNUC_PATCHLEVEL__ != 0 && \ - __GNUC_PATCHLEVEL__ < 3) || \ - (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) #error Your compiler is too buggy; it is known to miscompile kernels. -#error Known good compilers: 2.95.3, 2.95.4, 2.96, 3.3 +#error Known good compilers: 3.3 #endif /* Use marker if you need to separate the values later */ diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 869c466e6258..b5645c4462cf 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -684,8 +684,12 @@ int setup_irq(unsigned int irq, struct irqaction *new) spin_lock_irqsave(&irq_controller_lock, flags); p = &desc->action; if ((old = *p) != NULL) { - /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { + /* + * Can't share interrupts unless both agree to and are + * the same type. + */ + if (!(old->flags & new->flags & SA_SHIRQ) || + (~old->flags & new->flags) & SA_TRIGGER_MASK) { spin_unlock_irqrestore(&irq_controller_lock, flags); return -EBUSY; } @@ -705,6 +709,12 @@ int setup_irq(unsigned int irq, struct irqaction *new) desc->running = 0; desc->pending = 0; desc->disable_depth = 1; + + if (new->flags & SA_TRIGGER_MASK) { + unsigned int type = new->flags & SA_TRIGGER_MASK; + desc->chip->set_type(irq, type); + } + if (!desc->noautoenable) { desc->disable_depth = 0; desc->chip->unmask(irq); diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c index 775f85fc8513..9e563de465b5 100644 --- a/arch/arm/mach-footbridge/netwinder-hw.c +++ b/arch/arm/mach-footbridge/netwinder-hw.c @@ -601,6 +601,7 @@ EXPORT_SYMBOL(gpio_lock); EXPORT_SYMBOL(gpio_modify_op); EXPORT_SYMBOL(gpio_modify_io); EXPORT_SYMBOL(cpld_modify); +EXPORT_SYMBOL(gpio_read); /* * Initialise any other hardware after we've got the PCI bus diff --git a/arch/arm/mach-integrator/time.c b/arch/arm/mach-integrator/time.c index 9f46aaef8968..3c22c16b38bf 100644 --- a/arch/arm/mach-integrator/time.c +++ b/arch/arm/mach-integrator/time.c @@ -96,7 +96,8 @@ static struct rtc_ops rtc_ops = { .set_alarm = rtc_set_alarm, }; -static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t arm_rtc_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { writel(0, rtc_base + RTC_EOI); return IRQ_HANDLED; @@ -124,7 +125,7 @@ static int rtc_probe(struct amba_device *dev, void *id) xtime.tv_sec = __raw_readl(rtc_base + RTC_DR); - ret = request_irq(dev->irq[0], rtc_interrupt, SA_INTERRUPT, + ret = request_irq(dev->irq[0], arm_rtc_interrupt, SA_INTERRUPT, "rtc-pl030", dev); if (ret) goto map_out; diff --git a/arch/arm/mach-omap1/serial.c b/arch/arm/mach-omap1/serial.c index fcfb81d13cfe..7a68f098a025 100644 --- a/arch/arm/mach-omap1/serial.c +++ b/arch/arm/mach-omap1/serial.c @@ -252,9 +252,8 @@ static void __init omap_serial_set_port_wakeup(int gpio_nr) return; } omap_set_gpio_direction(gpio_nr, 1); - set_irq_type(OMAP_GPIO_IRQ(gpio_nr), IRQT_RISING); ret = request_irq(OMAP_GPIO_IRQ(gpio_nr), &omap_serial_wake_interrupt, - 0, "serial wakeup", NULL); + SA_TRIGGER_RISING, "serial wakeup", NULL); if (ret) { omap_free_gpio(gpio_nr); printk(KERN_ERR "No interrupt for UART wake GPIO: %i\n", diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index 100fb31b5156..5a7b873f29b3 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -213,15 +213,14 @@ static int corgi_mci_init(struct device *dev, irqreturn_t (*corgi_detect_int)(in corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250); - err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int, SA_INTERRUPT, - "MMC card detect", data); + err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "MMC card detect", data); if (err) { printk(KERN_ERR "corgi_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); return -1; } - set_irq_type(CORGI_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE); - return 0; } diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index eef3de26ad37..663c95005985 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -146,15 +146,14 @@ static int poodle_mci_init(struct device *dev, irqreturn_t (*poodle_detect_int)( poodle_mci_platform_data.detect_delay = msecs_to_jiffies(250); - err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int, SA_INTERRUPT, - "MMC card detect", data); + err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "MMC card detect", data); if (err) { printk(KERN_ERR "poodle_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); return -1; } - set_irq_type(POODLE_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE); - return 0; } diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index f2007db0cda5..a9eacc06555f 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -296,15 +296,14 @@ static int spitz_mci_init(struct device *dev, irqreturn_t (*spitz_detect_int)(in spitz_mci_platform_data.detect_delay = msecs_to_jiffies(250); - err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int, SA_INTERRUPT, - "MMC card detect", data); + err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "MMC card detect", data); if (err) { printk(KERN_ERR "spitz_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); return -1; } - set_irq_type(SPITZ_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE); - return 0; } diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c index c9d7c596b200..caf6b8bb6c95 100644 --- a/arch/arm/mach-realview/localtimer.c +++ b/arch/arm/mach-realview/localtimer.c @@ -13,6 +13,7 @@ #include <linux/delay.h> #include <linux/device.h> #include <linux/smp.h> +#include <linux/jiffies.h> #include <asm/mach/time.h> #include <asm/hardware/arm_twd.h> diff --git a/arch/arm/mach-s3c2410/usb-simtec.c b/arch/arm/mach-s3c2410/usb-simtec.c index 5098b50158a3..495f8c6ffcb6 100644 --- a/arch/arm/mach-s3c2410/usb-simtec.c +++ b/arch/arm/mach-s3c2410/usb-simtec.c @@ -84,13 +84,13 @@ static void usb_simtec_enableoc(struct s3c2410_hcd_info *info, int on) int ret; if (on) { - ret = request_irq(IRQ_USBOC, usb_simtec_ocirq, SA_INTERRUPT, + ret = request_irq(IRQ_USBOC, usb_simtec_ocirq, + SA_INTERRUPT | SA_TRIGGER_RISING | + SA_TRIGGER_FALLING, "USB Over-current", info); if (ret != 0) { printk(KERN_ERR "failed to request usb oc irq\n"); } - - set_irq_type(IRQ_USBOC, IRQT_BOTHEDGE); } else { free_irq(IRQ_USBOC, info); } diff --git a/arch/arm26/Kconfig b/arch/arm26/Kconfig index 1f00b3d03a07..274e07019b46 100644 --- a/arch/arm26/Kconfig +++ b/arch/arm26/Kconfig @@ -34,10 +34,6 @@ config FORCE_MAX_ZONEORDER int default 9 -config UID16 - bool - default y - config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/arm26/kernel/asm-offsets.c b/arch/arm26/kernel/asm-offsets.c index 4ccacaef94df..ac682d5fd039 100644 --- a/arch/arm26/kernel/asm-offsets.c +++ b/arch/arm26/kernel/asm-offsets.c @@ -25,13 +25,6 @@ #if defined(__APCS_32__) && defined(CONFIG_CPU_26) #error Sorry, your compiler targets APCS-32 but this kernel requires APCS-26 #endif -#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95) -#error Sorry, your compiler is known to miscompile kernels. Only use gcc 2.95.3 and later. -#endif -#if __GNUC__ == 2 && __GNUC_MINOR__ == 95 -/* shame we can't detect the .1 or .2 releases */ -#warning GCC 2.95.2 and earlier miscompiles kernels. -#endif /* Use marker if you need to separate the values later */ diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index e5979d68e352..b83261949737 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -9,10 +9,6 @@ config MMU bool default y -config UID16 - bool - default y - config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index ec85c0d6c6da..61261b78ced7 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -274,6 +274,11 @@ config GPREL_DATA_NONE endchoice +config FRV_ONCPU_SERIAL + bool "Use on-CPU serial ports" + select SERIAL_8250 + default y + config PCI bool "Use PCI" depends on MB93090_MB00 @@ -305,23 +310,7 @@ config RESERVE_DMA_COHERENT source "drivers/pci/Kconfig" -config PCMCIA - tristate "Use PCMCIA" - help - Say Y here if you want to attach PCMCIA- or PC-cards to your FR-V - board. These are credit-card size devices such as network cards, - modems or hard drives often used with laptops computers. There are - actually two varieties of these cards: the older 16 bit PCMCIA cards - and the newer 32 bit CardBus cards. If you want to use CardBus - cards, you need to say Y here and also to "CardBus support" below. - - To use your PC-cards, you will need supporting software from David - Hinds pcmcia-cs package (see the file <file:Documentation/Changes> - for location). Please also read the PCMCIA-HOWTO, available from - <http://www.tldp.org/docs.html#howto>. - - To compile this driver as modules, choose M here: the - modules will be called pcmcia_core and ds. +source "drivers/pcmcia/Kconfig" #config MATH_EMULATION # bool "Math emulation support (EXPERIMENTAL)" diff --git a/arch/frv/Kconfig.debug b/arch/frv/Kconfig.debug index 0034b654995d..211f01bc4caa 100644 --- a/arch/frv/Kconfig.debug +++ b/arch/frv/Kconfig.debug @@ -2,32 +2,10 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config EARLY_PRINTK - bool "Early printk" - depends on EMBEDDED && DEBUG_KERNEL - default n - help - Write kernel log output directly into the VGA buffer or to a serial - port. - - This is useful for kernel debugging when your machine crashes very - early before the console code is initialized. For normal operation - it is not recommended because it looks ugly and doesn't cooperate - with klogd/syslogd or the X server. You should normally N here, - unless you want to debug such a crash. - config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL -config DEBUG_PAGEALLOC - bool "Page alloc debugging" - depends on DEBUG_KERNEL - help - Unmap pages from the kernel linear mapping after free_pages(). - This results in a large slowdown, but helps to find certain types - of memory corruptions. - config GDBSTUB bool "Remote GDB kernel debugging" depends on DEBUG_KERNEL diff --git a/arch/frv/Makefile b/arch/frv/Makefile index 54046d2386f5..90c0fb8d9dc3 100644 --- a/arch/frv/Makefile +++ b/arch/frv/Makefile @@ -109,10 +109,10 @@ bootstrap: $(Q)$(MAKEBOOT) bootstrap archmrproper: - $(Q)$(MAKE) -C arch/frv/boot mrproper + $(Q)$(MAKE) $(build)=arch/frv/boot mrproper archclean: - $(Q)$(MAKE) -C arch/frv/boot clean + $(Q)$(MAKE) $(build)=arch/frv/boot clean archdep: scripts/mkdep symlinks - $(Q)$(MAKE) -C arch/frv/boot dep + $(Q)$(MAKE) $(build)=arch/frv/boot dep diff --git a/arch/frv/kernel/Makefile b/arch/frv/kernel/Makefile index 422f30ede575..5a827b349b5e 100644 --- a/arch/frv/kernel/Makefile +++ b/arch/frv/kernel/Makefile @@ -21,3 +21,4 @@ obj-$(CONFIG_PM) += pm.o cmode.o obj-$(CONFIG_MB93093_PDK) += pm-mb93093.o obj-$(CONFIG_SYSCTL) += sysctl.o obj-$(CONFIG_FUTEX) += futex.o +obj-$(CONFIG_MODULES) += module.o diff --git a/arch/frv/kernel/frv_ksyms.c b/arch/frv/kernel/frv_ksyms.c index 1a76d5247190..5f118c89d091 100644 --- a/arch/frv/kernel/frv_ksyms.c +++ b/arch/frv/kernel/frv_ksyms.c @@ -16,10 +16,11 @@ #include <asm/semaphore.h> #include <asm/checksum.h> #include <asm/hardirq.h> -#include <asm/current.h> +#include <asm/cacheflush.h> extern void dump_thread(struct pt_regs *, struct user *); extern long __memcpy_user(void *dst, const void *src, size_t count); +extern long __memset_user(void *dst, const void *src, size_t count); /* platform dependent support */ @@ -50,7 +51,11 @@ EXPORT_SYMBOL(disable_irq); EXPORT_SYMBOL(__res_bus_clock_speed_HZ); EXPORT_SYMBOL(__page_offset); EXPORT_SYMBOL(__memcpy_user); -EXPORT_SYMBOL(flush_dcache_page); +EXPORT_SYMBOL(__memset_user); +EXPORT_SYMBOL(frv_dcache_writeback); +EXPORT_SYMBOL(frv_cache_invalidate); +EXPORT_SYMBOL(frv_icache_invalidate); +EXPORT_SYMBOL(frv_cache_wback_inv); #ifndef CONFIG_MMU EXPORT_SYMBOL(memory_start); @@ -72,6 +77,9 @@ EXPORT_SYMBOL(memcmp); EXPORT_SYMBOL(memscan); EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(__outsl_ns); +EXPORT_SYMBOL(__insl_ns); + EXPORT_SYMBOL(get_wchan); #ifdef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS @@ -80,14 +88,13 @@ EXPORT_SYMBOL(atomic_test_and_OR_mask); EXPORT_SYMBOL(atomic_test_and_XOR_mask); EXPORT_SYMBOL(atomic_add_return); EXPORT_SYMBOL(atomic_sub_return); -EXPORT_SYMBOL(__xchg_8); -EXPORT_SYMBOL(__xchg_16); EXPORT_SYMBOL(__xchg_32); -EXPORT_SYMBOL(__cmpxchg_8); -EXPORT_SYMBOL(__cmpxchg_16); EXPORT_SYMBOL(__cmpxchg_32); #endif +EXPORT_SYMBOL(__debug_bug_printk); +EXPORT_SYMBOL(__delay_loops_MHz); + /* * libgcc functions - functions that are used internally by the * compiler... (prototypes are not correct though, but that @@ -101,6 +108,8 @@ extern void __divdi3(void); extern void __lshrdi3(void); extern void __moddi3(void); extern void __muldi3(void); +extern void __mulll(void); +extern void __umulll(void); extern void __negdi2(void); extern void __ucmpdi2(void); extern void __udivdi3(void); @@ -116,8 +125,10 @@ EXPORT_SYMBOL(__ashrdi3); EXPORT_SYMBOL(__lshrdi3); //EXPORT_SYMBOL(__moddi3); EXPORT_SYMBOL(__muldi3); +EXPORT_SYMBOL(__mulll); +EXPORT_SYMBOL(__umulll); EXPORT_SYMBOL(__negdi2); -//EXPORT_SYMBOL(__ucmpdi2); +EXPORT_SYMBOL(__ucmpdi2); //EXPORT_SYMBOL(__udivdi3); //EXPORT_SYMBOL(__udivmoddi4); //EXPORT_SYMBOL(__umoddi3); diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c index 8c524cdd2717..59580c59c62c 100644 --- a/arch/frv/kernel/irq.c +++ b/arch/frv/kernel/irq.c @@ -32,6 +32,7 @@ #include <linux/irq.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/module.h> #include <asm/atomic.h> #include <asm/io.h> @@ -178,6 +179,8 @@ void disable_irq_nosync(unsigned int irq) spin_unlock_irqrestore(&level->lock, flags); } +EXPORT_SYMBOL(disable_irq_nosync); + /** * disable_irq - disable an irq and wait for completion * @irq: Interrupt to disable @@ -204,6 +207,8 @@ void disable_irq(unsigned int irq) #endif } +EXPORT_SYMBOL(disable_irq); + /** * enable_irq - enable handling of an irq * @irq: Interrupt to enable @@ -268,6 +273,8 @@ void enable_irq(unsigned int irq) spin_unlock_irqrestore(&level->lock, flags); } +EXPORT_SYMBOL(enable_irq); + /*****************************************************************************/ /* * handles all normal device IRQ's @@ -425,6 +432,8 @@ int request_irq(unsigned int irq, return retval; } +EXPORT_SYMBOL(request_irq); + /** * free_irq - free an interrupt * @irq: Interrupt line to free @@ -496,6 +505,8 @@ void free_irq(unsigned int irq, void *dev_id) } } +EXPORT_SYMBOL(free_irq); + /* * IRQ autodetection code.. * @@ -519,6 +530,8 @@ unsigned long probe_irq_on(void) return 0; } +EXPORT_SYMBOL(probe_irq_on); + /* * Return a mask of triggered interrupts (this * can handle only legacy ISA interrupts). @@ -542,6 +555,8 @@ unsigned int probe_irq_mask(unsigned long xmask) return 0; } +EXPORT_SYMBOL(probe_irq_mask); + /* * Return the one interrupt that triggered (this can * handle any interrupt source). @@ -571,6 +586,8 @@ int probe_irq_off(unsigned long xmask) return -1; } +EXPORT_SYMBOL(probe_irq_off); + /* this was setup_x86_irq but it seems pretty generic */ int setup_irq(unsigned int irq, struct irqaction *new) { diff --git a/arch/frv/kernel/module.c b/arch/frv/kernel/module.c new file mode 100644 index 000000000000..850d168f69fc --- /dev/null +++ b/arch/frv/kernel/module.c @@ -0,0 +1,80 @@ +/* module.c: FRV specific module loading bits + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * - Derived from arch/i386/kernel/module.c, Copyright (C) 2001 Rusty Russell. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/moduleloader.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/kernel.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt...) +#endif + +void *module_alloc(unsigned long size) +{ + if (size == 0) + return NULL; + + return vmalloc_exec(size); +} + + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception + table entries. */ +} + +/* We don't need anything special. */ +int module_frob_arch_sections(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + char *secstrings, + struct module *mod) +{ + return 0; +} + +int apply_relocate(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", me->name); + return -ENOEXEC; +} + +int apply_relocate_add(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", me->name); + return -ENOEXEC; +} + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + return 0; +} + +void module_arch_cleanup(struct module *mod) +{ +} diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c index 712c3c24c954..f0b8fff3e733 100644 --- a/arch/frv/kernel/pm.c +++ b/arch/frv/kernel/pm.c @@ -13,6 +13,7 @@ #include <linux/config.h> #include <linux/init.h> +#include <linux/module.h> #include <linux/pm.h> #include <linux/pm_legacy.h> #include <linux/sched.h> @@ -27,6 +28,7 @@ #include "local.h" void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); extern void frv_change_cmode(int); diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index 767ebb55bd83..5908deae9607 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -787,6 +787,7 @@ void __init setup_arch(char **cmdline_p) #endif /* register those serial ports that are available */ +#ifdef CONFIG_FRV_ONCPU_SERIAL #ifndef CONFIG_GDBSTUB_UART0 __reg(UART0_BASE + UART_IER * 8) = 0; early_serial_setup(&__frv_uart0); @@ -795,6 +796,7 @@ void __init setup_arch(char **cmdline_p) __reg(UART1_BASE + UART_IER * 8) = 0; early_serial_setup(&__frv_uart1); #endif +#endif #if defined(CONFIG_CHR_DEV_FLASH) || defined(CONFIG_BLK_DEV_FLASH) /* we need to initialize the Flashrom device here since we might diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c index 2e9741227b73..24cf85f89e40 100644 --- a/arch/frv/kernel/time.c +++ b/arch/frv/kernel/time.c @@ -189,6 +189,8 @@ void do_gettimeofday(struct timeval *tv) tv->tv_usec = usec; } +EXPORT_SYMBOL(do_gettimeofday); + int do_settimeofday(struct timespec *tv) { time_t wtm_sec, sec = tv->tv_sec; @@ -218,6 +220,7 @@ int do_settimeofday(struct timespec *tv) clock_was_set(); return 0; } + EXPORT_SYMBOL(do_settimeofday); /* diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 89073cae4b5d..9eb84b2e6abc 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -19,6 +19,7 @@ #include <linux/string.h> #include <linux/linkage.h> #include <linux/init.h> +#include <linux/module.h> #include <asm/setup.h> #include <asm/fpu.h> @@ -250,6 +251,8 @@ void dump_stack(void) show_stack(NULL, NULL); } +EXPORT_SYMBOL(dump_stack); + void show_stack(struct task_struct *task, unsigned long *sp) { } diff --git a/arch/frv/kernel/uaccess.c b/arch/frv/kernel/uaccess.c index f3fd58a5bc4a..9b751c0f0e84 100644 --- a/arch/frv/kernel/uaccess.c +++ b/arch/frv/kernel/uaccess.c @@ -10,6 +10,7 @@ */ #include <linux/mm.h> +#include <linux/module.h> #include <asm/uaccess.h> /*****************************************************************************/ @@ -58,8 +59,11 @@ long strncpy_from_user(char *dst, const char *src, long count) memset(p, 0, count); /* clear remainder of buffer [security] */ return err; + } /* end strncpy_from_user() */ +EXPORT_SYMBOL(strncpy_from_user); + /*****************************************************************************/ /* * Return the size of a string (including the ending 0) @@ -92,4 +96,7 @@ long strnlen_user(const char *src, long count) } return p - src + 1; /* return length including NUL */ + } /* end strnlen_user() */ + +EXPORT_SYMBOL(strnlen_user); diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index fceafd2cc202..f474534ba78a 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -112,6 +112,7 @@ SECTIONS #endif ) SCHED_TEXT + LOCK_TEXT *(.fixup) *(.gnu.warning) *(.exitcall.exit) diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile index 19be2626d5e6..08be305c9f44 100644 --- a/arch/frv/lib/Makefile +++ b/arch/frv/lib/Makefile @@ -3,6 +3,6 @@ # lib-y := \ - __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o \ + __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \ checksum.o memcpy.o memset.o atomic-ops.o \ outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o diff --git a/arch/frv/lib/__ucmpdi2.S b/arch/frv/lib/__ucmpdi2.S new file mode 100644 index 000000000000..d892f16ffaa9 --- /dev/null +++ b/arch/frv/lib/__ucmpdi2.S @@ -0,0 +1,45 @@ +/* __ucmpdi2.S: 64-bit unsigned compare + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + + .text + .p2align 4 + +############################################################################### +# +# int __ucmpdi2(unsigned long long a [GR8:GR9], +# unsigned long long b [GR10:GR11]) +# +# - returns 0, 1, or 2 as a <, =, > b respectively. +# +############################################################################### + .globl __ucmpdi2 + .type __ucmpdi2,@function +__ucmpdi2: + or.p gr8,gr0,gr4 + subcc gr8,gr10,gr0,icc0 + setlos.p #0,gr8 + bclr icc0,#2 ; a.msw < b.msw + + setlos.p #2,gr8 + bhilr icc0,#0 ; a.msw > b.msw + + subcc.p gr9,gr11,gr0,icc1 + setlos #0,gr8 + setlos.p #2,gr9 + setlos #1,gr7 + cknc icc1,cc6 + cor.p gr9,gr0,gr8, cc6,#1 + cckls icc1,cc4, cc6,#1 + andcr cc6,cc4,cc4 + cor gr7,gr0,gr8, cc4,#1 + bralr + .size __ucmpdi2, .-__ucmpdi2 diff --git a/arch/frv/lib/atomic-ops.S b/arch/frv/lib/atomic-ops.S index b03d510a89e4..545cd325ac57 100644 --- a/arch/frv/lib/atomic-ops.S +++ b/arch/frv/lib/atomic-ops.S @@ -129,48 +129,6 @@ atomic_sub_return: ############################################################################### # -# uint8_t __xchg_8(uint8_t i, uint8_t *v) -# -############################################################################### - .globl __xchg_8 - .type __xchg_8,@function -__xchg_8: - or.p gr8,gr8,gr10 -0: - orcc gr0,gr0,gr0,icc3 /* set ICC3.Z */ - ckeq icc3,cc7 - ldub.p @(gr9,gr0),gr8 /* LD.P/ORCR must be atomic */ - orcr cc7,cc7,cc3 /* set CC3 to true */ - cstb.p gr10,@(gr9,gr0) ,cc3,#1 - corcc gr29,gr29,gr0 ,cc3,#1 /* clear ICC3.Z if store happens */ - beq icc3,#0,0b - bralr - - .size __xchg_8, .-__xchg_8 - -############################################################################### -# -# uint16_t __xchg_16(uint16_t i, uint16_t *v) -# -############################################################################### - .globl __xchg_16 - .type __xchg_16,@function -__xchg_16: - or.p gr8,gr8,gr10 -0: - orcc gr0,gr0,gr0,icc3 /* set ICC3.Z */ - ckeq icc3,cc7 - lduh.p @(gr9,gr0),gr8 /* LD.P/ORCR must be atomic */ - orcr cc7,cc7,cc3 /* set CC3 to true */ - csth.p gr10,@(gr9,gr0) ,cc3,#1 - corcc gr29,gr29,gr0 ,cc3,#1 /* clear ICC3.Z if store happens */ - beq icc3,#0,0b - bralr - - .size __xchg_16, .-__xchg_16 - -############################################################################### -# # uint32_t __xchg_32(uint32_t i, uint32_t *v) # ############################################################################### @@ -192,56 +150,6 @@ __xchg_32: ############################################################################### # -# uint8_t __cmpxchg_8(uint8_t *v, uint8_t test, uint8_t new) -# -############################################################################### - .globl __cmpxchg_8 - .type __cmpxchg_8,@function -__cmpxchg_8: - or.p gr8,gr8,gr11 -0: - orcc gr0,gr0,gr0,icc3 - ckeq icc3,cc7 - ldub.p @(gr11,gr0),gr8 - orcr cc7,cc7,cc3 - sub gr8,gr9,gr7 - sllicc gr7,#24,gr0,icc0 - bne icc0,#0,1f - cstb.p gr10,@(gr11,gr0) ,cc3,#1 - corcc gr29,gr29,gr0 ,cc3,#1 - beq icc3,#0,0b -1: - bralr - - .size __cmpxchg_8, .-__cmpxchg_8 - -############################################################################### -# -# uint16_t __cmpxchg_16(uint16_t *v, uint16_t test, uint16_t new) -# -############################################################################### - .globl __cmpxchg_16 - .type __cmpxchg_16,@function -__cmpxchg_16: - or.p gr8,gr8,gr11 -0: - orcc gr0,gr0,gr0,icc3 - ckeq icc3,cc7 - lduh.p @(gr11,gr0),gr8 - orcr cc7,cc7,cc3 - sub gr8,gr9,gr7 - sllicc gr7,#16,gr0,icc0 - bne icc0,#0,1f - csth.p gr10,@(gr11,gr0) ,cc3,#1 - corcc gr29,gr29,gr0 ,cc3,#1 - beq icc3,#0,0b -1: - bralr - - .size __cmpxchg_16, .-__cmpxchg_16 - -############################################################################### -# # uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new) # ############################################################################### diff --git a/arch/frv/lib/checksum.c b/arch/frv/lib/checksum.c index 7bf5bd6cac8a..20e7dfc474ef 100644 --- a/arch/frv/lib/checksum.c +++ b/arch/frv/lib/checksum.c @@ -33,6 +33,7 @@ #include <net/checksum.h> #include <asm/checksum.h> +#include <linux/module.h> static inline unsigned short from32to16(unsigned long x) { @@ -115,34 +116,52 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) return result; } +EXPORT_SYMBOL(csum_partial); + /* * this routine is used for miscellaneous IP-like checksums, mainly * in icmp.c */ unsigned short ip_compute_csum(const unsigned char * buff, int len) { - return ~do_csum(buff,len); + return ~do_csum(buff, len); } +EXPORT_SYMBOL(ip_compute_csum); + /* * copy from fs while checksumming, otherwise like csum_partial */ - unsigned int -csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *csum_err) +csum_partial_copy_from_user(const char __user *src, char *dst, + int len, int sum, int *csum_err) { - if (csum_err) *csum_err = 0; - memcpy(dst, src, len); + int rem; + + if (csum_err) + *csum_err = 0; + + rem = copy_from_user(dst, src, len); + if (rem != 0) { + if (csum_err) + *csum_err = -EFAULT; + memset(dst + len - rem, 0, rem); + len = rem; + } + return csum_partial(dst, len, sum); } +EXPORT_SYMBOL(csum_partial_copy_from_user); + /* * copy from ds while checksumming, otherwise like csum_partial */ - unsigned int csum_partial_copy(const char *src, char *dst, int len, int sum) { memcpy(dst, src, len); return csum_partial(dst, len, sum); } + +EXPORT_SYMBOL(csum_partial_copy); diff --git a/arch/frv/mb93090-mb00/Makefile b/arch/frv/mb93090-mb00/Makefile index 3faf0f8cf9b5..76595e870733 100644 --- a/arch/frv/mb93090-mb00/Makefile +++ b/arch/frv/mb93090-mb00/Makefile @@ -3,7 +3,7 @@ # ifeq "$(CONFIG_PCI)" "y" -obj-y := pci-frv.o pci-irq.o pci-vdk.o +obj-y := pci-frv.o pci-irq.o pci-vdk.o pci-iomap.o ifeq "$(CONFIG_MMU)" "y" obj-y += pci-dma.o diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c index 2082a9647f4f..4985466b1a7c 100644 --- a/arch/frv/mb93090-mb00/pci-dma-nommu.c +++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c @@ -83,6 +83,8 @@ void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_hand return NULL; } +EXPORT_SYMBOL(dma_alloc_coherent); + void dma_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) { struct dma_alloc_record *rec; @@ -102,6 +104,8 @@ void dma_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_ BUG(); } +EXPORT_SYMBOL(dma_free_coherent); + /* * Map a single buffer of the indicated size for DMA in streaming mode. * The 32-bit bus address to use is returned. @@ -120,6 +124,8 @@ dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, return virt_to_bus(ptr); } +EXPORT_SYMBOL(dma_map_single); + /* * Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scather-gather version of the @@ -150,3 +156,5 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, return nents; } + +EXPORT_SYMBOL(dma_map_sg); diff --git a/arch/frv/mb93090-mb00/pci-dma.c b/arch/frv/mb93090-mb00/pci-dma.c index 86fbdadc51b6..671ce1e8434f 100644 --- a/arch/frv/mb93090-mb00/pci-dma.c +++ b/arch/frv/mb93090-mb00/pci-dma.c @@ -28,11 +28,15 @@ void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_hand return ret; } +EXPORT_SYMBOL(dma_alloc_coherent); + void dma_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) { consistent_free(vaddr); } +EXPORT_SYMBOL(dma_free_coherent); + /* * Map a single buffer of the indicated size for DMA in streaming mode. * The 32-bit bus address to use is returned. @@ -51,6 +55,8 @@ dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, return virt_to_bus(ptr); } +EXPORT_SYMBOL(dma_map_single); + /* * Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scather-gather version of the @@ -96,6 +102,8 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, return nents; } +EXPORT_SYMBOL(dma_map_sg); + dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction) { @@ -103,3 +111,5 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long off flush_dcache_page(page); return (dma_addr_t) page_to_phys(page) + offset; } + +EXPORT_SYMBOL(dma_map_page); diff --git a/arch/frv/mb93090-mb00/pci-iomap.c b/arch/frv/mb93090-mb00/pci-iomap.c new file mode 100644 index 000000000000..068fa04bd527 --- /dev/null +++ b/arch/frv/mb93090-mb00/pci-iomap.c @@ -0,0 +1,29 @@ +/* pci-iomap.c: description + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/pci.h> +#include <linux/module.h> + +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +{ + unsigned long start = pci_resource_start(dev, bar); + unsigned long len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (!len || !start) + return NULL; + + if ((flags & IORESOURCE_IO) || (flags & IORESOURCE_MEM)) + return (void __iomem *) start; + + return NULL; +} + +EXPORT_SYMBOL(pci_iomap); diff --git a/arch/frv/mm/cache-page.c b/arch/frv/mm/cache-page.c index 683b5e344318..0261cbe153b5 100644 --- a/arch/frv/mm/cache-page.c +++ b/arch/frv/mm/cache-page.c @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/highmem.h> +#include <linux/module.h> #include <asm/pgalloc.h> /*****************************************************************************/ @@ -38,6 +39,8 @@ void flush_dcache_page(struct page *page) } /* end flush_dcache_page() */ +EXPORT_SYMBOL(flush_dcache_page); + /*****************************************************************************/ /* * ICI takes a virtual address and the page may not currently have one @@ -64,3 +67,5 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, } } /* end flush_icache_user_range() */ + +EXPORT_SYMBOL(flush_icache_user_range); diff --git a/arch/frv/mm/extable.c b/arch/frv/mm/extable.c index 41be1128dc64..caacf030ac75 100644 --- a/arch/frv/mm/extable.c +++ b/arch/frv/mm/extable.c @@ -43,7 +43,7 @@ static inline unsigned long search_one_table(const struct exception_table_entry */ unsigned long search_exception_table(unsigned long pc) { - unsigned long ret = 0; + const struct exception_table_entry *extab; /* determine if the fault lay during a memcpy_user or a memset_user */ if (__frame->lr == (unsigned long) &__memset_user_error_lr && @@ -55,9 +55,10 @@ unsigned long search_exception_table(unsigned long pc) */ return (unsigned long) &__memset_user_error_handler; } - else if (__frame->lr == (unsigned long) &__memcpy_user_error_lr && - (unsigned long) &memcpy <= pc && pc < (unsigned long) &__memcpy_end - ) { + + if (__frame->lr == (unsigned long) &__memcpy_user_error_lr && + (unsigned long) &memcpy <= pc && pc < (unsigned long) &__memcpy_end + ) { /* the fault occurred in a protected memset * - we search for the return address (in LR) instead of the program counter * - it was probably during a copy_to/from_user() @@ -65,27 +66,10 @@ unsigned long search_exception_table(unsigned long pc) return (unsigned long) &__memcpy_user_error_handler; } -#ifndef CONFIG_MODULES - /* there is only the kernel to search. */ - ret = search_one_table(__start___ex_table, __stop___ex_table - 1, pc); - return ret; - -#else - /* the kernel is the last "module" -- no need to treat it special */ - unsigned long flags; - struct module *mp; + extab = search_exception_tables(pc); + if (extab) + return extab->fixup; - spin_lock_irqsave(&modlist_lock, flags); - - for (mp = module_list; mp != NULL; mp = mp->next) { - if (mp->ex_table_start == NULL || !(mp->flags & (MOD_RUNNING | MOD_INITIALIZING))) - continue; - ret = search_one_table(mp->ex_table_start, mp->ex_table_end - 1, pc); - if (ret) - break; - } + return 0; - spin_unlock_irqrestore(&modlist_lock, flags); - return ret; -#endif } /* end search_exception_table() */ diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c index 7dc8fbf3af97..7f77db7fabc7 100644 --- a/arch/frv/mm/highmem.c +++ b/arch/frv/mm/highmem.c @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include <linux/highmem.h> +#include <linux/module.h> void *kmap(struct page *page) { @@ -18,6 +19,8 @@ void *kmap(struct page *page) return kmap_high(page); } +EXPORT_SYMBOL(kmap); + void kunmap(struct page *page) { if (in_interrupt()) @@ -27,7 +30,12 @@ void kunmap(struct page *page) kunmap_high(page); } +EXPORT_SYMBOL(kunmap); + struct page *kmap_atomic_to_page(void *ptr) { return virt_to_page(ptr); } + + +EXPORT_SYMBOL(kmap_atomic_to_page); diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 26698a49f153..80940d712acf 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -21,10 +21,6 @@ config FPU bool default n -config UID16 - bool - default y - config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 968fabd8723f..d849c6870e3a 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -29,10 +29,6 @@ config MMU config SBUS bool -config UID16 - bool - default y - config GENERIC_ISA_DMA bool default y @@ -630,10 +626,6 @@ config REGPARM and passes the first three arguments of a function call in registers. This will probably break binary only modules. - This feature is only enabled for gcc-3.0 and later - earlier compilers - generate incorrect output with certain kernel constructs when - -mregparm=3 is used. - config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS @@ -703,7 +695,7 @@ depends on PM && !X86_VISWS config APM tristate "APM (Advanced Power Management) BIOS support" - depends on PM && PM_LEGACY + depends on PM ---help--- APM is a BIOS specification for saving power using several different techniques. This is mostly useful for battery powered laptops with diff --git a/arch/i386/Makefile b/arch/i386/Makefile index d121ea18460f..b84119f9cc63 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -37,10 +37,7 @@ CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) # CPU-specific tuning. Anything which can be shared with UML should go here. include $(srctree)/arch/i386/Makefile.cpu -# -mregparm=3 works ok on gcc-3.0 and later -# -GCC_VERSION := $(call cc-version) -cflags-$(CONFIG_REGPARM) += $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;) +cflags-$(CONFIG_REGPARM) += -mregparm=3 # Disable unit-at-a-time mode, it makes gcc use a lot more stack # due to the lack of sharing of stacklots. diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu index 8e51456df23d..dcd936ef45db 100644 --- a/arch/i386/Makefile.cpu +++ b/arch/i386/Makefile.cpu @@ -1,7 +1,7 @@ # CPU tuning section - shared with UML. # Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML. -#-mtune exists since gcc 3.4, and some -mcpu flavors didn't exist in gcc 2.95. +#-mtune exists since gcc 3.4 HAS_MTUNE := $(call cc-option-yn, -mtune=i386) ifeq ($(HAS_MTUNE),y) tune = $(call cc-option,-mtune=$(1),) @@ -14,7 +14,7 @@ cflags-$(CONFIG_M386) += -march=i386 cflags-$(CONFIG_M486) += -march=i486 cflags-$(CONFIG_M586) += -march=i586 cflags-$(CONFIG_M586TSC) += -march=i586 -cflags-$(CONFIG_M586MMX) += $(call cc-option,-march=pentium-mmx,-march=i586) +cflags-$(CONFIG_M586MMX) += -march=pentium-mmx cflags-$(CONFIG_M686) += -march=i686 cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2) cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3) @@ -23,8 +23,8 @@ cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4) cflags-$(CONFIG_MK6) += -march=k6 # Please note, that patches that add -march=athlon-xp and friends are pointless. # They make zero difference whatsosever to performance at this time. -cflags-$(CONFIG_MK7) += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4) -cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)) +cflags-$(CONFIG_MK7) += -march=athlon +cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) @@ -37,5 +37,5 @@ cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_X86_ELAN) += -march=i486 # Geode GX1 support -cflags-$(CONFIG_MGEODEGX1) += $(call cc-option,-march=pentium-mmx,-march=i486) +cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index 82a807f9f5e6..f19f3a7492a5 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -11,7 +11,7 @@ #include <linux/linkage.h> #include <linux/vmalloc.h> -#include <linux/tty.h> +#include <linux/screen_info.h> #include <asm/io.h> #include <asm/page.h> diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index f10de0f2c5e6..be1880bb75b4 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -4,10 +4,10 @@ extra-y := head.o init_task.o vmlinux.lds -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ +obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - doublefault.o quirks.o i8237.o + quirks.o i8237.o obj-y += cpu/ obj-y += timers/ @@ -33,6 +33,8 @@ obj-y += sysenter.o vsyscall.o obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o obj-$(CONFIG_EFI) += efi.o efi_stub.o +obj-$(CONFIG_DOUBLEFAULT) += doublefault.o +obj-$(CONFIG_VM86) += vm86.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o EXTRA_AFLAGS := -traditional diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 2d793d4aef1a..9d8827156e54 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -2291,7 +2291,9 @@ static int __init apm_init(void) apm_info.disabled = 1; return -ENODEV; } +#ifdef CONFIG_PM_LEGACY pm_active = 1; +#endif /* * Set up a segment that references the real mode segment 0x40 @@ -2382,7 +2384,9 @@ static void __exit apm_exit(void) exit_kapmd = 1; while (kapmd_running) schedule(); +#ifdef CONFIG_PM_LEGACY pm_active = 0; +#endif } module_init(apm_init); diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index cca655688ffc..170400879f44 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -609,8 +609,10 @@ void __devinit cpu_init(void) load_TR_desc(); load_LDT(&init_mm.context); +#ifdef CONFIG_DOUBLEFAULT /* Set up doublefault TSS pointer in the GDT */ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); +#endif /* Clear %fs and %gs. */ asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 607c06007508..4d704724b2f5 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -323,6 +323,7 @@ work_notifysig: # deal with pending signals and ALIGN work_notifysig_v86: +#ifdef CONFIG_VM86 pushl %ecx # save ti_flags for do_notify_resume call save_v86_state # %eax contains pt_regs pointer popl %ecx @@ -330,6 +331,7 @@ work_notifysig_v86: xorl %edx, %edx call do_notify_resume jmp resume_userspace +#endif # perform syscall exit tracing ALIGN diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c index 9caa8e8db80c..cff95d10a4d8 100644 --- a/arch/i386/kernel/init_task.c +++ b/arch/i386/kernel/init_task.c @@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task); * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; +DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 1a201a932865..f3a9c78c4a24 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -19,7 +19,7 @@ #include <linux/cpu.h> #include <linux/delay.h> -DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp; +DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; EXPORT_PER_CPU_SYMBOL(irq_stat); #ifndef CONFIG_X86_LOCAL_APIC diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 45e7f0ac4b04..035928f3f6c1 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -48,6 +48,7 @@ #include <asm/processor.h> #include <asm/i387.h> #include <asm/desc.h> +#include <asm/vm86.h> #ifdef CONFIG_MATH_EMULATION #include <asm/math_emu.h> #endif diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index f7ba4acc20ec..6ff3e5243226 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -293,3 +293,4 @@ ENTRY(sys_call_table) .long sys_inotify_init .long sys_inotify_add_watch .long sys_inotify_rm_watch + .long sys_migrate_pages diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 9caeaa315cd7..a529f0cdce17 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -259,8 +259,6 @@ __setup("hpet=", hpet_setup); #include <linux/mc146818rtc.h> #include <linux/rtc.h> -extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); - #define DEFAULT_RTC_INT_FREQ 64 #define RTC_NUM_INTS 1 diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 67932ad53082..57b047c27e46 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -37,10 +37,6 @@ $(error Sorry, you need a newer version of the assember, one that is built from ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz) endif -ifneq ($(shell if [ $(GCC_VERSION) -lt 0300 ] ; then echo "bad"; fi ;),) -$(error Sorry, your compiler is too old. GCC v2.96 is known to generate bad code.) -endif - ifeq ($(GCC_VERSION),0304) cflags-$(CONFIG_ITANIUM) += -mtune=merced cflags-$(CONFIG_MCKINLEY) += -mtune=mckinley diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index dc282710421a..9f8e8d558873 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1761,21 +1761,15 @@ sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data) lock_kernel(); if (request == PTRACE_TRACEME) { - ret = sys_ptrace(request, pid, addr, data); + ret = ptrace_traceme(); goto out; } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); goto out; - ret = -EPERM; - if (pid == 1) /* no messing around with init! */ - goto out_tsk; + } if (request == PTRACE_ATTACH) { ret = sys_ptrace(request, pid, addr, data); diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index a3aa45cbcfa0..c485a3b32ba8 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -247,6 +247,32 @@ typedef struct kern_memdesc { static kern_memdesc_t *kern_memmap; +#define efi_md_size(md) (md->num_pages << EFI_PAGE_SHIFT) + +static inline u64 +kmd_end(kern_memdesc_t *kmd) +{ + return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT)); +} + +static inline u64 +efi_md_end(efi_memory_desc_t *md) +{ + return (md->phys_addr + efi_md_size(md)); +} + +static inline int +efi_wb(efi_memory_desc_t *md) +{ + return (md->attribute & EFI_MEMORY_WB); +} + +static inline int +efi_uc(efi_memory_desc_t *md) +{ + return (md->attribute & EFI_MEMORY_UC); +} + static void walk (efi_freemem_callback_t callback, void *arg, u64 attr) { @@ -595,8 +621,8 @@ efi_get_iobase (void) return 0; } -u32 -efi_mem_type (unsigned long phys_addr) +static efi_memory_desc_t * +efi_memory_descriptor (unsigned long phys_addr) { void *efi_map_start, *efi_map_end, *p; efi_memory_desc_t *md; @@ -610,13 +636,13 @@ efi_mem_type (unsigned long phys_addr) md = p; if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) - return md->type; + return md; } return 0; } -u64 -efi_mem_attributes (unsigned long phys_addr) +static int +efi_memmap_has_mmio (void) { void *efi_map_start, *efi_map_end, *p; efi_memory_desc_t *md; @@ -629,36 +655,98 @@ efi_mem_attributes (unsigned long phys_addr) for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { md = p; - if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) - return md->attribute; + if (md->type == EFI_MEMORY_MAPPED_IO) + return 1; } return 0; } + +u32 +efi_mem_type (unsigned long phys_addr) +{ + efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); + + if (md) + return md->type; + return 0; +} + +u64 +efi_mem_attributes (unsigned long phys_addr) +{ + efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); + + if (md) + return md->attribute; + return 0; +} EXPORT_SYMBOL(efi_mem_attributes); +/* + * Determines whether the memory at phys_addr supports the desired + * attribute (WB, UC, etc). If this returns 1, the caller can safely + * access *size bytes at phys_addr with the specified attribute. + */ +static int +efi_mem_attribute_range (unsigned long phys_addr, unsigned long *size, u64 attr) +{ + efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); + unsigned long md_end; + + if (!md || (md->attribute & attr) != attr) + return 0; + + do { + md_end = efi_md_end(md); + if (phys_addr + *size <= md_end) + return 1; + + md = efi_memory_descriptor(md_end); + if (!md || (md->attribute & attr) != attr) { + *size = md_end - phys_addr; + return 1; + } + } while (md); + return 0; +} + +/* + * For /dev/mem, we only allow read & write system calls to access + * write-back memory, because read & write don't allow the user to + * control access size. + */ int valid_phys_addr_range (unsigned long phys_addr, unsigned long *size) { - void *efi_map_start, *efi_map_end, *p; - efi_memory_desc_t *md; - u64 efi_desc_size; + return efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB); +} - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; +/* + * We allow mmap of anything in the EFI memory map that supports + * either write-back or uncacheable access. For uncacheable regions, + * the supported access sizes are system-dependent, and the user is + * responsible for using the correct size. + * + * Note that this doesn't currently allow access to hot-added memory, + * because that doesn't appear in the boot-time EFI memory map. + */ +int +valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long *size) +{ + if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB)) + return 1; - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - md = p; + if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_UC)) + return 1; - if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) { - if (!(md->attribute & EFI_MEMORY_WB)) - return 0; + /* + * Some firmware doesn't report MMIO regions in the EFI memory map. + * The Intel BigSur (a.k.a. HP i2000) has this problem. In this + * case, we can't use the EFI memory map to validate mmap requests. + */ + if (!efi_memmap_has_mmio()) + return 1; - if (*size > md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr) - *size = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr; - return 1; - } - } return 0; } @@ -707,32 +795,6 @@ efi_uart_console_only(void) return 0; } -#define efi_md_size(md) (md->num_pages << EFI_PAGE_SHIFT) - -static inline u64 -kmd_end(kern_memdesc_t *kmd) -{ - return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT)); -} - -static inline u64 -efi_md_end(efi_memory_desc_t *md) -{ - return (md->phys_addr + efi_md_size(md)); -} - -static inline int -efi_wb(efi_memory_desc_t *md) -{ - return (md->attribute & EFI_MEMORY_WB); -} - -static inline int -efi_uc(efi_memory_desc_t *md) -{ - return (md->attribute & EFI_MEMORY_UC); -} - /* * Look for the first granule aligned memory descriptor memory * that is big enough to hold EFI memory map. Make sure this diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 0741b066b98f..7a6ffd613789 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1600,5 +1600,6 @@ sys_call_table: data8 sys_inotify_init data8 sys_inotify_add_watch data8 sys_inotify_rm_watch + data8 sys_migrate_pages // 1280 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index bfe65b2e8621..fbc7ea35dd57 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -1060,7 +1060,7 @@ SET_REG(b5); * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h. */ -#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4) .prologue diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 5db9d3bcbbcb..e72de580ebbf 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -103,7 +103,7 @@ EXPORT_SYMBOL(unw_init_running); #ifdef ASM_SUPPORTED # ifdef CONFIG_SMP -# if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +# if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) /* * This is not a normal routine and we don't want a function descriptor for it, so we use * a fake declaration here. diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 4b19d0410632..8d88eeea02d1 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1422,14 +1422,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) lock_kernel(); ret = -EPERM; if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - current->ptrace |= PT_PTRACED; - ret = 0; + ret = ptrace_traceme(); goto out; } diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c index b7dabbfb0d61..adb01566bd57 100644 --- a/arch/ia64/oprofile/backtrace.c +++ b/arch/ia64/oprofile/backtrace.c @@ -32,7 +32,7 @@ typedef struct u64 *prev_pfs_loc; /* state for WAR for old spinlock ool code */ } ia64_backtrace_t; -#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) /* * Returns non-zero if the PC is in the spinlock contention out-of-line code * with non-standard calling sequence (on older compilers). diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index cc4b571e5db7..3bf55d92933f 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -50,6 +50,10 @@ unsigned long thread_saved_pc(struct task_struct *tsk) * Powermanagement idle function, if any.. */ void (*pm_idle)(void) = NULL; +EXPORT_SYMBOL(pm_idle); + +void (*pm_power_off)(void) = NULL; +EXPORT_SYMBOL(pm_power_off); void disable_hlt(void) { diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index 078d2a0e71c2..9b75caaf5cec 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -762,28 +762,16 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) int ret; lock_kernel(); - ret = -EPERM; if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; + ret = ptrace_traceme(); goto out; } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); goto out; + } if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 1dd5d18b2201..96b919828053 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -10,10 +10,6 @@ config MMU bool default y -config UID16 - bool - default y - config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index b96498120fe9..e2a6e8648960 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -17,10 +17,6 @@ config FPU bool default n -config UID16 - bool - default y - config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 9a9b04972132..7e55457a491f 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -57,30 +57,16 @@ asmlinkage int sys32_ptrace(int request, int pid, int addr, int data) (unsigned long) data); #endif lock_kernel(); - ret = -EPERM; if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - if ((ret = security_ptrace(current->parent, current))) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; + ret = ptrace_traceme(); goto out; } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + goto out; + } if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index 07631a97670b..ce907eda221b 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/signal.h> /* for SIGBUS */ +#include <linux/sched.h> /* schow_regs(), force_sig() */ #include <asm/module.h> #include <asm/sn/addrs.h> diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 874a283edb95..e77a06e9621e 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -19,9 +19,6 @@ config MMU config STACK_GROWSUP def_bool y -config UID16 - bool - config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index db93dbc0e21a..331483ace0d9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -26,9 +26,6 @@ config MMU bool default y -config UID16 - bool - config GENERIC_HARDIRQS bool default y diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 61762640b877..826ee3d056de 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -45,33 +45,19 @@ long compat_sys_ptrace(int request, int pid, unsigned long addr, unsigned long data) { struct task_struct *child; - int ret = -EPERM; + int ret; lock_kernel(); if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; + ret = ptrace_traceme(); goto out; } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + goto out; + } if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index cc3f64c084c5..e396f4591d59 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -8,9 +8,6 @@ config MMU bool default y -config UID16 - bool - config GENERIC_HARDIRQS bool default y diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 8ecda6d66de4..cc02232aa96e 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -712,35 +712,18 @@ sys_ptrace(long request, long pid, long addr, long data) int ret; lock_kernel(); - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - ret = -EPERM; - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - goto out; + ret = ptrace_traceme(); + goto out; } - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out; - - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); goto out; + } ret = do_ptrace(child, request, addr, data); - put_task_struct(child); out: unlock_kernel(); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 64f5ae0ff96d..8cf6d437a630 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -14,10 +14,6 @@ config SUPERH gaming console. The SuperH port has a home page at <http://www.linux-sh.org/>. -config UID16 - bool - default y - config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/sh64/kernel/time.c b/arch/sh64/kernel/time.c index 870fe5327e09..1195af37ee5a 100644 --- a/arch/sh64/kernel/time.c +++ b/arch/sh64/kernel/time.c @@ -417,7 +417,7 @@ static __init unsigned int get_cpu_hz(void) /* ** Regardless the toolchain, force the compiler to use the ** arbitrary register r3 as a clock tick counter. - ** NOTE: r3 must be in accordance with rtc_interrupt() + ** NOTE: r3 must be in accordance with sh64_rtc_interrupt() */ register unsigned long long __rtc_irq_flag __asm__ ("r3"); @@ -482,7 +482,8 @@ static __init unsigned int get_cpu_hz(void) #endif } -static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sh64_rtc_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { ctrl_outb(0, RCR1); /* Disable Carry Interrupts */ regs->regs[3] = 1; /* Using r3 */ @@ -491,7 +492,7 @@ static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) } static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; -static struct irqaction irq1 = { rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL}; +static struct irqaction irq1 = { sh64_rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL}; void __init time_init(void) { diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 56c34e7fd4ee..f944b58cdfe7 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -9,10 +9,6 @@ config MMU bool default y -config UID16 - bool - default y - config HIGHMEM bool default y diff --git a/arch/sparc/kernel/ptrace.c b/arch/sparc/kernel/ptrace.c index 475c4c13462c..fc470c0e9dc6 100644 --- a/arch/sparc/kernel/ptrace.c +++ b/arch/sparc/kernel/ptrace.c @@ -286,40 +286,17 @@ asmlinkage void do_ptrace(struct pt_regs *regs) s, (int) request, (int) pid, addr, data, addr2); } #endif - if (request == PTRACE_TRACEME) { - int my_ret; - - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) { - pt_error_return(regs, EPERM); - goto out; - } - my_ret = security_ptrace(current->parent, current); - if (my_ret) { - pt_error_return(regs, -my_ret); - goto out; - } - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; + if (request == PTRACE_TRACEME) { + ret = ptrace_traceme(); pt_succ_return(regs, 0); goto out; } -#ifndef ALLOW_INIT_TRACING - if (pid == 1) { - /* Can't dork with init. */ - pt_error_return(regs, EPERM); - goto out; - } -#endif - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) { - pt_error_return(regs, ESRCH); + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + pt_error_return(regs, -ret); goto out; } diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index c4b7ad70cd7c..b775ceb4cf98 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -309,11 +309,6 @@ config COMPAT depends on SPARC32_COMPAT default y -config UID16 - bool - depends on SPARC32_COMPAT - default y - config BINFMT_ELF32 tristate "Kernel support for 32-bit ELF binaries" depends on SPARC32_COMPAT diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index 774ecbb8a031..84d3df2264cb 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -198,39 +198,15 @@ asmlinkage void do_ptrace(struct pt_regs *regs) } #endif if (request == PTRACE_TRACEME) { - int ret; - - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) { - pt_error_return(regs, EPERM); - goto out; - } - ret = security_ptrace(current->parent, current); - if (ret) { - pt_error_return(regs, -ret); - goto out; - } - - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; + ret = ptrace_traceme(); pt_succ_return(regs, 0); goto out; } -#ifndef ALLOW_INIT_TRACING - if (pid == 1) { - /* Can't dork with init. */ - pt_error_return(regs, EPERM); - goto out; - } -#endif - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) { - pt_error_return(regs, ESRCH); + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + pt_error_return(regs, -ret); goto out; } diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 1eb21de9d1b5..b4ff2e576021 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -22,10 +22,6 @@ config SBUS config PCI bool -config UID16 - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -83,7 +79,7 @@ config KERNEL_HALF_GIGS of physical memory. config MODE_SKAS - bool "Separate Kernel Address Space support" + bool "Separate Kernel Address Space support" if MODE_TT default y help This option controls whether skas (separate kernel address space) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 73f9652b2ee9..3a93c6f772fa 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -117,6 +117,7 @@ static int ubd_open(struct inode * inode, struct file * filp); static int ubd_release(struct inode * inode, struct file * file); static int ubd_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg); +static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); #define MAX_DEV (8) @@ -125,6 +126,7 @@ static struct block_device_operations ubd_blops = { .open = ubd_open, .release = ubd_release, .ioctl = ubd_ioctl, + .getgeo = ubd_getgeo, }; /* Protected by the queue_lock */ @@ -1058,6 +1060,16 @@ static void do_ubd_request(request_queue_t *q) } } +static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct ubd *dev = bdev->bd_disk->private_data; + + geo->heads = 128; + geo->sectors = 32; + geo->cylinders = dev->size / (128 * 32 * 512); + return 0; +} + static int ubd_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { @@ -1070,16 +1082,7 @@ static int ubd_ioctl(struct inode * inode, struct file * file, }; switch (cmd) { - struct hd_geometry g; struct cdrom_volctrl volume; - case HDIO_GETGEO: - if(!loc) return(-EINVAL); - g.heads = 128; - g.sectors = 32; - g.cylinders = dev->size / (128 * 32 * 512); - g.start = get_start_sect(inode->i_bdev); - return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0); - case HDIO_GET_IDENTITY: ubd_id.cyls = dev->size / (128 * 32 * 512); if(copy_to_user((char __user *) arg, (char *) &ubd_id, diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h index e5fec5570199..8f4e46d677ab 100644 --- a/arch/um/include/kern_util.h +++ b/arch/um/include/kern_util.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -10,6 +10,19 @@ #include "sysdep/ptrace.h" #include "sysdep/faultinfo.h" +typedef void (*kern_hndl)(int, union uml_pt_regs *); + +struct kern_handlers { + kern_hndl relay_signal; + kern_hndl winch; + kern_hndl bus_handler; + kern_hndl page_fault; + kern_hndl sigio_handler; + kern_hndl timer_handler; +}; + +extern struct kern_handlers handlinfo_kern; + extern int ncpus; extern char *linux_prog; extern char *gdb_init; @@ -51,8 +64,6 @@ extern void timer_handler(int sig, union uml_pt_regs *regs); extern int set_signals(int enable); extern void force_sigbus(void); extern int pid_to_processor_id(int pid); -extern void block_signals(void); -extern void unblock_signals(void); extern void deliver_signals(void *t); extern int next_syscall_index(int max); extern int next_trap_index(int max); @@ -111,6 +122,8 @@ extern void arch_switch(void); extern void free_irq(unsigned int, void *); extern int um_in_interrupt(void); extern int cpu(void); +extern void segv_handler(int sig, union uml_pt_regs *regs); +extern void sigio_handler(int sig, union uml_pt_regs *regs); #endif diff --git a/arch/um/include/os.h b/arch/um/include/os.h index c279ee6d89e4..dd72d66cf0ed 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -9,6 +9,8 @@ #include "uml-config.h" #include "asm/types.h" #include "../os/include/file.h" +#include "sysdep/ptrace.h" +#include "kern_util.h" #define OS_TYPE_FILE 1 #define OS_TYPE_DIR 2 @@ -219,4 +221,18 @@ extern int umid_file_name(char *name, char *buf, int len); extern int set_umid(char *name); extern char *get_umid(void); +/* signal.c */ +extern void set_sigstack(void *sig_stack, int size); +extern void remove_sigstack(void); +extern void set_handler(int sig, void (*handler)(int), int flags, ...); +extern int change_sig(int signal, int on); +extern void block_signals(void); +extern void unblock_signals(void); +extern int get_signals(void); +extern int set_signals(int enable); + +/* trap.c */ +extern void os_fill_handlinfo(struct kern_handlers h); +extern void do_longjmp(void *p, int val); + #endif diff --git a/arch/um/include/signal_user.h b/arch/um/include/signal_user.h deleted file mode 100644 index b075e543d864..000000000000 --- a/arch/um/include/signal_user.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#ifndef __SIGNAL_USER_H__ -#define __SIGNAL_USER_H__ - -extern int signal_stack_size; - -extern int change_sig(int signal, int on); -extern void set_sigstack(void *stack, int size); -extern void set_handler(int sig, void (*handler)(int), int flags, ...); -extern int set_signals(int enable); -extern int get_signals(void); - -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/user_util.h b/arch/um/include/user_util.h index b9984003e603..c1dbd77b073f 100644 --- a/arch/um/include/user_util.h +++ b/arch/um/include/user_util.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -23,12 +23,7 @@ struct cpu_task { extern struct cpu_task cpu_tasks[]; -struct signal_info { - void (*handler)(int, union uml_pt_regs *); - int is_irq; -}; - -extern struct signal_info sig_info[]; +extern void (*sig_info[])(int, union uml_pt_regs *); extern unsigned long low_physmem; extern unsigned long high_physmem; @@ -64,7 +59,6 @@ extern void setup_machinename(char *machine_out); extern void setup_hostinfo(void); extern void do_exec(int old_pid, int new_pid); extern void tracer_panic(char *msg, ...); -extern void do_longjmp(void *p, int val); extern int detach(int pid, int sig); extern int attach(int pid); extern void kill_child_dead(int pid); diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 6f7700593a6f..193cc2b7448d 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -9,8 +9,8 @@ clean-files := obj-y = config.o exec_kern.o exitcode.o \ init_task.o irq.o irq_user.o ksyms.o mem.o physmem.o \ process_kern.o ptrace.o reboot.o resource.o sigio_user.o sigio_kern.o \ - signal_kern.o signal_user.o smp.o syscall_kern.o sysrq.o time.o \ - time_kern.o tlb.o trap_kern.o trap_user.o uaccess.o um_arch.o umid.o \ + signal_kern.o smp.o syscall_kern.o sysrq.o time.o \ + time_kern.o tlb.o trap_kern.o uaccess.o um_arch.o umid.o \ user_util.o obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o diff --git a/arch/um/kernel/irq_user.c b/arch/um/kernel/irq_user.c index 50a2aa35cda9..0e32f5f4a887 100644 --- a/arch/um/kernel/irq_user.c +++ b/arch/um/kernel/irq_user.c @@ -15,7 +15,6 @@ #include "kern_util.h" #include "user.h" #include "process.h" -#include "signal_user.h" #include "sigio.h" #include "irq_user.h" #include "os.h" diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 651abf255bc5..d2d3f256778c 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -36,7 +36,6 @@ #include "kern_util.h" #include "kern.h" #include "signal_kern.h" -#include "signal_user.h" #include "init.h" #include "irq_user.h" #include "mem_user.h" diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index a637e885c583..6f1a3a288117 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -12,6 +12,8 @@ #include "mode.h" #include "choose-mode.h" +void (*pm_power_off)(void); + #ifdef CONFIG_SMP static void kill_idlers(int me) { diff --git a/arch/um/kernel/signal_kern.c b/arch/um/kernel/signal_kern.c index 03618bd13d55..7b0e0e81c161 100644 --- a/arch/um/kernel/signal_kern.c +++ b/arch/um/kernel/signal_kern.c @@ -22,7 +22,6 @@ #include "asm/ucontext.h" #include "kern_util.h" #include "signal_kern.h" -#include "signal_user.h" #include "kern.h" #include "frame_kern.h" #include "sigcontext.h" diff --git a/arch/um/kernel/signal_user.c b/arch/um/kernel/signal_user.c deleted file mode 100644 index 62f457835fb1..000000000000 --- a/arch/um/kernel/signal_user.c +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include <signal.h> -#include <errno.h> -#include <stdarg.h> -#include <string.h> -#include <sys/mman.h> -#include "user_util.h" -#include "kern_util.h" -#include "user.h" -#include "signal_user.h" -#include "signal_kern.h" -#include "sysdep/sigcontext.h" -#include "sigcontext.h" - -void set_sigstack(void *sig_stack, int size) -{ - stack_t stack = ((stack_t) { .ss_flags = 0, - .ss_sp = (__ptr_t) sig_stack, - .ss_size = size - sizeof(void *) }); - - if(sigaltstack(&stack, NULL) != 0) - panic("enabling signal stack failed, errno = %d\n", errno); -} - -void set_handler(int sig, void (*handler)(int), int flags, ...) -{ - struct sigaction action; - va_list ap; - int mask; - - va_start(ap, flags); - action.sa_handler = handler; - sigemptyset(&action.sa_mask); - while((mask = va_arg(ap, int)) != -1){ - sigaddset(&action.sa_mask, mask); - } - va_end(ap); - action.sa_flags = flags; - action.sa_restorer = NULL; - if(sigaction(sig, &action, NULL) < 0) - panic("sigaction failed"); -} - -int change_sig(int signal, int on) -{ - sigset_t sigset, old; - - sigemptyset(&sigset); - sigaddset(&sigset, signal); - sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old); - return(!sigismember(&old, signal)); -} - -/* Both here and in set/get_signal we don't touch SIGPROF, because we must not - * disable profiling; it's safe because the profiling code does not interact - * with the kernel code at all.*/ - -static void change_signals(int type) -{ - sigset_t mask; - - sigemptyset(&mask); - sigaddset(&mask, SIGVTALRM); - sigaddset(&mask, SIGALRM); - sigaddset(&mask, SIGIO); - if(sigprocmask(type, &mask, NULL) < 0) - panic("Failed to change signal mask - errno = %d", errno); -} - -void block_signals(void) -{ - change_signals(SIG_BLOCK); -} - -void unblock_signals(void) -{ - change_signals(SIG_UNBLOCK); -} - -/* These are the asynchronous signals. SIGVTALRM and SIGARLM are handled - * together under SIGVTALRM_BIT. SIGPROF is excluded because we want to - * be able to profile all of UML, not just the non-critical sections. If - * profiling is not thread-safe, then that is not my problem. We can disable - * profiling when SMP is enabled in that case. - */ -#define SIGIO_BIT 0 -#define SIGVTALRM_BIT 1 - -static int enable_mask(sigset_t *mask) -{ - int sigs; - - sigs = sigismember(mask, SIGIO) ? 0 : 1 << SIGIO_BIT; - sigs |= sigismember(mask, SIGVTALRM) ? 0 : 1 << SIGVTALRM_BIT; - sigs |= sigismember(mask, SIGALRM) ? 0 : 1 << SIGVTALRM_BIT; - return(sigs); -} - -int get_signals(void) -{ - sigset_t mask; - - if(sigprocmask(SIG_SETMASK, NULL, &mask) < 0) - panic("Failed to get signal mask"); - return(enable_mask(&mask)); -} - -int set_signals(int enable) -{ - sigset_t mask; - int ret; - - sigemptyset(&mask); - if(enable & (1 << SIGIO_BIT)) - sigaddset(&mask, SIGIO); - if(enable & (1 << SIGVTALRM_BIT)){ - sigaddset(&mask, SIGVTALRM); - sigaddset(&mask, SIGALRM); - } - - /* This is safe - sigprocmask is guaranteed to copy locally the - * value of new_set, do his work and then, at the end, write to - * old_set. - */ - if(sigprocmask(SIG_UNBLOCK, &mask, &mask) < 0) - panic("Failed to enable signals"); - ret = enable_mask(&mask); - sigemptyset(&mask); - if((enable & (1 << SIGIO_BIT)) == 0) - sigaddset(&mask, SIGIO); - if((enable & (1 << SIGVTALRM_BIT)) == 0){ - sigaddset(&mask, SIGVTALRM); - sigaddset(&mask, SIGALRM); - } - if(sigprocmask(SIG_BLOCK, &mask, NULL) < 0) - panic("Failed to block signals"); - - return(ret); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index 8de471b59c1c..7a9fc16d71d4 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -4,7 +4,7 @@ # obj-y := clone.o exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ - syscall.o tlb.o trap_user.o uaccess.o + syscall.o tlb.o uaccess.o USER_OBJS := process.o clone.o diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index daa2f85b684c..01d489de3986 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -22,7 +22,6 @@ extern int start_idle_thread(void *stack, void *switch_buf_ptr, extern int user_thread(unsigned long stack, int flags); extern void userspace(union uml_pt_regs *regs); extern void new_thread_proc(void *stack, void (*handler)(int sig)); -extern void remove_sigstack(void); extern void new_thread_handler(int sig); extern void handle_syscall(union uml_pt_regs *regs); extern int map(struct mm_id * mm_idp, unsigned long virt, diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 599d679bd4fc..9264d4021dfe 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -31,7 +31,6 @@ #include "proc_mm.h" #include "skas_ptrace.h" #include "chan_user.h" -#include "signal_user.h" #include "registers.h" #include "mem.h" #include "uml-config.h" @@ -514,16 +513,6 @@ int start_idle_thread(void *stack, void *switch_buf_ptr, void **fork_buf_ptr) siglongjmp(**switch_buf, 1); } -void remove_sigstack(void) -{ - stack_t stack = ((stack_t) { .ss_flags = SS_DISABLE, - .ss_sp = NULL, - .ss_size = 0 }); - - if(sigaltstack(&stack, NULL) != 0) - panic("disabling signal stack failed, errno = %d\n", errno); -} - void initial_thread_cb_skas(void (*proc)(void *), void *arg) { sigjmp_buf here; diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index 9c990253966c..09790ccb161c 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -14,7 +14,6 @@ #include "asm/atomic.h" #include "kern_util.h" #include "time_user.h" -#include "signal_user.h" #include "skas.h" #include "os.h" #include "user_util.h" diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index c40b611e3d93..11f518a7e156 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -14,9 +14,9 @@ #include "kern_util.h" #include "user.h" #include "process.h" -#include "signal_user.h" #include "time_user.h" #include "kern_constants.h" +#include "os.h" /* XXX This really needs to be declared and initialized in a kernel file since * it's in <linux/time.h> diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c index 0d4c10a73607..d56046c2aba2 100644 --- a/arch/um/kernel/trap_kern.c +++ b/arch/um/kernel/trap_kern.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -26,9 +26,13 @@ #include "mconsole_kern.h" #include "mem.h" #include "mem_kern.h" +#include "sysdep/sigcontext.h" +#include "sysdep/ptrace.h" +#include "os.h" #ifdef CONFIG_MODE_SKAS #include "skas.h" #endif +#include "os.h" /* Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by segv(). */ int handle_page_fault(unsigned long address, unsigned long ip, @@ -125,6 +129,25 @@ out_of_memory: goto out; } +void segv_handler(int sig, union uml_pt_regs *regs) +{ + struct faultinfo * fi = UPT_FAULTINFO(regs); + + if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){ + bad_segv(*fi, UPT_IP(regs)); + return; + } + segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs); +} + +struct kern_handlers handlinfo_kern = { + .relay_signal = relay_signal, + .winch = winch, + .bus_handler = relay_signal, + .page_fault = segv_handler, + .sigio_handler = sigio_handler, + .timer_handler = timer_handler +}; /* * We give a *copy* of the faultinfo in the regs to segv. * This must be done, since nesting SEGVs could overwrite diff --git a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c deleted file mode 100644 index e9ccd6b8d3c7..000000000000 --- a/arch/um/kernel/trap_user.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <stdlib.h> -#include <errno.h> -#include <setjmp.h> -#include <signal.h> -#include <sys/time.h> -#include <sys/wait.h> -#include <asm/page.h> -#include <asm/unistd.h> -#include <asm/ptrace.h> -#include "init.h" -#include "sysdep/ptrace.h" -#include "sigcontext.h" -#include "sysdep/sigcontext.h" -#include "irq_user.h" -#include "signal_user.h" -#include "time_user.h" -#include "task.h" -#include "mode.h" -#include "choose-mode.h" -#include "kern_util.h" -#include "user_util.h" -#include "os.h" - -void kill_child_dead(int pid) -{ - kill(pid, SIGKILL); - kill(pid, SIGCONT); - do { - int n; - CATCH_EINTR(n = waitpid(pid, NULL, 0)); - if (n > 0) - kill(pid, SIGCONT); - else - break; - } while(1); -} - -void segv_handler(int sig, union uml_pt_regs *regs) -{ - struct faultinfo * fi = UPT_FAULTINFO(regs); - - if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){ - bad_segv(*fi, UPT_IP(regs)); - return; - } - segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs); -} - -void usr2_handler(int sig, union uml_pt_regs *regs) -{ - CHOOSE_MODE(syscall_handler_tt(sig, regs), (void) 0); -} - -struct signal_info sig_info[] = { - [ SIGTRAP ] { .handler = relay_signal, - .is_irq = 0 }, - [ SIGFPE ] { .handler = relay_signal, - .is_irq = 0 }, - [ SIGILL ] { .handler = relay_signal, - .is_irq = 0 }, - [ SIGWINCH ] { .handler = winch, - .is_irq = 1 }, - [ SIGBUS ] { .handler = bus_handler, - .is_irq = 0 }, - [ SIGSEGV] { .handler = segv_handler, - .is_irq = 0 }, - [ SIGIO ] { .handler = sigio_handler, - .is_irq = 1 }, - [ SIGVTALRM ] { .handler = timer_handler, - .is_irq = 1 }, - [ SIGALRM ] { .handler = timer_handler, - .is_irq = 1 }, - [ SIGUSR2 ] { .handler = usr2_handler, - .is_irq = 0 }, -}; - -void do_longjmp(void *b, int val) -{ - sigjmp_buf *buf = b; - - siglongjmp(*buf, val); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/tt/exec_kern.c b/arch/um/kernel/tt/exec_kern.c index 065b504a653b..136e54c47d37 100644 --- a/arch/um/kernel/tt/exec_kern.c +++ b/arch/um/kernel/tt/exec_kern.c @@ -14,7 +14,6 @@ #include "kern_util.h" #include "irq_user.h" #include "time_user.h" -#include "signal_user.h" #include "mem_user.h" #include "os.h" #include "tlb.h" diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c index cfaa373a6e77..14d4622a5fb8 100644 --- a/arch/um/kernel/tt/process_kern.c +++ b/arch/um/kernel/tt/process_kern.c @@ -13,7 +13,6 @@ #include "asm/ptrace.h" #include "asm/tlbflush.h" #include "irq_user.h" -#include "signal_user.h" #include "kern_util.h" #include "user_util.h" #include "os.h" diff --git a/arch/um/kernel/tt/tracer.c b/arch/um/kernel/tt/tracer.c index d11e7399d7a1..71daae24e48a 100644 --- a/arch/um/kernel/tt/tracer.c +++ b/arch/um/kernel/tt/tracer.c @@ -19,7 +19,6 @@ #include "sigcontext.h" #include "sysdep/sigcontext.h" #include "os.h" -#include "signal_user.h" #include "user_util.h" #include "mem_user.h" #include "process.h" diff --git a/arch/um/kernel/tt/trap_user.c b/arch/um/kernel/tt/trap_user.c index fc108615beaf..a414c529fbcd 100644 --- a/arch/um/kernel/tt/trap_user.c +++ b/arch/um/kernel/tt/trap_user.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -8,18 +8,18 @@ #include <signal.h> #include "sysdep/ptrace.h" #include "sysdep/sigcontext.h" -#include "signal_user.h" #include "user_util.h" #include "kern_util.h" #include "task.h" #include "tt.h" +#include "os.h" void sig_handler_common_tt(int sig, void *sc_ptr) { struct sigcontext *sc = sc_ptr; struct tt_regs save_regs, *r; - struct signal_info *info; int save_errno = errno, is_user; + void (*handler)(int, union uml_pt_regs *); /* This is done because to allow SIGSEGV to be delivered inside a SEGV * handler. This can happen in copy_user, and if SEGV is disabled, @@ -40,10 +40,14 @@ void sig_handler_common_tt(int sig, void *sc_ptr) if(sig != SIGUSR2) r->syscall = -1; - info = &sig_info[sig]; - if(!info->is_irq) unblock_signals(); + handler = sig_info[sig]; + + /* unblock SIGALRM, SIGVTALRM, SIGIO if sig isn't IRQ signal */ + if (sig != SIGIO && sig != SIGWINCH && + sig != SIGVTALRM && sig != SIGALRM) + unblock_signals(); - (*info->handler)(sig, (union uml_pt_regs *) r); + handler(sig, (union uml_pt_regs *) r); if(is_user){ interrupt_end(); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 26626b2b9172..73747ac19774 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -363,6 +363,11 @@ int linux_main(int argc, char **argv) uml_start = CHOOSE_MODE_PROC(set_task_sizes_tt, set_task_sizes_skas, 0, &host_task_size, &task_size); + /* + * Setting up handlers to 'sig_info' struct + */ + os_fill_handlinfo(handlinfo_kern); + brk_start = (unsigned long) sbrk(0); CHOOSE_MODE_PROC(before_mem_tt, before_mem_skas, brk_start); /* Increase physical memory size for exec-shield users diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 11e30b13e318..40c7d6b1df68 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -4,11 +4,13 @@ # obj-y = aio.o elf_aux.o file.o helper.o main.o mem.o process.o signal.o \ - start_up.o time.o tt.o tty.o uaccess.o umid.o user_syms.o drivers/ \ - sys-$(SUBARCH)/ + start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o user_syms.o \ + drivers/ sys-$(SUBARCH)/ + +obj-$(CONFIG_MODE_SKAS) += skas/ USER_OBJS := aio.o elf_aux.o file.o helper.o main.o mem.o process.o signal.o \ - start_up.o time.o tt.o tty.o uaccess.o umid.o + start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o elf_aux.o: $(ARCH_DIR)/kernel-offsets.h CFLAGS_elf_aux.o += -I$(objtree)/arch/um diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 23da27d22569..172c8474453c 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -16,7 +16,6 @@ #include "user_util.h" #include "kern_util.h" #include "mem_user.h" -#include "signal_user.h" #include "time_user.h" #include "irq_user.h" #include "user.h" diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index d9c52387c4a1..39815c6b5e45 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -15,7 +15,6 @@ #include "os.h" #include "user.h" #include "user_util.h" -#include "signal_user.h" #include "process.h" #include "irq_user.h" #include "kern_util.h" diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index c7bfd5ee3925..c1f46a0fef13 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -4,9 +4,22 @@ */ #include <signal.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> +#include <string.h> +#include <sys/mman.h> +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "signal_kern.h" +#include "sysdep/sigcontext.h" +#include "sysdep/signal.h" +#include "sigcontext.h" #include "time_user.h" #include "mode.h" -#include "sysdep/signal.h" void sig_handler(ARCH_SIGHDLR_PARAM) { @@ -36,13 +49,138 @@ void alarm_handler(ARCH_SIGHDLR_PARAM) switch_timers(1); } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: +void set_sigstack(void *sig_stack, int size) +{ + stack_t stack = ((stack_t) { .ss_flags = 0, + .ss_sp = (__ptr_t) sig_stack, + .ss_size = size - sizeof(void *) }); + + if(sigaltstack(&stack, NULL) != 0) + panic("enabling signal stack failed, errno = %d\n", errno); +} + +void remove_sigstack(void) +{ + stack_t stack = ((stack_t) { .ss_flags = SS_DISABLE, + .ss_sp = NULL, + .ss_size = 0 }); + + if(sigaltstack(&stack, NULL) != 0) + panic("disabling signal stack failed, errno = %d\n", errno); +} + +void set_handler(int sig, void (*handler)(int), int flags, ...) +{ + struct sigaction action; + va_list ap; + int mask; + + va_start(ap, flags); + action.sa_handler = handler; + sigemptyset(&action.sa_mask); + while((mask = va_arg(ap, int)) != -1){ + sigaddset(&action.sa_mask, mask); + } + va_end(ap); + action.sa_flags = flags; + action.sa_restorer = NULL; + if(sigaction(sig, &action, NULL) < 0) + panic("sigaction failed"); +} + +int change_sig(int signal, int on) +{ + sigset_t sigset, old; + + sigemptyset(&sigset); + sigaddset(&sigset, signal); + sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old); + return(!sigismember(&old, signal)); +} + +/* Both here and in set/get_signal we don't touch SIGPROF, because we must not + * disable profiling; it's safe because the profiling code does not interact + * with the kernel code at all.*/ + +static void change_signals(int type) +{ + sigset_t mask; + + sigemptyset(&mask); + sigaddset(&mask, SIGVTALRM); + sigaddset(&mask, SIGALRM); + sigaddset(&mask, SIGIO); + if(sigprocmask(type, &mask, NULL) < 0) + panic("Failed to change signal mask - errno = %d", errno); +} + +void block_signals(void) +{ + change_signals(SIG_BLOCK); +} + +void unblock_signals(void) +{ + change_signals(SIG_UNBLOCK); +} + +/* These are the asynchronous signals. SIGVTALRM and SIGARLM are handled + * together under SIGVTALRM_BIT. SIGPROF is excluded because we want to + * be able to profile all of UML, not just the non-critical sections. If + * profiling is not thread-safe, then that is not my problem. We can disable + * profiling when SMP is enabled in that case. */ +#define SIGIO_BIT 0 +#define SIGVTALRM_BIT 1 + +static int enable_mask(sigset_t *mask) +{ + int sigs; + + sigs = sigismember(mask, SIGIO) ? 0 : 1 << SIGIO_BIT; + sigs |= sigismember(mask, SIGVTALRM) ? 0 : 1 << SIGVTALRM_BIT; + sigs |= sigismember(mask, SIGALRM) ? 0 : 1 << SIGVTALRM_BIT; + return(sigs); +} + +int get_signals(void) +{ + sigset_t mask; + + if(sigprocmask(SIG_SETMASK, NULL, &mask) < 0) + panic("Failed to get signal mask"); + return(enable_mask(&mask)); +} + +int set_signals(int enable) +{ + sigset_t mask; + int ret; + + sigemptyset(&mask); + if(enable & (1 << SIGIO_BIT)) + sigaddset(&mask, SIGIO); + if(enable & (1 << SIGVTALRM_BIT)){ + sigaddset(&mask, SIGVTALRM); + sigaddset(&mask, SIGALRM); + } + + /* This is safe - sigprocmask is guaranteed to copy locally the + * value of new_set, do his work and then, at the end, write to + * old_set. + */ + if(sigprocmask(SIG_UNBLOCK, &mask, &mask) < 0) + panic("Failed to enable signals"); + ret = enable_mask(&mask); + sigemptyset(&mask); + if((enable & (1 << SIGIO_BIT)) == 0) + sigaddset(&mask, SIGIO); + if((enable & (1 << SIGVTALRM_BIT)) == 0){ + sigaddset(&mask, SIGVTALRM); + sigaddset(&mask, SIGALRM); + } + if(sigprocmask(SIG_BLOCK, &mask, NULL) < 0) + panic("Failed to block signals"); + + return(ret); +} diff --git a/arch/um/os-Linux/skas/Makefile b/arch/um/os-Linux/skas/Makefile new file mode 100644 index 000000000000..eab5386d60a7 --- /dev/null +++ b/arch/um/os-Linux/skas/Makefile @@ -0,0 +1,10 @@ +# +# Copyright (C) 2002 - 2004 Jeff Dike (jdike@addtoit.com) +# Licensed under the GPL +# + +obj-y := trap.o + +USER_OBJS := trap.o + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/skas/trap_user.c b/arch/um/os-Linux/skas/trap.c index 9950a6716fe5..9ad5fbec4593 100644 --- a/arch/um/kernel/skas/trap_user.c +++ b/arch/um/os-Linux/skas/trap.c @@ -1,11 +1,10 @@ -/* +/* * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ #include <signal.h> #include <errno.h> -#include "signal_user.h" #include "user_util.h" #include "kern_util.h" #include "task.h" @@ -14,12 +13,13 @@ #include "ptrace_user.h" #include "sysdep/ptrace.h" #include "sysdep/ptrace_user.h" +#include "os.h" void sig_handler_common_skas(int sig, void *sc_ptr) { struct sigcontext *sc = sc_ptr; struct skas_regs *r; - struct signal_info *info; + void (*handler)(int, union uml_pt_regs *); int save_errno = errno; int save_user; @@ -34,17 +34,22 @@ void sig_handler_common_skas(int sig, void *sc_ptr) r = &TASK_REGS(get_current())->skas; save_user = r->is_user; r->is_user = 0; - if ( sig == SIGFPE || sig == SIGSEGV || - sig == SIGBUS || sig == SIGILL || - sig == SIGTRAP ) { - GET_FAULTINFO_FROM_SC(r->faultinfo, sc); - } + if ( sig == SIGFPE || sig == SIGSEGV || + sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP ) { + GET_FAULTINFO_FROM_SC(r->faultinfo, sc); + } change_sig(SIGUSR1, 1); - info = &sig_info[sig]; - if(!info->is_irq) unblock_signals(); - (*info->handler)(sig, (union uml_pt_regs *) r); + handler = sig_info[sig]; + + /* unblock SIGALRM, SIGVTALRM, SIGIO if sig isn't IRQ signal */ + if (sig != SIGIO && sig != SIGWINCH && + sig != SIGVTALRM && sig != SIGALRM) + unblock_signals(); + + handler(sig, (union uml_pt_regs *) r); errno = save_errno; r->is_user = save_user; @@ -54,25 +59,15 @@ extern int ptrace_faultinfo; void user_signal(int sig, union uml_pt_regs *regs, int pid) { - struct signal_info *info; - int segv = ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) || - (sig == SIGILL) || (sig == SIGTRAP)); + void (*handler)(int, union uml_pt_regs *); + int segv = ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) || + (sig == SIGILL) || (sig == SIGTRAP)); if (segv) get_skas_faultinfo(pid, ®s->skas.faultinfo); - info = &sig_info[sig]; - (*info->handler)(sig, regs); + + handler = sig_info[sig]; + handler(sig, (union uml_pt_regs *) regs); unblock_signals(); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 29a9e3f43763..b47e5e71d1a5 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -24,7 +24,6 @@ #include "kern_util.h" #include "user.h" #include "signal_kern.h" -#include "signal_user.h" #include "sysdep/ptrace.h" #include "sysdep/sigcontext.h" #include "irq_user.h" diff --git a/arch/um/os-Linux/trap.c b/arch/um/os-Linux/trap.c new file mode 100644 index 000000000000..321e1c8e227d --- /dev/null +++ b/arch/um/os-Linux/trap.c @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <signal.h> +#include <setjmp.h> +#include "kern_util.h" +#include "user_util.h" +#include "os.h" +#include "mode.h" + +void usr2_handler(int sig, union uml_pt_regs *regs) +{ + CHOOSE_MODE(syscall_handler_tt(sig, regs), (void) 0); +} + +void (*sig_info[NSIG])(int, union uml_pt_regs *); + +void os_fill_handlinfo(struct kern_handlers h) +{ + sig_info[SIGTRAP] = h.relay_signal; + sig_info[SIGFPE] = h.relay_signal; + sig_info[SIGILL] = h.relay_signal; + sig_info[SIGWINCH] = h.winch; + sig_info[SIGBUS] = h.bus_handler; + sig_info[SIGSEGV] = h.page_fault; + sig_info[SIGIO] = h.sigio_handler; + sig_info[SIGVTALRM] = h.timer_handler; + sig_info[SIGALRM] = h.timer_handler; + sig_info[SIGUSR2] = usr2_handler; +} + +void do_longjmp(void *b, int val) +{ + sigjmp_buf *buf = b; + + siglongjmp(*buf, val); +} diff --git a/arch/um/os-Linux/tt.c b/arch/um/os-Linux/tt.c index a6db8877931a..cb2648b79d0f 100644 --- a/arch/um/os-Linux/tt.c +++ b/arch/um/os-Linux/tt.c @@ -23,7 +23,6 @@ #include "kern_util.h" #include "user.h" #include "signal_kern.h" -#include "signal_user.h" #include "sysdep/ptrace.h" #include "sysdep/sigcontext.h" #include "irq_user.h" @@ -50,6 +49,20 @@ int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x, return(0); } +void kill_child_dead(int pid) +{ + kill(pid, SIGKILL); + kill(pid, SIGCONT); + do { + int n; + CATCH_EINTR(n = waitpid(pid, NULL, 0)); + if (n > 0) + kill(pid, SIGCONT); + else + break; + } while(1); +} + /* *------------------------- * only for tt mode (will be deleted in future...) diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c index 16bc19928b3c..7cd1a82dc8c2 100644 --- a/arch/um/sys-i386/signal.c +++ b/arch/um/sys-i386/signal.c @@ -10,7 +10,6 @@ #include "asm/uaccess.h" #include "asm/unistd.h" #include "frame_kern.h" -#include "signal_user.h" #include "sigcontext.h" #include "registers.h" #include "mode.h" diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig index 310865903234..04494638b963 100644 --- a/arch/v850/Kconfig +++ b/arch/v850/Kconfig @@ -10,9 +10,6 @@ mainmenu "uClinux/v850 (w/o MMU) Kernel Configuration" config MMU bool default n -config UID16 - bool - default n config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 6ece645e4dbe..4f3e925962c3 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -542,11 +542,6 @@ config SYSVIPC_COMPAT depends on COMPAT && SYSVIPC default y -config UID16 - bool - depends on IA32_EMULATION - default y - endmenu source "net/Kconfig" diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c index 0e10fd84c7cc..cf4b88c416dc 100644 --- a/arch/x86_64/boot/compressed/misc.c +++ b/arch/x86_64/boot/compressed/misc.c @@ -9,7 +9,7 @@ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 */ -#include "miscsetup.h" +#include <linux/screen_info.h> #include <asm/io.h> #include <asm/page.h> diff --git a/arch/x86_64/boot/compressed/miscsetup.h b/arch/x86_64/boot/compressed/miscsetup.h deleted file mode 100644 index bb1620531703..000000000000 --- a/arch/x86_64/boot/compressed/miscsetup.h +++ /dev/null @@ -1,39 +0,0 @@ -#define NULL 0 -//typedef unsigned int size_t; - - -struct screen_info { - unsigned char orig_x; /* 0x00 */ - unsigned char orig_y; /* 0x01 */ - unsigned short dontuse1; /* 0x02 -- EXT_MEM_K sits here */ - unsigned short orig_video_page; /* 0x04 */ - unsigned char orig_video_mode; /* 0x06 */ - unsigned char orig_video_cols; /* 0x07 */ - unsigned short unused2; /* 0x08 */ - unsigned short orig_video_ega_bx; /* 0x0a */ - unsigned short unused3; /* 0x0c */ - unsigned char orig_video_lines; /* 0x0e */ - unsigned char orig_video_isVGA; /* 0x0f */ - unsigned short orig_video_points; /* 0x10 */ - - /* VESA graphic mode -- linear frame buffer */ - unsigned short lfb_width; /* 0x12 */ - unsigned short lfb_height; /* 0x14 */ - unsigned short lfb_depth; /* 0x16 */ - unsigned long lfb_base; /* 0x18 */ - unsigned long lfb_size; /* 0x1c */ - unsigned short dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */ - unsigned short lfb_linelength; /* 0x24 */ - unsigned char red_size; /* 0x26 */ - unsigned char red_pos; /* 0x27 */ - unsigned char green_size; /* 0x28 */ - unsigned char green_pos; /* 0x29 */ - unsigned char blue_size; /* 0x2a */ - unsigned char blue_pos; /* 0x2b */ - unsigned char rsvd_size; /* 0x2c */ - unsigned char rsvd_pos; /* 0x2d */ - unsigned short vesapm_seg; /* 0x2e */ - unsigned short vesapm_off; /* 0x30 */ - unsigned short pages; /* 0x32 */ - /* 0x34 -- 0x3f reserved for future expansion */ -}; diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index df0773c9bdbe..1f0ff5adc80e 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -643,6 +643,7 @@ ia32_sys_call_table: .quad sys_inotify_init .quad sys_inotify_add_watch .quad sys_inotify_rm_watch + .quad sys_migrate_pages ia32_syscall_end: .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 .quad ni_syscall diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c index 2a925e2af390..5f4cdfa56901 100644 --- a/arch/x86_64/ia32/ptrace32.c +++ b/arch/x86_64/ia32/ptrace32.c @@ -196,36 +196,6 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val) #undef R32 -static struct task_struct *find_target(int request, int pid, int *err) -{ - struct task_struct *child; - - *err = -EPERM; - if (pid == 1) - return NULL; - - *err = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (child) { - *err = -EPERM; - if (child->pid == 1) - goto out; - *err = ptrace_check_attach(child, request == PTRACE_KILL); - if (*err < 0) - goto out; - return child; - } - out: - if (child) - put_task_struct(child); - return NULL; - -} - asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) { struct task_struct *child; @@ -254,9 +224,16 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) break; } - child = find_target(request, pid, &ret); - if (!child) - return ret; + if (request == PTRACE_TRACEME) + return ptrace_traceme(); + + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) + return PTR_ERR(child); + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out; childregs = (struct pt_regs *)(child->thread.rsp0 - sizeof(struct pt_regs)); @@ -373,6 +350,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) break; } + out: put_task_struct(child); return ret; } diff --git a/arch/x86_64/kernel/init_task.c b/arch/x86_64/kernel/init_task.c index e0ba5c1043fd..ce31d904d601 100644 --- a/arch/x86_64/kernel/init_task.c +++ b/arch/x86_64/kernel/init_task.c @@ -44,6 +44,6 @@ EXPORT_SYMBOL(init_task); * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; +DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; #define ALIGN_TO_4K __attribute__((section(".data.init_task"))) diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 74102796e5c0..43c9fa0f8d5f 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -1075,8 +1075,6 @@ device_initcall(time_init_device); */ #include <linux/rtc.h> -extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); - #define DEFAULT_RTC_INT_FREQ 64 #define RTC_NUM_INTS 1 diff --git a/block/ioctl.c b/block/ioctl.c index 6e278474f9a8..82030e1dfd63 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -1,6 +1,7 @@ #include <linux/sched.h> /* for capable() */ #include <linux/blkdev.h> #include <linux/blkpg.h> +#include <linux/hdreg.h> #include <linux/backing-dev.h> #include <linux/buffer_head.h> #include <linux/smp_lock.h> @@ -245,6 +246,27 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, set_device_ro(bdev, n); unlock_kernel(); return 0; + case HDIO_GETGEO: { + struct hd_geometry geo; + + if (!arg) + return -EINVAL; + if (!disk->fops->getgeo) + return -ENOTTY; + + /* + * We need to set the startsect first, the driver may + * want to override it. + */ + geo.start = get_start_sect(bdev); + ret = disk->fops->getgeo(bdev, &geo); + if (ret) + return ret; + if (copy_to_user((struct hd_geometry __user *)arg, &geo, + sizeof(geo))) + return -EFAULT; + return 0; + } } lock_kernel(); diff --git a/drivers/acorn/block/mfmhd.c b/drivers/acorn/block/mfmhd.c index 4b65f74d66b1..ce074f6f3369 100644 --- a/drivers/acorn/block/mfmhd.c +++ b/drivers/acorn/block/mfmhd.c @@ -129,19 +129,6 @@ static DEFINE_SPINLOCK(mfm_lock); #define MAJOR_NR MFM_ACORN_MAJOR #define QUEUE (mfm_queue) #define CURRENT elv_next_request(mfm_queue) -/* - * This sort of stuff should be in a header file shared with ide.c, hd.c, xd.c etc - */ -#ifndef HDIO_GETGEO -#define HDIO_GETGEO 0x301 -struct hd_geometry { - unsigned char heads; - unsigned char sectors; - unsigned short cylinders; - unsigned long start; -}; -#endif - /* * Configuration section @@ -1153,22 +1140,13 @@ static int mfm_initdrives(void) * The 'front' end of the mfm driver follows... */ -static int mfm_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long arg) +static int mfm_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct mfm_info *p = inode->i_bdev->bd_disk->private_data; - struct hd_geometry *geo = (struct hd_geometry *) arg; - if (cmd != HDIO_GETGEO) - return -EINVAL; - if (!arg) - return -EINVAL; - if (put_user (p->heads, &geo->heads)) - return -EFAULT; - if (put_user (p->sectors, &geo->sectors)) - return -EFAULT; - if (put_user (p->cylinders, &geo->cylinders)) - return -EFAULT; - if (put_user (get_start_sect(inode->i_bdev), &geo->start)) - return -EFAULT; + struct mfm_info *p = bdev->bd_disk->private_data; + + geo->heads = p->heads; + geo->sectors = p->sectors; + geo->cylinders = p->cylinders; return 0; } @@ -1219,7 +1197,7 @@ void xd_set_geometry(struct block_device *bdev, unsigned char secsptrack, static struct block_device_operations mfm_fops = { .owner = THIS_MODULE, - .ioctl = mfm_ioctl, + .getgeo = mfm_getgeo, }; /* diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index e3cd0b16031a..20c9a37643c7 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -204,11 +204,13 @@ acpi_os_map_memory(acpi_physical_address phys, acpi_size size, return AE_OK; } +EXPORT_SYMBOL_GPL(acpi_os_map_memory); void acpi_os_unmap_memory(void __iomem * virt, acpi_size size) { iounmap(virt); } +EXPORT_SYMBOL_GPL(acpi_os_unmap_memory); #ifdef ACPI_FUTURE_USAGE acpi_status diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index c57e20dcb0f8..074abc81ec3d 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -2126,8 +2126,7 @@ static void process_rsq(ns_dev *card) if (!ns_rsqe_valid(card->rsq.next)) return; - while (ns_rsqe_valid(card->rsq.next)) - { + do { dequeue_rx(card, card->rsq.next); ns_rsqe_init(card->rsq.next); previous = card->rsq.next; @@ -2135,7 +2134,7 @@ static void process_rsq(ns_dev *card) card->rsq.next = card->rsq.base; else card->rsq.next++; - } + } while (ns_rsqe_valid(card->rsq.next)); writel((((u32) previous) - ((u32) card->rsq.base)), card->membase + RSQH); } diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 21097a39a057..4a7bb7dfce85 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -92,34 +92,28 @@ static int DAC960_open(struct inode *inode, struct file *file) return 0; } -static int DAC960_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static int DAC960_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct gendisk *disk = inode->i_bdev->bd_disk; + struct gendisk *disk = bdev->bd_disk; DAC960_Controller_T *p = disk->queue->queuedata; int drive_nr = (long)disk->private_data; - struct hd_geometry g; - struct hd_geometry __user *loc = (struct hd_geometry __user *)arg; - - if (cmd != HDIO_GETGEO || !loc) - return -EINVAL; if (p->FirmwareType == DAC960_V1_Controller) { - g.heads = p->V1.GeometryTranslationHeads; - g.sectors = p->V1.GeometryTranslationSectors; - g.cylinders = p->V1.LogicalDriveInformation[drive_nr]. - LogicalDriveSize / (g.heads * g.sectors); + geo->heads = p->V1.GeometryTranslationHeads; + geo->sectors = p->V1.GeometryTranslationSectors; + geo->cylinders = p->V1.LogicalDriveInformation[drive_nr]. + LogicalDriveSize / (geo->heads * geo->sectors); } else { DAC960_V2_LogicalDeviceInfo_T *i = p->V2.LogicalDeviceInformation[drive_nr]; switch (i->DriveGeometry) { case DAC960_V2_Geometry_128_32: - g.heads = 128; - g.sectors = 32; + geo->heads = 128; + geo->sectors = 32; break; case DAC960_V2_Geometry_255_63: - g.heads = 255; - g.sectors = 63; + geo->heads = 255; + geo->sectors = 63; break; default: DAC960_Error("Illegal Logical Device Geometry %d\n", @@ -127,12 +121,11 @@ static int DAC960_ioctl(struct inode *inode, struct file *file, return -EINVAL; } - g.cylinders = i->ConfigurableDeviceSize / (g.heads * g.sectors); + geo->cylinders = i->ConfigurableDeviceSize / + (geo->heads * geo->sectors); } - g.start = get_start_sect(inode->i_bdev); - - return copy_to_user(loc, &g, sizeof g) ? -EFAULT : 0; + return 0; } static int DAC960_media_changed(struct gendisk *disk) @@ -157,7 +150,7 @@ static int DAC960_revalidate_disk(struct gendisk *disk) static struct block_device_operations DAC960_BlockDeviceOperations = { .owner = THIS_MODULE, .open = DAC960_open, - .ioctl = DAC960_ioctl, + .getgeo = DAC960_getgeo, .media_changed = DAC960_media_changed, .revalidate_disk = DAC960_revalidate_disk, }; @@ -3767,7 +3760,7 @@ static void DAC960_V1_ProcessCompletedCommand(DAC960_Command_T *Command) if (SenseKey == DAC960_SenseKey_VendorSpecific && AdditionalSenseCode == 0x80 && AdditionalSenseCodeQualifier < - sizeof(DAC960_EventMessages) / sizeof(char *)) + ARRAY_SIZE(DAC960_EventMessages)) DAC960_Critical("Physical Device %d:%d %s\n", Controller, EventLogEntry->Channel, EventLogEntry->TargetID, diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c index 5d2d649f7e8d..196c0ec9cd54 100644 --- a/drivers/block/acsi.c +++ b/drivers/block/acsi.c @@ -1079,6 +1079,19 @@ static void redo_acsi_request( void ) * ***********************************************************************/ +static int acsi_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct acsi_info_struct *aip = bdev->bd_disk->private_data; + + /* + * Just fake some geometry here, it's nonsense anyway + * To make it easy, use Adaptec's usual 64/32 mapping + */ + geo->heads = 64; + geo->sectors = 32; + geo->cylinders = aip->size >> 11; + return 0; +} static int acsi_ioctl( struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg ) @@ -1086,18 +1099,6 @@ static int acsi_ioctl( struct inode *inode, struct file *file, struct gendisk *disk = inode->i_bdev->bd_disk; struct acsi_info_struct *aip = disk->private_data; switch (cmd) { - case HDIO_GETGEO: - /* HDIO_GETGEO is supported more for getting the partition's - * start sector... */ - { struct hd_geometry *geo = (struct hd_geometry *)arg; - /* just fake some geometry here, it's nonsense anyway; to make it - * easy, use Adaptec's usual 64/32 mapping */ - put_user( 64, &geo->heads ); - put_user( 32, &geo->sectors ); - put_user( aip->size >> 11, &geo->cylinders ); - put_user(get_start_sect(inode->i_bdev), &geo->start); - return 0; - } case SCSI_IOCTL_GET_IDLUN: /* SCSI compatible GET_IDLUN call to get target's ID and LUN number */ put_user( aip->target | (aip->lun << 8), @@ -1592,6 +1593,7 @@ static struct block_device_operations acsi_fops = { .open = acsi_open, .release = acsi_release, .ioctl = acsi_ioctl, + .getgeo = acsi_getgeo, .media_changed = acsi_media_change, .revalidate_disk= acsi_revalidate, }; diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 0acbfff8ad28..3c679d30b698 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -131,7 +131,7 @@ static struct fd_drive_type drive_types[] = { { FD_DD_5, "DD 5.25", 40, 2, 14716, 13630, 1, 40, 81, 6, 30, 2}, { FD_NODRIVE, "No Drive", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }; -static int num_dr_types = sizeof(drive_types) / sizeof(drive_types[0]); +static int num_dr_types = ARRAY_SIZE(drive_types); static int amiga_read(int), dos_read(int); static void amiga_write(int), dos_write(int); @@ -1424,6 +1424,16 @@ static void do_fd_request(request_queue_t * q) redo_fd_request(); } +static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + int drive = MINOR(bdev->bd_dev) & 3; + + geo->heads = unit[drive].type->heads; + geo->sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult; + geo->cylinders = unit[drive].type->tracks; + return 0; +} + static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long param) { @@ -1431,18 +1441,6 @@ static int fd_ioctl(struct inode *inode, struct file *filp, static struct floppy_struct getprm; switch(cmd){ - case HDIO_GETGEO: - { - struct hd_geometry loc; - loc.heads = unit[drive].type->heads; - loc.sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult; - loc.cylinders = unit[drive].type->tracks; - loc.start = 0; - if (copy_to_user((void *)param, (void *)&loc, - sizeof(struct hd_geometry))) - return -EFAULT; - break; - } case FDFMTBEG: get_fdc(drive); if (fd_ref[drive] > 1) { @@ -1652,6 +1650,7 @@ static struct block_device_operations floppy_fops = { .open = floppy_open, .release = floppy_release, .ioctl = fd_ioctl, + .getgeo = fd_getgeo, .media_changed = amiga_floppy_change, }; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 0e97fcb9f3a1..c05ee8bffd97 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -169,38 +169,26 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio) return 0; } -/* This ioctl implementation expects userland to have the device node - * permissions set so that only priviledged users can open an aoe - * block device directly. - */ static int -aoeblk_ioctl(struct inode *inode, struct file *filp, uint cmd, ulong arg) +aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct aoedev *d; - - if (!arg) - return -EINVAL; + struct aoedev *d = bdev->bd_disk->private_data; - d = inode->i_bdev->bd_disk->private_data; if ((d->flags & DEVFL_UP) == 0) { printk(KERN_ERR "aoe: aoeblk_ioctl: disk not up\n"); return -ENODEV; } - if (cmd == HDIO_GETGEO) { - d->geo.start = get_start_sect(inode->i_bdev); - if (!copy_to_user((void __user *) arg, &d->geo, sizeof d->geo)) - return 0; - return -EFAULT; - } - printk(KERN_INFO "aoe: aoeblk_ioctl: unknown ioctl %d\n", cmd); - return -EINVAL; + geo->cylinders = d->geo.cylinders; + geo->heads = d->geo.heads; + geo->sectors = d->geo.sectors; + return 0; } static struct block_device_operations aoe_bdops = { .open = aoeblk_open, .release = aoeblk_release, - .ioctl = aoeblk_ioctl, + .getgeo = aoeblk_getgeo, .owner = THIS_MODULE, }; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 22bda05fc693..3aa68a5447d6 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -181,7 +181,7 @@ static struct { { 6, TYPE_HD }, /* 31: H1640 <- was H1600 == h1600 for PC */ }; -#define NUM_DISK_MINORS (sizeof(minor2disktype)/sizeof(*minor2disktype)) +#define NUM_DISK_MINORS ARRAY_SIZE(minor2disktype) /* * Maximum disk size (in kilobytes). This default is used whenever the diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index d2815b7a9150..88452c79fb64 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1,6 +1,6 @@ /* * Disk Array driver for HP SA 5xxx and 6xxx Controllers - * Copyright 2000, 2005 Hewlett-Packard Development Company, L.P. + * Copyright 2000, 2006 Hewlett-Packard Development Company, L.P. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -47,12 +47,12 @@ #include <linux/completion.h> #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin)) -#define DRIVER_NAME "HP CISS Driver (v 2.6.8)" -#define DRIVER_VERSION CCISS_DRIVER_VERSION(2,6,8) +#define DRIVER_NAME "HP CISS Driver (v 2.6.10)" +#define DRIVER_VERSION CCISS_DRIVER_VERSION(2,6,10) /* Embedded module documentation macros - see modules.h */ MODULE_AUTHOR("Hewlett-Packard Company"); -MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.8"); +MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.10"); MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400" " SA6i P600 P800 P400 P400i E200 E200i"); MODULE_LICENSE("GPL"); @@ -103,7 +103,7 @@ static const struct pci_device_id cciss_pci_device_id[] = { }; MODULE_DEVICE_TABLE(pci, cciss_pci_device_id); -#define NR_PRODUCTS (sizeof(products)/sizeof(struct board_type)) +#define NR_PRODUCTS ARRAY_SIZE(products) /* board_id = Subsystem Device ID & Vendor ID * product = Marketing Name for the board @@ -153,6 +153,7 @@ static int cciss_open(struct inode *inode, struct file *filep); static int cciss_release(struct inode *inode, struct file *filep); static int cciss_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, unsigned long arg); +static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo); static int revalidate_allvol(ctlr_info_t *host); static int cciss_revalidate(struct gendisk *disk); @@ -166,7 +167,7 @@ static void cciss_geometry_inquiry(int ctlr, int logvol, unsigned int block_size, InquiryData_struct *inq_buff, drive_info_struct *drv); static void cciss_getgeometry(int cntl_num); - +static void __devinit cciss_interrupt_mode(ctlr_info_t *, struct pci_dev *, __u32); static void start_io( ctlr_info_t *h); static int sendcmd( __u8 cmd, int ctlr, void *buff, size_t size, unsigned int use_unit_num, unsigned int log_unit, __u8 page_code, @@ -194,6 +195,7 @@ static struct block_device_operations cciss_fops = { .open = cciss_open, .release = cciss_release, .ioctl = cciss_ioctl, + .getgeo = cciss_getgeo, #ifdef CONFIG_COMPAT .compat_ioctl = cciss_compat_ioctl, #endif @@ -282,7 +284,7 @@ static int cciss_proc_get_info(char *buffer, char **start, off_t offset, h->product_name, (unsigned long)h->board_id, h->firm_ver[0], h->firm_ver[1], h->firm_ver[2], h->firm_ver[3], - (unsigned int)h->intr, + (unsigned int)h->intr[SIMPLE_MODE_INT], h->num_luns, h->Qdepth, h->commands_outstanding, h->maxQsinceinit, h->max_outstanding, h->maxSG); @@ -633,6 +635,20 @@ static int cciss_ioctl32_big_passthru(struct file *file, unsigned cmd, unsigned return err; } #endif + +static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + drive_info_struct *drv = get_drv(bdev->bd_disk); + + if (!drv->cylinders) + return -ENXIO; + + geo->heads = drv->heads; + geo->sectors = drv->sectors; + geo->cylinders = drv->cylinders; + return 0; +} + /* * ioctl */ @@ -651,21 +667,6 @@ static int cciss_ioctl(struct inode *inode, struct file *filep, #endif /* CCISS_DEBUG */ switch(cmd) { - case HDIO_GETGEO: - { - struct hd_geometry driver_geo; - if (drv->cylinders) { - driver_geo.heads = drv->heads; - driver_geo.sectors = drv->sectors; - driver_geo.cylinders = drv->cylinders; - } else - return -ENXIO; - driver_geo.start= get_start_sect(inode->i_bdev); - if (copy_to_user(argp, &driver_geo, sizeof(struct hd_geometry))) - return -EFAULT; - return(0); - } - case CCISS_GETPCIINFO: { cciss_pci_info_struct pciinfo; @@ -2661,6 +2662,60 @@ static int find_PCI_BAR_index(struct pci_dev *pdev, return -1; } +/* If MSI/MSI-X is supported by the kernel we will try to enable it on + * controllers that are capable. If not, we use IO-APIC mode. + */ + +static void __devinit cciss_interrupt_mode(ctlr_info_t *c, struct pci_dev *pdev, __u32 board_id) +{ +#ifdef CONFIG_PCI_MSI + int err; + struct msix_entry cciss_msix_entries[4] = {{0,0}, {0,1}, + {0,2}, {0,3}}; + + /* Some boards advertise MSI but don't really support it */ + if ((board_id == 0x40700E11) || + (board_id == 0x40800E11) || + (board_id == 0x40820E11) || + (board_id == 0x40830E11)) + goto default_int_mode; + + if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) { + err = pci_enable_msix(pdev, cciss_msix_entries, 4); + if (!err) { + c->intr[0] = cciss_msix_entries[0].vector; + c->intr[1] = cciss_msix_entries[1].vector; + c->intr[2] = cciss_msix_entries[2].vector; + c->intr[3] = cciss_msix_entries[3].vector; + c->msix_vector = 1; + return; + } + if (err > 0) { + printk(KERN_WARNING "cciss: only %d MSI-X vectors " + "available\n", err); + } else { + printk(KERN_WARNING "cciss: MSI-X init failed %d\n", + err); + } + } + if (pci_find_capability(pdev, PCI_CAP_ID_MSI)) { + if (!pci_enable_msi(pdev)) { + c->intr[SIMPLE_MODE_INT] = pdev->irq; + c->msi_vector = 1; + return; + } else { + printk(KERN_WARNING "cciss: MSI init failed\n"); + c->intr[SIMPLE_MODE_INT] = pdev->irq; + return; + } + } +#endif /* CONFIG_PCI_MSI */ + /* if we get here we're going to use the default interrupt mode */ +default_int_mode: + c->intr[SIMPLE_MODE_INT] = pdev->irq; + return; +} + static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) { ushort subsystem_vendor_id, subsystem_device_id, command; @@ -2721,7 +2776,10 @@ static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) printk("board_id = %x\n", board_id); #endif /* CCISS_DEBUG */ - c->intr = pdev->irq; +/* If the kernel supports MSI/MSI-X we will try to enable that functionality, + * else we use the IO-APIC interrupt assigned to us by system ROM. + */ + cciss_interrupt_mode(c, pdev, board_id); /* * Memory base addr is first addr , the second points to the config @@ -2775,7 +2833,7 @@ static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) c->board_id = board_id; #ifdef CCISS_DEBUG - print_cfg_table(c->cfgtable); + print_cfg_table(c->cfgtable); #endif /* CCISS_DEBUG */ for(i=0; i<NR_PRODUCTS; i++) { @@ -3060,7 +3118,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, * 8 controller support. */ if (i < MAX_CTLR_ORIG) - hba[i]->major = MAJOR_NR + i; + hba[i]->major = COMPAQ_CISS_MAJOR + i; rc = register_blkdev(hba[i]->major, hba[i]->devname); if(rc == -EBUSY || rc == -EINVAL) { printk(KERN_ERR @@ -3075,11 +3133,11 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, /* make sure the board interrupts are off */ hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF); - if( request_irq(hba[i]->intr, do_cciss_intr, + if( request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr, SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, hba[i]->devname, hba[i])) { printk(KERN_ERR "cciss: Unable to get irq %d for %s\n", - hba[i]->intr, hba[i]->devname); + hba[i]->intr[SIMPLE_MODE_INT], hba[i]->devname); goto clean2; } hba[i]->cmd_pool_bits = kmalloc(((NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG)*sizeof(unsigned long), GFP_KERNEL); @@ -3185,7 +3243,7 @@ clean4: NR_CMDS * sizeof( ErrorInfo_struct), hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle); - free_irq(hba[i]->intr, hba[i]); + free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]); clean2: unregister_blkdev(hba[i]->major, hba[i]->devname); clean1: @@ -3226,7 +3284,15 @@ static void __devexit cciss_remove_one (struct pci_dev *pdev) printk(KERN_WARNING "Error Flushing cache on controller %d\n", i); } - free_irq(hba[i]->intr, hba[i]); + free_irq(hba[i]->intr[2], hba[i]); + +#ifdef CONFIG_PCI_MSI + if (hba[i]->msix_vector) + pci_disable_msix(hba[i]->pdev); + else if (hba[i]->msi_vector) + pci_disable_msi(hba[i]->pdev); +#endif /* CONFIG_PCI_MSI */ + pci_set_drvdata(pdev, NULL); iounmap(hba[i]->vaddr); cciss_unregister_scsi(i); /* unhook from SCSI subsystem */ diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 3b0858c83897..b24fc0553ccf 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -13,8 +13,6 @@ #define IO_OK 0 #define IO_ERROR 1 -#define MAJOR_NR COMPAQ_CISS_MAJOR - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; @@ -65,7 +63,6 @@ struct ctlr_info unsigned long io_mem_addr; unsigned long io_mem_length; CfgTable_struct __iomem *cfgtable; - unsigned int intr; int interrupts_enabled; int major; int max_commands; @@ -74,6 +71,13 @@ struct ctlr_info int num_luns; int highest_lun; int usage_count; /* number of opens all all minor devices */ +# define DOORBELL_INT 0 +# define PERF_MODE_INT 1 +# define SIMPLE_MODE_INT 2 +# define MEMQ_MODE_INT 3 + unsigned int intr[4]; + unsigned int msix_vector; + unsigned int msi_vector; // information about each logical volume drive_info_struct drv[CISS_MAX_LUN]; diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index 2942d32280a5..9e35de05d5c5 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -714,7 +714,7 @@ cciss_scsi_detect(int ctlr) ((struct cciss_scsi_adapter_data_t *) hba[ctlr]->scsi_ctlr)->scsi_host = (void *) sh; sh->hostdata[0] = (unsigned long) hba[ctlr]; - sh->irq = hba[ctlr]->intr; + sh->irq = hba[ctlr]->intr[SIMPLE_MODE_INT]; sh->unique_id = sh->irq; error = scsi_add_host(sh, &hba[ctlr]->pdev->dev); if (error) diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 9bddb6874873..862b9abac0ae 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -72,11 +72,11 @@ static ctlr_info_t *hba[MAX_CTLR]; static int eisa[8]; -#define NR_PRODUCTS (sizeof(products)/sizeof(struct board_type)) +#define NR_PRODUCTS ARRAY_SIZE(products) /* board_id = Subsystem Device ID & Vendor ID * product = Marketing Name for the board - * access = Address of the struct of function pointers + * access = Address of the struct of function pointers */ static struct board_type products[] = { { 0x0040110E, "IDA", &smart1_access }, @@ -160,6 +160,7 @@ static int sendcmd( static int ida_open(struct inode *inode, struct file *filep); static int ida_release(struct inode *inode, struct file *filep); static int ida_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, unsigned long arg); +static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo); static int ida_ctlr_ioctl(ctlr_info_t *h, int dsk, ida_ioctl_t *io); static void do_ida_request(request_queue_t *q); @@ -199,6 +200,7 @@ static struct block_device_operations ida_fops = { .open = ida_open, .release = ida_release, .ioctl = ida_ioctl, + .getgeo = ida_getgeo, .revalidate_disk= ida_revalidate, }; @@ -1124,6 +1126,23 @@ static void ida_timer(unsigned long tdata) h->misc_tflags = 0; } +static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + drv_info_t *drv = get_drv(bdev->bd_disk); + + if (drv->cylinders) { + geo->heads = drv->heads; + geo->sectors = drv->sectors; + geo->cylinders = drv->cylinders; + } else { + geo->heads = 0xff; + geo->sectors = 0x3f; + geo->cylinders = drv->nr_blks / (0xff*0x3f); + } + + return 0; +} + /* * ida_ioctl does some miscellaneous stuff like reporting drive geometry, * setting readahead and submitting commands from userspace to the controller. @@ -1133,27 +1152,10 @@ static int ida_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, drv_info_t *drv = get_drv(inode->i_bdev->bd_disk); ctlr_info_t *host = get_host(inode->i_bdev->bd_disk); int error; - int diskinfo[4]; - struct hd_geometry __user *geo = (struct hd_geometry __user *)arg; ida_ioctl_t __user *io = (ida_ioctl_t __user *)arg; ida_ioctl_t *my_io; switch(cmd) { - case HDIO_GETGEO: - if (drv->cylinders) { - diskinfo[0] = drv->heads; - diskinfo[1] = drv->sectors; - diskinfo[2] = drv->cylinders; - } else { - diskinfo[0] = 0xff; - diskinfo[1] = 0x3f; - diskinfo[2] = drv->nr_blks / (0xff*0x3f); - } - put_user(diskinfo[0], &geo->heads); - put_user(diskinfo[1], &geo->sectors); - put_user(diskinfo[2], &geo->cylinders); - put_user(get_start_sect(inode->i_bdev), &geo->start); - return 0; case IDAGETDRVINFO: if (copy_to_user(&io->c.drv, drv, sizeof(drv_info_t))) return -EFAULT; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a5b857c5c4b8..374621a512e0 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -479,7 +479,6 @@ static struct floppy_struct floppy_type[32] = { { 3200,20,2,80,0,0x1C,0x00,0xCF,0x2C,"H1600" }, /* 31 1.6MB 3.5" */ }; -#define NUMBER(x) (sizeof(x) / sizeof(*(x))) #define SECTSIZE (_FD_SECTSIZE(*floppy)) /* Auto-detection: Disk type used until the next media change occurs. */ @@ -3445,6 +3444,23 @@ static int get_floppy_geometry(int drive, int type, struct floppy_struct **g) return 0; } +static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + int drive = (long)bdev->bd_disk->private_data; + int type = ITYPE(drive_state[drive].fd_device); + struct floppy_struct *g; + int ret; + + ret = get_floppy_geometry(drive, type, &g); + if (ret) + return ret; + + geo->heads = g->head; + geo->sectors = g->sect; + geo->cylinders = g->track; + return 0; +} + static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long param) { @@ -3474,23 +3490,6 @@ static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, cmd = FDEJECT; } - /* generic block device ioctls */ - switch (cmd) { - /* the following have been inspired by the corresponding - * code for other block devices. */ - struct floppy_struct *g; - case HDIO_GETGEO: - { - struct hd_geometry loc; - ECALL(get_floppy_geometry(drive, type, &g)); - loc.heads = g->head; - loc.sectors = g->sect; - loc.cylinders = g->track; - loc.start = 0; - return _COPYOUT(loc); - } - } - /* convert the old style command into a new style command */ if ((cmd & 0xff00) == 0x0200) { ECALL(normalize_ioctl(&cmd, &size)); @@ -3645,7 +3644,7 @@ static void __init config_types(void) const char *name = NULL; static char temparea[32]; - if (type < NUMBER(default_drive_params)) { + if (type < ARRAY_SIZE(default_drive_params)) { params = &default_drive_params[type].params; if (type) { name = default_drive_params[type].name; @@ -3938,6 +3937,7 @@ static struct block_device_operations floppy_fops = { .open = floppy_open, .release = floppy_release, .ioctl = fd_ioctl, + .getgeo = fd_getgeo, .media_changed = check_floppy_change, .revalidate_disk = floppy_revalidate, }; @@ -3960,7 +3960,7 @@ static void __init register_devfs_entries(int drive) { int base_minor = (drive < 4) ? drive : (124 + drive); - if (UDP->cmos < NUMBER(default_drive_params)) { + if (UDP->cmos < ARRAY_SIZE(default_drive_params)) { int i = 0; do { int minor = base_minor + (table_sup[UDP->cmos][i] << 2); @@ -4218,7 +4218,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data) !(allowed_drive_mask & (1 << drive)) || fdc_state[FDC(drive)].version == FDC_NONE) return NULL; - if (((*part >> 2) & 0x1f) >= NUMBER(floppy_type)) + if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type)) return NULL; *part = 0; return get_disk(disks[drive]); @@ -4570,7 +4570,7 @@ static void unregister_devfs_entries(int drive) { int i; - if (UDP->cmos < NUMBER(default_drive_params)) { + if (UDP->cmos < ARRAY_SIZE(default_drive_params)) { i = 0; do { devfs_remove("floppy/%d%s", drive, diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 33d6f237b2ed..6997d8e6bfb5 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -174,7 +174,6 @@ static int sock_xmit(struct socket *sock, int send, void *buf, int size, msg.msg_namelen = 0; msg.msg_control = NULL; msg.msg_controllen = 0; - msg.msg_namelen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (send) diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index fa49d62626ba..62d2464c12f2 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -747,32 +747,33 @@ static int pd_open(struct inode *inode, struct file *file) return 0; } +static int pd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct pd_unit *disk = bdev->bd_disk->private_data; + + if (disk->alt_geom) { + geo->heads = PD_LOG_HEADS; + geo->sectors = PD_LOG_SECTS; + geo->cylinders = disk->capacity / (geo->heads * geo->sectors); + } else { + geo->heads = disk->heads; + geo->sectors = disk->sectors; + geo->cylinders = disk->cylinders; + } + + return 0; +} + static int pd_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { struct pd_unit *disk = inode->i_bdev->bd_disk->private_data; - struct hd_geometry __user *geo = (struct hd_geometry __user *) arg; - struct hd_geometry g; switch (cmd) { case CDROMEJECT: if (disk->access == 1) pd_special_command(disk, pd_eject); return 0; - case HDIO_GETGEO: - if (disk->alt_geom) { - g.heads = PD_LOG_HEADS; - g.sectors = PD_LOG_SECTS; - g.cylinders = disk->capacity / (g.heads * g.sectors); - } else { - g.heads = disk->heads; - g.sectors = disk->sectors; - g.cylinders = disk->cylinders; - } - g.start = get_start_sect(inode->i_bdev); - if (copy_to_user(geo, &g, sizeof(struct hd_geometry))) - return -EFAULT; - return 0; default: return -EINVAL; } @@ -815,6 +816,7 @@ static struct block_device_operations pd_fops = { .open = pd_open, .release = pd_release, .ioctl = pd_ioctl, + .getgeo = pd_getgeo, .media_changed = pd_check_media, .revalidate_disk= pd_revalidate }; diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index e9746af29b9f..852b564e903a 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -205,6 +205,7 @@ static int pf_open(struct inode *inode, struct file *file); static void do_pf_request(request_queue_t * q); static int pf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); +static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo); static int pf_release(struct inode *inode, struct file *file); @@ -266,6 +267,7 @@ static struct block_device_operations pf_fops = { .open = pf_open, .release = pf_release, .ioctl = pf_ioctl, + .getgeo = pf_getgeo, .media_changed = pf_check_media, }; @@ -313,34 +315,34 @@ static int pf_open(struct inode *inode, struct file *file) return 0; } -static int pf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct pf_unit *pf = inode->i_bdev->bd_disk->private_data; - struct hd_geometry __user *geo = (struct hd_geometry __user *) arg; - struct hd_geometry g; - sector_t capacity; - - if (cmd == CDROMEJECT) { - if (pf->access == 1) { - pf_eject(pf); - return 0; - } - return -EBUSY; - } - if (cmd != HDIO_GETGEO) - return -EINVAL; - capacity = get_capacity(pf->disk); + struct pf_unit *pf = bdev->bd_disk->private_data; + sector_t capacity = get_capacity(pf->disk); + if (capacity < PF_FD_MAX) { - g.cylinders = sector_div(capacity, PF_FD_HDS * PF_FD_SPT); - g.heads = PF_FD_HDS; - g.sectors = PF_FD_SPT; + geo->cylinders = sector_div(capacity, PF_FD_HDS * PF_FD_SPT); + geo->heads = PF_FD_HDS; + geo->sectors = PF_FD_SPT; } else { - g.cylinders = sector_div(capacity, PF_HD_HDS * PF_HD_SPT); - g.heads = PF_HD_HDS; - g.sectors = PF_HD_SPT; + geo->cylinders = sector_div(capacity, PF_HD_HDS * PF_HD_SPT); + geo->heads = PF_HD_HDS; + geo->sectors = PF_HD_SPT; } - if (copy_to_user(geo, &g, sizeof(g))) - return -EFAULT; + + return 0; +} + +static int pf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +{ + struct pf_unit *pf = inode->i_bdev->bd_disk->private_data; + + if (cmd != CDROMEJECT) + return -EINVAL; + + if (pf->access != 1) + return -EBUSY; + pf_eject(pf); return 0; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index c0233efabeba..51b7a5c5b77a 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1955,9 +1955,12 @@ static int pkt_open_dev(struct pktcdvd_device *pd, int write) if ((ret = blkdev_get(pd->bdev, FMODE_READ, O_RDONLY))) goto out; + if ((ret = bd_claim(pd->bdev, pd))) + goto out_putdev; + if ((ret = pkt_get_last_written(pd, &lba))) { printk("pktcdvd: pkt_get_last_written failed\n"); - goto out_putdev; + goto out_unclaim; } set_capacity(pd->disk, lba << 2); @@ -1967,7 +1970,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, int write) q = bdev_get_queue(pd->bdev); if (write) { if ((ret = pkt_open_write(pd))) - goto out_putdev; + goto out_unclaim; /* * Some CDRW drives can not handle writes larger than one packet, * even if the size is a multiple of the packet size. @@ -1982,13 +1985,15 @@ static int pkt_open_dev(struct pktcdvd_device *pd, int write) } if ((ret = pkt_set_segment_merging(pd, q))) - goto out_putdev; + goto out_unclaim; if (write) printk("pktcdvd: %lukB available on disc\n", lba << 1); return 0; +out_unclaim: + bd_release(pd->bdev); out_putdev: blkdev_put(pd->bdev); out: @@ -2007,6 +2012,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush) pkt_lock_door(pd, 0); pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); + bd_release(pd->bdev); blkdev_put(pd->bdev); } diff --git a/drivers/block/ps2esdi.c b/drivers/block/ps2esdi.c index 29d1518be72a..43415f69839f 100644 --- a/drivers/block/ps2esdi.c +++ b/drivers/block/ps2esdi.c @@ -81,8 +81,7 @@ static void (*current_int_handler) (u_int) = NULL; static void ps2esdi_normal_interrupt_handler(u_int); static void ps2esdi_initial_reset_int_handler(u_int); static void ps2esdi_geometry_int_handler(u_int); -static int ps2esdi_ioctl(struct inode *inode, struct file *file, - u_int cmd, u_long arg); +static int ps2esdi_getgeo(struct block_device *bdev, struct hd_geometry *geo); static int ps2esdi_read_status_words(int num_words, int max_words, u_short * buffer); @@ -132,7 +131,7 @@ static struct ps2esdi_i_struct ps2esdi_info[MAX_HD] = static struct block_device_operations ps2esdi_fops = { .owner = THIS_MODULE, - .ioctl = ps2esdi_ioctl, + .getgeo = ps2esdi_getgeo, }; static struct gendisk *ps2esdi_gendisk[2]; @@ -1058,21 +1057,13 @@ static void dump_cmd_complete_status(u_int int_ret_code) } -static int ps2esdi_ioctl(struct inode *inode, - struct file *file, u_int cmd, u_long arg) +static int ps2esdi_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct ps2esdi_i_struct *p = inode->i_bdev->bd_disk->private_data; - struct ps2esdi_geometry geom; - - if (cmd != HDIO_GETGEO) - return -EINVAL; - memset(&geom, 0, sizeof(geom)); - geom.heads = p->head; - geom.sectors = p->sect; - geom.cylinders = p->cyl; - geom.start = get_start_sect(inode->i_bdev); - if (copy_to_user((void __user *)arg, &geom, sizeof(geom))) - return -EFAULT; + struct ps2esdi_i_struct *p = bdev->bd_disk->private_data; + + geo->heads = p->head; + geo->sectors = p->sect; + geo->cylinders = p->cyl; return 0; } diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 9251f4131b53..c0cdc182a8b0 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -407,8 +407,7 @@ struct carm_array_info { static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent); static void carm_remove_one (struct pci_dev *pdev); -static int carm_bdev_ioctl(struct inode *ino, struct file *fil, - unsigned int cmd, unsigned long arg); +static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo); static struct pci_device_id carm_pci_tbl[] = { { PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, }, @@ -426,7 +425,7 @@ static struct pci_driver carm_driver = { static struct block_device_operations carm_bd_ops = { .owner = THIS_MODULE, - .ioctl = carm_bdev_ioctl, + .getgeo = carm_bdev_getgeo, }; static unsigned int carm_host_id; @@ -434,32 +433,14 @@ static unsigned long carm_major_alloc; -static int carm_bdev_ioctl(struct inode *ino, struct file *fil, - unsigned int cmd, unsigned long arg) +static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - void __user *usermem = (void __user *) arg; - struct carm_port *port = ino->i_bdev->bd_disk->private_data; - struct hd_geometry geom; + struct carm_port *port = bdev->bd_disk->private_data; - switch (cmd) { - case HDIO_GETGEO: - if (!usermem) - return -EINVAL; - - geom.heads = (u8) port->dev_geom_head; - geom.sectors = (u8) port->dev_geom_sect; - geom.cylinders = port->dev_geom_cyl; - geom.start = get_start_sect(ino->i_bdev); - - if (copy_to_user(usermem, &geom, sizeof(geom))) - return -EFAULT; - return 0; - - default: - break; - } - - return -EOPNOTSUPP; + geo->heads = (u8) port->dev_geom_head; + geo->sectors = (u8) port->dev_geom_sect; + geo->cylinders = port->dev_geom_cyl; + return 0; } static const u32 msg_sizes[] = { 32, 64, 128, CARM_MSG_SIZE }; diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 0f48301342da..15299e7a1ade 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -809,34 +809,23 @@ static int mm_revalidate(struct gendisk *disk) set_capacity(disk, card->mm_size << 1); return 0; } -/* ------------------------------------------------------------------------------------ --- mm_ioctl ------------------------------------------------------------------------------------ -*/ -static int mm_ioctl(struct inode *i, struct file *f, unsigned int cmd, unsigned long arg) + +static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - if (cmd == HDIO_GETGEO) { - struct cardinfo *card = i->i_bdev->bd_disk->private_data; - int size = card->mm_size * (1024 / MM_HARDSECT); - struct hd_geometry geo; - /* - * get geometry: we have to fake one... trim the size to a - * multiple of 2048 (1M): tell we have 32 sectors, 64 heads, - * whatever cylinders. - */ - geo.heads = 64; - geo.sectors = 32; - geo.start = get_start_sect(i->i_bdev); - geo.cylinders = size / (geo.heads * geo.sectors); - - if (copy_to_user((void __user *) arg, &geo, sizeof(geo))) - return -EFAULT; - return 0; - } + struct cardinfo *card = bdev->bd_disk->private_data; + int size = card->mm_size * (1024 / MM_HARDSECT); - return -EINVAL; + /* + * get geometry: we have to fake one... trim the size to a + * multiple of 2048 (1M): tell we have 32 sectors, 64 heads, + * whatever cylinders. + */ + geo->heads = 64; + geo->sectors = 32; + geo->cylinders = size / (geo->heads * geo->sectors); + return 0; } + /* ----------------------------------------------------------------------------------- -- mm_check_change @@ -855,7 +844,7 @@ static int mm_check_change(struct gendisk *disk) */ static struct block_device_operations mm_fops = { .owner = THIS_MODULE, - .ioctl = mm_ioctl, + .getgeo = mm_getgeo, .revalidate_disk= mm_revalidate, .media_changed = mm_check_change, }; diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index 063f0304a163..d1aaf31bd97e 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -247,43 +247,17 @@ static int viodasd_release(struct inode *ino, struct file *fil) /* External ioctl entry point. */ -static int viodasd_ioctl(struct inode *ino, struct file *fil, - unsigned int cmd, unsigned long arg) +static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - unsigned char sectors; - unsigned char heads; - unsigned short cylinders; - struct hd_geometry *geo; - struct gendisk *gendisk; - struct viodasd_device *d; + struct gendisk *disk = bdev->bd_disk; + struct viodasd_device *d = disk->private_data; - switch (cmd) { - case HDIO_GETGEO: - geo = (struct hd_geometry *)arg; - if (geo == NULL) - return -EINVAL; - if (!access_ok(VERIFY_WRITE, geo, sizeof(*geo))) - return -EFAULT; - gendisk = ino->i_bdev->bd_disk; - d = gendisk->private_data; - sectors = d->sectors; - if (sectors == 0) - sectors = 32; - heads = d->tracks; - if (heads == 0) - heads = 64; - cylinders = d->cylinders; - if (cylinders == 0) - cylinders = get_capacity(gendisk) / (sectors * heads); - if (__put_user(sectors, &geo->sectors) || - __put_user(heads, &geo->heads) || - __put_user(cylinders, &geo->cylinders) || - __put_user(get_start_sect(ino->i_bdev), &geo->start)) - return -EFAULT; - return 0; - } + geo->sectors = d->sectors ? d->sectors : 0; + geo->heads = d->tracks ? d->tracks : 64; + geo->cylinders = d->cylinders ? d->cylinders : + get_capacity(disk) / (geo->cylinders * geo->heads); - return -EINVAL; + return 0; } /* @@ -293,7 +267,7 @@ static struct block_device_operations viodasd_fops = { .owner = THIS_MODULE, .open = viodasd_open, .release = viodasd_release, - .ioctl = viodasd_ioctl, + .getgeo = viodasd_getgeo, }; /* diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 68b6d7b154cf..cbce7c5e9445 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -128,9 +128,12 @@ static DEFINE_SPINLOCK(xd_lock); static struct gendisk *xd_gendisk[2]; +static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo); + static struct block_device_operations xd_fops = { .owner = THIS_MODULE, .ioctl = xd_ioctl, + .getgeo = xd_getgeo, }; static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int); static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors; @@ -276,11 +279,11 @@ static u_char __init xd_detect (u_char *controller, unsigned int *address) return(1); } - for (i = 0; i < (sizeof(xd_bases) / sizeof(xd_bases[0])); i++) { + for (i = 0; i < ARRAY_SIZE(xd_bases); i++) { void __iomem *p = ioremap(xd_bases[i], 0x2000); if (!p) continue; - for (j = 1; j < (sizeof(xd_sigs) / sizeof(xd_sigs[0])); j++) { + for (j = 1; j < ARRAY_SIZE(xd_sigs); j++) { const char *s = xd_sigs[j].string; if (check_signature(p + xd_sigs[j].offset, s, strlen(s))) { *controller = j; @@ -330,22 +333,20 @@ static void do_xd_request (request_queue_t * q) } } +static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + XD_INFO *p = bdev->bd_disk->private_data; + + geo->heads = p->heads; + geo->sectors = p->sectors; + geo->cylinders = p->cylinders; + return 0; +} + /* xd_ioctl: handle device ioctl's */ static int xd_ioctl (struct inode *inode,struct file *file,u_int cmd,u_long arg) { - XD_INFO *p = inode->i_bdev->bd_disk->private_data; - switch (cmd) { - case HDIO_GETGEO: - { - struct hd_geometry g; - struct hd_geometry __user *geom= (void __user *)arg; - g.heads = p->heads; - g.sectors = p->sectors; - g.cylinders = p->cylinders; - g.start = get_start_sect(inode->i_bdev); - return copy_to_user(geom, &g, sizeof(g)) ? -EFAULT : 0; - } case HDIO_SET_DMA: if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (xdc_busy) return -EBUSY; @@ -1017,7 +1018,7 @@ static void __init do_xd_setup (int *integers) case 2: if ((integers[2] > 0) && (integers[2] < 16)) xd_irq = integers[2]; case 1: xd_override = 1; - if ((integers[1] >= 0) && (integers[1] < (sizeof(xd_sigs) / sizeof(xd_sigs[0])))) + if ((integers[1] >= 0) && (integers[1] < ARRAY_SIZE(xd_sigs))) xd_type = integers[1]; case 0: break; default:printk("xd: too many parameters for xd\n"); diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 5ebd06b1b4ca..dd7e6901c575 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -220,6 +220,14 @@ config SYNCLINKMP The module will be called synclinkmp. If you want to do that, say M here. +config SYNCLINK_GT + tristate "SyncLink GT/AC support" + depends on SERIAL_NONSTANDARD + help + Support for SyncLink GT and SyncLink AC families of + synchronous and asynchronous serial adapters + manufactured by Microgate Systems, Ltd. (www.microgate.com) + config N_HDLC tristate "HDLC line discipline support" depends on SERIAL_NONSTANDARD @@ -687,7 +695,7 @@ config NVRAM config RTC tristate "Enhanced Real Time Clock Support" - depends on !PPC32 && !PARISC && !IA64 && !M68K && (!SPARC || PCI) + depends on !PPC32 && !PARISC && !IA64 && !M68K && (!SPARC || PCI) && !FRV ---help--- If you say Y here and create a character special file /dev/rtc with major number 10 and minor number 135 using mknod ("man mknod"), you @@ -735,7 +743,7 @@ config SGI_IP27_RTC config GEN_RTC tristate "Generic /dev/rtc emulation" - depends on RTC!=y && !IA64 && !ARM && !M32R && !SPARC + depends on RTC!=y && !IA64 && !ARM && !M32R && !SPARC && !FRV ---help--- If you say Y here and create a character special file /dev/rtc with major number 10 and minor number 135 using mknod ("man mknod"), you diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 4aeae687e88a..d973d14d8f7f 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_RISCOM8) += riscom8.o obj-$(CONFIG_ISI) += isicom.o obj-$(CONFIG_SYNCLINK) += synclink.o obj-$(CONFIG_SYNCLINKMP) += synclinkmp.o +obj-$(CONFIG_SYNCLINK_GT) += synclink_gt.o obj-$(CONFIG_N_HDLC) += n_hdlc.o obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o obj-$(CONFIG_SX) += sx.o generic_serial.o diff --git a/drivers/char/agp/sworks-agp.c b/drivers/char/agp/sworks-agp.c index 3f8f7fa6b0ff..268f78d926d3 100644 --- a/drivers/char/agp/sworks-agp.c +++ b/drivers/char/agp/sworks-agp.c @@ -7,6 +7,7 @@ #include <linux/init.h> #include <linux/string.h> #include <linux/slab.h> +#include <linux/jiffies.h> #include <linux/agp_backend.h> #include "agp.h" diff --git a/drivers/char/hw_random.c b/drivers/char/hw_random.c index 49769f59ea1b..b3bc2e37e616 100644 --- a/drivers/char/hw_random.c +++ b/drivers/char/hw_random.c @@ -169,6 +169,7 @@ static struct pci_device_id rng_pci_tbl[] = { { 0x8086, 0x2418, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, { 0x8086, 0x2428, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, + { 0x8086, 0x2430, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, { 0x8086, 0x2448, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, { 0x8086, 0x244e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, { 0x8086, 0x245e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel }, diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 91dd669273e0..5b2d18035073 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -101,6 +101,11 @@ static inline int valid_phys_addr_range(unsigned long addr, size_t *count) return 1; } + +static inline int valid_mmap_phys_addr_range(unsigned long addr, size_t *size) +{ + return 1; +} #endif /* @@ -228,26 +233,36 @@ static ssize_t write_mem(struct file * file, const char __user * buf, return written; } +#ifndef __HAVE_PHYS_MEM_ACCESS_PROT +static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ +#ifdef pgprot_noncached + unsigned long offset = pfn << PAGE_SHIFT; + + if (uncached_access(file, offset)) + return pgprot_noncached(vma_prot); +#endif + return vma_prot; +} +#endif + static int mmap_mem(struct file * file, struct vm_area_struct * vma) { -#if defined(__HAVE_PHYS_MEM_ACCESS_PROT) + size_t size = vma->vm_end - vma->vm_start; + + if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, &size)) + return -EINVAL; + vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, - vma->vm_end - vma->vm_start, + size, vma->vm_page_prot); -#elif defined(pgprot_noncached) - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - int uncached; - - uncached = uncached_access(file, offset); - if (uncached) - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -#endif /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */ if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end-vma->vm_start, + size, vma->vm_page_prot)) return -EAGAIN; return 0; @@ -817,7 +832,7 @@ static ssize_t kmsg_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { char *tmp; - int ret; + ssize_t ret; tmp = kmalloc(count + 1, GFP_KERNEL); if (tmp == NULL) @@ -826,6 +841,9 @@ static ssize_t kmsg_write(struct file * file, const char __user * buf, if (!copy_from_user(tmp, buf, count)) { tmp[count] = 0; ret = printk("%s", tmp); + if (ret > count) + /* printk can add a prefix */ + ret = count; } kfree(tmp); return ret; diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index 51a07370e636..f8dd8527c6aa 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -471,7 +471,6 @@ struct sonypi_keypress { static struct sonypi_device { struct pci_dev *dev; - struct platform_device *pdev; u16 irq; u16 bits; u16 ioport1; @@ -511,6 +510,11 @@ static struct sonypi_device { #define SONYPI_ACPI_ACTIVE 0 #endif /* CONFIG_ACPI */ +#ifdef CONFIG_ACPI +static struct acpi_device *sonypi_acpi_device; +static int acpi_enabled; +#endif + static int sonypi_ec_write(u8 addr, u8 value) { #ifdef CONFIG_ACPI_EC @@ -864,6 +868,11 @@ found: if (useinput) sonypi_report_input_event(event); +#ifdef CONFIG_ACPI + if (acpi_enabled) + acpi_bus_generate_event(sonypi_acpi_device, 1, event); +#endif + kfifo_put(sonypi_device.fifo, (unsigned char *)&event, sizeof(event)); kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN); wake_up_interruptible(&sonypi_device.fifo_proc_list); @@ -1165,45 +1174,38 @@ static int sonypi_disable(void) return 0; } -#ifdef CONFIG_PM -static int old_camera_power; - -static int sonypi_suspend(struct platform_device *dev, pm_message_t state) +#ifdef CONFIG_ACPI +static int sonypi_acpi_add(struct acpi_device *device) { - old_camera_power = sonypi_device.camera_power; - sonypi_disable(); - + sonypi_acpi_device = device; + strcpy(acpi_device_name(device), "Sony laptop hotkeys"); + strcpy(acpi_device_class(device), "sony/hotkey"); return 0; } -static int sonypi_resume(struct platform_device *dev) +static int sonypi_acpi_remove(struct acpi_device *device, int type) { - sonypi_enable(old_camera_power); + sonypi_acpi_device = NULL; return 0; } -#endif - -static void sonypi_shutdown(struct platform_device *dev) -{ - sonypi_disable(); -} -static struct platform_driver sonypi_driver = { -#ifdef CONFIG_PM - .suspend = sonypi_suspend, - .resume = sonypi_resume, -#endif - .shutdown = sonypi_shutdown, - .driver = { - .name = "sonypi", +static struct acpi_driver sonypi_acpi_driver = { + .name = "sonypi", + .class = "hkey", + .ids = "SNY6001", + .ops = { + .add = sonypi_acpi_add, + .remove = sonypi_acpi_remove, }, }; +#endif static int __devinit sonypi_create_input_devices(void) { struct input_dev *jog_dev; struct input_dev *key_dev; int i; + int error; sonypi_device.input_jog_dev = jog_dev = input_allocate_device(); if (!jog_dev) @@ -1219,9 +1221,8 @@ static int __devinit sonypi_create_input_devices(void) sonypi_device.input_key_dev = key_dev = input_allocate_device(); if (!key_dev) { - input_free_device(jog_dev); - sonypi_device.input_jog_dev = NULL; - return -ENOMEM; + error = -ENOMEM; + goto err_free_jogdev; } key_dev->name = "Sony Vaio Keys"; @@ -1234,56 +1235,122 @@ static int __devinit sonypi_create_input_devices(void) if (sonypi_inputkeys[i].inputev) set_bit(sonypi_inputkeys[i].inputev, key_dev->keybit); - input_register_device(jog_dev); - input_register_device(key_dev); + error = input_register_device(jog_dev); + if (error) + goto err_free_keydev; + + error = input_register_device(key_dev); + if (error) + goto err_unregister_jogdev; return 0; + + err_unregister_jogdev: + input_unregister_device(jog_dev); + /* Set to NULL so we don't free it again below */ + jog_dev = NULL; + err_free_keydev: + input_free_device(key_dev); + sonypi_device.input_key_dev = NULL; + err_free_jogdev: + input_free_device(jog_dev); + sonypi_device.input_jog_dev = NULL; + + return error; } -static int __devinit sonypi_probe(void) +static int __devinit sonypi_setup_ioports(struct sonypi_device *dev, + const struct sonypi_ioport_list *ioport_list) { - int i, ret; - struct sonypi_ioport_list *ioport_list; - struct sonypi_irq_list *irq_list; - struct pci_dev *pcidev; + while (ioport_list->port1) { - if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82371AB_3, NULL))) - sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE1; - else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_ICH6_1, NULL))) - sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE3; - else - sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE2; + if (request_region(ioport_list->port1, + sonypi_device.region_size, + "Sony Programable I/O Device")) { + dev->ioport1 = ioport_list->port1; + dev->ioport2 = ioport_list->port2; + return 0; + } + ioport_list++; + } - sonypi_device.dev = pcidev; + return -EBUSY; +} + +static int __devinit sonypi_setup_irq(struct sonypi_device *dev, + const struct sonypi_irq_list *irq_list) +{ + while (irq_list->irq) { + + if (!request_irq(irq_list->irq, sonypi_irq, + SA_SHIRQ, "sonypi", sonypi_irq)) { + dev->irq = irq_list->irq; + dev->bits = irq_list->bits; + return 0; + } + irq_list++; + } + + return -EBUSY; +} + +static void __devinit sonypi_display_info(void) +{ + printk(KERN_INFO "sonypi: detected type%d model, " + "verbose = %d, fnkeyinit = %s, camera = %s, " + "compat = %s, mask = 0x%08lx, useinput = %s, acpi = %s\n", + sonypi_device.model, + verbose, + fnkeyinit ? "on" : "off", + camera ? "on" : "off", + compat ? "on" : "off", + mask, + useinput ? "on" : "off", + SONYPI_ACPI_ACTIVE ? "on" : "off"); + printk(KERN_INFO "sonypi: enabled at irq=%d, port1=0x%x, port2=0x%x\n", + sonypi_device.irq, + sonypi_device.ioport1, sonypi_device.ioport2); + + if (minor == -1) + printk(KERN_INFO "sonypi: device allocated minor is %d\n", + sonypi_misc_device.minor); +} + +static int __devinit sonypi_probe(struct platform_device *dev) +{ + const struct sonypi_ioport_list *ioport_list; + const struct sonypi_irq_list *irq_list; + struct pci_dev *pcidev; + int error; spin_lock_init(&sonypi_device.fifo_lock); sonypi_device.fifo = kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL, &sonypi_device.fifo_lock); if (IS_ERR(sonypi_device.fifo)) { printk(KERN_ERR "sonypi: kfifo_alloc failed\n"); - ret = PTR_ERR(sonypi_device.fifo); - goto out_fifo; + return PTR_ERR(sonypi_device.fifo); } init_waitqueue_head(&sonypi_device.fifo_proc_list); init_MUTEX(&sonypi_device.lock); sonypi_device.bluetooth_power = -1; + if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82371AB_3, NULL))) + sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE1; + else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_ICH6_1, NULL))) + sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE3; + else + sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE2; + if (pcidev && pci_enable_device(pcidev)) { printk(KERN_ERR "sonypi: pci_enable_device failed\n"); - ret = -EIO; - goto out_pcienable; - } - - if (minor != -1) - sonypi_misc_device.minor = minor; - if ((ret = misc_register(&sonypi_misc_device))) { - printk(KERN_ERR "sonypi: misc_register failed\n"); - goto out_miscreg; + error = -EIO; + goto err_put_pcidev; } + sonypi_device.dev = pcidev; if (sonypi_device.model == SONYPI_DEVICE_MODEL_TYPE1) { ioport_list = sonypi_type1_ioport_list; @@ -1302,43 +1369,36 @@ static int __devinit sonypi_probe(void) irq_list = sonypi_type3_irq_list; } - for (i = 0; ioport_list[i].port1; i++) { - if (request_region(ioport_list[i].port1, - sonypi_device.region_size, - "Sony Programable I/O Device")) { - /* get the ioport */ - sonypi_device.ioport1 = ioport_list[i].port1; - sonypi_device.ioport2 = ioport_list[i].port2; - break; - } - } - if (!sonypi_device.ioport1) { - printk(KERN_ERR "sonypi: request_region failed\n"); - ret = -ENODEV; - goto out_reqreg; + error = sonypi_setup_ioports(&sonypi_device, ioport_list); + if (error) { + printk(KERN_ERR "sonypi: failed to request ioports\n"); + goto err_disable_pcidev; } - for (i = 0; irq_list[i].irq; i++) { - - sonypi_device.irq = irq_list[i].irq; - sonypi_device.bits = irq_list[i].bits; - - if (!request_irq(sonypi_device.irq, sonypi_irq, - SA_SHIRQ, "sonypi", sonypi_irq)) - break; + error = sonypi_setup_irq(&sonypi_device, irq_list); + if (error) { + printk(KERN_ERR "sonypi: request_irq failed\n"); + goto err_free_ioports; } - if (!irq_list[i].irq) { - printk(KERN_ERR "sonypi: request_irq failed\n"); - ret = -ENODEV; - goto out_reqirq; + if (minor != -1) + sonypi_misc_device.minor = minor; + error = misc_register(&sonypi_misc_device); + if (error) { + printk(KERN_ERR "sonypi: misc_register failed\n"); + goto err_free_irq; } + sonypi_display_info(); + if (useinput) { - ret = sonypi_create_input_devices(); - if (ret) - goto out_inputdevices; + error = sonypi_create_input_devices(); + if (error) { + printk(KERN_ERR + "sonypi: failed to create input devices\n"); + goto err_miscdev_unregister; + } spin_lock_init(&sonypi_device.input_fifo_lock); sonypi_device.input_fifo = @@ -1346,91 +1406,104 @@ static int __devinit sonypi_probe(void) &sonypi_device.input_fifo_lock); if (IS_ERR(sonypi_device.input_fifo)) { printk(KERN_ERR "sonypi: kfifo_alloc failed\n"); - ret = PTR_ERR(sonypi_device.input_fifo); - goto out_infifo; + error = PTR_ERR(sonypi_device.input_fifo); + goto err_inpdev_unregister; } INIT_WORK(&sonypi_device.input_work, input_keyrelease, NULL); } - sonypi_device.pdev = platform_device_register_simple("sonypi", -1, - NULL, 0); - if (IS_ERR(sonypi_device.pdev)) { - ret = PTR_ERR(sonypi_device.pdev); - goto out_platformdev; - } - sonypi_enable(0); - printk(KERN_INFO "sonypi: Sony Programmable I/O Controller Driver" - "v%s.\n", SONYPI_DRIVER_VERSION); - printk(KERN_INFO "sonypi: detected type%d model, " - "verbose = %d, fnkeyinit = %s, camera = %s, " - "compat = %s, mask = 0x%08lx, useinput = %s, acpi = %s\n", - sonypi_device.model, - verbose, - fnkeyinit ? "on" : "off", - camera ? "on" : "off", - compat ? "on" : "off", - mask, - useinput ? "on" : "off", - SONYPI_ACPI_ACTIVE ? "on" : "off"); - printk(KERN_INFO "sonypi: enabled at irq=%d, port1=0x%x, port2=0x%x\n", - sonypi_device.irq, - sonypi_device.ioport1, sonypi_device.ioport2); - - if (minor == -1) - printk(KERN_INFO "sonypi: device allocated minor is %d\n", - sonypi_misc_device.minor); - return 0; -out_platformdev: - kfifo_free(sonypi_device.input_fifo); -out_infifo: + err_inpdev_unregister: input_unregister_device(sonypi_device.input_key_dev); input_unregister_device(sonypi_device.input_jog_dev); -out_inputdevices: + err_miscdev_unregister: + misc_deregister(&sonypi_misc_device); + err_free_irq: free_irq(sonypi_device.irq, sonypi_irq); -out_reqirq: + err_free_ioports: release_region(sonypi_device.ioport1, sonypi_device.region_size); -out_reqreg: - misc_deregister(&sonypi_misc_device); -out_miscreg: + err_disable_pcidev: if (pcidev) pci_disable_device(pcidev); -out_pcienable: + err_put_pcidev: + pci_dev_put(pcidev); kfifo_free(sonypi_device.fifo); -out_fifo: - pci_dev_put(sonypi_device.dev); - return ret; + + return error; } -static void __devexit sonypi_remove(void) +static int __devexit sonypi_remove(struct platform_device *dev) { sonypi_disable(); synchronize_sched(); /* Allow sonypi interrupt to complete. */ flush_scheduled_work(); - platform_device_unregister(sonypi_device.pdev); - if (useinput) { input_unregister_device(sonypi_device.input_key_dev); input_unregister_device(sonypi_device.input_jog_dev); kfifo_free(sonypi_device.input_fifo); } + misc_deregister(&sonypi_misc_device); + free_irq(sonypi_device.irq, sonypi_irq); release_region(sonypi_device.ioport1, sonypi_device.region_size); - misc_deregister(&sonypi_misc_device); - if (sonypi_device.dev) + + if (sonypi_device.dev) { pci_disable_device(sonypi_device.dev); + pci_dev_put(sonypi_device.dev); + } + kfifo_free(sonypi_device.fifo); - pci_dev_put(sonypi_device.dev); - printk(KERN_INFO "sonypi: removed.\n"); + + return 0; } +#ifdef CONFIG_PM +static int old_camera_power; + +static int sonypi_suspend(struct platform_device *dev, pm_message_t state) +{ + old_camera_power = sonypi_device.camera_power; + sonypi_disable(); + + return 0; +} + +static int sonypi_resume(struct platform_device *dev) +{ + sonypi_enable(old_camera_power); + return 0; +} +#else +#define sonypi_suspend NULL +#define sonypi_resume NULL +#endif + +static void sonypi_shutdown(struct platform_device *dev) +{ + sonypi_disable(); +} + +static struct platform_driver sonypi_driver = { + .driver = { + .name = "sonypi", + .owner = THIS_MODULE, + }, + .probe = sonypi_probe, + .remove = __devexit_p(sonypi_remove), + .shutdown = sonypi_shutdown, + .suspend = sonypi_suspend, + .resume = sonypi_resume, +}; + +static struct platform_device *sonypi_platform_device; + static struct dmi_system_id __initdata sonypi_dmi_table[] = { { .ident = "Sony Vaio", @@ -1451,26 +1524,52 @@ static struct dmi_system_id __initdata sonypi_dmi_table[] = { static int __init sonypi_init(void) { - int ret; + int error; + + printk(KERN_INFO + "sonypi: Sony Programmable I/O Controller Driver v%s.\n", + SONYPI_DRIVER_VERSION); if (!dmi_check_system(sonypi_dmi_table)) return -ENODEV; - ret = platform_driver_register(&sonypi_driver); - if (ret) - return ret; + error = platform_driver_register(&sonypi_driver); + if (error) + return error; - ret = sonypi_probe(); - if (ret) - platform_driver_unregister(&sonypi_driver); + sonypi_platform_device = platform_device_alloc("sonypi", -1); + if (!sonypi_platform_device) { + error = -ENOMEM; + goto err_driver_unregister; + } - return ret; + error = platform_device_add(sonypi_platform_device); + if (error) + goto err_free_device; + +#ifdef CONFIG_ACPI + if (acpi_bus_register_driver(&sonypi_acpi_driver) > 0) + acpi_enabled = 1; +#endif + + return 0; + + err_free_device: + platform_device_put(sonypi_platform_device); + err_driver_unregister: + platform_driver_unregister(&sonypi_driver); + return error; } static void __exit sonypi_exit(void) { +#ifdef CONFIG_ACPI + if (acpi_enabled) + acpi_bus_unregister_driver(&sonypi_acpi_driver); +#endif + platform_device_unregister(sonypi_platform_device); platform_driver_unregister(&sonypi_driver); - sonypi_remove(); + printk(KERN_INFO "sonypi: removed.\n"); } module_init(sonypi_init); diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c new file mode 100644 index 000000000000..2b9cde94e2f7 --- /dev/null +++ b/drivers/char/synclink_gt.c @@ -0,0 +1,4501 @@ +/* + * $Id: synclink_gt.c,v 4.20 2005/11/08 19:51:55 paulkf Exp $ + * + * Device driver for Microgate SyncLink GT serial adapters. + * + * written by Paul Fulghum for Microgate Corporation + * paulkf@microgate.com + * + * Microgate and SyncLink are trademarks of Microgate Corporation + * + * This code is released under the GNU General Public License (GPL) + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * DEBUG OUTPUT DEFINITIONS + * + * uncomment lines below to enable specific types of debug output + * + * DBGINFO information - most verbose output + * DBGERR serious errors + * DBGBH bottom half service routine debugging + * DBGISR interrupt service routine debugging + * DBGDATA output receive and transmit data + * DBGTBUF output transmit DMA buffers and registers + * DBGRBUF output receive DMA buffers and registers + */ + +#define DBGINFO(fmt) if (debug_level >= DEBUG_LEVEL_INFO) printk fmt +#define DBGERR(fmt) if (debug_level >= DEBUG_LEVEL_ERROR) printk fmt +#define DBGBH(fmt) if (debug_level >= DEBUG_LEVEL_BH) printk fmt +#define DBGISR(fmt) if (debug_level >= DEBUG_LEVEL_ISR) printk fmt +#define DBGDATA(info, buf, size, label) if (debug_level >= DEBUG_LEVEL_DATA) trace_block((info), (buf), (size), (label)) +//#define DBGTBUF(info) dump_tbufs(info) +//#define DBGRBUF(info) dump_rbufs(info) + + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/version.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> +#include <linux/serial.h> +#include <linux/major.h> +#include <linux/string.h> +#include <linux/fcntl.h> +#include <linux/ptrace.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/netdevice.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/ioctl.h> +#include <linux/termios.h> +#include <linux/bitops.h> +#include <linux/workqueue.h> +#include <linux/hdlc.h> + +#include <asm/serial.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/dma.h> +#include <asm/types.h> +#include <asm/uaccess.h> + +#include "linux/synclink.h" + +#ifdef CONFIG_HDLC_MODULE +#define CONFIG_HDLC 1 +#endif + +/* + * module identification + */ +static char *driver_name = "SyncLink GT"; +static char *driver_version = "$Revision: 4.20 $"; +static char *tty_driver_name = "synclink_gt"; +static char *tty_dev_prefix = "ttySLG"; +MODULE_LICENSE("GPL"); +#define MGSL_MAGIC 0x5401 +#define MAX_DEVICES 12 + +static struct pci_device_id pci_table[] = { + {PCI_VENDOR_ID_MICROGATE, SYNCLINK_GT_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, + {PCI_VENDOR_ID_MICROGATE, SYNCLINK_GT4_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, + {PCI_VENDOR_ID_MICROGATE, SYNCLINK_AC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, + {0,}, /* terminate list */ +}; +MODULE_DEVICE_TABLE(pci, pci_table); + +static int init_one(struct pci_dev *dev,const struct pci_device_id *ent); +static void remove_one(struct pci_dev *dev); +static struct pci_driver pci_driver = { + .name = "synclink_gt", + .id_table = pci_table, + .probe = init_one, + .remove = __devexit_p(remove_one), +}; + +static int pci_registered; + +/* + * module configuration and status + */ +static struct slgt_info *slgt_device_list; +static int slgt_device_count; + +static int ttymajor; +static int debug_level; +static int maxframe[MAX_DEVICES]; +static int dosyncppp[MAX_DEVICES]; + +module_param(ttymajor, int, 0); +module_param(debug_level, int, 0); +module_param_array(maxframe, int, NULL, 0); +module_param_array(dosyncppp, int, NULL, 0); + +MODULE_PARM_DESC(ttymajor, "TTY major device number override: 0=auto assigned"); +MODULE_PARM_DESC(debug_level, "Debug syslog output: 0=disabled, 1 to 5=increasing detail"); +MODULE_PARM_DESC(maxframe, "Maximum frame size used by device (4096 to 65535)"); +MODULE_PARM_DESC(dosyncppp, "Enable synchronous net device, 0=disable 1=enable"); + +/* + * tty support and callbacks + */ +#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) + +static struct tty_driver *serial_driver; + +static int open(struct tty_struct *tty, struct file * filp); +static void close(struct tty_struct *tty, struct file * filp); +static void hangup(struct tty_struct *tty); +static void set_termios(struct tty_struct *tty, struct termios *old_termios); + +static int write(struct tty_struct *tty, const unsigned char *buf, int count); +static void put_char(struct tty_struct *tty, unsigned char ch); +static void send_xchar(struct tty_struct *tty, char ch); +static void wait_until_sent(struct tty_struct *tty, int timeout); +static int write_room(struct tty_struct *tty); +static void flush_chars(struct tty_struct *tty); +static void flush_buffer(struct tty_struct *tty); +static void tx_hold(struct tty_struct *tty); +static void tx_release(struct tty_struct *tty); + +static int ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); +static int read_proc(char *page, char **start, off_t off, int count,int *eof, void *data); +static int chars_in_buffer(struct tty_struct *tty); +static void throttle(struct tty_struct * tty); +static void unthrottle(struct tty_struct * tty); +static void set_break(struct tty_struct *tty, int break_state); + +/* + * generic HDLC support and callbacks + */ +#ifdef CONFIG_HDLC +#define dev_to_port(D) (dev_to_hdlc(D)->priv) +static void hdlcdev_tx_done(struct slgt_info *info); +static void hdlcdev_rx(struct slgt_info *info, char *buf, int size); +static int hdlcdev_init(struct slgt_info *info); +static void hdlcdev_exit(struct slgt_info *info); +#endif + + +/* + * device specific structures, macros and functions + */ + +#define SLGT_MAX_PORTS 4 +#define SLGT_REG_SIZE 256 + +/* + * DMA buffer descriptor and access macros + */ +struct slgt_desc +{ + unsigned short count; + unsigned short status; + unsigned int pbuf; /* physical address of data buffer */ + unsigned int next; /* physical address of next descriptor */ + + /* driver book keeping */ + char *buf; /* virtual address of data buffer */ + unsigned int pdesc; /* physical address of this descriptor */ + dma_addr_t buf_dma_addr; +}; + +#define set_desc_buffer(a,b) (a).pbuf = cpu_to_le32((unsigned int)(b)) +#define set_desc_next(a,b) (a).next = cpu_to_le32((unsigned int)(b)) +#define set_desc_count(a,b)(a).count = cpu_to_le16((unsigned short)(b)) +#define set_desc_eof(a,b) (a).status = cpu_to_le16((b) ? (le16_to_cpu((a).status) | BIT0) : (le16_to_cpu((a).status) & ~BIT0)) +#define desc_count(a) (le16_to_cpu((a).count)) +#define desc_status(a) (le16_to_cpu((a).status)) +#define desc_complete(a) (le16_to_cpu((a).status) & BIT15) +#define desc_eof(a) (le16_to_cpu((a).status) & BIT2) +#define desc_crc_error(a) (le16_to_cpu((a).status) & BIT1) +#define desc_abort(a) (le16_to_cpu((a).status) & BIT0) +#define desc_residue(a) ((le16_to_cpu((a).status) & 0x38) >> 3) + +struct _input_signal_events { + int ri_up; + int ri_down; + int dsr_up; + int dsr_down; + int dcd_up; + int dcd_down; + int cts_up; + int cts_down; +}; + +/* + * device instance data structure + */ +struct slgt_info { + void *if_ptr; /* General purpose pointer (used by SPPP) */ + + struct slgt_info *next_device; /* device list link */ + + int magic; + int flags; + + char device_name[25]; + struct pci_dev *pdev; + + int port_count; /* count of ports on adapter */ + int adapter_num; /* adapter instance number */ + int port_num; /* port instance number */ + + /* array of pointers to port contexts on this adapter */ + struct slgt_info *port_array[SLGT_MAX_PORTS]; + + int count; /* count of opens */ + int line; /* tty line instance number */ + unsigned short close_delay; + unsigned short closing_wait; /* time to wait before closing */ + + struct mgsl_icount icount; + + struct tty_struct *tty; + int timeout; + int x_char; /* xon/xoff character */ + int blocked_open; /* # of blocked opens */ + unsigned int read_status_mask; + unsigned int ignore_status_mask; + + wait_queue_head_t open_wait; + wait_queue_head_t close_wait; + + wait_queue_head_t status_event_wait_q; + wait_queue_head_t event_wait_q; + struct timer_list tx_timer; + struct timer_list rx_timer; + + spinlock_t lock; /* spinlock for synchronizing with ISR */ + + struct work_struct task; + u32 pending_bh; + int bh_requested; + int bh_running; + + int isr_overflow; + int irq_requested; /* nonzero if IRQ requested */ + int irq_occurred; /* for diagnostics use */ + + /* device configuration */ + + unsigned int bus_type; + unsigned int irq_level; + unsigned long irq_flags; + + unsigned char __iomem * reg_addr; /* memory mapped registers address */ + u32 phys_reg_addr; + u32 reg_offset; + int reg_addr_requested; + + MGSL_PARAMS params; /* communications parameters */ + u32 idle_mode; + u32 max_frame_size; /* as set by device config */ + + unsigned int raw_rx_size; + unsigned int if_mode; + + /* device status */ + + int rx_enabled; + int rx_restart; + + int tx_enabled; + int tx_active; + + unsigned char signals; /* serial signal states */ + unsigned int init_error; /* initialization error */ + + unsigned char *tx_buf; + int tx_count; + + char flag_buf[MAX_ASYNC_BUFFER_SIZE]; + char char_buf[MAX_ASYNC_BUFFER_SIZE]; + BOOLEAN drop_rts_on_tx_done; + struct _input_signal_events input_signal_events; + + int dcd_chkcount; /* check counts to prevent */ + int cts_chkcount; /* too many IRQs if a signal */ + int dsr_chkcount; /* is floating */ + int ri_chkcount; + + char *bufs; /* virtual address of DMA buffer lists */ + dma_addr_t bufs_dma_addr; /* physical address of buffer descriptors */ + + unsigned int rbuf_count; + struct slgt_desc *rbufs; + unsigned int rbuf_current; + unsigned int rbuf_index; + + unsigned int tbuf_count; + struct slgt_desc *tbufs; + unsigned int tbuf_current; + unsigned int tbuf_start; + + unsigned char *tmp_rbuf; + unsigned int tmp_rbuf_count; + + /* SPPP/Cisco HDLC device parts */ + + int netcount; + int dosyncppp; + spinlock_t netlock; +#ifdef CONFIG_HDLC + struct net_device *netdev; +#endif + +}; + +static MGSL_PARAMS default_params = { + .mode = MGSL_MODE_HDLC, + .loopback = 0, + .flags = HDLC_FLAG_UNDERRUN_ABORT15, + .encoding = HDLC_ENCODING_NRZI_SPACE, + .clock_speed = 0, + .addr_filter = 0xff, + .crc_type = HDLC_CRC_16_CCITT, + .preamble_length = HDLC_PREAMBLE_LENGTH_8BITS, + .preamble = HDLC_PREAMBLE_PATTERN_NONE, + .data_rate = 9600, + .data_bits = 8, + .stop_bits = 1, + .parity = ASYNC_PARITY_NONE +}; + + +#define BH_RECEIVE 1 +#define BH_TRANSMIT 2 +#define BH_STATUS 4 +#define IO_PIN_SHUTDOWN_LIMIT 100 + +#define DMABUFSIZE 256 +#define DESC_LIST_SIZE 4096 + +#define MASK_PARITY BIT1 +#define MASK_FRAMING BIT2 +#define MASK_BREAK BIT3 +#define MASK_OVERRUN BIT4 + +#define GSR 0x00 /* global status */ +#define TDR 0x80 /* tx data */ +#define RDR 0x80 /* rx data */ +#define TCR 0x82 /* tx control */ +#define TIR 0x84 /* tx idle */ +#define TPR 0x85 /* tx preamble */ +#define RCR 0x86 /* rx control */ +#define VCR 0x88 /* V.24 control */ +#define CCR 0x89 /* clock control */ +#define BDR 0x8a /* baud divisor */ +#define SCR 0x8c /* serial control */ +#define SSR 0x8e /* serial status */ +#define RDCSR 0x90 /* rx DMA control/status */ +#define TDCSR 0x94 /* tx DMA control/status */ +#define RDDAR 0x98 /* rx DMA descriptor address */ +#define TDDAR 0x9c /* tx DMA descriptor address */ + +#define RXIDLE BIT14 +#define RXBREAK BIT14 +#define IRQ_TXDATA BIT13 +#define IRQ_TXIDLE BIT12 +#define IRQ_TXUNDER BIT11 /* HDLC */ +#define IRQ_RXDATA BIT10 +#define IRQ_RXIDLE BIT9 /* HDLC */ +#define IRQ_RXBREAK BIT9 /* async */ +#define IRQ_RXOVER BIT8 +#define IRQ_DSR BIT7 +#define IRQ_CTS BIT6 +#define IRQ_DCD BIT5 +#define IRQ_RI BIT4 +#define IRQ_ALL 0x3ff0 +#define IRQ_MASTER BIT0 + +#define slgt_irq_on(info, mask) \ + wr_reg16((info), SCR, (unsigned short)(rd_reg16((info), SCR) | (mask))) +#define slgt_irq_off(info, mask) \ + wr_reg16((info), SCR, (unsigned short)(rd_reg16((info), SCR) & ~(mask))) + +static __u8 rd_reg8(struct slgt_info *info, unsigned int addr); +static void wr_reg8(struct slgt_info *info, unsigned int addr, __u8 value); +static __u16 rd_reg16(struct slgt_info *info, unsigned int addr); +static void wr_reg16(struct slgt_info *info, unsigned int addr, __u16 value); +static __u32 rd_reg32(struct slgt_info *info, unsigned int addr); +static void wr_reg32(struct slgt_info *info, unsigned int addr, __u32 value); + +static void msc_set_vcr(struct slgt_info *info); + +static int startup(struct slgt_info *info); +static int block_til_ready(struct tty_struct *tty, struct file * filp,struct slgt_info *info); +static void shutdown(struct slgt_info *info); +static void program_hw(struct slgt_info *info); +static void change_params(struct slgt_info *info); + +static int register_test(struct slgt_info *info); +static int irq_test(struct slgt_info *info); +static int loopback_test(struct slgt_info *info); +static int adapter_test(struct slgt_info *info); + +static void reset_adapter(struct slgt_info *info); +static void reset_port(struct slgt_info *info); +static void async_mode(struct slgt_info *info); +static void hdlc_mode(struct slgt_info *info); + +static void rx_stop(struct slgt_info *info); +static void rx_start(struct slgt_info *info); +static void reset_rbufs(struct slgt_info *info); +static void free_rbufs(struct slgt_info *info, unsigned int first, unsigned int last); +static void rdma_reset(struct slgt_info *info); +static int rx_get_frame(struct slgt_info *info); +static int rx_get_buf(struct slgt_info *info); + +static void tx_start(struct slgt_info *info); +static void tx_stop(struct slgt_info *info); +static void tx_set_idle(struct slgt_info *info); +static unsigned int free_tbuf_count(struct slgt_info *info); +static void reset_tbufs(struct slgt_info *info); +static void tdma_reset(struct slgt_info *info); +static void tx_load(struct slgt_info *info, const char *buf, unsigned int count); + +static void get_signals(struct slgt_info *info); +static void set_signals(struct slgt_info *info); +static void enable_loopback(struct slgt_info *info); +static void set_rate(struct slgt_info *info, u32 data_rate); + +static int bh_action(struct slgt_info *info); +static void bh_handler(void* context); +static void bh_transmit(struct slgt_info *info); +static void isr_serial(struct slgt_info *info); +static void isr_rdma(struct slgt_info *info); +static void isr_txeom(struct slgt_info *info, unsigned short status); +static void isr_tdma(struct slgt_info *info); +static irqreturn_t slgt_interrupt(int irq, void *dev_id, struct pt_regs * regs); + +static int alloc_dma_bufs(struct slgt_info *info); +static void free_dma_bufs(struct slgt_info *info); +static int alloc_desc(struct slgt_info *info); +static void free_desc(struct slgt_info *info); +static int alloc_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count); +static void free_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count); + +static int alloc_tmp_rbuf(struct slgt_info *info); +static void free_tmp_rbuf(struct slgt_info *info); + +static void tx_timeout(unsigned long context); +static void rx_timeout(unsigned long context); + +/* + * ioctl handlers + */ +static int get_stats(struct slgt_info *info, struct mgsl_icount __user *user_icount); +static int get_params(struct slgt_info *info, MGSL_PARAMS __user *params); +static int set_params(struct slgt_info *info, MGSL_PARAMS __user *params); +static int get_txidle(struct slgt_info *info, int __user *idle_mode); +static int set_txidle(struct slgt_info *info, int idle_mode); +static int tx_enable(struct slgt_info *info, int enable); +static int tx_abort(struct slgt_info *info); +static int rx_enable(struct slgt_info *info, int enable); +static int modem_input_wait(struct slgt_info *info,int arg); +static int wait_mgsl_event(struct slgt_info *info, int __user *mask_ptr); +static int tiocmget(struct tty_struct *tty, struct file *file); +static int tiocmset(struct tty_struct *tty, struct file *file, + unsigned int set, unsigned int clear); +static void set_break(struct tty_struct *tty, int break_state); +static int get_interface(struct slgt_info *info, int __user *if_mode); +static int set_interface(struct slgt_info *info, int if_mode); + +/* + * driver functions + */ +static void add_device(struct slgt_info *info); +static void device_init(int adapter_num, struct pci_dev *pdev); +static int claim_resources(struct slgt_info *info); +static void release_resources(struct slgt_info *info); + +/* + * DEBUG OUTPUT CODE + */ +#ifndef DBGINFO +#define DBGINFO(fmt) +#endif +#ifndef DBGERR +#define DBGERR(fmt) +#endif +#ifndef DBGBH +#define DBGBH(fmt) +#endif +#ifndef DBGISR +#define DBGISR(fmt) +#endif + +#ifdef DBGDATA +static void trace_block(struct slgt_info *info, const char *data, int count, const char *label) +{ + int i; + int linecount; + printk("%s %s data:\n",info->device_name, label); + while(count) { + linecount = (count > 16) ? 16 : count; + for(i=0; i < linecount; i++) + printk("%02X ",(unsigned char)data[i]); + for(;i<17;i++) + printk(" "); + for(i=0;i<linecount;i++) { + if (data[i]>=040 && data[i]<=0176) + printk("%c",data[i]); + else + printk("."); + } + printk("\n"); + data += linecount; + count -= linecount; + } +} +#else +#define DBGDATA(info, buf, size, label) +#endif + +#ifdef DBGTBUF +static void dump_tbufs(struct slgt_info *info) +{ + int i; + printk("tbuf_current=%d\n", info->tbuf_current); + for (i=0 ; i < info->tbuf_count ; i++) { + printk("%d: count=%04X status=%04X\n", + i, le16_to_cpu(info->tbufs[i].count), le16_to_cpu(info->tbufs[i].status)); + } +} +#else +#define DBGTBUF(info) +#endif + +#ifdef DBGRBUF +static void dump_rbufs(struct slgt_info *info) +{ + int i; + printk("rbuf_current=%d\n", info->rbuf_current); + for (i=0 ; i < info->rbuf_count ; i++) { + printk("%d: count=%04X status=%04X\n", + i, le16_to_cpu(info->rbufs[i].count), le16_to_cpu(info->rbufs[i].status)); + } +} +#else +#define DBGRBUF(info) +#endif + +static inline int sanity_check(struct slgt_info *info, char *devname, const char *name) +{ +#ifdef SANITY_CHECK + if (!info) { + printk("null struct slgt_info for (%s) in %s\n", devname, name); + return 1; + } + if (info->magic != MGSL_MAGIC) { + printk("bad magic number struct slgt_info (%s) in %s\n", devname, name); + return 1; + } +#else + if (!info) + return 1; +#endif + return 0; +} + +/** + * line discipline callback wrappers + * + * The wrappers maintain line discipline references + * while calling into the line discipline. + * + * ldisc_receive_buf - pass receive data to line discipline + */ +static void ldisc_receive_buf(struct tty_struct *tty, + const __u8 *data, char *flags, int count) +{ + struct tty_ldisc *ld; + if (!tty) + return; + ld = tty_ldisc_ref(tty); + if (ld) { + if (ld->receive_buf) + ld->receive_buf(tty, data, flags, count); + tty_ldisc_deref(ld); + } +} + +/* tty callbacks */ + +static int open(struct tty_struct *tty, struct file *filp) +{ + struct slgt_info *info; + int retval, line; + unsigned long flags; + + line = tty->index; + if ((line < 0) || (line >= slgt_device_count)) { + DBGERR(("%s: open with invalid line #%d.\n", driver_name, line)); + return -ENODEV; + } + + info = slgt_device_list; + while(info && info->line != line) + info = info->next_device; + if (sanity_check(info, tty->name, "open")) + return -ENODEV; + if (info->init_error) { + DBGERR(("%s init error=%d\n", info->device_name, info->init_error)); + return -ENODEV; + } + + tty->driver_data = info; + info->tty = tty; + + DBGINFO(("%s open, old ref count = %d\n", info->device_name, info->count)); + + /* If port is closing, signal caller to try again */ + if (tty_hung_up_p(filp) || info->flags & ASYNC_CLOSING){ + if (info->flags & ASYNC_CLOSING) + interruptible_sleep_on(&info->close_wait); + retval = ((info->flags & ASYNC_HUP_NOTIFY) ? + -EAGAIN : -ERESTARTSYS); + goto cleanup; + } + + info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0; + + spin_lock_irqsave(&info->netlock, flags); + if (info->netcount) { + retval = -EBUSY; + spin_unlock_irqrestore(&info->netlock, flags); + goto cleanup; + } + info->count++; + spin_unlock_irqrestore(&info->netlock, flags); + + if (info->count == 1) { + /* 1st open on this device, init hardware */ + retval = startup(info); + if (retval < 0) + goto cleanup; + } + + retval = block_til_ready(tty, filp, info); + if (retval) { + DBGINFO(("%s block_til_ready rc=%d\n", info->device_name, retval)); + goto cleanup; + } + + retval = 0; + +cleanup: + if (retval) { + if (tty->count == 1) + info->tty = NULL; /* tty layer will release tty struct */ + if(info->count) + info->count--; + } + + DBGINFO(("%s open rc=%d\n", info->device_name, retval)); + return retval; +} + +static void close(struct tty_struct *tty, struct file *filp) +{ + struct slgt_info *info = tty->driver_data; + + if (sanity_check(info, tty->name, "close")) + return; + DBGINFO(("%s close entry, count=%d\n", info->device_name, info->count)); + + if (!info->count) + return; + + if (tty_hung_up_p(filp)) + goto cleanup; + + if ((tty->count == 1) && (info->count != 1)) { + /* + * tty->count is 1 and the tty structure will be freed. + * info->count should be one in this case. + * if it's not, correct it so that the port is shutdown. + */ + DBGERR(("%s close: bad refcount; tty->count=1, " + "info->count=%d\n", info->device_name, info->count)); + info->count = 1; + } + + info->count--; + + /* if at least one open remaining, leave hardware active */ + if (info->count) + goto cleanup; + + info->flags |= ASYNC_CLOSING; + + /* set tty->closing to notify line discipline to + * only process XON/XOFF characters. Only the N_TTY + * discipline appears to use this (ppp does not). + */ + tty->closing = 1; + + /* wait for transmit data to clear all layers */ + + if (info->closing_wait != ASYNC_CLOSING_WAIT_NONE) { + DBGINFO(("%s call tty_wait_until_sent\n", info->device_name)); + tty_wait_until_sent(tty, info->closing_wait); + } + + if (info->flags & ASYNC_INITIALIZED) + wait_until_sent(tty, info->timeout); + if (tty->driver->flush_buffer) + tty->driver->flush_buffer(tty); + tty_ldisc_flush(tty); + + shutdown(info); + + tty->closing = 0; + info->tty = NULL; + + if (info->blocked_open) { + if (info->close_delay) { + msleep_interruptible(jiffies_to_msecs(info->close_delay)); + } + wake_up_interruptible(&info->open_wait); + } + + info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); + + wake_up_interruptible(&info->close_wait); + +cleanup: + DBGINFO(("%s close exit, count=%d\n", tty->driver->name, info->count)); +} + +static void hangup(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + + if (sanity_check(info, tty->name, "hangup")) + return; + DBGINFO(("%s hangup\n", info->device_name)); + + flush_buffer(tty); + shutdown(info); + + info->count = 0; + info->flags &= ~ASYNC_NORMAL_ACTIVE; + info->tty = NULL; + + wake_up_interruptible(&info->open_wait); +} + +static void set_termios(struct tty_struct *tty, struct termios *old_termios) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + DBGINFO(("%s set_termios\n", tty->driver->name)); + + /* just return if nothing has changed */ + if ((tty->termios->c_cflag == old_termios->c_cflag) + && (RELEVANT_IFLAG(tty->termios->c_iflag) + == RELEVANT_IFLAG(old_termios->c_iflag))) + return; + + change_params(info); + + /* Handle transition to B0 status */ + if (old_termios->c_cflag & CBAUD && + !(tty->termios->c_cflag & CBAUD)) { + info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR); + spin_lock_irqsave(&info->lock,flags); + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + } + + /* Handle transition away from B0 status */ + if (!(old_termios->c_cflag & CBAUD) && + tty->termios->c_cflag & CBAUD) { + info->signals |= SerialSignal_DTR; + if (!(tty->termios->c_cflag & CRTSCTS) || + !test_bit(TTY_THROTTLED, &tty->flags)) { + info->signals |= SerialSignal_RTS; + } + spin_lock_irqsave(&info->lock,flags); + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + } + + /* Handle turning off CRTSCTS */ + if (old_termios->c_cflag & CRTSCTS && + !(tty->termios->c_cflag & CRTSCTS)) { + tty->hw_stopped = 0; + tx_release(tty); + } +} + +static int write(struct tty_struct *tty, + const unsigned char *buf, int count) +{ + int ret = 0; + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "write")) + goto cleanup; + DBGINFO(("%s write count=%d\n", info->device_name, count)); + + if (!tty || !info->tx_buf) + goto cleanup; + + if (count > info->max_frame_size) { + ret = -EIO; + goto cleanup; + } + + if (!count) + goto cleanup; + + if (info->params.mode == MGSL_MODE_RAW) { + unsigned int bufs_needed = (count/DMABUFSIZE); + unsigned int bufs_free = free_tbuf_count(info); + if (count % DMABUFSIZE) + ++bufs_needed; + if (bufs_needed > bufs_free) + goto cleanup; + } else { + if (info->tx_active) + goto cleanup; + if (info->tx_count) { + /* send accumulated data from send_char() calls */ + /* as frame and wait before accepting more data. */ + tx_load(info, info->tx_buf, info->tx_count); + goto start; + } + } + + ret = info->tx_count = count; + tx_load(info, buf, count); + goto start; + +start: + if (info->tx_count && !tty->stopped && !tty->hw_stopped) { + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_active) + tx_start(info); + spin_unlock_irqrestore(&info->lock,flags); + } + +cleanup: + DBGINFO(("%s write rc=%d\n", info->device_name, ret)); + return ret; +} + +static void put_char(struct tty_struct *tty, unsigned char ch) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "put_char")) + return; + DBGINFO(("%s put_char(%d)\n", info->device_name, ch)); + if (!tty || !info->tx_buf) + return; + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_active && (info->tx_count < info->max_frame_size)) + info->tx_buf[info->tx_count++] = ch; + spin_unlock_irqrestore(&info->lock,flags); +} + +static void send_xchar(struct tty_struct *tty, char ch) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "send_xchar")) + return; + DBGINFO(("%s send_xchar(%d)\n", info->device_name, ch)); + info->x_char = ch; + if (ch) { + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_enabled) + tx_start(info); + spin_unlock_irqrestore(&info->lock,flags); + } +} + +static void wait_until_sent(struct tty_struct *tty, int timeout) +{ + struct slgt_info *info = tty->driver_data; + unsigned long orig_jiffies, char_time; + + if (!info ) + return; + if (sanity_check(info, tty->name, "wait_until_sent")) + return; + DBGINFO(("%s wait_until_sent entry\n", info->device_name)); + if (!(info->flags & ASYNC_INITIALIZED)) + goto exit; + + orig_jiffies = jiffies; + + /* Set check interval to 1/5 of estimated time to + * send a character, and make it at least 1. The check + * interval should also be less than the timeout. + * Note: use tight timings here to satisfy the NIST-PCTS. + */ + + if (info->params.data_rate) { + char_time = info->timeout/(32 * 5); + if (!char_time) + char_time++; + } else + char_time = 1; + + if (timeout) + char_time = min_t(unsigned long, char_time, timeout); + + while (info->tx_active) { + msleep_interruptible(jiffies_to_msecs(char_time)); + if (signal_pending(current)) + break; + if (timeout && time_after(jiffies, orig_jiffies + timeout)) + break; + } + +exit: + DBGINFO(("%s wait_until_sent exit\n", info->device_name)); +} + +static int write_room(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + int ret; + + if (sanity_check(info, tty->name, "write_room")) + return 0; + ret = (info->tx_active) ? 0 : HDLC_MAX_FRAME_SIZE; + DBGINFO(("%s write_room=%d\n", info->device_name, ret)); + return ret; +} + +static void flush_chars(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "flush_chars")) + return; + DBGINFO(("%s flush_chars entry tx_count=%d\n", info->device_name, info->tx_count)); + + if (info->tx_count <= 0 || tty->stopped || + tty->hw_stopped || !info->tx_buf) + return; + + DBGINFO(("%s flush_chars start transmit\n", info->device_name)); + + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_active && info->tx_count) { + tx_load(info, info->tx_buf,info->tx_count); + tx_start(info); + } + spin_unlock_irqrestore(&info->lock,flags); +} + +static void flush_buffer(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "flush_buffer")) + return; + DBGINFO(("%s flush_buffer\n", info->device_name)); + + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_active) + info->tx_count = 0; + spin_unlock_irqrestore(&info->lock,flags); + + wake_up_interruptible(&tty->write_wait); + tty_wakeup(tty); +} + +/* + * throttle (stop) transmitter + */ +static void tx_hold(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "tx_hold")) + return; + DBGINFO(("%s tx_hold\n", info->device_name)); + spin_lock_irqsave(&info->lock,flags); + if (info->tx_enabled && info->params.mode == MGSL_MODE_ASYNC) + tx_stop(info); + spin_unlock_irqrestore(&info->lock,flags); +} + +/* + * release (start) transmitter + */ +static void tx_release(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "tx_release")) + return; + DBGINFO(("%s tx_release\n", info->device_name)); + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_active && info->tx_count) { + tx_load(info, info->tx_buf, info->tx_count); + tx_start(info); + } + spin_unlock_irqrestore(&info->lock,flags); +} + +/* + * Service an IOCTL request + * + * Arguments + * + * tty pointer to tty instance data + * file pointer to associated file object for device + * cmd IOCTL command code + * arg command argument/context + * + * Return 0 if success, otherwise error code + */ +static int ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct slgt_info *info = tty->driver_data; + struct mgsl_icount cnow; /* kernel counter temps */ + struct serial_icounter_struct __user *p_cuser; /* user space */ + unsigned long flags; + void __user *argp = (void __user *)arg; + + if (sanity_check(info, tty->name, "ioctl")) + return -ENODEV; + DBGINFO(("%s ioctl() cmd=%08X\n", info->device_name, cmd)); + + if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) && + (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) { + if (tty->flags & (1 << TTY_IO_ERROR)) + return -EIO; + } + + switch (cmd) { + case MGSL_IOCGPARAMS: + return get_params(info, argp); + case MGSL_IOCSPARAMS: + return set_params(info, argp); + case MGSL_IOCGTXIDLE: + return get_txidle(info, argp); + case MGSL_IOCSTXIDLE: + return set_txidle(info, (int)arg); + case MGSL_IOCTXENABLE: + return tx_enable(info, (int)arg); + case MGSL_IOCRXENABLE: + return rx_enable(info, (int)arg); + case MGSL_IOCTXABORT: + return tx_abort(info); + case MGSL_IOCGSTATS: + return get_stats(info, argp); + case MGSL_IOCWAITEVENT: + return wait_mgsl_event(info, argp); + case TIOCMIWAIT: + return modem_input_wait(info,(int)arg); + case MGSL_IOCGIF: + return get_interface(info, argp); + case MGSL_IOCSIF: + return set_interface(info,(int)arg); + case TIOCGICOUNT: + spin_lock_irqsave(&info->lock,flags); + cnow = info->icount; + spin_unlock_irqrestore(&info->lock,flags); + p_cuser = argp; + if (put_user(cnow.cts, &p_cuser->cts) || + put_user(cnow.dsr, &p_cuser->dsr) || + put_user(cnow.rng, &p_cuser->rng) || + put_user(cnow.dcd, &p_cuser->dcd) || + put_user(cnow.rx, &p_cuser->rx) || + put_user(cnow.tx, &p_cuser->tx) || + put_user(cnow.frame, &p_cuser->frame) || + put_user(cnow.overrun, &p_cuser->overrun) || + put_user(cnow.parity, &p_cuser->parity) || + put_user(cnow.brk, &p_cuser->brk) || + put_user(cnow.buf_overrun, &p_cuser->buf_overrun)) + return -EFAULT; + return 0; + default: + return -ENOIOCTLCMD; + } + return 0; +} + +/* + * proc fs support + */ +static inline int line_info(char *buf, struct slgt_info *info) +{ + char stat_buf[30]; + int ret; + unsigned long flags; + + ret = sprintf(buf, "%s: IO=%08X IRQ=%d MaxFrameSize=%u\n", + info->device_name, info->phys_reg_addr, + info->irq_level, info->max_frame_size); + + /* output current serial signal states */ + spin_lock_irqsave(&info->lock,flags); + get_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + + stat_buf[0] = 0; + stat_buf[1] = 0; + if (info->signals & SerialSignal_RTS) + strcat(stat_buf, "|RTS"); + if (info->signals & SerialSignal_CTS) + strcat(stat_buf, "|CTS"); + if (info->signals & SerialSignal_DTR) + strcat(stat_buf, "|DTR"); + if (info->signals & SerialSignal_DSR) + strcat(stat_buf, "|DSR"); + if (info->signals & SerialSignal_DCD) + strcat(stat_buf, "|CD"); + if (info->signals & SerialSignal_RI) + strcat(stat_buf, "|RI"); + + if (info->params.mode != MGSL_MODE_ASYNC) { + ret += sprintf(buf+ret, "\tHDLC txok:%d rxok:%d", + info->icount.txok, info->icount.rxok); + if (info->icount.txunder) + ret += sprintf(buf+ret, " txunder:%d", info->icount.txunder); + if (info->icount.txabort) + ret += sprintf(buf+ret, " txabort:%d", info->icount.txabort); + if (info->icount.rxshort) + ret += sprintf(buf+ret, " rxshort:%d", info->icount.rxshort); + if (info->icount.rxlong) + ret += sprintf(buf+ret, " rxlong:%d", info->icount.rxlong); + if (info->icount.rxover) + ret += sprintf(buf+ret, " rxover:%d", info->icount.rxover); + if (info->icount.rxcrc) + ret += sprintf(buf+ret, " rxcrc:%d", info->icount.rxcrc); + } else { + ret += sprintf(buf+ret, "\tASYNC tx:%d rx:%d", + info->icount.tx, info->icount.rx); + if (info->icount.frame) + ret += sprintf(buf+ret, " fe:%d", info->icount.frame); + if (info->icount.parity) + ret += sprintf(buf+ret, " pe:%d", info->icount.parity); + if (info->icount.brk) + ret += sprintf(buf+ret, " brk:%d", info->icount.brk); + if (info->icount.overrun) + ret += sprintf(buf+ret, " oe:%d", info->icount.overrun); + } + + /* Append serial signal status to end */ + ret += sprintf(buf+ret, " %s\n", stat_buf+1); + + ret += sprintf(buf+ret, "\ttxactive=%d bh_req=%d bh_run=%d pending_bh=%x\n", + info->tx_active,info->bh_requested,info->bh_running, + info->pending_bh); + + return ret; +} + +/* Called to print information about devices + */ +static int read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int len = 0, l; + off_t begin = 0; + struct slgt_info *info; + + len += sprintf(page, "synclink_gt driver:%s\n", driver_version); + + info = slgt_device_list; + while( info ) { + l = line_info(page + len, info); + len += l; + if (len+begin > off+count) + goto done; + if (len+begin < off) { + begin += len; + len = 0; + } + info = info->next_device; + } + + *eof = 1; +done: + if (off >= len+begin) + return 0; + *start = page + (off-begin); + return ((count < begin+len-off) ? count : begin+len-off); +} + +/* + * return count of bytes in transmit buffer + */ +static int chars_in_buffer(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + if (sanity_check(info, tty->name, "chars_in_buffer")) + return 0; + DBGINFO(("%s chars_in_buffer()=%d\n", info->device_name, info->tx_count)); + return info->tx_count; +} + +/* + * signal remote device to throttle send data (our receive data) + */ +static void throttle(struct tty_struct * tty) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "throttle")) + return; + DBGINFO(("%s throttle\n", info->device_name)); + if (I_IXOFF(tty)) + send_xchar(tty, STOP_CHAR(tty)); + if (tty->termios->c_cflag & CRTSCTS) { + spin_lock_irqsave(&info->lock,flags); + info->signals &= ~SerialSignal_RTS; + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + } +} + +/* + * signal remote device to stop throttling send data (our receive data) + */ +static void unthrottle(struct tty_struct * tty) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "unthrottle")) + return; + DBGINFO(("%s unthrottle\n", info->device_name)); + if (I_IXOFF(tty)) { + if (info->x_char) + info->x_char = 0; + else + send_xchar(tty, START_CHAR(tty)); + } + if (tty->termios->c_cflag & CRTSCTS) { + spin_lock_irqsave(&info->lock,flags); + info->signals |= SerialSignal_RTS; + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + } +} + +/* + * set or clear transmit break condition + * break_state -1=set break condition, 0=clear + */ +static void set_break(struct tty_struct *tty, int break_state) +{ + struct slgt_info *info = tty->driver_data; + unsigned short value; + unsigned long flags; + + if (sanity_check(info, tty->name, "set_break")) + return; + DBGINFO(("%s set_break(%d)\n", info->device_name, break_state)); + + spin_lock_irqsave(&info->lock,flags); + value = rd_reg16(info, TCR); + if (break_state == -1) + value |= BIT6; + else + value &= ~BIT6; + wr_reg16(info, TCR, value); + spin_unlock_irqrestore(&info->lock,flags); +} + +#ifdef CONFIG_HDLC + +/** + * called by generic HDLC layer when protocol selected (PPP, frame relay, etc.) + * set encoding and frame check sequence (FCS) options + * + * dev pointer to network device structure + * encoding serial encoding setting + * parity FCS setting + * + * returns 0 if success, otherwise error code + */ +static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, + unsigned short parity) +{ + struct slgt_info *info = dev_to_port(dev); + unsigned char new_encoding; + unsigned short new_crctype; + + /* return error if TTY interface open */ + if (info->count) + return -EBUSY; + + DBGINFO(("%s hdlcdev_attach\n", info->device_name)); + + switch (encoding) + { + case ENCODING_NRZ: new_encoding = HDLC_ENCODING_NRZ; break; + case ENCODING_NRZI: new_encoding = HDLC_ENCODING_NRZI_SPACE; break; + case ENCODING_FM_MARK: new_encoding = HDLC_ENCODING_BIPHASE_MARK; break; + case ENCODING_FM_SPACE: new_encoding = HDLC_ENCODING_BIPHASE_SPACE; break; + case ENCODING_MANCHESTER: new_encoding = HDLC_ENCODING_BIPHASE_LEVEL; break; + default: return -EINVAL; + } + + switch (parity) + { + case PARITY_NONE: new_crctype = HDLC_CRC_NONE; break; + case PARITY_CRC16_PR1_CCITT: new_crctype = HDLC_CRC_16_CCITT; break; + case PARITY_CRC32_PR1_CCITT: new_crctype = HDLC_CRC_32_CCITT; break; + default: return -EINVAL; + } + + info->params.encoding = new_encoding; + info->params.crc_type = new_crctype;; + + /* if network interface up, reprogram hardware */ + if (info->netcount) + program_hw(info); + + return 0; +} + +/** + * called by generic HDLC layer to send frame + * + * skb socket buffer containing HDLC frame + * dev pointer to network device structure + * + * returns 0 if success, otherwise error code + */ +static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct slgt_info *info = dev_to_port(dev); + struct net_device_stats *stats = hdlc_stats(dev); + unsigned long flags; + + DBGINFO(("%s hdlc_xmit\n", dev->name)); + + /* stop sending until this frame completes */ + netif_stop_queue(dev); + + /* copy data to device buffers */ + info->tx_count = skb->len; + tx_load(info, skb->data, skb->len); + + /* update network statistics */ + stats->tx_packets++; + stats->tx_bytes += skb->len; + + /* done with socket buffer, so free it */ + dev_kfree_skb(skb); + + /* save start time for transmit timeout detection */ + dev->trans_start = jiffies; + + /* start hardware transmitter if necessary */ + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_active) + tx_start(info); + spin_unlock_irqrestore(&info->lock,flags); + + return 0; +} + +/** + * called by network layer when interface enabled + * claim resources and initialize hardware + * + * dev pointer to network device structure + * + * returns 0 if success, otherwise error code + */ +static int hdlcdev_open(struct net_device *dev) +{ + struct slgt_info *info = dev_to_port(dev); + int rc; + unsigned long flags; + + DBGINFO(("%s hdlcdev_open\n", dev->name)); + + /* generic HDLC layer open processing */ + if ((rc = hdlc_open(dev))) + return rc; + + /* arbitrate between network and tty opens */ + spin_lock_irqsave(&info->netlock, flags); + if (info->count != 0 || info->netcount != 0) { + DBGINFO(("%s hdlc_open busy\n", dev->name)); + spin_unlock_irqrestore(&info->netlock, flags); + return -EBUSY; + } + info->netcount=1; + spin_unlock_irqrestore(&info->netlock, flags); + + /* claim resources and init adapter */ + if ((rc = startup(info)) != 0) { + spin_lock_irqsave(&info->netlock, flags); + info->netcount=0; + spin_unlock_irqrestore(&info->netlock, flags); + return rc; + } + + /* assert DTR and RTS, apply hardware settings */ + info->signals |= SerialSignal_RTS + SerialSignal_DTR; + program_hw(info); + + /* enable network layer transmit */ + dev->trans_start = jiffies; + netif_start_queue(dev); + + /* inform generic HDLC layer of current DCD status */ + spin_lock_irqsave(&info->lock, flags); + get_signals(info); + spin_unlock_irqrestore(&info->lock, flags); + hdlc_set_carrier(info->signals & SerialSignal_DCD, dev); + + return 0; +} + +/** + * called by network layer when interface is disabled + * shutdown hardware and release resources + * + * dev pointer to network device structure + * + * returns 0 if success, otherwise error code + */ +static int hdlcdev_close(struct net_device *dev) +{ + struct slgt_info *info = dev_to_port(dev); + unsigned long flags; + + DBGINFO(("%s hdlcdev_close\n", dev->name)); + + netif_stop_queue(dev); + + /* shutdown adapter and release resources */ + shutdown(info); + + hdlc_close(dev); + + spin_lock_irqsave(&info->netlock, flags); + info->netcount=0; + spin_unlock_irqrestore(&info->netlock, flags); + + return 0; +} + +/** + * called by network layer to process IOCTL call to network device + * + * dev pointer to network device structure + * ifr pointer to network interface request structure + * cmd IOCTL command code + * + * returns 0 if success, otherwise error code + */ +static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + const size_t size = sizeof(sync_serial_settings); + sync_serial_settings new_line; + sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync; + struct slgt_info *info = dev_to_port(dev); + unsigned int flags; + + DBGINFO(("%s hdlcdev_ioctl\n", dev->name)); + + /* return error if TTY interface open */ + if (info->count) + return -EBUSY; + + if (cmd != SIOCWANDEV) + return hdlc_ioctl(dev, ifr, cmd); + + switch(ifr->ifr_settings.type) { + case IF_GET_IFACE: /* return current sync_serial_settings */ + + ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL; + if (ifr->ifr_settings.size < size) { + ifr->ifr_settings.size = size; /* data size wanted */ + return -ENOBUFS; + } + + flags = info->params.flags & (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_RXC_DPLL | + HDLC_FLAG_RXC_BRG | HDLC_FLAG_RXC_TXCPIN | + HDLC_FLAG_TXC_TXCPIN | HDLC_FLAG_TXC_DPLL | + HDLC_FLAG_TXC_BRG | HDLC_FLAG_TXC_RXCPIN); + + switch (flags){ + case (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_TXCPIN): new_line.clock_type = CLOCK_EXT; break; + case (HDLC_FLAG_RXC_BRG | HDLC_FLAG_TXC_BRG): new_line.clock_type = CLOCK_INT; break; + case (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_BRG): new_line.clock_type = CLOCK_TXINT; break; + case (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_RXCPIN): new_line.clock_type = CLOCK_TXFROMRX; break; + default: new_line.clock_type = CLOCK_DEFAULT; + } + + new_line.clock_rate = info->params.clock_speed; + new_line.loopback = info->params.loopback ? 1:0; + + if (copy_to_user(line, &new_line, size)) + return -EFAULT; + return 0; + + case IF_IFACE_SYNC_SERIAL: /* set sync_serial_settings */ + + if(!capable(CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(&new_line, line, size)) + return -EFAULT; + + switch (new_line.clock_type) + { + case CLOCK_EXT: flags = HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_TXCPIN; break; + case CLOCK_TXFROMRX: flags = HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_RXCPIN; break; + case CLOCK_INT: flags = HDLC_FLAG_RXC_BRG | HDLC_FLAG_TXC_BRG; break; + case CLOCK_TXINT: flags = HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_BRG; break; + case CLOCK_DEFAULT: flags = info->params.flags & + (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_RXC_DPLL | + HDLC_FLAG_RXC_BRG | HDLC_FLAG_RXC_TXCPIN | + HDLC_FLAG_TXC_TXCPIN | HDLC_FLAG_TXC_DPLL | + HDLC_FLAG_TXC_BRG | HDLC_FLAG_TXC_RXCPIN); break; + default: return -EINVAL; + } + + if (new_line.loopback != 0 && new_line.loopback != 1) + return -EINVAL; + + info->params.flags &= ~(HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_RXC_DPLL | + HDLC_FLAG_RXC_BRG | HDLC_FLAG_RXC_TXCPIN | + HDLC_FLAG_TXC_TXCPIN | HDLC_FLAG_TXC_DPLL | + HDLC_FLAG_TXC_BRG | HDLC_FLAG_TXC_RXCPIN); + info->params.flags |= flags; + + info->params.loopback = new_line.loopback; + + if (flags & (HDLC_FLAG_RXC_BRG | HDLC_FLAG_TXC_BRG)) + info->params.clock_speed = new_line.clock_rate; + else + info->params.clock_speed = 0; + + /* if network interface up, reprogram hardware */ + if (info->netcount) + program_hw(info); + return 0; + + default: + return hdlc_ioctl(dev, ifr, cmd); + } +} + +/** + * called by network layer when transmit timeout is detected + * + * dev pointer to network device structure + */ +static void hdlcdev_tx_timeout(struct net_device *dev) +{ + struct slgt_info *info = dev_to_port(dev); + struct net_device_stats *stats = hdlc_stats(dev); + unsigned long flags; + + DBGINFO(("%s hdlcdev_tx_timeout\n", dev->name)); + + stats->tx_errors++; + stats->tx_aborted_errors++; + + spin_lock_irqsave(&info->lock,flags); + tx_stop(info); + spin_unlock_irqrestore(&info->lock,flags); + + netif_wake_queue(dev); +} + +/** + * called by device driver when transmit completes + * reenable network layer transmit if stopped + * + * info pointer to device instance information + */ +static void hdlcdev_tx_done(struct slgt_info *info) +{ + if (netif_queue_stopped(info->netdev)) + netif_wake_queue(info->netdev); +} + +/** + * called by device driver when frame received + * pass frame to network layer + * + * info pointer to device instance information + * buf pointer to buffer contianing frame data + * size count of data bytes in buf + */ +static void hdlcdev_rx(struct slgt_info *info, char *buf, int size) +{ + struct sk_buff *skb = dev_alloc_skb(size); + struct net_device *dev = info->netdev; + struct net_device_stats *stats = hdlc_stats(dev); + + DBGINFO(("%s hdlcdev_rx\n", dev->name)); + + if (skb == NULL) { + DBGERR(("%s: can't alloc skb, drop packet\n", dev->name)); + stats->rx_dropped++; + return; + } + + memcpy(skb_put(skb, size),buf,size); + + skb->protocol = hdlc_type_trans(skb, info->netdev); + + stats->rx_packets++; + stats->rx_bytes += size; + + netif_rx(skb); + + info->netdev->last_rx = jiffies; +} + +/** + * called by device driver when adding device instance + * do generic HDLC initialization + * + * info pointer to device instance information + * + * returns 0 if success, otherwise error code + */ +static int hdlcdev_init(struct slgt_info *info) +{ + int rc; + struct net_device *dev; + hdlc_device *hdlc; + + /* allocate and initialize network and HDLC layer objects */ + + if (!(dev = alloc_hdlcdev(info))) { + printk(KERN_ERR "%s hdlc device alloc failure\n", info->device_name); + return -ENOMEM; + } + + /* for network layer reporting purposes only */ + dev->mem_start = info->phys_reg_addr; + dev->mem_end = info->phys_reg_addr + SLGT_REG_SIZE - 1; + dev->irq = info->irq_level; + + /* network layer callbacks and settings */ + dev->do_ioctl = hdlcdev_ioctl; + dev->open = hdlcdev_open; + dev->stop = hdlcdev_close; + dev->tx_timeout = hdlcdev_tx_timeout; + dev->watchdog_timeo = 10*HZ; + dev->tx_queue_len = 50; + + /* generic HDLC layer callbacks and settings */ + hdlc = dev_to_hdlc(dev); + hdlc->attach = hdlcdev_attach; + hdlc->xmit = hdlcdev_xmit; + + /* register objects with HDLC layer */ + if ((rc = register_hdlc_device(dev))) { + printk(KERN_WARNING "%s:unable to register hdlc device\n",__FILE__); + free_netdev(dev); + return rc; + } + + info->netdev = dev; + return 0; +} + +/** + * called by device driver when removing device instance + * do generic HDLC cleanup + * + * info pointer to device instance information + */ +static void hdlcdev_exit(struct slgt_info *info) +{ + unregister_hdlc_device(info->netdev); + free_netdev(info->netdev); + info->netdev = NULL; +} + +#endif /* ifdef CONFIG_HDLC */ + +/* + * get async data from rx DMA buffers + */ +static void rx_async(struct slgt_info *info) +{ + struct tty_struct *tty = info->tty; + struct mgsl_icount *icount = &info->icount; + unsigned int start, end; + unsigned char *p; + unsigned char status; + struct slgt_desc *bufs = info->rbufs; + int i, count; + + start = end = info->rbuf_current; + + while(desc_complete(bufs[end])) { + count = desc_count(bufs[end]) - info->rbuf_index; + p = bufs[end].buf + info->rbuf_index; + + DBGISR(("%s rx_async count=%d\n", info->device_name, count)); + DBGDATA(info, p, count, "rx"); + + for(i=0 ; i < count; i+=2, p+=2) { + if (tty) { + if (tty->flip.count >= TTY_FLIPBUF_SIZE) + tty_flip_buffer_push(tty); + if (tty->flip.count >= TTY_FLIPBUF_SIZE) + break; + *tty->flip.char_buf_ptr = *p; + *tty->flip.flag_buf_ptr = 0; + } + icount->rx++; + + if ((status = *(p+1) & (BIT9 + BIT8))) { + if (status & BIT9) + icount->parity++; + else if (status & BIT8) + icount->frame++; + /* discard char if tty control flags say so */ + if (status & info->ignore_status_mask) + continue; + if (tty) { + if (status & BIT9) + *tty->flip.flag_buf_ptr = TTY_PARITY; + else if (status & BIT8) + *tty->flip.flag_buf_ptr = TTY_FRAME; + } + } + if (tty) { + tty->flip.flag_buf_ptr++; + tty->flip.char_buf_ptr++; + tty->flip.count++; + } + } + + if (i < count) { + /* receive buffer not completed */ + info->rbuf_index += i; + info->rx_timer.expires = jiffies + 1; + add_timer(&info->rx_timer); + break; + } + + info->rbuf_index = 0; + free_rbufs(info, end, end); + + if (++end == info->rbuf_count) + end = 0; + + /* if entire list searched then no frame available */ + if (end == start) + break; + } + + if (tty && tty->flip.count) + tty_flip_buffer_push(tty); +} + +/* + * return next bottom half action to perform + */ +static int bh_action(struct slgt_info *info) +{ + unsigned long flags; + int rc; + + spin_lock_irqsave(&info->lock,flags); + + if (info->pending_bh & BH_RECEIVE) { + info->pending_bh &= ~BH_RECEIVE; + rc = BH_RECEIVE; + } else if (info->pending_bh & BH_TRANSMIT) { + info->pending_bh &= ~BH_TRANSMIT; + rc = BH_TRANSMIT; + } else if (info->pending_bh & BH_STATUS) { + info->pending_bh &= ~BH_STATUS; + rc = BH_STATUS; + } else { + /* Mark BH routine as complete */ + info->bh_running = 0; + info->bh_requested = 0; + rc = 0; + } + + spin_unlock_irqrestore(&info->lock,flags); + + return rc; +} + +/* + * perform bottom half processing + */ +static void bh_handler(void* context) +{ + struct slgt_info *info = context; + int action; + + if (!info) + return; + info->bh_running = 1; + + while((action = bh_action(info))) { + switch (action) { + case BH_RECEIVE: + DBGBH(("%s bh receive\n", info->device_name)); + switch(info->params.mode) { + case MGSL_MODE_ASYNC: + rx_async(info); + break; + case MGSL_MODE_HDLC: + while(rx_get_frame(info)); + break; + case MGSL_MODE_RAW: + while(rx_get_buf(info)); + break; + } + /* restart receiver if rx DMA buffers exhausted */ + if (info->rx_restart) + rx_start(info); + break; + case BH_TRANSMIT: + bh_transmit(info); + break; + case BH_STATUS: + DBGBH(("%s bh status\n", info->device_name)); + info->ri_chkcount = 0; + info->dsr_chkcount = 0; + info->dcd_chkcount = 0; + info->cts_chkcount = 0; + break; + default: + DBGBH(("%s unknown action\n", info->device_name)); + break; + } + } + DBGBH(("%s bh_handler exit\n", info->device_name)); +} + +static void bh_transmit(struct slgt_info *info) +{ + struct tty_struct *tty = info->tty; + + DBGBH(("%s bh_transmit\n", info->device_name)); + if (tty) { + tty_wakeup(tty); + wake_up_interruptible(&tty->write_wait); + } +} + +static void dsr_change(struct slgt_info *info) +{ + get_signals(info); + DBGISR(("dsr_change %s signals=%04X\n", info->device_name, info->signals)); + if ((info->dsr_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) { + slgt_irq_off(info, IRQ_DSR); + return; + } + info->icount.dsr++; + if (info->signals & SerialSignal_DSR) + info->input_signal_events.dsr_up++; + else + info->input_signal_events.dsr_down++; + wake_up_interruptible(&info->status_event_wait_q); + wake_up_interruptible(&info->event_wait_q); + info->pending_bh |= BH_STATUS; +} + +static void cts_change(struct slgt_info *info) +{ + get_signals(info); + DBGISR(("cts_change %s signals=%04X\n", info->device_name, info->signals)); + if ((info->cts_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) { + slgt_irq_off(info, IRQ_CTS); + return; + } + info->icount.cts++; + if (info->signals & SerialSignal_CTS) + info->input_signal_events.cts_up++; + else + info->input_signal_events.cts_down++; + wake_up_interruptible(&info->status_event_wait_q); + wake_up_interruptible(&info->event_wait_q); + info->pending_bh |= BH_STATUS; + + if (info->flags & ASYNC_CTS_FLOW) { + if (info->tty) { + if (info->tty->hw_stopped) { + if (info->signals & SerialSignal_CTS) { + info->tty->hw_stopped = 0; + info->pending_bh |= BH_TRANSMIT; + return; + } + } else { + if (!(info->signals & SerialSignal_CTS)) + info->tty->hw_stopped = 1; + } + } + } +} + +static void dcd_change(struct slgt_info *info) +{ + get_signals(info); + DBGISR(("dcd_change %s signals=%04X\n", info->device_name, info->signals)); + if ((info->dcd_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) { + slgt_irq_off(info, IRQ_DCD); + return; + } + info->icount.dcd++; + if (info->signals & SerialSignal_DCD) { + info->input_signal_events.dcd_up++; + } else { + info->input_signal_events.dcd_down++; + } +#ifdef CONFIG_HDLC + if (info->netcount) + hdlc_set_carrier(info->signals & SerialSignal_DCD, info->netdev); +#endif + wake_up_interruptible(&info->status_event_wait_q); + wake_up_interruptible(&info->event_wait_q); + info->pending_bh |= BH_STATUS; + + if (info->flags & ASYNC_CHECK_CD) { + if (info->signals & SerialSignal_DCD) + wake_up_interruptible(&info->open_wait); + else { + if (info->tty) + tty_hangup(info->tty); + } + } +} + +static void ri_change(struct slgt_info *info) +{ + get_signals(info); + DBGISR(("ri_change %s signals=%04X\n", info->device_name, info->signals)); + if ((info->ri_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) { + slgt_irq_off(info, IRQ_RI); + return; + } + info->icount.dcd++; + if (info->signals & SerialSignal_RI) { + info->input_signal_events.ri_up++; + } else { + info->input_signal_events.ri_down++; + } + wake_up_interruptible(&info->status_event_wait_q); + wake_up_interruptible(&info->event_wait_q); + info->pending_bh |= BH_STATUS; +} + +static void isr_serial(struct slgt_info *info) +{ + unsigned short status = rd_reg16(info, SSR); + + DBGISR(("%s isr_serial status=%04X\n", info->device_name, status)); + + wr_reg16(info, SSR, status); /* clear pending */ + + info->irq_occurred = 1; + + if (info->params.mode == MGSL_MODE_ASYNC) { + if (status & IRQ_TXIDLE) { + if (info->tx_count) + isr_txeom(info, status); + } + if ((status & IRQ_RXBREAK) && (status & RXBREAK)) { + info->icount.brk++; + /* process break detection if tty control allows */ + if (info->tty) { + if (!(status & info->ignore_status_mask)) { + if (info->read_status_mask & MASK_BREAK) { + *info->tty->flip.flag_buf_ptr = TTY_BREAK; + if (info->flags & ASYNC_SAK) + do_SAK(info->tty); + } + } + } + } + } else { + if (status & (IRQ_TXIDLE + IRQ_TXUNDER)) + isr_txeom(info, status); + + if (status & IRQ_RXIDLE) { + if (status & RXIDLE) + info->icount.rxidle++; + else + info->icount.exithunt++; + wake_up_interruptible(&info->event_wait_q); + } + + if (status & IRQ_RXOVER) + rx_start(info); + } + + if (status & IRQ_DSR) + dsr_change(info); + if (status & IRQ_CTS) + cts_change(info); + if (status & IRQ_DCD) + dcd_change(info); + if (status & IRQ_RI) + ri_change(info); +} + +static void isr_rdma(struct slgt_info *info) +{ + unsigned int status = rd_reg32(info, RDCSR); + + DBGISR(("%s isr_rdma status=%08x\n", info->device_name, status)); + + /* RDCSR (rx DMA control/status) + * + * 31..07 reserved + * 06 save status byte to DMA buffer + * 05 error + * 04 eol (end of list) + * 03 eob (end of buffer) + * 02 IRQ enable + * 01 reset + * 00 enable + */ + wr_reg32(info, RDCSR, status); /* clear pending */ + + if (status & (BIT5 + BIT4)) { + DBGISR(("%s isr_rdma rx_restart=1\n", info->device_name)); + info->rx_restart = 1; + } + info->pending_bh |= BH_RECEIVE; +} + +static void isr_tdma(struct slgt_info *info) +{ + unsigned int status = rd_reg32(info, TDCSR); + + DBGISR(("%s isr_tdma status=%08x\n", info->device_name, status)); + + /* TDCSR (tx DMA control/status) + * + * 31..06 reserved + * 05 error + * 04 eol (end of list) + * 03 eob (end of buffer) + * 02 IRQ enable + * 01 reset + * 00 enable + */ + wr_reg32(info, TDCSR, status); /* clear pending */ + + if (status & (BIT5 + BIT4 + BIT3)) { + // another transmit buffer has completed + // run bottom half to get more send data from user + info->pending_bh |= BH_TRANSMIT; + } +} + +static void isr_txeom(struct slgt_info *info, unsigned short status) +{ + DBGISR(("%s txeom status=%04x\n", info->device_name, status)); + + slgt_irq_off(info, IRQ_TXDATA + IRQ_TXIDLE + IRQ_TXUNDER); + tdma_reset(info); + reset_tbufs(info); + if (status & IRQ_TXUNDER) { + unsigned short val = rd_reg16(info, TCR); + wr_reg16(info, TCR, (unsigned short)(val | BIT2)); /* set reset bit */ + wr_reg16(info, TCR, val); /* clear reset bit */ + } + + if (info->tx_active) { + if (info->params.mode != MGSL_MODE_ASYNC) { + if (status & IRQ_TXUNDER) + info->icount.txunder++; + else if (status & IRQ_TXIDLE) + info->icount.txok++; + } + + info->tx_active = 0; + info->tx_count = 0; + + del_timer(&info->tx_timer); + + if (info->params.mode != MGSL_MODE_ASYNC && info->drop_rts_on_tx_done) { + info->signals &= ~SerialSignal_RTS; + info->drop_rts_on_tx_done = 0; + set_signals(info); + } + +#ifdef CONFIG_HDLC + if (info->netcount) + hdlcdev_tx_done(info); + else +#endif + { + if (info->tty && (info->tty->stopped || info->tty->hw_stopped)) { + tx_stop(info); + return; + } + info->pending_bh |= BH_TRANSMIT; + } + } +} + +/* interrupt service routine + * + * irq interrupt number + * dev_id device ID supplied during interrupt registration + * regs interrupted processor context + */ +static irqreturn_t slgt_interrupt(int irq, void *dev_id, struct pt_regs * regs) +{ + struct slgt_info *info; + unsigned int gsr; + unsigned int i; + + DBGISR(("slgt_interrupt irq=%d entry\n", irq)); + + info = dev_id; + if (!info) + return IRQ_NONE; + + spin_lock(&info->lock); + + while((gsr = rd_reg32(info, GSR) & 0xffffff00)) { + DBGISR(("%s gsr=%08x\n", info->device_name, gsr)); + info->irq_occurred = 1; + for(i=0; i < info->port_count ; i++) { + if (info->port_array[i] == NULL) + continue; + if (gsr & (BIT8 << i)) + isr_serial(info->port_array[i]); + if (gsr & (BIT16 << (i*2))) + isr_rdma(info->port_array[i]); + if (gsr & (BIT17 << (i*2))) + isr_tdma(info->port_array[i]); + } + } + + for(i=0; i < info->port_count ; i++) { + struct slgt_info *port = info->port_array[i]; + + if (port && (port->count || port->netcount) && + port->pending_bh && !port->bh_running && + !port->bh_requested) { + DBGISR(("%s bh queued\n", port->device_name)); + schedule_work(&port->task); + port->bh_requested = 1; + } + } + + spin_unlock(&info->lock); + + DBGISR(("slgt_interrupt irq=%d exit\n", irq)); + return IRQ_HANDLED; +} + +static int startup(struct slgt_info *info) +{ + DBGINFO(("%s startup\n", info->device_name)); + + if (info->flags & ASYNC_INITIALIZED) + return 0; + + if (!info->tx_buf) { + info->tx_buf = kmalloc(info->max_frame_size, GFP_KERNEL); + if (!info->tx_buf) { + DBGERR(("%s can't allocate tx buffer\n", info->device_name)); + return -ENOMEM; + } + } + + info->pending_bh = 0; + + memset(&info->icount, 0, sizeof(info->icount)); + + /* program hardware for current parameters */ + change_params(info); + + if (info->tty) + clear_bit(TTY_IO_ERROR, &info->tty->flags); + + info->flags |= ASYNC_INITIALIZED; + + return 0; +} + +/* + * called by close() and hangup() to shutdown hardware + */ +static void shutdown(struct slgt_info *info) +{ + unsigned long flags; + + if (!(info->flags & ASYNC_INITIALIZED)) + return; + + DBGINFO(("%s shutdown\n", info->device_name)); + + /* clear status wait queue because status changes */ + /* can't happen after shutting down the hardware */ + wake_up_interruptible(&info->status_event_wait_q); + wake_up_interruptible(&info->event_wait_q); + + del_timer_sync(&info->tx_timer); + del_timer_sync(&info->rx_timer); + + kfree(info->tx_buf); + info->tx_buf = NULL; + + spin_lock_irqsave(&info->lock,flags); + + tx_stop(info); + rx_stop(info); + + slgt_irq_off(info, IRQ_ALL | IRQ_MASTER); + + if (!info->tty || info->tty->termios->c_cflag & HUPCL) { + info->signals &= ~(SerialSignal_DTR + SerialSignal_RTS); + set_signals(info); + } + + spin_unlock_irqrestore(&info->lock,flags); + + if (info->tty) + set_bit(TTY_IO_ERROR, &info->tty->flags); + + info->flags &= ~ASYNC_INITIALIZED; +} + +static void program_hw(struct slgt_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&info->lock,flags); + + rx_stop(info); + tx_stop(info); + + if (info->params.mode == MGSL_MODE_HDLC || + info->params.mode == MGSL_MODE_RAW || + info->netcount) + hdlc_mode(info); + else + async_mode(info); + + set_signals(info); + + info->dcd_chkcount = 0; + info->cts_chkcount = 0; + info->ri_chkcount = 0; + info->dsr_chkcount = 0; + + slgt_irq_on(info, IRQ_DCD | IRQ_CTS | IRQ_DSR); + get_signals(info); + + if (info->netcount || + (info->tty && info->tty->termios->c_cflag & CREAD)) + rx_start(info); + + spin_unlock_irqrestore(&info->lock,flags); +} + +/* + * reconfigure adapter based on new parameters + */ +static void change_params(struct slgt_info *info) +{ + unsigned cflag; + int bits_per_char; + + if (!info->tty || !info->tty->termios) + return; + DBGINFO(("%s change_params\n", info->device_name)); + + cflag = info->tty->termios->c_cflag; + + /* if B0 rate (hangup) specified then negate DTR and RTS */ + /* otherwise assert DTR and RTS */ + if (cflag & CBAUD) + info->signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR); + + /* byte size and parity */ + + switch (cflag & CSIZE) { + case CS5: info->params.data_bits = 5; break; + case CS6: info->params.data_bits = 6; break; + case CS7: info->params.data_bits = 7; break; + case CS8: info->params.data_bits = 8; break; + default: info->params.data_bits = 7; break; + } + + info->params.stop_bits = (cflag & CSTOPB) ? 2 : 1; + + if (cflag & PARENB) + info->params.parity = (cflag & PARODD) ? ASYNC_PARITY_ODD : ASYNC_PARITY_EVEN; + else + info->params.parity = ASYNC_PARITY_NONE; + + /* calculate number of jiffies to transmit a full + * FIFO (32 bytes) at specified data rate + */ + bits_per_char = info->params.data_bits + + info->params.stop_bits + 1; + + info->params.data_rate = tty_get_baud_rate(info->tty); + + if (info->params.data_rate) { + info->timeout = (32*HZ*bits_per_char) / + info->params.data_rate; + } + info->timeout += HZ/50; /* Add .02 seconds of slop */ + + if (cflag & CRTSCTS) + info->flags |= ASYNC_CTS_FLOW; + else + info->flags &= ~ASYNC_CTS_FLOW; + + if (cflag & CLOCAL) + info->flags &= ~ASYNC_CHECK_CD; + else + info->flags |= ASYNC_CHECK_CD; + + /* process tty input control flags */ + + info->read_status_mask = IRQ_RXOVER; + if (I_INPCK(info->tty)) + info->read_status_mask |= MASK_PARITY | MASK_FRAMING; + if (I_BRKINT(info->tty) || I_PARMRK(info->tty)) + info->read_status_mask |= MASK_BREAK; + if (I_IGNPAR(info->tty)) + info->ignore_status_mask |= MASK_PARITY | MASK_FRAMING; + if (I_IGNBRK(info->tty)) { + info->ignore_status_mask |= MASK_BREAK; + /* If ignoring parity and break indicators, ignore + * overruns too. (For real raw support). + */ + if (I_IGNPAR(info->tty)) + info->ignore_status_mask |= MASK_OVERRUN; + } + + program_hw(info); +} + +static int get_stats(struct slgt_info *info, struct mgsl_icount __user *user_icount) +{ + DBGINFO(("%s get_stats\n", info->device_name)); + if (!user_icount) { + memset(&info->icount, 0, sizeof(info->icount)); + } else { + if (copy_to_user(user_icount, &info->icount, sizeof(struct mgsl_icount))) + return -EFAULT; + } + return 0; +} + +static int get_params(struct slgt_info *info, MGSL_PARAMS __user *user_params) +{ + DBGINFO(("%s get_params\n", info->device_name)); + if (copy_to_user(user_params, &info->params, sizeof(MGSL_PARAMS))) + return -EFAULT; + return 0; +} + +static int set_params(struct slgt_info *info, MGSL_PARAMS __user *new_params) +{ + unsigned long flags; + MGSL_PARAMS tmp_params; + + DBGINFO(("%s set_params\n", info->device_name)); + if (copy_from_user(&tmp_params, new_params, sizeof(MGSL_PARAMS))) + return -EFAULT; + + spin_lock_irqsave(&info->lock, flags); + memcpy(&info->params, &tmp_params, sizeof(MGSL_PARAMS)); + spin_unlock_irqrestore(&info->lock, flags); + + change_params(info); + + return 0; +} + +static int get_txidle(struct slgt_info *info, int __user *idle_mode) +{ + DBGINFO(("%s get_txidle=%d\n", info->device_name, info->idle_mode)); + if (put_user(info->idle_mode, idle_mode)) + return -EFAULT; + return 0; +} + +static int set_txidle(struct slgt_info *info, int idle_mode) +{ + unsigned long flags; + DBGINFO(("%s set_txidle(%d)\n", info->device_name, idle_mode)); + spin_lock_irqsave(&info->lock,flags); + info->idle_mode = idle_mode; + tx_set_idle(info); + spin_unlock_irqrestore(&info->lock,flags); + return 0; +} + +static int tx_enable(struct slgt_info *info, int enable) +{ + unsigned long flags; + DBGINFO(("%s tx_enable(%d)\n", info->device_name, enable)); + spin_lock_irqsave(&info->lock,flags); + if (enable) { + if (!info->tx_enabled) + tx_start(info); + } else { + if (info->tx_enabled) + tx_stop(info); + } + spin_unlock_irqrestore(&info->lock,flags); + return 0; +} + +/* + * abort transmit HDLC frame + */ +static int tx_abort(struct slgt_info *info) +{ + unsigned long flags; + DBGINFO(("%s tx_abort\n", info->device_name)); + spin_lock_irqsave(&info->lock,flags); + tdma_reset(info); + spin_unlock_irqrestore(&info->lock,flags); + return 0; +} + +static int rx_enable(struct slgt_info *info, int enable) +{ + unsigned long flags; + DBGINFO(("%s rx_enable(%d)\n", info->device_name, enable)); + spin_lock_irqsave(&info->lock,flags); + if (enable) { + if (!info->rx_enabled) + rx_start(info); + } else { + if (info->rx_enabled) + rx_stop(info); + } + spin_unlock_irqrestore(&info->lock,flags); + return 0; +} + +/* + * wait for specified event to occur + */ +static int wait_mgsl_event(struct slgt_info *info, int __user *mask_ptr) +{ + unsigned long flags; + int s; + int rc=0; + struct mgsl_icount cprev, cnow; + int events; + int mask; + struct _input_signal_events oldsigs, newsigs; + DECLARE_WAITQUEUE(wait, current); + + if (get_user(mask, mask_ptr)) + return -EFAULT; + + DBGINFO(("%s wait_mgsl_event(%d)\n", info->device_name, mask)); + + spin_lock_irqsave(&info->lock,flags); + + /* return immediately if state matches requested events */ + get_signals(info); + s = info->signals; + + events = mask & + ( ((s & SerialSignal_DSR) ? MgslEvent_DsrActive:MgslEvent_DsrInactive) + + ((s & SerialSignal_DCD) ? MgslEvent_DcdActive:MgslEvent_DcdInactive) + + ((s & SerialSignal_CTS) ? MgslEvent_CtsActive:MgslEvent_CtsInactive) + + ((s & SerialSignal_RI) ? MgslEvent_RiActive :MgslEvent_RiInactive) ); + if (events) { + spin_unlock_irqrestore(&info->lock,flags); + goto exit; + } + + /* save current irq counts */ + cprev = info->icount; + oldsigs = info->input_signal_events; + + /* enable hunt and idle irqs if needed */ + if (mask & (MgslEvent_ExitHuntMode+MgslEvent_IdleReceived)) { + unsigned short val = rd_reg16(info, SCR); + if (!(val & IRQ_RXIDLE)) + wr_reg16(info, SCR, (unsigned short)(val | IRQ_RXIDLE)); + } + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&info->event_wait_q, &wait); + + spin_unlock_irqrestore(&info->lock,flags); + + for(;;) { + schedule(); + if (signal_pending(current)) { + rc = -ERESTARTSYS; + break; + } + + /* get current irq counts */ + spin_lock_irqsave(&info->lock,flags); + cnow = info->icount; + newsigs = info->input_signal_events; + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_irqrestore(&info->lock,flags); + + /* if no change, wait aborted for some reason */ + if (newsigs.dsr_up == oldsigs.dsr_up && + newsigs.dsr_down == oldsigs.dsr_down && + newsigs.dcd_up == oldsigs.dcd_up && + newsigs.dcd_down == oldsigs.dcd_down && + newsigs.cts_up == oldsigs.cts_up && + newsigs.cts_down == oldsigs.cts_down && + newsigs.ri_up == oldsigs.ri_up && + newsigs.ri_down == oldsigs.ri_down && + cnow.exithunt == cprev.exithunt && + cnow.rxidle == cprev.rxidle) { + rc = -EIO; + break; + } + + events = mask & + ( (newsigs.dsr_up != oldsigs.dsr_up ? MgslEvent_DsrActive:0) + + (newsigs.dsr_down != oldsigs.dsr_down ? MgslEvent_DsrInactive:0) + + (newsigs.dcd_up != oldsigs.dcd_up ? MgslEvent_DcdActive:0) + + (newsigs.dcd_down != oldsigs.dcd_down ? MgslEvent_DcdInactive:0) + + (newsigs.cts_up != oldsigs.cts_up ? MgslEvent_CtsActive:0) + + (newsigs.cts_down != oldsigs.cts_down ? MgslEvent_CtsInactive:0) + + (newsigs.ri_up != oldsigs.ri_up ? MgslEvent_RiActive:0) + + (newsigs.ri_down != oldsigs.ri_down ? MgslEvent_RiInactive:0) + + (cnow.exithunt != cprev.exithunt ? MgslEvent_ExitHuntMode:0) + + (cnow.rxidle != cprev.rxidle ? MgslEvent_IdleReceived:0) ); + if (events) + break; + + cprev = cnow; + oldsigs = newsigs; + } + + remove_wait_queue(&info->event_wait_q, &wait); + set_current_state(TASK_RUNNING); + + + if (mask & (MgslEvent_ExitHuntMode + MgslEvent_IdleReceived)) { + spin_lock_irqsave(&info->lock,flags); + if (!waitqueue_active(&info->event_wait_q)) { + /* disable enable exit hunt mode/idle rcvd IRQs */ + wr_reg16(info, SCR, + (unsigned short)(rd_reg16(info, SCR) & ~IRQ_RXIDLE)); + } + spin_unlock_irqrestore(&info->lock,flags); + } +exit: + if (rc == 0) + rc = put_user(events, mask_ptr); + return rc; +} + +static int get_interface(struct slgt_info *info, int __user *if_mode) +{ + DBGINFO(("%s get_interface=%x\n", info->device_name, info->if_mode)); + if (put_user(info->if_mode, if_mode)) + return -EFAULT; + return 0; +} + +static int set_interface(struct slgt_info *info, int if_mode) +{ + unsigned long flags; + unsigned char val; + + DBGINFO(("%s set_interface=%x)\n", info->device_name, if_mode)); + spin_lock_irqsave(&info->lock,flags); + info->if_mode = if_mode; + + msc_set_vcr(info); + + /* TCR (tx control) 07 1=RTS driver control */ + val = rd_reg16(info, TCR); + if (info->if_mode & MGSL_INTERFACE_RTS_EN) + val |= BIT7; + else + val &= ~BIT7; + wr_reg16(info, TCR, val); + + spin_unlock_irqrestore(&info->lock,flags); + return 0; +} + +static int modem_input_wait(struct slgt_info *info,int arg) +{ + unsigned long flags; + int rc; + struct mgsl_icount cprev, cnow; + DECLARE_WAITQUEUE(wait, current); + + /* save current irq counts */ + spin_lock_irqsave(&info->lock,flags); + cprev = info->icount; + add_wait_queue(&info->status_event_wait_q, &wait); + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_irqrestore(&info->lock,flags); + + for(;;) { + schedule(); + if (signal_pending(current)) { + rc = -ERESTARTSYS; + break; + } + + /* get new irq counts */ + spin_lock_irqsave(&info->lock,flags); + cnow = info->icount; + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_irqrestore(&info->lock,flags); + + /* if no change, wait aborted for some reason */ + if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && + cnow.dcd == cprev.dcd && cnow.cts == cprev.cts) { + rc = -EIO; + break; + } + + /* check for change in caller specified modem input */ + if ((arg & TIOCM_RNG && cnow.rng != cprev.rng) || + (arg & TIOCM_DSR && cnow.dsr != cprev.dsr) || + (arg & TIOCM_CD && cnow.dcd != cprev.dcd) || + (arg & TIOCM_CTS && cnow.cts != cprev.cts)) { + rc = 0; + break; + } + + cprev = cnow; + } + remove_wait_queue(&info->status_event_wait_q, &wait); + set_current_state(TASK_RUNNING); + return rc; +} + +/* + * return state of serial control and status signals + */ +static int tiocmget(struct tty_struct *tty, struct file *file) +{ + struct slgt_info *info = tty->driver_data; + unsigned int result; + unsigned long flags; + + spin_lock_irqsave(&info->lock,flags); + get_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + + result = ((info->signals & SerialSignal_RTS) ? TIOCM_RTS:0) + + ((info->signals & SerialSignal_DTR) ? TIOCM_DTR:0) + + ((info->signals & SerialSignal_DCD) ? TIOCM_CAR:0) + + ((info->signals & SerialSignal_RI) ? TIOCM_RNG:0) + + ((info->signals & SerialSignal_DSR) ? TIOCM_DSR:0) + + ((info->signals & SerialSignal_CTS) ? TIOCM_CTS:0); + + DBGINFO(("%s tiocmget value=%08X\n", info->device_name, result)); + return result; +} + +/* + * set modem control signals (DTR/RTS) + * + * cmd signal command: TIOCMBIS = set bit TIOCMBIC = clear bit + * TIOCMSET = set/clear signal values + * value bit mask for command + */ +static int tiocmset(struct tty_struct *tty, struct file *file, + unsigned int set, unsigned int clear) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + DBGINFO(("%s tiocmset(%x,%x)\n", info->device_name, set, clear)); + + if (set & TIOCM_RTS) + info->signals |= SerialSignal_RTS; + if (set & TIOCM_DTR) + info->signals |= SerialSignal_DTR; + if (clear & TIOCM_RTS) + info->signals &= ~SerialSignal_RTS; + if (clear & TIOCM_DTR) + info->signals &= ~SerialSignal_DTR; + + spin_lock_irqsave(&info->lock,flags); + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + return 0; +} + +/* + * block current process until the device is ready to open + */ +static int block_til_ready(struct tty_struct *tty, struct file *filp, + struct slgt_info *info) +{ + DECLARE_WAITQUEUE(wait, current); + int retval; + int do_clocal = 0, extra_count = 0; + unsigned long flags; + + DBGINFO(("%s block_til_ready\n", tty->driver->name)); + + if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){ + /* nonblock mode is set or port is not enabled */ + info->flags |= ASYNC_NORMAL_ACTIVE; + return 0; + } + + if (tty->termios->c_cflag & CLOCAL) + do_clocal = 1; + + /* Wait for carrier detect and the line to become + * free (i.e., not in use by the callout). While we are in + * this loop, info->count is dropped by one, so that + * close() knows when to free things. We restore it upon + * exit, either normal or abnormal. + */ + + retval = 0; + add_wait_queue(&info->open_wait, &wait); + + spin_lock_irqsave(&info->lock, flags); + if (!tty_hung_up_p(filp)) { + extra_count = 1; + info->count--; + } + spin_unlock_irqrestore(&info->lock, flags); + info->blocked_open++; + + while (1) { + if ((tty->termios->c_cflag & CBAUD)) { + spin_lock_irqsave(&info->lock,flags); + info->signals |= SerialSignal_RTS + SerialSignal_DTR; + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + } + + set_current_state(TASK_INTERRUPTIBLE); + + if (tty_hung_up_p(filp) || !(info->flags & ASYNC_INITIALIZED)){ + retval = (info->flags & ASYNC_HUP_NOTIFY) ? + -EAGAIN : -ERESTARTSYS; + break; + } + + spin_lock_irqsave(&info->lock,flags); + get_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + + if (!(info->flags & ASYNC_CLOSING) && + (do_clocal || (info->signals & SerialSignal_DCD)) ) { + break; + } + + if (signal_pending(current)) { + retval = -ERESTARTSYS; + break; + } + + DBGINFO(("%s block_til_ready wait\n", tty->driver->name)); + schedule(); + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(&info->open_wait, &wait); + + if (extra_count) + info->count++; + info->blocked_open--; + + if (!retval) + info->flags |= ASYNC_NORMAL_ACTIVE; + + DBGINFO(("%s block_til_ready ready, rc=%d\n", tty->driver->name, retval)); + return retval; +} + +static int alloc_tmp_rbuf(struct slgt_info *info) +{ + info->tmp_rbuf = kmalloc(info->max_frame_size, GFP_KERNEL); + if (info->tmp_rbuf == NULL) + return -ENOMEM; + return 0; +} + +static void free_tmp_rbuf(struct slgt_info *info) +{ + kfree(info->tmp_rbuf); + info->tmp_rbuf = NULL; +} + +/* + * allocate DMA descriptor lists. + */ +static int alloc_desc(struct slgt_info *info) +{ + unsigned int i; + unsigned int pbufs; + + /* allocate memory to hold descriptor lists */ + info->bufs = pci_alloc_consistent(info->pdev, DESC_LIST_SIZE, &info->bufs_dma_addr); + if (info->bufs == NULL) + return -ENOMEM; + + memset(info->bufs, 0, DESC_LIST_SIZE); + + info->rbufs = (struct slgt_desc*)info->bufs; + info->tbufs = ((struct slgt_desc*)info->bufs) + info->rbuf_count; + + pbufs = (unsigned int)info->bufs_dma_addr; + + /* + * Build circular lists of descriptors + */ + + for (i=0; i < info->rbuf_count; i++) { + /* physical address of this descriptor */ + info->rbufs[i].pdesc = pbufs + (i * sizeof(struct slgt_desc)); + + /* physical address of next descriptor */ + if (i == info->rbuf_count - 1) + info->rbufs[i].next = cpu_to_le32(pbufs); + else + info->rbufs[i].next = cpu_to_le32(pbufs + ((i+1) * sizeof(struct slgt_desc))); + set_desc_count(info->rbufs[i], DMABUFSIZE); + } + + for (i=0; i < info->tbuf_count; i++) { + /* physical address of this descriptor */ + info->tbufs[i].pdesc = pbufs + ((info->rbuf_count + i) * sizeof(struct slgt_desc)); + + /* physical address of next descriptor */ + if (i == info->tbuf_count - 1) + info->tbufs[i].next = cpu_to_le32(pbufs + info->rbuf_count * sizeof(struct slgt_desc)); + else + info->tbufs[i].next = cpu_to_le32(pbufs + ((info->rbuf_count + i + 1) * sizeof(struct slgt_desc))); + } + + return 0; +} + +static void free_desc(struct slgt_info *info) +{ + if (info->bufs != NULL) { + pci_free_consistent(info->pdev, DESC_LIST_SIZE, info->bufs, info->bufs_dma_addr); + info->bufs = NULL; + info->rbufs = NULL; + info->tbufs = NULL; + } +} + +static int alloc_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count) +{ + int i; + for (i=0; i < count; i++) { + if ((bufs[i].buf = pci_alloc_consistent(info->pdev, DMABUFSIZE, &bufs[i].buf_dma_addr)) == NULL) + return -ENOMEM; + bufs[i].pbuf = cpu_to_le32((unsigned int)bufs[i].buf_dma_addr); + } + return 0; +} + +static void free_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count) +{ + int i; + for (i=0; i < count; i++) { + if (bufs[i].buf == NULL) + continue; + pci_free_consistent(info->pdev, DMABUFSIZE, bufs[i].buf, bufs[i].buf_dma_addr); + bufs[i].buf = NULL; + } +} + +static int alloc_dma_bufs(struct slgt_info *info) +{ + info->rbuf_count = 32; + info->tbuf_count = 32; + + if (alloc_desc(info) < 0 || + alloc_bufs(info, info->rbufs, info->rbuf_count) < 0 || + alloc_bufs(info, info->tbufs, info->tbuf_count) < 0 || + alloc_tmp_rbuf(info) < 0) { + DBGERR(("%s DMA buffer alloc fail\n", info->device_name)); + return -ENOMEM; + } + reset_rbufs(info); + return 0; +} + +static void free_dma_bufs(struct slgt_info *info) +{ + if (info->bufs) { + free_bufs(info, info->rbufs, info->rbuf_count); + free_bufs(info, info->tbufs, info->tbuf_count); + free_desc(info); + } + free_tmp_rbuf(info); +} + +static int claim_resources(struct slgt_info *info) +{ + if (request_mem_region(info->phys_reg_addr, SLGT_REG_SIZE, "synclink_gt") == NULL) { + DBGERR(("%s reg addr conflict, addr=%08X\n", + info->device_name, info->phys_reg_addr)); + info->init_error = DiagStatus_AddressConflict; + goto errout; + } + else + info->reg_addr_requested = 1; + + info->reg_addr = ioremap(info->phys_reg_addr, PAGE_SIZE); + if (!info->reg_addr) { + DBGERR(("%s cant map device registers, addr=%08X\n", + info->device_name, info->phys_reg_addr)); + info->init_error = DiagStatus_CantAssignPciResources; + goto errout; + } + info->reg_addr += info->reg_offset; + return 0; + +errout: + release_resources(info); + return -ENODEV; +} + +static void release_resources(struct slgt_info *info) +{ + if (info->irq_requested) { + free_irq(info->irq_level, info); + info->irq_requested = 0; + } + + if (info->reg_addr_requested) { + release_mem_region(info->phys_reg_addr, SLGT_REG_SIZE); + info->reg_addr_requested = 0; + } + + if (info->reg_addr) { + iounmap(info->reg_addr - info->reg_offset); + info->reg_addr = NULL; + } +} + +/* Add the specified device instance data structure to the + * global linked list of devices and increment the device count. + */ +static void add_device(struct slgt_info *info) +{ + char *devstr; + + info->next_device = NULL; + info->line = slgt_device_count; + sprintf(info->device_name, "%s%d", tty_dev_prefix, info->line); + + if (info->line < MAX_DEVICES) { + if (maxframe[info->line]) + info->max_frame_size = maxframe[info->line]; + info->dosyncppp = dosyncppp[info->line]; + } + + slgt_device_count++; + + if (!slgt_device_list) + slgt_device_list = info; + else { + struct slgt_info *current_dev = slgt_device_list; + while(current_dev->next_device) + current_dev = current_dev->next_device; + current_dev->next_device = info; + } + + if (info->max_frame_size < 4096) + info->max_frame_size = 4096; + else if (info->max_frame_size > 65535) + info->max_frame_size = 65535; + + switch(info->pdev->device) { + case SYNCLINK_GT_DEVICE_ID: + devstr = "GT"; + break; + case SYNCLINK_GT4_DEVICE_ID: + devstr = "GT4"; + break; + case SYNCLINK_AC_DEVICE_ID: + devstr = "AC"; + info->params.mode = MGSL_MODE_ASYNC; + break; + default: + devstr = "(unknown model)"; + } + printk("SyncLink %s %s IO=%08x IRQ=%d MaxFrameSize=%u\n", + devstr, info->device_name, info->phys_reg_addr, + info->irq_level, info->max_frame_size); + +#ifdef CONFIG_HDLC + hdlcdev_init(info); +#endif +} + +/* + * allocate device instance structure, return NULL on failure + */ +static struct slgt_info *alloc_dev(int adapter_num, int port_num, struct pci_dev *pdev) +{ + struct slgt_info *info; + + info = kmalloc(sizeof(struct slgt_info), GFP_KERNEL); + + if (!info) { + DBGERR(("%s device alloc failed adapter=%d port=%d\n", + driver_name, adapter_num, port_num)); + } else { + memset(info, 0, sizeof(struct slgt_info)); + info->magic = MGSL_MAGIC; + INIT_WORK(&info->task, bh_handler, info); + info->max_frame_size = 4096; + info->raw_rx_size = DMABUFSIZE; + info->close_delay = 5*HZ/10; + info->closing_wait = 30*HZ; + init_waitqueue_head(&info->open_wait); + init_waitqueue_head(&info->close_wait); + init_waitqueue_head(&info->status_event_wait_q); + init_waitqueue_head(&info->event_wait_q); + spin_lock_init(&info->netlock); + memcpy(&info->params,&default_params,sizeof(MGSL_PARAMS)); + info->idle_mode = HDLC_TXIDLE_FLAGS; + info->adapter_num = adapter_num; + info->port_num = port_num; + + init_timer(&info->tx_timer); + info->tx_timer.data = (unsigned long)info; + info->tx_timer.function = tx_timeout; + + init_timer(&info->rx_timer); + info->rx_timer.data = (unsigned long)info; + info->rx_timer.function = rx_timeout; + + /* Copy configuration info to device instance data */ + info->pdev = pdev; + info->irq_level = pdev->irq; + info->phys_reg_addr = pci_resource_start(pdev,0); + + /* veremap works on page boundaries + * map full page starting at the page boundary + */ + info->reg_offset = info->phys_reg_addr & (PAGE_SIZE-1); + info->phys_reg_addr &= ~(PAGE_SIZE-1); + + info->bus_type = MGSL_BUS_TYPE_PCI; + info->irq_flags = SA_SHIRQ; + + info->init_error = -1; /* assume error, set to 0 on successful init */ + } + + return info; +} + +static void device_init(int adapter_num, struct pci_dev *pdev) +{ + struct slgt_info *port_array[SLGT_MAX_PORTS]; + int i; + int port_count = 1; + + if (pdev->device == SYNCLINK_GT4_DEVICE_ID) + port_count = 4; + + /* allocate device instances for all ports */ + for (i=0; i < port_count; ++i) { + port_array[i] = alloc_dev(adapter_num, i, pdev); + if (port_array[i] == NULL) { + for (--i; i >= 0; --i) + kfree(port_array[i]); + return; + } + } + + /* give copy of port_array to all ports and add to device list */ + for (i=0; i < port_count; ++i) { + memcpy(port_array[i]->port_array, port_array, sizeof(port_array)); + add_device(port_array[i]); + port_array[i]->port_count = port_count; + spin_lock_init(&port_array[i]->lock); + } + + /* Allocate and claim adapter resources */ + if (!claim_resources(port_array[0])) { + + alloc_dma_bufs(port_array[0]); + + /* copy resource information from first port to others */ + for (i = 1; i < port_count; ++i) { + port_array[i]->lock = port_array[0]->lock; + port_array[i]->irq_level = port_array[0]->irq_level; + port_array[i]->reg_addr = port_array[0]->reg_addr; + alloc_dma_bufs(port_array[i]); + } + + if (request_irq(port_array[0]->irq_level, + slgt_interrupt, + port_array[0]->irq_flags, + port_array[0]->device_name, + port_array[0]) < 0) { + DBGERR(("%s request_irq failed IRQ=%d\n", + port_array[0]->device_name, + port_array[0]->irq_level)); + } else { + port_array[0]->irq_requested = 1; + adapter_test(port_array[0]); + for (i=1 ; i < port_count ; i++) + port_array[i]->init_error = port_array[0]->init_error; + } + } +} + +static int __devinit init_one(struct pci_dev *dev, + const struct pci_device_id *ent) +{ + if (pci_enable_device(dev)) { + printk("error enabling pci device %p\n", dev); + return -EIO; + } + pci_set_master(dev); + device_init(slgt_device_count, dev); + return 0; +} + +static void __devexit remove_one(struct pci_dev *dev) +{ +} + +static struct tty_operations ops = { + .open = open, + .close = close, + .write = write, + .put_char = put_char, + .flush_chars = flush_chars, + .write_room = write_room, + .chars_in_buffer = chars_in_buffer, + .flush_buffer = flush_buffer, + .ioctl = ioctl, + .throttle = throttle, + .unthrottle = unthrottle, + .send_xchar = send_xchar, + .break_ctl = set_break, + .wait_until_sent = wait_until_sent, + .read_proc = read_proc, + .set_termios = set_termios, + .stop = tx_hold, + .start = tx_release, + .hangup = hangup, + .tiocmget = tiocmget, + .tiocmset = tiocmset, +}; + +static void slgt_cleanup(void) +{ + int rc; + struct slgt_info *info; + struct slgt_info *tmp; + + printk("unload %s %s\n", driver_name, driver_version); + + if (serial_driver) { + if ((rc = tty_unregister_driver(serial_driver))) + DBGERR(("tty_unregister_driver error=%d\n", rc)); + put_tty_driver(serial_driver); + } + + /* reset devices */ + info = slgt_device_list; + while(info) { + reset_port(info); + info = info->next_device; + } + + /* release devices */ + info = slgt_device_list; + while(info) { +#ifdef CONFIG_HDLC + hdlcdev_exit(info); +#endif + free_dma_bufs(info); + free_tmp_rbuf(info); + if (info->port_num == 0) + release_resources(info); + tmp = info; + info = info->next_device; + kfree(tmp); + } + + if (pci_registered) + pci_unregister_driver(&pci_driver); +} + +/* + * Driver initialization entry point. + */ +static int __init slgt_init(void) +{ + int rc; + + printk("%s %s\n", driver_name, driver_version); + + slgt_device_count = 0; + if ((rc = pci_register_driver(&pci_driver)) < 0) { + printk("%s pci_register_driver error=%d\n", driver_name, rc); + return rc; + } + pci_registered = 1; + + if (!slgt_device_list) { + printk("%s no devices found\n",driver_name); + return -ENODEV; + } + + serial_driver = alloc_tty_driver(MAX_DEVICES); + if (!serial_driver) { + rc = -ENOMEM; + goto error; + } + + /* Initialize the tty_driver structure */ + + serial_driver->owner = THIS_MODULE; + serial_driver->driver_name = tty_driver_name; + serial_driver->name = tty_dev_prefix; + serial_driver->major = ttymajor; + serial_driver->minor_start = 64; + serial_driver->type = TTY_DRIVER_TYPE_SERIAL; + serial_driver->subtype = SERIAL_TYPE_NORMAL; + serial_driver->init_termios = tty_std_termios; + serial_driver->init_termios.c_cflag = + B9600 | CS8 | CREAD | HUPCL | CLOCAL; + serial_driver->flags = TTY_DRIVER_REAL_RAW; + tty_set_operations(serial_driver, &ops); + if ((rc = tty_register_driver(serial_driver)) < 0) { + DBGERR(("%s can't register serial driver\n", driver_name)); + put_tty_driver(serial_driver); + serial_driver = NULL; + goto error; + } + + printk("%s %s, tty major#%d\n", + driver_name, driver_version, + serial_driver->major); + + return 0; + +error: + slgt_cleanup(); + return rc; +} + +static void __exit slgt_exit(void) +{ + slgt_cleanup(); +} + +module_init(slgt_init); +module_exit(slgt_exit); + +/* + * register access routines + */ + +#define CALC_REGADDR() \ + unsigned long reg_addr = ((unsigned long)info->reg_addr) + addr; \ + if (addr >= 0x80) \ + reg_addr += (info->port_num) * 32; + +static __u8 rd_reg8(struct slgt_info *info, unsigned int addr) +{ + CALC_REGADDR(); + return readb((void __iomem *)reg_addr); +} + +static void wr_reg8(struct slgt_info *info, unsigned int addr, __u8 value) +{ + CALC_REGADDR(); + writeb(value, (void __iomem *)reg_addr); +} + +static __u16 rd_reg16(struct slgt_info *info, unsigned int addr) +{ + CALC_REGADDR(); + return readw((void __iomem *)reg_addr); +} + +static void wr_reg16(struct slgt_info *info, unsigned int addr, __u16 value) +{ + CALC_REGADDR(); + writew(value, (void __iomem *)reg_addr); +} + +static __u32 rd_reg32(struct slgt_info *info, unsigned int addr) +{ + CALC_REGADDR(); + return readl((void __iomem *)reg_addr); +} + +static void wr_reg32(struct slgt_info *info, unsigned int addr, __u32 value) +{ + CALC_REGADDR(); + writel(value, (void __iomem *)reg_addr); +} + +static void rdma_reset(struct slgt_info *info) +{ + unsigned int i; + + /* set reset bit */ + wr_reg32(info, RDCSR, BIT1); + + /* wait for enable bit cleared */ + for(i=0 ; i < 1000 ; i++) + if (!(rd_reg32(info, RDCSR) & BIT0)) + break; +} + +static void tdma_reset(struct slgt_info *info) +{ + unsigned int i; + + /* set reset bit */ + wr_reg32(info, TDCSR, BIT1); + + /* wait for enable bit cleared */ + for(i=0 ; i < 1000 ; i++) + if (!(rd_reg32(info, TDCSR) & BIT0)) + break; +} + +/* + * enable internal loopback + * TxCLK and RxCLK are generated from BRG + * and TxD is looped back to RxD internally. + */ +static void enable_loopback(struct slgt_info *info) +{ + /* SCR (serial control) BIT2=looopback enable */ + wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) | BIT2)); + + if (info->params.mode != MGSL_MODE_ASYNC) { + /* CCR (clock control) + * 07..05 tx clock source (010 = BRG) + * 04..02 rx clock source (010 = BRG) + * 01 auxclk enable (0 = disable) + * 00 BRG enable (1 = enable) + * + * 0100 1001 + */ + wr_reg8(info, CCR, 0x49); + + /* set speed if available, otherwise use default */ + if (info->params.clock_speed) + set_rate(info, info->params.clock_speed); + else + set_rate(info, 3686400); + } +} + +/* + * set baud rate generator to specified rate + */ +static void set_rate(struct slgt_info *info, u32 rate) +{ + unsigned int div; + static unsigned int osc = 14745600; + + /* div = osc/rate - 1 + * + * Round div up if osc/rate is not integer to + * force to next slowest rate. + */ + + if (rate) { + div = osc/rate; + if (!(osc % rate) && div) + div--; + wr_reg16(info, BDR, (unsigned short)div); + } +} + +static void rx_stop(struct slgt_info *info) +{ + unsigned short val; + + /* disable and reset receiver */ + val = rd_reg16(info, RCR) & ~BIT1; /* clear enable bit */ + wr_reg16(info, RCR, (unsigned short)(val | BIT2)); /* set reset bit */ + wr_reg16(info, RCR, val); /* clear reset bit */ + + slgt_irq_off(info, IRQ_RXOVER + IRQ_RXDATA + IRQ_RXIDLE); + + /* clear pending rx interrupts */ + wr_reg16(info, SSR, IRQ_RXIDLE + IRQ_RXOVER); + + rdma_reset(info); + + info->rx_enabled = 0; + info->rx_restart = 0; +} + +static void rx_start(struct slgt_info *info) +{ + unsigned short val; + + slgt_irq_off(info, IRQ_RXOVER + IRQ_RXDATA); + + /* clear pending rx overrun IRQ */ + wr_reg16(info, SSR, IRQ_RXOVER); + + /* reset and disable receiver */ + val = rd_reg16(info, RCR) & ~BIT1; /* clear enable bit */ + wr_reg16(info, RCR, (unsigned short)(val | BIT2)); /* set reset bit */ + wr_reg16(info, RCR, val); /* clear reset bit */ + + rdma_reset(info); + reset_rbufs(info); + + /* set 1st descriptor address */ + wr_reg32(info, RDDAR, info->rbufs[0].pdesc); + + if (info->params.mode != MGSL_MODE_ASYNC) { + /* enable rx DMA and DMA interrupt */ + wr_reg32(info, RDCSR, (BIT2 + BIT0)); + } else { + /* enable saving of rx status, rx DMA and DMA interrupt */ + wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0)); + } + + slgt_irq_on(info, IRQ_RXOVER); + + /* enable receiver */ + wr_reg16(info, RCR, (unsigned short)(rd_reg16(info, RCR) | BIT1)); + + info->rx_restart = 0; + info->rx_enabled = 1; +} + +static void tx_start(struct slgt_info *info) +{ + if (!info->tx_enabled) { + wr_reg16(info, TCR, + (unsigned short)(rd_reg16(info, TCR) | BIT1)); + info->tx_enabled = TRUE; + } + + if (info->tx_count) { + info->drop_rts_on_tx_done = 0; + + if (info->params.mode != MGSL_MODE_ASYNC) { + if (info->params.flags & HDLC_FLAG_AUTO_RTS) { + get_signals(info); + if (!(info->signals & SerialSignal_RTS)) { + info->signals |= SerialSignal_RTS; + set_signals(info); + info->drop_rts_on_tx_done = 1; + } + } + + slgt_irq_off(info, IRQ_TXDATA); + slgt_irq_on(info, IRQ_TXUNDER + IRQ_TXIDLE); + /* clear tx idle and underrun status bits */ + wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER)); + + if (!(rd_reg32(info, TDCSR) & BIT0)) { + /* tx DMA stopped, restart tx DMA */ + tdma_reset(info); + /* set 1st descriptor address */ + wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc); + if (info->params.mode == MGSL_MODE_RAW) + wr_reg32(info, TDCSR, BIT2 + BIT0); /* IRQ + DMA enable */ + else + wr_reg32(info, TDCSR, BIT0); /* DMA enable */ + } + + if (info->params.mode != MGSL_MODE_RAW) { + info->tx_timer.expires = jiffies + msecs_to_jiffies(5000); + add_timer(&info->tx_timer); + } + } else { + tdma_reset(info); + /* set 1st descriptor address */ + wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc); + + slgt_irq_off(info, IRQ_TXDATA); + slgt_irq_on(info, IRQ_TXIDLE); + /* clear tx idle status bit */ + wr_reg16(info, SSR, IRQ_TXIDLE); + + /* enable tx DMA */ + wr_reg32(info, TDCSR, BIT0); + } + + info->tx_active = 1; + } +} + +static void tx_stop(struct slgt_info *info) +{ + unsigned short val; + + del_timer(&info->tx_timer); + + tdma_reset(info); + + /* reset and disable transmitter */ + val = rd_reg16(info, TCR) & ~BIT1; /* clear enable bit */ + wr_reg16(info, TCR, (unsigned short)(val | BIT2)); /* set reset bit */ + wr_reg16(info, TCR, val); /* clear reset */ + + slgt_irq_off(info, IRQ_TXDATA + IRQ_TXIDLE + IRQ_TXUNDER); + + /* clear tx idle and underrun status bit */ + wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER)); + + reset_tbufs(info); + + info->tx_enabled = 0; + info->tx_active = 0; +} + +static void reset_port(struct slgt_info *info) +{ + if (!info->reg_addr) + return; + + tx_stop(info); + rx_stop(info); + + info->signals &= ~(SerialSignal_DTR + SerialSignal_RTS); + set_signals(info); + + slgt_irq_off(info, IRQ_ALL | IRQ_MASTER); +} + +static void reset_adapter(struct slgt_info *info) +{ + int i; + for (i=0; i < info->port_count; ++i) { + if (info->port_array[i]) + reset_port(info->port_array[i]); + } +} + +static void async_mode(struct slgt_info *info) +{ + unsigned short val; + + slgt_irq_off(info, IRQ_ALL | IRQ_MASTER); + tx_stop(info); + rx_stop(info); + + /* TCR (tx control) + * + * 15..13 mode, 010=async + * 12..10 encoding, 000=NRZ + * 09 parity enable + * 08 1=odd parity, 0=even parity + * 07 1=RTS driver control + * 06 1=break enable + * 05..04 character length + * 00=5 bits + * 01=6 bits + * 10=7 bits + * 11=8 bits + * 03 0=1 stop bit, 1=2 stop bits + * 02 reset + * 01 enable + * 00 auto-CTS enable + */ + val = 0x4000; + + if (info->if_mode & MGSL_INTERFACE_RTS_EN) + val |= BIT7; + + if (info->params.parity != ASYNC_PARITY_NONE) { + val |= BIT9; + if (info->params.parity == ASYNC_PARITY_ODD) + val |= BIT8; + } + + switch (info->params.data_bits) + { + case 6: val |= BIT4; break; + case 7: val |= BIT5; break; + case 8: val |= BIT5 + BIT4; break; + } + + if (info->params.stop_bits != 1) + val |= BIT3; + + if (info->params.flags & HDLC_FLAG_AUTO_CTS) + val |= BIT0; + + wr_reg16(info, TCR, val); + + /* RCR (rx control) + * + * 15..13 mode, 010=async + * 12..10 encoding, 000=NRZ + * 09 parity enable + * 08 1=odd parity, 0=even parity + * 07..06 reserved, must be 0 + * 05..04 character length + * 00=5 bits + * 01=6 bits + * 10=7 bits + * 11=8 bits + * 03 reserved, must be zero + * 02 reset + * 01 enable + * 00 auto-DCD enable + */ + val = 0x4000; + + if (info->params.parity != ASYNC_PARITY_NONE) { + val |= BIT9; + if (info->params.parity == ASYNC_PARITY_ODD) + val |= BIT8; + } + + switch (info->params.data_bits) + { + case 6: val |= BIT4; break; + case 7: val |= BIT5; break; + case 8: val |= BIT5 + BIT4; break; + } + + if (info->params.flags & HDLC_FLAG_AUTO_DCD) + val |= BIT0; + + wr_reg16(info, RCR, val); + + /* CCR (clock control) + * + * 07..05 011 = tx clock source is BRG/16 + * 04..02 010 = rx clock source is BRG + * 01 0 = auxclk disabled + * 00 1 = BRG enabled + * + * 0110 1001 + */ + wr_reg8(info, CCR, 0x69); + + msc_set_vcr(info); + + tx_set_idle(info); + + /* SCR (serial control) + * + * 15 1=tx req on FIFO half empty + * 14 1=rx req on FIFO half full + * 13 tx data IRQ enable + * 12 tx idle IRQ enable + * 11 rx break on IRQ enable + * 10 rx data IRQ enable + * 09 rx break off IRQ enable + * 08 overrun IRQ enable + * 07 DSR IRQ enable + * 06 CTS IRQ enable + * 05 DCD IRQ enable + * 04 RI IRQ enable + * 03 reserved, must be zero + * 02 1=txd->rxd internal loopback enable + * 01 reserved, must be zero + * 00 1=master IRQ enable + */ + val = BIT15 + BIT14 + BIT0; + wr_reg16(info, SCR, val); + + slgt_irq_on(info, IRQ_RXBREAK | IRQ_RXOVER); + + set_rate(info, info->params.data_rate * 16); + + if (info->params.loopback) + enable_loopback(info); +} + +static void hdlc_mode(struct slgt_info *info) +{ + unsigned short val; + + slgt_irq_off(info, IRQ_ALL | IRQ_MASTER); + tx_stop(info); + rx_stop(info); + + /* TCR (tx control) + * + * 15..13 mode, 000=HDLC 001=raw sync + * 12..10 encoding + * 09 CRC enable + * 08 CRC32 + * 07 1=RTS driver control + * 06 preamble enable + * 05..04 preamble length + * 03 share open/close flag + * 02 reset + * 01 enable + * 00 auto-CTS enable + */ + val = 0; + + if (info->params.mode == MGSL_MODE_RAW) + val |= BIT13; + if (info->if_mode & MGSL_INTERFACE_RTS_EN) + val |= BIT7; + + switch(info->params.encoding) + { + case HDLC_ENCODING_NRZB: val |= BIT10; break; + case HDLC_ENCODING_NRZI_MARK: val |= BIT11; break; + case HDLC_ENCODING_NRZI: val |= BIT11 + BIT10; break; + case HDLC_ENCODING_BIPHASE_MARK: val |= BIT12; break; + case HDLC_ENCODING_BIPHASE_SPACE: val |= BIT12 + BIT10; break; + case HDLC_ENCODING_BIPHASE_LEVEL: val |= BIT12 + BIT11; break; + case HDLC_ENCODING_DIFF_BIPHASE_LEVEL: val |= BIT12 + BIT11 + BIT10; break; + } + + switch (info->params.crc_type) + { + case HDLC_CRC_16_CCITT: val |= BIT9; break; + case HDLC_CRC_32_CCITT: val |= BIT9 + BIT8; break; + } + + if (info->params.preamble != HDLC_PREAMBLE_PATTERN_NONE) + val |= BIT6; + + switch (info->params.preamble_length) + { + case HDLC_PREAMBLE_LENGTH_16BITS: val |= BIT5; break; + case HDLC_PREAMBLE_LENGTH_32BITS: val |= BIT4; break; + case HDLC_PREAMBLE_LENGTH_64BITS: val |= BIT5 + BIT4; break; + } + + if (info->params.flags & HDLC_FLAG_AUTO_CTS) + val |= BIT0; + + wr_reg16(info, TCR, val); + + /* TPR (transmit preamble) */ + + switch (info->params.preamble) + { + case HDLC_PREAMBLE_PATTERN_FLAGS: val = 0x7e; break; + case HDLC_PREAMBLE_PATTERN_ONES: val = 0xff; break; + case HDLC_PREAMBLE_PATTERN_ZEROS: val = 0x00; break; + case HDLC_PREAMBLE_PATTERN_10: val = 0x55; break; + case HDLC_PREAMBLE_PATTERN_01: val = 0xaa; break; + default: val = 0x7e; break; + } + wr_reg8(info, TPR, (unsigned char)val); + + /* RCR (rx control) + * + * 15..13 mode, 000=HDLC 001=raw sync + * 12..10 encoding + * 09 CRC enable + * 08 CRC32 + * 07..03 reserved, must be 0 + * 02 reset + * 01 enable + * 00 auto-DCD enable + */ + val = 0; + + if (info->params.mode == MGSL_MODE_RAW) + val |= BIT13; + + switch(info->params.encoding) + { + case HDLC_ENCODING_NRZB: val |= BIT10; break; + case HDLC_ENCODING_NRZI_MARK: val |= BIT11; break; + case HDLC_ENCODING_NRZI: val |= BIT11 + BIT10; break; + case HDLC_ENCODING_BIPHASE_MARK: val |= BIT12; break; + case HDLC_ENCODING_BIPHASE_SPACE: val |= BIT12 + BIT10; break; + case HDLC_ENCODING_BIPHASE_LEVEL: val |= BIT12 + BIT11; break; + case HDLC_ENCODING_DIFF_BIPHASE_LEVEL: val |= BIT12 + BIT11 + BIT10; break; + } + + switch (info->params.crc_type) + { + case HDLC_CRC_16_CCITT: val |= BIT9; break; + case HDLC_CRC_32_CCITT: val |= BIT9 + BIT8; break; + } + + if (info->params.flags & HDLC_FLAG_AUTO_DCD) + val |= BIT0; + + wr_reg16(info, RCR, val); + + /* CCR (clock control) + * + * 07..05 tx clock source + * 04..02 rx clock source + * 01 auxclk enable + * 00 BRG enable + */ + val = 0; + + if (info->params.flags & HDLC_FLAG_TXC_BRG) + { + // when RxC source is DPLL, BRG generates 16X DPLL + // reference clock, so take TxC from BRG/16 to get + // transmit clock at actual data rate + if (info->params.flags & HDLC_FLAG_RXC_DPLL) + val |= BIT6 + BIT5; /* 011, txclk = BRG/16 */ + else + val |= BIT6; /* 010, txclk = BRG */ + } + else if (info->params.flags & HDLC_FLAG_TXC_DPLL) + val |= BIT7; /* 100, txclk = DPLL Input */ + else if (info->params.flags & HDLC_FLAG_TXC_RXCPIN) + val |= BIT5; /* 001, txclk = RXC Input */ + + if (info->params.flags & HDLC_FLAG_RXC_BRG) + val |= BIT3; /* 010, rxclk = BRG */ + else if (info->params.flags & HDLC_FLAG_RXC_DPLL) + val |= BIT4; /* 100, rxclk = DPLL */ + else if (info->params.flags & HDLC_FLAG_RXC_TXCPIN) + val |= BIT2; /* 001, rxclk = TXC Input */ + + if (info->params.clock_speed) + val |= BIT1 + BIT0; + + wr_reg8(info, CCR, (unsigned char)val); + + if (info->params.flags & (HDLC_FLAG_TXC_DPLL + HDLC_FLAG_RXC_DPLL)) + { + // program DPLL mode + switch(info->params.encoding) + { + case HDLC_ENCODING_BIPHASE_MARK: + case HDLC_ENCODING_BIPHASE_SPACE: + val = BIT7; break; + case HDLC_ENCODING_BIPHASE_LEVEL: + case HDLC_ENCODING_DIFF_BIPHASE_LEVEL: + val = BIT7 + BIT6; break; + default: val = BIT6; // NRZ encodings + } + wr_reg16(info, RCR, (unsigned short)(rd_reg16(info, RCR) | val)); + + // DPLL requires a 16X reference clock from BRG + set_rate(info, info->params.clock_speed * 16); + } + else + set_rate(info, info->params.clock_speed); + + tx_set_idle(info); + + msc_set_vcr(info); + + /* SCR (serial control) + * + * 15 1=tx req on FIFO half empty + * 14 1=rx req on FIFO half full + * 13 tx data IRQ enable + * 12 tx idle IRQ enable + * 11 underrun IRQ enable + * 10 rx data IRQ enable + * 09 rx idle IRQ enable + * 08 overrun IRQ enable + * 07 DSR IRQ enable + * 06 CTS IRQ enable + * 05 DCD IRQ enable + * 04 RI IRQ enable + * 03 reserved, must be zero + * 02 1=txd->rxd internal loopback enable + * 01 reserved, must be zero + * 00 1=master IRQ enable + */ + wr_reg16(info, SCR, BIT15 + BIT14 + BIT0); + + if (info->params.loopback) + enable_loopback(info); +} + +/* + * set transmit idle mode + */ +static void tx_set_idle(struct slgt_info *info) +{ + unsigned char val = 0xff; + + switch(info->idle_mode) + { + case HDLC_TXIDLE_FLAGS: val = 0x7e; break; + case HDLC_TXIDLE_ALT_ZEROS_ONES: val = 0xaa; break; + case HDLC_TXIDLE_ZEROS: val = 0x00; break; + case HDLC_TXIDLE_ONES: val = 0xff; break; + case HDLC_TXIDLE_ALT_MARK_SPACE: val = 0xaa; break; + case HDLC_TXIDLE_SPACE: val = 0x00; break; + case HDLC_TXIDLE_MARK: val = 0xff; break; + } + + wr_reg8(info, TIR, val); +} + +/* + * get state of V24 status (input) signals + */ +static void get_signals(struct slgt_info *info) +{ + unsigned short status = rd_reg16(info, SSR); + + /* clear all serial signals except DTR and RTS */ + info->signals &= SerialSignal_DTR + SerialSignal_RTS; + + if (status & BIT3) + info->signals |= SerialSignal_DSR; + if (status & BIT2) + info->signals |= SerialSignal_CTS; + if (status & BIT1) + info->signals |= SerialSignal_DCD; + if (status & BIT0) + info->signals |= SerialSignal_RI; +} + +/* + * set V.24 Control Register based on current configuration + */ +static void msc_set_vcr(struct slgt_info *info) +{ + unsigned char val = 0; + + /* VCR (V.24 control) + * + * 07..04 serial IF select + * 03 DTR + * 02 RTS + * 01 LL + * 00 RL + */ + + switch(info->if_mode & MGSL_INTERFACE_MASK) + { + case MGSL_INTERFACE_RS232: + val |= BIT5; /* 0010 */ + break; + case MGSL_INTERFACE_V35: + val |= BIT7 + BIT6 + BIT5; /* 1110 */ + break; + case MGSL_INTERFACE_RS422: + val |= BIT6; /* 0100 */ + break; + } + + if (info->signals & SerialSignal_DTR) + val |= BIT3; + if (info->signals & SerialSignal_RTS) + val |= BIT2; + if (info->if_mode & MGSL_INTERFACE_LL) + val |= BIT1; + if (info->if_mode & MGSL_INTERFACE_RL) + val |= BIT0; + wr_reg8(info, VCR, val); +} + +/* + * set state of V24 control (output) signals + */ +static void set_signals(struct slgt_info *info) +{ + unsigned char val = rd_reg8(info, VCR); + if (info->signals & SerialSignal_DTR) + val |= BIT3; + else + val &= ~BIT3; + if (info->signals & SerialSignal_RTS) + val |= BIT2; + else + val &= ~BIT2; + wr_reg8(info, VCR, val); +} + +/* + * free range of receive DMA buffers (i to last) + */ +static void free_rbufs(struct slgt_info *info, unsigned int i, unsigned int last) +{ + int done = 0; + + while(!done) { + /* reset current buffer for reuse */ + info->rbufs[i].status = 0; + if (info->params.mode == MGSL_MODE_RAW) + set_desc_count(info->rbufs[i], info->raw_rx_size); + else + set_desc_count(info->rbufs[i], DMABUFSIZE); + + if (i == last) + done = 1; + if (++i == info->rbuf_count) + i = 0; + } + info->rbuf_current = i; +} + +/* + * mark all receive DMA buffers as free + */ +static void reset_rbufs(struct slgt_info *info) +{ + free_rbufs(info, 0, info->rbuf_count - 1); +} + +/* + * pass receive HDLC frame to upper layer + * + * return 1 if frame available, otherwise 0 + */ +static int rx_get_frame(struct slgt_info *info) +{ + unsigned int start, end; + unsigned short status; + unsigned int framesize = 0; + int rc = 0; + unsigned long flags; + struct tty_struct *tty = info->tty; + unsigned char addr_field = 0xff; + +check_again: + + framesize = 0; + addr_field = 0xff; + start = end = info->rbuf_current; + + for (;;) { + if (!desc_complete(info->rbufs[end])) + goto cleanup; + + if (framesize == 0 && info->params.addr_filter != 0xff) + addr_field = info->rbufs[end].buf[0]; + + framesize += desc_count(info->rbufs[end]); + + if (desc_eof(info->rbufs[end])) + break; + + if (++end == info->rbuf_count) + end = 0; + + if (end == info->rbuf_current) { + if (info->rx_enabled){ + spin_lock_irqsave(&info->lock,flags); + rx_start(info); + spin_unlock_irqrestore(&info->lock,flags); + } + goto cleanup; + } + } + + /* status + * + * 15 buffer complete + * 14..06 reserved + * 05..04 residue + * 02 eof (end of frame) + * 01 CRC error + * 00 abort + */ + status = desc_status(info->rbufs[end]); + + /* ignore CRC bit if not using CRC (bit is undefined) */ + if (info->params.crc_type == HDLC_CRC_NONE) + status &= ~BIT1; + + if (framesize == 0 || + (addr_field != 0xff && addr_field != info->params.addr_filter)) { + free_rbufs(info, start, end); + goto check_again; + } + + if (framesize < 2 || status & (BIT1+BIT0)) { + if (framesize < 2 || (status & BIT0)) + info->icount.rxshort++; + else + info->icount.rxcrc++; + framesize = 0; + +#ifdef CONFIG_HDLC + { + struct net_device_stats *stats = hdlc_stats(info->netdev); + stats->rx_errors++; + stats->rx_frame_errors++; + } +#endif + } else { + /* adjust frame size for CRC, if any */ + if (info->params.crc_type == HDLC_CRC_16_CCITT) + framesize -= 2; + else if (info->params.crc_type == HDLC_CRC_32_CCITT) + framesize -= 4; + } + + DBGBH(("%s rx frame status=%04X size=%d\n", + info->device_name, status, framesize)); + DBGDATA(info, info->rbufs[start].buf, min_t(int, framesize, DMABUFSIZE), "rx"); + + if (framesize) { + if (framesize > info->max_frame_size) + info->icount.rxlong++; + else { + /* copy dma buffer(s) to contiguous temp buffer */ + int copy_count = framesize; + int i = start; + unsigned char *p = info->tmp_rbuf; + info->tmp_rbuf_count = framesize; + + info->icount.rxok++; + + while(copy_count) { + int partial_count = min(copy_count, DMABUFSIZE); + memcpy(p, info->rbufs[i].buf, partial_count); + p += partial_count; + copy_count -= partial_count; + if (++i == info->rbuf_count) + i = 0; + } + +#ifdef CONFIG_HDLC + if (info->netcount) + hdlcdev_rx(info,info->tmp_rbuf, framesize); + else +#endif + ldisc_receive_buf(tty, info->tmp_rbuf, info->flag_buf, framesize); + } + } + free_rbufs(info, start, end); + rc = 1; + +cleanup: + return rc; +} + +/* + * pass receive buffer (RAW synchronous mode) to tty layer + * return 1 if buffer available, otherwise 0 + */ +static int rx_get_buf(struct slgt_info *info) +{ + unsigned int i = info->rbuf_current; + + if (!desc_complete(info->rbufs[i])) + return 0; + DBGDATA(info, info->rbufs[i].buf, desc_count(info->rbufs[i]), "rx"); + DBGINFO(("rx_get_buf size=%d\n", desc_count(info->rbufs[i]))); + ldisc_receive_buf(info->tty, info->rbufs[i].buf, + info->flag_buf, desc_count(info->rbufs[i])); + free_rbufs(info, i, i); + return 1; +} + +static void reset_tbufs(struct slgt_info *info) +{ + unsigned int i; + info->tbuf_current = 0; + for (i=0 ; i < info->tbuf_count ; i++) { + info->tbufs[i].status = 0; + info->tbufs[i].count = 0; + } +} + +/* + * return number of free transmit DMA buffers + */ +static unsigned int free_tbuf_count(struct slgt_info *info) +{ + unsigned int count = 0; + unsigned int i = info->tbuf_current; + + do + { + if (desc_count(info->tbufs[i])) + break; /* buffer in use */ + ++count; + if (++i == info->tbuf_count) + i=0; + } while (i != info->tbuf_current); + + /* last buffer with zero count may be in use, assume it is */ + if (count) + --count; + + return count; +} + +/* + * load transmit DMA buffer(s) with data + */ +static void tx_load(struct slgt_info *info, const char *buf, unsigned int size) +{ + unsigned short count; + unsigned int i; + struct slgt_desc *d; + + if (size == 0) + return; + + DBGDATA(info, buf, size, "tx"); + + info->tbuf_start = i = info->tbuf_current; + + while (size) { + d = &info->tbufs[i]; + if (++i == info->tbuf_count) + i = 0; + + count = (unsigned short)((size > DMABUFSIZE) ? DMABUFSIZE : size); + memcpy(d->buf, buf, count); + + size -= count; + buf += count; + + if (!size && info->params.mode != MGSL_MODE_RAW) + set_desc_eof(*d, 1); /* HDLC: set EOF of last desc */ + else + set_desc_eof(*d, 0); + + set_desc_count(*d, count); + } + + info->tbuf_current = i; +} + +static int register_test(struct slgt_info *info) +{ + static unsigned short patterns[] = + {0x0000, 0xffff, 0xaaaa, 0x5555, 0x6969, 0x9696}; + static unsigned int count = sizeof(patterns)/sizeof(patterns[0]); + unsigned int i; + int rc = 0; + + for (i=0 ; i < count ; i++) { + wr_reg16(info, TIR, patterns[i]); + wr_reg16(info, BDR, patterns[(i+1)%count]); + if ((rd_reg16(info, TIR) != patterns[i]) || + (rd_reg16(info, BDR) != patterns[(i+1)%count])) { + rc = -ENODEV; + break; + } + } + + info->init_error = rc ? 0 : DiagStatus_AddressFailure; + return rc; +} + +static int irq_test(struct slgt_info *info) +{ + unsigned long timeout; + unsigned long flags; + struct tty_struct *oldtty = info->tty; + u32 speed = info->params.data_rate; + + info->params.data_rate = 921600; + info->tty = NULL; + + spin_lock_irqsave(&info->lock, flags); + async_mode(info); + slgt_irq_on(info, IRQ_TXIDLE); + + /* enable transmitter */ + wr_reg16(info, TCR, + (unsigned short)(rd_reg16(info, TCR) | BIT1)); + + /* write one byte and wait for tx idle */ + wr_reg16(info, TDR, 0); + + /* assume failure */ + info->init_error = DiagStatus_IrqFailure; + info->irq_occurred = FALSE; + + spin_unlock_irqrestore(&info->lock, flags); + + timeout=100; + while(timeout-- && !info->irq_occurred) + msleep_interruptible(10); + + spin_lock_irqsave(&info->lock,flags); + reset_port(info); + spin_unlock_irqrestore(&info->lock,flags); + + info->params.data_rate = speed; + info->tty = oldtty; + + info->init_error = info->irq_occurred ? 0 : DiagStatus_IrqFailure; + return info->irq_occurred ? 0 : -ENODEV; +} + +static int loopback_test_rx(struct slgt_info *info) +{ + unsigned char *src, *dest; + int count; + + if (desc_complete(info->rbufs[0])) { + count = desc_count(info->rbufs[0]); + src = info->rbufs[0].buf; + dest = info->tmp_rbuf; + + for( ; count ; count-=2, src+=2) { + /* src=data byte (src+1)=status byte */ + if (!(*(src+1) & (BIT9 + BIT8))) { + *dest = *src; + dest++; + info->tmp_rbuf_count++; + } + } + DBGDATA(info, info->tmp_rbuf, info->tmp_rbuf_count, "rx"); + return 1; + } + return 0; +} + +static int loopback_test(struct slgt_info *info) +{ +#define TESTFRAMESIZE 20 + + unsigned long timeout; + u16 count = TESTFRAMESIZE; + unsigned char buf[TESTFRAMESIZE]; + int rc = -ENODEV; + unsigned long flags; + + struct tty_struct *oldtty = info->tty; + MGSL_PARAMS params; + + memcpy(¶ms, &info->params, sizeof(params)); + + info->params.mode = MGSL_MODE_ASYNC; + info->params.data_rate = 921600; + info->params.loopback = 1; + info->tty = NULL; + + /* build and send transmit frame */ + for (count = 0; count < TESTFRAMESIZE; ++count) + buf[count] = (unsigned char)count; + + info->tmp_rbuf_count = 0; + memset(info->tmp_rbuf, 0, TESTFRAMESIZE); + + /* program hardware for HDLC and enabled receiver */ + spin_lock_irqsave(&info->lock,flags); + async_mode(info); + rx_start(info); + info->tx_count = count; + tx_load(info, buf, count); + tx_start(info); + spin_unlock_irqrestore(&info->lock, flags); + + /* wait for receive complete */ + for (timeout = 100; timeout; --timeout) { + msleep_interruptible(10); + if (loopback_test_rx(info)) { + rc = 0; + break; + } + } + + /* verify received frame length and contents */ + if (!rc && (info->tmp_rbuf_count != count || + memcmp(buf, info->tmp_rbuf, count))) { + rc = -ENODEV; + } + + spin_lock_irqsave(&info->lock,flags); + reset_adapter(info); + spin_unlock_irqrestore(&info->lock,flags); + + memcpy(&info->params, ¶ms, sizeof(info->params)); + info->tty = oldtty; + + info->init_error = rc ? DiagStatus_DmaFailure : 0; + return rc; +} + +static int adapter_test(struct slgt_info *info) +{ + DBGINFO(("testing %s\n", info->device_name)); + if ((info->init_error = register_test(info)) < 0) { + printk("register test failure %s addr=%08X\n", + info->device_name, info->phys_reg_addr); + } else if ((info->init_error = irq_test(info)) < 0) { + printk("IRQ test failure %s IRQ=%d\n", + info->device_name, info->irq_level); + } else if ((info->init_error = loopback_test(info)) < 0) { + printk("loopback test failure %s\n", info->device_name); + } + return info->init_error; +} + +/* + * transmit timeout handler + */ +static void tx_timeout(unsigned long context) +{ + struct slgt_info *info = (struct slgt_info*)context; + unsigned long flags; + + DBGINFO(("%s tx_timeout\n", info->device_name)); + if(info->tx_active && info->params.mode == MGSL_MODE_HDLC) { + info->icount.txtimeout++; + } + spin_lock_irqsave(&info->lock,flags); + info->tx_active = 0; + info->tx_count = 0; + spin_unlock_irqrestore(&info->lock,flags); + +#ifdef CONFIG_HDLC + if (info->netcount) + hdlcdev_tx_done(info); + else +#endif + bh_transmit(info); +} + +/* + * receive buffer polling timer + */ +static void rx_timeout(unsigned long context) +{ + struct slgt_info *info = (struct slgt_info*)context; + unsigned long flags; + + DBGINFO(("%s rx_timeout\n", info->device_name)); + spin_lock_irqsave(&info->lock, flags); + info->pending_bh |= BH_RECEIVE; + spin_unlock_irqrestore(&info->lock, flags); + bh_handler(info); +} + diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index 2392e404e8d1..ba4582d160fd 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -2,6 +2,9 @@ # Makefile for the kernel tpm device drivers. # obj-$(CONFIG_TCG_TPM) += tpm.o +ifdef CONFIG_ACPI + obj-$(CONFIG_TCG_TPM) += tpm_bios.o +endif obj-$(CONFIG_TCG_NSC) += tpm_nsc.o obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index a9be0e8eaea5..5a3870477ef1 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -466,6 +466,7 @@ void tpm_remove_hardware(struct device *dev) kfree(chip->vendor->miscdev.name); sysfs_remove_group(&dev->kobj, chip->vendor->attr_group); + tpm_bios_log_teardown(chip->bios_dir); dev_mask[chip->dev_num / TPM_NUM_MASK_ENTRIES ] &= ~(1 << (chip->dev_num % TPM_NUM_MASK_ENTRIES)); @@ -593,6 +594,8 @@ dev_num_search_complete: sysfs_create_group(&dev->kobj, chip->vendor->attr_group); + chip->bios_dir = tpm_bios_log_setup(devname); + return 0; } EXPORT_SYMBOL_GPL(tpm_register_hardware); diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 159882ca69dd..fd3a4beaa53d 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -82,6 +82,8 @@ struct tpm_chip { struct tpm_vendor_specific *vendor; + struct dentry **bios_dir; + struct list_head list; }; @@ -107,3 +109,16 @@ extern ssize_t tpm_read(struct file *, char __user *, size_t, loff_t *); extern void tpm_remove_hardware(struct device *); extern int tpm_pm_suspend(struct device *, pm_message_t); extern int tpm_pm_resume(struct device *); + +#ifdef CONFIG_ACPI +extern struct dentry ** tpm_bios_log_setup(char *); +extern void tpm_bios_log_teardown(struct dentry **); +#else +static inline struct dentry* tpm_bios_log_setup(char *name) +{ + return NULL; +} +static inline void tpm_bios_log_teardown(struct dentry **dir) +{ +} +#endif diff --git a/drivers/char/tpm/tpm_bios.c b/drivers/char/tpm/tpm_bios.c new file mode 100644 index 000000000000..aedf7a8e6da7 --- /dev/null +++ b/drivers/char/tpm/tpm_bios.c @@ -0,0 +1,540 @@ +/* + * Copyright (C) 2005 IBM Corporation + * + * Authors: + * Seiji Munetoh <munetoh@jp.ibm.com> + * Stefan Berger <stefanb@us.ibm.com> + * Reiner Sailer <sailer@watson.ibm.com> + * Kylene Hall <kjhall@us.ibm.com> + * + * Access to the eventlog extended by the TCG BIOS of PC platform + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/seq_file.h> +#include <linux/fs.h> +#include <linux/security.h> +#include <linux/module.h> +#include <acpi/acpi.h> +#include <acpi/actypes.h> +#include <acpi/actbl.h> +#include "tpm.h" + +#define TCG_EVENT_NAME_LEN_MAX 255 +#define MAX_TEXT_EVENT 1000 /* Max event string length */ +#define ACPI_TCPA_SIG "TCPA" /* 0x41504354 /'TCPA' */ + +struct tpm_bios_log { + void *bios_event_log; + void *bios_event_log_end; +}; + +struct acpi_tcpa { + struct acpi_table_header hdr; + u16 reserved; + u32 log_max_len __attribute__ ((packed)); + u32 log_start_addr __attribute__ ((packed)); +}; + +struct tcpa_event { + u32 pcr_index; + u32 event_type; + u8 pcr_value[20]; /* SHA1 */ + u32 event_size; + u8 event_data[0]; +}; + +enum tcpa_event_types { + PREBOOT = 0, + POST_CODE, + UNUSED, + NO_ACTION, + SEPARATOR, + ACTION, + EVENT_TAG, + SCRTM_CONTENTS, + SCRTM_VERSION, + CPU_MICROCODE, + PLATFORM_CONFIG_FLAGS, + TABLE_OF_DEVICES, + COMPACT_HASH, + IPL, + IPL_PARTITION_DATA, + NONHOST_CODE, + NONHOST_CONFIG, + NONHOST_INFO, +}; + +static const char* tcpa_event_type_strings[] = { + "PREBOOT", + "POST CODE", + "", + "NO ACTION", + "SEPARATOR", + "ACTION", + "EVENT TAG", + "S-CRTM Contents", + "S-CRTM Version", + "CPU Microcode", + "Platform Config Flags", + "Table of Devices", + "Compact Hash", + "IPL", + "IPL Partition Data", + "Non-Host Code", + "Non-Host Config", + "Non-Host Info" +}; + +enum tcpa_pc_event_ids { + SMBIOS = 1, + BIS_CERT, + POST_BIOS_ROM, + ESCD, + CMOS, + NVRAM, + OPTION_ROM_EXEC, + OPTION_ROM_CONFIG, + OPTION_ROM_MICROCODE, + S_CRTM_VERSION, + S_CRTM_CONTENTS, + POST_CONTENTS, +}; + +static const char* tcpa_pc_event_id_strings[] = { + "" + "SMBIOS", + "BIS Certificate", + "POST BIOS ", + "ESCD ", + "CMOS", + "NVRAM", + "Option ROM", + "Option ROM config", + "Option ROM microcode", + "S-CRTM Version", + "S-CRTM Contents", + "S-CRTM POST Contents", +}; + +/* returns pointer to start of pos. entry of tcg log */ +static void *tpm_bios_measurements_start(struct seq_file *m, loff_t *pos) +{ + loff_t i; + struct tpm_bios_log *log = m->private; + void *addr = log->bios_event_log; + void *limit = log->bios_event_log_end; + struct tcpa_event *event; + + /* read over *pos measurements */ + for (i = 0; i < *pos; i++) { + event = addr; + + if ((addr + sizeof(struct tcpa_event)) < limit) { + if (event->event_type == 0 && event->event_size == 0) + return NULL; + addr += sizeof(struct tcpa_event) + event->event_size; + } + } + + /* now check if current entry is valid */ + if ((addr + sizeof(struct tcpa_event)) >= limit) + return NULL; + + event = addr; + + if ((event->event_type == 0 && event->event_size == 0) || + ((addr + sizeof(struct tcpa_event) + event->event_size) >= limit)) + return NULL; + + return addr; +} + +static void *tpm_bios_measurements_next(struct seq_file *m, void *v, + loff_t *pos) +{ + struct tcpa_event *event = v; + struct tpm_bios_log *log = m->private; + void *limit = log->bios_event_log_end; + + v += sizeof(struct tcpa_event) + event->event_size; + + /* now check if current entry is valid */ + if ((v + sizeof(struct tcpa_event)) >= limit) + return NULL; + + event = v; + + if (event->event_type == 0 && event->event_size == 0) + return NULL; + + if ((event->event_type == 0 && event->event_size == 0) || + ((v + sizeof(struct tcpa_event) + event->event_size) >= limit)) + return NULL; + + (*pos)++; + return v; +} + +static void tpm_bios_measurements_stop(struct seq_file *m, void *v) +{ +} + +static int get_event_name(char *dest, struct tcpa_event *event, + unsigned char * event_entry) +{ + const char *name = ""; + char data[40] = ""; + int i, n_len = 0, d_len = 0; + u32 event_id, event_data_size; + + switch(event->event_type) { + case PREBOOT: + case POST_CODE: + case UNUSED: + case NO_ACTION: + case SCRTM_CONTENTS: + case SCRTM_VERSION: + case CPU_MICROCODE: + case PLATFORM_CONFIG_FLAGS: + case TABLE_OF_DEVICES: + case COMPACT_HASH: + case IPL: + case IPL_PARTITION_DATA: + case NONHOST_CODE: + case NONHOST_CONFIG: + case NONHOST_INFO: + name = tcpa_event_type_strings[event->event_type]; + n_len = strlen(name); + break; + case SEPARATOR: + case ACTION: + if (MAX_TEXT_EVENT > event->event_size) { + name = event_entry; + n_len = event->event_size; + } + break; + case EVENT_TAG: + event_id = be32_to_cpu(event_entry); + event_data_size = be32_to_cpu(&event_entry[4]); + + /* ToDo Row data -> Base64 */ + + switch (event_id) { + case SMBIOS: + case BIS_CERT: + case CMOS: + case NVRAM: + case OPTION_ROM_EXEC: + case OPTION_ROM_CONFIG: + case OPTION_ROM_MICROCODE: + case S_CRTM_VERSION: + case S_CRTM_CONTENTS: + case POST_CONTENTS: + name = tcpa_pc_event_id_strings[event_id]; + n_len = strlen(name); + break; + case POST_BIOS_ROM: + case ESCD: + name = tcpa_pc_event_id_strings[event_id]; + n_len = strlen(name); + for (i = 0; i < 20; i++) + d_len += sprintf(data, "%02x", + event_entry[8 + i]); + break; + default: + break; + } + default: + break; + } + + return snprintf(dest, MAX_TEXT_EVENT, "[%.*s%.*s]", + n_len, name, d_len, data); + +} + +static int tpm_binary_bios_measurements_show(struct seq_file *m, void *v) +{ + + char *eventname; + char data[4]; + u32 help; + int i, len; + struct tcpa_event *event = (struct tcpa_event *) v; + unsigned char *event_entry = + (unsigned char *) (v + sizeof(struct tcpa_event)); + + eventname = kmalloc(MAX_TEXT_EVENT, GFP_KERNEL); + if (!eventname) { + printk(KERN_ERR "%s: ERROR - No Memory for event name\n ", + __func__); + return -ENOMEM; + } + + /* 1st: PCR used is in little-endian format (4 bytes) */ + help = le32_to_cpu(event->pcr_index); + memcpy(data, &help, 4); + for (i = 0; i < 4; i++) + seq_putc(m, data[i]); + + /* 2nd: SHA1 (20 bytes) */ + for (i = 0; i < 20; i++) + seq_putc(m, event->pcr_value[i]); + + /* 3rd: event type identifier (4 bytes) */ + help = le32_to_cpu(event->event_type); + memcpy(data, &help, 4); + for (i = 0; i < 4; i++) + seq_putc(m, data[i]); + + len = 0; + + len += get_event_name(eventname, event, event_entry); + + /* 4th: filename <= 255 + \'0' delimiter */ + if (len > TCG_EVENT_NAME_LEN_MAX) + len = TCG_EVENT_NAME_LEN_MAX; + + for (i = 0; i < len; i++) + seq_putc(m, eventname[i]); + + /* 5th: delimiter */ + seq_putc(m, '\0'); + + return 0; +} + +static int tpm_bios_measurements_release(struct inode *inode, + struct file *file) +{ + struct seq_file *seq = file->private_data; + struct tpm_bios_log *log = seq->private; + + if (log) { + kfree(log->bios_event_log); + kfree(log); + } + + return seq_release(inode, file); +} + +static int tpm_ascii_bios_measurements_show(struct seq_file *m, void *v) +{ + int len = 0; + int i; + char *eventname; + struct tcpa_event *event = v; + unsigned char *event_entry = + (unsigned char *) (v + sizeof(struct tcpa_event)); + + eventname = kmalloc(MAX_TEXT_EVENT, GFP_KERNEL); + if (!eventname) { + printk(KERN_ERR "%s: ERROR - No Memory for event name\n ", + __func__); + return -EFAULT; + } + + seq_printf(m, "%2d ", event->pcr_index); + + /* 2nd: SHA1 */ + for (i = 0; i < 20; i++) + seq_printf(m, "%02x", event->pcr_value[i]); + + /* 3rd: event type identifier */ + seq_printf(m, " %02x", event->event_type); + + len += get_event_name(eventname, event, event_entry); + + /* 4th: eventname <= max + \'0' delimiter */ + seq_printf(m, " %s\n", eventname); + + return 0; +} + +static struct seq_operations tpm_ascii_b_measurments_seqops = { + .start = tpm_bios_measurements_start, + .next = tpm_bios_measurements_next, + .stop = tpm_bios_measurements_stop, + .show = tpm_ascii_bios_measurements_show, +}; + +static struct seq_operations tpm_binary_b_measurments_seqops = { + .start = tpm_bios_measurements_start, + .next = tpm_bios_measurements_next, + .stop = tpm_bios_measurements_stop, + .show = tpm_binary_bios_measurements_show, +}; + +/* read binary bios log */ +static int read_log(struct tpm_bios_log *log) +{ + struct acpi_tcpa *buff; + acpi_status status; + void *virt; + + if (log->bios_event_log != NULL) { + printk(KERN_ERR + "%s: ERROR - Eventlog already initialized\n", + __func__); + return -EFAULT; + } + + /* Find TCPA entry in RSDT (ACPI_LOGICAL_ADDRESSING) */ + status = acpi_get_firmware_table(ACPI_TCPA_SIG, 1, + ACPI_LOGICAL_ADDRESSING, + (struct acpi_table_header **) + &buff); + + if (ACPI_FAILURE(status)) { + printk(KERN_ERR "%s: ERROR - Could not get TCPA table\n", + __func__); + return -EIO; + } + + if (buff->log_max_len == 0) { + printk(KERN_ERR "%s: ERROR - TCPA log area empty\n", __func__); + return -EIO; + } + + /* malloc EventLog space */ + log->bios_event_log = kmalloc(buff->log_max_len, GFP_KERNEL); + if (!log->bios_event_log) { + printk + ("%s: ERROR - Not enough Memory for BIOS measurements\n", + __func__); + return -ENOMEM; + } + + log->bios_event_log_end = log->bios_event_log + buff->log_max_len; + + acpi_os_map_memory(buff->log_start_addr, buff->log_max_len, &virt); + + memcpy(log->bios_event_log, virt, buff->log_max_len); + + acpi_os_unmap_memory(virt, buff->log_max_len); + return 0; +} + +static int tpm_ascii_bios_measurements_open(struct inode *inode, + struct file *file) +{ + int err; + struct tpm_bios_log *log; + struct seq_file *seq; + + log = kzalloc(sizeof(struct tpm_bios_log), GFP_KERNEL); + if (!log) + return -ENOMEM; + + if ((err = read_log(log))) + return err; + + /* now register seq file */ + err = seq_open(file, &tpm_ascii_b_measurments_seqops); + if (!err) { + seq = file->private_data; + seq->private = log; + } else { + kfree(log->bios_event_log); + kfree(log); + } + return err; +} + +struct file_operations tpm_ascii_bios_measurements_ops = { + .open = tpm_ascii_bios_measurements_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tpm_bios_measurements_release, +}; + +static int tpm_binary_bios_measurements_open(struct inode *inode, + struct file *file) +{ + int err; + struct tpm_bios_log *log; + struct seq_file *seq; + + log = kzalloc(sizeof(struct tpm_bios_log), GFP_KERNEL); + if (!log) + return -ENOMEM; + + if ((err = read_log(log))) + return err; + + /* now register seq file */ + err = seq_open(file, &tpm_binary_b_measurments_seqops); + if (!err) { + seq = file->private_data; + seq->private = log; + } else { + kfree(log->bios_event_log); + kfree(log); + } + return err; +} + +struct file_operations tpm_binary_bios_measurements_ops = { + .open = tpm_binary_bios_measurements_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tpm_bios_measurements_release, +}; + +struct dentry **tpm_bios_log_setup(char *name) +{ + struct dentry **ret = NULL, *tpm_dir, *bin_file, *ascii_file; + + tpm_dir = securityfs_create_dir(name, NULL); + if (!tpm_dir) + goto out; + + bin_file = + securityfs_create_file("binary_bios_measurements", + S_IRUSR | S_IRGRP, tpm_dir, NULL, + &tpm_binary_bios_measurements_ops); + if (!bin_file) + goto out_tpm; + + ascii_file = + securityfs_create_file("ascii_bios_measurements", + S_IRUSR | S_IRGRP, tpm_dir, NULL, + &tpm_ascii_bios_measurements_ops); + if (!ascii_file) + goto out_bin; + + ret = kmalloc(3 * sizeof(struct dentry *), GFP_KERNEL); + if (!ret) + goto out_ascii; + + ret[0] = ascii_file; + ret[1] = bin_file; + ret[2] = tpm_dir; + + return ret; + +out_ascii: + securityfs_remove(ascii_file); +out_bin: + securityfs_remove(bin_file); +out_tpm: + securityfs_remove(tpm_dir); +out: + return NULL; +} +EXPORT_SYMBOL_GPL(tpm_bios_log_setup); + +void tpm_bios_log_teardown(struct dentry **lst) +{ + int i; + + for (i = 0; i < 3; i++) + securityfs_remove(lst[i]); +} +EXPORT_SYMBOL_GPL(tpm_bios_log_teardown); diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c index 9ac6d43437b3..a5b18e086a94 100644 --- a/drivers/char/vr41xx_giu.c +++ b/drivers/char/vr41xx_giu.c @@ -718,7 +718,7 @@ static struct platform_driver giu_device_driver = { }, }; -static int __devinit vr41xx_giu_init(void) +static int __init vr41xx_giu_init(void) { int retval; @@ -733,7 +733,7 @@ static int __devinit vr41xx_giu_init(void) return retval; } -static void __devexit vr41xx_giu_exit(void) +static void __exit vr41xx_giu_exit(void) { platform_driver_unregister(&giu_device_driver); diff --git a/drivers/char/watchdog/wdt977.c b/drivers/char/watchdog/wdt977.c index 44d49dfacbb3..3843900e94c4 100644 --- a/drivers/char/watchdog/wdt977.c +++ b/drivers/char/watchdog/wdt977.c @@ -1,5 +1,5 @@ /* - * Wdt977 0.03: A Watchdog Device for Netwinder W83977AF chip + * Wdt977 0.04: A Watchdog Device for Netwinder W83977AF chip * * (c) Copyright 1998 Rebel.com (Woody Suwalski <woody@netwinder.org>) * @@ -18,6 +18,8 @@ * from minutes to seconds. * 07-Jul-2003 Daniele Bellucci: Audit return code of misc_register in * nwwatchdog_init. + * 25-Oct-2005 Woody Suwalski: Convert addresses to #defs, add spinlocks + * remove limitiation to be used on Netwinders only */ #include <linux/module.h> @@ -28,6 +30,7 @@ #include <linux/fs.h> #include <linux/miscdevice.h> #include <linux/init.h> +#include <linux/ioport.h> #include <linux/watchdog.h> #include <linux/notifier.h> #include <linux/reboot.h> @@ -37,8 +40,18 @@ #include <asm/mach-types.h> #include <asm/uaccess.h> -#define PFX "Wdt977: " -#define WATCHDOG_MINOR 130 +#define WATCHDOG_VERSION "0.04" +#define WATCHDOG_NAME "Wdt977" +#define PFX WATCHDOG_NAME ": " +#define DRIVER_VERSION WATCHDOG_NAME " driver, v" WATCHDOG_VERSION "\n" + +#define IO_INDEX_PORT 0x370 /* on some systems it can be 0x3F0 */ +#define IO_DATA_PORT (IO_INDEX_PORT+1) + +#define UNLOCK_DATA 0x87 +#define LOCK_DATA 0xAA +#define DEVICE_REGISTER 0x07 + #define DEFAULT_TIMEOUT 60 /* default timeout in seconds */ @@ -47,6 +60,7 @@ static int timeoutM; /* timeout in minutes */ static unsigned long timer_alive; static int testmode; static char expect_close; +static spinlock_t spinlock; module_param(timeout, int, 0); MODULE_PARM_DESC(timeout,"Watchdog timeout in seconds (60..15300), default=" __MODULE_STRING(DEFAULT_TIMEOUT) ")"); @@ -63,9 +77,13 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=CON static int wdt977_start(void) { + unsigned long flags; + + spin_lock_irqsave(&spinlock, flags); + /* unlock the SuperIO chip */ - outb(0x87,0x370); - outb(0x87,0x370); + outb_p(UNLOCK_DATA, IO_INDEX_PORT); + outb_p(UNLOCK_DATA, IO_INDEX_PORT); /* select device Aux2 (device=8) and set watchdog regs F2, F3 and F4 * F2 has the timeout in minutes @@ -73,28 +91,29 @@ static int wdt977_start(void) * at timeout, and to reset timer on kbd/mouse activity (not impl.) * F4 is used to just clear the TIMEOUT'ed state (bit 0) */ - outb(0x07,0x370); - outb(0x08,0x371); - outb(0xF2,0x370); - outb(timeoutM,0x371); - outb(0xF3,0x370); - outb(0x00,0x371); /* another setting is 0E for kbd/mouse/LED */ - outb(0xF4,0x370); - outb(0x00,0x371); + outb_p(DEVICE_REGISTER, IO_INDEX_PORT); + outb_p(0x08, IO_DATA_PORT); + outb_p(0xF2, IO_INDEX_PORT); + outb_p(timeoutM, IO_DATA_PORT); + outb_p(0xF3, IO_INDEX_PORT); + outb_p(0x00, IO_DATA_PORT); /* another setting is 0E for kbd/mouse/LED */ + outb_p(0xF4, IO_INDEX_PORT); + outb_p(0x00, IO_DATA_PORT); /* at last select device Aux1 (dev=7) and set GP16 as a watchdog output */ /* in test mode watch the bit 1 on F4 to indicate "triggered" */ if (!testmode) { - outb(0x07,0x370); - outb(0x07,0x371); - outb(0xE6,0x370); - outb(0x08,0x371); + outb_p(DEVICE_REGISTER, IO_INDEX_PORT); + outb_p(0x07, IO_DATA_PORT); + outb_p(0xE6, IO_INDEX_PORT); + outb_p(0x08, IO_DATA_PORT); } /* lock the SuperIO chip */ - outb(0xAA,0x370); + outb_p(LOCK_DATA, IO_INDEX_PORT); + spin_unlock_irqrestore(&spinlock, flags); printk(KERN_INFO PFX "activated.\n"); return 0; @@ -106,35 +125,39 @@ static int wdt977_start(void) static int wdt977_stop(void) { + unsigned long flags; + spin_lock_irqsave(&spinlock, flags); + /* unlock the SuperIO chip */ - outb(0x87,0x370); - outb(0x87,0x370); + outb_p(UNLOCK_DATA, IO_INDEX_PORT); + outb_p(UNLOCK_DATA, IO_INDEX_PORT); /* select device Aux2 (device=8) and set watchdog regs F2,F3 and F4 * F3 is reset to its default state * F4 can clear the TIMEOUT'ed state (bit 0) - back to default * We can not use GP17 as a PowerLed, as we use its usage as a RedLed */ - outb(0x07,0x370); - outb(0x08,0x371); - outb(0xF2,0x370); - outb(0xFF,0x371); - outb(0xF3,0x370); - outb(0x00,0x371); - outb(0xF4,0x370); - outb(0x00,0x371); - outb(0xF2,0x370); - outb(0x00,0x371); + outb_p(DEVICE_REGISTER, IO_INDEX_PORT); + outb_p(0x08, IO_DATA_PORT); + outb_p(0xF2, IO_INDEX_PORT); + outb_p(0xFF, IO_DATA_PORT); + outb_p(0xF3, IO_INDEX_PORT); + outb_p(0x00, IO_DATA_PORT); + outb_p(0xF4, IO_INDEX_PORT); + outb_p(0x00, IO_DATA_PORT); + outb_p(0xF2, IO_INDEX_PORT); + outb_p(0x00, IO_DATA_PORT); /* at last select device Aux1 (dev=7) and set GP16 as a watchdog output */ - outb(0x07,0x370); - outb(0x07,0x371); - outb(0xE6,0x370); - outb(0x08,0x371); + outb_p(DEVICE_REGISTER, IO_INDEX_PORT); + outb_p(0x07, IO_DATA_PORT); + outb_p(0xE6, IO_INDEX_PORT); + outb_p(0x08, IO_DATA_PORT); /* lock the SuperIO chip */ - outb(0xAA,0x370); + outb_p(LOCK_DATA, IO_INDEX_PORT); + spin_unlock_irqrestore(&spinlock, flags); printk(KERN_INFO PFX "shutdown.\n"); return 0; @@ -147,19 +170,23 @@ static int wdt977_stop(void) static int wdt977_keepalive(void) { + unsigned long flags; + spin_lock_irqsave(&spinlock, flags); + /* unlock the SuperIO chip */ - outb(0x87,0x370); - outb(0x87,0x370); + outb_p(UNLOCK_DATA, IO_INDEX_PORT); + outb_p(UNLOCK_DATA, IO_INDEX_PORT); /* select device Aux2 (device=8) and kicks watchdog reg F2 */ /* F2 has the timeout in minutes */ - outb(0x07,0x370); - outb(0x08,0x371); - outb(0xF2,0x370); - outb(timeoutM,0x371); + outb_p(DEVICE_REGISTER, IO_INDEX_PORT); + outb_p(0x08, IO_DATA_PORT); + outb_p(0xF2, IO_INDEX_PORT); + outb_p(timeoutM, IO_DATA_PORT); /* lock the SuperIO chip */ - outb(0xAA,0x370); + outb_p(LOCK_DATA, IO_INDEX_PORT); + spin_unlock_irqrestore(&spinlock, flags); return 0; } @@ -198,22 +225,26 @@ static int wdt977_set_timeout(int t) static int wdt977_get_status(int *status) { int new_status; + unsigned long flags; - *status=0; + spin_lock_irqsave(&spinlock, flags); /* unlock the SuperIO chip */ - outb(0x87,0x370); - outb(0x87,0x370); + outb_p(UNLOCK_DATA, IO_INDEX_PORT); + outb_p(UNLOCK_DATA, IO_INDEX_PORT); /* select device Aux2 (device=8) and read watchdog reg F4 */ - outb(0x07,0x370); - outb(0x08,0x371); - outb(0xF4,0x370); - new_status = inb(0x371); + outb_p(DEVICE_REGISTER, IO_INDEX_PORT); + outb_p(0x08, IO_DATA_PORT); + outb_p(0xF4, IO_INDEX_PORT); + new_status = inb_p(IO_DATA_PORT); /* lock the SuperIO chip */ - outb(0xAA,0x370); + outb_p(LOCK_DATA, IO_INDEX_PORT); + spin_unlock_irqrestore(&spinlock, flags); + + *status=0; if (new_status & 1) *status |= WDIOF_CARDRESET; @@ -249,8 +280,8 @@ static int wdt977_release(struct inode *inode, struct file *file) wdt977_stop(); clear_bit(0,&timer_alive); } else { - printk(KERN_CRIT PFX "Unexpected close, not stopping watchdog!\n"); wdt977_keepalive(); + printk(KERN_CRIT PFX "Unexpected close, not stopping watchdog!\n"); } expect_close = 0; return 0; @@ -271,14 +302,17 @@ static int wdt977_release(struct inode *inode, struct file *file) static ssize_t wdt977_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - if (count) { - if (!nowayout) { + if (count) + { + if (!nowayout) + { size_t i; /* In case it was set long ago */ expect_close = 0; - for (i = 0; i != count; i++) { + for (i = 0; i != count; i++) + { char c; if (get_user(c, buf + i)) return -EFAULT; @@ -287,6 +321,7 @@ static ssize_t wdt977_write(struct file *file, const char __user *buf, } } + /* someone wrote to us, we should restart timer */ wdt977_keepalive(); } return count; @@ -308,7 +343,7 @@ static struct watchdog_info ident = { WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING, .firmware_version = 1, - .identity = "Winbond 83977", + .identity = WATCHDOG_NAME, }; static int wdt977_ioctl(struct inode *inode, struct file *file, @@ -405,50 +440,81 @@ static struct notifier_block wdt977_notifier = { .notifier_call = wdt977_notify_sys, }; -static int __init nwwatchdog_init(void) +static int __init wd977_init(void) { - int retval; - if (!machine_is_netwinder()) - return -ENODEV; + int rc; + + //if (!machine_is_netwinder()) + // return -ENODEV; + + printk(KERN_INFO PFX DRIVER_VERSION); + + spin_lock_init(&spinlock); /* Check that the timeout value is within it's range ; if not reset to the default */ - if (wdt977_set_timeout(timeout)) { + if (wdt977_set_timeout(timeout)) + { wdt977_set_timeout(DEFAULT_TIMEOUT); printk(KERN_INFO PFX "timeout value must be 60<timeout<15300, using %d\n", DEFAULT_TIMEOUT); } - retval = register_reboot_notifier(&wdt977_notifier); - if (retval) { - printk(KERN_ERR PFX "cannot register reboot notifier (err=%d)\n", - retval); - return retval; + /* on Netwinder the IOports are already reserved by + * arch/arm/mach-footbridge/netwinder-hw.c + */ + if (!machine_is_netwinder()) + { + if (!request_region(IO_INDEX_PORT, 2, WATCHDOG_NAME)) + { + printk(KERN_ERR PFX "I/O address 0x%04x already in use\n", + IO_INDEX_PORT); + rc = -EIO; + goto err_out; + } } - retval = misc_register(&wdt977_miscdev); - if (retval) { + rc = misc_register(&wdt977_miscdev); + if (rc) + { printk(KERN_ERR PFX "cannot register miscdev on minor=%d (err=%d)\n", - WATCHDOG_MINOR, retval); - unregister_reboot_notifier(&wdt977_notifier); - return retval; + wdt977_miscdev.minor, rc); + goto err_out_region; + } + + rc = register_reboot_notifier(&wdt977_notifier); + if (rc) + { + printk(KERN_ERR PFX "cannot register reboot notifier (err=%d)\n", + rc); + goto err_out_miscdev; } - printk(KERN_INFO PFX "initialized. timeout=%d sec (nowayout=%d, testmode = %i)\n", + printk(KERN_INFO PFX "initialized. timeout=%d sec (nowayout=%d, testmode=%i)\n", timeout, nowayout, testmode); return 0; + +err_out_miscdev: + misc_deregister(&wdt977_miscdev); +err_out_region: + if (!machine_is_netwinder()) + release_region(IO_INDEX_PORT,2); +err_out: + return rc; } -static void __exit nwwatchdog_exit(void) +static void __exit wd977_exit(void) { + wdt977_stop(); misc_deregister(&wdt977_miscdev); unregister_reboot_notifier(&wdt977_notifier); + release_region(IO_INDEX_PORT,2); } -module_init(nwwatchdog_init); -module_exit(nwwatchdog_exit); +module_init(wd977_init); +module_exit(wd977_exit); -MODULE_AUTHOR("Woody Suwalski <woody@netwinder.org>"); +MODULE_AUTHOR("Woody Suwalski <woodys@xandros.com>"); MODULE_DESCRIPTION("W83977AF Watchdog driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 969d2b4aaec0..385e52930c02 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -34,14 +34,14 @@ static atomic_t proc_event_num_listeners = ATOMIC_INIT(0); static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; -/* proc_counts is used as the sequence number of the netlink message */ +/* proc_event_counts is used as the sequence number of the netlink message */ static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 }; static inline void get_seq(__u32 *ts, int *cpu) { *ts = get_cpu_var(proc_event_counts)++; *cpu = smp_processor_id(); - put_cpu_var(proc_counts); + put_cpu_var(proc_event_counts); } void proc_fork_connector(struct task_struct *task) diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c index e70b3db69edd..1af3dfbb8086 100644 --- a/drivers/i2c/chips/tps65010.c +++ b/drivers/i2c/chips/tps65010.c @@ -494,6 +494,7 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind) { struct tps65010 *tps; int status; + unsigned long irqflags; if (the_tps) { dev_dbg(&bus->dev, "only one %s for now\n", DRIVER_NAME); @@ -520,13 +521,14 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind) } #ifdef CONFIG_ARM + irqflags = SA_SAMPLE_RANDOM | SA_TRIGGER_LOW; if (machine_is_omap_h2()) { tps->model = TPS65010; omap_cfg_reg(W4_GPIO58); tps->irq = OMAP_GPIO_IRQ(58); omap_request_gpio(58); omap_set_gpio_direction(58, 1); - set_irq_type(tps->irq, IRQT_FALLING); + irqflags |= SA_TRIGGER_FALLING; } if (machine_is_omap_osk()) { tps->model = TPS65010; @@ -534,7 +536,7 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind) tps->irq = OMAP_GPIO_IRQ(OMAP_MPUIO(1)); omap_request_gpio(OMAP_MPUIO(1)); omap_set_gpio_direction(OMAP_MPUIO(1), 1); - set_irq_type(tps->irq, IRQT_FALLING); + irqflags |= SA_TRIGGER_FALLING; } if (machine_is_omap_h3()) { tps->model = TPS65013; @@ -542,13 +544,12 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind) // FIXME set up this board's IRQ ... } #else -#define set_irq_type(num,trigger) do{}while(0) + irqflags = SA_SAMPLE_RANDOM; #endif if (tps->irq > 0) { - set_irq_type(tps->irq, IRQT_LOW); status = request_irq(tps->irq, tps65010_irq, - SA_SAMPLE_RANDOM, DRIVER_NAME, tps); + irqflags, DRIVER_NAME, tps); if (status < 0) { dev_dbg(&tps->client.dev, "can't get IRQ %d, err %d\n", tps->irq, status); diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 4b441720b6ba..cab362ea0336 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -1130,6 +1130,17 @@ static int idedisk_release(struct inode *inode, struct file *filp) return 0; } +static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk); + ide_drive_t *drive = idkp->drive; + + geo->heads = drive->bios_head; + geo->sectors = drive->bios_sect; + geo->cylinders = (u16)drive->bios_cyl; /* truncate */ + return 0; +} + static int idedisk_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -1164,6 +1175,7 @@ static struct block_device_operations idedisk_ops = { .open = idedisk_open, .release = idedisk_release, .ioctl = idedisk_ioctl, + .getgeo = idedisk_getgeo, .media_changed = idedisk_media_changed, .revalidate_disk= idedisk_revalidate_disk }; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index fba3fffc2d66..5945f551aaaa 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -2031,6 +2031,17 @@ static int idefloppy_release(struct inode *inode, struct file *filp) return 0; } +static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct ide_floppy_obj *floppy = ide_floppy_g(bdev->bd_disk); + ide_drive_t *drive = floppy->drive; + + geo->heads = drive->bios_head; + geo->sectors = drive->bios_sect; + geo->cylinders = (u16)drive->bios_cyl; /* truncate */ + return 0; +} + static int idefloppy_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -2120,6 +2131,7 @@ static struct block_device_operations idefloppy_ops = { .open = idefloppy_open, .release = idefloppy_release, .ioctl = idefloppy_ioctl, + .getgeo = idefloppy_getgeo, .media_changed = idefloppy_media_changed, .revalidate_disk= idefloppy_revalidate_disk }; diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 4b524f6b3ecd..b069b13b75a7 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -1278,19 +1278,6 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device up(&ide_setting_sem); switch (cmd) { - case HDIO_GETGEO: - { - struct hd_geometry geom; - if (!p || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL; - geom.heads = drive->bios_head; - geom.sectors = drive->bios_sect; - geom.cylinders = (u16)drive->bios_cyl; /* truncate */ - geom.start = get_start_sect(bdev); - if (copy_to_user(p, &geom, sizeof(struct hd_geometry))) - return -EFAULT; - return 0; - } - case HDIO_OBSOLETE_IDENTITY: case HDIO_GET_IDENTITY: if (bdev != bdev->bd_contains) diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c index 242029c9c0ca..6439dec66881 100644 --- a/drivers/ide/legacy/hd.c +++ b/drivers/ide/legacy/hd.c @@ -658,22 +658,14 @@ static void do_hd_request (request_queue_t * q) enable_irq(HD_IRQ); } -static int hd_ioctl(struct inode * inode, struct file * file, - unsigned int cmd, unsigned long arg) +static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct hd_i_struct *disk = inode->i_bdev->bd_disk->private_data; - struct hd_geometry __user *loc = (struct hd_geometry __user *) arg; - struct hd_geometry g; - - if (cmd != HDIO_GETGEO) - return -EINVAL; - if (!loc) - return -EINVAL; - g.heads = disk->head; - g.sectors = disk->sect; - g.cylinders = disk->cyl; - g.start = get_start_sect(inode->i_bdev); - return copy_to_user(loc, &g, sizeof g) ? -EFAULT : 0; + struct hd_i_struct *disk = bdev->bd_disk->private_data; + + geo->heads = disk->head; + geo->sectors = disk->sect; + geo->cylinders = disk->cyl; + return 0; } /* @@ -695,7 +687,7 @@ static irqreturn_t hd_interrupt(int irq, void *dev_id, struct pt_regs *regs) } static struct block_device_operations hd_fops = { - .ioctl = hd_ioctl, + .getgeo = hd_getgeo, }; /* diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c index ff2e217a8c84..0d3073f4eab4 100644 --- a/drivers/ide/pci/serverworks.c +++ b/drivers/ide/pci/serverworks.c @@ -69,7 +69,7 @@ static int check_in_drive_lists (ide_drive_t *drive, const char **list) static u8 svwks_ratemask (ide_drive_t *drive) { struct pci_dev *dev = HWIF(drive)->pci_dev; - u8 mode; + u8 mode = 0; if (!svwks_revision) pci_read_config_byte(dev, PCI_REVISION_ID, &svwks_revision); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 02110e00d145..3a611fe5497e 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -308,10 +308,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; int ret; + static int next_id; do { spin_lock_irqsave(&cm.lock, flags); - ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 1, + ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++, (__force int *) &cm_id_priv->id.local_id); spin_unlock_irqrestore(&cm.lock, flags); } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); @@ -684,6 +685,13 @@ retest: cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); break; case IB_CM_REQ_SENT: + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, + &cm_id_priv->av.port->cm_dev->ca_guid, + sizeof cm_id_priv->av.port->cm_dev->ca_guid, + NULL, 0); + break; case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: @@ -694,10 +702,8 @@ retest: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, - &cm_id_priv->av.port->cm_dev->ca_guid, - sizeof cm_id_priv->av.port->cm_dev->ca_guid, - NULL, 0); + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); break; case IB_CM_ESTABLISHED: spin_unlock_irqrestore(&cm_id_priv->lock, flags); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index eb7f52537ccc..c908de8db5a9 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -197,8 +197,8 @@ static void send_handler(struct ib_mad_agent *agent, memcpy(timeout->mad.data, packet->mad.data, sizeof (struct ib_mad_hdr)); - if (!queue_packet(file, agent, timeout)) - return; + if (queue_packet(file, agent, timeout)) + kfree(timeout); } out: kfree(packet); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a57d021d435a..a02c5a05c984 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -489,6 +489,7 @@ err_idr: err_unreg: ib_dereg_mr(mr); + atomic_dec(&pd->usecnt); err_up: up(&ib_uverbs_idr_mutex); @@ -593,13 +594,18 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (cmd.comp_vector >= file->device->num_comp_vectors) return -EINVAL; - if (cmd.comp_channel >= 0) - ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; + if (cmd.comp_channel >= 0) { + ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); + if (!ev_file) { + ret = -EINVAL; + goto err; + } + } + uobj->uobject.user_handle = cmd.user_handle; uobj->uobject.context = file->ucontext; uobj->uverbs_file = file; @@ -663,6 +669,8 @@ err_up: ib_destroy_cq(cq); err: + if (ev_file) + ib_uverbs_release_ucq(file, ev_file, uobj); kfree(uobj); return ret; } @@ -935,6 +943,11 @@ err_idr: err_destroy: ib_destroy_qp(qp); + atomic_dec(&pd->usecnt); + atomic_dec(&attr.send_cq->usecnt); + atomic_dec(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_dec(&attr.srq->usecnt); err_up: up(&ib_uverbs_idr_mutex); @@ -1448,6 +1461,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, attr.sl = cmd.attr.sl; attr.src_path_bits = cmd.attr.src_path_bits; attr.static_rate = cmd.attr.static_rate; + attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0; attr.port_num = cmd.attr.port_num; attr.grh.flow_label = cmd.attr.grh.flow_label; attr.grh.sgid_index = cmd.attr.grh.sgid_index; @@ -1729,6 +1743,7 @@ err_idr: err_destroy: ib_destroy_srq(srq); + atomic_dec(&pd->usecnt); err_up: up(&ib_uverbs_idr_mutex); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 4c15e112736c..c857361be449 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -107,9 +107,9 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, if (wc->wc_flags & IB_WC_GRH) { ah_attr.ah_flags = IB_AH_GRH; - ah_attr.grh.dgid = grh->dgid; + ah_attr.grh.dgid = grh->sgid; - ret = ib_find_cached_gid(pd->device, &grh->sgid, &port_num, + ret = ib_find_cached_gid(pd->device, &grh->dgid, &port_num, &gid_index); if (ret) return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 9ed34587fc5c..22ac72bc20c3 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -937,10 +937,6 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, if (err) goto out; - MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); - dev_lim->max_srq_sz = (1 << field) - 1; - MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); - dev_lim->max_qp_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET); dev_lim->reserved_qps = 1 << (field & 0xf); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET); @@ -1056,6 +1052,10 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags); if (mthca_is_memfree(dev)) { + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); + dev_lim->max_srq_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); + dev_lim->max_qp_sz = 1 << field; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET); dev_lim->hca.arbel.resize_srq = field & 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET); @@ -1087,6 +1087,10 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, mthca_dbg(dev, "Max ICM size %lld MB\n", (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20); } else { + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); + dev_lim->max_srq_sz = (1 << field) - 1; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); + dev_lim->max_qp_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET); dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f); dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE; diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 4a8adcef2079..96f1a86bf049 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -128,12 +128,12 @@ struct mthca_err_cqe { __be32 my_qpn; u32 reserved1[3]; u8 syndrome; - u8 reserved2; + u8 vendor_err; __be16 db_cnt; - u32 reserved3; + u32 reserved2; __be32 wqe; u8 opcode; - u8 reserved4[2]; + u8 reserved3[2]; u8 owner; }; @@ -253,6 +253,15 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn, wake_up(&cq->wait); } +static inline int is_recv_cqe(struct mthca_cqe *cqe) +{ + if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == + MTHCA_ERROR_CQE_OPCODE_MASK) + return !(cqe->opcode & 0x01); + else + return !(cqe->is_send & 0x80); +} + void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, struct mthca_srq *srq) { @@ -296,7 +305,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, while ((int) --prod_index - (int) cq->cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); if (cqe->my_qpn == cpu_to_be32(qpn)) { - if (srq) + if (srq && is_recv_cqe(cqe)) mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe)); ++nfreed; } else if (nfreed) @@ -333,8 +342,8 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, } /* - * For completions in error, only work request ID, status (and - * freed resource count for RD) have to be set. + * For completions in error, only work request ID, status, vendor error + * (and freed resource count for RD) have to be set. */ switch (cqe->syndrome) { case SYNDROME_LOCAL_LENGTH_ERR: @@ -396,6 +405,8 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, break; } + entry->vendor_err = cqe->vendor_err; + /* * Mem-free HCAs always generate one CQE per WQE, even in the * error case, so we don't have to check the doorbell count, etc. diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 497ff794ef6a..795b379260bf 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -43,6 +43,7 @@ #include <linux/kernel.h> #include <linux/pci.h> #include <linux/dma-mapping.h> +#include <linux/timer.h> #include <asm/semaphore.h> #include "mthca_provider.h" diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 34d68e5a72d8..e8a948f087c0 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -484,8 +484,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, u8 intr, struct mthca_eq *eq) { - int npages = (nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / - PAGE_SIZE; + int npages; u64 *dma_list = NULL; dma_addr_t t; struct mthca_mailbox *mailbox; @@ -496,6 +495,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, eq->dev = dev; eq->nent = roundup_pow_of_two(max(nent, 2)); + npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE; eq->page_list = kmalloc(npages * sizeof *eq->page_list, GFP_KERNEL); diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 6f94b25f3acd..8b00d9a0f6f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -261,6 +261,10 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev) } err = mthca_dev_lim(mdev, &dev_lim); + if (err) { + mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); + goto err_disable; + } profile = default_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index 2fc449da418d..77bc6c746f43 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -111,7 +111,8 @@ static int find_mgm(struct mthca_dev *dev, goto out; if (status) { mthca_err(dev, "READ_MGM returned status %02x\n", status); - return -EINVAL; + err = -EINVAL; + goto out; } if (!memcmp(mgm->gid, zero_gid, 16)) { @@ -126,7 +127,7 @@ static int find_mgm(struct mthca_dev *dev, goto out; *prev = *index; - *index = be32_to_cpu(mgm->next_gid_index) >> 5; + *index = be32_to_cpu(mgm->next_gid_index) >> 6; } while (*index); *index = -1; @@ -153,8 +154,10 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return PTR_ERR(mailbox); mgm = mailbox->buf; - if (down_interruptible(&dev->mcg_table.sem)) - return -EINTR; + if (down_interruptible(&dev->mcg_table.sem)) { + err = -EINTR; + goto err_sem; + } err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); if (err) @@ -181,9 +184,8 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err = -EINVAL; goto out; } - + memset(mgm, 0, sizeof *mgm); memcpy(mgm->gid, gid->raw, 16); - mgm->next_gid_index = 0; } for (i = 0; i < MTHCA_QP_PER_MGM; ++i) @@ -209,6 +211,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (status) { mthca_err(dev, "WRITE_MGM returned status %02x\n", status); err = -EINVAL; + goto out; } if (!link) @@ -223,7 +226,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; } - mgm->next_gid_index = cpu_to_be32(index << 5); + mgm->next_gid_index = cpu_to_be32(index << 6); err = mthca_WRITE_MGM(dev, prev, mailbox, &status); if (err) @@ -234,7 +237,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } out: + if (err && link && index != -1) { + BUG_ON(index < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, index); + } up(&dev->mcg_table.sem); + err_sem: mthca_free_mailbox(dev, mailbox); return err; } @@ -255,8 +263,10 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return PTR_ERR(mailbox); mgm = mailbox->buf; - if (down_interruptible(&dev->mcg_table.sem)) - return -EINTR; + if (down_interruptible(&dev->mcg_table.sem)) { + err = -EINTR; + goto err_sem; + } err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); if (err) @@ -305,13 +315,11 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (i != 1) goto out; - goto out; - if (prev == -1) { /* Remove entry from MGM */ - if (be32_to_cpu(mgm->next_gid_index) >> 5) { - err = mthca_READ_MGM(dev, - be32_to_cpu(mgm->next_gid_index) >> 5, + int amgm_index_to_free = be32_to_cpu(mgm->next_gid_index) >> 6; + if (amgm_index_to_free) { + err = mthca_READ_MGM(dev, amgm_index_to_free, mailbox, &status); if (err) goto out; @@ -332,9 +340,13 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err = -EINVAL; goto out; } + if (amgm_index_to_free) { + BUG_ON(amgm_index_to_free < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, amgm_index_to_free); + } } else { /* Remove entry from AMGM */ - index = be32_to_cpu(mgm->next_gid_index) >> 5; + int curr_next_index = be32_to_cpu(mgm->next_gid_index) >> 6; err = mthca_READ_MGM(dev, prev, mailbox, &status); if (err) goto out; @@ -344,7 +356,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; } - mgm->next_gid_index = cpu_to_be32(index << 5); + mgm->next_gid_index = cpu_to_be32(curr_next_index << 6); err = mthca_WRITE_MGM(dev, prev, mailbox, &status); if (err) @@ -354,10 +366,13 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err = -EINVAL; goto out; } + BUG_ON(index < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, index); } out: up(&dev->mcg_table.sem); + err_sem: mthca_free_mailbox(dev, mailbox); return err; } @@ -365,11 +380,12 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int __devinit mthca_init_mcg_table(struct mthca_dev *dev) { int err; + int table_size = dev->limits.num_mgms + dev->limits.num_amgms; err = mthca_alloc_init(&dev->mcg_table.alloc, - dev->limits.num_amgms, - dev->limits.num_amgms - 1, - 0); + table_size, + table_size - 1, + dev->limits.num_mgms); if (err) return err; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index d72fe95cba08..9fb985a016e9 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -233,7 +233,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj) for (i = 0; i < chunk->npages; ++i) { if (chunk->mem[i].length >= offset) { page = chunk->mem[i].page; - break; + goto out; } offset -= chunk->mem[i].length; } @@ -485,6 +485,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, put_page(db_tab->page[i].mem.page); } } + + kfree(db_tab); } int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 7450550db736..564b6d51c394 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -383,12 +383,10 @@ static const struct { [UC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | - IB_QP_PKEY_INDEX | IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | - IB_QP_PKEY_INDEX | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [MLX] = (IB_QP_CUR_STATE | @@ -476,9 +474,8 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), - [UC] = IB_QP_CUR_STATE, - [RC] = (IB_QP_CUR_STATE | - IB_QP_MIN_RNR_TIMER), + [UC] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS), [MLX] = (IB_QP_CUR_STATE | IB_QP_QKEY), } @@ -522,6 +519,55 @@ static void init_port(struct mthca_dev *dev, int port) mthca_warn(dev, "INIT_IB returned status %02x.\n", status); } +static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr, + int attr_mask) +{ + u8 dest_rd_atomic; + u32 access_flags; + u32 hw_access_flags = 0; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + dest_rd_atomic = attr->max_dest_rd_atomic; + else + dest_rd_atomic = qp->resp_depth; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + access_flags = attr->qp_access_flags; + else + access_flags = qp->atomic_rd_en; + + if (!dest_rd_atomic) + access_flags &= IB_ACCESS_REMOTE_WRITE; + + if (access_flags & IB_ACCESS_REMOTE_READ) + hw_access_flags |= MTHCA_QP_BIT_RRE; + if (access_flags & IB_ACCESS_REMOTE_ATOMIC) + hw_access_flags |= MTHCA_QP_BIT_RAE; + if (access_flags & IB_ACCESS_REMOTE_WRITE) + hw_access_flags |= MTHCA_QP_BIT_RWE; + + return cpu_to_be32(hw_access_flags); +} + +static void mthca_path_set(struct ib_ah_attr *ah, struct mthca_qp_path *path) +{ + path->g_mylmc = ah->src_path_bits & 0x7f; + path->rlid = cpu_to_be16(ah->dlid); + path->static_rate = !!ah->static_rate; + + if (ah->ah_flags & IB_AH_GRH) { + path->g_mylmc |= 1 << 7; + path->mgid_index = ah->grh.sgid_index; + path->hop_limit = ah->grh.hop_limit; + path->sl_tclass_flowlabel = + cpu_to_be32((ah->sl << 28) | + (ah->grh.traffic_class << 20) | + (ah->grh.flow_label)); + memcpy(path->rgid, ah->grh.dgid.raw, 16); + } else + path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28); +} + int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) { struct mthca_dev *dev = to_mdev(ibqp->device); @@ -591,6 +637,26 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) return -EINVAL; } + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) { + mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num); + return -EINVAL; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > dev->limits.max_qp_init_rdma) { + mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n", + attr->max_rd_atomic, dev->limits.max_qp_init_rdma); + return -EINVAL; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) { + mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n", + attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift); + return -EINVAL; + } + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); @@ -665,28 +731,14 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } if (attr_mask & IB_QP_RNR_RETRY) { - qp_context->pri_path.rnr_retry = attr->rnr_retry << 5; - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY); + qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry = + attr->rnr_retry << 5; + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY | + MTHCA_QP_OPTPAR_ALT_RNR_RETRY); } if (attr_mask & IB_QP_AV) { - qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f; - qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid); - qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate; - if (attr->ah_attr.ah_flags & IB_AH_GRH) { - qp_context->pri_path.g_mylmc |= 1 << 7; - qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index; - qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit; - qp_context->pri_path.sl_tclass_flowlabel = - cpu_to_be32((attr->ah_attr.sl << 28) | - (attr->ah_attr.grh.traffic_class << 20) | - (attr->ah_attr.grh.flow_label)); - memcpy(qp_context->pri_path.rgid, - attr->ah_attr.grh.dgid.raw, 16); - } else { - qp_context->pri_path.sl_tclass_flowlabel = - cpu_to_be32(attr->ah_attr.sl << 28); - } + mthca_path_set(&attr->ah_attr, &qp_context->pri_path); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); } @@ -695,7 +747,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT); } - /* XXX alt_path */ + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) { + mthca_dbg(dev, "Alternate port number (%u) is invalid\n", + attr->alt_port_num); + return -EINVAL; + } + + mthca_path_set(&attr->alt_ah_attr, &qp_context->alt_path); + qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | + attr->alt_port_num << 24); + qp_context->alt_path.ackto = attr->alt_timeout << 3; + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH); + } /* leave rdd as 0 */ qp_context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pd_num); @@ -703,9 +767,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_context->wqe_lkey = cpu_to_be32(qp->mr.ibmr.lkey); qp_context->params1 = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) | (MTHCA_FLIGHT_LIMIT << 24) | - MTHCA_QP_BIT_SRE | - MTHCA_QP_BIT_SWE | - MTHCA_QP_BIT_SAE); + MTHCA_QP_BIT_SWE); if (qp->sq_policy == IB_SIGNAL_ALL_WR) qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC); if (attr_mask & IB_QP_RETRY_CNT) { @@ -714,9 +776,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { - qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ? - ffs(attr->max_rd_atomic) - 1 : 0, - 7) << 21); + if (attr->max_rd_atomic) { + qp_context->params1 |= + cpu_to_be32(MTHCA_QP_BIT_SRE | + MTHCA_QP_BIT_SAE); + qp_context->params1 |= + cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); + } qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX); } @@ -729,71 +795,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_context->snd_db_index = cpu_to_be32(qp->sq.db_index); } - if (attr_mask & IB_QP_ACCESS_FLAGS) { - qp_context->params2 |= - cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ? - MTHCA_QP_BIT_RWE : 0); - - /* - * Only enable RDMA reads and atomics if we have - * responder resources set to a non-zero value. - */ - if (qp->resp_depth) { - qp_context->params2 |= - cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ? - MTHCA_QP_BIT_RRE : 0); - qp_context->params2 |= - cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC ? - MTHCA_QP_BIT_RAE : 0); - } - - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE | - MTHCA_QP_OPTPAR_RRE | - MTHCA_QP_OPTPAR_RAE); - - qp->atomic_rd_en = attr->qp_access_flags; - } - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { - u8 rra_max; - - if (qp->resp_depth && !attr->max_dest_rd_atomic) { - /* - * Lowering our responder resources to zero. - * Turn off reads RDMA and atomics as responder. - * (RRE/RAE in params2 already zero) - */ - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE | - MTHCA_QP_OPTPAR_RAE); - } - - if (!qp->resp_depth && attr->max_dest_rd_atomic) { - /* - * Increasing our responder resources from - * zero. Turn on RDMA reads and atomics as - * appropriate. - */ + if (attr->max_dest_rd_atomic) qp_context->params2 |= - cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ? - MTHCA_QP_BIT_RRE : 0); - qp_context->params2 |= - cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ? - MTHCA_QP_BIT_RAE : 0); - - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE | - MTHCA_QP_OPTPAR_RAE); - } + cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); - for (rra_max = 0; - 1 << rra_max < attr->max_dest_rd_atomic && - rra_max < dev->qp_table.rdb_shift; - ++rra_max) - ; /* nothing */ - - qp_context->params2 |= cpu_to_be32(rra_max << 21); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX); + } - qp->resp_depth = attr->max_dest_rd_atomic; + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { + qp_context->params2 |= get_hw_access_flags(qp, attr, attr_mask); + qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE | + MTHCA_QP_OPTPAR_RRE | + MTHCA_QP_OPTPAR_RAE); } qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC); @@ -835,8 +849,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) err = -EINVAL; } - if (!err) + if (!err) { qp->state = new_state; + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->atomic_rd_en = attr->qp_access_flags; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->resp_depth = attr->max_dest_rd_atomic; + } mthca_free_mailbox(dev, mailbox); @@ -885,18 +904,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) return err; } -static void mthca_adjust_qp_caps(struct mthca_dev *dev, - struct mthca_pd *pd, - struct mthca_qp *qp) +static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz) { - int max_data_size; - /* * Calculate the maximum size of WQE s/g segments, excluding * the next segment and other non-data segments. */ - max_data_size = min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift) - - sizeof (struct mthca_next_seg); + int max_data_size = desc_sz - sizeof (struct mthca_next_seg); switch (qp->transport) { case MLX: @@ -915,11 +929,24 @@ static void mthca_adjust_qp_caps(struct mthca_dev *dev, break; } + return max_data_size; +} + +static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size) +{ /* We don't support inline data for kernel QPs (yet). */ - if (!pd->ibpd.uobject) - qp->max_inline_data = 0; - else - qp->max_inline_data = max_data_size - MTHCA_INLINE_HEADER_SIZE; + return pd->ibpd.uobject ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0; +} + +static void mthca_adjust_qp_caps(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_qp *qp) +{ + int max_data_size = mthca_max_data_size(dev, qp, + min(dev->limits.max_desc_sz, + 1 << qp->sq.wqe_shift)); + + qp->max_inline_data = mthca_max_inline_data(pd, max_data_size); qp->sq.max_gs = min_t(int, dev->limits.max_sg, max_data_size / sizeof (struct mthca_data_seg)); @@ -1186,13 +1213,23 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, } static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, - struct mthca_qp *qp) + struct mthca_pd *pd, struct mthca_qp *qp) { + int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz); + /* Sanity check QP size before proceeding */ - if (cap->max_send_wr > dev->limits.max_wqes || - cap->max_recv_wr > dev->limits.max_wqes || - cap->max_send_sge > dev->limits.max_sg || - cap->max_recv_sge > dev->limits.max_sg) + if (cap->max_send_wr > dev->limits.max_wqes || + cap->max_recv_wr > dev->limits.max_wqes || + cap->max_send_sge > dev->limits.max_sg || + cap->max_recv_sge > dev->limits.max_sg || + cap->max_inline_data > mthca_max_inline_data(pd, max_data_size)) + return -EINVAL; + + /* + * For MLX transport we need 2 extra S/G entries: + * one for the header and one for the checksum at the end + */ + if (qp->transport == MLX && cap->max_recv_sge + 2 > dev->limits.max_sg) return -EINVAL; if (mthca_is_memfree(dev)) { @@ -1211,14 +1248,6 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, MTHCA_INLINE_CHUNK_SIZE) / sizeof (struct mthca_data_seg)); - /* - * For MLX transport we need 2 extra S/G entries: - * one for the header and one for the checksum at the end - */ - if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) || - qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg) - return -EINVAL; - return 0; } @@ -1233,7 +1262,7 @@ int mthca_alloc_qp(struct mthca_dev *dev, { int err; - err = mthca_set_qp_size(dev, cap, qp); + err = mthca_set_qp_size(dev, cap, pd, qp); if (err) return err; @@ -1276,7 +1305,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev, u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; int err; - err = mthca_set_qp_size(dev, cap, &sqp->qp); + err = mthca_set_qp_size(dev, cap, pd, &sqp->qp); if (err) return err; diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index f7d234295efe..e7e153d9c4c6 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -201,7 +201,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (mthca_is_memfree(dev)) srq->max = roundup_pow_of_two(srq->max + 1); - ds = min(64UL, + ds = max(64UL, roundup_pow_of_two(sizeof (struct mthca_next_seg) + srq->max_gs * sizeof (struct mthca_data_seg))); srq->wqe_shift = long_log2(ds); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index ee9fe226ae99..dd488d3cffa9 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -39,6 +39,7 @@ #include <linux/string.h> #include <linux/parser.h> #include <linux/random.h> +#include <linux/jiffies.h> #include <asm/atomic.h> diff --git a/drivers/input/keyboard/corgikbd.c b/drivers/input/keyboard/corgikbd.c index 64672d491222..e301ee4ca264 100644 --- a/drivers/input/keyboard/corgikbd.c +++ b/drivers/input/keyboard/corgikbd.c @@ -19,7 +19,6 @@ #include <linux/jiffies.h> #include <linux/module.h> #include <linux/slab.h> -#include <asm/irq.h> #include <asm/arch/corgi.h> #include <asm/arch/hardware.h> @@ -343,10 +342,9 @@ static int __init corgikbd_probe(struct platform_device *pdev) for (i = 0; i < CORGI_KEY_SENSE_NUM; i++) { pxa_gpio_mode(CORGI_GPIO_KEY_SENSE(i) | GPIO_IN); if (request_irq(CORGI_IRQ_GPIO_KEY_SENSE(i), corgikbd_interrupt, - SA_INTERRUPT, "corgikbd", corgikbd)) + SA_INTERRUPT | SA_TRIGGER_RISING, + "corgikbd", corgikbd)) printk(KERN_WARNING "corgikbd: Can't get IRQ: %d!\n", i); - else - set_irq_type(CORGI_IRQ_GPIO_KEY_SENSE(i),IRQT_RISING); } /* Set Strobe lines as outputs - set high */ diff --git a/drivers/input/keyboard/spitzkbd.c b/drivers/input/keyboard/spitzkbd.c index 6a15fe3bc527..83999d583122 100644 --- a/drivers/input/keyboard/spitzkbd.c +++ b/drivers/input/keyboard/spitzkbd.c @@ -19,7 +19,6 @@ #include <linux/jiffies.h> #include <linux/module.h> #include <linux/slab.h> -#include <asm/irq.h> #include <asm/arch/spitz.h> #include <asm/arch/hardware.h> @@ -407,10 +406,9 @@ static int __init spitzkbd_probe(struct platform_device *dev) for (i = 0; i < SPITZ_KEY_SENSE_NUM; i++) { pxa_gpio_mode(spitz_senses[i] | GPIO_IN); if (request_irq(IRQ_GPIO(spitz_senses[i]), spitzkbd_interrupt, - SA_INTERRUPT, "Spitzkbd Sense", spitzkbd)) + SA_INTERRUPT|SA_TRIGGER_RISING, + "Spitzkbd Sense", spitzkbd)) printk(KERN_WARNING "spitzkbd: Can't get Sense IRQ: %d!\n", i); - else - set_irq_type(IRQ_GPIO(spitz_senses[i]),IRQT_RISING); } /* Set Strobe lines as outputs - set high */ @@ -422,15 +420,18 @@ static int __init spitzkbd_probe(struct platform_device *dev) pxa_gpio_mode(SPITZ_GPIO_SWA | GPIO_IN); pxa_gpio_mode(SPITZ_GPIO_SWB | GPIO_IN); - request_irq(SPITZ_IRQ_GPIO_SYNC, spitzkbd_interrupt, SA_INTERRUPT, "Spitzkbd Sync", spitzkbd); - request_irq(SPITZ_IRQ_GPIO_ON_KEY, spitzkbd_interrupt, SA_INTERRUPT, "Spitzkbd PwrOn", spitzkbd); - request_irq(SPITZ_IRQ_GPIO_SWA, spitzkbd_hinge_isr, SA_INTERRUPT, "Spitzkbd SWA", spitzkbd); - request_irq(SPITZ_IRQ_GPIO_SWB, spitzkbd_hinge_isr, SA_INTERRUPT, "Spitzkbd SWB", spitzkbd); - - set_irq_type(SPITZ_IRQ_GPIO_SYNC, IRQT_BOTHEDGE); - set_irq_type(SPITZ_IRQ_GPIO_ON_KEY, IRQT_BOTHEDGE); - set_irq_type(SPITZ_IRQ_GPIO_SWA, IRQT_BOTHEDGE); - set_irq_type(SPITZ_IRQ_GPIO_SWB, IRQT_BOTHEDGE); + request_irq(SPITZ_IRQ_GPIO_SYNC, spitzkbd_interrupt, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "Spitzkbd Sync", spitzkbd); + request_irq(SPITZ_IRQ_GPIO_ON_KEY, spitzkbd_interrupt, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "Spitzkbd PwrOn", spitzkbd); + request_irq(SPITZ_IRQ_GPIO_SWA, spitzkbd_hinge_isr, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "Spitzkbd SWA", spitzkbd); + request_irq(SPITZ_IRQ_GPIO_SWB, spitzkbd_hinge_isr, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "Spitzkbd SWB", spitzkbd); printk(KERN_INFO "input: Spitz Keyboard Registered\n"); diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c index 1cd7657f7e42..1be963961c15 100644 --- a/drivers/input/misc/hp_sdc_rtc.c +++ b/drivers/input/misc/hp_sdc_rtc.c @@ -60,8 +60,6 @@ static struct fasync_struct *hp_sdc_rtc_async_queue; static DECLARE_WAIT_QUEUE_HEAD(hp_sdc_rtc_wait); -static loff_t hp_sdc_rtc_llseek(struct file *file, loff_t offset, int origin); - static ssize_t hp_sdc_rtc_read(struct file *file, char *buf, size_t count, loff_t *ppos); @@ -387,11 +385,6 @@ static int hp_sdc_rtc_set_i8042timer (struct timeval *setto, uint8_t setcmd) return 0; } -static loff_t hp_sdc_rtc_llseek(struct file *file, loff_t offset, int origin) -{ - return -ESPIPE; -} - static ssize_t hp_sdc_rtc_read(struct file *file, char *buf, size_t count, loff_t *ppos) { ssize_t retval; @@ -679,7 +672,7 @@ static int hp_sdc_rtc_ioctl(struct inode *inode, struct file *file, static struct file_operations hp_sdc_rtc_fops = { .owner = THIS_MODULE, - .llseek = hp_sdc_rtc_llseek, + .llseek = no_llseek, .read = hp_sdc_rtc_read, .poll = hp_sdc_rtc_poll, .ioctl = hp_sdc_rtc_ioctl, diff --git a/drivers/isdn/act2000/act2000.h b/drivers/isdn/act2000/act2000.h index b091d1a54125..d4c50512a1ff 100644 --- a/drivers/isdn/act2000/act2000.h +++ b/drivers/isdn/act2000/act2000.h @@ -181,17 +181,17 @@ typedef struct act2000_card { char regname[35]; /* Name used for request_region */ } act2000_card; -extern __inline__ void act2000_schedule_tx(act2000_card *card) +static inline void act2000_schedule_tx(act2000_card *card) { schedule_work(&card->snd_tq); } -extern __inline__ void act2000_schedule_rx(act2000_card *card) +static inline void act2000_schedule_rx(act2000_card *card) { schedule_work(&card->rcv_tq); } -extern __inline__ void act2000_schedule_poll(act2000_card *card) +static inline void act2000_schedule_poll(act2000_card *card) { schedule_work(&card->poll_tq); } diff --git a/drivers/isdn/act2000/capi.h b/drivers/isdn/act2000/capi.h index f6d5f530b86b..49f453c53c64 100644 --- a/drivers/isdn/act2000/capi.h +++ b/drivers/isdn/act2000/capi.h @@ -78,29 +78,29 @@ typedef union actcapi_infoel { /* info element */ typedef struct actcapi_msn { __u8 eaz; __u8 len; /* Length of MSN */ - __u8 msn[15] __attribute__ ((packed)); -} actcapi_msn; + __u8 msn[15]; +} __attribute__((packed)) actcapi_msn; typedef struct actcapi_dlpd { __u8 len; /* Length of structure */ - __u16 dlen __attribute__ ((packed)); /* Data Length */ - __u8 laa __attribute__ ((packed)); /* Link Address A */ + __u16 dlen; /* Data Length */ + __u8 laa; /* Link Address A */ __u8 lab; /* Link Address B */ __u8 modulo; /* Modulo Mode */ __u8 win; /* Window size */ __u8 xid[100]; /* XID Information */ -} actcapi_dlpd; +} __attribute__((packed)) actcapi_dlpd; typedef struct actcapi_ncpd { __u8 len; /* Length of structure */ - __u16 lic __attribute__ ((packed)); - __u16 hic __attribute__ ((packed)); - __u16 ltc __attribute__ ((packed)); - __u16 htc __attribute__ ((packed)); - __u16 loc __attribute__ ((packed)); - __u16 hoc __attribute__ ((packed)); - __u8 modulo __attribute__ ((packed)); -} actcapi_ncpd; + __u16 lic; + __u16 hic; + __u16 ltc; + __u16 htc; + __u16 loc; + __u16 hoc; + __u8 modulo; +} __attribute__((packed)) actcapi_ncpd; #define actcapi_ncpi actcapi_ncpd /* @@ -168,19 +168,19 @@ typedef struct actcapi_msg { __u16 manuf_msg; __u16 controller; actcapi_msn msnmap; - } manufacturer_req_msn; + } __attribute ((packed)) manufacturer_req_msn; /* TODO: TraceInit-req/conf/ind/resp and * TraceDump-req/conf/ind/resp */ struct connect_req { __u8 controller; __u8 bchan; - __u32 infomask __attribute__ ((packed)); + __u32 infomask; __u8 si1; __u8 si2; __u8 eaz; actcapi_addr addr; - } connect_req; + } __attribute__ ((packed)) connect_req; struct connect_conf { __u16 plci; __u16 info; @@ -192,7 +192,7 @@ typedef struct actcapi_msg { __u8 si2; __u8 eaz; actcapi_addr addr; - } connect_ind; + } __attribute__ ((packed)) connect_ind; struct connect_resp { __u16 plci; __u8 rejectcause; @@ -200,14 +200,14 @@ typedef struct actcapi_msg { struct connect_active_ind { __u16 plci; actcapi_addr addr; - } connect_active_ind; + } __attribute__ ((packed)) connect_active_ind; struct connect_active_resp { __u16 plci; } connect_active_resp; struct connect_b3_req { __u16 plci; actcapi_ncpi ncpi; - } connect_b3_req; + } __attribute__ ((packed)) connect_b3_req; struct connect_b3_conf { __u16 plci; __u16 ncci; @@ -217,12 +217,12 @@ typedef struct actcapi_msg { __u16 ncci; __u16 plci; actcapi_ncpi ncpi; - } connect_b3_ind; + } __attribute__ ((packed)) connect_b3_ind; struct connect_b3_resp { __u16 ncci; __u8 rejectcause; - actcapi_ncpi ncpi __attribute__ ((packed)); - } connect_b3_resp; + actcapi_ncpi ncpi; + } __attribute__ ((packed)) connect_b3_resp; struct disconnect_req { __u16 plci; __u8 cause; @@ -241,14 +241,14 @@ typedef struct actcapi_msg { struct connect_b3_active_ind { __u16 ncci; actcapi_ncpi ncpi; - } connect_b3_active_ind; + } __attribute__ ((packed)) connect_b3_active_ind; struct connect_b3_active_resp { __u16 ncci; } connect_b3_active_resp; struct disconnect_b3_req { __u16 ncci; actcapi_ncpi ncpi; - } disconnect_b3_req; + } __attribute__ ((packed)) disconnect_b3_req; struct disconnect_b3_conf { __u16 ncci; __u16 info; @@ -257,7 +257,7 @@ typedef struct actcapi_msg { __u16 ncci; __u16 info; actcapi_ncpi ncpi; - } disconnect_b3_ind; + } __attribute__ ((packed)) disconnect_b3_ind; struct disconnect_b3_resp { __u16 ncci; } disconnect_b3_resp; @@ -265,7 +265,7 @@ typedef struct actcapi_msg { __u16 plci; actcapi_infonr nr; actcapi_infoel el; - } info_ind; + } __attribute__ ((packed)) info_ind; struct info_resp { __u16 plci; } info_resp; @@ -279,8 +279,8 @@ typedef struct actcapi_msg { struct select_b2_protocol_req { __u16 plci; __u8 protocol; - actcapi_dlpd dlpd __attribute__ ((packed)); - } select_b2_protocol_req; + actcapi_dlpd dlpd; + } __attribute__ ((packed)) select_b2_protocol_req; struct select_b2_protocol_conf { __u16 plci; __u16 info; @@ -288,49 +288,49 @@ typedef struct actcapi_msg { struct select_b3_protocol_req { __u16 plci; __u8 protocol; - actcapi_ncpd ncpd __attribute__ ((packed)); - } select_b3_protocol_req; + actcapi_ncpd ncpd; + } __attribute__ ((packed)) select_b3_protocol_req; struct select_b3_protocol_conf { __u16 plci; __u16 info; } select_b3_protocol_conf; struct listen_req { __u8 controller; - __u32 infomask __attribute__ ((packed)); - __u16 eazmask __attribute__ ((packed)); - __u16 simask __attribute__ ((packed)); - } listen_req; + __u32 infomask; + __u16 eazmask; + __u16 simask; + } __attribute__ ((packed)) listen_req; struct listen_conf { __u8 controller; - __u16 info __attribute__ ((packed)); - } listen_conf; + __u16 info; + } __attribute__ ((packed)) listen_conf; struct data_b3_req { __u16 fakencci; __u16 datalen; __u32 unused; __u8 blocknr; - __u16 flags __attribute__ ((packed)); - } data_b3_req; + __u16 flags; + } __attribute ((packed)) data_b3_req; struct data_b3_ind { __u16 fakencci; __u16 datalen; __u32 unused; __u8 blocknr; - __u16 flags __attribute__ ((packed)); - } data_b3_ind; + __u16 flags; + } __attribute__ ((packed)) data_b3_ind; struct data_b3_resp { __u16 ncci; __u8 blocknr; - } data_b3_resp; + } __attribute__ ((packed)) data_b3_resp; struct data_b3_conf { __u16 ncci; __u8 blocknr; - __u16 info __attribute__ ((packed)); - } data_b3_conf; + __u16 info; + } __attribute__ ((packed)) data_b3_conf; } msg; -} actcapi_msg; +} __attribute__ ((packed)) actcapi_msg; -extern __inline__ unsigned short +static inline unsigned short actcapi_nextsmsg(act2000_card *card) { unsigned long flags; diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index 7b564c0dd996..207cae366256 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -17,6 +17,8 @@ #include <linux/ctype.h> #include <linux/sched.h> /* current */ +#include "capifs.h" + MODULE_DESCRIPTION("CAPI4Linux: /dev/capi/ filesystem"); MODULE_AUTHOR("Carsten Paeth"); MODULE_LICENSE("GPL"); diff --git a/drivers/isdn/hardware/eicon/os_4bri.c b/drivers/isdn/hardware/eicon/os_4bri.c index cccfabc1117d..11e6f937c1e4 100644 --- a/drivers/isdn/hardware/eicon/os_4bri.c +++ b/drivers/isdn/hardware/eicon/os_4bri.c @@ -16,6 +16,7 @@ #include "diva_pci.h" #include "mi_pc.h" #include "dsrv4bri.h" +#include "helpers.h" static void *diva_xdiLoadFileFile = NULL; static dword diva_xdiLoadFileLength = 0; @@ -815,7 +816,7 @@ diva_4bri_cmd_card_proc(struct _diva_os_xdi_adapter *a, return (ret); } -void *xdiLoadFile(char *FileName, unsigned long *FileLength, +void *xdiLoadFile(char *FileName, dword *FileLength, unsigned long lim) { void *ret = diva_xdiLoadFileFile; diff --git a/drivers/isdn/hardware/eicon/os_bri.c b/drivers/isdn/hardware/eicon/os_bri.c index 4cc44a5dd1db..f31bba5b16ff 100644 --- a/drivers/isdn/hardware/eicon/os_bri.c +++ b/drivers/isdn/hardware/eicon/os_bri.c @@ -16,6 +16,7 @@ #include "diva_pci.h" #include "mi_pc.h" #include "pc_maint.h" +#include "dsrv_bri.h" /* ** IMPORTS diff --git a/drivers/isdn/hardware/eicon/os_pri.c b/drivers/isdn/hardware/eicon/os_pri.c index 8ac207f75e54..a296a846f296 100644 --- a/drivers/isdn/hardware/eicon/os_pri.c +++ b/drivers/isdn/hardware/eicon/os_pri.c @@ -18,6 +18,7 @@ #include "pc_maint.h" #include "dsp_tst.h" #include "diva_dma.h" +#include "dsrv_pri.h" /* -------------------------------------------------------------------------- OS Dependent part of XDI driver for DIVA PRI Adapter diff --git a/drivers/isdn/hisax/Kconfig b/drivers/isdn/hisax/Kconfig index c82105920d71..0ef560144be3 100644 --- a/drivers/isdn/hisax/Kconfig +++ b/drivers/isdn/hisax/Kconfig @@ -110,7 +110,7 @@ config HISAX_16_3 config HISAX_TELESPCI bool "Teles PCI" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K)) + depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV)) help This enables HiSax support for the Teles PCI. See <file:Documentation/isdn/README.HiSax> on how to configure it. @@ -238,7 +238,7 @@ config HISAX_MIC config HISAX_NETJET bool "NETjet card" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K)) + depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV)) help This enables HiSax support for the NetJet from Traverse Technologies. @@ -249,7 +249,7 @@ config HISAX_NETJET config HISAX_NETJET_U bool "NETspider U card" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K)) + depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV)) help This enables HiSax support for the Netspider U interface ISDN card from Traverse Technologies. @@ -317,7 +317,7 @@ config HISAX_GAZEL config HISAX_HFC_PCI bool "HFC PCI-Bus cards" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K)) + depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV)) help This enables HiSax support for the HFC-S PCI 2BDS0 based cards. @@ -344,7 +344,7 @@ config HISAX_HFC_SX config HISAX_ENTERNOW_PCI bool "Formula-n enter:now PCI card" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K)) + depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV)) help This enables HiSax support for the Formula-n enter:now PCI ISDN card. diff --git a/drivers/isdn/hisax/hisax.h b/drivers/isdn/hisax/hisax.h index 26c545fa223b..1b85ce166af8 100644 --- a/drivers/isdn/hisax/hisax.h +++ b/drivers/isdn/hisax/hisax.h @@ -396,17 +396,17 @@ struct isar_hw { struct hdlc_stat_reg { #ifdef __BIG_ENDIAN - u_char fill __attribute__((packed)); - u_char mode __attribute__((packed)); - u_char xml __attribute__((packed)); - u_char cmd __attribute__((packed)); + u_char fill; + u_char mode; + u_char xml; + u_char cmd; #else - u_char cmd __attribute__((packed)); - u_char xml __attribute__((packed)); - u_char mode __attribute__((packed)); - u_char fill __attribute__((packed)); + u_char cmd; + u_char xml; + u_char mode; + u_char fill; #endif -}; +} __attribute__((packed)); struct hdlc_hw { union { diff --git a/drivers/isdn/hisax/hisax_fcpcipnp.h b/drivers/isdn/hisax/hisax_fcpcipnp.h index bd8a22e4d6a2..21fbcedf3a94 100644 --- a/drivers/isdn/hisax/hisax_fcpcipnp.h +++ b/drivers/isdn/hisax/hisax_fcpcipnp.h @@ -12,17 +12,17 @@ enum { struct hdlc_stat_reg { #ifdef __BIG_ENDIAN - u_char fill __attribute__((packed)); - u_char mode __attribute__((packed)); - u_char xml __attribute__((packed)); - u_char cmd __attribute__((packed)); + u_char fill; + u_char mode; + u_char xml; + u_char cmd; #else - u_char cmd __attribute__((packed)); - u_char xml __attribute__((packed)); - u_char mode __attribute__((packed)); - u_char fill __attribute__((packed)); + u_char cmd; + u_char xml; + u_char mode; + u_char fill; #endif -}; +} __attribute__((packed)); struct fritz_bcs { struct hisax_b_if b_if; diff --git a/drivers/isdn/sc/command.c b/drivers/isdn/sc/command.c index 19f2fcf0ae4a..b4b24335f716 100644 --- a/drivers/isdn/sc/command.c +++ b/drivers/isdn/sc/command.c @@ -43,7 +43,6 @@ extern int send_and_receive(int, unsigned int, unsigned char, unsigned char, RspMessage *, int); extern int sendmessage(int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int *); -extern inline void pullphone(char *, char *); #ifdef DEBUG /* diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c index 2c3158c81ff2..4d811600bdab 100644 --- a/drivers/macintosh/windfarm_smu_controls.c +++ b/drivers/macintosh/windfarm_smu_controls.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/wait.h> +#include <linux/completion.h> #include <asm/prom.h> #include <asm/machdep.h> #include <asm/io.h> diff --git a/drivers/macintosh/windfarm_smu_sensors.c b/drivers/macintosh/windfarm_smu_sensors.c index b558cc209d49..1a00d9c75a23 100644 --- a/drivers/macintosh/windfarm_smu_sensors.c +++ b/drivers/macintosh/windfarm_smu_sensors.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/wait.h> +#include <linux/completion.h> #include <asm/prom.h> #include <asm/machdep.h> #include <asm/io.h> diff --git a/drivers/md/md.c b/drivers/md/md.c index 1b76fb29fb70..e423a16ba3c9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3598,12 +3598,21 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev) return 0; } +static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + mddev_t *mddev = bdev->bd_disk->private_data; + + geo->heads = 2; + geo->sectors = 4; + geo->cylinders = get_capacity(mddev->gendisk) / 8; + return 0; +} + static int md_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { int err = 0; void __user *argp = (void __user *)arg; - struct hd_geometry __user *loc = argp; mddev_t *mddev = NULL; if (!capable(CAP_SYS_ADMIN)) @@ -3765,24 +3774,6 @@ static int md_ioctl(struct inode *inode, struct file *file, * 4 sectors (with a BIG number of cylinders...). This drives * dosfs just mad... ;-) */ - case HDIO_GETGEO: - if (!loc) { - err = -EINVAL; - goto abort_unlock; - } - err = put_user (2, (char __user *) &loc->heads); - if (err) - goto abort_unlock; - err = put_user (4, (char __user *) &loc->sectors); - if (err) - goto abort_unlock; - err = put_user(get_capacity(mddev->gendisk)/8, - (short __user *) &loc->cylinders); - if (err) - goto abort_unlock; - err = put_user (get_start_sect(inode->i_bdev), - (long __user *) &loc->start); - goto done_unlock; } /* @@ -3911,6 +3902,7 @@ static struct block_device_operations md_fops = .open = md_open, .release = md_release, .ioctl = md_ioctl, + .getgeo = md_getgeo, .media_changed = md_media_changed, .revalidate_disk= md_revalidate, }; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index abbca150202b..d03f99cf4b7d 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -306,9 +306,6 @@ static int raid0_run (mddev_t *mddev) printk("raid0 : conf->hash_spacing is %llu blocks.\n", (unsigned long long)conf->hash_spacing); { -#if __GNUC__ < 3 - volatile -#endif sector_t s = mddev->array_size; sector_t space = conf->hash_spacing; int round; @@ -439,9 +436,6 @@ static int raid0_make_request (request_queue_t *q, struct bio *bio) { -#if __GNUC__ < 3 - volatile -#endif sector_t x = block >> conf->preshift; sector_div(x, (u32)conf->hash_spacing); zone = conf->hash_table[x]; diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c index 597b8db35a13..62a7d636ef11 100644 --- a/drivers/media/video/v4l2-common.c +++ b/drivers/media/video/v4l2-common.c @@ -191,9 +191,7 @@ char *v4l2_type_names[] = { }; char *v4l2_ioctl_names[256] = { -#if __GNUC__ >= 3 [0 ... 255] = "UNKNOWN", -#endif [_IOC_NR(VIDIOC_QUERYCAP)] = "VIDIOC_QUERYCAP", [_IOC_NR(VIDIOC_RESERVED)] = "VIDIOC_RESERVED", [_IOC_NR(VIDIOC_ENUM_FMT)] = "VIDIOC_ENUM_FMT", diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 5b1febed3133..b09fb6307153 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -662,6 +662,13 @@ static int i2o_block_release(struct inode *inode, struct file *file) return 0; } +static int i2o_block_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + i2o_block_biosparam(get_capacity(bdev->bd_disk), + &geo->cylinders, &geo->heads, &geo->sectors); + return 0; +} + /** * i2o_block_ioctl - Issue device specific ioctl calls. * @cmd: ioctl command @@ -676,7 +683,6 @@ static int i2o_block_ioctl(struct inode *inode, struct file *file, { struct gendisk *disk = inode->i_bdev->bd_disk; struct i2o_block_device *dev = disk->private_data; - void __user *argp = (void __user *)arg; /* Anyone capable of this syscall can do *real bad* things */ @@ -684,15 +690,6 @@ static int i2o_block_ioctl(struct inode *inode, struct file *file, return -EPERM; switch (cmd) { - case HDIO_GETGEO: - { - struct hd_geometry g; - i2o_block_biosparam(get_capacity(disk), - &g.cylinders, &g.heads, &g.sectors); - g.start = get_start_sect(inode->i_bdev); - return copy_to_user(argp, &g, sizeof(g)) ? -EFAULT : 0; - } - case BLKI2OGRSTRAT: return put_user(dev->rcache, (int __user *)arg); case BLKI2OGWSTRAT: @@ -962,6 +959,7 @@ static struct block_device_operations i2o_block_fops = { .open = i2o_block_open, .release = i2o_block_release, .ioctl = i2o_block_ioctl, + .getgeo = i2o_block_getgeo, .media_changed = i2o_block_media_changed }; diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index e335d54c4659..b42e0fbab59b 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -27,7 +27,6 @@ #include <asm/dma.h> #include <asm/hardware.h> -#include <asm/irq.h> #include "ucb1x00.h" @@ -507,14 +506,14 @@ static int ucb1x00_probe(struct mcp *mcp) goto err_free; } - ret = request_irq(ucb->irq, ucb1x00_irq, 0, "UCB1x00", ucb); + ret = request_irq(ucb->irq, ucb1x00_irq, SA_TRIGGER_RISING, + "UCB1x00", ucb); if (ret) { printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n", ucb->irq, ret); goto err_free; } - set_irq_type(ucb->irq, IRQT_RISING); mcp_set_drvdata(mcp, ucb); ret = class_device_register(&ucb->cdev); diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c index 551061c2eadf..79fd062ccb34 100644 --- a/drivers/mfd/ucb1x00-ts.c +++ b/drivers/mfd/ucb1x00-ts.c @@ -32,7 +32,6 @@ #include <linux/suspend.h> #include <linux/slab.h> #include <linux/kthread.h> -#include <linux/delay.h> #include <asm/dma.h> #include <asm/semaphore.h> diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c index 198561d21710..d5f28981596b 100644 --- a/drivers/mmc/mmc_block.c +++ b/drivers/mmc/mmc_block.c @@ -113,31 +113,18 @@ static int mmc_blk_release(struct inode *inode, struct file *filp) } static int -mmc_blk_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) +mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct block_device *bdev = inode->i_bdev; - - if (cmd == HDIO_GETGEO) { - struct hd_geometry geo; - - memset(&geo, 0, sizeof(struct hd_geometry)); - - geo.cylinders = get_capacity(bdev->bd_disk) / (4 * 16); - geo.heads = 4; - geo.sectors = 16; - geo.start = get_start_sect(bdev); - - return copy_to_user((void __user *)arg, &geo, sizeof(geo)) - ? -EFAULT : 0; - } - - return -ENOTTY; + geo->cylinders = get_capacity(bdev->bd_disk) / (4 * 16); + geo->heads = 4; + geo->sectors = 16; + return 0; } static struct block_device_operations mmc_bdops = { .open = mmc_blk_open, .release = mmc_blk_release, - .ioctl = mmc_blk_ioctl, + .getgeo = mmc_blk_getgeo, .owner = THIS_MODULE, }; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 339cb1218eaa..7f3ff500b68e 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -194,6 +194,14 @@ static int blktrans_release(struct inode *i, struct file *f) return ret; } +static int blktrans_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct mtd_blktrans_dev *dev = bdev->bd_disk->private_data; + + if (dev->tr->getgeo) + return dev->tr->getgeo(dev, geo); + return -ENOTTY; +} static int blktrans_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) @@ -207,22 +215,6 @@ static int blktrans_ioctl(struct inode *inode, struct file *file, return tr->flush(dev); /* The core code did the work, we had nothing to do. */ return 0; - - case HDIO_GETGEO: - if (tr->getgeo) { - struct hd_geometry g; - int ret; - - memset(&g, 0, sizeof(g)); - ret = tr->getgeo(dev, &g); - if (ret) - return ret; - - g.start = get_start_sect(inode->i_bdev); - if (copy_to_user((void __user *)arg, &g, sizeof(g))) - return -EFAULT; - return 0; - } /* else */ default: return -ENOTTY; } @@ -233,6 +225,7 @@ struct block_device_operations mtd_blktrans_ops = { .open = blktrans_open, .release = blktrans_release, .ioctl = blktrans_ioctl, + .getgeo = blktrans_getgeo, }; int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c index 45c077d0f063..af06a80f44de 100644 --- a/drivers/mtd/onenand/generic.c +++ b/drivers/mtd/onenand/generic.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/platform_device.h> #include <linux/mtd/mtd.h> #include <linux/mtd/onenand.h> diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c index 20ce212638fc..a3e00a4635a5 100644 --- a/drivers/mtd/rfd_ftl.c +++ b/drivers/mtd/rfd_ftl.c @@ -18,6 +18,7 @@ #include <linux/mtd/blktrans.h> #include <linux/mtd/mtd.h> #include <linux/vmalloc.h> +#include <linux/slab.h> #include <linux/jiffies.h> #include <asm/types.h> diff --git a/drivers/net/3c527.h b/drivers/net/3c527.h index c10f009ce9b6..53b5b071df08 100644 --- a/drivers/net/3c527.h +++ b/drivers/net/3c527.h @@ -32,43 +32,43 @@ struct mc32_mailbox { - u16 mbox __attribute((packed)); - u16 data[1] __attribute((packed)); -}; + u16 mbox; + u16 data[1]; +} __attribute((packed)); struct skb_header { - u8 status __attribute((packed)); - u8 control __attribute((packed)); - u16 next __attribute((packed)); /* Do not change! */ - u16 length __attribute((packed)); - u32 data __attribute((packed)); -}; + u8 status; + u8 control; + u16 next; /* Do not change! */ + u16 length; + u32 data; +} __attribute((packed)); struct mc32_stats { /* RX Errors */ - u32 rx_crc_errors __attribute((packed)); - u32 rx_alignment_errors __attribute((packed)); - u32 rx_overrun_errors __attribute((packed)); - u32 rx_tooshort_errors __attribute((packed)); - u32 rx_toolong_errors __attribute((packed)); - u32 rx_outofresource_errors __attribute((packed)); + u32 rx_crc_errors; + u32 rx_alignment_errors; + u32 rx_overrun_errors; + u32 rx_tooshort_errors; + u32 rx_toolong_errors; + u32 rx_outofresource_errors; - u32 rx_discarded __attribute((packed)); /* via card pattern match filter */ + u32 rx_discarded; /* via card pattern match filter */ /* TX Errors */ - u32 tx_max_collisions __attribute((packed)); - u32 tx_carrier_errors __attribute((packed)); - u32 tx_underrun_errors __attribute((packed)); - u32 tx_cts_errors __attribute((packed)); - u32 tx_timeout_errors __attribute((packed)) ; + u32 tx_max_collisions; + u32 tx_carrier_errors; + u32 tx_underrun_errors; + u32 tx_cts_errors; + u32 tx_timeout_errors; /* various cruft */ - u32 dataA[6] __attribute((packed)); - u16 dataB[5] __attribute((packed)); - u32 dataC[14] __attribute((packed)); -}; + u32 dataA[6]; + u16 dataB[5]; + u32 dataC[14]; +} __attribute((packed)); #define STATUS_MASK 0x0F #define COMPLETED (1<<7) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index e2fa29b612cd..1960961bf28e 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -1374,7 +1374,7 @@ config FORCEDETH config CS89x0 tristate "CS89x0 support" - depends on (NET_PCI && (ISA || ARCH_IXDP2X01)) || ARCH_PNX0105 + depends on NET_PCI && (ISA || ARCH_IXDP2X01 || ARCH_PNX010X) ---help--- Support for CS89x0 chipset based Ethernet cards. If you have a network (Ethernet) card of this type, say Y and read the diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c index a6078ad9b654..907c01009746 100644 --- a/drivers/net/cs89x0.c +++ b/drivers/net/cs89x0.c @@ -175,7 +175,7 @@ static unsigned int cs8900_irq_map[] = {1,0,0,0}; #include <asm/irq.h> static unsigned int netcard_portlist[] __initdata = {IXDP2X01_CS8900_VIRT_BASE, 0}; static unsigned int cs8900_irq_map[] = {IRQ_IXDP2X01_CS8900, 0, 0, 0}; -#elif defined(CONFIG_ARCH_PNX0105) +#elif defined(CONFIG_ARCH_PNX010X) #include <asm/irq.h> #include <asm/arch/gpio.h> #define CIRRUS_DEFAULT_BASE IO_ADDRESS(EXT_STATIC2_s0_BASE + 0x200000) /* = Physical address 0x48200000 */ @@ -338,30 +338,86 @@ out: } #endif +#if defined(CONFIG_ARCH_IXDP2X01) static int -readreg(struct net_device *dev, int portno) +readword(unsigned long base_addr, int portno) { - outw(portno, dev->base_addr + ADD_PORT); - return inw(dev->base_addr + DATA_PORT); + return (u16)__raw_readl(base_addr + (portno << 1)); } static void -writereg(struct net_device *dev, int portno, int value) +writeword(unsigned long base_addr, int portno, int value) { - outw(portno, dev->base_addr + ADD_PORT); - outw(value, dev->base_addr + DATA_PORT); + __raw_writel((u16)value, base_addr + (portno << 1)); +} +#else +#if defined(CONFIG_ARCH_PNX010X) +static int +readword(unsigned long base_addr, int portno) +{ + return inw(base_addr + (portno << 1)); +} + +static void +writeword(unsigned long base_addr, int portno, int value) +{ + outw(value, base_addr + (portno << 1)); +} +#else +static int +readword(unsigned long base_addr, int portno) +{ + return inw(base_addr + portno); +} + +static void +writeword(unsigned long base_addr, int portno, int value) +{ + outw(value, base_addr + portno); +} +#endif +#endif + +static void +readwords(unsigned long base_addr, int portno, void *buf, int length) +{ + u8 *buf8 = (u8 *)buf; + + do { + u32 tmp32; + + tmp32 = readword(base_addr, portno); + *buf8++ = (u8)tmp32; + *buf8++ = (u8)(tmp32 >> 8); + } while (--length); +} + +static void +writewords(unsigned long base_addr, int portno, void *buf, int length) +{ + u8 *buf8 = (u8 *)buf; + + do { + u32 tmp32; + + tmp32 = *buf8++; + tmp32 |= (*buf8++) << 8; + writeword(base_addr, portno, tmp32); + } while (--length); } static int -readword(struct net_device *dev, int portno) +readreg(struct net_device *dev, int regno) { - return inw(dev->base_addr + portno); + writeword(dev->base_addr, ADD_PORT, regno); + return readword(dev->base_addr, DATA_PORT); } static void -writeword(struct net_device *dev, int portno, int value) +writereg(struct net_device *dev, int regno, int value) { - outw(value, dev->base_addr + portno); + writeword(dev->base_addr, ADD_PORT, regno); + writeword(dev->base_addr, DATA_PORT, value); } static int __init @@ -456,7 +512,7 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular) #endif } -#ifdef CONFIG_ARCH_PNX0105 +#ifdef CONFIG_ARCH_PNX010X initialize_ebi(); /* Map GPIO registers for the pins connected to the CS8900a. */ @@ -491,8 +547,8 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular) #ifdef CONFIG_SH_HICOSH4 /* truely reset the chip */ - outw(0x0114, ioaddr + ADD_PORT); - outw(0x0040, ioaddr + DATA_PORT); + writeword(ioaddr, ADD_PORT, 0x0114); + writeword(ioaddr, DATA_PORT, 0x0040); #endif /* if they give us an odd I/O address, then do ONE write to @@ -503,24 +559,24 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular) if (net_debug > 1) printk(KERN_INFO "%s: odd ioaddr 0x%x\n", dev->name, ioaddr); if ((ioaddr & 2) != 2) - if ((inw((ioaddr & ~3)+ ADD_PORT) & ADD_MASK) != ADD_SIG) { + if ((readword(ioaddr & ~3, ADD_PORT) & ADD_MASK) != ADD_SIG) { printk(KERN_ERR "%s: bad signature 0x%x\n", - dev->name, inw((ioaddr & ~3)+ ADD_PORT)); + dev->name, readword(ioaddr & ~3, ADD_PORT)); retval = -ENODEV; goto out2; } } - printk(KERN_DEBUG "PP_addr at %x: 0x%x\n", - ioaddr + ADD_PORT, inw(ioaddr + ADD_PORT)); + printk(KERN_DEBUG "PP_addr at %x[%x]: 0x%x\n", + ioaddr, ADD_PORT, readword(ioaddr, ADD_PORT)); ioaddr &= ~3; - outw(PP_ChipID, ioaddr + ADD_PORT); + writeword(ioaddr, ADD_PORT, PP_ChipID); - tmp = inw(ioaddr + DATA_PORT); + tmp = readword(ioaddr, DATA_PORT); if (tmp != CHIP_EISA_ID_SIG) { - printk(KERN_DEBUG "%s: incorrect signature at %x: 0x%x!=" + printk(KERN_DEBUG "%s: incorrect signature at %x[%x]: 0x%x!=" CHIP_EISA_ID_SIG_STR "\n", - dev->name, ioaddr + DATA_PORT, tmp); + dev->name, ioaddr, DATA_PORT, tmp); retval = -ENODEV; goto out2; } @@ -724,7 +780,7 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular) } else { i = lp->isa_config & INT_NO_MASK; if (lp->chip_type == CS8900) { -#if defined(CONFIG_ARCH_IXDP2X01) || defined(CONFIG_ARCH_PNX0105) +#if defined(CONFIG_ARCH_IXDP2X01) || defined(CONFIG_ARCH_PNX010X) i = cs8900_irq_map[0]; #else /* Translate the IRQ using the IRQ mapping table. */ @@ -790,7 +846,7 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular) goto out3; return 0; out3: - outw(PP_ChipID, dev->base_addr + ADD_PORT); + writeword(dev->base_addr, ADD_PORT, PP_ChipID); out2: release_region(ioaddr & ~3, NETCARD_IO_EXTENT); out1: @@ -970,11 +1026,11 @@ void __init reset_chip(struct net_device *dev) #ifndef CONFIG_ARCH_IXDP2X01 if (lp->chip_type != CS8900) { /* Hardware problem requires PNP registers to be reconfigured after a reset */ - outw(PP_CS8920_ISAINT, ioaddr + ADD_PORT); + writeword(ioaddr, ADD_PORT, PP_CS8920_ISAINT); outb(dev->irq, ioaddr + DATA_PORT); outb(0, ioaddr + DATA_PORT + 1); - outw(PP_CS8920_ISAMemB, ioaddr + ADD_PORT); + writeword(ioaddr, ADD_PORT, PP_CS8920_ISAMemB); outb((dev->mem_start >> 16) & 0xff, ioaddr + DATA_PORT); outb((dev->mem_start >> 8) & 0xff, ioaddr + DATA_PORT + 1); } @@ -1104,8 +1160,8 @@ send_test_pkt(struct net_device *dev) memcpy(test_packet, dev->dev_addr, ETH_ALEN); memcpy(test_packet+ETH_ALEN, dev->dev_addr, ETH_ALEN); - writeword(dev, TX_CMD_PORT, TX_AFTER_ALL); - writeword(dev, TX_LEN_PORT, ETH_ZLEN); + writeword(dev->base_addr, TX_CMD_PORT, TX_AFTER_ALL); + writeword(dev->base_addr, TX_LEN_PORT, ETH_ZLEN); /* Test to see if the chip has allocated memory for the packet */ while (jiffies - timenow < 5) @@ -1115,7 +1171,7 @@ send_test_pkt(struct net_device *dev) return 0; /* this shouldn't happen */ /* Write the contents of the packet */ - outsw(dev->base_addr + TX_FRAME_PORT,test_packet,(ETH_ZLEN+1) >>1); + writewords(dev->base_addr, TX_FRAME_PORT,test_packet,(ETH_ZLEN+1) >>1); if (net_debug > 1) printk("Sending test packet "); /* wait a couple of jiffies for packet to be received */ @@ -1200,7 +1256,7 @@ net_open(struct net_device *dev) int i; int ret; -#if !defined(CONFIG_SH_HICOSH4) && !defined(CONFIG_ARCH_PNX0105) /* uses irq#1, so this won't work */ +#if !defined(CONFIG_SH_HICOSH4) && !defined(CONFIG_ARCH_PNX010X) /* uses irq#1, so this won't work */ if (dev->irq < 2) { /* Allow interrupts to be generated by the chip */ /* Cirrus' release had this: */ @@ -1231,7 +1287,7 @@ net_open(struct net_device *dev) else #endif { -#if !defined(CONFIG_ARCH_IXDP2X01) && !defined(CONFIG_ARCH_PNX0105) +#if !defined(CONFIG_ARCH_IXDP2X01) && !defined(CONFIG_ARCH_PNX010X) if (((1 << dev->irq) & lp->irq_map) == 0) { printk(KERN_ERR "%s: IRQ %d is not in our map of allowable IRQs, which is %x\n", dev->name, dev->irq, lp->irq_map); @@ -1316,7 +1372,7 @@ net_open(struct net_device *dev) case A_CNF_MEDIA_10B_2: result = lp->adapter_cnf & A_CNF_10B_2; break; default: result = lp->adapter_cnf & (A_CNF_10B_T | A_CNF_AUI | A_CNF_10B_2); } -#ifdef CONFIG_ARCH_PNX0105 +#ifdef CONFIG_ARCH_PNX010X result = A_CNF_10B_T; #endif if (!result) { @@ -1457,8 +1513,8 @@ static int net_send_packet(struct sk_buff *skb, struct net_device *dev) netif_stop_queue(dev); /* initiate a transmit sequence */ - writeword(dev, TX_CMD_PORT, lp->send_cmd); - writeword(dev, TX_LEN_PORT, skb->len); + writeword(dev->base_addr, TX_CMD_PORT, lp->send_cmd); + writeword(dev->base_addr, TX_LEN_PORT, skb->len); /* Test to see if the chip has allocated memory for the packet */ if ((readreg(dev, PP_BusST) & READY_FOR_TX_NOW) == 0) { @@ -1472,7 +1528,7 @@ static int net_send_packet(struct sk_buff *skb, struct net_device *dev) return 1; } /* Write the contents of the packet */ - outsw(dev->base_addr + TX_FRAME_PORT,skb->data,(skb->len+1) >>1); + writewords(dev->base_addr, TX_FRAME_PORT,skb->data,(skb->len+1) >>1); spin_unlock_irq(&lp->lock); lp->stats.tx_bytes += skb->len; dev->trans_start = jiffies; @@ -1512,7 +1568,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id, struct pt_regs * regs) course, if you're on a slow machine, and packets are arriving faster than you can read them off, you're screwed. Hasta la vista, baby! */ - while ((status = readword(dev, ISQ_PORT))) { + while ((status = readword(dev->base_addr, ISQ_PORT))) { if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status); handled = 1; switch(status & ISQ_EVENT_MASK) { @@ -1606,8 +1662,8 @@ net_rx(struct net_device *dev) int status, length; int ioaddr = dev->base_addr; - status = inw(ioaddr + RX_FRAME_PORT); - length = inw(ioaddr + RX_FRAME_PORT); + status = readword(ioaddr, RX_FRAME_PORT); + length = readword(ioaddr, RX_FRAME_PORT); if ((status & RX_OK) == 0) { count_rx_errors(status, lp); @@ -1626,9 +1682,9 @@ net_rx(struct net_device *dev) skb_reserve(skb, 2); /* longword align L3 header */ skb->dev = dev; - insw(ioaddr + RX_FRAME_PORT, skb_put(skb, length), length >> 1); + readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1); if (length & 1) - skb->data[length-1] = inw(ioaddr + RX_FRAME_PORT); + skb->data[length-1] = readword(ioaddr, RX_FRAME_PORT); if (net_debug > 3) { printk( "%s: received %d byte packet of type %x\n", @@ -1901,7 +1957,7 @@ void cleanup_module(void) { unregister_netdev(dev_cs89x0); - outw(PP_ChipID, dev_cs89x0->base_addr + ADD_PORT); + writeword(dev_cs89x0->base_addr, ADD_PORT, PP_ChipID); release_region(dev_cs89x0->base_addr, NETCARD_IO_EXTENT); free_netdev(dev_cs89x0); } diff --git a/drivers/net/cs89x0.h b/drivers/net/cs89x0.h index decea264f121..bd954aaa636f 100644 --- a/drivers/net/cs89x0.h +++ b/drivers/net/cs89x0.h @@ -16,13 +16,6 @@ #include <linux/config.h> -#if defined(CONFIG_ARCH_IXDP2X01) || defined(CONFIG_ARCH_PNX0105) -/* IXDP2401/IXDP2801 uses dword-aligned register addressing */ -#define CS89x0_PORT(reg) ((reg) * 2) -#else -#define CS89x0_PORT(reg) (reg) -#endif - #define PP_ChipID 0x0000 /* offset 0h -> Corp -ID */ /* offset 2h -> Model/Product Number */ /* offset 3h -> Chip Revision Number */ @@ -332,16 +325,16 @@ #define RAM_SIZE 0x1000 /* The card has 4k bytes or RAM */ #define PKT_START PP_TxFrame /* Start of packet RAM */ -#define RX_FRAME_PORT CS89x0_PORT(0x0000) +#define RX_FRAME_PORT 0x0000 #define TX_FRAME_PORT RX_FRAME_PORT -#define TX_CMD_PORT CS89x0_PORT(0x0004) +#define TX_CMD_PORT 0x0004 #define TX_NOW 0x0000 /* Tx packet after 5 bytes copied */ #define TX_AFTER_381 0x0040 /* Tx packet after 381 bytes copied */ #define TX_AFTER_ALL 0x00c0 /* Tx packet after all bytes copied */ -#define TX_LEN_PORT CS89x0_PORT(0x0006) -#define ISQ_PORT CS89x0_PORT(0x0008) -#define ADD_PORT CS89x0_PORT(0x000A) -#define DATA_PORT CS89x0_PORT(0x000C) +#define TX_LEN_PORT 0x0006 +#define ISQ_PORT 0x0008 +#define ADD_PORT 0x000A +#define DATA_PORT 0x000C #define EEPROM_WRITE_EN 0x00F0 #define EEPROM_WRITE_DIS 0x0000 diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 3e9accf137e7..41b3d83c2ab8 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -524,6 +524,7 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len) ax->dev->trans_start = jiffies; ax->xleft = count - actual; ax->xhead = ax->xbuff + actual; + spin_unlock_bh(&ax->buflock); } /* Encapsulate an AX.25 packet and kick it into a TTY queue. */ diff --git a/drivers/net/irda/vlsi_ir.h b/drivers/net/irda/vlsi_ir.h index 741aecc655df..a82a4ba8de4f 100644 --- a/drivers/net/irda/vlsi_ir.h +++ b/drivers/net/irda/vlsi_ir.h @@ -577,8 +577,8 @@ struct ring_descr_hw { struct { u8 addr_res[3]; volatile u8 status; /* descriptor status */ - } rd_s __attribute__((packed)); - } rd_u __attribute((packed)); + } __attribute__((packed)) rd_s; + } __attribute((packed)) rd_u; } __attribute__ ((packed)); #define rd_addr rd_u.addr diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c index 28bf2e69eb5e..7ec08127c9d6 100644 --- a/drivers/net/smc91x.c +++ b/drivers/net/smc91x.c @@ -88,7 +88,6 @@ static const char version[] = #include <linux/skbuff.h> #include <asm/io.h> -#include <asm/irq.h> #include "smc91x.h" @@ -2007,12 +2006,10 @@ static int __init smc_probe(struct net_device *dev, void __iomem *ioaddr) } /* Grab the IRQ */ - retval = request_irq(dev->irq, &smc_interrupt, 0, dev->name, dev); + retval = request_irq(dev->irq, &smc_interrupt, SMC_IRQ_FLAGS, dev->name, dev); if (retval) goto err_out; - set_irq_type(dev->irq, SMC_IRQ_TRIGGER_TYPE); - #ifdef SMC_USE_PXA_DMA { int dma = pxa_request_dma(dev->name, DMA_PRIO_LOW, diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h index 5c2824be4ee6..e0efd1964e72 100644 --- a/drivers/net/smc91x.h +++ b/drivers/net/smc91x.h @@ -90,7 +90,7 @@ __l--; \ } \ } while (0) -#define set_irq_type(irq, type) +#define SMC_IRQ_FLAGS (0) #elif defined(CONFIG_SA1100_PLEB) /* We can only do 16-bit reads and writes in the static memory space. */ @@ -109,7 +109,7 @@ #define SMC_outw(v, a, r) writew(v, (a) + (r)) #define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) -#define set_irq_type(irq, type) do {} while (0) +#define SMC_IRQ_FLAGS (0) #elif defined(CONFIG_SA1100_ASSABET) @@ -185,11 +185,11 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #include <asm/mach-types.h> #include <asm/arch/cpu.h> -#define SMC_IRQ_TRIGGER_TYPE (( \ +#define SMC_IRQ_FLAGS (( \ machine_is_omap_h2() \ || machine_is_omap_h3() \ || (machine_is_omap_innovator() && !cpu_is_omap1510()) \ - ) ? IRQT_FALLING : IRQT_RISING) + ) ? SA_TRIGGER_FALLING : SA_TRIGGER_RISING) #elif defined(CONFIG_SH_SH4202_MICRODEV) @@ -209,7 +209,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define SMC_insw(a, r, p, l) insw((a) + (r) - 0xa0000000, p, l) #define SMC_outsw(a, r, p, l) outsw((a) + (r) - 0xa0000000, p, l) -#define set_irq_type(irq, type) do {} while(0) +#define SMC_IRQ_FLAGS (0) #elif defined(CONFIG_ISA) @@ -237,7 +237,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define SMC_insw(a, r, p, l) insw(((u32)a) + (r), p, l) #define SMC_outsw(a, r, p, l) outsw(((u32)a) + (r), p, l) -#define set_irq_type(irq, type) do {} while(0) +#define SMC_IRQ_FLAGS (0) #define RPC_LSA_DEFAULT RPC_LED_TX_RX #define RPC_LSB_DEFAULT RPC_LED_100_10 @@ -319,7 +319,7 @@ static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l) au_writew(*_p++ , _a); \ } while(0) -#define set_irq_type(irq, type) do {} while (0) +#define SMC_IRQ_FLAGS (0) #else @@ -342,8 +342,8 @@ static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l) #endif -#ifndef SMC_IRQ_TRIGGER_TYPE -#define SMC_IRQ_TRIGGER_TYPE IRQT_RISING +#ifndef SMC_IRQ_FLAGS +#define SMC_IRQ_FLAGS SA_TRIGGER_RISING #endif #ifdef SMC_USE_PXA_DMA diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 036adc4f8ba7..22e794071cf4 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -329,9 +329,9 @@ static int sdla_cpuspeed(struct net_device *dev, struct ifreq *ifr) struct _dlci_stat { - short dlci __attribute__((packed)); - char flags __attribute__((packed)); -}; + short dlci; + char flags; +} __attribute__((packed)); struct _frad_stat { diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 531b07313141..b2e8e49c8659 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -43,13 +43,16 @@ static void process_task_mortuary(void); * list for processing. Only after two full buffer syncs * does the task eventually get freed, because by then * we are sure we will not reference it again. + * Can be invoked from softirq via RCU callback due to + * call_rcu() of the task struct, hence the _irqsave. */ static int task_free_notify(struct notifier_block * self, unsigned long val, void * data) { + unsigned long flags; struct task_struct * task = data; - spin_lock(&task_mortuary); + spin_lock_irqsave(&task_mortuary, flags); list_add(&task->tasks, &dying_tasks); - spin_unlock(&task_mortuary); + spin_unlock_irqrestore(&task_mortuary, flags); return NOTIFY_OK; } @@ -431,25 +434,22 @@ static void increment_tail(struct oprofile_cpu_buffer * b) */ static void process_task_mortuary(void) { - struct list_head * pos; - struct list_head * pos2; + unsigned long flags; + LIST_HEAD(local_dead_tasks); struct task_struct * task; + struct task_struct * ttask; - spin_lock(&task_mortuary); + spin_lock_irqsave(&task_mortuary, flags); - list_for_each_safe(pos, pos2, &dead_tasks) { - task = list_entry(pos, struct task_struct, tasks); - list_del(&task->tasks); - free_task(task); - } + list_splice_init(&dead_tasks, &local_dead_tasks); + list_splice_init(&dying_tasks, &dead_tasks); - list_for_each_safe(pos, pos2, &dying_tasks) { - task = list_entry(pos, struct task_struct, tasks); + spin_unlock_irqrestore(&task_mortuary, flags); + + list_for_each_entry_safe(task, ttask, &local_dead_tasks, tasks) { list_del(&task->tasks); - list_add_tail(&task->tasks, &dead_tasks); + free_task(task); } - - spin_unlock(&task_mortuary); } diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 026f671ea558..78193e4bbdb5 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -52,7 +52,8 @@ int alloc_cpu_buffers(void) for_each_online_cpu(i) { struct oprofile_cpu_buffer * b = &cpu_buffer[i]; - b->buffer = vmalloc(sizeof(struct op_sample) * buffer_size); + b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size, + cpu_to_node(i)); if (!b->buffer) goto fail; diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig index b8241561da45..a665951b1586 100644 --- a/drivers/parport/Kconfig +++ b/drivers/parport/Kconfig @@ -34,7 +34,7 @@ config PARPORT config PARPORT_PC tristate "PC-style hardware" - depends on PARPORT && (!SPARC64 || PCI) && !SPARC32 && !M32R + depends on PARPORT && (!SPARC64 || PCI) && !SPARC32 && !M32R && !FRV ---help--- You should say Y here if you have a PC-style parallel port. All IBM PC compatible computers and some Alphas have PC-style diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 18e85ccdae67..9302b8fd7461 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -2371,8 +2371,10 @@ void parport_pc_unregister_port (struct parport *p) spin_lock(&ports_lock); list_del_init(&priv->list); spin_unlock(&ports_lock); +#if defined(CONFIG_PARPORT_PC_FIFO) && defined(HAS_DMA) if (p->dma != PARPORT_DMA_NONE) free_dma(p->dma); +#endif if (p->irq != PARPORT_IRQ_NONE) free_irq(p->irq, p); release_region(p->base, 3); @@ -2380,14 +2382,12 @@ void parport_pc_unregister_port (struct parport *p) release_region(p->base + 3, p->size - 3); if (p->modes & PARPORT_MODE_ECP) release_region(p->base_hi, 3); -#ifdef CONFIG_PARPORT_PC_FIFO -#ifdef HAS_DMA +#if defined(CONFIG_PARPORT_PC_FIFO) && defined(HAS_DMA) if (priv->dma_buf) pci_free_consistent(priv->dev, PAGE_SIZE, priv->dma_buf, priv->dma_handle); #endif -#endif kfree (p->private_data); parport_put_port(p); kfree (ops); /* hope no-one cached it */ diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 6a61b9f286e1..0aac6a61337d 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -32,6 +32,7 @@ #include <linux/types.h> #include <linux/pci.h> #include <linux/delay.h> +#include <linux/sched.h> /* signal_pending() */ #include <linux/pcieport_if.h> #include "pci_hotplug.h" diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 0b8b26beb163..ac1e495c314e 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -30,6 +30,9 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> +#include <linux/signal.h> +#include <linux/jiffies.h> +#include <linux/timer.h> #include <linux/pci.h> #include <linux/interrupt.h> diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 4f7ed4bd3be9..94e30fe4b8f3 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -24,6 +24,8 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/timer.h> +#include <linux/jiffies.h> +#include <linux/slab.h> #include "rio.h" diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 30a11436e241..bef9316e95df 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -15,6 +15,7 @@ #include <linux/rio.h> #include <linux/rio_drv.h> #include <linux/stat.h> +#include <linux/sched.h> /* for capable() */ #include "rio.h" diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 3ca1011ceaac..5e382470faa2 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -23,6 +23,7 @@ #include <linux/rio_regs.h> #include <linux/module.h> #include <linux/spinlock.h> +#include <linux/slab.h> #include "rio.h" diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index f779f674dfa0..2472fa1a1be1 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -18,6 +18,7 @@ #include <linux/major.h> #include <linux/slab.h> #include <linux/buffer_head.h> +#include <linux/hdreg.h> #include <asm/ccwdev.h> #include <asm/ebcdic.h> @@ -1723,12 +1724,34 @@ dasd_release(struct inode *inp, struct file *filp) return 0; } +/* + * Return disk geometry. + */ +static int +dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct dasd_device *device; + + device = bdev->bd_disk->private_data; + if (!device) + return -ENODEV; + + if (!device->discipline || + !device->discipline->fill_geometry) + return -EINVAL; + + device->discipline->fill_geometry(device, geo); + geo->start = get_start_sect(bdev) >> device->s2b_shift; + return 0; +} + struct block_device_operations dasd_device_operations = { .owner = THIS_MODULE, .open = dasd_open, .release = dasd_release, .ioctl = dasd_ioctl, + .getgeo = dasd_getgeo, }; diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 044b75371990..8e4dcd58599e 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -486,33 +486,6 @@ dasd_ioctl_set_ro(struct block_device *bdev, int no, long args) } /* - * Return disk geometry. - */ -static int -dasd_ioctl_getgeo(struct block_device *bdev, int no, long args) -{ - struct hd_geometry geo = { 0, }; - struct dasd_device *device; - - device = bdev->bd_disk->private_data; - if (device == NULL) - return -ENODEV; - - if (device == NULL || device->discipline == NULL || - device->discipline->fill_geometry == NULL) - return -EINVAL; - - geo = (struct hd_geometry) {}; - device->discipline->fill_geometry(device, &geo); - geo.start = get_start_sect(bdev) >> device->s2b_shift; - if (copy_to_user((struct hd_geometry __user *) args, &geo, - sizeof (struct hd_geometry))) - return -EFAULT; - - return 0; -} - -/* * List of static ioctls. */ static struct { int no; dasd_ioctl_fn_t fn; } dasd_ioctls[] = @@ -528,7 +501,6 @@ static struct { int no; dasd_ioctl_fn_t fn; } dasd_ioctls[] = { BIODASDPRRST, dasd_ioctl_reset_profile }, { BLKROSET, dasd_ioctl_set_ro }, { DASDAPIVER, dasd_ioctl_api_version }, - { HDIO_GETGEO, dasd_ioctl_getgeo }, { -1, NULL } }; diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index bf3a67c3cc5e..54ecd548c318 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -328,31 +328,27 @@ fail: return 0; } -static int xpram_ioctl (struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct hd_geometry __user *geo; unsigned long size; - if (cmd != HDIO_GETGEO) - return -EINVAL; + /* * get geometry: we have to fake one... trim the size to a * multiple of 64 (32k): tell we have 16 sectors, 4 heads, * whatever cylinders. Tell also that data starts at sector. 4. */ - geo = (struct hd_geometry __user *) arg; size = (xpram_pages * 8) & ~0x3f; - put_user(size >> 6, &geo->cylinders); - put_user(4, &geo->heads); - put_user(16, &geo->sectors); - put_user(4, &geo->start); + geo->cylinders = size >> 6; + geo->heads = 4; + geo->sectors = 16; + geo->start = 4; return 0; } static struct block_device_operations xpram_devops = { .owner = THIS_MODULE, - .ioctl = xpram_ioctl, + .getgeo = xpram_getgeo, }; /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 32d4d8d7b9f3..4c5127ed379c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -527,7 +527,7 @@ static int sd_release(struct inode *inode, struct file *filp) return 0; } -static int sd_hdio_getgeo(struct block_device *bdev, struct hd_geometry __user *loc) +static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdp = sdkp->device; @@ -545,15 +545,9 @@ static int sd_hdio_getgeo(struct block_device *bdev, struct hd_geometry __user * else scsicam_bios_param(bdev, sdkp->capacity, diskinfo); - if (put_user(diskinfo[0], &loc->heads)) - return -EFAULT; - if (put_user(diskinfo[1], &loc->sectors)) - return -EFAULT; - if (put_user(diskinfo[2], &loc->cylinders)) - return -EFAULT; - if (put_user((unsigned)get_start_sect(bdev), - (unsigned long __user *)&loc->start)) - return -EFAULT; + geo->heads = diskinfo[0]; + geo->sectors = diskinfo[1]; + geo->cylinders = diskinfo[2]; return 0; } @@ -593,12 +587,6 @@ static int sd_ioctl(struct inode * inode, struct file * filp, if (!scsi_block_when_processing_errors(sdp) || !error) return error; - if (cmd == HDIO_GETGEO) { - if (!arg) - return -EINVAL; - return sd_hdio_getgeo(bdev, p); - } - /* * Send SCSI addressing ioctls directly to mid level, send other * ioctls to block level and then onto mid level if they can't be @@ -800,6 +788,7 @@ static struct block_device_operations sd_fops = { .open = sd_open, .release = sd_release, .ioctl = sd_ioctl, + .getgeo = sd_getgeo, #ifdef CONFIG_COMPAT .compat_ioctl = sd_compat_ioctl, #endif diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index c44bbedec817..4ddc453023a2 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -186,7 +186,7 @@ static void update_bus(struct dentry *bus) down(&bus->d_inode->i_sem); - list_for_each_entry(dev, &bus->d_subdirs, d_child) + list_for_each_entry(dev, &bus->d_subdirs, d_u.d_child) if (dev->d_inode) update_dev(dev); @@ -203,7 +203,7 @@ static void update_sb(struct super_block *sb) down(&root->d_inode->i_sem); - list_for_each_entry(bus, &root->d_subdirs, d_child) { + list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) { if (bus->d_inode) { switch (S_IFMT & bus->d_inode->i_mode) { case S_IFDIR: @@ -319,7 +319,7 @@ static int usbfs_empty (struct dentry *dentry) spin_lock(&dcache_lock); list_for_each(list, &dentry->d_subdirs) { - struct dentry *de = list_entry(list, struct dentry, d_child); + struct dentry *de = list_entry(list, struct dentry, d_u.d_child); if (usbfs_positive(de)) { spin_unlock(&dcache_lock); return 0; diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c index d9cf3b327d96..77cd6ac07e3c 100644 --- a/drivers/usb/host/ohci-au1xxx.c +++ b/drivers/usb/host/ohci-au1xxx.c @@ -19,6 +19,7 @@ */ #include <linux/platform_device.h> +#include <linux/signal.h> #include <asm/mach-au1x00/au1000.h> diff --git a/drivers/usb/host/ohci-lh7a404.c b/drivers/usb/host/ohci-lh7a404.c index 3959ccc88332..0020ed7a39d0 100644 --- a/drivers/usb/host/ohci-lh7a404.c +++ b/drivers/usb/host/ohci-lh7a404.c @@ -17,6 +17,7 @@ */ #include <linux/platform_device.h> +#include <linux/signal.h> #include <asm/hardware.h> diff --git a/drivers/usb/host/ohci-ppc-soc.c b/drivers/usb/host/ohci-ppc-soc.c index 2ec6a78bd65e..b2a8dfa48870 100644 --- a/drivers/usb/host/ohci-ppc-soc.c +++ b/drivers/usb/host/ohci-ppc-soc.c @@ -15,6 +15,7 @@ */ #include <linux/platform_device.h> +#include <linux/signal.h> /* configure so an HC device and id are always provided */ /* always called with process context; sleeping is OK */ diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig index a5d09e159cd1..6ee449858a5c 100644 --- a/drivers/video/console/Kconfig +++ b/drivers/video/console/Kconfig @@ -6,7 +6,7 @@ menu "Console display driver support" config VGA_CONSOLE bool "VGA text console" if EMBEDDED || !X86 - depends on !ARCH_ACORN && !ARCH_EBSA110 && !4xx && !8xx && !SPARC && !M68K && !PARISC && !ARCH_VERSATILE + depends on !ARCH_ACORN && !ARCH_EBSA110 && !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && !ARCH_VERSATILE default y help Saying Y here will allow you to use Linux in text mode through a diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 167de397e4b4..12d9329d1408 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -56,6 +56,8 @@ static DEFINE_SPINLOCK(vga_lock); static int cursor_size_lastfrom; static int cursor_size_lastto; +static u32 vgacon_xres; +static u32 vgacon_yres; static struct vgastate state; #define BLANK 0x0020 @@ -69,7 +71,7 @@ static struct vgastate state; * appear. */ #undef TRIDENT_GLITCH - +#define VGA_FONTWIDTH 8 /* VGA does not support fontwidths != 8 */ /* * Interface used by the world */ @@ -325,6 +327,10 @@ static const char __init *vgacon_startup(void) vga_scan_lines = vga_video_font_height * vga_video_num_lines; } + + vgacon_xres = ORIG_VIDEO_COLS * VGA_FONTWIDTH; + vgacon_yres = vga_scan_lines; + return display_desc; } @@ -503,10 +509,18 @@ static int vgacon_doresize(struct vc_data *c, { unsigned long flags; unsigned int scanlines = height * c->vc_font.height; - u8 scanlines_lo, r7, vsync_end, mode; + u8 scanlines_lo, r7, vsync_end, mode, max_scan; spin_lock_irqsave(&vga_lock, flags); + outb_p(VGA_CRTC_MAX_SCAN, vga_video_port_reg); + max_scan = inb_p(vga_video_port_val); + + if (max_scan & 0x80) + scanlines <<= 1; + + vgacon_xres = width * VGA_FONTWIDTH; + vgacon_yres = height * c->vc_font.height; outb_p(VGA_CRTC_MODE, vga_video_port_reg); mode = inb_p(vga_video_port_val); @@ -551,6 +565,10 @@ static int vgacon_doresize(struct vc_data *c, static int vgacon_switch(struct vc_data *c) { + int x = c->vc_cols * VGA_FONTWIDTH; + int y = c->vc_rows * c->vc_font.height; + int rows = ORIG_VIDEO_LINES * vga_default_font_height/ + c->vc_font.height; /* * We need to save screen size here as it's the only way * we can spot the screen has been resized and we need to @@ -566,10 +584,11 @@ static int vgacon_switch(struct vc_data *c) scr_memcpyw((u16 *) c->vc_origin, (u16 *) c->vc_screenbuf, c->vc_screenbuf_size > vga_vram_size ? vga_vram_size : c->vc_screenbuf_size); - if (!(vga_video_num_columns % 2) && - vga_video_num_columns <= ORIG_VIDEO_COLS && - vga_video_num_lines <= (ORIG_VIDEO_LINES * - vga_default_font_height) / c->vc_font.height) + + if ((vgacon_xres != x || vgacon_yres != y) && + (!(vga_video_num_columns % 2) && + vga_video_num_columns <= ORIG_VIDEO_COLS && + vga_video_num_lines <= rows)) vgacon_doresize(c, c->vc_cols, c->vc_rows); } @@ -993,7 +1012,8 @@ static int vgacon_font_set(struct vc_data *c, struct console_font *font, unsigne if (vga_video_type < VIDEO_TYPE_EGAM) return -EINVAL; - if (font->width != 8 || (charcount != 256 && charcount != 512)) + if (font->width != VGA_FONTWIDTH || + (charcount != 256 && charcount != 512)) return -EINVAL; rc = vgacon_do_font_op(&state, font->data, 1, charcount == 512); @@ -1010,7 +1030,7 @@ static int vgacon_font_get(struct vc_data *c, struct console_font *font) if (vga_video_type < VIDEO_TYPE_EGAM) return -EINVAL; - font->width = 8; + font->width = VGA_FONTWIDTH; font->height = c->vc_font.height; font->charcount = vga_512_chars ? 512 : 256; if (!font->data) diff --git a/fs/9p/9p.c b/fs/9p/9p.c index e847f504a47c..1a6d08761f39 100644 --- a/fs/9p/9p.c +++ b/fs/9p/9p.c @@ -1,8 +1,9 @@ /* * linux/fs/9p/9p.c * - * This file contains functions 9P2000 functions + * This file contains functions to perform synchronous 9P calls * + * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> * @@ -33,6 +34,7 @@ #include "debug.h" #include "v9fs.h" #include "9p.h" +#include "conv.h" #include "mux.h" /** @@ -46,16 +48,21 @@ int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, - char *version, struct v9fs_fcall **fcall) + char *version, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version); - msg.id = TVERSION; - msg.params.tversion.msize = msize; - msg.params.tversion.version = version; + tc = v9fs_create_tversion(msize, version); - return v9fs_mux_rpc(v9ses, &msg, fcall); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; } /** @@ -71,19 +78,45 @@ v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, - u32 fid, u32 afid, struct v9fs_fcall **fcall) + u32 fid, u32 afid, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall* tc; dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname, aname, fid, afid); - msg.id = TATTACH; - msg.params.tattach.fid = fid; - msg.params.tattach.afid = afid; - msg.params.tattach.uname = uname; - msg.params.tattach.aname = aname; - return v9fs_mux_rpc(v9ses, &msg, fcall); + tc = v9fs_create_tattach(fid, afid, uname, aname); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; +} + +static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc, + struct v9fs_fcall *rc, int err) +{ + int fid; + struct v9fs_session_info *v9ses; + + if (err) + return; + + fid = tc->params.tclunk.fid; + kfree(tc); + + if (!rc) + return; + + dprintk(DEBUG_9P, "tcall id %d rcall id %d\n", tc->id, rc->id); + v9ses = a; + if (rc->id == RCLUNK) + v9fs_put_idpool(fid, &v9ses->fidpool); + + kfree(rc); } /** @@ -95,16 +128,25 @@ v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, */ int -v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **fcall) +v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc, *rc; dprintk(DEBUG_9P, "fid %d\n", fid); - msg.id = TCLUNK; - msg.params.tclunk.fid = fid; - return v9fs_mux_rpc(v9ses, &msg, fcall); + rc = NULL; + tc = v9fs_create_tclunk(fid); + if (!IS_ERR(tc)) + ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); + else + ret = PTR_ERR(tc); + + if (ret) + dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret); + + v9fs_t_clunk_cb(v9ses, tc, rc, ret); + return ret; } /** @@ -114,14 +156,21 @@ v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, * */ -int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag) +int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; + + dprintk(DEBUG_9P, "oldtag %d\n", oldtag); + + tc = v9fs_create_tflush(oldtag); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, NULL); + kfree(tc); + } else + ret = PTR_ERR(tc); - dprintk(DEBUG_9P, "oldtag %d\n", tag); - msg.id = TFLUSH; - msg.params.tflush.oldtag = tag; - return v9fs_mux_rpc(v9ses, &msg, NULL); + return ret; } /** @@ -133,17 +182,22 @@ int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag) */ int -v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall) +v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; dprintk(DEBUG_9P, "fid %d\n", fid); - if (fcall) - *fcall = NULL; - msg.id = TSTAT; - msg.params.tstat.fid = fid; - return v9fs_mux_rpc(v9ses, &msg, fcall); + ret = -ENOMEM; + tc = v9fs_create_tstat(fid); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; } /** @@ -157,16 +211,21 @@ v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall) int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_stat *stat, struct v9fs_fcall **fcall) + struct v9fs_wstat *wstat, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; + + dprintk(DEBUG_9P, "fid %d\n", fid); - dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length); - msg.id = TWSTAT; - msg.params.twstat.fid = fid; - msg.params.twstat.stat = stat; + tc = v9fs_create_twstat(fid, wstat, v9ses->extended); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); - return v9fs_mux_rpc(v9ses, &msg, fcall); + return ret; } /** @@ -183,23 +242,27 @@ v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, - char *name, struct v9fs_fcall **fcall) + char *name, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; + int nwname; dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name); - msg.id = TWALK; - msg.params.twalk.fid = fid; - msg.params.twalk.newfid = newfid; - - if (name) { - msg.params.twalk.nwname = 1; - msg.params.twalk.wnames = &name; - } else { - msg.params.twalk.nwname = 0; - } - - return v9fs_mux_rpc(v9ses, &msg, fcall); + + if (name) + nwname = 1; + else + nwname = 0; + + tc = v9fs_create_twalk(fid, newfid, nwname, &name); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; } /** @@ -214,19 +277,21 @@ v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, - struct v9fs_fcall **fcall) + struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; - long errorno = -1; + int ret; + struct v9fs_fcall *tc; dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode); - msg.id = TOPEN; - msg.params.topen.fid = fid; - msg.params.topen.mode = mode; - errorno = v9fs_mux_rpc(v9ses, &msg, fcall); + tc = v9fs_create_topen(fid, mode); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); - return errorno; + return ret; } /** @@ -239,14 +304,21 @@ v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **fcall) + struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; dprintk(DEBUG_9P, "fid %d\n", fid); - msg.id = TREMOVE; - msg.params.tremove.fid = fid; - return v9fs_mux_rpc(v9ses, &msg, fcall); + + tc = v9fs_create_tremove(fid); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; } /** @@ -262,20 +334,22 @@ v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, - u32 perm, u8 mode, struct v9fs_fcall **fcall) + u32 perm, u8 mode, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; + int ret; + struct v9fs_fcall *tc; dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n", fid, name, perm, mode); - msg.id = TCREATE; - msg.params.tcreate.fid = fid; - msg.params.tcreate.name = name; - msg.params.tcreate.perm = perm; - msg.params.tcreate.mode = mode; + tc = v9fs_create_tcreate(fid, name, perm, mode); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, rcp); + kfree(tc); + } else + ret = PTR_ERR(tc); - return v9fs_mux_rpc(v9ses, &msg, fcall); + return ret; } /** @@ -290,31 +364,29 @@ v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, - u32 count, struct v9fs_fcall **fcall) + u32 count, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; - struct v9fs_fcall *rc = NULL; - long errorno = -1; - - dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid, - (long unsigned int)offset, count); - msg.id = TREAD; - msg.params.tread.fid = fid; - msg.params.tread.offset = offset; - msg.params.tread.count = count; - errorno = v9fs_mux_rpc(v9ses, &msg, &rc); - - if (!errorno) { - errorno = rc->params.rread.count; - dump_data(rc->params.rread.data, rc->params.rread.count); - } - - if (fcall) - *fcall = rc; - else - kfree(rc); - - return errorno; + int ret; + struct v9fs_fcall *tc, *rc; + + dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid, + (long long unsigned) offset, count); + + tc = v9fs_create_tread(fid, offset, count); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); + if (!ret) + ret = rc->params.rread.count; + if (rcp) + *rcp = rc; + else + kfree(rc); + + kfree(tc); + } else + ret = PTR_ERR(tc); + + return ret; } /** @@ -328,32 +400,30 @@ v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, */ int -v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, - u64 offset, u32 count, void *data, struct v9fs_fcall **fcall) +v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count, + const char __user *data, struct v9fs_fcall **rcp) { - struct v9fs_fcall msg; - struct v9fs_fcall *rc = NULL; - long errorno = -1; + int ret; + struct v9fs_fcall *tc, *rc; - dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid, - (unsigned long long)offset, count); - dump_data(data, count); + dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid, + (long long unsigned) offset, count); - msg.id = TWRITE; - msg.params.twrite.fid = fid; - msg.params.twrite.offset = offset; - msg.params.twrite.count = count; - msg.params.twrite.data = data; + tc = v9fs_create_twrite(fid, offset, count, data); + if (!IS_ERR(tc)) { + ret = v9fs_mux_rpc(v9ses->mux, tc, &rc); - errorno = v9fs_mux_rpc(v9ses, &msg, &rc); + if (!ret) + ret = rc->params.rwrite.count; + if (rcp) + *rcp = rc; + else + kfree(rc); - if (!errorno) - errorno = rc->params.rwrite.count; + kfree(tc); + } else + ret = PTR_ERR(tc); - if (fcall) - *fcall = rc; - else - kfree(rc); - - return errorno; + return ret; } + diff --git a/fs/9p/9p.h b/fs/9p/9p.h index f55424216be2..0cd374d94717 100644 --- a/fs/9p/9p.h +++ b/fs/9p/9p.h @@ -3,6 +3,7 @@ * * 9P protocol definitions. * + * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> * @@ -100,9 +101,18 @@ enum { V9FS_QTFILE = 0x00, }; +#define V9FS_NOTAG (u16)(~0) +#define V9FS_NOFID (u32)(~0) +#define V9FS_MAXWELEM 16 + /* ample room for Twrite/Rread header (iounit) */ #define V9FS_IOHDRSZ 24 +struct v9fs_str { + u16 len; + char *str; +}; + /* qids are the unique ID for a file (like an inode */ struct v9fs_qid { u8 type; @@ -120,6 +130,29 @@ struct v9fs_stat { u32 atime; u32 mtime; u64 length; + struct v9fs_str name; + struct v9fs_str uid; + struct v9fs_str gid; + struct v9fs_str muid; + struct v9fs_str extension; /* 9p2000.u extensions */ + u32 n_uid; /* 9p2000.u extensions */ + u32 n_gid; /* 9p2000.u extensions */ + u32 n_muid; /* 9p2000.u extensions */ +}; + +/* file metadata (stat) structure used to create Twstat message + The is similar to v9fs_stat, but the strings don't point to + the same memory block and should be freed separately +*/ +struct v9fs_wstat { + u16 size; + u16 type; + u32 dev; + struct v9fs_qid qid; + u32 mode; + u32 atime; + u32 mtime; + u64 length; char *name; char *uid; char *gid; @@ -128,25 +161,24 @@ struct v9fs_stat { u32 n_uid; /* 9p2000.u extensions */ u32 n_gid; /* 9p2000.u extensions */ u32 n_muid; /* 9p2000.u extensions */ - char data[0]; }; /* Structures for Protocol Operations */ struct Tversion { u32 msize; - char *version; + struct v9fs_str version; }; struct Rversion { u32 msize; - char *version; + struct v9fs_str version; }; struct Tauth { u32 afid; - char *uname; - char *aname; + struct v9fs_str uname; + struct v9fs_str aname; }; struct Rauth { @@ -154,12 +186,12 @@ struct Rauth { }; struct Rerror { - char *error; + struct v9fs_str error; u32 errno; /* 9p2000.u extension */ }; struct Tflush { - u32 oldtag; + u16 oldtag; }; struct Rflush { @@ -168,8 +200,8 @@ struct Rflush { struct Tattach { u32 fid; u32 afid; - char *uname; - char *aname; + struct v9fs_str uname; + struct v9fs_str aname; }; struct Rattach { @@ -179,13 +211,13 @@ struct Rattach { struct Twalk { u32 fid; u32 newfid; - u32 nwname; - char **wnames; + u16 nwname; + struct v9fs_str wnames[16]; }; struct Rwalk { - u32 nwqid; - struct v9fs_qid *wqids; + u16 nwqid; + struct v9fs_qid wqids[16]; }; struct Topen { @@ -200,7 +232,7 @@ struct Ropen { struct Tcreate { u32 fid; - char *name; + struct v9fs_str name; u32 perm; u8 mode; }; @@ -251,12 +283,12 @@ struct Tstat { }; struct Rstat { - struct v9fs_stat *stat; + struct v9fs_stat stat; }; struct Twstat { u32 fid; - struct v9fs_stat *stat; + struct v9fs_stat stat; }; struct Rwstat { @@ -271,6 +303,7 @@ struct v9fs_fcall { u32 size; u8 id; u16 tag; + void *sdata; union { struct Tversion tversion; @@ -303,7 +336,9 @@ struct v9fs_fcall { } params; }; -#define FCALL_ERROR(fcall) (fcall ? fcall->params.rerror.error : "") +#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \ + fcall?fcall->params.rerror.error.len:0, \ + fcall?fcall->params.rerror.error.str:""); int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, char *version, struct v9fs_fcall **rcall); @@ -311,8 +346,7 @@ int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, u32 fid, u32 afid, struct v9fs_fcall **rcall); -int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_fcall **rcall); +int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid); int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag); @@ -320,7 +354,7 @@ int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcall); int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, - struct v9fs_stat *stat, struct v9fs_fcall **rcall); + struct v9fs_wstat *wstat, struct v9fs_fcall **rcall); int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, char *name, struct v9fs_fcall **rcall); @@ -338,4 +372,5 @@ int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count, struct v9fs_fcall **rcall); int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, - u32 count, void *data, struct v9fs_fcall **rcall); + u32 count, const char __user * data, + struct v9fs_fcall **rcall); diff --git a/fs/9p/Makefile b/fs/9p/Makefile index e4e4ffe5a7dc..3d023089707e 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -1,17 +1,17 @@ obj-$(CONFIG_9P_FS) := 9p2000.o 9p2000-objs := \ + trans_fd.o \ + trans_sock.o \ + mux.o \ + 9p.o \ + conv.o \ vfs_super.o \ vfs_inode.o \ vfs_file.o \ vfs_dir.o \ vfs_dentry.o \ error.o \ - mux.o \ - trans_fd.o \ - trans_sock.o \ - 9p.o \ - conv.o \ v9fs.o \ fid.o diff --git a/fs/9p/conv.c b/fs/9p/conv.c index 18121af99d3e..55ccfa10ee9e 100644 --- a/fs/9p/conv.c +++ b/fs/9p/conv.c @@ -30,7 +30,7 @@ #include <linux/errno.h> #include <linux/fs.h> #include <linux/idr.h> - +#include <asm/uaccess.h> #include "debug.h" #include "v9fs.h" #include "9p.h" @@ -58,12 +58,15 @@ static inline int buf_check_overflow(struct cbuf *buf) static inline int buf_check_size(struct cbuf *buf, int len) { - if (buf->p+len > buf->ep) { + if (buf->p + len > buf->ep) { if (buf->p < buf->ep) { - eprintk(KERN_ERR, "buffer overflow\n"); + eprintk(KERN_ERR, "buffer overflow: want %d has %d\n", + len, (int)(buf->ep - buf->p)); + dump_stack(); buf->p = buf->ep + 1; - return 0; } + + return 0; } return 1; @@ -127,14 +130,6 @@ static inline void buf_put_string(struct cbuf *buf, const char *s) buf_put_stringn(buf, s, strlen(s)); } -static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen) -{ - if (buf_check_size(buf, datalen)) { - memcpy(buf->p, data, datalen); - buf->p += datalen; - } -} - static inline u8 buf_get_int8(struct cbuf *buf) { u8 ret = 0; @@ -183,86 +178,37 @@ static inline u64 buf_get_int64(struct cbuf *buf) return ret; } -static inline int -buf_get_string(struct cbuf *buf, char *data, unsigned int datalen) -{ - u16 len = 0; - - len = buf_get_int16(buf); - if (!buf_check_overflow(buf) && buf_check_size(buf, len) && len+1>datalen) { - memcpy(data, buf->p, len); - data[len] = 0; - buf->p += len; - len++; - } - - return len; -} - -static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf) -{ - char *ret; - u16 len; - - ret = NULL; - len = buf_get_int16(buf); - - if (!buf_check_overflow(buf) && buf_check_size(buf, len) && - buf_check_size(sbuf, len+1)) { - - memcpy(sbuf->p, buf->p, len); - sbuf->p[len] = 0; - ret = sbuf->p; - buf->p += len; - sbuf->p += len + 1; - } - - return ret; -} - -static inline int buf_get_data(struct cbuf *buf, void *data, int datalen) +static inline void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr) { - int ret = 0; - - if (buf_check_size(buf, datalen)) { - memcpy(data, buf->p, datalen); - buf->p += datalen; - ret = datalen; + vstr->len = buf_get_int16(buf); + if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) { + vstr->str = buf->p; + buf->p += vstr->len; + } else { + vstr->len = 0; + vstr->str = NULL; } - - return ret; } -static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf, - int datalen) +static inline void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid) { - char *ret = NULL; - int n = 0; - - if (buf_check_size(dbuf, datalen)) { - n = buf_get_data(buf, dbuf->p, datalen); - if (n > 0) { - ret = dbuf->p; - dbuf->p += n; - } - } - - return ret; + qid->type = buf_get_int8(bufp); + qid->version = buf_get_int32(bufp); + qid->path = buf_get_int64(bufp); } /** - * v9fs_size_stat - calculate the size of a variable length stat struct - * @v9ses: session information + * v9fs_size_wstat - calculate the size of a variable length stat struct * @stat: metadata (stat) structure + * @extended: non-zero if 9P2000.u * */ -static int v9fs_size_stat(struct v9fs_session_info *v9ses, - struct v9fs_stat *stat) +static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended) { int size = 0; - if (stat == NULL) { + if (wstat == NULL) { eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n"); return 0; } @@ -279,82 +225,38 @@ static int v9fs_size_stat(struct v9fs_session_info *v9ses, 8 + /* length[8] */ 8; /* minimum sum of string lengths */ - if (stat->name) - size += strlen(stat->name); - if (stat->uid) - size += strlen(stat->uid); - if (stat->gid) - size += strlen(stat->gid); - if (stat->muid) - size += strlen(stat->muid); + if (wstat->name) + size += strlen(wstat->name); + if (wstat->uid) + size += strlen(wstat->uid); + if (wstat->gid) + size += strlen(wstat->gid); + if (wstat->muid) + size += strlen(wstat->muid); - if (v9ses->extended) { + if (extended) { size += 4 + /* n_uid[4] */ 4 + /* n_gid[4] */ 4 + /* n_muid[4] */ 2; /* string length of extension[4] */ - if (stat->extension) - size += strlen(stat->extension); + if (wstat->extension) + size += strlen(wstat->extension); } return size; } /** - * serialize_stat - safely format a stat structure for transmission - * @v9ses: session info - * @stat: metadata (stat) structure - * @bufp: buffer to serialize structure into - * - */ - -static int -serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat, - struct cbuf *bufp) -{ - buf_put_int16(bufp, stat->size); - buf_put_int16(bufp, stat->type); - buf_put_int32(bufp, stat->dev); - buf_put_int8(bufp, stat->qid.type); - buf_put_int32(bufp, stat->qid.version); - buf_put_int64(bufp, stat->qid.path); - buf_put_int32(bufp, stat->mode); - buf_put_int32(bufp, stat->atime); - buf_put_int32(bufp, stat->mtime); - buf_put_int64(bufp, stat->length); - - buf_put_string(bufp, stat->name); - buf_put_string(bufp, stat->uid); - buf_put_string(bufp, stat->gid); - buf_put_string(bufp, stat->muid); - - if (v9ses->extended) { - buf_put_string(bufp, stat->extension); - buf_put_int32(bufp, stat->n_uid); - buf_put_int32(bufp, stat->n_gid); - buf_put_int32(bufp, stat->n_muid); - } - - if (buf_check_overflow(bufp)) - return 0; - - return stat->size; -} - -/** - * deserialize_stat - safely decode a recieved metadata (stat) structure - * @v9ses: session info + * buf_get_stat - safely decode a recieved metadata (stat) structure * @bufp: buffer to deserialize * @stat: metadata (stat) structure - * @dbufp: buffer to deserialize variable strings into + * @extended: non-zero if 9P2000.u * */ -static inline int -deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp, - struct v9fs_stat *stat, struct cbuf *dbufp) +static inline void +buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended) { - stat->size = buf_get_int16(bufp); stat->type = buf_get_int16(bufp); stat->dev = buf_get_int32(bufp); @@ -365,282 +267,82 @@ deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp, stat->atime = buf_get_int32(bufp); stat->mtime = buf_get_int32(bufp); stat->length = buf_get_int64(bufp); - stat->name = buf_get_stringb(bufp, dbufp); - stat->uid = buf_get_stringb(bufp, dbufp); - stat->gid = buf_get_stringb(bufp, dbufp); - stat->muid = buf_get_stringb(bufp, dbufp); + buf_get_str(bufp, &stat->name); + buf_get_str(bufp, &stat->uid); + buf_get_str(bufp, &stat->gid); + buf_get_str(bufp, &stat->muid); - if (v9ses->extended) { - stat->extension = buf_get_stringb(bufp, dbufp); + if (extended) { + buf_get_str(bufp, &stat->extension); stat->n_uid = buf_get_int32(bufp); stat->n_gid = buf_get_int32(bufp); stat->n_muid = buf_get_int32(bufp); } - - if (buf_check_overflow(bufp) || buf_check_overflow(dbufp)) - return 0; - - return stat->size + 2; -} - -/** - * deserialize_statb - wrapper for decoding a received metadata structure - * @v9ses: session info - * @bufp: buffer to deserialize - * @dbufp: buffer to deserialize variable strings into - * - */ - -static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info - *v9ses, struct cbuf *bufp, - struct cbuf *dbufp) -{ - struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat)); - - if (ret) { - int n = deserialize_stat(v9ses, bufp, ret, dbufp); - if (n <= 0) - return NULL; - } - - return ret; } /** * v9fs_deserialize_stat - decode a received metadata structure - * @v9ses: session info * @buf: buffer to deserialize * @buflen: length of received buffer * @stat: metadata structure to decode into - * @statlen: length of destination metadata structure + * @extended: non-zero if 9P2000.u * + * Note: stat will point to the buf region. */ int -v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf, - u32 buflen, struct v9fs_stat *stat, u32 statlen) +v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat, + int extended) { struct cbuf buffer; struct cbuf *bufp = &buffer; - struct cbuf dbuffer; - struct cbuf *dbufp = &dbuffer; + unsigned char *p; buf_init(bufp, buf, buflen); - buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat), - statlen - sizeof(struct v9fs_stat)); - - return deserialize_stat(v9ses, bufp, stat, dbufp); -} - -static inline int -v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall) -{ - int size = 4 + 1 + 2; /* size[4] msg[1] tag[2] */ - int i = 0; - - switch (fcall->id) { - default: - eprintk(KERN_ERR, "bad msg type %d\n", fcall->id); - return 0; - case TVERSION: /* msize[4] version[s] */ - size += 4 + 2 + strlen(fcall->params.tversion.version); - break; - case TAUTH: /* afid[4] uname[s] aname[s] */ - size += 4 + 2 + strlen(fcall->params.tauth.uname) + - 2 + strlen(fcall->params.tauth.aname); - break; - case TFLUSH: /* oldtag[2] */ - size += 2; - break; - case TATTACH: /* fid[4] afid[4] uname[s] aname[s] */ - size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) + - 2 + strlen(fcall->params.tattach.aname); - break; - case TWALK: /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */ - size += 4 + 4 + 2; - /* now compute total for the array of names */ - for (i = 0; i < fcall->params.twalk.nwname; i++) - size += 2 + strlen(fcall->params.twalk.wnames[i]); - break; - case TOPEN: /* fid[4] mode[1] */ - size += 4 + 1; - break; - case TCREATE: /* fid[4] name[s] perm[4] mode[1] */ - size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1; - break; - case TREAD: /* fid[4] offset[8] count[4] */ - size += 4 + 8 + 4; - break; - case TWRITE: /* fid[4] offset[8] count[4] data[count] */ - size += 4 + 8 + 4 + fcall->params.twrite.count; - break; - case TCLUNK: /* fid[4] */ - size += 4; - break; - case TREMOVE: /* fid[4] */ - size += 4; - break; - case TSTAT: /* fid[4] */ - size += 4; - break; - case TWSTAT: /* fid[4] stat[n] */ - fcall->params.twstat.stat->size = - v9fs_size_stat(v9ses, fcall->params.twstat.stat); - size += 4 + 2 + 2 + fcall->params.twstat.stat->size; - } - return size; -} - -/* - * v9fs_serialize_fcall - marshall fcall struct into a packet - * @v9ses: session information - * @fcall: structure to convert - * @data: buffer to serialize fcall into - * @datalen: length of buffer to serialize fcall into - * - */ - -int -v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall, - void *data, u32 datalen) -{ - int i = 0; - struct v9fs_stat *stat = NULL; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - buf_init(bufp, data, datalen); - - if (!fcall) { - eprintk(KERN_ERR, "no fcall\n"); - return -EINVAL; - } - - fcall->size = v9fs_size_fcall(v9ses, fcall); - - buf_put_int32(bufp, fcall->size); - buf_put_int8(bufp, fcall->id); - buf_put_int16(bufp, fcall->tag); - - dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id, - fcall->tag); - - /* now encode it */ - switch (fcall->id) { - default: - eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id); - return -EPROTO; - case TVERSION: - buf_put_int32(bufp, fcall->params.tversion.msize); - buf_put_string(bufp, fcall->params.tversion.version); - break; - case TAUTH: - buf_put_int32(bufp, fcall->params.tauth.afid); - buf_put_string(bufp, fcall->params.tauth.uname); - buf_put_string(bufp, fcall->params.tauth.aname); - break; - case TFLUSH: - buf_put_int16(bufp, fcall->params.tflush.oldtag); - break; - case TATTACH: - buf_put_int32(bufp, fcall->params.tattach.fid); - buf_put_int32(bufp, fcall->params.tattach.afid); - buf_put_string(bufp, fcall->params.tattach.uname); - buf_put_string(bufp, fcall->params.tattach.aname); - break; - case TWALK: - buf_put_int32(bufp, fcall->params.twalk.fid); - buf_put_int32(bufp, fcall->params.twalk.newfid); - buf_put_int16(bufp, fcall->params.twalk.nwname); - for (i = 0; i < fcall->params.twalk.nwname; i++) - buf_put_string(bufp, fcall->params.twalk.wnames[i]); - break; - case TOPEN: - buf_put_int32(bufp, fcall->params.topen.fid); - buf_put_int8(bufp, fcall->params.topen.mode); - break; - case TCREATE: - buf_put_int32(bufp, fcall->params.tcreate.fid); - buf_put_string(bufp, fcall->params.tcreate.name); - buf_put_int32(bufp, fcall->params.tcreate.perm); - buf_put_int8(bufp, fcall->params.tcreate.mode); - break; - case TREAD: - buf_put_int32(bufp, fcall->params.tread.fid); - buf_put_int64(bufp, fcall->params.tread.offset); - buf_put_int32(bufp, fcall->params.tread.count); - break; - case TWRITE: - buf_put_int32(bufp, fcall->params.twrite.fid); - buf_put_int64(bufp, fcall->params.twrite.offset); - buf_put_int32(bufp, fcall->params.twrite.count); - buf_put_data(bufp, fcall->params.twrite.data, - fcall->params.twrite.count); - break; - case TCLUNK: - buf_put_int32(bufp, fcall->params.tclunk.fid); - break; - case TREMOVE: - buf_put_int32(bufp, fcall->params.tremove.fid); - break; - case TSTAT: - buf_put_int32(bufp, fcall->params.tstat.fid); - break; - case TWSTAT: - buf_put_int32(bufp, fcall->params.twstat.fid); - stat = fcall->params.twstat.stat; - - buf_put_int16(bufp, stat->size + 2); - serialize_stat(v9ses, stat, bufp); - break; - } + p = bufp->p; + buf_get_stat(bufp, stat, extended); if (buf_check_overflow(bufp)) - return -EIO; - - return fcall->size; + return 0; + else + return bufp->p - p; } /** * deserialize_fcall - unmarshal a response - * @v9ses: session information - * @msgsize: size of rcall message * @buf: recieved buffer * @buflen: length of received buffer * @rcall: fcall structure to populate * @rcalllen: length of fcall structure to populate + * @extended: non-zero if 9P2000.u * */ int -v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize, - void *buf, u32 buflen, struct v9fs_fcall *rcall, - int rcalllen) +v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall, + int extended) { struct cbuf buffer; struct cbuf *bufp = &buffer; - struct cbuf dbuffer; - struct cbuf *dbufp = &dbuffer; int i = 0; buf_init(bufp, buf, buflen); - buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall), - rcalllen - sizeof(struct v9fs_fcall)); - rcall->size = msgsize; + rcall->size = buf_get_int32(bufp); rcall->id = buf_get_int8(bufp); rcall->tag = buf_get_int16(bufp); dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id, rcall->tag); + switch (rcall->id) { default: eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id); return -EPROTO; case RVERSION: rcall->params.rversion.msize = buf_get_int32(bufp); - rcall->params.rversion.version = buf_get_stringb(bufp, dbufp); + buf_get_str(bufp, &rcall->params.rversion.version); break; case RFLUSH: break; @@ -651,34 +353,27 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize, break; case RWALK: rcall->params.rwalk.nwqid = buf_get_int16(bufp); - rcall->params.rwalk.wqids = buf_alloc(dbufp, - rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid)); - if (rcall->params.rwalk.wqids) - for (i = 0; i < rcall->params.rwalk.nwqid; i++) { - rcall->params.rwalk.wqids[i].type = - buf_get_int8(bufp); - rcall->params.rwalk.wqids[i].version = - buf_get_int16(bufp); - rcall->params.rwalk.wqids[i].path = - buf_get_int64(bufp); - } + if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) { + eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n", + V9FS_MAXWELEM, rcall->params.rwalk.nwqid); + return -EPROTO; + } + + for (i = 0; i < rcall->params.rwalk.nwqid; i++) + buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]); break; case ROPEN: - rcall->params.ropen.qid.type = buf_get_int8(bufp); - rcall->params.ropen.qid.version = buf_get_int32(bufp); - rcall->params.ropen.qid.path = buf_get_int64(bufp); + buf_get_qid(bufp, &rcall->params.ropen.qid); rcall->params.ropen.iounit = buf_get_int32(bufp); break; case RCREATE: - rcall->params.rcreate.qid.type = buf_get_int8(bufp); - rcall->params.rcreate.qid.version = buf_get_int32(bufp); - rcall->params.rcreate.qid.path = buf_get_int64(bufp); + buf_get_qid(bufp, &rcall->params.rcreate.qid); rcall->params.rcreate.iounit = buf_get_int32(bufp); break; case RREAD: rcall->params.rread.count = buf_get_int32(bufp); - rcall->params.rread.data = buf_get_datab(bufp, dbufp, - rcall->params.rread.count); + rcall->params.rread.data = bufp->p; + buf_check_size(bufp, rcall->params.rread.count); break; case RWRITE: rcall->params.rwrite.count = buf_get_int32(bufp); @@ -689,20 +384,443 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize, break; case RSTAT: buf_get_int16(bufp); - rcall->params.rstat.stat = - deserialize_statb(v9ses, bufp, dbufp); + buf_get_stat(bufp, &rcall->params.rstat.stat, extended); break; case RWSTAT: break; case RERROR: - rcall->params.rerror.error = buf_get_stringb(bufp, dbufp); - if (v9ses->extended) + buf_get_str(bufp, &rcall->params.rerror.error); + if (extended) rcall->params.rerror.errno = buf_get_int16(bufp); break; } - if (buf_check_overflow(bufp) || buf_check_overflow(dbufp)) + if (buf_check_overflow(bufp)) { + dprintk(DEBUG_ERROR, "buffer overflow\n"); return -EIO; + } + + return bufp->p - bufp->sp; +} + +static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p) +{ + *p = val; + buf_put_int8(bufp, val); +} + +static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p) +{ + *p = val; + buf_put_int16(bufp, val); +} + +static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p) +{ + *p = val; + buf_put_int32(bufp, val); +} + +static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p) +{ + *p = val; + buf_put_int64(bufp, val); +} - return rcall->size; +static inline void +v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str) +{ + if (data) { + str->len = strlen(data); + str->str = bufp->p; + } else { + str->len = 0; + str->str = NULL; + } + + buf_put_stringn(bufp, data, str->len); +} + +static inline int +v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count, + unsigned char **pdata) +{ + *pdata = buf_alloc(bufp, count); + return copy_from_user(*pdata, data, count); +} + +static void +v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat, + struct v9fs_stat *stat, int statsz, int extended) +{ + v9fs_put_int16(bufp, statsz, &stat->size); + v9fs_put_int16(bufp, wstat->type, &stat->type); + v9fs_put_int32(bufp, wstat->dev, &stat->dev); + v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type); + v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version); + v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path); + v9fs_put_int32(bufp, wstat->mode, &stat->mode); + v9fs_put_int32(bufp, wstat->atime, &stat->atime); + v9fs_put_int32(bufp, wstat->mtime, &stat->mtime); + v9fs_put_int64(bufp, wstat->length, &stat->length); + + v9fs_put_str(bufp, wstat->name, &stat->name); + v9fs_put_str(bufp, wstat->uid, &stat->uid); + v9fs_put_str(bufp, wstat->gid, &stat->gid); + v9fs_put_str(bufp, wstat->muid, &stat->muid); + + if (extended) { + v9fs_put_str(bufp, wstat->extension, &stat->extension); + v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid); + v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid); + v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid); + } +} + +static struct v9fs_fcall * +v9fs_create_common(struct cbuf *bufp, u32 size, u8 id) +{ + struct v9fs_fcall *fc; + + size += 4 + 1 + 2; /* size[4] id[1] tag[2] */ + fc = kmalloc(sizeof(struct v9fs_fcall) + size, GFP_KERNEL); + if (!fc) + return ERR_PTR(-ENOMEM); + + fc->sdata = (char *)fc + sizeof(*fc); + + buf_init(bufp, (char *)fc->sdata, size); + v9fs_put_int32(bufp, size, &fc->size); + v9fs_put_int8(bufp, id, &fc->id); + v9fs_put_int16(bufp, V9FS_NOTAG, &fc->tag); + + return fc; +} + +void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag) +{ + fc->tag = tag; + *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag); +} + +struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 2 + strlen(version); /* msize[4] version[s] */ + fc = v9fs_create_common(bufp, size, TVERSION); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, msize, &fc->params.tversion.msize); + v9fs_put_str(bufp, version, &fc->params.tversion.version); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 2 + strlen(uname) + 2 + strlen(aname); /* afid[4] uname[s] aname[s] */ + fc = v9fs_create_common(bufp, size, TAUTH); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, afid, &fc->params.tauth.afid); + v9fs_put_str(bufp, uname, &fc->params.tauth.uname); + v9fs_put_str(bufp, aname, &fc->params.tauth.aname); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall * +v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname); /* fid[4] afid[4] uname[s] aname[s] */ + fc = v9fs_create_common(bufp, size, TATTACH); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.tattach.fid); + v9fs_put_int32(bufp, afid, &fc->params.tattach.afid); + v9fs_put_str(bufp, uname, &fc->params.tattach.uname); + v9fs_put_str(bufp, aname, &fc->params.tattach.aname); + + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tflush(u16 oldtag) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 2; /* oldtag[2] */ + fc = v9fs_create_common(bufp, size, TFLUSH); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int16(bufp, oldtag, &fc->params.tflush.oldtag); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname, + char **wnames) +{ + int i, size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + if (nwname > V9FS_MAXWELEM) { + dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM); + return NULL; + } + + size = 4 + 4 + 2; /* fid[4] newfid[4] nwname[2] ... */ + for (i = 0; i < nwname; i++) { + size += 2 + strlen(wnames[i]); /* wname[s] */ + } + + fc = v9fs_create_common(bufp, size, TWALK); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.twalk.fid); + v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid); + v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname); + for (i = 0; i < nwname; i++) { + v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]); + } + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 1; /* fid[4] mode[1] */ + fc = v9fs_create_common(bufp, size, TOPEN); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.topen.fid); + v9fs_put_int8(bufp, mode, &fc->params.topen.mode); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */ + fc = v9fs_create_common(bufp, size, TCREATE); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.tcreate.fid); + v9fs_put_str(bufp, name, &fc->params.tcreate.name); + v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm); + v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 8 + 4; /* fid[4] offset[8] count[4] */ + fc = v9fs_create_common(bufp, size, TREAD); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.tread.fid); + v9fs_put_int64(bufp, offset, &fc->params.tread.offset); + v9fs_put_int32(bufp, count, &fc->params.tread.count); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count, + const char __user * data) +{ + int size, err; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4 + 8 + 4 + count; /* fid[4] offset[8] count[4] data[count] */ + fc = v9fs_create_common(bufp, size, TWRITE); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.twrite.fid); + v9fs_put_int64(bufp, offset, &fc->params.twrite.offset); + v9fs_put_int32(bufp, count, &fc->params.twrite.count); + err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data); + if (err) { + kfree(fc); + fc = ERR_PTR(err); + } + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tclunk(u32 fid) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4; /* fid[4] */ + fc = v9fs_create_common(bufp, size, TCLUNK); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.tclunk.fid); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tremove(u32 fid) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4; /* fid[4] */ + fc = v9fs_create_common(bufp, size, TREMOVE); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.tremove.fid); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_tstat(u32 fid) +{ + int size; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + size = 4; /* fid[4] */ + fc = v9fs_create_common(bufp, size, TSTAT); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.tstat.fid); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; +} + +struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat, + int extended) +{ + int size, statsz; + struct v9fs_fcall *fc; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + statsz = v9fs_size_wstat(wstat, extended); + size = 4 + 2 + 2 + statsz; /* fid[4] stat[n] */ + fc = v9fs_create_common(bufp, size, TWSTAT); + if (IS_ERR(fc)) + goto error; + + v9fs_put_int32(bufp, fid, &fc->params.twstat.fid); + buf_put_int16(bufp, statsz + 2); + v9fs_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, extended); + + if (buf_check_overflow(bufp)) { + kfree(fc); + fc = ERR_PTR(-ENOMEM); + } + error: + return fc; } diff --git a/fs/9p/conv.h b/fs/9p/conv.h index ee849613c61a..26a736e4a2e7 100644 --- a/fs/9p/conv.h +++ b/fs/9p/conv.h @@ -1,8 +1,9 @@ /* * linux/fs/9p/conv.h * - * 9P protocol conversion definitions + * 9P protocol conversion definitions. * + * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> * @@ -24,13 +25,27 @@ * */ -int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf, - u32 buflen, struct v9fs_stat *stat, u32 statlen); -int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall, - void *buf, u32 buflen); -int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen, - void *buf, u32 buflen, struct v9fs_fcall *rcall, - int rcalllen); +int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat, + int extended); +int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall, + int extended); -/* this one is actually in error.c right now */ -int v9fs_errstr2errno(char *errstr); +void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag); + +struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version); +struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname); +struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname, + char *aname); +struct v9fs_fcall *v9fs_create_tflush(u16 oldtag); +struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname, + char **wnames); +struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode); +struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode); +struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count); +struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count, + const char __user *data); +struct v9fs_fcall *v9fs_create_tclunk(u32 fid); +struct v9fs_fcall *v9fs_create_tremove(u32 fid); +struct v9fs_fcall *v9fs_create_tstat(u32 fid); +struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat, + int extended); diff --git a/fs/9p/debug.h b/fs/9p/debug.h index 4445f06919d9..fe551032788b 100644 --- a/fs/9p/debug.h +++ b/fs/9p/debug.h @@ -51,16 +51,23 @@ do { \ #if DEBUG_DUMP_PKT static inline void dump_data(const unsigned char *data, unsigned int datalen) { - int i, j; - int len = datalen; + int i, n; + char buf[5*8]; - printk(KERN_DEBUG "data "); - for (i = 0; i < len; i += 4) { - for (j = 0; (j < 4) && (i + j < len); j++) - printk(KERN_DEBUG "%02x", data[i + j]); - printk(KERN_DEBUG " "); + n = 0; + i = 0; + while (i < datalen) { + n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]); + if (i%4 == 0) + n += snprintf(buf+n, sizeof(buf)-n, " "); + + if (i%16 == 0) { + dprintk(DEBUG_ERROR, "%s\n", buf); + n = 0; + } } - printk(KERN_DEBUG "\n"); + + dprintk(DEBUG_ERROR, "%s\n", buf); } #else /* DEBUG_DUMP_PKT */ static inline void dump_data(const unsigned char *data, unsigned int datalen) diff --git a/fs/9p/error.c b/fs/9p/error.c index 834cb179e388..e4b6f8f38b6f 100644 --- a/fs/9p/error.c +++ b/fs/9p/error.c @@ -33,7 +33,6 @@ #include <linux/list.h> #include <linux/jhash.h> -#include <linux/string.h> #include "debug.h" #include "error.h" @@ -55,7 +54,8 @@ int v9fs_error_init(void) /* load initial error map into hash table */ for (c = errmap; c->name != NULL; c++) { - bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ; + c->namelen = strlen(c->name); + bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ; INIT_HLIST_NODE(&c->list); hlist_add_head(&c->list, &hash_errmap[bucket]); } @@ -69,15 +69,15 @@ int v9fs_error_init(void) * */ -int v9fs_errstr2errno(char *errstr) +int v9fs_errstr2errno(char *errstr, int len) { int errno = 0; struct hlist_node *p = NULL; struct errormap *c = NULL; - int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ; + int bucket = jhash(errstr, len, 0) % ERRHASHSZ; hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { - if (!strcmp(c->name, errstr)) { + if (c->namelen==len && !memcmp(c->name, errstr, len)) { errno = c->val; break; } diff --git a/fs/9p/error.h b/fs/9p/error.h index 78f89acf7c9a..a9794e85fe51 100644 --- a/fs/9p/error.h +++ b/fs/9p/error.h @@ -36,6 +36,7 @@ struct errormap { char *name; int val; + int namelen; struct hlist_node list; }; @@ -175,4 +176,3 @@ static struct errormap errmap[] = { }; extern int v9fs_error_init(void); -extern int v9fs_errstr2errno(char *errstr); diff --git a/fs/9p/fid.c b/fs/9p/fid.c index d95f8626d170..eda449778fa5 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -31,9 +31,6 @@ #include "v9fs.h" #include "9p.h" #include "v9fs_vfs.h" -#include "transport.h" -#include "mux.h" -#include "conv.h" #include "fid.h" /** @@ -164,7 +161,7 @@ static struct v9fs_fid *v9fs_fid_walk_up(struct dentry *dentry) return v9fs_fid_create(dentry, v9ses, fidnum, 0); clunk_fid: - v9fs_t_clunk(v9ses, fidnum, NULL); + v9fs_t_clunk(v9ses, fidnum); return ERR_PTR(err); } diff --git a/fs/9p/mux.c b/fs/9p/mux.c index 8835b576f744..945cb368d451 100644 --- a/fs/9p/mux.c +++ b/fs/9p/mux.c @@ -4,7 +4,7 @@ * Protocol Multiplexer * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net> + * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,448 +28,943 @@ #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/poll.h> #include <linux/kthread.h> #include <linux/idr.h> #include "debug.h" #include "v9fs.h" #include "9p.h" -#include "transport.h" #include "conv.h" +#include "transport.h" #include "mux.h" +#define ERREQFLUSH 1 +#define SCHED_TIMEOUT 10 +#define MAXPOLLWADDR 2 + +enum { + Rworksched = 1, /* read work scheduled or running */ + Rpending = 2, /* can read */ + Wworksched = 4, /* write work scheduled or running */ + Wpending = 8, /* can write */ +}; + +struct v9fs_mux_poll_task; + +struct v9fs_req { + int tag; + struct v9fs_fcall *tcall; + struct v9fs_fcall *rcall; + int err; + v9fs_mux_req_callback cb; + void *cba; + struct list_head req_list; +}; + +struct v9fs_mux_data { + spinlock_t lock; + struct list_head mux_list; + struct v9fs_mux_poll_task *poll_task; + int msize; + unsigned char *extended; + struct v9fs_transport *trans; + struct v9fs_idpool tidpool; + int err; + wait_queue_head_t equeue; + struct list_head req_list; + struct list_head unsent_req_list; + struct v9fs_fcall *rcall; + int rpos; + char *rbuf; + int wpos; + int wsize; + char *wbuf; + wait_queue_t poll_wait[MAXPOLLWADDR]; + wait_queue_head_t *poll_waddr[MAXPOLLWADDR]; + poll_table pt; + struct work_struct rq; + struct work_struct wq; + unsigned long wsched; +}; + +struct v9fs_mux_poll_task { + struct task_struct *task; + struct list_head mux_list; + int muxnum; +}; + +struct v9fs_mux_rpc { + struct v9fs_mux_data *m; + struct v9fs_req *req; + int err; + struct v9fs_fcall *rcall; + wait_queue_head_t wqueue; +}; + +static int v9fs_poll_proc(void *); +static void v9fs_read_work(void *); +static void v9fs_write_work(void *); +static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address, + poll_table * p); +static u16 v9fs_mux_get_tag(struct v9fs_mux_data *); +static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16); + +static DECLARE_MUTEX(v9fs_mux_task_lock); +static struct workqueue_struct *v9fs_mux_wq; + +static int v9fs_mux_num; +static int v9fs_mux_poll_task_num; +static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100]; + +int v9fs_mux_global_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) + v9fs_mux_poll_tasks[i].task = NULL; + + v9fs_mux_wq = create_workqueue("v9fs"); + if (!v9fs_mux_wq) + return -ENOMEM; + + return 0; +} + +void v9fs_mux_global_exit(void) +{ + destroy_workqueue(v9fs_mux_wq); +} + /** - * dprintcond - print condition of session info - * @v9ses: session info structure - * @req: RPC request structure + * v9fs_mux_calc_poll_procs - calculates the number of polling procs + * based on the number of mounted v9fs filesystems. * + * The current implementation returns sqrt of the number of mounts. */ +inline int v9fs_mux_calc_poll_procs(int muxnum) +{ + int n; + + if (v9fs_mux_poll_task_num) + n = muxnum / v9fs_mux_poll_task_num + + (muxnum % v9fs_mux_poll_task_num ? 1 : 0); + else + n = 1; + + if (n > ARRAY_SIZE(v9fs_mux_poll_tasks)) + n = ARRAY_SIZE(v9fs_mux_poll_tasks); + + return n; +} -static inline int -dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) +static int v9fs_mux_poll_start(struct v9fs_mux_data *m) { - dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status, - req->rcall); + int i, n; + struct v9fs_mux_poll_task *vpt, *vptlast; + struct task_struct *pproc; + + dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num, + v9fs_mux_poll_task_num); + up(&v9fs_mux_task_lock); + + n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1); + if (n > v9fs_mux_poll_task_num) { + for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) { + if (v9fs_mux_poll_tasks[i].task == NULL) { + vpt = &v9fs_mux_poll_tasks[i]; + dprintk(DEBUG_MUX, "create proc %p\n", vpt); + pproc = kthread_create(v9fs_poll_proc, vpt, + "v9fs-poll"); + + if (!IS_ERR(pproc)) { + vpt->task = pproc; + INIT_LIST_HEAD(&vpt->mux_list); + vpt->muxnum = 0; + v9fs_mux_poll_task_num++; + wake_up_process(vpt->task); + } + break; + } + } + + if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) + dprintk(DEBUG_ERROR, "warning: no free poll slots\n"); + } + + n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num + + ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 1 : 0); + + vptlast = NULL; + for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) { + vpt = &v9fs_mux_poll_tasks[i]; + if (vpt->task != NULL) { + vptlast = vpt; + if (vpt->muxnum < n) { + dprintk(DEBUG_MUX, "put in proc %d\n", i); + list_add(&m->mux_list, &vpt->mux_list); + vpt->muxnum++; + m->poll_task = vpt; + memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); + init_poll_funcptr(&m->pt, v9fs_pollwait); + break; + } + } + } + + if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) { + if (vptlast == NULL) + return -ENOMEM; + + dprintk(DEBUG_MUX, "put in proc %d\n", i); + list_add(&m->mux_list, &vptlast->mux_list); + vptlast->muxnum++; + m->poll_task = vptlast; + memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); + init_poll_funcptr(&m->pt, v9fs_pollwait); + } + + v9fs_mux_num++; + down(&v9fs_mux_task_lock); + return 0; } +static void v9fs_mux_poll_stop(struct v9fs_mux_data *m) +{ + int i; + struct v9fs_mux_poll_task *vpt; + + up(&v9fs_mux_task_lock); + vpt = m->poll_task; + list_del(&m->mux_list); + for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { + if (m->poll_waddr[i] != NULL) { + remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]); + m->poll_waddr[i] = NULL; + } + } + vpt->muxnum--; + if (!vpt->muxnum) { + dprintk(DEBUG_MUX, "destroy proc %p\n", vpt); + send_sig(SIGKILL, vpt->task, 1); + vpt->task = NULL; + v9fs_mux_poll_task_num--; + } + v9fs_mux_num--; + down(&v9fs_mux_task_lock); +} + /** - * xread - force read of a certain number of bytes - * @v9ses: session info structure - * @ptr: pointer to buffer - * @sz: number of bytes to read + * v9fs_mux_init - allocate and initialize the per-session mux data + * Creates the polling task if this is the first session. * - * Chuck Cranor CS-533 project1 + * @trans - transport structure + * @msize - maximum message size + * @extended - pointer to the extended flag */ - -static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz) +struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize, + unsigned char *extended) { - int rd = 0; - int ret = 0; - while (rd < sz) { - ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd); - if (ret <= 0) { - dprintk(DEBUG_ERROR, "xread errno %d\n", ret); - return ret; + int i, n; + struct v9fs_mux_data *m, *mtmp; + + dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize); + m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL); + if (!m) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&m->lock); + INIT_LIST_HEAD(&m->mux_list); + m->msize = msize; + m->extended = extended; + m->trans = trans; + idr_init(&m->tidpool.pool); + init_MUTEX(&m->tidpool.lock); + m->err = 0; + init_waitqueue_head(&m->equeue); + INIT_LIST_HEAD(&m->req_list); + INIT_LIST_HEAD(&m->unsent_req_list); + m->rcall = NULL; + m->rpos = 0; + m->rbuf = NULL; + m->wpos = m->wsize = 0; + m->wbuf = NULL; + INIT_WORK(&m->rq, v9fs_read_work, m); + INIT_WORK(&m->wq, v9fs_write_work, m); + m->wsched = 0; + memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); + m->poll_task = NULL; + n = v9fs_mux_poll_start(m); + if (n) + return ERR_PTR(n); + + n = trans->poll(trans, &m->pt); + if (n & POLLIN) { + dprintk(DEBUG_MUX, "mux %p can read\n", m); + set_bit(Rpending, &m->wsched); + } + + if (n & POLLOUT) { + dprintk(DEBUG_MUX, "mux %p can write\n", m); + set_bit(Wpending, &m->wsched); + } + + for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { + if (IS_ERR(m->poll_waddr[i])) { + v9fs_mux_poll_stop(m); + mtmp = (void *)m->poll_waddr; /* the error code */ + kfree(m); + m = mtmp; + break; } - rd += ret; - ptr += ret; } - return (rd); + + return m; } /** - * read_message - read a full 9P2000 fcall packet - * @v9ses: session info structure - * @rcall: fcall structure to read into - * @rcalllen: size of fcall buffer - * + * v9fs_mux_destroy - cancels all pending requests and frees mux resources */ +void v9fs_mux_destroy(struct v9fs_mux_data *m) +{ + dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m, + m->mux_list.prev, m->mux_list.next); + v9fs_mux_cancel(m, -ECONNRESET); + + if (!list_empty(&m->req_list)) { + /* wait until all processes waiting on this session exit */ + dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n", + m); + wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000); + dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m, + list_empty(&m->req_list)); + } + + v9fs_mux_poll_stop(m); + m->trans = NULL; + + kfree(m); +} -static int -read_message(struct v9fs_session_info *v9ses, - struct v9fs_fcall *rcall, int rcalllen) +/** + * v9fs_pollwait - called by files poll operation to add v9fs-poll task + * to files wait queue + */ +static void +v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address, + poll_table * p) { - unsigned char buf[4]; - void *data; - int size = 0; - int res = 0; - - res = xread(v9ses, buf, sizeof(buf)); - if (res < 0) { - dprintk(DEBUG_ERROR, - "Reading of count field failed returned: %d\n", res); - return res; + int i; + struct v9fs_mux_data *m; + + m = container_of(p, struct v9fs_mux_data, pt); + for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) + if (m->poll_waddr[i] == NULL) + break; + + if (i >= ARRAY_SIZE(m->poll_waddr)) { + dprintk(DEBUG_ERROR, "not enough wait_address slots\n"); + return; } - if (res < 4) { - dprintk(DEBUG_ERROR, - "Reading of count field failed returned: %d\n", res); - return -EIO; + m->poll_waddr[i] = wait_address; + + if (!wait_address) { + dprintk(DEBUG_ERROR, "no wait_address\n"); + m->poll_waddr[i] = ERR_PTR(-EIO); + return; } - size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); - dprintk(DEBUG_MUX, "got a packet count: %d\n", size); + init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task); + add_wait_queue(wait_address, &m->poll_wait[i]); +} + +/** + * v9fs_poll_mux - polls a mux and schedules read or write works if necessary + */ +static inline void v9fs_poll_mux(struct v9fs_mux_data *m) +{ + int n; - /* adjust for the four bytes of size */ - size -= 4; + if (m->err < 0) + return; - if (size > v9ses->maxdata) { - dprintk(DEBUG_ERROR, "packet too big: %d\n", size); - return -E2BIG; + n = m->trans->poll(m->trans, NULL); + if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { + dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n); + if (n >= 0) + n = -ECONNRESET; + v9fs_mux_cancel(m, n); } - data = kmalloc(size, GFP_KERNEL); - if (!data) { - eprintk(KERN_WARNING, "out of memory\n"); - return -ENOMEM; + if (n & POLLIN) { + set_bit(Rpending, &m->wsched); + dprintk(DEBUG_MUX, "mux %p can read\n", m); + if (!test_and_set_bit(Rworksched, &m->wsched)) { + dprintk(DEBUG_MUX, "schedule read work mux %p\n", m); + queue_work(v9fs_mux_wq, &m->rq); + } } - res = xread(v9ses, data, size); - if (res < size) { - dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n", - res); - kfree(data); - return res; + if (n & POLLOUT) { + set_bit(Wpending, &m->wsched); + dprintk(DEBUG_MUX, "mux %p can write\n", m); + if ((m->wsize || !list_empty(&m->unsent_req_list)) + && !test_and_set_bit(Wworksched, &m->wsched)) { + dprintk(DEBUG_MUX, "schedule write work mux %p\n", m); + queue_work(v9fs_mux_wq, &m->wq); + } } +} + +/** + * v9fs_poll_proc - polls all v9fs transports for new events and queues + * the appropriate work to the work queue + */ +static int v9fs_poll_proc(void *a) +{ + struct v9fs_mux_data *m, *mtmp; + struct v9fs_mux_poll_task *vpt; - /* we now have an in-memory string that is the reply. - * deserialize it. There is very little to go wrong at this point - * save for v9fs_alloc errors. - */ - res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata, - rcall, rcalllen); + vpt = a; + dprintk(DEBUG_MUX, "start %p %p\n", current, vpt); + allow_signal(SIGKILL); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + break; - kfree(data); + list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) { + v9fs_poll_mux(m); + } - if (res < 0) - return res; + dprintk(DEBUG_MUX, "sleeping...\n"); + schedule_timeout(SCHED_TIMEOUT * HZ); + } + __set_current_state(TASK_RUNNING); + dprintk(DEBUG_MUX, "finish\n"); return 0; } /** - * v9fs_recv - receive an RPC response for a particular tag - * @v9ses: session info structure - * @req: RPC request structure - * + * v9fs_write_work - called when a transport can send some data */ - -static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) +static void v9fs_write_work(void *a) { - int ret = 0; + int n, err; + struct v9fs_mux_data *m; + struct v9fs_req *req; - dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag); - ret = wait_event_interruptible(v9ses->read_wait, - ((v9ses->transport->status != Connected) || - (req->rcall != 0) || (req->err < 0) || - dprintcond(v9ses, req))); + m = a; - dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall); + if (m->err < 0) { + clear_bit(Wworksched, &m->wsched); + return; + } - spin_lock(&v9ses->muxlock); - list_del(&req->next); - spin_unlock(&v9ses->muxlock); + if (!m->wsize) { + if (list_empty(&m->unsent_req_list)) { + clear_bit(Wworksched, &m->wsched); + return; + } - if (req->err < 0) - return req->err; + spin_lock(&m->lock); + req = + list_entry(m->unsent_req_list.next, struct v9fs_req, + req_list); + list_move_tail(&req->req_list, &m->req_list); + m->wbuf = req->tcall->sdata; + m->wsize = req->tcall->size; + m->wpos = 0; + dump_data(m->wbuf, m->wsize); + spin_unlock(&m->lock); + } - if (v9ses->transport->status == Disconnected) - return -ECONNRESET; + dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize); + clear_bit(Wpending, &m->wsched); + err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos); + dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err); + if (err == -EAGAIN) { + clear_bit(Wworksched, &m->wsched); + return; + } - return ret; -} + if (err <= 0) + goto error; -/** - * v9fs_send - send a 9P request - * @v9ses: session info structure - * @req: RPC request to send - * - */ + m->wpos += err; + if (m->wpos == m->wsize) + m->wpos = m->wsize = 0; + + if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { + if (test_and_clear_bit(Wpending, &m->wsched)) + n = POLLOUT; + else + n = m->trans->poll(m->trans, NULL); + + if (n & POLLOUT) { + dprintk(DEBUG_MUX, "schedule write work mux %p\n", m); + queue_work(v9fs_mux_wq, &m->wq); + } else + clear_bit(Wworksched, &m->wsched); + } else + clear_bit(Wworksched, &m->wsched); + + return; -static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) + error: + v9fs_mux_cancel(m, err); + clear_bit(Wworksched, &m->wsched); +} + +static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req) { - int ret = -1; - void *data = NULL; - struct v9fs_fcall *tcall = req->tcall; + int ecode, tag; + struct v9fs_str *ename; - data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL); - if (!data) - return -ENOMEM; + tag = req->tag; + if (req->rcall->id == RERROR && !req->err) { + ecode = req->rcall->params.rerror.errno; + ename = &req->rcall->params.rerror.error; - tcall->size = 0; /* enforce size recalculation */ - ret = - v9fs_serialize_fcall(v9ses, tcall, data, - v9ses->maxdata + V9FS_IOHDRSZ); - if (ret < 0) - goto free_data; + dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str); - spin_lock(&v9ses->muxlock); - list_add(&req->next, &v9ses->mux_fcalls); - spin_unlock(&v9ses->muxlock); + if (*m->extended) + req->err = -ecode; - dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag, - tcall->size); - ret = v9ses->transport->write(v9ses->transport, data, tcall->size); + if (!req->err) { + req->err = v9fs_errstr2errno(ename->str, ename->len); - if (ret != tcall->size) { - spin_lock(&v9ses->muxlock); - list_del(&req->next); - kfree(req->rcall); + if (!req->err) { /* string match failed */ + PRINT_FCALL_ERROR("unknown error", req->rcall); + } + + if (!req->err) + req->err = -ESERVERFAULT; + } + } else if (req->tcall && req->rcall->id != req->tcall->id + 1) { + dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n", + req->tcall->id + 1, req->rcall->id); + if (!req->err) + req->err = -EIO; + } - spin_unlock(&v9ses->muxlock); - if (ret >= 0) - ret = -EREMOTEIO; + if (req->cb && req->err != ERREQFLUSH) { + dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n", + req->tcall, req->rcall); + + (*req->cb) (req->cba, req->tcall, req->rcall, req->err); + req->cb = NULL; } else - ret = 0; + kfree(req->rcall); - free_data: - kfree(data); - return ret; + v9fs_mux_put_tag(m, tag); + + wake_up(&m->equeue); + kfree(req); } /** - * v9fs_mux_rpc - send a request, receive a response - * @v9ses: session info structure - * @tcall: fcall to send - * @rcall: buffer to place response into - * + * v9fs_read_work - called when there is some data to be read from a transport */ - -long -v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall, - struct v9fs_fcall **rcall) +static void v9fs_read_work(void *a) { - int tid = -1; - struct v9fs_fcall *fcall = NULL; - struct v9fs_rpcreq req; - int ret = -1; - - if (!v9ses) - return -EINVAL; - - if (!v9ses->transport || v9ses->transport->status != Connected) - return -EIO; + int n, err; + struct v9fs_mux_data *m; + struct v9fs_req *req, *rptr, *rreq; + struct v9fs_fcall *rcall; + char *rbuf; + + m = a; + + if (m->err < 0) + return; + + rcall = NULL; + dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos); + + if (!m->rcall) { + m->rcall = + kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL); + if (!m->rcall) { + err = -ENOMEM; + goto error; + } - if (rcall) - *rcall = NULL; + m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall); + m->rpos = 0; + } - if (tcall->id != TVERSION) { - tid = v9fs_get_idpool(&v9ses->tidpool); - if (tid < 0) - return -ENOMEM; + clear_bit(Rpending, &m->wsched); + err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos); + dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err); + if (err == -EAGAIN) { + clear_bit(Rworksched, &m->wsched); + return; } - tcall->tag = tid; + if (err <= 0) + goto error; - req.tcall = tcall; - req.err = 0; - req.rcall = NULL; + m->rpos += err; + while (m->rpos > 4) { + n = le32_to_cpu(*(__le32 *) m->rbuf); + if (n >= m->msize) { + dprintk(DEBUG_ERROR, + "requested packet size too big: %d\n", n); + err = -EIO; + goto error; + } - ret = v9fs_send(v9ses, &req); + if (m->rpos < n) + break; - if (ret < 0) { - if (tcall->id != TVERSION) - v9fs_put_idpool(tid, &v9ses->tidpool); - dprintk(DEBUG_MUX, "error %d\n", ret); - return ret; - } + dump_data(m->rbuf, n); + err = + v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended); + if (err < 0) { + goto error; + } + + rcall = m->rcall; + rbuf = m->rbuf; + if (m->rpos > n) { + m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize, + GFP_KERNEL); + if (!m->rcall) { + err = -ENOMEM; + goto error; + } - ret = v9fs_recv(v9ses, &req); - - fcall = req.rcall; - - dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret); - if (ret == -ERESTARTSYS) { - if (v9ses->transport->status != Disconnected - && tcall->id != TFLUSH) { - unsigned long flags; - - dprintk(DEBUG_MUX, "flushing the tag: %d\n", - tcall->tag); - clear_thread_flag(TIF_SIGPENDING); - v9fs_t_flush(v9ses, tcall->tag); - spin_lock_irqsave(¤t->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, - flags); - dprintk(DEBUG_MUX, "flushing done\n"); + m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall); + memmove(m->rbuf, rbuf + n, m->rpos - n); + m->rpos -= n; + } else { + m->rcall = NULL; + m->rbuf = NULL; + m->rpos = 0; } - goto release_req; - } else if (ret < 0) - goto release_req; - - if (!fcall) - ret = -EIO; - else { - if (fcall->id == RERROR) { - ret = v9fs_errstr2errno(fcall->params.rerror.error); - if (ret == 0) { /* string match failed */ - if (fcall->params.rerror.errno) - ret = -(fcall->params.rerror.errno); - else - ret = -ESERVERFAULT; + dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id, + rcall->tag); + + req = NULL; + spin_lock(&m->lock); + list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { + if (rreq->tag == rcall->tag) { + req = rreq; + req->rcall = rcall; + list_del(&req->req_list); + spin_unlock(&m->lock); + process_request(m, req); + break; } - } else if (fcall->id != tcall->id + 1) { - dprintk(DEBUG_ERROR, - "fcall mismatch: expected %d, got %d\n", - tcall->id + 1, fcall->id); - ret = -EIO; + + } + + if (!req) { + spin_unlock(&m->lock); + if (err >= 0 && rcall->id != RFLUSH) + dprintk(DEBUG_ERROR, + "unexpected response mux %p id %d tag %d\n", + m, rcall->id, rcall->tag); + kfree(rcall); } } - release_req: - if (tcall->id != TVERSION) - v9fs_put_idpool(tid, &v9ses->tidpool); - if (rcall) - *rcall = fcall; - else - kfree(fcall); + if (!list_empty(&m->req_list)) { + if (test_and_clear_bit(Rpending, &m->wsched)) + n = POLLIN; + else + n = m->trans->poll(m->trans, NULL); + + if (n & POLLIN) { + dprintk(DEBUG_MUX, "schedule read work mux %p\n", m); + queue_work(v9fs_mux_wq, &m->rq); + } else + clear_bit(Rworksched, &m->wsched); + } else + clear_bit(Rworksched, &m->wsched); + + return; - return ret; + error: + v9fs_mux_cancel(m, err); + clear_bit(Rworksched, &m->wsched); } /** - * v9fs_mux_cancel_requests - cancels all pending requests + * v9fs_send_request - send 9P request + * The function can sleep until the request is scheduled for sending. + * The function can be interrupted. Return from the function is not + * a guarantee that the request is sent succesfully. Can return errors + * that can be retrieved by PTR_ERR macros. * - * @v9ses: session info structure - * @err: error code to return to the requests + * @m: mux data + * @tc: request to be sent + * @cb: callback function to call when response is received + * @cba: parameter to pass to the callback function */ -void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err) +static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m, + struct v9fs_fcall *tc, + v9fs_mux_req_callback cb, void *cba) { - struct v9fs_rpcreq *rptr; - struct v9fs_rpcreq *rreq; + int n; + struct v9fs_req *req; - dprintk(DEBUG_MUX, " %d\n", err); - spin_lock(&v9ses->muxlock); - list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { - rreq->err = err; - } - spin_unlock(&v9ses->muxlock); - wake_up_all(&v9ses->read_wait); -} + dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current, + tc, tc->id); + if (m->err < 0) + return ERR_PTR(m->err); -/** - * v9fs_recvproc - kproc to handle demultiplexing responses - * @data: session info structure - * - */ + req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL); + if (!req) + return ERR_PTR(-ENOMEM); -static int v9fs_recvproc(void *data) -{ - struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data; - struct v9fs_fcall *rcall = NULL; - struct v9fs_rpcreq *rptr; - struct v9fs_rpcreq *req; - struct v9fs_rpcreq *rreq; - int err = 0; + if (tc->id == TVERSION) + n = V9FS_NOTAG; + else + n = v9fs_mux_get_tag(m); - allow_signal(SIGKILL); - set_current_state(TASK_INTERRUPTIBLE); - complete(&v9ses->proccmpl); - while (!kthread_should_stop() && err >= 0) { - req = rptr = rreq = NULL; - - rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL); - if (!rcall) { - eprintk(KERN_ERR, "no memory for buffers\n"); - break; - } + if (n < 0) + return ERR_PTR(-ENOMEM); - err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ); - spin_lock(&v9ses->muxlock); - if (err < 0) { - list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { - rreq->err = err; - } - if(err != -ERESTARTSYS) - eprintk(KERN_ERR, - "Transport error while reading message %d\n", err); - } else { - list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { - if (rreq->tcall->tag == rcall->tag) { - req = rreq; - req->rcall = rcall; - break; - } - } - } + v9fs_set_tag(tc, n); - if (req && (req->tcall->id == TFLUSH)) { - struct v9fs_rpcreq *treq = NULL; - list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) { - if (treq->tcall->tag == - req->tcall->params.tflush.oldtag) { - list_del(&rptr->next); - kfree(treq->rcall); - break; - } + req->tag = n; + req->tcall = tc; + req->rcall = NULL; + req->err = 0; + req->cb = cb; + req->cba = cba; + + spin_lock(&m->lock); + list_add_tail(&req->req_list, &m->unsent_req_list); + spin_unlock(&m->lock); + + if (test_and_clear_bit(Wpending, &m->wsched)) + n = POLLOUT; + else + n = m->trans->poll(m->trans, NULL); + + if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) + queue_work(v9fs_mux_wq, &m->wq); + + return req; +} + +static inline void +v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, + int err) +{ + v9fs_mux_req_callback cb; + int tag; + struct v9fs_mux_data *m; + struct v9fs_req *req, *rptr; + + m = a; + dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, tc, + rc, err, tc->params.tflush.oldtag); + + spin_lock(&m->lock); + cb = NULL; + tag = tc->params.tflush.oldtag; + list_for_each_entry_safe(req, rptr, &m->req_list, req_list) { + if (req->tag == tag) { + list_del(&req->req_list); + if (req->cb) { + cb = req->cb; + req->cb = NULL; + spin_unlock(&m->lock); + (*cb) (req->cba, req->tcall, req->rcall, + req->err); } + kfree(req); + wake_up(&m->equeue); + break; } + } - spin_unlock(&v9ses->muxlock); + if (!cb) + spin_unlock(&m->lock); - if (!req) { - if (err >= 0) - dprintk(DEBUG_ERROR, - "unexpected response: id %d tag %d\n", - rcall->id, rcall->tag); + v9fs_mux_put_tag(m, tag); + kfree(tc); + kfree(rc); +} - kfree(rcall); - } +static void +v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req) +{ + struct v9fs_fcall *fc; - wake_up_all(&v9ses->read_wait); - set_current_state(TASK_INTERRUPTIBLE); + dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); + + fc = v9fs_create_tflush(req->tag); + v9fs_send_request(m, fc, v9fs_mux_flush_cb, m); +} + +static void +v9fs_mux_rpc_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err) +{ + struct v9fs_mux_rpc *r; + + if (err == ERREQFLUSH) { + dprintk(DEBUG_MUX, "err req flush\n"); + return; } - v9ses->transport->close(v9ses->transport); + r = a; + dprintk(DEBUG_MUX, "mux %p req %p tc %p rc %p err %d\n", r->m, r->req, + tc, rc, err); + r->rcall = rc; + r->err = err; + wake_up(&r->wqueue); +} - /* Inform all pending processes about the failure */ - wake_up_all(&v9ses->read_wait); +/** + * v9fs_mux_rpc - sends 9P request and waits until a response is available. + * The function can be interrupted. + * @m: mux data + * @tc: request to be sent + * @rc: pointer where a pointer to the response is stored + */ +int +v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, + struct v9fs_fcall **rc) +{ + int err; + unsigned long flags; + struct v9fs_req *req; + struct v9fs_mux_rpc r; + + r.err = 0; + r.rcall = NULL; + r.m = m; + init_waitqueue_head(&r.wqueue); + + if (rc) + *rc = NULL; + + req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r); + if (IS_ERR(req)) { + err = PTR_ERR(req); + dprintk(DEBUG_MUX, "error %d\n", err); + return PTR_ERR(req); + } - if (signal_pending(current)) - complete(&v9ses->proccmpl); + r.req = req; + dprintk(DEBUG_MUX, "mux %p tc %p tag %d rpc %p req %p\n", m, tc, + req->tag, &r, req); + err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); + if (r.err < 0) + err = r.err; + + if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) { + spin_lock(&m->lock); + req->tcall = NULL; + req->err = ERREQFLUSH; + spin_unlock(&m->lock); + + clear_thread_flag(TIF_SIGPENDING); + v9fs_mux_flush_request(m, req); + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } - dprintk(DEBUG_MUX, "recvproc: end\n"); - v9ses->recvproc = NULL; + if (!err) { + if (r.rcall) + dprintk(DEBUG_MUX, "got response id %d tag %d\n", + r.rcall->id, r.rcall->tag); + + if (rc) + *rc = r.rcall; + else + kfree(r.rcall); + } else { + kfree(r.rcall); + dprintk(DEBUG_MUX, "got error %d\n", err); + if (err > 0) + err = -EIO; + } - return err >= 0; + return err; } /** - * v9fs_mux_init - initialize multiplexer (spawn kproc) - * @v9ses: session info structure - * @dev_name: mount device information (to create unique kproc) - * + * v9fs_mux_rpcnb - sends 9P request without waiting for response. + * @m: mux data + * @tc: request to be sent + * @cb: callback function to be called when response arrives + * @cba: value to pass to the callback function */ +int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc, + v9fs_mux_req_callback cb, void *a) +{ + int err; + struct v9fs_req *req; + + req = v9fs_send_request(m, tc, cb, a); + if (IS_ERR(req)) { + err = PTR_ERR(req); + dprintk(DEBUG_MUX, "error %d\n", err); + return PTR_ERR(req); + } + + dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag); + return 0; +} -int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name) +/** + * v9fs_mux_cancel - cancel all pending requests with error + * @m: mux data + * @err: error code + */ +void v9fs_mux_cancel(struct v9fs_mux_data *m, int err) { - char procname[60]; - - strncpy(procname, dev_name, sizeof(procname)); - procname[sizeof(procname) - 1] = 0; - - init_waitqueue_head(&v9ses->read_wait); - init_completion(&v9ses->fcread); - init_completion(&v9ses->proccmpl); - spin_lock_init(&v9ses->muxlock); - INIT_LIST_HEAD(&v9ses->mux_fcalls); - v9ses->recvproc = NULL; - v9ses->curfcall = NULL; - - v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses, - "v9fs_recvproc %s", procname); - - if (IS_ERR(v9ses->recvproc)) { - eprintk(KERN_ERR, "cannot create receiving thread\n"); - v9fs_session_close(v9ses); - return -ECONNABORTED; + struct v9fs_req *req, *rtmp; + LIST_HEAD(cancel_list); + + dprintk(DEBUG_MUX, "mux %p err %d\n", m, err); + m->err = err; + spin_lock(&m->lock); + list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { + list_move(&req->req_list, &cancel_list); } + spin_unlock(&m->lock); - wake_up_process(v9ses->recvproc); - wait_for_completion(&v9ses->proccmpl); + list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { + list_del(&req->req_list); + if (!req->err) + req->err = err; - return 0; + if (req->cb) + (*req->cb) (req->cba, req->tcall, req->rcall, req->err); + else + kfree(req->rcall); + + kfree(req); + } + + wake_up(&m->equeue); +} + +static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m) +{ + int tag; + + tag = v9fs_get_idpool(&m->tidpool); + if (tag < 0) + return V9FS_NOTAG; + else + return (u16) tag; +} + +static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag) +{ + if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tidpool)) + v9fs_put_idpool(tag, &m->tidpool); } diff --git a/fs/9p/mux.h b/fs/9p/mux.h index 4994cb10badf..9473b84f24b2 100644 --- a/fs/9p/mux.h +++ b/fs/9p/mux.h @@ -3,6 +3,7 @@ * * Multiplexer Definitions * + * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * * This program is free software; you can redistribute it and/or modify @@ -23,19 +24,35 @@ * */ -/* structure to manage each RPC transaction */ +struct v9fs_mux_data; -struct v9fs_rpcreq { - struct v9fs_fcall *tcall; - struct v9fs_fcall *rcall; - int err; /* error code if response failed */ +/** + * v9fs_mux_req_callback - callback function that is called when the + * response of a request is received. The callback is called from + * a workqueue and shouldn't block. + * + * @a - the pointer that was specified when the request was send to be + * passed to the callback + * @tc - request call + * @rc - response call + * @err - error code (non-zero if error occured) + */ +typedef void (*v9fs_mux_req_callback)(void *a, struct v9fs_fcall *tc, + struct v9fs_fcall *rc, int err); + +int v9fs_mux_global_init(void); +void v9fs_mux_global_exit(void); - /* XXX - could we put scatter/gather buffers here? */ +struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize, + unsigned char *extended); +void v9fs_mux_destroy(struct v9fs_mux_data *); - struct list_head next; -}; +int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc); +struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m); +int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc); +int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc, + v9fs_mux_req_callback cb, void *a); -int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name); -long v9fs_mux_rpc(struct v9fs_session_info *v9ses, - struct v9fs_fcall *tcall, struct v9fs_fcall **rcall); -void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err); +void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush); +void v9fs_mux_cancel(struct v9fs_mux_data *m, int err); +int v9fs_errstr2errno(char *errstr, int len); diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c index 63b58ce98ff4..1a28ef97a3d1 100644 --- a/fs/9p/trans_fd.c +++ b/fs/9p/trans_fd.c @@ -3,6 +3,7 @@ * * File Descriptor Transport Layer * + * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> * * This program is free software; you can redistribute it and/or modify @@ -106,9 +107,6 @@ v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data) return -ENOPROTOOPT; } - sema_init(&trans->writelock, 1); - sema_init(&trans->readlock, 1); - ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL); if (!ts) @@ -148,12 +146,12 @@ static void v9fs_fd_close(struct v9fs_transport *trans) if (!trans) return; - trans->status = Disconnected; - ts = trans->priv; + ts = xchg(&trans->priv, NULL); if (!ts) return; + trans->status = Disconnected; if (ts->in_file) fput(ts->in_file); @@ -163,10 +161,55 @@ static void v9fs_fd_close(struct v9fs_transport *trans) kfree(ts); } +static unsigned int +v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt) +{ + int ret, n; + struct v9fs_trans_fd *ts; + mm_segment_t oldfs; + + if (!trans) + return -EIO; + + ts = trans->priv; + if (trans->status != Connected || !ts) + return -EIO; + + oldfs = get_fs(); + set_fs(get_ds()); + + if (!ts->in_file->f_op || !ts->in_file->f_op->poll) { + ret = -EIO; + goto end; + } + + ret = ts->in_file->f_op->poll(ts->in_file, pt); + + if (ts->out_file != ts->in_file) { + if (!ts->out_file->f_op || !ts->out_file->f_op->poll) { + ret = -EIO; + goto end; + } + + n = ts->out_file->f_op->poll(ts->out_file, pt); + + ret &= ~POLLOUT; + n &= ~POLLIN; + + ret |= n; + } + +end: + set_fs(oldfs); + return ret; +} + + struct v9fs_transport v9fs_trans_fd = { .init = v9fs_fd_init, .write = v9fs_fd_send, .read = v9fs_fd_recv, .close = v9fs_fd_close, + .poll = v9fs_fd_poll, }; diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c index 6a9a75d40f73..44e830697acb 100644 --- a/fs/9p/trans_sock.c +++ b/fs/9p/trans_sock.c @@ -3,6 +3,7 @@ * * Socket Transport Layer * + * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> * Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de> @@ -36,6 +37,7 @@ #include <asm/uaccess.h> #include <linux/inet.h> #include <linux/idr.h> +#include <linux/file.h> #include "debug.h" #include "v9fs.h" @@ -45,6 +47,7 @@ struct v9fs_trans_sock { struct socket *s; + struct file *filp; }; /** @@ -57,41 +60,26 @@ struct v9fs_trans_sock { static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len) { - struct msghdr msg; - struct kvec iov; - int result; - mm_segment_t oldfs; - struct v9fs_trans_sock *ts = trans ? trans->priv : NULL; + int ret; + struct v9fs_trans_sock *ts; - if (trans->status == Disconnected) + if (!trans || trans->status == Disconnected) { + dprintk(DEBUG_ERROR, "disconnected ...\n"); return -EREMOTEIO; + } - result = -EINVAL; - - oldfs = get_fs(); - set_fs(get_ds()); - - iov.iov_base = v; - iov.iov_len = len; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_iovlen = 1; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_namelen = 0; - msg.msg_flags = MSG_NOSIGNAL; + ts = trans->priv; - result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0); + if (!(ts->filp->f_flags & O_NONBLOCK)) + dprintk(DEBUG_ERROR, "blocking read ...\n"); - dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state); - set_fs(oldfs); - - if (result <= 0) { - if (result != -ERESTARTSYS) + ret = kernel_read(ts->filp, ts->filp->f_pos, v, len); + if (ret <= 0) { + if (ret != -ERESTARTSYS && ret != -EAGAIN) trans->status = Disconnected; } - return result; + return ret; } /** @@ -104,40 +92,72 @@ static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len) static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len) { - struct kvec iov; - struct msghdr msg; - int result = -1; + int ret; mm_segment_t oldfs; - struct v9fs_trans_sock *ts = trans ? trans->priv : NULL; + struct v9fs_trans_sock *ts; - dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len); - dump_data(v, len); + if (!trans || trans->status == Disconnected) { + dprintk(DEBUG_ERROR, "disconnected ...\n"); + return -EREMOTEIO; + } + + ts = trans->priv; + if (!ts) { + dprintk(DEBUG_ERROR, "no transport ...\n"); + return -EREMOTEIO; + } - down(&trans->writelock); + if (!(ts->filp->f_flags & O_NONBLOCK)) + dprintk(DEBUG_ERROR, "blocking write ...\n"); oldfs = get_fs(); set_fs(get_ds()); - iov.iov_base = v; - iov.iov_len = len; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_iovlen = 1; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_namelen = 0; - msg.msg_flags = MSG_NOSIGNAL; - result = kernel_sendmsg(ts->s, &msg, &iov, 1, len); + ret = vfs_write(ts->filp, (void __user *)v, len, &ts->filp->f_pos); set_fs(oldfs); - if (result < 0) { - if (result != -ERESTARTSYS) + if (ret < 0) { + if (ret != -ERESTARTSYS) trans->status = Disconnected; } - up(&trans->writelock); - return result; + return ret; +} + +static unsigned int v9fs_sock_poll(struct v9fs_transport *trans, + struct poll_table_struct *pt) { + + int ret; + struct v9fs_trans_sock *ts; + mm_segment_t oldfs; + + if (!trans) { + dprintk(DEBUG_ERROR, "no transport\n"); + return -EIO; + } + + ts = trans->priv; + if (trans->status != Connected || !ts) { + dprintk(DEBUG_ERROR, "transport disconnected: %d\n", trans->status); + return -EIO; + } + + oldfs = get_fs(); + set_fs(get_ds()); + + if (!ts->filp->f_op || !ts->filp->f_op->poll) { + dprintk(DEBUG_ERROR, "no poll operation\n"); + ret = -EIO; + goto end; + } + + ret = ts->filp->f_op->poll(ts->filp, pt); + +end: + set_fs(oldfs); + return ret; } + /** * v9fs_tcp_init - initialize TCP socket * @v9ses: session information @@ -154,9 +174,9 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data) int rc = 0; struct v9fs_trans_sock *ts = NULL; struct v9fs_transport *trans = v9ses->transport; + int fd; - sema_init(&trans->writelock, 1); - sema_init(&trans->readlock, 1); + trans->status = Disconnected; ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL); @@ -165,6 +185,7 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data) trans->priv = ts; ts->s = NULL; + ts->filp = NULL; if (!addr) return -EINVAL; @@ -185,7 +206,18 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data) return rc; } csocket->sk->sk_allocation = GFP_NOIO; + + fd = sock_map_fd(csocket); + if (fd < 0) { + sock_release(csocket); + kfree(ts); + trans->priv = NULL; + return fd; + } + ts->s = csocket; + ts->filp = fget(fd); + ts->filp->f_flags |= O_NONBLOCK; trans->status = Connected; return 0; @@ -203,7 +235,7 @@ static int v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, char *data) { - int rc; + int rc, fd; struct socket *csocket; struct sockaddr_un sun_server; struct v9fs_transport *trans; @@ -213,6 +245,8 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, csocket = NULL; trans = v9ses->transport; + trans->status = Disconnected; + if (strlen(dev_name) > UNIX_PATH_MAX) { eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n", dev_name); @@ -225,9 +259,7 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, trans->priv = ts; ts->s = NULL; - - sema_init(&trans->writelock, 1); - sema_init(&trans->readlock, 1); + ts->filp = NULL; sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, dev_name); @@ -241,7 +273,18 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, return rc; } csocket->sk->sk_allocation = GFP_NOIO; + + fd = sock_map_fd(csocket); + if (fd < 0) { + sock_release(csocket); + kfree(ts); + trans->priv = NULL; + return fd; + } + ts->s = csocket; + ts->filp = fget(fd); + ts->filp->f_flags |= O_NONBLOCK; trans->status = Connected; return 0; @@ -262,12 +305,11 @@ static void v9fs_sock_close(struct v9fs_transport *trans) ts = trans->priv; - if ((ts) && (ts->s)) { - dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s); - sock_release(ts->s); + if ((ts) && (ts->filp)) { + fput(ts->filp); + ts->filp = NULL; ts->s = NULL; trans->status = Disconnected; - dprintk(DEBUG_TRANS, "socket closed\n"); } kfree(ts); @@ -280,6 +322,7 @@ struct v9fs_transport v9fs_trans_tcp = { .write = v9fs_sock_send, .read = v9fs_sock_recv, .close = v9fs_sock_close, + .poll = v9fs_sock_poll, }; struct v9fs_transport v9fs_trans_unix = { @@ -287,4 +330,5 @@ struct v9fs_transport v9fs_trans_unix = { .write = v9fs_sock_send, .read = v9fs_sock_recv, .close = v9fs_sock_close, + .poll = v9fs_sock_poll, }; diff --git a/fs/9p/transport.h b/fs/9p/transport.h index 9e9cd418efd5..91fcdb94b361 100644 --- a/fs/9p/transport.h +++ b/fs/9p/transport.h @@ -3,6 +3,7 @@ * * Transport Definition * + * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * * This program is free software; you can redistribute it and/or modify @@ -31,14 +32,13 @@ enum v9fs_transport_status { struct v9fs_transport { enum v9fs_transport_status status; - struct semaphore writelock; - struct semaphore readlock; void *priv; int (*init) (struct v9fs_session_info *, const char *, char *); int (*write) (struct v9fs_transport *, void *, int); int (*read) (struct v9fs_transport *, void *, int); void (*close) (struct v9fs_transport *); + unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *); }; extern struct v9fs_transport v9fs_trans_tcp; diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 418c3743fdee..5250c428fc1f 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -37,7 +37,6 @@ #include "v9fs_vfs.h" #include "transport.h" #include "mux.h" -#include "conv.h" /* TODO: sysfs or debugfs interface */ int v9fs_debug_level = 0; /* feature-rific global debug level */ @@ -213,7 +212,8 @@ retry: return -1; } - error = idr_get_new(&p->pool, NULL, &i); + /* no need to store exactly p, we just need something non-null */ + error = idr_get_new(&p->pool, p, &i); up(&p->lock); if (error == -EAGAIN) @@ -243,6 +243,16 @@ void v9fs_put_idpool(int id, struct v9fs_idpool *p) } /** + * v9fs_check_idpool - check if the specified id is available + * @id - id to check + * @p - pool + */ +int v9fs_check_idpool(int id, struct v9fs_idpool *p) +{ + return idr_find(&p->pool, id) != NULL; +} + +/** * v9fs_session_init - initialize session * @v9ses: session information structure * @dev_name: device being mounted @@ -259,6 +269,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses, int n = 0; int newfid = -1; int retval = -EINVAL; + struct v9fs_str *version; v9ses->name = __getname(); if (!v9ses->name) @@ -281,9 +292,6 @@ v9fs_session_init(struct v9fs_session_info *v9ses, /* id pools that are session-dependent: FIDs and TIDs */ idr_init(&v9ses->fidpool.pool); init_MUTEX(&v9ses->fidpool.lock); - idr_init(&v9ses->tidpool.pool); - init_MUTEX(&v9ses->tidpool.lock); - switch (v9ses->proto) { case PROTO_TCP: @@ -320,7 +328,12 @@ v9fs_session_init(struct v9fs_session_info *v9ses, v9ses->shutdown = 0; v9ses->session_hung = 0; - if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) { + v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ, + &v9ses->extended); + + if (IS_ERR(v9ses->mux)) { + retval = PTR_ERR(v9ses->mux); + v9ses->mux = NULL; dprintk(DEBUG_ERROR, "problem initializing mux\n"); goto SessCleanUp; } @@ -339,13 +352,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses, goto FreeFcall; } - /* Really should check for 9P1 and report error */ - if (!strcmp(fcall->params.rversion.version, "9P2000.u")) { + version = &fcall->params.rversion.version; + if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) { dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n"); v9ses->extended = 1; - } else { + } else if (version->len==6 && !memcmp(version->str, "9P2000", 6)) { dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n"); v9ses->extended = 0; + } else { + retval = -EREMOTEIO; + goto FreeFcall; } n = fcall->params.rversion.msize; @@ -381,7 +397,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses, } if (v9ses->afid != ~0) { - if (v9fs_t_clunk(v9ses, v9ses->afid, NULL)) + if (v9fs_t_clunk(v9ses, v9ses->afid)) dprintk(DEBUG_ERROR, "clunk failed\n"); } @@ -403,13 +419,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses, void v9fs_session_close(struct v9fs_session_info *v9ses) { - if (v9ses->recvproc) { - send_sig(SIGKILL, v9ses->recvproc, 1); - wait_for_completion(&v9ses->proccmpl); + if (v9ses->mux) { + v9fs_mux_destroy(v9ses->mux); + v9ses->mux = NULL; } - if (v9ses->transport) + if (v9ses->transport) { v9ses->transport->close(v9ses->transport); + kfree(v9ses->transport); + v9ses->transport = NULL; + } __putname(v9ses->name); __putname(v9ses->remotename); @@ -420,8 +439,9 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) * and cancel all pending requests. */ void v9fs_session_cancel(struct v9fs_session_info *v9ses) { + dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses); v9ses->transport->status = Disconnected; - v9fs_mux_cancel_requests(v9ses, -EIO); + v9fs_mux_cancel(v9ses->mux, -EIO); } extern int v9fs_error_init(void); @@ -433,11 +453,17 @@ extern int v9fs_error_init(void); static int __init init_v9fs(void) { + int ret; + v9fs_error_init(); printk(KERN_INFO "Installing v9fs 9P2000 file system support\n"); - return register_filesystem(&v9fs_fs_type); + ret = v9fs_mux_global_init(); + if (!ret) + ret = register_filesystem(&v9fs_fs_type); + + return ret; } /** @@ -447,6 +473,7 @@ static int __init init_v9fs(void) static void __exit exit_v9fs(void) { + v9fs_mux_global_exit(); unregister_filesystem(&v9fs_fs_type); } diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 45dcef42bdd6..f337da7a0eec 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -57,24 +57,14 @@ struct v9fs_session_info { /* book keeping */ struct v9fs_idpool fidpool; /* The FID pool for file descriptors */ - struct v9fs_idpool tidpool; /* The TID pool for transactions ids */ - /* transport information */ struct v9fs_transport *transport; + struct v9fs_mux_data *mux; int inprogress; /* session in progress => true */ int shutdown; /* session shutting down. no more attaches. */ unsigned char session_hung; - - /* mux private data */ - struct v9fs_fcall *curfcall; - wait_queue_head_t read_wait; - struct completion fcread; - struct completion proccmpl; - struct task_struct *recvproc; - - spinlock_t muxlock; - struct list_head mux_fcalls; + struct dentry *debugfs_dir; }; /* possible values of ->proto */ @@ -84,11 +74,14 @@ enum { PROTO_FD, }; +extern struct dentry *v9fs_debugfs_root; + int v9fs_session_init(struct v9fs_session_info *, const char *, char *); struct v9fs_session_info *v9fs_inode2v9ses(struct inode *); void v9fs_session_close(struct v9fs_session_info *v9ses); int v9fs_get_idpool(struct v9fs_idpool *p); void v9fs_put_idpool(int id, struct v9fs_idpool *p); +int v9fs_check_idpool(int id, struct v9fs_idpool *p); void v9fs_session_cancel(struct v9fs_session_info *v9ses); #define V9FS_MAGIC 0x01021997 diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 2f2cea7ee3e7..c78502ad00ed 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -45,9 +45,8 @@ extern struct dentry_operations v9fs_dentry_operations; struct inode *v9fs_get_inode(struct super_block *sb, int mode); ino_t v9fs_qid2ino(struct v9fs_qid *qid); -void v9fs_mistat2inode(struct v9fs_stat *, struct inode *, - struct super_block *); +void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *); int v9fs_dir_release(struct inode *inode, struct file *filp); int v9fs_file_open(struct inode *inode, struct file *file); -void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat); +void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat); void v9fs_dentry_release(struct dentry *); diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index a6aa947de0f9..2dd806dac9f1 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -40,7 +40,6 @@ #include "v9fs.h" #include "9p.h" #include "v9fs_vfs.h" -#include "conv.h" #include "fid.h" /** @@ -95,24 +94,22 @@ static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd) void v9fs_dentry_release(struct dentry *dentry) { + int err; + dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); if (dentry->d_fsdata != NULL) { struct list_head *fid_list = dentry->d_fsdata; struct v9fs_fid *temp = NULL; struct v9fs_fid *current_fid = NULL; - struct v9fs_fcall *fcall = NULL; list_for_each_entry_safe(current_fid, temp, fid_list, list) { - if (v9fs_t_clunk - (current_fid->v9ses, current_fid->fid, &fcall)) - dprintk(DEBUG_ERROR, "clunk failed: %s\n", - FCALL_ERROR(fcall)); + err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid); - v9fs_put_idpool(current_fid->fid, - ¤t_fid->v9ses->fidpool); + if (err < 0) + dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n", + err, dentry->d_iname); - kfree(fcall); v9fs_fid_destroy(current_fid); } diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 57a43b8feef5..ae6d032b9b59 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -37,8 +37,8 @@ #include "debug.h" #include "v9fs.h" #include "9p.h" -#include "v9fs_vfs.h" #include "conv.h" +#include "v9fs_vfs.h" #include "fid.h" /** @@ -74,20 +74,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) struct inode *inode = filp->f_dentry->d_inode; struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); struct v9fs_fid *file = filp->private_data; - unsigned int i, n; + unsigned int i, n, s; int fid = -1; int ret = 0; - struct v9fs_stat *mi = NULL; + struct v9fs_stat stat; int over = 0; dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name); fid = file->fid; - mi = kmalloc(v9ses->maxdata, GFP_KERNEL); - if (!mi) - return -ENOMEM; - if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) { kfree(file->rdir_fcall); file->rdir_fcall = NULL; @@ -97,20 +93,20 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) n = file->rdir_fcall->params.rread.count; i = file->rdir_fpos; while (i < n) { - int s = v9fs_deserialize_stat(v9ses, - file->rdir_fcall->params.rread.data + i, - n - i, mi, v9ses->maxdata); + s = v9fs_deserialize_stat( + file->rdir_fcall->params.rread.data + i, + n - i, &stat, v9ses->extended); if (s == 0) { dprintk(DEBUG_ERROR, - "error while deserializing mistat\n"); + "error while deserializing stat\n"); ret = -EIO; goto FreeStructs; } - over = filldir(dirent, mi->name, strlen(mi->name), - filp->f_pos, v9fs_qid2ino(&mi->qid), - dt_type(mi)); + over = filldir(dirent, stat.name.str, stat.name.len, + filp->f_pos, v9fs_qid2ino(&stat.qid), + dt_type(&stat)); if (over) { file->rdir_fpos = i; @@ -130,7 +126,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) while (!over) { ret = v9fs_t_read(v9ses, fid, filp->f_pos, - v9ses->maxdata-V9FS_IOHDRSZ, &fcall); + v9ses->maxdata-V9FS_IOHDRSZ, &fcall); if (ret < 0) { dprintk(DEBUG_ERROR, "error while reading: %d: %p\n", ret, fcall); @@ -141,19 +137,18 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) n = ret; i = 0; while (i < n) { - int s = v9fs_deserialize_stat(v9ses, - fcall->params.rread.data + i, n - i, mi, - v9ses->maxdata); + s = v9fs_deserialize_stat(fcall->params.rread.data + i, + n - i, &stat, v9ses->extended); if (s == 0) { dprintk(DEBUG_ERROR, - "error while deserializing mistat\n"); + "error while deserializing stat\n"); return -EIO; } - over = filldir(dirent, mi->name, strlen(mi->name), - filp->f_pos, v9fs_qid2ino(&mi->qid), - dt_type(mi)); + over = filldir(dirent, stat.name.str, stat.name.len, + filp->f_pos, v9fs_qid2ino(&stat.qid), + dt_type(&stat)); if (over) { file->rdir_fcall = fcall; @@ -172,7 +167,6 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) FreeStructs: kfree(fcall); - kfree(mi); return ret; } @@ -193,18 +187,15 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) fid->fid); fidnum = fid->fid; - filemap_fdatawrite(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); if (fidnum >= 0) { dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen, fid->fid); - if (v9fs_t_clunk(v9ses, fidnum, NULL)) + if (v9fs_t_clunk(v9ses, fidnum)) dprintk(DEBUG_ERROR, "clunk failed\n"); - v9fs_put_idpool(fid->fid, &v9ses->fidpool); - kfree(fid->rdir_fcall); kfree(fid); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 89c849da8504..6852f0eb96ed 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -32,6 +32,7 @@ #include <linux/string.h> #include <linux/smp_lock.h> #include <linux/inet.h> +#include <linux/version.h> #include <linux/list.h> #include <asm/uaccess.h> #include <linux/idr.h> @@ -117,9 +118,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) result = v9fs_t_open(v9ses, newfid, open_mode, &fcall); if (result < 0) { - dprintk(DEBUG_ERROR, - "open failed, open_mode 0x%x: %s\n", open_mode, - FCALL_ERROR(fcall)); + PRINT_FCALL_ERROR("open failed", fcall); kfree(fcall); return result; } @@ -165,8 +164,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) return -ENOLCK; if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { - filemap_fdatawrite(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); invalidate_inode_pages(&inode->i_data); } @@ -257,7 +255,6 @@ v9fs_file_write(struct file *filp, const char __user * data, int result = -EIO; int rsize = 0; int total = 0; - char *buf; dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count, (int)*offset); @@ -265,28 +262,14 @@ v9fs_file_write(struct file *filp, const char __user * data, if (v9fid->iounit != 0 && rsize > v9fid->iounit) rsize = v9fid->iounit; - buf = kmalloc(v9ses->maxdata - V9FS_IOHDRSZ, GFP_KERNEL); - if (!buf) - return -ENOMEM; - do { if (count < rsize) rsize = count; - result = copy_from_user(buf, data, rsize); - if (result) { - dprintk(DEBUG_ERROR, "Problem copying from user\n"); - kfree(buf); - return -EFAULT; - } - - dump_data(buf, rsize); - result = v9fs_t_write(v9ses, fid, *offset, rsize, buf, &fcall); + result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall); if (result < 0) { - eprintk(KERN_ERR, "error while writing: %s(%d)\n", - FCALL_ERROR(fcall), result); + PRINT_FCALL_ERROR("error while writing", fcall); kfree(fcall); - kfree(buf); return result; } else *offset += result; @@ -306,7 +289,6 @@ v9fs_file_write(struct file *filp, const char __user * data, total += result; } while (count); - kfree(buf); return total; } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 0ea965c3bb7d..d933ef1fbd8a 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -40,7 +40,6 @@ #include "v9fs.h" #include "9p.h" #include "v9fs_vfs.h" -#include "conv.h" #include "fid.h" static struct inode_operations v9fs_dir_inode_operations; @@ -127,100 +126,32 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) } /** - * v9fs_blank_mistat - helper function to setup a 9P stat structure + * v9fs_blank_wstat - helper function to setup a 9P stat structure * @v9ses: 9P session info (for determining extended mode) - * @mistat: structure to initialize + * @wstat: structure to initialize * */ static void -v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat) +v9fs_blank_wstat(struct v9fs_wstat *wstat) { - mistat->type = ~0; - mistat->dev = ~0; - mistat->qid.type = ~0; - mistat->qid.version = ~0; - *((long long *)&mistat->qid.path) = ~0; - mistat->mode = ~0; - mistat->atime = ~0; - mistat->mtime = ~0; - mistat->length = ~0; - mistat->name = mistat->data; - mistat->uid = mistat->data; - mistat->gid = mistat->data; - mistat->muid = mistat->data; - if (v9ses->extended) { - mistat->n_uid = ~0; - mistat->n_gid = ~0; - mistat->n_muid = ~0; - mistat->extension = mistat->data; - } - *mistat->data = 0; -} - -/** - * v9fs_mistat2unix - convert mistat to unix stat - * @mistat: Plan 9 metadata (mistat) structure - * @buf: unix metadata (stat) structure to populate - * @sb: superblock - * - */ - -static void -v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf, - struct super_block *sb) -{ - struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL; - - buf->st_nlink = 1; - - buf->st_atime = mistat->atime; - buf->st_mtime = mistat->mtime; - buf->st_ctime = mistat->mtime; - - buf->st_uid = (unsigned short)-1; - buf->st_gid = (unsigned short)-1; - - if (v9ses && v9ses->extended) { - /* TODO: string to uid mapping via user-space daemon */ - if (mistat->n_uid != -1) - sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid); - - if (mistat->n_gid != -1) - sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid); - } - - if (buf->st_uid == (unsigned short)-1) - buf->st_uid = v9ses->uid; - if (buf->st_gid == (unsigned short)-1) - buf->st_gid = v9ses->gid; - - buf->st_mode = p9mode2unixmode(v9ses, mistat->mode); - if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) { - char type = 0; - int major = -1; - int minor = -1; - sscanf(mistat->extension, "%c %u %u", &type, &major, &minor); - switch (type) { - case 'c': - buf->st_mode &= ~S_IFBLK; - buf->st_mode |= S_IFCHR; - break; - case 'b': - break; - default: - dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n", - type, mistat->extension); - }; - buf->st_rdev = MKDEV(major, minor); - } else - buf->st_rdev = 0; - - buf->st_size = mistat->length; - - buf->st_blksize = sb->s_blocksize; - buf->st_blocks = - (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits; + wstat->type = ~0; + wstat->dev = ~0; + wstat->qid.type = ~0; + wstat->qid.version = ~0; + *((long long *)&wstat->qid.path) = ~0; + wstat->mode = ~0; + wstat->atime = ~0; + wstat->mtime = ~0; + wstat->length = ~0; + wstat->name = NULL; + wstat->uid = NULL; + wstat->gid = NULL; + wstat->muid = NULL; + wstat->n_uid = ~0; + wstat->n_gid = ~0; + wstat->n_muid = ~0; + wstat->extension = NULL; } /** @@ -312,12 +243,12 @@ v9fs_create(struct inode *dir, struct inode *file_inode = NULL; struct v9fs_fcall *fcall = NULL; struct v9fs_qid qid; - struct stat newstat; int dirfidnum = -1; long newfid = -1; int result = 0; unsigned int iounit = 0; int wfidno = -1; + int err; perm = unixmode2p9mode(v9ses, perm); @@ -349,57 +280,64 @@ v9fs_create(struct inode *dir, result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall); if (result < 0) { - dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall)); + PRINT_FCALL_ERROR("clone error", fcall); v9fs_put_idpool(newfid, &v9ses->fidpool); newfid = -1; goto CleanUpFid; } kfree(fcall); + fcall = NULL; result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name, perm, open_mode, &fcall); if (result < 0) { - dprintk(DEBUG_ERROR, "create fails: %s(%d)\n", - FCALL_ERROR(fcall), result); - + PRINT_FCALL_ERROR("create fails", fcall); goto CleanUpFid; } iounit = fcall->params.rcreate.iounit; qid = fcall->params.rcreate.qid; kfree(fcall); + fcall = NULL; - fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1); - dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate); - if (!fid) { - result = -ENOMEM; - goto CleanUpFid; - } + if (!(perm&V9FS_DMDIR)) { + fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1); + dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate); + if (!fid) { + result = -ENOMEM; + goto CleanUpFid; + } - fid->qid = qid; - fid->iounit = iounit; + fid->qid = qid; + fid->iounit = iounit; + } else { + err = v9fs_t_clunk(v9ses, newfid); + newfid = -1; + if (err < 0) + dprintk(DEBUG_ERROR, "clunk for mkdir failed: %d\n", err); + } /* walk to the newly created file and put the fid in the dentry */ wfidno = v9fs_get_idpool(&v9ses->fidpool); - if (newfid < 0) { + if (wfidno < 0) { eprintk(KERN_WARNING, "no free fids available\n"); return -ENOSPC; } result = v9fs_t_walk(v9ses, dirfidnum, wfidno, - (char *) file_dentry->d_name.name, NULL); + (char *) file_dentry->d_name.name, &fcall); if (result < 0) { - dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall)); + PRINT_FCALL_ERROR("clone error", fcall); v9fs_put_idpool(wfidno, &v9ses->fidpool); wfidno = -1; goto CleanUpFid; } + kfree(fcall); + fcall = NULL; if (!v9fs_fid_create(file_dentry, v9ses, wfidno, 0)) { - if (!v9fs_t_clunk(v9ses, newfid, &fcall)) { - v9fs_put_idpool(wfidno, &v9ses->fidpool); - } + v9fs_put_idpool(wfidno, &v9ses->fidpool); goto CleanUpFid; } @@ -409,62 +347,43 @@ v9fs_create(struct inode *dir, (perm & V9FS_DMDEVICE)) return 0; - result = v9fs_t_stat(v9ses, newfid, &fcall); + result = v9fs_t_stat(v9ses, wfidno, &fcall); if (result < 0) { - dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall), - result); + PRINT_FCALL_ERROR("stat error", fcall); goto CleanUpFid; } - v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb); - file_inode = v9fs_get_inode(sb, newstat.st_mode); + file_inode = v9fs_get_inode(sb, + p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode)); + if ((!file_inode) || IS_ERR(file_inode)) { dprintk(DEBUG_ERROR, "create inode failed\n"); result = -EBADF; goto CleanUpFid; } - v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb); + v9fs_stat2inode(&fcall->params.rstat.stat, file_inode, sb); kfree(fcall); fcall = NULL; file_dentry->d_op = &v9fs_dentry_operations; d_instantiate(file_dentry, file_inode); - if (perm & V9FS_DMDIR) { - if (!v9fs_t_clunk(v9ses, newfid, &fcall)) - v9fs_put_idpool(newfid, &v9ses->fidpool); - else - dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n", - FCALL_ERROR(fcall)); - kfree(fcall); - fid->fidopen = 0; - fid->fidcreate = 0; - d_drop(file_dentry); - } - return 0; CleanUpFid: kfree(fcall); + fcall = NULL; if (newfid >= 0) { - if (!v9fs_t_clunk(v9ses, newfid, &fcall)) - v9fs_put_idpool(newfid, &v9ses->fidpool); - else - dprintk(DEBUG_ERROR, "clunk failed: %s\n", - FCALL_ERROR(fcall)); - - kfree(fcall); + err = v9fs_t_clunk(v9ses, newfid); + if (err < 0) + dprintk(DEBUG_ERROR, "clunk failed: %d\n", err); } if (wfidno >= 0) { - if (!v9fs_t_clunk(v9ses, wfidno, &fcall)) - v9fs_put_idpool(wfidno, &v9ses->fidpool); - else - dprintk(DEBUG_ERROR, "clunk failed: %s\n", - FCALL_ERROR(fcall)); - - kfree(fcall); + err = v9fs_t_clunk(v9ses, wfidno); + if (err < 0) + dprintk(DEBUG_ERROR, "clunk failed: %d\n", err); } return result; } @@ -509,10 +428,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) } result = v9fs_t_remove(v9ses, fid, &fcall); - if (result < 0) - dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n", - FCALL_ERROR(fcall), result); - else { + if (result < 0) { + PRINT_FCALL_ERROR("remove fails", fcall); + } else { v9fs_put_idpool(fid, &v9ses->fidpool); v9fs_fid_destroy(v9fid); } @@ -567,7 +485,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, struct v9fs_fid *fid; struct inode *inode; struct v9fs_fcall *fcall = NULL; - struct stat newstat; int dirfidnum = -1; int newfid = -1; int result = 0; @@ -620,8 +537,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, goto FreeFcall; } - v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb); - inode = v9fs_get_inode(sb, newstat.st_mode); + inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses, + fcall->params.rstat.stat.mode)); if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) { eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n", @@ -631,7 +548,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, goto FreeFcall; } - inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid); + inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid); fid = v9fs_fid_create(dentry, v9ses, newfid, 0); if (fid == NULL) { @@ -640,10 +557,10 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, goto FreeFcall; } - fid->qid = fcall->params.rstat.stat->qid; + fid->qid = fcall->params.rstat.stat.qid; dentry->d_op = &v9fs_dentry_operations; - v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb); + v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb); d_add(dentry, inode); kfree(fcall); @@ -699,7 +616,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, v9fs_fid_lookup(old_dentry->d_parent); struct v9fs_fid *newdirfid = v9fs_fid_lookup(new_dentry->d_parent); - struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); + struct v9fs_wstat wstat; struct v9fs_fcall *fcall = NULL; int fid = -1; int olddirfidnum = -1; @@ -708,9 +625,6 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, dprintk(DEBUG_VFS, "\n"); - if (!mistat) - return -ENOMEM; - if ((!oldfid) || (!olddirfid) || (!newdirfid)) { dprintk(DEBUG_ERROR, "problem with arguments\n"); return -EBADF; @@ -734,26 +648,15 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto FreeFcallnBail; } - v9fs_blank_mistat(v9ses, mistat); + v9fs_blank_wstat(&wstat); + wstat.muid = v9ses->name; + wstat.name = (char *) new_dentry->d_name.name; - strcpy(mistat->data + 1, v9ses->name); - mistat->name = mistat->data + 1 + strlen(v9ses->name); - - if (new_dentry->d_name.len > - (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) { - dprintk(DEBUG_ERROR, "new name too long\n"); - goto FreeFcallnBail; - } - - strcpy(mistat->name, new_dentry->d_name.name); - retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall); + retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall); FreeFcallnBail: - kfree(mistat); - if (retval < 0) - dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", - FCALL_ERROR(fcall)); + PRINT_FCALL_ERROR("wstat error", fcall); kfree(fcall); return retval; @@ -788,7 +691,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, if (err < 0) dprintk(DEBUG_ERROR, "stat error\n"); else { - v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode, + v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode, dentry->d_inode->i_sb); generic_fillattr(dentry->d_inode, stat); } @@ -809,57 +712,44 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); struct v9fs_fid *fid = v9fs_fid_lookup(dentry); struct v9fs_fcall *fcall = NULL; - struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); + struct v9fs_wstat wstat; int res = -EPERM; dprintk(DEBUG_VFS, "\n"); - if (!mistat) - return -ENOMEM; - if (!fid) { dprintk(DEBUG_ERROR, "Couldn't find fid associated with dentry\n"); return -EBADF; } - v9fs_blank_mistat(v9ses, mistat); + v9fs_blank_wstat(&wstat); if (iattr->ia_valid & ATTR_MODE) - mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode); + wstat.mode = unixmode2p9mode(v9ses, iattr->ia_mode); if (iattr->ia_valid & ATTR_MTIME) - mistat->mtime = iattr->ia_mtime.tv_sec; + wstat.mtime = iattr->ia_mtime.tv_sec; if (iattr->ia_valid & ATTR_ATIME) - mistat->atime = iattr->ia_atime.tv_sec; + wstat.atime = iattr->ia_atime.tv_sec; if (iattr->ia_valid & ATTR_SIZE) - mistat->length = iattr->ia_size; + wstat.length = iattr->ia_size; if (v9ses->extended) { - char *ptr = mistat->data+1; - - if (iattr->ia_valid & ATTR_UID) { - mistat->uid = ptr; - ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid); - mistat->n_uid = iattr->ia_uid; - } + if (iattr->ia_valid & ATTR_UID) + wstat.n_uid = iattr->ia_uid; - if (iattr->ia_valid & ATTR_GID) { - mistat->gid = ptr; - ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid); - mistat->n_gid = iattr->ia_gid; - } + if (iattr->ia_valid & ATTR_GID) + wstat.n_gid = iattr->ia_gid; } - res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall); + res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall); if (res < 0) - dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall)); + PRINT_FCALL_ERROR("wstat error", fcall); - kfree(mistat); kfree(fcall); - if (res >= 0) res = inode_setattr(dentry->d_inode, iattr); @@ -867,51 +757,47 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) } /** - * v9fs_mistat2inode - populate an inode structure with mistat info - * @mistat: Plan 9 metadata (mistat) structure + * v9fs_stat2inode - populate an inode structure with mistat info + * @stat: Plan 9 metadata (mistat) structure * @inode: inode to populate * @sb: superblock of filesystem * */ void -v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode, - struct super_block *sb) +v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode, + struct super_block *sb) { + int n; + char ext[32]; struct v9fs_session_info *v9ses = sb->s_fs_info; inode->i_nlink = 1; - inode->i_atime.tv_sec = mistat->atime; - inode->i_mtime.tv_sec = mistat->mtime; - inode->i_ctime.tv_sec = mistat->mtime; + inode->i_atime.tv_sec = stat->atime; + inode->i_mtime.tv_sec = stat->mtime; + inode->i_ctime.tv_sec = stat->mtime; - inode->i_uid = -1; - inode->i_gid = -1; + inode->i_uid = v9ses->uid; + inode->i_gid = v9ses->gid; if (v9ses->extended) { - /* TODO: string to uid mapping via user-space daemon */ - inode->i_uid = mistat->n_uid; - inode->i_gid = mistat->n_gid; - - if (mistat->n_uid == -1) - sscanf(mistat->uid, "%x", &inode->i_uid); - - if (mistat->n_gid == -1) - sscanf(mistat->gid, "%x", &inode->i_gid); + inode->i_uid = stat->n_uid; + inode->i_gid = stat->n_gid; } - if (inode->i_uid == -1) - inode->i_uid = v9ses->uid; - if (inode->i_gid == -1) - inode->i_gid = v9ses->gid; - - inode->i_mode = p9mode2unixmode(v9ses, mistat->mode); + inode->i_mode = p9mode2unixmode(v9ses, stat->mode); if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) { char type = 0; int major = -1; int minor = -1; - sscanf(mistat->extension, "%c %u %u", &type, &major, &minor); + + n = stat->extension.len; + if (n > sizeof(ext)-1) + n = sizeof(ext)-1; + memmove(ext, stat->extension.str, n); + ext[n] = 0; + sscanf(ext, "%c %u %u", &type, &major, &minor); switch (type) { case 'c': inode->i_mode &= ~S_IFBLK; @@ -920,14 +806,14 @@ v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode, case 'b': break; default: - dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n", - type, mistat->extension); + dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n", + type, stat->extension.len, stat->extension.str); }; inode->i_rdev = MKDEV(major, minor); } else inode->i_rdev = 0; - inode->i_size = mistat->length; + inode->i_size = stat->length; inode->i_blksize = sb->s_blocksize; inode->i_blocks = @@ -955,71 +841,6 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid) } /** - * v9fs_vfs_symlink - helper function to create symlinks - * @dir: directory inode containing symlink - * @dentry: dentry for symlink - * @symname: symlink data - * - * See 9P2000.u RFC for more information - * - */ - -static int -v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) -{ - int retval = -EPERM; - struct v9fs_fid *newfid; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); - struct v9fs_fcall *fcall = NULL; - struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); - - dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, - symname); - - if (!mistat) - return -ENOMEM; - - if (!v9ses->extended) { - dprintk(DEBUG_ERROR, "not extended\n"); - goto FreeFcall; - } - - /* issue a create */ - retval = v9fs_create(dir, dentry, S_IFLNK, 0); - if (retval != 0) - goto FreeFcall; - - newfid = v9fs_fid_lookup(dentry); - - /* issue a twstat */ - v9fs_blank_mistat(v9ses, mistat); - strcpy(mistat->data + 1, symname); - mistat->extension = mistat->data + 1; - retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall); - if (retval < 0) { - dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", - FCALL_ERROR(fcall)); - goto FreeFcall; - } - - kfree(fcall); - - if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) { - dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n", - FCALL_ERROR(fcall)); - goto FreeFcall; - } - - d_drop(dentry); /* FID - will this also clunk? */ - - FreeFcall: - kfree(mistat); - kfree(fcall); - - return retval; -} - -/** * v9fs_readlink - read a symlink's location (internal version) * @dentry: dentry for symlink * @buffer: buffer to load symlink location into @@ -1058,16 +879,17 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) if (!fcall) return -EIO; - if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) { + if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) { retval = -EINVAL; goto FreeFcall; } /* copy extension buffer into buffer */ - if (strlen(fcall->params.rstat.stat->extension) < buflen) - buflen = strlen(fcall->params.rstat.stat->extension); + if (fcall->params.rstat.stat.extension.len < buflen) + buflen = fcall->params.rstat.stat.extension.len; - memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1); + memcpy(buffer, fcall->params.rstat.stat.extension.str, buflen - 1); + buffer[buflen-1] = 0; retval = buflen; @@ -1157,6 +979,77 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void __putname(s); } +static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, + int mode, const char *extension) +{ + int err, retval; + struct v9fs_session_info *v9ses; + struct v9fs_fcall *fcall; + struct v9fs_fid *fid; + struct v9fs_wstat wstat; + + v9ses = v9fs_inode2v9ses(dir); + retval = -EPERM; + fcall = NULL; + + if (!v9ses->extended) { + dprintk(DEBUG_ERROR, "not extended\n"); + goto free_mem; + } + + /* issue a create */ + retval = v9fs_create(dir, dentry, mode, 0); + if (retval != 0) + goto free_mem; + + fid = v9fs_fid_get_created(dentry); + if (!fid) { + dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n"); + goto free_mem; + } + + /* issue a Twstat */ + v9fs_blank_wstat(&wstat); + wstat.muid = v9ses->name; + wstat.extension = (char *) extension; + retval = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall); + if (retval < 0) { + PRINT_FCALL_ERROR("wstat error", fcall); + goto free_mem; + } + + err = v9fs_t_clunk(v9ses, fid->fid); + if (err < 0) { + dprintk(DEBUG_ERROR, "clunk failed: %d\n", err); + goto free_mem; + } + + d_drop(dentry); /* FID - will this also clunk? */ + +free_mem: + kfree(fcall); + return retval; +} + +/** + * v9fs_vfs_symlink - helper function to create symlinks + * @dir: directory inode containing symlink + * @dentry: dentry for symlink + * @symname: symlink data + * + * See 9P2000.u RFC for more information + * + */ + +static int +v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +{ + dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, + symname); + + return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname); +} + /** * v9fs_vfs_link - create a hardlink * @old_dentry: dentry for file to link to @@ -1173,64 +1066,24 @@ static int v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { - int retval = -EPERM; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); - struct v9fs_fcall *fcall = NULL; - struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); - struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry); - struct v9fs_fid *newfid = NULL; - char *symname = __getname(); + int retval; + struct v9fs_fid *oldfid; + char *name; dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, old_dentry->d_name.name); - if (!v9ses->extended) { - dprintk(DEBUG_ERROR, "not extended\n"); - goto FreeMem; - } - - /* get fid of old_dentry */ - sprintf(symname, "hardlink(%d)\n", oldfid->fid); - - /* issue a create */ - retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0); - if (retval != 0) - goto FreeMem; - - newfid = v9fs_fid_lookup(dentry); - if (!newfid) { - dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n"); - goto FreeMem; - } - - /* issue a twstat */ - v9fs_blank_mistat(v9ses, mistat); - strcpy(mistat->data + 1, symname); - mistat->extension = mistat->data + 1; - retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall); - if (retval < 0) { - dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", - FCALL_ERROR(fcall)); - goto FreeMem; - } - - kfree(fcall); - - if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) { - dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n", - FCALL_ERROR(fcall)); - goto FreeMem; + oldfid = v9fs_fid_lookup(old_dentry); + if (!oldfid) { + dprintk(DEBUG_ERROR, "can't find oldfid\n"); + return -EPERM; } - d_drop(dentry); /* FID - will this also clunk? */ - - kfree(fcall); - fcall = NULL; + name = __getname(); + sprintf(name, "hardlink(%d)\n", oldfid->fid); + retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name); + __putname(name); - FreeMem: - kfree(mistat); - kfree(fcall); - __putname(symname); return retval; } @@ -1246,82 +1099,30 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, static int v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) { - int retval = -EPERM; - struct v9fs_fid *newfid; - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); - struct v9fs_fcall *fcall = NULL; - struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); - char *symname = __getname(); + int retval; + char *name; dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); - if (!mistat) - return -ENOMEM; - - if (!new_valid_dev(rdev)) { - retval = -EINVAL; - goto FreeMem; - } - - if (!v9ses->extended) { - dprintk(DEBUG_ERROR, "not extended\n"); - goto FreeMem; - } - - /* issue a create */ - retval = v9fs_create(dir, dentry, mode, 0); - - if (retval != 0) - goto FreeMem; - - newfid = v9fs_fid_lookup(dentry); - if (!newfid) { - dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n"); - retval = -EINVAL; - goto FreeMem; - } + if (!new_valid_dev(rdev)) + return -EINVAL; + name = __getname(); /* build extension */ if (S_ISBLK(mode)) - sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev)); + sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev)); else if (S_ISCHR(mode)) - sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev)); + sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev)); else if (S_ISFIFO(mode)) - ; /* DO NOTHING */ + *name = 0; else { - retval = -EINVAL; - goto FreeMem; - } - - if (!S_ISFIFO(mode)) { - /* issue a twstat */ - v9fs_blank_mistat(v9ses, mistat); - strcpy(mistat->data + 1, symname); - mistat->extension = mistat->data + 1; - retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall); - if (retval < 0) { - dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", - FCALL_ERROR(fcall)); - goto FreeMem; - } + __putname(name); + return -EINVAL; } - /* need to update dcache so we show up */ - kfree(fcall); - - if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) { - dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n", - FCALL_ERROR(fcall)); - goto FreeMem; - } - - d_drop(dentry); /* FID - will this also clunk? */ - - FreeMem: - kfree(mistat); - kfree(fcall); - __putname(symname); + retval = v9fs_vfs_mkspecial(dir, dentry, mode, name); + __putname(name); return retval; } diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 82c5b0084079..ae0f06b3c11a 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -44,7 +44,6 @@ #include "v9fs.h" #include "9p.h" #include "v9fs_vfs.h" -#include "conv.h" #include "fid.h" static void v9fs_clear_inode(struct inode *); @@ -123,12 +122,13 @@ static struct super_block *v9fs_get_sb(struct file_system_type dprintk(DEBUG_VFS, " \n"); - v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL); + v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); if (!v9ses) return ERR_PTR(-ENOMEM); if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { dprintk(DEBUG_ERROR, "problem initiating session\n"); + kfree(v9ses); return ERR_PTR(newfid); } @@ -157,7 +157,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type stat_result = v9fs_t_stat(v9ses, newfid, &fcall); if (stat_result < 0) { dprintk(DEBUG_ERROR, "stat error\n"); - v9fs_t_clunk(v9ses, newfid, NULL); + v9fs_t_clunk(v9ses, newfid); v9fs_put_idpool(newfid, &v9ses->fidpool); } else { /* Setup the Root Inode */ @@ -167,10 +167,10 @@ static struct super_block *v9fs_get_sb(struct file_system_type goto put_back_sb; } - root_fid->qid = fcall->params.rstat.stat->qid; + root_fid->qid = fcall->params.rstat.stat.qid; root->d_inode->i_ino = - v9fs_qid2ino(&fcall->params.rstat.stat->qid); - v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb); + v9fs_qid2ino(&fcall->params.rstat.stat.qid); + v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb); } kfree(fcall); diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 175b2e8177c1..f3d3d81eb7e9 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -1,6 +1,6 @@ config BINFMT_ELF bool "Kernel support for ELF binaries" - depends on MMU + depends on MMU && (BROKEN || !FRV) default y ---help--- ELF (Executable and Linkable Format) is a format for libraries and diff --git a/fs/Makefile b/fs/Makefile index 73676111ebbe..35e9aec608e4 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ - ioprio.o pnode.o + ioprio.o pnode.o drop_caches.o obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_EPOLL) += eventpoll.o diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 6682d6d7f294..5c61c24dab2a 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -137,7 +137,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page) #endif /* determine how many magic numbers there should be in this page */ - latter = dir->i_size - (page->index << PAGE_CACHE_SHIFT); + latter = dir->i_size - page_offset(page); if (latter >= PAGE_SIZE) qty = PAGE_SIZE; else diff --git a/fs/afs/volume.h b/fs/afs/volume.h index 1e691889c4c9..bfdcf19ba3f3 100644 --- a/fs/afs/volume.h +++ b/fs/afs/volume.h @@ -18,8 +18,6 @@ #include "kafsasyncd.h" #include "cache.h" -#define __packed __attribute__((packed)) - typedef enum { AFS_VLUPD_SLEEP, /* sleeping waiting for update timer to fire */ AFS_VLUPD_PENDING, /* on pending queue */ @@ -115,7 +113,7 @@ struct afs_volume struct cachefs_cookie *cache; /* caching cookie */ #endif afs_volid_t vid; /* volume ID */ - afs_voltype_t __packed type; /* type of volume */ + afs_voltype_t type; /* type of volume */ char type_force; /* force volume type (suppress R/O -> R/W) */ unsigned short nservers; /* number of server slots filled */ unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ @@ -29,7 +29,6 @@ #include <linux/highmem.h> #include <linux/workqueue.h> #include <linux/security.h> -#include <linux/rcuref.h> #include <asm/kmap_types.h> #include <asm/uaccess.h> @@ -514,7 +513,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) /* Must be done under the lock to serialise against cancellation. * Call this aio_fput as it duplicates fput via the fput_work. */ - if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) { + if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); diff --git a/fs/attr.c b/fs/attr.c index 67bcd9b14ea5..b34732506f1d 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -67,20 +67,12 @@ EXPORT_SYMBOL(inode_change_ok); int inode_setattr(struct inode * inode, struct iattr * attr) { unsigned int ia_valid = attr->ia_valid; - int error = 0; - - if (ia_valid & ATTR_SIZE) { - if (attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); - if (error || (ia_valid == ATTR_SIZE)) - goto out; - } else { - /* - * We skipped the truncate but must still update - * timestamps - */ - ia_valid |= ATTR_MTIME|ATTR_CTIME; - } + + if (ia_valid & ATTR_SIZE && + attr->ia_size != i_size_read(inode)) { + int error = vmtruncate(inode, attr->ia_size); + if (error) + return error; } if (ia_valid & ATTR_UID) @@ -104,8 +96,8 @@ int inode_setattr(struct inode * inode, struct iattr * attr) inode->i_mode = mode; } mark_inode_dirty(inode); -out: - return error; + + return 0; } EXPORT_SYMBOL(inode_setattr); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index fca83e28edcf..385bed09b0d8 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -209,7 +209,7 @@ static inline int simple_empty_nolock(struct dentry *dentry) struct dentry *child; int ret = 0; - list_for_each_entry(child, &dentry->d_subdirs, d_child) + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) if (simple_positive(child)) goto out; ret = 1; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index feb6ac427d05..dc39589df165 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -105,7 +105,7 @@ repeat: next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ if (!simple_positive(dentry)) { @@ -138,7 +138,7 @@ resume: } if (this_parent != top) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; goto resume; } @@ -163,7 +163,7 @@ repeat: next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ if (!simple_positive(dentry)) { @@ -199,7 +199,7 @@ cont: } if (this_parent != parent) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; goto resume; } @@ -238,7 +238,7 @@ static struct dentry *autofs4_expire(struct super_block *sb, /* On exit from the loop expire is set to a dgot dentry * to expire or it's NULL */ while ( next != &root->d_subdirs ) { - struct dentry *dentry = list_entry(next, struct dentry, d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ if ( !simple_positive(dentry) ) { @@ -302,7 +302,7 @@ next: expired, (int)expired->d_name.len, expired->d_name.name); spin_lock(&dcache_lock); list_del(&expired->d_parent->d_subdirs); - list_add(&expired->d_parent->d_subdirs, &expired->d_child); + list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child); spin_unlock(&dcache_lock); return expired; } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 818b37be5153..2d3082854a29 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -91,7 +91,7 @@ repeat: next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - don`t care */ if (!simple_positive(dentry)) { @@ -117,7 +117,7 @@ resume: if (this_parent != sbi->root) { struct dentry *dentry = this_parent; - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; spin_unlock(&dcache_lock); DPRINTK("parent dentry %p %.*s", diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 2a771ec66956..2241405ffc41 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -143,7 +143,8 @@ static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t f } while(1) { - struct dentry *de = list_entry(list, struct dentry, d_child); + struct dentry *de = list_entry(list, + struct dentry, d_u.d_child); if (!d_unhashed(de) && de->d_inode) { spin_unlock(&dcache_lock); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f36f2210204f..80ca932ba0bd 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -58,7 +58,7 @@ extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); * If we don't support core dumping, then supply a NULL so we * don't even try. */ -#ifdef USE_ELF_CORE_DUMP +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file); #else #define elf_core_dump NULL @@ -288,11 +288,17 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) { unsigned long map_addr; + unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr); down_write(¤t->mm->mmap_sem); - map_addr = do_mmap(filep, ELF_PAGESTART(addr), - eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type, - eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr)); + /* mmap() will return -EINVAL if given a zero size, but a + * segment with zero filesize is perfectly valid */ + if (eppnt->p_filesz + pageoffset) + map_addr = do_mmap(filep, ELF_PAGESTART(addr), + eppnt->p_filesz + pageoffset, prot, type, + eppnt->p_offset - pageoffset); + else + map_addr = ELF_PAGESTART(addr); up_write(¤t->mm->mmap_sem); return(map_addr); } @@ -1107,7 +1113,7 @@ out: * Note that some platforms still use traditional core dumps and not * the ELF core dump. Each platform can select it as appropriate. */ -#ifdef USE_ELF_CORE_DUMP +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) /* * ELF core dumper diff --git a/fs/buffer.c b/fs/buffer.c index 5287be18633b..55f0975a9b15 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -153,14 +153,8 @@ int sync_blockdev(struct block_device *bdev) { int ret = 0; - if (bdev) { - int err; - - ret = filemap_fdatawrite(bdev->bd_inode->i_mapping); - err = filemap_fdatawait(bdev->bd_inode->i_mapping); - if (!ret) - ret = err; - } + if (bdev) + ret = filemap_write_and_wait(bdev->bd_inode->i_mapping); return ret; } EXPORT_SYMBOL(sync_blockdev); @@ -1768,7 +1762,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, * handle that here by just cleaning them. */ - block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); head = page_buffers(page); bh = head; @@ -2160,11 +2154,12 @@ int block_read_full_page(struct page *page, get_block_t *get_block) * truncates. Uses prepare/commit_write to allow the filesystem to * deal with the hole. */ -int generic_cont_expand(struct inode *inode, loff_t size) +static int __generic_cont_expand(struct inode *inode, loff_t size, + pgoff_t index, unsigned int offset) { struct address_space *mapping = inode->i_mapping; struct page *page; - unsigned long index, offset, limit; + unsigned long limit; int err; err = -EFBIG; @@ -2176,24 +2171,24 @@ int generic_cont_expand(struct inode *inode, loff_t size) if (size > inode->i_sb->s_maxbytes) goto out; - offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ - - /* ugh. in prepare/commit_write, if from==to==start of block, we - ** skip the prepare. make sure we never send an offset for the start - ** of a block - */ - if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { - offset++; - } - index = size >> PAGE_CACHE_SHIFT; err = -ENOMEM; page = grab_cache_page(mapping, index); if (!page) goto out; err = mapping->a_ops->prepare_write(NULL, page, offset, offset); - if (!err) { - err = mapping->a_ops->commit_write(NULL, page, offset, offset); + if (err) { + /* + * ->prepare_write() may have instantiated a few blocks + * outside i_size. Trim these off again. + */ + unlock_page(page); + page_cache_release(page); + vmtruncate(inode, inode->i_size); + goto out; } + + err = mapping->a_ops->commit_write(NULL, page, offset, offset); + unlock_page(page); page_cache_release(page); if (err > 0) @@ -2202,6 +2197,36 @@ out: return err; } +int generic_cont_expand(struct inode *inode, loff_t size) +{ + pgoff_t index; + unsigned int offset; + + offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */ + + /* ugh. in prepare/commit_write, if from==to==start of block, we + ** skip the prepare. make sure we never send an offset for the start + ** of a block + */ + if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { + /* caller must handle this extra byte. */ + offset++; + } + index = size >> PAGE_CACHE_SHIFT; + + return __generic_cont_expand(inode, size, index, offset); +} + +int generic_cont_expand_simple(struct inode *inode, loff_t size) +{ + loff_t pos = size - 1; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1; + + /* prepare/commit_write can handle even if from==to==start of block. */ + return __generic_cont_expand(inode, size, index, offset); +} + /* * For moronic filesystems that do not allow holes in file. * We may have to extend the file. @@ -2610,7 +2635,7 @@ int block_truncate_page(struct address_space *mapping, pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); unsigned blocksize; - pgoff_t iblock; + sector_t iblock; unsigned length, pos; struct inode *inode = mapping->host; struct page *page; @@ -2626,7 +2651,7 @@ int block_truncate_page(struct address_space *mapping, return 0; length = blocksize - length; - iblock = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits); page = grab_cache_page(mapping, index); err = -ENOMEM; @@ -3145,6 +3170,7 @@ EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_commit_write); EXPORT_SYMBOL(generic_cont_expand); +EXPORT_SYMBOL(generic_cont_expand_simple); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(invalidate_bdev); EXPORT_SYMBOL(ll_rw_block); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 14a1c72ced92..5ade53d7bca8 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -127,8 +127,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, if (file->f_dentry->d_inode->i_mapping) { /* BB no need to lock inode until after invalidate since namei code should already have it locked? */ - filemap_fdatawrite(file->f_dentry->d_inode->i_mapping); - filemap_fdatawait(file->f_dentry->d_inode->i_mapping); + filemap_write_and_wait(file->f_dentry->d_inode->i_mapping); } cFYI(1, ("invalidating remote inode since open detected it " "changed")); @@ -419,8 +418,7 @@ static int cifs_reopen_file(struct inode *inode, struct file *file, pCifsInode = CIFS_I(inode); if (pCifsInode) { if (can_flush) { - filemap_fdatawrite(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); /* temporarily disable caching while we go to server to get inode info */ pCifsInode->clientCanCacheAll = FALSE; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 411c1f7f84da..9558f51bca55 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1148,8 +1148,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) /* BB check if we need to refresh inode from server now ? BB */ /* need to flush data before changing file size on server */ - filemap_fdatawrite(direntry->d_inode->i_mapping); - filemap_fdatawait(direntry->d_inode->i_mapping); + filemap_write_and_wait(direntry->d_inode->i_mapping); if (attrs->ia_valid & ATTR_SIZE) { /* To avoid spurious oplock breaks from server, in the case of diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 80072fd9b7fa..c607d923350a 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag) spin_lock(&dcache_lock); list_for_each(child, &parent->d_subdirs) { - de = list_entry(child, struct dentry, d_child); + de = list_entry(child, struct dentry, d_u.d_child); /* don't know what to do with negative dentries */ if ( ! de->d_inode ) continue; diff --git a/fs/compat.c b/fs/compat.c index 55ac0324aaf1..271b75d1597f 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -494,9 +494,21 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, ret = sys_fcntl(fd, cmd, (unsigned long)&f); set_fs(old_fs); if (cmd == F_GETLK && ret == 0) { - if ((f.l_start >= COMPAT_OFF_T_MAX) || - ((f.l_start + f.l_len) > COMPAT_OFF_T_MAX)) + /* GETLK was successfule and we need to return the data... + * but it needs to fit in the compat structure. + * l_start shouldn't be too big, unless the original + * start + end is greater than COMPAT_OFF_T_MAX, in which + * case the app was asking for trouble, so we return + * -EOVERFLOW in that case. + * l_len could be too big, in which case we just truncate it, + * and only allow the app to see that part of the conflicting + * lock that might make sense to it anyway + */ + + if (f.l_start > COMPAT_OFF_T_MAX) ret = -EOVERFLOW; + if (f.l_len > COMPAT_OFF_T_MAX) + f.l_len = COMPAT_OFF_T_MAX; if (ret == 0) ret = put_compat_flock(&f, compat_ptr(arg)); } @@ -515,9 +527,11 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, (unsigned long)&f); set_fs(old_fs); if (cmd == F_GETLK64 && ret == 0) { - if ((f.l_start >= COMPAT_LOFF_T_MAX) || - ((f.l_start + f.l_len) > COMPAT_LOFF_T_MAX)) + /* need to return lock information - see above for commentary */ + if (f.l_start > COMPAT_LOFF_T_MAX) ret = -EOVERFLOW; + if (f.l_len > COMPAT_LOFF_T_MAX) + f.l_len = COMPAT_LOFF_T_MAX; if (ret == 0) ret = put_compat_flock64(&f, compat_ptr(arg)); } diff --git a/fs/dcache.c b/fs/dcache.c index 17e439138681..1536f15c4d4c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -71,7 +71,7 @@ struct dentry_stat_t dentry_stat = { static void d_callback(struct rcu_head *head) { - struct dentry * dentry = container_of(head, struct dentry, d_rcu); + struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu); if (dname_external(dentry)) kfree(dentry->d_name.name); @@ -86,7 +86,7 @@ static void d_free(struct dentry *dentry) { if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); - call_rcu(&dentry->d_rcu, d_callback); + call_rcu(&dentry->d_u.d_rcu, d_callback); } /* @@ -193,7 +193,7 @@ kill_it: { list_del(&dentry->d_lru); dentry_stat.nr_unused--; } - list_del(&dentry->d_child); + list_del(&dentry->d_u.d_child); dentry_stat.nr_dentry--; /* For d_free, below */ /*drops the locks, at that point nobody can reach this dentry */ dentry_iput(dentry); @@ -367,7 +367,7 @@ static inline void prune_one_dentry(struct dentry * dentry) struct dentry * parent; __d_drop(dentry); - list_del(&dentry->d_child); + list_del(&dentry->d_u.d_child); dentry_stat.nr_dentry--; /* For d_free, below */ dentry_iput(dentry); parent = dentry->d_parent; @@ -518,7 +518,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; /* Have we found a mount point ? */ if (d_mountpoint(dentry)) @@ -532,7 +532,7 @@ resume: * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; goto resume; } @@ -569,7 +569,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; if (!list_empty(&dentry->d_lru)) { @@ -610,7 +610,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name, found); * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; #ifdef DCACHE_DEBUG printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n", @@ -753,12 +753,12 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_parent = dget(parent); dentry->d_sb = parent->d_sb; } else { - INIT_LIST_HEAD(&dentry->d_child); + INIT_LIST_HEAD(&dentry->d_u.d_child); } spin_lock(&dcache_lock); if (parent) - list_add(&dentry->d_child, &parent->d_subdirs); + list_add(&dentry->d_u.d_child, &parent->d_subdirs); dentry_stat.nr_dentry++; spin_unlock(&dcache_lock); @@ -1310,8 +1310,8 @@ already_unhashed: /* Unhash the target: dput() will then get rid of it */ __d_drop(target); - list_del(&dentry->d_child); - list_del(&target->d_child); + list_del(&dentry->d_u.d_child); + list_del(&target->d_u.d_child); /* Switch the names.. */ switch_names(dentry, target); @@ -1322,15 +1322,15 @@ already_unhashed: if (IS_ROOT(dentry)) { dentry->d_parent = target->d_parent; target->d_parent = target; - INIT_LIST_HEAD(&target->d_child); + INIT_LIST_HEAD(&target->d_u.d_child); } else { do_switch(dentry->d_parent, target->d_parent); /* And add them back to the (new) parent lists */ - list_add(&target->d_child, &target->d_parent->d_subdirs); + list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); } - list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); + list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); spin_unlock(&target->d_lock); spin_unlock(&dentry->d_lock); write_sequnlock(&rename_lock); @@ -1568,7 +1568,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; if (d_unhashed(dentry)||!dentry->d_inode) continue; @@ -1579,7 +1579,7 @@ resume: atomic_dec(&dentry->d_count); } if (this_parent != root) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; atomic_dec(&this_parent->d_count); this_parent = this_parent->d_parent; goto resume; diff --git a/fs/drop_caches.c b/fs/drop_caches.c new file mode 100644 index 000000000000..4e4762389bdc --- /dev/null +++ b/fs/drop_caches.c @@ -0,0 +1,68 @@ +/* + * Implement the manual drop-all-pagecache function + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/writeback.h> +#include <linux/sysctl.h> +#include <linux/gfp.h> + +/* A global variable is a bit ugly, but it keeps the code simple */ +int sysctl_drop_caches; + +static void drop_pagecache_sb(struct super_block *sb) +{ + struct inode *inode; + + spin_lock(&inode_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (inode->i_state & (I_FREEING|I_WILL_FREE)) + continue; + invalidate_inode_pages(inode->i_mapping); + } + spin_unlock(&inode_lock); +} + +void drop_pagecache(void) +{ + struct super_block *sb; + + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (sb->s_root) + drop_pagecache_sb(sb); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + spin_unlock(&sb_lock); +} + +void drop_slab(void) +{ + int nr_objects; + + do { + nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); + } while (nr_objects > 10); +} + +int drop_caches_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + proc_dointvec_minmax(table, write, file, buffer, length, ppos); + if (write) { + if (sysctl_drop_caches & 1) + drop_pagecache(); + if (sysctl_drop_caches & 2) + drop_slab(); + } + return 0; +} diff --git a/fs/exec.c b/fs/exec.c index e75a9548da8e..fd02ea4a81e9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -760,7 +760,7 @@ no_thread_group: spin_lock(&oldsighand->siglock); spin_lock(&newsighand->siglock); - current->sighand = newsighand; + rcu_assign_pointer(current->sighand, newsighand); recalc_sigpending(); spin_unlock(&newsighand->siglock); @@ -768,7 +768,7 @@ no_thread_group: write_unlock_irq(&tasklist_lock); if (atomic_dec_and_test(&oldsighand->count)) - kmem_cache_free(sighand_cachep, oldsighand); + sighand_free(oldsighand); } BUG_ON(!thread_group_leader(current)); @@ -1462,6 +1462,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) { current->signal->flags = SIGNAL_GROUP_EXIT; current->signal->group_exit_code = exit_code; + current->signal->group_stop_count = 0; retval = 0; } spin_unlock_irq(¤t->sighand->siglock); @@ -1477,7 +1478,6 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) * Clear any false indication of pending signals that might * be seen by the filesystem code called to write the core file. */ - current->signal->group_stop_count = 0; clear_thread_flag(TIF_SIGPENDING); if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump) @@ -1505,7 +1505,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) goto close_fail; if (!file->f_op->write) goto close_fail; - if (do_truncate(file->f_dentry, 0, file) != 0) + if (do_truncate(file->f_dentry, 0, 0, file) != 0) goto close_fail; retval = binfmt->core_dump(signr, regs, file); diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 9e4a24376210..69078079b19c 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -651,7 +651,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { ext3_warning(sb, __FUNCTION__, - "bad orphan ino %lu! e2fsck was run?\n", ino); + "bad orphan ino %lu! e2fsck was run?", ino); goto out; } @@ -660,7 +660,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) bitmap_bh = read_inode_bitmap(sb, block_group); if (!bitmap_bh) { ext3_warning(sb, __FUNCTION__, - "inode bitmap error for orphan %lu\n", ino); + "inode bitmap error for orphan %lu", ino); goto out; } @@ -672,7 +672,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) !(inode = iget(sb, ino)) || is_bad_inode(inode) || NEXT_ORPHAN(inode) > max_ino) { ext3_warning(sb, __FUNCTION__, - "bad orphan inode %lu! e2fsck was run?\n", ino); + "bad orphan inode %lu! e2fsck was run?", ino); printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", bit, (unsigned long long)bitmap_bh->b_blocknr, ext3_test_bit(bit, bitmap_bh->b_data)); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index b3c690a3b54a..af193a304ee5 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1476,7 +1476,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, if (levels && (dx_get_count(frames->entries) == dx_get_limit(frames->entries))) { ext3_warning(sb, __FUNCTION__, - "Directory index full!\n"); + "Directory index full!"); err = -ENOSPC; goto cleanup; } diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 6104ad310507..1041dab6de2f 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -31,7 +31,7 @@ static int verify_group_input(struct super_block *sb, unsigned start = le32_to_cpu(es->s_blocks_count); unsigned end = start + input->blocks_count; unsigned group = input->group; - unsigned itend = input->inode_table + EXT3_SB(sb)->s_itb_per_group; + unsigned itend = input->inode_table + sbi->s_itb_per_group; unsigned overhead = ext3_bg_has_super(sb, group) ? (1 + ext3_bg_num_gdb(sb, group) + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; @@ -340,7 +340,7 @@ static int verify_reserved_gdb(struct super_block *sb, while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ ext3_warning(sb, __FUNCTION__, - "reserved GDT %ld missing grp %d (%ld)\n", + "reserved GDT %ld missing grp %d (%ld)", blk, grp, grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); return -EINVAL; @@ -393,7 +393,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, if (EXT3_SB(sb)->s_sbh->b_blocknr != le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) { ext3_warning(sb, __FUNCTION__, - "won't resize using backup superblock at %llu\n", + "won't resize using backup superblock at %llu", (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr); return -EPERM; } @@ -417,7 +417,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, data = (__u32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { ext3_warning(sb, __FUNCTION__, - "new group %u GDT block %lu not reserved\n", + "new group %u GDT block %lu not reserved", input->group, gdblock); err = -EINVAL; goto exit_dind; @@ -540,7 +540,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) { ext3_warning(sb, __FUNCTION__, - "reserved block %lu not at offset %ld\n", + "reserved block %lu not at offset %ld", blk, (long)(data - (__u32 *)dind->b_data)); err = -EINVAL; goto exit_bh; @@ -683,7 +683,7 @@ exit_err: if (err) { ext3_warning(sb, __FUNCTION__, "can't update backup for group %d (err %d), " - "forcing fsck on next reboot\n", group, err); + "forcing fsck on next reboot", group, err); sbi->s_mount_state &= ~EXT3_VALID_FS; sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS); mark_buffer_dirty(sbi->s_sbh); @@ -722,7 +722,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { ext3_warning(sb, __FUNCTION__, - "Can't resize non-sparse filesystem further\n"); + "Can't resize non-sparse filesystem further"); return -EPERM; } @@ -730,13 +730,13 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) if (!EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_RESIZE_INODE)){ ext3_warning(sb, __FUNCTION__, - "No reserved GDT blocks, can't resize\n"); + "No reserved GDT blocks, can't resize"); return -EPERM; } inode = iget(sb, EXT3_RESIZE_INO); if (!inode || is_bad_inode(inode)) { ext3_warning(sb, __FUNCTION__, - "Error opening resize inode\n"); + "Error opening resize inode"); iput(inode); return -ENOENT; } @@ -764,9 +764,9 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) } lock_super(sb); - if (input->group != EXT3_SB(sb)->s_groups_count) { + if (input->group != sbi->s_groups_count) { ext3_warning(sb, __FUNCTION__, - "multiple resizers run on filesystem!\n"); + "multiple resizers run on filesystem!"); err = -EBUSY; goto exit_journal; } @@ -799,7 +799,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) * data. So we need to be careful to set all of the relevant * group descriptor data etc. *before* we enable the group. * - * The key field here is EXT3_SB(sb)->s_groups_count: as long as + * The key field here is sbi->s_groups_count: as long as * that retains its old value, nobody is going to access the new * group. * @@ -859,7 +859,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) smp_wmb(); /* Update the global fs size fields */ - EXT3_SB(sb)->s_groups_count++; + sbi->s_groups_count++; ext3_journal_dirty_metadata(handle, primary); @@ -874,7 +874,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) percpu_counter_mod(&sbi->s_freeinodes_counter, EXT3_INODES_PER_GROUP(sb)); - ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + ext3_journal_dirty_metadata(handle, sbi->s_sbh); sb->s_dirt = 1; exit_journal: @@ -937,7 +937,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, if (last == 0) { ext3_warning(sb, __FUNCTION__, - "need to use ext2online to resize further\n"); + "need to use ext2online to resize further"); return -EPERM; } @@ -973,7 +973,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, lock_super(sb); if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { ext3_warning(sb, __FUNCTION__, - "multiple resizers run on filesystem!\n"); + "multiple resizers run on filesystem!"); err = -EBUSY; goto exit_put; } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 4e6730622d90..7c45acf94589 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -43,7 +43,8 @@ #include "acl.h" #include "namei.h" -static int ext3_load_journal(struct super_block *, struct ext3_super_block *); +static int ext3_load_journal(struct super_block *, struct ext3_super_block *, + unsigned long journal_devnum); static int ext3_create_journal(struct super_block *, struct ext3_super_block *, int); static void ext3_commit_super (struct super_block * sb, @@ -628,7 +629,7 @@ enum { Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, - Opt_commit, Opt_journal_update, Opt_journal_inum, + Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, @@ -666,6 +667,7 @@ static match_table_t tokens = { {Opt_commit, "commit=%u"}, {Opt_journal_update, "journal=update"}, {Opt_journal_inum, "journal=%u"}, + {Opt_journal_dev, "journal_dev=%u"}, {Opt_abort, "abort"}, {Opt_data_journal, "data=journal"}, {Opt_data_ordered, "data=ordered"}, @@ -705,8 +707,9 @@ static unsigned long get_sb_block(void **data) return sb_block; } -static int parse_options (char * options, struct super_block *sb, - unsigned long * inum, unsigned long *n_blocks_count, int is_remount) +static int parse_options (char *options, struct super_block *sb, + unsigned long *inum, unsigned long *journal_devnum, + unsigned long *n_blocks_count, int is_remount) { struct ext3_sb_info *sbi = EXT3_SB(sb); char * p; @@ -839,6 +842,16 @@ static int parse_options (char * options, struct super_block *sb, return 0; *inum = option; break; + case Opt_journal_dev: + if (is_remount) { + printk(KERN_ERR "EXT3-fs: cannot specify " + "journal on remount\n"); + return 0; + } + if (match_int(&args[0], &option)) + return 0; + *journal_devnum = option; + break; case Opt_noload: set_opt (sbi->s_mount_opt, NOLOAD); break; @@ -1331,6 +1344,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) unsigned long logic_sb_block; unsigned long offset = 0; unsigned long journal_inum = 0; + unsigned long journal_devnum = 0; unsigned long def_mount_opts; struct inode *root; int blocksize; @@ -1411,7 +1425,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, RESERVATION); - if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0)) + if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, + NULL, 0)) goto failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | @@ -1622,7 +1637,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) */ if (!test_opt(sb, NOLOAD) && EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { - if (ext3_load_journal(sb, es)) + if (ext3_load_journal(sb, es, journal_devnum)) goto failed_mount2; } else if (journal_inum) { if (ext3_create_journal(sb, es, journal_inum)) @@ -1902,15 +1917,24 @@ out_bdev: return NULL; } -static int ext3_load_journal(struct super_block * sb, - struct ext3_super_block * es) +static int ext3_load_journal(struct super_block *sb, + struct ext3_super_block *es, + unsigned long journal_devnum) { journal_t *journal; int journal_inum = le32_to_cpu(es->s_journal_inum); - dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); + dev_t journal_dev; int err = 0; int really_read_only; + if (journal_devnum && + journal_devnum != le32_to_cpu(es->s_journal_dev)) { + printk(KERN_INFO "EXT3-fs: external journal device major/minor " + "numbers have changed\n"); + journal_dev = new_decode_dev(journal_devnum); + } else + journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); + really_read_only = bdev_read_only(sb->s_bdev); /* @@ -1969,6 +1993,16 @@ static int ext3_load_journal(struct super_block * sb, EXT3_SB(sb)->s_journal = journal; ext3_clear_journal_err(sb, es); + + if (journal_devnum && + journal_devnum != le32_to_cpu(es->s_journal_dev)) { + es->s_journal_dev = cpu_to_le32(journal_devnum); + sb->s_dirt = 1; + + /* Make sure we flush the recovery flag to disk. */ + ext3_commit_super(sb, es, 1); + } + return 0; } @@ -2197,7 +2231,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) /* * Allow the "check" option to be passed as a remount option. */ - if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) { + if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { err = -EINVAL; goto restore_opts; } diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 77c24fcf712a..1acc941245fb 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -295,7 +295,8 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) return dclus; } -int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys) +int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, + unsigned long *mapped_blocks) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); @@ -303,9 +304,12 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys) int cluster, offset; *phys = 0; + *mapped_blocks = 0; if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) { - if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) + if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) { *phys = sector + sbi->dir_start; + *mapped_blocks = 1; + } return 0; } last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1)) @@ -318,7 +322,11 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys) cluster = fat_bmap_cluster(inode, cluster); if (cluster < 0) return cluster; - else if (cluster) + else if (cluster) { *phys = fat_clus_to_blknr(sbi, cluster) + offset; + *mapped_blocks = sbi->sec_per_clus - offset; + if (*mapped_blocks > last_block - sector) + *mapped_blocks = last_block - sector; + } return 0; } diff --git a/fs/fat/dir.c b/fs/fat/dir.c index ba824964b9bb..eef1b81aa294 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -45,8 +45,8 @@ static inline void fat_dir_readahead(struct inode *dir, sector_t iblock, if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO)) return; - bh = sb_getblk(sb, phys); - if (bh && !buffer_uptodate(bh)) { + bh = sb_find_get_block(sb, phys); + if (bh == NULL || !buffer_uptodate(bh)) { for (sec = 0; sec < sbi->sec_per_clus; sec++) sb_breadahead(sb, phys + sec); } @@ -68,8 +68,8 @@ static int fat__get_entry(struct inode *dir, loff_t *pos, { struct super_block *sb = dir->i_sb; sector_t phys, iblock; - int offset; - int err; + unsigned long mapped_blocks; + int err, offset; next: if (*bh) @@ -77,7 +77,7 @@ next: *bh = NULL; iblock = *pos >> sb->s_blocksize_bits; - err = fat_bmap(dir, iblock, &phys); + err = fat_bmap(dir, iblock, &phys, &mapped_blocks); if (err || !phys) return -1; /* beyond EOF or error */ @@ -418,7 +418,7 @@ EODir: return err; } -EXPORT_SYMBOL(fat_search_long); +EXPORT_SYMBOL_GPL(fat_search_long); struct fat_ioctl_filldir_callback { struct dirent __user *dirent; @@ -780,7 +780,7 @@ int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, return -ENOENT; } -EXPORT_SYMBOL(fat_get_dotdot_entry); +EXPORT_SYMBOL_GPL(fat_get_dotdot_entry); /* See if directory is empty */ int fat_dir_empty(struct inode *dir) @@ -803,7 +803,7 @@ int fat_dir_empty(struct inode *dir) return result; } -EXPORT_SYMBOL(fat_dir_empty); +EXPORT_SYMBOL_GPL(fat_dir_empty); /* * fat_subdirs counts the number of sub-directories of dir. It can be run @@ -849,7 +849,7 @@ int fat_scan(struct inode *dir, const unsigned char *name, return -ENOENT; } -EXPORT_SYMBOL(fat_scan); +EXPORT_SYMBOL_GPL(fat_scan); static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) { @@ -936,7 +936,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) return 0; } -EXPORT_SYMBOL(fat_remove_entries); +EXPORT_SYMBOL_GPL(fat_remove_entries); static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, struct buffer_head **bhs, int nr_bhs) @@ -1048,7 +1048,7 @@ error: return err; } -EXPORT_SYMBOL(fat_alloc_new_dir); +EXPORT_SYMBOL_GPL(fat_alloc_new_dir); static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, int *nr_cluster, struct msdos_dir_entry **de, @@ -1264,4 +1264,4 @@ error_remove: return err; } -EXPORT_SYMBOL(fat_add_entries); +EXPORT_SYMBOL_GPL(fat_add_entries); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 4164cd54c4d1..a1a9e0451217 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -476,6 +476,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster) sbi->prev_free = entry; if (sbi->free_clusters != -1) sbi->free_clusters--; + sb->s_dirt = 1; cluster[idx_clus] = entry; idx_clus++; @@ -496,6 +497,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster) /* Couldn't allocate the free entries */ sbi->free_clusters = 0; + sb->s_dirt = 1; err = -ENOSPC; out: @@ -509,7 +511,6 @@ out: } for (i = 0; i < nr_bhs; i++) brelse(bhs[i]); - fat_clusters_flush(sb); if (err && idx_clus) fat_free_clusters(inode, cluster[0]); @@ -542,8 +543,10 @@ int fat_free_clusters(struct inode *inode, int cluster) } ops->ent_put(&fatent, FAT_ENT_FREE); - if (sbi->free_clusters != -1) + if (sbi->free_clusters != -1) { sbi->free_clusters++; + sb->s_dirt = 1; + } if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) { if (sb->s_flags & MS_SYNCHRONOUS) { @@ -578,7 +581,7 @@ error: return err; } -EXPORT_SYMBOL(fat_free_clusters); +EXPORT_SYMBOL_GPL(fat_free_clusters); int fat_count_free_clusters(struct super_block *sb) { @@ -605,6 +608,7 @@ int fat_count_free_clusters(struct super_block *sb) } while (fat_ent_next(sbi, &fatent)); } sbi->free_clusters = free; + sb->s_dirt = 1; fatent_brelse(&fatent); out: unlock_fat(sbi); diff --git a/fs/fat/file.c b/fs/fat/file.c index 7134403d5be2..9b07c328a6fc 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -11,6 +11,7 @@ #include <linux/msdos_fs.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> +#include <linux/writeback.h> int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) @@ -124,6 +125,24 @@ struct file_operations fat_file_operations = { .sendfile = generic_file_sendfile, }; +static int fat_cont_expand(struct inode *inode, loff_t size) +{ + struct address_space *mapping = inode->i_mapping; + loff_t start = inode->i_size, count = size - inode->i_size; + int err; + + err = generic_cont_expand_simple(inode, size); + if (err) + goto out; + + inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + if (IS_SYNC(inode)) + err = sync_page_range_nolock(inode, mapping, start, count); +out: + return err; +} + int fat_notify_change(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); @@ -132,11 +151,17 @@ int fat_notify_change(struct dentry *dentry, struct iattr *attr) lock_kernel(); - /* FAT cannot truncate to a longer file */ + /* + * Expand the file. Since inode_setattr() updates ->i_size + * before calling the ->truncate(), but FAT needs to fill the + * hole before it. + */ if (attr->ia_valid & ATTR_SIZE) { if (attr->ia_size > inode->i_size) { - error = -EPERM; - goto out; + error = fat_cont_expand(inode, attr->ia_size); + if (error || attr->ia_valid == ATTR_SIZE) + goto out; + attr->ia_valid &= ~ATTR_SIZE; } } @@ -173,7 +198,7 @@ out: return error; } -EXPORT_SYMBOL(fat_notify_change); +EXPORT_SYMBOL_GPL(fat_notify_change); /* Free all clusters after the skip'th cluster. */ static int fat_free(struct inode *inode, int skip) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index a0f9b9fe1307..e7f4aa7fc686 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -18,10 +18,12 @@ #include <linux/seq_file.h> #include <linux/msdos_fs.h> #include <linux/pagemap.h> +#include <linux/mpage.h> #include <linux/buffer_head.h> #include <linux/mount.h> #include <linux/vfs.h> #include <linux/parser.h> +#include <linux/uio.h> #include <asm/unaligned.h> #ifndef CONFIG_FAT_DEFAULT_IOCHARSET @@ -48,51 +50,97 @@ static int fat_add_cluster(struct inode *inode) return err; } -static int fat_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) +static int __fat_get_blocks(struct inode *inode, sector_t iblock, + unsigned long *max_blocks, + struct buffer_head *bh_result, int create) { struct super_block *sb = inode->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); sector_t phys; - int err; + unsigned long mapped_blocks; + int err, offset; - err = fat_bmap(inode, iblock, &phys); + err = fat_bmap(inode, iblock, &phys, &mapped_blocks); if (err) return err; if (phys) { map_bh(bh_result, sb, phys); + *max_blocks = min(mapped_blocks, *max_blocks); return 0; } if (!create) return 0; + if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) { fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)", MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private); return -EIO; } - if (!((unsigned long)iblock & (MSDOS_SB(sb)->sec_per_clus - 1))) { + + offset = (unsigned long)iblock & (sbi->sec_per_clus - 1); + if (!offset) { + /* TODO: multiple cluster allocation would be desirable. */ err = fat_add_cluster(inode); if (err) return err; } - MSDOS_I(inode)->mmu_private += sb->s_blocksize; - err = fat_bmap(inode, iblock, &phys); + /* available blocks on this cluster */ + mapped_blocks = sbi->sec_per_clus - offset; + + *max_blocks = min(mapped_blocks, *max_blocks); + MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits; + + err = fat_bmap(inode, iblock, &phys, &mapped_blocks); if (err) return err; - if (!phys) - BUG(); + BUG_ON(!phys); + BUG_ON(*max_blocks != mapped_blocks); set_buffer_new(bh_result); map_bh(bh_result, sb, phys); return 0; } +static int fat_get_blocks(struct inode *inode, sector_t iblock, + unsigned long max_blocks, + struct buffer_head *bh_result, int create) +{ + struct super_block *sb = inode->i_sb; + int err; + + err = __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create); + if (err) + return err; + bh_result->b_size = max_blocks << sb->s_blocksize_bits; + return 0; +} + +static int fat_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + unsigned long max_blocks = 1; + return __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create); +} + static int fat_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, fat_get_block, wbc); } +static int fat_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + return mpage_writepages(mapping, wbc, fat_get_block); +} + static int fat_readpage(struct file *file, struct page *page) { - return block_read_full_page(page, fat_get_block); + return mpage_readpage(page, fat_get_block); +} + +static int fat_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, fat_get_block); } static int fat_prepare_write(struct file *file, struct page *page, @@ -115,6 +163,34 @@ static int fat_commit_write(struct file *file, struct page *page, return err; } +static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + + if (rw == WRITE) { + /* + * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(), + * so we need to update the ->mmu_private to block boundary. + * + * But we must fill the remaining area or hole by nul for + * updating ->mmu_private. + */ + loff_t size = offset + iov_length(iov, nr_segs); + if (MSDOS_I(inode)->mmu_private < size) + return -EINVAL; + } + + /* + * FAT need to use the DIO_LOCKING for avoiding the race + * condition of fat_get_block() and ->truncate(). + */ + return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, fat_get_blocks, NULL); +} + static sector_t _fat_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping, block, fat_get_block); @@ -122,10 +198,13 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block) static struct address_space_operations fat_aops = { .readpage = fat_readpage, + .readpages = fat_readpages, .writepage = fat_writepage, + .writepages = fat_writepages, .sync_page = block_sync_page, .prepare_write = fat_prepare_write, .commit_write = fat_commit_write, + .direct_IO = fat_direct_IO, .bmap = _fat_bmap }; @@ -182,7 +261,7 @@ void fat_attach(struct inode *inode, loff_t i_pos) spin_unlock(&sbi->inode_hash_lock); } -EXPORT_SYMBOL(fat_attach); +EXPORT_SYMBOL_GPL(fat_attach); void fat_detach(struct inode *inode) { @@ -193,7 +272,7 @@ void fat_detach(struct inode *inode) spin_unlock(&sbi->inode_hash_lock); } -EXPORT_SYMBOL(fat_detach); +EXPORT_SYMBOL_GPL(fat_detach); struct inode *fat_iget(struct super_block *sb, loff_t i_pos) { @@ -347,7 +426,7 @@ out: return inode; } -EXPORT_SYMBOL(fat_build_inode); +EXPORT_SYMBOL_GPL(fat_build_inode); static void fat_delete_inode(struct inode *inode) { @@ -374,12 +453,17 @@ static void fat_clear_inode(struct inode *inode) unlock_kernel(); } -static void fat_put_super(struct super_block *sb) +static void fat_write_super(struct super_block *sb) { - struct msdos_sb_info *sbi = MSDOS_SB(sb); + sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) fat_clusters_flush(sb); +} + +static void fat_put_super(struct super_block *sb) +{ + struct msdos_sb_info *sbi = MSDOS_SB(sb); if (sbi->nls_disk) { unload_nls(sbi->nls_disk); @@ -537,7 +621,7 @@ int fat_sync_inode(struct inode *inode) return fat_write_inode(inode, 1); } -EXPORT_SYMBOL(fat_sync_inode); +EXPORT_SYMBOL_GPL(fat_sync_inode); static int fat_show_options(struct seq_file *m, struct vfsmount *mnt); static struct super_operations fat_sops = { @@ -546,6 +630,7 @@ static struct super_operations fat_sops = { .write_inode = fat_write_inode, .delete_inode = fat_delete_inode, .put_super = fat_put_super, + .write_super = fat_write_super, .statfs = fat_statfs, .clear_inode = fat_clear_inode, .remount_fs = fat_remount, @@ -1347,7 +1432,7 @@ out_fail: return error; } -EXPORT_SYMBOL(fat_fill_super); +EXPORT_SYMBOL_GPL(fat_fill_super); int __init fat_cache_init(void); void fat_cache_destroy(void); diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 2a0df2122f5d..32fb0a3f1da4 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -33,7 +33,7 @@ void fat_fs_panic(struct super_block *s, const char *fmt, ...) } } -EXPORT_SYMBOL(fat_fs_panic); +EXPORT_SYMBOL_GPL(fat_fs_panic); /* Flushes the number of free clusters on FAT32 */ /* XXX: Need to write one per FSINFO block. Currently only writes 1 */ @@ -67,8 +67,6 @@ void fat_clusters_flush(struct super_block *sb) if (sbi->prev_free != -1) fsinfo->next_cluster = cpu_to_le32(sbi->prev_free); mark_buffer_dirty(bh); - if (sb->s_flags & MS_SYNCHRONOUS) - sync_dirty_buffer(bh); } brelse(bh); } @@ -194,7 +192,7 @@ void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date) *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9)); } -EXPORT_SYMBOL(fat_date_unix2dos); +EXPORT_SYMBOL_GPL(fat_date_unix2dos); int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { @@ -222,4 +220,4 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) return err; } -EXPORT_SYMBOL(fat_sync_bhs); +EXPORT_SYMBOL_GPL(fat_sync_bhs); diff --git a/fs/fcntl.c b/fs/fcntl.c index 863b46e0d78a..9903bde475f2 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -457,11 +457,11 @@ static void send_sigio_to_task(struct task_struct *p, else si.si_band = band_table[reason - POLL_IN]; si.si_fd = fd; - if (!send_group_sig_info(fown->signum, &si, p)) + if (!group_send_sig_info(fown->signum, &si, p)) break; /* fall-through: fall back on the old plain SIGIO signal */ case 0: - send_group_sig_info(SIGIO, SEND_SIG_PRIV, p); + group_send_sig_info(SIGIO, SEND_SIG_PRIV, p); } } @@ -495,7 +495,7 @@ static void send_sigurg_to_task(struct task_struct *p, struct fown_struct *fown) { if (sigio_perm(p, fown, SIGURG)) - send_group_sig_info(SIGURG, SEND_SIG_PRIV, p); + group_send_sig_info(SIGURG, SEND_SIG_PRIV, p); } int send_sigurg(struct fown_struct *fown) diff --git a/fs/file_table.c b/fs/file_table.c index c3a5e2fd663b..6142250104a6 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -117,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp); void fastcall fput(struct file *file) { - if (rcuref_dec_and_test(&file->f_count)) + if (atomic_dec_and_test(&file->f_count)) __fput(file); } @@ -166,7 +166,7 @@ struct file fastcall *fget(unsigned int fd) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (!rcuref_inc_lf(&file->f_count)) { + if (!atomic_inc_not_zero(&file->f_count)) { /* File object ref couldn't be taken */ rcu_read_unlock(); return NULL; @@ -198,7 +198,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (rcuref_inc_lf(&file->f_count)) + if (atomic_inc_not_zero(&file->f_count)) *fput_needed = 1; else /* Didn't get the reference, someone's freed */ @@ -213,7 +213,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed) void put_filp(struct file *file) { - if (rcuref_dec_and_test(&file->f_count)) { + if (atomic_dec_and_test(&file->f_count)) { security_file_free(file); file_kill(file); file_free(file); diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index d0401dc68d41..6f5df1700e95 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -99,8 +99,8 @@ static int vxfs_immed_readpage(struct file *fp, struct page *pp) { struct vxfs_inode_info *vip = VXFS_INO(pp->mapping->host); - u_int64_t offset = pp->index << PAGE_CACHE_SHIFT; - caddr_t kaddr; + u_int64_t offset = (u_int64_t)pp->index << PAGE_CACHE_SHIFT; + caddr_t kaddr; kaddr = kmap(pp); memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE); diff --git a/fs/inode.c b/fs/inode.c index d8d04bd72b59..fd568caf7f74 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -770,7 +770,7 @@ EXPORT_SYMBOL(igrab); * * Note, @test is called with the inode_lock held, so can't sleep. */ -static inline struct inode *ifind(struct super_block *sb, +static struct inode *ifind(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), void *data, const int wait) { @@ -804,7 +804,7 @@ static inline struct inode *ifind(struct super_block *sb, * * Otherwise NULL is returned. */ -static inline struct inode *ifind_fast(struct super_block *sb, +static struct inode *ifind_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino) { struct inode *inode; diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 3dcc6d2162cb..2559ee10beda 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -757,7 +757,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page) read_len = 0; result = 0; - offset = page->index << PAGE_CACHE_SHIFT; + offset = page_offset(page); kmap(page); buf = page_address(page); @@ -1545,7 +1545,7 @@ jffs_commit_write(struct file *filp, struct page *page, { void *addr = page_address(page) + from; /* XXX: PAGE_CACHE_SHIFT or PAGE_SHIFT */ - loff_t pos = (page->index<<PAGE_CACHE_SHIFT) + from; + loff_t pos = page_offset(page) + from; return jffs_file_write(filp, addr, to-from, &pos); } /* jffs_commit_write() */ diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 68000a50ceb6..2967b7393415 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -302,8 +302,7 @@ int dbSync(struct inode *ipbmap) /* * write out dirty pages of bmap */ - filemap_fdatawrite(ipbmap->i_mapping); - filemap_fdatawait(ipbmap->i_mapping); + filemap_write_and_wait(ipbmap->i_mapping); diWriteSpecial(ipbmap, 0); diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 28201b194f53..31b4aa13dd4b 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -265,8 +265,7 @@ int diSync(struct inode *ipimap) /* * write out dirty pages of imap */ - filemap_fdatawrite(ipimap->i_mapping); - filemap_fdatawait(ipimap->i_mapping); + filemap_write_and_wait(ipimap->i_mapping); diWriteSpecial(ipimap, 0); @@ -565,8 +564,7 @@ void diFreeSpecial(struct inode *ip) jfs_err("diFreeSpecial called with NULL ip!"); return; } - filemap_fdatawrite(ip->i_mapping); - filemap_fdatawait(ip->i_mapping); + filemap_write_and_wait(ip->i_mapping); truncate_inode_pages(ip->i_mapping, 0); iput(ip); } diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index b660c93c92de..2ddb6b892bcf 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -1231,10 +1231,8 @@ int txCommit(tid_t tid, /* transaction identifier */ * when we don't need to worry about it at all. * * if ((!S_ISDIR(ip->i_mode)) - * && (tblk->flag & COMMIT_DELETE) == 0) { - * filemap_fdatawrite(ip->i_mapping); - * filemap_fdatawait(ip->i_mapping); - * } + * && (tblk->flag & COMMIT_DELETE) == 0) + * filemap_write_and_wait(ip->i_mapping); */ /* diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index 5cf91785b541..21eaf7ac0fcb 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c @@ -108,8 +108,7 @@ int jfs_umount(struct super_block *sb) * Make sure all metadata makes it to disk before we mark * the superblock as clean */ - filemap_fdatawrite(sbi->direct_inode->i_mapping); - filemap_fdatawait(sbi->direct_inode->i_mapping); + filemap_write_and_wait(sbi->direct_inode->i_mapping); /* * ensure all file system file pages are propagated to their @@ -161,8 +160,7 @@ int jfs_umount_rw(struct super_block *sb) * mark the superblock clean before everything is flushed to * disk. */ - filemap_fdatawrite(sbi->direct_inode->i_mapping); - filemap_fdatawait(sbi->direct_inode->i_mapping); + filemap_write_and_wait(sbi->direct_inode->i_mapping); updateSuper(sb, FM_CLEAN); diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index c6dc254d3253..45180361871c 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -376,8 +376,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) * by txCommit(); */ filemap_fdatawait(ipbmap->i_mapping); - filemap_fdatawrite(ipbmap->i_mapping); - filemap_fdatawait(ipbmap->i_mapping); + filemap_write_and_wait(ipbmap->i_mapping); diWriteSpecial(ipbmap, 0); newPage = nPages; /* first new page number */ diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4226af3ea91b..8d31f1336431 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -502,8 +502,7 @@ out_no_rw: jfs_err("jfs_umount failed with return code %d", rc); } out_mount_failed: - filemap_fdatawrite(sbi->direct_inode->i_mapping); - filemap_fdatawait(sbi->direct_inode->i_mapping); + filemap_write_and_wait(sbi->direct_inode->i_mapping); truncate_inode_pages(sbi->direct_inode->i_mapping, 0); make_bad_inode(sbi->direct_inode); iput(sbi->direct_inode); diff --git a/fs/libfs.c b/fs/libfs.c index 58101dff2c66..9c50523382e7 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -93,16 +93,16 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) loff_t n = file->f_pos - 2; spin_lock(&dcache_lock); - list_del(&cursor->d_child); + list_del(&cursor->d_u.d_child); p = file->f_dentry->d_subdirs.next; while (n && p != &file->f_dentry->d_subdirs) { struct dentry *next; - next = list_entry(p, struct dentry, d_child); + next = list_entry(p, struct dentry, d_u.d_child); if (!d_unhashed(next) && next->d_inode) n--; p = p->next; } - list_add_tail(&cursor->d_child, p); + list_add_tail(&cursor->d_u.d_child, p); spin_unlock(&dcache_lock); } } @@ -126,7 +126,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) { struct dentry *dentry = filp->f_dentry; struct dentry *cursor = filp->private_data; - struct list_head *p, *q = &cursor->d_child; + struct list_head *p, *q = &cursor->d_u.d_child; ino_t ino; int i = filp->f_pos; @@ -153,7 +153,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) } for (p=q->next; p != &dentry->d_subdirs; p=p->next) { struct dentry *next; - next = list_entry(p, struct dentry, d_child); + next = list_entry(p, struct dentry, d_u.d_child); if (d_unhashed(next) || !next->d_inode) continue; @@ -261,7 +261,7 @@ int simple_empty(struct dentry *dentry) int ret = 0; spin_lock(&dcache_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_child) + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) if (simple_positive(child)) goto out; ret = 1; diff --git a/fs/locks.c b/fs/locks.c index fb32d6218e21..909eab8fb1d0 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -154,7 +154,7 @@ static struct file_lock *locks_alloc_lock(void) } /* Free a lock which is not in use. */ -static inline void locks_free_lock(struct file_lock *fl) +static void locks_free_lock(struct file_lock *fl) { if (fl == NULL) { BUG(); @@ -475,8 +475,7 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) /* * Check whether two locks have the same owner. */ -static inline int -posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) +static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) { if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner) return fl2->fl_lmops == fl1->fl_lmops && @@ -487,7 +486,7 @@ posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) /* Remove waiter from blocker's block list. * When blocker ends up pointing to itself then the list is empty. */ -static inline void __locks_delete_block(struct file_lock *waiter) +static void __locks_delete_block(struct file_lock *waiter) { list_del_init(&waiter->fl_block); list_del_init(&waiter->fl_link); diff --git a/fs/mpage.c b/fs/mpage.c index f1d2d02bd4c8..e431cb3878d6 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -184,7 +184,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, if (page_has_buffers(page)) goto confused; - block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); + block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (i_size_read(inode) + blocksize - 1) >> blkbits; bh.b_page = page; @@ -466,7 +466,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, * The page has no buffers: map it to disk */ BUG_ON(!PageUptodate(page)); - block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); + block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (i_size - 1) >> blkbits; map_bh.b_page = page; for (page_block = 0; page_block < blocks_per_page; ) { diff --git a/fs/namei.c b/fs/namei.c index 6dbbd42d8b95..300eae088d5f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1491,7 +1491,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) if (!error) { DQUOT_INIT(inode); - error = do_truncate(dentry, 0, NULL); + error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL); } put_write_access(inode); if (error) diff --git a/fs/namespace.c b/fs/namespace.c index 2019899f2ab8..3e8fb61ad597 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -451,7 +451,7 @@ EXPORT_SYMBOL(may_umount); void release_mounts(struct list_head *head) { struct vfsmount *mnt; - while(!list_empty(head)) { + while (!list_empty(head)) { mnt = list_entry(head->next, struct vfsmount, mnt_hash); list_del_init(&mnt->mnt_hash); if (mnt->mnt_parent != mnt) { @@ -1526,6 +1526,10 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) * pointed to by put_old must yield the same directory as new_root. No other * file system may be mounted on put_old. After all, new_root is a mountpoint. * + * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem. + * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives + * in this situation. + * * Notes: * - we don't move root/cwd if they are not at the root (reason: if something * cared enough to change them, it's probably wrong to force them elsewhere) diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index a9f7a8ab1d59..cfd76f431dc0 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -365,7 +365,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dent = list_entry(next, struct dentry, d_child); + dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) dget_locked(dent); diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 9e4dc30c2435..799e5c2bec55 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -196,7 +196,7 @@ ncp_renew_dentries(struct dentry *parent) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_child); + dentry = list_entry(next, struct dentry, d_u.d_child); if (dentry->d_fsdata == NULL) ncp_age_dentry(server, dentry); @@ -218,7 +218,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_child); + dentry = list_entry(next, struct dentry, d_u.d_child); dentry->d_fsdata = NULL; ncp_age_dentry(server, dentry); next = next->next; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e7bd0d92600f..3e4ba9cb7f80 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -644,10 +644,7 @@ int nfs_sync_mapping(struct address_space *mapping) if (mapping->nrpages == 0) return 0; unmap_mapping_range(mapping, 0, 0, 0); - ret = filemap_fdatawrite(mapping); - if (ret != 0) - goto out; - ret = filemap_fdatawait(mapping); + ret = filemap_write_and_wait(mapping); if (ret != 0) goto out; ret = nfs_wb_all(mapping->host); @@ -864,8 +861,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) nfs_begin_data_update(inode); /* Write all dirty data if we're changing file permissions or size */ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { - if (filemap_fdatawrite(inode->i_mapping) == 0) - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); nfs_wb_all(inode); } /* diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 985cc53b8dd5..e897e00c2c9d 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -275,7 +275,9 @@ static int __init root_nfs_parse(char *name, char *buf) case Opt_noacl: nfs_data.flags |= NFS_MOUNT_NOACL; break; - default : + default: + printk(KERN_WARNING "Root-NFS: unknown " + "option: %s\n", p); return 0; } } diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index f5ef5ea61a05..e8c56a3d9c64 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -212,11 +212,10 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; mlog(ML_ENTRY, "ENTRY:\n"); \ } while (0) -/* We disable this for old compilers since they don't have support for - * __builtin_types_compatible_p. +/* + * We disable this for sparse. */ -#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) && \ - !defined(__CHECKER__) +#if !defined(__CHECKER__) #define mlog_exit(st) do { \ if (__builtin_types_compatible_p(typeof(st), unsigned long)) \ mlog(ML_EXIT, "EXIT: %lu\n", (unsigned long) (st)); \ diff --git a/fs/open.c b/fs/open.c index f53a5b9ffb7d..75f3329e8a67 100644 --- a/fs/open.c +++ b/fs/open.c @@ -194,7 +194,8 @@ out: return error; } -int do_truncate(struct dentry *dentry, loff_t length, struct file *filp) +int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, + struct file *filp) { int err; struct iattr newattrs; @@ -204,7 +205,7 @@ int do_truncate(struct dentry *dentry, loff_t length, struct file *filp) return -EINVAL; newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + newattrs.ia_valid = ATTR_SIZE | time_attrs; if (filp) { newattrs.ia_file = filp; newattrs.ia_valid |= ATTR_FILE; @@ -216,7 +217,7 @@ int do_truncate(struct dentry *dentry, loff_t length, struct file *filp) return err; } -static inline long do_sys_truncate(const char __user * path, loff_t length) +static long do_sys_truncate(const char __user * path, loff_t length) { struct nameidata nd; struct inode * inode; @@ -266,7 +267,7 @@ static inline long do_sys_truncate(const char __user * path, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) { DQUOT_INIT(inode); - error = do_truncate(nd.dentry, length, NULL); + error = do_truncate(nd.dentry, length, 0, NULL); } put_write_access(inode); @@ -282,7 +283,7 @@ asmlinkage long sys_truncate(const char __user * path, unsigned long length) return do_sys_truncate(path, (long)length); } -static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small) +static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct inode * inode; struct dentry *dentry; @@ -318,7 +319,7 @@ static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small) error = locks_verify_truncate(inode, file, length); if (!error) - error = do_truncate(dentry, length, file); + error = do_truncate(dentry, length, 0, file); out_putf: fput(file); out: @@ -970,7 +971,7 @@ out: EXPORT_SYMBOL(get_unused_fd); -static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) +static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); __FD_CLR(fd, fdt->open_fds); diff --git a/fs/pnode.c b/fs/pnode.c index aeeec8ba8dd2..f1871f773f64 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -103,7 +103,7 @@ static struct vfsmount *propagation_next(struct vfsmount *m, struct vfsmount *next; struct vfsmount *master = m->mnt_master; - if ( master == origin->mnt_master ) { + if (master == origin->mnt_master) { next = next_peer(m); return ((next == origin) ? NULL : next); } else if (m->mnt_slave.next != &master->mnt_slave_list) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 72b431d0a0a4..20e5c4509a43 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -21,6 +21,8 @@ #include <linux/bitops.h> #include <asm/uaccess.h> +#include "internal.h" + static ssize_t proc_file_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos); static ssize_t proc_file_write(struct file *file, const char __user *buffer, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index e6a818a93f3d..6573f31f1fd9 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -19,7 +19,7 @@ #include <asm/system.h> #include <asm/uaccess.h> -extern void free_proc_entry(struct proc_dir_entry *); +#include "internal.h" static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) { diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 3e55198f9806..95a1cf32b838 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -37,6 +37,10 @@ extern int proc_tgid_stat(struct task_struct *, char *); extern int proc_pid_status(struct task_struct *, char *); extern int proc_pid_statm(struct task_struct *, char *); +void free_proc_entry(struct proc_dir_entry *de); + +int proc_init_inodecache(void); + static inline struct task_struct *proc_task(struct inode *inode) { return PROC_I(inode)->task; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5b6b0b6038a7..63bf6c00fa0c 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -323,6 +323,7 @@ static struct file_operations proc_modules_operations = { }; #endif +#ifdef CONFIG_SLAB extern struct seq_operations slabinfo_op; extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *); static int slabinfo_open(struct inode *inode, struct file *file) @@ -336,6 +337,7 @@ static struct file_operations proc_slabinfo_operations = { .llseek = seq_lseek, .release = seq_release, }; +#endif static int show_stat(struct seq_file *p, void *v) { @@ -600,7 +602,9 @@ void __init proc_misc_init(void) create_seq_entry("partitions", 0, &proc_partitions_operations); create_seq_entry("stat", 0, &proc_stat_operations); create_seq_entry("interrupts", 0, &proc_interrupts_operations); +#ifdef CONFIG_SLAB create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); +#endif create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); diff --git a/fs/proc/root.c b/fs/proc/root.c index aef148f099a2..68896283c8ae 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -18,6 +18,8 @@ #include <linux/bitops.h> #include <linux/smp_lock.h> +#include "internal.h" + struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; #ifdef CONFIG_SYSCTL @@ -36,7 +38,6 @@ static struct file_system_type proc_fs_type = { .kill_sb = kill_anon_super, }; -extern int __init proc_init_inodecache(void); void __init proc_root_init(void) { int err = proc_init_inodecache(); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 50bd5a8f0446..0eaad41f4658 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -390,129 +390,12 @@ struct seq_operations proc_pid_smaps_op = { }; #ifdef CONFIG_NUMA - -struct numa_maps { - unsigned long pages; - unsigned long anon; - unsigned long mapped; - unsigned long mapcount_max; - unsigned long node[MAX_NUMNODES]; -}; - -/* - * Calculate numa node maps for a vma - */ -static struct numa_maps *get_numa_maps(struct vm_area_struct *vma) -{ - int i; - struct page *page; - unsigned long vaddr; - struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL); - - if (!md) - return NULL; - md->pages = 0; - md->anon = 0; - md->mapped = 0; - md->mapcount_max = 0; - for_each_node(i) - md->node[i] =0; - - for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { - page = follow_page(vma, vaddr, 0); - if (page) { - int count = page_mapcount(page); - - if (count) - md->mapped++; - if (count > md->mapcount_max) - md->mapcount_max = count; - md->pages++; - if (PageAnon(page)) - md->anon++; - md->node[page_to_nid(page)]++; - } - cond_resched(); - } - return md; -} - -static int show_numa_map(struct seq_file *m, void *v) -{ - struct task_struct *task = m->private; - struct vm_area_struct *vma = v; - struct mempolicy *pol; - struct numa_maps *md; - struct zone **z; - int n; - int first; - - if (!vma->vm_mm) - return 0; - - md = get_numa_maps(vma); - if (!md) - return 0; - - seq_printf(m, "%08lx", vma->vm_start); - pol = get_vma_policy(task, vma, vma->vm_start); - /* Print policy */ - switch (pol->policy) { - case MPOL_PREFERRED: - seq_printf(m, " prefer=%d", pol->v.preferred_node); - break; - case MPOL_BIND: - seq_printf(m, " bind={"); - first = 1; - for (z = pol->v.zonelist->zones; *z; z++) { - - if (!first) - seq_putc(m, ','); - else - first = 0; - seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id, - (*z)->name); - } - seq_putc(m, '}'); - break; - case MPOL_INTERLEAVE: - seq_printf(m, " interleave={"); - first = 1; - for_each_node(n) { - if (node_isset(n, pol->v.nodes)) { - if (!first) - seq_putc(m,','); - else - first = 0; - seq_printf(m, "%d",n); - } - } - seq_putc(m, '}'); - break; - default: - seq_printf(m," default"); - break; - } - seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu", - md->mapcount_max, md->pages, md->mapped); - if (md->anon) - seq_printf(m," Anon=%lu",md->anon); - - for_each_online_node(n) { - if (md->node[n]) - seq_printf(m, " N%d=%lu", n, md->node[n]); - } - seq_putc(m, '\n'); - kfree(md); - if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; - return 0; -} +extern int show_numa_map(struct seq_file *m, void *v); struct seq_operations proc_pid_numa_maps_op = { - .start = m_start, - .next = m_next, - .stop = m_stop, - .show = show_numa_map + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_numa_map }; #endif diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c index 84e21ffa5ca8..10187812771e 100644 --- a/fs/relayfs/buffers.c +++ b/fs/relayfs/buffers.c @@ -185,5 +185,6 @@ void relay_destroy_buf(struct rchan_buf *buf) void relay_remove_buf(struct kref *kref) { struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); - relayfs_remove(buf->dentry); + buf->chan->cb->remove_buf_file(buf->dentry); + relay_destroy_buf(buf); } diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c index 0f7f88d067ad..7b7f2cb5f0e1 100644 --- a/fs/relayfs/inode.c +++ b/fs/relayfs/inode.c @@ -26,31 +26,22 @@ static struct vfsmount * relayfs_mount; static int relayfs_mount_count; -static kmem_cache_t * relayfs_inode_cachep; static struct backing_dev_info relayfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, }; -static struct inode *relayfs_get_inode(struct super_block *sb, int mode, - struct rchan *chan) +static struct inode *relayfs_get_inode(struct super_block *sb, + int mode, + struct file_operations *fops, + void *data) { - struct rchan_buf *buf = NULL; struct inode *inode; - if (S_ISREG(mode)) { - BUG_ON(!chan); - buf = relay_create_buf(chan); - if (!buf) - return NULL; - } - inode = new_inode(sb); - if (!inode) { - relay_destroy_buf(buf); + if (!inode) return NULL; - } inode->i_mode = mode; inode->i_uid = 0; @@ -61,8 +52,9 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode, inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { case S_IFREG: - inode->i_fop = &relayfs_file_operations; - RELAYFS_I(inode)->buf = buf; + inode->i_fop = fops; + if (data) + inode->u.generic_ip = data; break; case S_IFDIR: inode->i_op = &simple_dir_inode_operations; @@ -83,7 +75,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode, * @name: the name of the file to create * @parent: parent directory * @mode: mode - * @chan: relay channel associated with the file + * @fops: file operations to use for the file + * @data: user-associated data for this file * * Returns the new dentry, NULL on failure * @@ -92,7 +85,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode, static struct dentry *relayfs_create_entry(const char *name, struct dentry *parent, int mode, - struct rchan *chan) + struct file_operations *fops, + void *data) { struct dentry *d; struct inode *inode; @@ -127,7 +121,7 @@ static struct dentry *relayfs_create_entry(const char *name, goto release_mount; } - inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan); + inode = relayfs_get_inode(parent->d_inode->i_sb, mode, fops, data); if (!inode) { d = NULL; goto release_mount; @@ -155,20 +149,26 @@ exit: * @name: the name of the file to create * @parent: parent directory * @mode: mode, if not specied the default perms are used - * @chan: channel associated with the file + * @fops: file operations to use for the file + * @data: user-associated data for this file * * Returns file dentry if successful, NULL otherwise. * * The file will be created user r on behalf of current user. */ -struct dentry *relayfs_create_file(const char *name, struct dentry *parent, - int mode, struct rchan *chan) +struct dentry *relayfs_create_file(const char *name, + struct dentry *parent, + int mode, + struct file_operations *fops, + void *data) { + BUG_ON(!fops); + if (!mode) mode = S_IRUSR; mode = (mode & S_IALLUGO) | S_IFREG; - return relayfs_create_entry(name, parent, mode, chan); + return relayfs_create_entry(name, parent, mode, fops, data); } /** @@ -183,7 +183,7 @@ struct dentry *relayfs_create_file(const char *name, struct dentry *parent, struct dentry *relayfs_create_dir(const char *name, struct dentry *parent) { int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; - return relayfs_create_entry(name, parent, mode, NULL); + return relayfs_create_entry(name, parent, mode, NULL, NULL); } /** @@ -225,6 +225,17 @@ int relayfs_remove(struct dentry *dentry) } /** + * relayfs_remove_file - remove a file from relay filesystem + * @dentry: directory dentry + * + * Returns 0 if successful, negative otherwise. + */ +int relayfs_remove_file(struct dentry *dentry) +{ + return relayfs_remove(dentry); +} + +/** * relayfs_remove_dir - remove a directory in the relay filesystem * @dentry: directory dentry * @@ -236,45 +247,45 @@ int relayfs_remove_dir(struct dentry *dentry) } /** - * relayfs_open - open file op for relayfs files + * relay_file_open - open file op for relay files * @inode: the inode * @filp: the file * * Increments the channel buffer refcount. */ -static int relayfs_open(struct inode *inode, struct file *filp) +static int relay_file_open(struct inode *inode, struct file *filp) { - struct rchan_buf *buf = RELAYFS_I(inode)->buf; + struct rchan_buf *buf = inode->u.generic_ip; kref_get(&buf->kref); + filp->private_data = buf; return 0; } /** - * relayfs_mmap - mmap file op for relayfs files + * relay_file_mmap - mmap file op for relay files * @filp: the file * @vma: the vma describing what to map * * Calls upon relay_mmap_buf to map the file into user space. */ -static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma) +static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma) { - struct inode *inode = filp->f_dentry->d_inode; - return relay_mmap_buf(RELAYFS_I(inode)->buf, vma); + struct rchan_buf *buf = filp->private_data; + return relay_mmap_buf(buf, vma); } /** - * relayfs_poll - poll file op for relayfs files + * relay_file_poll - poll file op for relay files * @filp: the file * @wait: poll table * * Poll implemention. */ -static unsigned int relayfs_poll(struct file *filp, poll_table *wait) +static unsigned int relay_file_poll(struct file *filp, poll_table *wait) { unsigned int mask = 0; - struct inode *inode = filp->f_dentry->d_inode; - struct rchan_buf *buf = RELAYFS_I(inode)->buf; + struct rchan_buf *buf = filp->private_data; if (buf->finalized) return POLLERR; @@ -289,27 +300,27 @@ static unsigned int relayfs_poll(struct file *filp, poll_table *wait) } /** - * relayfs_release - release file op for relayfs files + * relay_file_release - release file op for relay files * @inode: the inode * @filp: the file * * Decrements the channel refcount, as the filesystem is * no longer using it. */ -static int relayfs_release(struct inode *inode, struct file *filp) +static int relay_file_release(struct inode *inode, struct file *filp) { - struct rchan_buf *buf = RELAYFS_I(inode)->buf; + struct rchan_buf *buf = filp->private_data; kref_put(&buf->kref, relay_remove_buf); return 0; } /** - * relayfs_read_consume - update the consumed count for the buffer + * relay_file_read_consume - update the consumed count for the buffer */ -static void relayfs_read_consume(struct rchan_buf *buf, - size_t read_pos, - size_t bytes_consumed) +static void relay_file_read_consume(struct rchan_buf *buf, + size_t read_pos, + size_t bytes_consumed) { size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; @@ -332,9 +343,9 @@ static void relayfs_read_consume(struct rchan_buf *buf, } /** - * relayfs_read_avail - boolean, are there unconsumed bytes available? + * relay_file_read_avail - boolean, are there unconsumed bytes available? */ -static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos) +static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos) { size_t bytes_produced, bytes_consumed, write_offset; size_t subbuf_size = buf->chan->subbuf_size; @@ -365,16 +376,16 @@ static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos) if (bytes_produced == bytes_consumed) return 0; - relayfs_read_consume(buf, read_pos, 0); + relay_file_read_consume(buf, read_pos, 0); return 1; } /** - * relayfs_read_subbuf_avail - return bytes available in sub-buffer + * relay_file_read_subbuf_avail - return bytes available in sub-buffer */ -static size_t relayfs_read_subbuf_avail(size_t read_pos, - struct rchan_buf *buf) +static size_t relay_file_read_subbuf_avail(size_t read_pos, + struct rchan_buf *buf) { size_t padding, avail = 0; size_t read_subbuf, read_offset, write_subbuf, write_offset; @@ -396,14 +407,14 @@ static size_t relayfs_read_subbuf_avail(size_t read_pos, } /** - * relayfs_read_start_pos - find the first available byte to read + * relay_file_read_start_pos - find the first available byte to read * * If the read_pos is in the middle of padding, return the * position of the first actually available byte, otherwise * return the original value. */ -static size_t relayfs_read_start_pos(size_t read_pos, - struct rchan_buf *buf) +static size_t relay_file_read_start_pos(size_t read_pos, + struct rchan_buf *buf) { size_t read_subbuf, padding, padding_start, padding_end; size_t subbuf_size = buf->chan->subbuf_size; @@ -422,11 +433,11 @@ static size_t relayfs_read_start_pos(size_t read_pos, } /** - * relayfs_read_end_pos - return the new read position + * relay_file_read_end_pos - return the new read position */ -static size_t relayfs_read_end_pos(struct rchan_buf *buf, - size_t read_pos, - size_t count) +static size_t relay_file_read_end_pos(struct rchan_buf *buf, + size_t read_pos, + size_t count) { size_t read_subbuf, padding, end_pos; size_t subbuf_size = buf->chan->subbuf_size; @@ -445,7 +456,7 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf, } /** - * relayfs_read - read file op for relayfs files + * relay_file_read - read file op for relay files * @filp: the file * @buffer: the userspace buffer * @count: number of bytes to read @@ -454,23 +465,23 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf, * Reads count bytes or the number of bytes available in the * current sub-buffer being read, whichever is smaller. */ -static ssize_t relayfs_read(struct file *filp, - char __user *buffer, - size_t count, - loff_t *ppos) +static ssize_t relay_file_read(struct file *filp, + char __user *buffer, + size_t count, + loff_t *ppos) { + struct rchan_buf *buf = filp->private_data; struct inode *inode = filp->f_dentry->d_inode; - struct rchan_buf *buf = RELAYFS_I(inode)->buf; size_t read_start, avail; ssize_t ret = 0; void *from; down(&inode->i_sem); - if(!relayfs_read_avail(buf, *ppos)) + if(!relay_file_read_avail(buf, *ppos)) goto out; - read_start = relayfs_read_start_pos(*ppos, buf); - avail = relayfs_read_subbuf_avail(read_start, buf); + read_start = relay_file_read_start_pos(*ppos, buf); + avail = relay_file_read_subbuf_avail(read_start, buf); if (!avail) goto out; @@ -480,58 +491,25 @@ static ssize_t relayfs_read(struct file *filp, ret = -EFAULT; goto out; } - relayfs_read_consume(buf, read_start, count); - *ppos = relayfs_read_end_pos(buf, read_start, count); + relay_file_read_consume(buf, read_start, count); + *ppos = relay_file_read_end_pos(buf, read_start, count); out: up(&inode->i_sem); return ret; } -/** - * relayfs alloc_inode() implementation - */ -static struct inode *relayfs_alloc_inode(struct super_block *sb) -{ - struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL); - if (!p) - return NULL; - p->buf = NULL; - - return &p->vfs_inode; -} - -/** - * relayfs destroy_inode() implementation - */ -static void relayfs_destroy_inode(struct inode *inode) -{ - if (RELAYFS_I(inode)->buf) - relay_destroy_buf(RELAYFS_I(inode)->buf); - - kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode)); -} - -static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags) -{ - struct relayfs_inode_info *i = p; - if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&i->vfs_inode); -} - -struct file_operations relayfs_file_operations = { - .open = relayfs_open, - .poll = relayfs_poll, - .mmap = relayfs_mmap, - .read = relayfs_read, +struct file_operations relay_file_operations = { + .open = relay_file_open, + .poll = relay_file_poll, + .mmap = relay_file_mmap, + .read = relay_file_read, .llseek = no_llseek, - .release = relayfs_release, + .release = relay_file_release, }; static struct super_operations relayfs_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, - .alloc_inode = relayfs_alloc_inode, - .destroy_inode = relayfs_destroy_inode, }; static int relayfs_fill_super(struct super_block * sb, void * data, int silent) @@ -544,7 +522,7 @@ static int relayfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = RELAYFS_MAGIC; sb->s_op = &relayfs_ops; - inode = relayfs_get_inode(sb, mode, NULL); + inode = relayfs_get_inode(sb, mode, NULL, NULL); if (!inode) return -ENOMEM; @@ -575,33 +553,27 @@ static struct file_system_type relayfs_fs_type = { static int __init init_relayfs_fs(void) { - int err; - - relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache", - sizeof(struct relayfs_inode_info), 0, - 0, init_once, NULL); - if (!relayfs_inode_cachep) - return -ENOMEM; - - err = register_filesystem(&relayfs_fs_type); - if (err) - kmem_cache_destroy(relayfs_inode_cachep); - - return err; + return register_filesystem(&relayfs_fs_type); } static void __exit exit_relayfs_fs(void) { + + + + + unregister_filesystem(&relayfs_fs_type); - kmem_cache_destroy(relayfs_inode_cachep); } module_init(init_relayfs_fs) module_exit(exit_relayfs_fs) -EXPORT_SYMBOL_GPL(relayfs_file_operations); +EXPORT_SYMBOL_GPL(relay_file_operations); EXPORT_SYMBOL_GPL(relayfs_create_dir); EXPORT_SYMBOL_GPL(relayfs_remove_dir); +EXPORT_SYMBOL_GPL(relayfs_create_file); +EXPORT_SYMBOL_GPL(relayfs_remove_file); MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>"); MODULE_DESCRIPTION("Relay Filesystem"); diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c index 2a6f7f12b7f9..abf3ceaace49 100644 --- a/fs/relayfs/relay.c +++ b/fs/relayfs/relay.c @@ -80,11 +80,34 @@ static void buf_unmapped_default_callback(struct rchan_buf *buf, { } +/* + * create_buf_file_create() default callback. Creates file to represent buf. + */ +static struct dentry *create_buf_file_default_callback(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global) +{ + return relayfs_create_file(filename, parent, mode, + &relay_file_operations, buf); +} + +/* + * remove_buf_file() default callback. Removes file representing relay buffer. + */ +static int remove_buf_file_default_callback(struct dentry *dentry) +{ + return relayfs_remove(dentry); +} + /* relay channel default callbacks */ static struct rchan_callbacks default_channel_callbacks = { .subbuf_start = subbuf_start_default_callback, .buf_mapped = buf_mapped_default_callback, .buf_unmapped = buf_unmapped_default_callback, + .create_buf_file = create_buf_file_default_callback, + .remove_buf_file = remove_buf_file_default_callback, }; /** @@ -148,14 +171,16 @@ static inline void __relay_reset(struct rchan_buf *buf, unsigned int init) void relay_reset(struct rchan *chan) { unsigned int i; + struct rchan_buf *prev = NULL; if (!chan) return; for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i]) - continue; + if (!chan->buf[i] || chan->buf[i] == prev) + break; __relay_reset(chan->buf[i], 0); + prev = chan->buf[i]; } } @@ -166,17 +191,27 @@ void relay_reset(struct rchan *chan) */ static struct rchan_buf *relay_open_buf(struct rchan *chan, const char *filename, - struct dentry *parent) + struct dentry *parent, + int *is_global) { struct rchan_buf *buf; struct dentry *dentry; + if (*is_global) + return chan->buf[0]; + + buf = relay_create_buf(chan); + if (!buf) + return NULL; + /* Create file in fs */ - dentry = relayfs_create_file(filename, parent, S_IRUSR, chan); - if (!dentry) + dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR, + buf, is_global); + if (!dentry) { + relay_destroy_buf(buf); return NULL; + } - buf = RELAYFS_I(dentry->d_inode)->buf; buf->dentry = dentry; __relay_reset(buf, 1); @@ -214,6 +249,10 @@ static inline void setup_callbacks(struct rchan *chan, cb->buf_mapped = buf_mapped_default_callback; if (!cb->buf_unmapped) cb->buf_unmapped = buf_unmapped_default_callback; + if (!cb->create_buf_file) + cb->create_buf_file = create_buf_file_default_callback; + if (!cb->remove_buf_file) + cb->remove_buf_file = remove_buf_file_default_callback; chan->cb = cb; } @@ -241,6 +280,7 @@ struct rchan *relay_open(const char *base_filename, unsigned int i; struct rchan *chan; char *tmpname; + int is_global = 0; if (!base_filename) return NULL; @@ -265,7 +305,8 @@ struct rchan *relay_open(const char *base_filename, for_each_online_cpu(i) { sprintf(tmpname, "%s%d", base_filename, i); - chan->buf[i] = relay_open_buf(chan, tmpname, parent); + chan->buf[i] = relay_open_buf(chan, tmpname, parent, + &is_global); chan->buf[i]->cpu = i; if (!chan->buf[i]) goto free_bufs; @@ -279,6 +320,8 @@ free_bufs: if (!chan->buf[i]) break; relay_close_buf(chan->buf[i]); + if (is_global) + break; } kfree(tmpname); @@ -388,14 +431,16 @@ void relay_destroy_channel(struct kref *kref) void relay_close(struct rchan *chan) { unsigned int i; + struct rchan_buf *prev = NULL; if (!chan) return; for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i]) - continue; + if (!chan->buf[i] || chan->buf[i] == prev) + break; relay_close_buf(chan->buf[i]); + prev = chan->buf[i]; } if (chan->last_toobig) @@ -415,14 +460,16 @@ void relay_close(struct rchan *chan) void relay_flush(struct rchan *chan) { unsigned int i; + struct rchan_buf *prev = NULL; if (!chan) return; for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i]) - continue; + if (!chan->buf[i] || chan->buf[i] == prev) + break; relay_switch_subbuf(chan->buf[i], 0); + prev = chan->buf[i]; } } diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h index 703503fa22b6..0993d3e5753b 100644 --- a/fs/relayfs/relay.h +++ b/fs/relayfs/relay.h @@ -1,10 +1,6 @@ #ifndef _RELAY_H #define _RELAY_H -struct dentry *relayfs_create_file(const char *name, - struct dentry *parent, - int mode, - struct rchan *chan); extern int relayfs_remove(struct dentry *dentry); extern int relay_buf_empty(struct rchan_buf *buf); extern void relay_destroy_channel(struct kref *kref); diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index c74f382dabba..0a13859fd57b 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -418,7 +418,7 @@ static int romfs_readpage(struct file *file, struct page * page) { struct inode *inode = page->mapping->host; - unsigned long offset, avail, readlen; + loff_t offset, avail, readlen; void *buf; int result = -EIO; @@ -429,8 +429,8 @@ romfs_readpage(struct file *file, struct page * page) goto err_out; /* 32 bit warning -- but not for us :) */ - offset = page->index << PAGE_CACHE_SHIFT; - if (offset < inode->i_size) { + offset = page_offset(page); + if (offset < i_size_read(inode)) { avail = inode->i_size-offset; readlen = min_t(unsigned long, avail, PAGE_SIZE); if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) { diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c index f3e6b81288ab..74b86d9725a6 100644 --- a/fs/smbfs/cache.c +++ b/fs/smbfs/cache.c @@ -66,7 +66,7 @@ smb_invalidate_dircache_entries(struct dentry *parent) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_child); + dentry = list_entry(next, struct dentry, d_u.d_child); dentry->d_fsdata = NULL; smb_age_dentry(server, dentry); next = next->next; @@ -100,7 +100,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dent = list_entry(next, struct dentry, d_child); + dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) dget_locked(dent); diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index b4fcfa8b55a1..7042e62726a4 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -209,8 +209,8 @@ smb_updatepage(struct file *file, struct page *page, unsigned long offset, { struct dentry *dentry = file->f_dentry; - DEBUG1("(%s/%s %d@%ld)\n", DENTRY_PATH(dentry), - count, (page->index << PAGE_CACHE_SHIFT)+offset); + DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count, + ((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset); return smb_writepage_sync(dentry->d_inode, page, offset, count); } @@ -374,8 +374,7 @@ smb_file_release(struct inode *inode, struct file * file) /* We must flush any dirty pages now as we won't be able to write anything after close. mmap can trigger this. "openers" should perhaps include mmap'ers ... */ - filemap_fdatawrite(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); smb_close(inode); } unlock_kernel(); diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 10b994428fef..6ec88bf59b2d 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -697,8 +697,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr) DENTRY_PATH(dentry), (long) inode->i_size, (long) attr->ia_size); - filemap_fdatawrite(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); + filemap_write_and_wait(inode->i_mapping); error = smb_open(dentry, O_WRONLY); if (error) diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index 38ab558835c4..d6baec0f24ad 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -3113,7 +3113,7 @@ smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, LSET(data, 32, SMB_TIME_NO_CHANGE); LSET(data, 40, SMB_UID_NO_CHANGE); LSET(data, 48, SMB_GID_NO_CHANGE); - LSET(data, 56, smb_filetype_from_mode(attr->ia_mode)); + DSET(data, 56, smb_filetype_from_mode(attr->ia_mode)); LSET(data, 60, major); LSET(data, 68, minor); LSET(data, 76, 0); diff --git a/fs/super.c b/fs/super.c index 5a347a4f673a..0a30e51692cf 100644 --- a/fs/super.c +++ b/fs/super.c @@ -700,8 +700,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, s->s_flags = flags; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); - s->s_old_blocksize = block_size(bdev); - sb_set_blocksize(s, s->s_old_blocksize); + sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); if (error) { up_write(&s->s_umount); diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 69a085abad6f..cce8b05cba5a 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -103,7 +103,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) offset = (char *)de - kaddr; over = filldir(dirent, name, strnlen(name,SYSV_NAMELEN), - (n<<PAGE_CACHE_SHIFT) | offset, + ((loff_t)n<<PAGE_CACHE_SHIFT) | offset, fs16_to_cpu(SYSV_SB(sb), de->inode), DT_UNKNOWN); if (over) { @@ -115,7 +115,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) } done: - filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; + filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset; unlock_kernel(); return 0; } diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 6598a5037ac8..4fae57d9d115 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -41,7 +41,7 @@ #define uint(x) xuint(x) #define xuint(x) __le ## x -extern inline int find_next_one_bit (void * addr, int size, int offset) +static inline int find_next_one_bit (void * addr, int size, int offset) { uintBPL_t * p = ((uintBPL_t *) addr) + (offset / BITS_PER_LONG); int result = offset & ~(BITS_PER_LONG-1); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 4014f17d382e..395e582ee542 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1957,11 +1957,6 @@ int8_t inode_bmap(struct inode *inode, int block, kernel_lb_addr *bloc, uint32_t printk(KERN_ERR "udf: inode_bmap: block < 0\n"); return -1; } - if (!inode) - { - printk(KERN_ERR "udf: inode_bmap: NULL inode\n"); - return -1; - } *extoffset = 0; *elen = 0; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 54828ebcf1ba..2ba11a9aa995 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1296,8 +1296,10 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type, blk++; } out: - if (len == towrite) + if (len == towrite) { + up(&inode->i_sem); return err; + } if (inode->i_size < off+len-towrite) i_size_write(inode, off+len-towrite); inode->i_version++; diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index f89340c61bf2..4fa4b1a5187e 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -79,8 +79,7 @@ fs_flushinval_pages( struct inode *ip = LINVFS_GET_IP(vp); if (VN_CACHED(vp)) { - filemap_fdatawrite(ip->i_mapping); - filemap_fdatawait(ip->i_mapping); + filemap_write_and_wait(ip->i_mapping); truncate_inode_pages(ip->i_mapping, first); } diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 158829ca56f6..f40d4391fcfc 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -30,13 +30,7 @@ * By comparing each compnent, we don't have to worry about extra * endian issues in treating two 32 bit numbers as one 64 bit number */ -static -#if defined(__GNUC__) && (__GNUC__ == 2) && ( (__GNUC_MINOR__ == 95) || (__GNUC_MINOR__ == 96)) -__attribute__((unused)) /* gcc 2.95, 2.96 miscompile this when inlined */ -#else -__inline__ -#endif -xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) +static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) { if (CYCLE_LSN(lsn1) != CYCLE_LSN(lsn2)) return (CYCLE_LSN(lsn1)<CYCLE_LSN(lsn2))? -999 : 999; diff --git a/include/asm-alpha/cache.h b/include/asm-alpha/cache.h index e69b29501a5f..e6d4d1695e25 100644 --- a/include/asm-alpha/cache.h +++ b/include/asm-alpha/cache.h @@ -20,6 +20,5 @@ #define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) #define SMP_CACHE_BYTES L1_CACHE_BYTES -#define L1_CACHE_SHIFT_MAX L1_CACHE_SHIFT #endif diff --git a/include/asm-alpha/compiler.h b/include/asm-alpha/compiler.h index 0a4a8b40dfcd..00c6f57ad9a7 100644 --- a/include/asm-alpha/compiler.h +++ b/include/asm-alpha/compiler.h @@ -98,9 +98,7 @@ #undef inline #undef __inline__ #undef __inline -#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3 #undef __always_inline #define __always_inline inline __attribute__((always_inline)) -#endif #endif /* __ALPHA_COMPILER_H */ diff --git a/include/asm-alpha/futex.h b/include/asm-alpha/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-alpha/futex.h +++ b/include/asm-alpha/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-alpha/processor.h b/include/asm-alpha/processor.h index 059780a7d3d7..bb1a7a3abb8b 100644 --- a/include/asm-alpha/processor.h +++ b/include/asm-alpha/processor.h @@ -77,7 +77,6 @@ unsigned long get_wchan(struct task_struct *p); #define spin_lock_prefetch(lock) do { } while (0) #endif -#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) extern inline void prefetch(const void *ptr) { __builtin_prefetch(ptr, 0, 3); @@ -95,24 +94,4 @@ extern inline void spin_lock_prefetch(const void *ptr) } #endif -#else -extern inline void prefetch(const void *ptr) -{ - __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); -} - -extern inline void prefetchw(const void *ptr) -{ - __asm__ ("ldq $31,%0" : : "m"(*(char *)ptr)); -} - -#ifdef CONFIG_SMP -extern inline void spin_lock_prefetch(const void *ptr) -{ - __asm__ ("ldq $31,%0" : : "m"(*(char *)ptr)); -} -#endif - -#endif /* GCC 3.1 */ - #endif /* __ASM_ALPHA_PROCESSOR_H */ diff --git a/include/asm-arm/cache.h b/include/asm-arm/cache.h index 8d161f7c87ff..31332c8ac04e 100644 --- a/include/asm-arm/cache.h +++ b/include/asm-arm/cache.h @@ -7,9 +7,4 @@ #define L1_CACHE_SHIFT 5 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -/* - * largest L1 which this arch supports - */ -#define L1_CACHE_SHIFT_MAX 5 - #endif diff --git a/include/asm-arm/futex.h b/include/asm-arm/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-arm/futex.h +++ b/include/asm-arm/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-arm/irq.h b/include/asm-arm/irq.h index 59975ee43cf1..7772432d3fd7 100644 --- a/include/asm-arm/irq.h +++ b/include/asm-arm/irq.h @@ -25,10 +25,14 @@ extern void disable_irq_nosync(unsigned int); extern void disable_irq(unsigned int); extern void enable_irq(unsigned int); -#define __IRQT_FALEDGE (1 << 0) -#define __IRQT_RISEDGE (1 << 1) -#define __IRQT_LOWLVL (1 << 2) -#define __IRQT_HIGHLVL (1 << 3) +/* + * These correspond with the SA_TRIGGER_* defines, and therefore the + * IRQRESOURCE_IRQ_* defines. + */ +#define __IRQT_RISEDGE (1 << 0) +#define __IRQT_FALEDGE (1 << 1) +#define __IRQT_HIGHLVL (1 << 2) +#define __IRQT_LOWLVL (1 << 3) #define IRQT_NOEDGE (0) #define IRQT_RISING (__IRQT_RISEDGE) diff --git a/include/asm-arm26/futex.h b/include/asm-arm26/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-arm26/futex.h +++ b/include/asm-arm26/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-cris/arch-v10/cache.h b/include/asm-cris/arch-v10/cache.h index 1d1d1ba65b1a..aea27184d2d2 100644 --- a/include/asm-cris/arch-v10/cache.h +++ b/include/asm-cris/arch-v10/cache.h @@ -4,6 +4,5 @@ /* Etrax 100LX have 32-byte cache-lines. */ #define L1_CACHE_BYTES 32 #define L1_CACHE_SHIFT 5 -#define L1_CACHE_SHIFT_MAX 5 #endif /* _ASM_ARCH_CACHE_H */ diff --git a/include/asm-cris/arch-v32/cache.h b/include/asm-cris/arch-v32/cache.h index 4fed8d62ccc8..80b236b15319 100644 --- a/include/asm-cris/arch-v32/cache.h +++ b/include/asm-cris/arch-v32/cache.h @@ -4,6 +4,5 @@ /* A cache-line is 32 bytes. */ #define L1_CACHE_BYTES 32 #define L1_CACHE_SHIFT 5 -#define L1_CACHE_SHIFT_MAX 5 #endif /* _ASM_CRIS_ARCH_CACHE_H */ diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h index 8eff51349ae7..cbf1a98f0129 100644 --- a/include/asm-cris/dma-mapping.h +++ b/include/asm-cris/dma-mapping.h @@ -153,7 +153,7 @@ dma_set_mask(struct device *dev, u64 mask) static inline int dma_get_cache_alignment(void) { - return (1 << L1_CACHE_SHIFT_MAX); + return (1 << INTERNODE_CACHE_SHIFT); } #define dma_is_consistent(d) (1) diff --git a/include/asm-cris/futex.h b/include/asm-cris/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-cris/futex.h +++ b/include/asm-cris/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h index 3f54fea2b051..9c9e9499cfd8 100644 --- a/include/asm-frv/atomic.h +++ b/include/asm-frv/atomic.h @@ -218,51 +218,12 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig __typeof__(*(ptr)) __xg_orig; \ \ switch (sizeof(__xg_orig)) { \ - case 1: \ - asm volatile( \ - "0: \n" \ - " orcc gr0,gr0,gr0,icc3 \n" \ - " ckeq icc3,cc7 \n" \ - " ldub.p %M0,%1 \n" \ - " orcr cc7,cc7,cc3 \n" \ - " cstb.p %2,%M0 ,cc3,#1 \n" \ - " corcc gr29,gr29,gr0 ,cc3,#1 \n" \ - " beq icc3,#0,0b \n" \ - : "+U"(*__xg_ptr), "=&r"(__xg_orig) \ - : "r"(x) \ - : "memory", "cc7", "cc3", "icc3" \ - ); \ - break; \ - \ - case 2: \ - asm volatile( \ - "0: \n" \ - " orcc gr0,gr0,gr0,icc3 \n" \ - " ckeq icc3,cc7 \n" \ - " lduh.p %M0,%1 \n" \ - " orcr cc7,cc7,cc3 \n" \ - " csth.p %2,%M0 ,cc3,#1 \n" \ - " corcc gr29,gr29,gr0 ,cc3,#1 \n" \ - " beq icc3,#0,0b \n" \ - : "+U"(*__xg_ptr), "=&r"(__xg_orig) \ - : "r"(x) \ - : "memory", "cc7", "cc3", "icc3" \ - ); \ - break; \ - \ case 4: \ asm volatile( \ - "0: \n" \ - " orcc gr0,gr0,gr0,icc3 \n" \ - " ckeq icc3,cc7 \n" \ - " ld.p %M0,%1 \n" \ - " orcr cc7,cc7,cc3 \n" \ - " cst.p %2,%M0 ,cc3,#1 \n" \ - " corcc gr29,gr29,gr0 ,cc3,#1 \n" \ - " beq icc3,#0,0b \n" \ - : "+U"(*__xg_ptr), "=&r"(__xg_orig) \ + "swap%I0 %2,%M0" \ + : "+m"(*__xg_ptr), "=&r"(__xg_orig) \ : "r"(x) \ - : "memory", "cc7", "cc3", "icc3" \ + : "memory" \ ); \ break; \ \ @@ -277,8 +238,6 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig #else -extern uint8_t __xchg_8 (uint8_t i, volatile void *v); -extern uint16_t __xchg_16(uint16_t i, volatile void *v); extern uint32_t __xchg_32(uint32_t i, volatile void *v); #define xchg(ptr, x) \ @@ -287,8 +246,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v); __typeof__(*(ptr)) __xg_orig; \ \ switch (sizeof(__xg_orig)) { \ - case 1: __xg_orig = (__typeof__(*(ptr))) __xchg_8 ((uint8_t) x, __xg_ptr); break; \ - case 2: __xg_orig = (__typeof__(*(ptr))) __xchg_16((uint16_t) x, __xg_ptr); break; \ case 4: __xg_orig = (__typeof__(*(ptr))) __xchg_32((uint32_t) x, __xg_ptr); break; \ default: \ __xg_orig = 0; \ @@ -318,46 +275,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v); __typeof__(*(ptr)) __xg_new = (new); \ \ switch (sizeof(__xg_orig)) { \ - case 1: \ - asm volatile( \ - "0: \n" \ - " orcc gr0,gr0,gr0,icc3 \n" \ - " ckeq icc3,cc7 \n" \ - " ldub.p %M0,%1 \n" \ - " orcr cc7,cc7,cc3 \n" \ - " sub%I4 %1,%4,%2 \n" \ - " sllcc %2,#24,gr0,icc0 \n" \ - " bne icc0,#0,1f \n" \ - " cstb.p %3,%M0 ,cc3,#1 \n" \ - " corcc gr29,gr29,gr0 ,cc3,#1 \n" \ - " beq icc3,#0,0b \n" \ - "1: \n" \ - : "+U"(*__xg_ptr), "=&r"(__xg_orig), "=&r"(__xg_tmp) \ - : "r"(__xg_new), "NPr"(__xg_test) \ - : "memory", "cc7", "cc3", "icc3", "icc0" \ - ); \ - break; \ - \ - case 2: \ - asm volatile( \ - "0: \n" \ - " orcc gr0,gr0,gr0,icc3 \n" \ - " ckeq icc3,cc7 \n" \ - " lduh.p %M0,%1 \n" \ - " orcr cc7,cc7,cc3 \n" \ - " sub%I4 %1,%4,%2 \n" \ - " sllcc %2,#16,gr0,icc0 \n" \ - " bne icc0,#0,1f \n" \ - " csth.p %3,%M0 ,cc3,#1 \n" \ - " corcc gr29,gr29,gr0 ,cc3,#1 \n" \ - " beq icc3,#0,0b \n" \ - "1: \n" \ - : "+U"(*__xg_ptr), "=&r"(__xg_orig), "=&r"(__xg_tmp) \ - : "r"(__xg_new), "NPr"(__xg_test) \ - : "memory", "cc7", "cc3", "icc3", "icc0" \ - ); \ - break; \ - \ case 4: \ asm volatile( \ "0: \n" \ @@ -388,8 +305,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v); #else -extern uint8_t __cmpxchg_8 (uint8_t *v, uint8_t test, uint8_t new); -extern uint16_t __cmpxchg_16(uint16_t *v, uint16_t test, uint16_t new); extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new); #define cmpxchg(ptr, test, new) \ @@ -400,8 +315,6 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new); __typeof__(*(ptr)) __xg_new = (new); \ \ switch (sizeof(__xg_orig)) { \ - case 1: __xg_orig = __cmpxchg_8 (__xg_ptr, __xg_test, __xg_new); break; \ - case 2: __xg_orig = __cmpxchg_16(__xg_ptr, __xg_test, __xg_new); break; \ case 4: __xg_orig = __cmpxchg_32(__xg_ptr, __xg_test, __xg_new); break; \ default: \ __xg_orig = 0; \ @@ -414,7 +327,7 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new); #endif -#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new)) +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) #define atomic_add_unless(v, a, u) \ ({ \ @@ -424,6 +337,7 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new); c = old; \ c != (u); \ }) + #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #include <asm-generic/atomic.h> diff --git a/include/asm-frv/bug.h b/include/asm-frv/bug.h index 074c0d5770eb..451712cc3060 100644 --- a/include/asm-frv/bug.h +++ b/include/asm-frv/bug.h @@ -12,6 +12,7 @@ #define _ASM_BUG_H #include <linux/config.h> +#include <linux/linkage.h> #ifdef CONFIG_BUG /* diff --git a/include/asm-frv/dma-mapping.h b/include/asm-frv/dma-mapping.h index 5003e017fd1e..e9fc1d47797e 100644 --- a/include/asm-frv/dma-mapping.h +++ b/include/asm-frv/dma-mapping.h @@ -23,7 +23,7 @@ void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t * returns, or alternatively stop on the first sg_dma_len(sg) which * is 0. */ -#define sg_dma_address(sg) ((unsigned long) (page_to_phys((sg)->page) + (sg)->offset)) +#define sg_dma_address(sg) ((sg)->dma_address) #define sg_dma_len(sg) ((sg)->length) /* diff --git a/include/asm-frv/io.h b/include/asm-frv/io.h index 48829f727242..075369b1a34b 100644 --- a/include/asm-frv/io.h +++ b/include/asm-frv/io.h @@ -18,6 +18,7 @@ #ifdef __KERNEL__ #include <linux/config.h> +#include <linux/types.h> #include <asm/virtconvert.h> #include <asm/string.h> #include <asm/mb-regs.h> @@ -104,6 +105,8 @@ static inline void __insl(unsigned long addr, void *buf, int len, int swap) __insl_sw(addr, buf, len); } +#define mmiowb() mb() + /* * make the short names macros so specific devices * can override them as required @@ -209,6 +212,10 @@ static inline uint32_t readl(const volatile void __iomem *addr) return ret; } +#define readb_relaxed readb +#define readw_relaxed readw +#define readl_relaxed readl + static inline void writeb(uint8_t datum, volatile void __iomem *addr) { __builtin_write8((volatile uint8_t __force *) addr, datum); @@ -268,11 +275,106 @@ static inline void __iomem *ioremap_fullcache(unsigned long physaddr, unsigned l extern void iounmap(void __iomem *addr); +static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return (void __iomem *) port; +} + +static inline void ioport_unmap(void __iomem *p) +{ +} + static inline void flush_write_buffers(void) { __asm__ __volatile__ ("membar" : : :"memory"); } +/* + * do appropriate I/O accesses for token type + */ +static inline unsigned int ioread8(void __iomem *p) +{ + return __builtin_read8(p); +} + +static inline unsigned int ioread16(void __iomem *p) +{ + uint16_t ret = __builtin_read16(p); + if (__is_PCI_addr(p)) + ret = _swapw(ret); + return ret; +} + +static inline unsigned int ioread32(void __iomem *p) +{ + uint32_t ret = __builtin_read32(p); + if (__is_PCI_addr(p)) + ret = _swapl(ret); + return ret; +} + +static inline void iowrite8(u8 val, void __iomem *p) +{ + __builtin_write8(p, val); + if (__is_PCI_MEM(p)) + __flush_PCI_writes(); +} + +static inline void iowrite16(u16 val, void __iomem *p) +{ + if (__is_PCI_addr(p)) + val = _swapw(val); + __builtin_write16(p, val); + if (__is_PCI_MEM(p)) + __flush_PCI_writes(); +} + +static inline void iowrite32(u32 val, void __iomem *p) +{ + if (__is_PCI_addr(p)) + val = _swapl(val); + __builtin_write32(p, val); + if (__is_PCI_MEM(p)) + __flush_PCI_writes(); +} + +static inline void ioread8_rep(void __iomem *p, void *dst, unsigned long count) +{ + io_insb((unsigned long) p, dst, count); +} + +static inline void ioread16_rep(void __iomem *p, void *dst, unsigned long count) +{ + io_insw((unsigned long) p, dst, count); +} + +static inline void ioread32_rep(void __iomem *p, void *dst, unsigned long count) +{ + __insl_ns((unsigned long) p, dst, count); +} + +static inline void iowrite8_rep(void __iomem *p, const void *src, unsigned long count) +{ + io_outsb((unsigned long) p, src, count); +} + +static inline void iowrite16_rep(void __iomem *p, const void *src, unsigned long count) +{ + io_outsw((unsigned long) p, src, count); +} + +static inline void iowrite32_rep(void __iomem *p, const void *src, unsigned long count) +{ + __outsl_ns((unsigned long) p, src, count); +} + +/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ +struct pci_dev; +extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p) +{ +} + /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem @@ -285,6 +387,27 @@ static inline void flush_write_buffers(void) */ #define xlate_dev_kmem_ptr(p) p +/* + * Check BIOS signature + */ +static inline int check_signature(volatile void __iomem *io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + + retval = 1; +out: + return retval; +} + #endif /* __KERNEL__ */ #endif /* _ASM_IO_H */ diff --git a/include/asm-frv/mb-regs.h b/include/asm-frv/mb-regs.h index c8f575fc42fa..93fa732fb0cd 100644 --- a/include/asm-frv/mb-regs.h +++ b/include/asm-frv/mb-regs.h @@ -68,6 +68,9 @@ do { \ #define __is_PCI_MEM(addr) \ ((unsigned long)(addr) - __region_PCI_MEM < 0x08000000UL) +#define __is_PCI_addr(addr) \ + ((unsigned long)(addr) - __region_PCI_IO < 0x0c000000UL) + #define __get_CLKSW() ({ *(volatile unsigned long *)(__region_CS2 + 0x0130000cUL) & 0xffUL; }) #define __get_CLKIN() (__get_CLKSW() * 125U * 100000U / 24U) @@ -149,6 +152,7 @@ do { \ #define __is_PCI_IO(addr) 0 /* no PCI */ #define __is_PCI_MEM(addr) 0 +#define __is_PCI_addr(addr) 0 #define __region_PCI_IO 0 #define __region_PCI_MEM 0 #define __flush_PCI_writes() do { } while(0) diff --git a/include/asm-frv/mc146818rtc.h b/include/asm-frv/mc146818rtc.h new file mode 100644 index 000000000000..90dfb7a633d1 --- /dev/null +++ b/include/asm-frv/mc146818rtc.h @@ -0,0 +1,16 @@ +/* mc146818rtc.h: RTC defs + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_MC146818RTC_H +#define _ASM_MC146818RTC_H + + +#endif /* _ASM_MC146818RTC_H */ diff --git a/include/asm-frv/module.h b/include/asm-frv/module.h index 3223cfaef743..3d5c6360289a 100644 --- a/include/asm-frv/module.h +++ b/include/asm-frv/module.h @@ -11,10 +11,18 @@ #ifndef _ASM_MODULE_H #define _ASM_MODULE_H -#define module_map(x) vmalloc(x) -#define module_unmap(x) vfree(x) -#define module_arch_init(x) (0) -#define arch_init_modules(x) do { } while (0) +struct mod_arch_specific +{ +}; + +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define Elf_Ehdr Elf32_Ehdr + +/* + * Include the architecture version. + */ +#define MODULE_ARCH_VERMAGIC __stringify(PROCESSOR_MODEL_NAME) " " #endif /* _ASM_MODULE_H */ diff --git a/include/asm-frv/pci.h b/include/asm-frv/pci.h index 1168451c275f..598b0c6b695d 100644 --- a/include/asm-frv/pci.h +++ b/include/asm-frv/pci.h @@ -57,6 +57,14 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, */ #define PCI_DMA_BUS_IS_PHYS (1) +/* pci_unmap_{page,single} is a nop so... */ +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) +#define pci_unmap_addr(PTR, ADDR_NAME) (0) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) +#define pci_unmap_len(PTR, LEN_NAME) (0) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) + #ifdef CONFIG_PCI static inline void pci_dma_burst_advice(struct pci_dev *pdev, enum pci_dma_burst_strategy *strat, diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 844666377dcb..d1c3b182c691 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -421,6 +421,11 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, } /* + * Macro to mark a page protection value as "uncacheable" + */ +#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NOCACHE)) + +/* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. */ diff --git a/include/asm-frv/types.h b/include/asm-frv/types.h index 50605df6d8ac..2560f596a75d 100644 --- a/include/asm-frv/types.h +++ b/include/asm-frv/types.h @@ -59,7 +59,6 @@ typedef unsigned int u32; typedef signed long long s64; typedef unsigned long long u64; -typedef u64 u_quad_t; /* Dma addresses are 32-bits wide. */ diff --git a/include/asm-frv/uaccess.h b/include/asm-frv/uaccess.h index 991b50fbba24..b6bcbe01f6ee 100644 --- a/include/asm-frv/uaccess.h +++ b/include/asm-frv/uaccess.h @@ -180,16 +180,16 @@ do { \ \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm(__gu_err, __gu_val, ptr, "ub", "=r"); \ + __get_user_asm(__gu_err, *(u8*)&__gu_val, ptr, "ub", "=r"); \ break; \ case 2: \ - __get_user_asm(__gu_err, __gu_val, ptr, "uh", "=r"); \ + __get_user_asm(__gu_err, *(u16*)&__gu_val, ptr, "uh", "=r"); \ break; \ case 4: \ - __get_user_asm(__gu_err, __gu_val, ptr, "", "=r"); \ + __get_user_asm(__gu_err, *(u32*)&__gu_val, ptr, "", "=r"); \ break; \ case 8: \ - __get_user_asm(__gu_err, __gu_val, ptr, "d", "=e"); \ + __get_user_asm(__gu_err, *(u64*)&__gu_val, ptr, "d", "=e"); \ break; \ default: \ __gu_err = __get_user_bad(); \ diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h index 5cf989b448d5..cde376a7a857 100644 --- a/include/asm-frv/unistd.h +++ b/include/asm-frv/unistd.h @@ -313,7 +313,7 @@ do { \ unsigned long __sr2 = (res); \ if (__builtin_expect(__sr2 >= (unsigned long)(-4095), 0)) { \ errno = (-__sr2); \ - __sr2 = ULONG_MAX; \ + __sr2 = ~0UL; \ } \ return (type) __sr2; \ } while (0) diff --git a/include/asm-frv/vga.h b/include/asm-frv/vga.h new file mode 100644 index 000000000000..a702c800a229 --- /dev/null +++ b/include/asm-frv/vga.h @@ -0,0 +1,17 @@ +/* vga.h: VGA register stuff + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_VGA_H +#define _ASM_VGA_H + + + +#endif /* _ASM_VGA_H */ diff --git a/include/asm-frv/xor.h b/include/asm-frv/xor.h new file mode 100644 index 000000000000..c82eb12a5b18 --- /dev/null +++ b/include/asm-frv/xor.h @@ -0,0 +1 @@ +#include <asm-generic/xor.h> diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index e0a28b925ef0..0fada8f16dc6 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -8,6 +8,7 @@ * edit all arch specific atomic.h files. */ +#include <asm/types.h> /* * Suppport for atomic_long_t diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index 747d790295f3..1b356207712c 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -274,7 +274,7 @@ dma_get_cache_alignment(void) { /* no easy way to get cache size on all processors, so return * the maximum possible, to be safe */ - return (1 << L1_CACHE_SHIFT_MAX); + return (1 << INTERNODE_CACHE_SHIFT); } static inline void diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h new file mode 100644 index 000000000000..3ae2c7347549 --- /dev/null +++ b/include/asm-generic/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_GENERIC_FUTEX_H +#define _ASM_GENERIC_FUTEX_H + +#ifdef __KERNEL__ + +#include <linux/futex.h> +#include <asm/errno.h> +#include <asm/uaccess.h> + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-h8300/futex.h b/include/asm-h8300/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-h8300/futex.h +++ b/include/asm-h8300/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-i386/cache.h b/include/asm-i386/cache.h index 849788710feb..615911e5bd24 100644 --- a/include/asm-i386/cache.h +++ b/include/asm-i386/cache.h @@ -10,6 +10,4 @@ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */ - #endif diff --git a/include/asm-i386/dma-mapping.h b/include/asm-i386/dma-mapping.h index e56c335f8ef9..6c37a9ab8d60 100644 --- a/include/asm-i386/dma-mapping.h +++ b/include/asm-i386/dma-mapping.h @@ -150,7 +150,7 @@ dma_get_cache_alignment(void) { /* no easy way to get cache size on all x86, so return the * maximum possible, to be safe */ - return (1 << L1_CACHE_SHIFT_MAX); + return (1 << INTERNODE_CACHE_SHIFT); } #define dma_is_consistent(d) (1) diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index 270f1986b19f..5169d7af456f 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h @@ -21,8 +21,6 @@ static __inline__ int irq_canonicalize(int irq) return ((irq == 2) ? 9 : irq); } -extern void release_vm86_irqs(struct task_struct *); - #ifdef CONFIG_X86_LOCAL_APIC # define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ #endif diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index 7e0f2945d17d..f324c53b6f9a 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -54,6 +54,9 @@ struct pt_regs { #define PTRACE_GET_THREAD_AREA 25 #define PTRACE_SET_THREAD_AREA 26 +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 + #ifdef __KERNEL__ #include <asm/vm86.h> diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index fe38b9a96233..481c3c0ea720 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -299,8 +299,9 @@ #define __NR_inotify_init 291 #define __NR_inotify_add_watch 292 #define __NR_inotify_rm_watch 293 +#define __NR_migrate_pages 294 -#define NR_syscalls 294 +#define NR_syscalls 295 /* * user-visible error numbers are in the range -1 - -128: see diff --git a/include/asm-i386/vm86.h b/include/asm-i386/vm86.h index 40ec82c6914d..952fd6957380 100644 --- a/include/asm-i386/vm86.h +++ b/include/asm-i386/vm86.h @@ -16,7 +16,11 @@ #define IF_MASK 0x00000200 #define IOPL_MASK 0x00003000 #define NT_MASK 0x00004000 +#ifdef CONFIG_VM86 #define VM_MASK 0x00020000 +#else +#define VM_MASK 0 /* ignored */ +#endif #define AC_MASK 0x00040000 #define VIF_MASK 0x00080000 /* virtual interrupt flag */ #define VIP_MASK 0x00100000 /* virtual interrupt pending */ @@ -200,9 +204,25 @@ struct kernel_vm86_struct { */ }; +#ifdef CONFIG_VM86 + void handle_vm86_fault(struct kernel_vm86_regs *, long); int handle_vm86_trap(struct kernel_vm86_regs *, long, int); +struct task_struct; +void release_vm86_irqs(struct task_struct *); + +#else + +#define handle_vm86_fault(a, b) +#define release_vm86_irqs(a) + +static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) { + return 0; +} + +#endif /* CONFIG_VM86 */ + #endif /* __KERNEL__ */ #endif diff --git a/include/asm-ia64/bug.h b/include/asm-ia64/bug.h index 3aa0a0a5474b..823616b5020b 100644 --- a/include/asm-ia64/bug.h +++ b/include/asm-ia64/bug.h @@ -2,11 +2,7 @@ #define _ASM_IA64_BUG_H #ifdef CONFIG_BUG -#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) -# define ia64_abort() __builtin_trap() -#else -# define ia64_abort() (*(volatile int *) 0 = 0) -#endif +#define ia64_abort() __builtin_trap() #define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0) /* should this BUG be made generic? */ diff --git a/include/asm-ia64/cache.h b/include/asm-ia64/cache.h index 666d8f175cb3..40dd25195d65 100644 --- a/include/asm-ia64/cache.h +++ b/include/asm-ia64/cache.h @@ -12,8 +12,6 @@ #define L1_CACHE_SHIFT CONFIG_IA64_L1_CACHE_SHIFT #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */ - #ifdef CONFIG_SMP # define SMP_CACHE_SHIFT L1_CACHE_SHIFT # define SMP_CACHE_BYTES L1_CACHE_BYTES diff --git a/include/asm-ia64/futex.h b/include/asm-ia64/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-ia64/futex.h +++ b/include/asm-ia64/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-ia64/io.h b/include/asm-ia64/io.h index cf772a67f858..b64fdb985494 100644 --- a/include/asm-ia64/io.h +++ b/include/asm-ia64/io.h @@ -89,6 +89,7 @@ phys_to_virt (unsigned long address) #define ARCH_HAS_VALID_PHYS_ADDR_RANGE extern int valid_phys_addr_range (unsigned long addr, size_t *count); /* efi.c */ +extern int valid_mmap_phys_addr_range (unsigned long addr, size_t *count); /* * The following two macros are deprecated and scheduled for removal. diff --git a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h index 0c91a76c5ea3..9e83210dc312 100644 --- a/include/asm-ia64/spinlock.h +++ b/include/asm-ia64/spinlock.h @@ -34,7 +34,7 @@ __raw_spin_lock_flags (raw_spinlock_t *lock, unsigned long flags) { register volatile unsigned int *ptr asm ("r31") = &lock->lock; -#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) # ifdef CONFIG_ITANIUM /* don't use brl on Itanium... */ asm volatile ("{\n\t" diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index 2bf543493cb8..962f9bd1bdff 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -269,12 +269,13 @@ #define __NR_inotify_init 1277 #define __NR_inotify_add_watch 1278 #define __NR_inotify_rm_watch 1279 +#define __NR_migrate_pages 1280 #ifdef __KERNEL__ #include <linux/config.h> -#define NR_syscalls 256 /* length of syscall table */ +#define NR_syscalls 270 /* length of syscall table */ #define __ARCH_WANT_SYS_RT_SIGACTION diff --git a/include/asm-m32r/cache.h b/include/asm-m32r/cache.h index 724820596980..9c2b2d9998bc 100644 --- a/include/asm-m32r/cache.h +++ b/include/asm-m32r/cache.h @@ -7,6 +7,4 @@ #define L1_CACHE_SHIFT 4 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define L1_CACHE_SHIFT_MAX 4 - #endif /* _ASM_M32R_CACHE_H */ diff --git a/include/asm-m32r/futex.h b/include/asm-m32r/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-m32r/futex.h +++ b/include/asm-m32r/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-m68k/cache.h b/include/asm-m68k/cache.h index 6161fd3d8600..fed3fd30de7e 100644 --- a/include/asm-m68k/cache.h +++ b/include/asm-m68k/cache.h @@ -8,6 +8,4 @@ #define L1_CACHE_SHIFT 4 #define L1_CACHE_BYTES (1<< L1_CACHE_SHIFT) -#define L1_CACHE_SHIFT_MAX 4 /* largest L1 which this arch supports */ - #endif diff --git a/include/asm-m68k/futex.h b/include/asm-m68k/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-m68k/futex.h +++ b/include/asm-m68k/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-m68knommu/futex.h b/include/asm-m68knommu/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-m68knommu/futex.h +++ b/include/asm-m68knommu/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-mips/cache.h b/include/asm-mips/cache.h index 1a5d1a669db3..55e19f2ff0e0 100644 --- a/include/asm-mips/cache.h +++ b/include/asm-mips/cache.h @@ -15,7 +15,6 @@ #define L1_CACHE_SHIFT CONFIG_MIPS_L1_CACHE_SHIFT #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define L1_CACHE_SHIFT_MAX 6 #define SMP_CACHE_SHIFT L1_CACHE_SHIFT #define SMP_CACHE_BYTES L1_CACHE_BYTES diff --git a/include/asm-parisc/cache.h b/include/asm-parisc/cache.h index 5da72e38bdde..38d201b5652d 100644 --- a/include/asm-parisc/cache.h +++ b/include/asm-parisc/cache.h @@ -28,7 +28,6 @@ #define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) #define SMP_CACHE_BYTES L1_CACHE_BYTES -#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */ extern void flush_data_cache_local(void); /* flushes local data-cache only */ extern void flush_instruction_cache_local(void); /* flushes local code-cache only */ diff --git a/include/asm-parisc/futex.h b/include/asm-parisc/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-parisc/futex.h +++ b/include/asm-parisc/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-powerpc/cache.h b/include/asm-powerpc/cache.h index 26ce502e76e8..6379c2df5c40 100644 --- a/include/asm-powerpc/cache.h +++ b/include/asm-powerpc/cache.h @@ -19,7 +19,6 @@ #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) #define SMP_CACHE_BYTES L1_CACHE_BYTES -#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */ #if defined(__powerpc64__) && !defined(__ASSEMBLY__) struct ppc64_caches { diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h index 59a80163f75f..a96e5742ca32 100644 --- a/include/asm-powerpc/dma-mapping.h +++ b/include/asm-powerpc/dma-mapping.h @@ -229,7 +229,7 @@ static inline int dma_get_cache_alignment(void) #ifdef CONFIG_PPC64 /* no easy way to get cache size on all processors, so return * the maximum possible, to be safe */ - return (1 << L1_CACHE_SHIFT_MAX); + return (1 << INTERNODE_CACHE_SHIFT); #else /* * Each processor family will define its own L1_CACHE_SHIFT, diff --git a/include/asm-s390/cache.h b/include/asm-s390/cache.h index 29845378b206..e20cdd9074db 100644 --- a/include/asm-s390/cache.h +++ b/include/asm-s390/cache.h @@ -13,7 +13,6 @@ #define L1_CACHE_BYTES 256 #define L1_CACHE_SHIFT 8 -#define L1_CACHE_SHIFT_MAX 8 /* largest L1 which this arch supports */ #define ARCH_KMALLOC_MINALIGN 8 diff --git a/include/asm-s390/futex.h b/include/asm-s390/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-s390/futex.h +++ b/include/asm-s390/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-sh/cache.h b/include/asm-sh/cache.h index 9b4dd6d8212e..656fdfe9e8b4 100644 --- a/include/asm-sh/cache.h +++ b/include/asm-sh/cache.h @@ -22,8 +22,6 @@ #define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) -#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */ - struct cache_info { unsigned int ways; unsigned int sets; diff --git a/include/asm-sh/futex.h b/include/asm-sh/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-sh/futex.h +++ b/include/asm-sh/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-sh64/cache.h b/include/asm-sh64/cache.h index f54e85e8a470..a4f36f0036e1 100644 --- a/include/asm-sh64/cache.h +++ b/include/asm-sh64/cache.h @@ -20,8 +20,6 @@ #define L1_CACHE_ALIGN_MASK (~(L1_CACHE_BYTES - 1)) #define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES - 1)) & L1_CACHE_ALIGN_MASK) #define L1_CACHE_SIZE_BYTES (L1_CACHE_BYTES << 10) -/* Largest L1 which this arch supports */ -#define L1_CACHE_SHIFT_MAX 5 #ifdef MODULE #define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES))) diff --git a/include/asm-sh64/futex.h b/include/asm-sh64/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-sh64/futex.h +++ b/include/asm-sh64/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-sparc/cache.h b/include/asm-sparc/cache.h index a10522cb21b7..cb971e88aea4 100644 --- a/include/asm-sparc/cache.h +++ b/include/asm-sparc/cache.h @@ -13,7 +13,6 @@ #define L1_CACHE_SHIFT 5 #define L1_CACHE_BYTES 32 #define L1_CACHE_ALIGN(x) ((((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))) -#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */ #define SMP_CACHE_BYTES 32 diff --git a/include/asm-sparc/futex.h b/include/asm-sparc/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-sparc/futex.h +++ b/include/asm-sparc/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-sparc64/cache.h b/include/asm-sparc64/cache.h index ade5ec3bfd5a..f7d35a2ae9b8 100644 --- a/include/asm-sparc64/cache.h +++ b/include/asm-sparc64/cache.h @@ -9,7 +9,6 @@ #define L1_CACHE_BYTES 32 /* Two 16-byte sub-blocks per line. */ #define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) -#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */ #define SMP_CACHE_BYTES_SHIFT 6 #define SMP_CACHE_BYTES (1 << SMP_CACHE_BYTES_SHIFT) /* L2 cache line size. */ diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-sparc64/futex.h +++ b/include/asm-sparc64/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index b5417529f6f1..309f1466b6fa 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -193,11 +193,7 @@ do { \ * not preserve it's value. Hairy, but it lets us remove 2 loads * and 2 stores in this critical code path. -DaveM */ -#if __GNUC__ >= 3 #define EXTRA_CLOBBER ,"%l1" -#else -#define EXTRA_CLOBBER -#endif #define switch_to(prev, next, last) \ do { if (test_thread_flag(TIF_PERFCTR)) { \ unsigned long __tmp; \ diff --git a/include/asm-um/cache.h b/include/asm-um/cache.h index a10602a5b2d6..3d0587075521 100644 --- a/include/asm-um/cache.h +++ b/include/asm-um/cache.h @@ -13,9 +13,6 @@ # define L1_CACHE_SHIFT 5 #endif -/* XXX: this is valid for x86 and x86_64. */ -#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */ - #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) #endif diff --git a/include/asm-um/futex.h b/include/asm-um/futex.h index 142ee2d8e0fd..6a332a9f099c 100644 --- a/include/asm-um/futex.h +++ b/include/asm-um/futex.h @@ -1,12 +1,6 @@ -#ifndef __UM_FUTEX_H -#define __UM_FUTEX_H +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/system.h> -#include <asm/processor.h> -#include <asm/uaccess.h> - -#include "asm/arch/futex.h" +#include <asm-generic/futex.h> #endif diff --git a/include/asm-um/rwsem.h b/include/asm-um/rwsem.h index 661c0e54702b..b5fc449dc86b 100644 --- a/include/asm-um/rwsem.h +++ b/include/asm-um/rwsem.h @@ -1,10 +1,6 @@ #ifndef __UM_RWSEM_H__ #define __UM_RWSEM_H__ -#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) -#define __builtin_expect(exp,c) (exp) -#endif - #include "asm/arch/rwsem.h" #endif diff --git a/include/asm-v850/cache.h b/include/asm-v850/cache.h index cbf9096e8517..8832c7ea3242 100644 --- a/include/asm-v850/cache.h +++ b/include/asm-v850/cache.h @@ -23,6 +23,4 @@ #define L1_CACHE_SHIFT 4 #endif -#define L1_CACHE_SHIFT_MAX L1_CACHE_SHIFT - #endif /* __V850_CACHE_H__ */ diff --git a/include/asm-v850/futex.h b/include/asm-v850/futex.h index 9feff4ce1424..6a332a9f099c 100644 --- a/include/asm-v850/futex.h +++ b/include/asm-v850/futex.h @@ -1,53 +1,6 @@ #ifndef _ASM_FUTEX_H #define _ASM_FUTEX_H -#ifdef __KERNEL__ +#include <asm-generic/futex.h> -#include <linux/futex.h> -#include <asm/errno.h> -#include <asm/uaccess.h> - -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } - - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-v850/unistd.h b/include/asm-v850/unistd.h index 5a86f8e976ec..82460a7bb233 100644 --- a/include/asm-v850/unistd.h +++ b/include/asm-v850/unistd.h @@ -241,9 +241,6 @@ /* User programs sometimes end up including this header file (indirectly, via uClibc header files), so I'm a bit nervous just including <linux/compiler.h>. */ -#if !defined(__builtin_expect) && __GNUC__ == 2 && __GNUC_MINOR__ < 96 -#define __builtin_expect(x, expected_value) (x) -#endif #define __syscall_return(type, res) \ do { \ @@ -346,20 +343,6 @@ type name (atype a, btype b, ctype c, dtype d, etype e) \ __syscall_return (type, __ret); \ } -#if __GNUC__ < 3 -/* In older versions of gcc, `asm' statements with more than 10 - input/output arguments produce a fatal error. To work around this - problem, we use two versions, one for gcc-3.x and one for earlier - versions of gcc (the `earlier gcc' version doesn't work with gcc-3.x - because gcc-3.x doesn't allow clobbers to also be input arguments). */ -#define __SYSCALL6_TRAP(syscall, ret, a, b, c, d, e, f) \ - __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP \ - : "=r" (ret), "=r" (syscall) \ - : "1" (syscall), \ - "r" (a), "r" (b), "r" (c), "r" (d), \ - "r" (e), "r" (f) \ - : SYSCALL_CLOBBERS, SYSCALL_ARG4, SYSCALL_ARG5); -#else /* __GNUC__ >= 3 */ #define __SYSCALL6_TRAP(syscall, ret, a, b, c, d, e, f) \ __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP \ : "=r" (ret), "=r" (syscall), \ @@ -368,7 +351,6 @@ type name (atype a, btype b, ctype c, dtype d, etype e) \ "r" (a), "r" (b), "r" (c), "r" (d), \ "2" (e), "3" (f) \ : SYSCALL_CLOBBERS); -#endif #define _syscall6(type, name, atype, a, btype, b, ctype, c, dtype, d, etype, e, ftype, f) \ type name (atype a, btype b, ctype c, dtype d, etype e, ftype f) \ diff --git a/include/asm-x86_64/cache.h b/include/asm-x86_64/cache.h index 33e53424128b..b4a2401de77b 100644 --- a/include/asm-x86_64/cache.h +++ b/include/asm-x86_64/cache.h @@ -9,6 +9,5 @@ /* L1 cache line size */ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */ #endif diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h index d5166ec3868d..e8843362a6cc 100644 --- a/include/asm-x86_64/ia32_unistd.h +++ b/include/asm-x86_64/ia32_unistd.h @@ -299,7 +299,8 @@ #define __NR_ia32_inotify_init 291 #define __NR_ia32_inotify_add_watch 292 #define __NR_ia32_inotify_rm_watch 293 +#define __NR_ia32_migrate_pages 294 -#define IA32_NR_syscalls 294 /* must be > than biggest syscall! */ +#define IA32_NR_syscalls 295 /* must be > than biggest syscall! */ #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index 2c42150bce0c..e6f896161c11 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -571,8 +571,10 @@ __SYSCALL(__NR_inotify_init, sys_inotify_init) __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) #define __NR_inotify_rm_watch 255 __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) +#define __NR_migrate_pages 256 +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) -#define __NR_syscall_max __NR_inotify_rm_watch +#define __NR_syscall_max __NR_migrate_pages #ifndef __NO_STUBS /* user-visible error numbers are in the range -1 - -4095 */ diff --git a/include/linux/aio.h b/include/linux/aio.h index 49fd37629ee4..00c8efa95cc3 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -94,26 +94,27 @@ struct kiocb { ssize_t (*ki_retry)(struct kiocb *); void (*ki_dtor)(struct kiocb *); - struct list_head ki_list; /* the aio core uses this - * for cancellation */ - union { void __user *user; struct task_struct *tsk; } ki_obj; + __u64 ki_user_data; /* user's data for completion */ + wait_queue_t ki_wait; loff_t ki_pos; + + void *private; /* State that we remember to be able to restart/retry */ unsigned short ki_opcode; size_t ki_nbytes; /* copy of iocb->aio_nbytes */ char __user *ki_buf; /* remaining iocb->aio_buf */ size_t ki_left; /* remaining bytes */ - wait_queue_t ki_wait; long ki_retried; /* just for testing */ long ki_kicked; /* just for testing */ long ki_queued; /* just for testing */ - void *private; + struct list_head ki_list; /* the aio core uses this + * for cancellation */ }; #define is_sync_kiocb(iocb) ((iocb)->ki_key == KIOCB_SYNC_KEY) @@ -126,6 +127,7 @@ struct kiocb { (x)->ki_filp = (filp); \ (x)->ki_ctx = NULL; \ (x)->ki_cancel = NULL; \ + (x)->ki_retry = NULL; \ (x)->ki_dtor = NULL; \ (x)->ki_obj.tsk = tsk; \ (x)->ki_user_data = 0; \ diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 911c09cb9bf9..6ba3aa8a81f4 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -155,15 +155,15 @@ struct elapaarp { #define AARP_REQUEST 1 #define AARP_REPLY 2 #define AARP_PROBE 3 - __u8 hw_src[ETH_ALEN] __attribute__ ((packed)); - __u8 pa_src_zero __attribute__ ((packed)); - __be16 pa_src_net __attribute__ ((packed)); - __u8 pa_src_node __attribute__ ((packed)); - __u8 hw_dst[ETH_ALEN] __attribute__ ((packed)); - __u8 pa_dst_zero __attribute__ ((packed)); - __be16 pa_dst_net __attribute__ ((packed)); - __u8 pa_dst_node __attribute__ ((packed)); -}; + __u8 hw_src[ETH_ALEN]; + __u8 pa_src_zero; + __be16 pa_src_net; + __u8 pa_src_node; + __u8 hw_dst[ETH_ALEN]; + __u8 pa_dst_zero; + __be16 pa_dst_net; + __u8 pa_dst_node; +} __attribute__ ((packed)); static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb) { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 1db061bb6b08..9f159baf153f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -197,7 +197,8 @@ int block_read_full_page(struct page*, get_block_t*); int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, loff_t *); -int generic_cont_expand(struct inode *inode, loff_t size) ; +int generic_cont_expand(struct inode *inode, loff_t size); +int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); int block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index 04bd756efc67..e86e4a938373 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -156,7 +156,7 @@ extern __be32 htonl(__u32); extern __u16 ntohs(__be16); extern __be16 htons(__u16); -#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) +#if defined(__GNUC__) && defined(__OPTIMIZE__) #define ___htonl(x) __cpu_to_be32(x) #define ___htons(x) __cpu_to_be16(x) diff --git a/include/linux/byteorder/swab.h b/include/linux/byteorder/swab.h index 2f1cb775125a..25f7f32883ec 100644 --- a/include/linux/byteorder/swab.h +++ b/include/linux/byteorder/swab.h @@ -110,7 +110,7 @@ /* * Allow constant folding */ -#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) +#if defined(__GNUC__) && defined(__OPTIMIZE__) # define __swab16(x) \ (__builtin_constant_p((__u16)(x)) ? \ ___swab16((x)) : \ diff --git a/include/linux/byteorder/swabb.h b/include/linux/byteorder/swabb.h index d5f2a3205109..ae5e5f914bf4 100644 --- a/include/linux/byteorder/swabb.h +++ b/include/linux/byteorder/swabb.h @@ -77,7 +77,7 @@ /* * Allow constant folding */ -#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) +#if defined(__GNUC__) && defined(__OPTIMIZE__) # define __swahw32(x) \ (__builtin_constant_p((__u32)(x)) ? \ ___swahw32((x)) : \ diff --git a/include/linux/cache.h b/include/linux/cache.h index 0b7ecf3af78a..ffe52210fc4f 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -45,12 +45,21 @@ #endif /* CONFIG_SMP */ #endif -#if !defined(____cacheline_maxaligned_in_smp) +/* + * The maximum alignment needed for some critical structures + * These could be inter-node cacheline sizes/L3 cacheline + * size etc. Define this in asm/cache.h for your arch + */ +#ifndef INTERNODE_CACHE_SHIFT +#define INTERNODE_CACHE_SHIFT L1_CACHE_SHIFT +#endif + +#if !defined(____cacheline_internodealigned_in_smp) #if defined(CONFIG_SMP) -#define ____cacheline_maxaligned_in_smp \ - __attribute__((__aligned__(1 << (L1_CACHE_SHIFT_MAX)))) +#define ____cacheline_internodealigned_in_smp \ + __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) #else -#define ____cacheline_maxaligned_in_smp +#define ____cacheline_internodealigned_in_smp #endif #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 152734055403..2e05e1e6b0e6 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -15,3 +15,12 @@ ({ unsigned long __ptr; \ __asm__ ("" : "=g"(__ptr) : "0"(ptr)); \ (typeof(ptr)) (__ptr + (off)); }) + + +#define inline inline __attribute__((always_inline)) +#define __inline__ __inline__ __attribute__((always_inline)) +#define __inline __inline __attribute__((always_inline)) +#define __deprecated __attribute__((deprecated)) +#define noinline __attribute__((noinline)) +#define __attribute_pure__ __attribute__((pure)) +#define __attribute_const__ __attribute__((__const__)) diff --git a/include/linux/compiler-gcc2.h b/include/linux/compiler-gcc2.h deleted file mode 100644 index ebed17660c5f..000000000000 --- a/include/linux/compiler-gcc2.h +++ /dev/null @@ -1,29 +0,0 @@ -/* Never include this file directly. Include <linux/compiler.h> instead. */ - -/* These definitions are for GCC v2.x. */ - -/* Somewhere in the middle of the GCC 2.96 development cycle, we implemented - a mechanism by which the user can annotate likely branch directions and - expect the blocks to be reordered appropriately. Define __builtin_expect - to nothing for earlier compilers. */ -#include <linux/compiler-gcc.h> - -#if __GNUC_MINOR__ < 96 -# define __builtin_expect(x, expected_value) (x) -#endif - -#define __attribute_used__ __attribute__((__unused__)) - -/* - * The attribute `pure' is not implemented in GCC versions earlier - * than 2.96. - */ -#if __GNUC_MINOR__ >= 96 -# define __attribute_pure__ __attribute__((pure)) -# define __attribute_const__ __attribute__((__const__)) -#endif - -/* GCC 2.95.x/2.96 recognize __va_copy, but not va_copy. Actually later GCC's - * define both va_copy and __va_copy, but the latter may go away, so limit this - * to this header */ -#define va_copy __va_copy diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h index a6fa615afab5..4209082ee934 100644 --- a/include/linux/compiler-gcc3.h +++ b/include/linux/compiler-gcc3.h @@ -3,29 +3,12 @@ /* These definitions are for GCC v3.x. */ #include <linux/compiler-gcc.h> -#if __GNUC_MINOR__ >= 1 -# define inline inline __attribute__((always_inline)) -# define __inline__ __inline__ __attribute__((always_inline)) -# define __inline __inline __attribute__((always_inline)) -#endif - -#if __GNUC_MINOR__ > 0 -# define __deprecated __attribute__((deprecated)) -#endif - #if __GNUC_MINOR__ >= 3 # define __attribute_used__ __attribute__((__used__)) #else # define __attribute_used__ __attribute__((__unused__)) #endif -#define __attribute_pure__ __attribute__((pure)) -#define __attribute_const__ __attribute__((__const__)) - -#if __GNUC_MINOR__ >= 1 -#define noinline __attribute__((noinline)) -#endif - #if __GNUC_MINOR__ >= 4 #define __must_check __attribute__((warn_unused_result)) #endif diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 53686c037a06..e913e9beaf69 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -3,14 +3,7 @@ /* These definitions are for GCC v4.x. */ #include <linux/compiler-gcc.h> -#define inline inline __attribute__((always_inline)) -#define __inline__ __inline__ __attribute__((always_inline)) -#define __inline __inline __attribute__((always_inline)) -#define __deprecated __attribute__((deprecated)) #define __attribute_used__ __attribute__((__used__)) -#define __attribute_pure__ __attribute__((pure)) -#define __attribute_const__ __attribute__((__const__)) -#define noinline __attribute__((noinline)) #define __must_check __attribute__((warn_unused_result)) #define __compiler_offsetof(a,b) __builtin_offsetof(a,b) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d7378215b851..f23d3c6fc2c0 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -42,8 +42,6 @@ extern void __chk_io_ptr(void __iomem *); # include <linux/compiler-gcc4.h> #elif __GNUC__ == 3 # include <linux/compiler-gcc3.h> -#elif __GNUC__ == 2 -# include <linux/compiler-gcc2.h> #else # error Sorry, your compiler is too old/not recognized. #endif diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 6e2deef96b34..c472f972bd6d 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -14,22 +14,43 @@ #ifdef CONFIG_CPUSETS +extern int number_of_cpusets; /* How many cpusets are defined in system? */ + +extern int cpuset_init_early(void); extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_fork(struct task_struct *p); extern void cpuset_exit(struct task_struct *p); -extern cpumask_t cpuset_cpus_allowed(const struct task_struct *p); +extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); +extern nodemask_t cpuset_mems_allowed(struct task_struct *p); void cpuset_init_current_mems_allowed(void); -void cpuset_update_current_mems_allowed(void); -void cpuset_restrict_to_mems_allowed(unsigned long *nodes); +void cpuset_update_task_memory_state(void); +#define cpuset_nodes_subset_current_mems_allowed(nodes) \ + nodes_subset((nodes), current->mems_allowed) int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); -extern int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask); + +extern int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask); +static int inline cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) +{ + return number_of_cpusets <= 1 || __cpuset_zone_allowed(z, gfp_mask); +} + extern int cpuset_excl_nodes_overlap(const struct task_struct *p); + +#define cpuset_memory_pressure_bump() \ + do { \ + if (cpuset_memory_pressure_enabled) \ + __cpuset_memory_pressure_bump(); \ + } while (0) +extern int cpuset_memory_pressure_enabled; +extern void __cpuset_memory_pressure_bump(void); + extern struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); #else /* !CONFIG_CPUSETS */ +static inline int cpuset_init_early(void) { return 0; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} static inline void cpuset_fork(struct task_struct *p) {} @@ -40,9 +61,14 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) return cpu_possible_map; } +static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) +{ + return node_possible_map; +} + static inline void cpuset_init_current_mems_allowed(void) {} -static inline void cpuset_update_current_mems_allowed(void) {} -static inline void cpuset_restrict_to_mems_allowed(unsigned long *nodes) {} +static inline void cpuset_update_task_memory_state(void) {} +#define cpuset_nodes_subset_current_mems_allowed(nodes) (1) static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) { @@ -59,6 +85,8 @@ static inline int cpuset_excl_nodes_overlap(const struct task_struct *p) return 1; } +static inline void cpuset_memory_pressure_bump(void) {} + static inline char *cpuset_task_status_allowed(struct task_struct *task, char *buffer) { diff --git a/include/linux/cycx_x25.h b/include/linux/cycx_x25.h index b10a7f3a8cac..f7a906583463 100644 --- a/include/linux/cycx_x25.h +++ b/include/linux/cycx_x25.h @@ -38,11 +38,11 @@ extern unsigned int cycx_debug; /* Data Structures */ /* X.25 Command Block. */ struct cycx_x25_cmd { - u16 command PACKED; - u16 link PACKED; /* values: 0 or 1 */ - u16 len PACKED; /* values: 0 thru 0x205 (517) */ - u32 buf PACKED; -}; + u16 command; + u16 link; /* values: 0 or 1 */ + u16 len; /* values: 0 thru 0x205 (517) */ + u32 buf; +} PACKED; /* Defines for the 'command' field. */ #define X25_CONNECT_REQUEST 0x4401 @@ -92,34 +92,34 @@ struct cycx_x25_cmd { * @flags - see dosx25.doc, in portuguese, for details */ struct cycx_x25_config { - u8 link PACKED; - u8 speed PACKED; - u8 clock PACKED; - u8 n2 PACKED; - u8 n2win PACKED; - u8 n3win PACKED; - u8 nvc PACKED; - u8 pktlen PACKED; - u8 locaddr PACKED; - u8 remaddr PACKED; - u16 t1 PACKED; - u16 t2 PACKED; - u8 t21 PACKED; - u8 npvc PACKED; - u8 t23 PACKED; - u8 flags PACKED; -}; + u8 link; + u8 speed; + u8 clock; + u8 n2; + u8 n2win; + u8 n3win; + u8 nvc; + u8 pktlen; + u8 locaddr; + u8 remaddr; + u16 t1; + u16 t2; + u8 t21; + u8 npvc; + u8 t23; + u8 flags; +} PACKED; struct cycx_x25_stats { - u16 rx_crc_errors PACKED; - u16 rx_over_errors PACKED; - u16 n2_tx_frames PACKED; - u16 n2_rx_frames PACKED; - u16 tx_timeouts PACKED; - u16 rx_timeouts PACKED; - u16 n3_tx_packets PACKED; - u16 n3_rx_packets PACKED; - u16 tx_aborts PACKED; - u16 rx_aborts PACKED; -}; + u16 rx_crc_errors; + u16 rx_over_errors; + u16 n2_tx_frames; + u16 n2_rx_frames; + u16 tx_timeouts; + u16 rx_timeouts; + u16 n3_tx_packets; + u16 n3_rx_packets; + u16 tx_aborts; + u16 rx_aborts; +} PACKED; #endif /* _CYCX_X25_H */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 46a2ba617595..a3ed5e059d47 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -95,14 +95,19 @@ struct dentry { struct qstr d_name; struct list_head d_lru; /* LRU list */ - struct list_head d_child; /* child of parent list */ + /* + * d_child and d_rcu can share memory + */ + union { + struct list_head d_child; /* child of parent list */ + struct rcu_head d_rcu; + } d_u; struct list_head d_subdirs; /* our children */ struct list_head d_alias; /* inode alias list */ unsigned long d_time; /* used by d_revalidate */ struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ void *d_fsdata; /* fs-specific data */ - struct rcu_head d_rcu; struct dcookie_struct *d_cookie; /* cookie, if any */ int d_mounted; unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ diff --git a/include/linux/elf.h b/include/linux/elf.h index ff955dbf510d..d3bfacb24496 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -151,6 +151,8 @@ typedef __s64 Elf64_Sxword; #define STT_FUNC 2 #define STT_SECTION 3 #define STT_FILE 4 +#define STT_COMMON 5 +#define STT_TLS 6 #define ELF_ST_BIND(x) ((x) >> 4) #define ELF_ST_TYPE(x) (((unsigned int) x) & 0xf) diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c9c48d65630..4c82219b0fae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -9,7 +9,6 @@ #include <linux/config.h> #include <linux/limits.h> #include <linux/ioctl.h> -#include <linux/rcuref.h> /* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change @@ -104,11 +103,11 @@ extern int dir_notify_enable; #define MS_MOVE 8192 #define MS_REC 16384 #define MS_VERBOSE 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ #define MS_UNBINDABLE (1<<17) /* change to unbindable */ #define MS_PRIVATE (1<<18) /* change to private */ #define MS_SLAVE (1<<19) /* change to slave */ #define MS_SHARED (1<<20) /* change to shared */ -#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -225,6 +224,7 @@ extern int dir_notify_enable; #include <asm/semaphore.h> #include <asm/byteorder.h> +struct hd_geometry; struct iovec; struct nameidata; struct kiocb; @@ -653,7 +653,7 @@ extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); #define file_list_unlock() spin_unlock(&files_lock); -#define get_file(x) rcuref_inc(&(x)->f_count) +#define get_file(x) atomic_inc(&(x)->f_count) #define file_count(x) atomic_read(&(x)->f_count) #define MAX_NON_LFS ((1UL<<31) - 1) @@ -808,7 +808,6 @@ struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ unsigned long s_blocksize; - unsigned long s_old_blocksize; unsigned char s_blocksize_bits; unsigned char s_dirt; unsigned long long s_maxbytes; /* Max file size */ @@ -963,6 +962,7 @@ struct block_device_operations { int (*direct_access) (struct block_device *, sector_t, unsigned long *); int (*media_changed) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); + int (*getgeo)(struct block_device *, struct hd_geometry *); struct module *owner; }; @@ -1345,7 +1345,8 @@ static inline int break_lease(struct inode *inode, unsigned int mode) /* fs/open.c */ -extern int do_truncate(struct dentry *, loff_t start, struct file *filp); +extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, + struct file *filp); extern long do_sys_open(const char __user *filename, int flags, int mode); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); diff --git a/include/linux/ide.h b/include/linux/ide.h index 7b6a6a58e465..4dd6694963c0 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -801,7 +801,7 @@ typedef struct hwif_s { unsigned dma; void (*led_act)(void *data, int rw); -} ____cacheline_maxaligned_in_smp ide_hwif_t; +} ____cacheline_internodealigned_in_smp ide_hwif_t; /* * internal ide interrupt handler type diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h index 511999c7eeda..395f0aad9cbf 100644 --- a/include/linux/if_frad.h +++ b/include/linux/if_frad.h @@ -131,17 +131,17 @@ struct frad_conf /* these are the fields of an RFC 1490 header */ struct frhdr { - unsigned char control __attribute__((packed)); + unsigned char control; /* for IP packets, this can be the NLPID */ - unsigned char pad __attribute__((packed)); + unsigned char pad; - unsigned char NLPID __attribute__((packed)); - unsigned char OUI[3] __attribute__((packed)); - unsigned short PID __attribute__((packed)); + unsigned char NLPID; + unsigned char OUI[3]; + unsigned short PID; #define IP_NLPID pad -}; +} __attribute__((packed)); /* see RFC 1490 for the definition of the following */ #define FRAD_I_UI 0x03 diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 41f150a3d2dd..e50a95fbeb11 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -79,7 +79,7 @@ static inline void __deprecated save_flags(unsigned long *x) { local_save_flags(*x); } -#define save_flags(x) save_flags(&x); +#define save_flags(x) save_flags(&x) static inline void __deprecated restore_flags(unsigned long x) { local_irq_restore(x); diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 93bbed5c6cf4..9c8f4c9ed429 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -191,6 +191,10 @@ struct inet6_skb_parm { __u16 srcrt; __u16 dst1; __u16 lastopt; + __u32 nhoff; + __u16 flags; + +#define IP6SKB_XFRM_TRANSFORMED 1 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/include/linux/isdnif.h b/include/linux/isdnif.h index 7a4eacd77cb2..04e10f9f14f8 100644 --- a/include/linux/isdnif.h +++ b/include/linux/isdnif.h @@ -282,43 +282,43 @@ typedef struct setup_parm { typedef struct T30_s { /* session parameters */ - __u8 resolution __attribute__ ((packed)); - __u8 rate __attribute__ ((packed)); - __u8 width __attribute__ ((packed)); - __u8 length __attribute__ ((packed)); - __u8 compression __attribute__ ((packed)); - __u8 ecm __attribute__ ((packed)); - __u8 binary __attribute__ ((packed)); - __u8 scantime __attribute__ ((packed)); - __u8 id[FAXIDLEN] __attribute__ ((packed)); + __u8 resolution; + __u8 rate; + __u8 width; + __u8 length; + __u8 compression; + __u8 ecm; + __u8 binary; + __u8 scantime; + __u8 id[FAXIDLEN]; /* additional parameters */ - __u8 phase __attribute__ ((packed)); - __u8 direction __attribute__ ((packed)); - __u8 code __attribute__ ((packed)); - __u8 badlin __attribute__ ((packed)); - __u8 badmul __attribute__ ((packed)); - __u8 bor __attribute__ ((packed)); - __u8 fet __attribute__ ((packed)); - __u8 pollid[FAXIDLEN] __attribute__ ((packed)); - __u8 cq __attribute__ ((packed)); - __u8 cr __attribute__ ((packed)); - __u8 ctcrty __attribute__ ((packed)); - __u8 minsp __attribute__ ((packed)); - __u8 phcto __attribute__ ((packed)); - __u8 rel __attribute__ ((packed)); - __u8 nbc __attribute__ ((packed)); + __u8 phase; + __u8 direction; + __u8 code; + __u8 badlin; + __u8 badmul; + __u8 bor; + __u8 fet; + __u8 pollid[FAXIDLEN]; + __u8 cq; + __u8 cr; + __u8 ctcrty; + __u8 minsp; + __u8 phcto; + __u8 rel; + __u8 nbc; /* remote station parameters */ - __u8 r_resolution __attribute__ ((packed)); - __u8 r_rate __attribute__ ((packed)); - __u8 r_width __attribute__ ((packed)); - __u8 r_length __attribute__ ((packed)); - __u8 r_compression __attribute__ ((packed)); - __u8 r_ecm __attribute__ ((packed)); - __u8 r_binary __attribute__ ((packed)); - __u8 r_scantime __attribute__ ((packed)); - __u8 r_id[FAXIDLEN] __attribute__ ((packed)); - __u8 r_code __attribute__ ((packed)); -} T30_s; + __u8 r_resolution; + __u8 r_rate; + __u8 r_width; + __u8 r_length; + __u8 r_compression; + __u8 r_ecm; + __u8 r_binary; + __u8 r_scantime; + __u8 r_id[FAXIDLEN]; + __u8 r_code; +} __attribute__((packed)) T30_s; #define ISDN_TTY_FAX_CONN_IN 0 #define ISDN_TTY_FAX_CONN_OUT 1 diff --git a/include/linux/kernel.h b/include/linux/kernel.h index b1e407a4fbda..ca7ff8fdd090 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -316,8 +316,6 @@ extern int randomize_va_space; #endif /* Trap pasters of __FUNCTION__ at compile-time */ -#if __GNUC__ > 2 || __GNUC_MINOR__ >= 95 #define __FUNCTION__ (__func__) -#endif #endif diff --git a/include/linux/key.h b/include/linux/key.h index 4d189e51bc6c..cbf464ad9589 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -177,6 +177,8 @@ struct key { /* * kernel managed key type definition */ +typedef int (*request_key_actor_t)(struct key *key, struct key *authkey, const char *op); + struct key_type { /* name of the type */ const char *name; @@ -218,6 +220,16 @@ struct key_type { */ long (*read)(const struct key *key, char __user *buffer, size_t buflen); + /* handle request_key() for this type instead of invoking + * /sbin/request-key (optional) + * - key is the key to instantiate + * - authkey is the authority to assume when instantiating this key + * - op is the operation to be done, usually "create" + * - the call must not return until the instantiation process has run + * its course + */ + request_key_actor_t request_key; + /* internal fields */ struct list_head link; /* link in types list */ }; diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h index 8d7c59a29e09..3365945640c9 100644 --- a/include/linux/keyctl.h +++ b/include/linux/keyctl.h @@ -19,6 +19,7 @@ #define KEY_SPEC_USER_KEYRING -4 /* - key ID for UID-specific keyring */ #define KEY_SPEC_USER_SESSION_KEYRING -5 /* - key ID for UID-session keyring */ #define KEY_SPEC_GROUP_KEYRING -6 /* - key ID for GID-specific keyring */ +#define KEY_SPEC_REQKEY_AUTH_KEY -7 /* - key ID for assumed request_key auth key */ /* request-key default keyrings */ #define KEY_REQKEY_DEFL_NO_CHANGE -1 @@ -46,5 +47,7 @@ #define KEYCTL_INSTANTIATE 12 /* instantiate a partially constructed key */ #define KEYCTL_NEGATE 13 /* negate a partially constructed key */ #define KEYCTL_SET_REQKEY_KEYRING 14 /* set default request-key keyring */ +#define KEYCTL_SET_TIMEOUT 15 /* set key timeout */ +#define KEYCTL_ASSUME_AUTHORITY 16 /* assume request_key() authorisation */ #endif /* _LINUX_KEYCTL_H */ diff --git a/include/linux/memory.h b/include/linux/memory.h index dc4081b6f161..e251dc43d0f5 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -70,21 +70,15 @@ static inline void unregister_memory_notifier(struct notifier_block *nb) { } #else -extern int register_memory(struct memory_block *, struct mem_section *section, struct node *); extern int register_new_memory(struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); -extern int register_memory_notifier(struct notifier_block *nb); -extern void unregister_memory_notifier(struct notifier_block *nb); +extern int remove_memory_block(unsigned long, struct mem_section *, int); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) -extern int invalidate_phys_mapping(unsigned long, unsigned long); struct notifier_block; -extern int register_memory_notifier(struct notifier_block *nb); -extern void unregister_memory_notifier(struct notifier_block *nb); - #endif /* CONFIG_MEMORY_HOTPLUG */ #define hotplug_memory_notifier(fn, pri) { \ diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index ed00b278cb93..c7ac77e873b3 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -22,6 +22,9 @@ /* Flags for mbind */ #define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ +#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */ +#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ +#define MPOL_MF_INTERNAL (1<<3) /* Internal flags start here */ #ifdef __KERNEL__ @@ -65,6 +68,7 @@ struct mempolicy { nodemask_t nodes; /* interleave */ /* undefined for default */ } v; + nodemask_t cpuset_mems_allowed; /* mempolicy relative to these nodes */ }; /* @@ -141,12 +145,21 @@ void mpol_free_shared_policy(struct shared_policy *p); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx); -struct mempolicy *get_vma_policy(struct task_struct *task, - struct vm_area_struct *vma, unsigned long addr); - extern void numa_default_policy(void); extern void numa_policy_init(void); -extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new); +extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new); +extern void mpol_rebind_task(struct task_struct *tsk, + const nodemask_t *new); +extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); +#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x)) + +#ifdef CONFIG_CPUSET +#define current_cpuset_is_being_rebound() \ + (cpuset_being_rebound == current->cpuset) +#else +#define current_cpuset_is_being_rebound() 0 +#endif + extern struct mempolicy default_policy; extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr); @@ -159,6 +172,11 @@ static inline void check_highest_zone(int k) policy_zone = k; } +int do_migrate_pages(struct mm_struct *mm, + const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags); + +extern void *cpuset_being_rebound; /* Trigger mpol_copy vma rebind */ + #else struct mempolicy {}; @@ -218,17 +236,35 @@ static inline void numa_default_policy(void) { } -static inline void numa_policy_rebind(const nodemask_t *old, +static inline void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new) { } +static inline void mpol_rebind_task(struct task_struct *tsk, + const nodemask_t *new) +{ +} + +static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) +{ +} + +#define set_cpuset_being_rebound(x) do {} while (0) + static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr) { return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER); } +static inline int do_migrate_pages(struct mm_struct *mm, + const nodemask_t *from_nodes, + const nodemask_t *to_nodes, int flags) +{ + return 0; +} + static inline void check_highest_zone(int k) { } diff --git a/include/linux/mm.h b/include/linux/mm.h index bc01fff3aa01..df80e63903b5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -223,24 +223,27 @@ struct page { * & limit reverse map searches. */ union { - unsigned long private; /* Mapping-private opaque data: - * usually used for buffer_heads - * if PagePrivate set; used for - * swp_entry_t if PageSwapCache - * When page is free, this indicates - * order in the buddy system. - */ + struct { + unsigned long private; /* Mapping-private opaque data: + * usually used for buffer_heads + * if PagePrivate set; used for + * swp_entry_t if PageSwapCache. + * When page is free, this + * indicates order in the buddy + * system. + */ + struct address_space *mapping; /* If low bit clear, points to + * inode address_space, or NULL. + * If page mapped as anonymous + * memory, low bit is set, and + * it points to anon_vma object: + * see PAGE_MAPPING_ANON below. + */ + }; #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS - spinlock_t ptl; + spinlock_t ptl; #endif - } u; - struct address_space *mapping; /* If low bit clear, points to - * inode address_space, or NULL. - * If page mapped as anonymous - * memory, low bit is set, and - * it points to anon_vma object: - * see PAGE_MAPPING_ANON below. - */ + }; pgoff_t index; /* Our offset within mapping. */ struct list_head lru; /* Pageout list, eg. active_list * protected by zone->lru_lock ! @@ -261,8 +264,8 @@ struct page { #endif /* WANT_PAGE_VIRTUAL */ }; -#define page_private(page) ((page)->u.private) -#define set_page_private(page, v) ((page)->u.private = (v)) +#define page_private(page) ((page)->private) +#define set_page_private(page, v) ((page)->private = (v)) /* * FIXME: take this include out, include page-flags.h in @@ -308,7 +311,7 @@ struct page { */ #define get_page_testone(p) atomic_inc_and_test(&(p)->_count) -#define set_page_count(p,v) atomic_set(&(p)->_count, v - 1) +#define set_page_count(p,v) atomic_set(&(p)->_count, (v) - 1) #define __put_page(p) atomic_dec(&(p)->_count) extern void FASTCALL(__page_cache_release(struct page *)); @@ -815,7 +818,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a * overflow into the next struct page (as it might with DEBUG_SPINLOCK). * When freeing, reset page->mapping so free_pages_check won't complain. */ -#define __pte_lockptr(page) &((page)->u.ptl) +#define __pte_lockptr(page) &((page)->ptl) #define pte_lock_init(_page) do { \ spin_lock_init(__pte_lockptr(_page)); \ } while (0) @@ -1036,5 +1039,12 @@ int in_gate_area_no_task(unsigned long addr); /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */ #define OOM_DISABLE -17 +int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +int shrink_slab(unsigned long scanned, gfp_t gfp_mask, + unsigned long lru_pages); +void drop_pagecache(void); +void drop_slab(void); + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 47762ca695a5..49cc68af01f8 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -38,3 +38,25 @@ del_page_from_lru(struct zone *zone, struct page *page) zone->nr_inactive--; } } + +/* + * Isolate one page from the LRU lists. + * + * - zone->lru_lock must be held + */ +static inline int __isolate_lru_page(struct page *page) +{ + if (unlikely(!TestClearPageLRU(page))) + return 0; + + if (get_page_testone(page)) { + /* + * It is being freed elsewhere + */ + __put_page(page); + SetPageLRU(page); + return -ENOENT; + } + + return 1; +} diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c34f4a2c62f8..7e4ae6ab1977 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -38,7 +38,7 @@ struct pglist_data; #if defined(CONFIG_SMP) struct zone_padding { char x[0]; -} ____cacheline_maxaligned_in_smp; +} ____cacheline_internodealigned_in_smp; #define ZONE_PADDING(name) struct zone_padding name; #else #define ZONE_PADDING(name) @@ -233,7 +233,7 @@ struct zone { * rarely used fields: */ char *name; -} ____cacheline_maxaligned_in_smp; +} ____cacheline_internodealigned_in_smp; /* @@ -437,6 +437,8 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); #include <linux/topology.h> /* Returns the number of the current Node. */ diff --git a/include/linux/mount.h b/include/linux/mount.h index dd4e83eba933..b98a709f1794 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -22,7 +22,8 @@ #define MNT_NOEXEC 0x04 #define MNT_SHARED 0x10 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x20 /* if the vfsmount is a unbindable mount */ -#define MNT_PNODE_MASK 0x30 /* propogation flag mask */ + +#define MNT_PNODE_MASK (MNT_SHARED | MNT_UNBINDABLE) struct vfsmount { struct list_head mnt_hash; diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 941da5c016a0..e933e2a355ad 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -329,7 +329,8 @@ static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) extern void fat_cache_inval_inode(struct inode *inode); extern int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus); -extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys); +extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, + unsigned long *mapped_blocks); /* fat/dir.c */ extern struct file_operations fat_dir_operations; diff --git a/include/linux/ncp.h b/include/linux/ncp.h index 99f77876b716..99f0adeeb3f3 100644 --- a/include/linux/ncp.h +++ b/include/linux/ncp.h @@ -20,29 +20,29 @@ #define NCP_DEALLOC_SLOT_REQUEST (0x5555) struct ncp_request_header { - __u16 type __attribute__((packed)); - __u8 sequence __attribute__((packed)); - __u8 conn_low __attribute__((packed)); - __u8 task __attribute__((packed)); - __u8 conn_high __attribute__((packed)); - __u8 function __attribute__((packed)); - __u8 data[0] __attribute__((packed)); -}; + __u16 type; + __u8 sequence; + __u8 conn_low; + __u8 task; + __u8 conn_high; + __u8 function; + __u8 data[0]; +} __attribute__((packed)); #define NCP_REPLY (0x3333) #define NCP_WATCHDOG (0x3E3E) #define NCP_POSITIVE_ACK (0x9999) struct ncp_reply_header { - __u16 type __attribute__((packed)); - __u8 sequence __attribute__((packed)); - __u8 conn_low __attribute__((packed)); - __u8 task __attribute__((packed)); - __u8 conn_high __attribute__((packed)); - __u8 completion_code __attribute__((packed)); - __u8 connection_state __attribute__((packed)); - __u8 data[0] __attribute__((packed)); -}; + __u16 type; + __u8 sequence; + __u8 conn_low; + __u8 task; + __u8 conn_high; + __u8 completion_code; + __u8 connection_state; + __u8 data[0]; +} __attribute__((packed)); #define NCP_VOLNAME_LEN (16) #define NCP_NUMBER_OF_VOLUMES (256) @@ -128,37 +128,37 @@ struct nw_nfs_info { }; struct nw_info_struct { - __u32 spaceAlloc __attribute__((packed)); - __le32 attributes __attribute__((packed)); - __u16 flags __attribute__((packed)); - __le32 dataStreamSize __attribute__((packed)); - __le32 totalStreamSize __attribute__((packed)); - __u16 numberOfStreams __attribute__((packed)); - __le16 creationTime __attribute__((packed)); - __le16 creationDate __attribute__((packed)); - __u32 creatorID __attribute__((packed)); - __le16 modifyTime __attribute__((packed)); - __le16 modifyDate __attribute__((packed)); - __u32 modifierID __attribute__((packed)); - __le16 lastAccessDate __attribute__((packed)); - __u16 archiveTime __attribute__((packed)); - __u16 archiveDate __attribute__((packed)); - __u32 archiverID __attribute__((packed)); - __u16 inheritedRightsMask __attribute__((packed)); - __le32 dirEntNum __attribute__((packed)); - __le32 DosDirNum __attribute__((packed)); - __u32 volNumber __attribute__((packed)); - __u32 EADataSize __attribute__((packed)); - __u32 EAKeyCount __attribute__((packed)); - __u32 EAKeySize __attribute__((packed)); - __u32 NSCreator __attribute__((packed)); - __u8 nameLen __attribute__((packed)); - __u8 entryName[256] __attribute__((packed)); + __u32 spaceAlloc; + __le32 attributes; + __u16 flags; + __le32 dataStreamSize; + __le32 totalStreamSize; + __u16 numberOfStreams; + __le16 creationTime; + __le16 creationDate; + __u32 creatorID; + __le16 modifyTime; + __le16 modifyDate; + __u32 modifierID; + __le16 lastAccessDate; + __u16 archiveTime; + __u16 archiveDate; + __u32 archiverID; + __u16 inheritedRightsMask; + __le32 dirEntNum; + __le32 DosDirNum; + __u32 volNumber; + __u32 EADataSize; + __u32 EAKeyCount; + __u32 EAKeySize; + __u32 NSCreator; + __u8 nameLen; + __u8 entryName[256]; /* libncp may depend on there being nothing after entryName */ #ifdef __KERNEL__ struct nw_nfs_info nfs; #endif -}; +} __attribute__((packed)); /* modify mask - use with MODIFY_DOS_INFO structure */ #define DM_ATTRIBUTES (cpu_to_le32(0x02)) @@ -176,26 +176,26 @@ struct nw_info_struct { #define DM_MAXIMUM_SPACE (cpu_to_le32(0x2000)) struct nw_modify_dos_info { - __le32 attributes __attribute__((packed)); - __le16 creationDate __attribute__((packed)); - __le16 creationTime __attribute__((packed)); - __u32 creatorID __attribute__((packed)); - __le16 modifyDate __attribute__((packed)); - __le16 modifyTime __attribute__((packed)); - __u32 modifierID __attribute__((packed)); - __u16 archiveDate __attribute__((packed)); - __u16 archiveTime __attribute__((packed)); - __u32 archiverID __attribute__((packed)); - __le16 lastAccessDate __attribute__((packed)); - __u16 inheritanceGrantMask __attribute__((packed)); - __u16 inheritanceRevokeMask __attribute__((packed)); - __u32 maximumSpace __attribute__((packed)); -}; + __le32 attributes; + __le16 creationDate; + __le16 creationTime; + __u32 creatorID; + __le16 modifyDate; + __le16 modifyTime; + __u32 modifierID; + __u16 archiveDate; + __u16 archiveTime; + __u32 archiverID; + __le16 lastAccessDate; + __u16 inheritanceGrantMask; + __u16 inheritanceRevokeMask; + __u32 maximumSpace; +} __attribute__((packed)); struct nw_search_sequence { - __u8 volNumber __attribute__((packed)); - __u32 dirBase __attribute__((packed)); - __u32 sequence __attribute__((packed)); -}; + __u8 volNumber; + __u32 dirBase; + __u32 sequence; +} __attribute__((packed)); #endif /* _LINUX_NCP_H */ diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index be365e70ee99..4cf6088625c1 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -168,6 +168,37 @@ void nf_log_packet(int pf, const struct net_device *out, struct nf_loginfo *li, const char *fmt, ...); + +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, + struct net_device *indev, struct net_device *outdev, + int (*okfn)(struct sk_buff *), int thresh); + +/** + * nf_hook_thresh - call a netfilter hook + * + * Returns 1 if the hook has allowed the packet to pass. The function + * okfn must be invoked by the caller in this case. Any other return + * value indicates the packet has been consumed by the hook. + */ +static inline int nf_hook_thresh(int pf, unsigned int hook, + struct sk_buff **pskb, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), int thresh) +{ +#ifndef CONFIG_NETFILTER_DEBUG + if (list_empty(&nf_hooks[pf][hook])) + return 1; +#endif + return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh); +} + +static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, + struct net_device *indev, struct net_device *outdev, + int (*okfn)(struct sk_buff *)) +{ + return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN); +} /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with @@ -188,35 +219,17 @@ void nf_log_packet(int pf, /* This is gross, but inline doesn't cut it for avoiding the function call in fast path: gcc doesn't inline (needs value tracking?). --RR */ -#ifdef CONFIG_NETFILTER_DEBUG -#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \ -({int __ret; \ -if ((__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN)) == 1) \ - __ret = (okfn)(skb); \ -__ret;}) -#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ -({int __ret; \ -if ((__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1) \ - __ret = (okfn)(skb); \ -__ret;}) -#else -#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \ -({int __ret; \ -if (list_empty(&nf_hooks[pf][hook]) || \ - (__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN)) == 1) \ - __ret = (okfn)(skb); \ -__ret;}) + +/* HX: It's slightly less gross now. */ + #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ ({int __ret; \ -if (list_empty(&nf_hooks[pf][hook]) || \ - (__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1) \ +if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)\ __ret = (okfn)(skb); \ __ret;}) -#endif -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh); +#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \ + NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN) /* Call setsockopt() */ int nf_setsockopt(struct sock *sk, int pf, int optval, char __user *opt, @@ -261,6 +274,20 @@ struct nf_queue_rerouter { extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); extern int nf_unregister_queue_rerouter(int pf); +#include <net/flow.h> +extern void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); + +static inline void +nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) +{ +#ifdef CONFIG_IP_NF_NAT_NEEDED + void (*decodefn)(struct sk_buff *, struct flowi *); + + if (family == AF_INET && (decodefn = ip_nat_decode_session) != NULL) + decodefn(skb, fl); +#endif +} + #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> extern struct proc_dir_entry *proc_net_netfilter; @@ -268,7 +295,24 @@ extern struct proc_dir_entry *proc_net_netfilter; #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) +static inline int nf_hook_thresh(int pf, unsigned int hook, + struct sk_buff **pskb, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), int thresh) +{ + return okfn(*pskb); +} +static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, + struct net_device *indev, struct net_device *outdev, + int (*okfn)(struct sk_buff *)) +{ + return okfn(*pskb); +} static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} +struct flowi; +static inline void +nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {} #endif /*CONFIG_NETFILTER*/ #endif /*__KERNEL__*/ diff --git a/include/linux/netfilter_ipv4/ipt_policy.h b/include/linux/netfilter_ipv4/ipt_policy.h new file mode 100644 index 000000000000..7fd1bec453f1 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_policy.h @@ -0,0 +1,52 @@ +#ifndef _IPT_POLICY_H +#define _IPT_POLICY_H + +#define IPT_POLICY_MAX_ELEM 4 + +enum ipt_policy_flags +{ + IPT_POLICY_MATCH_IN = 0x1, + IPT_POLICY_MATCH_OUT = 0x2, + IPT_POLICY_MATCH_NONE = 0x4, + IPT_POLICY_MATCH_STRICT = 0x8, +}; + +enum ipt_policy_modes +{ + IPT_POLICY_MODE_TRANSPORT, + IPT_POLICY_MODE_TUNNEL +}; + +struct ipt_policy_spec +{ + u_int8_t saddr:1, + daddr:1, + proto:1, + mode:1, + spi:1, + reqid:1; +}; + +struct ipt_policy_elem +{ + u_int32_t saddr; + u_int32_t smask; + u_int32_t daddr; + u_int32_t dmask; + u_int32_t spi; + u_int32_t reqid; + u_int8_t proto; + u_int8_t mode; + + struct ipt_policy_spec match; + struct ipt_policy_spec invert; +}; + +struct ipt_policy_info +{ + struct ipt_policy_elem pol[IPT_POLICY_MAX_ELEM]; + u_int16_t flags; + u_int16_t len; +}; + +#endif /* _IPT_POLICY_H */ diff --git a/include/linux/netfilter_ipv6/ip6t_policy.h b/include/linux/netfilter_ipv6/ip6t_policy.h new file mode 100644 index 000000000000..5a93afcd2ff1 --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_policy.h @@ -0,0 +1,52 @@ +#ifndef _IP6T_POLICY_H +#define _IP6T_POLICY_H + +#define IP6T_POLICY_MAX_ELEM 4 + +enum ip6t_policy_flags +{ + IP6T_POLICY_MATCH_IN = 0x1, + IP6T_POLICY_MATCH_OUT = 0x2, + IP6T_POLICY_MATCH_NONE = 0x4, + IP6T_POLICY_MATCH_STRICT = 0x8, +}; + +enum ip6t_policy_modes +{ + IP6T_POLICY_MODE_TRANSPORT, + IP6T_POLICY_MODE_TUNNEL +}; + +struct ip6t_policy_spec +{ + u_int8_t saddr:1, + daddr:1, + proto:1, + mode:1, + spi:1, + reqid:1; +}; + +struct ip6t_policy_elem +{ + struct in6_addr saddr; + struct in6_addr smask; + struct in6_addr daddr; + struct in6_addr dmask; + u_int32_t spi; + u_int32_t reqid; + u_int8_t proto; + u_int8_t mode; + + struct ip6t_policy_spec match; + struct ip6t_policy_spec invert; +}; + +struct ip6t_policy_info +{ + struct ip6t_policy_elem pol[IP6T_POLICY_MAX_ELEM]; + u_int16_t flags; + u_int16_t len; +}; + +#endif /* _IP6T_POLICY_H */ diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index def32c5715be..8eb7fa76c1d0 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -5,6 +5,9 @@ * pages. A pagevec is a multipage container which is used for that. */ +#ifndef _LINUX_PAGEVEC_H +#define _LINUX_PAGEVEC_H + /* 14 pointers + two long's align the pagevec structure to a power of two */ #define PAGEVEC_SIZE 14 @@ -83,3 +86,5 @@ static inline void pagevec_lru_add(struct pagevec *pvec) if (pagevec_count(pvec)) __pagevec_lru_add(pvec); } + +#endif /* _LINUX_PAGEVEC_H */ diff --git a/include/linux/parport.h b/include/linux/parport.h index f7ff0b0c4031..f67f838a3a1f 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -236,12 +236,14 @@ struct pardevice { /* IEEE1284 information */ -/* IEEE1284 phases */ +/* IEEE1284 phases. These are exposed to userland through ppdev IOCTL + * PP[GS]ETPHASE, so do not change existing values. */ enum ieee1284_phase { IEEE1284_PH_FWD_DATA, IEEE1284_PH_FWD_IDLE, IEEE1284_PH_TERMINATE, IEEE1284_PH_NEGOTIATION, + IEEE1284_PH_HBUSY_DNA, IEEE1284_PH_REV_IDLE, IEEE1284_PH_HBUSY_DAVAIL, IEEE1284_PH_REV_DATA, diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fb8d2d24e4bb..cb9039a21f2a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -19,7 +19,6 @@ struct percpu_data { void *ptrs[NR_CPUS]; - void *blkp; }; /* @@ -33,14 +32,14 @@ struct percpu_data { (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) -extern void *__alloc_percpu(size_t size, size_t align); +extern void *__alloc_percpu(size_t size); extern void free_percpu(const void *); #else /* CONFIG_SMP */ #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) -static inline void *__alloc_percpu(size_t size, size_t align) +static inline void *__alloc_percpu(size_t size) { void *ret = kmalloc(size, GFP_KERNEL); if (ret) @@ -55,7 +54,6 @@ static inline void free_percpu(const void *ptr) #endif /* CONFIG_SMP */ /* Simple wrapper for the common case: zeros memory. */ -#define alloc_percpu(type) \ - ((type *)(__alloc_percpu(sizeof(type), __alignof__(type)))) +#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type)))) #endif /* __LINUX_PERCPU_H */ diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index b2b3dba1298d..9d5cd106b344 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -20,8 +20,6 @@ #define PTRACE_DETACH 0x11 #define PTRACE_SYSCALL 24 -#define PTRACE_SYSEMU 31 -#define PTRACE_SYSEMU_SINGLESTEP 32 /* 0x4200-0x4300 are reserved for architecture-independent additions. */ #define PTRACE_SETOPTIONS 0x4200 @@ -80,6 +78,8 @@ extern long arch_ptrace(struct task_struct *child, long request, long addr, long data); +extern struct task_struct *ptrace_get_task_struct(pid_t pid); +extern int ptrace_traceme(void); extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); extern int ptrace_attach(struct task_struct *tsk); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 36e5d269612f..c57ff2fcb30a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -19,6 +19,7 @@ #ifndef _LINUX_RADIX_TREE_H #define _LINUX_RADIX_TREE_H +#include <linux/sched.h> #include <linux/preempt.h> #include <linux/types.h> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a471f3bb713e..51747cd88d1a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -65,7 +65,7 @@ struct rcu_ctrlblk { long cur; /* Current batch number. */ long completed; /* Number of the last completed batch */ int next_pending; /* Is the next batch already waiting? */ -} ____cacheline_maxaligned_in_smp; +} ____cacheline_internodealigned_in_smp; /* Is batch a before batch b ? */ static inline int rcu_batch_before(long a, long b) diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h deleted file mode 100644 index e1adbba14b67..000000000000 --- a/include/linux/rcuref.h +++ /dev/null @@ -1,220 +0,0 @@ -/* - * rcuref.h - * - * Reference counting for elements of lists/arrays protected by - * RCU. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2005 - * - * Author: Dipankar Sarma <dipankar@in.ibm.com> - * Ravikiran Thirumalai <kiran_th@gmail.com> - * - * See Documentation/RCU/rcuref.txt for detailed user guide. - * - */ - -#ifndef _RCUREF_H_ -#define _RCUREF_H_ - -#ifdef __KERNEL__ - -#include <linux/types.h> -#include <linux/interrupt.h> -#include <linux/spinlock.h> -#include <asm/atomic.h> - -/* - * These APIs work on traditional atomic_t counters used in the - * kernel for reference counting. Under special circumstances - * where a lock-free get() operation races with a put() operation - * these APIs can be used. See Documentation/RCU/rcuref.txt. - */ - -#ifdef __HAVE_ARCH_CMPXCHG - -/** - * rcuref_inc - increment refcount for object. - * @rcuref: reference counter in the object in question. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference - * in a lock-free reader-side critical section. - */ -static inline void rcuref_inc(atomic_t *rcuref) -{ - atomic_inc(rcuref); -} - -/** - * rcuref_dec - decrement refcount for object. - * @rcuref: reference counter in the object in question. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference - * in a lock-free reader-side critical section. - */ -static inline void rcuref_dec(atomic_t *rcuref) -{ - atomic_dec(rcuref); -} - -/** - * rcuref_dec_and_test - decrement refcount for object and test - * @rcuref: reference counter in the object. - * @release: pointer to the function that will clean up the object - * when the last reference to the object is released. - * This pointer is required. - * - * Decrement the refcount, and if 0, return 1. Else return 0. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference - * in a lock-free reader-side critical section. - */ -static inline int rcuref_dec_and_test(atomic_t *rcuref) -{ - return atomic_dec_and_test(rcuref); -} - -/* - * cmpxchg is needed on UP too, if deletions to the list/array can happen - * in interrupt context. - */ - -/** - * rcuref_inc_lf - Take reference to an object in a read-side - * critical section protected by RCU. - * @rcuref: reference counter in the object in question. - * - * Try and increment the refcount by 1. The increment might fail if - * the reference counter has been through a 1 to 0 transition and - * is no longer part of the lock-free list. - * Returns non-zero on successful increment and zero otherwise. - */ -static inline int rcuref_inc_lf(atomic_t *rcuref) -{ - int c, old; - c = atomic_read(rcuref); - while (c && (old = cmpxchg(&rcuref->counter, c, c + 1)) != c) - c = old; - return c; -} - -#else /* !__HAVE_ARCH_CMPXCHG */ - -extern spinlock_t __rcuref_hash[]; - -/* - * Use a hash table of locks to protect the reference count - * since cmpxchg is not available in this arch. - */ -#ifdef CONFIG_SMP -#define RCUREF_HASH_SIZE 4 -#define RCUREF_HASH(k) \ - (&__rcuref_hash[(((unsigned long)k)>>8) & (RCUREF_HASH_SIZE-1)]) -#else -#define RCUREF_HASH_SIZE 1 -#define RCUREF_HASH(k) &__rcuref_hash[0] -#endif /* CONFIG_SMP */ - -/** - * rcuref_inc - increment refcount for object. - * @rcuref: reference counter in the object in question. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference in a lock-free - * reader-side critical section. - */ -static inline void rcuref_inc(atomic_t *rcuref) -{ - unsigned long flags; - spin_lock_irqsave(RCUREF_HASH(rcuref), flags); - rcuref->counter += 1; - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); -} - -/** - * rcuref_dec - decrement refcount for object. - * @rcuref: reference counter in the object in question. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference in a lock-free - * reader-side critical section. - */ -static inline void rcuref_dec(atomic_t *rcuref) -{ - unsigned long flags; - spin_lock_irqsave(RCUREF_HASH(rcuref), flags); - rcuref->counter -= 1; - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); -} - -/** - * rcuref_dec_and_test - decrement refcount for object and test - * @rcuref: reference counter in the object. - * @release: pointer to the function that will clean up the object - * when the last reference to the object is released. - * This pointer is required. - * - * Decrement the refcount, and if 0, return 1. Else return 0. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference in a lock-free - * reader-side critical section. - */ -static inline int rcuref_dec_and_test(atomic_t *rcuref) -{ - unsigned long flags; - spin_lock_irqsave(RCUREF_HASH(rcuref), flags); - rcuref->counter--; - if (!rcuref->counter) { - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); - return 1; - } else { - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); - return 0; - } -} - -/** - * rcuref_inc_lf - Take reference to an object of a lock-free collection - * by traversing a lock-free list/array. - * @rcuref: reference counter in the object in question. - * - * Try and increment the refcount by 1. The increment might fail if - * the reference counter has been through a 1 to 0 transition and - * object is no longer part of the lock-free list. - * Returns non-zero on successful increment and zero otherwise. - */ -static inline int rcuref_inc_lf(atomic_t *rcuref) -{ - int ret; - unsigned long flags; - spin_lock_irqsave(RCUREF_HASH(rcuref), flags); - if (rcuref->counter) - ret = rcuref->counter++; - else - ret = 0; - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); - return ret; -} - - -#endif /* !__HAVE_ARCH_CMPXCHG */ - -#endif /* __KERNEL__ */ -#endif /* _RCUREF_H_ */ diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h index fb7e80737325..7342e66247fb 100644 --- a/include/linux/relayfs_fs.h +++ b/include/linux/relayfs_fs.h @@ -65,20 +65,6 @@ struct rchan }; /* - * Relayfs inode - */ -struct relayfs_inode_info -{ - struct inode vfs_inode; - struct rchan_buf *buf; -}; - -static inline struct relayfs_inode_info *RELAYFS_I(struct inode *inode) -{ - return container_of(inode, struct relayfs_inode_info, vfs_inode); -} - -/* * Relay channel client callbacks */ struct rchan_callbacks @@ -124,6 +110,46 @@ struct rchan_callbacks */ void (*buf_unmapped)(struct rchan_buf *buf, struct file *filp); + /* + * create_buf_file - create file to represent a relayfs channel buffer + * @filename: the name of the file to create + * @parent: the parent of the file to create + * @mode: the mode of the file to create + * @buf: the channel buffer + * @is_global: outparam - set non-zero if the buffer should be global + * + * Called during relay_open(), once for each per-cpu buffer, + * to allow the client to create a file to be used to + * represent the corresponding channel buffer. If the file is + * created outside of relayfs, the parent must also exist in + * that filesystem. + * + * The callback should return the dentry of the file created + * to represent the relay buffer. + * + * Setting the is_global outparam to a non-zero value will + * cause relay_open() to create a single global buffer rather + * than the default set of per-cpu buffers. + * + * See Documentation/filesystems/relayfs.txt for more info. + */ + struct dentry *(*create_buf_file)(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global); + + /* + * remove_buf_file - remove file representing a relayfs channel buffer + * @dentry: the dentry of the file to remove + * + * Called during relay_close(), once for each per-cpu buffer, + * to allow the client to remove a file used to represent a + * channel buffer. + * + * The callback should return 0 if successful, negative if not. + */ + int (*remove_buf_file)(struct dentry *dentry); }; /* @@ -148,6 +174,12 @@ extern size_t relay_switch_subbuf(struct rchan_buf *buf, extern struct dentry *relayfs_create_dir(const char *name, struct dentry *parent); extern int relayfs_remove_dir(struct dentry *dentry); +extern struct dentry *relayfs_create_file(const char *name, + struct dentry *parent, + int mode, + struct file_operations *fops, + void *data); +extern int relayfs_remove_file(struct dentry *dentry); /** * relay_write - write data into the channel @@ -247,10 +279,9 @@ static inline void subbuf_start_reserve(struct rchan_buf *buf, } /* - * exported relayfs file operations, fs/relayfs/inode.c + * exported relay file operations, fs/relayfs/inode.c */ - -extern struct file_operations relayfs_file_operations; +extern struct file_operations relay_file_operations; #endif /* _LINUX_RELAYFS_FS_H */ diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 3bd7cce19e26..157d7e3236b5 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -21,6 +21,7 @@ #include <linux/list.h> #include <linux/errno.h> #include <linux/device.h> +#include <linux/string.h> #include <linux/rio.h> extern int __rio_local_read_config_32(struct rio_mport *port, u32 offset, diff --git a/include/linux/rtc.h b/include/linux/rtc.h index e1aaf1fac8e0..0b2ba67ff13c 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -11,6 +11,8 @@ #ifndef _LINUX_RTC_H_ #define _LINUX_RTC_H_ +#include <linux/interrupt.h> + /* * The struct used to pass data via the following ioctl. Similar to the * struct tm in <time.h>, but it needs to be here so that the kernel @@ -102,6 +104,7 @@ int rtc_register(rtc_task_t *task); int rtc_unregister(rtc_task_t *task); int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg); void rtc_get_rtc_time(struct rtc_time *rtc_tm); +irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); #endif /* __KERNEL__ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 7da33619d5d0..78eb92ae4d94 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,6 +34,7 @@ #include <linux/percpu.h> #include <linux/topology.h> #include <linux/seccomp.h> +#include <linux/rcupdate.h> #include <linux/auxvec.h> /* For AT_VECTOR_SIZE */ @@ -350,8 +351,16 @@ struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; + struct rcu_head rcu; }; +extern void sighand_free_cb(struct rcu_head *rhp); + +static inline void sighand_free(struct sighand_struct *sp) +{ + call_rcu(&sp->rcu, sighand_free_cb); +} + /* * NOTE! "signal_struct" does not have it's own * locking, because a shared signal_struct always @@ -762,6 +771,7 @@ struct task_struct { unsigned keep_capabilities:1; struct user_struct *user; #ifdef CONFIG_KEYS + struct key *request_key_auth; /* assumed request_key authority */ struct key *thread_keyring; /* keyring private to this thread */ unsigned char jit_keyring; /* default keyring to attach requested keys to */ #endif @@ -844,6 +854,7 @@ struct task_struct { int cpuset_mems_generation; #endif atomic_t fs_excl; /* holding fs exclusive resources */ + struct rcu_head rcu; }; static inline pid_t process_group(struct task_struct *tsk) @@ -867,8 +878,14 @@ static inline int pid_alive(struct task_struct *p) extern void free_task(struct task_struct *tsk); extern void __put_task_struct(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) -#define put_task_struct(tsk) \ -do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) + +extern void __put_task_struct_cb(struct rcu_head *rhp); + +static inline void put_task_struct(struct task_struct *t) +{ + if (atomic_dec_and_test(&t->usage)) + call_rcu(&t->rcu, __put_task_struct_cb); +} /* * Per process flags @@ -895,6 +912,7 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) #define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ #define PF_BORROWED_MM 0x00400000 /* I am a kthread doing use_mm */ #define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */ +#define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */ /* * Only the _current_ task can read/write to tsk->flags, but other diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h new file mode 100644 index 000000000000..76850b75b3f6 --- /dev/null +++ b/include/linux/screen_info.h @@ -0,0 +1,77 @@ +#ifndef _SCREEN_INFO_H +#define _SCREEN_INFO_H + +#include <linux/types.h> + +/* + * These are set up by the setup-routine at boot-time: + */ + +struct screen_info { + u8 orig_x; /* 0x00 */ + u8 orig_y; /* 0x01 */ + u16 dontuse1; /* 0x02 -- EXT_MEM_K sits here */ + u16 orig_video_page; /* 0x04 */ + u8 orig_video_mode; /* 0x06 */ + u8 orig_video_cols; /* 0x07 */ + u16 unused2; /* 0x08 */ + u16 orig_video_ega_bx; /* 0x0a */ + u16 unused3; /* 0x0c */ + u8 orig_video_lines; /* 0x0e */ + u8 orig_video_isVGA; /* 0x0f */ + u16 orig_video_points; /* 0x10 */ + + /* VESA graphic mode -- linear frame buffer */ + u16 lfb_width; /* 0x12 */ + u16 lfb_height; /* 0x14 */ + u16 lfb_depth; /* 0x16 */ + u32 lfb_base; /* 0x18 */ + u32 lfb_size; /* 0x1c */ + u16 dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */ + u16 lfb_linelength; /* 0x24 */ + u8 red_size; /* 0x26 */ + u8 red_pos; /* 0x27 */ + u8 green_size; /* 0x28 */ + u8 green_pos; /* 0x29 */ + u8 blue_size; /* 0x2a */ + u8 blue_pos; /* 0x2b */ + u8 rsvd_size; /* 0x2c */ + u8 rsvd_pos; /* 0x2d */ + u16 vesapm_seg; /* 0x2e */ + u16 vesapm_off; /* 0x30 */ + u16 pages; /* 0x32 */ + u16 vesa_attributes; /* 0x34 */ + u32 capabilities; /* 0x36 */ + /* 0x3a -- 0x3f reserved for future expansion */ +}; + +extern struct screen_info screen_info; + +#define ORIG_X (screen_info.orig_x) +#define ORIG_Y (screen_info.orig_y) +#define ORIG_VIDEO_MODE (screen_info.orig_video_mode) +#define ORIG_VIDEO_COLS (screen_info.orig_video_cols) +#define ORIG_VIDEO_EGA_BX (screen_info.orig_video_ega_bx) +#define ORIG_VIDEO_LINES (screen_info.orig_video_lines) +#define ORIG_VIDEO_ISVGA (screen_info.orig_video_isVGA) +#define ORIG_VIDEO_POINTS (screen_info.orig_video_points) + +#define VIDEO_TYPE_MDA 0x10 /* Monochrome Text Display */ +#define VIDEO_TYPE_CGA 0x11 /* CGA Display */ +#define VIDEO_TYPE_EGAM 0x20 /* EGA/VGA in Monochrome Mode */ +#define VIDEO_TYPE_EGAC 0x21 /* EGA in Color Mode */ +#define VIDEO_TYPE_VGAC 0x22 /* VGA+ in Color Mode */ +#define VIDEO_TYPE_VLFB 0x23 /* VESA VGA in graphic mode */ + +#define VIDEO_TYPE_PICA_S3 0x30 /* ACER PICA-61 local S3 video */ +#define VIDEO_TYPE_MIPS_G364 0x31 /* MIPS Magnum 4000 G364 video */ +#define VIDEO_TYPE_SGI 0x33 /* Various SGI graphics hardware */ + +#define VIDEO_TYPE_TGAC 0x40 /* DEC TGA */ + +#define VIDEO_TYPE_SUN 0x50 /* Sun frame buffer. */ +#define VIDEO_TYPE_SUNPCI 0x51 /* Sun PCI based frame buffer. */ + +#define VIDEO_TYPE_PMAC 0x60 /* PowerMacintosh frame buffer. */ + +#endif /* _SCREEN_INFO_H */ diff --git a/include/linux/sdla.h b/include/linux/sdla.h index 3b6afb8caa42..564acd3a71c1 100644 --- a/include/linux/sdla.h +++ b/include/linux/sdla.h @@ -293,46 +293,46 @@ void sdla(void *cfg_info, char *dev, struct frad_conf *conf, int quiet); #define SDLA_S508_INTEN 0x10 struct sdla_cmd { - char opp_flag __attribute__((packed)); - char cmd __attribute__((packed)); - short length __attribute__((packed)); - char retval __attribute__((packed)); - short dlci __attribute__((packed)); - char flags __attribute__((packed)); - short rxlost_int __attribute__((packed)); - long rxlost_app __attribute__((packed)); - char reserve[2] __attribute__((packed)); - char data[SDLA_MAX_DATA] __attribute__((packed)); /* transfer data buffer */ -}; + char opp_flag; + char cmd; + short length; + char retval; + short dlci; + char flags; + short rxlost_int; + long rxlost_app; + char reserve[2]; + char data[SDLA_MAX_DATA]; /* transfer data buffer */ +} __attribute__((packed)); struct intr_info { - char flags __attribute__((packed)); - short txlen __attribute__((packed)); - char irq __attribute__((packed)); - char flags2 __attribute__((packed)); - short timeout __attribute__((packed)); -}; + char flags; + short txlen; + char irq; + char flags2; + short timeout; +} __attribute__((packed)); /* found in the 508's control window at RXBUF_INFO */ struct buf_info { - unsigned short rse_num __attribute__((packed)); - unsigned long rse_base __attribute__((packed)); - unsigned long rse_next __attribute__((packed)); - unsigned long buf_base __attribute__((packed)); - unsigned short reserved __attribute__((packed)); - unsigned long buf_top __attribute__((packed)); -}; + unsigned short rse_num; + unsigned long rse_base; + unsigned long rse_next; + unsigned long buf_base; + unsigned short reserved; + unsigned long buf_top; +} __attribute__((packed)); /* structure pointed to by rse_base in RXBUF_INFO struct */ struct buf_entry { - char opp_flag __attribute__((packed)); - short length __attribute__((packed)); - short dlci __attribute__((packed)); - char flags __attribute__((packed)); - short timestamp __attribute__((packed)); - short reserved[2] __attribute__((packed)); - long buf_addr __attribute__((packed)); -}; + char opp_flag; + short length; + short dlci; + char flags; + short timestamp; + short reserved[2]; + long buf_addr; +} __attribute__((packed)); #endif diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index dc89116bb1ca..cd2773b29a64 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -26,11 +26,7 @@ static inline int has_secure_computing(struct thread_info *ti) #else /* CONFIG_SECCOMP */ -#if (__GNUC__ > 2) - typedef struct { } seccomp_t; -#else - typedef struct { int gcc_is_buggy; } seccomp_t; -#endif +typedef struct { } seccomp_t; #define secure_computing(x) do { } while (0) /* static inline to preserve typechecking */ diff --git a/include/linux/signal.h b/include/linux/signal.h index 5dd5f02c5c5f..b7d093520bb6 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -18,6 +18,19 @@ #define SA_PROBE SA_ONESHOT #define SA_SAMPLE_RANDOM SA_RESTART #define SA_SHIRQ 0x04000000 +/* + * As above, these correspond to the IORESOURCE_IRQ_* defines in + * linux/ioport.h to select the interrupt line behaviour. When + * requesting an interrupt without specifying a SA_TRIGGER, the + * setting should be assumed to be "as already configured", which + * may be as per machine or firmware initialisation. + */ +#define SA_TRIGGER_LOW 0x00000008 +#define SA_TRIGGER_HIGH 0x00000004 +#define SA_TRIGGER_FALLING 0x00000002 +#define SA_TRIGGER_RISING 0x00000001 +#define SA_TRIGGER_MASK (SA_TRIGGER_HIGH|SA_TRIGGER_LOW|\ + SA_TRIGGER_RISING|SA_TRIGGER_FALLING) /* * Real Time signals may be queued. @@ -81,6 +94,23 @@ static inline int sigfindinword(unsigned long word) #endif /* __HAVE_ARCH_SIG_BITOPS */ +static inline int sigisemptyset(sigset_t *set) +{ + extern void _NSIG_WORDS_is_unsupported_size(void); + switch (_NSIG_WORDS) { + case 4: + return (set->sig[3] | set->sig[2] | + set->sig[1] | set->sig[0]) == 0; + case 2: + return (set->sig[1] | set->sig[0]) == 0; + case 1: + return set->sig[0] == 0; + default: + _NSIG_WORDS_is_unsupported_size(); + return 0; + } +} + #define sigmask(sig) (1UL << ((sig) - 1)) #ifndef __HAVE_ARCH_SIG_SETOPS diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 483cfc47ec34..e5fd66c5650b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -251,7 +251,7 @@ struct sk_buff { * want to keep them across layers you have to do a skb_clone() * first. This is owned by whoever has the skb queued ATM. */ - char cb[40]; + char cb[48]; unsigned int len, data_len, diff --git a/include/linux/slab.h b/include/linux/slab.h index d1ea4051b996..1fb77a9cc148 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -53,6 +53,8 @@ typedef struct kmem_cache kmem_cache_t; #define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */ #define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */ +#ifndef CONFIG_SLOB + /* prototypes */ extern void __init kmem_cache_init(void); @@ -134,6 +136,39 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) extern int FASTCALL(kmem_cache_reap(int)); extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)); +#else /* CONFIG_SLOB */ + +/* SLOB allocator routines */ + +void kmem_cache_init(void); +struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags); +struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t, + unsigned long, + void (*)(void *, struct kmem_cache *, unsigned long), + void (*)(void *, struct kmem_cache *, unsigned long)); +int kmem_cache_destroy(struct kmem_cache *c); +void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags); +void kmem_cache_free(struct kmem_cache *c, void *b); +const char *kmem_cache_name(struct kmem_cache *); +void *kmalloc(size_t size, gfp_t flags); +void *kzalloc(size_t size, gfp_t flags); +void kfree(const void *m); +unsigned int ksize(const void *m); +unsigned int kmem_cache_size(struct kmem_cache *c); + +static inline void *kcalloc(size_t n, size_t size, gfp_t flags) +{ + return kzalloc(n * size, flags); +} + +#define kmem_cache_shrink(d) (0) +#define kmem_cache_reap(a) +#define kmem_ptr_validate(a, b) (0) +#define kmem_cache_alloc_node(c, f, n) kmem_cache_alloc(c, f) +#define kmalloc_node(s, f, n) kmalloc(s, f) + +#endif /* CONFIG_SLOB */ + /* System wide caches */ extern kmem_cache_t *vm_area_cachep; extern kmem_cache_t *names_cachep; diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index def2d173a8db..04135b0e198e 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -22,30 +22,16 @@ typedef struct { #else -/* - * All gcc 2.95 versions and early versions of 2.96 have a nasty bug - * with empty initializers. - */ -#if (__GNUC__ > 2) typedef struct { } raw_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { } -#else -typedef struct { int gcc_is_buggy; } raw_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED (raw_spinlock_t) { 0 } -#endif #endif -#if (__GNUC__ > 2) typedef struct { /* no debug version on UP */ } raw_rwlock_t; #define __RAW_RW_LOCK_UNLOCKED { } -#else -typedef struct { int gcc_is_buggy; } raw_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED (raw_rwlock_t) { 0 } -#endif #endif /* __LINUX_SPINLOCK_TYPES_UP_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 556617bcf7ac..389d1c382e20 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -175,6 +175,13 @@ extern int try_to_free_pages(struct zone **, gfp_t); extern int shrink_all_memory(int); extern int vm_swappiness; +#ifdef CONFIG_MIGRATION +extern int isolate_lru_page(struct page *p); +extern int putback_lru_pages(struct list_head *l); +extern int migrate_pages(struct list_head *l, struct list_head *t, + struct list_head *moved, struct list_head *failed); +#endif + #ifdef CONFIG_MMU /* linux/mm/shmem.c */ extern int shmem_unuse(swp_entry_t entry, struct page *page); @@ -192,7 +199,7 @@ extern int rw_swap_page_sync(int, swp_entry_t, struct page *); extern struct address_space swapper_space; #define total_swapcache_pages swapper_space.nrpages extern void show_swap_cache_info(void); -extern int add_to_swap(struct page *); +extern int add_to_swap(struct page *, gfp_t); extern void __delete_from_swap_cache(struct page *); extern void delete_from_swap_cache(struct page *); extern int move_to_swap_cache(struct page *, swp_entry_t); diff --git a/include/linux/synclink.h b/include/linux/synclink.h index 763bd290f28d..1b7cd8d1a71b 100644 --- a/include/linux/synclink.h +++ b/include/linux/synclink.h @@ -1,7 +1,7 @@ /* * SyncLink Multiprotocol Serial Adapter Driver * - * $Id: synclink.h,v 3.6 2002/02/20 21:58:20 paulkf Exp $ + * $Id: synclink.h,v 3.10 2005/11/08 19:50:54 paulkf Exp $ * * Copyright (C) 1998-2000 by Microgate Corporation * @@ -128,10 +128,14 @@ #define MGSL_BUS_TYPE_EISA 2 #define MGSL_BUS_TYPE_PCI 5 +#define MGSL_INTERFACE_MASK 0xf #define MGSL_INTERFACE_DISABLE 0 #define MGSL_INTERFACE_RS232 1 #define MGSL_INTERFACE_V35 2 #define MGSL_INTERFACE_RS422 3 +#define MGSL_INTERFACE_RTS_EN 0x10 +#define MGSL_INTERFACE_LL 0x20 +#define MGSL_INTERFACE_RL 0x40 typedef struct _MGSL_PARAMS { @@ -163,6 +167,9 @@ typedef struct _MGSL_PARAMS #define SYNCLINK_DEVICE_ID 0x0010 #define MGSCC_DEVICE_ID 0x0020 #define SYNCLINK_SCA_DEVICE_ID 0x0030 +#define SYNCLINK_GT_DEVICE_ID 0x0070 +#define SYNCLINK_GT4_DEVICE_ID 0x0080 +#define SYNCLINK_AC_DEVICE_ID 0x0090 #define MGSL_MAX_SERIAL_NUMBER 30 /* diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c7007b1db91d..e910d1a481df 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -511,5 +511,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio); asmlinkage long sys_ioprio_get(int which, int who); asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, unsigned long maxnode); +asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, + const unsigned long __user *from, const unsigned long __user *to); #endif diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index a9b80fc7f0f3..7f472127b7b5 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -180,6 +180,8 @@ enum VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ + VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ + VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ }; diff --git a/include/linux/tty.h b/include/linux/tty.h index 1267f88ece6e..57449704a47b 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -23,6 +23,7 @@ #include <linux/workqueue.h> #include <linux/tty_driver.h> #include <linux/tty_ldisc.h> +#include <linux/screen_info.h> #include <asm/system.h> @@ -37,77 +38,6 @@ #define NR_LDISCS 16 /* - * These are set up by the setup-routine at boot-time: - */ - -struct screen_info { - u8 orig_x; /* 0x00 */ - u8 orig_y; /* 0x01 */ - u16 dontuse1; /* 0x02 -- EXT_MEM_K sits here */ - u16 orig_video_page; /* 0x04 */ - u8 orig_video_mode; /* 0x06 */ - u8 orig_video_cols; /* 0x07 */ - u16 unused2; /* 0x08 */ - u16 orig_video_ega_bx; /* 0x0a */ - u16 unused3; /* 0x0c */ - u8 orig_video_lines; /* 0x0e */ - u8 orig_video_isVGA; /* 0x0f */ - u16 orig_video_points; /* 0x10 */ - - /* VESA graphic mode -- linear frame buffer */ - u16 lfb_width; /* 0x12 */ - u16 lfb_height; /* 0x14 */ - u16 lfb_depth; /* 0x16 */ - u32 lfb_base; /* 0x18 */ - u32 lfb_size; /* 0x1c */ - u16 dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */ - u16 lfb_linelength; /* 0x24 */ - u8 red_size; /* 0x26 */ - u8 red_pos; /* 0x27 */ - u8 green_size; /* 0x28 */ - u8 green_pos; /* 0x29 */ - u8 blue_size; /* 0x2a */ - u8 blue_pos; /* 0x2b */ - u8 rsvd_size; /* 0x2c */ - u8 rsvd_pos; /* 0x2d */ - u16 vesapm_seg; /* 0x2e */ - u16 vesapm_off; /* 0x30 */ - u16 pages; /* 0x32 */ - u16 vesa_attributes; /* 0x34 */ - u32 capabilities; /* 0x36 */ - /* 0x3a -- 0x3f reserved for future expansion */ -}; - -extern struct screen_info screen_info; - -#define ORIG_X (screen_info.orig_x) -#define ORIG_Y (screen_info.orig_y) -#define ORIG_VIDEO_MODE (screen_info.orig_video_mode) -#define ORIG_VIDEO_COLS (screen_info.orig_video_cols) -#define ORIG_VIDEO_EGA_BX (screen_info.orig_video_ega_bx) -#define ORIG_VIDEO_LINES (screen_info.orig_video_lines) -#define ORIG_VIDEO_ISVGA (screen_info.orig_video_isVGA) -#define ORIG_VIDEO_POINTS (screen_info.orig_video_points) - -#define VIDEO_TYPE_MDA 0x10 /* Monochrome Text Display */ -#define VIDEO_TYPE_CGA 0x11 /* CGA Display */ -#define VIDEO_TYPE_EGAM 0x20 /* EGA/VGA in Monochrome Mode */ -#define VIDEO_TYPE_EGAC 0x21 /* EGA in Color Mode */ -#define VIDEO_TYPE_VGAC 0x22 /* VGA+ in Color Mode */ -#define VIDEO_TYPE_VLFB 0x23 /* VESA VGA in graphic mode */ - -#define VIDEO_TYPE_PICA_S3 0x30 /* ACER PICA-61 local S3 video */ -#define VIDEO_TYPE_MIPS_G364 0x31 /* MIPS Magnum 4000 G364 video */ -#define VIDEO_TYPE_SGI 0x33 /* Various SGI graphics hardware */ - -#define VIDEO_TYPE_TGAC 0x40 /* DEC TGA */ - -#define VIDEO_TYPE_SUN 0x50 /* Sun frame buffer. */ -#define VIDEO_TYPE_SUNPCI 0x51 /* Sun PCI based frame buffer. */ - -#define VIDEO_TYPE_PMAC 0x60 /* PowerMacintosh frame buffer. */ - -/* * This character is the same as _POSIX_VDISABLE: it cannot be used as * a c_cc[] character, but indicates that a particular special character * isn't in use (eg VINTR has no character etc) diff --git a/include/linux/wavefront.h b/include/linux/wavefront.h index 61bd0fd35240..51ab3c933acd 100644 --- a/include/linux/wavefront.h +++ b/include/linux/wavefront.h @@ -434,22 +434,22 @@ typedef struct wf_multisample { } wavefront_multisample; typedef struct wf_alias { - INT16 OriginalSample __attribute__ ((packed)); - - struct wf_sample_offset sampleStartOffset __attribute__ ((packed)); - struct wf_sample_offset loopStartOffset __attribute__ ((packed)); - struct wf_sample_offset sampleEndOffset __attribute__ ((packed)); - struct wf_sample_offset loopEndOffset __attribute__ ((packed)); - - INT16 FrequencyBias __attribute__ ((packed)); - - UCHAR8 SampleResolution:2 __attribute__ ((packed)); - UCHAR8 Unused1:1 __attribute__ ((packed)); - UCHAR8 Loop:1 __attribute__ ((packed)); - UCHAR8 Bidirectional:1 __attribute__ ((packed)); - UCHAR8 Unused2:1 __attribute__ ((packed)); - UCHAR8 Reverse:1 __attribute__ ((packed)); - UCHAR8 Unused3:1 __attribute__ ((packed)); + INT16 OriginalSample; + + struct wf_sample_offset sampleStartOffset; + struct wf_sample_offset loopStartOffset; + struct wf_sample_offset sampleEndOffset; + struct wf_sample_offset loopEndOffset; + + INT16 FrequencyBias; + + UCHAR8 SampleResolution:2; + UCHAR8 Unused1:1; + UCHAR8 Loop:1; + UCHAR8 Bidirectional:1; + UCHAR8 Unused2:1; + UCHAR8 Reverse:1; + UCHAR8 Unused3:1; /* This structure is meant to be padded only to 16 bits on their original. Of course, whoever wrote their documentation didn't @@ -460,8 +460,8 @@ typedef struct wf_alias { standard 16->32 bit issues. */ - UCHAR8 sixteen_bit_padding __attribute__ ((packed)); -} wavefront_alias; + UCHAR8 sixteen_bit_padding; +} __attribute__((packed)) wavefront_alias; typedef struct wf_drum { UCHAR8 PatchNumber; diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ac39d04d027c..86b111300231 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -65,6 +65,7 @@ extern int FASTCALL(schedule_work(struct work_struct *work)); extern int FASTCALL(schedule_delayed_work(struct work_struct *work, unsigned long delay)); extern int schedule_delayed_work_on(int cpu, struct work_struct *work, unsigned long delay); +extern int schedule_on_each_cpu(void (*func)(void *info), void *info); extern void flush_scheduled_work(void); extern int current_is_keventd(void); extern int keventd_up(void); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b096159086e8..beaef5c7a0ea 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -103,7 +103,9 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping); int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); int sync_page_range(struct inode *inode, struct address_space *mapping, - loff_t pos, size_t count); + loff_t pos, loff_t count); +int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, + loff_t pos, loff_t count); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h index 86e8e86e624a..5a86e78081bf 100644 --- a/include/net/dn_dev.h +++ b/include/net/dn_dev.h @@ -88,8 +88,8 @@ struct dn_dev { struct net_device *dev; struct dn_dev_parms parms; char use_long; - struct timer_list timer; - unsigned long t3; + struct timer_list timer; + unsigned long t3; struct neigh_parms *neigh_parms; unsigned char addr[ETH_ALEN]; struct neighbour *router; /* Default router on circuit */ @@ -99,57 +99,57 @@ struct dn_dev { struct dn_short_packet { - unsigned char msgflg __attribute__((packed)); - unsigned short dstnode __attribute__((packed)); - unsigned short srcnode __attribute__((packed)); - unsigned char forward __attribute__((packed)); -}; + unsigned char msgflg; + unsigned short dstnode; + unsigned short srcnode; + unsigned char forward; +} __attribute__((packed)); struct dn_long_packet { - unsigned char msgflg __attribute__((packed)); - unsigned char d_area __attribute__((packed)); - unsigned char d_subarea __attribute__((packed)); - unsigned char d_id[6] __attribute__((packed)); - unsigned char s_area __attribute__((packed)); - unsigned char s_subarea __attribute__((packed)); - unsigned char s_id[6] __attribute__((packed)); - unsigned char nl2 __attribute__((packed)); - unsigned char visit_ct __attribute__((packed)); - unsigned char s_class __attribute__((packed)); - unsigned char pt __attribute__((packed)); -}; + unsigned char msgflg; + unsigned char d_area; + unsigned char d_subarea; + unsigned char d_id[6]; + unsigned char s_area; + unsigned char s_subarea; + unsigned char s_id[6]; + unsigned char nl2; + unsigned char visit_ct; + unsigned char s_class; + unsigned char pt; +} __attribute__((packed)); /*------------------------- DRP - Routing messages ---------------------*/ struct endnode_hello_message { - unsigned char msgflg __attribute__((packed)); - unsigned char tiver[3] __attribute__((packed)); - unsigned char id[6] __attribute__((packed)); - unsigned char iinfo __attribute__((packed)); - unsigned short blksize __attribute__((packed)); - unsigned char area __attribute__((packed)); - unsigned char seed[8] __attribute__((packed)); - unsigned char neighbor[6] __attribute__((packed)); - unsigned short timer __attribute__((packed)); - unsigned char mpd __attribute__((packed)); - unsigned char datalen __attribute__((packed)); - unsigned char data[2] __attribute__((packed)); -}; + unsigned char msgflg; + unsigned char tiver[3]; + unsigned char id[6]; + unsigned char iinfo; + unsigned short blksize; + unsigned char area; + unsigned char seed[8]; + unsigned char neighbor[6]; + unsigned short timer; + unsigned char mpd; + unsigned char datalen; + unsigned char data[2]; +} __attribute__((packed)); struct rtnode_hello_message { - unsigned char msgflg __attribute__((packed)); - unsigned char tiver[3] __attribute__((packed)); - unsigned char id[6] __attribute__((packed)); - unsigned char iinfo __attribute__((packed)); - unsigned short blksize __attribute__((packed)); - unsigned char priority __attribute__((packed)); - unsigned char area __attribute__((packed)); - unsigned short timer __attribute__((packed)); - unsigned char mpd __attribute__((packed)); -}; + unsigned char msgflg; + unsigned char tiver[3]; + unsigned char id[6]; + unsigned char iinfo; + unsigned short blksize; + unsigned char priority; + unsigned char area; + unsigned short timer; + unsigned char mpd; +} __attribute__((packed)); extern void dn_dev_init(void); diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h index 1ba03be0af3a..e6182b86262b 100644 --- a/include/net/dn_nsp.h +++ b/include/net/dn_nsp.h @@ -72,78 +72,78 @@ extern struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int nobl struct nsp_data_seg_msg { - unsigned char msgflg __attribute__((packed)); - unsigned short dstaddr __attribute__((packed)); - unsigned short srcaddr __attribute__((packed)); -}; + unsigned char msgflg; + unsigned short dstaddr; + unsigned short srcaddr; +} __attribute__((packed)); struct nsp_data_opt_msg { - unsigned short acknum __attribute__((packed)); - unsigned short segnum __attribute__((packed)); - unsigned short lsflgs __attribute__((packed)); -}; + unsigned short acknum; + unsigned short segnum; + unsigned short lsflgs; +} __attribute__((packed)); struct nsp_data_opt_msg1 { - unsigned short acknum __attribute__((packed)); - unsigned short segnum __attribute__((packed)); -}; + unsigned short acknum; + unsigned short segnum; +} __attribute__((packed)); /* Acknowledgment Message (data/other data) */ struct nsp_data_ack_msg { - unsigned char msgflg __attribute__((packed)); - unsigned short dstaddr __attribute__((packed)); - unsigned short srcaddr __attribute__((packed)); - unsigned short acknum __attribute__((packed)); -}; + unsigned char msgflg; + unsigned short dstaddr; + unsigned short srcaddr; + unsigned short acknum; +} __attribute__((packed)); /* Connect Acknowledgment Message */ struct nsp_conn_ack_msg { - unsigned char msgflg __attribute__((packed)); - unsigned short dstaddr __attribute__((packed)); -}; + unsigned char msgflg; + unsigned short dstaddr; +} __attribute__((packed)); /* Connect Initiate/Retransmit Initiate/Connect Confirm */ struct nsp_conn_init_msg { - unsigned char msgflg __attribute__((packed)); + unsigned char msgflg; #define NSP_CI 0x18 /* Connect Initiate */ #define NSP_RCI 0x68 /* Retrans. Conn Init */ - unsigned short dstaddr __attribute__((packed)); - unsigned short srcaddr __attribute__((packed)); - unsigned char services __attribute__((packed)); + unsigned short dstaddr; + unsigned short srcaddr; + unsigned char services; #define NSP_FC_NONE 0x00 /* Flow Control None */ #define NSP_FC_SRC 0x04 /* Seg Req. Count */ #define NSP_FC_SCMC 0x08 /* Sess. Control Mess */ #define NSP_FC_MASK 0x0c /* FC type mask */ - unsigned char info __attribute__((packed)); - unsigned short segsize __attribute__((packed)); -}; + unsigned char info; + unsigned short segsize; +} __attribute__((packed)); /* Disconnect Initiate/Disconnect Confirm */ struct nsp_disconn_init_msg { - unsigned char msgflg __attribute__((packed)); - unsigned short dstaddr __attribute__((packed)); - unsigned short srcaddr __attribute__((packed)); - unsigned short reason __attribute__((packed)); -}; + unsigned char msgflg; + unsigned short dstaddr; + unsigned short srcaddr; + unsigned short reason; +} __attribute__((packed)); struct srcobj_fmt { - char format __attribute__((packed)); - unsigned char task __attribute__((packed)); - unsigned short grpcode __attribute__((packed)); - unsigned short usrcode __attribute__((packed)); - char dlen __attribute__((packed)); -}; + char format; + unsigned char task; + unsigned short grpcode; + unsigned short usrcode; + char dlen; +} __attribute__((packed)); /* * A collection of functions for manipulating the sequence diff --git a/include/net/dst.h b/include/net/dst.h index bee8b84d329d..5161e89017f9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -225,16 +225,7 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout) /* Output packet to network from transport. */ static inline int dst_output(struct sk_buff *skb) { - int err; - - for (;;) { - err = skb->dst->output(skb); - - if (likely(err == 0)) - return err; - if (unlikely(err != NET_XMIT_BYPASS)) - return err; - } + return skb->dst->output(skb); } /* Input packet from network to transport. */ diff --git a/include/net/ip.h b/include/net/ip.h index 7bb5804847f2..8de0697b364c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -37,11 +37,10 @@ struct inet_skb_parm struct ip_options opt; /* Compiled IP options */ unsigned char flags; -#define IPSKB_MASQUERADED 1 -#define IPSKB_TRANSLATED 2 -#define IPSKB_FORWARDED 4 -#define IPSKB_XFRM_TUNNEL_SIZE 8 -#define IPSKB_FRAG_COMPLETE 16 +#define IPSKB_FORWARDED 1 +#define IPSKB_XFRM_TUNNEL_SIZE 2 +#define IPSKB_XFRM_TRANSFORMED 4 +#define IPSKB_FRAG_COMPLETE 8 }; struct ipcm_cookie @@ -95,7 +94,6 @@ extern int ip_local_deliver(struct sk_buff *skb); extern int ip_mr_input(struct sk_buff *skb); extern int ip_output(struct sk_buff *skb); extern int ip_mc_output(struct sk_buff *skb); -extern int ip_fragment(struct sk_buff *skb, int (*out)(struct sk_buff*)); extern int ip_do_nat(struct sk_buff *skb); extern void ip_send_check(struct iphdr *ip); extern int ip_queue_xmit(struct sk_buff *skb, int ipfragok); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 860bbac4c4ee..3b1d963d396c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -418,6 +418,8 @@ extern int ipv6_rcv(struct sk_buff *skb, struct packet_type *pt, struct net_device *orig_dev); +extern int ip6_rcv_finish(struct sk_buff *skb); + /* * upper-layer output functions */ diff --git a/include/net/protocol.h b/include/net/protocol.h index 63f7db99c2a6..6dc5970612d7 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -43,7 +43,7 @@ struct net_protocol { #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) struct inet6_protocol { - int (*handler)(struct sk_buff **skb, unsigned int *nhoffp); + int (*handler)(struct sk_buff **skb); void (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 07d7b50cdd76..d09ca0e7d139 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -668,7 +668,7 @@ static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *s return xfrm_policy_check(sk, dir, skb, AF_INET6); } - +extern int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family); extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) @@ -831,7 +831,7 @@ struct xfrm_tunnel { }; struct xfrm6_tunnel { - int (*handler)(struct sk_buff **pskb, unsigned int *nhoffp); + int (*handler)(struct sk_buff **pskb); void (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __u32 info); }; @@ -866,10 +866,11 @@ extern int xfrm_state_mtu(struct xfrm_state *x, int mtu); extern int xfrm_init_state(struct xfrm_state *x); extern int xfrm4_rcv(struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); +extern int xfrm4_output_finish(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); -extern int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi); -extern int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp); +extern int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi); +extern int xfrm6_rcv(struct sk_buff **pskb); extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler); extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr); diff --git a/include/sound/wavefront.h b/include/sound/wavefront.h index 9e572aed2435..15d82e594b56 100644 --- a/include/sound/wavefront.h +++ b/include/sound/wavefront.h @@ -454,22 +454,22 @@ typedef struct wf_multisample { } wavefront_multisample; typedef struct wf_alias { - s16 OriginalSample __attribute__ ((packed)); - - struct wf_sample_offset sampleStartOffset __attribute__ ((packed)); - struct wf_sample_offset loopStartOffset __attribute__ ((packed)); - struct wf_sample_offset sampleEndOffset __attribute__ ((packed)); - struct wf_sample_offset loopEndOffset __attribute__ ((packed)); - - s16 FrequencyBias __attribute__ ((packed)); - - u8 SampleResolution:2 __attribute__ ((packed)); - u8 Unused1:1 __attribute__ ((packed)); - u8 Loop:1 __attribute__ ((packed)); - u8 Bidirectional:1 __attribute__ ((packed)); - u8 Unused2:1 __attribute__ ((packed)); - u8 Reverse:1 __attribute__ ((packed)); - u8 Unused3:1 __attribute__ ((packed)); + s16 OriginalSample; + + struct wf_sample_offset sampleStartOffset; + struct wf_sample_offset loopStartOffset; + struct wf_sample_offset sampleEndOffset; + struct wf_sample_offset loopEndOffset; + + s16 FrequencyBias; + + u8 SampleResolution:2; + u8 Unused1:1; + u8 Loop:1; + u8 Bidirectional:1; + u8 Unused2:1; + u8 Reverse:1; + u8 Unused3:1; /* This structure is meant to be padded only to 16 bits on their original. Of course, whoever wrote their documentation didn't @@ -480,8 +480,8 @@ typedef struct wf_alias { standard 16->32 bit issues. */ - u8 sixteen_bit_padding __attribute__ ((packed)); -} wavefront_alias; + u8 sixteen_bit_padding; +} __attribute__((packed)) wavefront_alias; typedef struct wf_drum { u8 PatchNumber; diff --git a/init/Kconfig b/init/Kconfig index ba42f3793a84..f8f6929d8f25 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -228,6 +228,25 @@ config CPUSETS source "usr/Kconfig" +config UID16 + bool "Enable 16-bit UID system calls" if EMBEDDED + depends !ALPHA && !PPC && !PPC64 && !PARISC && !V850 && !ARCH_S390X + depends !X86_64 || IA32_EMULATION + depends !SPARC64 || SPARC32_COMPAT + default y + help + This enables the legacy 16-bit UID syscall wrappers. + +config VM86 + depends X86 + default y + bool "Enable VM86 support" if EMBEDDED + help + This option is required by programs like DOSEMU to run 16-bit legacy + code on X86 processors. It also may be needed by software like + XFree86 to initialize some video cards via BIOS. Disabling this + option saves about 6k. + config CC_OPTIMIZE_FOR_SIZE bool "Optimize for size (Look out for broken compilers!)" default y @@ -309,6 +328,21 @@ config BUG option for embedded systems with no facilities for reporting errors. Just say Y. +config DOUBLEFAULT + depends X86 + default y if X86 + bool "Enable doublefault exception handler" if EMBEDDED + help + This option allows trapping of rare doublefault exceptions that + would otherwise cause a system to silently reboot. Disabling this + option saves about 4k. + +config ELF_CORE + default y + bool "Enable ELF core dumps" if EMBEDDED + help + Enable support for generating core dumps. Disabling saves about 4k. + config BASE_FULL default y bool "Enable full-sized data structures for core" if EMBEDDED @@ -380,6 +414,15 @@ config CC_ALIGN_JUMPS no dummy operations need be executed. Zero means use compiler's default. +config SLAB + default y + bool "Use full SLAB allocator" if EMBEDDED + help + Disabling this replaces the advanced SLAB allocator and + kmalloc support with the drastically simpler SLOB allocator. + SLOB is more space efficient but does not scale well and is + more susceptible to fragmentation. + endmenu # General setup config TINY_SHMEM @@ -391,6 +434,10 @@ config BASE_SMALL default 0 if BASE_FULL default 1 if !BASE_FULL +config SLOB + default !SLAB + bool + menu "Loadable module support" config MODULES diff --git a/init/main.c b/init/main.c index 2ed3638deec7..8342c2890b16 100644 --- a/init/main.c +++ b/init/main.c @@ -58,11 +58,6 @@ * This is one of the first .c files built. Error out early * if we have compiler trouble.. */ -#if __GNUC__ == 2 && __GNUC_MINOR__ == 96 -#ifdef CONFIG_FRAME_POINTER -#error This compiler cannot compile correctly with frame pointers enabled -#endif -#endif #ifdef CONFIG_X86_LOCAL_APIC #include <asm/smp.h> @@ -74,7 +69,7 @@ * To avoid associated bogus bug reports, we flatly refuse to compile * with a gcc that is known to be too old from the very beginning. */ -#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95) +#if (__GNUC__ < 3) || (__GNUC__ == 3 && __GNUC_MINOR__ < 2) #error Sorry, your GCC is too old. It builds incorrect kernels. #endif @@ -512,6 +507,7 @@ asmlinkage void __init start_kernel(void) } #endif vfs_caches_init_early(); + cpuset_init_early(); mem_init(); kmem_cache_init(); setup_per_cpu_pageset(); diff --git a/ipc/shm.c b/ipc/shm.c index 0ef4a1cf3e27..0b92e874fc06 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -34,8 +34,6 @@ #include "util.h" -#define shm_flags shm_perm.mode - static struct file_operations shm_file_operations; static struct vm_operations_struct shm_vm_ops; @@ -148,7 +146,7 @@ static void shm_close (struct vm_area_struct *shmd) shp->shm_dtim = get_seconds(); shp->shm_nattch--; if(shp->shm_nattch == 0 && - shp->shm_flags & SHM_DEST) + shp->shm_perm.mode & SHM_DEST) shm_destroy (shp); else shm_unlock(shp); @@ -205,7 +203,7 @@ static int newseg (key_t key, int shmflg, size_t size) return -ENOMEM; shp->shm_perm.key = key; - shp->shm_flags = (shmflg & S_IRWXUGO); + shp->shm_perm.mode = (shmflg & S_IRWXUGO); shp->mlock_user = NULL; shp->shm_perm.security = NULL; @@ -345,7 +343,7 @@ static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __ out->uid = tbuf.shm_perm.uid; out->gid = tbuf.shm_perm.gid; - out->mode = tbuf.shm_flags; + out->mode = tbuf.shm_perm.mode; return 0; } @@ -358,7 +356,7 @@ static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __ out->uid = tbuf_old.shm_perm.uid; out->gid = tbuf_old.shm_perm.gid; - out->mode = tbuf_old.shm_flags; + out->mode = tbuf_old.shm_perm.mode; return 0; } @@ -560,13 +558,13 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) if (!is_file_hugepages(shp->shm_file)) { err = shmem_lock(shp->shm_file, 1, user); if (!err) { - shp->shm_flags |= SHM_LOCKED; + shp->shm_perm.mode |= SHM_LOCKED; shp->mlock_user = user; } } } else if (!is_file_hugepages(shp->shm_file)) { shmem_lock(shp->shm_file, 0, shp->mlock_user); - shp->shm_flags &= ~SHM_LOCKED; + shp->shm_perm.mode &= ~SHM_LOCKED; shp->mlock_user = NULL; } shm_unlock(shp); @@ -605,7 +603,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) goto out_unlock_up; if (shp->shm_nattch){ - shp->shm_flags |= SHM_DEST; + shp->shm_perm.mode |= SHM_DEST; /* Do not find it any more */ shp->shm_perm.key = IPC_PRIVATE; shm_unlock(shp); @@ -644,7 +642,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) shp->shm_perm.uid = setbuf.uid; shp->shm_perm.gid = setbuf.gid; - shp->shm_flags = (shp->shm_flags & ~S_IRWXUGO) + shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO) | (setbuf.mode & S_IRWXUGO); shp->shm_ctim = get_seconds(); break; @@ -777,7 +775,7 @@ invalid: BUG(); shp->shm_nattch--; if(shp->shm_nattch == 0 && - shp->shm_flags & SHM_DEST) + shp->shm_perm.mode & SHM_DEST) shm_destroy (shp); else shm_unlock(shp); @@ -902,7 +900,7 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it) return seq_printf(s, format, shp->shm_perm.key, shp->id, - shp->shm_flags, + shp->shm_perm.mode, shp->shm_segsz, shp->shm_cprid, shp->shm_lprid, diff --git a/kernel/audit.c b/kernel/audit.c index 32fa03ad1984..d13ab7d2d899 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -267,7 +267,7 @@ static int audit_set_failure(int state, uid_t loginuid) return old; } -int kauditd_thread(void *dummy) +static int kauditd_thread(void *dummy) { struct sk_buff *skb; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 7430640f9816..eab64e23bcae 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -39,6 +39,7 @@ #include <linux/namei.h> #include <linux/pagemap.h> #include <linux/proc_fs.h> +#include <linux/rcupdate.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -54,7 +55,23 @@ #include <asm/atomic.h> #include <asm/semaphore.h> -#define CPUSET_SUPER_MAGIC 0x27e0eb +#define CPUSET_SUPER_MAGIC 0x27e0eb + +/* + * Tracks how many cpusets are currently defined in system. + * When there is only one cpuset (the root cpuset) we can + * short circuit some hooks. + */ +int number_of_cpusets __read_mostly; + +/* See "Frequency meter" comments, below. */ + +struct fmeter { + int cnt; /* unprocessed events count */ + int val; /* most recent output value */ + time_t time; /* clock (secs) when val computed */ + spinlock_t lock; /* guards read or write of above */ +}; struct cpuset { unsigned long flags; /* "unsigned long" so bitops work */ @@ -80,13 +97,16 @@ struct cpuset { * Copy of global cpuset_mems_generation as of the most * recent time this cpuset changed its mems_allowed. */ - int mems_generation; + int mems_generation; + + struct fmeter fmeter; /* memory_pressure filter */ }; /* bits in struct cpuset flags field */ typedef enum { CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, + CS_MEMORY_MIGRATE, CS_REMOVED, CS_NOTIFY_ON_RELEASE } cpuset_flagbits_t; @@ -112,6 +132,11 @@ static inline int notify_on_release(const struct cpuset *cs) return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); } +static inline int is_memory_migrate(const struct cpuset *cs) +{ + return !!test_bit(CS_MEMORY_MIGRATE, &cs->flags); +} + /* * Increment this atomic integer everytime any cpuset changes its * mems_allowed value. Users of cpusets can track this generation @@ -137,13 +162,10 @@ static struct cpuset top_cpuset = { .count = ATOMIC_INIT(0), .sibling = LIST_HEAD_INIT(top_cpuset.sibling), .children = LIST_HEAD_INIT(top_cpuset.children), - .parent = NULL, - .dentry = NULL, - .mems_generation = 0, }; static struct vfsmount *cpuset_mount; -static struct super_block *cpuset_sb = NULL; +static struct super_block *cpuset_sb; /* * We have two global cpuset semaphores below. They can nest. @@ -227,6 +249,11 @@ static struct super_block *cpuset_sb = NULL; * a tasks cpuset pointer we use task_lock(), which acts on a spinlock * (task->alloc_lock) already in the task_struct routinely used for * such matters. + * + * P.S. One more locking exception. RCU is used to guard the + * update of a tasks cpuset pointer by attach_task() and the + * access of task->cpuset->mems_generation via that pointer in + * the routine cpuset_update_task_memory_state(). */ static DECLARE_MUTEX(manage_sem); @@ -304,7 +331,7 @@ static void cpuset_d_remove_dir(struct dentry *dentry) spin_lock(&dcache_lock); node = dentry->d_subdirs.next; while (node != &dentry->d_subdirs) { - struct dentry *d = list_entry(node, struct dentry, d_child); + struct dentry *d = list_entry(node, struct dentry, d_u.d_child); list_del_init(node); if (d->d_inode) { d = dget_locked(d); @@ -316,7 +343,7 @@ static void cpuset_d_remove_dir(struct dentry *dentry) } node = dentry->d_subdirs.next; } - list_del_init(&dentry->d_child); + list_del_init(&dentry->d_u.d_child); spin_unlock(&dcache_lock); remove_dir(dentry); } @@ -570,20 +597,43 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) BUG_ON(!nodes_intersects(*pmask, node_online_map)); } -/* - * Refresh current tasks mems_allowed and mems_generation from current - * tasks cpuset. +/** + * cpuset_update_task_memory_state - update task memory placement * - * Call without callback_sem or task_lock() held. May be called with - * or without manage_sem held. Will acquire task_lock() and might - * acquire callback_sem during call. + * If the current tasks cpusets mems_allowed changed behind our + * backs, update current->mems_allowed, mems_generation and task NUMA + * mempolicy to the new value. + * + * Task mempolicy is updated by rebinding it relative to the + * current->cpuset if a task has its memory placement changed. + * Do not call this routine if in_interrupt(). * - * The task_lock() is required to dereference current->cpuset safely. - * Without it, we could pick up the pointer value of current->cpuset - * in one instruction, and then attach_task could give us a different - * cpuset, and then the cpuset we had could be removed and freed, - * and then on our next instruction, we could dereference a no longer - * valid cpuset pointer to get its mems_generation field. + * Call without callback_sem or task_lock() held. May be called + * with or without manage_sem held. Doesn't need task_lock to guard + * against another task changing a non-NULL cpuset pointer to NULL, + * as that is only done by a task on itself, and if the current task + * is here, it is not simultaneously in the exit code NULL'ing its + * cpuset pointer. This routine also might acquire callback_sem and + * current->mm->mmap_sem during call. + * + * Reading current->cpuset->mems_generation doesn't need task_lock + * to guard the current->cpuset derefence, because it is guarded + * from concurrent freeing of current->cpuset by attach_task(), + * using RCU. + * + * The rcu_dereference() is technically probably not needed, + * as I don't actually mind if I see a new cpuset pointer but + * an old value of mems_generation. However this really only + * matters on alpha systems using cpusets heavily. If I dropped + * that rcu_dereference(), it would save them a memory barrier. + * For all other arch's, rcu_dereference is a no-op anyway, and for + * alpha systems not using cpusets, another planned optimization, + * avoiding the rcu critical section for tasks in the root cpuset + * which is statically allocated, so can't vanish, will make this + * irrelevant. Better to use RCU as intended, than to engage in + * some cute trick to save a memory barrier that is impossible to + * test, for alpha systems using cpusets heavily, which might not + * even exist. * * This routine is needed to update the per-task mems_allowed data, * within the tasks context, when it is trying to allocate memory @@ -591,27 +641,31 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) * task has been modifying its cpuset. */ -static void refresh_mems(void) +void cpuset_update_task_memory_state() { int my_cpusets_mem_gen; + struct task_struct *tsk = current; + struct cpuset *cs; - task_lock(current); - my_cpusets_mem_gen = current->cpuset->mems_generation; - task_unlock(current); - - if (current->cpuset_mems_generation != my_cpusets_mem_gen) { - struct cpuset *cs; - nodemask_t oldmem = current->mems_allowed; + if (tsk->cpuset == &top_cpuset) { + /* Don't need rcu for top_cpuset. It's never freed. */ + my_cpusets_mem_gen = top_cpuset.mems_generation; + } else { + rcu_read_lock(); + cs = rcu_dereference(tsk->cpuset); + my_cpusets_mem_gen = cs->mems_generation; + rcu_read_unlock(); + } + if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { down(&callback_sem); - task_lock(current); - cs = current->cpuset; - guarantee_online_mems(cs, ¤t->mems_allowed); - current->cpuset_mems_generation = cs->mems_generation; - task_unlock(current); + task_lock(tsk); + cs = tsk->cpuset; /* Maybe changed when task not locked */ + guarantee_online_mems(cs, &tsk->mems_allowed); + tsk->cpuset_mems_generation = cs->mems_generation; + task_unlock(tsk); up(&callback_sem); - if (!nodes_equal(oldmem, current->mems_allowed)) - numa_policy_rebind(&oldmem, ¤t->mems_allowed); + mpol_rebind_task(tsk, &tsk->mems_allowed); } } @@ -766,36 +820,150 @@ static int update_cpumask(struct cpuset *cs, char *buf) } /* + * Handle user request to change the 'mems' memory placement + * of a cpuset. Needs to validate the request, update the + * cpusets mems_allowed and mems_generation, and for each + * task in the cpuset, rebind any vma mempolicies and if + * the cpuset is marked 'memory_migrate', migrate the tasks + * pages to the new memory. + * * Call with manage_sem held. May take callback_sem during call. + * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, + * lock each such tasks mm->mmap_sem, scan its vma's and rebind + * their mempolicies to the cpusets new mems_allowed. */ static int update_nodemask(struct cpuset *cs, char *buf) { struct cpuset trialcs; + nodemask_t oldmem; + struct task_struct *g, *p; + struct mm_struct **mmarray; + int i, n, ntasks; + int migrate; + int fudge; int retval; trialcs = *cs; retval = nodelist_parse(buf, trialcs.mems_allowed); if (retval < 0) - return retval; + goto done; nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map); - if (nodes_empty(trialcs.mems_allowed)) - return -ENOSPC; + oldmem = cs->mems_allowed; + if (nodes_equal(oldmem, trialcs.mems_allowed)) { + retval = 0; /* Too easy - nothing to do */ + goto done; + } + if (nodes_empty(trialcs.mems_allowed)) { + retval = -ENOSPC; + goto done; + } retval = validate_change(cs, &trialcs); - if (retval == 0) { - down(&callback_sem); - cs->mems_allowed = trialcs.mems_allowed; - atomic_inc(&cpuset_mems_generation); - cs->mems_generation = atomic_read(&cpuset_mems_generation); - up(&callback_sem); + if (retval < 0) + goto done; + + down(&callback_sem); + cs->mems_allowed = trialcs.mems_allowed; + atomic_inc(&cpuset_mems_generation); + cs->mems_generation = atomic_read(&cpuset_mems_generation); + up(&callback_sem); + + set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */ + + fudge = 10; /* spare mmarray[] slots */ + fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ + retval = -ENOMEM; + + /* + * Allocate mmarray[] to hold mm reference for each task + * in cpuset cs. Can't kmalloc GFP_KERNEL while holding + * tasklist_lock. We could use GFP_ATOMIC, but with a + * few more lines of code, we can retry until we get a big + * enough mmarray[] w/o using GFP_ATOMIC. + */ + while (1) { + ntasks = atomic_read(&cs->count); /* guess */ + ntasks += fudge; + mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL); + if (!mmarray) + goto done; + write_lock_irq(&tasklist_lock); /* block fork */ + if (atomic_read(&cs->count) <= ntasks) + break; /* got enough */ + write_unlock_irq(&tasklist_lock); /* try again */ + kfree(mmarray); } + + n = 0; + + /* Load up mmarray[] with mm reference for each task in cpuset. */ + do_each_thread(g, p) { + struct mm_struct *mm; + + if (n >= ntasks) { + printk(KERN_WARNING + "Cpuset mempolicy rebind incomplete.\n"); + continue; + } + if (p->cpuset != cs) + continue; + mm = get_task_mm(p); + if (!mm) + continue; + mmarray[n++] = mm; + } while_each_thread(g, p); + write_unlock_irq(&tasklist_lock); + + /* + * Now that we've dropped the tasklist spinlock, we can + * rebind the vma mempolicies of each mm in mmarray[] to their + * new cpuset, and release that mm. The mpol_rebind_mm() + * call takes mmap_sem, which we couldn't take while holding + * tasklist_lock. Forks can happen again now - the mpol_copy() + * cpuset_being_rebound check will catch such forks, and rebind + * their vma mempolicies too. Because we still hold the global + * cpuset manage_sem, we know that no other rebind effort will + * be contending for the global variable cpuset_being_rebound. + * It's ok if we rebind the same mm twice; mpol_rebind_mm() + * is idempotent. Also migrate pages in each mm to new nodes. + */ + migrate = is_memory_migrate(cs); + for (i = 0; i < n; i++) { + struct mm_struct *mm = mmarray[i]; + + mpol_rebind_mm(mm, &cs->mems_allowed); + if (migrate) { + do_migrate_pages(mm, &oldmem, &cs->mems_allowed, + MPOL_MF_MOVE_ALL); + } + mmput(mm); + } + + /* We're done rebinding vma's to this cpusets new mems_allowed. */ + kfree(mmarray); + set_cpuset_being_rebound(NULL); + retval = 0; +done: return retval; } /* + * Call with manage_sem held. + */ + +static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) +{ + if (simple_strtoul(buf, NULL, 10) != 0) + cpuset_memory_pressure_enabled = 1; + else + cpuset_memory_pressure_enabled = 0; + return 0; +} + +/* * update_flag - read a 0 or a 1 in a file and update associated flag * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, - * CS_NOTIFY_ON_RELEASE) + * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE) * cs: the cpuset to update * buf: the buffer where we read the 0 or 1 * @@ -834,6 +1002,104 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) } /* + * Frequency meter - How fast is some event occuring? + * + * These routines manage a digitally filtered, constant time based, + * event frequency meter. There are four routines: + * fmeter_init() - initialize a frequency meter. + * fmeter_markevent() - called each time the event happens. + * fmeter_getrate() - returns the recent rate of such events. + * fmeter_update() - internal routine used to update fmeter. + * + * A common data structure is passed to each of these routines, + * which is used to keep track of the state required to manage the + * frequency meter and its digital filter. + * + * The filter works on the number of events marked per unit time. + * The filter is single-pole low-pass recursive (IIR). The time unit + * is 1 second. Arithmetic is done using 32-bit integers scaled to + * simulate 3 decimal digits of precision (multiplied by 1000). + * + * With an FM_COEF of 933, and a time base of 1 second, the filter + * has a half-life of 10 seconds, meaning that if the events quit + * happening, then the rate returned from the fmeter_getrate() + * will be cut in half each 10 seconds, until it converges to zero. + * + * It is not worth doing a real infinitely recursive filter. If more + * than FM_MAXTICKS ticks have elapsed since the last filter event, + * just compute FM_MAXTICKS ticks worth, by which point the level + * will be stable. + * + * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid + * arithmetic overflow in the fmeter_update() routine. + * + * Given the simple 32 bit integer arithmetic used, this meter works + * best for reporting rates between one per millisecond (msec) and + * one per 32 (approx) seconds. At constant rates faster than one + * per msec it maxes out at values just under 1,000,000. At constant + * rates between one per msec, and one per second it will stabilize + * to a value N*1000, where N is the rate of events per second. + * At constant rates between one per second and one per 32 seconds, + * it will be choppy, moving up on the seconds that have an event, + * and then decaying until the next event. At rates slower than + * about one in 32 seconds, it decays all the way back to zero between + * each event. + */ + +#define FM_COEF 933 /* coefficient for half-life of 10 secs */ +#define FM_MAXTICKS ((time_t)99) /* useless computing more ticks than this */ +#define FM_MAXCNT 1000000 /* limit cnt to avoid overflow */ +#define FM_SCALE 1000 /* faux fixed point scale */ + +/* Initialize a frequency meter */ +static void fmeter_init(struct fmeter *fmp) +{ + fmp->cnt = 0; + fmp->val = 0; + fmp->time = 0; + spin_lock_init(&fmp->lock); +} + +/* Internal meter update - process cnt events and update value */ +static void fmeter_update(struct fmeter *fmp) +{ + time_t now = get_seconds(); + time_t ticks = now - fmp->time; + + if (ticks == 0) + return; + + ticks = min(FM_MAXTICKS, ticks); + while (ticks-- > 0) + fmp->val = (FM_COEF * fmp->val) / FM_SCALE; + fmp->time = now; + + fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE; + fmp->cnt = 0; +} + +/* Process any previous ticks, then bump cnt by one (times scale). */ +static void fmeter_markevent(struct fmeter *fmp) +{ + spin_lock(&fmp->lock); + fmeter_update(fmp); + fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE); + spin_unlock(&fmp->lock); +} + +/* Process any previous ticks, then return current value. */ +static int fmeter_getrate(struct fmeter *fmp) +{ + int val; + + spin_lock(&fmp->lock); + fmeter_update(fmp); + val = fmp->val; + spin_unlock(&fmp->lock); + return val; +} + +/* * Attack task specified by pid in 'pidbuf' to cpuset 'cs', possibly * writing the path of the old cpuset in 'ppathbuf' if it needs to be * notified on release. @@ -848,6 +1114,8 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) struct task_struct *tsk; struct cpuset *oldcs; cpumask_t cpus; + nodemask_t from, to; + struct mm_struct *mm; if (sscanf(pidbuf, "%d", &pid) != 1) return -EIO; @@ -887,14 +1155,27 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) return -ESRCH; } atomic_inc(&cs->count); - tsk->cpuset = cs; + rcu_assign_pointer(tsk->cpuset, cs); task_unlock(tsk); guarantee_online_cpus(cs, &cpus); set_cpus_allowed(tsk, cpus); + from = oldcs->mems_allowed; + to = cs->mems_allowed; + up(&callback_sem); + + mm = get_task_mm(tsk); + if (mm) { + mpol_rebind_mm(mm, &to); + mmput(mm); + } + + if (is_memory_migrate(cs)) + do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL); put_task_struct(tsk); + synchronize_rcu(); if (atomic_dec_and_test(&oldcs->count)) check_for_release(oldcs, ppathbuf); return 0; @@ -905,11 +1186,14 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) typedef enum { FILE_ROOT, FILE_DIR, + FILE_MEMORY_MIGRATE, FILE_CPULIST, FILE_MEMLIST, FILE_CPU_EXCLUSIVE, FILE_MEM_EXCLUSIVE, FILE_NOTIFY_ON_RELEASE, + FILE_MEMORY_PRESSURE_ENABLED, + FILE_MEMORY_PRESSURE, FILE_TASKLIST, } cpuset_filetype_t; @@ -960,6 +1244,15 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us case FILE_NOTIFY_ON_RELEASE: retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer); break; + case FILE_MEMORY_MIGRATE: + retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); + break; + case FILE_MEMORY_PRESSURE_ENABLED: + retval = update_memory_pressure_enabled(cs, buffer); + break; + case FILE_MEMORY_PRESSURE: + retval = -EACCES; + break; case FILE_TASKLIST: retval = attach_task(cs, buffer, &pathbuf); break; @@ -1060,6 +1353,15 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, case FILE_NOTIFY_ON_RELEASE: *s++ = notify_on_release(cs) ? '1' : '0'; break; + case FILE_MEMORY_MIGRATE: + *s++ = is_memory_migrate(cs) ? '1' : '0'; + break; + case FILE_MEMORY_PRESSURE_ENABLED: + *s++ = cpuset_memory_pressure_enabled ? '1' : '0'; + break; + case FILE_MEMORY_PRESSURE: + s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter)); + break; default: retval = -EINVAL; goto out; @@ -1178,7 +1480,7 @@ static int cpuset_create_file(struct dentry *dentry, int mode) /* * cpuset_create_dir - create a directory for an object. - * cs: the cpuset we create the directory for. + * cs: the cpuset we create the directory for. * It must have a valid ->parent field * And we are going to fill its ->dentry field. * name: The name to give to the cpuset directory. Will be copied. @@ -1408,6 +1710,21 @@ static struct cftype cft_notify_on_release = { .private = FILE_NOTIFY_ON_RELEASE, }; +static struct cftype cft_memory_migrate = { + .name = "memory_migrate", + .private = FILE_MEMORY_MIGRATE, +}; + +static struct cftype cft_memory_pressure_enabled = { + .name = "memory_pressure_enabled", + .private = FILE_MEMORY_PRESSURE_ENABLED, +}; + +static struct cftype cft_memory_pressure = { + .name = "memory_pressure", + .private = FILE_MEMORY_PRESSURE, +}; + static int cpuset_populate_dir(struct dentry *cs_dentry) { int err; @@ -1422,6 +1739,10 @@ static int cpuset_populate_dir(struct dentry *cs_dentry) return err; if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0) return err; + if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0) + return err; + if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0) + return err; if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) return err; return 0; @@ -1446,7 +1767,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) return -ENOMEM; down(&manage_sem); - refresh_mems(); + cpuset_update_task_memory_state(); cs->flags = 0; if (notify_on_release(parent)) set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); @@ -1457,11 +1778,13 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) INIT_LIST_HEAD(&cs->children); atomic_inc(&cpuset_mems_generation); cs->mems_generation = atomic_read(&cpuset_mems_generation); + fmeter_init(&cs->fmeter); cs->parent = parent; down(&callback_sem); list_add(&cs->sibling, &cs->parent->children); + number_of_cpusets++; up(&callback_sem); err = cpuset_create_dir(cs, name, mode); @@ -1503,7 +1826,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) /* the vfs holds both inode->i_sem already */ down(&manage_sem); - refresh_mems(); + cpuset_update_task_memory_state(); if (atomic_read(&cs->count) > 0) { up(&manage_sem); return -EBUSY; @@ -1524,6 +1847,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) spin_unlock(&d->d_lock); cpuset_d_remove_dir(d); dput(d); + number_of_cpusets--; up(&callback_sem); if (list_empty(&parent->children)) check_for_release(parent, &pathbuf); @@ -1532,6 +1856,21 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) return 0; } +/* + * cpuset_init_early - just enough so that the calls to + * cpuset_update_task_memory_state() in early init code + * are harmless. + */ + +int __init cpuset_init_early(void) +{ + struct task_struct *tsk = current; + + tsk->cpuset = &top_cpuset; + tsk->cpuset->mems_generation = atomic_read(&cpuset_mems_generation); + return 0; +} + /** * cpuset_init - initialize cpusets at system boot * @@ -1546,6 +1885,7 @@ int __init cpuset_init(void) top_cpuset.cpus_allowed = CPU_MASK_ALL; top_cpuset.mems_allowed = NODE_MASK_ALL; + fmeter_init(&top_cpuset.fmeter); atomic_inc(&cpuset_mems_generation); top_cpuset.mems_generation = atomic_read(&cpuset_mems_generation); @@ -1566,7 +1906,11 @@ int __init cpuset_init(void) root->d_inode->i_nlink++; top_cpuset.dentry = root; root->d_inode->i_op = &cpuset_dir_inode_operations; + number_of_cpusets = 1; err = cpuset_populate_dir(root); + /* memory_pressure_enabled is in root cpuset only */ + if (err == 0) + err = cpuset_add_file(root, &cft_memory_pressure_enabled); out: return err; } @@ -1632,15 +1976,13 @@ void cpuset_fork(struct task_struct *child) * * We don't need to task_lock() this reference to tsk->cpuset, * because tsk is already marked PF_EXITING, so attach_task() won't - * mess with it. + * mess with it, or task is a failed fork, never visible to attach_task. **/ void cpuset_exit(struct task_struct *tsk) { struct cpuset *cs; - BUG_ON(!(tsk->flags & PF_EXITING)); - cs = tsk->cpuset; tsk->cpuset = NULL; @@ -1667,14 +2009,14 @@ void cpuset_exit(struct task_struct *tsk) * tasks cpuset. **/ -cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk) +cpumask_t cpuset_cpus_allowed(struct task_struct *tsk) { cpumask_t mask; down(&callback_sem); - task_lock((struct task_struct *)tsk); + task_lock(tsk); guarantee_online_cpus(tsk->cpuset, &mask); - task_unlock((struct task_struct *)tsk); + task_unlock(tsk); up(&callback_sem); return mask; @@ -1686,43 +2028,26 @@ void cpuset_init_current_mems_allowed(void) } /** - * cpuset_update_current_mems_allowed - update mems parameters to new values - * - * If the current tasks cpusets mems_allowed changed behind our backs, - * update current->mems_allowed and mems_generation to the new value. - * Do not call this routine if in_interrupt(). + * cpuset_mems_allowed - return mems_allowed mask from a tasks cpuset. + * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed. * - * Call without callback_sem or task_lock() held. May be called - * with or without manage_sem held. Unless exiting, it will acquire - * task_lock(). Also might acquire callback_sem during call to - * refresh_mems(). - */ + * Description: Returns the nodemask_t mems_allowed of the cpuset + * attached to the specified @tsk. Guaranteed to return some non-empty + * subset of node_online_map, even if this means going outside the + * tasks cpuset. + **/ -void cpuset_update_current_mems_allowed(void) +nodemask_t cpuset_mems_allowed(struct task_struct *tsk) { - struct cpuset *cs; - int need_to_refresh = 0; + nodemask_t mask; - task_lock(current); - cs = current->cpuset; - if (!cs) - goto done; - if (current->cpuset_mems_generation != cs->mems_generation) - need_to_refresh = 1; -done: - task_unlock(current); - if (need_to_refresh) - refresh_mems(); -} + down(&callback_sem); + task_lock(tsk); + guarantee_online_mems(tsk->cpuset, &mask); + task_unlock(tsk); + up(&callback_sem); -/** - * cpuset_restrict_to_mems_allowed - limit nodes to current mems_allowed - * @nodes: pointer to a node bitmap that is and-ed with mems_allowed - */ -void cpuset_restrict_to_mems_allowed(unsigned long *nodes) -{ - bitmap_and(nodes, nodes, nodes_addr(current->mems_allowed), - MAX_NUMNODES); + return mask; } /** @@ -1795,7 +2120,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * GFP_USER - only nodes in current tasks mems allowed ok. **/ -int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) +int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { int node; /* node that zone z is on */ const struct cpuset *cs; /* current cpuset ancestors */ @@ -1867,6 +2192,42 @@ done: } /* + * Collection of memory_pressure is suppressed unless + * this flag is enabled by writing "1" to the special + * cpuset file 'memory_pressure_enabled' in the root cpuset. + */ + +int cpuset_memory_pressure_enabled __read_mostly; + +/** + * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims. + * + * Keep a running average of the rate of synchronous (direct) + * page reclaim efforts initiated by tasks in each cpuset. + * + * This represents the rate at which some task in the cpuset + * ran low on memory on all nodes it was allowed to use, and + * had to enter the kernels page reclaim code in an effort to + * create more free memory by tossing clean pages or swapping + * or writing dirty pages. + * + * Display to user space in the per-cpuset read-only file + * "memory_pressure". Value displayed is an integer + * representing the recent rate of entry into the synchronous + * (direct) page reclaim by any task attached to the cpuset. + **/ + +void __cpuset_memory_pressure_bump(void) +{ + struct cpuset *cs; + + task_lock(current); + cs = current->cpuset; + fmeter_markevent(&cs->fmeter); + task_unlock(current); +} + +/* * proc_cpuset_show() * - Print tasks cpuset path into seq_file. * - Used for /proc/<pid>/cpuset. diff --git a/kernel/exit.c b/kernel/exit.c index ee515683b92d..caceabf3f230 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -72,7 +72,6 @@ repeat: __ptrace_unlink(p); BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); __exit_signal(p); - __exit_sighand(p); /* * Note that the fastpath in sys_times depends on __exit_signal having * updated the counters before a task is removed from the tasklist of @@ -258,7 +257,7 @@ static inline void reparent_to_init(void) void __set_special_pids(pid_t session, pid_t pgrp) { - struct task_struct *curr = current; + struct task_struct *curr = current->group_leader; if (curr->signal->session != session) { detach_pid(curr, PIDTYPE_SID); @@ -926,7 +925,6 @@ do_group_exit(int exit_code) /* Another thread got here before we took the lock. */ exit_code = sig->group_exit_code; else { - sig->flags = SIGNAL_GROUP_EXIT; sig->group_exit_code = exit_code; zap_other_threads(current); } diff --git a/kernel/fork.c b/kernel/fork.c index fb8572a42297..72e3252c6763 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -743,6 +743,14 @@ int unshare_files(void) EXPORT_SYMBOL(unshare_files); +void sighand_free_cb(struct rcu_head *rhp) +{ + struct sighand_struct *sp; + + sp = container_of(rhp, struct sighand_struct, rcu); + kmem_cache_free(sighand_cachep, sp); +} + static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) { struct sighand_struct *sig; @@ -752,7 +760,7 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t return 0; } sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); - tsk->sighand = sig; + rcu_assign_pointer(tsk->sighand, sig); if (!sig) return -ENOMEM; spin_lock_init(&sig->siglock); @@ -803,9 +811,6 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts sig->it_prof_expires = cputime_zero; sig->it_prof_incr = cputime_zero; - sig->tty = current->signal->tty; - sig->pgrp = process_group(current); - sig->session = current->signal->session; sig->leader = 0; /* session leadership doesn't inherit */ sig->tty_old_pgrp = 0; @@ -964,12 +969,13 @@ static task_t *copy_process(unsigned long clone_flags, p->io_context = NULL; p->io_wait = NULL; p->audit_context = NULL; + cpuset_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_copy(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; - goto bad_fork_cleanup; + goto bad_fork_cleanup_cpuset; } #endif @@ -1127,25 +1133,19 @@ static task_t *copy_process(unsigned long clone_flags, attach_pid(p, PIDTYPE_PID, p->pid); attach_pid(p, PIDTYPE_TGID, p->tgid); if (thread_group_leader(p)) { + p->signal->tty = current->signal->tty; + p->signal->pgrp = process_group(current); + p->signal->session = current->signal->session; attach_pid(p, PIDTYPE_PGID, process_group(p)); attach_pid(p, PIDTYPE_SID, p->signal->session); if (p->pid) __get_cpu_var(process_counts)++; } - if (!current->signal->tty && p->signal->tty) - p->signal->tty = NULL; - nr_threads++; total_forks++; write_unlock_irq(&tasklist_lock); proc_fork_connector(p); - cpuset_fork(p); - retval = 0; - -fork_out: - if (retval) - return ERR_PTR(retval); return p; bad_fork_cleanup_namespace: @@ -1172,7 +1172,9 @@ bad_fork_cleanup_security: bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_free(p->mempolicy); +bad_fork_cleanup_cpuset: #endif + cpuset_exit(p); bad_fork_cleanup: if (p->binfmt) module_put(p->binfmt->module); @@ -1184,7 +1186,8 @@ bad_fork_cleanup_count: free_uid(p->user); bad_fork_free: free_task(p); - goto fork_out; +fork_out: + return ERR_PTR(retval); } struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 8a64a4844cde..d03b5eef8ce0 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -10,6 +10,8 @@ #include <linux/proc_fs.h> #include <linux/interrupt.h> +#include "internals.h" + static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS]; #ifdef CONFIG_SMP diff --git a/kernel/module.c b/kernel/module.c index 4b06bbad49c2..e4276046a1b6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -496,15 +496,15 @@ static void module_unload_free(struct module *mod) } #ifdef CONFIG_MODULE_FORCE_UNLOAD -static inline int try_force(unsigned int flags) +static inline int try_force_unload(unsigned int flags) { int ret = (flags & O_TRUNC); if (ret) - add_taint(TAINT_FORCED_MODULE); + add_taint(TAINT_FORCED_RMMOD); return ret; } #else -static inline int try_force(unsigned int flags) +static inline int try_force_unload(unsigned int flags) { return 0; } @@ -524,7 +524,7 @@ static int __try_stop_module(void *_sref) /* If it's not unused, quit unless we are told to block. */ if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) { - if (!(*sref->forced = try_force(sref->flags))) + if (!(*sref->forced = try_force_unload(sref->flags))) return -EWOULDBLOCK; } @@ -609,7 +609,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags) /* If it has an init func, it must have an exit func to unload */ if ((mod->init != NULL && mod->exit == NULL) || mod->unsafe) { - forced = try_force(flags); + forced = try_force_unload(flags); if (!forced) { /* This module can't be removed */ ret = -EBUSY; @@ -958,7 +958,6 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, unsigned long ret; const unsigned long *crc; - spin_lock_irq(&modlist_lock); ret = __find_symbol(name, &owner, &crc, mod->license_gplok); if (ret) { /* use_module can fail due to OOM, or module unloading */ @@ -966,7 +965,6 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, !use_module(mod, owner)) ret = 0; } - spin_unlock_irq(&modlist_lock); return ret; } @@ -1204,6 +1202,39 @@ void *__symbol_get(const char *symbol) } EXPORT_SYMBOL_GPL(__symbol_get); +/* + * Ensure that an exported symbol [global namespace] does not already exist + * in the Kernel or in some other modules exported symbol table. + */ +static int verify_export_symbols(struct module *mod) +{ + const char *name = NULL; + unsigned long i, ret = 0; + struct module *owner; + const unsigned long *crc; + + for (i = 0; i < mod->num_syms; i++) + if (__find_symbol(mod->syms[i].name, &owner, &crc, 1)) { + name = mod->syms[i].name; + ret = -ENOEXEC; + goto dup; + } + + for (i = 0; i < mod->num_gpl_syms; i++) + if (__find_symbol(mod->gpl_syms[i].name, &owner, &crc, 1)) { + name = mod->gpl_syms[i].name; + ret = -ENOEXEC; + goto dup; + } + +dup: + if (ret) + printk(KERN_ERR "%s: exports duplicate symbol %s (owned by %s)\n", + mod->name, name, module_name(owner)); + + return ret; +} + /* Change all symbols so that sh_value encodes the pointer directly. */ static int simplify_symbols(Elf_Shdr *sechdrs, unsigned int symindex, @@ -1715,6 +1746,11 @@ static struct module *load_module(void __user *umod, /* Set up license info based on the info section */ set_license(mod, get_modinfo(sechdrs, infoindex, "license")); + if (strcmp(mod->name, "ndiswrapper") == 0) + add_taint(TAINT_PROPRIETARY_MODULE); + if (strcmp(mod->name, "driverloader") == 0) + add_taint(TAINT_PROPRIETARY_MODULE); + #ifdef CONFIG_MODULE_UNLOAD /* Set up MODINFO_ATTR fields */ setup_modinfo(mod, sechdrs, infoindex); @@ -1767,6 +1803,12 @@ static struct module *load_module(void __user *umod, goto cleanup; } + /* Find duplicate symbols */ + err = verify_export_symbols(mod); + + if (err < 0) + goto cleanup; + /* Set up and sort exception table */ mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); mod->extable = extable = (void *)sechdrs[exindex].sh_addr; diff --git a/kernel/pid.c b/kernel/pid.c index edba31c681ac..1acc07246991 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -136,7 +136,7 @@ struct pid * fastcall find_pid(enum pid_type type, int nr) struct hlist_node *elem; struct pid *pid; - hlist_for_each_entry(pid, elem, + hlist_for_each_entry_rcu(pid, elem, &pid_hash[type][pid_hashfn(nr)], pid_chain) { if (pid->nr == nr) return pid; @@ -150,15 +150,15 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr) task_pid = &task->pids[type]; pid = find_pid(type, nr); + task_pid->nr = nr; if (pid == NULL) { - hlist_add_head(&task_pid->pid_chain, - &pid_hash[type][pid_hashfn(nr)]); INIT_LIST_HEAD(&task_pid->pid_list); + hlist_add_head_rcu(&task_pid->pid_chain, + &pid_hash[type][pid_hashfn(nr)]); } else { INIT_HLIST_NODE(&task_pid->pid_chain); - list_add_tail(&task_pid->pid_list, &pid->pid_list); + list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list); } - task_pid->nr = nr; return 0; } @@ -170,20 +170,20 @@ static fastcall int __detach_pid(task_t *task, enum pid_type type) pid = &task->pids[type]; if (!hlist_unhashed(&pid->pid_chain)) { - hlist_del(&pid->pid_chain); - if (list_empty(&pid->pid_list)) + if (list_empty(&pid->pid_list)) { nr = pid->nr; - else { + hlist_del_rcu(&pid->pid_chain); + } else { pid_next = list_entry(pid->pid_list.next, struct pid, pid_list); /* insert next pid from pid_list to hash */ - hlist_add_head(&pid_next->pid_chain, - &pid_hash[type][pid_hashfn(pid_next->nr)]); + hlist_replace_rcu(&pid->pid_chain, + &pid_next->pid_chain); } } - list_del(&pid->pid_list); + list_del_rcu(&pid->pid_list); pid->nr = 0; return nr; diff --git a/kernel/printk.c b/kernel/printk.c index 5287be83e3e7..2251be80cd22 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -569,7 +569,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) p[1] <= '7' && p[2] == '>') { loglev_char = p[1]; p += 3; - printed_len += 3; + printed_len -= 3; } else { loglev_char = default_message_loglevel + '0'; @@ -584,7 +584,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) for (tp = tbuf; tp < tbuf + tlen; tp++) emit_log_char(*tp); - printed_len += tlen - 3; + printed_len += tlen; } else { if (p[0] != '<' || p[1] < '0' || p[1] > '7' || p[2] != '>') { @@ -592,8 +592,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) emit_log_char(default_message_loglevel + '0'); emit_log_char('>'); + printed_len += 3; } - printed_len += 3; } log_level_unknown = 0; if (!*p) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 656476eedb1b..cceaf09ac413 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -408,54 +408,62 @@ int ptrace_request(struct task_struct *child, long request, return ret; } -#ifndef __ARCH_SYS_PTRACE -static int ptrace_get_task_struct(long request, long pid, - struct task_struct **childp) +/** + * ptrace_traceme -- helper for PTRACE_TRACEME + * + * Performs checks and sets PT_PTRACED. + * Should be used by all ptrace implementations for PTRACE_TRACEME. + */ +int ptrace_traceme(void) { - struct task_struct *child; int ret; /* - * Callers use child == NULL as an indication to exit early even - * when the return value is 0, so make sure it is non-NULL here. + * Are we already being traced? + */ + if (current->ptrace & PT_PTRACED) + return -EPERM; + ret = security_ptrace(current->parent, current); + if (ret) + return -EPERM; + /* + * Set the ptrace bit in the process ptrace flags. */ - *childp = NULL; + current->ptrace |= PT_PTRACED; + return 0; +} - if (request == PTRACE_TRACEME) { - /* - * Are we already being traced? - */ - if (current->ptrace & PT_PTRACED) - return -EPERM; - ret = security_ptrace(current->parent, current); - if (ret) - return -EPERM; - /* - * Set the ptrace bit in the process ptrace flags. - */ - current->ptrace |= PT_PTRACED; - return 0; - } +/** + * ptrace_get_task_struct -- grab a task struct reference for ptrace + * @pid: process id to grab a task_struct reference of + * + * This function is a helper for ptrace implementations. It checks + * permissions and then grabs a task struct for use of the actual + * ptrace implementation. + * + * Returns the task_struct for @pid or an ERR_PTR() on failure. + */ +struct task_struct *ptrace_get_task_struct(pid_t pid) +{ + struct task_struct *child; /* - * You may not mess with init + * Tracing init is not allowed. */ if (pid == 1) - return -EPERM; + return ERR_PTR(-EPERM); - ret = -ESRCH; read_lock(&tasklist_lock); child = find_task_by_pid(pid); if (child) get_task_struct(child); read_unlock(&tasklist_lock); if (!child) - return -ESRCH; - - *childp = child; - return 0; + return ERR_PTR(-ESRCH); + return child; } +#ifndef __ARCH_SYS_PTRACE asmlinkage long sys_ptrace(long request, long pid, long addr, long data) { struct task_struct *child; @@ -465,9 +473,16 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) * This lock_kernel fixes a subtle race with suid exec */ lock_kernel(); - ret = ptrace_get_task_struct(request, pid, &child); - if (!child) + if (request == PTRACE_TRACEME) { + ret = ptrace_traceme(); goto out; + } + + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + goto out; + } if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 48d3bce465b8..30b0bba03859 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -35,6 +35,7 @@ #include <linux/init.h> #include <linux/spinlock.h> #include <linux/smp.h> +#include <linux/rcupdate.h> #include <linux/interrupt.h> #include <linux/sched.h> #include <asm/atomic.h> @@ -45,7 +46,6 @@ #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/rcupdate.h> -#include <linux/rcuref.h> #include <linux/cpu.h> /* Definition for rcupdate control block. */ @@ -61,9 +61,9 @@ struct rcu_state { /* for current batch to proceed. */ }; -static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp = +static struct rcu_state rcu_state ____cacheline_internodealigned_in_smp = {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }; -static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp = +static struct rcu_state rcu_bh_state ____cacheline_internodealigned_in_smp = {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }; DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; @@ -73,19 +73,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; static int maxbatch = 10000; -#ifndef __HAVE_ARCH_CMPXCHG -/* - * We use an array of spinlocks for the rcurefs -- similar to ones in sparc - * 32 bit atomic_t implementations, and a hash function similar to that - * for our refcounting needs. - * Can't help multiprocessors which donot have cmpxchg :( - */ - -spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = { - [0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED -}; -#endif - /** * call_rcu - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 49fbbeff201c..773219907dd8 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -39,7 +39,6 @@ #include <linux/moduleparam.h> #include <linux/percpu.h> #include <linux/notifier.h> -#include <linux/rcuref.h> #include <linux/cpu.h> #include <linux/random.h> #include <linux/delay.h> @@ -49,9 +48,11 @@ MODULE_LICENSE("GPL"); static int nreaders = -1; /* # reader threads, defaults to 4*ncpus */ -static int stat_interval = 0; /* Interval between stats, in seconds. */ +static int stat_interval; /* Interval between stats, in seconds. */ /* Defaults to "only at end of test". */ -static int verbose = 0; /* Print more debug info. */ +static int verbose; /* Print more debug info. */ +static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ +static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/ MODULE_PARM(nreaders, "i"); MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); @@ -59,6 +60,10 @@ MODULE_PARM(stat_interval, "i"); MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); MODULE_PARM(verbose, "i"); MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); +MODULE_PARM(test_no_idle_hz, "i"); +MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); +MODULE_PARM(shuffle_interval, "i"); +MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); #define TORTURE_FLAG "rcutorture: " #define PRINTK_STRING(s) \ do { printk(KERN_ALERT TORTURE_FLAG s "\n"); } while (0) @@ -73,6 +78,7 @@ static int nrealreaders; static struct task_struct *writer_task; static struct task_struct **reader_tasks; static struct task_struct *stats_task; +static struct task_struct *shuffler_task; #define RCU_TORTURE_PIPE_LEN 10 @@ -103,7 +109,7 @@ atomic_t n_rcu_torture_error; /* * Allocate an element from the rcu_tortures pool. */ -struct rcu_torture * +static struct rcu_torture * rcu_torture_alloc(void) { struct list_head *p; @@ -376,12 +382,77 @@ rcu_torture_stats(void *arg) return 0; } +static int rcu_idle_cpu; /* Force all torture tasks off this CPU */ + +/* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case + * is when @rcu_idle_cpu = -1, when we allow the tasks to run on all CPUs. + */ +void rcu_torture_shuffle_tasks(void) +{ + cpumask_t tmp_mask = CPU_MASK_ALL; + int i; + + lock_cpu_hotplug(); + + /* No point in shuffling if there is only one online CPU (ex: UP) */ + if (num_online_cpus() == 1) { + unlock_cpu_hotplug(); + return; + } + + if (rcu_idle_cpu != -1) + cpu_clear(rcu_idle_cpu, tmp_mask); + + set_cpus_allowed(current, tmp_mask); + + if (reader_tasks != NULL) { + for (i = 0; i < nrealreaders; i++) + if (reader_tasks[i]) + set_cpus_allowed(reader_tasks[i], tmp_mask); + } + + if (writer_task) + set_cpus_allowed(writer_task, tmp_mask); + + if (stats_task) + set_cpus_allowed(stats_task, tmp_mask); + + if (rcu_idle_cpu == -1) + rcu_idle_cpu = num_online_cpus() - 1; + else + rcu_idle_cpu--; + + unlock_cpu_hotplug(); +} + +/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the + * system to become idle at a time and cut off its timer ticks. This is meant + * to test the support for such tickless idle CPU in RCU. + */ +static int +rcu_torture_shuffle(void *arg) +{ + VERBOSE_PRINTK_STRING("rcu_torture_shuffle task started"); + do { + schedule_timeout_interruptible(shuffle_interval * HZ); + rcu_torture_shuffle_tasks(); + } while (!kthread_should_stop()); + VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping"); + return 0; +} + static void rcu_torture_cleanup(void) { int i; fullstop = 1; + if (shuffler_task != NULL) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task"); + kthread_stop(shuffler_task); + } + shuffler_task = NULL; + if (writer_task != NULL) { VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task"); kthread_stop(writer_task); @@ -430,9 +501,11 @@ rcu_torture_init(void) nrealreaders = nreaders; else nrealreaders = 2 * num_online_cpus(); - printk(KERN_ALERT TORTURE_FLAG - "--- Start of test: nreaders=%d stat_interval=%d verbose=%d\n", - nrealreaders, stat_interval, verbose); + printk(KERN_ALERT TORTURE_FLAG "--- Start of test: nreaders=%d " + "stat_interval=%d verbose=%d test_no_idle_hz=%d " + "shuffle_interval = %d\n", + nrealreaders, stat_interval, verbose, test_no_idle_hz, + shuffle_interval); fullstop = 0; /* Set up the freelist. */ @@ -502,6 +575,18 @@ rcu_torture_init(void) goto unwind; } } + if (test_no_idle_hz) { + rcu_idle_cpu = num_online_cpus() - 1; + /* Create the shuffler thread */ + shuffler_task = kthread_run(rcu_torture_shuffle, NULL, + "rcu_torture_shuffle"); + if (IS_ERR(shuffler_task)) { + firsterr = PTR_ERR(shuffler_task); + VERBOSE_PRINTK_ERRSTRING("Failed to create shuffler"); + shuffler_task = NULL; + goto unwind; + } + } return 0; unwind: diff --git a/kernel/sched.c b/kernel/sched.c index 6f46c94cc29e..92733091154c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -176,6 +176,13 @@ static unsigned int task_timeslice(task_t *p) #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ < (long long) (sd)->cache_hot_time) +void __put_task_struct_cb(struct rcu_head *rhp) +{ + __put_task_struct(container_of(rhp, struct task_struct, rcu)); +} + +EXPORT_SYMBOL_GPL(__put_task_struct_cb); + /* * These are the runqueue data structures: */ diff --git a/kernel/signal.c b/kernel/signal.c index d7611f189ef7..08aa5b263f36 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -329,13 +329,20 @@ void __exit_sighand(struct task_struct *tsk) /* Ok, we're done with the signal handlers */ tsk->sighand = NULL; if (atomic_dec_and_test(&sighand->count)) - kmem_cache_free(sighand_cachep, sighand); + sighand_free(sighand); } void exit_sighand(struct task_struct *tsk) { write_lock_irq(&tasklist_lock); - __exit_sighand(tsk); + rcu_read_lock(); + if (tsk->sighand != NULL) { + struct sighand_struct *sighand = rcu_dereference(tsk->sighand); + spin_lock(&sighand->siglock); + __exit_sighand(tsk); + spin_unlock(&sighand->siglock); + } + rcu_read_unlock(); write_unlock_irq(&tasklist_lock); } @@ -345,19 +352,20 @@ void exit_sighand(struct task_struct *tsk) void __exit_signal(struct task_struct *tsk) { struct signal_struct * sig = tsk->signal; - struct sighand_struct * sighand = tsk->sighand; + struct sighand_struct * sighand; if (!sig) BUG(); if (!atomic_read(&sig->count)) BUG(); + rcu_read_lock(); + sighand = rcu_dereference(tsk->sighand); spin_lock(&sighand->siglock); posix_cpu_timers_exit(tsk); if (atomic_dec_and_test(&sig->count)) { posix_cpu_timers_exit_group(tsk); - if (tsk == sig->curr_target) - sig->curr_target = next_thread(tsk); tsk->signal = NULL; + __exit_sighand(tsk); spin_unlock(&sighand->siglock); flush_sigqueue(&sig->shared_pending); } else { @@ -389,9 +397,11 @@ void __exit_signal(struct task_struct *tsk) sig->nvcsw += tsk->nvcsw; sig->nivcsw += tsk->nivcsw; sig->sched_time += tsk->sched_time; + __exit_sighand(tsk); spin_unlock(&sighand->siglock); sig = NULL; /* Marker for below. */ } + rcu_read_unlock(); clear_tsk_thread_flag(tsk,TIF_SIGPENDING); flush_sigqueue(&tsk->pending); if (sig) { @@ -613,6 +623,33 @@ void signal_wake_up(struct task_struct *t, int resume) * Returns 1 if any signals were found. * * All callers must be holding the siglock. + * + * This version takes a sigset mask and looks at all signals, + * not just those in the first mask word. + */ +static int rm_from_queue_full(sigset_t *mask, struct sigpending *s) +{ + struct sigqueue *q, *n; + sigset_t m; + + sigandsets(&m, mask, &s->signal); + if (sigisemptyset(&m)) + return 0; + + signandsets(&s->signal, &s->signal, mask); + list_for_each_entry_safe(q, n, &s->list, list) { + if (sigismember(mask, q->info.si_signo)) { + list_del_init(&q->list); + __sigqueue_free(q); + } + } + return 1; +} +/* + * Remove signals in mask from the pending set and queue. + * Returns 1 if any signals were found. + * + * All callers must be holding the siglock. */ static int rm_from_queue(unsigned long mask, struct sigpending *s) { @@ -1080,18 +1117,29 @@ void zap_other_threads(struct task_struct *p) } /* - * Must be called with the tasklist_lock held for reading! + * Must be called under rcu_read_lock() or with tasklist_lock read-held. */ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { unsigned long flags; + struct sighand_struct *sp; int ret; +retry: ret = check_kill_permission(sig, info, p); - if (!ret && sig && p->sighand) { - spin_lock_irqsave(&p->sighand->siglock, flags); + if (!ret && sig && (sp = rcu_dereference(p->sighand))) { + spin_lock_irqsave(&sp->siglock, flags); + if (p->sighand != sp) { + spin_unlock_irqrestore(&sp->siglock, flags); + goto retry; + } + if ((atomic_read(&sp->count) == 0) || + (atomic_read(&p->usage) == 0)) { + spin_unlock_irqrestore(&sp->siglock, flags); + return -ESRCH; + } ret = __group_send_sig_info(sig, info, p); - spin_unlock_irqrestore(&p->sighand->siglock, flags); + spin_unlock_irqrestore(&sp->siglock, flags); } return ret; @@ -1136,14 +1184,21 @@ int kill_proc_info(int sig, struct siginfo *info, pid_t pid) { int error; + int acquired_tasklist_lock = 0; struct task_struct *p; - read_lock(&tasklist_lock); + rcu_read_lock(); + if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) { + read_lock(&tasklist_lock); + acquired_tasklist_lock = 1; + } p = find_task_by_pid(pid); error = -ESRCH; if (p) error = group_send_sig_info(sig, info, p); - read_unlock(&tasklist_lock); + if (unlikely(acquired_tasklist_lock)) + read_unlock(&tasklist_lock); + rcu_read_unlock(); return error; } @@ -1163,8 +1218,7 @@ int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid, ret = -ESRCH; goto out_unlock; } - if ((!info || ((unsigned long)info != 1 && - (unsigned long)info != 2 && SI_FROMUSER(info))) + if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) && (euid != p->suid) && (euid != p->uid) && (uid != p->suid) && (uid != p->uid)) { ret = -EPERM; @@ -1355,16 +1409,54 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) { unsigned long flags; int ret = 0; + struct sighand_struct *sh; BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); - read_lock(&tasklist_lock); + + /* + * The rcu based delayed sighand destroy makes it possible to + * run this without tasklist lock held. The task struct itself + * cannot go away as create_timer did get_task_struct(). + * + * We return -1, when the task is marked exiting, so + * posix_timer_event can redirect it to the group leader + */ + rcu_read_lock(); if (unlikely(p->flags & PF_EXITING)) { ret = -1; goto out_err; } - spin_lock_irqsave(&p->sighand->siglock, flags); +retry: + sh = rcu_dereference(p->sighand); + + spin_lock_irqsave(&sh->siglock, flags); + if (p->sighand != sh) { + /* We raced with exec() in a multithreaded process... */ + spin_unlock_irqrestore(&sh->siglock, flags); + goto retry; + } + + /* + * We do the check here again to handle the following scenario: + * + * CPU 0 CPU 1 + * send_sigqueue + * check PF_EXITING + * interrupt exit code running + * __exit_signal + * lock sighand->siglock + * unlock sighand->siglock + * lock sh->siglock + * add(tsk->pending) flush_sigqueue(tsk->pending) + * + */ + + if (unlikely(p->flags & PF_EXITING)) { + ret = -1; + goto out; + } if (unlikely(!list_empty(&q->list))) { /* @@ -1388,9 +1480,9 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) signal_wake_up(p, sig == SIGKILL); out: - spin_unlock_irqrestore(&p->sighand->siglock, flags); + spin_unlock_irqrestore(&sh->siglock, flags); out_err: - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } @@ -1402,7 +1494,9 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) int ret = 0; BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); + read_lock(&tasklist_lock); + /* Since it_lock is held, p->sighand cannot be NULL. */ spin_lock_irqsave(&p->sighand->siglock, flags); handle_stop_signal(sig, p); @@ -1436,7 +1530,7 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) out: spin_unlock_irqrestore(&p->sighand->siglock, flags); read_unlock(&tasklist_lock); - return(ret); + return ret; } /* @@ -2338,6 +2432,7 @@ int do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) { struct k_sigaction *k; + sigset_t mask; if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig))) return -EINVAL; @@ -2385,9 +2480,11 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) *k = *act; sigdelsetmask(&k->sa.sa_mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - rm_from_queue(sigmask(sig), &t->signal->shared_pending); + sigemptyset(&mask); + sigaddset(&mask, sig); + rm_from_queue_full(&mask, &t->signal->shared_pending); do { - rm_from_queue(sigmask(sig), &t->pending); + rm_from_queue_full(&mask, &t->pending); recalc_sigpending_tsk(t); t = next_thread(t); } while (t != current); diff --git a/kernel/sys.c b/kernel/sys.c index eecf84526afe..b6941e06d5d5 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -489,6 +489,12 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user magic2 != LINUX_REBOOT_MAGIC2C)) return -EINVAL; + /* Instead of trying to make the power_off code look like + * halt when pm_power_off is not set do it the easy way. + */ + if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) + cmd = LINUX_REBOOT_CMD_HALT; + lock_kernel(); switch (cmd) { case LINUX_REBOOT_CMD_RESTART: @@ -1084,10 +1090,11 @@ asmlinkage long sys_times(struct tms __user * tbuf) asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) { struct task_struct *p; + struct task_struct *group_leader = current->group_leader; int err = -EINVAL; if (!pid) - pid = current->pid; + pid = group_leader->pid; if (!pgid) pgid = pid; if (pgid < 0) @@ -1107,16 +1114,16 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) if (!thread_group_leader(p)) goto out; - if (p->parent == current || p->real_parent == current) { + if (p->real_parent == group_leader) { err = -EPERM; - if (p->signal->session != current->signal->session) + if (p->signal->session != group_leader->signal->session) goto out; err = -EACCES; if (p->did_exec) goto out; } else { err = -ESRCH; - if (p != current) + if (p != group_leader) goto out; } @@ -1128,7 +1135,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) struct task_struct *p; do_each_task_pid(pgid, PIDTYPE_PGID, p) { - if (p->signal->session == current->signal->session) + if (p->signal->session == group_leader->signal->session) goto ok_pgid; } while_each_task_pid(pgid, PIDTYPE_PGID, p); goto out; @@ -1208,24 +1215,22 @@ asmlinkage long sys_getsid(pid_t pid) asmlinkage long sys_setsid(void) { + struct task_struct *group_leader = current->group_leader; struct pid *pid; int err = -EPERM; - if (!thread_group_leader(current)) - return -EINVAL; - down(&tty_sem); write_lock_irq(&tasklist_lock); - pid = find_pid(PIDTYPE_PGID, current->pid); + pid = find_pid(PIDTYPE_PGID, group_leader->pid); if (pid) goto out; - current->signal->leader = 1; - __set_special_pids(current->pid, current->pid); - current->signal->tty = NULL; - current->signal->tty_old_pgrp = 0; - err = process_group(current); + group_leader->signal->leader = 1; + __set_special_pids(group_leader->pid, group_leader->pid); + group_leader->signal->tty = NULL; + group_leader->signal->tty_old_pgrp = 0; + err = process_group(group_leader); out: write_unlock_irq(&tasklist_lock); up(&tty_sem); @@ -1687,7 +1692,10 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) if (unlikely(!p->signal)) return; + utime = stime = cputime_zero; + switch (who) { + case RUSAGE_BOTH: case RUSAGE_CHILDREN: spin_lock_irqsave(&p->sighand->siglock, flags); utime = p->signal->cutime; @@ -1697,22 +1705,11 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_minflt = p->signal->cmin_flt; r->ru_majflt = p->signal->cmaj_flt; spin_unlock_irqrestore(&p->sighand->siglock, flags); - cputime_to_timeval(utime, &r->ru_utime); - cputime_to_timeval(stime, &r->ru_stime); - break; + + if (who == RUSAGE_CHILDREN) + break; + case RUSAGE_SELF: - spin_lock_irqsave(&p->sighand->siglock, flags); - utime = stime = cputime_zero; - goto sum_group; - case RUSAGE_BOTH: - spin_lock_irqsave(&p->sighand->siglock, flags); - utime = p->signal->cutime; - stime = p->signal->cstime; - r->ru_nvcsw = p->signal->cnvcsw; - r->ru_nivcsw = p->signal->cnivcsw; - r->ru_minflt = p->signal->cmin_flt; - r->ru_majflt = p->signal->cmaj_flt; - sum_group: utime = cputime_add(utime, p->signal->utime); stime = cputime_add(stime, p->signal->stime); r->ru_nvcsw += p->signal->nvcsw; @@ -1729,13 +1726,14 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_majflt += t->maj_flt; t = next_thread(t); } while (t != p); - spin_unlock_irqrestore(&p->sighand->siglock, flags); - cputime_to_timeval(utime, &r->ru_utime); - cputime_to_timeval(stime, &r->ru_stime); break; + default: BUG(); } + + cputime_to_timeval(utime, &r->ru_utime); + cputime_to_timeval(stime, &r->ru_stime); } int getrusage(struct task_struct *p, int who, struct rusage __user *ru) diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 1ab2370e2efa..bd3b9bfcfcec 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -82,6 +82,28 @@ cond_syscall(compat_sys_socketcall); cond_syscall(sys_inotify_init); cond_syscall(sys_inotify_add_watch); cond_syscall(sys_inotify_rm_watch); +cond_syscall(sys_migrate_pages); +cond_syscall(sys_chown16); +cond_syscall(sys_fchown16); +cond_syscall(sys_getegid16); +cond_syscall(sys_geteuid16); +cond_syscall(sys_getgid16); +cond_syscall(sys_getgroups16); +cond_syscall(sys_getresgid16); +cond_syscall(sys_getresuid16); +cond_syscall(sys_getuid16); +cond_syscall(sys_lchown16); +cond_syscall(sys_setfsgid16); +cond_syscall(sys_setfsuid16); +cond_syscall(sys_setgid16); +cond_syscall(sys_setgroups16); +cond_syscall(sys_setregid16); +cond_syscall(sys_setresgid16); +cond_syscall(sys_setresuid16); +cond_syscall(sys_setreuid16); +cond_syscall(sys_setuid16); +cond_syscall(sys_vm86old); +cond_syscall(sys_vm86); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a85047bb5739..03b0598f2369 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -68,6 +68,8 @@ extern int min_free_kbytes; extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; extern int pid_max_min, pid_max_max; +extern int sysctl_drop_caches; +extern int percpu_pagelist_fraction; #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) int unknown_nmi_panic; @@ -78,6 +80,7 @@ extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *, /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid; +static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; @@ -775,6 +778,15 @@ static ctl_table vm_table[] = { .strategy = &sysctl_intvec, }, { + .ctl_name = VM_DROP_PAGECACHE, + .procname = "drop_caches", + .data = &sysctl_drop_caches, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = drop_caches_sysctl_handler, + .strategy = &sysctl_intvec, + }, + { .ctl_name = VM_MIN_FREE_KBYTES, .procname = "min_free_kbytes", .data = &min_free_kbytes, @@ -784,6 +796,16 @@ static ctl_table vm_table[] = { .strategy = &sysctl_intvec, .extra1 = &zero, }, + { + .ctl_name = VM_PERCPU_PAGELIST_FRACTION, + .procname = "percpu_pagelist_fraction", + .data = &percpu_pagelist_fraction, + .maxlen = sizeof(percpu_pagelist_fraction), + .mode = 0644, + .proc_handler = &percpu_pagelist_fraction_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &min_percpu_pagelist_fract, + }, #ifdef CONFIG_MMU { .ctl_name = VM_MAX_MAP_COUNT, diff --git a/kernel/timer.c b/kernel/timer.c index fd74268d8663..074b4bd5cfd8 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -33,6 +33,7 @@ #include <linux/posix-timers.h> #include <linux/cpu.h> #include <linux/syscalls.h> +#include <linux/delay.h> #include <asm/uaccess.h> #include <asm/unistd.h> diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2bd5aee1c736..82c4fa70595c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -29,7 +29,8 @@ #include <linux/kthread.h> /* - * The per-CPU workqueue (if single thread, we always use cpu 0's). + * The per-CPU workqueue (if single thread, we always use the first + * possible cpu). * * The sequence counters are for flush_scheduled_work(). It wants to wait * until until all currently-scheduled works are completed, but it doesn't @@ -69,6 +70,8 @@ struct workqueue_struct { static DEFINE_SPINLOCK(workqueue_lock); static LIST_HEAD(workqueues); +static int singlethread_cpu; + /* If it's single threaded, it isn't in the list of workqueues. */ static inline int is_single_threaded(struct workqueue_struct *wq) { @@ -102,7 +105,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) if (!test_and_set_bit(0, &work->pending)) { if (unlikely(is_single_threaded(wq))) - cpu = any_online_cpu(cpu_online_map); + cpu = singlethread_cpu; BUG_ON(!list_empty(&work->entry)); __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work); ret = 1; @@ -118,7 +121,7 @@ static void delayed_work_timer_fn(unsigned long __data) int cpu = smp_processor_id(); if (unlikely(is_single_threaded(wq))) - cpu = any_online_cpu(cpu_online_map); + cpu = singlethread_cpu; __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work); } @@ -267,7 +270,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) if (is_single_threaded(wq)) { /* Always use first cpu's area. */ - flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, any_online_cpu(cpu_online_map))); + flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, singlethread_cpu)); } else { int cpu; @@ -315,12 +318,17 @@ struct workqueue_struct *__create_workqueue(const char *name, return NULL; wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct); + if (!wq->cpu_wq) { + kfree(wq); + return NULL; + } + wq->name = name; /* We don't need the distraction of CPUs appearing and vanishing. */ lock_cpu_hotplug(); if (singlethread) { INIT_LIST_HEAD(&wq->list); - p = create_workqueue_thread(wq, any_online_cpu(cpu_online_map)); + p = create_workqueue_thread(wq, singlethread_cpu); if (!p) destroy = 1; else @@ -374,7 +382,7 @@ void destroy_workqueue(struct workqueue_struct *wq) /* We don't need the distraction of CPUs appearing and vanishing. */ lock_cpu_hotplug(); if (is_single_threaded(wq)) - cleanup_workqueue_thread(wq, any_online_cpu(cpu_online_map)); + cleanup_workqueue_thread(wq, singlethread_cpu); else { for_each_online_cpu(cpu) cleanup_workqueue_thread(wq, cpu); @@ -419,6 +427,25 @@ int schedule_delayed_work_on(int cpu, return ret; } +int schedule_on_each_cpu(void (*func) (void *info), void *info) +{ + int cpu; + struct work_struct *work; + + work = kmalloc(NR_CPUS * sizeof(struct work_struct), GFP_KERNEL); + + if (!work) + return -ENOMEM; + for_each_online_cpu(cpu) { + INIT_WORK(work + cpu, func, info); + __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), + work + cpu); + } + flush_workqueue(keventd_wq); + kfree(work); + return 0; +} + void flush_scheduled_work(void) { flush_workqueue(keventd_wq); @@ -543,6 +570,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, void init_workqueues(void) { + singlethread_cpu = first_cpu(cpu_possible_map); hotcpu_notifier(workqueue_cpu_callback, 0); keventd_wq = create_workqueue("events"); BUG_ON(!keventd_wq); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 80598cfd728c..c48260fb8fd9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -79,7 +79,7 @@ config SCHEDSTATS config DEBUG_SLAB bool "Debug memory allocations" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && SLAB help Say Y here to have the kernel do limited verification on memory allocation as well as poisoning memory on free to catch use of freed diff --git a/lib/bitmap.c b/lib/bitmap.c index 23d3b1147fe9..48e708381d44 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -519,7 +519,7 @@ EXPORT_SYMBOL(bitmap_parselist); * * Map the bit at position @pos in @buf (of length @bits) to the * ordinal of which set bit it is. If it is not set or if @pos - * is not a valid bit position, map to zero (0). + * is not a valid bit position, map to -1. * * If for example, just bits 4 through 7 are set in @buf, then @pos * values 4 through 7 will get mapped to 0 through 3, respectively, @@ -531,18 +531,19 @@ EXPORT_SYMBOL(bitmap_parselist); */ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits) { - int ord = 0; + int i, ord; - if (pos >= 0 && pos < bits) { - int i; + if (pos < 0 || pos >= bits || !test_bit(pos, buf)) + return -1; - for (i = find_first_bit(buf, bits); - i < pos; - i = find_next_bit(buf, bits, i + 1)) - ord++; - if (i > pos) - ord = 0; + i = find_first_bit(buf, bits); + ord = 0; + while (i < pos) { + i = find_next_bit(buf, bits, i + 1); + ord++; } + BUG_ON(i != pos); + return ord; } @@ -553,11 +554,12 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits) * @bits: number of valid bit positions in @buf * * Map the ordinal offset of bit @ord in @buf to its position in @buf. - * If @ord is not the ordinal offset of a set bit in @buf, map to zero (0). + * Value of @ord should be in range 0 <= @ord < weight(buf), else + * results are undefined. * * If for example, just bits 4 through 7 are set in @buf, then @ord * values 0 through 3 will get mapped to 4 through 7, respectively, - * and all other @ord valuds will get mapped to 0. When @ord value 3 + * and all other @ord values return undefined values. When @ord value 3 * gets mapped to (returns) @pos value 7 in this example, that means * that the 3rd set bit (starting with 0th) is at position 7 in @buf. * @@ -583,8 +585,8 @@ static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) /** * bitmap_remap - Apply map defined by a pair of bitmaps to another bitmap - * @src: subset to be remapped * @dst: remapped result + * @src: subset to be remapped * @old: defines domain of map * @new: defines range of map * @bits: number of bits in each of these bitmaps @@ -596,49 +598,42 @@ static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) * weight of @old, map the position of the n-th set bit in @old to * the position of the m-th set bit in @new, where m == n % w. * - * If either of the @old and @new bitmaps are empty, or if@src and @dst - * point to the same location, then this routine does nothing. + * If either of the @old and @new bitmaps are empty, or if @src and + * @dst point to the same location, then this routine copies @src + * to @dst. * - * The positions of unset bits in @old are mapped to the position of - * the first set bit in @new. + * The positions of unset bits in @old are mapped to themselves + * (the identify map). * * Apply the above specified mapping to @src, placing the result in * @dst, clearing any bits previously set in @dst. * - * The resulting value of @dst will have either the same weight as - * @src, or less weight in the general case that the mapping wasn't - * injective due to the weight of @new being less than that of @old. - * The resulting value of @dst will never have greater weight than - * that of @src, except perhaps in the case that one of the above - * conditions was not met and this routine just returned. - * * For example, lets say that @old has bits 4 through 7 set, and * @new has bits 12 through 15 set. This defines the mapping of bit * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other - * bit positions to 12 (the first set bit in @new. So if say @src - * comes into this routine with bits 1, 5 and 7 set, then @dst should - * leave with bits 12, 13 and 15 set. + * bit positions unchanged. So if say @src comes into this routine + * with bits 1, 5 and 7 set, then @dst should leave with bits 1, + * 13 and 15 set. */ void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, int bits) { - int s; + int oldbit, w; - if (bitmap_weight(old, bits) == 0) - return; - if (bitmap_weight(new, bits) == 0) - return; if (dst == src) /* following doesn't handle inplace remaps */ return; - bitmap_zero(dst, bits); - for (s = find_first_bit(src, bits); - s < bits; - s = find_next_bit(src, bits, s + 1)) { - int x = bitmap_pos_to_ord(old, s, bits); - int y = bitmap_ord_to_pos(new, x, bits); - set_bit(y, dst); + + w = bitmap_weight(new, bits); + for (oldbit = find_first_bit(src, bits); + oldbit < bits; + oldbit = find_next_bit(src, bits, oldbit + 1)) { + int n = bitmap_pos_to_ord(old, oldbit, bits); + if (n < 0 || w == 0) + set_bit(oldbit, dst); /* identity map */ + else + set_bit(bitmap_ord_to_pos(new, n % w, bits), dst); } } EXPORT_SYMBOL(bitmap_remap); @@ -657,8 +652,8 @@ EXPORT_SYMBOL(bitmap_remap); * weight of @old, map the position of the n-th set bit in @old to * the position of the m-th set bit in @new, where m == n % w. * - * The positions of unset bits in @old are mapped to the position of - * the first set bit in @new. + * The positions of unset bits in @old are mapped to themselves + * (the identify map). * * Apply the above specified mapping to bit position @oldbit, returning * the new bit position. @@ -666,14 +661,18 @@ EXPORT_SYMBOL(bitmap_remap); * For example, lets say that @old has bits 4 through 7 set, and * @new has bits 12 through 15 set. This defines the mapping of bit * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other - * bit positions to 12 (the first set bit in @new. So if say @oldbit - * is 5, then this routine returns 13. + * bit positions unchanged. So if say @oldbit is 5, then this routine + * returns 13. */ int bitmap_bitremap(int oldbit, const unsigned long *old, const unsigned long *new, int bits) { - int x = bitmap_pos_to_ord(old, oldbit, bits); - return bitmap_ord_to_pos(new, x, bits); + int w = bitmap_weight(new, bits); + int n = bitmap_pos_to_ord(old, oldbit, bits); + if (n < 0 || w == 0) + return oldbit; + else + return bitmap_ord_to_pos(new, n % w, bits); } EXPORT_SYMBOL(bitmap_bitremap); diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index 305a9663aee3..a65c31455541 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -1,47 +1,11 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <asm/atomic.h> -#include <asm/system.h> -#ifdef __HAVE_ARCH_CMPXCHG /* * This is an implementation of the notion of "decrement a * reference count, and return locked if it decremented to zero". * - * This implementation can be used on any architecture that - * has a cmpxchg, and where atomic->value is an int holding - * the value of the atomic (i.e. the high bits aren't used - * for a lock or anything like that). - */ -int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) -{ - int counter; - int newcount; - - for (;;) { - counter = atomic_read(atomic); - newcount = counter - 1; - if (!newcount) - break; /* do it the slow way */ - - newcount = cmpxchg(&atomic->counter, counter, newcount); - if (newcount == counter) - return 0; - } - - spin_lock(lock); - if (atomic_dec_and_test(atomic)) - return 1; - spin_unlock(lock); - return 0; -} -#else -/* - * This is an architecture-neutral, but slow, - * implementation of the notion of "decrement - * a reference count, and return locked if it - * decremented to zero". - * * NOTE NOTE NOTE! This is _not_ equivalent to * * if (atomic_dec_and_test(&atomic)) { @@ -52,21 +16,20 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) * * because the spin-lock and the decrement must be * "atomic". - * - * This slow version gets the spinlock unconditionally, - * and releases it if it isn't needed. Architectures - * are encouraged to come up with better approaches, - * this is trivially done efficiently using a load-locked - * store-conditional approach, for example. */ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) { +#ifdef CONFIG_SMP + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ + if (atomic_add_unless(atomic, -1, 1)) + return 0; +#endif + /* Otherwise do it the slow way */ spin_lock(lock); if (atomic_dec_and_test(atomic)) return 1; spin_unlock(lock); return 0; } -#endif EXPORT_SYMBOL(_atomic_dec_and_lock); diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index d08302d2a42c..c05b4b19cf6c 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -10,6 +10,7 @@ */ #include <linux/bitops.h> +#include <linux/module.h> int find_next_bit(const unsigned long *addr, int size, int offset) { @@ -53,3 +54,5 @@ int find_next_bit(const unsigned long *addr, int size, int offset) return offset; } + +EXPORT_SYMBOL(find_next_bit); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 88511c3805ad..c0bd4a914803 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -137,18 +137,31 @@ out: static inline void tag_set(struct radix_tree_node *node, int tag, int offset) { - if (!test_bit(offset, &node->tags[tag][0])) - __set_bit(offset, &node->tags[tag][0]); + __set_bit(offset, node->tags[tag]); } static inline void tag_clear(struct radix_tree_node *node, int tag, int offset) { - __clear_bit(offset, &node->tags[tag][0]); + __clear_bit(offset, node->tags[tag]); } static inline int tag_get(struct radix_tree_node *node, int tag, int offset) { - return test_bit(offset, &node->tags[tag][0]); + return test_bit(offset, node->tags[tag]); +} + +/* + * Returns 1 if any slot in the node has this tag set. + * Otherwise returns 0. + */ +static inline int any_tag_set(struct radix_tree_node *node, int tag) +{ + int idx; + for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { + if (node->tags[tag][idx]) + return 1; + } + return 0; } /* @@ -185,15 +198,9 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) * into the newly-pushed top-level node(s) */ for (tag = 0; tag < RADIX_TREE_TAGS; tag++) { - int idx; - tags[tag] = 0; - for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { - if (root->rnode->tags[tag][idx]) { - tags[tag] = 1; - break; - } - } + if (any_tag_set(root->rnode, tag)) + tags[tag] = 1; } do { @@ -246,7 +253,7 @@ int radix_tree_insert(struct radix_tree_root *root, shift = (height-1) * RADIX_TREE_MAP_SHIFT; offset = 0; /* uninitialised var warning */ - while (height > 0) { + do { if (slot == NULL) { /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) @@ -264,18 +271,16 @@ int radix_tree_insert(struct radix_tree_root *root, slot = node->slots[offset]; shift -= RADIX_TREE_MAP_SHIFT; height--; - } + } while (height > 0); if (slot != NULL) return -EEXIST; - if (node) { - node->count++; - node->slots[offset] = item; - BUG_ON(tag_get(node, 0, offset)); - BUG_ON(tag_get(node, 1, offset)); - } else - root->rnode = item; + BUG_ON(!node); + node->count++; + node->slots[offset] = item; + BUG_ON(tag_get(node, 0, offset)); + BUG_ON(tag_get(node, 1, offset)); return 0; } @@ -367,7 +372,8 @@ void *radix_tree_tag_set(struct radix_tree_root *root, int offset; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - tag_set(slot, tag, offset); + if (!tag_get(slot, tag, offset)) + tag_set(slot, tag, offset); slot = slot->slots[offset]; BUG_ON(slot == NULL); shift -= RADIX_TREE_MAP_SHIFT; @@ -427,13 +433,11 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, goto out; do { - int idx; - + if (!tag_get(pathp->node, tag, pathp->offset)) + goto out; tag_clear(pathp->node, tag, pathp->offset); - for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { - if (pathp->node->tags[tag][idx]) - goto out; - } + if (any_tag_set(pathp->node, tag)) + goto out; pathp--; } while (pathp->node); out: @@ -674,6 +678,29 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, EXPORT_SYMBOL(radix_tree_gang_lookup_tag); /** + * radix_tree_shrink - shrink height of a radix tree to minimal + * @root radix tree root + */ +static inline void radix_tree_shrink(struct radix_tree_root *root) +{ + /* try to shrink tree height */ + while (root->height > 1 && + root->rnode->count == 1 && + root->rnode->slots[0]) { + struct radix_tree_node *to_free = root->rnode; + + root->rnode = to_free->slots[0]; + root->height--; + /* must only free zeroed nodes into the slab */ + tag_clear(to_free, 0, 0); + tag_clear(to_free, 1, 0); + to_free->slots[0] = NULL; + to_free->count = 0; + radix_tree_node_free(to_free); + } +} + +/** * radix_tree_delete - delete an item from a radix tree * @root: radix tree root * @index: index key @@ -691,6 +718,8 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) void *ret = NULL; char tags[RADIX_TREE_TAGS]; int nr_cleared_tags; + int tag; + int offset; height = root->height; if (index > radix_tree_maxindex(height)) @@ -701,16 +730,14 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) slot = root->rnode; for ( ; height > 0; height--) { - int offset; - if (slot == NULL) goto out; + pathp++; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - pathp[1].offset = offset; - pathp[1].node = slot; + pathp->offset = offset; + pathp->node = slot; slot = slot->slots[offset]; - pathp++; shift -= RADIX_TREE_MAP_SHIFT; } @@ -723,35 +750,39 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) /* * Clear all tags associated with the just-deleted item */ - memset(tags, 0, sizeof(tags)); - do { - int tag; + nr_cleared_tags = 0; + for (tag = 0; tag < RADIX_TREE_TAGS; tag++) { + if (tag_get(pathp->node, tag, pathp->offset)) { + tag_clear(pathp->node, tag, pathp->offset); + tags[tag] = 0; + nr_cleared_tags++; + } else + tags[tag] = 1; + } - nr_cleared_tags = RADIX_TREE_TAGS; + for (pathp--; nr_cleared_tags && pathp->node; pathp--) { for (tag = 0; tag < RADIX_TREE_TAGS; tag++) { - int idx; - if (tags[tag]) continue; tag_clear(pathp->node, tag, pathp->offset); - - for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { - if (pathp->node->tags[tag][idx]) { - tags[tag] = 1; - nr_cleared_tags--; - break; - } + if (any_tag_set(pathp->node, tag)) { + tags[tag] = 1; + nr_cleared_tags--; } } - pathp--; - } while (pathp->node && nr_cleared_tags); + } /* Now free the nodes we do not need anymore */ for (pathp = orig_pathp; pathp->node; pathp--) { pathp->node->slots[pathp->offset] = NULL; - if (--pathp->node->count) + pathp->node->count--; + + if (pathp->node->count) { + if (pathp->node == root->rnode) + radix_tree_shrink(root); goto out; + } /* Node with zero slots in use so free it */ radix_tree_node_free(pathp->node); @@ -770,15 +801,11 @@ EXPORT_SYMBOL(radix_tree_delete); */ int radix_tree_tagged(struct radix_tree_root *root, int tag) { - int idx; - - if (!root->rnode) - return 0; - for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { - if (root->rnode->tags[tag][idx]) - return 1; - } - return 0; + struct radix_tree_node *rnode; + rnode = root->rnode; + if (!rnode) + return 0; + return any_tag_set(rnode, tag); } EXPORT_SYMBOL(radix_tree_tagged); diff --git a/mm/Kconfig b/mm/Kconfig index b3db11f137e0..a9cb80ae6409 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -132,3 +132,10 @@ config SPLIT_PTLOCK_CPUS default "4096" if ARM && !CPU_CACHE_VIPT default "4096" if PARISC && !PA20 default "4" + +# +# support for page migration +# +config MIGRATION + def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM + depends on SWAP diff --git a/mm/Makefile b/mm/Makefile index 2fa6d2ca9f28..9aa03fa1dcc3 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -9,8 +9,8 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ page_alloc.o page-writeback.o pdflush.o \ - readahead.o slab.o swap.o truncate.o vmscan.o \ - prio_tree.o $(mmu-y) + readahead.o swap.o truncate.o vmscan.o \ + prio_tree.o util.o $(mmu-y) obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o @@ -18,5 +18,7 @@ obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SHMEM) += shmem.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o +obj-$(CONFIG_SLOB) += slob.o +obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o diff --git a/mm/fadvise.c b/mm/fadvise.c index 5f19e87bc5af..d257c89e7704 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -37,6 +37,11 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) if (!file) return -EBADF; + if (S_ISFIFO(file->f_dentry->d_inode->i_mode)) { + ret = -ESPIPE; + goto out; + } + mapping = file->f_mapping; if (!mapping || len < 0) { ret = -EINVAL; diff --git a/mm/filemap.c b/mm/filemap.c index 4ef24a397684..478f4c74cc31 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -280,7 +280,7 @@ static int wait_on_page_writeback_range(struct address_space *mapping, * it is otherwise livelockable. */ int sync_page_range(struct inode *inode, struct address_space *mapping, - loff_t pos, size_t count) + loff_t pos, loff_t count) { pgoff_t start = pos >> PAGE_CACHE_SHIFT; pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; @@ -305,9 +305,8 @@ EXPORT_SYMBOL(sync_page_range); * as it forces O_SYNC writers to different parts of the same file * to be serialised right until io completion. */ -static int sync_page_range_nolock(struct inode *inode, - struct address_space *mapping, - loff_t pos, size_t count) +int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, + loff_t pos, loff_t count) { pgoff_t start = pos >> PAGE_CACHE_SHIFT; pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; @@ -322,6 +321,7 @@ static int sync_page_range_nolock(struct inode *inode, ret = wait_on_page_writeback_range(mapping, start, end); return ret; } +EXPORT_SYMBOL(sync_page_range_nolock); /** * filemap_fdatawait - walk the list of under-writeback pages of the given @@ -343,30 +343,44 @@ EXPORT_SYMBOL(filemap_fdatawait); int filemap_write_and_wait(struct address_space *mapping) { - int retval = 0; + int err = 0; if (mapping->nrpages) { - retval = filemap_fdatawrite(mapping); - if (retval == 0) - retval = filemap_fdatawait(mapping); + err = filemap_fdatawrite(mapping); + /* + * Even if the above returned error, the pages may be + * written partially (e.g. -ENOSPC), so we wait for it. + * But the -EIO is special case, it may indicate the worst + * thing (e.g. bug) happened, so we avoid waiting for it. + */ + if (err != -EIO) { + int err2 = filemap_fdatawait(mapping); + if (!err) + err = err2; + } } - return retval; + return err; } +EXPORT_SYMBOL(filemap_write_and_wait); int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend) { - int retval = 0; + int err = 0; if (mapping->nrpages) { - retval = __filemap_fdatawrite_range(mapping, lstart, lend, - WB_SYNC_ALL); - if (retval == 0) - retval = wait_on_page_writeback_range(mapping, - lstart >> PAGE_CACHE_SHIFT, - lend >> PAGE_CACHE_SHIFT); + err = __filemap_fdatawrite_range(mapping, lstart, lend, + WB_SYNC_ALL); + /* See comment of filemap_write_and_wait() */ + if (err != -EIO) { + int err2 = wait_on_page_writeback_range(mapping, + lstart >> PAGE_CACHE_SHIFT, + lend >> PAGE_CACHE_SHIFT); + if (!err) + err = err2; + } } - return retval; + return err; } /* diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f4c43d7980ba..b21d78c941b5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -12,6 +12,7 @@ #include <linux/nodemask.h> #include <linux/pagemap.h> #include <linux/mempolicy.h> +#include <linux/cpuset.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -48,7 +49,8 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, for (z = zonelist->zones; *z; z++) { nid = (*z)->zone_pgdat->node_id; - if (!list_empty(&hugepage_freelists[nid])) + if (cpuset_zone_allowed(*z, GFP_HIGHUSER) && + !list_empty(&hugepage_freelists[nid])) break; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0f1d2b8a952b..1850d0aef4ac 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -83,9 +83,18 @@ #include <linux/init.h> #include <linux/compat.h> #include <linux/mempolicy.h> +#include <linux/swap.h> +#include <linux/seq_file.h> +#include <linux/proc_fs.h> + #include <asm/tlbflush.h> #include <asm/uaccess.h> +/* Internal flags */ +#define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */ +#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */ +#define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2) /* Gather statistics */ + static kmem_cache_t *policy_cache; static kmem_cache_t *sn_cache; @@ -171,12 +180,19 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes) break; } policy->policy = mode; + policy->cpuset_mems_allowed = cpuset_mems_allowed(current); return policy; } -/* Ensure all existing pages follow the policy. */ +static void gather_stats(struct page *, void *); +static void migrate_page_add(struct vm_area_struct *vma, + struct page *page, struct list_head *pagelist, unsigned long flags); + +/* Scan through pages checking if pages follow certain conditions. */ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, nodemask_t *nodes) + unsigned long addr, unsigned long end, + const nodemask_t *nodes, unsigned long flags, + void *private) { pte_t *orig_pte; pte_t *pte; @@ -193,7 +209,17 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, if (!page) continue; nid = page_to_nid(page); - if (!node_isset(nid, *nodes)) + if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) + continue; + + if (flags & MPOL_MF_STATS) + gather_stats(page, private); + else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + spin_unlock(ptl); + migrate_page_add(vma, page, private, flags); + spin_lock(ptl); + } + else break; } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(orig_pte, ptl); @@ -201,7 +227,9 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, } static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, - unsigned long addr, unsigned long end, nodemask_t *nodes) + unsigned long addr, unsigned long end, + const nodemask_t *nodes, unsigned long flags, + void *private) { pmd_t *pmd; unsigned long next; @@ -211,14 +239,17 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) continue; - if (check_pte_range(vma, pmd, addr, next, nodes)) + if (check_pte_range(vma, pmd, addr, next, nodes, + flags, private)) return -EIO; } while (pmd++, addr = next, addr != end); return 0; } static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, - unsigned long addr, unsigned long end, nodemask_t *nodes) + unsigned long addr, unsigned long end, + const nodemask_t *nodes, unsigned long flags, + void *private) { pud_t *pud; unsigned long next; @@ -228,14 +259,17 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; - if (check_pmd_range(vma, pud, addr, next, nodes)) + if (check_pmd_range(vma, pud, addr, next, nodes, + flags, private)) return -EIO; } while (pud++, addr = next, addr != end); return 0; } static inline int check_pgd_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long end, nodemask_t *nodes) + unsigned long addr, unsigned long end, + const nodemask_t *nodes, unsigned long flags, + void *private) { pgd_t *pgd; unsigned long next; @@ -245,16 +279,30 @@ static inline int check_pgd_range(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - if (check_pud_range(vma, pgd, addr, next, nodes)) + if (check_pud_range(vma, pgd, addr, next, nodes, + flags, private)) return -EIO; } while (pgd++, addr = next, addr != end); return 0; } -/* Step 1: check the range */ +/* Check if a vma is migratable */ +static inline int vma_migratable(struct vm_area_struct *vma) +{ + if (vma->vm_flags & ( + VM_LOCKED|VM_IO|VM_HUGETLB|VM_PFNMAP)) + return 0; + return 1; +} + +/* + * Check if all pages in a range are on a set of nodes. + * If pagelist != NULL then isolate pages from the LRU and + * put them on the pagelist. + */ static struct vm_area_struct * check_range(struct mm_struct *mm, unsigned long start, unsigned long end, - nodemask_t *nodes, unsigned long flags) + const nodemask_t *nodes, unsigned long flags, void *private) { int err; struct vm_area_struct *first, *vma, *prev; @@ -264,17 +312,24 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, return ERR_PTR(-EFAULT); prev = NULL; for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { - if (!vma->vm_next && vma->vm_end < end) - return ERR_PTR(-EFAULT); - if (prev && prev->vm_end < vma->vm_start) - return ERR_PTR(-EFAULT); - if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) { + if (!(flags & MPOL_MF_DISCONTIG_OK)) { + if (!vma->vm_next && vma->vm_end < end) + return ERR_PTR(-EFAULT); + if (prev && prev->vm_end < vma->vm_start) + return ERR_PTR(-EFAULT); + } + if (!is_vm_hugetlb_page(vma) && + ((flags & MPOL_MF_STRICT) || + ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && + vma_migratable(vma)))) { unsigned long endvma = vma->vm_end; + if (endvma > end) endvma = end; if (vma->vm_start > start) start = vma->vm_start; - err = check_pgd_range(vma, start, endvma, nodes); + err = check_pgd_range(vma, start, endvma, nodes, + flags, private); if (err) { first = ERR_PTR(err); break; @@ -333,51 +388,10 @@ static int contextualize_policy(int mode, nodemask_t *nodes) if (!nodes) return 0; - /* Update current mems_allowed */ - cpuset_update_current_mems_allowed(); - /* Ignore nodes not set in current->mems_allowed */ - cpuset_restrict_to_mems_allowed(nodes->bits); - return mpol_check_policy(mode, nodes); -} - -long do_mbind(unsigned long start, unsigned long len, - unsigned long mode, nodemask_t *nmask, unsigned long flags) -{ - struct vm_area_struct *vma; - struct mm_struct *mm = current->mm; - struct mempolicy *new; - unsigned long end; - int err; - - if ((flags & ~(unsigned long)(MPOL_MF_STRICT)) || mode > MPOL_MAX) - return -EINVAL; - if (start & ~PAGE_MASK) - return -EINVAL; - if (mode == MPOL_DEFAULT) - flags &= ~MPOL_MF_STRICT; - len = (len + PAGE_SIZE - 1) & PAGE_MASK; - end = start + len; - if (end < start) + cpuset_update_task_memory_state(); + if (!cpuset_nodes_subset_current_mems_allowed(*nodes)) return -EINVAL; - if (end == start) - return 0; - if (mpol_check_policy(mode, nmask)) - return -EINVAL; - new = mpol_new(mode, nmask); - if (IS_ERR(new)) - return PTR_ERR(new); - - PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len, - mode,nodes_addr(nodes)[0]); - - down_write(&mm->mmap_sem); - vma = check_range(mm, start, end, nmask, flags); - err = PTR_ERR(vma); - if (!IS_ERR(vma)) - err = mbind_range(vma, start, end, new); - up_write(&mm->mmap_sem); - mpol_free(new); - return err; + return mpol_check_policy(mode, nodes); } /* Set the process memory policy */ @@ -448,7 +462,7 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask, struct vm_area_struct *vma = NULL; struct mempolicy *pol = current->mempolicy; - cpuset_update_current_mems_allowed(); + cpuset_update_task_memory_state(); if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR)) return -EINVAL; if (flags & MPOL_F_ADDR) { @@ -500,11 +514,177 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask, } /* + * page migration + */ + +/* Check if we are the only process mapping the page in question */ +static inline int single_mm_mapping(struct mm_struct *mm, + struct address_space *mapping) +{ + struct vm_area_struct *vma; + struct prio_tree_iter iter; + int rc = 1; + + spin_lock(&mapping->i_mmap_lock); + vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) + if (mm != vma->vm_mm) { + rc = 0; + goto out; + } + list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) + if (mm != vma->vm_mm) { + rc = 0; + goto out; + } +out: + spin_unlock(&mapping->i_mmap_lock); + return rc; +} + +/* + * Add a page to be migrated to the pagelist + */ +static void migrate_page_add(struct vm_area_struct *vma, + struct page *page, struct list_head *pagelist, unsigned long flags) +{ + /* + * Avoid migrating a page that is shared by others and not writable. + */ + if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) || + mapping_writably_mapped(page->mapping) || + single_mm_mapping(vma->vm_mm, page->mapping)) { + int rc = isolate_lru_page(page); + + if (rc == 1) + list_add(&page->lru, pagelist); + /* + * If the isolate attempt was not successful then we just + * encountered an unswappable page. Something must be wrong. + */ + WARN_ON(rc == 0); + } +} + +static int swap_pages(struct list_head *pagelist) +{ + LIST_HEAD(moved); + LIST_HEAD(failed); + int n; + + n = migrate_pages(pagelist, NULL, &moved, &failed); + putback_lru_pages(&failed); + putback_lru_pages(&moved); + + return n; +} + +/* + * For now migrate_pages simply swaps out the pages from nodes that are in + * the source set but not in the target set. In the future, we would + * want a function that moves pages between the two nodesets in such + * a way as to preserve the physical layout as much as possible. + * + * Returns the number of page that could not be moved. + */ +int do_migrate_pages(struct mm_struct *mm, + const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) +{ + LIST_HEAD(pagelist); + int count = 0; + nodemask_t nodes; + + nodes_andnot(nodes, *from_nodes, *to_nodes); + + down_read(&mm->mmap_sem); + check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes, + flags | MPOL_MF_DISCONTIG_OK, &pagelist); + + if (!list_empty(&pagelist)) { + count = swap_pages(&pagelist); + putback_lru_pages(&pagelist); + } + + up_read(&mm->mmap_sem); + return count; +} + +long do_mbind(unsigned long start, unsigned long len, + unsigned long mode, nodemask_t *nmask, unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + struct mempolicy *new; + unsigned long end; + int err; + LIST_HEAD(pagelist); + + if ((flags & ~(unsigned long)(MPOL_MF_STRICT | + MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + || mode > MPOL_MAX) + return -EINVAL; + if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + if (start & ~PAGE_MASK) + return -EINVAL; + + if (mode == MPOL_DEFAULT) + flags &= ~MPOL_MF_STRICT; + + len = (len + PAGE_SIZE - 1) & PAGE_MASK; + end = start + len; + + if (end < start) + return -EINVAL; + if (end == start) + return 0; + + if (mpol_check_policy(mode, nmask)) + return -EINVAL; + + new = mpol_new(mode, nmask); + if (IS_ERR(new)) + return PTR_ERR(new); + + /* + * If we are using the default policy then operation + * on discontinuous address spaces is okay after all + */ + if (!new) + flags |= MPOL_MF_DISCONTIG_OK; + + PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len, + mode,nodes_addr(nodes)[0]); + + down_write(&mm->mmap_sem); + vma = check_range(mm, start, end, nmask, + flags | MPOL_MF_INVERT, &pagelist); + + err = PTR_ERR(vma); + if (!IS_ERR(vma)) { + int nr_failed = 0; + + err = mbind_range(vma, start, end, new); + if (!list_empty(&pagelist)) + nr_failed = swap_pages(&pagelist); + + if (!err && nr_failed && (flags & MPOL_MF_STRICT)) + err = -EIO; + } + if (!list_empty(&pagelist)) + putback_lru_pages(&pagelist); + + up_write(&mm->mmap_sem); + mpol_free(new); + return err; +} + +/* * User space interface with variable sized bitmaps for nodelists. */ /* Copy a node mask from user space. */ -static int get_nodes(nodemask_t *nodes, unsigned long __user *nmask, +static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, unsigned long maxnode) { unsigned long k; @@ -593,6 +773,65 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, return do_set_mempolicy(mode, &nodes); } +asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, + const unsigned long __user *old_nodes, + const unsigned long __user *new_nodes) +{ + struct mm_struct *mm; + struct task_struct *task; + nodemask_t old; + nodemask_t new; + nodemask_t task_nodes; + int err; + + err = get_nodes(&old, old_nodes, maxnode); + if (err) + return err; + + err = get_nodes(&new, new_nodes, maxnode); + if (err) + return err; + + /* Find the mm_struct */ + read_lock(&tasklist_lock); + task = pid ? find_task_by_pid(pid) : current; + if (!task) { + read_unlock(&tasklist_lock); + return -ESRCH; + } + mm = get_task_mm(task); + read_unlock(&tasklist_lock); + + if (!mm) + return -EINVAL; + + /* + * Check if this process has the right to modify the specified + * process. The right exists if the process has administrative + * capabilities, superuser priviledges or the same + * userid as the target process. + */ + if ((current->euid != task->suid) && (current->euid != task->uid) && + (current->uid != task->suid) && (current->uid != task->uid) && + !capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto out; + } + + task_nodes = cpuset_mems_allowed(task); + /* Is the user allowed to access the target nodes? */ + if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto out; + } + + err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE); +out: + mmput(mm); + return err; +} + + /* Retrieve NUMA policy */ asmlinkage long sys_get_mempolicy(int __user *policy, unsigned long __user *nmask, @@ -699,8 +938,8 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, #endif /* Return effective policy for a VMA */ -struct mempolicy * -get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) +static struct mempolicy * get_vma_policy(struct task_struct *task, + struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = task->mempolicy; @@ -848,7 +1087,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = get_vma_policy(current, vma, addr); - cpuset_update_current_mems_allowed(); + cpuset_update_task_memory_state(); if (unlikely(pol->policy == MPOL_INTERLEAVE)) { unsigned nid; @@ -874,7 +1113,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) * interrupt context and apply the current process NUMA policy. * Returns NULL when no page can be allocated. * - * Don't call cpuset_update_current_mems_allowed() unless + * Don't call cpuset_update_task_memory_state() unless * 1) it's ok to take cpuset_sem (can WAIT), and * 2) allocating for current task (not interrupt). */ @@ -883,7 +1122,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) struct mempolicy *pol = current->mempolicy; if ((gfp & __GFP_WAIT) && !in_interrupt()) - cpuset_update_current_mems_allowed(); + cpuset_update_task_memory_state(); if (!pol || in_interrupt()) pol = &default_policy; if (pol->policy == MPOL_INTERLEAVE) @@ -892,6 +1131,15 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) } EXPORT_SYMBOL(alloc_pages_current); +/* + * If mpol_copy() sees current->cpuset == cpuset_being_rebound, then it + * rebinds the mempolicy its copying by calling mpol_rebind_policy() + * with the mems_allowed returned by cpuset_mems_allowed(). This + * keeps mempolicies cpuset relative after its cpuset moves. See + * further kernel/cpuset.c update_nodemask(). + */ +void *cpuset_being_rebound; + /* Slow path of a mempolicy copy */ struct mempolicy *__mpol_copy(struct mempolicy *old) { @@ -899,6 +1147,10 @@ struct mempolicy *__mpol_copy(struct mempolicy *old) if (!new) return ERR_PTR(-ENOMEM); + if (current_cpuset_is_being_rebound()) { + nodemask_t mems = cpuset_mems_allowed(current); + mpol_rebind_policy(old, &mems); + } *new = *old; atomic_set(&new->refcnt, 1); if (new->policy == MPOL_BIND) { @@ -1173,25 +1425,31 @@ void numa_default_policy(void) } /* Migrate a policy to a different set of nodes */ -static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, - const nodemask_t *new) +void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) { + nodemask_t *mpolmask; nodemask_t tmp; if (!pol) return; + mpolmask = &pol->cpuset_mems_allowed; + if (nodes_equal(*mpolmask, *newmask)) + return; switch (pol->policy) { case MPOL_DEFAULT: break; case MPOL_INTERLEAVE: - nodes_remap(tmp, pol->v.nodes, *old, *new); + nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask); pol->v.nodes = tmp; - current->il_next = node_remap(current->il_next, *old, *new); + *mpolmask = *newmask; + current->il_next = node_remap(current->il_next, + *mpolmask, *newmask); break; case MPOL_PREFERRED: pol->v.preferred_node = node_remap(pol->v.preferred_node, - *old, *new); + *mpolmask, *newmask); + *mpolmask = *newmask; break; case MPOL_BIND: { nodemask_t nodes; @@ -1201,7 +1459,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, nodes_clear(nodes); for (z = pol->v.zonelist->zones; *z; z++) node_set((*z)->zone_pgdat->node_id, nodes); - nodes_remap(tmp, nodes, *old, *new); + nodes_remap(tmp, nodes, *mpolmask, *newmask); nodes = tmp; zonelist = bind_zonelist(&nodes); @@ -1216,6 +1474,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, kfree(pol->v.zonelist); pol->v.zonelist = zonelist; } + *mpolmask = *newmask; break; } default: @@ -1225,12 +1484,156 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, } /* - * Someone moved this task to different nodes. Fixup mempolicies. + * Wrapper for mpol_rebind_policy() that just requires task + * pointer, and updates task mempolicy. + */ + +void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) +{ + mpol_rebind_policy(tsk->mempolicy, new); +} + +/* + * Rebind each vma in mm to new nodemask. * - * TODO - fixup current->mm->vma and shmfs/tmpfs/hugetlbfs policies as well, - * once we have a cpuset mechanism to mark which cpuset subtree is migrating. + * Call holding a reference to mm. Takes mm->mmap_sem during call. */ -void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new) + +void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) { - rebind_policy(current->mempolicy, old, new); + struct vm_area_struct *vma; + + down_write(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) + mpol_rebind_policy(vma->vm_policy, new); + up_write(&mm->mmap_sem); } + +/* + * Display pages allocated per node and memory policy via /proc. + */ + +static const char *policy_types[] = { "default", "prefer", "bind", + "interleave" }; + +/* + * Convert a mempolicy into a string. + * Returns the number of characters in buffer (if positive) + * or an error (negative) + */ +static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) +{ + char *p = buffer; + int l; + nodemask_t nodes; + int mode = pol ? pol->policy : MPOL_DEFAULT; + + switch (mode) { + case MPOL_DEFAULT: + nodes_clear(nodes); + break; + + case MPOL_PREFERRED: + nodes_clear(nodes); + node_set(pol->v.preferred_node, nodes); + break; + + case MPOL_BIND: + get_zonemask(pol, &nodes); + break; + + case MPOL_INTERLEAVE: + nodes = pol->v.nodes; + break; + + default: + BUG(); + return -EFAULT; + } + + l = strlen(policy_types[mode]); + if (buffer + maxlen < p + l + 1) + return -ENOSPC; + + strcpy(p, policy_types[mode]); + p += l; + + if (!nodes_empty(nodes)) { + if (buffer + maxlen < p + 2) + return -ENOSPC; + *p++ = '='; + p += nodelist_scnprintf(p, buffer + maxlen - p, nodes); + } + return p - buffer; +} + +struct numa_maps { + unsigned long pages; + unsigned long anon; + unsigned long mapped; + unsigned long mapcount_max; + unsigned long node[MAX_NUMNODES]; +}; + +static void gather_stats(struct page *page, void *private) +{ + struct numa_maps *md = private; + int count = page_mapcount(page); + + if (count) + md->mapped++; + + if (count > md->mapcount_max) + md->mapcount_max = count; + + md->pages++; + + if (PageAnon(page)) + md->anon++; + + md->node[page_to_nid(page)]++; + cond_resched(); +} + +int show_numa_map(struct seq_file *m, void *v) +{ + struct task_struct *task = m->private; + struct vm_area_struct *vma = v; + struct numa_maps *md; + int n; + char buffer[50]; + + if (!vma->vm_mm) + return 0; + + md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL); + if (!md) + return 0; + + check_pgd_range(vma, vma->vm_start, vma->vm_end, + &node_online_map, MPOL_MF_STATS, md); + + if (md->pages) { + mpol_to_str(buffer, sizeof(buffer), + get_vma_policy(task, vma, vma->vm_start)); + + seq_printf(m, "%08lx %s pages=%lu mapped=%lu maxref=%lu", + vma->vm_start, buffer, md->pages, + md->mapped, md->mapcount_max); + + if (md->anon) + seq_printf(m," anon=%lu",md->anon); + + for_each_online_node(n) + if (md->node[n]) + seq_printf(m, " N%d=%lu", n, md->node[n]); + + seq_putc(m, '\n'); + } + kfree(md); + + if (m->count < m->size) + m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; + return 0; +} + diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d348b9035955..4748b906aff2 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -298,7 +298,8 @@ retry: /* * Give "p" a good chance of killing itself before we - * retry to allocate memory. + * retry to allocate memory unless "p" is current */ - schedule_timeout_interruptible(1); + if (!test_thread_flag(TIF_MEMDIE)) + schedule_timeout_interruptible(1); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fd47494cb989..e0e84924171b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -53,6 +53,7 @@ struct pglist_data *pgdat_list __read_mostly; unsigned long totalram_pages __read_mostly; unsigned long totalhigh_pages __read_mostly; long nr_swap_pages; +int percpu_pagelist_fraction; static void fastcall free_hot_cold_page(struct page *page, int cold); @@ -307,7 +308,7 @@ static inline int page_is_buddy(struct page *page, int order) * -- wli */ -static inline void __free_pages_bulk (struct page *page, +static inline void __free_one_page(struct page *page, struct zone *zone, unsigned int order) { unsigned long page_idx; @@ -382,40 +383,42 @@ static inline int free_pages_check(struct page *page) * And clear the zone's pages_scanned counter, to hold off the "all pages are * pinned" detection logic. */ -static int -free_pages_bulk(struct zone *zone, int count, - struct list_head *list, unsigned int order) +static void free_pages_bulk(struct zone *zone, int count, + struct list_head *list, int order) { - struct page *page = NULL; - int ret = 0; - spin_lock(&zone->lock); zone->all_unreclaimable = 0; zone->pages_scanned = 0; - while (!list_empty(list) && count--) { + while (count--) { + struct page *page; + + BUG_ON(list_empty(list)); page = list_entry(list->prev, struct page, lru); - /* have to delete it as __free_pages_bulk list manipulates */ + /* have to delete it as __free_one_page list manipulates */ list_del(&page->lru); - __free_pages_bulk(page, zone, order); - ret++; + __free_one_page(page, zone, order); } spin_unlock(&zone->lock); - return ret; } -void __free_pages_ok(struct page *page, unsigned int order) +static void free_one_page(struct zone *zone, struct page *page, int order) { - unsigned long flags; LIST_HEAD(list); + list_add(&page->lru, &list); + free_pages_bulk(zone, 1, &list, order); +} + +static void __free_pages_ok(struct page *page, unsigned int order) +{ + unsigned long flags; int i; int reserved = 0; arch_free_page(page, order); #ifndef CONFIG_MMU - if (order > 0) - for (i = 1 ; i < (1 << order) ; ++i) - __put_page(page + i); + for (i = 1 ; i < (1 << order) ; ++i) + __put_page(page + i); #endif for (i = 0 ; i < (1 << order) ; ++i) @@ -423,11 +426,10 @@ void __free_pages_ok(struct page *page, unsigned int order) if (reserved) return; - list_add(&page->lru, &list); - kernel_map_pages(page, 1<<order, 0); + kernel_map_pages(page, 1 << order, 0); local_irq_save(flags); __mod_page_state(pgfree, 1 << order); - free_pages_bulk(page_zone(page), 1, &list, order); + free_one_page(page_zone(page), page, order); local_irq_restore(flags); } @@ -596,14 +598,13 @@ void drain_remote_pages(void) if (zone->zone_pgdat->node_id == numa_node_id()) continue; - pset = zone->pageset[smp_processor_id()]; + pset = zone_pcp(zone, smp_processor_id()); for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { struct per_cpu_pages *pcp; pcp = &pset->pcp[i]; - if (pcp->count) - pcp->count -= free_pages_bulk(zone, pcp->count, - &pcp->list, 0); + free_pages_bulk(zone, pcp->count, &pcp->list, 0); + pcp->count = 0; } } local_irq_restore(flags); @@ -626,8 +627,8 @@ static void __drain_pages(unsigned int cpu) pcp = &pset->pcp[i]; local_irq_save(flags); - pcp->count -= free_pages_bulk(zone, pcp->count, - &pcp->list, 0); + free_pages_bulk(zone, pcp->count, &pcp->list, 0); + pcp->count = 0; local_irq_restore(flags); } } @@ -718,8 +719,10 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) __inc_page_state(pgfree); list_add(&page->lru, &pcp->list); pcp->count++; - if (pcp->count >= pcp->high) - pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0); + if (pcp->count >= pcp->high) { + free_pages_bulk(zone, pcp->batch, &pcp->list, 0); + pcp->count -= pcp->batch; + } local_irq_restore(flags); put_cpu(); } @@ -758,7 +761,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist, again: cpu = get_cpu(); - if (order == 0) { + if (likely(order == 0)) { struct per_cpu_pages *pcp; pcp = &zone_pcp(zone, cpu)->pcp[cold]; @@ -973,6 +976,7 @@ rebalance: cond_resched(); /* We now go into synchronous reclaim */ + cpuset_memory_pressure_bump(); p->flags |= PF_MEMALLOC; reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; @@ -1204,6 +1208,7 @@ static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) int cpu = 0; memset(ret, 0, sizeof(*ret)); + cpus_and(*cpumask, *cpumask, cpu_online_map); cpu = first_cpu(*cpumask); while (cpu < NR_CPUS) { @@ -1256,7 +1261,7 @@ unsigned long read_page_state_offset(unsigned long offset) unsigned long ret = 0; int cpu; - for_each_cpu(cpu) { + for_each_online_cpu(cpu) { unsigned long in; in = (unsigned long)&per_cpu(page_states, cpu) + offset; @@ -1830,6 +1835,24 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) INIT_LIST_HEAD(&pcp->list); } +/* + * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist + * to the value high for the pageset p. + */ + +static void setup_pagelist_highmark(struct per_cpu_pageset *p, + unsigned long high) +{ + struct per_cpu_pages *pcp; + + pcp = &p->pcp[0]; /* hot list */ + pcp->high = high; + pcp->batch = max(1UL, high/4); + if ((high/4) > (PAGE_SHIFT * 8)) + pcp->batch = PAGE_SHIFT * 8; +} + + #ifdef CONFIG_NUMA /* * Boot pageset table. One per cpu which is going to be used for all @@ -1861,12 +1884,16 @@ static int __devinit process_zones(int cpu) for_each_zone(zone) { - zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset), + zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), GFP_KERNEL, cpu_to_node(cpu)); - if (!zone->pageset[cpu]) + if (!zone_pcp(zone, cpu)) goto bad; - setup_pageset(zone->pageset[cpu], zone_batchsize(zone)); + setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone)); + + if (percpu_pagelist_fraction) + setup_pagelist_highmark(zone_pcp(zone, cpu), + (zone->present_pages / percpu_pagelist_fraction)); } return 0; @@ -1874,15 +1901,14 @@ bad: for_each_zone(dzone) { if (dzone == zone) break; - kfree(dzone->pageset[cpu]); - dzone->pageset[cpu] = NULL; + kfree(zone_pcp(dzone, cpu)); + zone_pcp(dzone, cpu) = NULL; } return -ENOMEM; } static inline void free_zone_pagesets(int cpu) { -#ifdef CONFIG_NUMA struct zone *zone; for_each_zone(zone) { @@ -1891,7 +1917,6 @@ static inline void free_zone_pagesets(int cpu) zone_pcp(zone, cpu) = NULL; kfree(pset); } -#endif } static int __devinit pageset_cpuup_callback(struct notifier_block *nfb, @@ -1962,7 +1987,7 @@ static __devinit void zone_pcp_init(struct zone *zone) for (cpu = 0; cpu < NR_CPUS; cpu++) { #ifdef CONFIG_NUMA /* Early boot. Slab allocator not functional yet */ - zone->pageset[cpu] = &boot_pageset[cpu]; + zone_pcp(zone, cpu) = &boot_pageset[cpu]; setup_pageset(&boot_pageset[cpu],0); #else setup_pageset(zone_pcp(zone,cpu), batch); @@ -2205,7 +2230,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg) seq_printf(m, ")" "\n pagesets"); - for (i = 0; i < ARRAY_SIZE(zone->pageset); i++) { + for_each_online_cpu(i) { struct per_cpu_pageset *pageset; int j; @@ -2568,6 +2593,32 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, return 0; } +/* + * percpu_pagelist_fraction - changes the pcp->high for each zone on each + * cpu. It is the fraction of total pages in each zone that a hot per cpu pagelist + * can have before it gets flushed back to buddy allocator. + */ + +int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + struct zone *zone; + unsigned int cpu; + int ret; + + ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + if (!write || (ret == -EINVAL)) + return ret; + for_each_zone(zone) { + for_each_online_cpu(cpu) { + unsigned long high; + high = zone->present_pages / percpu_pagelist_fraction; + setup_pagelist_highmark(zone_pcp(zone, cpu), high); + } + } + return 0; +} + __initdata int hashdist = HASHDIST_DEFAULT; #ifdef CONFIG_NUMA diff --git a/mm/pdflush.c b/mm/pdflush.c index 52822c98c489..c4b6d0afd736 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -90,7 +90,7 @@ struct pdflush_work { static int __pdflush(struct pdflush_work *my_work) { - current->flags |= PF_FLUSHER; + current->flags |= PF_FLUSHER | PF_SWAPWRITE; my_work->fn = NULL; my_work->who = current; INIT_LIST_HEAD(&my_work->list); diff --git a/mm/rmap.c b/mm/rmap.c index 6f3f7db27128..66ec43053a4d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -514,6 +514,13 @@ void page_add_file_rmap(struct page *page) void page_remove_rmap(struct page *page) { if (atomic_add_negative(-1, &page->_mapcount)) { + if (page_mapcount(page) < 0) { + printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page)); + printk (KERN_EMERG " page->flags = %lx\n", page->flags); + printk (KERN_EMERG " page->count = %x\n", page_count(page)); + printk (KERN_EMERG " page->mapping = %p\n", page->mapping); + } + BUG_ON(page_mapcount(page) < 0); /* * It would be tidy to reset the PageAnon mapping here, diff --git a/mm/slab.c b/mm/slab.c index e5ec26e0c460..1c46c6383552 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -130,7 +130,6 @@ #define FORCED_DEBUG 0 #endif - /* Shouldn't this be in a header file somewhere? */ #define BYTES_PER_WORD sizeof(void *) @@ -217,12 +216,12 @@ static unsigned long offslab_limit; * Slabs are chained into three list: fully used, partial, fully free slabs. */ struct slab { - struct list_head list; - unsigned long colouroff; - void *s_mem; /* including colour offset */ - unsigned int inuse; /* num of objs active in slab */ - kmem_bufctl_t free; - unsigned short nodeid; + struct list_head list; + unsigned long colouroff; + void *s_mem; /* including colour offset */ + unsigned int inuse; /* num of objs active in slab */ + kmem_bufctl_t free; + unsigned short nodeid; }; /* @@ -242,9 +241,9 @@ struct slab { * We assume struct slab_rcu can overlay struct slab when destroying. */ struct slab_rcu { - struct rcu_head head; - kmem_cache_t *cachep; - void *addr; + struct rcu_head head; + kmem_cache_t *cachep; + void *addr; }; /* @@ -279,23 +278,23 @@ struct array_cache { #define BOOT_CPUCACHE_ENTRIES 1 struct arraycache_init { struct array_cache cache; - void * entries[BOOT_CPUCACHE_ENTRIES]; + void *entries[BOOT_CPUCACHE_ENTRIES]; }; /* * The slab lists for all objects. */ struct kmem_list3 { - struct list_head slabs_partial; /* partial list first, better asm code */ - struct list_head slabs_full; - struct list_head slabs_free; - unsigned long free_objects; - unsigned long next_reap; - int free_touched; - unsigned int free_limit; - spinlock_t list_lock; - struct array_cache *shared; /* shared per node */ - struct array_cache **alien; /* on other nodes */ + struct list_head slabs_partial; /* partial list first, better asm code */ + struct list_head slabs_full; + struct list_head slabs_free; + unsigned long free_objects; + unsigned long next_reap; + int free_touched; + unsigned int free_limit; + spinlock_t list_lock; + struct array_cache *shared; /* shared per node */ + struct array_cache **alien; /* on other nodes */ }; /* @@ -367,63 +366,63 @@ static inline void kmem_list3_init(struct kmem_list3 *parent) * * manages a cache. */ - + struct kmem_cache { /* 1) per-cpu data, touched during every alloc/free */ - struct array_cache *array[NR_CPUS]; - unsigned int batchcount; - unsigned int limit; - unsigned int shared; - unsigned int objsize; + struct array_cache *array[NR_CPUS]; + unsigned int batchcount; + unsigned int limit; + unsigned int shared; + unsigned int objsize; /* 2) touched by every alloc & free from the backend */ - struct kmem_list3 *nodelists[MAX_NUMNODES]; - unsigned int flags; /* constant flags */ - unsigned int num; /* # of objs per slab */ - spinlock_t spinlock; + struct kmem_list3 *nodelists[MAX_NUMNODES]; + unsigned int flags; /* constant flags */ + unsigned int num; /* # of objs per slab */ + spinlock_t spinlock; /* 3) cache_grow/shrink */ /* order of pgs per slab (2^n) */ - unsigned int gfporder; + unsigned int gfporder; /* force GFP flags, e.g. GFP_DMA */ - gfp_t gfpflags; + gfp_t gfpflags; - size_t colour; /* cache colouring range */ - unsigned int colour_off; /* colour offset */ - unsigned int colour_next; /* cache colouring */ - kmem_cache_t *slabp_cache; - unsigned int slab_size; - unsigned int dflags; /* dynamic flags */ + size_t colour; /* cache colouring range */ + unsigned int colour_off; /* colour offset */ + unsigned int colour_next; /* cache colouring */ + kmem_cache_t *slabp_cache; + unsigned int slab_size; + unsigned int dflags; /* dynamic flags */ /* constructor func */ - void (*ctor)(void *, kmem_cache_t *, unsigned long); + void (*ctor) (void *, kmem_cache_t *, unsigned long); /* de-constructor func */ - void (*dtor)(void *, kmem_cache_t *, unsigned long); + void (*dtor) (void *, kmem_cache_t *, unsigned long); /* 4) cache creation/removal */ - const char *name; - struct list_head next; + const char *name; + struct list_head next; /* 5) statistics */ #if STATS - unsigned long num_active; - unsigned long num_allocations; - unsigned long high_mark; - unsigned long grown; - unsigned long reaped; - unsigned long errors; - unsigned long max_freeable; - unsigned long node_allocs; - unsigned long node_frees; - atomic_t allochit; - atomic_t allocmiss; - atomic_t freehit; - atomic_t freemiss; + unsigned long num_active; + unsigned long num_allocations; + unsigned long high_mark; + unsigned long grown; + unsigned long reaped; + unsigned long errors; + unsigned long max_freeable; + unsigned long node_allocs; + unsigned long node_frees; + atomic_t allochit; + atomic_t allocmiss; + atomic_t freehit; + atomic_t freemiss; #endif #if DEBUG - int dbghead; - int reallen; + int dbghead; + int reallen; #endif }; @@ -523,14 +522,15 @@ static unsigned long *dbg_redzone2(kmem_cache_t *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); if (cachep->flags & SLAB_STORE_USER) - return (unsigned long*) (objp+cachep->objsize-2*BYTES_PER_WORD); - return (unsigned long*) (objp+cachep->objsize-BYTES_PER_WORD); + return (unsigned long *)(objp + cachep->objsize - + 2 * BYTES_PER_WORD); + return (unsigned long *)(objp + cachep->objsize - BYTES_PER_WORD); } static void **dbg_userword(kmem_cache_t *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_STORE_USER)); - return (void**)(objp+cachep->objsize-BYTES_PER_WORD); + return (void **)(objp + cachep->objsize - BYTES_PER_WORD); } #else @@ -607,31 +607,31 @@ struct cache_names { static struct cache_names __initdata cache_names[] = { #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" }, #include <linux/kmalloc_sizes.h> - { NULL, } + {NULL,} #undef CACHE }; static struct arraycache_init initarray_cache __initdata = - { { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; + { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; static struct arraycache_init initarray_generic = - { { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; + { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; /* internal cache of cache description objs */ static kmem_cache_t cache_cache = { - .batchcount = 1, - .limit = BOOT_CPUCACHE_ENTRIES, - .shared = 1, - .objsize = sizeof(kmem_cache_t), - .flags = SLAB_NO_REAP, - .spinlock = SPIN_LOCK_UNLOCKED, - .name = "kmem_cache", + .batchcount = 1, + .limit = BOOT_CPUCACHE_ENTRIES, + .shared = 1, + .objsize = sizeof(kmem_cache_t), + .flags = SLAB_NO_REAP, + .spinlock = SPIN_LOCK_UNLOCKED, + .name = "kmem_cache", #if DEBUG - .reallen = sizeof(kmem_cache_t), + .reallen = sizeof(kmem_cache_t), #endif }; /* Guard access to the cache-chain. */ -static struct semaphore cache_chain_sem; +static struct semaphore cache_chain_sem; static struct list_head cache_chain; /* @@ -655,9 +655,9 @@ static enum { static DEFINE_PER_CPU(struct work_struct, reap_work); -static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node); -static void enable_cpucache (kmem_cache_t *cachep); -static void cache_reap (void *unused); +static void free_block(kmem_cache_t *cachep, void **objpp, int len, int node); +static void enable_cpucache(kmem_cache_t *cachep); +static void cache_reap(void *unused); static int __node_shrink(kmem_cache_t *cachep, int node); static inline struct array_cache *ac_data(kmem_cache_t *cachep) @@ -671,9 +671,9 @@ static inline kmem_cache_t *__find_general_cachep(size_t size, gfp_t gfpflags) #if DEBUG /* This happens if someone tries to call - * kmem_cache_create(), or __kmalloc(), before - * the generic caches are initialized. - */ + * kmem_cache_create(), or __kmalloc(), before + * the generic caches are initialized. + */ BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL); #endif while (size > csizep->cs_size) @@ -697,10 +697,10 @@ EXPORT_SYMBOL(kmem_find_general_cachep); /* Cal the num objs, wastage, and bytes left over for a given slab size. */ static void cache_estimate(unsigned long gfporder, size_t size, size_t align, - int flags, size_t *left_over, unsigned int *num) + int flags, size_t *left_over, unsigned int *num) { int i; - size_t wastage = PAGE_SIZE<<gfporder; + size_t wastage = PAGE_SIZE << gfporder; size_t extra = 0; size_t base = 0; @@ -709,7 +709,7 @@ static void cache_estimate(unsigned long gfporder, size_t size, size_t align, extra = sizeof(kmem_bufctl_t); } i = 0; - while (i*size + ALIGN(base+i*extra, align) <= wastage) + while (i * size + ALIGN(base + i * extra, align) <= wastage) i++; if (i > 0) i--; @@ -718,8 +718,8 @@ static void cache_estimate(unsigned long gfporder, size_t size, size_t align, i = SLAB_LIMIT; *num = i; - wastage -= i*size; - wastage -= ALIGN(base+i*extra, align); + wastage -= i * size; + wastage -= ALIGN(base + i * extra, align); *left_over = wastage; } @@ -728,7 +728,7 @@ static void cache_estimate(unsigned long gfporder, size_t size, size_t align, static void __slab_error(const char *function, kmem_cache_t *cachep, char *msg) { printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", - function, cachep->name, msg); + function, cachep->name, msg); dump_stack(); } @@ -755,9 +755,9 @@ static void __devinit start_cpu_timer(int cpu) } static struct array_cache *alloc_arraycache(int node, int entries, - int batchcount) + int batchcount) { - int memsize = sizeof(void*)*entries+sizeof(struct array_cache); + int memsize = sizeof(void *) * entries + sizeof(struct array_cache); struct array_cache *nc = NULL; nc = kmalloc_node(memsize, GFP_KERNEL, node); @@ -775,7 +775,7 @@ static struct array_cache *alloc_arraycache(int node, int entries, static inline struct array_cache **alloc_alien_cache(int node, int limit) { struct array_cache **ac_ptr; - int memsize = sizeof(void*)*MAX_NUMNODES; + int memsize = sizeof(void *) * MAX_NUMNODES; int i; if (limit > 1) @@ -789,7 +789,7 @@ static inline struct array_cache **alloc_alien_cache(int node, int limit) } ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); if (!ac_ptr[i]) { - for (i--; i <=0; i--) + for (i--; i <= 0; i--) kfree(ac_ptr[i]); kfree(ac_ptr); return NULL; @@ -807,12 +807,13 @@ static inline void free_alien_cache(struct array_cache **ac_ptr) return; for_each_node(i) - kfree(ac_ptr[i]); + kfree(ac_ptr[i]); kfree(ac_ptr); } -static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache *ac, int node) +static inline void __drain_alien_cache(kmem_cache_t *cachep, + struct array_cache *ac, int node) { struct kmem_list3 *rl3 = cachep->nodelists[node]; @@ -826,7 +827,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache static void drain_alien_cache(kmem_cache_t *cachep, struct kmem_list3 *l3) { - int i=0; + int i = 0; struct array_cache *ac; unsigned long flags; @@ -846,14 +847,13 @@ static void drain_alien_cache(kmem_cache_t *cachep, struct kmem_list3 *l3) #endif static int __devinit cpuup_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) + unsigned long action, void *hcpu) { long cpu = (long)hcpu; - kmem_cache_t* cachep; + kmem_cache_t *cachep; struct kmem_list3 *l3 = NULL; int node = cpu_to_node(cpu); int memsize = sizeof(struct kmem_list3); - struct array_cache *nc = NULL; switch (action) { case CPU_UP_PREPARE: @@ -871,27 +871,29 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, */ if (!cachep->nodelists[node]) { if (!(l3 = kmalloc_node(memsize, - GFP_KERNEL, node))) + GFP_KERNEL, node))) goto bad; kmem_list3_init(l3); l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + - ((unsigned long)cachep)%REAPTIMEOUT_LIST3; + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; cachep->nodelists[node] = l3; } spin_lock_irq(&cachep->nodelists[node]->list_lock); cachep->nodelists[node]->free_limit = - (1 + nr_cpus_node(node)) * - cachep->batchcount + cachep->num; + (1 + nr_cpus_node(node)) * + cachep->batchcount + cachep->num; spin_unlock_irq(&cachep->nodelists[node]->list_lock); } /* Now we can go ahead with allocating the shared array's - & array cache's */ + & array cache's */ list_for_each_entry(cachep, &cache_chain, next) { + struct array_cache *nc; + nc = alloc_arraycache(node, cachep->limit, - cachep->batchcount); + cachep->batchcount); if (!nc) goto bad; cachep->array[cpu] = nc; @@ -900,12 +902,13 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, BUG_ON(!l3); if (!l3->shared) { if (!(nc = alloc_arraycache(node, - cachep->shared*cachep->batchcount, - 0xbaadf00d))) - goto bad; + cachep->shared * + cachep->batchcount, + 0xbaadf00d))) + goto bad; /* we are serialised from CPU_DEAD or - CPU_UP_CANCELLED by the cpucontrol lock */ + CPU_UP_CANCELLED by the cpucontrol lock */ l3->shared = nc; } } @@ -942,13 +945,13 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, free_block(cachep, nc->entry, nc->avail, node); if (!cpus_empty(mask)) { - spin_unlock(&l3->list_lock); - goto unlock_cache; - } + spin_unlock(&l3->list_lock); + goto unlock_cache; + } if (l3->shared) { free_block(cachep, l3->shared->entry, - l3->shared->avail, node); + l3->shared->avail, node); kfree(l3->shared); l3->shared = NULL; } @@ -966,7 +969,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, } else { spin_unlock(&l3->list_lock); } -unlock_cache: + unlock_cache: spin_unlock_irq(&cachep->spinlock); kfree(nc); } @@ -975,7 +978,7 @@ unlock_cache: #endif } return NOTIFY_OK; -bad: + bad: up(&cache_chain_sem); return NOTIFY_BAD; } @@ -985,8 +988,7 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 }; /* * swap the static kmem_list3 with kmalloced memory */ -static void init_list(kmem_cache_t *cachep, struct kmem_list3 *list, - int nodeid) +static void init_list(kmem_cache_t *cachep, struct kmem_list3 *list, int nodeid) { struct kmem_list3 *ptr; @@ -1055,14 +1057,14 @@ void __init kmem_cache_init(void) cache_cache.objsize = ALIGN(cache_cache.objsize, cache_line_size()); cache_estimate(0, cache_cache.objsize, cache_line_size(), 0, - &left_over, &cache_cache.num); + &left_over, &cache_cache.num); if (!cache_cache.num) BUG(); - cache_cache.colour = left_over/cache_cache.colour_off; + cache_cache.colour = left_over / cache_cache.colour_off; cache_cache.colour_next = 0; - cache_cache.slab_size = ALIGN(cache_cache.num*sizeof(kmem_bufctl_t) + - sizeof(struct slab), cache_line_size()); + cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + + sizeof(struct slab), cache_line_size()); /* 2+3) create the kmalloc caches */ sizes = malloc_sizes; @@ -1074,14 +1076,18 @@ void __init kmem_cache_init(void) */ sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, - sizes[INDEX_AC].cs_size, ARCH_KMALLOC_MINALIGN, - (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL); + sizes[INDEX_AC].cs_size, + ARCH_KMALLOC_MINALIGN, + (ARCH_KMALLOC_FLAGS | + SLAB_PANIC), NULL, NULL); if (INDEX_AC != INDEX_L3) sizes[INDEX_L3].cs_cachep = - kmem_cache_create(names[INDEX_L3].name, - sizes[INDEX_L3].cs_size, ARCH_KMALLOC_MINALIGN, - (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL); + kmem_cache_create(names[INDEX_L3].name, + sizes[INDEX_L3].cs_size, + ARCH_KMALLOC_MINALIGN, + (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, + NULL); while (sizes->cs_size != ULONG_MAX) { /* @@ -1091,35 +1097,41 @@ void __init kmem_cache_init(void) * Note for systems short on memory removing the alignment will * allow tighter packing of the smaller caches. */ - if(!sizes->cs_cachep) + if (!sizes->cs_cachep) sizes->cs_cachep = kmem_cache_create(names->name, - sizes->cs_size, ARCH_KMALLOC_MINALIGN, - (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL); + sizes->cs_size, + ARCH_KMALLOC_MINALIGN, + (ARCH_KMALLOC_FLAGS + | SLAB_PANIC), + NULL, NULL); /* Inc off-slab bufctl limit until the ceiling is hit. */ if (!(OFF_SLAB(sizes->cs_cachep))) { - offslab_limit = sizes->cs_size-sizeof(struct slab); + offslab_limit = sizes->cs_size - sizeof(struct slab); offslab_limit /= sizeof(kmem_bufctl_t); } sizes->cs_dmacachep = kmem_cache_create(names->name_dma, - sizes->cs_size, ARCH_KMALLOC_MINALIGN, - (ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC), - NULL, NULL); + sizes->cs_size, + ARCH_KMALLOC_MINALIGN, + (ARCH_KMALLOC_FLAGS | + SLAB_CACHE_DMA | + SLAB_PANIC), NULL, + NULL); sizes++; names++; } /* 4) Replace the bootstrap head arrays */ { - void * ptr; + void *ptr; ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); local_irq_disable(); BUG_ON(ac_data(&cache_cache) != &initarray_cache.cache); memcpy(ptr, ac_data(&cache_cache), - sizeof(struct arraycache_init)); + sizeof(struct arraycache_init)); cache_cache.array[smp_processor_id()] = ptr; local_irq_enable(); @@ -1127,11 +1139,11 @@ void __init kmem_cache_init(void) local_irq_disable(); BUG_ON(ac_data(malloc_sizes[INDEX_AC].cs_cachep) - != &initarray_generic.cache); + != &initarray_generic.cache); memcpy(ptr, ac_data(malloc_sizes[INDEX_AC].cs_cachep), - sizeof(struct arraycache_init)); + sizeof(struct arraycache_init)); malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = - ptr; + ptr; local_irq_enable(); } /* 5) Replace the bootstrap kmem_list3's */ @@ -1139,16 +1151,16 @@ void __init kmem_cache_init(void) int node; /* Replace the static kmem_list3 structures for the boot cpu */ init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], - numa_node_id()); + numa_node_id()); for_each_online_node(node) { init_list(malloc_sizes[INDEX_AC].cs_cachep, - &initkmem_list3[SIZE_AC+node], node); + &initkmem_list3[SIZE_AC + node], node); if (INDEX_AC != INDEX_L3) { init_list(malloc_sizes[INDEX_L3].cs_cachep, - &initkmem_list3[SIZE_L3+node], - node); + &initkmem_list3[SIZE_L3 + node], + node); } } } @@ -1158,7 +1170,7 @@ void __init kmem_cache_init(void) kmem_cache_t *cachep; down(&cache_chain_sem); list_for_each_entry(cachep, &cache_chain, next) - enable_cpucache(cachep); + enable_cpucache(cachep); up(&cache_chain_sem); } @@ -1184,7 +1196,7 @@ static int __init cpucache_init(void) * pages to gfp. */ for_each_online_cpu(cpu) - start_cpu_timer(cpu); + start_cpu_timer(cpu); return 0; } @@ -1226,7 +1238,7 @@ static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid) */ static void kmem_freepages(kmem_cache_t *cachep, void *addr) { - unsigned long i = (1<<cachep->gfporder); + unsigned long i = (1 << cachep->gfporder); struct page *page = virt_to_page(addr); const unsigned long nr_freed = i; @@ -1239,13 +1251,13 @@ static void kmem_freepages(kmem_cache_t *cachep, void *addr) if (current->reclaim_state) current->reclaim_state->reclaimed_slab += nr_freed; free_pages((unsigned long)addr, cachep->gfporder); - if (cachep->flags & SLAB_RECLAIM_ACCOUNT) - atomic_sub(1<<cachep->gfporder, &slab_reclaim_pages); + if (cachep->flags & SLAB_RECLAIM_ACCOUNT) + atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages); } static void kmem_rcu_free(struct rcu_head *head) { - struct slab_rcu *slab_rcu = (struct slab_rcu *) head; + struct slab_rcu *slab_rcu = (struct slab_rcu *)head; kmem_cache_t *cachep = slab_rcu->cachep; kmem_freepages(cachep, slab_rcu->addr); @@ -1257,19 +1269,19 @@ static void kmem_rcu_free(struct rcu_head *head) #ifdef CONFIG_DEBUG_PAGEALLOC static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, - unsigned long caller) + unsigned long caller) { int size = obj_reallen(cachep); - addr = (unsigned long *)&((char*)addr)[obj_dbghead(cachep)]; + addr = (unsigned long *)&((char *)addr)[obj_dbghead(cachep)]; - if (size < 5*sizeof(unsigned long)) + if (size < 5 * sizeof(unsigned long)) return; - *addr++=0x12345678; - *addr++=caller; - *addr++=smp_processor_id(); - size -= 3*sizeof(unsigned long); + *addr++ = 0x12345678; + *addr++ = caller; + *addr++ = smp_processor_id(); + size -= 3 * sizeof(unsigned long); { unsigned long *sptr = &caller; unsigned long svalue; @@ -1277,7 +1289,7 @@ static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, while (!kstack_end(sptr)) { svalue = *sptr++; if (kernel_text_address(svalue)) { - *addr++=svalue; + *addr++ = svalue; size -= sizeof(unsigned long); if (size <= sizeof(unsigned long)) break; @@ -1285,25 +1297,25 @@ static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, } } - *addr++=0x87654321; + *addr++ = 0x87654321; } #endif static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val) { int size = obj_reallen(cachep); - addr = &((char*)addr)[obj_dbghead(cachep)]; + addr = &((char *)addr)[obj_dbghead(cachep)]; memset(addr, val, size); - *(unsigned char *)(addr+size-1) = POISON_END; + *(unsigned char *)(addr + size - 1) = POISON_END; } static void dump_line(char *data, int offset, int limit) { int i; printk(KERN_ERR "%03x:", offset); - for (i=0;i<limit;i++) { - printk(" %02x", (unsigned char)data[offset+i]); + for (i = 0; i < limit; i++) { + printk(" %02x", (unsigned char)data[offset + i]); } printk("\n"); } @@ -1318,24 +1330,24 @@ static void print_objinfo(kmem_cache_t *cachep, void *objp, int lines) if (cachep->flags & SLAB_RED_ZONE) { printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", - *dbg_redzone1(cachep, objp), - *dbg_redzone2(cachep, objp)); + *dbg_redzone1(cachep, objp), + *dbg_redzone2(cachep, objp)); } if (cachep->flags & SLAB_STORE_USER) { printk(KERN_ERR "Last user: [<%p>]", - *dbg_userword(cachep, objp)); + *dbg_userword(cachep, objp)); print_symbol("(%s)", - (unsigned long)*dbg_userword(cachep, objp)); + (unsigned long)*dbg_userword(cachep, objp)); printk("\n"); } - realobj = (char*)objp+obj_dbghead(cachep); + realobj = (char *)objp + obj_dbghead(cachep); size = obj_reallen(cachep); - for (i=0; i<size && lines;i+=16, lines--) { + for (i = 0; i < size && lines; i += 16, lines--) { int limit; limit = 16; - if (i+limit > size) - limit = size-i; + if (i + limit > size) + limit = size - i; dump_line(realobj, i, limit); } } @@ -1346,27 +1358,28 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) int size, i; int lines = 0; - realobj = (char*)objp+obj_dbghead(cachep); + realobj = (char *)objp + obj_dbghead(cachep); size = obj_reallen(cachep); - for (i=0;i<size;i++) { + for (i = 0; i < size; i++) { char exp = POISON_FREE; - if (i == size-1) + if (i == size - 1) exp = POISON_END; if (realobj[i] != exp) { int limit; /* Mismatch ! */ /* Print header */ if (lines == 0) { - printk(KERN_ERR "Slab corruption: start=%p, len=%d\n", - realobj, size); + printk(KERN_ERR + "Slab corruption: start=%p, len=%d\n", + realobj, size); print_objinfo(cachep, objp, 0); } /* Hexdump the affected line */ - i = (i/16)*16; + i = (i / 16) * 16; limit = 16; - if (i+limit > size) - limit = size-i; + if (i + limit > size) + limit = size - i; dump_line(realobj, i, limit); i += 16; lines++; @@ -1382,19 +1395,19 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) struct slab *slabp = page_get_slab(virt_to_page(objp)); int objnr; - objnr = (objp-slabp->s_mem)/cachep->objsize; + objnr = (objp - slabp->s_mem) / cachep->objsize; if (objnr) { - objp = slabp->s_mem+(objnr-1)*cachep->objsize; - realobj = (char*)objp+obj_dbghead(cachep); + objp = slabp->s_mem + (objnr - 1) * cachep->objsize; + realobj = (char *)objp + obj_dbghead(cachep); printk(KERN_ERR "Prev obj: start=%p, len=%d\n", - realobj, size); + realobj, size); print_objinfo(cachep, objp, 2); } - if (objnr+1 < cachep->num) { - objp = slabp->s_mem+(objnr+1)*cachep->objsize; - realobj = (char*)objp+obj_dbghead(cachep); + if (objnr + 1 < cachep->num) { + objp = slabp->s_mem + (objnr + 1) * cachep->objsize; + realobj = (char *)objp + obj_dbghead(cachep); printk(KERN_ERR "Next obj: start=%p, len=%d\n", - realobj, size); + realobj, size); print_objinfo(cachep, objp, 2); } } @@ -1405,7 +1418,7 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp) * Before calling the slab must have been unlinked from the cache. * The cache-lock is not held/needed. */ -static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp) +static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp) { void *addr = slabp->s_mem - slabp->colouroff; @@ -1416,8 +1429,11 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp) if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC - if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep)) - kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE,1); + if ((cachep->objsize % PAGE_SIZE) == 0 + && OFF_SLAB(cachep)) + kernel_map_pages(virt_to_page(objp), + cachep->objsize / PAGE_SIZE, + 1); else check_poison_obj(cachep, objp); #else @@ -1427,20 +1443,20 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp) if (cachep->flags & SLAB_RED_ZONE) { if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) slab_error(cachep, "start of a freed object " - "was overwritten"); + "was overwritten"); if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) slab_error(cachep, "end of a freed object " - "was overwritten"); + "was overwritten"); } if (cachep->dtor && !(cachep->flags & SLAB_POISON)) - (cachep->dtor)(objp+obj_dbghead(cachep), cachep, 0); + (cachep->dtor) (objp + obj_dbghead(cachep), cachep, 0); } #else if (cachep->dtor) { int i; for (i = 0; i < cachep->num; i++) { - void* objp = slabp->s_mem+cachep->objsize*i; - (cachep->dtor)(objp, cachep, 0); + void *objp = slabp->s_mem + cachep->objsize * i; + (cachep->dtor) (objp, cachep, 0); } } #endif @@ -1448,7 +1464,7 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp) if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { struct slab_rcu *slab_rcu; - slab_rcu = (struct slab_rcu *) slabp; + slab_rcu = (struct slab_rcu *)slabp; slab_rcu->cachep = cachep; slab_rcu->addr = addr; call_rcu(&slab_rcu->head, kmem_rcu_free); @@ -1466,11 +1482,58 @@ static inline void set_up_list3s(kmem_cache_t *cachep, int index) int node; for_each_online_node(node) { - cachep->nodelists[node] = &initkmem_list3[index+node]; + cachep->nodelists[node] = &initkmem_list3[index + node]; cachep->nodelists[node]->next_reap = jiffies + - REAPTIMEOUT_LIST3 + - ((unsigned long)cachep)%REAPTIMEOUT_LIST3; + REAPTIMEOUT_LIST3 + + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; + } +} + +/** + * calculate_slab_order - calculate size (page order) of slabs and the number + * of objects per slab. + * + * This could be made much more intelligent. For now, try to avoid using + * high order pages for slabs. When the gfp() functions are more friendly + * towards high-order requests, this should be changed. + */ +static inline size_t calculate_slab_order(kmem_cache_t *cachep, size_t size, + size_t align, gfp_t flags) +{ + size_t left_over = 0; + + for (;; cachep->gfporder++) { + unsigned int num; + size_t remainder; + + if (cachep->gfporder > MAX_GFP_ORDER) { + cachep->num = 0; + break; + } + + cache_estimate(cachep->gfporder, size, align, flags, + &remainder, &num); + if (!num) + continue; + /* More than offslab_limit objects will cause problems */ + if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) + break; + + cachep->num = num; + left_over = remainder; + + /* + * Large number of objects is good, but very large slabs are + * currently bad for the gfp()s. + */ + if (cachep->gfporder >= slab_break_gfp_order) + break; + + if ((left_over * 8) <= (PAGE_SIZE << cachep->gfporder)) + /* Acceptable internal fragmentation */ + break; } + return left_over; } /** @@ -1519,14 +1582,13 @@ kmem_cache_create (const char *name, size_t size, size_t align, * Sanity checks... these are all serious usage bugs. */ if ((!name) || - in_interrupt() || - (size < BYTES_PER_WORD) || - (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) || - (dtor && !ctor)) { - printk(KERN_ERR "%s: Early error in slab %s\n", - __FUNCTION__, name); - BUG(); - } + in_interrupt() || + (size < BYTES_PER_WORD) || + (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { + printk(KERN_ERR "%s: Early error in slab %s\n", + __FUNCTION__, name); + BUG(); + } down(&cache_chain_sem); @@ -1546,11 +1608,11 @@ kmem_cache_create (const char *name, size_t size, size_t align, set_fs(old_fs); if (res) { printk("SLAB: cache with size %d has lost its name\n", - pc->objsize); + pc->objsize); continue; } - if (!strcmp(pc->name,name)) { + if (!strcmp(pc->name, name)) { printk("kmem_cache_create: duplicate cache %s\n", name); dump_stack(); goto oops; @@ -1562,10 +1624,9 @@ kmem_cache_create (const char *name, size_t size, size_t align, if ((flags & SLAB_DEBUG_INITIAL) && !ctor) { /* No constructor, but inital state check requested */ printk(KERN_ERR "%s: No con, but init state check " - "requested - %s\n", __FUNCTION__, name); + "requested - %s\n", __FUNCTION__, name); flags &= ~SLAB_DEBUG_INITIAL; } - #if FORCED_DEBUG /* * Enable redzoning and last user accounting, except for caches with @@ -1573,8 +1634,9 @@ kmem_cache_create (const char *name, size_t size, size_t align, * above the next power of two: caches with object sizes just above a * power of two have a significant amount of internal fragmentation. */ - if ((size < 4096 || fls(size-1) == fls(size-1+3*BYTES_PER_WORD))) - flags |= SLAB_RED_ZONE|SLAB_STORE_USER; + if ((size < 4096 + || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD))) + flags |= SLAB_RED_ZONE | SLAB_STORE_USER; if (!(flags & SLAB_DESTROY_BY_RCU)) flags |= SLAB_POISON; #endif @@ -1595,9 +1657,9 @@ kmem_cache_create (const char *name, size_t size, size_t align, * unaligned accesses for some archs when redzoning is used, and makes * sure any on-slab bufctl's are also correctly aligned. */ - if (size & (BYTES_PER_WORD-1)) { - size += (BYTES_PER_WORD-1); - size &= ~(BYTES_PER_WORD-1); + if (size & (BYTES_PER_WORD - 1)) { + size += (BYTES_PER_WORD - 1); + size &= ~(BYTES_PER_WORD - 1); } /* calculate out the final buffer alignment: */ @@ -1608,7 +1670,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, * objects into one cacheline. */ ralign = cache_line_size(); - while (size <= ralign/2) + while (size <= ralign / 2) ralign /= 2; } else { ralign = BYTES_PER_WORD; @@ -1617,13 +1679,13 @@ kmem_cache_create (const char *name, size_t size, size_t align, if (ralign < ARCH_SLAB_MINALIGN) { ralign = ARCH_SLAB_MINALIGN; if (ralign > BYTES_PER_WORD) - flags &= ~(SLAB_RED_ZONE|SLAB_STORE_USER); + flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); } /* 3) caller mandated alignment: disables debug if necessary */ if (ralign < align) { ralign = align; if (ralign > BYTES_PER_WORD) - flags &= ~(SLAB_RED_ZONE|SLAB_STORE_USER); + flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); } /* 4) Store it. Note that the debug code below can reduce * the alignment to BYTES_PER_WORD. @@ -1645,7 +1707,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, /* add space for red zone words */ cachep->dbghead += BYTES_PER_WORD; - size += 2*BYTES_PER_WORD; + size += 2 * BYTES_PER_WORD; } if (flags & SLAB_STORE_USER) { /* user store requires word alignment and @@ -1656,7 +1718,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, size += BYTES_PER_WORD; } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) - if (size >= malloc_sizes[INDEX_L3+1].cs_size && cachep->reallen > cache_line_size() && size < PAGE_SIZE) { + if (size >= malloc_sizes[INDEX_L3 + 1].cs_size + && cachep->reallen > cache_line_size() && size < PAGE_SIZE) { cachep->dbghead += PAGE_SIZE - size; size = PAGE_SIZE; } @@ -1664,7 +1727,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, #endif /* Determine if the slab management is 'on' or 'off' slab. */ - if (size >= (PAGE_SIZE>>3)) + if (size >= (PAGE_SIZE >> 3)) /* * Size is large, assume best to place the slab management obj * off-slab (should allow better packing of objs). @@ -1681,47 +1744,9 @@ kmem_cache_create (const char *name, size_t size, size_t align, */ cachep->gfporder = 0; cache_estimate(cachep->gfporder, size, align, flags, - &left_over, &cachep->num); - } else { - /* - * Calculate size (in pages) of slabs, and the num of objs per - * slab. This could be made much more intelligent. For now, - * try to avoid using high page-orders for slabs. When the - * gfp() funcs are more friendly towards high-order requests, - * this should be changed. - */ - do { - unsigned int break_flag = 0; -cal_wastage: - cache_estimate(cachep->gfporder, size, align, flags, - &left_over, &cachep->num); - if (break_flag) - break; - if (cachep->gfporder >= MAX_GFP_ORDER) - break; - if (!cachep->num) - goto next; - if (flags & CFLGS_OFF_SLAB && - cachep->num > offslab_limit) { - /* This num of objs will cause problems. */ - cachep->gfporder--; - break_flag++; - goto cal_wastage; - } - - /* - * Large num of objs is good, but v. large slabs are - * currently bad for the gfp()s. - */ - if (cachep->gfporder >= slab_break_gfp_order) - break; - - if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder)) - break; /* Acceptable internal fragmentation. */ -next: - cachep->gfporder++; - } while (1); - } + &left_over, &cachep->num); + } else + left_over = calculate_slab_order(cachep, size, align, flags); if (!cachep->num) { printk("kmem_cache_create: couldn't create cache %s.\n", name); @@ -1729,8 +1754,8 @@ next: cachep = NULL; goto oops; } - slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t) - + sizeof(struct slab), align); + slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) + + sizeof(struct slab), align); /* * If the slab has been placed off-slab, and we have enough space then @@ -1743,14 +1768,15 @@ next: if (flags & CFLGS_OFF_SLAB) { /* really off slab. No need for manual alignment */ - slab_size = cachep->num*sizeof(kmem_bufctl_t)+sizeof(struct slab); + slab_size = + cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); } cachep->colour_off = cache_line_size(); /* Offset must be a multiple of the alignment. */ if (cachep->colour_off < align) cachep->colour_off = align; - cachep->colour = left_over/cachep->colour_off; + cachep->colour = left_over / cachep->colour_off; cachep->slab_size = slab_size; cachep->flags = flags; cachep->gfpflags = 0; @@ -1777,7 +1803,7 @@ next: * the creation of further caches will BUG(). */ cachep->array[smp_processor_id()] = - &initarray_generic.cache; + &initarray_generic.cache; /* If the cache that's used by * kmalloc(sizeof(kmem_list3)) is the first cache, @@ -1791,8 +1817,7 @@ next: g_cpucache_up = PARTIAL_AC; } else { cachep->array[smp_processor_id()] = - kmalloc(sizeof(struct arraycache_init), - GFP_KERNEL); + kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); if (g_cpucache_up == PARTIAL_AC) { set_up_list3s(cachep, SIZE_L3); @@ -1802,16 +1827,18 @@ next: for_each_online_node(node) { cachep->nodelists[node] = - kmalloc_node(sizeof(struct kmem_list3), - GFP_KERNEL, node); + kmalloc_node(sizeof + (struct kmem_list3), + GFP_KERNEL, node); BUG_ON(!cachep->nodelists[node]); - kmem_list3_init(cachep->nodelists[node]); + kmem_list3_init(cachep-> + nodelists[node]); } } } cachep->nodelists[numa_node_id()]->next_reap = - jiffies + REAPTIMEOUT_LIST3 + - ((unsigned long)cachep)%REAPTIMEOUT_LIST3; + jiffies + REAPTIMEOUT_LIST3 + + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; BUG_ON(!ac_data(cachep)); ac_data(cachep)->avail = 0; @@ -1820,15 +1847,15 @@ next: ac_data(cachep)->touched = 0; cachep->batchcount = 1; cachep->limit = BOOT_CPUCACHE_ENTRIES; - } + } /* cache setup completed, link it into the list */ list_add(&cachep->next, &cache_chain); unlock_cpu_hotplug(); -oops: + oops: if (!cachep && (flags & SLAB_PANIC)) panic("kmem_cache_create(): failed to create slab `%s'\n", - name); + name); up(&cache_chain_sem); return cachep; } @@ -1871,7 +1898,7 @@ static inline void check_spinlock_acquired_node(kmem_cache_t *cachep, int node) /* * Waits for all CPUs to execute func(). */ -static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg) +static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg) { check_irq_on(); preempt_disable(); @@ -1886,12 +1913,12 @@ static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg) preempt_enable(); } -static void drain_array_locked(kmem_cache_t* cachep, - struct array_cache *ac, int force, int node); +static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac, + int force, int node); static void do_drain(void *arg) { - kmem_cache_t *cachep = (kmem_cache_t*)arg; + kmem_cache_t *cachep = (kmem_cache_t *) arg; struct array_cache *ac; int node = numa_node_id(); @@ -1911,7 +1938,7 @@ static void drain_cpu_caches(kmem_cache_t *cachep) smp_call_function_all_cpus(do_drain, cachep); check_irq_on(); spin_lock_irq(&cachep->spinlock); - for_each_online_node(node) { + for_each_online_node(node) { l3 = cachep->nodelists[node]; if (l3) { spin_lock(&l3->list_lock); @@ -1949,8 +1976,7 @@ static int __node_shrink(kmem_cache_t *cachep, int node) slab_destroy(cachep, slabp); spin_lock_irq(&l3->list_lock); } - ret = !list_empty(&l3->slabs_full) || - !list_empty(&l3->slabs_partial); + ret = !list_empty(&l3->slabs_full) || !list_empty(&l3->slabs_partial); return ret; } @@ -2006,7 +2032,7 @@ EXPORT_SYMBOL(kmem_cache_shrink); * The caller must guarantee that noone will allocate memory from the cache * during the kmem_cache_destroy(). */ -int kmem_cache_destroy(kmem_cache_t * cachep) +int kmem_cache_destroy(kmem_cache_t *cachep) { int i; struct kmem_list3 *l3; @@ -2028,7 +2054,7 @@ int kmem_cache_destroy(kmem_cache_t * cachep) if (__cache_shrink(cachep)) { slab_error(cachep, "Can't free all objects"); down(&cache_chain_sem); - list_add(&cachep->next,&cache_chain); + list_add(&cachep->next, &cache_chain); up(&cache_chain_sem); unlock_cpu_hotplug(); return 1; @@ -2038,7 +2064,7 @@ int kmem_cache_destroy(kmem_cache_t * cachep) synchronize_rcu(); for_each_online_cpu(i) - kfree(cachep->array[i]); + kfree(cachep->array[i]); /* NUMA: free the list3 structures */ for_each_online_node(i) { @@ -2057,39 +2083,39 @@ int kmem_cache_destroy(kmem_cache_t * cachep) EXPORT_SYMBOL(kmem_cache_destroy); /* Get the memory for a slab management obj. */ -static struct slab* alloc_slabmgmt(kmem_cache_t *cachep, void *objp, - int colour_off, gfp_t local_flags) +static struct slab *alloc_slabmgmt(kmem_cache_t *cachep, void *objp, + int colour_off, gfp_t local_flags) { struct slab *slabp; - + if (OFF_SLAB(cachep)) { /* Slab management obj is off-slab. */ slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags); if (!slabp) return NULL; } else { - slabp = objp+colour_off; + slabp = objp + colour_off; colour_off += cachep->slab_size; } slabp->inuse = 0; slabp->colouroff = colour_off; - slabp->s_mem = objp+colour_off; + slabp->s_mem = objp + colour_off; return slabp; } static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) { - return (kmem_bufctl_t *)(slabp+1); + return (kmem_bufctl_t *) (slabp + 1); } static void cache_init_objs(kmem_cache_t *cachep, - struct slab *slabp, unsigned long ctor_flags) + struct slab *slabp, unsigned long ctor_flags) { int i; for (i = 0; i < cachep->num; i++) { - void *objp = slabp->s_mem+cachep->objsize*i; + void *objp = slabp->s_mem + cachep->objsize * i; #if DEBUG /* need to poison the objs? */ if (cachep->flags & SLAB_POISON) @@ -2107,25 +2133,28 @@ static void cache_init_objs(kmem_cache_t *cachep, * Otherwise, deadlock. They must also be threaded. */ if (cachep->ctor && !(cachep->flags & SLAB_POISON)) - cachep->ctor(objp+obj_dbghead(cachep), cachep, ctor_flags); + cachep->ctor(objp + obj_dbghead(cachep), cachep, + ctor_flags); if (cachep->flags & SLAB_RED_ZONE) { if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) slab_error(cachep, "constructor overwrote the" - " end of an object"); + " end of an object"); if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) slab_error(cachep, "constructor overwrote the" - " start of an object"); + " start of an object"); } - if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) - kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0); + if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) + && cachep->flags & SLAB_POISON) + kernel_map_pages(virt_to_page(objp), + cachep->objsize / PAGE_SIZE, 0); #else if (cachep->ctor) cachep->ctor(objp, cachep, ctor_flags); #endif - slab_bufctl(slabp)[i] = i+1; + slab_bufctl(slabp)[i] = i + 1; } - slab_bufctl(slabp)[i-1] = BUFCTL_END; + slab_bufctl(slabp)[i - 1] = BUFCTL_END; slabp->free = 0; } @@ -2161,17 +2190,17 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp) */ static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid) { - struct slab *slabp; - void *objp; - size_t offset; - gfp_t local_flags; - unsigned long ctor_flags; + struct slab *slabp; + void *objp; + size_t offset; + gfp_t local_flags; + unsigned long ctor_flags; struct kmem_list3 *l3; /* Be lazy and only check for valid flags here, - * keeping it out of the critical path in kmem_cache_alloc(). + * keeping it out of the critical path in kmem_cache_alloc(). */ - if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) + if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)) BUG(); if (flags & SLAB_NO_GROW) return 0; @@ -2237,9 +2266,9 @@ static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid) l3->free_objects += cachep->num; spin_unlock(&l3->list_lock); return 1; -opps1: + opps1: kmem_freepages(cachep, objp); -failed: + failed: if (local_flags & __GFP_WAIT) local_irq_disable(); return 0; @@ -2259,18 +2288,19 @@ static void kfree_debugcheck(const void *objp) if (!virt_addr_valid(objp)) { printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n", - (unsigned long)objp); - BUG(); + (unsigned long)objp); + BUG(); } page = virt_to_page(objp); if (!PageSlab(page)) { - printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n", (unsigned long)objp); + printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n", + (unsigned long)objp); BUG(); } } static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, - void *caller) + void *caller) { struct page *page; unsigned int objnr; @@ -2281,20 +2311,26 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, page = virt_to_page(objp); if (page_get_cache(page) != cachep) { - printk(KERN_ERR "mismatch in kmem_cache_free: expected cache %p, got %p\n", - page_get_cache(page),cachep); + printk(KERN_ERR + "mismatch in kmem_cache_free: expected cache %p, got %p\n", + page_get_cache(page), cachep); printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); - printk(KERN_ERR "%p is %s.\n", page_get_cache(page), page_get_cache(page)->name); + printk(KERN_ERR "%p is %s.\n", page_get_cache(page), + page_get_cache(page)->name); WARN_ON(1); } slabp = page_get_slab(page); if (cachep->flags & SLAB_RED_ZONE) { - if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || *dbg_redzone2(cachep, objp) != RED_ACTIVE) { - slab_error(cachep, "double free, or memory outside" - " object was overwritten"); - printk(KERN_ERR "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", - objp, *dbg_redzone1(cachep, objp), *dbg_redzone2(cachep, objp)); + if (*dbg_redzone1(cachep, objp) != RED_ACTIVE + || *dbg_redzone2(cachep, objp) != RED_ACTIVE) { + slab_error(cachep, + "double free, or memory outside" + " object was overwritten"); + printk(KERN_ERR + "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", + objp, *dbg_redzone1(cachep, objp), + *dbg_redzone2(cachep, objp)); } *dbg_redzone1(cachep, objp) = RED_INACTIVE; *dbg_redzone2(cachep, objp) = RED_INACTIVE; @@ -2302,30 +2338,31 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp, if (cachep->flags & SLAB_STORE_USER) *dbg_userword(cachep, objp) = caller; - objnr = (objp-slabp->s_mem)/cachep->objsize; + objnr = (objp - slabp->s_mem) / cachep->objsize; BUG_ON(objnr >= cachep->num); - BUG_ON(objp != slabp->s_mem + objnr*cachep->objsize); + BUG_ON(objp != slabp->s_mem + objnr * cachep->objsize); if (cachep->flags & SLAB_DEBUG_INITIAL) { /* Need to call the slab's constructor so the * caller can perform a verify of its state (debugging). * Called without the cache-lock held. */ - cachep->ctor(objp+obj_dbghead(cachep), - cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY); + cachep->ctor(objp + obj_dbghead(cachep), + cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); } if (cachep->flags & SLAB_POISON && cachep->dtor) { /* we want to cache poison the object, * call the destruction callback */ - cachep->dtor(objp+obj_dbghead(cachep), cachep, 0); + cachep->dtor(objp + obj_dbghead(cachep), cachep, 0); } if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { store_stackinfo(cachep, objp, (unsigned long)caller); - kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0); + kernel_map_pages(virt_to_page(objp), + cachep->objsize / PAGE_SIZE, 0); } else { poison_obj(cachep, objp, POISON_FREE); } @@ -2340,7 +2377,7 @@ static void check_slabp(kmem_cache_t *cachep, struct slab *slabp) { kmem_bufctl_t i; int entries = 0; - + /* Check slab's freelist to see if this obj is there. */ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { entries++; @@ -2348,13 +2385,16 @@ static void check_slabp(kmem_cache_t *cachep, struct slab *slabp) goto bad; } if (entries != cachep->num - slabp->inuse) { -bad: - printk(KERN_ERR "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", - cachep->name, cachep->num, slabp, slabp->inuse); - for (i=0;i<sizeof(slabp)+cachep->num*sizeof(kmem_bufctl_t);i++) { - if ((i%16)==0) + bad: + printk(KERN_ERR + "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", + cachep->name, cachep->num, slabp, slabp->inuse); + for (i = 0; + i < sizeof(slabp) + cachep->num * sizeof(kmem_bufctl_t); + i++) { + if ((i % 16) == 0) printk("\n%03x:", i); - printk(" %02x", ((unsigned char*)slabp)[i]); + printk(" %02x", ((unsigned char *)slabp)[i]); } printk("\n"); BUG(); @@ -2374,7 +2414,7 @@ static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags) check_irq_off(); ac = ac_data(cachep); -retry: + retry: batchcount = ac->batchcount; if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { /* if there was little recent activity on this @@ -2396,8 +2436,8 @@ retry: shared_array->avail -= batchcount; ac->avail = batchcount; memcpy(ac->entry, - &(shared_array->entry[shared_array->avail]), - sizeof(void*)*batchcount); + &(shared_array->entry[shared_array->avail]), + sizeof(void *) * batchcount); shared_array->touched = 1; goto alloc_done; } @@ -2425,7 +2465,7 @@ retry: /* get obj pointer */ ac->entry[ac->avail++] = slabp->s_mem + - slabp->free*cachep->objsize; + slabp->free * cachep->objsize; slabp->inuse++; next = slab_bufctl(slabp)[slabp->free]; @@ -2433,7 +2473,7 @@ retry: slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; WARN_ON(numa_node_id() != slabp->nodeid); #endif - slabp->free = next; + slabp->free = next; } check_slabp(cachep, slabp); @@ -2445,9 +2485,9 @@ retry: list_add(&slabp->list, &l3->slabs_partial); } -must_grow: + must_grow: l3->free_objects -= ac->avail; -alloc_done: + alloc_done: spin_unlock(&l3->list_lock); if (unlikely(!ac->avail)) { @@ -2459,7 +2499,7 @@ alloc_done: if (!x && ac->avail == 0) // no objects in sight? abort return NULL; - if (!ac->avail) // objects refilled by interrupt? + if (!ac->avail) // objects refilled by interrupt? goto retry; } ac->touched = 1; @@ -2476,16 +2516,16 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, gfp_t flags) } #if DEBUG -static void * -cache_alloc_debugcheck_after(kmem_cache_t *cachep, - gfp_t flags, void *objp, void *caller) +static void *cache_alloc_debugcheck_after(kmem_cache_t *cachep, gfp_t flags, + void *objp, void *caller) { - if (!objp) + if (!objp) return objp; - if (cachep->flags & SLAB_POISON) { + if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) - kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 1); + kernel_map_pages(virt_to_page(objp), + cachep->objsize / PAGE_SIZE, 1); else check_poison_obj(cachep, objp); #else @@ -2497,24 +2537,28 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep, *dbg_userword(cachep, objp) = caller; if (cachep->flags & SLAB_RED_ZONE) { - if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || *dbg_redzone2(cachep, objp) != RED_INACTIVE) { - slab_error(cachep, "double free, or memory outside" - " object was overwritten"); - printk(KERN_ERR "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", - objp, *dbg_redzone1(cachep, objp), *dbg_redzone2(cachep, objp)); + if (*dbg_redzone1(cachep, objp) != RED_INACTIVE + || *dbg_redzone2(cachep, objp) != RED_INACTIVE) { + slab_error(cachep, + "double free, or memory outside" + " object was overwritten"); + printk(KERN_ERR + "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", + objp, *dbg_redzone1(cachep, objp), + *dbg_redzone2(cachep, objp)); } *dbg_redzone1(cachep, objp) = RED_ACTIVE; *dbg_redzone2(cachep, objp) = RED_ACTIVE; } objp += obj_dbghead(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) { - unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR; + unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR; if (!(flags & __GFP_WAIT)) ctor_flags |= SLAB_CTOR_ATOMIC; cachep->ctor(objp, cachep, ctor_flags); - } + } return objp; } #else @@ -2523,7 +2567,7 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep, static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags) { - void* objp; + void *objp; struct array_cache *ac; check_irq_off(); @@ -2542,7 +2586,7 @@ static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags) static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) { unsigned long save_flags; - void* objp; + void *objp; cache_alloc_debugcheck_before(cachep, flags); @@ -2550,7 +2594,7 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) objp = ____cache_alloc(cachep, flags); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, - __builtin_return_address(0)); + __builtin_return_address(0)); prefetchw(objp); return objp; } @@ -2562,74 +2606,75 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) { struct list_head *entry; - struct slab *slabp; - struct kmem_list3 *l3; - void *obj; - kmem_bufctl_t next; - int x; - - l3 = cachep->nodelists[nodeid]; - BUG_ON(!l3); - -retry: - spin_lock(&l3->list_lock); - entry = l3->slabs_partial.next; - if (entry == &l3->slabs_partial) { - l3->free_touched = 1; - entry = l3->slabs_free.next; - if (entry == &l3->slabs_free) - goto must_grow; - } - - slabp = list_entry(entry, struct slab, list); - check_spinlock_acquired_node(cachep, nodeid); - check_slabp(cachep, slabp); - - STATS_INC_NODEALLOCS(cachep); - STATS_INC_ACTIVE(cachep); - STATS_SET_HIGH(cachep); - - BUG_ON(slabp->inuse == cachep->num); - - /* get obj pointer */ - obj = slabp->s_mem + slabp->free*cachep->objsize; - slabp->inuse++; - next = slab_bufctl(slabp)[slabp->free]; + struct slab *slabp; + struct kmem_list3 *l3; + void *obj; + kmem_bufctl_t next; + int x; + + l3 = cachep->nodelists[nodeid]; + BUG_ON(!l3); + + retry: + spin_lock(&l3->list_lock); + entry = l3->slabs_partial.next; + if (entry == &l3->slabs_partial) { + l3->free_touched = 1; + entry = l3->slabs_free.next; + if (entry == &l3->slabs_free) + goto must_grow; + } + + slabp = list_entry(entry, struct slab, list); + check_spinlock_acquired_node(cachep, nodeid); + check_slabp(cachep, slabp); + + STATS_INC_NODEALLOCS(cachep); + STATS_INC_ACTIVE(cachep); + STATS_SET_HIGH(cachep); + + BUG_ON(slabp->inuse == cachep->num); + + /* get obj pointer */ + obj = slabp->s_mem + slabp->free * cachep->objsize; + slabp->inuse++; + next = slab_bufctl(slabp)[slabp->free]; #if DEBUG - slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; + slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; #endif - slabp->free = next; - check_slabp(cachep, slabp); - l3->free_objects--; - /* move slabp to correct slabp list: */ - list_del(&slabp->list); - - if (slabp->free == BUFCTL_END) { - list_add(&slabp->list, &l3->slabs_full); - } else { - list_add(&slabp->list, &l3->slabs_partial); - } + slabp->free = next; + check_slabp(cachep, slabp); + l3->free_objects--; + /* move slabp to correct slabp list: */ + list_del(&slabp->list); + + if (slabp->free == BUFCTL_END) { + list_add(&slabp->list, &l3->slabs_full); + } else { + list_add(&slabp->list, &l3->slabs_partial); + } - spin_unlock(&l3->list_lock); - goto done; + spin_unlock(&l3->list_lock); + goto done; -must_grow: - spin_unlock(&l3->list_lock); - x = cache_grow(cachep, flags, nodeid); + must_grow: + spin_unlock(&l3->list_lock); + x = cache_grow(cachep, flags, nodeid); - if (!x) - return NULL; + if (!x) + return NULL; - goto retry; -done: - return obj; + goto retry; + done: + return obj; } #endif /* * Caller needs to acquire correct kmem_list's list_lock */ -static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node) +static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, + int node) { int i; struct kmem_list3 *l3; @@ -2652,7 +2697,7 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int n if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { printk(KERN_ERR "slab: double free detected in cache " - "'%s', objp %p\n", cachep->name, objp); + "'%s', objp %p\n", cachep->name, objp); BUG(); } #endif @@ -2696,20 +2741,19 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) spin_lock(&l3->list_lock); if (l3->shared) { struct array_cache *shared_array = l3->shared; - int max = shared_array->limit-shared_array->avail; + int max = shared_array->limit - shared_array->avail; if (max) { if (batchcount > max) batchcount = max; memcpy(&(shared_array->entry[shared_array->avail]), - ac->entry, - sizeof(void*)*batchcount); + ac->entry, sizeof(void *) * batchcount); shared_array->avail += batchcount; goto free_done; } } free_block(cachep, ac->entry, batchcount, node); -free_done: + free_done: #if STATS { int i = 0; @@ -2731,10 +2775,9 @@ free_done: spin_unlock(&l3->list_lock); ac->avail -= batchcount; memmove(ac->entry, &(ac->entry[batchcount]), - sizeof(void*)*ac->avail); + sizeof(void *) * ac->avail); } - /* * __cache_free * Release an obj back to its cache. If the obj has a constructed @@ -2759,7 +2802,8 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) if (unlikely(slabp->nodeid != numa_node_id())) { struct array_cache *alien = NULL; int nodeid = slabp->nodeid; - struct kmem_list3 *l3 = cachep->nodelists[numa_node_id()]; + struct kmem_list3 *l3 = + cachep->nodelists[numa_node_id()]; STATS_INC_NODEFREES(cachep); if (l3->alien && l3->alien[nodeid]) { @@ -2767,15 +2811,15 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) spin_lock(&alien->lock); if (unlikely(alien->avail == alien->limit)) __drain_alien_cache(cachep, - alien, nodeid); + alien, nodeid); alien->entry[alien->avail++] = objp; spin_unlock(&alien->lock); } else { spin_lock(&(cachep->nodelists[nodeid])-> - list_lock); + list_lock); free_block(cachep, &objp, 1, nodeid); spin_unlock(&(cachep->nodelists[nodeid])-> - list_lock); + list_lock); } return; } @@ -2822,9 +2866,9 @@ EXPORT_SYMBOL(kmem_cache_alloc); */ int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr) { - unsigned long addr = (unsigned long) ptr; + unsigned long addr = (unsigned long)ptr; unsigned long min_addr = PAGE_OFFSET; - unsigned long align_mask = BYTES_PER_WORD-1; + unsigned long align_mask = BYTES_PER_WORD - 1; unsigned long size = cachep->objsize; struct page *page; @@ -2844,7 +2888,7 @@ int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr) if (unlikely(page_get_cache(page) != cachep)) goto out; return 1; -out: + out: return 0; } @@ -2871,8 +2915,10 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) if (unlikely(!cachep->nodelists[nodeid])) { /* Fall back to __cache_alloc if we run into trouble */ - printk(KERN_WARNING "slab: not allocating in inactive node %d for cache %s\n", nodeid, cachep->name); - return __cache_alloc(cachep,flags); + printk(KERN_WARNING + "slab: not allocating in inactive node %d for cache %s\n", + nodeid, cachep->name); + return __cache_alloc(cachep, flags); } cache_alloc_debugcheck_before(cachep, flags); @@ -2882,7 +2928,9 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) else ptr = __cache_alloc_node(cachep, flags, nodeid); local_irq_restore(save_flags); - ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, __builtin_return_address(0)); + ptr = + cache_alloc_debugcheck_after(cachep, flags, ptr, + __builtin_return_address(0)); return ptr; } @@ -2944,12 +2992,11 @@ EXPORT_SYMBOL(__kmalloc); * Objects should be dereferenced using the per_cpu_ptr macro only. * * @size: how many bytes of memory are required. - * @align: the alignment, which can't be greater than SMP_CACHE_BYTES. */ -void *__alloc_percpu(size_t size, size_t align) +void *__alloc_percpu(size_t size) { int i; - struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL); + struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL); if (!pdata) return NULL; @@ -2973,9 +3020,9 @@ void *__alloc_percpu(size_t size, size_t align) } /* Catch derefs w/o wrappers */ - return (void *) (~(unsigned long) pdata); + return (void *)(~(unsigned long)pdata); -unwind_oom: + unwind_oom: while (--i >= 0) { if (!cpu_possible(i)) continue; @@ -3006,20 +3053,6 @@ void kmem_cache_free(kmem_cache_t *cachep, void *objp) EXPORT_SYMBOL(kmem_cache_free); /** - * kzalloc - allocate memory. The memory is set to zero. - * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate. - */ -void *kzalloc(size_t size, gfp_t flags) -{ - void *ret = kmalloc(size, flags); - if (ret) - memset(ret, 0, size); - return ret; -} -EXPORT_SYMBOL(kzalloc); - -/** * kfree - free previously allocated memory * @objp: pointer returned by kmalloc. * @@ -3038,7 +3071,7 @@ void kfree(const void *objp) local_irq_save(flags); kfree_debugcheck(objp); c = page_get_cache(virt_to_page(objp)); - __cache_free(c, (void*)objp); + __cache_free(c, (void *)objp); local_irq_restore(flags); } EXPORT_SYMBOL(kfree); @@ -3051,17 +3084,16 @@ EXPORT_SYMBOL(kfree); * Don't free memory not originally allocated by alloc_percpu() * The complemented objp is to check for that. */ -void -free_percpu(const void *objp) +void free_percpu(const void *objp) { int i; - struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp); + struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp); /* * We allocate for all cpus so we cannot use for online cpu here. */ for_each_cpu(i) - kfree(p->ptrs[i]); + kfree(p->ptrs[i]); kfree(p); } EXPORT_SYMBOL(free_percpu); @@ -3095,44 +3127,44 @@ static int alloc_kmemlist(kmem_cache_t *cachep) if (!(new_alien = alloc_alien_cache(node, cachep->limit))) goto fail; #endif - if (!(new = alloc_arraycache(node, (cachep->shared* - cachep->batchcount), 0xbaadf00d))) + if (!(new = alloc_arraycache(node, (cachep->shared * + cachep->batchcount), + 0xbaadf00d))) goto fail; if ((l3 = cachep->nodelists[node])) { spin_lock_irq(&l3->list_lock); if ((nc = cachep->nodelists[node]->shared)) - free_block(cachep, nc->entry, - nc->avail, node); + free_block(cachep, nc->entry, nc->avail, node); l3->shared = new; if (!cachep->nodelists[node]->alien) { l3->alien = new_alien; new_alien = NULL; } - l3->free_limit = (1 + nr_cpus_node(node))* - cachep->batchcount + cachep->num; + l3->free_limit = (1 + nr_cpus_node(node)) * + cachep->batchcount + cachep->num; spin_unlock_irq(&l3->list_lock); kfree(nc); free_alien_cache(new_alien); continue; } if (!(l3 = kmalloc_node(sizeof(struct kmem_list3), - GFP_KERNEL, node))) + GFP_KERNEL, node))) goto fail; kmem_list3_init(l3); l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + - ((unsigned long)cachep)%REAPTIMEOUT_LIST3; + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; l3->shared = new; l3->alien = new_alien; - l3->free_limit = (1 + nr_cpus_node(node))* - cachep->batchcount + cachep->num; + l3->free_limit = (1 + nr_cpus_node(node)) * + cachep->batchcount + cachep->num; cachep->nodelists[node] = l3; } return err; -fail: + fail: err = -ENOMEM; return err; } @@ -3154,18 +3186,19 @@ static void do_ccupdate_local(void *info) new->new[smp_processor_id()] = old; } - static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount, - int shared) + int shared) { struct ccupdate_struct new; int i, err; - memset(&new.new,0,sizeof(new.new)); + memset(&new.new, 0, sizeof(new.new)); for_each_online_cpu(i) { - new.new[i] = alloc_arraycache(cpu_to_node(i), limit, batchcount); + new.new[i] = + alloc_arraycache(cpu_to_node(i), limit, batchcount); if (!new.new[i]) { - for (i--; i >= 0; i--) kfree(new.new[i]); + for (i--; i >= 0; i--) + kfree(new.new[i]); return -ENOMEM; } } @@ -3193,13 +3226,12 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount, err = alloc_kmemlist(cachep); if (err) { printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n", - cachep->name, -err); + cachep->name, -err); BUG(); } return 0; } - static void enable_cpucache(kmem_cache_t *cachep) { int err; @@ -3246,14 +3278,14 @@ static void enable_cpucache(kmem_cache_t *cachep) if (limit > 32) limit = 32; #endif - err = do_tune_cpucache(cachep, limit, (limit+1)/2, shared); + err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared); if (err) printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", - cachep->name, -err); + cachep->name, -err); } -static void drain_array_locked(kmem_cache_t *cachep, - struct array_cache *ac, int force, int node) +static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac, + int force, int node) { int tofree; @@ -3261,14 +3293,14 @@ static void drain_array_locked(kmem_cache_t *cachep, if (ac->touched && !force) { ac->touched = 0; } else if (ac->avail) { - tofree = force ? ac->avail : (ac->limit+4)/5; + tofree = force ? ac->avail : (ac->limit + 4) / 5; if (tofree > ac->avail) { - tofree = (ac->avail+1)/2; + tofree = (ac->avail + 1) / 2; } free_block(cachep, ac->entry, tofree, node); ac->avail -= tofree; memmove(ac->entry, &(ac->entry[tofree]), - sizeof(void*)*ac->avail); + sizeof(void *) * ac->avail); } } @@ -3291,13 +3323,14 @@ static void cache_reap(void *unused) if (down_trylock(&cache_chain_sem)) { /* Give up. Setup the next iteration. */ - schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); + schedule_delayed_work(&__get_cpu_var(reap_work), + REAPTIMEOUT_CPUC); return; } list_for_each(walk, &cache_chain) { kmem_cache_t *searchp; - struct list_head* p; + struct list_head *p; int tofree; struct slab *slabp; @@ -3314,7 +3347,7 @@ static void cache_reap(void *unused) spin_lock_irq(&l3->list_lock); drain_array_locked(searchp, ac_data(searchp), 0, - numa_node_id()); + numa_node_id()); if (time_after(l3->next_reap, jiffies)) goto next_unlock; @@ -3323,14 +3356,16 @@ static void cache_reap(void *unused) if (l3->shared) drain_array_locked(searchp, l3->shared, 0, - numa_node_id()); + numa_node_id()); if (l3->free_touched) { l3->free_touched = 0; goto next_unlock; } - tofree = (l3->free_limit+5*searchp->num-1)/(5*searchp->num); + tofree = + (l3->free_limit + 5 * searchp->num - + 1) / (5 * searchp->num); do { p = l3->slabs_free.next; if (p == &(l3->slabs_free)) @@ -3350,10 +3385,10 @@ static void cache_reap(void *unused) spin_unlock_irq(&l3->list_lock); slab_destroy(searchp, slabp); spin_lock_irq(&l3->list_lock); - } while(--tofree > 0); -next_unlock: + } while (--tofree > 0); + next_unlock: spin_unlock_irq(&l3->list_lock); -next: + next: cond_resched(); } check_irq_on(); @@ -3365,32 +3400,37 @@ next: #ifdef CONFIG_PROC_FS -static void *s_start(struct seq_file *m, loff_t *pos) +static void print_slabinfo_header(struct seq_file *m) { - loff_t n = *pos; - struct list_head *p; - - down(&cache_chain_sem); - if (!n) { - /* - * Output format version, so at least we can change it - * without _too_ many complaints. - */ + /* + * Output format version, so at least we can change it + * without _too_ many complaints. + */ #if STATS - seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); + seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); #else - seq_puts(m, "slabinfo - version: 2.1\n"); + seq_puts(m, "slabinfo - version: 2.1\n"); #endif - seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>"); - seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); - seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); + seq_puts(m, "# name <active_objs> <num_objs> <objsize> " + "<objperslab> <pagesperslab>"); + seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); + seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); #if STATS - seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped>" - " <error> <maxfreeable> <nodeallocs> <remotefrees>"); - seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); + seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> " + "<error> <maxfreeable> <nodeallocs> <remotefrees>"); + seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); #endif - seq_putc(m, '\n'); - } + seq_putc(m, '\n'); +} + +static void *s_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + struct list_head *p; + + down(&cache_chain_sem); + if (!n) + print_slabinfo_header(m); p = cache_chain.next; while (n--) { p = p->next; @@ -3405,7 +3445,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos) kmem_cache_t *cachep = p; ++*pos; return cachep->next.next == &cache_chain ? NULL - : list_entry(cachep->next.next, kmem_cache_t, next); + : list_entry(cachep->next.next, kmem_cache_t, next); } static void s_stop(struct seq_file *m, void *p) @@ -3417,11 +3457,11 @@ static int s_show(struct seq_file *m, void *p) { kmem_cache_t *cachep = p; struct list_head *q; - struct slab *slabp; - unsigned long active_objs; - unsigned long num_objs; - unsigned long active_slabs = 0; - unsigned long num_slabs, free_objects = 0, shared_avail = 0; + struct slab *slabp; + unsigned long active_objs; + unsigned long num_objs; + unsigned long active_slabs = 0; + unsigned long num_slabs, free_objects = 0, shared_avail = 0; const char *name; char *error = NULL; int node; @@ -3438,14 +3478,14 @@ static int s_show(struct seq_file *m, void *p) spin_lock(&l3->list_lock); - list_for_each(q,&l3->slabs_full) { + list_for_each(q, &l3->slabs_full) { slabp = list_entry(q, struct slab, list); if (slabp->inuse != cachep->num && !error) error = "slabs_full accounting error"; active_objs += cachep->num; active_slabs++; } - list_for_each(q,&l3->slabs_partial) { + list_for_each(q, &l3->slabs_partial) { slabp = list_entry(q, struct slab, list); if (slabp->inuse == cachep->num && !error) error = "slabs_partial inuse accounting error"; @@ -3454,7 +3494,7 @@ static int s_show(struct seq_file *m, void *p) active_objs += slabp->inuse; active_slabs++; } - list_for_each(q,&l3->slabs_free) { + list_for_each(q, &l3->slabs_free) { slabp = list_entry(q, struct slab, list); if (slabp->inuse && !error) error = "slabs_free/inuse accounting error"; @@ -3465,25 +3505,24 @@ static int s_show(struct seq_file *m, void *p) spin_unlock(&l3->list_lock); } - num_slabs+=active_slabs; - num_objs = num_slabs*cachep->num; + num_slabs += active_slabs; + num_objs = num_slabs * cachep->num; if (num_objs - active_objs != free_objects && !error) error = "free_objects accounting error"; - name = cachep->name; + name = cachep->name; if (error) printk(KERN_ERR "slab: cache %s error: %s\n", name, error); seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", - name, active_objs, num_objs, cachep->objsize, - cachep->num, (1<<cachep->gfporder)); + name, active_objs, num_objs, cachep->objsize, + cachep->num, (1 << cachep->gfporder)); seq_printf(m, " : tunables %4u %4u %4u", - cachep->limit, cachep->batchcount, - cachep->shared); + cachep->limit, cachep->batchcount, cachep->shared); seq_printf(m, " : slabdata %6lu %6lu %6lu", - active_slabs, num_slabs, shared_avail); + active_slabs, num_slabs, shared_avail); #if STATS - { /* list3 stats */ + { /* list3 stats */ unsigned long high = cachep->high_mark; unsigned long allocs = cachep->num_allocations; unsigned long grown = cachep->grown; @@ -3494,9 +3533,7 @@ static int s_show(struct seq_file *m, void *p) unsigned long node_frees = cachep->node_frees; seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ - %4lu %4lu %4lu %4lu", - allocs, high, grown, reaped, errors, - max_freeable, node_allocs, node_frees); + %4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees); } /* cpu stats */ { @@ -3506,7 +3543,7 @@ static int s_show(struct seq_file *m, void *p) unsigned long freemiss = atomic_read(&cachep->freemiss); seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu", - allochit, allocmiss, freehit, freemiss); + allochit, allocmiss, freehit, freemiss); } #endif seq_putc(m, '\n'); @@ -3529,10 +3566,10 @@ static int s_show(struct seq_file *m, void *p) */ struct seq_operations slabinfo_op = { - .start = s_start, - .next = s_next, - .stop = s_stop, - .show = s_show, + .start = s_start, + .next = s_next, + .stop = s_stop, + .show = s_show, }; #define MAX_SLABINFO_WRITE 128 @@ -3543,18 +3580,18 @@ struct seq_operations slabinfo_op = { * @count: data length * @ppos: unused */ -ssize_t slabinfo_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) +ssize_t slabinfo_write(struct file *file, const char __user * buffer, + size_t count, loff_t *ppos) { - char kbuf[MAX_SLABINFO_WRITE+1], *tmp; + char kbuf[MAX_SLABINFO_WRITE + 1], *tmp; int limit, batchcount, shared, res; struct list_head *p; - + if (count > MAX_SLABINFO_WRITE) return -EINVAL; if (copy_from_user(&kbuf, buffer, count)) return -EFAULT; - kbuf[MAX_SLABINFO_WRITE] = '\0'; + kbuf[MAX_SLABINFO_WRITE] = '\0'; tmp = strchr(kbuf, ' '); if (!tmp) @@ -3567,18 +3604,17 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer, /* Find the cache in the chain of caches. */ down(&cache_chain_sem); res = -EINVAL; - list_for_each(p,&cache_chain) { + list_for_each(p, &cache_chain) { kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next); if (!strcmp(cachep->name, kbuf)) { if (limit < 1 || batchcount < 1 || - batchcount > limit || - shared < 0) { + batchcount > limit || shared < 0) { res = 0; } else { res = do_tune_cpucache(cachep, limit, - batchcount, shared); + batchcount, shared); } break; } @@ -3609,26 +3645,3 @@ unsigned int ksize(const void *objp) return obj_reallen(page_get_cache(virt_to_page(objp))); } - - -/* - * kstrdup - allocate space for and copy an existing string - * - * @s: the string to duplicate - * @gfp: the GFP mask used in the kmalloc() call when allocating memory - */ -char *kstrdup(const char *s, gfp_t gfp) -{ - size_t len; - char *buf; - - if (!s) - return NULL; - - len = strlen(s) + 1; - buf = kmalloc(len, gfp); - if (buf) - memcpy(buf, s, len); - return buf; -} -EXPORT_SYMBOL(kstrdup); diff --git a/mm/slob.c b/mm/slob.c new file mode 100644 index 000000000000..1c240c4b71d9 --- /dev/null +++ b/mm/slob.c @@ -0,0 +1,385 @@ +/* + * SLOB Allocator: Simple List Of Blocks + * + * Matt Mackall <mpm@selenic.com> 12/30/03 + * + * How SLOB works: + * + * The core of SLOB is a traditional K&R style heap allocator, with + * support for returning aligned objects. The granularity of this + * allocator is 8 bytes on x86, though it's perhaps possible to reduce + * this to 4 if it's deemed worth the effort. The slob heap is a + * singly-linked list of pages from __get_free_page, grown on demand + * and allocation from the heap is currently first-fit. + * + * Above this is an implementation of kmalloc/kfree. Blocks returned + * from kmalloc are 8-byte aligned and prepended with a 8-byte header. + * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls + * __get_free_pages directly so that it can return page-aligned blocks + * and keeps a linked list of such pages and their orders. These + * objects are detected in kfree() by their page alignment. + * + * SLAB is emulated on top of SLOB by simply calling constructors and + * destructors for every SLAB allocation. Objects are returned with + * the 8-byte alignment unless the SLAB_MUST_HWCACHE_ALIGN flag is + * set, in which case the low-level allocator will fragment blocks to + * create the proper alignment. Again, objects of page-size or greater + * are allocated by calling __get_free_pages. As SLAB objects know + * their size, no separate size bookkeeping is necessary and there is + * essentially no allocation space overhead. + */ + +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/cache.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/timer.h> + +struct slob_block { + int units; + struct slob_block *next; +}; +typedef struct slob_block slob_t; + +#define SLOB_UNIT sizeof(slob_t) +#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT) +#define SLOB_ALIGN L1_CACHE_BYTES + +struct bigblock { + int order; + void *pages; + struct bigblock *next; +}; +typedef struct bigblock bigblock_t; + +static slob_t arena = { .next = &arena, .units = 1 }; +static slob_t *slobfree = &arena; +static bigblock_t *bigblocks; +static DEFINE_SPINLOCK(slob_lock); +static DEFINE_SPINLOCK(block_lock); + +static void slob_free(void *b, int size); + +static void *slob_alloc(size_t size, gfp_t gfp, int align) +{ + slob_t *prev, *cur, *aligned = 0; + int delta = 0, units = SLOB_UNITS(size); + unsigned long flags; + + spin_lock_irqsave(&slob_lock, flags); + prev = slobfree; + for (cur = prev->next; ; prev = cur, cur = cur->next) { + if (align) { + aligned = (slob_t *)ALIGN((unsigned long)cur, align); + delta = aligned - cur; + } + if (cur->units >= units + delta) { /* room enough? */ + if (delta) { /* need to fragment head to align? */ + aligned->units = cur->units - delta; + aligned->next = cur->next; + cur->next = aligned; + cur->units = delta; + prev = cur; + cur = aligned; + } + + if (cur->units == units) /* exact fit? */ + prev->next = cur->next; /* unlink */ + else { /* fragment */ + prev->next = cur + units; + prev->next->units = cur->units - units; + prev->next->next = cur->next; + cur->units = units; + } + + slobfree = prev; + spin_unlock_irqrestore(&slob_lock, flags); + return cur; + } + if (cur == slobfree) { + spin_unlock_irqrestore(&slob_lock, flags); + + if (size == PAGE_SIZE) /* trying to shrink arena? */ + return 0; + + cur = (slob_t *)__get_free_page(gfp); + if (!cur) + return 0; + + slob_free(cur, PAGE_SIZE); + spin_lock_irqsave(&slob_lock, flags); + cur = slobfree; + } + } +} + +static void slob_free(void *block, int size) +{ + slob_t *cur, *b = (slob_t *)block; + unsigned long flags; + + if (!block) + return; + + if (size) + b->units = SLOB_UNITS(size); + + /* Find reinsertion point */ + spin_lock_irqsave(&slob_lock, flags); + for (cur = slobfree; !(b > cur && b < cur->next); cur = cur->next) + if (cur >= cur->next && (b > cur || b < cur->next)) + break; + + if (b + b->units == cur->next) { + b->units += cur->next->units; + b->next = cur->next->next; + } else + b->next = cur->next; + + if (cur + cur->units == b) { + cur->units += b->units; + cur->next = b->next; + } else + cur->next = b; + + slobfree = cur; + + spin_unlock_irqrestore(&slob_lock, flags); +} + +static int FASTCALL(find_order(int size)); +static int fastcall find_order(int size) +{ + int order = 0; + for ( ; size > 4096 ; size >>=1) + order++; + return order; +} + +void *kmalloc(size_t size, gfp_t gfp) +{ + slob_t *m; + bigblock_t *bb; + unsigned long flags; + + if (size < PAGE_SIZE - SLOB_UNIT) { + m = slob_alloc(size + SLOB_UNIT, gfp, 0); + return m ? (void *)(m + 1) : 0; + } + + bb = slob_alloc(sizeof(bigblock_t), gfp, 0); + if (!bb) + return 0; + + bb->order = find_order(size); + bb->pages = (void *)__get_free_pages(gfp, bb->order); + + if (bb->pages) { + spin_lock_irqsave(&block_lock, flags); + bb->next = bigblocks; + bigblocks = bb; + spin_unlock_irqrestore(&block_lock, flags); + return bb->pages; + } + + slob_free(bb, sizeof(bigblock_t)); + return 0; +} + +EXPORT_SYMBOL(kmalloc); + +void kfree(const void *block) +{ + bigblock_t *bb, **last = &bigblocks; + unsigned long flags; + + if (!block) + return; + + if (!((unsigned long)block & (PAGE_SIZE-1))) { + /* might be on the big block list */ + spin_lock_irqsave(&block_lock, flags); + for (bb = bigblocks; bb; last = &bb->next, bb = bb->next) { + if (bb->pages == block) { + *last = bb->next; + spin_unlock_irqrestore(&block_lock, flags); + free_pages((unsigned long)block, bb->order); + slob_free(bb, sizeof(bigblock_t)); + return; + } + } + spin_unlock_irqrestore(&block_lock, flags); + } + + slob_free((slob_t *)block - 1, 0); + return; +} + +EXPORT_SYMBOL(kfree); + +unsigned int ksize(const void *block) +{ + bigblock_t *bb; + unsigned long flags; + + if (!block) + return 0; + + if (!((unsigned long)block & (PAGE_SIZE-1))) { + spin_lock_irqsave(&block_lock, flags); + for (bb = bigblocks; bb; bb = bb->next) + if (bb->pages == block) { + spin_unlock_irqrestore(&slob_lock, flags); + return PAGE_SIZE << bb->order; + } + spin_unlock_irqrestore(&block_lock, flags); + } + + return ((slob_t *)block - 1)->units * SLOB_UNIT; +} + +struct kmem_cache { + unsigned int size, align; + const char *name; + void (*ctor)(void *, struct kmem_cache *, unsigned long); + void (*dtor)(void *, struct kmem_cache *, unsigned long); +}; + +struct kmem_cache *kmem_cache_create(const char *name, size_t size, + size_t align, unsigned long flags, + void (*ctor)(void*, struct kmem_cache *, unsigned long), + void (*dtor)(void*, struct kmem_cache *, unsigned long)) +{ + struct kmem_cache *c; + + c = slob_alloc(sizeof(struct kmem_cache), flags, 0); + + if (c) { + c->name = name; + c->size = size; + c->ctor = ctor; + c->dtor = dtor; + /* ignore alignment unless it's forced */ + c->align = (flags & SLAB_MUST_HWCACHE_ALIGN) ? SLOB_ALIGN : 0; + if (c->align < align) + c->align = align; + } + + return c; +} +EXPORT_SYMBOL(kmem_cache_create); + +int kmem_cache_destroy(struct kmem_cache *c) +{ + slob_free(c, sizeof(struct kmem_cache)); + return 0; +} +EXPORT_SYMBOL(kmem_cache_destroy); + +void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags) +{ + void *b; + + if (c->size < PAGE_SIZE) + b = slob_alloc(c->size, flags, c->align); + else + b = (void *)__get_free_pages(flags, find_order(c->size)); + + if (c->ctor) + c->ctor(b, c, SLAB_CTOR_CONSTRUCTOR); + + return b; +} +EXPORT_SYMBOL(kmem_cache_alloc); + +void kmem_cache_free(struct kmem_cache *c, void *b) +{ + if (c->dtor) + c->dtor(b, c, 0); + + if (c->size < PAGE_SIZE) + slob_free(b, c->size); + else + free_pages((unsigned long)b, find_order(c->size)); +} +EXPORT_SYMBOL(kmem_cache_free); + +unsigned int kmem_cache_size(struct kmem_cache *c) +{ + return c->size; +} +EXPORT_SYMBOL(kmem_cache_size); + +const char *kmem_cache_name(struct kmem_cache *c) +{ + return c->name; +} +EXPORT_SYMBOL(kmem_cache_name); + +static struct timer_list slob_timer = TIMER_INITIALIZER( + (void (*)(unsigned long))kmem_cache_init, 0, 0); + +void kmem_cache_init(void) +{ + void *p = slob_alloc(PAGE_SIZE, 0, PAGE_SIZE-1); + + if (p) + free_page((unsigned long)p); + + mod_timer(&slob_timer, jiffies + HZ); +} + +atomic_t slab_reclaim_pages = ATOMIC_INIT(0); +EXPORT_SYMBOL(slab_reclaim_pages); + +#ifdef CONFIG_SMP + +void *__alloc_percpu(size_t size, size_t align) +{ + int i; + struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL); + + if (!pdata) + return NULL; + + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_possible(i)) + continue; + pdata->ptrs[i] = kmalloc(size, GFP_KERNEL); + if (!pdata->ptrs[i]) + goto unwind_oom; + memset(pdata->ptrs[i], 0, size); + } + + /* Catch derefs w/o wrappers */ + return (void *) (~(unsigned long) pdata); + +unwind_oom: + while (--i >= 0) { + if (!cpu_possible(i)) + continue; + kfree(pdata->ptrs[i]); + } + kfree(pdata); + return NULL; +} +EXPORT_SYMBOL(__alloc_percpu); + +void +free_percpu(const void *objp) +{ + int i; + struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp); + + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_possible(i)) + continue; + kfree(p->ptrs[i]); + } + kfree(p); +} +EXPORT_SYMBOL(free_percpu); + +#endif diff --git a/mm/sparse.c b/mm/sparse.c index 72079b538e2d..0a51f36ba3a1 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -18,10 +18,10 @@ */ #ifdef CONFIG_SPARSEMEM_EXTREME struct mem_section *mem_section[NR_SECTION_ROOTS] - ____cacheline_maxaligned_in_smp; + ____cacheline_internodealigned_in_smp; #else struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] - ____cacheline_maxaligned_in_smp; + ____cacheline_internodealigned_in_smp; #endif EXPORT_SYMBOL(mem_section); diff --git a/mm/swap_state.c b/mm/swap_state.c index fc2aecb70a95..7b09ac503fec 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -141,7 +141,7 @@ void __delete_from_swap_cache(struct page *page) * Allocate swap space for the page and add the page to the * swap cache. Caller needs to hold the page lock. */ -int add_to_swap(struct page * page) +int add_to_swap(struct page * page, gfp_t gfp_mask) { swp_entry_t entry; int err; @@ -166,7 +166,7 @@ int add_to_swap(struct page * page) * Add it to the swap cache and mark it dirty */ err = __add_to_swap_cache(page, entry, - GFP_ATOMIC|__GFP_NOMEMALLOC|__GFP_NOWARN); + gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN); switch (err) { case 0: /* Success */ diff --git a/mm/swapfile.c b/mm/swapfile.c index 6da4b28b896b..80f948a2028b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1493,7 +1493,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) goto bad_swap; if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) goto bad_swap; - + /* OK, set up the swap map and apply the bad block list */ if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) { error = -ENOMEM; @@ -1502,17 +1502,17 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) error = 0; memset(p->swap_map, 0, maxpages * sizeof(short)); - for (i=0; i<swap_header->info.nr_badpages; i++) { - int page = swap_header->info.badpages[i]; - if (page <= 0 || page >= swap_header->info.last_page) + for (i = 0; i < swap_header->info.nr_badpages; i++) { + int page_nr = swap_header->info.badpages[i]; + if (page_nr <= 0 || page_nr >= swap_header->info.last_page) error = -EINVAL; else - p->swap_map[page] = SWAP_MAP_BAD; + p->swap_map[page_nr] = SWAP_MAP_BAD; } nr_good_pages = swap_header->info.last_page - swap_header->info.nr_badpages - 1 /* header page */; - if (error) + if (error) goto bad_swap; } diff --git a/mm/truncate.c b/mm/truncate.c index 7dee32745901..b1a463d0fe71 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -249,7 +249,6 @@ unlock: break; } pagevec_release(&pvec); - cond_resched(); } return ret; } diff --git a/mm/util.c b/mm/util.c new file mode 100644 index 000000000000..5f4bb59da63c --- /dev/null +++ b/mm/util.c @@ -0,0 +1,39 @@ +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/module.h> + +/** + * kzalloc - allocate memory. The memory is set to zero. + * @size: how many bytes of memory are required. + * @flags: the type of memory to allocate. + */ +void *kzalloc(size_t size, gfp_t flags) +{ + void *ret = kmalloc(size, flags); + if (ret) + memset(ret, 0, size); + return ret; +} +EXPORT_SYMBOL(kzalloc); + +/* + * kstrdup - allocate space for and copy an existing string + * + * @s: the string to duplicate + * @gfp: the GFP mask used in the kmalloc() call when allocating memory + */ +char *kstrdup(const char *s, gfp_t gfp) +{ + size_t len; + char *buf; + + if (!s) + return NULL; + + len = strlen(s) + 1; + buf = kmalloc(len, gfp); + if (buf) + memcpy(buf, s, len); + return buf; +} +EXPORT_SYMBOL(kstrdup); diff --git a/mm/vmscan.c b/mm/vmscan.c index be8235fb1939..bf903b2d198f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -180,8 +180,7 @@ EXPORT_SYMBOL(remove_shrinker); * * Returns the number of slab objects which we shrunk. */ -static int shrink_slab(unsigned long scanned, gfp_t gfp_mask, - unsigned long lru_pages) +int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages) { struct shrinker *shrinker; int ret = 0; @@ -269,9 +268,7 @@ static inline int is_page_cache_freeable(struct page *page) static int may_write_to_queue(struct backing_dev_info *bdi) { - if (current_is_kswapd()) - return 1; - if (current_is_pdflush()) /* This is unlikely, but why not... */ + if (current->flags & PF_SWAPWRITE) return 1; if (!bdi_write_congested(bdi)) return 1; @@ -376,6 +373,43 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) return PAGE_CLEAN; } +static int remove_mapping(struct address_space *mapping, struct page *page) +{ + if (!mapping) + return 0; /* truncate got there first */ + + write_lock_irq(&mapping->tree_lock); + + /* + * The non-racy check for busy page. It is critical to check + * PageDirty _after_ making sure that the page is freeable and + * not in use by anybody. (pagecache + us == 2) + */ + if (unlikely(page_count(page) != 2)) + goto cannot_free; + smp_rmb(); + if (unlikely(PageDirty(page))) + goto cannot_free; + + if (PageSwapCache(page)) { + swp_entry_t swap = { .val = page_private(page) }; + __delete_from_swap_cache(page); + write_unlock_irq(&mapping->tree_lock); + swap_free(swap); + __put_page(page); /* The pagecache ref */ + return 1; + } + + __remove_from_page_cache(page); + write_unlock_irq(&mapping->tree_lock); + __put_page(page); + return 1; + +cannot_free: + write_unlock_irq(&mapping->tree_lock); + return 0; +} + /* * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed */ @@ -424,7 +458,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) * Try to allocate it some swap space here. */ if (PageAnon(page) && !PageSwapCache(page)) { - if (!add_to_swap(page)) + if (!add_to_swap(page, GFP_ATOMIC)) goto activate_locked; } #endif /* CONFIG_SWAP */ @@ -507,36 +541,8 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) goto free_it; } - if (!mapping) - goto keep_locked; /* truncate got there first */ - - write_lock_irq(&mapping->tree_lock); - - /* - * The non-racy check for busy page. It is critical to check - * PageDirty _after_ making sure that the page is freeable and - * not in use by anybody. (pagecache + us == 2) - */ - if (unlikely(page_count(page) != 2)) - goto cannot_free; - smp_rmb(); - if (unlikely(PageDirty(page))) - goto cannot_free; - -#ifdef CONFIG_SWAP - if (PageSwapCache(page)) { - swp_entry_t swap = { .val = page_private(page) }; - __delete_from_swap_cache(page); - write_unlock_irq(&mapping->tree_lock); - swap_free(swap); - __put_page(page); /* The pagecache ref */ - goto free_it; - } -#endif /* CONFIG_SWAP */ - - __remove_from_page_cache(page); - write_unlock_irq(&mapping->tree_lock); - __put_page(page); + if (!remove_mapping(mapping, page)) + goto keep_locked; free_it: unlock_page(page); @@ -545,10 +551,6 @@ free_it: __pagevec_release_nonlru(&freed_pvec); continue; -cannot_free: - write_unlock_irq(&mapping->tree_lock); - goto keep_locked; - activate_locked: SetPageActive(page); pgactivate++; @@ -566,6 +568,241 @@ keep: return reclaimed; } +#ifdef CONFIG_MIGRATION +static inline void move_to_lru(struct page *page) +{ + list_del(&page->lru); + if (PageActive(page)) { + /* + * lru_cache_add_active checks that + * the PG_active bit is off. + */ + ClearPageActive(page); + lru_cache_add_active(page); + } else { + lru_cache_add(page); + } + put_page(page); +} + +/* + * Add isolated pages on the list back to the LRU + * + * returns the number of pages put back. + */ +int putback_lru_pages(struct list_head *l) +{ + struct page *page; + struct page *page2; + int count = 0; + + list_for_each_entry_safe(page, page2, l, lru) { + move_to_lru(page); + count++; + } + return count; +} + +/* + * swapout a single page + * page is locked upon entry, unlocked on exit + */ +static int swap_page(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + + if (page_mapped(page) && mapping) + if (try_to_unmap(page) != SWAP_SUCCESS) + goto unlock_retry; + + if (PageDirty(page)) { + /* Page is dirty, try to write it out here */ + switch(pageout(page, mapping)) { + case PAGE_KEEP: + case PAGE_ACTIVATE: + goto unlock_retry; + + case PAGE_SUCCESS: + goto retry; + + case PAGE_CLEAN: + ; /* try to free the page below */ + } + } + + if (PagePrivate(page)) { + if (!try_to_release_page(page, GFP_KERNEL) || + (!mapping && page_count(page) == 1)) + goto unlock_retry; + } + + if (remove_mapping(mapping, page)) { + /* Success */ + unlock_page(page); + return 0; + } + +unlock_retry: + unlock_page(page); + +retry: + return -EAGAIN; +} +/* + * migrate_pages + * + * Two lists are passed to this function. The first list + * contains the pages isolated from the LRU to be migrated. + * The second list contains new pages that the pages isolated + * can be moved to. If the second list is NULL then all + * pages are swapped out. + * + * The function returns after 10 attempts or if no pages + * are movable anymore because t has become empty + * or no retryable pages exist anymore. + * + * SIMPLIFIED VERSION: This implementation of migrate_pages + * is only swapping out pages and never touches the second + * list. The direct migration patchset + * extends this function to avoid the use of swap. + * + * Return: Number of pages not migrated when "to" ran empty. + */ +int migrate_pages(struct list_head *from, struct list_head *to, + struct list_head *moved, struct list_head *failed) +{ + int retry; + int nr_failed = 0; + int pass = 0; + struct page *page; + struct page *page2; + int swapwrite = current->flags & PF_SWAPWRITE; + int rc; + + if (!swapwrite) + current->flags |= PF_SWAPWRITE; + +redo: + retry = 0; + + list_for_each_entry_safe(page, page2, from, lru) { + cond_resched(); + + rc = 0; + if (page_count(page) == 1) + /* page was freed from under us. So we are done. */ + goto next; + + /* + * Skip locked pages during the first two passes to give the + * functions holding the lock time to release the page. Later we + * use lock_page() to have a higher chance of acquiring the + * lock. + */ + rc = -EAGAIN; + if (pass > 2) + lock_page(page); + else + if (TestSetPageLocked(page)) + goto next; + + /* + * Only wait on writeback if we have already done a pass where + * we we may have triggered writeouts for lots of pages. + */ + if (pass > 0) { + wait_on_page_writeback(page); + } else { + if (PageWriteback(page)) + goto unlock_page; + } + + /* + * Anonymous pages must have swap cache references otherwise + * the information contained in the page maps cannot be + * preserved. + */ + if (PageAnon(page) && !PageSwapCache(page)) { + if (!add_to_swap(page, GFP_KERNEL)) { + rc = -ENOMEM; + goto unlock_page; + } + } + + /* + * Page is properly locked and writeback is complete. + * Try to migrate the page. + */ + rc = swap_page(page); + goto next; + +unlock_page: + unlock_page(page); + +next: + if (rc == -EAGAIN) { + retry++; + } else if (rc) { + /* Permanent failure */ + list_move(&page->lru, failed); + nr_failed++; + } else { + /* Success */ + list_move(&page->lru, moved); + } + } + if (retry && pass++ < 10) + goto redo; + + if (!swapwrite) + current->flags &= ~PF_SWAPWRITE; + + return nr_failed + retry; +} + +static void lru_add_drain_per_cpu(void *dummy) +{ + lru_add_drain(); +} + +/* + * Isolate one page from the LRU lists and put it on the + * indicated list. Do necessary cache draining if the + * page is not on the LRU lists yet. + * + * Result: + * 0 = page not on LRU list + * 1 = page removed from LRU list and added to the specified list. + * -ENOENT = page is being freed elsewhere. + */ +int isolate_lru_page(struct page *page) +{ + int rc = 0; + struct zone *zone = page_zone(page); + +redo: + spin_lock_irq(&zone->lru_lock); + rc = __isolate_lru_page(page); + if (rc == 1) { + if (PageActive(page)) + del_page_from_active_list(zone, page); + else + del_page_from_inactive_list(zone, page); + } + spin_unlock_irq(&zone->lru_lock); + if (rc == 0) { + /* + * Maybe this page is still waiting for a cpu to drain it + * from one of the lru lists? + */ + rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL); + if (rc == 0 && PageLRU(page)) + goto redo; + } + return rc; +} +#endif + /* * zone->lru_lock is heavily contended. Some of the functions that * shrink the lists perform better by taking out a batch of pages @@ -594,20 +831,18 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src, page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); - if (!TestClearPageLRU(page)) - BUG(); - list_del(&page->lru); - if (get_page_testone(page)) { - /* - * It is being freed elsewhere - */ - __put_page(page); - SetPageLRU(page); - list_add(&page->lru, src); - continue; - } else { - list_add(&page->lru, dst); + switch (__isolate_lru_page(page)) { + case 1: + /* Succeeded to isolate page */ + list_move(&page->lru, dst); nr_taken++; + break; + case -ENOENT: + /* Not possible to isolate */ + list_move(&page->lru, src); + break; + default: + BUG(); } } @@ -1226,7 +1461,7 @@ static int kswapd(void *p) * us from recursively trying to free more memory as we're * trying to free the first piece of memory in the first place). */ - tsk->flags |= PF_MEMALLOC|PF_KSWAPD; + tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; order = 0; for ( ; ; ) { diff --git a/net/802/Makefile b/net/802/Makefile index 01861929591a..977704a54f68 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -2,8 +2,6 @@ # Makefile for the Linux 802.x protocol layers. # -obj-y := p8023.o - # Check the p8022 selections against net/core/Makefile. obj-$(CONFIG_SYSCTL) += sysctl_net_802.o obj-$(CONFIG_LLC) += p8022.o psnap.o @@ -11,5 +9,5 @@ obj-$(CONFIG_TR) += p8022.o psnap.o tr.o sysctl_net_802.o obj-$(CONFIG_NET_FC) += fc.o obj-$(CONFIG_FDDI) += fddi.o obj-$(CONFIG_HIPPI) += hippi.o -obj-$(CONFIG_IPX) += p8022.o psnap.o +obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o obj-$(CONFIG_ATALK) += p8022.o psnap.o diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3f244670764a..00f983226672 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -986,6 +986,7 @@ int dccp_v4_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + nf_reset(skb); return sk_receive_skb(sk, skb); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index c609dc78f487..df074259f9c3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -27,6 +27,7 @@ #include <net/ipv6.h> #include <net/protocol.h> #include <net/transp_v6.h> +#include <net/ip6_checksum.h> #include <net/xfrm.h> #include "dccp.h" @@ -1028,7 +1029,7 @@ discard: return 0; } -static int dccp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int dccp_v6_rcv(struct sk_buff **pskb) { const struct dccp_hdr *dh; struct sk_buff *skb = *pskb; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 912c42f57c79..de16e944777f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -832,6 +832,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, gre_hlen); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index e45846ae570b..18d7fad474d7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -185,7 +185,6 @@ int ip_call_ra_chain(struct sk_buff *skb) raw_rcv(last, skb2); } last = sk; - nf_reset(skb); } } @@ -204,10 +203,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) __skb_pull(skb, ihl); - /* Free reference early: we don't need it any more, and it may - hold ip_conntrack module loaded indefinitely. */ - nf_reset(skb); - /* Point into the IP datagram, just past the header. */ skb->h.raw = skb->data; @@ -232,10 +227,12 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { int ret; - if (!ipprot->no_policy && - !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); - goto out; + if (!ipprot->no_policy) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + kfree_skb(skb); + goto out; + } + nf_reset(skb); } ret = ipprot->handler(skb); if (ret < 0) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8b1c9bd0091e..c2169b47ddfd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -85,6 +85,8 @@ int sysctl_ip_default_ttl = IPDEFTTL; +static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)); + /* Generate a checksum for an outgoing IP datagram. */ __inline__ void ip_send_check(struct iphdr *iph) { @@ -202,6 +204,11 @@ static inline int ip_finish_output2(struct sk_buff *skb) static inline int ip_finish_output(struct sk_buff *skb) { +#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) + /* Policy lookup after SNAT yielded a new policy */ + if (skb->dst->xfrm != NULL) + return xfrm4_output_finish(skb); +#endif if (skb->len > dst_mtu(skb->dst) && !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) return ip_fragment(skb, ip_finish_output2); @@ -409,7 +416,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) * single device frame, and queue such a frame for sending. */ -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) +static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) { struct iphdr *iph; int raw = 0; @@ -1391,7 +1398,6 @@ void __init ip_init(void) #endif } -EXPORT_SYMBOL(ip_fragment); EXPORT_SYMBOL(ip_generic_getfrag); EXPORT_SYMBOL(ip_queue_xmit); EXPORT_SYMBOL(ip_send_check); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 35571cff81c6..bbd85f5ec985 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -621,6 +621,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index ae0779d82c5d..3321092b0914 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -7,11 +7,13 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> +#include <linux/ip.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/icmp.h> #include <net/route.h> -#include <linux/ip.h> +#include <net/xfrm.h> +#include <net/ip.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct sk_buff **pskb) @@ -33,7 +35,6 @@ int ip_route_me_harder(struct sk_buff **pskb) #ifdef CONFIG_IP_ROUTE_FWMARK fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; #endif - fl.proto = iph->protocol; if (ip_route_output_key(&rt, &fl) != 0) return -1; @@ -60,6 +61,13 @@ int ip_route_me_harder(struct sk_buff **pskb) if ((*pskb)->dst->error) return -1; +#ifdef CONFIG_XFRM + if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) && + xfrm_decode_session(*pskb, &fl, AF_INET) == 0) + if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0)) + return -1; +#endif + /* Change in oif may mean change in hh_len. */ hh_len = (*pskb)->dst->dev->hard_header_len; if (skb_headroom(*pskb) < hh_len) { @@ -78,6 +86,9 @@ int ip_route_me_harder(struct sk_buff **pskb) } EXPORT_SYMBOL(ip_route_me_harder); +void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); +EXPORT_SYMBOL(ip_nat_decode_session); + /* * Extra routing may needed on local out, as the QUEUE target never * returns control to the table. diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 88a60650e6b8..a9893ec03e02 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -487,6 +487,16 @@ config IP_NF_MATCH_STRING To compile it as a module, choose M here. If unsure, say N. +config IP_NF_MATCH_POLICY + tristate "IPsec policy match support" + depends on IP_NF_IPTABLES && XFRM + help + Policy matching allows you to match packets based on the + IPsec policy that was used during decapsulation/will + be used during encapsulation. + + To compile it as a module, choose M here. If unsure, say N. + # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index d0a447e520a2..549b01a648b3 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -72,6 +72,7 @@ obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o +obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 977fb59d4563..0b25050981a1 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/sched.h> #include <linux/timer.h> +#include <linux/interrupt.h> #include <linux/netfilter.h> #include <linux/module.h> #include <linux/in.h> diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index f04111f74e09..8b8a1f00bbf4 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -55,6 +55,44 @@ : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \ : "*ERROR*"))) +#ifdef CONFIG_XFRM +static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + struct ip_conntrack *ct; + struct ip_conntrack_tuple *t; + enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; + unsigned long statusbit; + + ct = ip_conntrack_get(skb, &ctinfo); + if (ct == NULL) + return; + dir = CTINFO2DIR(ctinfo); + t = &ct->tuplehash[dir].tuple; + + if (dir == IP_CT_DIR_ORIGINAL) + statusbit = IPS_DST_NAT; + else + statusbit = IPS_SRC_NAT; + + if (ct->status & statusbit) { + fl->fl4_dst = t->dst.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP) + fl->fl_ip_dport = t->dst.u.tcp.port; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl->fl4_src = t->src.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP) + fl->fl_ip_sport = t->src.u.tcp.port; + } +} +#endif + static unsigned int ip_nat_fn(unsigned int hooknum, struct sk_buff **pskb, @@ -162,18 +200,20 @@ ip_nat_in(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - u_int32_t saddr, daddr; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; unsigned int ret; - saddr = (*pskb)->nh.iph->saddr; - daddr = (*pskb)->nh.iph->daddr; - ret = ip_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN - && ((*pskb)->nh.iph->saddr != saddr - || (*pskb)->nh.iph->daddr != daddr)) { - dst_release((*pskb)->dst); - (*pskb)->dst = NULL; + && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.ip != + ct->tuplehash[!dir].tuple.dst.ip) { + dst_release((*pskb)->dst); + (*pskb)->dst = NULL; + } } return ret; } @@ -185,12 +225,30 @@ ip_nat_out(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) return NF_ACCEPT; - return ip_nat_fn(hooknum, pskb, in, out, okfn); + ret = ip_nat_fn(hooknum, pskb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN + && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.ip != + ct->tuplehash[!dir].tuple.dst.ip +#ifdef CONFIG_XFRM + || ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all +#endif + ) + return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + } + return ret; } static unsigned int @@ -200,7 +258,8 @@ ip_nat_local_fn(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - u_int32_t saddr, daddr; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; unsigned int ret; /* root is playing with raw sockets. */ @@ -208,14 +267,20 @@ ip_nat_local_fn(unsigned int hooknum, || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) return NF_ACCEPT; - saddr = (*pskb)->nh.iph->saddr; - daddr = (*pskb)->nh.iph->daddr; - ret = ip_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN - && ((*pskb)->nh.iph->saddr != saddr - || (*pskb)->nh.iph->daddr != daddr)) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.ip != + ct->tuplehash[!dir].tuple.src.ip +#ifdef CONFIG_XFRM + || ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[dir].tuple.src.u.all +#endif + ) + return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + } return ret; } @@ -303,10 +368,14 @@ static int init_or_cleanup(int init) if (!init) goto cleanup; +#ifdef CONFIG_XFRM + BUG_ON(ip_nat_decode_session != NULL); + ip_nat_decode_session = nat_decode_session; +#endif ret = ip_nat_rule_init(); if (ret < 0) { printk("ip_nat_init: can't setup rules.\n"); - goto cleanup_nothing; + goto cleanup_decode_session; } ret = nf_register_hook(&ip_nat_in_ops); if (ret < 0) { @@ -354,7 +423,11 @@ static int init_or_cleanup(int init) nf_unregister_hook(&ip_nat_in_ops); cleanup_rule_init: ip_nat_rule_cleanup(); - cleanup_nothing: + cleanup_decode_session: +#ifdef CONFIG_XFRM + ip_nat_decode_session = NULL; + synchronize_net(); +#endif return ret; } diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c new file mode 100644 index 000000000000..709debcc69c9 --- /dev/null +++ b/net/ipv4/netfilter/ipt_policy.c @@ -0,0 +1,170 @@ +/* IP tables module for matching IPsec policy + * + * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <net/xfrm.h> + +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_policy.h> + +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("IPtables IPsec policy matching module"); +MODULE_LICENSE("GPL"); + + +static inline int +match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e) +{ +#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x)) + + return MATCH(saddr, x->props.saddr.a4 & e->smask) && + MATCH(daddr, x->id.daddr.a4 & e->dmask) && + MATCH(proto, x->id.proto) && + MATCH(mode, x->props.mode) && + MATCH(spi, x->id.spi) && + MATCH(reqid, x->props.reqid); +} + +static int +match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info) +{ + const struct ipt_policy_elem *e; + struct sec_path *sp = skb->sp; + int strict = info->flags & IPT_POLICY_MATCH_STRICT; + int i, pos; + + if (sp == NULL) + return -1; + if (strict && info->len != sp->len) + return 0; + + for (i = sp->len - 1; i >= 0; i--) { + pos = strict ? i - sp->len + 1 : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(sp->x[i].xvec, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int +match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info) +{ + const struct ipt_policy_elem *e; + struct dst_entry *dst = skb->dst; + int strict = info->flags & IPT_POLICY_MATCH_STRICT; + int i, pos; + + if (dst->xfrm == NULL) + return -1; + + for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { + pos = strict ? i : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(dst->xfrm, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, int offset, int *hotdrop) +{ + const struct ipt_policy_info *info = matchinfo; + int ret; + + if (info->flags & IPT_POLICY_MATCH_IN) + ret = match_policy_in(skb, info); + else + ret = match_policy_out(skb, info); + + if (ret < 0) + ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0; + else if (info->flags & IPT_POLICY_MATCH_NONE) + ret = 0; + + return ret; +} + +static int checkentry(const char *tablename, const struct ipt_ip *ip, + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_policy_info *info = matchinfo; + + if (matchsize != IPT_ALIGN(sizeof(*info))) { + printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n", + matchsize, IPT_ALIGN(sizeof(*info))); + return 0; + } + if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) { + printk(KERN_ERR "ipt_policy: neither incoming nor " + "outgoing policy selected\n"); + return 0; + } + if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN) + && info->flags & IPT_POLICY_MATCH_OUT) { + printk(KERN_ERR "ipt_policy: output policy not valid in " + "PRE_ROUTING and INPUT\n"); + return 0; + } + if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT) + && info->flags & IPT_POLICY_MATCH_IN) { + printk(KERN_ERR "ipt_policy: input policy not valid in " + "POST_ROUTING and OUTPUT\n"); + return 0; + } + if (info->len > IPT_POLICY_MAX_ELEM) { + printk(KERN_ERR "ipt_policy: too many policy elements\n"); + return 0; + } + + return 1; +} + +static struct ipt_match policy_match = { + .name = "policy", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_match(&policy_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&policy_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4b0d7e4d6269..165a4d81efa4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -255,6 +255,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return NET_RX_DROP; } + nf_reset(skb); skb_push(skb, skb->data - skb->nh.raw); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e9f83e5b28ce..6ea353907af5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1080,6 +1080,7 @@ process: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + nf_reset(skb); if (sk_filter(sk, skb, 0)) goto discard_and_relse; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 223abaa72bc5..00840474a449 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -989,6 +989,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) kfree_skb(skb); return -1; } + nf_reset(skb); if (up->encap_type) { /* @@ -1149,6 +1150,7 @@ int udp_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; + nf_reset(skb); /* No socket. Drop packet silently, if checksum is wrong */ if (udp_checksum_complete(skb)) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 2d3849c38a0f..850d919591d1 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -11,6 +11,8 @@ #include <linux/module.h> #include <linux/string.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> #include <net/inet_ecn.h> #include <net/ip.h> #include <net/xfrm.h> @@ -45,6 +47,23 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) return xfrm_parse_spi(skb, nexthdr, spi, seq); } +#ifdef CONFIG_NETFILTER +static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + + if (skb->dst == NULL) { + if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, + skb->dev)) + goto drop; + } + return dst_input(skb); +drop: + kfree_skb(skb); + return NET_RX_DROP; +} +#endif + int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) { int err; @@ -137,6 +156,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state)); skb->sp->len += xfrm_nr; + nf_reset(skb); + if (decaps) { if (!(skb->dev->flags&IFF_LOOPBACK)) { dst_release(skb->dst); @@ -145,7 +166,17 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) netif_rx(skb); return 0; } else { +#ifdef CONFIG_NETFILTER + __skb_push(skb, skb->data - skb->nh.raw); + skb->nh.iph->tot_len = htons(skb->len); + ip_send_check(skb->nh.iph); + + NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, + xfrm4_rcv_encap_finish); + return 0; +#else return -skb->nh.iph->protocol; +#endif } drop_unlock: diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 66620a95942a..d4df0ddd424b 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -8,8 +8,10 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/compiler.h> #include <linux/skbuff.h> #include <linux/spinlock.h> +#include <linux/netfilter_ipv4.h> #include <net/inet_ecn.h> #include <net/ip.h> #include <net/xfrm.h> @@ -95,7 +97,7 @@ out: return ret; } -int xfrm4_output(struct sk_buff *skb) +static int xfrm4_output_one(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; @@ -113,27 +115,33 @@ int xfrm4_output(struct sk_buff *skb) goto error_nolock; } - spin_lock_bh(&x->lock); - err = xfrm_state_check(x, skb); - if (err) - goto error; + do { + spin_lock_bh(&x->lock); + err = xfrm_state_check(x, skb); + if (err) + goto error; - xfrm4_encap(skb); + xfrm4_encap(skb); - err = x->type->output(x, skb); - if (err) - goto error; + err = x->type->output(x, skb); + if (err) + goto error; - x->curlft.bytes += skb->len; - x->curlft.packets++; + x->curlft.bytes += skb->len; + x->curlft.packets++; - spin_unlock_bh(&x->lock); + spin_unlock_bh(&x->lock); - if (!(skb->dst = dst_pop(dst))) { - err = -EHOSTUNREACH; - goto error_nolock; - } - err = NET_XMIT_BYPASS; + if (!(skb->dst = dst_pop(dst))) { + err = -EHOSTUNREACH; + goto error_nolock; + } + dst = skb->dst; + x = dst->xfrm; + } while (x && !x->props.mode); + + IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; + err = 0; out_exit: return err; @@ -143,3 +151,33 @@ error_nolock: kfree_skb(skb); goto out_exit; } + +int xfrm4_output_finish(struct sk_buff *skb) +{ + int err; + + while (likely((err = xfrm4_output_one(skb)) == 0)) { + nf_reset(skb); + + err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL, + skb->dst->dev, dst_output); + if (unlikely(err != 1)) + break; + + if (!skb->dst->xfrm) + return dst_output(skb); + + err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, + skb->dst->dev, xfrm4_output_finish); + if (unlikely(err != 1)) + break; + } + + return err; +} + +int xfrm4_output(struct sk_buff *skb) +{ + return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, + xfrm4_output_finish); +} diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 704fb73e6c5f..e53e421eeee9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1228,7 +1228,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) /* Gets referenced address, destroys ifaddr */ -void addrconf_dad_stop(struct inet6_ifaddr *ifp) +static void addrconf_dad_stop(struct inet6_ifaddr *ifp) { if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 68afc53be662..25c3fe5005d9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -689,11 +689,11 @@ snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) if (ptr == NULL) return -EINVAL; - ptr[0] = __alloc_percpu(mibsize, mibalign); + ptr[0] = __alloc_percpu(mibsize); if (!ptr[0]) goto err0; - ptr[1] = __alloc_percpu(mibsize, mibalign); + ptr[1] = __alloc_percpu(mibsize); if (!ptr[1]) goto err1; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 113374dc342c..2a1e7e45b890 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -152,7 +152,7 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = { {-1, NULL} }; -static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_destopt_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); @@ -169,7 +169,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { skb->h.raw += ((skb->h.raw[1]+1)<<3); - *nhoffp = opt->dst1; + opt->nhoff = opt->dst1; return 1; } @@ -192,7 +192,7 @@ void __init ipv6_destopt_init(void) NONE header. No data in packet. ********************************/ -static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_nodata_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; @@ -215,7 +215,7 @@ void __init ipv6_nodata_init(void) Routing header. ********************************/ -static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_rthdr_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); @@ -249,7 +249,7 @@ looped_back: skb->h.raw += (hdr->hdrlen + 1) << 3; opt->dst0 = opt->dst1; opt->dst1 = 0; - *nhoffp = (&hdr->nexthdr) - skb->nh.raw; + opt->nhoff = (&hdr->nexthdr) - skb->nh.raw; return 1; } @@ -487,9 +487,14 @@ static struct tlvtype_proc tlvprochopopt_lst[] = { int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff) { - IP6CB(skb)->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skb)) + struct inet6_skb_parm *opt = IP6CB(skb); + + opt->hop = sizeof(struct ipv6hdr); + if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { + skb->h.raw += (skb->h.raw[1]+1)<<3; + opt->nhoff = sizeof(struct ipv6hdr); return sizeof(struct ipv6hdr); + } return -1; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6ec6a2b549bb..53c81fcd20ba 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -79,7 +79,7 @@ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly; static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL; #define icmpv6_socket __get_cpu_var(__icmpv6_socket) -static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp); +static int icmpv6_rcv(struct sk_buff **pskb); static struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, @@ -581,7 +581,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info) * Handle icmp messages */ -static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int icmpv6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 792f90f0f9ec..f8f3a37a1494 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -25,6 +25,7 @@ #include <net/inet_hashtables.h> #include <net/ip6_route.h> #include <net/sock.h> +#include <net/inet6_connection_sock.h> int inet6_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index a6026d2787d2..29f73592e68e 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -48,7 +48,7 @@ -static inline int ip6_rcv_finish( struct sk_buff *skb) +inline int ip6_rcv_finish( struct sk_buff *skb) { if (skb->dst == NULL) ip6_route_input(skb); @@ -97,6 +97,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; + skb->h.raw = (u8 *)(hdr + 1); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + pkt_len = ntohs(hdr->payload_len); /* pkt_len may be zero if Jumbo payload option is present */ @@ -111,8 +114,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt } if (hdr->nexthdr == NEXTHDR_HOP) { - skb->h.raw = (u8*)(hdr+1); - if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) { + if (ipv6_parse_hopopts(skb, IP6CB(skb)->nhoff) < 0) { IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); return 0; } @@ -143,26 +145,15 @@ static inline int ip6_input_finish(struct sk_buff *skb) int nexthdr; u8 hash; - skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr); - /* * Parse extension headers */ - nexthdr = skb->nh.ipv6h->nexthdr; - nhoff = offsetof(struct ipv6hdr, nexthdr); - - /* Skip hop-by-hop options, they are already parsed. */ - if (nexthdr == NEXTHDR_HOP) { - nhoff = sizeof(struct ipv6hdr); - nexthdr = skb->h.raw[0]; - skb->h.raw += (skb->h.raw[1]+1)<<3; - } - rcu_read_lock(); resubmit: if (!pskb_pull(skb, skb->h.raw - skb->data)) goto discard; + nhoff = IP6CB(skb)->nhoff; nexthdr = skb->nh.raw[nhoff]; raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); @@ -194,7 +185,7 @@ resubmit: !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - ret = ipprot->handler(&skb, &nhoff); + ret = ipprot->handler(&skb); if (ret > 0) goto resubmit; else if (ret == 0) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e315d0f80af1..f079621c8b67 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -510,7 +510,7 @@ static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph, **/ static int -ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +ip6ip6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct ipv6hdr *ipv6h; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index f8626ebf90fd..b63678328a3b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -10,6 +10,7 @@ #include <net/dst.h> #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/xfrm.h> int ip6_route_me_harder(struct sk_buff *skb) { @@ -21,11 +22,17 @@ int ip6_route_me_harder(struct sk_buff *skb) { .ip6_u = { .daddr = iph->daddr, .saddr = iph->saddr, } }, - .proto = iph->nexthdr, }; dst = ip6_route_output(skb->sk, &fl); +#ifdef CONFIG_XFRM + if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + xfrm_decode_session(skb, &fl, AF_INET6) == 0) + if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + return -1; +#endif + if (dst->error) { IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 04912f9b35c3..105dd69ee9fb 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -179,6 +179,16 @@ config IP6_NF_MATCH_PHYSDEV To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_POLICY + tristate "IPsec policy match support" + depends on IP6_NF_IPTABLES && XFRM + help + Policy matching allows you to match packets based on the + IPsec policy that was used during decapsulation/will + be used during encapsulation. + + To compile it as a module, choose M here. If unsure, say N. + # The targets config IP6_NF_FILTER tristate "Packet filtering" diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 9ab5b2ca1f59..c0c809b426e8 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o +obj-$(CONFIG_IP6_NF_MATCH_POLICY) += ip6t_policy.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o diff --git a/net/ipv6/netfilter/ip6t_policy.c b/net/ipv6/netfilter/ip6t_policy.c new file mode 100644 index 000000000000..13fedad48c1d --- /dev/null +++ b/net/ipv6/netfilter/ip6t_policy.c @@ -0,0 +1,175 @@ +/* IP tables module for matching IPsec policy + * + * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <net/xfrm.h> + +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter_ipv6/ip6t_policy.h> + +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("IPtables IPsec policy matching module"); +MODULE_LICENSE("GPL"); + + +static inline int +match_xfrm_state(struct xfrm_state *x, const struct ip6t_policy_elem *e) +{ +#define MATCH_ADDR(x,y,z) (!e->match.x || \ + ((ip6_masked_addrcmp((z), &e->x, &e->y)) == 0) ^ e->invert.x) +#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x)) + + return MATCH_ADDR(saddr, smask, (struct in6_addr *)&x->props.saddr.a6) && + MATCH_ADDR(daddr, dmask, (struct in6_addr *)&x->id.daddr.a6) && + MATCH(proto, x->id.proto) && + MATCH(mode, x->props.mode) && + MATCH(spi, x->id.spi) && + MATCH(reqid, x->props.reqid); +} + +static int +match_policy_in(const struct sk_buff *skb, const struct ip6t_policy_info *info) +{ + const struct ip6t_policy_elem *e; + struct sec_path *sp = skb->sp; + int strict = info->flags & IP6T_POLICY_MATCH_STRICT; + int i, pos; + + if (sp == NULL) + return -1; + if (strict && info->len != sp->len) + return 0; + + for (i = sp->len - 1; i >= 0; i--) { + pos = strict ? i - sp->len + 1 : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(sp->x[i].xvec, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int +match_policy_out(const struct sk_buff *skb, const struct ip6t_policy_info *info) +{ + const struct ip6t_policy_elem *e; + struct dst_entry *dst = skb->dst; + int strict = info->flags & IP6T_POLICY_MATCH_STRICT; + int i, pos; + + if (dst->xfrm == NULL) + return -1; + + for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { + pos = strict ? i : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(dst->xfrm, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + const struct ip6t_policy_info *info = matchinfo; + int ret; + + if (info->flags & IP6T_POLICY_MATCH_IN) + ret = match_policy_in(skb, info); + else + ret = match_policy_out(skb, info); + + if (ret < 0) + ret = info->flags & IP6T_POLICY_MATCH_NONE ? 1 : 0; + else if (info->flags & IP6T_POLICY_MATCH_NONE) + ret = 0; + + return ret; +} + +static int checkentry(const char *tablename, const struct ip6t_ip6 *ip, + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) +{ + struct ip6t_policy_info *info = matchinfo; + + if (matchsize != IP6T_ALIGN(sizeof(*info))) { + printk(KERN_ERR "ip6t_policy: matchsize %u != %zu\n", + matchsize, IP6T_ALIGN(sizeof(*info))); + return 0; + } + if (!(info->flags & (IP6T_POLICY_MATCH_IN|IP6T_POLICY_MATCH_OUT))) { + printk(KERN_ERR "ip6t_policy: neither incoming nor " + "outgoing policy selected\n"); + return 0; + } + if (hook_mask & (1 << NF_IP6_PRE_ROUTING | 1 << NF_IP6_LOCAL_IN) + && info->flags & IP6T_POLICY_MATCH_OUT) { + printk(KERN_ERR "ip6t_policy: output policy not valid in " + "PRE_ROUTING and INPUT\n"); + return 0; + } + if (hook_mask & (1 << NF_IP6_POST_ROUTING | 1 << NF_IP6_LOCAL_OUT) + && info->flags & IP6T_POLICY_MATCH_IN) { + printk(KERN_ERR "ip6t_policy: input policy not valid in " + "POST_ROUTING and OUTPUT\n"); + return 0; + } + if (info->len > IP6T_POLICY_MAX_ELEM) { + printk(KERN_ERR "ip6t_policy: too many policy elements\n"); + return 0; + } + + return 1; +} + +static struct ip6t_match policy_match = { + .name = "policy", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ip6t_register_match(&policy_match); +} + +static void __exit fini(void) +{ + ip6t_unregister_match(&policy_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 5d316cb72ec9..15e1456b3f18 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -581,7 +581,6 @@ err: * the last and the first frames arrived and all the bits are here. */ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, - unsigned int *nhoffp, struct net_device *dev) { struct sk_buff *fp, *head = fq->fragments; @@ -654,6 +653,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->dev = dev; skb_set_timestamp(head, &fq->stamp); head->nh.ipv6h->payload_len = htons(payload_len); + IP6CB(head)->nhoff = nhoff; *skb_in = head; @@ -663,7 +663,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); fq->fragments = NULL; - *nhoffp = nhoff; return 1; out_oversize: @@ -678,7 +677,7 @@ out_fail: return -1; } -static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_frag_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct net_device *dev = skb->dev; @@ -710,7 +709,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) skb->h.raw += sizeof(struct frag_hdr); IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); - *nhoffp = (u8*)fhdr - skb->nh.raw; + IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw; return 1; } @@ -722,11 +721,11 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) spin_lock(&fq->lock); - ip6_frag_queue(fq, skb, fhdr, *nhoffp); + ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) - ret = ip6_frag_reasm(fq, skbp, nhoffp, dev); + ret = ip6_frag_reasm(fq, skbp, dev); spin_unlock(&fq->lock); fq_put(fq, NULL); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 577d49732b0f..02872ae8a439 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -381,6 +381,7 @@ static int ipip6_rcv(struct sk_buff *skb) skb->mac.raw = skb->nh.raw; skb->nh.raw = skb->data; memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; tunnel->stat.rx_packets++; @@ -552,6 +553,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags = 0; dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2947bc56d8a0..a25f4e8a8ada 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1153,7 +1153,7 @@ ipv6_pktoptions: return 0; } -static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int tcp_v6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct tcphdr *th; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d8538dcea813..c47648892c04 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -435,7 +435,7 @@ out: read_unlock(&udp_hash_lock); } -static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int udpv6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct sock *sk; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 28c29d78338e..1ca2da68ef69 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -11,6 +11,8 @@ #include <linux/module.h> #include <linux/string.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <net/ip.h> @@ -26,7 +28,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) IP6_ECN_set_ce(inner_iph); } -int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) +int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi) { struct sk_buff *skb = *pskb; int err; @@ -38,7 +40,7 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) int nexthdr; unsigned int nhoff; - nhoff = *nhoffp; + nhoff = IP6CB(skb)->nhoff; nexthdr = skb->nh.raw[nhoff]; seq = 0; @@ -121,6 +123,8 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) skb->sp->len += xfrm_nr; skb->ip_summed = CHECKSUM_NONE; + nf_reset(skb); + if (decaps) { if (!(skb->dev->flags&IFF_LOOPBACK)) { dst_release(skb->dst); @@ -129,7 +133,16 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) netif_rx(skb); return -1; } else { +#ifdef CONFIG_NETFILTER + skb->nh.ipv6h->payload_len = htons(skb->len); + __skb_push(skb, skb->data - skb->nh.raw); + + NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, + ip6_rcv_finish); + return -1; +#else return 1; +#endif } drop_unlock: @@ -144,7 +157,7 @@ drop: EXPORT_SYMBOL(xfrm6_rcv_spi); -int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +int xfrm6_rcv(struct sk_buff **pskb) { - return xfrm6_rcv_spi(pskb, nhoffp, 0); + return xfrm6_rcv_spi(pskb, 0); } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6b9867717d11..80242172a5df 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -9,9 +9,11 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/compiler.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/icmpv6.h> +#include <linux/netfilter_ipv6.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <net/ipv6.h> @@ -92,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) return ret; } -int xfrm6_output(struct sk_buff *skb) +static int xfrm6_output_one(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; @@ -110,29 +112,35 @@ int xfrm6_output(struct sk_buff *skb) goto error_nolock; } - spin_lock_bh(&x->lock); - err = xfrm_state_check(x, skb); - if (err) - goto error; + do { + spin_lock_bh(&x->lock); + err = xfrm_state_check(x, skb); + if (err) + goto error; - xfrm6_encap(skb); + xfrm6_encap(skb); - err = x->type->output(x, skb); - if (err) - goto error; + err = x->type->output(x, skb); + if (err) + goto error; - x->curlft.bytes += skb->len; - x->curlft.packets++; + x->curlft.bytes += skb->len; + x->curlft.packets++; - spin_unlock_bh(&x->lock); + spin_unlock_bh(&x->lock); - skb->nh.raw = skb->data; - - if (!(skb->dst = dst_pop(dst))) { - err = -EHOSTUNREACH; - goto error_nolock; - } - err = NET_XMIT_BYPASS; + skb->nh.raw = skb->data; + + if (!(skb->dst = dst_pop(dst))) { + err = -EHOSTUNREACH; + goto error_nolock; + } + dst = skb->dst; + x = dst->xfrm; + } while (x && !x->props.mode); + + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; + err = 0; out_exit: return err; @@ -142,3 +150,33 @@ error_nolock: kfree_skb(skb); goto out_exit; } + +static int xfrm6_output_finish(struct sk_buff *skb) +{ + int err; + + while (likely((err = xfrm6_output_one(skb)) == 0)) { + nf_reset(skb); + + err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL, + skb->dst->dev, dst_output); + if (unlikely(err != 1)) + break; + + if (!skb->dst->xfrm) + return dst_output(skb); + + err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, + skb->dst->dev, xfrm6_output_finish); + if (unlikely(err != 1)) + break; + } + + return err; +} + +int xfrm6_output(struct sk_buff *skb) +{ + return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, + xfrm6_output_finish); +} diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index fbef7826a74f..da09ff258648 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -397,7 +397,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler) EXPORT_SYMBOL(xfrm6_tunnel_deregister); -static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int xfrm6_tunnel_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler = xfrm6_tunnel_handler; @@ -405,11 +405,11 @@ static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp) u32 spi; /* device-like_ip6ip6_handler() */ - if (handler && handler->handler(pskb, nhoffp) == 0) + if (handler && handler->handler(pskb) == 0) return 0; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(pskb, nhoffp, spi); + return xfrm6_rcv_spi(pskb, spi); } static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, diff --git a/net/sctp/input.c b/net/sctp/input.c index 238f1bffa684..4aa6fc60357c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -225,6 +225,7 @@ int sctp_rcv(struct sk_buff *skb) if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family)) goto discard_release; + nf_reset(skb); ret = sk_filter(sk, skb, 1); if (ret) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 15c05165c905..04c7fab4edc4 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -905,7 +905,7 @@ static struct inet_protosw sctpv6_stream_protosw = { .flags = SCTP_PROTOSW_FLAG, }; -static int sctp6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int sctp6_rcv(struct sk_buff **pskb) { return sctp_rcv(*pskb) ? -1 : 0; } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 24cc23af9b95..e14c1cae7460 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -495,7 +495,7 @@ rpc_depopulate(struct dentry *parent) repeat: spin_lock(&dcache_lock); list_for_each_safe(pos, next, &parent->d_subdirs) { - dentry = list_entry(pos, struct dentry, d_child); + dentry = list_entry(pos, struct dentry, d_u.d_child); spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { dget_locked(dentry); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 64a447375fdb..59614a994b4e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -22,6 +22,7 @@ #include <linux/workqueue.h> #include <linux/notifier.h> #include <linux/netdevice.h> +#include <linux/netfilter.h> #include <linux/module.h> #include <net/xfrm.h> #include <net/ip.h> @@ -951,8 +952,8 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, return start; } -static int -_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) +int +xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -963,6 +964,7 @@ _decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) xfrm_policy_put_afinfo(afinfo); return 0; } +EXPORT_SYMBOL(xfrm_decode_session); static inline int secpath_has_tunnel(struct sec_path *sp, int k) { @@ -982,8 +984,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, u8 fl_dir = policy_to_flow_dir(dir); u32 sk_sid; - if (_decode_session(skb, &fl, family) < 0) + if (xfrm_decode_session(skb, &fl, family) < 0) return 0; + nf_nat_decode_session(skb, &fl, family); sk_sid = security_sk_sid(sk, &fl, fl_dir); @@ -1055,7 +1058,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct flowi fl; - if (_decode_session(skb, &fl, family) < 0) + if (xfrm_decode_session(skb, &fl, family) < 0) return 0; return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter new file mode 100644 index 000000000000..75f21d843c1d --- /dev/null +++ b/scripts/bloat-o-meter @@ -0,0 +1,58 @@ +#!/usr/bin/python +# +# Copyright 2004 Matt Mackall <mpm@selenic.com> +# +# inspired by perl Bloat-O-Meter (c) 1997 by Andi Kleen +# +# This software may be used and distributed according to the terms +# of the GNU General Public License, incorporated herein by reference. + +import sys, os, re + +if len(sys.argv) != 3: + sys.stderr.write("usage: %s file1 file2\n" % sys.argv[0]) + sys.exit(-1) + +def getsizes(file): + sym = {} + for l in os.popen("nm --size-sort " + file).readlines(): + size, type, name = l[:-1].split() + if type in "tTdDbB": + sym[name] = int(size, 16) + return sym + +old = getsizes(sys.argv[1]) +new = getsizes(sys.argv[2]) +grow, shrink, add, remove, up, down = 0, 0, 0, 0, 0, 0 +delta, common = [], {} + +for a in old: + if a in new: + common[a] = 1 + +for name in old: + if name not in common: + remove += 1 + down += old[name] + delta.append((-old[name], name)) + +for name in new: + if name not in common: + add += 1 + up += new[name] + delta.append((new[name], name)) + +for name in common: + d = new.get(name, 0) - old.get(name, 0) + if d>0: grow, up = grow+1, up+d + if d<0: shrink, down = shrink+1, down-d + delta.append((d, name)) + +delta.sort() +delta.reverse() + +print "add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \ + (add, remove, grow, shrink, up, -down, up-down) +print "%-40s %7s %7s %+7s" % ("function", "old", "new", "delta") +for d, n in delta: + if d: print "%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d) diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index 8ba5d29d3d42..10eeae53d827 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -63,6 +63,20 @@ static void check_stdin(void) } } +static char *fgets_check_stream(char *s, int size, FILE *stream) +{ + char *ret = fgets(s, size, stream); + + if (ret == NULL && feof(stream)) { + printf(_("aborted!\n\n")); + printf(_("Console input is closed. ")); + printf(_("Run 'make oldconfig' to update configuration.\n\n")); + exit(1); + } + + return ret; +} + static void conf_askvalue(struct symbol *sym, const char *def) { enum symbol_type type = sym_get_type(sym); @@ -100,7 +114,7 @@ static void conf_askvalue(struct symbol *sym, const char *def) check_stdin(); case ask_all: fflush(stdout); - fgets(line, 128, stdin); + fgets_check_stream(line, 128, stdin); return; case set_default: printf("%s\n", def); @@ -356,7 +370,7 @@ static int conf_choice(struct menu *menu) check_stdin(); case ask_all: fflush(stdout); - fgets(line, 128, stdin); + fgets_check_stream(line, 128, stdin); strip(line); if (line[0] == '?') { printf("\n%s\n", menu->sym->help ? diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 7c03927d4c7c..e52f3e90bf0c 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -22,8 +22,8 @@ public: #if QT_VERSION >= 300 void readListSettings(); - QValueList<int> ConfigSettings::readSizes(const QString& key, bool *ok); - bool ConfigSettings::writeSizes(const QString& key, const QValueList<int>& value); + QValueList<int> readSizes(const QString& key, bool *ok); + bool writeSizes(const QString& key, const QValueList<int>& value); #endif bool showAll; @@ -124,7 +124,7 @@ public: void setParentMenu(void); template <class P> - void ConfigList::updateMenuList(P*, struct menu*); + void updateMenuList(P*, struct menu*); bool updateAll; diff --git a/security/keys/compat.c b/security/keys/compat.c index 3303673c636e..bcdb28533733 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -74,6 +74,12 @@ asmlinkage long compat_sys_keyctl(u32 option, case KEYCTL_SET_REQKEY_KEYRING: return keyctl_set_reqkey_keyring(arg2); + case KEYCTL_SET_TIMEOUT: + return keyctl_set_timeout(arg2, arg3); + + case KEYCTL_ASSUME_AUTHORITY: + return keyctl_assume_authority(arg2); + default: return -EOPNOTSUPP; } diff --git a/security/keys/internal.h b/security/keys/internal.h index 39cba97c5eb9..e066e6057955 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -107,12 +107,13 @@ extern struct key *request_key_and_link(struct key_type *type, struct request_key_auth { struct key *target_key; struct task_struct *context; + const char *callout_info; pid_t pid; }; extern struct key_type key_type_request_key_auth; extern struct key *request_key_auth_new(struct key *target, - struct key **_rkakey); + const char *callout_info); extern struct key *key_get_instantiation_authkey(key_serial_t target_id); @@ -136,6 +137,8 @@ extern long keyctl_instantiate_key(key_serial_t, const void __user *, size_t, key_serial_t); extern long keyctl_negate_key(key_serial_t, unsigned, key_serial_t); extern long keyctl_set_reqkey_keyring(int); +extern long keyctl_set_timeout(key_serial_t, unsigned); +extern long keyctl_assume_authority(key_serial_t); /* diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index b7a468fabdf9..3d2ebae029c1 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -834,6 +834,17 @@ long keyctl_instantiate_key(key_serial_t id, if (plen > 32767) goto error; + /* the appropriate instantiation authorisation key must have been + * assumed before calling this */ + ret = -EPERM; + instkey = current->request_key_auth; + if (!instkey) + goto error; + + rka = instkey->payload.data; + if (rka->target_key->serial != id) + goto error; + /* pull the payload in if one was supplied */ payload = NULL; @@ -848,15 +859,6 @@ long keyctl_instantiate_key(key_serial_t id, goto error2; } - /* find the instantiation authorisation key */ - instkey = key_get_instantiation_authkey(id); - if (IS_ERR(instkey)) { - ret = PTR_ERR(instkey); - goto error2; - } - - rka = instkey->payload.data; - /* find the destination keyring amongst those belonging to the * requesting task */ keyring_ref = NULL; @@ -865,7 +867,7 @@ long keyctl_instantiate_key(key_serial_t id, KEY_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); - goto error3; + goto error2; } } @@ -874,11 +876,17 @@ long keyctl_instantiate_key(key_serial_t id, key_ref_to_ptr(keyring_ref), instkey); key_ref_put(keyring_ref); - error3: - key_put(instkey); - error2: + + /* discard the assumed authority if it's just been disabled by + * instantiation of the key */ + if (ret == 0) { + key_put(current->request_key_auth); + current->request_key_auth = NULL; + } + +error2: kfree(payload); - error: +error: return ret; } /* end keyctl_instantiate_key() */ @@ -895,14 +903,16 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) key_ref_t keyring_ref; long ret; - /* find the instantiation authorisation key */ - instkey = key_get_instantiation_authkey(id); - if (IS_ERR(instkey)) { - ret = PTR_ERR(instkey); + /* the appropriate instantiation authorisation key must have been + * assumed before calling this */ + ret = -EPERM; + instkey = current->request_key_auth; + if (!instkey) goto error; - } rka = instkey->payload.data; + if (rka->target_key->serial != id) + goto error; /* find the destination keyring if present (which must also be * writable) */ @@ -911,7 +921,7 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); - goto error2; + goto error; } } @@ -920,9 +930,15 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) key_ref_to_ptr(keyring_ref), instkey); key_ref_put(keyring_ref); - error2: - key_put(instkey); - error: + + /* discard the assumed authority if it's just been disabled by + * instantiation of the key */ + if (ret == 0) { + key_put(current->request_key_auth); + current->request_key_auth = NULL; + } + +error: return ret; } /* end keyctl_negate_key() */ @@ -967,6 +983,88 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) /*****************************************************************************/ /* + * set or clear the timeout for a key + */ +long keyctl_set_timeout(key_serial_t id, unsigned timeout) +{ + struct timespec now; + struct key *key; + key_ref_t key_ref; + time_t expiry; + long ret; + + key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR); + if (IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); + goto error; + } + + key = key_ref_to_ptr(key_ref); + + /* make the changes with the locks held to prevent races */ + down_write(&key->sem); + + expiry = 0; + if (timeout > 0) { + now = current_kernel_time(); + expiry = now.tv_sec + timeout; + } + + key->expiry = expiry; + + up_write(&key->sem); + key_put(key); + + ret = 0; +error: + return ret; + +} /* end keyctl_set_timeout() */ + +/*****************************************************************************/ +/* + * assume the authority to instantiate the specified key + */ +long keyctl_assume_authority(key_serial_t id) +{ + struct key *authkey; + long ret; + + /* special key IDs aren't permitted */ + ret = -EINVAL; + if (id < 0) + goto error; + + /* we divest ourselves of authority if given an ID of 0 */ + if (id == 0) { + key_put(current->request_key_auth); + current->request_key_auth = NULL; + ret = 0; + goto error; + } + + /* attempt to assume the authority temporarily granted to us whilst we + * instantiate the specified key + * - the authorisation key must be in the current task's keyrings + * somewhere + */ + authkey = key_get_instantiation_authkey(id); + if (IS_ERR(authkey)) { + ret = PTR_ERR(authkey); + goto error; + } + + key_put(current->request_key_auth); + current->request_key_auth = authkey; + ret = authkey->serial; + +error: + return ret; + +} /* end keyctl_assume_authority() */ + +/*****************************************************************************/ +/* * the key control system call */ asmlinkage long sys_keyctl(int option, unsigned long arg2, unsigned long arg3, @@ -1038,6 +1136,13 @@ asmlinkage long sys_keyctl(int option, unsigned long arg2, unsigned long arg3, case KEYCTL_SET_REQKEY_KEYRING: return keyctl_set_reqkey_keyring(arg2); + case KEYCTL_SET_TIMEOUT: + return keyctl_set_timeout((key_serial_t) arg2, + (unsigned) arg3); + + case KEYCTL_ASSUME_AUTHORITY: + return keyctl_assume_authority((key_serial_t) arg2); + default: return -EOPNOTSUPP; } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 5d22c0388b32..d65a180f888d 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -481,51 +481,6 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref, /*****************************************************************************/ /* - * search for an instantiation authorisation key matching a target key - * - the RCU read lock must be held by the caller - * - a target_id of zero specifies any valid token - */ -struct key *keyring_search_instkey(struct key *keyring, - key_serial_t target_id) -{ - struct request_key_auth *rka; - struct keyring_list *klist; - struct key *instkey; - int loop; - - klist = rcu_dereference(keyring->payload.subscriptions); - if (klist) { - for (loop = 0; loop < klist->nkeys; loop++) { - instkey = klist->keys[loop]; - - if (instkey->type != &key_type_request_key_auth) - continue; - - rka = instkey->payload.data; - if (target_id && rka->target_key->serial != target_id) - continue; - - /* the auth key is revoked during instantiation */ - if (!test_bit(KEY_FLAG_REVOKED, &instkey->flags)) - goto found; - - instkey = ERR_PTR(-EKEYREVOKED); - goto error; - } - } - - instkey = ERR_PTR(-EACCES); - goto error; - -found: - atomic_inc(&instkey->usage); -error: - return instkey; - -} /* end keyring_search_instkey() */ - -/*****************************************************************************/ -/* * find a keyring with the specified name * - all named keyrings are searched * - only find keyrings with search permission for the process @@ -684,15 +639,31 @@ static void keyring_link_rcu_disposal(struct rcu_head *rcu) /*****************************************************************************/ /* + * dispose of a keyring list after the RCU grace period, freeing the unlinked + * key + */ +static void keyring_unlink_rcu_disposal(struct rcu_head *rcu) +{ + struct keyring_list *klist = + container_of(rcu, struct keyring_list, rcu); + + key_put(klist->keys[klist->delkey]); + kfree(klist); + +} /* end keyring_unlink_rcu_disposal() */ + +/*****************************************************************************/ +/* * link a key into to a keyring * - must be called with the keyring's semaphore write-locked + * - discard already extant link to matching key if there is one */ int __key_link(struct key *keyring, struct key *key) { struct keyring_list *klist, *nklist; unsigned max; size_t size; - int ret; + int loop, ret; ret = -EKEYREVOKED; if (test_bit(KEY_FLAG_REVOKED, &keyring->flags)) @@ -714,6 +685,48 @@ int __key_link(struct key *keyring, struct key *key) goto error2; } + /* see if there's a matching key we can displace */ + klist = keyring->payload.subscriptions; + + if (klist && klist->nkeys > 0) { + struct key_type *type = key->type; + + for (loop = klist->nkeys - 1; loop >= 0; loop--) { + if (klist->keys[loop]->type == type && + strcmp(klist->keys[loop]->description, + key->description) == 0 + ) { + /* found a match - replace with new key */ + size = sizeof(struct key *) * klist->maxkeys; + size += sizeof(*klist); + BUG_ON(size > PAGE_SIZE); + + ret = -ENOMEM; + nklist = kmalloc(size, GFP_KERNEL); + if (!nklist) + goto error2; + + memcpy(nklist, klist, size); + + /* replace matched key */ + atomic_inc(&key->usage); + nklist->keys[loop] = key; + + rcu_assign_pointer( + keyring->payload.subscriptions, + nklist); + + /* dispose of the old keyring list and the + * displaced key */ + klist->delkey = loop; + call_rcu(&klist->rcu, + keyring_unlink_rcu_disposal); + + goto done; + } + } + } + /* check that we aren't going to overrun the user's quota */ ret = key_payload_reserve(keyring, keyring->datalen + KEYQUOTA_LINK_BYTES); @@ -730,8 +743,6 @@ int __key_link(struct key *keyring, struct key *key) smp_wmb(); klist->nkeys++; smp_wmb(); - - ret = 0; } else { /* grow the key list */ @@ -769,16 +780,16 @@ int __key_link(struct key *keyring, struct key *key) /* dispose of the old keyring list */ if (klist) call_rcu(&klist->rcu, keyring_link_rcu_disposal); - - ret = 0; } - error2: +done: + ret = 0; +error2: up_write(&keyring_serialise_link_sem); - error: +error: return ret; - error3: +error3: /* undo the quota changes */ key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES); @@ -809,21 +820,6 @@ EXPORT_SYMBOL(key_link); /*****************************************************************************/ /* - * dispose of a keyring list after the RCU grace period, freeing the unlinked - * key - */ -static void keyring_unlink_rcu_disposal(struct rcu_head *rcu) -{ - struct keyring_list *klist = - container_of(rcu, struct keyring_list, rcu); - - key_put(klist->keys[klist->delkey]); - kfree(klist); - -} /* end keyring_unlink_rcu_disposal() */ - -/*****************************************************************************/ -/* * unlink the first link to a key from a keyring */ int key_unlink(struct key *keyring, struct key *key) diff --git a/security/keys/permission.c b/security/keys/permission.c index e7f579c0eaf5..3b41f9b52537 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -73,3 +73,35 @@ use_these_perms: } /* end key_task_permission() */ EXPORT_SYMBOL(key_task_permission); + +/*****************************************************************************/ +/* + * validate a key + */ +int key_validate(struct key *key) +{ + struct timespec now; + int ret = 0; + + if (key) { + /* check it's still accessible */ + ret = -EKEYREVOKED; + if (test_bit(KEY_FLAG_REVOKED, &key->flags) || + test_bit(KEY_FLAG_DEAD, &key->flags)) + goto error; + + /* check it hasn't expired */ + ret = 0; + if (key->expiry) { + now = current_kernel_time(); + if (now.tv_sec >= key->expiry) + ret = -EKEYEXPIRED; + } + } + + error: + return ret; + +} /* end key_validate() */ + +EXPORT_SYMBOL(key_validate); diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 566b1cc0118a..74cb79eb917e 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -270,9 +270,14 @@ int copy_thread_group_keys(struct task_struct *tsk) int copy_keys(unsigned long clone_flags, struct task_struct *tsk) { key_check(tsk->thread_keyring); + key_check(tsk->request_key_auth); /* no thread keyring yet */ tsk->thread_keyring = NULL; + + /* copy the request_key() authorisation for this thread */ + key_get(tsk->request_key_auth); + return 0; } /* end copy_keys() */ @@ -290,11 +295,12 @@ void exit_thread_group_keys(struct signal_struct *tg) /*****************************************************************************/ /* - * dispose of keys upon thread exit + * dispose of per-thread keys upon thread exit */ void exit_keys(struct task_struct *tsk) { key_put(tsk->thread_keyring); + key_put(tsk->request_key_auth); } /* end exit_keys() */ @@ -382,7 +388,7 @@ key_ref_t search_process_keyrings(struct key_type *type, struct task_struct *context) { struct request_key_auth *rka; - key_ref_t key_ref, ret, err, instkey_ref; + key_ref_t key_ref, ret, err; /* we want to return -EAGAIN or -ENOKEY if any of the keyrings were * searchable, but we failed to find a key or we found a negative key; @@ -461,30 +467,12 @@ key_ref_t search_process_keyrings(struct key_type *type, err = key_ref; break; } - - /* if this process has a session keyring and that has an - * instantiation authorisation key in the bottom level, then we - * also search the keyrings of the process mentioned there */ - if (context != current) - goto no_key; - - rcu_read_lock(); - instkey_ref = __keyring_search_one( - make_key_ref(rcu_dereference( - context->signal->session_keyring), - 1), - &key_type_request_key_auth, NULL, 0); - rcu_read_unlock(); - - if (IS_ERR(instkey_ref)) - goto no_key; - - rka = key_ref_to_ptr(instkey_ref)->payload.data; - - key_ref = search_process_keyrings(type, description, match, - rka->context); - key_ref_put(instkey_ref); - + } + /* or search the user-session keyring */ + else { + key_ref = keyring_search_aux( + make_key_ref(context->user->session_keyring, 1), + context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -500,11 +488,21 @@ key_ref_t search_process_keyrings(struct key_type *type, break; } } - /* or search the user-session keyring */ - else { - key_ref = keyring_search_aux( - make_key_ref(context->user->session_keyring, 1), - context, type, description, match); + + /* if this process has an instantiation authorisation key, then we also + * search the keyrings of the process mentioned there + * - we don't permit access to request_key auth keys via this method + */ + if (context->request_key_auth && + context == current && + type != &key_type_request_key_auth && + key_validate(context->request_key_auth) == 0 + ) { + rka = context->request_key_auth->payload.data; + + key_ref = search_process_keyrings(type, description, match, + rka->context); + if (!IS_ERR(key_ref)) goto found; @@ -521,8 +519,6 @@ key_ref_t search_process_keyrings(struct key_type *type, } } - -no_key: /* no key - decide on the error we're going to go for */ key_ref = ret ? ret : err; @@ -628,6 +624,15 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, key = ERR_PTR(-EINVAL); goto error; + case KEY_SPEC_REQKEY_AUTH_KEY: + key = context->request_key_auth; + if (!key) + goto error; + + atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); + break; + default: key_ref = ERR_PTR(-EINVAL); if (id < 1) diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 5cc4bba70db6..f030a0ccbb93 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -29,28 +29,36 @@ DECLARE_WAIT_QUEUE_HEAD(request_key_conswq); /*****************************************************************************/ /* * request userspace finish the construction of a key - * - execute "/sbin/request-key <op> <key> <uid> <gid> <keyring> <keyring> <keyring> <info>" + * - execute "/sbin/request-key <op> <key> <uid> <gid> <keyring> <keyring> <keyring>" */ -static int call_request_key(struct key *key, - const char *op, - const char *callout_info) +static int call_sbin_request_key(struct key *key, + struct key *authkey, + const char *op) { struct task_struct *tsk = current; key_serial_t prkey, sskey; - struct key *session_keyring, *rkakey; - char *argv[10], *envp[3], uid_str[12], gid_str[12]; + struct key *keyring; + char *argv[9], *envp[3], uid_str[12], gid_str[12]; char key_str[12], keyring_str[3][12]; + char desc[20]; int ret, i; - kenter("{%d},%s,%s", key->serial, op, callout_info); + kenter("{%d},{%d},%s", key->serial, authkey->serial, op); - /* generate a new session keyring with an auth key in it */ - session_keyring = request_key_auth_new(key, &rkakey); - if (IS_ERR(session_keyring)) { - ret = PTR_ERR(session_keyring); - goto error; + /* allocate a new session keyring */ + sprintf(desc, "_req.%u", key->serial); + + keyring = keyring_alloc(desc, current->fsuid, current->fsgid, 1, NULL); + if (IS_ERR(keyring)) { + ret = PTR_ERR(keyring); + goto error_alloc; } + /* attach the auth key to the session keyring */ + ret = __key_link(keyring, authkey); + if (ret < 0) + goto error_link; + /* record the UID and GID */ sprintf(uid_str, "%d", current->fsuid); sprintf(gid_str, "%d", current->fsgid); @@ -95,22 +103,19 @@ static int call_request_key(struct key *key, argv[i++] = keyring_str[0]; argv[i++] = keyring_str[1]; argv[i++] = keyring_str[2]; - argv[i++] = (char *) callout_info; argv[i] = NULL; /* do it */ - ret = call_usermodehelper_keys(argv[0], argv, envp, session_keyring, 1); + ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, 1); - /* dispose of the special keys */ - key_revoke(rkakey); - key_put(rkakey); - key_put(session_keyring); +error_link: + key_put(keyring); - error: +error_alloc: kleave(" = %d", ret); return ret; -} /* end call_request_key() */ +} /* end call_sbin_request_key() */ /*****************************************************************************/ /* @@ -122,9 +127,10 @@ static struct key *__request_key_construction(struct key_type *type, const char *description, const char *callout_info) { + request_key_actor_t actor; struct key_construction cons; struct timespec now; - struct key *key; + struct key *key, *authkey; int ret, negated; kenter("%s,%s,%s", type->name, description, callout_info); @@ -143,8 +149,19 @@ static struct key *__request_key_construction(struct key_type *type, /* we drop the construction sem here on behalf of the caller */ up_write(&key_construction_sem); + /* allocate an authorisation key */ + authkey = request_key_auth_new(key, callout_info); + if (IS_ERR(authkey)) { + ret = PTR_ERR(authkey); + authkey = NULL; + goto alloc_authkey_failed; + } + /* make the call */ - ret = call_request_key(key, "create", callout_info); + actor = call_sbin_request_key; + if (type->request_key) + actor = type->request_key; + ret = actor(key, authkey, "create"); if (ret < 0) goto request_failed; @@ -153,22 +170,29 @@ static struct key *__request_key_construction(struct key_type *type, if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) goto request_failed; + key_revoke(authkey); + key_put(authkey); + down_write(&key_construction_sem); list_del(&cons.link); up_write(&key_construction_sem); /* also give an error if the key was negatively instantiated */ - check_not_negative: +check_not_negative: if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { key_put(key); key = ERR_PTR(-ENOKEY); } - out: +out: kleave(" = %p", key); return key; - request_failed: +request_failed: + key_revoke(authkey); + key_put(authkey); + +alloc_authkey_failed: /* it wasn't instantiated * - remove from construction queue * - mark the key as dead @@ -217,7 +241,7 @@ static struct key *__request_key_construction(struct key_type *type, key = ERR_PTR(ret); goto out; - alloc_failed: +alloc_failed: up_write(&key_construction_sem); goto out; @@ -464,35 +488,3 @@ struct key *request_key(struct key_type *type, } /* end request_key() */ EXPORT_SYMBOL(request_key); - -/*****************************************************************************/ -/* - * validate a key - */ -int key_validate(struct key *key) -{ - struct timespec now; - int ret = 0; - - if (key) { - /* check it's still accessible */ - ret = -EKEYREVOKED; - if (test_bit(KEY_FLAG_REVOKED, &key->flags) || - test_bit(KEY_FLAG_DEAD, &key->flags)) - goto error; - - /* check it hasn't expired */ - ret = 0; - if (key->expiry) { - now = current_kernel_time(); - if (now.tv_sec >= key->expiry) - ret = -EKEYEXPIRED; - } - } - - error: - return ret; - -} /* end key_validate() */ - -EXPORT_SYMBOL(key_validate); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index a8e4069d48cb..cce6ba6b0323 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -15,11 +15,13 @@ #include <linux/sched.h> #include <linux/err.h> #include <linux/seq_file.h> +#include <asm/uaccess.h> #include "internal.h" static int request_key_auth_instantiate(struct key *, const void *, size_t); static void request_key_auth_describe(const struct key *, struct seq_file *); static void request_key_auth_destroy(struct key *); +static long request_key_auth_read(const struct key *, char __user *, size_t); /* * the request-key authorisation key type definition @@ -30,51 +32,25 @@ struct key_type key_type_request_key_auth = { .instantiate = request_key_auth_instantiate, .describe = request_key_auth_describe, .destroy = request_key_auth_destroy, + .read = request_key_auth_read, }; /*****************************************************************************/ /* - * instantiate a request-key authorisation record + * instantiate a request-key authorisation key */ static int request_key_auth_instantiate(struct key *key, const void *data, size_t datalen) { - struct request_key_auth *rka, *irka; - struct key *instkey; - int ret; - - ret = -ENOMEM; - rka = kmalloc(sizeof(*rka), GFP_KERNEL); - if (rka) { - /* see if the calling process is already servicing the key - * request of another process */ - instkey = key_get_instantiation_authkey(0); - if (!IS_ERR(instkey)) { - /* it is - use that instantiation context here too */ - irka = instkey->payload.data; - rka->context = irka->context; - rka->pid = irka->pid; - key_put(instkey); - } - else { - /* it isn't - use this process as the context */ - rka->context = current; - rka->pid = current->pid; - } - - rka->target_key = key_get((struct key *) data); - key->payload.data = rka; - ret = 0; - } - - return ret; + key->payload.data = (struct request_key_auth *) data; + return 0; } /* end request_key_auth_instantiate() */ /*****************************************************************************/ /* - * + * reading a request-key authorisation key retrieves the callout information */ static void request_key_auth_describe(const struct key *key, struct seq_file *m) @@ -83,12 +59,40 @@ static void request_key_auth_describe(const struct key *key, seq_puts(m, "key:"); seq_puts(m, key->description); - seq_printf(m, " pid:%d", rka->pid); + seq_printf(m, " pid:%d ci:%zu", rka->pid, strlen(rka->callout_info)); } /* end request_key_auth_describe() */ /*****************************************************************************/ /* + * read the callout_info data + * - the key's semaphore is read-locked + */ +static long request_key_auth_read(const struct key *key, + char __user *buffer, size_t buflen) +{ + struct request_key_auth *rka = key->payload.data; + size_t datalen; + long ret; + + datalen = strlen(rka->callout_info); + ret = datalen; + + /* we can return the data as is */ + if (buffer && buflen > 0) { + if (buflen > datalen) + buflen = datalen; + + if (copy_to_user(buffer, rka->callout_info, buflen) != 0) + ret = -EFAULT; + } + + return ret; + +} /* end request_key_auth_read() */ + +/*****************************************************************************/ +/* * destroy an instantiation authorisation token key */ static void request_key_auth_destroy(struct key *key) @@ -104,56 +108,89 @@ static void request_key_auth_destroy(struct key *key) /*****************************************************************************/ /* - * create a session keyring to be for the invokation of /sbin/request-key and - * stick an authorisation token in it + * create an authorisation token for /sbin/request-key or whoever to gain + * access to the caller's security data */ -struct key *request_key_auth_new(struct key *target, struct key **_rkakey) +struct key *request_key_auth_new(struct key *target, const char *callout_info) { - struct key *keyring, *rkakey = NULL; + struct request_key_auth *rka, *irka; + struct key *authkey = NULL; char desc[20]; int ret; kenter("%d,", target->serial); - /* allocate a new session keyring */ - sprintf(desc, "_req.%u", target->serial); + /* allocate a auth record */ + rka = kmalloc(sizeof(*rka), GFP_KERNEL); + if (!rka) { + kleave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); + } - keyring = keyring_alloc(desc, current->fsuid, current->fsgid, 1, NULL); - if (IS_ERR(keyring)) { - kleave("= %ld", PTR_ERR(keyring)); - return keyring; + /* see if the calling process is already servicing the key request of + * another process */ + if (current->request_key_auth) { + /* it is - use that instantiation context here too */ + irka = current->request_key_auth->payload.data; + rka->context = irka->context; + rka->pid = irka->pid; } + else { + /* it isn't - use this process as the context */ + rka->context = current; + rka->pid = current->pid; + } + + rka->target_key = key_get(target); + rka->callout_info = callout_info; /* allocate the auth key */ sprintf(desc, "%x", target->serial); - rkakey = key_alloc(&key_type_request_key_auth, desc, - current->fsuid, current->fsgid, - KEY_POS_VIEW | KEY_USR_VIEW, 1); - if (IS_ERR(rkakey)) { - key_put(keyring); - kleave("= %ld", PTR_ERR(rkakey)); - return rkakey; + authkey = key_alloc(&key_type_request_key_auth, desc, + current->fsuid, current->fsgid, + KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | + KEY_USR_VIEW, 1); + if (IS_ERR(authkey)) { + ret = PTR_ERR(authkey); + goto error_alloc; } /* construct and attach to the keyring */ - ret = key_instantiate_and_link(rkakey, target, 0, keyring, NULL); - if (ret < 0) { - key_revoke(rkakey); - key_put(rkakey); - key_put(keyring); - kleave("= %d", ret); - return ERR_PTR(ret); - } + ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL); + if (ret < 0) + goto error_inst; - *_rkakey = rkakey; - kleave(" = {%d} ({%d})", keyring->serial, rkakey->serial); - return keyring; + kleave(" = {%d})", authkey->serial); + return authkey; + +error_inst: + key_revoke(authkey); + key_put(authkey); +error_alloc: + key_put(rka->target_key); + kfree(rka); + kleave("= %d", ret); + return ERR_PTR(ret); } /* end request_key_auth_new() */ /*****************************************************************************/ /* + * see if an authorisation key is associated with a particular key + */ +static int key_get_instantiation_authkey_match(const struct key *key, + const void *_id) +{ + struct request_key_auth *rka = key->payload.data; + key_serial_t id = (key_serial_t)(unsigned long) _id; + + return rka->target_key->serial == id; + +} /* end key_get_instantiation_authkey_match() */ + +/*****************************************************************************/ +/* * get the authorisation key for instantiation of a specific key if attached to * the current process's keyrings * - this key is inserted into a keyring and that is set as /sbin/request-key's @@ -162,22 +199,27 @@ struct key *request_key_auth_new(struct key *target, struct key **_rkakey) */ struct key *key_get_instantiation_authkey(key_serial_t target_id) { - struct task_struct *tsk = current; - struct key *instkey; - - /* we must have our own personal session keyring */ - if (!tsk->signal->session_keyring) - return ERR_PTR(-EACCES); - - /* and it must contain a suitable request authorisation key - * - lock RCU against session keyring changing - */ - rcu_read_lock(); + struct key *authkey; + key_ref_t authkey_ref; + + authkey_ref = search_process_keyrings( + &key_type_request_key_auth, + (void *) (unsigned long) target_id, + key_get_instantiation_authkey_match, + current); + + if (IS_ERR(authkey_ref)) { + authkey = ERR_PTR(PTR_ERR(authkey_ref)); + goto error; + } - instkey = keyring_search_instkey( - rcu_dereference(tsk->signal->session_keyring), target_id); + authkey = key_ref_to_ptr(authkey_ref); + if (test_bit(KEY_FLAG_REVOKED, &authkey->flags)) { + key_put(authkey); + authkey = ERR_PTR(-EKEYREVOKED); + } - rcu_read_unlock(); - return instkey; +error: + return authkey; } /* end key_get_instantiation_authkey() */ diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3d496eae1b47..6647204e4636 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1663,7 +1663,7 @@ static inline void flush_unauthorized_files(struct files_struct * files) continue; } if (devnull) { - rcuref_inc(&devnull->f_count); + get_file(devnull); } else { devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR); if (!devnull) { diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index e59da6398d44..b5fa02d17b1e 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -889,7 +889,7 @@ static void sel_remove_bools(struct dentry *de) spin_lock(&dcache_lock); node = de->d_subdirs.next; while (node != &de->d_subdirs) { - struct dentry *d = list_entry(node, struct dentry, d_child); + struct dentry *d = list_entry(node, struct dentry, d_u.d_child); list_del_init(node); if (d->d_inode) { diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 5b7776504e4c..b2af7ca496c1 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -146,7 +146,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_us return rc; out: - *ctxp = 0; + *ctxp = NULL; kfree(ctx); return rc; } diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index 679d0ae97e4f..ed81eec6e732 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -115,18 +115,11 @@ MODULE_PARM_DESC(osrun_time, "how many seconds to wait for the ICS2115 OS"); #ifdef WF_DEBUG -#if defined(NEW_MACRO_VARARGS) || __GNUC__ >= 3 #define DPRINT(cond, ...) \ if ((dev->debug & (cond)) == (cond)) { \ snd_printk (__VA_ARGS__); \ } #else -#define DPRINT(cond, args...) \ - if ((dev->debug & (cond)) == (cond)) { \ - snd_printk (args); \ - } -#endif -#else #define DPRINT(cond, args...) #endif /* WF_DEBUG */ diff --git a/sound/oss/i810_audio.c b/sound/oss/i810_audio.c index b9a640fe48b1..4600cd6742ce 100644 --- a/sound/oss/i810_audio.c +++ b/sound/oss/i810_audio.c @@ -3359,12 +3359,6 @@ static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device goto out_region2; } - if (request_irq(card->irq, &i810_interrupt, SA_SHIRQ, - card_names[pci_id->driver_data], card)) { - printk(KERN_ERR "i810_audio: unable to allocate irq %d\n", card->irq); - goto out_pio; - } - if (card->use_mmio) { if (request_mem_region(card->ac97base_mmio_phys, 512, "ich_audio MMBAR")) { if ((card->ac97base_mmio = ioremap(card->ac97base_mmio_phys, 512))) { /*@FIXME can ioremap fail? don't know (jsaw) */ @@ -3395,10 +3389,8 @@ static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device } /* initialize AC97 codec and register /dev/mixer */ - if (i810_ac97_init(card) <= 0) { - free_irq(card->irq, card); + if (i810_ac97_init(card) <= 0) goto out_iospace; - } pci_set_drvdata(pci_dev, card); if(clocking == 0) { @@ -3410,7 +3402,6 @@ static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device if ((card->dev_audio = register_sound_dsp(&i810_audio_fops, -1)) < 0) { int i; printk(KERN_ERR "i810_audio: couldn't register DSP device!\n"); - free_irq(card->irq, card); for (i = 0; i < NR_AC97; i++) if (card->ac97_codec[i] != NULL) { unregister_sound_mixer(card->ac97_codec[i]->dev_mixer); @@ -3419,6 +3410,13 @@ static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device goto out_iospace; } + if (request_irq(card->irq, &i810_interrupt, SA_SHIRQ, + card_names[pci_id->driver_data], card)) { + printk(KERN_ERR "i810_audio: unable to allocate irq %d\n", card->irq); + goto out_iospace; + } + + card->initializing = 0; return 0; |