summaryrefslogtreecommitdiffstats
path: root/init
diff options
context:
space:
mode:
Diffstat (limited to 'init')
-rw-r--r--init/Kconfig439
-rw-r--r--init/do_mounts.c87
-rw-r--r--init/do_mounts_initrd.c11
-rw-r--r--init/do_mounts_md.c2
-rw-r--r--init/do_mounts_rd.c5
-rw-r--r--init/initramfs.c29
-rw-r--r--init/main.c149
-rw-r--r--init/noinitramfs.c6
8 files changed, 461 insertions, 267 deletions
diff --git a/init/Kconfig b/init/Kconfig
index 313506d8be6e..4e337906016e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -21,6 +21,13 @@ config CONSTRUCTORS
depends on !UML
default y
+config HAVE_IRQ_WORK
+ bool
+
+config IRQ_WORK
+ bool
+ depends on HAVE_IRQ_WORK
+
menu "General setup"
config EXPERIMENTAL
@@ -64,7 +71,7 @@ config BROKEN_ON_SMP
config LOCK_KERNEL
bool
- depends on SMP || PREEMPT
+ depends on (SMP || PREEMPT) && BKL
default y
config INIT_ENV_ARG_LIMIT
@@ -123,13 +130,16 @@ config HAVE_KERNEL_BZIP2
config HAVE_KERNEL_LZMA
bool
+config HAVE_KERNEL_XZ
+ bool
+
config HAVE_KERNEL_LZO
bool
choice
prompt "Kernel compression mode"
default KERNEL_GZIP
- depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_LZO
+ depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO
help
The linux kernel is a kind of self-extracting executable.
Several compression algorithms are available, which differ
@@ -174,12 +184,27 @@ config KERNEL_LZMA
two. Compression is slowest. The kernel size is about 33%
smaller with LZMA in comparison to gzip.
+config KERNEL_XZ
+ bool "XZ"
+ depends on HAVE_KERNEL_XZ
+ help
+ XZ uses the LZMA2 algorithm and instruction set specific
+ BCJ filters which can improve compression ratio of executable
+ code. The size of the kernel is about 30% smaller with XZ in
+ comparison to gzip. On architectures for which there is a BCJ
+ filter (i386, x86_64, ARM, IA-64, PowerPC, and SPARC), XZ
+ will create a few percent smaller kernel than plain LZMA.
+
+ The speed is about the same as with LZMA: The decompression
+ speed of XZ is better than that of bzip2 but worse than gzip
+ and LZO. Compression is slow.
+
config KERNEL_LZO
bool "LZO"
depends on HAVE_KERNEL_LZO
help
Its compression ratio is the poorest among the 4. The kernel
- size is about about 10% bigger than gzip; however its speed
+ size is about 10% bigger than gzip; however its speed
(both compression and decompression) is the fastest.
endchoice
@@ -320,13 +345,19 @@ config AUDITSYSCALL
help
Enable low-overhead system-call auditing infrastructure that
can be used independently or with another kernel subsystem,
- such as SELinux. To use audit's filesystem watch feature, please
- ensure that INOTIFY is configured.
+ such as SELinux.
+
+config AUDIT_WATCH
+ def_bool y
+ depends on AUDITSYSCALL
+ select FSNOTIFY
config AUDIT_TREE
def_bool y
depends on AUDITSYSCALL
- select INOTIFY
+ select FSNOTIFY
+
+source "kernel/irq/Kconfig"
menu "RCU Subsystem"
@@ -336,6 +367,7 @@ choice
config TREE_RCU
bool "Tree-based hierarchical RCU"
+ depends on !PREEMPT && SMP
help
This option selects the RCU implementation that is
designed for very large SMP system with hundreds or
@@ -343,7 +375,7 @@ config TREE_RCU
smaller systems.
config TREE_PREEMPT_RCU
- bool "Preemptable tree-based hierarchical RCU"
+ bool "Preemptible tree-based hierarchical RCU"
depends on PREEMPT
help
This option selects the RCU implementation that is
@@ -361,11 +393,24 @@ config TINY_RCU
is not required. This option greatly reduces the
memory footprint of RCU.
+config TINY_PREEMPT_RCU
+ bool "Preemptible UP-only small-memory-footprint RCU"
+ depends on !SMP && PREEMPT
+ help
+ This option selects the RCU implementation that is designed
+ for real-time UP systems. This option greatly reduces the
+ memory footprint of RCU.
+
endchoice
+config PREEMPT_RCU
+ def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU )
+ help
+ This option enables preemptible-RCU code that is common between
+ the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
+
config RCU_TRACE
bool "Enable tracing for RCU"
- depends on TREE_RCU || TREE_PREEMPT_RCU
help
This option provides tracing in RCU which presents stats
in debugfs for debugging RCU implementation.
@@ -383,9 +428,12 @@ config RCU_FANOUT
help
This option controls the fanout of hierarchical implementations
of RCU, allowing RCU to work efficiently on machines with
- large numbers of CPUs. This value must be at least the cube
- root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
- systems and up to 262,144 for 64-bit systems.
+ large numbers of CPUs. This value must be at least the fourth
+ root of NR_CPUS, which allows NR_CPUS to be insanely large.
+ The default value of RCU_FANOUT should be used for production
+ systems, but if you are stress-testing the RCU implementation
+ itself, small RCU_FANOUT values allow you to test large-system
+ code paths on small(er) systems.
Select a specific number if testing RCU itself.
Take the default if unsure.
@@ -404,6 +452,22 @@ config RCU_FANOUT_EXACT
Say N if unsure.
+config RCU_FAST_NO_HZ
+ bool "Accelerate last non-dyntick-idle CPU's grace periods"
+ depends on TREE_RCU && NO_HZ && SMP
+ default n
+ help
+ This option causes RCU to attempt to accelerate grace periods
+ in order to allow the final CPU to enter dynticks-idle state
+ more quickly. On the other hand, this option increases the
+ overhead of the dynticks-idle checking, particularly on systems
+ with large numbers of CPUs.
+
+ Say Y if energy efficiency is critically important, particularly
+ if you have relatively few CPUs.
+
+ Say N if you are unsure.
+
config TREE_RCU_TRACE
def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU )
select DEBUG_FS
@@ -412,6 +476,45 @@ config TREE_RCU_TRACE
TREE_PREEMPT_RCU implementations, permitting Makefile to
trivially select kernel/rcutree_trace.c.
+config RCU_BOOST
+ bool "Enable RCU priority boosting"
+ depends on RT_MUTEXES && TINY_PREEMPT_RCU
+ default n
+ help
+ This option boosts the priority of preempted RCU readers that
+ block the current preemptible RCU grace period for too long.
+ This option also prevents heavy loads from blocking RCU
+ callback invocation for all flavors of RCU.
+
+ Say Y here if you are working with real-time apps or heavy loads.
+ Say N here if you are unsure.
+
+config RCU_BOOST_PRIO
+ int "Real-time priority to boost RCU readers to"
+ range 1 99
+ depends on RCU_BOOST
+ default 1
+ help
+ This option specifies the real-time priority to which preempted
+ RCU readers are to be boosted. If you are working with CPU-bound
+ real-time applications, you should specify a priority higher than
+ the highest-priority CPU-bound application.
+
+ Specify the real-time priority, or take the default if unsure.
+
+config RCU_BOOST_DELAY
+ int "Milliseconds to delay boosting after RCU grace-period start"
+ range 0 3000
+ depends on RCU_BOOST
+ default 500
+ help
+ This option specifies the time to wait after the beginning of
+ a given grace period before priority-boosting preempted RCU
+ readers blocking that grace period. Note that any RCU reader
+ blocking an expedited RCU grace period is boosted immediately.
+
+ Accept the default if unsure.
+
endmenu # "RCU Subsystem"
config IKCONFIG
@@ -453,59 +556,9 @@ config LOG_BUF_SHIFT
config HAVE_UNSTABLE_SCHED_CLOCK
bool
-config GROUP_SCHED
- bool "Group CPU scheduler"
- depends on EXPERIMENTAL
- default n
- help
- This feature lets CPU scheduler recognize task groups and control CPU
- bandwidth allocation to such task groups.
- In order to create a group from arbitrary set of processes, use
- CONFIG_CGROUPS. (See Control Group support.)
-
-config FAIR_GROUP_SCHED
- bool "Group scheduling for SCHED_OTHER"
- depends on GROUP_SCHED
- default GROUP_SCHED
-
-config RT_GROUP_SCHED
- bool "Group scheduling for SCHED_RR/FIFO"
- depends on EXPERIMENTAL
- depends on GROUP_SCHED
- default n
- help
- This feature lets you explicitly allocate real CPU bandwidth
- to users or control groups (depending on the "Basis for grouping tasks"
- setting below. If enabled, it will also make it impossible to
- schedule realtime tasks for non-root users until you allocate
- realtime bandwidth for them.
- See Documentation/scheduler/sched-rt-group.txt for more information.
-
-choice
- depends on GROUP_SCHED
- prompt "Basis for grouping tasks"
- default USER_SCHED
-
-config USER_SCHED
- bool "user id"
- help
- This option will choose userid as the basis for grouping
- tasks, thus providing equal CPU bandwidth to each user.
-
-config CGROUP_SCHED
- bool "Control groups"
- depends on CGROUPS
- help
- This option allows you to create arbitrary task groups
- using the "cgroup" pseudo filesystem and control
- the cpu bandwidth allocated to each such task group.
- Refer to Documentation/cgroups/cgroups.txt for more
- information on "cgroup" pseudo filesystem.
-
-endchoice
-
menuconfig CGROUPS
boolean "Control Group support"
+ depends on EVENTFD
help
This option adds support for grouping sets of processes together, for
use with process control subsystems such as Cpusets, CFS, memory
@@ -521,7 +574,6 @@ if CGROUPS
config CGROUP_DEBUG
bool "Example debug cgroup subsystem"
- depends on CGROUPS
default n
help
This option enables a simple cgroup subsystem that
@@ -532,7 +584,6 @@ config CGROUP_DEBUG
config CGROUP_NS
bool "Namespace cgroup subsystem"
- depends on CGROUPS
help
Provides a simple namespace cgroup subsystem to
provide hierarchical naming of sets of namespaces,
@@ -541,21 +592,18 @@ config CGROUP_NS
config CGROUP_FREEZER
bool "Freezer cgroup subsystem"
- depends on CGROUPS
help
Provides a way to freeze and unfreeze all tasks in a
cgroup.
config CGROUP_DEVICE
bool "Device controller for cgroups"
- depends on CGROUPS && EXPERIMENTAL
help
Provides a cgroup implementing whitelists for devices which
a process in the cgroup can mknod or open.
config CPUSETS
bool "Cpuset support"
- depends on CGROUPS
help
This option will let you create and manage CPUSETs which
allow dynamically partitioning a system into sets of CPUs and
@@ -571,7 +619,6 @@ config PROC_PID_CPUSET
config CGROUP_CPUACCT
bool "Simple CPU accounting cgroup subsystem"
- depends on CGROUPS
help
Provides a simple Resource Controller for monitoring the
total CPU consumed by the tasks in a cgroup.
@@ -581,11 +628,10 @@ config RESOURCE_COUNTERS
help
This option enables controller independent resource accounting
infrastructure that works with cgroups.
- depends on CGROUPS
config CGROUP_MEM_RES_CTLR
bool "Memory Resource Controller for Control Groups"
- depends on CGROUPS && RESOURCE_COUNTERS
+ depends on RESOURCE_COUNTERS
select MM_OWNER
help
Provides a memory resource controller that manages both anonymous
@@ -607,8 +653,8 @@ config CGROUP_MEM_RES_CTLR
could in turn add some fork/exit overhead.
config CGROUP_MEM_RES_CTLR_SWAP
- bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)"
- depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL
+ bool "Memory Resource Controller Swap Extension"
+ depends on CGROUP_MEM_RES_CTLR && SWAP
help
Add swap management feature to memory resource controller. When you
enable this, you can limit mem+swap usage per cgroup. In other words,
@@ -623,60 +669,82 @@ config CGROUP_MEM_RES_CTLR_SWAP
if boot option "noswapaccount" is set, swap will not be accounted.
Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page
size is 4096bytes, 512k per 1Gbytes of swap.
+config CGROUP_MEM_RES_CTLR_SWAP_ENABLED
+ bool "Memory Resource Controller Swap Extension enabled by default"
+ depends on CGROUP_MEM_RES_CTLR_SWAP
+ default y
+ help
+ Memory Resource Controller Swap Extension comes with its price in
+ a bigger memory consumption. General purpose distribution kernels
+ which want to enable the feature but keep it disabled by default
+ and let the user enable it by swapaccount boot command line
+ parameter should have this option unselected.
+ Those who want to have the feature enabled by default should
+ select this option (if, for some reason, they need to disable it
+ then noswapaccount does the trick).
-endif # CGROUPS
-
-config MM_OWNER
- bool
+menuconfig CGROUP_SCHED
+ bool "Group CPU scheduler"
+ depends on EXPERIMENTAL
+ default n
+ help
+ This feature lets CPU scheduler recognize task groups and control CPU
+ bandwidth allocation to such task groups. It uses cgroups to group
+ tasks.
-config SYSFS_DEPRECATED
- bool
+if CGROUP_SCHED
+config FAIR_GROUP_SCHED
+ bool "Group scheduling for SCHED_OTHER"
+ depends on CGROUP_SCHED
+ default CGROUP_SCHED
-config SYSFS_DEPRECATED_V2
- bool "enable deprecated sysfs features to support old userspace tools"
- depends on SYSFS
+config RT_GROUP_SCHED
+ bool "Group scheduling for SCHED_RR/FIFO"
+ depends on EXPERIMENTAL
+ depends on CGROUP_SCHED
default n
- select SYSFS_DEPRECATED
- help
- This option switches the layout of sysfs to the deprecated
- version. Do not use it on recent distributions.
-
- The current sysfs layout features a unified device tree at
- /sys/devices/, which is able to express a hierarchy between
- class devices. If the deprecated option is set to Y, the
- unified device tree is split into a bus device tree at
- /sys/devices/ and several individual class device trees at
- /sys/class/. The class and bus devices will be connected by
- "<subsystem>:<name>" and the "device" links. The "block"
- class devices, will not show up in /sys/class/block/. Some
- subsystems will suppress the creation of some devices which
- depend on the unified device tree.
-
- This option is not a pure compatibility option that can
- be safely enabled on newer distributions. It will change the
- layout of sysfs to the non-extensible deprecated version,
- and disable some features, which can not be exported without
- confusing older userspace tools. Since 2007/2008 all major
- distributions do not enable this option, and ship no tools which
- depend on the deprecated layout or this option.
-
- If you are using a new kernel on an older distribution, or use
- older userspace tools, you might need to say Y here. Do not say Y,
- if the original kernel, that came with your distribution, has
- this option set to N.
-
-config RELAY
- bool "Kernel->user space relay support (formerly relayfs)"
help
- This option enables support for relay interface support in
- certain file systems (such as debugfs).
- It is designed to provide an efficient mechanism for tools and
- facilities to relay large amounts of data from kernel space to
- user space.
+ This feature lets you explicitly allocate real CPU bandwidth
+ to task groups. If enabled, it will also make it impossible to
+ schedule realtime tasks for non-root users until you allocate
+ realtime bandwidth for them.
+ See Documentation/scheduler/sched-rt-group.txt for more information.
- If unsure, say N.
+endif #CGROUP_SCHED
+
+config BLK_CGROUP
+ tristate "Block IO controller"
+ depends on BLOCK
+ default n
+ ---help---
+ Generic block IO controller cgroup interface. This is the common
+ cgroup interface which should be used by various IO controlling
+ policies.
+
+ Currently, CFQ IO scheduler uses it to recognize task groups and
+ control disk bandwidth allocation (proportional time slice allocation)
+ to such task groups. It is also used by bio throttling logic in
+ block layer to implement upper limit in IO rates on a device.
+
+ This option only enables generic Block IO controller infrastructure.
+ One needs to also enable actual IO controlling logic/policy. For
+ enabling proportional weight division of disk bandwidth in CFQ set
+ CONFIG_CFQ_GROUP_IOSCHED=y and for enabling throttling policy set
+ CONFIG_BLK_THROTTLE=y.
+
+ See Documentation/cgroups/blkio-controller.txt for more information.
+
+config DEBUG_BLK_CGROUP
+ bool "Enable Block IO controller debugging"
+ depends on BLK_CGROUP
+ default n
+ ---help---
+ Enable some debugging help. Currently it exports additional stat
+ files in a cgroup which can be useful for debugging.
-config NAMESPACES
+endif # CGROUPS
+
+menuconfig NAMESPACES
bool "Namespaces support" if EMBEDDED
default !EMBEDDED
help
@@ -685,48 +753,115 @@ config NAMESPACES
or same user id or pid may refer to different tasks when used in
different namespaces.
+if NAMESPACES
+
config UTS_NS
bool "UTS namespace"
- depends on NAMESPACES
+ default y
help
In this namespace tasks see different info provided with the
uname() system call
config IPC_NS
bool "IPC namespace"
- depends on NAMESPACES && (SYSVIPC || POSIX_MQUEUE)
+ depends on (SYSVIPC || POSIX_MQUEUE)
+ default y
help
In this namespace tasks work with IPC ids which correspond to
different IPC objects in different namespaces.
config USER_NS
bool "User namespace (EXPERIMENTAL)"
- depends on NAMESPACES && EXPERIMENTAL
+ depends on EXPERIMENTAL
+ default y
help
This allows containers, i.e. vservers, to use user namespaces
to provide different user info for different servers.
If unsure, say N.
config PID_NS
- bool "PID Namespaces (EXPERIMENTAL)"
- default n
- depends on NAMESPACES && EXPERIMENTAL
+ bool "PID Namespaces"
+ default y
help
Support process id namespaces. This allows having multiple
processes with the same pid as long as they are in different
pid namespaces. This is a building block of containers.
- Unless you want to work with an experimental feature
- say N here.
-
config NET_NS
bool "Network namespace"
- default n
- depends on NAMESPACES && EXPERIMENTAL && NET
+ depends on NET
+ default y
help
Allow user space to create what appear to be multiple instances
of the network stack.
+endif # NAMESPACES
+
+config SCHED_AUTOGROUP
+ bool "Automatic process group scheduling"
+ select EVENTFD
+ select CGROUPS
+ select CGROUP_SCHED
+ select FAIR_GROUP_SCHED
+ help
+ This option optimizes the scheduler for common desktop workloads by
+ automatically creating and populating task groups. This separation
+ of workloads isolates aggressive CPU burners (like build jobs) from
+ desktop applications. Task group autogeneration is currently based
+ upon task session.
+
+config MM_OWNER
+ bool
+
+config SYSFS_DEPRECATED
+ bool "enable deprecated sysfs features to support old userspace tools"
+ depends on SYSFS
+ default n
+ help
+ This option adds code that switches the layout of the "block" class
+ devices, to not show up in /sys/class/block/, but only in
+ /sys/block/.
+
+ This switch is only active when the sysfs.deprecated=1 boot option is
+ passed or the SYSFS_DEPRECATED_V2 option is set.
+
+ This option allows new kernels to run on old distributions and tools,
+ which might get confused by /sys/class/block/. Since 2007/2008 all
+ major distributions and tools handle this just fine.
+
+ Recent distributions and userspace tools after 2009/2010 depend on
+ the existence of /sys/class/block/, and will not work with this
+ option enabled.
+
+ Only if you are using a new kernel on an old distribution, you might
+ need to say Y here.
+
+config SYSFS_DEPRECATED_V2
+ bool "enabled deprecated sysfs features by default"
+ default n
+ depends on SYSFS
+ depends on SYSFS_DEPRECATED
+ help
+ Enable deprecated sysfs by default.
+
+ See the CONFIG_SYSFS_DEPRECATED option for more details about this
+ option.
+
+ Only if you are using a new kernel on an old distribution, you might
+ need to say Y here. Even then, odds are you would not need it
+ enabled, you can always pass the boot option if absolutely necessary.
+
+config RELAY
+ bool "Kernel->user space relay support (formerly relayfs)"
+ help
+ This option enables support for relay interface support in
+ certain file systems (such as debugfs).
+ It is designed to provide an efficient mechanism for tools and
+ facilities to relay large amounts of data from kernel space to
+ user space.
+
+ If unsure, say N.
+
config BLK_DEV_INITRD
bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
depends on BROKEN || !FRV
@@ -961,6 +1096,7 @@ config PERF_EVENTS
default y if (PROFILING || PERF_COUNTERS)
depends on HAVE_PERF_EVENTS
select ANON_INODES
+ select IRQ_WORK
help
Enable kernel support for various performance events provided
by software and hardware.
@@ -984,19 +1120,6 @@ config PERF_EVENTS
Say Y if unsure.
-config EVENT_PROFILE
- bool "Tracepoint profiling sources"
- depends on PERF_EVENTS && EVENT_TRACING
- default y
- help
- Allow the use of tracepoints as software performance events.
-
- When this is enabled, you can create perf events based on
- tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
- found in debugfs://tracing/events/*/*/id. (The -e/--events
- option to the perf tool can parse and interpret symbolic
- tracepoints, in the subsystem:tracepoint_name format.)
-
config PERF_COUNTERS
bool "Kernel performance counters (old config option)"
depends on HAVE_PERF_EVENTS
@@ -1120,7 +1243,7 @@ config MMAP_ALLOW_UNINITIALIZED
See Documentation/nommu-mmap.txt for more information.
config PROFILING
- bool "Profiling support (EXPERIMENTAL)"
+ bool "Profiling support"
help
Say Y here to enable the extended profiling support mechanisms used
by profilers such as OProfile.
@@ -1134,30 +1257,6 @@ config TRACEPOINTS
source "arch/Kconfig"
-config SLOW_WORK
- default n
- bool
- help
- The slow work thread pool provides a number of dynamically allocated
- threads that can be used by the kernel to perform operations that
- take a relatively long time.
-
- An example of this would be CacheFiles doing a path lookup followed
- by a series of mkdirs and a create call, all of which have to touch
- disk.
-
- See Documentation/slow-work.txt.
-
-config SLOW_WORK_DEBUG
- bool "Slow work debugging through debugfs"
- default n
- depends on SLOW_WORK && DEBUG_FS
- help
- Display the contents of the slow work run queue through debugfs,
- including items currently executing.
-
- See Documentation/slow-work.txt.
-
endmenu # General setup
config HAVE_GENERIC_DMA_COHERENT
@@ -1270,4 +1369,8 @@ source "block/Kconfig"
config PREEMPT_NOTIFIERS
bool
+config PADATA
+ depends on SMP
+ bool
+
source "kernel/Kconfig.locks"
diff --git a/init/do_mounts.c b/init/do_mounts.c
index bb008d064c1a..2b54bef33b55 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -15,6 +15,7 @@
#include <linux/initrd.h>
#include <linux/async.h>
#include <linux/fs_struct.h>
+#include <linux/slab.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
@@ -57,6 +58,62 @@ static int __init readwrite(char *str)
__setup("ro", readonly);
__setup("rw", readwrite);
+#ifdef CONFIG_BLOCK
+/**
+ * match_dev_by_uuid - callback for finding a partition using its uuid
+ * @dev: device passed in by the caller
+ * @data: opaque pointer to the packed (binary) UUID to compare against
+ *
+ * Returns 1 if the device matches, and 0 otherwise.
+ */
+static int match_dev_by_uuid(struct device *dev, void *data)
+{
+ u8 *uuid = data;
+ struct hd_struct *part = dev_to_part(dev);
+
+ if (!part->info)
+ goto no_match;
+
+ if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
+ goto no_match;
+
+ return 1;
+no_match:
+ return 0;
+}
+
+
+/**
+ * devt_from_partuuid - looks up the dev_t of a partition by its UUID
+ * @uuid_str: 36 byte char array containing a hex ascii UUID
+ *
+ * The function will return the first partition which contains a matching
+ * UUID value in its partition_meta_info struct. This does not search
+ * by filesystem UUIDs.
+ *
+ * Returns the matching dev_t on success or 0 on failure.
+ */
+static dev_t devt_from_partuuid(char *uuid_str)
+{
+ dev_t res = 0;
+ struct device *dev = NULL;
+ u8 uuid[16];
+
+ /* Pack the requested UUID in the expected format. */
+ part_pack_uuid(uuid_str, uuid);
+
+ dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
+ if (!dev)
+ goto done;
+
+ res = dev->devt;
+ put_device(dev);
+
+done:
+ return res;
+}
+#endif
+
/*
* Convert a name into device number. We accept the following variants:
*
@@ -67,6 +124,8 @@ __setup("rw", readwrite);
* of partition - device number of disk plus the partition number
* 5) /dev/<disk_name>p<decimal> - same as the above, that form is
* used when disk name of partitioned disk ends on a digit.
+ * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
+ * unique id of a partition if the partition table provides it.
*
* If name doesn't have fall into the categories above, we return (0,0).
* block_class is used to check if something is a disk name. If the disk
@@ -81,6 +140,18 @@ dev_t name_to_dev_t(char *name)
dev_t res = 0;
int part;
+#ifdef CONFIG_BLOCK
+ if (strncmp(name, "PARTUUID=", 9) == 0) {
+ name += 9;
+ if (strlen(name) != 36)
+ goto fail;
+ res = devt_from_partuuid(name);
+ if (!res)
+ goto fail;
+ goto done;
+ }
+#endif
+
if (strncmp(name, "/dev/", 5) != 0) {
unsigned maj, min;
@@ -220,7 +291,7 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data)
if (err)
return err;
- sys_chdir("/root");
+ sys_chdir((const char __user __force *)"/root");
ROOT_DEV = current->fs->pwd.mnt->mnt_sb->s_dev;
printk("VFS: Mounted root (%s filesystem)%s on device %u:%u.\n",
current->fs->pwd.mnt->mnt_sb->s_type->name,
@@ -290,13 +361,13 @@ out:
#ifdef CONFIG_ROOT_NFS
static int __init mount_nfs_root(void)
{
- void *data = nfs_root_data();
+ char *root_dev, *root_data;
- create_dev("/dev/root", ROOT_DEV);
- if (data &&
- do_mount_root("/dev/root", "nfs", root_mountflags, data) == 0)
- return 1;
- return 0;
+ if (nfs_root_data(&root_dev, &root_data) != 0)
+ return 0;
+ if (do_mount_root(root_dev, "nfs", root_mountflags, root_data) != 0)
+ return 0;
+ return 1;
}
#endif
@@ -417,5 +488,5 @@ void __init prepare_namespace(void)
out:
devtmpfs_mount("dev");
sys_mount(".", "/", NULL, MS_MOVE, NULL);
- sys_chroot(".");
+ sys_chroot((const char __user __force *)".");
}
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 614241b5200c..3098a38f3ae1 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -24,17 +24,14 @@ static int __init no_initrd(char *str)
__setup("noinitrd", no_initrd);
-static int __init do_linuxrc(void * shell)
+static int __init do_linuxrc(void *_shell)
{
- static char *argv[] = { "linuxrc", NULL, };
- extern char * envp_init[];
+ static const char *argv[] = { "linuxrc", NULL, };
+ extern const char *envp_init[];
+ const char *shell = _shell;
sys_close(old_fd);sys_close(root_fd);
- sys_close(0);sys_close(1);sys_close(2);
sys_setsid();
- (void) sys_open("/dev/console",O_RDWR,0);
- (void) sys_dup(0);
- (void) sys_dup(0);
return kernel_execve(shell, argv, envp_init);
}
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
index 69aebbf8fd2d..32c4799b8c91 100644
--- a/init/do_mounts_md.c
+++ b/init/do_mounts_md.c
@@ -283,7 +283,7 @@ static void __init autodetect_raid(void)
wait_for_device_probe();
- fd = sys_open("/dev/md0", 0, 0);
+ fd = sys_open((const char __user __force *) "/dev/md0", 0, 0);
if (fd >= 0) {
sys_ioctl(fd, RAID_AUTORUN, raid_autopart);
sys_close(fd);
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 027a402708de..6e1ee6987c78 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -7,6 +7,7 @@
#include <linux/cramfs_fs.h>
#include <linux/initrd.h>
#include <linux/string.h>
+#include <linux/slab.h>
#include "do_mounts.h"
#include "../fs/squashfs/squashfs_fs.h"
@@ -167,7 +168,7 @@ int __init rd_load_image(char *from)
char rotator[4] = { '|' , '/' , '-' , '\\' };
#endif
- out_fd = sys_open("/dev/ram", O_RDWR, 0);
+ out_fd = sys_open((const char __user __force *) "/dev/ram", O_RDWR, 0);
if (out_fd < 0)
goto out;
@@ -266,7 +267,7 @@ noclose_input:
sys_close(out_fd);
out:
kfree(buf);
- sys_unlink("/dev/ram");
+ sys_unlink((const char __user __force *) "/dev/ram");
return res;
}
diff --git a/init/initramfs.c b/init/initramfs.c
index b37d34beb90b..2531811d42cb 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -457,7 +457,8 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len)
compress_name);
message = msg_buf;
}
- }
+ } else
+ error("junk in compressed archive");
if (state != Reset)
error("junk in compressed archive");
this_header = saved_offset + my_inptr;
@@ -482,7 +483,8 @@ static int __init retain_initrd_param(char *str)
}
__setup("retain_initrd", retain_initrd_param);
-extern char __initramfs_start[], __initramfs_end[];
+extern char __initramfs_start[];
+extern unsigned long __initramfs_size;
#include <linux/initrd.h>
#include <linux/kexec.h>
@@ -525,9 +527,9 @@ static void __init clean_rootfs(void)
int fd;
void *buf;
struct linux_dirent64 *dirp;
- int count;
+ int num;
- fd = sys_open("/", O_RDONLY, 0);
+ fd = sys_open((const char __user __force *) "/", O_RDONLY, 0);
WARN_ON(fd < 0);
if (fd < 0)
return;
@@ -539,9 +541,9 @@ static void __init clean_rootfs(void)
}
dirp = buf;
- count = sys_getdents64(fd, dirp, BUF_SIZE);
- while (count > 0) {
- while (count > 0) {
+ num = sys_getdents64(fd, dirp, BUF_SIZE);
+ while (num > 0) {
+ while (num > 0) {
struct stat st;
int ret;
@@ -554,12 +556,12 @@ static void __init clean_rootfs(void)
sys_unlink(dirp->d_name);
}
- count -= dirp->d_reclen;
+ num -= dirp->d_reclen;
dirp = (void *)dirp + dirp->d_reclen;
}
dirp = buf;
memset(buf, 0, BUF_SIZE);
- count = sys_getdents64(fd, dirp, BUF_SIZE);
+ num = sys_getdents64(fd, dirp, BUF_SIZE);
}
sys_close(fd);
@@ -569,8 +571,7 @@ static void __init clean_rootfs(void)
static int __init populate_rootfs(void)
{
- char *err = unpack_to_rootfs(__initramfs_start,
- __initramfs_end - __initramfs_start);
+ char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
if (err)
panic(err); /* Failed to decompress INTERNAL initramfs */
if (initrd_start) {
@@ -584,12 +585,12 @@ static int __init populate_rootfs(void)
return 0;
} else {
clean_rootfs();
- unpack_to_rootfs(__initramfs_start,
- __initramfs_end - __initramfs_start);
+ unpack_to_rootfs(__initramfs_start, __initramfs_size);
}
printk(KERN_INFO "rootfs image is not initramfs (%s)"
"; looks like an initrd\n", err);
- fd = sys_open("/initrd.image", O_WRONLY|O_CREAT, 0700);
+ fd = sys_open((const char __user __force *) "/initrd.image",
+ O_WRONLY|O_CREAT, 0700);
if (fd >= 0) {
sys_write(fd, (char *)initrd_start,
initrd_end - initrd_start);
diff --git a/init/main.c b/init/main.c
index dac44a9356a5..00799c1d4628 100644
--- a/init/main.c
+++ b/init/main.c
@@ -20,12 +20,10 @@
#include <linux/delay.h>
#include <linux/ioport.h>
#include <linux/init.h>
-#include <linux/smp_lock.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
#include <linux/acpi.h>
#include <linux/tty.h>
-#include <linux/gfp.h>
#include <linux/percpu.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
@@ -33,7 +31,6 @@
#include <linux/start_kernel.h>
#include <linux/security.h>
#include <linux/smp.h>
-#include <linux/workqueue.h>
#include <linux/profile.h>
#include <linux/rcupdate.h>
#include <linux/moduleparam.h>
@@ -63,13 +60,14 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/idr.h>
+#include <linux/kgdb.h>
#include <linux/ftrace.h>
#include <linux/async.h>
#include <linux/kmemcheck.h>
-#include <linux/kmemtrace.h>
#include <linux/sfi.h>
#include <linux/shmem_fs.h>
-#include <trace/boot.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -124,7 +122,9 @@ static char *ramdisk_execute_command;
#ifdef CONFIG_SMP
/* Setup configured maximum number of CPUs to activate */
-unsigned int __initdata setup_max_cpus = NR_CPUS;
+unsigned int setup_max_cpus = NR_CPUS;
+EXPORT_SYMBOL(setup_max_cpus);
+
/*
* Setup routine for controlling SMP activation
@@ -149,6 +149,20 @@ static int __init nosmp(char *str)
early_param("nosmp", nosmp);
+/* this is hard limit */
+static int __init nrcpus(char *str)
+{
+ int nr_cpus;
+
+ get_option(&str, &nr_cpus);
+ if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
+ nr_cpu_ids = nr_cpus;
+
+ return 0;
+}
+
+early_param("nr_cpus", nrcpus);
+
static int __init maxcpus(char *str)
{
get_option(&str, &setup_max_cpus);
@@ -160,7 +174,7 @@ static int __init maxcpus(char *str)
early_param("maxcpus", maxcpus);
#else
-const unsigned int setup_max_cpus = NR_CPUS;
+static const unsigned int setup_max_cpus = NR_CPUS;
#endif
/*
@@ -183,15 +197,15 @@ static int __init set_reset_devices(char *str)
__setup("reset_devices", set_reset_devices);
-static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
-char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
+static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
static const char *panic_later, *panic_param;
-extern struct obs_kernel_param __setup_start[], __setup_end[];
+extern const struct obs_kernel_param __setup_start[], __setup_end[];
static int __init obsolete_checksetup(char *line)
{
- struct obs_kernel_param *p;
+ const struct obs_kernel_param *p;
int had_early_param = 0;
p = __setup_start;
@@ -407,17 +421,25 @@ static void __init setup_command_line(char *command_line)
* gcc-3.4 accidentally inlines this function, so use noinline.
*/
+static __initdata DECLARE_COMPLETION(kthreadd_done);
+
static noinline void __init_refok rest_init(void)
- __releases(kernel_lock)
{
int pid;
rcu_scheduler_starting();
+ /*
+ * We need to spawn init first so that it obtains pid 1, however
+ * the init task will end up wanting to create kthreads, which, if
+ * we schedule it before we create kthreadd, will OOPS.
+ */
kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
numa_default_policy();
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
+ rcu_read_lock();
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
- unlock_kernel();
+ rcu_read_unlock();
+ complete(&kthreadd_done);
/*
* The boot idle thread must execute schedule()
@@ -435,7 +457,7 @@ static noinline void __init_refok rest_init(void)
/* Check for early params. */
static int __init do_early_param(char *param, char *val)
{
- struct obs_kernel_param *p;
+ const struct obs_kernel_param *p;
for (p = __setup_start; p < __setup_end; p++) {
if ((p->early && strcmp(param, p->str) == 0) ||
@@ -505,6 +527,7 @@ static void __init mm_init(void)
page_cgroup_init_flatmem();
mem_init();
kmem_cache_init();
+ percpu_init_late();
pgtable_cache_init();
vmalloc_init();
}
@@ -512,7 +535,7 @@ static void __init mm_init(void)
asmlinkage void __init start_kernel(void)
{
char * command_line;
- extern struct kernel_param __start___param[], __stop___param[];
+ extern const struct kernel_param __start___param[], __stop___param[];
smp_setup_processor_id();
@@ -532,13 +555,11 @@ asmlinkage void __init start_kernel(void)
local_irq_disable();
early_boot_irqs_off();
- early_init_irq_lock_class();
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
- lock_kernel();
tick_init();
boot_cpu_init();
page_address_init();
@@ -550,7 +571,7 @@ asmlinkage void __init start_kernel(void)
setup_per_cpu_areas();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
- build_all_zonelists();
+ build_all_zonelists(NULL);
page_alloc_init();
printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
@@ -583,7 +604,10 @@ asmlinkage void __init start_kernel(void)
"enabled *very* early, fixing it\n");
local_irq_disable();
}
+ idr_init_cache();
+ perf_event_init();
rcu_init();
+ radix_tree_init();
/* init some links before init_ISA_irqs() */
early_irq_init();
init_IRQ();
@@ -601,7 +625,7 @@ asmlinkage void __init start_kernel(void)
local_irq_enable();
/* Interrupts are enabled now so all GFP allocations are safe. */
- set_gfp_allowed_mask(__GFP_BITS_MASK);
+ gfp_allowed_mask = __GFP_BITS_MASK;
kmem_cache_init_late();
@@ -635,10 +659,8 @@ asmlinkage void __init start_kernel(void)
#endif
page_cgroup_init();
enable_debug_pagealloc();
- kmemtrace_init();
kmemleak_init();
debug_objects_mem_init();
- idr_init_cache();
setup_per_cpu_pageset();
numa_policy_init();
if (late_time_init)
@@ -658,8 +680,8 @@ asmlinkage void __init start_kernel(void)
buffer_init();
key_init();
security_init();
+ dbg_late_init();
vfs_caches_init(totalram_pages);
- radix_tree_init();
signals_init();
/* rootfs populating might need page-writeback */
page_writeback_init();
@@ -697,38 +719,39 @@ int initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);
static char msgbuf[64];
-static struct boot_trace_call call;
-static struct boot_trace_ret ret;
-int do_one_initcall(initcall_t fn)
+/*
+ * initcall_debug variant of an initcall invocation: logs the calling pid
+ * before fn() runs, then fn's return value and the elapsed time after.
+ * The elapsed nanoseconds are shifted right by 10 (divided by 1024), so
+ * the "usecs" figure is an approximation. Returns what fn() returned.
+ */
+static int __init_or_module do_one_initcall_debug(initcall_t fn)
{
- int count = preempt_count();
ktime_t calltime, delta, rettime;
+ unsigned long long duration;
+ int ret;
+
+ printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current));
+ calltime = ktime_get();
+ ret = fn();
+ rettime = ktime_get();
+ delta = ktime_sub(rettime, calltime);
+ duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+ /* duration is unsigned long long, so print it with %llu */
+ printk(KERN_DEBUG "initcall %pF returned %d after %llu usecs\n", fn,
+ ret, duration);
+
+ return ret;
+}
- if (initcall_debug) {
- call.caller = task_pid_nr(current);
- printk("calling %pF @ %i\n", fn, call.caller);
- calltime = ktime_get();
- trace_boot_call(&call, fn);
- enable_boot_trace();
- }
-
- ret.result = fn();
+int __init_or_module do_one_initcall(initcall_t fn)
+{
+ int count = preempt_count();
+ int ret;
- if (initcall_debug) {
- disable_boot_trace();
- rettime = ktime_get();
- delta = ktime_sub(rettime, calltime);
- ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
- trace_boot_ret(&ret, fn);
- printk("initcall %pF returned %d after %Ld usecs\n", fn,
- ret.result, ret.duration);
- }
+ if (initcall_debug)
+ ret = do_one_initcall_debug(fn);
+ else
+ ret = fn();
msgbuf[0] = 0;
- if (ret.result && ret.result != -ENODEV && initcall_debug)
- sprintf(msgbuf, "error code %d ", ret.result);
+ if (ret && ret != -ENODEV && initcall_debug)
+ sprintf(msgbuf, "error code %d ", ret);
if (preempt_count() != count) {
strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -742,7 +765,7 @@ int do_one_initcall(initcall_t fn)
printk("initcall %pF returned with %s\n", fn, msgbuf);
}
- return ret.result;
+ return ret;
}
@@ -754,9 +777,6 @@ static void __init do_initcalls(void)
for (fn = __early_initcall_end; fn < __initcall_end; fn++)
do_one_initcall(*fn);
-
- /* Make sure there is no pending stuff from the initcall sequence */
- flush_scheduled_work();
}
/*
@@ -768,7 +788,6 @@ static void __init do_initcalls(void)
*/
static void __init do_basic_setup(void)
{
- init_workqueues();
cpuset_init_smp();
usermodehelper_init();
init_tmpfs();
@@ -786,7 +805,7 @@ static void __init do_pre_smp_initcalls(void)
do_one_initcall(*fn);
}
-static void run_init_process(char *init_filename)
+static void run_init_process(const char *init_filename)
{
argv_init[0] = init_filename;
kernel_execve(init_filename, argv_init, envp_init);
@@ -796,21 +815,14 @@ static void run_init_process(char *init_filename)
* makes it inline to init() and it becomes part of init.text section
*/
static noinline int init_post(void)
- __releases(kernel_lock)
{
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
free_initmem();
- unlock_kernel();
mark_rodata_ro();
system_state = SYSTEM_RUNNING;
numa_default_policy();
- if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
- printk(KERN_WARNING "Warning: unable to open an initial console.\n");
-
- (void) sys_dup(0);
- (void) sys_dup(0);
current->signal->flags |= SIGNAL_UNKILLABLE;
@@ -836,17 +848,20 @@ static noinline int init_post(void)
run_init_process("/bin/init");
run_init_process("/bin/sh");
- panic("No init found. Try passing init= option to kernel.");
+ panic("No init found. Try passing init= option to kernel. "
+ "See Linux Documentation/init.txt for guidance.");
}
static int __init kernel_init(void * unused)
{
- lock_kernel();
-
+ /*
+ * Wait until kthreadd is all set-up.
+ */
+ wait_for_completion(&kthreadd_done);
/*
* init can allocate pages on any node
*/
- set_mems_allowed(node_possible_map);
+ set_mems_allowed(node_states[N_HIGH_MEMORY]);
/*
* init can run on any cpu.
*/
@@ -866,13 +881,19 @@ static int __init kernel_init(void * unused)
smp_prepare_cpus(setup_max_cpus);
do_pre_smp_initcalls();
- start_boot_trace();
+ lockup_detector_init();
smp_init();
sched_init_smp();
do_basic_setup();
+ /* Open the /dev/console on the rootfs, this should never fail */
+ if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
+ printk(KERN_WARNING "Warning: unable to open an initial console.\n");
+
+ (void) sys_dup(0);
+ (void) sys_dup(0);
/*
* check if there is an early userspace init. If yes, let it do all
* the work
diff --git a/init/noinitramfs.c b/init/noinitramfs.c
index f4c1a3a1b8c5..267739d85179 100644
--- a/init/noinitramfs.c
+++ b/init/noinitramfs.c
@@ -29,17 +29,17 @@ static int __init default_rootfs(void)
{
int err;
- err = sys_mkdir("/dev", 0755);
+ err = sys_mkdir((const char __user __force *) "/dev", 0755);
if (err < 0)
goto out;
- err = sys_mknod((const char __user *) "/dev/console",
+ err = sys_mknod((const char __user __force *) "/dev/console",
S_IFCHR | S_IRUSR | S_IWUSR,
new_encode_dev(MKDEV(5, 1)));
if (err < 0)
goto out;
- err = sys_mkdir("/root", 0700);
+ err = sys_mkdir((const char __user __force *) "/root", 0700);
if (err < 0)
goto out;