From 50e02fd84543d82e663000e780e0ec0cfde52283 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 14 Apr 2014 23:37:35 -0400 Subject: ext4: remove temporary shim used to merge COLLAPSE_RANGE and ZERO_RANGE In retrospect, this was a bad way to handle things, since it limited testing of these patches. We should just get the VFS level changes merged in first. Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 010ea89eeb0e..6a1a0245474f 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -16,15 +16,6 @@ struct mpage_da_data; struct ext4_map_blocks; struct extent_status; -/* shim until we merge in the xfs_collapse_range branch */ -#ifndef FALLOC_FL_COLLAPSE_RANGE -#define FALLOC_FL_COLLAPSE_RANGE 0x08 -#endif - -#ifndef FALLOC_FL_ZERO_RANGE -#define FALLOC_FL_ZERO_RANGE 0x10 -#endif - #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) #define show_mballoc_flags(flags) __print_flags(flags, "|", \ -- cgit v1.2.3 From 01f8fa4f01d8362358eb90e412bd7ae18a3ec1ad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Apr 2014 14:36:44 +0000 Subject: genirq: Allow forcing cpu affinity of interrupts The current implementation of irq_set_affinity() refuses rightfully to route an interrupt to an offline cpu. But there is a special case, where this is actually desired. Some of the ARM SoCs have per cpu timers which require setting the affinity during cpu startup where the cpu is not yet in the online mask. If we can't do that, then the local timer interrupt for the about to become online cpu is routed to some random online cpu. The developers of the affected machines tried to work around that issue, but that results in a massive mess in that timer code. We have a yet unused argument in the set_affinity callbacks of the irq chips, which I added back then for a similar reason. It was never required so it got not used. But I'm happy that I never removed it. That allows us to implement a sane handling of the above scenario. So the affected SoC drivers can add the required force handling to their interrupt chip, switch the timer code to irq_force_affinity() and things just work. This does not affect any existing user of irq_set_affinity(). Tagged for stable to allow a simple fix of the affected SoC clock event drivers. Reported-and-tested-by: Krzysztof Kozlowski Signed-off-by: Thomas Gleixner Cc: Kyungmin Park Cc: Marek Szyprowski Cc: Bartlomiej Zolnierkiewicz Cc: Tomasz Figa , Cc: Daniel Lezcano , Cc: Kukjin Kim Cc: linux-arm-kernel@lists.infradead.org, Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20140416143315.717251504@linutronix.de Signed-off-by: Thomas Gleixner --- arch/mips/cavium-octeon/octeon-irq.c | 2 +- include/linux/interrupt.h | 35 ++++++++++++++++++++++++++++++++++- include/linux/irq.h | 3 ++- kernel/irq/manage.c | 17 ++++++----------- 4 files changed, 43 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index c2bb4f896ce7..3aa5b46b2d40 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -635,7 +635,7 @@ static void octeon_irq_cpu_offline_ciu(struct irq_data *data) cpumask_clear(&new_affinity); cpumask_set_cpu(cpumask_first(cpu_online_mask), &new_affinity); } - __irq_set_affinity_locked(data, &new_affinity); + irq_set_affinity_locked(data, &new_affinity, false); } static int octeon_irq_ciu_set_affinity(struct irq_data *data, diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c7bfac1c4a7b..8834a7e5b944 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -203,7 +203,40 @@ static inline int check_wakeup_irqs(void) { return 0; } extern cpumask_var_t irq_default_affinity; -extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); +/* Internal implementation. Use the helpers below */ +extern int __irq_set_affinity(unsigned int irq, const struct cpumask *cpumask, + bool force); + +/** + * irq_set_affinity - Set the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @mask: cpumask + * + * Fails if cpumask does not contain an online CPU + */ +static inline int +irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return __irq_set_affinity(irq, cpumask, false); +} + +/** + * irq_force_affinity - Force the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @mask: cpumask + * + * Same as irq_set_affinity, but without checking the mask against + * online cpus. + * + * Solely for low level cpu hotplug code, where we need to make per + * cpu interrupts affine before the cpu becomes online. + */ +static inline int +irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return __irq_set_affinity(irq, cpumask, true); +} + extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); diff --git a/include/linux/irq.h b/include/linux/irq.h index d278838908cb..10a0b1ac4ea0 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -394,7 +394,8 @@ extern void remove_percpu_irq(unsigned int irq, struct irqaction *act); extern void irq_cpu_online(void); extern void irq_cpu_offline(void); -extern int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *cpumask); +extern int irq_set_affinity_locked(struct irq_data *data, + const struct cpumask *cpumask, bool force); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 2486a4c1a710..d34131ca372b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -180,7 +180,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_chip *chip = irq_data_get_irq_chip(data); int ret; - ret = chip->irq_set_affinity(data, mask, false); + ret = chip->irq_set_affinity(data, mask, force); switch (ret) { case IRQ_SET_MASK_OK: cpumask_copy(data->affinity, mask); @@ -192,7 +192,8 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, return ret; } -int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) +int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, + bool force) { struct irq_chip *chip = irq_data_get_irq_chip(data); struct irq_desc *desc = irq_data_to_desc(data); @@ -202,7 +203,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) return -EINVAL; if (irq_can_move_pcntxt(data)) { - ret = irq_do_set_affinity(data, mask, false); + ret = irq_do_set_affinity(data, mask, force); } else { irqd_set_move_pending(data); irq_copy_pending(desc, mask); @@ -217,13 +218,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) return ret; } -/** - * irq_set_affinity - Set the irq affinity of a given irq - * @irq: Interrupt to set affinity - * @mask: cpumask - * - */ -int irq_set_affinity(unsigned int irq, const struct cpumask *mask) +int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -233,7 +228,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask) return -EINVAL; raw_spin_lock_irqsave(&desc->lock, flags); - ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask); + ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); raw_spin_unlock_irqrestore(&desc->lock, flags); return ret; } -- cgit v1.2.3 From df7926fffa9a4c0bceb0189386b4c5edc012fcbb Mon Sep 17 00:00:00 2001 From: Tim Kryger Date: Thu, 17 Apr 2014 11:55:24 -0700 Subject: regulator: core: Return error in get optional stub Drivers that call regulator_get_optional are tolerant to the absence of that regulator. By modifying the value returned from the stub function to match that seen when a regulator isn't present, callers can wrap the regulator logic with an IS_ERR based conditional even if they happen to call regulator_is_supported_voltage. This improves efficiency as well as eliminates the possibility for a very subtle bug. Signed-off-by: Tim Kryger Reviewed-by: Alex Elder Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index e530681bea70..1a4a8c157b31 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -258,14 +258,14 @@ regulator_get_exclusive(struct device *dev, const char *id) static inline struct regulator *__must_check regulator_get_optional(struct device *dev, const char *id) { - return NULL; + return ERR_PTR(-ENODEV); } static inline struct regulator *__must_check devm_regulator_get_optional(struct device *dev, const char *id) { - return NULL; + return ERR_PTR(-ENODEV); } static inline void regulator_put(struct regulator *regulator) -- cgit v1.2.3 From 8a4aeec8d2d6a3edeffbdfae451cdf05cbf0fefd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 17 Apr 2014 11:48:21 -0700 Subject: libata/ahci: accommodate tag ordered controllers The AHCI spec allows implementations to issue commands in tag order rather than FIFO order: 5.3.2.12 P:SelectCmd HBA sets pSlotLoc = (pSlotLoc + 1) mod (CAP.NCS + 1) or HBA selects the command to issue that has had the PxCI bit set to '1' longer than any other command pending to be issued. The result is that commands posted sequentially (time-wise) may play out of sequence when issued by hardware. This behavior has likely been hidden by drives that arrange for commands to complete in issue order. However, it appears recent drives (two from different vendors that we have found so far) inflict out-of-order completions as a matter of course. So, we need to take care to maintain ordered submission, otherwise we risk triggering a drive to fall out of sequential-io automation and back to random-io processing, which incurs large latency and degrades throughput. This issue was found in simple benchmarks where QD=2 seq-write performance was 30-50% *greater* than QD=32 seq-write performance. Tagging for -stable and making the change globally since it has a low risk-to-reward ratio. Also, word is that recent versions of an unnamed OS also does it this way now. So, drives in the field are already experienced with this tag ordering scheme. Cc: Cc: Dave Jiang Cc: Ed Ciechanowski Reviewed-by: Matthew Wilcox Signed-off-by: Dan Williams Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 21 +++++++++++++-------- include/linux/libata.h | 1 + 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index f2a6020366e1..73c5d0410d47 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4794,21 +4794,26 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) { struct ata_queued_cmd *qc = NULL; - unsigned int i; + unsigned int i, tag; /* no command while frozen */ if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) return NULL; - /* the last tag is reserved for internal command. */ - for (i = 0; i < ATA_MAX_QUEUE - 1; i++) - if (!test_and_set_bit(i, &ap->qc_allocated)) { - qc = __ata_qc_from_tag(ap, i); + for (i = 0; i < ATA_MAX_QUEUE; i++) { + tag = (i + ap->last_tag + 1) % ATA_MAX_QUEUE; + + /* the last tag is reserved for internal command. */ + if (tag == ATA_TAG_INTERNAL) + continue; + + if (!test_and_set_bit(tag, &ap->qc_allocated)) { + qc = __ata_qc_from_tag(ap, tag); + qc->tag = tag; + ap->last_tag = tag; break; } - - if (qc) - qc->tag = i; + } return qc; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 1de36be64df4..5ab4e3a76721 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -822,6 +822,7 @@ struct ata_port { unsigned long qc_allocated; unsigned int qc_active; int nr_active_links; /* #links with active qcs */ + unsigned int last_tag; /* track next tag hw expects */ struct ata_link link; /* host default link */ struct ata_link *slave_link; /* see ata_slave_link_init() */ -- cgit v1.2.3 From 0456c66f4e905e1ca839318219c770988b47975c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 19 Apr 2014 20:39:35 -0700 Subject: Input: serio - add firmware_id sysfs attribute serio devices exposed via platform firmware interfaces such as ACPI may provide additional identifying information of use to userspace. We don't associate the serio devices with the firmware device (we don't set it as parent), so there's no way for userspace to make use of this information. We cannot change the parent for serio devices instantiated though a firmware interface as that would break suspend / resume ordering. Therefore this patch adds a new firmware_id sysfs attribute so that userspace can get a string from there with any additional identifying information the firmware interface may provide. Signed-off-by: Hans de Goede Acked-by: Peter Hutterer Signed-off-by: Dmitry Torokhov --- drivers/input/serio/serio.c | 14 ++++++++++++++ include/linux/serio.h | 1 + 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index 8f4c4ab04bc2..b29134de983b 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -451,6 +451,13 @@ static ssize_t serio_set_bind_mode(struct device *dev, struct device_attribute * return retval; } +static ssize_t firmware_id_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct serio *serio = to_serio_port(dev); + + return sprintf(buf, "%s\n", serio->firmware_id); +} + static DEVICE_ATTR_RO(type); static DEVICE_ATTR_RO(proto); static DEVICE_ATTR_RO(id); @@ -473,12 +480,14 @@ static DEVICE_ATTR_RO(modalias); static DEVICE_ATTR_WO(drvctl); static DEVICE_ATTR(description, S_IRUGO, serio_show_description, NULL); static DEVICE_ATTR(bind_mode, S_IWUSR | S_IRUGO, serio_show_bind_mode, serio_set_bind_mode); +static DEVICE_ATTR_RO(firmware_id); static struct attribute *serio_device_attrs[] = { &dev_attr_modalias.attr, &dev_attr_description.attr, &dev_attr_drvctl.attr, &dev_attr_bind_mode.attr, + &dev_attr_firmware_id.attr, NULL }; @@ -921,9 +930,14 @@ static int serio_uevent(struct device *dev, struct kobj_uevent_env *env) SERIO_ADD_UEVENT_VAR("SERIO_PROTO=%02x", serio->id.proto); SERIO_ADD_UEVENT_VAR("SERIO_ID=%02x", serio->id.id); SERIO_ADD_UEVENT_VAR("SERIO_EXTRA=%02x", serio->id.extra); + SERIO_ADD_UEVENT_VAR("MODALIAS=serio:ty%02Xpr%02Xid%02Xex%02X", serio->id.type, serio->id.proto, serio->id.id, serio->id.extra); + if (serio->firmware_id[0]) + SERIO_ADD_UEVENT_VAR("SERIO_FIRMWARE_ID=%s", + serio->firmware_id); + return 0; } #undef SERIO_ADD_UEVENT_VAR diff --git a/include/linux/serio.h b/include/linux/serio.h index 36aac733840a..9f779c7a2da4 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -23,6 +23,7 @@ struct serio { char name[32]; char phys[32]; + char firmware_id[128]; bool manual_bind; -- cgit v1.2.3 From f37c013409bb78ebb958821aa10d069e707cabac Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 19 Apr 2014 22:25:45 -0700 Subject: Input: Add INPUT_PROP_TOPBUTTONPAD device property On some newer laptops with a trackpoint the physical buttons for the trackpoint have been removed to allow for a larger touchpad. On these laptops the buttonpad has clearly marked areas on the top which are to be used as trackpad buttons. Users of the event device-node need to know about this, so that they can properly interpret BTN_LEFT events as being a left / right / middle click depending on where on the button pad the clicking finger is. This commits adds a INPUT_PROP_TOPBUTTONPAD device property which drivers for such buttonpads will use to signal to the user that this buttonpad not only has the normal bottom button area, but also a top button area. Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index bd24470d24a2..f4849525519c 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -164,6 +164,7 @@ struct input_keymap_entry { #define INPUT_PROP_DIRECT 0x01 /* direct input devices */ #define INPUT_PROP_BUTTONPAD 0x02 /* has button(s) under pad */ #define INPUT_PROP_SEMI_MT 0x03 /* touch rectangle only */ +#define INPUT_PROP_TOPBUTTONPAD 0x04 /* softbuttons at top of pad */ #define INPUT_PROP_MAX 0x1f #define INPUT_PROP_CNT (INPUT_PROP_MAX + 1) -- cgit v1.2.3 From 0d3f7a2dd2f5cf9642982515e020c1aee2cf7af6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 22 Apr 2014 08:23:58 -0400 Subject: locks: rename file-private locks to "open file description locks" File-private locks have been merged into Linux for v3.15, and *now* people are commenting that the name and macro definitions for the new file-private locks suck. ...and I can't even disagree. The names and command macros do suck. We're going to have to live with these for a long time, so it's important that we be happy with the names before we're stuck with them. The consensus on the lists so far is that they should be rechristened as "open file description locks". The name isn't a big deal for the kernel, but the command macros are not visually distinct enough from the traditional POSIX lock macros. The glibc and documentation folks are recommending that we change them to look like F_OFD_{GETLK|SETLK|SETLKW}. That lessens the chance that a programmer will typo one of the commands wrong, and also makes it easier to spot this difference when reading code. This patch makes the following changes that I think are necessary before v3.15 ships: 1) rename the command macros to their new names. These end up in the uapi headers and so are part of the external-facing API. It turns out that glibc doesn't actually use the fcntl.h uapi header, but it's hard to be sure that something else won't. Changing it now is safest. 2) make the the /proc/locks output display these as type "OFDLCK" Cc: Michael Kerrisk Cc: Christoph Hellwig Cc: Carlos O'Donell Cc: Stefan Metzmacher Cc: Andy Lutomirski Cc: Frank Filz Cc: Theodore Ts'o Signed-off-by: Jeff Layton --- arch/arm/kernel/sys_oabi-compat.c | 6 +++--- fs/compat.c | 14 +++++++------- fs/fcntl.c | 12 ++++++------ fs/locks.c | 14 +++++++------- include/uapi/asm-generic/fcntl.h | 20 ++++++++++---------- security/selinux/hooks.c | 6 +++--- 6 files changed, 36 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 702bd329d9d0..e90a3148f385 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -203,9 +203,9 @@ asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, int ret; switch (cmd) { - case F_GETLKP: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: case F_GETLK64: case F_SETLK64: case F_SETLKW64: diff --git a/fs/compat.c b/fs/compat.c index ca926ad0430c..66d3d3c6b4b2 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -457,9 +457,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, case F_GETLK64: case F_SETLK64: case F_SETLKW64: - case F_GETLKP: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: ret = get_compat_flock64(&f, compat_ptr(arg)); if (ret != 0) break; @@ -468,7 +468,7 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, conv_cmd = convert_fcntl_cmd(cmd); ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); set_fs(old_fs); - if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) { + if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) { /* need to return lock information - see above for commentary */ if (f.l_start > COMPAT_LOFF_T_MAX) ret = -EOVERFLOW; @@ -493,9 +493,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, case F_GETLK64: case F_SETLK64: case F_SETLKW64: - case F_GETLKP: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: return -EINVAL; } return compat_sys_fcntl64(fd, cmd, arg); diff --git a/fs/fcntl.c b/fs/fcntl.c index 9ead1596399a..72c82f69b01b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -274,15 +274,15 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ - case F_GETLKP: + case F_OFD_GETLK: #endif case F_GETLK: err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ - case F_SETLKP: - case F_SETLKPW: + case F_OFD_SETLK: + case F_OFD_SETLKW: #endif /* Fallthrough */ case F_SETLK: @@ -399,13 +399,13 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, switch (cmd) { case F_GETLK64: - case F_GETLKP: + case F_OFD_GETLK: err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg); break; case F_SETLK64: case F_SETLKW64: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_SETLK: + case F_OFD_SETLKW: err = fcntl_setlk64(fd, f.file, cmd, (struct flock64 __user *) arg); break; diff --git a/fs/locks.c b/fs/locks.c index b380f5543614..e1023b504279 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1941,7 +1941,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) if (error) goto out; - if (cmd == F_GETLKP) { + if (cmd == F_OFD_GETLK) { error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2076,7 +2076,7 @@ again: * FL_FILE_PVT flag and override the owner. */ switch (cmd) { - case F_SETLKP: + case F_OFD_SETLK: error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2085,7 +2085,7 @@ again: file_lock->fl_flags |= FL_FILE_PVT; file_lock->fl_owner = (fl_owner_t)filp; break; - case F_SETLKPW: + case F_OFD_SETLKW: error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2143,7 +2143,7 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) if (error) goto out; - if (cmd == F_GETLKP) { + if (cmd == F_OFD_GETLK) { error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2211,7 +2211,7 @@ again: * FL_FILE_PVT flag and override the owner. */ switch (cmd) { - case F_SETLKP: + case F_OFD_SETLK: error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2220,7 +2220,7 @@ again: file_lock->fl_flags |= FL_FILE_PVT; file_lock->fl_owner = (fl_owner_t)filp; break; - case F_SETLKPW: + case F_OFD_SETLKW: error = -EINVAL; if (flock.l_pid != 0) goto out; @@ -2413,7 +2413,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, if (fl->fl_flags & FL_ACCESS) seq_printf(f, "ACCESS"); else if (IS_FILE_PVT(fl)) - seq_printf(f, "FLPVT "); + seq_printf(f, "OFDLCK"); else seq_printf(f, "POSIX "); diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index a9b13f8b3595..7543b3e51331 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -133,20 +133,20 @@ #endif /* - * fd "private" POSIX locks. + * Open File Description Locks * - * Usually POSIX locks held by a process are released on *any* close and are + * Usually record locks held by a process are released on *any* close and are * not inherited across a fork(). * - * These cmd values will set locks that conflict with normal POSIX locks, but - * are "owned" by the opened file, not the process. This means that they are - * inherited across fork() like BSD (flock) locks, and they are only released - * automatically when the last reference to the the open file against which - * they were acquired is put. + * These cmd values will set locks that conflict with process-associated + * record locks, but are "owned" by the open file description, not the + * process. This means that they are inherited across fork() like BSD (flock) + * locks, and they are only released automatically when the last reference to + * the the open file against which they were acquired is put. */ -#define F_GETLKP 36 -#define F_SETLKP 37 -#define F_SETLKPW 38 +#define F_OFD_GETLK 36 +#define F_OFD_SETLK 37 +#define F_OFD_SETLKW 38 #define F_OWNER_TID 0 #define F_OWNER_PID 1 diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b4beb77967b1..2c7341dbc5d6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3317,9 +3317,9 @@ static int selinux_file_fcntl(struct file *file, unsigned int cmd, case F_GETLK: case F_SETLK: case F_SETLKW: - case F_GETLKP: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: #if BITS_PER_LONG == 32 case F_GETLK64: case F_SETLK64: -- cgit v1.2.3 From 132e9a68a5794ad2e4a3d0c07dfb7133a70b1651 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 15 Apr 2014 11:05:16 -0400 Subject: Fix: tracing: use 'E' instead of 'X' for unsigned module tain flag In the following commit: commit 57673c2b0baa900dddae3b9eb3d7748ebf550eb3 Author: Rusty Russell Date: Mon Mar 31 14:39:57 2014 +1030 Use 'E' instead of 'X' for unsigned module taint flag. One site has been forgotten in trace events module.h. Signed-off-by: Mathieu Desnoyers CC: Steven Rostedt CC: Rusty Russell Signed-off-by: Rusty Russell --- include/trace/events/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 11fd51b413de..ed0b2c599a64 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -25,7 +25,7 @@ struct module; { (1UL << TAINT_OOT_MODULE), "O" }, \ { (1UL << TAINT_FORCED_MODULE), "F" }, \ { (1UL << TAINT_CRAP), "C" }, \ - { (1UL << TAINT_UNSIGNED_MODULE), "X" }) + { (1UL << TAINT_UNSIGNED_MODULE), "E" }) TRACE_EVENT(module_load, -- cgit v1.2.3 From cff2fce58b2b0f59089e7edcdc38803d65057b9f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 22 Apr 2014 08:24:32 -0400 Subject: locks: rename FL_FILE_PVT and IS_FILE_PVT to use "*_OFDLCK" instead File-private locks have been re-christened as "open file description" locks. Finish the symbol name cleanup in the internal implementation. Signed-off-by: Jeff Layton --- fs/locks.c | 34 +++++++++++++++++----------------- include/linux/fs.h | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/locks.c b/fs/locks.c index e1023b504279..e663aeac579e 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -135,7 +135,7 @@ #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) -#define IS_FILE_PVT(fl) (fl->fl_flags & FL_FILE_PVT) +#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) static bool lease_breaking(struct file_lock *fl) { @@ -564,7 +564,7 @@ static void __locks_insert_block(struct file_lock *blocker, BUG_ON(!list_empty(&waiter->fl_block)); waiter->fl_next = blocker; list_add_tail(&waiter->fl_block, &blocker->fl_block); - if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker)) + if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) locks_insert_global_blocked(waiter); } @@ -759,12 +759,12 @@ EXPORT_SYMBOL(posix_test_lock); * of tasks (such as posix threads) sharing the same open file table. * To handle those cases, we just bail out after a few iterations. * - * For FL_FILE_PVT locks, the owner is the filp, not the files_struct. + * For FL_OFDLCK locks, the owner is the filp, not the files_struct. * Because the owner is not even nominally tied to a thread of * execution, the deadlock detection below can't reasonably work well. Just * skip it for those. * - * In principle, we could do a more limited deadlock detection on FL_FILE_PVT + * In principle, we could do a more limited deadlock detection on FL_OFDLCK * locks that just checks for the case where two tasks are attempting to * upgrade from read to write locks on the same inode. */ @@ -791,9 +791,9 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, /* * This deadlock detector can't reasonably detect deadlocks with - * FL_FILE_PVT locks, since they aren't owned by a process, per-se. + * FL_OFDLCK locks, since they aren't owned by a process, per-se. */ - if (IS_FILE_PVT(caller_fl)) + if (IS_OFDLCK(caller_fl)) return 0; while ((block_fl = what_owner_is_waiting_for(block_fl))) { @@ -1890,7 +1890,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock); static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) { - flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; + flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid; #if BITS_PER_LONG == 32 /* * Make sure we can represent the posix lock via @@ -1912,7 +1912,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) #if BITS_PER_LONG == 32 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) { - flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; + flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid; flock->l_start = fl->fl_start; flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; @@ -1947,7 +1947,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) goto out; cmd = F_GETLK; - file_lock.fl_flags |= FL_FILE_PVT; + file_lock.fl_flags |= FL_OFDLCK; file_lock.fl_owner = (fl_owner_t)filp; } @@ -2073,7 +2073,7 @@ again: /* * If the cmd is requesting file-private locks, then set the - * FL_FILE_PVT flag and override the owner. + * FL_OFDLCK flag and override the owner. */ switch (cmd) { case F_OFD_SETLK: @@ -2082,7 +2082,7 @@ again: goto out; cmd = F_SETLK; - file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_flags |= FL_OFDLCK; file_lock->fl_owner = (fl_owner_t)filp; break; case F_OFD_SETLKW: @@ -2091,7 +2091,7 @@ again: goto out; cmd = F_SETLKW; - file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_flags |= FL_OFDLCK; file_lock->fl_owner = (fl_owner_t)filp; /* Fallthrough */ case F_SETLKW: @@ -2149,7 +2149,7 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) goto out; cmd = F_GETLK64; - file_lock.fl_flags |= FL_FILE_PVT; + file_lock.fl_flags |= FL_OFDLCK; file_lock.fl_owner = (fl_owner_t)filp; } @@ -2208,7 +2208,7 @@ again: /* * If the cmd is requesting file-private locks, then set the - * FL_FILE_PVT flag and override the owner. + * FL_OFDLCK flag and override the owner. */ switch (cmd) { case F_OFD_SETLK: @@ -2217,7 +2217,7 @@ again: goto out; cmd = F_SETLK64; - file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_flags |= FL_OFDLCK; file_lock->fl_owner = (fl_owner_t)filp; break; case F_OFD_SETLKW: @@ -2226,7 +2226,7 @@ again: goto out; cmd = F_SETLKW64; - file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_flags |= FL_OFDLCK; file_lock->fl_owner = (fl_owner_t)filp; /* Fallthrough */ case F_SETLKW64: @@ -2412,7 +2412,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, if (IS_POSIX(fl)) { if (fl->fl_flags & FL_ACCESS) seq_printf(f, "ACCESS"); - else if (IS_FILE_PVT(fl)) + else if (IS_OFDLCK(fl)) seq_printf(f, "OFDLCK"); else seq_printf(f, "POSIX "); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7a9c5bca2b76..878031227c57 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -815,7 +815,7 @@ static inline struct file *get_file(struct file *f) #define FL_SLEEP 128 /* A blocking lock */ #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ -#define FL_FILE_PVT 1024 /* lock is private to the file */ +#define FL_OFDLCK 1024 /* lock is "owned" by struct file */ /* * Special return value from posix_lock_file() and vfs_lock_file() for -- cgit v1.2.3 From 9ef1af9ea28c23d0eaed97f7f5142788b6cf570a Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 1 Apr 2014 14:13:17 -0600 Subject: dt: tegra: remove non-existent clock IDs The Tegra124 clock DT binding currently provides 3 clocks that don't actually exist; 2 for NAND and one for UART5/UARTE. Delete these. While this is technically an incompatible DT ABI change, nothing could have used these clock IDs for anything practical, since the HW doesn't exist. Cc: Signed-off-by: Stephen Warren Signed-off-by: Arnd Bergmann --- include/dt-bindings/clock/tegra124-car.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/tegra124-car.h b/include/dt-bindings/clock/tegra124-car.h index 8c1603b10665..433528ab5161 100644 --- a/include/dt-bindings/clock/tegra124-car.h +++ b/include/dt-bindings/clock/tegra124-car.h @@ -29,7 +29,7 @@ /* 10 (register bit affects spdif_in and spdif_out) */ #define TEGRA124_CLK_I2S1 11 #define TEGRA124_CLK_I2C1 12 -#define TEGRA124_CLK_NDFLASH 13 +/* 13 */ #define TEGRA124_CLK_SDMMC1 14 #define TEGRA124_CLK_SDMMC4 15 /* 16 */ @@ -83,7 +83,7 @@ /* 64 */ #define TEGRA124_CLK_UARTD 65 -#define TEGRA124_CLK_UARTE 66 +/* 66 */ #define TEGRA124_CLK_I2C3 67 #define TEGRA124_CLK_SBC4 68 #define TEGRA124_CLK_SDMMC3 69 @@ -97,7 +97,7 @@ #define TEGRA124_CLK_TRACE 77 #define TEGRA124_CLK_SOC_THERM 78 #define TEGRA124_CLK_DTV 79 -#define TEGRA124_CLK_NDSPEED 80 +/* 80 */ #define TEGRA124_CLK_I2CSLOW 81 #define TEGRA124_CLK_DSIB 82 #define TEGRA124_CLK_TSEC 83 -- cgit v1.2.3 From a53b72c83a4216f2eb883ed45a0cbce014b8e62d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 Apr 2014 14:26:25 -0700 Subject: net: Move the permission check in sock_diag_put_filterinfo to packet_diag_dump The permission check in sock_diag_put_filterinfo is wrong, and it is so removed from it's sources it is not clear why it is wrong. Move the computation into packet_diag_dump and pass a bool of the result into sock_diag_filterinfo. This does not yet correct the capability check but instead simply moves it to make it clear what is going on. Reported-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 2 +- net/core/sock_diag.c | 4 ++-- net/packet/diag.c | 7 ++++++- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 302ab805b0bb..46cca4c06848 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -23,7 +23,7 @@ int sock_diag_check_cookie(void *sk, __u32 *cookie); void sock_diag_save_cookie(void *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); -int sock_diag_put_filterinfo(struct sock *sk, +int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype); #endif diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 9deb6abd6cf6..a4216a4c9572 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -49,7 +49,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) } EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); -int sock_diag_put_filterinfo(struct sock *sk, +int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype) { struct sock_fprog_kern *fprog; @@ -58,7 +58,7 @@ int sock_diag_put_filterinfo(struct sock *sk, unsigned int flen; int err = 0; - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + if (!may_report_filterinfo) { nla_reserve(skb, attrtype, 0); return 0; } diff --git a/net/packet/diag.c b/net/packet/diag.c index 435ff99ba8c7..b34d0de24091 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -128,6 +128,7 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, + bool may_report_filterinfo, struct user_namespace *user_ns, u32 portid, u32 seq, u32 flags, int sk_ino) { @@ -172,7 +173,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, goto out_nlmsg_trim; if ((req->pdiag_show & PACKET_SHOW_FILTER) && - sock_diag_put_filterinfo(sk, skb, PACKET_DIAG_FILTER)) + sock_diag_put_filterinfo(may_report_filterinfo, sk, skb, + PACKET_DIAG_FILTER)) goto out_nlmsg_trim; return nlmsg_end(skb, nlh); @@ -188,9 +190,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct packet_diag_req *req; struct net *net; struct sock *sk; + bool may_report_filterinfo; net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); + may_report_filterinfo = ns_capable(net->user_ns, CAP_NET_ADMIN); mutex_lock(&net->packet.sklist_lock); sk_for_each(sk, &net->packet.sklist) { @@ -200,6 +204,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (sk_diag_fill(sk, skb, req, + may_report_filterinfo, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, -- cgit v1.2.3 From a3b299da869d6e78cf42ae0b1b41797bcb8c5e4b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 Apr 2014 14:26:56 -0700 Subject: net: Add variants of capable for use on on sockets sk_net_capable - The common case, operations that are safe in a network namespace. sk_capable - Operations that are not known to be safe in a network namespace sk_ns_capable - The general case for special cases. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/sock.h | 5 +++++ net/core/sock.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 8338a14e4805..21569cf456ed 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2255,6 +2255,11 @@ int sock_get_timestampns(struct sock *, struct timespec __user *); int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type); +bool sk_ns_capable(const struct sock *sk, + struct user_namespace *user_ns, int cap); +bool sk_capable(const struct sock *sk, int cap); +bool sk_net_capable(const struct sock *sk, int cap); + /* * Enable debug/info messages */ diff --git a/net/core/sock.c b/net/core/sock.c index b4fff008136f..664ee4295b6f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -145,6 +145,55 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); +/** + * sk_ns_capable - General socket capability test + * @sk: Socket to use a capability on or through + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket had when the socket was + * created and the current process has the capability @cap in the user + * namespace @user_ns. + */ +bool sk_ns_capable(const struct sock *sk, + struct user_namespace *user_ns, int cap) +{ + return file_ns_capable(sk->sk_socket->file, user_ns, cap) && + ns_capable(user_ns, cap); +} +EXPORT_SYMBOL(sk_ns_capable); + +/** + * sk_capable - Socket global capability test + * @sk: Socket to use a capability on or through + * @cap: The global capbility to use + * + * Test to see if the opener of the socket had when the socket was + * created and the current process has the capability @cap in all user + * namespaces. + */ +bool sk_capable(const struct sock *sk, int cap) +{ + return sk_ns_capable(sk, &init_user_ns, cap); +} +EXPORT_SYMBOL(sk_capable); + +/** + * sk_net_capable - Network namespace socket capability test + * @sk: Socket to use a capability on or through + * @cap: The capability to use + * + * Test to see if the opener of the socket had when the socke was created + * and the current process has the capability @cap over the network namespace + * the socket is a member of. + */ +bool sk_net_capable(const struct sock *sk, int cap) +{ + return sk_ns_capable(sk, sock_net(sk)->user_ns, cap); +} +EXPORT_SYMBOL(sk_net_capable); + + #ifdef CONFIG_MEMCG_KMEM int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { -- cgit v1.2.3 From aa4cf9452f469f16cea8c96283b641b4576d4a7b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 Apr 2014 14:28:03 -0700 Subject: net: Add variants of capable for use on netlink messages netlink_net_capable - The common case use, for operations that are safe on a network namespace netlink_capable - For operations that are only known to be safe for the global root netlink_ns_capable - The general case of capable used to handle special cases __netlink_ns_capable - Same as netlink_ns_capable except taking a netlink_skb_parms instead of the skbuff of a netlink message. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netlink.h | 7 ++++++ net/netlink/af_netlink.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index aad8eeaf416d..f64b01787ddc 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -169,4 +169,11 @@ struct netlink_tap { extern int netlink_add_tap(struct netlink_tap *nt); extern int netlink_remove_tap(struct netlink_tap *nt); +bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, + struct user_namespace *ns, int cap); +bool netlink_ns_capable(const struct sk_buff *skb, + struct user_namespace *ns, int cap); +bool netlink_capable(const struct sk_buff *skb, int cap); +bool netlink_net_capable(const struct sk_buff *skb, int cap); + #endif /* __LINUX_NETLINK_H */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7f931fe4d187..81dca96d2be6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1360,6 +1360,71 @@ retry: return err; } +/** + * __netlink_ns_capable - General netlink message capability test + * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace. + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. + */ +bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, + struct user_namespace *user_ns, int cap) +{ + return sk_ns_capable(nsp->sk, user_ns, cap); +} +EXPORT_SYMBOL(__netlink_ns_capable); + +/** + * netlink_ns_capable - General netlink message capability test + * @skb: socket buffer holding a netlink command from userspace + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. + */ +bool netlink_ns_capable(const struct sk_buff *skb, + struct user_namespace *user_ns, int cap) +{ + return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap); +} +EXPORT_SYMBOL(netlink_ns_capable); + +/** + * netlink_capable - Netlink global message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in all user namespaces. + */ +bool netlink_capable(const struct sk_buff *skb, int cap) +{ + return netlink_ns_capable(skb, &init_user_ns, cap); +} +EXPORT_SYMBOL(netlink_capable); + +/** + * netlink_net_capable - Netlink network namespace message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap over the network namespace of + * the socket we received the message from. + */ +bool netlink_net_capable(const struct sk_buff *skb, int cap) +{ + return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap); +} +EXPORT_SYMBOL(netlink_net_capable); + static inline int netlink_allowed(const struct socket *sock, unsigned int flag) { return (nl_table[sock->sk->sk_protocol].flags & flag) || -- cgit v1.2.3 From 2b97789fa289d531e767d994a77e34ec58f328c4 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Sat, 19 Apr 2014 08:51:44 +0530 Subject: phy: core: make NULL a valid phy reference if !CONFIG_GENERIC_PHY This fixes a regression on Keystone 2 platforms caused by patch 57303488cd37da58263e842de134dc65f7c626d5 "usb: dwc3: adapt dwc3 core to use Generic PHY Framework" which adds optional support of generic phy in DWC3 core. On Keystone 2 platforms the USB is not working now because CONFIG_GENERIC_PHY isn't set and, as result, Generic PHY APIs stubs return -ENOSYS always. The log shows: dwc3 2690000.dwc3: failed to initialize core dwc3: probe of 2690000.dwc3 failed with error -38 Hence, fix it by making NULL a valid phy reference in Generic PHY APIs stubs in the same way as it was done by the patch 04c2facad8fee66c981a51852806d8923336f362 "drivers: phy: Make NULL a valid phy reference". Acked-by: Felipe Balbi Acked-by: Santosh Shilimkar Signed-off-by: Grygorii Strashko Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Greg Kroah-Hartman --- include/linux/phy/phy.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index e2f5ca96cddc..2760744cb2a7 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -174,21 +174,29 @@ void devm_of_phy_provider_unregister(struct device *dev, #else static inline int phy_pm_runtime_get(struct phy *phy) { + if (!phy) + return 0; return -ENOSYS; } static inline int phy_pm_runtime_get_sync(struct phy *phy) { + if (!phy) + return 0; return -ENOSYS; } static inline int phy_pm_runtime_put(struct phy *phy) { + if (!phy) + return 0; return -ENOSYS; } static inline int phy_pm_runtime_put_sync(struct phy *phy) { + if (!phy) + return 0; return -ENOSYS; } @@ -204,21 +212,29 @@ static inline void phy_pm_runtime_forbid(struct phy *phy) static inline int phy_init(struct phy *phy) { + if (!phy) + return 0; return -ENOSYS; } static inline int phy_exit(struct phy *phy) { + if (!phy) + return 0; return -ENOSYS; } static inline int phy_power_on(struct phy *phy) { + if (!phy) + return 0; return -ENOSYS; } static inline int phy_power_off(struct phy *phy) { + if (!phy) + return 0; return -ENOSYS; } -- cgit v1.2.3 From 9ec36cafe43bf835f8f29273597a5b0cbc8267ef Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 23 Apr 2014 17:57:41 -0500 Subject: of/irq: do irq resolution in platform_get_irq Currently we get the following kind of errors if we try to use interrupt phandles to irqchips that have not yet initialized: irq: no irq domain found for /ocp/pinmux@48002030 ! ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at drivers/of/platform.c:171 of_device_alloc+0x144/0x184() Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.12.0-00038-g42a9708 #1012 (show_stack+0x14/0x1c) (dump_stack+0x6c/0xa0) (warn_slowpath_common+0x64/0x84) (warn_slowpath_null+0x1c/0x24) (of_device_alloc+0x144/0x184) (of_platform_device_create_pdata+0x44/0x9c) (of_platform_bus_create+0xd0/0x170) (of_platform_bus_create+0x12c/0x170) (of_platform_populate+0x60/0x98) This is because we're wrongly trying to populate resources that are not yet available. It's perfectly valid to create irqchips dynamically, so let's fix up the issue by resolving the interrupt resources when platform_get_irq is called. And then we also need to accept the fact that some irqdomains do not exist that early on, and only get initialized later on. So we can make the current WARN_ON into just into a pr_debug(). We still attempt to populate irq resources when we create the devices. This allows current drivers which don't use platform_get_irq to continue to function. Once all drivers are fixed, this code can be removed. Suggested-by: Russell King Signed-off-by: Rob Herring Signed-off-by: Tony Lindgren Tested-by: Tony Lindgren Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Grant Likely --- drivers/base/platform.c | 7 ++++++- drivers/of/irq.c | 26 ++++++++++++++++++++++++++ drivers/of/platform.c | 4 +++- include/linux/of_irq.h | 5 +++++ 4 files changed, 40 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index e714709704e4..5b47210889e0 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -87,7 +88,11 @@ int platform_get_irq(struct platform_device *dev, unsigned int num) return -ENXIO; return dev->archdata.irqs[num]; #else - struct resource *r = platform_get_resource(dev, IORESOURCE_IRQ, num); + struct resource *r; + if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) + return of_irq_get(dev->dev.of_node, num); + + r = platform_get_resource(dev, IORESOURCE_IRQ, num); return r ? r->start : -ENXIO; #endif diff --git a/drivers/of/irq.c b/drivers/of/irq.c index e2e4c548a42f..5aeb89411350 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -379,6 +379,32 @@ int of_irq_to_resource(struct device_node *dev, int index, struct resource *r) } EXPORT_SYMBOL_GPL(of_irq_to_resource); +/** + * of_irq_get - Decode a node's IRQ and return it as a Linux irq number + * @dev: pointer to device tree node + * @index: zero-based index of the irq + * + * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain + * is not yet created. + * + */ +int of_irq_get(struct device_node *dev, int index) +{ + int rc; + struct of_phandle_args oirq; + struct irq_domain *domain; + + rc = of_irq_parse_one(dev, index, &oirq); + if (rc) + return rc; + + domain = irq_find_host(oirq.np); + if (!domain) + return -EPROBE_DEFER; + + return irq_create_of_mapping(&oirq); +} + /** * of_irq_count - Count the number of IRQs a node uses * @dev: pointer to device tree node diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 404d1daebefa..bd47fbc53dc9 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -168,7 +168,9 @@ struct platform_device *of_device_alloc(struct device_node *np, rc = of_address_to_resource(np, i, res); WARN_ON(rc); } - WARN_ON(of_irq_to_resource_table(np, res, num_irq) != num_irq); + if (of_irq_to_resource_table(np, res, num_irq) != num_irq) + pr_debug("not all legacy IRQ resources mapped for %s\n", + np->name); } dev->dev.of_node = of_node_get(np); diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 3f23b4472c31..6404253d810d 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -44,11 +44,16 @@ extern void of_irq_init(const struct of_device_id *matches); #ifdef CONFIG_OF_IRQ extern int of_irq_count(struct device_node *dev); +extern int of_irq_get(struct device_node *dev, int index); #else static inline int of_irq_count(struct device_node *dev) { return 0; } +static inline int of_irq_get(struct device_node *dev, int index) +{ + return 0; +} #endif #if defined(CONFIG_OF) -- cgit v1.2.3 From 6a20dbd6caa2358716136144bf524331d70b1e03 Mon Sep 17 00:00:00 2001 From: Manfred Schlaegl Date: Tue, 8 Apr 2014 14:42:04 +0200 Subject: tty: Fix race condition between __tty_buffer_request_room and flush_to_ldisc The race was introduced while development of linux-3.11 by e8437d7ecbc50198705331449367d401ebb3181f and e9975fdec0138f1b2a85b9624e41660abd9865d4. Originally it was found and reproduced on linux-3.12.15 and linux-3.12.15-rt25, by sending 500 byte blocks with 115kbaud to the target uart in a loop with 100 milliseconds delay. In short: 1. The consumer flush_to_ldisc is on to remove the head tty_buffer. 2. The producer adds a number of bytes, so that a new tty_buffer must be allocated and added by __tty_buffer_request_room. 3. The consumer removes the head tty_buffer element, without handling newly committed data. Detailed example: * Initial buffer: * Head, Tail -> 0: used=250; commit=250; read=240; next=NULL * Consumer: ''flush_to_ldisc'' * consumed 10 Byte * buffer: * Head, Tail -> 0: used=250; commit=250; read=250; next=NULL {{{ count = head->commit - head->read; // count = 0 if (!count) { // enter // INTERRUPTED BY PRODUCER -> if (head->next == NULL) break; buf->head = head->next; tty_buffer_free(port, head); continue; } }}} * Producer: tty_insert_flip_... 10 bytes + tty_flip_buffer_push * buffer: * Head, Tail -> 0: used=250; commit=250; read=250; next=NULL * added 6 bytes: head-element filled to maximum. * buffer: * Head, Tail -> 0: used=256; commit=250; read=250; next=NULL * added 4 bytes: __tty_buffer_request_room is called * buffer: * Head -> 0: used=256; commit=256; read=250; next=1 * Tail -> 1: used=4; commit=0; read=250 next=NULL * push (tty_flip_buffer_push) * buffer: * Head -> 0: used=256; commit=256; read=250; next=1 * Tail -> 1: used=4; commit=4; read=250 next=NULL * Consumer {{{ count = head->commit - head->read; if (!count) { // INTERRUPTED BY PRODUCER <- if (head->next == NULL) // -> no break break; buf->head = head->next; tty_buffer_free(port, head); // ERROR: tty_buffer head freed -> 6 bytes lost continue; } }}} This patch reintroduces a spin_lock to protect this case. Perhaps later a lock-less solution could be found. Signed-off-by: Manfred Schlaegl Cc: stable # 3.11 Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 16 ++++++++++++++-- include/linux/tty.h | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 8ebd9f88a6f6..f1d30f6945af 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -255,11 +255,16 @@ static int __tty_buffer_request_room(struct tty_port *port, size_t size, if (change || left < size) { /* This is the slow path - looking for new buffers to use */ if ((n = tty_buffer_alloc(port, size)) != NULL) { + unsigned long iflags; + n->flags = flags; buf->tail = n; + + spin_lock_irqsave(&buf->flush_lock, iflags); b->commit = b->used; - smp_mb(); b->next = n; + spin_unlock_irqrestore(&buf->flush_lock, iflags); + } else if (change) size = 0; else @@ -443,6 +448,7 @@ static void flush_to_ldisc(struct work_struct *work) mutex_lock(&buf->lock); while (1) { + unsigned long flags; struct tty_buffer *head = buf->head; int count; @@ -450,14 +456,19 @@ static void flush_to_ldisc(struct work_struct *work) if (atomic_read(&buf->priority)) break; + spin_lock_irqsave(&buf->flush_lock, flags); count = head->commit - head->read; if (!count) { - if (head->next == NULL) + if (head->next == NULL) { + spin_unlock_irqrestore(&buf->flush_lock, flags); break; + } buf->head = head->next; + spin_unlock_irqrestore(&buf->flush_lock, flags); tty_buffer_free(port, head); continue; } + spin_unlock_irqrestore(&buf->flush_lock, flags); count = receive_buf(tty, head, count); if (!count) @@ -512,6 +523,7 @@ void tty_buffer_init(struct tty_port *port) struct tty_bufhead *buf = &port->buf; mutex_init(&buf->lock); + spin_lock_init(&buf->flush_lock); tty_buffer_reset(&buf->sentinel, 0); buf->head = &buf->sentinel; buf->tail = &buf->sentinel; diff --git a/include/linux/tty.h b/include/linux/tty.h index 1c3316a47d7e..036cccd80d9f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -61,6 +61,7 @@ struct tty_bufhead { struct tty_buffer *head; /* Queue head */ struct work_struct work; struct mutex lock; + spinlock_t flush_lock; atomic_t priority; struct tty_buffer sentinel; struct llist_head free; /* Free queue head */ -- cgit v1.2.3 From ec6931b281797b69e6cf109f9cc94d5a2bf994e0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 23 Apr 2014 17:52:52 +0100 Subject: word-at-a-time: avoid undefined behaviour in zero_bytemask macro The asm-generic, big-endian version of zero_bytemask creates a mask of bytes preceding the first zero-byte by left shifting ~0ul based on the position of the first zero byte. Unfortunately, if the first (top) byte is zero, the output of prep_zero_mask has only the top bit set, resulting in undefined C behaviour as we shift left by an amount equal to the width of the type. As it happens, GCC doesn't manage to spot this through the call to fls(), but the issue remains if architectures choose to implement their shift instructions differently. An example would be arch/arm/ (AArch32), where LSL Rd, Rn, #32 results in Rd == 0x0, whilst on arch/arm64 (AArch64) LSL Xd, Xn, #64 results in Xd == Xn. Rather than check explicitly for the problematic shift, this patch adds an extra shift by 1, replacing fls with __fls. Since zero_bytemask is never called with a zero argument (has_zero() is used to check the data first), we don't need to worry about calling __fls(0), which is undefined. Cc: Cc: Victor Kamensky Signed-off-by: Will Deacon Signed-off-by: Linus Torvalds --- include/asm-generic/word-at-a-time.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h index d3909effd725..d96deb443f18 100644 --- a/include/asm-generic/word-at-a-time.h +++ b/include/asm-generic/word-at-a-time.h @@ -50,11 +50,7 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct } #ifndef zero_bytemask -#ifdef CONFIG_64BIT -#define zero_bytemask(mask) (~0ul << fls64(mask)) -#else -#define zero_bytemask(mask) (~0ul << fls(mask)) -#endif /* CONFIG_64BIT */ -#endif /* zero_bytemask */ +#define zero_bytemask(mask) (~0ul << __fls(mask) << 1) +#endif #endif /* _ASM_WORD_AT_A_TIME_H */ -- cgit v1.2.3 From def5f1273c5f18abf8fcaee03a115d3e907ad407 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 27 Apr 2014 21:03:09 -0700 Subject: linux/interrupt.h: fix new kernel-doc warnings Fix new kernel-doc warnings in : Warning(include/linux/interrupt.h:219): No description found for parameter 'cpumask' Warning(include/linux/interrupt.h:219): Excess function parameter 'mask' description in 'irq_set_affinity' Warning(include/linux/interrupt.h:236): No description found for parameter 'cpumask' Warning(include/linux/interrupt.h:236): Excess function parameter 'mask' description in 'irq_force_affinity' Signed-off-by: Randy Dunlap Link: http://lkml.kernel.org/r/535DD2FD.7030804@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 8834a7e5b944..97ac926c78a7 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -210,7 +210,7 @@ extern int __irq_set_affinity(unsigned int irq, const struct cpumask *cpumask, /** * irq_set_affinity - Set the irq affinity of a given irq * @irq: Interrupt to set affinity - * @mask: cpumask + * @cpumask: cpumask * * Fails if cpumask does not contain an online CPU */ @@ -223,7 +223,7 @@ irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) /** * irq_force_affinity - Force the irq affinity of a given irq * @irq: Interrupt to set affinity - * @mask: cpumask + * @cpumask: cpumask * * Same as irq_set_affinity, but without checking the mask against * online cpus. -- cgit v1.2.3 From 62a08ae2a5763aabeee98264605236b001503e0c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 24 Apr 2014 09:50:53 +0200 Subject: genirq: x86: Ensure that dynamic irq allocation does not conflict On x86 the allocation of irq descriptors may allocate interrupts which are in the range of the GSI interrupts. That's wrong as those interrupts are hardwired and we don't have the irq domain translation like PPC. So one of these interrupts can be hooked up later to one of the devices which are hard wired to it and the io_apic init code for that particular interrupt line happily reuses that descriptor with a completely different configuration so hell breaks lose. Inside x86 we allocate dynamic interrupts from above nr_gsi_irqs, except for a few usage sites which have not yet blown up in our face for whatever reason. But for drivers which need an irq range, like the GPIO drivers, we have no limit in place and we don't want to expose such a detail to a driver. To cure this introduce a function which an architecture can implement to impose a lower bound on the dynamic interrupt allocations. Implement it for x86 and set the lower bound to nr_gsi_irqs, which is the end of the hardwired interrupt space, so all dynamic allocations happen above. That not only allows the GPIO driver to work sanely, it also protects the bogus callsites of create_irq_nr() in hpet, uv, irq_remapping and htirq code. They need to be cleaned up as well, but that's a separate issue. Reported-by: Jin Yao Signed-off-by: Thomas Gleixner Tested-by: Mika Westerberg Cc: Mathias Nyman Cc: Linus Torvalds Cc: Grant Likely Cc: H. Peter Anvin Cc: Rafael J. Wysocki Cc: Andy Shevchenko Cc: Krogerus Heikki Cc: Linus Walleij Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1404241617360.28206@ionos.tec.linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 5 +++++ include/linux/irq.h | 2 ++ kernel/irq/irqdesc.c | 7 +++++++ kernel/softirq.c | 5 +++++ 4 files changed, 19 insertions(+) (limited to 'include') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 6ad4658de705..d23aa82e7a7b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3425,6 +3425,11 @@ int get_nr_irqs_gsi(void) return nr_irqs_gsi; } +unsigned int arch_dynirq_lower_bound(unsigned int from) +{ + return from < nr_irqs_gsi ? nr_irqs_gsi : from; +} + int __init arch_probe_nr_irqs(void) { int nr; diff --git a/include/linux/irq.h b/include/linux/irq.h index 10a0b1ac4ea0..5c57efb863d0 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -603,6 +603,8 @@ static inline u32 irq_get_trigger_type(unsigned int irq) return d ? irqd_get_trigger_type(d) : 0; } +unsigned int arch_dynirq_lower_bound(unsigned int from); + int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, struct module *owner); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index a7174617616b..bb07f2928f4b 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -363,6 +363,13 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, if (from > irq) return -EINVAL; from = irq; + } else { + /* + * For interrupts which are freely allocated the + * architecture can force a lower bound to the @from + * argument. x86 uses this to exclude the GSI space. + */ + from = arch_dynirq_lower_bound(from); } mutex_lock(&sparse_irq_lock); diff --git a/kernel/softirq.c b/kernel/softirq.c index b50990a5bea0..33e4648ae0e7 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -779,3 +779,8 @@ int __init __weak arch_early_irq_init(void) { return 0; } + +unsigned int __weak arch_dynirq_lower_bound(unsigned int from) +{ + return from; +} -- cgit v1.2.3 From a949ae560a511fe4e3adf48fa44fefded93e5c2b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 24 Apr 2014 10:40:12 -0400 Subject: ftrace/module: Hardcode ftrace_module_init() call into load_module() A race exists between module loading and enabling of function tracer. CPU 1 CPU 2 ----- ----- load_module() module->state = MODULE_STATE_COMING register_ftrace_function() mutex_lock(&ftrace_lock); ftrace_startup() update_ftrace_function(); ftrace_arch_code_modify_prepare() set_all_module_text_rw(); ftrace_arch_code_modify_post_process() set_all_module_text_ro(); [ here all module text is set to RO, including the module that is loading!! ] blocking_notifier_call_chain(MODULE_STATE_COMING); ftrace_init_module() [ tries to modify code, but it's RO, and fails! ftrace_bug() is called] When this race happens, ftrace_bug() will produces a nasty warning and all of the function tracing features will be disabled until reboot. The simple solution is to treate module load the same way the core kernel is treated at boot. To hardcode the ftrace function modification of converting calls to mcount into nops. This is done in init/main.c there's no reason it could not be done in load_module(). This gives a better control of the changes and doesn't tie the state of the module to its notifiers as much. Ftrace is special, it needs to be treated as such. The reason this would work, is that the ftrace_module_init() would be called while the module is in MODULE_STATE_UNFORMED, which is ignored by the set_all_module_text_ro() call. Link: http://lkml.kernel.org/r/1395637826-3312-1-git-send-email-indou.takao@jp.fujitsu.com Reported-by: Takao Indoh Acked-by: Rusty Russell Cc: stable@vger.kernel.org # 2.6.38+ Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 ++ kernel/module.c | 3 +++ kernel/trace/ftrace.c | 27 ++++----------------------- 3 files changed, 9 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9212b017bc72..ae9504b4b67d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -535,6 +535,7 @@ static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_a extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); +extern void ftrace_module_init(struct module *mod); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); @@ -544,6 +545,7 @@ static inline int ftrace_force_update(void) { return 0; } static inline void ftrace_disable_daemon(void) { } static inline void ftrace_enable_daemon(void) { } static inline void ftrace_release_mod(struct module *mod) {} +static inline void ftrace_module_init(struct module *mod) {} static inline __init int register_ftrace_command(struct ftrace_func_command *cmd) { return -EINVAL; diff --git a/kernel/module.c b/kernel/module.c index 11869408f79b..5f14fec9f825 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3271,6 +3271,9 @@ static int load_module(struct load_info *info, const char __user *uargs, dynamic_debug_setup(info->debug, info->num_debug); + /* Ftrace init must be called in the MODULE_STATE_UNFORMED state */ + ftrace_module_init(mod); + /* Finally it's fully formed, ready to start executing. */ err = complete_formation(mod, info); if (err) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1fd4b9479210..4a54a25afa2f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4330,16 +4330,11 @@ static void ftrace_init_module(struct module *mod, ftrace_process_locs(mod, start, end); } -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) +void ftrace_module_init(struct module *mod) { - struct module *mod = data; - - if (val == MODULE_STATE_COMING) - ftrace_init_module(mod, mod->ftrace_callsites, - mod->ftrace_callsites + - mod->num_ftrace_callsites); - return 0; + ftrace_init_module(mod, mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); } static int ftrace_module_notify_exit(struct notifier_block *self, @@ -4353,11 +4348,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, return 0; } #else -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) -{ - return 0; -} static int ftrace_module_notify_exit(struct notifier_block *self, unsigned long val, void *data) { @@ -4365,11 +4355,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, } #endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_enter_nb = { - .notifier_call = ftrace_module_notify_enter, - .priority = INT_MAX, /* Run before anything that can use kprobes */ -}; - struct notifier_block ftrace_module_exit_nb = { .notifier_call = ftrace_module_notify_exit, .priority = INT_MIN, /* Run after anything that can remove kprobes */ @@ -4403,10 +4388,6 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); - ret = register_module_notifier(&ftrace_module_enter_nb); - if (ret) - pr_warning("Failed to register trace ftrace module enter notifier\n"); - ret = register_module_notifier(&ftrace_module_exit_nb); if (ret) pr_warning("Failed to register trace ftrace module exit notifier\n"); -- cgit v1.2.3 From 789ce9dca8007ab5d7c72b9a174a29243817ac32 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 30 Apr 2014 14:22:19 -0700 Subject: word-at-a-time: simplify big-endian zero_bytemask macro This is simpler and cleaner. Depending on architecture, a smart compiler may or may not generate the same code. Acked-by: Will Deacon Signed-off-by: Linus Torvalds --- include/asm-generic/word-at-a-time.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h index d96deb443f18..94f9ea8abcae 100644 --- a/include/asm-generic/word-at-a-time.h +++ b/include/asm-generic/word-at-a-time.h @@ -50,7 +50,7 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct } #ifndef zero_bytemask -#define zero_bytemask(mask) (~0ul << __fls(mask) << 1) +#define zero_bytemask(mask) (~1ul << __fls(mask)) #endif #endif /* _ASM_WORD_AT_A_TIME_H */ -- cgit v1.2.3 From f774b7d10e2155e52b92dfce2f8cb099a6d6d0e6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Apr 2014 19:50:06 +0100 Subject: arm64: fixmap: fix missing sub-page offset for earlyprintk Commit d57c33c5daa4 (add generic fixmap.h) added (among other similar things) set_fixmap_io to deal with early ioremap of devices. More recently, commit bf4b558eba92 (arm64: add early_ioremap support) converted the arm64 earlyprintk to use set_fixmap_io. A side effect of this conversion is that my virtual machines have stopped booting when I pass "earlyprintk=uart8250-8bit,0x3f8" to the guest kernel. Turns out that the new earlyprintk code doesn't care at all about sub-page offsets, and just assumes that the earlyprintk device will be page-aligned. Obviously, that doesn't play well with the above example. Further investigation shows that set_fixmap_io uses __set_fixmap instead of __set_fixmap_offset. A fix is to introduce a set_fixmap_offset_io that uses the latter, and to remove the superflous call to fix_to_virt (which only returns the value that set_fixmap_io has already given us). With this applied, my VMs are back in business. Tested on a Cortex-A57 platform with kvmtool as platform emulation. Cc: Will Deacon Acked-by: Mark Salter Acked-by: Arnd Bergmann Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/early_printk.c | 6 ++---- include/asm-generic/fixmap.h | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c index ffbbdde7aba1..2dc36d00addf 100644 --- a/arch/arm64/kernel/early_printk.c +++ b/arch/arm64/kernel/early_printk.c @@ -143,10 +143,8 @@ static int __init setup_early_printk(char *buf) } /* no options parsing yet */ - if (paddr) { - set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr); - early_base = (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE); - } + if (paddr) + early_base = (void __iomem *)set_fixmap_offset_io(FIX_EARLYCON_MEM_BASE, paddr); printch = match->printch; early_console = &early_console_dev; diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h index 5a64ca4621f3..f23174fb9ec4 100644 --- a/include/asm-generic/fixmap.h +++ b/include/asm-generic/fixmap.h @@ -93,5 +93,8 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) #define set_fixmap_io(idx, phys) \ __set_fixmap(idx, phys, FIXMAP_PAGE_IO) +#define set_fixmap_offset_io(idx, phys) \ + __set_fixmap_offset(idx, phys, FIXMAP_PAGE_IO) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_GENERIC_FIXMAP_H */ -- cgit v1.2.3 From 5fbf1a65dd53ef313783c34a0e93a6e29def6136 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 2 May 2014 10:56:11 -0400 Subject: Revert "tty: Fix race condition between __tty_buffer_request_room and flush_to_ldisc" This reverts commit 6a20dbd6caa2358716136144bf524331d70b1e03. Although the commit correctly identifies an unsafe race condition between __tty_buffer_request_room() and flush_to_ldisc(), the commit fixes the race with an unnecessary spinlock in a lockless algorithm. The follow-on commit, "tty: Fix lockless tty buffer race" fixes the race locklessly. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_buffer.c | 16 ++-------------- include/linux/tty.h | 1 - 2 files changed, 2 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index f1d30f6945af..8ebd9f88a6f6 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -255,16 +255,11 @@ static int __tty_buffer_request_room(struct tty_port *port, size_t size, if (change || left < size) { /* This is the slow path - looking for new buffers to use */ if ((n = tty_buffer_alloc(port, size)) != NULL) { - unsigned long iflags; - n->flags = flags; buf->tail = n; - - spin_lock_irqsave(&buf->flush_lock, iflags); b->commit = b->used; + smp_mb(); b->next = n; - spin_unlock_irqrestore(&buf->flush_lock, iflags); - } else if (change) size = 0; else @@ -448,7 +443,6 @@ static void flush_to_ldisc(struct work_struct *work) mutex_lock(&buf->lock); while (1) { - unsigned long flags; struct tty_buffer *head = buf->head; int count; @@ -456,19 +450,14 @@ static void flush_to_ldisc(struct work_struct *work) if (atomic_read(&buf->priority)) break; - spin_lock_irqsave(&buf->flush_lock, flags); count = head->commit - head->read; if (!count) { - if (head->next == NULL) { - spin_unlock_irqrestore(&buf->flush_lock, flags); + if (head->next == NULL) break; - } buf->head = head->next; - spin_unlock_irqrestore(&buf->flush_lock, flags); tty_buffer_free(port, head); continue; } - spin_unlock_irqrestore(&buf->flush_lock, flags); count = receive_buf(tty, head, count); if (!count) @@ -523,7 +512,6 @@ void tty_buffer_init(struct tty_port *port) struct tty_bufhead *buf = &port->buf; mutex_init(&buf->lock); - spin_lock_init(&buf->flush_lock); tty_buffer_reset(&buf->sentinel, 0); buf->head = &buf->sentinel; buf->tail = &buf->sentinel; diff --git a/include/linux/tty.h b/include/linux/tty.h index 036cccd80d9f..1c3316a47d7e 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -61,7 +61,6 @@ struct tty_bufhead { struct tty_buffer *head; /* Queue head */ struct work_struct work; struct mutex lock; - spinlock_t flush_lock; atomic_t priority; struct tty_buffer sentinel; struct llist_head free; /* Free queue head */ -- cgit v1.2.3 From 792e6aa7a15ea0fb16f8687e93caede1ea9118c7 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 30 Apr 2014 16:14:23 +0300 Subject: cfg80211: add cfg80211_sched_scan_stopped_rtnl Add locked-version for cfg80211_sched_scan_stopped. This is used for some users that might want to call it when rtnl is already locked. Fixes: d43c6b6 ("mac80211: reschedule sched scan after HW restart") Cc: stable@vger.kernel.org (3.14+) Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++++++ net/wireless/scan.c | 12 ++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f3539a15c411..f856e5a746fa 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3668,6 +3668,18 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy); */ void cfg80211_sched_scan_stopped(struct wiphy *wiphy); +/** + * cfg80211_sched_scan_stopped_rtnl - notify that the scheduled scan has stopped + * + * @wiphy: the wiphy on which the scheduled scan stopped + * + * The driver can call this function to inform cfg80211 that the + * scheduled scan had to be stopped, for whatever reason. The driver + * is then called back via the sched_scan_stop operation when done. + * This function should be called with rtnl locked. + */ +void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy); + /** * cfg80211_inform_bss_width_frame - inform cfg80211 of a received BSS frame * diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 7d09a712cb1f..88f108edfb58 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -284,14 +284,22 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy) } EXPORT_SYMBOL(cfg80211_sched_scan_results); -void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + ASSERT_RTNL(); + trace_cfg80211_sched_scan_stopped(wiphy); - rtnl_lock(); __cfg80211_stop_sched_scan(rdev, true); +} +EXPORT_SYMBOL(cfg80211_sched_scan_stopped_rtnl); + +void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +{ + rtnl_lock(); + cfg80211_sched_scan_stopped_rtnl(wiphy); rtnl_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); -- cgit v1.2.3 From 2c4a336e0a3e203fab6aa8d8f7bb70a0ad968a6b Mon Sep 17 00:00:00 2001 From: Andy King Date: Thu, 1 May 2014 15:20:43 -0700 Subject: vsock: Make transport the proto owner Right now the core vsock module is the owner of the proto family. This means there's nothing preventing the transport module from unloading if there are open sockets, which results in a panic. Fix that by allowing the transport to be the owner, which will refcount it properly. Includes version bump to 1.0.1.0-k Passes checkpatch this time, I swear... Acked-by: Dmitry Torokhov Signed-off-by: Andy King Signed-off-by: David S. Miller --- include/net/af_vsock.h | 6 +++++- net/vmw_vsock/af_vsock.c | 47 ++++++++++++++++++++++------------------------- 2 files changed, 27 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 7d64d3609ec9..428277869400 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -155,7 +155,11 @@ struct vsock_transport { /**** CORE ****/ -int vsock_core_init(const struct vsock_transport *t); +int __vsock_core_init(const struct vsock_transport *t, struct module *owner); +static inline int vsock_core_init(const struct vsock_transport *t) +{ + return __vsock_core_init(t, THIS_MODULE); +} void vsock_core_exit(void); /**** UTILS ****/ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 5adfd94c5b85..85d232bed87d 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1925,9 +1925,23 @@ static struct miscdevice vsock_device = { .fops = &vsock_device_ops, }; -static int __vsock_core_init(void) +int __vsock_core_init(const struct vsock_transport *t, struct module *owner) { - int err; + int err = mutex_lock_interruptible(&vsock_register_mutex); + + if (err) + return err; + + if (transport) { + err = -EBUSY; + goto err_busy; + } + + /* Transport must be the owner of the protocol so that it can't + * unload while there are open sockets. + */ + vsock_proto.owner = owner; + transport = t; vsock_init_tables(); @@ -1951,36 +1965,19 @@ static int __vsock_core_init(void) goto err_unregister_proto; } + mutex_unlock(&vsock_register_mutex); return 0; err_unregister_proto: proto_unregister(&vsock_proto); err_misc_deregister: misc_deregister(&vsock_device); - return err; -} - -int vsock_core_init(const struct vsock_transport *t) -{ - int retval = mutex_lock_interruptible(&vsock_register_mutex); - if (retval) - return retval; - - if (transport) { - retval = -EBUSY; - goto out; - } - - transport = t; - retval = __vsock_core_init(); - if (retval) - transport = NULL; - -out: + transport = NULL; +err_busy: mutex_unlock(&vsock_register_mutex); - return retval; + return err; } -EXPORT_SYMBOL_GPL(vsock_core_init); +EXPORT_SYMBOL_GPL(__vsock_core_init); void vsock_core_exit(void) { @@ -2000,5 +1997,5 @@ EXPORT_SYMBOL_GPL(vsock_core_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); -MODULE_VERSION("1.0.0.0-k"); +MODULE_VERSION("1.0.1.0-k"); MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From c1e756bfcbcac838a86a23f3e4501b556a961e3c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 May 2014 15:00:44 +0200 Subject: Revert "net: core: introduce netif_skb_dev_features" This reverts commit d206940319c41df4299db75ed56142177bb2e5f6, there are no more callers. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +------ net/core/dev.c | 22 ++++++++++------------ 2 files changed, 11 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ed3a3aa6604..20e99efb1ca6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3180,12 +3180,7 @@ void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); -netdev_features_t netif_skb_dev_features(struct sk_buff *skb, - const struct net_device *dev); -static inline netdev_features_t netif_skb_features(struct sk_buff *skb) -{ - return netif_skb_dev_features(skb, skb->dev); -} +netdev_features_t netif_skb_features(struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { diff --git a/net/core/dev.c b/net/core/dev.c index d2c8a06b3a98..c619b8641337 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2418,7 +2418,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); * 2. No high memory really exists on this machine. */ -static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb) +static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; @@ -2493,38 +2493,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) } static netdev_features_t harmonize_features(struct sk_buff *skb, - const struct net_device *dev, - netdev_features_t features) + netdev_features_t features) { int tmp; if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { features &= ~NETIF_F_ALL_CSUM; - } else if (illegal_highdma(dev, skb)) { + } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } return features; } -netdev_features_t netif_skb_dev_features(struct sk_buff *skb, - const struct net_device *dev) +netdev_features_t netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; - netdev_features_t features = dev->features; + netdev_features_t features = skb->dev->features; - if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs) + if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, dev, features); + return harmonize_features(skb, features); } - features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) @@ -2532,9 +2530,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb, NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - return harmonize_features(skb, dev, features); + return harmonize_features(skb, features); } -EXPORT_SYMBOL(netif_skb_dev_features); +EXPORT_SYMBOL(netif_skb_features); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) -- cgit v1.2.3 From 23a456f05353035d1a2b3f1b9a92707acdc036e0 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 6 May 2014 18:52:16 +0200 Subject: net: mdio: of_mdiobus_register(): fall back to mdiobus_register() for !CONFIG_OF If CONFIG_OF is not set, make of_mdiobus_register() call mdiobus_register() instead of returning -ENOSYS. This way, we can just call of_mdiobus_register() from all DT-enabled drivers to handle the compat cases. Signed-off-by: Daniel Mack Suggested-by: Florian Fainelli Acked-by: Florian Fainelli Acked-by: Mugunthan V N Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 6fe8464ed767..881a7c3571f4 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -31,7 +31,12 @@ extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); #else /* CONFIG_OF */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) { - return -ENOSYS; + /* + * Fall back to the non-DT function to register a bus. + * This way, we don't have to keep compat bits around in drivers. + */ + + return mdiobus_register(mdio); } static inline struct phy_device *of_phy_find_device(struct device_node *phy_np) -- cgit v1.2.3 From c9d8f1a64225dfcc2f721d73a5984a2444920744 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 6 May 2014 11:02:49 -0700 Subject: ipv4: move local_port_range out of CONFIG_SYSCTL When CONFIG_SYSCTL is not set, ip_local_port_range should still work, just that no one can change it. Therefore we should move it out of sysctl_inet.c. Also, rename it to ->ip_local_ports instead. Cc: David S. Miller Cc: Francois Romieu Reported-by: Stefan de Konink Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 +- net/ipv4/af_inet.c | 28 ++++++++++++++++++++++++++++ net/ipv4/inet_connection_sock.c | 8 ++++---- net/ipv4/ping.c | 4 ++-- net/ipv4/sysctl_net_ipv4.c | 29 +++++++++++------------------ 5 files changed, 46 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 80f500a29498..3d95cd475316 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -66,7 +66,7 @@ struct netns_ipv4 { int sysctl_icmp_ratemask; int sysctl_icmp_errors_use_inbound_ifaddr; - struct local_ports sysctl_local_ports; + struct local_ports ip_local_ports; int sysctl_tcp_ecn; int sysctl_ip_no_pmtu_disc; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8c54870db792..cccc8e487c7e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1650,6 +1650,31 @@ static int __init init_ipv4_mibs(void) return register_pernet_subsys(&ipv4_mib_ops); } +static __net_init int inet_init_net(struct net *net) +{ + /* + * Set defaults for local port range + */ + seqlock_init(&net->ipv4.ip_local_ports.lock); + net->ipv4.ip_local_ports.range[0] = 32768; + net->ipv4.ip_local_ports.range[1] = 61000; + return 0; +} + +static __net_exit void inet_exit_net(struct net *net) +{ +} + +static __net_initdata struct pernet_operations af_inet_ops = { + .init = inet_init_net, + .exit = inet_exit_net, +}; + +static int __init init_inet_pernet_ops(void) +{ + return register_pernet_subsys(&af_inet_ops); +} + static int ipv4_proc_init(void); /* @@ -1794,6 +1819,9 @@ static int __init inet_init(void) if (ip_mr_init()) pr_crit("%s: Cannot init ipv4 mroute\n", __func__); #endif + + if (init_inet_pernet_ops()) + pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); /* * Initialise per-cpu ipv4 mibs */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0d1e2cb877ec..a56b8e6e866a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -37,11 +37,11 @@ void inet_get_local_port_range(struct net *net, int *low, int *high) unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); - *low = net->ipv4.sysctl_local_ports.range[0]; - *high = net->ipv4.sysctl_local_ports.range[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + *low = net->ipv4.ip_local_ports.range[0]; + *high = net->ipv4.ip_local_ports.range[1]; + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8210964a9f19..347bdde9a585 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -240,11 +240,11 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 44eba052b43d..a116c41b05dd 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -45,10 +45,10 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) { - write_seqlock(&net->ipv4.sysctl_local_ports.lock); - net->ipv4.sysctl_local_ports.range[0] = range[0]; - net->ipv4.sysctl_local_ports.range[1] = range[1]; - write_sequnlock(&net->ipv4.sysctl_local_ports.lock); + write_seqlock(&net->ipv4.ip_local_ports.lock); + net->ipv4.ip_local_ports.range[0] = range[0]; + net->ipv4.ip_local_ports.range[1] = range[1]; + write_sequnlock(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -57,7 +57,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos) { struct net *net = - container_of(table->data, struct net, ipv4.sysctl_local_ports.range); + container_of(table->data, struct net, ipv4.ip_local_ports.range); int ret; int range[2]; struct ctl_table tmp = { @@ -90,11 +90,11 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low container_of(table->data, struct net, ipv4.sysctl_ping_group_range); unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } /* Update system visible IP port range */ @@ -103,10 +103,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig kgid_t *data = table->data; struct net *net = container_of(table->data, struct net, ipv4.sysctl_ping_group_range); - write_seqlock(&net->ipv4.sysctl_local_ports.lock); + write_seqlock(&net->ipv4.ip_local_ports.lock); data[0] = low; data[1] = high; - write_sequnlock(&net->ipv4.sysctl_local_ports.lock); + write_sequnlock(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -819,8 +819,8 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "ip_local_port_range", - .maxlen = sizeof(init_net.ipv4.sysctl_local_ports.range), - .data = &init_net.ipv4.sysctl_local_ports.range, + .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), + .data = &init_net.ipv4.ip_local_ports.range, .mode = 0644, .proc_handler = ipv4_local_port_range, }, @@ -865,13 +865,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1); net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); - /* - * Set defaults for local port range - */ - seqlock_init(&net->ipv4.sysctl_local_ports.lock); - net->ipv4.sysctl_local_ports.range[0] = 32768; - net->ipv4.sysctl_local_ports.range[1] = 61000; - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (net->ipv4.ipv4_hdr == NULL) goto err_reg; -- cgit v1.2.3 From ba6b918ab234186d3aa1663e296586a1b526b77a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 6 May 2014 11:02:50 -0700 Subject: ping: move ping_group_range out of CONFIG_SYSCTL Similarly, when CONFIG_SYSCTL is not set, ping_group_range should still work, just that no one can change it. Therefore we should move it out of sysctl_net_ipv4.c. And, it should not share the same seqlock with ip_local_port_range. BTW, rename it to ->ping_group_range instead. Cc: David S. Miller Cc: Francois Romieu Reported-by: Stefan de Konink Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 7 ++++++- net/ipv4/af_inet.c | 8 ++++++++ net/ipv4/ping.c | 6 +++--- net/ipv4/sysctl_net_ipv4.c | 13 +++---------- 4 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3d95cd475316..b2704fd0ec80 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -20,6 +20,11 @@ struct local_ports { int range[2]; }; +struct ping_group_range { + seqlock_t lock; + kgid_t range[2]; +}; + struct netns_ipv4 { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -72,7 +77,7 @@ struct netns_ipv4 { int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; - kgid_t sysctl_ping_group_range[2]; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cccc8e487c7e..6d6dd345bc4d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1658,6 +1658,14 @@ static __net_init int inet_init_net(struct net *net) seqlock_init(&net->ipv4.ip_local_ports.lock); net->ipv4.ip_local_ports.range[0] = 32768; net->ipv4.ip_local_ports.range[1] = 61000; + + seqlock_init(&net->ipv4.ping_group_range.lock); + /* + * Sane defaults - nobody may create ping sockets. + * Boot scripts should set this to distro-specific group. + */ + net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1); + net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0); return 0; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 347bdde9a585..044a0ddf6a79 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -236,15 +236,15 @@ exit: static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, kgid_t *high) { - kgid_t *data = net->ipv4.sysctl_ping_group_range; + kgid_t *data = net->ipv4.ping_group_range.range; unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ping_group_range.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a116c41b05dd..5cde8f263d40 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -87,7 +87,7 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low { kgid_t *data = table->data; struct net *net = - container_of(table->data, struct net, ipv4.sysctl_ping_group_range); + container_of(table->data, struct net, ipv4.ping_group_range.range); unsigned int seq; do { seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); @@ -102,7 +102,7 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig { kgid_t *data = table->data; struct net *net = - container_of(table->data, struct net, ipv4.sysctl_ping_group_range); + container_of(table->data, struct net, ipv4.ping_group_range.range); write_seqlock(&net->ipv4.ip_local_ports.lock); data[0] = low; data[1] = high; @@ -805,7 +805,7 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "ping_group_range", - .data = &init_net.ipv4.sysctl_ping_group_range, + .data = &init_net.ipv4.ping_group_range.range, .maxlen = sizeof(gid_t)*2, .mode = 0644, .proc_handler = ipv4_ping_group_range, @@ -858,13 +858,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) table[i].data += (void *)net - (void *)&init_net; } - /* - * Sane defaults - nobody may create ping sockets. - * Boot scripts should set this to distro-specific group. - */ - net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1); - net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (net->ipv4.ipv4_hdr == NULL) goto err_reg; -- cgit v1.2.3