From b383a73f2b832491a2f9e6e8ada26aad53b5763d Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 28 May 2020 08:00:02 -0700 Subject: fs/ext4: Introduce DAX inode flag Add a flag ([EXT4|FS]_DAX_FL) to preserve FS_XFLAG_DAX in the ext4 inode. Set the flag to be user visible and changeable. Set the flag to be inherited. Allow applications to change the flag at any time except if it conflicts with the set of mutually exclusive flags (Currently VERITY, ENCRYPT, JOURNAL_DATA). Furthermore, restrict setting any of the exclusive flags if DAX is set. While conceptually possible, we do not allow setting EXT4_DAX_FL while at the same time clearing exclusion flags (or vice versa) for 2 reasons: 1) The DAX flag does not take effect immediately which introduces quite a bit of complexity 2) There is no clear use case for being this flexible Finally, on regular files, flag the inode to not be cached to facilitate changing S_DAX on the next creation of the inode. Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20200528150003.828793-9-ira.weiny@intel.com Signed-off-by: Theodore Ts'o --- include/uapi/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 379a612f8f1d..f44eb0a04afd 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -262,6 +262,7 @@ struct fsxattr { #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#define FS_DAX_FL 0x02000000 /* Inode is DAX */ #define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */ #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */ -- cgit v1.2.3 From 94579ac3f6d0820adc83b5dc5358ead0158101e9 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Mon, 1 Jun 2020 16:39:37 -0500 Subject: xfrm: Fix double ESP trailer insertion in IPsec crypto offload. During IPsec performance testing, we see bad ICMP checksum. The error packet has duplicated ESP trailer due to double validate_xmit_xfrm calls. The first call is from ip_output, but the packet cannot be sent because netif_xmit_frozen_or_stopped is true and the packet gets dev_requeue_skb. The second call is from NET_TX softirq. However after the first call, the packet already has the ESP trailer. Fix by marking the skb with XFRM_XMIT bit after the packet is handled by validate_xmit_xfrm to avoid duplicate ESP trailer insertion. Fixes: f6e27114a60a ("net: Add a xfrm validate function to validate_xmit_skb") Signed-off-by: Huy Nguyen Reviewed-by: Boris Pismenny Reviewed-by: Raed Salem Reviewed-by: Saeed Mahameed Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + net/xfrm/xfrm_device.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 094fe682f5d7..c7d213c9f9d8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1008,6 +1008,7 @@ struct xfrm_offload { #define XFRM_GRO 32 #define XFRM_ESP_NO_TRAILER 64 #define XFRM_DEV_RESUME 128 +#define XFRM_XMIT 256 __u32 status; #define CRYPTO_SUCCESS 1 diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index f50d1f97cf8e..626096bd0d29 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -108,7 +108,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur struct xfrm_offload *xo = xfrm_offload(skb); struct sec_path *sp; - if (!xo) + if (!xo || (xo->flags & XFRM_XMIT)) return skb; if (!(features & NETIF_F_HW_ESP)) @@ -129,6 +129,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } + xo->flags |= XFRM_XMIT; + if (skb_is_gso(skb)) { struct net_device *dev = skb->dev; -- cgit v1.2.3 From 7bb64402a092136fb2fda995b0e0304b43c163c5 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Thu, 11 Jun 2020 20:56:52 +0800 Subject: spi: tools: Add macro definitions to fix build errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SPI_TX_OCTAL and SPI_RX_OCTAL to fix the following build errors: CC spidev_test.o spidev_test.c: In function ‘transfer’: spidev_test.c:131:13: error: ‘SPI_TX_OCTAL’ undeclared (first use in this function) if (mode & SPI_TX_OCTAL) ^ spidev_test.c:131:13: note: each undeclared identifier is reported only once for each function it appears in spidev_test.c:137:13: error: ‘SPI_RX_OCTAL’ undeclared (first use in this function) if (mode & SPI_RX_OCTAL) ^ spidev_test.c: In function ‘parse_opts’: spidev_test.c:290:12: error: ‘SPI_TX_OCTAL’ undeclared (first use in this function) mode |= SPI_TX_OCTAL; ^ spidev_test.c:308:12: error: ‘SPI_RX_OCTAL’ undeclared (first use in this function) mode |= SPI_RX_OCTAL; ^ LD spidev_test-in.o ld: cannot find spidev_test.o: No such file or directory Additionally, maybe SPI_CS_WORD and SPI_3WIRE_HIZ will be used in the future, so add them too. Fixes: 896fa735084e ("spi: spidev_test: Add support for Octal mode data transfers") Signed-off-by: Qing Zhang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/1591880212-13479-2-git-send-email-zhangqing@loongson.cn Signed-off-by: Mark Brown --- include/uapi/linux/spi/spidev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/spi/spidev.h b/include/uapi/linux/spi/spidev.h index ee0f2460bff6..9390615d52f0 100644 --- a/include/uapi/linux/spi/spidev.h +++ b/include/uapi/linux/spi/spidev.h @@ -48,6 +48,10 @@ #define SPI_TX_QUAD 0x200 #define SPI_RX_DUAL 0x400 #define SPI_RX_QUAD 0x800 +#define SPI_CS_WORD 0x1000 +#define SPI_TX_OCTAL 0x2000 +#define SPI_RX_OCTAL 0x4000 +#define SPI_3WIRE_HIZ 0x8000 /*---------------------------------------------------------------------------*/ -- cgit v1.2.3 From 6fbea6b6a838f9aa941fe53a3637fd8d8aab1eba Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 12 Jun 2020 15:37:48 +0800 Subject: ASoC: soc-card: export snd_soc_lookup_component_nolocked snd_soc_lookup_component_nolocked can be used for the DPCM case that Front-End needs to get the unused platform component but added by Back-End cpu dai driver. If the component is gotten, then we can get the dma chan created by Back-End component and reused it in Front-End. Signed-off-by: Shengjiu Wang Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/55f6e0d76f67a517b9a44136d790ff2a06b5caa8.1591947428.git.shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- include/sound/soc.h | 2 ++ sound/soc/soc-core.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 74868436ac79..565612a8d690 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -444,6 +444,8 @@ int devm_snd_soc_register_component(struct device *dev, const struct snd_soc_component_driver *component_driver, struct snd_soc_dai_driver *dai_drv, int num_dai); void snd_soc_unregister_component(struct device *dev); +struct snd_soc_component *snd_soc_lookup_component_nolocked(struct device *dev, + const char *driver_name); struct snd_soc_component *snd_soc_lookup_component(struct device *dev, const char *driver_name); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 7b387202c5db..0f30f5aabaa8 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -310,7 +310,7 @@ struct snd_soc_component *snd_soc_rtdcom_lookup(struct snd_soc_pcm_runtime *rtd, } EXPORT_SYMBOL_GPL(snd_soc_rtdcom_lookup); -static struct snd_soc_component +struct snd_soc_component *snd_soc_lookup_component_nolocked(struct device *dev, const char *driver_name) { struct snd_soc_component *component; @@ -329,6 +329,7 @@ static struct snd_soc_component return found_component; } +EXPORT_SYMBOL_GPL(snd_soc_lookup_component_nolocked); struct snd_soc_component *snd_soc_lookup_component(struct device *dev, const char *driver_name) -- cgit v1.2.3 From a9a21e1eafc94b79502cab8272b392f7f63ef7bb Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Fri, 12 Jun 2020 15:37:49 +0800 Subject: ASoC: dmaengine_pcm: export soc_component_to_pcm In DPCM case, Front-End needs to get the dma chan which has been requested by Back-End and reuse it. Signed-off-by: Shengjiu Wang Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/429c6ae1f3c5b47eb893f475d531d71cdcfe34c0.1591947428.git.shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- include/sound/dmaengine_pcm.h | 11 +++++++++++ sound/soc/soc-generic-dmaengine-pcm.c | 12 ------------ 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h index b65220685920..8c5e38180fb0 100644 --- a/include/sound/dmaengine_pcm.h +++ b/include/sound/dmaengine_pcm.h @@ -161,4 +161,15 @@ int snd_dmaengine_pcm_prepare_slave_config(struct snd_pcm_substream *substream, #define SND_DMAENGINE_PCM_DRV_NAME "snd_dmaengine_pcm" +struct dmaengine_pcm { + struct dma_chan *chan[SNDRV_PCM_STREAM_LAST + 1]; + const struct snd_dmaengine_pcm_config *config; + struct snd_soc_component component; + unsigned int flags; +}; + +static inline struct dmaengine_pcm *soc_component_to_pcm(struct snd_soc_component *p) +{ + return container_of(p, struct dmaengine_pcm, component); +} #endif diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index f728309a0833..80a4e71f2d95 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -21,18 +21,6 @@ */ #define SND_DMAENGINE_PCM_FLAG_NO_RESIDUE BIT(31) -struct dmaengine_pcm { - struct dma_chan *chan[SNDRV_PCM_STREAM_LAST + 1]; - const struct snd_dmaengine_pcm_config *config; - struct snd_soc_component component; - unsigned int flags; -}; - -static struct dmaengine_pcm *soc_component_to_pcm(struct snd_soc_component *p) -{ - return container_of(p, struct dmaengine_pcm, component); -} - static struct device *dmaengine_dma_dev(struct dmaengine_pcm *pcm, struct snd_pcm_substream *substream) { -- cgit v1.2.3 From 88ee9d571b6d8ed345f877e05f685814412e359b Mon Sep 17 00:00:00 2001 From: "Jan (janneke) Nieuwenhuizen" Date: Mon, 25 May 2020 21:39:40 +0200 Subject: ext4: support xattr gnu.* namespace for the Hurd The Hurd gained[0] support for moving the translator and author fields out of the inode and into the "gnu.*" xattr namespace. In anticipation of that, an xattr INDEX was reserved[1]. The Hurd has now been brought into compliance[2] with that. This patch adds support for reading and writing such attributes from Linux; you can now do something like mkdir -p hurd-root/servers/socket touch hurd-root/servers/socket/1 setfattr --name=gnu.translator --value='"/hurd/pflocal\0"' \ hurd-root/servers/socket/1 getfattr --name=gnu.translator hurd-root/servers/socket/1 # file: 1 gnu.translator="/hurd/pflocal" to setup a pipe translator, which is being used to create[3] a vm-image for the Hurd from GNU Guix. [0] https://summerofcode.withgoogle.com/projects/#5869799859027968 [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3980bd3b406addb327d858aebd19e229ea340b9a [2] https://git.savannah.gnu.org/cgit/hurd/hurd.git/commit/?id=a04c7bf83172faa7cb080fbe3b6c04a8415ca645 [3] https://git.savannah.gnu.org/cgit/guix.git/log/?h=wip-hurd-vm Signed-off-by: Jan Nieuwenhuizen Link: https://lore.kernel.org/r/20200525193940.878-1-janneke@gnu.org Signed-off-by: Theodore Ts'o --- fs/ext4/Makefile | 3 ++- fs/ext4/xattr.c | 2 ++ fs/ext4/xattr.h | 1 + fs/ext4/xattr_hurd.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/xattr.h | 4 ++++ 5 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 fs/ext4/xattr_hurd.c (limited to 'include') diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 4ccb3c9189d8..2e42f47a7f98 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -9,7 +9,8 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \ extents_status.o file.o fsmap.o fsync.o hash.o ialloc.o \ indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \ mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \ - super.o symlink.o sysfs.o xattr.o xattr_trusted.o xattr_user.o + super.o symlink.o sysfs.o xattr.o xattr_hurd.o xattr_trusted.o \ + xattr_user.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 9b29a40738ac..7d2f6576d954 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -93,6 +93,7 @@ static const struct xattr_handler * const ext4_xattr_handler_map[] = { #ifdef CONFIG_EXT4_FS_SECURITY [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, #endif + [EXT4_XATTR_INDEX_HURD] = &ext4_xattr_hurd_handler, }; const struct xattr_handler *ext4_xattr_handlers[] = { @@ -105,6 +106,7 @@ const struct xattr_handler *ext4_xattr_handlers[] = { #ifdef CONFIG_EXT4_FS_SECURITY &ext4_xattr_security_handler, #endif + &ext4_xattr_hurd_handler, NULL }; diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index ffe21ac77f78..730b91fa0dd7 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -124,6 +124,7 @@ struct ext4_xattr_inode_array { extern const struct xattr_handler ext4_xattr_user_handler; extern const struct xattr_handler ext4_xattr_trusted_handler; extern const struct xattr_handler ext4_xattr_security_handler; +extern const struct xattr_handler ext4_xattr_hurd_handler; #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c" diff --git a/fs/ext4/xattr_hurd.c b/fs/ext4/xattr_hurd.c new file mode 100644 index 000000000000..8cfa74a56361 --- /dev/null +++ b/fs/ext4/xattr_hurd.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/fs/ext4/xattr_hurd.c + * Handler for extended gnu attributes for the Hurd. + * + * Copyright (C) 2001 by Andreas Gruenbacher, + * Copyright (C) 2020 by Jan (janneke) Nieuwenhuizen, + */ + +#include +#include +#include "ext4.h" +#include "xattr.h" + +static bool +ext4_xattr_hurd_list(struct dentry *dentry) +{ + return test_opt(dentry->d_sb, XATTR_USER); +} + +static int +ext4_xattr_hurd_get(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + if (!test_opt(inode->i_sb, XATTR_USER)) + return -EOPNOTSUPP; + + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_HURD, + name, buffer, size); +} + +static int +ext4_xattr_hurd_set(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + if (!test_opt(inode->i_sb, XATTR_USER)) + return -EOPNOTSUPP; + + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_HURD, + name, value, size, flags); +} + +const struct xattr_handler ext4_xattr_hurd_handler = { + .prefix = XATTR_HURD_PREFIX, + .list = ext4_xattr_hurd_list, + .get = ext4_xattr_hurd_get, + .set = ext4_xattr_hurd_set, +}; diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index c1395b5bd432..9463db2dfa9d 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -7,6 +7,7 @@ Copyright (C) 2001 by Andreas Gruenbacher Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. Copyright (c) 2004 Red Hat, Inc., James Morris + Copyright (c) 2020 Jan (janneke) Nieuwenhuizen */ #include @@ -31,6 +32,9 @@ #define XATTR_BTRFS_PREFIX "btrfs." #define XATTR_BTRFS_PREFIX_LEN (sizeof(XATTR_BTRFS_PREFIX) - 1) +#define XATTR_HURD_PREFIX "gnu." +#define XATTR_HURD_PREFIX_LEN (sizeof(XATTR_HURD_PREFIX) - 1) + #define XATTR_SECURITY_PREFIX "security." #define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1) -- cgit v1.2.3 From 7b97d868b7ab2448859668de9222b8af43f76e78 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Tue, 9 Jun 2020 15:35:40 +0800 Subject: ext4, jbd2: ensure panic by fix a race between jbd2 abort and ext4 error handlers In the ext4 filesystem with errors=panic, if one process is recording errno in the superblock when invoking jbd2_journal_abort() due to some error cases, it could be raced by another __ext4_abort() which is setting the SB_RDONLY flag but missing panic because errno has not been recorded. jbd2_journal_commit_transaction() jbd2_journal_abort() journal->j_flags |= JBD2_ABORT; jbd2_journal_update_sb_errno() | ext4_journal_check_start() | __ext4_abort() | sb->s_flags |= SB_RDONLY; | if (!JBD2_REC_ERR) | return; journal->j_flags |= JBD2_REC_ERR; Finally, it will no longer trigger panic because the filesystem has already been set read-only. Fix this by introduce j_abort_mutex to make sure journal abort is completed before panic, and remove JBD2_REC_ERR flag. Fixes: 4327ba52afd03 ("ext4, jbd2: ensure entering into panic after recording an error in superblock") Signed-off-by: zhangyi (F) Reviewed-by: Jan Kara Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200609073540.3810702-1-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 16 +++++----------- fs/jbd2/journal.c | 17 ++++++++++++----- include/linux/jbd2.h | 6 +++++- 3 files changed, 22 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 127b8efa8d54..2660a7e47eef 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -522,9 +522,6 @@ static void ext4_handle_error(struct super_block *sb) smp_wmb(); sb->s_flags |= SB_RDONLY; } else if (test_opt(sb, ERRORS_PANIC)) { - if (EXT4_SB(sb)->s_journal && - !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) - return; panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); } @@ -725,23 +722,20 @@ void __ext4_abort(struct super_block *sb, const char *function, va_end(args); if (sb_rdonly(sb) == 0) { - ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; + if (EXT4_SB(sb)->s_journal) + jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); + + ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); /* * Make sure updated value of ->s_mount_flags will be visible * before ->s_flags update */ smp_wmb(); sb->s_flags |= SB_RDONLY; - if (EXT4_SB(sb)->s_journal) - jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); } - if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) { - if (EXT4_SB(sb)->s_journal && - !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) - return; + if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) panic("EXT4-fs panic from previous error\n"); - } } void __ext4_msg(struct super_block *sb, diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index a49d0e670ddf..e4944436e733 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1140,6 +1140,7 @@ static journal_t *journal_init_common(struct block_device *bdev, init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); init_waitqueue_head(&journal->j_wait_reserved); + mutex_init(&journal->j_abort_mutex); mutex_init(&journal->j_barrier); mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); @@ -1402,7 +1403,8 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags) printk(KERN_ERR "JBD2: Error %d detected when updating " "journal superblock for %s.\n", ret, journal->j_devname); - jbd2_journal_abort(journal, ret); + if (!is_journal_aborted(journal)) + jbd2_journal_abort(journal, ret); } return ret; @@ -2153,6 +2155,13 @@ void jbd2_journal_abort(journal_t *journal, int errno) { transaction_t *transaction; + /* + * Lock the aborting procedure until everything is done, this avoid + * races between filesystem's error handling flow (e.g. ext4_abort()), + * ensure panic after the error info is written into journal's + * superblock. + */ + mutex_lock(&journal->j_abort_mutex); /* * ESHUTDOWN always takes precedence because a file system check * caused by any other journal abort error is not required after @@ -2167,6 +2176,7 @@ void jbd2_journal_abort(journal_t *journal, int errno) journal->j_errno = errno; jbd2_journal_update_sb_errno(journal); } + mutex_unlock(&journal->j_abort_mutex); return; } @@ -2188,10 +2198,7 @@ void jbd2_journal_abort(journal_t *journal, int errno) * layer could realise that a filesystem check is needed. */ jbd2_journal_update_sb_errno(journal); - - write_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_REC_ERR; - write_unlock(&journal->j_state_lock); + mutex_unlock(&journal->j_abort_mutex); } /** diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f613d8529863..d56128df2aff 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -765,6 +765,11 @@ struct journal_s */ int j_errno; + /** + * @j_abort_mutex: Lock the whole aborting procedure. + */ + struct mutex j_abort_mutex; + /** * @j_sb_buffer: The first part of the superblock buffer. */ @@ -1247,7 +1252,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ -#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */ /* * Function declarations for the journaling transaction and buffer -- cgit v1.2.3 From 376bd28d03c97b4d53f5797d5f9b3522c8bd4d3d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 4 Jun 2020 18:09:41 +1000 Subject: crypto: ccp - Fix sparse warnings in sev-dev This patch fixes a bunch of sparse warnings in sev-dev where the __user marking is incorrectly handled. Reported-by: kbuild test robot Fixes: 7360e4b14350 ("crypto: ccp: Implement SEV_PEK_CERT_IMPORT...") Fixes: e799035609e1 ("crypto: ccp: Implement SEV_PEK_CSR ioctl...") Fixes: 76a2b524a4b1 ("crypto: ccp: Implement SEV_PDH_CERT_EXPORT...") Fixes: d6112ea0cb34 ("crypto: ccp - introduce SEV_GET_ID2 command") Signed-off-by: Herbert Xu Reviewed-by: Brijesh Singh Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 23 ++++++++++++++++------- include/linux/psp-sev.h | 2 +- 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index a2426334be61..476113e12489 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -376,6 +376,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable) struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pek_csr input; struct sev_data_pek_csr *data; + void __user *input_address; void *blob = NULL; int ret; @@ -394,6 +395,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable) goto cmd; /* allocate a physically contiguous buffer to store the CSR blob */ + input_address = (void __user *)input.address; if (input.length > SEV_FW_BLOB_MAX_SIZE) { ret = -EFAULT; goto e_free; @@ -426,7 +428,7 @@ cmd: } if (blob) { - if (copy_to_user((void __user *)input.address, blob, input.length)) + if (copy_to_user(input_address, blob, input.length)) ret = -EFAULT; } @@ -437,7 +439,7 @@ e_free: return ret; } -void *psp_copy_user_blob(u64 __user uaddr, u32 len) +void *psp_copy_user_blob(u64 uaddr, u32 len) { if (!uaddr || !len) return ERR_PTR(-EINVAL); @@ -446,7 +448,7 @@ void *psp_copy_user_blob(u64 __user uaddr, u32 len) if (len > SEV_FW_BLOB_MAX_SIZE) return ERR_PTR(-EINVAL); - return memdup_user((void __user *)(uintptr_t)uaddr, len); + return memdup_user((void __user *)uaddr, len); } EXPORT_SYMBOL_GPL(psp_copy_user_blob); @@ -621,6 +623,7 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp) { struct sev_user_data_get_id2 input; struct sev_data_get_id *data; + void __user *input_address; void *id_blob = NULL; int ret; @@ -631,6 +634,8 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp) if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) return -EFAULT; + input_address = (void __user *)input.address; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -660,8 +665,7 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp) } if (id_blob) { - if (copy_to_user((void __user *)input.address, - id_blob, data->len)) { + if (copy_to_user(input_address, id_blob, data->len)) { ret = -EFAULT; goto e_free; } @@ -720,6 +724,8 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable) struct sev_user_data_pdh_cert_export input; void *pdh_blob = NULL, *cert_blob = NULL; struct sev_data_pdh_cert_export *data; + void __user *input_cert_chain_address; + void __user *input_pdh_cert_address; int ret; /* If platform is not in INIT state then transition it to INIT. */ @@ -745,6 +751,9 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable) !input.cert_chain_address) goto cmd; + input_pdh_cert_address = (void __user *)input.pdh_cert_address; + input_cert_chain_address = (void __user *)input.cert_chain_address; + /* Allocate a physically contiguous buffer to store the PDH blob. */ if (input.pdh_cert_len > SEV_FW_BLOB_MAX_SIZE) { ret = -EFAULT; @@ -788,7 +797,7 @@ cmd: } if (pdh_blob) { - if (copy_to_user((void __user *)input.pdh_cert_address, + if (copy_to_user(input_pdh_cert_address, pdh_blob, input.pdh_cert_len)) { ret = -EFAULT; goto e_free_cert; @@ -796,7 +805,7 @@ cmd: } if (cert_blob) { - if (copy_to_user((void __user *)input.cert_chain_address, + if (copy_to_user(input_cert_chain_address, cert_blob, input.cert_chain_len)) ret = -EFAULT; } diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 7fbc8679145c..49d155cd2dfe 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -597,7 +597,7 @@ int sev_guest_df_flush(int *error); */ int sev_guest_decommission(struct sev_data_decommission *data, int *error); -void *psp_copy_user_blob(u64 __user uaddr, u32 len); +void *psp_copy_user_blob(u64 uaddr, u32 len); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ -- cgit v1.2.3 From 8e742aa79780b13cd300a42198c1a4cea9c89905 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 10 Jun 2020 13:48:51 +0200 Subject: syscalls: Fix offset type of ksys_ftruncate() After the commit below, truncate() on x86 32bit uses ksys_ftruncate(). But ksys_ftruncate() truncates the offset to unsigned long. Switch the type of offset to loff_t which is what do_sys_ftruncate() expects. Fixes: 121b32a58a3a (x86/entry/32: Use IA32-specific wrappers for syscalls taking 64-bit arguments) Signed-off-by: Jiri Slaby Signed-off-by: Thomas Gleixner Reviewed-by: Brian Gerst Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200610114851.28549-1-jslaby@suse.cz --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7c354c2955f5..b951a87da987 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1360,7 +1360,7 @@ static inline long ksys_lchown(const char __user *filename, uid_t user, extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small); -static inline long ksys_ftruncate(unsigned int fd, unsigned long length) +static inline long ksys_ftruncate(unsigned int fd, loff_t length) { return do_sys_ftruncate(fd, length, 1); } -- cgit v1.2.3 From e79302ae8c8cceb51cf642d5ace9da02668cb7b4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2020 17:04:03 +0200 Subject: kcsan: Remove __no_kcsan_or_inline There are no more user of this function attribute, also, with us now actively supporting '__no_kcsan inline' it doesn't make sense to have in any case. Signed-off-by: Peter Zijlstra (Intel) --- Documentation/dev-tools/kcsan.rst | 6 ------ include/linux/compiler_types.h | 5 +---- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/Documentation/dev-tools/kcsan.rst b/Documentation/dev-tools/kcsan.rst index ce4bbd918648..b38379f06194 100644 --- a/Documentation/dev-tools/kcsan.rst +++ b/Documentation/dev-tools/kcsan.rst @@ -114,12 +114,6 @@ the below options are available: To dynamically limit for which functions to generate reports, see the `DebugFS interface`_ blacklist/whitelist feature. - For ``__always_inline`` functions, replace ``__always_inline`` with - ``__no_kcsan_or_inline`` (which implies ``__always_inline``):: - - static __no_kcsan_or_inline void foo(void) { - ... - * To disable data race detection for a particular compilation unit, add to the ``Makefile``:: diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 21aed0981edf..938249809511 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -193,10 +193,7 @@ struct ftrace_likely_data { #define __no_kcsan __no_sanitize_thread #ifdef __SANITIZE_THREAD__ -# define __no_kcsan_or_inline __no_kcsan notrace __maybe_unused -# define __no_sanitize_or_inline __no_kcsan_or_inline -#else -# define __no_kcsan_or_inline __always_inline +# define __no_sanitize_or_inline __no_kcsan notrace __maybe_unused #endif #ifndef __no_sanitize_or_inline -- cgit v1.2.3 From 5ddbc4082e1072eeeae52ff561a88620a05be08f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2020 18:47:11 +0200 Subject: x86, kcsan: Add __no_kcsan to noinstr The 'noinstr' function attribute means no-instrumentation, this should very much include *SAN. Because lots of that is broken at present, only include KCSAN for now, as that is limited to clang11, which has sane function attribute behaviour. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/compiler_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 938249809511..a8b4266084a1 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -118,10 +118,6 @@ struct ftrace_likely_data { #define notrace __attribute__((__no_instrument_function__)) #endif -/* Section for code which can't be instrumented at all */ -#define noinstr \ - noinline notrace __attribute((__section__(".noinstr.text"))) - /* * it doesn't make sense on ARM (currently the only user of __naked) * to trace naked functions because then mcount is called without @@ -200,6 +196,10 @@ struct ftrace_likely_data { #define __no_sanitize_or_inline __always_inline #endif +/* Section for code which can't be instrumented at all */ +#define noinstr \ + noinline notrace __attribute((__section__(".noinstr.text"))) __no_kcsan + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 5144f8a8dfd7b3681f0a2b5bf599a210b2315018 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 4 Jun 2020 07:58:11 +0200 Subject: compiler_types.h: Add __no_sanitize_{address,undefined} to noinstr Adds the portable definitions for __no_sanitize_address, and __no_sanitize_undefined, and subsequently changes noinstr to use the attributes to disable instrumentation via KASAN or UBSAN. Reported-by: syzbot+dc1fa714cb070b184db5@syzkaller.appspotmail.com Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Acked-by: Miguel Ojeda Link: https://lore.kernel.org/lkml/000000000000d2474c05a6c938fe@google.com/ --- include/linux/compiler-clang.h | 8 ++++++++ include/linux/compiler-gcc.h | 6 ++++++ include/linux/compiler_types.h | 3 ++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index ee37256ec8bd..5e55302e3bf6 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -33,6 +33,14 @@ #define __no_sanitize_thread #endif +#if __has_feature(undefined_behavior_sanitizer) +/* GCC does not have __SANITIZE_UNDEFINED__ */ +#define __no_sanitize_undefined \ + __attribute__((no_sanitize("undefined"))) +#else +#define __no_sanitize_undefined +#endif + /* * Not all versions of clang implement the the type-generic versions * of the builtin overflow checkers. Fortunately, clang implements diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7dd4e0349ef3..1c74464c80c6 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -150,6 +150,12 @@ #define __no_sanitize_thread #endif +#if __has_attribute(__no_sanitize_undefined__) +#define __no_sanitize_undefined __attribute__((no_sanitize_undefined)) +#else +#define __no_sanitize_undefined +#endif + #if GCC_VERSION >= 50100 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index a8b4266084a1..85b8d2370c24 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -198,7 +198,8 @@ struct ftrace_likely_data { /* Section for code which can't be instrumented at all */ #define noinstr \ - noinline notrace __attribute((__section__(".noinstr.text"))) __no_kcsan + noinline notrace __attribute((__section__(".noinstr.text"))) \ + __no_kcsan __no_sanitize_address __no_sanitize_undefined #endif /* __KERNEL__ */ -- cgit v1.2.3 From 6b643a07a7e41f9e11cfbb9bba4c5c9791ac2997 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Jun 2020 20:09:06 +0200 Subject: x86/entry, ubsan, objtool: Whitelist __ubsan_handle_*() The UBSAN instrumentation only inserts external CALLs when things go 'BAD', much like WARN(). So treat them similar to WARN()s for noinstr, that is: allow them, at the risk of taking the machine down, to get their message out. Suggested-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Acked-by: Marco Elver --- include/linux/compiler_types.h | 2 +- tools/objtool/check.c | 28 +++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 85b8d2370c24..14513e88b7e0 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -199,7 +199,7 @@ struct ftrace_likely_data { /* Section for code which can't be instrumented at all */ #define noinstr \ noinline notrace __attribute((__section__(".noinstr.text"))) \ - __no_kcsan __no_sanitize_address __no_sanitize_undefined + __no_kcsan __no_sanitize_address #endif /* __KERNEL__ */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5fbb90a80d23..3e214f879ada 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2190,10 +2190,36 @@ static inline const char *call_dest_name(struct instruction *insn) return "{dynamic}"; } +static inline bool noinstr_call_dest(struct symbol *func) +{ + /* + * We can't deal with indirect function calls at present; + * assume they're instrumented. + */ + if (!func) + return false; + + /* + * If the symbol is from a noinstr section; we good. + */ + if (func->sec->noinstr) + return true; + + /* + * The __ubsan_handle_*() calls are like WARN(), they only happen when + * something 'BAD' happened. At the risk of taking the machine down, + * let them proceed to get the message out. + */ + if (!strncmp(func->name, "__ubsan_handle_", 15)) + return true; + + return false; +} + static int validate_call(struct instruction *insn, struct insn_state *state) { if (state->noinstr && state->instr <= 0 && - (!insn->call_dest || !insn->call_dest->sec->noinstr)) { + !noinstr_call_dest(insn->call_dest)) { WARN_FUNC("call to %s() leaves .noinstr.text section", insn->sec, insn->offset, call_dest_name(insn)); return 1; -- cgit v1.2.3 From 7dfc06a0f25b593a9f51992f540c0f80a57f3629 Mon Sep 17 00:00:00 2001 From: Fabian Vogt Date: Mon, 15 Jun 2020 09:16:36 +0200 Subject: efi/tpm: Verify event log header before parsing It is possible that the first event in the event log is not actually a log header at all, but rather a normal event. This leads to the cast in __calc_tpm2_event_size being an invalid conversion, which means that the values read are effectively garbage. Depending on the first event's contents, this leads either to apparently normal behaviour, a crash or a freeze. While this behaviour of the firmware is not in accordance with the TCG Client EFI Specification, this happens on a Dell Precision 5510 with the TPM enabled but hidden from the OS ("TPM On" disabled, state otherwise untouched). The EFI firmware claims that the TPM is present and active and that it supports the TCG 2.0 event log format. Fortunately, this can be worked around by simply checking the header of the first event and the event log header signature itself. Commit b4f1874c6216 ("tpm: check event log version before reading final events") addressed a similar issue also found on Dell models. Fixes: 6b0326190205 ("efi: Attempt to get the TCG2 event log in the boot stub") Signed-off-by: Fabian Vogt Link: https://lore.kernel.org/r/1927248.evlx2EsYKh@linux-e202.suse.de Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1165773 Signed-off-by: Ard Biesheuvel --- include/linux/tpm_eventlog.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 4f8c90c93c29..64356b199e94 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -81,6 +81,8 @@ struct tcg_efi_specid_event_algs { u16 digest_size; } __packed; +#define TCG_SPECID_SIG "Spec ID Event03" + struct tcg_efi_specid_event_head { u8 signature[16]; u32 platform_class; @@ -171,6 +173,7 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, int i; int j; u32 count, event_type; + const u8 zero_digest[sizeof(event_header->digest)] = {0}; marker = event; marker_start = marker; @@ -198,10 +201,19 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, count = READ_ONCE(event->count); event_type = READ_ONCE(event->event_type); + /* Verify that it's the log header */ + if (event_header->pcr_idx != 0 || + event_header->event_type != NO_ACTION || + memcmp(event_header->digest, zero_digest, sizeof(zero_digest))) { + size = 0; + goto out; + } + efispecid = (struct tcg_efi_specid_event_head *)event_header->event; /* Check if event is malformed. */ - if (count > efispecid->num_algs) { + if (memcmp(efispecid->signature, TCG_SPECID_SIG, + sizeof(TCG_SPECID_SIG)) || count > efispecid->num_algs) { size = 0; goto out; } -- cgit v1.2.3 From 2963795122f50b36ed16e3ba880c3ed2de1bda6e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 27 May 2020 12:14:25 -0500 Subject: efi: Replace zero-length array and use struct_size() helper The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. Lastly, make use of the sizeof_field() helper instead of an open-coded version. This issue was found with the help of Coccinelle and audited _manually_. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20200527171425.GA4053@embeddedor Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 3 ++- include/linux/efi.h | 7 ++----- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 7f1657b6c30d..edc5d36caf54 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -622,7 +622,8 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables, rsv = (void *)(p + prsv % PAGE_SIZE); /* reserve the entry itself */ - memblock_reserve(prsv, EFI_MEMRESERVE_SIZE(rsv->size)); + memblock_reserve(prsv, + struct_size(rsv, entry, rsv->size)); for (i = 0; i < atomic_read(&rsv->count); i++) { memblock_reserve(rsv->entry[i].base, diff --git a/include/linux/efi.h b/include/linux/efi.h index 2c6495f72f79..c3449c9699d0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1236,14 +1236,11 @@ struct linux_efi_memreserve { struct { phys_addr_t base; phys_addr_t size; - } entry[0]; + } entry[]; }; -#define EFI_MEMRESERVE_SIZE(count) (sizeof(struct linux_efi_memreserve) + \ - (count) * sizeof(((struct linux_efi_memreserve *)0)->entry[0])) - #define EFI_MEMRESERVE_COUNT(size) (((size) - sizeof(struct linux_efi_memreserve)) \ - / sizeof(((struct linux_efi_memreserve *)0)->entry[0])) + / sizeof_field(struct linux_efi_memreserve, entry[0])) void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size); -- cgit v1.2.3 From 0fae253af563cf5d1f5dc651d520c3eafd74f183 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 12 Jun 2020 15:59:37 -0500 Subject: ASoC: soc-devres: add devm_snd_soc_register_dai() The registration of DAIs may be done at two distinct times, once during a component registration and later when loading a topology. Since devm_ managed resources are freed in the reverse order they were allocated, when a component starts unregistering DAIs by walking through the DAI list, the memory allocated for the topology-registered DAIs was freed already, which leads to 100% reproducible KASAN use-after-free reports. This patch suggests a new devm_ function to force the DAI list to be updated prior to freeing the memory chunks referenced by the list pointers. Suggested-by: Bard Liao Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Kai Vehmanen BugLink: https://github.com/thesofproject/linux/issues/2186 Link: https://lore.kernel.org/r/20200612205938.26415-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc.h | 4 ++++ sound/soc/soc-devres.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 565612a8d690..fddab504c227 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1363,6 +1363,10 @@ void snd_soc_remove_pcm_runtime(struct snd_soc_card *card, struct snd_soc_dai *snd_soc_register_dai(struct snd_soc_component *component, struct snd_soc_dai_driver *dai_drv, bool legacy_dai_naming); +struct snd_soc_dai *devm_snd_soc_register_dai(struct device *dev, + struct snd_soc_component *component, + struct snd_soc_dai_driver *dai_drv, + bool legacy_dai_naming); void snd_soc_unregister_dai(struct snd_soc_dai *dai); struct snd_soc_dai *snd_soc_find_dai( diff --git a/sound/soc/soc-devres.c b/sound/soc/soc-devres.c index a9ea172a66a7..11e5d7962370 100644 --- a/sound/soc/soc-devres.c +++ b/sound/soc/soc-devres.c @@ -9,6 +9,43 @@ #include #include +static void devm_dai_release(struct device *dev, void *res) +{ + snd_soc_unregister_dai(*(struct snd_soc_dai **)res); +} + +/** + * devm_snd_soc_register_dai - resource-managed dai registration + * @dev: Device used to manage component + * @component: The component the DAIs are registered for + * @dai_drv: DAI driver to use for the DAI + * @legacy_dai_naming: if %true, use legacy single-name format; + * if %false, use multiple-name format; + */ +struct snd_soc_dai *devm_snd_soc_register_dai(struct device *dev, + struct snd_soc_component *component, + struct snd_soc_dai_driver *dai_drv, + bool legacy_dai_naming) +{ + struct snd_soc_dai **ptr; + struct snd_soc_dai *dai; + + ptr = devres_alloc(devm_dai_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + dai = snd_soc_register_dai(component, dai_drv, legacy_dai_naming); + if (dai) { + *ptr = dai; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return dai; +} +EXPORT_SYMBOL_GPL(devm_snd_soc_register_dai); + static void devm_component_release(struct device *dev, void *res) { snd_soc_unregister_component(*(struct device **)res); -- cgit v1.2.3 From 27784a256c2a453d891c0aaff84c3ac3f2e8a5a0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 13 Jun 2020 09:37:54 +0200 Subject: spi: uapi: spidev: Use TABs for alignment The UAPI uses TABs for alignment. Convert the recently introduced spaces to TABs to restore consistency. Fixes: 7bb64402a092136 ("spi: tools: Add macro definitions to fix build errors") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200613073755.15906-1-geert+renesas@glider.be Signed-off-by: Mark Brown --- include/uapi/linux/spi/spidev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/spi/spidev.h b/include/uapi/linux/spi/spidev.h index 9390615d52f0..d56427c0b3e0 100644 --- a/include/uapi/linux/spi/spidev.h +++ b/include/uapi/linux/spi/spidev.h @@ -48,10 +48,10 @@ #define SPI_TX_QUAD 0x200 #define SPI_RX_DUAL 0x400 #define SPI_RX_QUAD 0x800 -#define SPI_CS_WORD 0x1000 -#define SPI_TX_OCTAL 0x2000 -#define SPI_RX_OCTAL 0x4000 -#define SPI_3WIRE_HIZ 0x8000 +#define SPI_CS_WORD 0x1000 +#define SPI_TX_OCTAL 0x2000 +#define SPI_RX_OCTAL 0x4000 +#define SPI_3WIRE_HIZ 0x8000 /*---------------------------------------------------------------------------*/ -- cgit v1.2.3 From 5bcc066c05909d2af5c954911309f02312b0b02e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 15 Jun 2020 12:22:53 -0700 Subject: trace/events/block.h: drop kernel-doc for dropped function parameter Fix kernel-doc warning: the parameter was removed, so also remove the kernel-doc notation for it. ../include/trace/events/block.h:278: warning: Excess function parameter 'error' description in 'trace_block_bio_complete' Fixes: d24de76af836 ("block: remove the error argument to the block_bio_complete tracepoint") Signed-off-by: Randy Dunlap Cc: Christoph Hellwig Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/trace/events/block.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 1257f26bb887..93b114226af8 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -254,7 +254,6 @@ TRACE_EVENT(block_bio_bounce, * block_bio_complete - completed all work on the block operation * @q: queue holding the block operation * @bio: block operation completed - * @error: io error value * * This tracepoint indicates there is no further work to do on this * block IO operation @bio. -- cgit v1.2.3 From b0659d8a950d424e57cc0a67afc4740ee561224e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 Jun 2020 14:49:26 -0700 Subject: bpf: Fix definition of bpf_ringbuf_output() helper in UAPI comments Fix definition of bpf_ringbuf_output() in UAPI header comments, which is used to generate libbpf's bpf_helper_defs.h header. Return value is a number (error code), not a pointer. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200615214926.3638836-1-andriin@fb.com --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 19684813faae..974a71342aea 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3168,7 +3168,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) + * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 19684813faae..974a71342aea 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3168,7 +3168,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) + * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of -- cgit v1.2.3 From 762f926d6f19b9ab4dee0f98d55fc67e276cd094 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Sun, 14 Jun 2020 14:12:48 +0300 Subject: net/sched: act_ct: Make tcf_ct_flow_table_restore_skb inline Currently, tcf_ct_flow_table_restore_skb is exported by act_ct module, therefore modules using it will have hard-dependency on act_ct and will require loading it all the time. This can lead to an unnecessary overhead on systems that do not use hardware connection tracking action (ct_metadata action) in the first place. To relax the hard-dependency between the modules, we unexport this function and make it a static inline one. Fixes: 30b0cf90c6dd ("net/sched: act_ct: Support restoring conntrack info on skbs") Signed-off-by: Alaa Hleihel Reviewed-by: Roi Dayan Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/tc_act/tc_ct.h | 11 ++++++++++- net/sched/act_ct.c | 11 ----------- 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index 79654bcb9a29..8250d6f0a462 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -66,7 +66,16 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) #endif /* CONFIG_NF_CONNTRACK */ #if IS_ENABLED(CONFIG_NET_ACT_CT) -void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie); +static inline void +tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) +{ + enum ip_conntrack_info ctinfo = cookie & NFCT_INFOMASK; + struct nf_conn *ct; + + ct = (struct nf_conn *)(cookie & NFCT_PTRMASK); + nf_conntrack_get(&ct->ct_general); + nf_ct_set(skb, ct, ctinfo); +} #else static inline void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) { } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index e29f0f45d688..e9f3576cbf71 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1543,17 +1543,6 @@ static void __exit ct_cleanup_module(void) destroy_workqueue(act_ct_wq); } -void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) -{ - enum ip_conntrack_info ctinfo = cookie & NFCT_INFOMASK; - struct nf_conn *ct; - - ct = (struct nf_conn *)(cookie & NFCT_PTRMASK); - nf_conntrack_get(&ct->ct_general); - nf_ct_set(skb, ct, ctinfo); -} -EXPORT_SYMBOL_GPL(tcf_ct_flow_table_restore_skb); - module_init(ct_init_module); module_exit(ct_cleanup_module); MODULE_AUTHOR("Paul Blakey "); -- cgit v1.2.3 From 505ee3a1cab96785fbc2c7cdb41ab677ec270c3c Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Sun, 14 Jun 2020 14:12:49 +0300 Subject: netfilter: flowtable: Make nf_flow_table_offload_add/del_cb inline Currently, nf_flow_table_offload_add/del_cb are exported by nf_flow_table module, therefore modules using them will have hard-dependency on nf_flow_table and will require loading it all the time. This can lead to an unnecessary overhead on systems that do not use this API. To relax the hard-dependency between the modules, we unexport these functions and make them static inline. Fixes: 978703f42549 ("netfilter: flowtable: Add API for registering to flow table events") Signed-off-by: Alaa Hleihel Reviewed-by: Roi Dayan Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/netfilter/nf_flow_table.h | 49 ++++++++++++++++++++++++++++++++--- net/netfilter/nf_flow_table_core.c | 45 -------------------------------- 2 files changed, 45 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d7338bfd7b0f..16e8b2f8d006 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -161,10 +161,51 @@ struct nf_flow_route { struct flow_offload *flow_offload_alloc(struct nf_conn *ct); void flow_offload_free(struct flow_offload *flow); -int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, - flow_setup_cb_t *cb, void *cb_priv); -void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, - flow_setup_cb_t *cb, void *cb_priv); +static inline int +nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv) +{ + struct flow_block *block = &flow_table->flow_block; + struct flow_block_cb *block_cb; + int err = 0; + + down_write(&flow_table->flow_block_lock); + block_cb = flow_block_cb_lookup(block, cb, cb_priv); + if (block_cb) { + err = -EEXIST; + goto unlock; + } + + block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL); + if (IS_ERR(block_cb)) { + err = PTR_ERR(block_cb); + goto unlock; + } + + list_add_tail(&block_cb->list, &block->cb_list); + +unlock: + up_write(&flow_table->flow_block_lock); + return err; +} + +static inline void +nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv) +{ + struct flow_block *block = &flow_table->flow_block; + struct flow_block_cb *block_cb; + + down_write(&flow_table->flow_block_lock); + block_cb = flow_block_cb_lookup(block, cb, cb_priv); + if (block_cb) { + list_del(&block_cb->list); + flow_block_cb_free(block_cb); + } else { + WARN_ON(true); + } + up_write(&flow_table->flow_block_lock); +} int flow_offload_route_init(struct flow_offload *flow, const struct nf_flow_route *route); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 6a3034f84ab6..afa85171df38 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -387,51 +387,6 @@ static void nf_flow_offload_work_gc(struct work_struct *work) queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } -int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, - flow_setup_cb_t *cb, void *cb_priv) -{ - struct flow_block *block = &flow_table->flow_block; - struct flow_block_cb *block_cb; - int err = 0; - - down_write(&flow_table->flow_block_lock); - block_cb = flow_block_cb_lookup(block, cb, cb_priv); - if (block_cb) { - err = -EEXIST; - goto unlock; - } - - block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL); - if (IS_ERR(block_cb)) { - err = PTR_ERR(block_cb); - goto unlock; - } - - list_add_tail(&block_cb->list, &block->cb_list); - -unlock: - up_write(&flow_table->flow_block_lock); - return err; -} -EXPORT_SYMBOL_GPL(nf_flow_table_offload_add_cb); - -void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, - flow_setup_cb_t *cb, void *cb_priv) -{ - struct flow_block *block = &flow_table->flow_block; - struct flow_block_cb *block_cb; - - down_write(&flow_table->flow_block_lock); - block_cb = flow_block_cb_lookup(block, cb, cb_priv); - if (block_cb) { - list_del(&block_cb->list); - flow_block_cb_free(block_cb); - } else { - WARN_ON(true); - } - up_write(&flow_table->flow_block_lock); -} -EXPORT_SYMBOL_GPL(nf_flow_table_offload_del_cb); static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, __be16 port, __be16 new_port) -- cgit v1.2.3 From f517f7925b7b453cb83be06c268ba057b78e4792 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:06 +0530 Subject: ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm module and add the command family NVDIMM_FAMILY_PAPR to the white list of NVDIMM command sets. Also advertise support for ND_CMD_CALL for the nvdimm command mask and implement necessary scaffolding in the module to handle ND_CMD_CALL ioctl and PDSM requests that we receive. The layout of the PDSM request as we expect from libnvdimm/libndctl is described in newly introduced uapi header 'papr_pdsm.h' which defines a 'struct nd_pkg_pdsm' and a maximal union named 'nd_pdsm_payload'. These new structs together with 'struct nd_cmd_pkg' for a pdsm envelop thats sent by libndctl to libnvdimm and serviced by papr_scm in 'papr_scm_service_pdsm()'. The PDSM request is communicated by member 'struct nd_cmd_pkg.nd_command' together with other information on the pdsm payload (size-in, size-out). The patch also introduces 'struct pdsm_cmd_desc' instances of which are stored in an array __pdsm_cmd_descriptors[] indexed with PDSM cmd and corresponding access function pdsm_cmd_desc() is introduced. 'struct pdsm_cdm_desc' holds the service function for a given PDSM and corresponding payload in/out sizes. A new function papr_scm_service_pdsm() is introduced and is called from papr_scm_ndctl() in case of a PDSM request is received via ND_CMD_CALL command from libnvdimm. The function performs validation on the PDSM payload based on info present in corresponding PDSM descriptor and if valid calls the 'struct pdcm_cmd_desc.service' function to service the PDSM. Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K . V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-6-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/include/uapi/asm/papr_pdsm.h | 95 +++++++++++++++ arch/powerpc/platforms/pseries/papr_scm.c | 193 +++++++++++++++++++++++++++++- include/uapi/linux/ndctl.h | 1 + 3 files changed, 285 insertions(+), 4 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h (limited to 'include') diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h new file mode 100644 index 000000000000..28115152aa4e --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl + * + * (C) Copyright IBM 2020 + * + * Author: Vaibhav Jain + */ + +#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_ +#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_ + +#include +#include + +/* + * PDSM Envelope: + * + * The ioctl ND_CMD_CALL exchange data between user-space and kernel via + * envelope which consists of 2 headers sections and payload sections as + * illustrated below: + * +-----------------+---------------+---------------------------+ + * | 64-Bytes | 8-Bytes | Max 184-Bytes | + * +-----------------+---------------+---------------------------+ + * | ND-HEADER | PDSM-HEADER | PDSM-PAYLOAD | + * +-----------------+---------------+---------------------------+ + * | nd_family | | | + * | nd_size_out | cmd_status | | + * | nd_size_in | reserved | nd_pdsm_payload | + * | nd_command | payload --> | | + * | nd_fw_size | | | + * | nd_payload ---> | | | + * +---------------+-----------------+---------------------------+ + * + * ND Header: + * This is the generic libnvdimm header described as 'struct nd_cmd_pkg' + * which is interpreted by libnvdimm before passed on to papr_scm. Important + * member fields used are: + * 'nd_family' : (In) NVDIMM_FAMILY_PAPR_SCM + * 'nd_size_in' : (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0) + * 'nd_size_out' : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD + * 'nd_command' : (In) One of PAPR_PDSM_XXX + * 'nd_fw_size' : (Out) PDSM-HEADER + size of actual payload returned + * + * PDSM Header: + * This is papr-scm specific header that precedes the payload. This is defined + * as nd_cmd_pdsm_pkg. Following fields aare available in this header: + * + * 'cmd_status' : (Out) Errors if any encountered while servicing PDSM. + * 'reserved' : Not used, reserved for future and should be set to 0. + * 'payload' : A union of all the possible payload structs + * + * PDSM Payload: + * + * The layout of the PDSM Payload is defined by various structs shared between + * papr_scm and libndctl so that contents of payload can be interpreted. As such + * its defined as a union of all possible payload structs as + * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command' + * appropriate member of the union is accessed. + */ + +/* Max payload size that we can handle */ +#define ND_PDSM_PAYLOAD_MAX_SIZE 184 + +/* Max payload size that we can handle */ +#define ND_PDSM_HDR_SIZE \ + (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE) + +/* + * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel + * via 'nd_cmd_pkg.nd_command' member of the ioctl struct + */ +enum papr_pdsm { + PAPR_PDSM_MIN = 0x0, + PAPR_PDSM_MAX, +}; + +/* Maximal union that can hold all possible payload types */ +union nd_pdsm_payload { + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; +} __packed; + +/* + * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm + * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command' + * that should always precede this struct when sent to papr_scm via CMD_CALL + * interface. + */ +struct nd_pkg_pdsm { + __s32 cmd_status; /* Out: Sub-cmd status returned back */ + __u16 reserved[2]; /* Ignored and to be set as '0' */ + union nd_pdsm_payload payload; +} __packed; + +#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */ diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 692ad3d79826..d3bbf9940ba4 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -15,13 +15,15 @@ #include #include +#include #define BIND_ANY_ADDR (~0ul) #define PAPR_SCM_DIMM_CMD_MASK \ ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ (1ul << ND_CMD_GET_CONFIG_DATA) | \ - (1ul << ND_CMD_SET_CONFIG_DATA)) + (1ul << ND_CMD_SET_CONFIG_DATA) | \ + (1ul << ND_CMD_CALL)) /* DIMM health bitmap bitmap indicators */ /* SCM device is unable to persist memory contents */ @@ -349,17 +351,195 @@ static int papr_scm_meta_set(struct papr_scm_priv *p, return 0; } +/* + * Do a sanity checks on the inputs args to dimm-control function and return + * '0' if valid. Validation of PDSM payloads happens later in + * papr_scm_service_pdsm. + */ +static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK; + struct nd_cmd_pkg *nd_cmd; + struct papr_scm_priv *p; + enum papr_pdsm pdsm; + + /* Only dimm-specific calls are supported atm */ + if (!nvdimm) + return -EINVAL; + + /* get the provider data from struct nvdimm */ + p = nvdimm_provider_data(nvdimm); + + if (!test_bit(cmd, &cmd_mask)) { + dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd); + return -EINVAL; + } + + /* For CMD_CALL verify pdsm request */ + if (cmd == ND_CMD_CALL) { + /* Verify the envelope and envelop size */ + if (!buf || + buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n", + buf_len); + return -EINVAL; + } + + /* Verify that the nd_cmd_pkg.nd_family is correct */ + nd_cmd = (struct nd_cmd_pkg *)buf; + + if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) { + dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n", + nd_cmd->nd_family); + return -EINVAL; + } + + pdsm = (enum papr_pdsm)nd_cmd->nd_command; + + /* Verify if the pdsm command is valid */ + if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", + pdsm); + return -EINVAL; + } + + /* Have enough space to hold returned 'nd_pkg_pdsm' header */ + if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n", + pdsm); + return -EINVAL; + } + } + + /* Let the command be further processed */ + return 0; +} + +/* + * 'struct pdsm_cmd_desc' + * Identifies supported PDSMs' expected length of in/out payloads + * and pdsm service function. + * + * size_in : Size of input payload if any in the PDSM request. + * size_out : Size of output payload if any in the PDSM request. + * service : Service function for the PDSM request. Return semantics: + * rc < 0 : Error servicing PDSM and rc indicates the error. + * rc >=0 : Serviced successfully and 'rc' indicate number of + * bytes written to payload. + */ +struct pdsm_cmd_desc { + u32 size_in; + u32 size_out; + int (*service)(struct papr_scm_priv *dimm, + union nd_pdsm_payload *payload); +}; + +/* Holds all supported PDSMs' command descriptors */ +static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { + [PAPR_PDSM_MIN] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, + /* New PDSM command descriptors to be added below */ + + /* Empty */ + [PAPR_PDSM_MAX] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, +}; + +/* Given a valid pdsm cmd return its command descriptor else return NULL */ +static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd) +{ + if (cmd >= 0 || cmd < ARRAY_SIZE(__pdsm_cmd_descriptors)) + return &__pdsm_cmd_descriptors[cmd]; + + return NULL; +} + +/* + * For a given pdsm request call an appropriate service function. + * Returns errors if any while handling the pdsm command package. + */ +static int papr_scm_service_pdsm(struct papr_scm_priv *p, + struct nd_cmd_pkg *pkg) +{ + /* Get the PDSM header and PDSM command */ + struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload; + enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command; + const struct pdsm_cmd_desc *pdsc; + int rc; + + /* Fetch corresponding pdsm descriptor for validation and servicing */ + pdsc = pdsm_cmd_desc(pdsm); + + /* Validate pdsm descriptor */ + /* Ensure that reserved fields are 0 */ + if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n", + pdsm); + return -EINVAL; + } + + /* If pdsm expects some input, then ensure that the size_in matches */ + if (pdsc->size_in && + pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n", + pdsm, pkg->nd_size_in); + return -EINVAL; + } + + /* If pdsm wants to return data, then ensure that size_out matches */ + if (pdsc->size_out && + pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n", + pdsm, pkg->nd_size_out); + return -EINVAL; + } + + /* Service the pdsm */ + if (pdsc->service) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm); + + rc = pdsc->service(p, &pdsm_pkg->payload); + + if (rc < 0) { + /* error encountered while servicing pdsm */ + pdsm_pkg->cmd_status = rc; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } else { + /* pdsm serviced and 'rc' bytes written to payload */ + pdsm_pkg->cmd_status = 0; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc; + } + } else { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n", + pdsm); + pdsm_pkg->cmd_status = -ENOENT; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } + + return pdsm_pkg->cmd_status; +} + static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { struct nd_cmd_get_config_size *get_size_hdr; + struct nd_cmd_pkg *call_pkg = NULL; struct papr_scm_priv *p; int rc; - /* Only dimm-specific calls are supported atm */ - if (!nvdimm) - return -EINVAL; + rc = is_cmd_valid(nvdimm, cmd, buf, buf_len); + if (rc) { + pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc); + return rc; + } /* Use a local variable in case cmd_rc pointer is NULL */ if (!cmd_rc) @@ -385,6 +565,11 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, *cmd_rc = papr_scm_meta_set(p, buf); break; + case ND_CMD_CALL: + call_pkg = (struct nd_cmd_pkg *)buf; + *cmd_rc = papr_scm_service_pdsm(p, call_pkg); + break; + default: dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); return -EINVAL; diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index de5d90212409..0e09dc5cec19 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -244,6 +244,7 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_HPE2 2 #define NVDIMM_FAMILY_MSFT 3 #define NVDIMM_FAMILY_HYPERV 4 +#define NVDIMM_FAMILY_PAPR 5 #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg) -- cgit v1.2.3 From 7bb7ee8704fea9fec9eea53322b8464c3ed70a3d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 15 Jun 2020 08:46:23 +0200 Subject: scsi: libata: Provide an ata_scsi_dma_need_drain stub for !CONFIG_ATA SAS drivers can be compiled with ata support disabled. Provide a stub so that the drivers don't have to ifdef around wiring up ata_scsi_dma_need_drain. Link: https://lore.kernel.org/r/20200615064624.37317-2-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/linux/libata.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index af832852e620..042e584daca7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1092,7 +1092,11 @@ extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd, #define ATA_SCSI_COMPAT_IOCTL /* empty */ #endif extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd); +#if IS_ENABLED(CONFIG_ATA) bool ata_scsi_dma_need_drain(struct request *rq); +#else +#define ata_scsi_dma_need_drain NULL +#endif extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, unsigned int cmd, void __user *arg); extern bool ata_link_online(struct ata_link *link); -- cgit v1.2.3 From 466f966b1f4545acad150331911784d3f49ab1c3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: dmaengine: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- drivers/dma/milbeaut-hdmac.c | 2 +- drivers/dma/milbeaut-xdmac.c | 2 +- drivers/dma/moxart-dma.c | 2 +- drivers/dma/ti/edma.c | 2 +- drivers/dma/ti/k3-udma.c | 2 +- drivers/dma/timb_dma.c | 2 +- include/linux/dmaengine.h | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/dma/milbeaut-hdmac.c b/drivers/dma/milbeaut-hdmac.c index 8853d442430b..a8cfb59f6efe 100644 --- a/drivers/dma/milbeaut-hdmac.c +++ b/drivers/dma/milbeaut-hdmac.c @@ -77,7 +77,7 @@ struct milbeaut_hdmac_device { struct dma_device ddev; struct clk *clk; void __iomem *reg_base; - struct milbeaut_hdmac_chan channels[0]; + struct milbeaut_hdmac_chan channels[]; }; static struct milbeaut_hdmac_chan * diff --git a/drivers/dma/milbeaut-xdmac.c b/drivers/dma/milbeaut-xdmac.c index ab3d2f395378..85a597228fb0 100644 --- a/drivers/dma/milbeaut-xdmac.c +++ b/drivers/dma/milbeaut-xdmac.c @@ -74,7 +74,7 @@ struct milbeaut_xdmac_chan { struct milbeaut_xdmac_device { struct dma_device ddev; void __iomem *reg_base; - struct milbeaut_xdmac_chan channels[0]; + struct milbeaut_xdmac_chan channels[]; }; static struct milbeaut_xdmac_chan * diff --git a/drivers/dma/moxart-dma.c b/drivers/dma/moxart-dma.c index 4ab493d46375..347146a6e1d0 100644 --- a/drivers/dma/moxart-dma.c +++ b/drivers/dma/moxart-dma.c @@ -127,7 +127,7 @@ struct moxart_desc { unsigned int dma_cycles; struct virt_dma_desc vd; uint8_t es; - struct moxart_sg sg[0]; + struct moxart_sg sg[]; }; struct moxart_chan { diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index c4a5c170c1f9..35d81bd857f1 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -211,7 +211,7 @@ struct edma_desc { u32 residue; u32 residue_stat; - struct edma_pset pset[0]; + struct edma_pset pset[]; }; struct edma_cc; diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 945b7c604f91..c91e2dc1bb72 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -170,7 +170,7 @@ struct udma_desc { void *metadata; /* pointer to provided metadata buffer (EPIP, PSdata) */ unsigned int hwdesc_count; - struct udma_hwdesc hwdesc[0]; + struct udma_hwdesc hwdesc[]; }; enum udma_chan_state { diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c index 39382694fdfc..68e48bf54d78 100644 --- a/drivers/dma/timb_dma.c +++ b/drivers/dma/timb_dma.c @@ -88,7 +88,7 @@ struct timb_dma { struct dma_device dma; void __iomem *membase; struct tasklet_struct tasklet; - struct timb_dma_chan channels[0]; + struct timb_dma_chan channels[]; }; static struct device *chan2dev(struct dma_chan *chan) diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index e1c03339918f..6283917edd90 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -153,7 +153,7 @@ struct dma_interleaved_template { bool dst_sgl; size_t numf; size_t frame_size; - struct data_chunk sgl[0]; + struct data_chunk sgl[]; }; /** @@ -535,7 +535,7 @@ struct dmaengine_unmap_data { struct device *dev; struct kref kref; size_t len; - dma_addr_t addr[0]; + dma_addr_t addr[]; }; struct dma_async_tx_descriptor; -- cgit v1.2.3 From d6562f1ca877e0c2b020be0a66d59592e9f37f24 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: can: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- include/linux/can/skb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index a954def26c0d..900b9f4e0605 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -34,7 +34,7 @@ struct can_skb_priv { int ifindex; int skbcnt; - struct can_frame cf[0]; + struct can_frame cf[]; }; static inline struct can_skb_priv *can_skb_prv(struct sk_buff *skb) -- cgit v1.2.3 From ec4ac369397aa9ef81031cb7445bf1fbc677a393 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: drm/edid: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- include/drm/drm_displayid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_displayid.h b/include/drm/drm_displayid.h index 27bdd273fc4e..77941efb5426 100644 --- a/include/drm/drm_displayid.h +++ b/include/drm/drm_displayid.h @@ -89,7 +89,7 @@ struct displayid_detailed_timings_1 { struct displayid_detailed_timing_block { struct displayid_block base; - struct displayid_detailed_timings_1 timings[0]; + struct displayid_detailed_timings_1 timings[]; }; #define for_each_displayid_db(displayid, block, idx, length) \ -- cgit v1.2.3 From 6b5679d2378324d0ed1c02c8e828b3b8687d3b08 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: cb710: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- include/linux/cb710.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cb710.h b/include/linux/cb710.h index 60de3fedd3a7..405657a9a0d5 100644 --- a/include/linux/cb710.h +++ b/include/linux/cb710.h @@ -36,7 +36,7 @@ struct cb710_chip { unsigned slot_mask; unsigned slots; spinlock_t irq_lock; - struct cb710_slot slot[0]; + struct cb710_slot slot[]; }; /* NOTE: cb710_chip.slots is modified only during device init/exit and -- cgit v1.2.3 From 67cd46244611ca7b0d5447dac9a0cbd76b875210 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: FS-Cache: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- include/linux/fscache-cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index ce0b5fbf239d..3f0b19dcfae7 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -46,7 +46,7 @@ struct fscache_cache_tag { unsigned long flags; #define FSCACHE_TAG_RESERVED 0 /* T if tag is reserved for a cache */ atomic_t usage; - char name[0]; /* tag name */ + char name[]; /* tag name */ }; /* -- cgit v1.2.3 From 764e515f41c82220b20099962f47ec952c9ca6de Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: KVM: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 62ec926c78a0..d564855243d8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -409,7 +409,7 @@ struct kvm_irq_routing_table { * Array indexed by gsi. Each entry contains list of irq chips * the gsi is connected to. */ - struct hlist_head map[0]; + struct hlist_head map[]; }; #endif -- cgit v1.2.3 From 50b6951feb621ac6a246a6f773edc0a3b9b7f3b4 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: kexec: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- include/linux/kexec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 1776eb2e43a4..ea67910ae6b7 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -208,7 +208,7 @@ struct crash_mem_range { struct crash_mem { unsigned int max_nr_ranges; unsigned int nr_ranges; - struct crash_mem_range ranges[0]; + struct crash_mem_range ranges[]; }; extern int crash_exclude_mem_range(struct crash_mem *mem, -- cgit v1.2.3 From ad8cb1654d2f123bbff317a8168028e2b451ed11 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: keys: encrypted-type: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- include/keys/encrypted-type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/keys/encrypted-type.h b/include/keys/encrypted-type.h index 9e9ccb20d586..38afb341c3f2 100644 --- a/include/keys/encrypted-type.h +++ b/include/keys/encrypted-type.h @@ -27,7 +27,7 @@ struct encrypted_key_payload { unsigned short payload_datalen; /* payload data length */ unsigned short encrypted_key_format; /* encrypted key format */ u8 *decrypted_data; /* decrypted data */ - u8 payload_data[0]; /* payload data + datablob + hmac */ + u8 payload_data[]; /* payload data + datablob + hmac */ }; extern struct key_type key_type_encrypted; -- cgit v1.2.3 From 67a862a94d5d55fbfde6942c81f9e51f943f452c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: kprobes: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- include/linux/kprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 594265bfd390..e210af75ee38 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -161,7 +161,7 @@ struct kretprobe_instance { kprobe_opcode_t *ret_addr; struct task_struct *task; void *fp; - char data[0]; + char data[]; }; struct kretprobe_blackpoint { -- cgit v1.2.3 From 9c5fbf05cb00cc9e6f1baf42751c6bfe1e6859ba Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: libata: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index af832852e620..8bf5e59a7859 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -609,7 +609,7 @@ struct ata_host { struct task_struct *eh_owner; struct ata_port *simplex_claimed; /* channel owning the DMA */ - struct ata_port *ports[0]; + struct ata_port *ports[]; }; struct ata_queued_cmd { -- cgit v1.2.3 From 18bdc20be19741ac97499a37e42323248114b496 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: RxRPC: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- include/keys/rxrpc-type.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h index a183278c3e9e..2b0b15a71228 100644 --- a/include/keys/rxrpc-type.h +++ b/include/keys/rxrpc-type.h @@ -28,7 +28,7 @@ struct rxkad_key { u8 primary_flag; /* T if key for primary cell for this user */ u16 ticket_len; /* length of ticket[] */ u8 session_key[8]; /* DES session key */ - u8 ticket[0]; /* the encrypted ticket */ + u8 ticket[]; /* the encrypted ticket */ }; /* @@ -100,7 +100,7 @@ struct rxrpc_key_data_v1 { u32 expiry; /* time_t */ u32 kvno; u8 session_key[8]; - u8 ticket[0]; + u8 ticket[]; }; /* -- cgit v1.2.3 From af6bb61cc0a25fba4411f4348f01011c9a769047 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: sctp: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- include/linux/sctp.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 8ccd82105de8..76731230bbc5 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -221,7 +221,7 @@ struct sctp_datahdr { __be16 stream; __be16 ssn; __u32 ppid; - __u8 payload[0]; + __u8 payload[]; }; struct sctp_data_chunk { @@ -269,7 +269,7 @@ struct sctp_inithdr { __be16 num_outbound_streams; __be16 num_inbound_streams; __be32 initial_tsn; - __u8 params[0]; + __u8 params[]; }; struct sctp_init_chunk { @@ -299,13 +299,13 @@ struct sctp_cookie_preserve_param { /* Section 3.3.2.1 Host Name Address (11) */ struct sctp_hostname_param { struct sctp_paramhdr param_hdr; - uint8_t hostname[0]; + uint8_t hostname[]; }; /* Section 3.3.2.1 Supported Address Types (12) */ struct sctp_supported_addrs_param { struct sctp_paramhdr param_hdr; - __be16 types[0]; + __be16 types[]; }; /* ADDIP Section 3.2.6 Adaptation Layer Indication */ @@ -317,25 +317,25 @@ struct sctp_adaptation_ind_param { /* ADDIP Section 4.2.7 Supported Extensions Parameter */ struct sctp_supported_ext_param { struct sctp_paramhdr param_hdr; - __u8 chunks[0]; + __u8 chunks[]; }; /* AUTH Section 3.1 Random */ struct sctp_random_param { struct sctp_paramhdr param_hdr; - __u8 random_val[0]; + __u8 random_val[]; }; /* AUTH Section 3.2 Chunk List */ struct sctp_chunks_param { struct sctp_paramhdr param_hdr; - __u8 chunks[0]; + __u8 chunks[]; }; /* AUTH Section 3.3 HMAC Algorithm */ struct sctp_hmac_algo_param { struct sctp_paramhdr param_hdr; - __be16 hmac_ids[0]; + __be16 hmac_ids[]; }; /* RFC 2960. Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2): @@ -350,7 +350,7 @@ struct sctp_initack_chunk { /* Section 3.3.3.1 State Cookie (7) */ struct sctp_cookie_param { struct sctp_paramhdr p; - __u8 body[0]; + __u8 body[]; }; /* Section 3.3.3.1 Unrecognized Parameters (8) */ @@ -384,7 +384,7 @@ struct sctp_sackhdr { __be32 a_rwnd; __be16 num_gap_ack_blocks; __be16 num_dup_tsns; - union sctp_sack_variable variable[0]; + union sctp_sack_variable variable[]; }; struct sctp_sack_chunk { @@ -436,7 +436,7 @@ struct sctp_shutdown_chunk { struct sctp_errhdr { __be16 cause; __be16 length; - __u8 variable[0]; + __u8 variable[]; }; struct sctp_operr_chunk { @@ -594,7 +594,7 @@ struct sctp_fwdtsn_skip { struct sctp_fwdtsn_hdr { __be32 new_cum_tsn; - struct sctp_fwdtsn_skip skip[0]; + struct sctp_fwdtsn_skip skip[]; }; struct sctp_fwdtsn_chunk { @@ -611,7 +611,7 @@ struct sctp_ifwdtsn_skip { struct sctp_ifwdtsn_hdr { __be32 new_cum_tsn; - struct sctp_ifwdtsn_skip skip[0]; + struct sctp_ifwdtsn_skip skip[]; }; struct sctp_ifwdtsn_chunk { @@ -658,7 +658,7 @@ struct sctp_addip_param { struct sctp_addiphdr { __be32 serial; - __u8 params[0]; + __u8 params[]; }; struct sctp_addip_chunk { @@ -718,7 +718,7 @@ struct sctp_addip_chunk { struct sctp_authhdr { __be16 shkey_id; __be16 hmac_id; - __u8 hmac[0]; + __u8 hmac[]; }; struct sctp_auth_chunk { @@ -733,7 +733,7 @@ struct sctp_infox { struct sctp_reconf_chunk { struct sctp_chunkhdr chunk_hdr; - __u8 params[0]; + __u8 params[]; }; struct sctp_strreset_outreq { @@ -741,13 +741,13 @@ struct sctp_strreset_outreq { __be32 request_seq; __be32 response_seq; __be32 send_reset_at_tsn; - __be16 list_of_streams[0]; + __be16 list_of_streams[]; }; struct sctp_strreset_inreq { struct sctp_paramhdr param_hdr; __be32 request_seq; - __be16 list_of_streams[0]; + __be16 list_of_streams[]; }; struct sctp_strreset_tsnreq { -- cgit v1.2.3 From 5cab1634e485ce7b33839f762f3bb9b5f009dc87 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: tifm: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. Silva --- include/linux/tifm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tifm.h b/include/linux/tifm.h index 299cbb8c63bb..44073d06710f 100644 --- a/include/linux/tifm.h +++ b/include/linux/tifm.h @@ -124,7 +124,7 @@ struct tifm_adapter { int (*has_ms_pif)(struct tifm_adapter *fm, struct tifm_dev *sock); - struct tifm_dev *sockets[0]; + struct tifm_dev *sockets[]; }; struct tifm_adapter *tifm_alloc_adapter(unsigned int num_sockets, -- cgit v1.2.3 From 33aea07f30c261eff7ba229f19fd1b161e0fb851 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 16 Jun 2020 01:15:29 +0200 Subject: compiler_attributes.h: Support no_sanitize_undefined check with GCC 4 UBSAN is supported since GCC 4.9, which unfortunately did not yet have __has_attribute(). To work around, the __GCC4_has_attribute workaround requires defining which compiler version supports the given attribute. In the case of no_sanitize_undefined, it is the first version that supports UBSAN, which is GCC 4.9. Reported-by: kernel test robot Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miguel Ojeda Link: https://lkml.kernel.org/r/20200615231529.GA119644@google.com --- include/linux/compiler_attributes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index cdf016596659..c8f03d2969df 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -40,6 +40,7 @@ # define __GCC4_has_attribute___noclone__ 1 # define __GCC4_has_attribute___nonstring__ 0 # define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8) +# define __GCC4_has_attribute___no_sanitize_undefined__ (__GNUC_MINOR__ >= 9) # define __GCC4_has_attribute___fallthrough__ 0 #endif -- cgit v1.2.3 From 22d2cfdffa5bff3566e16cb7320e13ceb814674b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 4 Jun 2020 11:12:34 +0200 Subject: libceph: move away from global osd_req_flags osd_req_flags is overly general and doesn't suit its only user (read_from_replica option) well: - applying osd_req_flags in account_request() affects all OSD requests, including linger (i.e. watch and notify). However, linger requests should always go to the primary even though some of them are reads (e.g. notify has side effects but it is a read because it doesn't result in mutation on the OSDs). - calls to class methods that are reads are allowed to go to the replica, but most such calls issued for "rbd map" and/or exclusive lock transitions are requested to be resent to the primary via EAGAIN, doubling the latency. Get rid of global osd_req_flags and set read_from_replica flag only on specific OSD requests instead. Fixes: 8ad44d5e0d1e ("libceph: read_from_replica option") Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- drivers/block/rbd.c | 4 +++- include/linux/ceph/libceph.h | 4 ++-- net/ceph/ceph_common.c | 14 ++++++-------- net/ceph/osd_client.c | 7 ++----- 4 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 7420648a1de6..4f61e9209461 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1451,8 +1451,10 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req) static void rbd_osd_format_read(struct ceph_osd_request *osd_req) { struct rbd_obj_request *obj_request = osd_req->r_priv; + struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev; + struct ceph_options *opt = rbd_dev->rbd_client->client->options; - osd_req->r_flags = CEPH_OSD_FLAG_READ; + osd_req->r_flags = CEPH_OSD_FLAG_READ | opt->read_from_replica; osd_req->r_snapid = obj_request->img_request->snap_id; } diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 2247e71beb83..e5ed1c541e7f 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -52,8 +52,7 @@ struct ceph_options { unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ unsigned long osd_request_timeout; /* jiffies */ - - u32 osd_req_flags; /* CEPH_OSD_FLAG_*, applied to each OSD request */ + u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */ /* * any type that can't be simply compared or doesn't need @@ -76,6 +75,7 @@ struct ceph_options { #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */ +#define CEPH_READ_FROM_REPLICA_DEFAULT 0 /* read from primary */ #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index afe0e8184c23..4e7edd707a14 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -332,6 +332,7 @@ struct ceph_options *ceph_alloc_options(void) opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; + opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT; return opt; } EXPORT_SYMBOL(ceph_alloc_options); @@ -490,16 +491,13 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, case Opt_read_from_replica: switch (result.uint_32) { case Opt_read_from_replica_no: - opt->osd_req_flags &= ~(CEPH_OSD_FLAG_BALANCE_READS | - CEPH_OSD_FLAG_LOCALIZE_READS); + opt->read_from_replica = 0; break; case Opt_read_from_replica_balance: - opt->osd_req_flags |= CEPH_OSD_FLAG_BALANCE_READS; - opt->osd_req_flags &= ~CEPH_OSD_FLAG_LOCALIZE_READS; + opt->read_from_replica = CEPH_OSD_FLAG_BALANCE_READS; break; case Opt_read_from_replica_localize: - opt->osd_req_flags |= CEPH_OSD_FLAG_LOCALIZE_READS; - opt->osd_req_flags &= ~CEPH_OSD_FLAG_BALANCE_READS; + opt->read_from_replica = CEPH_OSD_FLAG_LOCALIZE_READS; break; default: BUG(); @@ -613,9 +611,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, } seq_putc(m, ','); } - if (opt->osd_req_flags & CEPH_OSD_FLAG_BALANCE_READS) { + if (opt->read_from_replica == CEPH_OSD_FLAG_BALANCE_READS) { seq_puts(m, "read_from_replica=balance,"); - } else if (opt->osd_req_flags & CEPH_OSD_FLAG_LOCALIZE_READS) { + } else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) { seq_puts(m, "read_from_replica=localize,"); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 4fea3c33af2a..a37d159019a0 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1117,10 +1117,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, truncate_size, truncate_seq); } - req->r_flags = flags; req->r_base_oloc.pool = layout->pool_id; req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns); ceph_oid_printf(&req->r_base_oid, "%llx.%08llx", vino.ino, objnum); + req->r_flags = flags | osdc->client->options->read_from_replica; req->r_snapid = vino.snap; if (flags & CEPH_OSD_FLAG_WRITE) @@ -2431,14 +2431,11 @@ promote: static void account_request(struct ceph_osd_request *req) { - struct ceph_osd_client *osdc = req->r_osdc; - WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK)); WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE))); req->r_flags |= CEPH_OSD_FLAG_ONDISK; - req->r_flags |= osdc->client->options->osd_req_flags; - atomic_inc(&osdc->num_requests); + atomic_inc(&req->r_osdc->num_requests); req->r_start_stamp = jiffies; req->r_start_latency = ktime_get(); -- cgit v1.2.3 From 2fd2bc7f49324e7b439e3c5584762abd0a9a13bc Mon Sep 17 00:00:00 2001 From: Colton Lewis Date: Sun, 14 Jun 2020 04:08:04 +0000 Subject: gpu: host1x: Correct trivial kernel-doc inconsistencies Silence documentation build warnings by adding kernel-doc fields. ./include/linux/host1x.h:69: warning: Function parameter or member 'parent' not described in 'host1x_client' ./include/linux/host1x.h:69: warning: Function parameter or member 'usecount' not described in 'host1x_client' ./include/linux/host1x.h:69: warning: Function parameter or member 'lock' not described in 'host1x_client' Signed-off-by: Colton Lewis Signed-off-by: Thierry Reding --- include/linux/host1x.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index c230b4e70d75..a3a568bf9686 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -48,6 +48,9 @@ struct host1x_client_ops { * @channel: host1x channel associated with this client * @syncpts: array of syncpoints requested for this client * @num_syncpts: number of syncpoints requested for this client + * @parent: pointer to parent structure + * @usecount: reference count for this structure + * @lock: mutex for mutually exclusive concurrency */ struct host1x_client { struct list_head list; -- cgit v1.2.3 From 9b38cc704e844e41d9cf74e647bff1d249512cb3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 12 May 2020 17:03:18 +0900 Subject: kretprobe: Prevent triggering kretprobe from within kprobe_flush_task Ziqian reported lockup when adding retprobe on _raw_spin_lock_irqsave. My test was also able to trigger lockdep output: ============================================ WARNING: possible recursive locking detected 5.6.0-rc6+ #6 Not tainted -------------------------------------------- sched-messaging/2767 is trying to acquire lock: ffffffff9a492798 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_hash_lock+0x52/0xa0 but task is already holding lock: ffffffff9a491a18 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_trampoline+0x0/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(kretprobe_table_locks[i].lock)); lock(&(kretprobe_table_locks[i].lock)); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by sched-messaging/2767: #0: ffffffff9a491a18 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_trampoline+0x0/0x50 stack backtrace: CPU: 3 PID: 2767 Comm: sched-messaging Not tainted 5.6.0-rc6+ #6 Call Trace: dump_stack+0x96/0xe0 __lock_acquire.cold.57+0x173/0x2b7 ? native_queued_spin_lock_slowpath+0x42b/0x9e0 ? lockdep_hardirqs_on+0x590/0x590 ? __lock_acquire+0xf63/0x4030 lock_acquire+0x15a/0x3d0 ? kretprobe_hash_lock+0x52/0xa0 _raw_spin_lock_irqsave+0x36/0x70 ? kretprobe_hash_lock+0x52/0xa0 kretprobe_hash_lock+0x52/0xa0 trampoline_handler+0xf8/0x940 ? kprobe_fault_handler+0x380/0x380 ? find_held_lock+0x3a/0x1c0 kretprobe_trampoline+0x25/0x50 ? lock_acquired+0x392/0xbc0 ? _raw_spin_lock_irqsave+0x50/0x70 ? __get_valid_kprobe+0x1f0/0x1f0 ? _raw_spin_unlock_irqrestore+0x3b/0x40 ? finish_task_switch+0x4b9/0x6d0 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 The code within the kretprobe handler checks for probe reentrancy, so we won't trigger any _raw_spin_lock_irqsave probe in there. The problem is in outside kprobe_flush_task, where we call: kprobe_flush_task kretprobe_table_lock raw_spin_lock_irqsave _raw_spin_lock_irqsave where _raw_spin_lock_irqsave triggers the kretprobe and installs kretprobe_trampoline handler on _raw_spin_lock_irqsave return. The kretprobe_trampoline handler is then executed with already locked kretprobe_table_locks, and first thing it does is to lock kretprobe_table_locks ;-) the whole lockup path like: kprobe_flush_task kretprobe_table_lock raw_spin_lock_irqsave _raw_spin_lock_irqsave ---> probe triggered, kretprobe_trampoline installed ---> kretprobe_table_locks locked kretprobe_trampoline trampoline_handler kretprobe_hash_lock(current, &head, &flags); <--- deadlock Adding kprobe_busy_begin/end helpers that mark code with fake probe installed to prevent triggering of another kprobe within this code. Using these helpers in kprobe_flush_task, so the probe recursion protection check is hit and the probe is never set to prevent above lockup. Link: http://lkml.kernel.org/r/158927059835.27680.7011202830041561604.stgit@devnote2 Fixes: ef53d9c5e4da ("kprobes: improve kretprobe scalability with hashed locking") Cc: Ingo Molnar Cc: "Gustavo A . R . Silva" Cc: Anders Roxell Cc: "Naveen N . Rao" Cc: Anil S Keshavamurthy Cc: David Miller Cc: Ingo Molnar Cc: Peter Zijlstra Cc: stable@vger.kernel.org Reported-by: "Ziqian SUN (Zamir)" Acked-by: Masami Hiramatsu Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/kprobes/core.c | 16 +++------------- include/linux/kprobes.h | 4 ++++ kernel/kprobes.c | 24 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3bafe1bd4dc7..8a5ec10e95dc 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -753,16 +753,11 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline); -static struct kprobe kretprobe_kprobe = { - .addr = (void *)kretprobe_trampoline, -}; - /* * Called from kretprobe_trampoline */ __used __visible void *trampoline_handler(struct pt_regs *regs) { - struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; @@ -772,16 +767,12 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false; - preempt_disable(); - /* * Set a dummy kprobe for avoiding kretprobe recursion. * Since kretprobe never run in kprobe handler, kprobe must not * be running at this point. */ - kcb = get_kprobe_ctlblk(); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; + kprobe_busy_begin(); INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -857,7 +848,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) __this_cpu_write(current_kprobe, &ri->rp->kp); ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); + __this_cpu_write(current_kprobe, &kprobe_busy); } recycle_rp_inst(ri, &empty_rp); @@ -873,8 +864,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); - __this_cpu_write(current_kprobe, NULL); - preempt_enable(); + kprobe_busy_end(); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 594265bfd390..05ed663e6c7b 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -350,6 +350,10 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) return this_cpu_ptr(&kprobe_ctlblk); } +extern struct kprobe kprobe_busy; +void kprobe_busy_begin(void); +void kprobe_busy_end(void); + kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5cb7791c16b3..4a904cc56d68 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1241,6 +1241,26 @@ __releases(hlist_lock) } NOKPROBE_SYMBOL(kretprobe_table_unlock); +struct kprobe kprobe_busy = { + .addr = (void *) get_kprobe, +}; + +void kprobe_busy_begin(void) +{ + struct kprobe_ctlblk *kcb; + + preempt_disable(); + __this_cpu_write(current_kprobe, &kprobe_busy); + kcb = get_kprobe_ctlblk(); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; +} + +void kprobe_busy_end(void) +{ + __this_cpu_write(current_kprobe, NULL); + preempt_enable(); +} + /* * This function is called from finish_task_switch when task tk becomes dead, * so that we can recycle any function-return probe instances associated @@ -1258,6 +1278,8 @@ void kprobe_flush_task(struct task_struct *tk) /* Early boot. kretprobe_table_locks not yet initialized. */ return; + kprobe_busy_begin(); + INIT_HLIST_HEAD(&empty_rp); hash = hash_ptr(tk, KPROBE_HASH_BITS); head = &kretprobe_inst_table[hash]; @@ -1271,6 +1293,8 @@ void kprobe_flush_task(struct task_struct *tk) hlist_del(&ri->hlist); kfree(ri); } + + kprobe_busy_end(); } NOKPROBE_SYMBOL(kprobe_flush_task); -- cgit v1.2.3 From b19d57d0f3cc6f1022edf94daf1d70506a09e3c2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 8 Jun 2020 20:22:33 -0500 Subject: overflow.h: Add flex_array_size() helper Add flex_array_size() helper for the calculation of the size, in bytes, of a flexible array member contained within an enclosing structure. Example of usage: struct something { size_t count; struct foo items[]; }; struct something *instance; instance = kmalloc(struct_size(instance, items, count), GFP_KERNEL); instance->count = count; memcpy(instance->items, src, flex_array_size(instance, items, instance->count)); The helper returns SIZE_MAX on overflow instead of wrapping around. Additionally replaces parameter "n" with "count" in struct_size() helper for greater clarity and unification. Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20200609012233.GA3371@embeddedor Signed-off-by: Kees Cook --- include/linux/overflow.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 659045046468..93fcef105061 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -304,16 +304,33 @@ static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) * struct_size() - Calculate size of structure with trailing array. * @p: Pointer to the structure. * @member: Name of the array member. - * @n: Number of elements in the array. + * @count: Number of elements in the array. * * Calculates size of memory needed for structure @p followed by an - * array of @n @member elements. + * array of @count number of @member elements. * * Return: number of bytes needed or SIZE_MAX on overflow. */ -#define struct_size(p, member, n) \ - __ab_c_size(n, \ +#define struct_size(p, member, count) \ + __ab_c_size(count, \ sizeof(*(p)->member) + __must_be_array((p)->member),\ sizeof(*(p))) +/** + * flex_array_size() - Calculate size of a flexible array member + * within an enclosing structure. + * + * @p: Pointer to the structure. + * @member: Name of the flexible array member. + * @count: Number of elements in the array. + * + * Calculates size of a flexible array of @count number of @member + * elements, at the end of structure @p. + * + * Return: number of bytes needed or SIZE_MAX on overflow. + */ +#define flex_array_size(p, member, count) \ + array_size(count, \ + sizeof(*(p)->member) + __must_be_array((p)->member)) + #endif /* __LINUX_OVERFLOW_H */ -- cgit v1.2.3 From 26749b3201ab05e288fbf78fbc8585dfa2da3218 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 15 Jun 2020 08:52:31 +0200 Subject: dma-direct: mark __dma_direct_alloc_pages static Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 2 -- kernel/dma/direct.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 136f984df0d9..cdfa400f89b3 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -77,8 +77,6 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); -struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, - gfp_t gfp, unsigned long attrs); int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 9ec6a5c3fc57..30c41b57acd9 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -109,7 +109,7 @@ static inline bool dma_should_free_from_pool(struct device *dev, return false; } -struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, +static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp_t gfp, unsigned long attrs) { size_t alloc_size = PAGE_ALIGN(size); -- cgit v1.2.3 From 2a55280a3675203496d302463b941834228b9875 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 7 Jun 2020 15:41:35 +0200 Subject: efi/libstub: arm: Print CPU boot mode and MMU state at boot On 32-bit ARM, we may boot at HYP mode, or with the MMU and caches off (or both), even though the EFI spec does not actually support this. While booting at HYP mode is something we might tolerate, fiddling with the caches is a more serious issue, as disabling the caches is tricky to do safely from C code, and running without the Dcache makes it impossible to support unaligned memory accesses, which is another explicit requirement imposed by the EFI spec. So take note of the CPU mode and MMU state in the EFI stub diagnostic output so that we can easily diagnose any issues that may arise from this. E.g., EFI stub: Entering in SVC mode with MMU enabled Also, capture the CPSR and SCTLR system register values at EFI stub entry, and after ExitBootServices() returns, and check whether the MMU and Dcache were disabled at any point. If this is the case, a diagnostic message like the following will be emitted: efi: [Firmware Bug]: EFI stub was entered with MMU and Dcache disabled, please fix your firmware! efi: CPSR at EFI stub entry : 0x600001d3 efi: SCTLR at EFI stub entry : 0x00c51838 efi: CPSR after ExitBootServices() : 0x600001d3 efi: SCTLR after ExitBootServices(): 0x00c50838 Signed-off-by: Ard Biesheuvel Reviewed-by: Leif Lindholm --- arch/arm/include/asm/efi.h | 7 ++++ drivers/firmware/efi/arm-init.c | 34 +++++++++++++++++-- drivers/firmware/efi/libstub/arm32-stub.c | 54 ++++++++++++++++++++++++++++++- drivers/firmware/efi/libstub/efi-stub.c | 3 ++ drivers/firmware/efi/libstub/efistub.h | 2 ++ include/linux/efi.h | 1 + 6 files changed, 98 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 84dc0ba822f5..5dcf3c6011b7 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -87,4 +87,11 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, return dram_base + SZ_512M; } +struct efi_arm_entry_state { + u32 cpsr_before_ebs; + u32 sctlr_before_ebs; + u32 cpsr_after_ebs; + u32 sctlr_after_ebs; +}; + #endif /* _ASM_ARM_EFI_H */ diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 6f4baf70db16..71c445d20258 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -52,9 +52,11 @@ static phys_addr_t __init efi_to_phys(unsigned long addr) } static __initdata unsigned long screen_info_table = EFI_INVALID_TABLE_ADDR; +static __initdata unsigned long cpu_state_table = EFI_INVALID_TABLE_ADDR; static const efi_config_table_type_t arch_tables[] __initconst = { {LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, &screen_info_table}, + {LINUX_EFI_ARM_CPU_STATE_TABLE_GUID, &cpu_state_table}, {} }; @@ -240,9 +242,37 @@ void __init efi_init(void) init_screen_info(); +#ifdef CONFIG_ARM /* ARM does not permit early mappings to persist across paging_init() */ - if (IS_ENABLED(CONFIG_ARM)) - efi_memmap_unmap(); + efi_memmap_unmap(); + + if (cpu_state_table != EFI_INVALID_TABLE_ADDR) { + struct efi_arm_entry_state *state; + bool dump_state = true; + + state = early_memremap_ro(cpu_state_table, + sizeof(struct efi_arm_entry_state)); + if (state == NULL) { + pr_warn("Unable to map CPU entry state table.\n"); + return; + } + + if ((state->sctlr_before_ebs & 1) == 0) + pr_warn(FW_BUG "EFI stub was entered with MMU and Dcache disabled, please fix your firmware!\n"); + else if ((state->sctlr_after_ebs & 1) == 0) + pr_warn(FW_BUG "ExitBootServices() returned with MMU and Dcache disabled, please fix your firmware!\n"); + else + dump_state = false; + + if (dump_state || efi_enabled(EFI_DBG)) { + pr_info("CPSR at EFI stub entry : 0x%08x\n", state->cpsr_before_ebs); + pr_info("SCTLR at EFI stub entry : 0x%08x\n", state->sctlr_before_ebs); + pr_info("CPSR after ExitBootServices() : 0x%08x\n", state->cpsr_after_ebs); + pr_info("SCTLR after ExitBootServices(): 0x%08x\n", state->sctlr_after_ebs); + } + early_memunmap(state, sizeof(struct efi_arm_entry_state)); + } +#endif } static bool efifb_overlaps_pci_range(const struct of_pci_range *range) diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c index 40243f524556..d08e5d55838c 100644 --- a/drivers/firmware/efi/libstub/arm32-stub.c +++ b/drivers/firmware/efi/libstub/arm32-stub.c @@ -7,10 +7,49 @@ #include "efistub.h" +static efi_guid_t cpu_state_guid = LINUX_EFI_ARM_CPU_STATE_TABLE_GUID; + +struct efi_arm_entry_state *efi_entry_state; + +static void get_cpu_state(u32 *cpsr, u32 *sctlr) +{ + asm("mrs %0, cpsr" : "=r"(*cpsr)); + if ((*cpsr & MODE_MASK) == HYP_MODE) + asm("mrc p15, 4, %0, c1, c0, 0" : "=r"(*sctlr)); + else + asm("mrc p15, 0, %0, c1, c0, 0" : "=r"(*sctlr)); +} + efi_status_t check_platform_features(void) { + efi_status_t status; + u32 cpsr, sctlr; int block; + get_cpu_state(&cpsr, &sctlr); + + efi_info("Entering in %s mode with MMU %sabled\n", + ((cpsr & MODE_MASK) == HYP_MODE) ? "HYP" : "SVC", + (sctlr & 1) ? "en" : "dis"); + + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + sizeof(*efi_entry_state), + (void **)&efi_entry_state); + if (status != EFI_SUCCESS) { + efi_err("allocate_pool() failed\n"); + return status; + } + + efi_entry_state->cpsr_before_ebs = cpsr; + efi_entry_state->sctlr_before_ebs = sctlr; + + status = efi_bs_call(install_configuration_table, &cpu_state_guid, + efi_entry_state); + if (status != EFI_SUCCESS) { + efi_err("install_configuration_table() failed\n"); + goto free_state; + } + /* non-LPAE kernels can run anywhere */ if (!IS_ENABLED(CONFIG_ARM_LPAE)) return EFI_SUCCESS; @@ -19,9 +58,22 @@ efi_status_t check_platform_features(void) block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0); if (block < 5) { efi_err("This LPAE kernel is not supported by your CPU\n"); - return EFI_UNSUPPORTED; + status = EFI_UNSUPPORTED; + goto drop_table; } return EFI_SUCCESS; + +drop_table: + efi_bs_call(install_configuration_table, &cpu_state_guid, NULL); +free_state: + efi_bs_call(free_pool, efi_entry_state); + return status; +} + +void efi_handle_post_ebs_state(void) +{ + get_cpu_state(&efi_entry_state->cpsr_after_ebs, + &efi_entry_state->sctlr_after_ebs); } static efi_guid_t screen_info_guid = LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID; diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c index e97370bdfdb0..3318ec3f8e5b 100644 --- a/drivers/firmware/efi/libstub/efi-stub.c +++ b/drivers/firmware/efi/libstub/efi-stub.c @@ -329,6 +329,9 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, if (status != EFI_SUCCESS) goto fail_free_initrd; + if (IS_ENABLED(CONFIG_ARM)) + efi_handle_post_ebs_state(); + efi_enter_kernel(image_addr, fdt_addr, fdt_totalsize((void *)fdt_addr)); /* not reached */ diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index ac756f1fdb1a..2c9d42264c29 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -777,4 +777,6 @@ efi_status_t efi_load_initrd(efi_loaded_image_t *image, unsigned long soft_limit, unsigned long hard_limit); +void efi_handle_post_ebs_state(void); + #endif diff --git a/include/linux/efi.h b/include/linux/efi.h index c3449c9699d0..bb35f3305e55 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -350,6 +350,7 @@ void efi_native_runtime_setup(void); * associated with ConOut */ #define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, 0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95) +#define LINUX_EFI_ARM_CPU_STATE_TABLE_GUID EFI_GUID(0xef79e4aa, 0x3c3d, 0x4989, 0xb9, 0x02, 0x07, 0xa9, 0x43, 0xe5, 0x50, 0xd2) #define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) #define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b) #define LINUX_EFI_TPM_EVENT_LOG_GUID EFI_GUID(0xb7799cb0, 0xeca2, 0x4943, 0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa) -- cgit v1.2.3 From fe557319aa06c23cffc9346000f119547e0f289a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:53 +0200 Subject: maccess: rename probe_kernel_{read,write} to copy_{from,to}_kernel_nofault Better describe what these functions do. Suggested-by: Linus Torvalds Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/ftrace.c | 3 ++- arch/arm/kernel/kgdb.c | 2 +- arch/arm64/kernel/insn.c | 4 ++-- arch/csky/kernel/ftrace.c | 5 ++-- arch/ia64/kernel/ftrace.c | 6 ++--- arch/mips/kernel/kprobes.c | 6 ++--- arch/nds32/kernel/ftrace.c | 5 ++-- arch/parisc/kernel/ftrace.c | 2 +- arch/parisc/kernel/kgdb.c | 4 ++-- arch/parisc/lib/memcpy.c | 2 +- arch/powerpc/kernel/module_64.c | 6 +++-- arch/powerpc/kernel/trace/ftrace.c | 4 ++-- arch/powerpc/lib/inst.c | 6 ++--- arch/powerpc/perf/core-book3s.c | 3 ++- arch/riscv/kernel/ftrace.c | 3 ++- arch/riscv/kernel/kgdb.c | 4 ++-- arch/riscv/kernel/patch.c | 4 ++-- arch/s390/kernel/ftrace.c | 4 ++-- arch/sh/kernel/ftrace.c | 6 ++--- arch/um/kernel/maccess.c | 2 +- arch/x86/include/asm/ptrace.h | 4 ++-- arch/x86/kernel/dumpstack.c | 2 +- arch/x86/kernel/ftrace.c | 10 ++++---- arch/x86/kernel/kgdb.c | 6 ++--- arch/x86/kernel/kprobes/core.c | 5 ++-- arch/x86/kernel/kprobes/opt.c | 2 +- arch/x86/kernel/traps.c | 3 ++- arch/x86/mm/fault.c | 2 +- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/maccess.c | 4 ++-- arch/x86/xen/enlighten_pv.c | 2 +- drivers/char/mem.c | 2 +- drivers/dio/dio.c | 6 +++-- drivers/input/serio/hp_sdc.c | 2 +- drivers/misc/kgdbts.c | 6 ++--- drivers/video/fbdev/hpfb.c | 2 +- fs/proc/kcore.c | 3 ++- include/linux/uaccess.h | 13 +++++----- kernel/debug/debug_core.c | 6 ++--- kernel/debug/gdbstub.c | 6 ++--- kernel/debug/kdb/kdb_main.c | 3 ++- kernel/debug/kdb/kdb_support.c | 7 +++--- kernel/kthread.c | 2 +- kernel/trace/bpf_trace.c | 4 ++-- kernel/trace/trace_kprobe.c | 4 ++-- kernel/workqueue.c | 10 ++++---- mm/debug.c | 8 +++---- mm/maccess.c | 49 +++++++++++++++++++------------------- mm/rodata_test.c | 2 +- mm/slub.c | 2 +- 50 files changed, 138 insertions(+), 122 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 10499d44964a..9a79ef6b1876 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -84,7 +84,8 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old, old = __opcode_to_mem_arm(old); if (validate) { - if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(&replaced, (void *)pc, + MCOUNT_INSN_SIZE)) return -EFAULT; if (replaced != old) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 6a95b9296640..7bd30c0a4280 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -236,7 +236,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) /* patch_text() only supports int-sized breakpoints */ BUILD_BUG_ON(sizeof(int) != BREAK_INSTR_SIZE); - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 684d871ae38d..a107375005bc 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -135,7 +135,7 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp) int ret; __le32 val; - ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); + ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE); if (!ret) *insnp = le32_to_cpu(val); @@ -151,7 +151,7 @@ static int __kprobes __aarch64_insn_write(void *addr, __le32 insn) raw_spin_lock_irqsave(&patch_lock, flags); waddr = patch_map(addr, FIX_TEXT_POKE0); - ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE); + ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE); patch_unmap(FIX_TEXT_POKE0); raw_spin_unlock_irqrestore(&patch_lock, flags); diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c index 3c425b84e3be..b4a7ec1517ff 100644 --- a/arch/csky/kernel/ftrace.c +++ b/arch/csky/kernel/ftrace.c @@ -72,7 +72,8 @@ static int ftrace_check_current_nop(unsigned long hook) uint16_t olds[7]; unsigned long hook_pos = hook - 2; - if (probe_kernel_read((void *)olds, (void *)hook_pos, sizeof(nops))) + if (copy_from_kernel_nofault((void *)olds, (void *)hook_pos, + sizeof(nops))) return -EFAULT; if (memcmp((void *)nops, (void *)olds, sizeof(nops))) { @@ -97,7 +98,7 @@ static int ftrace_modify_code(unsigned long hook, unsigned long target, make_jbsr(target, hook, call, nolr); - ret = probe_kernel_write((void *)hook_pos, enable ? call : nops, + ret = copy_to_kernel_nofault((void *)hook_pos, enable ? call : nops, sizeof(nops)); if (ret) return -EPERM; diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c index cee411e647ca..b2ab2d58fb30 100644 --- a/arch/ia64/kernel/ftrace.c +++ b/arch/ia64/kernel/ftrace.c @@ -108,7 +108,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, goto skip_check; /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -117,7 +117,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, skip_check: /* replace the text with the new text */ - if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE)) + if (copy_to_kernel_nofault(((void *)ip), new_code, MCOUNT_INSN_SIZE)) return -EPERM; flush_icache_range(ip, ip + MCOUNT_INSN_SIZE); @@ -129,7 +129,7 @@ static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr) unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE]; unsigned long ip = rec->ip; - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; if (rec->flags & FTRACE_FL_CONVERTED) { struct ftrace_call_insn *call_insn, *tmp_call; diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 6cfae2411c04..d043c2f897fc 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -86,9 +86,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) goto out; } - if ((probe_kernel_read(&prev_insn, p->addr - 1, - sizeof(mips_instruction)) == 0) && - insn_has_delayslot(prev_insn)) { + if (copy_from_kernel_nofault(&prev_insn, p->addr - 1, + sizeof(mips_instruction)) == 0 && + insn_has_delayslot(prev_insn)) { pr_notice("Kprobes for branch delayslot are not supported\n"); ret = -EINVAL; goto out; diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 22ab77ea27ad..3763b3f8c3db 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -131,13 +131,14 @@ static int __ftrace_modify_code(unsigned long pc, unsigned long *old_insn, unsigned long orig_insn[3]; if (validate) { - if (probe_kernel_read(orig_insn, (void *)pc, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(orig_insn, (void *)pc, + MCOUNT_INSN_SIZE)) return -EFAULT; if (memcmp(orig_insn, old_insn, MCOUNT_INSN_SIZE)) return -EINVAL; } - if (probe_kernel_write((void *)pc, new_insn, MCOUNT_INSN_SIZE)) + if (copy_to_kernel_nofault((void *)pc, new_insn, MCOUNT_INSN_SIZE)) return -EPERM; return 0; diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index b836fc61a24f..1df0f67ed667 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -172,7 +172,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) ip = (void *)(rec->ip + 4 - size); - ret = probe_kernel_read(insn, ip, size); + ret = copy_from_kernel_nofault(insn, ip, size); if (ret) return ret; diff --git a/arch/parisc/kernel/kgdb.c b/arch/parisc/kernel/kgdb.c index 664278db9b97..c4554ac13eac 100644 --- a/arch/parisc/kernel/kgdb.c +++ b/arch/parisc/kernel/kgdb.c @@ -154,8 +154,8 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { - int ret = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, - BREAK_INSTR_SIZE); + int ret = copy_from_kernel_nofault(bpt->saved_instr, + (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (ret) return ret; diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index 94a9fe2702c2..4b75388190b4 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -57,7 +57,7 @@ void * memcpy(void * dst,const void *src, size_t count) EXPORT_SYMBOL(raw_copy_in_user); EXPORT_SYMBOL(memcpy); -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { if ((unsigned long)unsafe_src < PAGE_SIZE) return false; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index f4c2fa190192..ae2b188365b1 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -756,7 +756,8 @@ int module_trampoline_target(struct module *mod, unsigned long addr, stub = (struct ppc64_stub_entry *)addr; - if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { + if (copy_from_kernel_nofault(&magic, &stub->magic, + sizeof(magic))) { pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; } @@ -766,7 +767,8 @@ int module_trampoline_target(struct module *mod, unsigned long addr, return -EFAULT; } - if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { + if (copy_from_kernel_nofault(&funcdata, &stub->funcdata, + sizeof(funcdata))) { pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; } diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 5e399628f51a..c1fede6ec934 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -226,7 +226,7 @@ __ftrace_make_nop(struct module *mod, unsigned long ip = rec->ip; unsigned long tramp; - if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure that that this is still a 24bit jump */ @@ -249,7 +249,7 @@ __ftrace_make_nop(struct module *mod, pr_devel("ip:%lx jumps to %lx", ip, tramp); /* Find where the trampoline jumps to */ - if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { + if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) { pr_err("Failed to read %lx\n", tramp); return -EFAULT; } diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index aedfd6e31e53..6c7a20af9fd6 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -33,11 +33,11 @@ int probe_kernel_read_inst(struct ppc_inst *inst, unsigned int val, suffix; int err; - err = probe_kernel_read(&val, src, sizeof(val)); + err = copy_from_kernel_nofault(&val, src, sizeof(val)); if (err) return err; if (get_op(val) == OP_PREFIX) { - err = probe_kernel_read(&suffix, (void *)src + 4, 4); + err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); @@ -64,7 +64,7 @@ int probe_kernel_read_inst(struct ppc_inst *inst, unsigned int val; int err; - err = probe_kernel_read(&val, src, sizeof(val)); + err = copy_from_kernel_nofault(&val, src, sizeof(val)); if (!err) *inst = ppc_inst(val); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 13b9dd5e4a76..efe97ff82557 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -418,7 +418,8 @@ static __u64 power_pmu_bhrb_to(u64 addr) __u64 target; if (is_kernel_addr(addr)) { - if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + if (copy_from_kernel_nofault(&instr, (void *)addr, + sizeof(instr))) return 0; return branch_target((struct ppc_inst *)&instr); diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 08396614d6f4..2ff63d0cbb50 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -38,7 +38,8 @@ static int ftrace_check_current_call(unsigned long hook_pos, * Read the text we want to modify; * return must be -EFAULT on read error */ - if (probe_kernel_read(replaced, (void *)hook_pos, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)hook_pos, + MCOUNT_INSN_SIZE)) return -EFAULT; /* diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index f16ade84a11f..a21fb21883e7 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -153,7 +153,7 @@ int do_single_step(struct pt_regs *regs) stepped_address = addr; /* Replace the op code with the break instruction */ - error = probe_kernel_write((void *)stepped_address, + error = copy_to_kernel_nofault((void *)stepped_address, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); /* Flush and return */ @@ -173,7 +173,7 @@ int do_single_step(struct pt_regs *regs) static void undo_single_step(struct pt_regs *regs) { if (stepped_opcode != 0) { - probe_kernel_write((void *)stepped_address, + copy_to_kernel_nofault((void *)stepped_address, (void *)&stepped_opcode, BREAK_INSTR_SIZE); flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE); diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index d4a64dfed342..3fe7a5296aa5 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -63,7 +63,7 @@ static int patch_insn_write(void *addr, const void *insn, size_t len) waddr = patch_map(addr, FIX_TEXT_POKE0); - ret = probe_kernel_write(waddr, insn, len); + ret = copy_to_kernel_nofault(waddr, insn, len); patch_unmap(FIX_TEXT_POKE0); @@ -76,7 +76,7 @@ NOKPROBE_SYMBOL(patch_insn_write); #else static int patch_insn_write(void *addr, const void *insn, size_t len) { - return probe_kernel_write(addr, insn, len); + return copy_to_kernel_nofault(addr, insn, len); } NOKPROBE_SYMBOL(patch_insn_write); #endif /* CONFIG_MMU */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 44e01dd1e624..b388e87a08bf 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -83,7 +83,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, { struct ftrace_insn orig, new, old; - if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; if (addr == MCOUNT_ADDR) { /* Initial code replacement */ @@ -105,7 +105,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { struct ftrace_insn orig, new, old; - if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; /* Replace nop with an ftrace call. */ ftrace_generate_nop_insn(&orig); diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 1b04270e5460..0646c5961846 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -119,7 +119,7 @@ static void ftrace_mod_code(void) * But if one were to fail, then they all should, and if one were * to succeed, then they all should. */ - mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, + mod_code_status = copy_to_kernel_nofault(mod_code_ip, mod_code_newcode, MCOUNT_INSN_SIZE); /* if we fail, then kill any new writers */ @@ -203,7 +203,7 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, */ /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -268,7 +268,7 @@ static int ftrace_mod(unsigned long ip, unsigned long old_addr, { unsigned char code[MCOUNT_INSN_SIZE]; - if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(code, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; if (old_addr != __raw_readl((unsigned long *)code)) diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c index e929c0966696..8ccd56813f68 100644 --- a/arch/um/kernel/maccess.c +++ b/arch/um/kernel/maccess.c @@ -7,7 +7,7 @@ #include #include -bool probe_kernel_read_allowed(const void *src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *src, size_t size) { void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index ebedeab48704..255b2dde2c1b 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -278,7 +278,7 @@ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs } /* To avoid include hell, we can't include uaccess.h */ -extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long copy_from_kernel_nofault(void *dst, const void *src, size_t size); /** * regs_get_kernel_stack_nth() - get Nth entry of the stack @@ -298,7 +298,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, addr = regs_get_kernel_stack_nth_addr(regs, n); if (addr) { - ret = probe_kernel_read(&val, addr, sizeof(val)); + ret = copy_from_kernel_nofault(&val, addr, sizeof(val)); if (!ret) return val; } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 456511b2284e..b037cfa7c0c5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -106,7 +106,7 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl) bad_ip = user_mode(regs) && __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX); - if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue, + if (bad_ip || copy_from_kernel_nofault(opcodes, (u8 *)prologue, OPCODE_BUFSIZE)) { printk("%sCode: Bad RIP value.\n", loglvl); } else { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c84d28e90a58..51504566b3a6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -86,7 +86,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code) * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ - if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { + if (copy_from_kernel_nofault(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { WARN_ON(1); return -EFAULT; } @@ -355,7 +355,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE); /* Copy ftrace_caller onto the trampoline memory */ - ret = probe_kernel_read(trampoline, (void *)start_offset, size); + ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size); if (WARN_ON(ret < 0)) goto fail; @@ -363,13 +363,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* The trampoline ends with ret(q) */ retq = (unsigned long)ftrace_stub; - ret = probe_kernel_read(ip, (void *)retq, RET_SIZE); + ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); if (WARN_ON(ret < 0)) goto fail; if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller); - ret = probe_kernel_read(ip, (void *)retq, RET_SIZE); + ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); if (WARN_ON(ret < 0)) goto fail; } @@ -506,7 +506,7 @@ static void *addr_from_call(void *ptr) union text_poke_insn call; int ret; - ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE); + ret = copy_from_kernel_nofault(&call, ptr, CALL_INSN_SIZE); if (WARN_ON_ONCE(ret < 0)) return NULL; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index c44fe7d8d9a4..68acd30c6b87 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -732,11 +732,11 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) int err; bpt->type = BP_BREAKPOINT; - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; - err = probe_kernel_write((char *)bpt->bpt_addr, + err = copy_to_kernel_nofault((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); if (!err) return err; @@ -768,7 +768,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) return 0; knl_write: - return probe_kernel_write((char *)bpt->bpt_addr, + return copy_to_kernel_nofault((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3bafe1bd4dc7..f09985c87d73 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -243,7 +243,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) * Fortunately, we know that the original code is the ideal 5-byte * long NOP. */ - if (probe_kernel_read(buf, (void *)addr, + if (copy_from_kernel_nofault(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) return 0UL; @@ -346,7 +346,8 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) return 0; /* This can access kernel text if given address is not recovered */ - if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE)) + if (copy_from_kernel_nofault(dest, (void *)recovered_insn, + MAX_INSN_SIZE)) return 0; kernel_insn_init(insn, dest, MAX_INSN_SIZE); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 321c19950285..7af4c61dde52 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -56,7 +56,7 @@ found: * overwritten by jump destination address. In this case, original * bytes must be recovered from op->optinsn.copied_insn buffer. */ - if (probe_kernel_read(buf, (void *)addr, + if (copy_from_kernel_nofault(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) return 0UL; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index af75109485c2..7003f2e7b163 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -488,7 +488,8 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, u8 insn_buf[MAX_INSN_SIZE]; struct insn insn; - if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE)) + if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, + MAX_INSN_SIZE)) return GP_NO_HINT; kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 66be9bd60307..e996aa3833b8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -442,7 +442,7 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index) return; } - if (probe_kernel_read(&desc, (void *)(gdt->address + offset), + if (copy_from_kernel_nofault(&desc, (void *)(gdt->address + offset), sizeof(struct ldttss_desc))) { pr_alert("%s: 0x%hx -- GDT entry is not readable\n", name, index); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bda909e3e37e..8b4afad84f4a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -737,7 +737,7 @@ static void __init test_wp_bit(void) __set_fixmap(FIX_WP_TEST, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO); - if (probe_kernel_write((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) { + if (copy_to_kernel_nofault((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) { clear_fixmap(FIX_WP_TEST); printk(KERN_CONT "Ok.\n"); return; diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index e1d7d7477c22..92ec176a7293 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -9,7 +9,7 @@ static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits) return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); } -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { unsigned long vaddr = (unsigned long)unsafe_src; @@ -22,7 +22,7 @@ bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) canonical_address(vaddr, boot_cpu_data.x86_virt_bits) == vaddr; } #else -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return (unsigned long)unsafe_src >= TASK_SIZE_MAX; } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 33b309d65955..acc49fa6a097 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -386,7 +386,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) preempt_disable(); - probe_kernel_read(&dummy, v, 1); + copy_from_kernel_nofault(&dummy, v, 1); if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 31cae88a730b..934c92dcb9ab 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -171,7 +171,7 @@ static ssize_t read_mem(struct file *file, char __user *buf, if (!ptr) goto failed; - probe = probe_kernel_read(bounce, ptr, sz); + probe = copy_from_kernel_nofault(bounce, ptr, sz); unxlate_dev_mem_ptr(p, ptr); if (probe) goto failed; diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c index c9aa15fb86a9..193b40e7aec0 100644 --- a/drivers/dio/dio.c +++ b/drivers/dio/dio.c @@ -135,7 +135,8 @@ int __init dio_find(int deviceid) else va = ioremap(pa, PAGE_SIZE); - if (probe_kernel_read(&i, (unsigned char *)va + DIO_IDOFF, 1)) { + if (copy_from_kernel_nofault(&i, + (unsigned char *)va + DIO_IDOFF, 1)) { if (scode >= DIOII_SCBASE) iounmap(va); continue; /* no board present at that select code */ @@ -208,7 +209,8 @@ static int __init dio_init(void) else va = ioremap(pa, PAGE_SIZE); - if (probe_kernel_read(&i, (unsigned char *)va + DIO_IDOFF, 1)) { + if (copy_from_kernel_nofault(&i, + (unsigned char *)va + DIO_IDOFF, 1)) { if (scode >= DIOII_SCBASE) iounmap(va); continue; /* no board present at that select code */ diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 654252361653..13eacf6ab431 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -1021,7 +1021,7 @@ static int __init hp_sdc_register(void) hp_sdc.base_io = (unsigned long) 0xf0428000; hp_sdc.data_io = (unsigned long) hp_sdc.base_io + 1; hp_sdc.status_io = (unsigned long) hp_sdc.base_io + 3; - if (!probe_kernel_read(&i, (unsigned char *)hp_sdc.data_io, 1)) + if (!copy_from_kernel_nofault(&i, (unsigned char *)hp_sdc.data_io, 1)) hp_sdc.dev = (void *)1; hp_sdc.dev_err = hp_sdc_init(); #endif diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index bccd341e9ae1..d5d2af4d10e6 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -828,7 +828,7 @@ static void run_plant_and_detach_test(int is_early) char before[BREAK_INSTR_SIZE]; char after[BREAK_INSTR_SIZE]; - probe_kernel_read(before, (char *)kgdbts_break_test, + copy_from_kernel_nofault(before, (char *)kgdbts_break_test, BREAK_INSTR_SIZE); init_simple_test(); ts.tst = plant_and_detach_test; @@ -836,8 +836,8 @@ static void run_plant_and_detach_test(int is_early) /* Activate test with initial breakpoint */ if (!is_early) kgdb_breakpoint(); - probe_kernel_read(after, (char *)kgdbts_break_test, - BREAK_INSTR_SIZE); + copy_from_kernel_nofault(after, (char *)kgdbts_break_test, + BREAK_INSTR_SIZE); if (memcmp(before, after, BREAK_INSTR_SIZE)) { printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n"); panic("kgdb memory corruption"); diff --git a/drivers/video/fbdev/hpfb.c b/drivers/video/fbdev/hpfb.c index f02be0db335e..8d418abdd767 100644 --- a/drivers/video/fbdev/hpfb.c +++ b/drivers/video/fbdev/hpfb.c @@ -402,7 +402,7 @@ int __init hpfb_init(void) if (err) return err; - err = probe_kernel_read(&i, (unsigned char *)INTFBVADDR + DIO_IDOFF, 1); + err = copy_from_kernel_nofault(&i, (unsigned char *)INTFBVADDR + DIO_IDOFF, 1); if (!err && (i == DIO_ID_FBUFFER) && topcat_sid_ok(sid = DIO_SECID(INTFBVADDR))) { if (!request_mem_region(INTFBPADDR, DIO_DEVSIZE, "Internal Topcat")) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 8ba492d44e68..e502414b3556 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -512,7 +512,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) * Using bounce buffer to bypass the * hardened user copy kernel text checks. */ - if (probe_kernel_read(buf, (void *) start, tsz)) { + if (copy_from_kernel_nofault(buf, (void *)start, + tsz)) { if (clear_user(buffer, tsz)) { ret = -EFAULT; goto out; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 7bcadca22100..70a3d9cd9113 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -301,13 +301,14 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, return 0; } -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size); +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); -extern long probe_kernel_read(void *dst, const void *src, size_t size); -extern long probe_user_read(void *dst, const void __user *src, size_t size); +long copy_from_kernel_nofault(void *dst, const void *src, size_t size); +long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); -extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); -extern long notrace probe_user_write(void __user *dst, const void *src, size_t size); +extern long probe_user_read(void *dst, const void __user *src, size_t size); +extern long notrace probe_user_write(void __user *dst, const void *src, + size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count); @@ -324,7 +325,7 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count); * Returns 0 on success, or -EFAULT. */ #define probe_kernel_address(addr, retval) \ - probe_kernel_read(&retval, addr, sizeof(retval)) + copy_from_kernel_nofault(&retval, addr, sizeof(retval)) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index ccc0f98abdd4..bc8d25f2ac8a 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -169,18 +169,18 @@ int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; - err = probe_kernel_write((char *)bpt->bpt_addr, + err = copy_to_kernel_nofault((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); return err; } int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { - return probe_kernel_write((char *)bpt->bpt_addr, + return copy_to_kernel_nofault((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 4b280fc7dd67..61774aec46b4 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -247,7 +247,7 @@ char *kgdb_mem2hex(char *mem, char *buf, int count) */ tmp = buf + count; - err = probe_kernel_read(tmp, mem, count); + err = copy_from_kernel_nofault(tmp, mem, count); if (err) return NULL; while (count > 0) { @@ -283,7 +283,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count) *tmp_raw |= hex_to_bin(*tmp_hex--) << 4; } - return probe_kernel_write(mem, tmp_raw, count); + return copy_to_kernel_nofault(mem, tmp_raw, count); } /* @@ -335,7 +335,7 @@ static int kgdb_ebin2mem(char *buf, char *mem, int count) size++; } - return probe_kernel_write(mem, c, size); + return copy_to_kernel_nofault(mem, c, size); } #if DBG_MAX_REG_NUM > 0 diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index ec190569f690..5c7949061671 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2326,7 +2326,8 @@ void kdb_ps1(const struct task_struct *p) int cpu; unsigned long tmp; - if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long))) + if (!p || + copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long))) return; cpu = kdb_process_cpu(p); diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index b8e6306e7e13..004c5b6c87f8 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -325,7 +325,7 @@ char *kdb_strdup(const char *str, gfp_t type) */ int kdb_getarea_size(void *res, unsigned long addr, size_t size) { - int ret = probe_kernel_read((char *)res, (char *)addr, size); + int ret = copy_from_kernel_nofault((char *)res, (char *)addr, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr); @@ -350,7 +350,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size) */ int kdb_putarea_size(unsigned long addr, void *res, size_t size) { - int ret = probe_kernel_read((char *)addr, (char *)res, size); + int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size); if (ret) { if (!KDB_STATE(SUPPRESS)) { kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr); @@ -624,7 +624,8 @@ char kdb_task_state_char (const struct task_struct *p) char state; unsigned long tmp; - if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long))) + if (!p || + copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long))) return 'E'; cpu = kdb_process_cpu(p); diff --git a/kernel/kthread.c b/kernel/kthread.c index 8e3d2d7fdf5e..132f84a5fde3 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -201,7 +201,7 @@ void *kthread_probe_data(struct task_struct *task) struct kthread *kthread = to_kthread(task); void *data = NULL; - probe_kernel_read(&data, &kthread->data, sizeof(data)); + copy_from_kernel_nofault(&data, &kthread->data, sizeof(data)); return data; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e729c9e587a0..204afc12425f 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -196,7 +196,7 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) if (unlikely(ret < 0)) goto fail; - ret = probe_kernel_read(dst, unsafe_ptr, size); + ret = copy_from_kernel_nofault(dst, unsafe_ptr, size); if (unlikely(ret < 0)) goto fail; return ret; @@ -661,7 +661,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, copy_size = (fmt[i + 2] == '4') ? 4 : 16; - err = probe_kernel_read(bufs->buf[memcpy_cnt], + err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt], (void *) (long) args[fmt_cnt], copy_size); if (err < 0) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6048f1be26d2..841c74863ff8 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1222,7 +1222,7 @@ fetch_store_strlen(unsigned long addr) #endif do { - ret = probe_kernel_read(&c, (u8 *)addr + len, 1); + ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1); len++; } while (c && ret == 0 && len < MAX_STRING_SIZE); @@ -1300,7 +1300,7 @@ probe_mem_read(void *dest, void *src, size_t size) if ((unsigned long)src < TASK_SIZE) return probe_mem_read_user(dest, src, size); #endif - return probe_kernel_read(dest, src, size); + return copy_from_kernel_nofault(dest, src, size); } /* Note that we don't verify it, since the code does not come from user space */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9fbe1e237563..c41c3c17b86a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4638,11 +4638,11 @@ void print_worker_info(const char *log_lvl, struct task_struct *task) * Carefully copy the associated workqueue's workfn, name and desc. * Keep the original last '\0' in case the original is garbage. */ - probe_kernel_read(&fn, &worker->current_func, sizeof(fn)); - probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq)); - probe_kernel_read(&wq, &pwq->wq, sizeof(wq)); - probe_kernel_read(name, wq->name, sizeof(name) - 1); - probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); + copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn)); + copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq)); + copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq)); + copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1); + copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1); if (fn || name[0] || desc[0]) { printk("%sWorkqueue: %s %ps", log_lvl, name, fn); diff --git a/mm/debug.c b/mm/debug.c index b5b1de8c71ac..4f376514744d 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -120,9 +120,9 @@ void __dump_page(struct page *page, const char *reason) * mapping can be invalid pointer and we don't want to crash * accessing it, so probe everything depending on it carefully */ - if (probe_kernel_read(&host, &mapping->host, + if (copy_from_kernel_nofault(&host, &mapping->host, sizeof(struct inode *)) || - probe_kernel_read(&a_ops, &mapping->a_ops, + copy_from_kernel_nofault(&a_ops, &mapping->a_ops, sizeof(struct address_space_operations *))) { pr_warn("failed to read mapping->host or a_ops, mapping not a valid kernel address?\n"); goto out_mapping; @@ -133,7 +133,7 @@ void __dump_page(struct page *page, const char *reason) goto out_mapping; } - if (probe_kernel_read(&dentry_first, + if (copy_from_kernel_nofault(&dentry_first, &host->i_dentry.first, sizeof(struct hlist_node *))) { pr_warn("mapping->a_ops:%ps with invalid mapping->host inode address %px\n", a_ops, host); @@ -146,7 +146,7 @@ void __dump_page(struct page *page, const char *reason) } dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); - if (probe_kernel_read(&dentry, dentry_ptr, + if (copy_from_kernel_nofault(&dentry, dentry_ptr, sizeof(struct dentry))) { pr_warn("mapping->aops:%ps with invalid mapping->host->i_dentry.first %px\n", a_ops, dentry_ptr); diff --git a/mm/maccess.c b/mm/maccess.c index 88845eda5047..cc5d8c6233c0 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -6,14 +6,15 @@ #include #include -bool __weak probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src, + size_t size) { return true; } #ifdef HAVE_GET_KERNEL_NOFAULT -#define probe_kernel_read_loop(dst, src, len, type, err_label) \ +#define copy_from_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __get_kernel_nofault(dst, src, type, err_label); \ dst += sizeof(type); \ @@ -21,25 +22,25 @@ bool __weak probe_kernel_read_allowed(const void *unsafe_src, size_t size) len -= sizeof(type); \ } -long probe_kernel_read(void *dst, const void *src, size_t size) +long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { - if (!probe_kernel_read_allowed(src, size)) + if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; pagefault_disable(); - probe_kernel_read_loop(dst, src, size, u64, Efault); - probe_kernel_read_loop(dst, src, size, u32, Efault); - probe_kernel_read_loop(dst, src, size, u16, Efault); - probe_kernel_read_loop(dst, src, size, u8, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u64, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u32, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u16, Efault); + copy_from_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } -EXPORT_SYMBOL_GPL(probe_kernel_read); +EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); -#define probe_kernel_write_loop(dst, src, len, type, err_label) \ +#define copy_to_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __put_kernel_nofault(dst, src, type, err_label); \ dst += sizeof(type); \ @@ -47,13 +48,13 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); len -= sizeof(type); \ } -long probe_kernel_write(void *dst, const void *src, size_t size) +long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { pagefault_disable(); - probe_kernel_write_loop(dst, src, size, u64, Efault); - probe_kernel_write_loop(dst, src, size, u32, Efault); - probe_kernel_write_loop(dst, src, size, u16, Efault); - probe_kernel_write_loop(dst, src, size, u8, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u64, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u32, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u16, Efault); + copy_to_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: @@ -67,7 +68,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) if (unlikely(count <= 0)) return 0; - if (!probe_kernel_read_allowed(unsafe_addr, count)) + if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; pagefault_disable(); @@ -87,7 +88,7 @@ Efault: } #else /* HAVE_GET_KERNEL_NOFAULT */ /** - * probe_kernel_read(): safely attempt to read from kernel-space + * copy_from_kernel_nofault(): safely attempt to read from kernel-space * @dst: pointer to the buffer that shall take the data * @src: address to read from * @size: size of the data chunk @@ -98,15 +99,15 @@ Efault: * * We ensure that the copy_from_user is executed in atomic context so that * do_page_fault() doesn't attempt to take mmap_lock. This makes - * probe_kernel_read() suitable for use within regions where the caller + * copy_from_kernel_nofault() suitable for use within regions where the caller * already holds mmap_lock, or other locks which nest inside mmap_lock. */ -long probe_kernel_read(void *dst, const void *src, size_t size) +long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); - if (!probe_kernel_read_allowed(src, size)) + if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; set_fs(KERNEL_DS); @@ -120,10 +121,10 @@ long probe_kernel_read(void *dst, const void *src, size_t size) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(probe_kernel_read); +EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); /** - * probe_kernel_write(): safely attempt to write to a location + * copy_to_kernel_nofault(): safely attempt to write to a location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk @@ -131,7 +132,7 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_kernel_write(void *dst, const void *src, size_t size) +long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); @@ -174,7 +175,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) if (unlikely(count <= 0)) return 0; - if (!probe_kernel_read_allowed(unsafe_addr, count)) + if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; set_fs(KERNEL_DS); diff --git a/mm/rodata_test.c b/mm/rodata_test.c index 5e313fa93276..2a99df7beeb3 100644 --- a/mm/rodata_test.c +++ b/mm/rodata_test.c @@ -25,7 +25,7 @@ void rodata_test(void) } /* test 2: write to the variable; this should fault */ - if (!probe_kernel_write((void *)&rodata_test_data, + if (!copy_to_kernel_nofault((void *)&rodata_test_data, (void *)&zero, sizeof(zero))) { pr_err("test data was not read only\n"); return; diff --git a/mm/slub.c b/mm/slub.c index b8f798b50d44..fe81773fd97e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -292,7 +292,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) return get_freepointer(s, object); freepointer_addr = (unsigned long)object + s->offset; - probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p)); + copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p)); return freelist_ptr(s, p, freepointer_addr); } -- cgit v1.2.3 From c0ee37e85e0e47402b8bbe35b6cec8e06937ca58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:54 +0200 Subject: maccess: rename probe_user_{read,write} to copy_{from,to}_user_nofault Better describe what these functions do. Suggested-by: Linus Torvalds Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/process.c | 3 ++- arch/powerpc/kvm/book3s_64_mmu_radix.c | 4 ++-- arch/powerpc/lib/inst.c | 6 +++--- arch/powerpc/oprofile/backtrace.c | 6 ++++-- arch/powerpc/perf/callchain_32.c | 2 +- arch/powerpc/perf/callchain_64.c | 2 +- arch/powerpc/perf/core-book3s.c | 3 ++- arch/powerpc/sysdev/fsl_pci.c | 4 ++-- include/linux/uaccess.h | 4 ++-- kernel/trace/bpf_trace.c | 4 ++-- kernel/trace/trace_kprobe.c | 2 +- mm/maccess.c | 12 ++++++------ 12 files changed, 28 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7bb7faf84490..d4d0d1048500 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1305,7 +1305,8 @@ void show_user_instructions(struct pt_regs *regs) for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) { int instr; - if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) { + if (copy_from_user_nofault(&instr, (void __user *)pc, + sizeof(instr))) { seq_buf_printf(&s, "XXXXXXXX "); continue; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 3cb0c9843d01..e738ea652192 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -64,9 +64,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, isync(); if (is_load) - ret = probe_user_read(to, (const void __user *)from, n); + ret = copy_from_user_nofault(to, (const void __user *)from, n); else - ret = probe_user_write((void __user *)to, from, n); + ret = copy_to_user_nofault((void __user *)to, from, n); /* switch the pid first to avoid running host with unallocated pid */ if (quadrant == 1 && pid != old_pid) diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index 6c7a20af9fd6..9cc17eb62462 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -15,11 +15,11 @@ int probe_user_read_inst(struct ppc_inst *inst, unsigned int val, suffix; int err; - err = probe_user_read(&val, nip, sizeof(val)); + err = copy_from_user_nofault(&val, nip, sizeof(val)); if (err) return err; if (get_op(val) == OP_PREFIX) { - err = probe_user_read(&suffix, (void __user *)nip + 4, 4); + err = copy_from_user_nofault(&suffix, (void __user *)nip + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); @@ -51,7 +51,7 @@ int probe_user_read_inst(struct ppc_inst *inst, unsigned int val; int err; - err = probe_user_read(&val, nip, sizeof(val)); + err = copy_from_user_nofault(&val, nip, sizeof(val)); if (!err) *inst = ppc_inst(val); diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index 6f347fa29f41..9db7ada79d10 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -33,7 +33,8 @@ static unsigned int user_getsp32(unsigned int sp, int is_first) * which means that we've done all that we can do from * interrupt context. */ - if (probe_user_read(stack_frame, (void __user *)p, sizeof(stack_frame))) + if (copy_from_user_nofault(stack_frame, (void __user *)p, + sizeof(stack_frame))) return 0; if (!is_first) @@ -51,7 +52,8 @@ static unsigned long user_getsp64(unsigned long sp, int is_first) { unsigned long stack_frame[3]; - if (probe_user_read(stack_frame, (void __user *)sp, sizeof(stack_frame))) + if (copy_from_user_nofault(stack_frame, (void __user *)sp, + sizeof(stack_frame))) return 0; if (!is_first) diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index f7d888d39cd3..542e68b8eae0 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -44,7 +44,7 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) ((unsigned long)ptr & 3)) return -EFAULT; - rc = probe_user_read(ret, ptr, sizeof(*ret)); + rc = copy_from_user_nofault(ret, ptr, sizeof(*ret)); if (IS_ENABLED(CONFIG_PPC64) && rc) return read_user_stack_slow(ptr, ret, 4); diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 814d1c2c2b9c..fa2a1b83b9b0 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -50,7 +50,7 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) ((unsigned long)ptr & 7)) return -EFAULT; - if (!probe_user_read(ret, ptr, sizeof(*ret))) + if (!copy_from_user_nofault(ret, ptr, sizeof(*ret))) return 0; return read_user_stack_slow(ptr, ret, 8); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index efe97ff82557..cd6a742ac6ef 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -426,7 +426,8 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Userspace: need copy instruction here then translate it */ - if (probe_user_read(&instr, (unsigned int __user *)addr, sizeof(instr))) + if (copy_from_user_nofault(&instr, (unsigned int __user *)addr, + sizeof(instr))) return 0; target = branch_target((struct ppc_inst *)&instr); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 4a8874bc1057..73fa37ca40ef 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1066,8 +1066,8 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) if (is_in_pci_mem_space(addr)) { if (user_mode(regs)) - ret = probe_user_read(&inst, (void __user *)regs->nip, - sizeof(inst)); + ret = copy_from_user_nofault(&inst, + (void __user *)regs->nip, sizeof(inst)); else ret = probe_kernel_address((void *)regs->nip, inst); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 70a3d9cd9113..bef48da242cc 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -306,8 +306,8 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); long copy_from_kernel_nofault(void *dst, const void *src, size_t size); long notrace copy_to_kernel_nofault(void *dst, const void *src, size_t size); -extern long probe_user_read(void *dst, const void __user *src, size_t size); -extern long notrace probe_user_write(void __user *dst, const void *src, +long copy_from_user_nofault(void *dst, const void __user *src, size_t size); +long notrace copy_to_user_nofault(void __user *dst, const void *src, size_t size); long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 204afc12425f..dc05626979b8 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -141,7 +141,7 @@ bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr) { int ret; - ret = probe_user_read(dst, unsafe_ptr, size); + ret = copy_from_user_nofault(dst, unsafe_ptr, size); if (unlikely(ret < 0)) memset(dst, 0, size); return ret; @@ -326,7 +326,7 @@ BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src, if (unlikely(!nmi_uaccess_okay())) return -EPERM; - return probe_user_write(unsafe_ptr, src, size); + return copy_to_user_nofault(unsafe_ptr, src, size); } static const struct bpf_func_proto bpf_probe_write_user_proto = { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 841c74863ff8..aefb6065b508 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1290,7 +1290,7 @@ probe_mem_read_user(void *dest, void *src, size_t size) { const void __user *uaddr = (__force const void __user *)src; - return probe_user_read(dest, uaddr, size); + return copy_from_user_nofault(dest, uaddr, size); } static nokprobe_inline int diff --git a/mm/maccess.c b/mm/maccess.c index cc5d8c6233c0..f98ff91e32c6 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -194,7 +194,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) #endif /* HAVE_GET_KERNEL_NOFAULT */ /** - * probe_user_read(): safely attempt to read from a user-space location + * copy_from_user_nofault(): safely attempt to read from a user-space location * @dst: pointer to the buffer that shall take the data * @src: address to read from. This must be a user address. * @size: size of the data chunk @@ -202,7 +202,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) * Safely read from user address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_user_read(void *dst, const void __user *src, size_t size) +long copy_from_user_nofault(void *dst, const void __user *src, size_t size) { long ret = -EFAULT; mm_segment_t old_fs = get_fs(); @@ -219,10 +219,10 @@ long probe_user_read(void *dst, const void __user *src, size_t size) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(probe_user_read); +EXPORT_SYMBOL_GPL(copy_from_user_nofault); /** - * probe_user_write(): safely attempt to write to a user-space location + * copy_to_user_nofault(): safely attempt to write to a user-space location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk @@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(probe_user_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_user_write(void __user *dst, const void *src, size_t size) +long copy_to_user_nofault(void __user *dst, const void *src, size_t size) { long ret = -EFAULT; mm_segment_t old_fs = get_fs(); @@ -247,7 +247,7 @@ long probe_user_write(void __user *dst, const void *src, size_t size) return -EFAULT; return 0; } -EXPORT_SYMBOL_GPL(probe_user_write); +EXPORT_SYMBOL_GPL(copy_to_user_nofault); /** * strncpy_from_user_nofault: - Copy a NUL terminated string from unsafe user -- cgit v1.2.3 From aadf9dcef9d4cd68c73a4ab934f93319c4becc47 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 17 Jun 2020 22:50:33 +0100 Subject: rxrpc: Fix trace string The trace symbol printer (__print_symbolic()) ignores symbols that map to an empty string and prints the hex value instead. Fix the symbol for rxrpc_cong_no_change to " -" instead of "" to avoid this. Fixes: b54a134a7de4 ("rxrpc: Fix handling of enums-to-string translation in tracing") Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index ba9efdc848f9..059b6e45a028 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -400,7 +400,7 @@ enum rxrpc_tx_point { EM(rxrpc_cong_begin_retransmission, " Retrans") \ EM(rxrpc_cong_cleared_nacks, " Cleared") \ EM(rxrpc_cong_new_low_nack, " NewLowN") \ - EM(rxrpc_cong_no_change, "") \ + EM(rxrpc_cong_no_change, " -") \ EM(rxrpc_cong_progress, " Progres") \ EM(rxrpc_cong_retransmit_again, " ReTxAgn") \ EM(rxrpc_cong_rtt_window_end, " RttWinE") \ -- cgit v1.2.3 From 34c86f4c4a7be3b3e35aa48bd18299d4c756064d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 8 Jun 2020 16:48:43 +1000 Subject: crypto: af_alg - fix use-after-free in af_alg_accept() due to bh_lock_sock() The locking in af_alg_release_parent is broken as the BH socket lock can only be taken if there is a code-path to handle the case where the lock is owned by process-context. Instead of adding such handling, we can fix this by changing the ref counts to atomic_t. This patch also modifies the main refcnt to include both normal and nokey sockets. This way we don't have to fudge the nokey ref count when a socket changes from nokey to normal. Credits go to Mauricio Faria de Oliveira who diagnosed this bug and sent a patch for it: https://lore.kernel.org/linux-crypto/20200605161657.535043-1-mfo@canonical.com/ Reported-by: Brian Moyles Reported-by: Mauricio Faria de Oliveira Fixes: 37f96694cf73 ("crypto: af_alg - Use bh_lock_sock in...") Cc: Signed-off-by: Herbert Xu --- crypto/af_alg.c | 26 +++++++++++--------------- crypto/algif_aead.c | 9 +++------ crypto/algif_hash.c | 9 +++------ crypto/algif_skcipher.c | 9 +++------ include/crypto/if_alg.h | 4 ++-- 5 files changed, 22 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/crypto/af_alg.c b/crypto/af_alg.c index b1cd3535c525..28fc323e3fe3 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -128,21 +128,15 @@ EXPORT_SYMBOL_GPL(af_alg_release); void af_alg_release_parent(struct sock *sk) { struct alg_sock *ask = alg_sk(sk); - unsigned int nokey = ask->nokey_refcnt; - bool last = nokey && !ask->refcnt; + unsigned int nokey = atomic_read(&ask->nokey_refcnt); sk = ask->parent; ask = alg_sk(sk); - local_bh_disable(); - bh_lock_sock(sk); - ask->nokey_refcnt -= nokey; - if (!last) - last = !--ask->refcnt; - bh_unlock_sock(sk); - local_bh_enable(); + if (nokey) + atomic_dec(&ask->nokey_refcnt); - if (last) + if (atomic_dec_and_test(&ask->refcnt)) sock_put(sk); } EXPORT_SYMBOL_GPL(af_alg_release_parent); @@ -187,7 +181,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EBUSY; lock_sock(sk); - if (ask->refcnt | ask->nokey_refcnt) + if (atomic_read(&ask->refcnt)) goto unlock; swap(ask->type, type); @@ -236,7 +230,7 @@ static int alg_setsockopt(struct socket *sock, int level, int optname, int err = -EBUSY; lock_sock(sk); - if (ask->refcnt) + if (atomic_read(&ask->refcnt) != atomic_read(&ask->nokey_refcnt)) goto unlock; type = ask->type; @@ -301,12 +295,14 @@ int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern) if (err) goto unlock; - if (nokey || !ask->refcnt++) + if (atomic_inc_return_relaxed(&ask->refcnt) == 1) sock_hold(sk); - ask->nokey_refcnt += nokey; + if (nokey) { + atomic_inc(&ask->nokey_refcnt); + atomic_set(&alg_sk(sk2)->nokey_refcnt, 1); + } alg_sk(sk2)->parent = sk; alg_sk(sk2)->type = type; - alg_sk(sk2)->nokey_refcnt = nokey; newsock->ops = type->ops; newsock->state = SS_CONNECTED; diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index eb1910b6d434..0ae000a61c7f 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -384,7 +384,7 @@ static int aead_check_key(struct socket *sock) struct alg_sock *ask = alg_sk(sk); lock_sock(sk); - if (ask->refcnt) + if (!atomic_read(&ask->nokey_refcnt)) goto unlock_child; psk = ask->parent; @@ -396,11 +396,8 @@ static int aead_check_key(struct socket *sock) if (crypto_aead_get_flags(tfm->aead) & CRYPTO_TFM_NEED_KEY) goto unlock; - if (!pask->refcnt++) - sock_hold(psk); - - ask->refcnt = 1; - sock_put(psk); + atomic_dec(&pask->nokey_refcnt); + atomic_set(&ask->nokey_refcnt, 0); err = 0; diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index da1ffa4f7f8d..e71727c25a7d 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -301,7 +301,7 @@ static int hash_check_key(struct socket *sock) struct alg_sock *ask = alg_sk(sk); lock_sock(sk); - if (ask->refcnt) + if (!atomic_read(&ask->nokey_refcnt)) goto unlock_child; psk = ask->parent; @@ -313,11 +313,8 @@ static int hash_check_key(struct socket *sock) if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) goto unlock; - if (!pask->refcnt++) - sock_hold(psk); - - ask->refcnt = 1; - sock_put(psk); + atomic_dec(&pask->nokey_refcnt); + atomic_set(&ask->nokey_refcnt, 0); err = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 4c3bdffe0c3a..ec5567c87a6d 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -211,7 +211,7 @@ static int skcipher_check_key(struct socket *sock) struct alg_sock *ask = alg_sk(sk); lock_sock(sk); - if (ask->refcnt) + if (!atomic_read(&ask->nokey_refcnt)) goto unlock_child; psk = ask->parent; @@ -223,11 +223,8 @@ static int skcipher_check_key(struct socket *sock) if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) goto unlock; - if (!pask->refcnt++) - sock_hold(psk); - - ask->refcnt = 1; - sock_put(psk); + atomic_dec(&pask->nokey_refcnt); + atomic_set(&ask->nokey_refcnt, 0); err = 0; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 56527c85d122..088c1ded2714 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -29,8 +29,8 @@ struct alg_sock { struct sock *parent; - unsigned int refcnt; - unsigned int nokey_refcnt; + atomic_t refcnt; + atomic_t nokey_refcnt; const struct af_alg_type *type; void *private; -- cgit v1.2.3 From f097eb38f71391ff2cf078788bad5a00eb3bd96a Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Tue, 9 Jun 2020 10:17:26 +0200 Subject: timekeeping: Fix kerneldoc system_device_crosststamp & al Make kernel doc comments actually work and fix the syncronized typo. [ tglx: Added the missing /** and fixed up formatting ] Signed-off-by: Kurt Kanzenbach Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200609081726.5657-1-kurt@linutronix.de --- include/linux/timekeeping.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index b27e2ffa96c1..d5471d6fa778 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -222,9 +222,9 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); -/* +/** * struct system_time_snapshot - simultaneous raw/real time capture with - * counter value + * counter value * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time * @raw: Monotonic raw system time @@ -239,9 +239,9 @@ struct system_time_snapshot { u8 cs_was_changed_seq; }; -/* +/** * struct system_device_crosststamp - system/device cross-timestamp - * (syncronized capture) + * (synchronized capture) * @device: Device time * @sys_realtime: Realtime simultaneous with device time * @sys_monoraw: Monotonic raw simultaneous with device time @@ -252,12 +252,12 @@ struct system_device_crosststamp { ktime_t sys_monoraw; }; -/* +/** * struct system_counterval_t - system counter value with the pointer to the - * corresponding clocksource + * corresponding clocksource * @cycles: System counter value * @cs: Clocksource corresponding to system counter value. Used by - * timekeeping code to verify comparibility of two cycle values + * timekeeping code to verify comparibility of two cycle values */ struct system_counterval_t { u64 cycles; -- cgit v1.2.3 From ab183d460daac6292cb0cfd989d88b37b2437844 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 16 Jun 2020 13:45:36 +0300 Subject: RDMA/mlx5: Add missed RST2INIT and INIT2INIT steps during ECE handshake Missed steps during ECE handshake left userspace application with less options for the ECE handshake. Pass ECE options in the additional transitions. Fixes: 50aec2c3135e ("RDMA/mlx5: Return ECE data after modify QP") Link: https://lore.kernel.org/r/20200616104536.2426384-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qpc.c | 8 ++++++++ include/linux/mlx5/mlx5_ifc.h | 10 ++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index c19d91d6dce8..7c3968ef9cd1 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -346,6 +346,9 @@ static int get_ece_from_mbox(void *out, u16 opcode) int ece = 0; switch (opcode) { + case MLX5_CMD_OP_INIT2INIT_QP: + ece = MLX5_GET(init2init_qp_out, out, ece); + break; case MLX5_CMD_OP_INIT2RTR_QP: ece = MLX5_GET(init2rtr_qp_out, out, ece); break; @@ -355,6 +358,9 @@ static int get_ece_from_mbox(void *out, u16 opcode) case MLX5_CMD_OP_RTS2RTS_QP: ece = MLX5_GET(rts2rts_qp_out, out, ece); break; + case MLX5_CMD_OP_RST2INIT_QP: + ece = MLX5_GET(rst2init_qp_out, out, ece); + break; default: break; } @@ -406,6 +412,7 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, return -ENOMEM; MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn, opt_param_mask, qpc, uid); + MLX5_SET(rst2init_qp_in, mbox->in, ece, ece); break; case MLX5_CMD_OP_INIT2RTR_QP: if (MBOX_ALLOC(mbox, init2rtr_qp)) @@ -439,6 +446,7 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, return -ENOMEM; MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn, opt_param_mask, qpc, uid); + MLX5_SET(init2init_qp_in, mbox->in, ece, ece); break; default: return -EINVAL; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 116bd9bb347f..ca1887dd0423 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4283,7 +4283,8 @@ struct mlx5_ifc_rst2init_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_rst2init_qp_in_bits { @@ -4300,7 +4301,7 @@ struct mlx5_ifc_rst2init_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; @@ -6619,7 +6620,8 @@ struct mlx5_ifc_init2init_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_init2init_qp_in_bits { @@ -6636,7 +6638,7 @@ struct mlx5_ifc_init2init_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; -- cgit v1.2.3 From b5292111de9bb70cba3489075970889765302136 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 3 Jun 2020 15:48:19 +0800 Subject: libata: Use per port sync for detach Commit 130f4caf145c ("libata: Ensure ata_port probe has completed before detach") may cause system freeze during suspend. Using async_synchronize_full() in PM callbacks is wrong, since async callbacks that are already scheduled may wait for not-yet-scheduled callbacks, causes a circular dependency. Instead of using big hammer like async_synchronize_full(), use async cookie to make sure port probe are synced, without affecting other scheduled PM callbacks. Fixes: 130f4caf145c ("libata: Ensure ata_port probe has completed before detach") Suggested-by: John Garry Signed-off-by: Kai-Heng Feng Tested-by: John Garry BugLink: https://bugs.launchpad.net/bugs/1867983 Signed-off-by: Jens Axboe --- drivers/ata/libata-core.c | 11 +++++------ include/linux/libata.h | 3 +++ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 69361ec43db5..b1cd4d97bc2a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -5778,7 +5777,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) /* perform each probe asynchronously */ for (i = 0; i < host->n_ports; i++) { struct ata_port *ap = host->ports[i]; - async_schedule(async_port_probe, ap); + ap->cookie = async_schedule(async_port_probe, ap); } return 0; @@ -5920,11 +5919,11 @@ void ata_host_detach(struct ata_host *host) { int i; - /* Ensure ata_port probe has completed */ - async_synchronize_full(); - - for (i = 0; i < host->n_ports; i++) + for (i = 0; i < host->n_ports; i++) { + /* Ensure ata_port probe has completed */ + async_synchronize_cookie(host->ports[i]->cookie + 1); ata_port_detach(host->ports[i]); + } /* the host is dead now, dissociate ACPI */ ata_acpi_dissociate(host); diff --git a/include/linux/libata.h b/include/linux/libata.h index af832852e620..8a4843704d28 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -22,6 +22,7 @@ #include #include #include +#include /* * Define if arch has non-standard setup. This is a _PCI_ standard @@ -872,6 +873,8 @@ struct ata_port { struct timer_list fastdrain_timer; unsigned long fastdrain_cnt; + async_cookie_t cookie; + int em_message_type; void *private_data; -- cgit v1.2.3 From 3373a3461aa15b7f9a871fa4cb2c9ef21ac20b47 Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Thu, 18 Jun 2020 12:21:38 +0800 Subject: block: make function 'kill_bdev' static kill_bdev does not have any external user, so make it static. Signed-off-by: Zheng Bin Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- fs/block_dev.c | 5 ++--- include/linux/fs.h | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/block_dev.c b/fs/block_dev.c index 08c87db3a92b..0ae656e022fd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -75,7 +75,7 @@ static void bdev_write_inode(struct block_device *bdev) } /* Kill _all_ buffers and pagecache , dirty or not.. */ -void kill_bdev(struct block_device *bdev) +static void kill_bdev(struct block_device *bdev) { struct address_space *mapping = bdev->bd_inode->i_mapping; @@ -84,8 +84,7 @@ void kill_bdev(struct block_device *bdev) invalidate_bh_lrus(); truncate_inode_pages(mapping, 0); -} -EXPORT_SYMBOL(kill_bdev); +} /* Invalidate clean unused buffers and pagecache. */ void invalidate_bdev(struct block_device *bdev) diff --git a/include/linux/fs.h b/include/linux/fs.h index 6c4ab4dc1cd7..3f881a892ea7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2592,7 +2592,6 @@ extern void bdput(struct block_device *); extern void invalidate_bdev(struct block_device *); extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); extern int sync_blockdev(struct block_device *bdev); -extern void kill_bdev(struct block_device *); extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern void emergency_thaw_bdev(struct super_block *sb); @@ -2608,7 +2607,6 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb) #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } -static inline void kill_bdev(struct block_device *bdev) {} static inline void invalidate_bdev(struct block_device *bdev) {} static inline struct super_block *freeze_bdev(struct block_device *sb) -- cgit v1.2.3 From 670d0a4b10704667765f7d18f7592993d02783aa Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 18 Jun 2020 00:02:26 +0200 Subject: sparse: use identifiers to define address spaces Currently, address spaces in warnings are displayed as '' with 'X' being the address space's arbitrary number. But since sparse v0.6.0-rc1 (late December 2018), sparse allows you to define the address spaces using an identifier instead of a number. This identifier is then directly used in the warnings. So, use the identifiers '__user', '__iomem', '__percpu' & '__rcu' for the corresponding address spaces. The default address space, __kernel, being not displayed in warnings, stays defined as '0'. With this change, warnings that used to be displayed as: cast removes address space '' of expression ... void [noderef] * will now be displayed as: cast removes address space '__user' of expression ... void [noderef] __iomem * This also moves the __kernel annotation to be the first one, since it is quite different from the others because it's the default one, and so: - it's never displayed - it's normally not needed, nor in type annotations, nor in cast between address spaces. The only time it's needed is when it's combined with a typeof to express "the same type as this one but without the address space" - it can't be defined with a name, '0' must be used. So, it seemed strange to me to have it in the middle of the other ones. Signed-off-by: Luc Van Oostenryck Acked-by: Miguel Ojeda Signed-off-by: Linus Torvalds --- include/linux/compiler_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 21aed0981edf..e368384445b6 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -5,20 +5,20 @@ #ifndef __ASSEMBLY__ #ifdef __CHECKER__ -# define __user __attribute__((noderef, address_space(1))) # define __kernel __attribute__((address_space(0))) +# define __user __attribute__((noderef, address_space(__user))) # define __safe __attribute__((safe)) # define __force __attribute__((force)) # define __nocast __attribute__((nocast)) -# define __iomem __attribute__((noderef, address_space(2))) +# define __iomem __attribute__((noderef, address_space(__iomem))) # define __must_hold(x) __attribute__((context(x,1,1))) # define __acquires(x) __attribute__((context(x,0,1))) # define __releases(x) __attribute__((context(x,1,0))) # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) -# define __percpu __attribute__((noderef, address_space(3))) -# define __rcu __attribute__((noderef, address_space(4))) +# define __percpu __attribute__((noderef, address_space(__percpu))) +# define __rcu __attribute__((noderef, address_space(__rcu))) # define __private __attribute__((noderef)) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); -- cgit v1.2.3 From 25f12ae45fc1931a1dce3cc59f9989a9d87834b0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:55 +0200 Subject: maccess: rename probe_kernel_address to get_kernel_nofault Better describe what this helper does, and match the naming of copy_from_kernel_nofault. Also switch the argument order around, so that it acts and looks like get_user(). Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/traps.c | 2 +- arch/arm/mm/alignment.c | 4 ++-- arch/arm64/kernel/traps.c | 2 +- arch/ia64/include/asm/sections.h | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/include/asm/sections.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- arch/riscv/kernel/kgdb.c | 4 ++-- arch/riscv/kernel/traps.c | 4 ++-- arch/s390/mm/fault.c | 2 +- arch/sh/kernel/traps.c | 2 +- arch/x86/kernel/probe_roms.c | 20 ++++++++++---------- arch/x86/kernel/traps.c | 2 +- arch/x86/mm/fault.c | 6 +++--- arch/x86/pci/pcbios.c | 2 +- include/linux/uaccess.h | 10 +++++----- lib/test_lockup.c | 6 +++--- 20 files changed, 40 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 65a3b1e75480..49ce15c3612d 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -396,7 +396,7 @@ int is_valid_bugaddr(unsigned long pc) u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif - if (probe_kernel_address((unsigned *)pc, bkpt)) + if (get_kernel_nofault(bkpt, (unsigned *)pc)) return 0; return bkpt == insn; diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 84718eddae60..81a627e6e1c5 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -774,7 +774,7 @@ static int alignment_get_arm(struct pt_regs *regs, u32 *ip, u32 *inst) if (user_mode(regs)) fault = get_user(instr, ip); else - fault = probe_kernel_address(ip, instr); + fault = get_kernel_nofault(instr, ip); *inst = __mem_to_opcode_arm(instr); @@ -789,7 +789,7 @@ static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst) if (user_mode(regs)) fault = get_user(instr, ip); else - fault = probe_kernel_address(ip, instr); + fault = get_kernel_nofault(instr, ip); *inst = __mem_to_opcode_thumb16(instr); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 50cc30acf106..227b2d9bae3d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -376,7 +376,7 @@ static int call_undef_hook(struct pt_regs *regs) if (!user_mode(regs)) { __le32 instr_le; - if (probe_kernel_address((__force __le32 *)pc, instr_le)) + if (get_kernel_nofault(instr_le, (__force __le32 *)pc)) goto exit; instr = le32_to_cpu(instr_le); } else if (compat_thumb_mode(regs)) { diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index cea15f2dd38d..ad4fc06e5f4b 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct fdesc *desc = ptr; void *p; - if (!probe_kernel_address(&desc->ip, p)) + if (!get_kernel_nofault(p, &desc->ip)) ptr = p; return ptr; } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 230a6422b99f..6c435dbccca0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -293,7 +293,7 @@ void *dereference_function_descriptor(void *ptr) Elf64_Fdesc *desc = ptr; void *p; - if (!probe_kernel_address(&desc->addr, p)) + if (!get_kernel_nofault(p, &desc->addr)) ptr = p; return ptr; } diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index d19871763ed4..bd311616fca8 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -85,7 +85,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct ppc64_opd_entry *desc = ptr; void *p; - if (!probe_kernel_address(&desc->funcaddr, p)) + if (!get_kernel_nofault(p, &desc->funcaddr)) ptr = p; return ptr; } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 652b2852bea3..e14a1862a3ca 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -421,7 +421,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) unsigned int instr; struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = probe_kernel_address(addr, instr); + err = get_kernel_nofault(instr, addr); if (err) return err; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 6f96f65ebfe8..9cc792a3a6a9 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -289,7 +289,7 @@ int kprobe_handler(struct pt_regs *regs) if (!p) { unsigned int instr; - if (probe_kernel_address(addr, instr)) + if (get_kernel_nofault(instr, addr)) goto no_kprobe; if (instr != BREAKPOINT_INSTRUCTION) { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d4d0d1048500..30955a0c32d0 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1271,7 +1271,7 @@ static void show_instructions(struct pt_regs *regs) #endif if (!__kernel_text_address(pc) || - probe_kernel_address((const void *)pc, instr)) { + get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); } else { if (regs->nip == pc) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 73fa37ca40ef..040b9d01c079 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1069,7 +1069,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) ret = copy_from_user_nofault(&inst, (void __user *)regs->nip, sizeof(inst)); else - ret = probe_kernel_address((void *)regs->nip, inst); + ret = get_kernel_nofault(inst, (void *)regs->nip); if (!ret && mcheck_handle_load(regs, inst)) { regs->nip += 4; diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index a21fb21883e7..c3275f42d1ac 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -62,7 +62,7 @@ int get_step_address(struct pt_regs *regs, unsigned long *next_addr) unsigned int rs1_num, rs2_num; int op_code; - if (probe_kernel_address((void *)pc, op_code)) + if (get_kernel_nofault(op_code, (void *)pc)) return -EINVAL; if ((op_code & __INSN_LENGTH_MASK) != __INSN_LENGTH_GE_32) { if (is_c_jalr_insn(op_code) || is_c_jr_insn(op_code)) { @@ -146,7 +146,7 @@ int do_single_step(struct pt_regs *regs) return error; /* Store the op code in the stepped address */ - error = probe_kernel_address((void *)addr, stepped_opcode); + error = get_kernel_nofault(stepped_opcode, (void *)addr); if (error) return error; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index ecec1778e3a4..7d95cce5e47c 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -137,7 +137,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc) { bug_insn_t insn; - if (probe_kernel_address((bug_insn_t *)pc, insn)) + if (get_kernel_nofault(insn, (bug_insn_t *)pc)) return 0; return GET_INSN_LENGTH(insn); @@ -165,7 +165,7 @@ int is_valid_bugaddr(unsigned long pc) if (pc < VMALLOC_START) return 0; - if (probe_kernel_address((bug_insn_t *)pc, insn)) + if (get_kernel_nofault(insn, (bug_insn_t *)pc)) return 0; if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) return (insn == __BUG_INSN_32); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 6a24751557f0..d53c2e2ea1fd 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -105,7 +105,7 @@ static int bad_address(void *p) { unsigned long dummy; - return probe_kernel_address((unsigned long *)p, dummy); + return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long asce, unsigned long address) diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index a33025451fcd..9c3d32b80038 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -118,7 +118,7 @@ int is_valid_bugaddr(unsigned long addr) if (addr < PAGE_OFFSET) return 0; - if (probe_kernel_address((insn_size_t *)addr, opcode)) + if (get_kernel_nofault(opcode, (insn_size_t *)addr)) return 0; if (opcode == TRAPA_BUG_OPCODE) return 1; diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index ee0286390a4c..65b0dd2bf25c 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -99,7 +99,7 @@ static bool probe_list(struct pci_dev *pdev, unsigned short vendor, unsigned short device; do { - if (probe_kernel_address(rom_list, device) != 0) + if (get_kernel_nofault(device, rom_list) != 0) device = 0; if (device && match_id(pdev, vendor, device)) @@ -125,13 +125,13 @@ static struct resource *find_oprom(struct pci_dev *pdev) break; rom = isa_bus_to_virt(res->start); - if (probe_kernel_address(rom + 0x18, offset) != 0) + if (get_kernel_nofault(offset, rom + 0x18) != 0) continue; - if (probe_kernel_address(rom + offset + 0x4, vendor) != 0) + if (get_kernel_nofault(vendor, rom + offset + 0x4) != 0) continue; - if (probe_kernel_address(rom + offset + 0x6, device) != 0) + if (get_kernel_nofault(device, rom + offset + 0x6) != 0) continue; if (match_id(pdev, vendor, device)) { @@ -139,8 +139,8 @@ static struct resource *find_oprom(struct pci_dev *pdev) break; } - if (probe_kernel_address(rom + offset + 0x8, list) == 0 && - probe_kernel_address(rom + offset + 0xc, rev) == 0 && + if (get_kernel_nofault(list, rom + offset + 0x8) == 0 && + get_kernel_nofault(rev, rom + offset + 0xc) == 0 && rev >= 3 && list && probe_list(pdev, vendor, rom + offset + list)) { oprom = res; @@ -183,14 +183,14 @@ static int __init romsignature(const unsigned char *rom) const unsigned short * const ptr = (const unsigned short *)rom; unsigned short sig; - return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; + return get_kernel_nofault(sig, ptr) == 0 && sig == ROMSIGNATURE; } static int __init romchecksum(const unsigned char *rom, unsigned long length) { unsigned char sum, c; - for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) + for (sum = 0; length && get_kernel_nofault(c, rom++) == 0; length--) sum += c; return !length && !sum; } @@ -211,7 +211,7 @@ void __init probe_roms(void) video_rom_resource.start = start; - if (probe_kernel_address(rom + 2, c) != 0) + if (get_kernel_nofault(c, rom + 2) != 0) continue; /* 0 < length <= 0x7f * 512, historically */ @@ -249,7 +249,7 @@ void __init probe_roms(void) if (!romsignature(rom)) continue; - if (probe_kernel_address(rom + 2, c) != 0) + if (get_kernel_nofault(c, rom + 2) != 0) continue; /* 0 < length <= 0x7f * 512, historically */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7003f2e7b163..f9727b96961f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -91,7 +91,7 @@ int is_valid_bugaddr(unsigned long addr) if (addr < TASK_SIZE_MAX) return 0; - if (probe_kernel_address((unsigned short *)addr, ud)) + if (get_kernel_nofault(ud, (unsigned short *)addr)) return 0; return ud == INSN_UD0 || ud == INSN_UD2; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e996aa3833b8..1ead568c0101 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -99,7 +99,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, return !instr_lo || (instr_lo>>1) == 1; case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ - if (probe_kernel_address(instr, opcode)) + if (get_kernel_nofault(opcode, instr)) return 0; *prefetch = (instr_lo == 0xF) && @@ -133,7 +133,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) while (instr < max_instr) { unsigned char opcode; - if (probe_kernel_address(instr, opcode)) + if (get_kernel_nofault(opcode, instr)) break; instr++; @@ -301,7 +301,7 @@ static int bad_address(void *p) { unsigned long dummy; - return probe_kernel_address((unsigned long *)p, dummy); + return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long address) diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 9c97d814125e..4f15280732ed 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -302,7 +302,7 @@ static const struct pci_raw_ops *__init pci_find_bios(void) check <= (union bios32 *) __va(0xffff0); ++check) { long sig; - if (probe_kernel_address(&check->fields.signature, sig)) + if (get_kernel_nofault(sig, &check->fields.signature)) continue; if (check->fields.signature != BIOS32_SIGNATURE) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index bef48da242cc..a508a3c08879 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -318,14 +318,14 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long strnlen_user_nofault(const void __user *unsafe_addr, long count); /** - * probe_kernel_address(): safely attempt to read from a location - * @addr: address to read from - * @retval: read into this variable + * get_kernel_nofault(): safely attempt to read from a location + * @val: read into this variable + * @ptr: address to read from * * Returns 0 on success, or -EFAULT. */ -#define probe_kernel_address(addr, retval) \ - copy_from_kernel_nofault(&retval, addr, sizeof(retval)) +#define get_kernel_nofault(val, ptr) \ + copy_from_kernel_nofault(&(val), (ptr), sizeof(val)) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) diff --git a/lib/test_lockup.c b/lib/test_lockup.c index f258743a0d83..bd7c7ff39f6b 100644 --- a/lib/test_lockup.c +++ b/lib/test_lockup.c @@ -419,8 +419,8 @@ static bool test_kernel_ptr(unsigned long addr, int size) /* should be at least readable kernel address */ if (access_ok(ptr, 1) || access_ok(ptr + size - 1, 1) || - probe_kernel_address(ptr, buf) || - probe_kernel_address(ptr + size - 1, buf)) { + get_kernel_nofault(buf, ptr) || + get_kernel_nofault(buf, ptr + size - 1)) { pr_err("invalid kernel ptr: %#lx\n", addr); return true; } @@ -437,7 +437,7 @@ static bool __maybe_unused test_magic(unsigned long addr, int offset, if (!addr) return false; - if (probe_kernel_address(ptr, magic) || magic != expected) { + if (get_kernel_nofault(magic, ptr) || magic != expected) { pr_err("invalid magic at %#lx + %#x = %#x, expected %#x\n", addr, offset, magic, expected); return true; -- cgit v1.2.3 From f751820bc319a30d31a80fe511d88642aaefcdee Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 18 Jun 2020 13:07:13 -0600 Subject: vfio/type1: Fix migration info capability ID ID 1 is already used by the IOVA range capability, use ID 2. Reported-by: Liu Yi L Fixes: ad721705d09c ("vfio iommu: Add migration capability to report supported features") Reviewed-by: Kirti Wankhede Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index eca6692667a3..920470502329 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1030,7 +1030,7 @@ struct vfio_iommu_type1_info_cap_iova_range { * size in bytes that can be used by user applications when getting the dirty * bitmap. */ -#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 1 +#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 2 struct vfio_iommu_type1_info_cap_migration { struct vfio_info_cap_header header; -- cgit v1.2.3 From 0c389d89abc28edf70ae847ee2fa55acb267b826 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 18 Jun 2020 12:10:37 -0700 Subject: maccess: make get_kernel_nofault() check for minimal type compatibility Now that we've renamed probe_kernel_address() to get_kernel_nofault() and made it look and behave more in line with get_user(), some of the subtle type behavior differences end up being more obvious and possibly dangerous. When you do get_user(val, user_ptr); the type of the access comes from the "user_ptr" part, and the above basically acts as val = *user_ptr; by design (except, of course, for the fact that the actual dereference is done with a user access). Note how in the above case, the type of the end result comes from the pointer argument, and then the value is cast to the type of 'val' as part of the assignment. So the type of the pointer is ultimately the more important type both for the access itself. But 'get_kernel_nofault()' may now _look_ similar, but it behaves very differently. When you do get_kernel_nofault(val, kernel_ptr); it behaves like val = *(typeof(val) *)kernel_ptr; except, of course, for the fact that the actual dereference is done with exception handling so that a faulting access is suppressed and returned as the error code. But note how different the casting behavior of the two superficially similar accesses are: one does the actual access in the size of the type the pointer points to, while the other does the access in the size of the target, and ignores the pointer type entirely. Actually changing get_kernel_nofault() to act like get_user() is almost certainly the right thing to do eventually, but in the meantime this patch adds logit to at least verify that the pointer type is compatible with the type of the result. In many cases, this involves just casting the pointer to 'void *' to make it obvious that the type of the pointer is not the important part. It's not how 'get_user()' acts, but at least the behavioral difference is now obvious and explicit. Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/traps.c | 2 +- arch/ia64/include/asm/sections.h | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/include/asm/sections.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/x86/kernel/probe_roms.c | 4 ++-- include/linux/uaccess.h | 6 ++++-- 7 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 49ce15c3612d..17d5a785df28 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -396,7 +396,7 @@ int is_valid_bugaddr(unsigned long pc) u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif - if (get_kernel_nofault(bkpt, (unsigned *)pc)) + if (get_kernel_nofault(bkpt, (void *)pc)) return 0; return bkpt == insn; diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index ad4fc06e5f4b..3a033d2008b3 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct fdesc *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->ip)) + if (!get_kernel_nofault(p, (void *)&desc->ip)) ptr = p; return ptr; } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 6c435dbccca0..b7abb12edd3a 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -293,7 +293,7 @@ void *dereference_function_descriptor(void *ptr) Elf64_Fdesc *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->addr)) + if (!get_kernel_nofault(p, (void *)&desc->addr)) ptr = p; return ptr; } diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index bd311616fca8..324d7b298ec3 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -85,7 +85,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct ppc64_opd_entry *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->funcaddr)) + if (!get_kernel_nofault(p, (void *)&desc->funcaddr)) ptr = p; return ptr; } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index e14a1862a3ca..409080208a6c 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -421,7 +421,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) unsigned int instr; struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = get_kernel_nofault(instr, addr); + err = get_kernel_nofault(instr, (unsigned *) addr); if (err) return err; diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 65b0dd2bf25c..9e1def3744f2 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -94,7 +94,7 @@ static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short } static bool probe_list(struct pci_dev *pdev, unsigned short vendor, - const unsigned char *rom_list) + const void *rom_list) { unsigned short device; @@ -119,7 +119,7 @@ static struct resource *find_oprom(struct pci_dev *pdev) for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) { struct resource *res = &adapter_rom_resources[i]; unsigned short offset, vendor, device, list, rev; - const unsigned char *rom; + const void *rom; if (res->end == 0) break; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index a508a3c08879..0a76ddc07d59 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -324,8 +324,10 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count); * * Returns 0 on success, or -EFAULT. */ -#define get_kernel_nofault(val, ptr) \ - copy_from_kernel_nofault(&(val), (ptr), sizeof(val)) +#define get_kernel_nofault(val, ptr) ({ \ + const typeof(val) *__gk_ptr = (ptr); \ + copy_from_kernel_nofault(&(val), __gk_ptr, sizeof(val));\ +}) #ifndef user_access_begin #define user_access_begin(ptr,len) access_ok(ptr, len) -- cgit v1.2.3 From fb7861d14c8d7edac65b2fcb6e8031cb138457b2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 16 Jun 2020 15:52:05 +0000 Subject: net: core: reduce recursion limit value In the current code, ->ndo_start_xmit() can be executed recursively only 10 times because of stack memory. But, in the case of the vxlan, 10 recursion limit value results in a stack overflow. In the current code, the nested interface is limited by 8 depth. There is no critical reason that the recursion limitation value should be 10. So, it would be good to be the same value with the limitation value of nesting interface depth. Test commands: ip link add vxlan10 type vxlan vni 10 dstport 4789 srcport 4789 4789 ip link set vxlan10 up ip a a 192.168.10.1/24 dev vxlan10 ip n a 192.168.10.2 dev vxlan10 lladdr fc:22:33:44:55:66 nud permanent for i in {9..0} do let A=$i+1 ip link add vxlan$i type vxlan vni $i dstport 4789 srcport 4789 4789 ip link set vxlan$i up ip a a 192.168.$i.1/24 dev vxlan$i ip n a 192.168.$i.2 dev vxlan$i lladdr fc:22:33:44:55:66 nud permanent bridge fdb add fc:22:33:44:55:66 dev vxlan$A dst 192.168.$i.2 self done hping3 192.168.10.2 -2 -d 60000 Splat looks like: [ 103.814237][ T1127] ============================================================================= [ 103.871955][ T1127] BUG kmalloc-2k (Tainted: G B ): Padding overwritten. 0x00000000897a2e4f-0x000 [ 103.873187][ T1127] ----------------------------------------------------------------------------- [ 103.873187][ T1127] [ 103.874252][ T1127] INFO: Slab 0x000000005cccc724 objects=5 used=5 fp=0x0000000000000000 flags=0x10000000001020 [ 103.881323][ T1127] CPU: 3 PID: 1127 Comm: hping3 Tainted: G B 5.7.0+ #575 [ 103.882131][ T1127] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 103.883006][ T1127] Call Trace: [ 103.883324][ T1127] dump_stack+0x96/0xdb [ 103.883716][ T1127] slab_err+0xad/0xd0 [ 103.884106][ T1127] ? _raw_spin_unlock+0x1f/0x30 [ 103.884620][ T1127] ? get_partial_node.isra.78+0x140/0x360 [ 103.885214][ T1127] slab_pad_check.part.53+0xf7/0x160 [ 103.885769][ T1127] ? pskb_expand_head+0x110/0xe10 [ 103.886316][ T1127] check_slab+0x97/0xb0 [ 103.886763][ T1127] alloc_debug_processing+0x84/0x1a0 [ 103.887308][ T1127] ___slab_alloc+0x5a5/0x630 [ 103.887765][ T1127] ? pskb_expand_head+0x110/0xe10 [ 103.888265][ T1127] ? lock_downgrade+0x730/0x730 [ 103.888762][ T1127] ? pskb_expand_head+0x110/0xe10 [ 103.889244][ T1127] ? __slab_alloc+0x3e/0x80 [ 103.889675][ T1127] __slab_alloc+0x3e/0x80 [ 103.890108][ T1127] __kmalloc_node_track_caller+0xc7/0x420 [ ... ] Fixes: 11a766ce915f ("net: Increase xmit RECURSION_LIMIT to 10.") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6fc613ed8eae..39e28e11863c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3157,7 +3157,7 @@ static inline int dev_recursion_level(void) return this_cpu_read(softnet_data.xmit.recursion); } -#define XMIT_RECURSION_LIMIT 10 +#define XMIT_RECURSION_LIMIT 8 static inline bool dev_xmit_recursion(void) { return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > -- cgit v1.2.3 From 390fd0475af565d2fc31de98fcc84f3c2922e008 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Jun 2020 09:58:15 +0200 Subject: i2c: remove deprecated i2c_new_device API All in-tree users have been converted to the new i2c_new_client_device function, so remove this deprecated one. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 25 ------------------------- include/linux/i2c.h | 8 +++----- 2 files changed, 3 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index d1f278f73011..26f03a14a478 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -815,31 +815,6 @@ out_err_silent: } EXPORT_SYMBOL_GPL(i2c_new_client_device); -/** - * i2c_new_device - instantiate an i2c device - * @adap: the adapter managing the device - * @info: describes one I2C device; bus_num is ignored - * Context: can sleep - * - * This deprecated function has the same functionality as - * @i2c_new_client_device, it just returns NULL instead of an ERR_PTR in case of - * an error for compatibility with current I2C API. It will be removed once all - * users are converted. - * - * This returns the new i2c client, which may be saved for later use with - * i2c_unregister_device(); or NULL to indicate an error. - */ -struct i2c_client * -i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info) -{ - struct i2c_client *ret; - - ret = i2c_new_client_device(adap, info); - return IS_ERR(ret) ? NULL : ret; -} -EXPORT_SYMBOL_GPL(i2c_new_device); - - /** * i2c_unregister_device - reverse effect of i2c_new_*_device() * @client: value returned from i2c_new_*_device() diff --git a/include/linux/i2c.h b/include/linux/i2c.h index c10617bb980a..b8b8963f8bb9 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -408,7 +408,7 @@ static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } * that are present. This information is used to grow the driver model tree. * For mainboards this is done statically using i2c_register_board_info(); * bus numbers identify adapters that aren't yet available. For add-on boards, - * i2c_new_device() does this dynamically with the adapter already known. + * i2c_new_client_device() does this dynamically with the adapter already known. */ struct i2c_board_info { char type[I2C_NAME_SIZE]; @@ -439,13 +439,11 @@ struct i2c_board_info { #if IS_ENABLED(CONFIG_I2C) -/* Add-on boards should register/unregister their devices; e.g. a board +/* + * Add-on boards should register/unregister their devices; e.g. a board * with integrated I2C, a config eeprom, sensors, and a codec that's * used in conjunction with the primary hardware. */ -struct i2c_client * -i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info); - struct i2c_client * i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info); -- cgit v1.2.3 From 26f2eb27d081081dbea6d3c11602c9ece36f4d9c Mon Sep 17 00:00:00 2001 From: wenxu Date: Thu, 18 Jun 2020 20:49:08 +0800 Subject: flow_offload: add flow_indr_block_cb_alloc/remove function Add flow_indr_block_cb_alloc/remove function for next fix patch. Signed-off-by: wenxu Signed-off-by: David S. Miller --- include/net/flow_offload.h | 13 +++++++++++++ net/core/flow_offload.c | 21 +++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index f2c8311a0433..bf4343042956 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -467,6 +467,12 @@ struct flow_block_cb { struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)); +struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, + void *cb_ident, void *cb_priv, + void (*release)(void *cb_priv), + struct flow_block_offload *bo, + struct net_device *dev, void *data, + void (*cleanup)(struct flow_block_cb *block_cb)); void flow_block_cb_free(struct flow_block_cb *block_cb); struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block, @@ -488,6 +494,13 @@ static inline void flow_block_cb_remove(struct flow_block_cb *block_cb, list_move(&block_cb->list, &offload->cb_list); } +static inline void flow_indr_block_cb_remove(struct flow_block_cb *block_cb, + struct flow_block_offload *offload) +{ + list_del(&block_cb->indr.list); + list_move(&block_cb->list, &offload->cb_list); +} + bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident, struct list_head *driver_block_list); diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 0cfc35e6be28..1fd781d155b4 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -437,6 +437,27 @@ static void flow_block_indr_init(struct flow_block_cb *flow_block, flow_block->indr.cleanup = cleanup; } +struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, + void *cb_ident, void *cb_priv, + void (*release)(void *cb_priv), + struct flow_block_offload *bo, + struct net_device *dev, void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + struct flow_block_cb *block_cb; + + block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, release); + if (IS_ERR(block_cb)) + goto out; + + flow_block_indr_init(block_cb, bo, dev, data, cleanup); + list_add(&block_cb->indr.list, &flow_block_indr_list); + +out: + return block_cb; +} +EXPORT_SYMBOL(flow_indr_block_cb_alloc); + static void __flow_block_indr_binding(struct flow_block_offload *bo, struct net_device *dev, void *data, void (*cleanup)(struct flow_block_cb *block_cb)) -- cgit v1.2.3 From 66f1939a1b705305df820d65f4d9a8457d05759c Mon Sep 17 00:00:00 2001 From: wenxu Date: Thu, 18 Jun 2020 20:49:09 +0800 Subject: flow_offload: use flow_indr_block_cb_alloc/remove function Prepare fix the bug in the next patch. use flow_indr_block_cb_alloc/remove function and remove the __flow_block_indr_binding. Signed-off-by: wenxu Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 19 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 21 ++++++++++++++------- drivers/net/ethernet/netronome/nfp/flower/main.h | 4 +++- .../net/ethernet/netronome/nfp/flower/offload.c | 18 +++++++++++------- include/net/flow_offload.h | 4 +++- net/core/flow_offload.c | 22 +--------------------- 6 files changed, 44 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 0eef4f5e4a46..3e3a8841689d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1889,7 +1889,8 @@ static void bnxt_tc_setup_indr_rel(void *cb_priv) } static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp, - struct flow_block_offload *f) + struct flow_block_offload *f, void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) { struct bnxt_flower_indr_block_cb_priv *cb_priv; struct flow_block_cb *block_cb; @@ -1907,9 +1908,10 @@ static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp, cb_priv->bp = bp; list_add(&cb_priv->list, &bp->tc_indr_block_list); - block_cb = flow_block_cb_alloc(bnxt_tc_setup_indr_block_cb, - cb_priv, cb_priv, - bnxt_tc_setup_indr_rel); + block_cb = flow_indr_block_cb_alloc(bnxt_tc_setup_indr_block_cb, + cb_priv, cb_priv, + bnxt_tc_setup_indr_rel, f, + netdev, data, cleanup); if (IS_ERR(block_cb)) { list_del(&cb_priv->list); kfree(cb_priv); @@ -1930,7 +1932,7 @@ static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp, if (!block_cb) return -ENOENT; - flow_block_cb_remove(block_cb, f); + flow_indr_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); break; default: @@ -1945,14 +1947,17 @@ static bool bnxt_is_netdev_indr_offload(struct net_device *netdev) } static int bnxt_tc_setup_indr_cb(struct net_device *netdev, void *cb_priv, - enum tc_setup_type type, void *type_data) + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) { if (!bnxt_is_netdev_indr_offload(netdev)) return -EOPNOTSUPP; switch (type) { case TC_SETUP_BLOCK: - return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data); + return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data, data, + cleanup); default: break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 80713123de5c..d629864fc996 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -407,7 +407,9 @@ static int mlx5e_rep_indr_setup_block(struct net_device *netdev, struct mlx5e_rep_priv *rpriv, struct flow_block_offload *f, - flow_setup_cb_t *setup_cb) + flow_setup_cb_t *setup_cb, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); struct mlx5e_rep_indr_block_priv *indr_priv; @@ -438,8 +440,9 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, list_add(&indr_priv->list, &rpriv->uplink_priv.tc_indr_block_priv_list); - block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv, - mlx5e_rep_indr_block_unbind); + block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv, + mlx5e_rep_indr_block_unbind, + f, netdev, data, cleanup); if (IS_ERR(block_cb)) { list_del(&indr_priv->list); kfree(indr_priv); @@ -458,7 +461,7 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, if (!block_cb) return -ENOENT; - flow_block_cb_remove(block_cb, f); + flow_indr_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); return 0; default: @@ -469,15 +472,19 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, static int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv, - enum tc_setup_type type, void *type_data) + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) { switch (type) { case TC_SETUP_BLOCK: return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, - mlx5e_rep_indr_setup_tc_cb); + mlx5e_rep_indr_setup_tc_cb, + data, cleanup); case TC_SETUP_FT: return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, - mlx5e_rep_indr_setup_ft_cb); + mlx5e_rep_indr_setup_ft_cb, + data, cleanup); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 6c3dc3baf387..56b3b68e273e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -459,7 +459,9 @@ int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev, struct tc_cls_matchall_offload *flow); void nfp_flower_stats_rlim_reply(struct nfp_app *app, struct sk_buff *skb); int nfp_flower_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, - enum tc_setup_type type, void *type_data); + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)); int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv); diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 695d24b9dd92..95c75250c355 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1647,7 +1647,8 @@ static void nfp_flower_setup_indr_tc_release(void *cb_priv) static int nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, - struct flow_block_offload *f) + struct flow_block_offload *f, void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) { struct nfp_flower_indr_block_cb_priv *cb_priv; struct nfp_flower_priv *priv = app->priv; @@ -1676,9 +1677,10 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, cb_priv->app = app; list_add(&cb_priv->list, &priv->indr_block_cb_priv); - block_cb = flow_block_cb_alloc(nfp_flower_setup_indr_block_cb, - cb_priv, cb_priv, - nfp_flower_setup_indr_tc_release); + block_cb = flow_indr_block_cb_alloc(nfp_flower_setup_indr_block_cb, + cb_priv, cb_priv, + nfp_flower_setup_indr_tc_release, + f, netdev, data, cleanup); if (IS_ERR(block_cb)) { list_del(&cb_priv->list); kfree(cb_priv); @@ -1699,7 +1701,7 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, if (!block_cb) return -ENOENT; - flow_block_cb_remove(block_cb, f); + flow_indr_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); return 0; default: @@ -1710,7 +1712,9 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, int nfp_flower_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, - enum tc_setup_type type, void *type_data) + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) { if (!nfp_fl_is_netdev_to_offload(netdev)) return -EOPNOTSUPP; @@ -1718,7 +1722,7 @@ nfp_flower_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, switch (type) { case TC_SETUP_BLOCK: return nfp_flower_setup_indr_tc_block(netdev, cb_priv, - type_data); + type_data, data, cleanup); default: return -EOPNOTSUPP; } diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index bf4343042956..1961c7982273 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -545,7 +545,9 @@ static inline void flow_block_init(struct flow_block *flow_block) } typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, - enum tc_setup_type type, void *type_data); + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)); int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv); void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 1fd781d155b4..ddd958c6040e 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -458,25 +458,6 @@ out: } EXPORT_SYMBOL(flow_indr_block_cb_alloc); -static void __flow_block_indr_binding(struct flow_block_offload *bo, - struct net_device *dev, void *data, - void (*cleanup)(struct flow_block_cb *block_cb)) -{ - struct flow_block_cb *block_cb; - - list_for_each_entry(block_cb, &bo->cb_list, list) { - switch (bo->command) { - case FLOW_BLOCK_BIND: - flow_block_indr_init(block_cb, bo, dev, data, cleanup); - list_add(&block_cb->indr.list, &flow_block_indr_list); - break; - case FLOW_BLOCK_UNBIND: - list_del(&block_cb->indr.list); - break; - } - } -} - int flow_indr_dev_setup_offload(struct net_device *dev, enum tc_setup_type type, void *data, struct flow_block_offload *bo, @@ -486,9 +467,8 @@ int flow_indr_dev_setup_offload(struct net_device *dev, mutex_lock(&flow_indr_block_lock); list_for_each_entry(this, &flow_block_indr_dev_list, list) - this->cb(dev, this->cb_priv, type, bo); + this->cb(dev, this->cb_priv, type, bo, data, cleanup); - __flow_block_indr_binding(bo, dev, data, cleanup); mutex_unlock(&flow_indr_block_lock); return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0; -- cgit v1.2.3 From a1db217861f33b8d9ea8171bcacee51186e2d5ba Mon Sep 17 00:00:00 2001 From: wenxu Date: Thu, 18 Jun 2020 20:49:10 +0800 Subject: net: flow_offload: fix flow_indr_dev_unregister path If the representor is removed, then identify the indirect flow_blocks that need to be removed by the release callback and the port representor structure. To identify the port representor structure, a new indr.cb_priv field needs to be introduced. The flow_block also needs to be removed from the driver list from the cleanup path. Fixes: 1fac52da5942 ("net: flow_offload: consolidate indirect flow_block infrastructure") Signed-off-by: wenxu Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 5 +++-- drivers/net/ethernet/netronome/nfp/flower/main.c | 2 +- drivers/net/ethernet/netronome/nfp/flower/main.h | 3 +-- drivers/net/ethernet/netronome/nfp/flower/offload.c | 8 ++++---- include/net/flow_offload.h | 4 +++- net/core/flow_offload.c | 16 ++++++++++------ net/netfilter/nf_flow_table_offload.c | 1 + net/netfilter/nf_tables_offload.c | 1 + net/sched/cls_api.c | 1 + 10 files changed, 27 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 3e3a8841689d..4a11c1e7cc02 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1911,7 +1911,7 @@ static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp, block_cb = flow_indr_block_cb_alloc(bnxt_tc_setup_indr_block_cb, cb_priv, cb_priv, bnxt_tc_setup_indr_rel, f, - netdev, data, cleanup); + netdev, data, bp, cleanup); if (IS_ERR(block_cb)) { list_del(&cb_priv->list); kfree(cb_priv); @@ -2079,7 +2079,7 @@ void bnxt_shutdown_tc(struct bnxt *bp) return; flow_indr_dev_unregister(bnxt_tc_setup_indr_cb, bp, - bnxt_tc_setup_indr_block_cb); + bnxt_tc_setup_indr_rel); rhashtable_destroy(&tc_info->flow_table); rhashtable_destroy(&tc_info->l2_table); rhashtable_destroy(&tc_info->decap_l2_table); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index d629864fc996..eefeb1cdc2ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -442,7 +442,8 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv, mlx5e_rep_indr_block_unbind, - f, netdev, data, cleanup); + f, netdev, data, rpriv, + cleanup); if (IS_ERR(block_cb)) { list_del(&indr_priv->list); kfree(indr_priv); @@ -503,7 +504,7 @@ int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) { flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv, - mlx5e_rep_indr_setup_tc_cb); + mlx5e_rep_indr_block_unbind); } #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index c39327677a7d..bb448c82cdc2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -861,7 +861,7 @@ static void nfp_flower_clean(struct nfp_app *app) flush_work(&app_priv->cmsg_work); flow_indr_dev_unregister(nfp_flower_indr_setup_tc_cb, app, - nfp_flower_setup_indr_block_cb); + nfp_flower_setup_indr_tc_release); if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM) nfp_flower_qos_cleanup(app); diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 56b3b68e273e..7f54a620acad 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -462,8 +462,7 @@ int nfp_flower_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, enum tc_setup_type type, void *type_data, void *data, void (*cleanup)(struct flow_block_cb *block_cb)); -int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, void *type_data, - void *cb_priv); +void nfp_flower_setup_indr_tc_release(void *cb_priv); void __nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv); diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 95c75250c355..d7340dc09b4c 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1619,8 +1619,8 @@ nfp_flower_indr_block_cb_priv_lookup(struct nfp_app *app, return NULL; } -int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, - void *type_data, void *cb_priv) +static int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) { struct nfp_flower_indr_block_cb_priv *priv = cb_priv; struct flow_cls_offload *flower = type_data; @@ -1637,7 +1637,7 @@ int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, } } -static void nfp_flower_setup_indr_tc_release(void *cb_priv) +void nfp_flower_setup_indr_tc_release(void *cb_priv) { struct nfp_flower_indr_block_cb_priv *priv = cb_priv; @@ -1680,7 +1680,7 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, block_cb = flow_indr_block_cb_alloc(nfp_flower_setup_indr_block_cb, cb_priv, cb_priv, nfp_flower_setup_indr_tc_release, - f, netdev, data, cleanup); + f, netdev, data, app, cleanup); if (IS_ERR(block_cb)) { list_del(&cb_priv->list); kfree(cb_priv); diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 1961c7982273..6315324b9dc2 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -450,6 +450,7 @@ struct flow_block_indr { struct net_device *dev; enum flow_block_binder_type binder_type; void *data; + void *cb_priv; void (*cleanup)(struct flow_block_cb *block_cb); }; @@ -472,6 +473,7 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, void (*release)(void *cb_priv), struct flow_block_offload *bo, struct net_device *dev, void *data, + void *indr_cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)); void flow_block_cb_free(struct flow_block_cb *block_cb); @@ -551,7 +553,7 @@ typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv); void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, - flow_setup_cb_t *setup_cb); + void (*release)(void *cb_priv)); int flow_indr_dev_setup_offload(struct net_device *dev, enum tc_setup_type type, void *data, struct flow_block_offload *bo, diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index ddd958c6040e..b739cfab796e 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -372,14 +372,15 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) } EXPORT_SYMBOL(flow_indr_dev_register); -static void __flow_block_indr_cleanup(flow_setup_cb_t *setup_cb, void *cb_priv, +static void __flow_block_indr_cleanup(void (*release)(void *cb_priv), + void *cb_priv, struct list_head *cleanup_list) { struct flow_block_cb *this, *next; list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) { - if (this->cb == setup_cb && - this->cb_priv == cb_priv) { + if (this->release == release && + this->indr.cb_priv == cb_priv) { list_move(&this->indr.list, cleanup_list); return; } @@ -397,7 +398,7 @@ static void flow_block_indr_notify(struct list_head *cleanup_list) } void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, - flow_setup_cb_t *setup_cb) + void (*release)(void *cb_priv)) { struct flow_indr_dev *this, *next, *indr_dev = NULL; LIST_HEAD(cleanup_list); @@ -418,7 +419,7 @@ void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, return; } - __flow_block_indr_cleanup(setup_cb, cb_priv, &cleanup_list); + __flow_block_indr_cleanup(release, cb_priv, &cleanup_list); mutex_unlock(&flow_indr_block_lock); flow_block_indr_notify(&cleanup_list); @@ -429,10 +430,12 @@ EXPORT_SYMBOL(flow_indr_dev_unregister); static void flow_block_indr_init(struct flow_block_cb *flow_block, struct flow_block_offload *bo, struct net_device *dev, void *data, + void *cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { flow_block->indr.binder_type = bo->binder_type; flow_block->indr.data = data; + flow_block->indr.cb_priv = cb_priv; flow_block->indr.dev = dev; flow_block->indr.cleanup = cleanup; } @@ -442,6 +445,7 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, void (*release)(void *cb_priv), struct flow_block_offload *bo, struct net_device *dev, void *data, + void *indr_cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { struct flow_block_cb *block_cb; @@ -450,7 +454,7 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, if (IS_ERR(block_cb)) goto out; - flow_block_indr_init(block_cb, bo, dev, data, cleanup); + flow_block_indr_init(block_cb, bo, dev, data, indr_cb_priv, cleanup); list_add(&block_cb->indr.list, &flow_block_indr_list); out: diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 62651e6683f6..5fff1e040168 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -950,6 +950,7 @@ static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb) nf_flow_table_gc_cleanup(flowtable, dev); down_write(&flowtable->flow_block_lock); list_del(&block_cb->list); + list_del(&block_cb->driver_list); flow_block_cb_free(block_cb); up_write(&flowtable->flow_block_lock); } diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 185fc82c99aa..c7cf1cde46de 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -296,6 +296,7 @@ static void nft_indr_block_cleanup(struct flow_block_cb *block_cb) nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND, basechain, &extack); mutex_lock(&net->nft.commit_mutex); + list_del(&block_cb->driver_list); list_move(&block_cb->list, &bo.cb_list); nft_flow_offload_unbind(&bo, basechain); mutex_unlock(&net->nft.commit_mutex); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a00a203b2ef5..f8028d73edf1 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -652,6 +652,7 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb) &block->flow_block, tcf_block_shared(block), &extack); down_write(&block->cb_lock); + list_del(&block_cb->driver_list); list_move(&block_cb->list, &bo.cb_list); up_write(&block->cb_lock); rtnl_lock(); -- cgit v1.2.3 From 481e980a7c199c5a4634fd7ea308067dd4ba75fa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 15 Jun 2020 12:57:58 +0000 Subject: mm: Allow arches to provide ptep_get() Since commit 9e343b467c70 ("READ_ONCE: Enforce atomicity for {READ,WRITE}_ONCE() memory accesses") it is not possible anymore to use READ_ONCE() to access complex page table entries like the one defined for powerpc 8xx with 16k size pages. Define a ptep_get() helper that architectures can override instead of performing a READ_ONCE() on the page table entry pointer. Fixes: 9e343b467c70 ("READ_ONCE: Enforce atomicity for {READ,WRITE}_ONCE() memory accesses") Signed-off-by: Christophe Leroy Acked-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/087fa12b6e920e32315136b998aa834f99242695.1592225558.git.christophe.leroy@csgroup.eu --- include/asm-generic/hugetlb.h | 2 +- include/linux/pgtable.h | 7 +++++++ mm/gup.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 40f85decc2ee..8e1e6244a89d 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -122,7 +122,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, #ifndef __HAVE_ARCH_HUGE_PTEP_GET static inline pte_t huge_ptep_get(pte_t *ptep) { - return READ_ONCE(*ptep); + return ptep_get(ptep); } #endif diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 32b6c52d41b9..56c1e8eb7bb0 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -249,6 +249,13 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, } #endif +#ifndef __HAVE_ARCH_PTEP_GET +static inline pte_t ptep_get(pte_t *ptep) +{ + return READ_ONCE(*ptep); +} +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, diff --git a/mm/gup.c b/mm/gup.c index 761df4944ef5..6f47697f8fb0 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2196,7 +2196,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) */ static inline pte_t gup_get_pte(pte_t *ptep) { - return READ_ONCE(*ptep); + return ptep_get(ptep); } #endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ -- cgit v1.2.3 From 4bc799dcb67066e0531004d5bdbe755bb02b5488 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 15 Jun 2020 11:12:32 -0700 Subject: security: fix the key_permission LSM hook function type Commit 8c0637e950d6 ("keys: Make the KEY_NEED_* perms an enum rather than a mask") changed the type of the key_permission callback functions, but didn't change the type of the hook, which trips indirect call checking with Control-Flow Integrity (CFI). This change fixes the issue by changing the hook type to match the functions. Fixes: 8c0637e950d6 ("keys: Make the KEY_NEED_* perms an enum rather than a mask") Signed-off-by: Sami Tolvanen Acked-by: Kees Cook Signed-off-by: James Morris --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 6791813cd439..24f6683f1cfc 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -360,7 +360,7 @@ LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred, unsigned long flags) LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, - unsigned perm) + enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **_buffer) #endif /* CONFIG_KEYS */ -- cgit v1.2.3 From 26ac10be3c80a26d03c950b7387c4b9566c260b6 Mon Sep 17 00:00:00 2001 From: Aiden Leong Date: Mon, 22 Jun 2020 20:04:58 -0700 Subject: GUE: Fix a typo Fix a typo in gue.h Signed-off-by: Aiden Leong Signed-off-by: David S. Miller --- include/net/gue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/gue.h b/include/net/gue.h index 3a6595bfa641..e42402f180b7 100644 --- a/include/net/gue.h +++ b/include/net/gue.h @@ -21,7 +21,7 @@ * | | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * - * C bit indicates contol message when set, data message when unset. + * C bit indicates control message when set, data message when unset. * For a control message, proto/ctype is interpreted as a type of * control message. For data messages, proto/ctype is the IP protocol * of the next header. -- cgit v1.2.3 From 16ecf10e815d70d11d2300243f4a3b4c7c5acac7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Jun 2020 07:13:41 +0800 Subject: iommu/vt-d: Set U/S bit in first level page table by default When using first-level translation for IOVA, currently the U/S bit in the page table is cleared which implies DMA requests with user privilege are blocked. As the result, following error messages might be observed when passing through a device to user level: DMAR: DRHD: handling fault status reg 3 DMAR: [DMA Read] Request device [41:00.0] PASID 1 fault addr 7ecdcd000 [fault reason 129] SM: U/S set 0 for first-level translation with user privilege This fixes it by setting U/S bit in the first level page table and makes IOVA over first level compatible with previous second-level translation. Fixes: b802d070a52a1 ("iommu/vt-d: Use iova over first level") Reported-by: Xin Zeng Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20200622231345.29722-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 5 ++--- include/linux/intel-iommu.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 9129663a7406..0fa394f7bbf9 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -921,7 +921,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; if (domain_use_first_level(domain)) - pteval |= DMA_FL_PTE_XD; + pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US; if (cmpxchg64(&pte->val, 0ULL, pteval)) /* Someone else set it while we were thinking; use theirs. */ free_pgtable_page(tmp_page); @@ -1951,7 +1951,6 @@ static inline void context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid) { context->hi |= pasid & ((1 << 20) - 1); - context->hi |= (1 << 20); } /* @@ -2243,7 +2242,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); if (domain_use_first_level(domain)) - attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD; + attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US; if (!sg) { sg_res = nr_pages; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4100bd224f5c..3e8fa1c7a1e6 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -41,6 +41,7 @@ #define DMA_PTE_SNP BIT_ULL(11) #define DMA_FL_PTE_PRESENT BIT_ULL(0) +#define DMA_FL_PTE_US BIT_ULL(2) #define DMA_FL_PTE_XD BIT_ULL(63) #define ADDR_WIDTH_5LEVEL (57) -- cgit v1.2.3 From 2464bc7c2897b7549146a743045089d3aea7b85b Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 23 Jun 2020 11:05:40 +0200 Subject: bridge: uapi: mrp: Fix MRP_PORT_ROLE Currently the MRP_PORT_ROLE_NONE has the value 0x2 but this is in conflict with the IEC 62439-2 standard. The standard defines the following port roles: primary (0x0), secondary(0x1), interconnect(0x2). Therefore remove the port role none. Fixes: 4714d13791f831 ("bridge: uapi: mrp: Add mrp attributes.") Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- include/uapi/linux/mrp_bridge.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h index 84f15f48a7cb..bee366540212 100644 --- a/include/uapi/linux/mrp_bridge.h +++ b/include/uapi/linux/mrp_bridge.h @@ -36,7 +36,6 @@ enum br_mrp_port_state_type { enum br_mrp_port_role_type { BR_MRP_PORT_ROLE_PRIMARY, BR_MRP_PORT_ROLE_SECONDARY, - BR_MRP_PORT_ROLE_NONE, }; enum br_mrp_tlv_header_type { -- cgit v1.2.3 From 97dd1abd026ae4e6a82fa68645928404ad483409 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:29 +0300 Subject: net: qed: fix left elements count calculation qed_chain_get_element_left{,_u32} returned 0 when the difference between producer and consumer page count was equal to the total page count. Fix this by conditional expanding of producer value (vs unconditional). This allowed to eliminate normalizaton against total page count, which was the cause of this bug. Misc: replace open-coded constants with common defines. Fixes: a91eb52abb50 ("qed: Revisit chain implementation") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 733fad7dfbed..6d15040c642c 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -207,28 +207,34 @@ static inline u32 qed_chain_get_cons_idx_u32(struct qed_chain *p_chain) static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain) { + u16 elem_per_page = p_chain->elem_per_page; + u32 prod = p_chain->u.chain16.prod_idx; + u32 cons = p_chain->u.chain16.cons_idx; u16 used; - used = (u16) (((u32)0x10000 + - (u32)p_chain->u.chain16.prod_idx) - - (u32)p_chain->u.chain16.cons_idx); + if (prod < cons) + prod += (u32)U16_MAX + 1; + + used = (u16)(prod - cons); if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= p_chain->u.chain16.prod_idx / p_chain->elem_per_page - - p_chain->u.chain16.cons_idx / p_chain->elem_per_page; + used -= prod / elem_per_page - cons / elem_per_page; return (u16)(p_chain->capacity - used); } static inline u32 qed_chain_get_elem_left_u32(struct qed_chain *p_chain) { + u16 elem_per_page = p_chain->elem_per_page; + u64 prod = p_chain->u.chain32.prod_idx; + u64 cons = p_chain->u.chain32.cons_idx; u32 used; - used = (u32) (((u64)0x100000000ULL + - (u64)p_chain->u.chain32.prod_idx) - - (u64)p_chain->u.chain32.cons_idx); + if (prod < cons) + prod += (u64)U32_MAX + 1; + + used = (u32)(prod - cons); if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= p_chain->u.chain32.prod_idx / p_chain->elem_per_page - - p_chain->u.chain32.cons_idx / p_chain->elem_per_page; + used -= (u32)(prod / elem_per_page - cons / elem_per_page); return p_chain->capacity - used; } -- cgit v1.2.3 From 23e390cdbe6f85827a43d38f9288dcd3066fa376 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Mon, 22 Jun 2020 00:21:35 +0200 Subject: security: Fix hook iteration and default value for inode_copy_up_xattr inode_copy_up_xattr returns 0 to indicate the acceptance of the xattr and 1 to reject it. If the LSM does not know about the xattr, it's expected to return -EOPNOTSUPP, which is the correct default value for this hook. BPF LSM, currently, uses 0 as the default value and thereby falsely allows all overlay fs xattributes to be copied up. The iteration logic is also updated from the "bail-on-fail" call_int_hook to continue on the non-decisive -EOPNOTSUPP and bail out on other values. Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Signed-off-by: KP Singh Signed-off-by: James Morris --- include/linux/lsm_hook_defs.h | 2 +- security/security.c | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 24f6683f1cfc..af998f93d256 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -150,7 +150,7 @@ LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer, size_t buffer_size) LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid) LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) -LSM_HOOK(int, 0, inode_copy_up_xattr, const char *name) +LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, const char *name) LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) diff --git a/security/security.c b/security/security.c index 0ce3e73edd42..70a7ad357bc6 100644 --- a/security/security.c +++ b/security/security.c @@ -1414,7 +1414,22 @@ EXPORT_SYMBOL(security_inode_copy_up); int security_inode_copy_up_xattr(const char *name) { - return call_int_hook(inode_copy_up_xattr, -EOPNOTSUPP, name); + struct security_hook_list *hp; + int rc; + + /* + * The implementation can return 0 (accept the xattr), 1 (discard the + * xattr), -EOPNOTSUPP if it does not know anything about the xattr or + * any other error code incase of an error. + */ + hlist_for_each_entry(hp, + &security_hook_heads.inode_copy_up_xattr, list) { + rc = hp->hook.inode_copy_up_xattr(name); + if (rc != LSM_RET_DEFAULT(inode_copy_up_xattr)) + return rc; + } + + return LSM_RET_DEFAULT(inode_copy_up_xattr); } EXPORT_SYMBOL(security_inode_copy_up_xattr); -- cgit v1.2.3 From bcc7f554cfa7e0ac77c7adc4027c16f4a2f99c6f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 23 Jun 2020 16:39:35 +0100 Subject: bpf: Fix formatting in documentation for BPF helpers When producing the bpf-helpers.7 man page from the documentation from the BPF user space header file, rst2man complains: :2636: (ERROR/3) Unexpected indentation. :2640: (WARNING/2) Block quote ends without a blank line; unexpected unindent. Let's fix formatting for the relevant chunk (item list in bpf_ringbuf_query()'s description), and for a couple other functions. Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200623153935.6215-1-quentin@isovalent.com --- include/uapi/linux/bpf.h | 41 +++++++++++++++++++++-------------------- tools/include/uapi/linux/bpf.h | 41 +++++++++++++++++++++-------------------- 2 files changed, 42 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 974a71342aea..8bd33050b7bb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3171,13 +3171,12 @@ union bpf_attr { * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return - * 0, on success; - * < 0, on error. + * 0 on success, or a negative error in case of failure. * * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) * Description @@ -3189,20 +3188,20 @@ union bpf_attr { * void bpf_ringbuf_submit(void *data, u64 flags) * Description * Submit reserved ring buffer sample, pointed to by *data*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return * Nothing. Always succeeds. * * void bpf_ringbuf_discard(void *data, u64 flags) * Description * Discard reserved ring buffer sample, pointed to by *data*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return * Nothing. Always succeeds. * @@ -3210,16 +3209,18 @@ union bpf_attr { * Description * Query various characteristics of provided ring buffer. What * exactly is queries is determined by *flags*: - * - BPF_RB_AVAIL_DATA - amount of data not yet consumed; - * - BPF_RB_RING_SIZE - the size of ring buffer; - * - BPF_RB_CONS_POS - consumer position (can wrap around); - * - BPF_RB_PROD_POS - producer(s) position (can wrap around); - * Data returned is just a momentary snapshots of actual values + * + * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. + * * **BPF_RB_RING_SIZE**: The size of ring buffer. + * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). + * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). + * + * Data returned is just a momentary snapshot of actual values * and could be inaccurate, so this facility should be used to * power heuristics and for reporting, not to make 100% correct * calculation. * Return - * Requested value, or 0, if flags are not recognized. + * Requested value, or 0, if *flags* are not recognized. * * int bpf_csum_level(struct sk_buff *skb, u64 level) * Description diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 974a71342aea..8bd33050b7bb 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3171,13 +3171,12 @@ union bpf_attr { * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return - * 0, on success; - * < 0, on error. + * 0 on success, or a negative error in case of failure. * * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) * Description @@ -3189,20 +3188,20 @@ union bpf_attr { * void bpf_ringbuf_submit(void *data, u64 flags) * Description * Submit reserved ring buffer sample, pointed to by *data*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return * Nothing. Always succeeds. * * void bpf_ringbuf_discard(void *data, u64 flags) * Description * Discard reserved ring buffer sample, pointed to by *data*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return * Nothing. Always succeeds. * @@ -3210,16 +3209,18 @@ union bpf_attr { * Description * Query various characteristics of provided ring buffer. What * exactly is queries is determined by *flags*: - * - BPF_RB_AVAIL_DATA - amount of data not yet consumed; - * - BPF_RB_RING_SIZE - the size of ring buffer; - * - BPF_RB_CONS_POS - consumer position (can wrap around); - * - BPF_RB_PROD_POS - producer(s) position (can wrap around); - * Data returned is just a momentary snapshots of actual values + * + * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. + * * **BPF_RB_RING_SIZE**: The size of ring buffer. + * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). + * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). + * + * Data returned is just a momentary snapshot of actual values * and could be inaccurate, so this facility should be used to * power heuristics and for reporting, not to make 100% correct * calculation. * Return - * Requested value, or 0, if flags are not recognized. + * Requested value, or 0, if *flags* are not recognized. * * int bpf_csum_level(struct sk_buff *skb, u64 level) * Description -- cgit v1.2.3 From 41b14fb8724d5a4b382a63cb4a1a61880347ccb8 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 22 Jun 2020 23:26:04 +0300 Subject: net: Do not clear the sock TX queue in sk_set_socket() Clearing the sock TX queue in sk_set_socket() might cause unexpected out-of-order transmit when called from sock_orphan(), as outstanding packets can pick a different TX queue and bypass the ones already queued. This is undesired in general. More specifically, it breaks the in-order scheduling property guarantee for device-offloaded TLS sockets. Remove the call to sk_tx_queue_clear() in sk_set_socket(), and add it explicitly only where needed. Fixes: e022f0b4a03f ("net: Introduce sk_tx_queue_mapping") Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: David S. Miller --- include/net/sock.h | 1 - net/core/sock.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index c53cc42b5ab9..3428619faae4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1848,7 +1848,6 @@ static inline int sk_rx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { - sk_tx_queue_clear(sk); sk->sk_socket = sock; } diff --git a/net/core/sock.c b/net/core/sock.c index 94391da27754..d832c650287c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1767,6 +1767,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, cgroup_sk_alloc(&sk->sk_cgrp_data); sock_update_classid(&sk->sk_cgrp_data); sock_update_netprioidx(&sk->sk_cgrp_data); + sk_tx_queue_clear(sk); } return sk; @@ -1990,6 +1991,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) */ sk_refcnt_debug_inc(newsk); sk_set_socket(newsk, NULL); + sk_tx_queue_clear(newsk); RCU_INIT_POINTER(newsk->sk_wq, NULL); if (newsk->sk_prot->sockets_allocated) -- cgit v1.2.3 From aad4b4d15f30de087c5972cfb767fadb5dbc3c52 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:13:02 +0200 Subject: scsi: libata: Fix the ata_scsi_dma_need_drain stub We not only need the stub when libata is disabled, but also if it is modular and there are built-in SAS drivers (which can happen when SCSI_SAS_ATA is disabled). Link: https://lore.kernel.org/r/20200620071302.462974-2-hch@lst.de Fixes: b8f1d1e05817 ("scsi: Wire up ata_scsi_dma_need_drain for SAS HBA drivers") Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 042e584daca7..c57bf6749681 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1092,7 +1092,7 @@ extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd, #define ATA_SCSI_COMPAT_IOCTL /* empty */ #endif extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd); -#if IS_ENABLED(CONFIG_ATA) +#if IS_REACHABLE(CONFIG_ATA) bool ata_scsi_dma_need_drain(struct request *rq); #else #define ata_scsi_dma_need_drain NULL -- cgit v1.2.3 From 0b8975bdc0cc5310d48d9bdd871cefebe1f94c99 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 3 Jun 2020 10:27:48 -0700 Subject: dmaengine: idxd: fix hw descriptor fields for delta record Fix the hw descriptor fields for delta record in user exported idxd.h header. Missing the "expected result mask" field. Reported-by: Mona Hossain Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/159120526866.65385.536565786678052944.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 1f412fbf561b..e103c1434e4b 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -110,9 +110,12 @@ struct dsa_hw_desc { uint16_t rsvd1; union { uint8_t expected_res; + /* create delta record */ struct { uint64_t delta_addr; uint32_t max_delta_size; + uint32_t delt_rsvd; + uint8_t expected_res_mask; }; uint32_t delta_rec_size; uint64_t dest2; -- cgit v1.2.3 From 3dd4ef1bdbac959bb20faec93937720ddd9917c6 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 24 Jun 2020 15:58:24 +0800 Subject: net: phy: make phy_disable_interrupts() non-static We face an issue with rtl8211f, a pin is shared between INTB and PMEB, and the PHY Register Accessible Interrupt is enabled by default, so the INTB/PMEB pin is always active in polling mode case. As Heiner pointed out "I was thinking about calling phy_disable_interrupts() in phy_init_hw(), to have a defined init state as we don't know in which state the PHY is if the PHY driver is loaded. We shouldn't assume that it's the chip power-on defaults, BIOS or boot loader could have changed this. Or in case of dual-boot systems the other OS could leave the PHY in whatever state." Make phy_disable_interrupts() non-static so that it could be used in phy_init_hw() to have a defined init state. Suggested-by: Heiner Kallweit Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 +- include/linux/phy.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 1de3938628f4..56cfae950472 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -840,7 +840,7 @@ static void phy_error(struct phy_device *phydev) * phy_disable_interrupts - Disable the PHY interrupts from the PHY side * @phydev: target phy_device struct */ -static int phy_disable_interrupts(struct phy_device *phydev) +int phy_disable_interrupts(struct phy_device *phydev) { int err; diff --git a/include/linux/phy.h b/include/linux/phy.h index 8c05d0fb5c00..b693b609b2f5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1416,6 +1416,7 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); int phy_do_ioctl_running(struct net_device *dev, struct ifreq *ifr, int cmd); +int phy_disable_interrupts(struct phy_device *phydev); void phy_request_interrupt(struct phy_device *phydev); void phy_free_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); -- cgit v1.2.3 From 1cbf90985f7448f1b0dd630e17ee1070f7d58665 Mon Sep 17 00:00:00 2001 From: David Wilder Date: Mon, 22 Jun 2020 10:10:11 -0700 Subject: netfilter: iptables: Split ipt_unregister_table() into pre_exit and exit helpers. The pre_exit will un-register the underlying hook and .exit will do the table freeing. The netns core does an unconditional synchronize_rcu after the pre_exit hooks insuring no packets are in flight that have picked up the pointer before completing the un-register. Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default") Signed-off-by: David Wilder Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4/ip_tables.h | 6 ++++++ net/ipv4/netfilter/ip_tables.c | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index b394bd4f68a3..c4676d6feeff 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -25,6 +25,12 @@ int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res); + +void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); + +void ipt_unregister_table_exit(struct net *net, struct xt_table *table); + void ipt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c2670eaa74e6..5bf9fa06aee0 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1797,11 +1797,22 @@ out_free: return ret; } +void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) +{ + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} + +void ipt_unregister_table_exit(struct net *net, struct xt_table *table) +{ + __ipt_unregister_table(net, table); +} + void ipt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { if (ops) - nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + ipt_unregister_table_pre_exit(net, table, ops); __ipt_unregister_table(net, table); } @@ -1958,6 +1969,8 @@ static void __exit ip_tables_fini(void) EXPORT_SYMBOL(ipt_register_table); EXPORT_SYMBOL(ipt_unregister_table); +EXPORT_SYMBOL(ipt_unregister_table_pre_exit); +EXPORT_SYMBOL(ipt_unregister_table_exit); EXPORT_SYMBOL(ipt_do_table); module_init(ip_tables_init); module_exit(ip_tables_fini); -- cgit v1.2.3 From 57ea5f18882a3d7cf6135fa8c949a37c89395837 Mon Sep 17 00:00:00 2001 From: David Wilder Date: Mon, 22 Jun 2020 10:10:13 -0700 Subject: netfilter: ip6tables: Split ip6t_unregister_table() into pre_exit and exit helpers. The pre_exit will un-register the underlying hook and .exit will do the table freeing. The netns core does an unconditional synchronize_rcu after the pre_exit hooks insuring no packets are in flight that have picked up the pointer before completing the un-register. Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default") Signed-off-by: David Wilder Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6/ip6_tables.h | 3 +++ net/ipv6/netfilter/ip6_tables.c | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 8225f7821a29..1547d5f9ae06 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -29,6 +29,9 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, const struct nf_hook_ops *ops, struct xt_table **res); void ip6t_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops); +void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); +void ip6t_unregister_table_exit(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e27393498ecb..e96a431549bc 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1807,11 +1807,22 @@ out_free: return ret; } +void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) +{ + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} + +void ip6t_unregister_table_exit(struct net *net, struct xt_table *table) +{ + __ip6t_unregister_table(net, table); +} + void ip6t_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { if (ops) - nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + ip6t_unregister_table_pre_exit(net, table, ops); __ip6t_unregister_table(net, table); } @@ -1969,6 +1980,8 @@ static void __exit ip6_tables_fini(void) EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); +EXPORT_SYMBOL(ip6t_unregister_table_pre_exit); +EXPORT_SYMBOL(ip6t_unregister_table_exit); EXPORT_SYMBOL(ip6t_do_table); module_init(ip6_tables_init); -- cgit v1.2.3 From 0c1a7f13c9ec1ceb18d97ef4b1dd20ec71ffba31 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Wed, 17 Jun 2020 17:01:32 +0530 Subject: ieee80211: Add missing and new AKM suite selector definitions Add the definitions for missing AKM selectors defined in IEEE P802.11-REVmd/D3.0, table 9-151. These definitions will be used by various drivers that support these new AKM suites. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20200617113132.13477-1-vjakkam@codeaurora.org Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index fe15f831841b..9f732499ea88 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3333,13 +3333,17 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) #define WLAN_AKM_SUITE_SAE SUITE(0x000FAC, 8) #define WLAN_AKM_SUITE_FT_OVER_SAE SUITE(0x000FAC, 9) +#define WLAN_AKM_SUITE_AP_PEER_KEY SUITE(0x000FAC, 10) #define WLAN_AKM_SUITE_8021X_SUITE_B SUITE(0x000FAC, 11) #define WLAN_AKM_SUITE_8021X_SUITE_B_192 SUITE(0x000FAC, 12) +#define WLAN_AKM_SUITE_FT_8021X_SHA384 SUITE(0x000FAC, 13) #define WLAN_AKM_SUITE_FILS_SHA256 SUITE(0x000FAC, 14) #define WLAN_AKM_SUITE_FILS_SHA384 SUITE(0x000FAC, 15) #define WLAN_AKM_SUITE_FT_FILS_SHA256 SUITE(0x000FAC, 16) #define WLAN_AKM_SUITE_FT_FILS_SHA384 SUITE(0x000FAC, 17) #define WLAN_AKM_SUITE_OWE SUITE(0x000FAC, 18) +#define WLAN_AKM_SUITE_FT_PSK_SHA384 SUITE(0x000FAC, 19) +#define WLAN_AKM_SUITE_PSK_SHA384 SUITE(0x000FAC, 20) #define WLAN_MAX_KEY_LEN 32 -- cgit v1.2.3 From a9b59159d338d414acaa8e2f569d129d51c76452 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 24 Jun 2020 15:20:39 -0700 Subject: bpf: Do not allow btf_ctx_access with __int128 types To ensure btf_ctx_access() is safe the verifier checks that the BTF arg type is an int, enum, or pointer. When the function does the BTF arg lookup it uses the calculation 'arg = off / 8' using the fact that registers are 8B. This requires that the first arg is in the first reg, the second in the second, and so on. However, for __int128 the arg will consume two registers by default LLVM implementation. So this will cause the arg layout assumed by the 'arg = off / 8' calculation to be incorrect. Because __int128 is uncommon this patch applies the easiest fix and will force int types to be sizeof(u64) or smaller so that they will fit in a single register. v2: remove unneeded parens per Andrii's feedback Fixes: 9e15db66136a1 ("bpf: Implement accurate raw_tp context access via BTF") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/159303723962.11287.13309537171132420717.stgit@john-Precision-5820-Tower --- include/linux/btf.h | 5 +++++ kernel/bpf/btf.c | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index 5c1ea99b480f..8b81fbb4497c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -82,6 +82,11 @@ static inline bool btf_type_is_int(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_INT; } +static inline bool btf_type_is_small_int(const struct btf_type *t) +{ + return btf_type_is_int(t) && t->size <= sizeof(u64); +} + static inline bool btf_type_is_enum(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 58c9af1d4808..9a1a98dd9e97 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3746,7 +3746,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, return false; t = btf_type_skip_modifiers(btf, t->type, NULL); - if (!btf_type_is_int(t)) { + if (!btf_type_is_small_int(t)) { bpf_log(log, "ret type %s not allowed for fmod_ret\n", btf_kind_str[BTF_INFO_KIND(t->info)]); @@ -3768,7 +3768,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, /* skip modifiers */ while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (btf_type_is_int(t) || btf_type_is_enum(t)) + if (btf_type_is_small_int(t) || btf_type_is_enum(t)) /* accessing a scalar */ return true; if (!btf_type_is_ptr(t)) { -- cgit v1.2.3 From 5faafd5685764e4d75376aceac91fdf75b3b16f8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jun 2020 15:55:14 +0200 Subject: locking/atomics: Provide the arch_atomic_ interface to generic code Architectures with instrumented (KASAN/KCSAN) atomic operations natively provide arch_atomic_ variants that are not instrumented. It turns out that some generic code also requires arch_atomic_ in order to avoid instrumentation, so provide the arch_atomic_ interface as a direct map into the regular atomic_ interface for non-instrumented architectures. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney --- include/linux/atomic-fallback.h | 236 +++++++++++++++++++++++++++++++++- scripts/atomic/gen-atomic-fallback.sh | 31 +++++ 2 files changed, 266 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h index 2c4927bf7b8d..fd525c71d676 100644 --- a/include/linux/atomic-fallback.h +++ b/include/linux/atomic-fallback.h @@ -77,6 +77,9 @@ #endif /* cmpxchg64_relaxed */ +#define arch_atomic_read atomic_read +#define arch_atomic_read_acquire atomic_read_acquire + #ifndef atomic_read_acquire static __always_inline int atomic_read_acquire(const atomic_t *v) @@ -86,6 +89,9 @@ atomic_read_acquire(const atomic_t *v) #define atomic_read_acquire atomic_read_acquire #endif +#define arch_atomic_set atomic_set +#define arch_atomic_set_release atomic_set_release + #ifndef atomic_set_release static __always_inline void atomic_set_release(atomic_t *v, int i) @@ -95,6 +101,13 @@ atomic_set_release(atomic_t *v, int i) #define atomic_set_release atomic_set_release #endif +#define arch_atomic_add atomic_add + +#define arch_atomic_add_return atomic_add_return +#define arch_atomic_add_return_acquire atomic_add_return_acquire +#define arch_atomic_add_return_release atomic_add_return_release +#define arch_atomic_add_return_relaxed atomic_add_return_relaxed + #ifndef atomic_add_return_relaxed #define atomic_add_return_acquire atomic_add_return #define atomic_add_return_release atomic_add_return @@ -137,6 +150,11 @@ atomic_add_return(int i, atomic_t *v) #endif /* atomic_add_return_relaxed */ +#define arch_atomic_fetch_add atomic_fetch_add +#define arch_atomic_fetch_add_acquire atomic_fetch_add_acquire +#define arch_atomic_fetch_add_release atomic_fetch_add_release +#define arch_atomic_fetch_add_relaxed atomic_fetch_add_relaxed + #ifndef atomic_fetch_add_relaxed #define atomic_fetch_add_acquire atomic_fetch_add #define atomic_fetch_add_release atomic_fetch_add @@ -179,6 +197,13 @@ atomic_fetch_add(int i, atomic_t *v) #endif /* atomic_fetch_add_relaxed */ +#define arch_atomic_sub atomic_sub + +#define arch_atomic_sub_return atomic_sub_return +#define arch_atomic_sub_return_acquire atomic_sub_return_acquire +#define arch_atomic_sub_return_release atomic_sub_return_release +#define arch_atomic_sub_return_relaxed atomic_sub_return_relaxed + #ifndef atomic_sub_return_relaxed #define atomic_sub_return_acquire atomic_sub_return #define atomic_sub_return_release atomic_sub_return @@ -221,6 +246,11 @@ atomic_sub_return(int i, atomic_t *v) #endif /* atomic_sub_return_relaxed */ +#define arch_atomic_fetch_sub atomic_fetch_sub +#define arch_atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#define arch_atomic_fetch_sub_release atomic_fetch_sub_release +#define arch_atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + #ifndef atomic_fetch_sub_relaxed #define atomic_fetch_sub_acquire atomic_fetch_sub #define atomic_fetch_sub_release atomic_fetch_sub @@ -263,6 +293,8 @@ atomic_fetch_sub(int i, atomic_t *v) #endif /* atomic_fetch_sub_relaxed */ +#define arch_atomic_inc atomic_inc + #ifndef atomic_inc static __always_inline void atomic_inc(atomic_t *v) @@ -272,6 +304,11 @@ atomic_inc(atomic_t *v) #define atomic_inc atomic_inc #endif +#define arch_atomic_inc_return atomic_inc_return +#define arch_atomic_inc_return_acquire atomic_inc_return_acquire +#define arch_atomic_inc_return_release atomic_inc_return_release +#define arch_atomic_inc_return_relaxed atomic_inc_return_relaxed + #ifndef atomic_inc_return_relaxed #ifdef atomic_inc_return #define atomic_inc_return_acquire atomic_inc_return @@ -353,6 +390,11 @@ atomic_inc_return(atomic_t *v) #endif /* atomic_inc_return_relaxed */ +#define arch_atomic_fetch_inc atomic_fetch_inc +#define arch_atomic_fetch_inc_acquire atomic_fetch_inc_acquire +#define arch_atomic_fetch_inc_release atomic_fetch_inc_release +#define arch_atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed + #ifndef atomic_fetch_inc_relaxed #ifdef atomic_fetch_inc #define atomic_fetch_inc_acquire atomic_fetch_inc @@ -434,6 +476,8 @@ atomic_fetch_inc(atomic_t *v) #endif /* atomic_fetch_inc_relaxed */ +#define arch_atomic_dec atomic_dec + #ifndef atomic_dec static __always_inline void atomic_dec(atomic_t *v) @@ -443,6 +487,11 @@ atomic_dec(atomic_t *v) #define atomic_dec atomic_dec #endif +#define arch_atomic_dec_return atomic_dec_return +#define arch_atomic_dec_return_acquire atomic_dec_return_acquire +#define arch_atomic_dec_return_release atomic_dec_return_release +#define arch_atomic_dec_return_relaxed atomic_dec_return_relaxed + #ifndef atomic_dec_return_relaxed #ifdef atomic_dec_return #define atomic_dec_return_acquire atomic_dec_return @@ -524,6 +573,11 @@ atomic_dec_return(atomic_t *v) #endif /* atomic_dec_return_relaxed */ +#define arch_atomic_fetch_dec atomic_fetch_dec +#define arch_atomic_fetch_dec_acquire atomic_fetch_dec_acquire +#define arch_atomic_fetch_dec_release atomic_fetch_dec_release +#define arch_atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed + #ifndef atomic_fetch_dec_relaxed #ifdef atomic_fetch_dec #define atomic_fetch_dec_acquire atomic_fetch_dec @@ -605,6 +659,13 @@ atomic_fetch_dec(atomic_t *v) #endif /* atomic_fetch_dec_relaxed */ +#define arch_atomic_and atomic_and + +#define arch_atomic_fetch_and atomic_fetch_and +#define arch_atomic_fetch_and_acquire atomic_fetch_and_acquire +#define arch_atomic_fetch_and_release atomic_fetch_and_release +#define arch_atomic_fetch_and_relaxed atomic_fetch_and_relaxed + #ifndef atomic_fetch_and_relaxed #define atomic_fetch_and_acquire atomic_fetch_and #define atomic_fetch_and_release atomic_fetch_and @@ -647,6 +708,8 @@ atomic_fetch_and(int i, atomic_t *v) #endif /* atomic_fetch_and_relaxed */ +#define arch_atomic_andnot atomic_andnot + #ifndef atomic_andnot static __always_inline void atomic_andnot(int i, atomic_t *v) @@ -656,6 +719,11 @@ atomic_andnot(int i, atomic_t *v) #define atomic_andnot atomic_andnot #endif +#define arch_atomic_fetch_andnot atomic_fetch_andnot +#define arch_atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#define arch_atomic_fetch_andnot_release atomic_fetch_andnot_release +#define arch_atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed + #ifndef atomic_fetch_andnot_relaxed #ifdef atomic_fetch_andnot #define atomic_fetch_andnot_acquire atomic_fetch_andnot @@ -737,6 +805,13 @@ atomic_fetch_andnot(int i, atomic_t *v) #endif /* atomic_fetch_andnot_relaxed */ +#define arch_atomic_or atomic_or + +#define arch_atomic_fetch_or atomic_fetch_or +#define arch_atomic_fetch_or_acquire atomic_fetch_or_acquire +#define arch_atomic_fetch_or_release atomic_fetch_or_release +#define arch_atomic_fetch_or_relaxed atomic_fetch_or_relaxed + #ifndef atomic_fetch_or_relaxed #define atomic_fetch_or_acquire atomic_fetch_or #define atomic_fetch_or_release atomic_fetch_or @@ -779,6 +854,13 @@ atomic_fetch_or(int i, atomic_t *v) #endif /* atomic_fetch_or_relaxed */ +#define arch_atomic_xor atomic_xor + +#define arch_atomic_fetch_xor atomic_fetch_xor +#define arch_atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#define arch_atomic_fetch_xor_release atomic_fetch_xor_release +#define arch_atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + #ifndef atomic_fetch_xor_relaxed #define atomic_fetch_xor_acquire atomic_fetch_xor #define atomic_fetch_xor_release atomic_fetch_xor @@ -821,6 +903,11 @@ atomic_fetch_xor(int i, atomic_t *v) #endif /* atomic_fetch_xor_relaxed */ +#define arch_atomic_xchg atomic_xchg +#define arch_atomic_xchg_acquire atomic_xchg_acquire +#define arch_atomic_xchg_release atomic_xchg_release +#define arch_atomic_xchg_relaxed atomic_xchg_relaxed + #ifndef atomic_xchg_relaxed #define atomic_xchg_acquire atomic_xchg #define atomic_xchg_release atomic_xchg @@ -863,6 +950,11 @@ atomic_xchg(atomic_t *v, int i) #endif /* atomic_xchg_relaxed */ +#define arch_atomic_cmpxchg atomic_cmpxchg +#define arch_atomic_cmpxchg_acquire atomic_cmpxchg_acquire +#define arch_atomic_cmpxchg_release atomic_cmpxchg_release +#define arch_atomic_cmpxchg_relaxed atomic_cmpxchg_relaxed + #ifndef atomic_cmpxchg_relaxed #define atomic_cmpxchg_acquire atomic_cmpxchg #define atomic_cmpxchg_release atomic_cmpxchg @@ -905,6 +997,11 @@ atomic_cmpxchg(atomic_t *v, int old, int new) #endif /* atomic_cmpxchg_relaxed */ +#define arch_atomic_try_cmpxchg atomic_try_cmpxchg +#define arch_atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire +#define arch_atomic_try_cmpxchg_release atomic_try_cmpxchg_release +#define arch_atomic_try_cmpxchg_relaxed atomic_try_cmpxchg_relaxed + #ifndef atomic_try_cmpxchg_relaxed #ifdef atomic_try_cmpxchg #define atomic_try_cmpxchg_acquire atomic_try_cmpxchg @@ -1002,6 +1099,8 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new) #endif /* atomic_try_cmpxchg_relaxed */ +#define arch_atomic_sub_and_test atomic_sub_and_test + #ifndef atomic_sub_and_test /** * atomic_sub_and_test - subtract value from variable and test result @@ -1020,6 +1119,8 @@ atomic_sub_and_test(int i, atomic_t *v) #define atomic_sub_and_test atomic_sub_and_test #endif +#define arch_atomic_dec_and_test atomic_dec_and_test + #ifndef atomic_dec_and_test /** * atomic_dec_and_test - decrement and test @@ -1037,6 +1138,8 @@ atomic_dec_and_test(atomic_t *v) #define atomic_dec_and_test atomic_dec_and_test #endif +#define arch_atomic_inc_and_test atomic_inc_and_test + #ifndef atomic_inc_and_test /** * atomic_inc_and_test - increment and test @@ -1054,6 +1157,8 @@ atomic_inc_and_test(atomic_t *v) #define atomic_inc_and_test atomic_inc_and_test #endif +#define arch_atomic_add_negative atomic_add_negative + #ifndef atomic_add_negative /** * atomic_add_negative - add and test if negative @@ -1072,6 +1177,8 @@ atomic_add_negative(int i, atomic_t *v) #define atomic_add_negative atomic_add_negative #endif +#define arch_atomic_fetch_add_unless atomic_fetch_add_unless + #ifndef atomic_fetch_add_unless /** * atomic_fetch_add_unless - add unless the number is already a given value @@ -1097,6 +1204,8 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u) #define atomic_fetch_add_unless atomic_fetch_add_unless #endif +#define arch_atomic_add_unless atomic_add_unless + #ifndef atomic_add_unless /** * atomic_add_unless - add unless the number is already a given value @@ -1115,6 +1224,8 @@ atomic_add_unless(atomic_t *v, int a, int u) #define atomic_add_unless atomic_add_unless #endif +#define arch_atomic_inc_not_zero atomic_inc_not_zero + #ifndef atomic_inc_not_zero /** * atomic_inc_not_zero - increment unless the number is zero @@ -1131,6 +1242,8 @@ atomic_inc_not_zero(atomic_t *v) #define atomic_inc_not_zero atomic_inc_not_zero #endif +#define arch_atomic_inc_unless_negative atomic_inc_unless_negative + #ifndef atomic_inc_unless_negative static __always_inline bool atomic_inc_unless_negative(atomic_t *v) @@ -1147,6 +1260,8 @@ atomic_inc_unless_negative(atomic_t *v) #define atomic_inc_unless_negative atomic_inc_unless_negative #endif +#define arch_atomic_dec_unless_positive atomic_dec_unless_positive + #ifndef atomic_dec_unless_positive static __always_inline bool atomic_dec_unless_positive(atomic_t *v) @@ -1163,6 +1278,8 @@ atomic_dec_unless_positive(atomic_t *v) #define atomic_dec_unless_positive atomic_dec_unless_positive #endif +#define arch_atomic_dec_if_positive atomic_dec_if_positive + #ifndef atomic_dec_if_positive static __always_inline int atomic_dec_if_positive(atomic_t *v) @@ -1184,6 +1301,9 @@ atomic_dec_if_positive(atomic_t *v) #include #endif +#define arch_atomic64_read atomic64_read +#define arch_atomic64_read_acquire atomic64_read_acquire + #ifndef atomic64_read_acquire static __always_inline s64 atomic64_read_acquire(const atomic64_t *v) @@ -1193,6 +1313,9 @@ atomic64_read_acquire(const atomic64_t *v) #define atomic64_read_acquire atomic64_read_acquire #endif +#define arch_atomic64_set atomic64_set +#define arch_atomic64_set_release atomic64_set_release + #ifndef atomic64_set_release static __always_inline void atomic64_set_release(atomic64_t *v, s64 i) @@ -1202,6 +1325,13 @@ atomic64_set_release(atomic64_t *v, s64 i) #define atomic64_set_release atomic64_set_release #endif +#define arch_atomic64_add atomic64_add + +#define arch_atomic64_add_return atomic64_add_return +#define arch_atomic64_add_return_acquire atomic64_add_return_acquire +#define arch_atomic64_add_return_release atomic64_add_return_release +#define arch_atomic64_add_return_relaxed atomic64_add_return_relaxed + #ifndef atomic64_add_return_relaxed #define atomic64_add_return_acquire atomic64_add_return #define atomic64_add_return_release atomic64_add_return @@ -1244,6 +1374,11 @@ atomic64_add_return(s64 i, atomic64_t *v) #endif /* atomic64_add_return_relaxed */ +#define arch_atomic64_fetch_add atomic64_fetch_add +#define arch_atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#define arch_atomic64_fetch_add_release atomic64_fetch_add_release +#define arch_atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed + #ifndef atomic64_fetch_add_relaxed #define atomic64_fetch_add_acquire atomic64_fetch_add #define atomic64_fetch_add_release atomic64_fetch_add @@ -1286,6 +1421,13 @@ atomic64_fetch_add(s64 i, atomic64_t *v) #endif /* atomic64_fetch_add_relaxed */ +#define arch_atomic64_sub atomic64_sub + +#define arch_atomic64_sub_return atomic64_sub_return +#define arch_atomic64_sub_return_acquire atomic64_sub_return_acquire +#define arch_atomic64_sub_return_release atomic64_sub_return_release +#define arch_atomic64_sub_return_relaxed atomic64_sub_return_relaxed + #ifndef atomic64_sub_return_relaxed #define atomic64_sub_return_acquire atomic64_sub_return #define atomic64_sub_return_release atomic64_sub_return @@ -1328,6 +1470,11 @@ atomic64_sub_return(s64 i, atomic64_t *v) #endif /* atomic64_sub_return_relaxed */ +#define arch_atomic64_fetch_sub atomic64_fetch_sub +#define arch_atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#define arch_atomic64_fetch_sub_release atomic64_fetch_sub_release +#define arch_atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + #ifndef atomic64_fetch_sub_relaxed #define atomic64_fetch_sub_acquire atomic64_fetch_sub #define atomic64_fetch_sub_release atomic64_fetch_sub @@ -1370,6 +1517,8 @@ atomic64_fetch_sub(s64 i, atomic64_t *v) #endif /* atomic64_fetch_sub_relaxed */ +#define arch_atomic64_inc atomic64_inc + #ifndef atomic64_inc static __always_inline void atomic64_inc(atomic64_t *v) @@ -1379,6 +1528,11 @@ atomic64_inc(atomic64_t *v) #define atomic64_inc atomic64_inc #endif +#define arch_atomic64_inc_return atomic64_inc_return +#define arch_atomic64_inc_return_acquire atomic64_inc_return_acquire +#define arch_atomic64_inc_return_release atomic64_inc_return_release +#define arch_atomic64_inc_return_relaxed atomic64_inc_return_relaxed + #ifndef atomic64_inc_return_relaxed #ifdef atomic64_inc_return #define atomic64_inc_return_acquire atomic64_inc_return @@ -1460,6 +1614,11 @@ atomic64_inc_return(atomic64_t *v) #endif /* atomic64_inc_return_relaxed */ +#define arch_atomic64_fetch_inc atomic64_fetch_inc +#define arch_atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire +#define arch_atomic64_fetch_inc_release atomic64_fetch_inc_release +#define arch_atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed + #ifndef atomic64_fetch_inc_relaxed #ifdef atomic64_fetch_inc #define atomic64_fetch_inc_acquire atomic64_fetch_inc @@ -1541,6 +1700,8 @@ atomic64_fetch_inc(atomic64_t *v) #endif /* atomic64_fetch_inc_relaxed */ +#define arch_atomic64_dec atomic64_dec + #ifndef atomic64_dec static __always_inline void atomic64_dec(atomic64_t *v) @@ -1550,6 +1711,11 @@ atomic64_dec(atomic64_t *v) #define atomic64_dec atomic64_dec #endif +#define arch_atomic64_dec_return atomic64_dec_return +#define arch_atomic64_dec_return_acquire atomic64_dec_return_acquire +#define arch_atomic64_dec_return_release atomic64_dec_return_release +#define arch_atomic64_dec_return_relaxed atomic64_dec_return_relaxed + #ifndef atomic64_dec_return_relaxed #ifdef atomic64_dec_return #define atomic64_dec_return_acquire atomic64_dec_return @@ -1631,6 +1797,11 @@ atomic64_dec_return(atomic64_t *v) #endif /* atomic64_dec_return_relaxed */ +#define arch_atomic64_fetch_dec atomic64_fetch_dec +#define arch_atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire +#define arch_atomic64_fetch_dec_release atomic64_fetch_dec_release +#define arch_atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed + #ifndef atomic64_fetch_dec_relaxed #ifdef atomic64_fetch_dec #define atomic64_fetch_dec_acquire atomic64_fetch_dec @@ -1712,6 +1883,13 @@ atomic64_fetch_dec(atomic64_t *v) #endif /* atomic64_fetch_dec_relaxed */ +#define arch_atomic64_and atomic64_and + +#define arch_atomic64_fetch_and atomic64_fetch_and +#define arch_atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#define arch_atomic64_fetch_and_release atomic64_fetch_and_release +#define arch_atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed + #ifndef atomic64_fetch_and_relaxed #define atomic64_fetch_and_acquire atomic64_fetch_and #define atomic64_fetch_and_release atomic64_fetch_and @@ -1754,6 +1932,8 @@ atomic64_fetch_and(s64 i, atomic64_t *v) #endif /* atomic64_fetch_and_relaxed */ +#define arch_atomic64_andnot atomic64_andnot + #ifndef atomic64_andnot static __always_inline void atomic64_andnot(s64 i, atomic64_t *v) @@ -1763,6 +1943,11 @@ atomic64_andnot(s64 i, atomic64_t *v) #define atomic64_andnot atomic64_andnot #endif +#define arch_atomic64_fetch_andnot atomic64_fetch_andnot +#define arch_atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#define arch_atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#define arch_atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed + #ifndef atomic64_fetch_andnot_relaxed #ifdef atomic64_fetch_andnot #define atomic64_fetch_andnot_acquire atomic64_fetch_andnot @@ -1844,6 +2029,13 @@ atomic64_fetch_andnot(s64 i, atomic64_t *v) #endif /* atomic64_fetch_andnot_relaxed */ +#define arch_atomic64_or atomic64_or + +#define arch_atomic64_fetch_or atomic64_fetch_or +#define arch_atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#define arch_atomic64_fetch_or_release atomic64_fetch_or_release +#define arch_atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed + #ifndef atomic64_fetch_or_relaxed #define atomic64_fetch_or_acquire atomic64_fetch_or #define atomic64_fetch_or_release atomic64_fetch_or @@ -1886,6 +2078,13 @@ atomic64_fetch_or(s64 i, atomic64_t *v) #endif /* atomic64_fetch_or_relaxed */ +#define arch_atomic64_xor atomic64_xor + +#define arch_atomic64_fetch_xor atomic64_fetch_xor +#define arch_atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#define arch_atomic64_fetch_xor_release atomic64_fetch_xor_release +#define arch_atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #ifndef atomic64_fetch_xor_relaxed #define atomic64_fetch_xor_acquire atomic64_fetch_xor #define atomic64_fetch_xor_release atomic64_fetch_xor @@ -1928,6 +2127,11 @@ atomic64_fetch_xor(s64 i, atomic64_t *v) #endif /* atomic64_fetch_xor_relaxed */ +#define arch_atomic64_xchg atomic64_xchg +#define arch_atomic64_xchg_acquire atomic64_xchg_acquire +#define arch_atomic64_xchg_release atomic64_xchg_release +#define arch_atomic64_xchg_relaxed atomic64_xchg_relaxed + #ifndef atomic64_xchg_relaxed #define atomic64_xchg_acquire atomic64_xchg #define atomic64_xchg_release atomic64_xchg @@ -1970,6 +2174,11 @@ atomic64_xchg(atomic64_t *v, s64 i) #endif /* atomic64_xchg_relaxed */ +#define arch_atomic64_cmpxchg atomic64_cmpxchg +#define arch_atomic64_cmpxchg_acquire atomic64_cmpxchg_acquire +#define arch_atomic64_cmpxchg_release atomic64_cmpxchg_release +#define arch_atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed + #ifndef atomic64_cmpxchg_relaxed #define atomic64_cmpxchg_acquire atomic64_cmpxchg #define atomic64_cmpxchg_release atomic64_cmpxchg @@ -2012,6 +2221,11 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) #endif /* atomic64_cmpxchg_relaxed */ +#define arch_atomic64_try_cmpxchg atomic64_try_cmpxchg +#define arch_atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire +#define arch_atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release +#define arch_atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg_relaxed + #ifndef atomic64_try_cmpxchg_relaxed #ifdef atomic64_try_cmpxchg #define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg @@ -2109,6 +2323,8 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) #endif /* atomic64_try_cmpxchg_relaxed */ +#define arch_atomic64_sub_and_test atomic64_sub_and_test + #ifndef atomic64_sub_and_test /** * atomic64_sub_and_test - subtract value from variable and test result @@ -2127,6 +2343,8 @@ atomic64_sub_and_test(s64 i, atomic64_t *v) #define atomic64_sub_and_test atomic64_sub_and_test #endif +#define arch_atomic64_dec_and_test atomic64_dec_and_test + #ifndef atomic64_dec_and_test /** * atomic64_dec_and_test - decrement and test @@ -2144,6 +2362,8 @@ atomic64_dec_and_test(atomic64_t *v) #define atomic64_dec_and_test atomic64_dec_and_test #endif +#define arch_atomic64_inc_and_test atomic64_inc_and_test + #ifndef atomic64_inc_and_test /** * atomic64_inc_and_test - increment and test @@ -2161,6 +2381,8 @@ atomic64_inc_and_test(atomic64_t *v) #define atomic64_inc_and_test atomic64_inc_and_test #endif +#define arch_atomic64_add_negative atomic64_add_negative + #ifndef atomic64_add_negative /** * atomic64_add_negative - add and test if negative @@ -2179,6 +2401,8 @@ atomic64_add_negative(s64 i, atomic64_t *v) #define atomic64_add_negative atomic64_add_negative #endif +#define arch_atomic64_fetch_add_unless atomic64_fetch_add_unless + #ifndef atomic64_fetch_add_unless /** * atomic64_fetch_add_unless - add unless the number is already a given value @@ -2204,6 +2428,8 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) #define atomic64_fetch_add_unless atomic64_fetch_add_unless #endif +#define arch_atomic64_add_unless atomic64_add_unless + #ifndef atomic64_add_unless /** * atomic64_add_unless - add unless the number is already a given value @@ -2222,6 +2448,8 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u) #define atomic64_add_unless atomic64_add_unless #endif +#define arch_atomic64_inc_not_zero atomic64_inc_not_zero + #ifndef atomic64_inc_not_zero /** * atomic64_inc_not_zero - increment unless the number is zero @@ -2238,6 +2466,8 @@ atomic64_inc_not_zero(atomic64_t *v) #define atomic64_inc_not_zero atomic64_inc_not_zero #endif +#define arch_atomic64_inc_unless_negative atomic64_inc_unless_negative + #ifndef atomic64_inc_unless_negative static __always_inline bool atomic64_inc_unless_negative(atomic64_t *v) @@ -2254,6 +2484,8 @@ atomic64_inc_unless_negative(atomic64_t *v) #define atomic64_inc_unless_negative atomic64_inc_unless_negative #endif +#define arch_atomic64_dec_unless_positive atomic64_dec_unless_positive + #ifndef atomic64_dec_unless_positive static __always_inline bool atomic64_dec_unless_positive(atomic64_t *v) @@ -2270,6 +2502,8 @@ atomic64_dec_unless_positive(atomic64_t *v) #define atomic64_dec_unless_positive atomic64_dec_unless_positive #endif +#define arch_atomic64_dec_if_positive atomic64_dec_if_positive + #ifndef atomic64_dec_if_positive static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v) @@ -2288,4 +2522,4 @@ atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 1fac0941c79bf0ae100723cc2ac9b94061f0b67a +// 9d95b56f98d82a2a26c7b79ccdd0c47572d50a6f diff --git a/scripts/atomic/gen-atomic-fallback.sh b/scripts/atomic/gen-atomic-fallback.sh index 0fd1cf0c2b94..693dfa1de430 100755 --- a/scripts/atomic/gen-atomic-fallback.sh +++ b/scripts/atomic/gen-atomic-fallback.sh @@ -58,6 +58,21 @@ cat << EOF EOF } +gen_proto_order_variant() +{ + local meta="$1"; shift + local pfx="$1"; shift + local name="$1"; shift + local sfx="$1"; shift + local order="$1"; shift + local arch="$1" + local atomic="$2" + + local basename="${arch}${atomic}_${pfx}${name}${sfx}" + + printf "#define arch_${basename}${order} ${basename}${order}\n" +} + #gen_proto_order_variants(meta, pfx, name, sfx, arch, atomic, int, args...) gen_proto_order_variants() { @@ -72,6 +87,22 @@ gen_proto_order_variants() local template="$(find_fallback_template "${pfx}" "${name}" "${sfx}" "${order}")" + if [ -z "$arch" ]; then + gen_proto_order_variant "${meta}" "${pfx}" "${name}" "${sfx}" "" "$@" + + if meta_has_acquire "${meta}"; then + gen_proto_order_variant "${meta}" "${pfx}" "${name}" "${sfx}" "_acquire" "$@" + fi + if meta_has_release "${meta}"; then + gen_proto_order_variant "${meta}" "${pfx}" "${name}" "${sfx}" "_release" "$@" + fi + if meta_has_relaxed "${meta}"; then + gen_proto_order_variant "${meta}" "${pfx}" "${name}" "${sfx}" "_relaxed" "$@" + fi + + echo "" + fi + # If we don't have relaxed atomics, then we don't bother with ordering fallbacks # read_acquire and set_release need to be templated, though if ! meta_has_relaxed "${meta}"; then -- cgit v1.2.3 From b58e733fd774f3f4b49d9e7640d172a57e35200e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2020 18:24:27 +0200 Subject: rcu: Fixup noinstr warnings A KCSAN build revealed we have explicit annoations through atomic_*() usage, switch to arch_atomic_*() for the respective functions. vmlinux.o: warning: objtool: rcu_nmi_exit()+0x4d: call to __kcsan_check_access() leaves .noinstr.text section vmlinux.o: warning: objtool: rcu_dynticks_eqs_enter()+0x25: call to __kcsan_check_access() leaves .noinstr.text section vmlinux.o: warning: objtool: rcu_nmi_enter()+0x4f: call to __kcsan_check_access() leaves .noinstr.text section vmlinux.o: warning: objtool: rcu_dynticks_eqs_exit()+0x2a: call to __kcsan_check_access() leaves .noinstr.text section vmlinux.o: warning: objtool: __rcu_is_watching()+0x25: call to __kcsan_check_access() leaves .noinstr.text section Additionally, without the NOP in instrumentation_begin(), objtool would not detect the lack of the 'else instrumentation_begin();' branch in rcu_nmi_enter(). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney --- include/linux/compiler.h | 2 +- kernel/rcu/tree.c | 32 +++++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 30827f82ad62..204e76856435 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -123,7 +123,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #ifdef CONFIG_DEBUG_ENTRY /* Begin/end of an instrumentation safe region */ #define instrumentation_begin() ({ \ - asm volatile("%c0:\n\t" \ + asm volatile("%c0: nop\n\t" \ ".pushsection .discard.instr_begin\n\t" \ ".long %c0b - .\n\t" \ ".popsection\n\t" : : "i" (__COUNTER__)); \ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c716eadc7617..6c6569e0586c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -250,7 +250,7 @@ static noinstr void rcu_dynticks_eqs_enter(void) * next idle sojourn. */ rcu_dynticks_task_trace_enter(); // Before ->dynticks update! - seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); + seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); // RCU is no longer watching. Better be in extended quiescent state! WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & RCU_DYNTICK_CTRL_CTR)); @@ -274,13 +274,13 @@ static noinstr void rcu_dynticks_eqs_exit(void) * and we also must force ordering with the next RCU read-side * critical section. */ - seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); + seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); // RCU is now watching. Better not be in an extended quiescent state! rcu_dynticks_task_trace_exit(); // After ->dynticks update! WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & RCU_DYNTICK_CTRL_CTR)); if (seq & RCU_DYNTICK_CTRL_MASK) { - atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks); + arch_atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks); smp_mb__after_atomic(); /* _exit after clearing mask. */ } } @@ -313,7 +313,7 @@ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - return !(atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR); + return !(arch_atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR); } /* @@ -633,6 +633,10 @@ static noinstr void rcu_eqs_enter(bool user) do_nocb_deferred_wakeup(rdp); rcu_prepare_for_idle(); rcu_preempt_deferred_qs(current); + + // instrumentation for the noinstr rcu_dynticks_eqs_enter() + instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); + instrumentation_end(); WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */ // RCU is watching here ... @@ -692,6 +696,7 @@ noinstr void rcu_nmi_exit(void) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + instrumentation_begin(); /* * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. * (We are exiting an NMI handler, so RCU better be paying attention @@ -705,7 +710,6 @@ noinstr void rcu_nmi_exit(void) * leave it in non-RCU-idle state. */ if (rdp->dynticks_nmi_nesting != 1) { - instrumentation_begin(); trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, atomic_read(&rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */ @@ -714,13 +718,15 @@ noinstr void rcu_nmi_exit(void) return; } - instrumentation_begin(); /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ if (!in_nmi()) rcu_prepare_for_idle(); + + // instrumentation for the noinstr rcu_dynticks_eqs_enter() + instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); instrumentation_end(); // RCU is watching here ... @@ -838,6 +844,10 @@ static void noinstr rcu_eqs_exit(bool user) rcu_dynticks_eqs_exit(); // ... but is watching here. instrumentation_begin(); + + // instrumentation for the noinstr rcu_dynticks_eqs_exit() + instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); + rcu_cleanup_after_idle(); trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks)); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); @@ -983,13 +993,21 @@ noinstr void rcu_nmi_enter(void) if (!in_nmi()) rcu_cleanup_after_idle(); + instrumentation_begin(); + // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs() + instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks)); + // instrumentation for the noinstr rcu_dynticks_eqs_exit() + instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); + incby = 1; } else if (!in_nmi()) { instrumentation_begin(); rcu_irq_enter_check_tick(); instrumentation_end(); + } else { + instrumentation_begin(); } - instrumentation_begin(); + trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks)); -- cgit v1.2.3 From 471e39df96b9a4c4ba88a2da9e25a126624d7a9c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 24 Jun 2020 17:34:18 -0300 Subject: sctp: Don't advertise IPv4 addresses if ipv6only is set on the socket If a socket is set ipv6only, it will still send IPv4 addresses in the INIT and INIT_ACK packets. This potentially misleads the peer into using them, which then would cause association termination. The fix is to not add IPv4 addresses to ipv6only sockets. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Corey Minyard Signed-off-by: Marcelo Ricardo Leitner Tested-by: Corey Minyard Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 8 +++++--- net/sctp/associola.c | 5 ++++- net/sctp/bind_addr.c | 1 + net/sctp/protocol.c | 3 ++- 4 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 15b4d9aec7ff..122d9e2d8dfd 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -353,11 +353,13 @@ enum { ipv4_is_anycast_6to4(a)) /* Flags used for the bind address copy functions. */ -#define SCTP_ADDR6_ALLOWED 0x00000001 /* IPv6 address is allowed by +#define SCTP_ADDR4_ALLOWED 0x00000001 /* IPv4 address is allowed by local sock family */ -#define SCTP_ADDR4_PEERSUPP 0x00000002 /* IPv4 address is supported by +#define SCTP_ADDR6_ALLOWED 0x00000002 /* IPv6 address is allowed by + local sock family */ +#define SCTP_ADDR4_PEERSUPP 0x00000004 /* IPv4 address is supported by peer */ -#define SCTP_ADDR6_PEERSUPP 0x00000004 /* IPv6 address is supported by +#define SCTP_ADDR6_PEERSUPP 0x00000008 /* IPv6 address is supported by peer */ /* Reasons to retransmit. */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 72315137d7e7..8d735461fa19 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1565,12 +1565,15 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, enum sctp_scope scope, gfp_t gfp) { + struct sock *sk = asoc->base.sk; int flags; /* Use scoping rules to determine the subset of addresses from * the endpoint. */ - flags = (PF_INET6 == asoc->base.sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0; + flags = (PF_INET6 == sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0; + if (!inet_v6_ipv6only(sk)) + flags |= SCTP_ADDR4_ALLOWED; if (asoc->peer.ipv4_address) flags |= SCTP_ADDR4_PEERSUPP; if (asoc->peer.ipv6_address) diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 53bc61537f44..701c5a4e441d 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -461,6 +461,7 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, * well as the remote peer. */ if ((((AF_INET == addr->sa.sa_family) && + (flags & SCTP_ADDR4_ALLOWED) && (flags & SCTP_ADDR4_PEERSUPP))) || (((AF_INET6 == addr->sa.sa_family) && (flags & SCTP_ADDR6_ALLOWED) && diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 092d1afdee0d..cde29f3c7fb3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -148,7 +148,8 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, * sock as well as the remote peer. */ if (addr->a.sa.sa_family == AF_INET && - !(copy_flags & SCTP_ADDR4_PEERSUPP)) + (!(copy_flags & SCTP_ADDR4_ALLOWED) || + !(copy_flags & SCTP_ADDR4_PEERSUPP))) continue; if (addr->a.sa.sa_family == AF_INET6 && (!(copy_flags & SCTP_ADDR6_ALLOWED) || -- cgit v1.2.3 From 4c342f778fe234e0c2a2601d87fec8ba42f0d2c6 Mon Sep 17 00:00:00 2001 From: Rao Shoaib Date: Thu, 25 Jun 2020 13:46:00 -0700 Subject: rds: transport module should be auto loaded when transport is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enhancement auto loads transport module when the transport is set via SO_RDS_TRANSPORT socket option. Reviewed-by: Ka-Cheong Poon Reviewed-by: Håkon Bugge Signed-off-by: Rao Shoaib Signed-off-by: Somasundaram Krishnasamy Signed-off-by: David S. Miller --- include/uapi/linux/rds.h | 4 +++- net/rds/transport.c | 26 +++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index cba368e55863..c21edb966c19 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -64,10 +64,12 @@ /* supported values for SO_RDS_TRANSPORT */ #define RDS_TRANS_IB 0 -#define RDS_TRANS_IWARP 1 +#define RDS_TRANS_GAP 1 #define RDS_TRANS_TCP 2 #define RDS_TRANS_COUNT 3 #define RDS_TRANS_NONE (~0) +/* don't use RDS_TRANS_IWARP - it is deprecated */ +#define RDS_TRANS_IWARP RDS_TRANS_GAP /* IOCTLS commands for SOL_RDS */ #define SIOCRDSSETTOS (SIOCPROTOPRIVATE) diff --git a/net/rds/transport.c b/net/rds/transport.c index 46f709a4b577..f8001ec80867 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -38,6 +38,12 @@ #include "rds.h" #include "loop.h" +static char * const rds_trans_modules[] = { + [RDS_TRANS_IB] = "rds_rdma", + [RDS_TRANS_GAP] = NULL, + [RDS_TRANS_TCP] = "rds_tcp", +}; + static struct rds_transport *transports[RDS_TRANS_COUNT]; static DECLARE_RWSEM(rds_trans_sem); @@ -110,18 +116,20 @@ struct rds_transport *rds_trans_get(int t_type) { struct rds_transport *ret = NULL; struct rds_transport *trans; - unsigned int i; down_read(&rds_trans_sem); - for (i = 0; i < RDS_TRANS_COUNT; i++) { - trans = transports[i]; - - if (trans && trans->t_type == t_type && - (!trans->t_owner || try_module_get(trans->t_owner))) { - ret = trans; - break; - } + trans = transports[t_type]; + if (!trans) { + up_read(&rds_trans_sem); + if (rds_trans_modules[t_type]) + request_module(rds_trans_modules[t_type]); + down_read(&rds_trans_sem); + trans = transports[t_type]; } + if (trans && trans->t_type == t_type && + (!trans->t_owner || try_module_get(trans->t_owner))) + ret = trans; + up_read(&rds_trans_sem); return ret; -- cgit v1.2.3 From 8dbdd5049cfa8f8848711ba83c9bbf67e08f5b2d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 25 Jun 2020 20:30:07 -0700 Subject: make asm-generic/cacheflush.h more standalone Some s390 builds get these warnings: include/asm-generic/cacheflush.h:16:42: warning: 'struct mm_struct' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/cacheflush.h:22:46: warning: 'struct mm_struct' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/cacheflush.h:28:45: warning: 'struct vm_area_struct' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/cacheflush.h:36:44: warning: 'struct vm_area_struct' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/cacheflush.h:44:45: warning: 'struct page' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/cacheflush.h:52:50: warning: 'struct address_space' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/cacheflush.h:58:52: warning: 'struct address_space' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/cacheflush.h:75:17: warning: 'struct page' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/cacheflush.h:74:45: warning: 'struct vm_area_struct' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/cacheflush.h:82:16: warning: 'struct page' declared inside parameter list will not be visible outside of this definition or declaration include/asm-generic/cacheflush.h:81:50: warning: 'struct vm_area_struct' declared inside parameter list will not be visible outside of this definition or declaration Forward declare the named structs to get rid of these. Link: http://lkml.kernel.org/r/20200623135714.4dae4b8a@canb.auug.org.au Fixes: e0cf615d725c ("asm-generic: don't include in cacheflush.h") Signed-off-by: Stephen Rothwell Reviewed-by: Christoph Hellwig Acked-by: Arnd Bergmann Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/cacheflush.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h index 907fa5d16494..4a674db4e1fa 100644 --- a/include/asm-generic/cacheflush.h +++ b/include/asm-generic/cacheflush.h @@ -2,6 +2,11 @@ #ifndef _ASM_GENERIC_CACHEFLUSH_H #define _ASM_GENERIC_CACHEFLUSH_H +struct mm_struct; +struct vm_area_struct; +struct page; +struct address_space; + /* * The cache doesn't need to be flushed when TLB entries change when * the cache is mapped to physical memory, not virtual memory -- cgit v1.2.3 From 31d8fcac00fcf4007f3921edc69ab4dcb3abcd4d Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 25 Jun 2020 20:30:31 -0700 Subject: mm: workingset: age nonresident information alongside anonymous pages Patch series "fix for "mm: balance LRU lists based on relative thrashing" patchset" This patchset fixes some problems of the patchset, "mm: balance LRU lists based on relative thrashing", which is now merged on the mainline. Patch "mm: workingset: let cache workingset challenge anon fix" is the result of discussion with Johannes. See following link. http://lkml.kernel.org/r/20200520232525.798933-6-hannes@cmpxchg.org And, the other two are minor things which are found when I try to rebase my patchset. This patch (of 3): After ("mm: workingset: let cache workingset challenge anon fix"), we compare refault distances to active_file + anon. But age of the non-resident information is only driven by the file LRU. As a result, we may overestimate the recency of any incoming refaults and activate them too eagerly, causing unnecessary LRU churn in certain situations. Make anon aging drive nonresident age as well to address that. Link: http://lkml.kernel.org/r/1592288204-27734-1-git-send-email-iamjoonsoo.kim@lge.com Link: http://lkml.kernel.org/r/1592288204-27734-2-git-send-email-iamjoonsoo.kim@lge.com Fixes: 34e58cac6d8f2a ("mm: workingset: let cache workingset challenge anon") Reported-by: Joonsoo Kim Signed-off-by: Johannes Weiner Signed-off-by: Joonsoo Kim Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 4 ++-- include/linux/swap.h | 1 + mm/vmscan.c | 3 +++ mm/workingset.c | 46 +++++++++++++++++++++++++++------------------- 4 files changed, 33 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c4c37fd12104..f6f884970511 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -257,8 +257,8 @@ struct lruvec { */ unsigned long anon_cost; unsigned long file_cost; - /* Evictions & activations on the inactive file list */ - atomic_long_t inactive_age; + /* Non-resident age, driven by LRU movement */ + atomic_long_t nonresident_age; /* Refaults at the time of last reclaim cycle */ unsigned long refaults; /* Various lruvec state flags (enum lruvec_flags) */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 4c5974bb9ba9..5b3216ba39a9 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -313,6 +313,7 @@ struct vma_swap_readahead { }; /* linux/mm/workingset.c */ +void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); void workingset_refault(struct page *page, void *shadow); void workingset_activation(struct page *page); diff --git a/mm/vmscan.c b/mm/vmscan.c index b6d84326bdf2..749d239c62b2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -904,6 +904,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, __delete_from_swap_cache(page, swap); xa_unlock_irqrestore(&mapping->i_pages, flags); put_swap_page(page, swap); + workingset_eviction(page, target_memcg); } else { void (*freepage)(struct page *); void *shadow = NULL; @@ -1884,6 +1885,8 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec, list_add(&page->lru, &pages_to_free); } else { nr_moved += nr_pages; + if (PageActive(page)) + workingset_age_nonresident(lruvec, nr_pages); } } diff --git a/mm/workingset.c b/mm/workingset.c index d481ea452eeb..50b7937bab32 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -156,8 +156,8 @@ * * Implementation * - * For each node's file LRU lists, a counter for inactive evictions - * and activations is maintained (node->inactive_age). + * For each node's LRU lists, a counter for inactive evictions and + * activations is maintained (node->nonresident_age). * * On eviction, a snapshot of this counter (along with some bits to * identify the node) is stored in the now empty page cache @@ -213,7 +213,17 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, *workingsetp = workingset; } -static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat) +/** + * workingset_age_nonresident - age non-resident entries as LRU ages + * @memcg: the lruvec that was aged + * @nr_pages: the number of pages to count + * + * As in-memory pages are aged, non-resident pages need to be aged as + * well, in order for the refault distances later on to be comparable + * to the in-memory dimensions. This function allows reclaim and LRU + * operations to drive the non-resident aging along in parallel. + */ +void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages) { /* * Reclaiming a cgroup means reclaiming all its children in a @@ -227,11 +237,8 @@ static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat) * the root cgroup's, age as well. */ do { - struct lruvec *lruvec; - - lruvec = mem_cgroup_lruvec(memcg, pgdat); - atomic_long_inc(&lruvec->inactive_age); - } while (memcg && (memcg = parent_mem_cgroup(memcg))); + atomic_long_add(nr_pages, &lruvec->nonresident_age); + } while ((lruvec = parent_lruvec(lruvec))); } /** @@ -254,12 +261,11 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) VM_BUG_ON_PAGE(page_count(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); - advance_inactive_age(page_memcg(page), pgdat); - lruvec = mem_cgroup_lruvec(target_memcg, pgdat); + workingset_age_nonresident(lruvec, hpage_nr_pages(page)); /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); - eviction = atomic_long_read(&lruvec->inactive_age); + eviction = atomic_long_read(&lruvec->nonresident_age); return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page)); } @@ -309,20 +315,20 @@ void workingset_refault(struct page *page, void *shadow) if (!mem_cgroup_disabled() && !eviction_memcg) goto out; eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); - refault = atomic_long_read(&eviction_lruvec->inactive_age); + refault = atomic_long_read(&eviction_lruvec->nonresident_age); /* * Calculate the refault distance * * The unsigned subtraction here gives an accurate distance - * across inactive_age overflows in most cases. There is a + * across nonresident_age overflows in most cases. There is a * special case: usually, shadow entries have a short lifetime * and are either refaulted or reclaimed along with the inode * before they get too old. But it is not impossible for the - * inactive_age to lap a shadow entry in the field, which can - * then result in a false small refault distance, leading to a - * false activation should this old entry actually refault - * again. However, earlier kernels used to deactivate + * nonresident_age to lap a shadow entry in the field, which + * can then result in a false small refault distance, leading + * to a false activation should this old entry actually + * refault again. However, earlier kernels used to deactivate * unconditionally with *every* reclaim invocation for the * longest time, so the occasional inappropriate activation * leading to pressure on the active list is not a problem. @@ -359,7 +365,7 @@ void workingset_refault(struct page *page, void *shadow) goto out; SetPageActive(page); - advance_inactive_age(memcg, pgdat); + workingset_age_nonresident(lruvec, hpage_nr_pages(page)); inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE); /* Page was active prior to eviction */ @@ -382,6 +388,7 @@ out: void workingset_activation(struct page *page) { struct mem_cgroup *memcg; + struct lruvec *lruvec; rcu_read_lock(); /* @@ -394,7 +401,8 @@ void workingset_activation(struct page *page) memcg = page_memcg_rcu(page); if (!mem_cgroup_disabled() && !memcg) goto out; - advance_inactive_age(memcg, page_pgdat(page)); + lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); + workingset_age_nonresident(lruvec, hpage_nr_pages(page)); out: rcu_read_unlock(); } -- cgit v1.2.3 From 7a0e27b2a0ce2735e27e21ebc8b777550fe0ed81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 25 Jun 2020 20:30:47 -0700 Subject: mm: remove vmalloc_exec Merge vmalloc_exec into its only caller. Note that for !CONFIG_MMU __vmalloc_node_range maps to __vmalloc, which directly clears the __GFP_HIGHMEM added by the vmalloc_exec stub anyway. Link: http://lkml.kernel.org/r/20200618064307.32739-4-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: David Hildenbrand Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Dexuan Cui Cc: Jessica Yu Cc: Vitaly Kuznetsov Cc: Wei Liu Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 1 - kernel/module.c | 4 +++- mm/nommu.c | 17 ----------------- mm/vmalloc.c | 20 -------------------- 4 files changed, 3 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 48bb681e6c2a..0221f852a7e1 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -106,7 +106,6 @@ extern void *vzalloc(unsigned long size); extern void *vmalloc_user(unsigned long size); extern void *vmalloc_node(unsigned long size, int node); extern void *vzalloc_node(unsigned long size, int node); -extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask); diff --git a/kernel/module.c b/kernel/module.c index e8a198588f26..0c6573b98c36 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2783,7 +2783,9 @@ static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug) void * __weak module_alloc(unsigned long size) { - return vmalloc_exec(size); + return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, + NUMA_NO_NODE, __func__); } bool __weak module_init_section(const char *name) diff --git a/mm/nommu.c b/mm/nommu.c index cdcad5d61dd1..f32a69095d50 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -290,23 +290,6 @@ void *vzalloc_node(unsigned long size, int node) } EXPORT_SYMBOL(vzalloc_node); -/** - * vmalloc_exec - allocate virtually contiguous, executable memory - * @size: allocation size - * - * Kernel-internal function to allocate enough pages to cover @size - * the page level allocator and map them into contiguous and - * executable kernel virtual space. - * - * For tight control over page level allocator and protection flags - * use __vmalloc() instead. - */ - -void *vmalloc_exec(unsigned long size) -{ - return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM); -} - /** * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) * @size: allocation size diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 957a0be77270..5a2b55c8dd9a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2695,26 +2695,6 @@ void *vzalloc_node(unsigned long size, int node) } EXPORT_SYMBOL(vzalloc_node); -/** - * vmalloc_exec - allocate virtually contiguous, executable memory - * @size: allocation size - * - * Kernel-internal function to allocate enough pages to cover @size - * the page level allocator and map them into contiguous and - * executable kernel virtual space. - * - * For tight control over page level allocator and protection flags - * use __vmalloc() instead. - * - * Return: pointer to the allocated memory or %NULL on error - */ -void *vmalloc_exec(unsigned long size) -{ - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) -- cgit v1.2.3 From 5946d1f5b309381805bad3ddc3054c04f4ae9c24 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Thu, 4 Jun 2020 15:31:19 +0530 Subject: kdb: Switch to use safer dbg_io_ops over console APIs In kgdb context, calling console handlers aren't safe due to locks used in those handlers which could in turn lead to a deadlock. Although, using oops_in_progress increases the chance to bypass locks in most console handlers but it might not be sufficient enough in case a console uses more locks (VT/TTY is good example). Currently when a driver provides both polling I/O and a console then kdb will output using the console. We can increase robustness by using the currently active polling I/O driver (which should be lockless) instead of the corresponding console. For several common cases (e.g. an embedded system with a single serial port that is used both for console output and debugger I/O) this will result in no console handler being used. In order to achieve this we need to reverse the order of preference to use dbg_io_ops (uses polling I/O mode) over console APIs. So we just store "struct console" that represents debugger I/O in dbg_io_ops and while emitting kdb messages, skip console that matches dbg_io_ops console in order to avoid duplicate messages. After this change, "is_console" param becomes redundant and hence removed. Suggested-by: Daniel Thompson Signed-off-by: Sumit Garg Link: https://lore.kernel.org/r/1591264879-25920-5-git-send-email-sumit.garg@linaro.org Reviewed-by: Douglas Anderson Reviewed-by: Petr Mladek Acked-by: Greg Kroah-Hartman Signed-off-by: Daniel Thompson --- drivers/tty/serial/kgdb_nmi.c | 2 +- drivers/tty/serial/kgdboc.c | 32 ++++++++++++++++---------------- drivers/usb/early/ehci-dbgp.c | 3 ++- include/linux/kgdb.h | 5 ++--- kernel/debug/kdb/kdb_io.c | 4 +++- 5 files changed, 24 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/tty/serial/kgdb_nmi.c b/drivers/tty/serial/kgdb_nmi.c index 5022447afa23..6004c0c1d173 100644 --- a/drivers/tty/serial/kgdb_nmi.c +++ b/drivers/tty/serial/kgdb_nmi.c @@ -50,7 +50,7 @@ static int kgdb_nmi_console_setup(struct console *co, char *options) * I/O utilities that messages sent to the console will automatically * be displayed on the dbg_io. */ - dbg_io_ops->is_console = true; + dbg_io_ops->cons = co; return 0; } diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index 41396982e9e0..84ffede27f23 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -45,7 +45,6 @@ static struct platform_device *kgdboc_pdev; #if IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) static struct kgdb_io kgdboc_earlycon_io_ops; -static struct console *earlycon; static int (*earlycon_orig_exit)(struct console *con); #endif /* IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) */ @@ -145,7 +144,7 @@ static void kgdboc_unregister_kbd(void) #if IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) static void cleanup_earlycon(void) { - if (earlycon) + if (kgdboc_earlycon_io_ops.cons) kgdb_unregister_io_module(&kgdboc_earlycon_io_ops); } #else /* !IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) */ @@ -178,7 +177,7 @@ static int configure_kgdboc(void) goto noconfig; } - kgdboc_io_ops.is_console = 0; + kgdboc_io_ops.cons = NULL; kgdb_tty_driver = NULL; kgdboc_use_kms = 0; @@ -198,7 +197,7 @@ static int configure_kgdboc(void) int idx; if (cons->device && cons->device(cons, &idx) == p && idx == tty_line) { - kgdboc_io_ops.is_console = 1; + kgdboc_io_ops.cons = cons; break; } } @@ -433,7 +432,8 @@ static int kgdboc_earlycon_get_char(void) { char c; - if (!earlycon->read(earlycon, &c, 1)) + if (!kgdboc_earlycon_io_ops.cons->read(kgdboc_earlycon_io_ops.cons, + &c, 1)) return NO_POLL_CHAR; return c; @@ -441,7 +441,8 @@ static int kgdboc_earlycon_get_char(void) static void kgdboc_earlycon_put_char(u8 chr) { - earlycon->write(earlycon, &chr, 1); + kgdboc_earlycon_io_ops.cons->write(kgdboc_earlycon_io_ops.cons, &chr, + 1); } static void kgdboc_earlycon_pre_exp_handler(void) @@ -461,7 +462,7 @@ static void kgdboc_earlycon_pre_exp_handler(void) * boot if we detect this case. */ for_each_console(con) - if (con == earlycon) + if (con == kgdboc_earlycon_io_ops.cons) return; already_warned = true; @@ -484,25 +485,25 @@ static int kgdboc_earlycon_deferred_exit(struct console *con) static void kgdboc_earlycon_deinit(void) { - if (!earlycon) + if (!kgdboc_earlycon_io_ops.cons) return; - if (earlycon->exit == kgdboc_earlycon_deferred_exit) + if (kgdboc_earlycon_io_ops.cons->exit == kgdboc_earlycon_deferred_exit) /* * kgdboc_earlycon is exiting but original boot console exit * was never called (AKA kgdboc_earlycon_deferred_exit() * didn't ever run). Undo our trap. */ - earlycon->exit = earlycon_orig_exit; - else if (earlycon->exit) + kgdboc_earlycon_io_ops.cons->exit = earlycon_orig_exit; + else if (kgdboc_earlycon_io_ops.cons->exit) /* * We skipped calling the exit() routine so we could try to * keep using the boot console even after it went away. We're * finally done so call the function now. */ - earlycon->exit(earlycon); + kgdboc_earlycon_io_ops.cons->exit(kgdboc_earlycon_io_ops.cons); - earlycon = NULL; + kgdboc_earlycon_io_ops.cons = NULL; } static struct kgdb_io kgdboc_earlycon_io_ops = { @@ -511,7 +512,6 @@ static struct kgdb_io kgdboc_earlycon_io_ops = { .write_char = kgdboc_earlycon_put_char, .pre_exception = kgdboc_earlycon_pre_exp_handler, .deinit = kgdboc_earlycon_deinit, - .is_console = true, }; #define MAX_CONSOLE_NAME_LEN (sizeof((struct console *) 0)->name) @@ -557,10 +557,10 @@ static int __init kgdboc_earlycon_init(char *opt) goto unlock; } - earlycon = con; + kgdboc_earlycon_io_ops.cons = con; pr_info("Going to register kgdb with earlycon '%s'\n", con->name); if (kgdb_register_io_module(&kgdboc_earlycon_io_ops) != 0) { - earlycon = NULL; + kgdboc_earlycon_io_ops.cons = NULL; pr_info("Failed to register kgdb with earlycon\n"); } else { /* Trap exit so we can keep earlycon longer if needed. */ diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c index ea0d531c63e2..775cf70cfb3e 100644 --- a/drivers/usb/early/ehci-dbgp.c +++ b/drivers/usb/early/ehci-dbgp.c @@ -1058,7 +1058,8 @@ static int __init kgdbdbgp_parse_config(char *str) kgdbdbgp_wait_time = simple_strtoul(ptr, &ptr, 10); } kgdb_register_io_module(&kgdbdbgp_io_ops); - kgdbdbgp_io_ops.is_console = early_dbgp_console.index != -1; + if (early_dbgp_console.index != -1) + kgdbdbgp_io_ops.cons = &early_dbgp_console; return 0; } diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index c62d76478adc..529116b0cabe 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -276,8 +276,7 @@ struct kgdb_arch { * the I/O driver. * @post_exception: Pointer to a function that will do any cleanup work * for the I/O driver. - * @is_console: 1 if the end device is a console 0 if the I/O device is - * not a console + * @cons: valid if the I/O device is a console; else NULL. */ struct kgdb_io { const char *name; @@ -288,7 +287,7 @@ struct kgdb_io { void (*deinit) (void); void (*pre_exception) (void); void (*post_exception) (void); - int is_console; + struct console *cons; }; extern const struct kgdb_arch arch_kgdb_ops; diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 0e4f2eda96d8..683a799618ad 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -549,7 +549,7 @@ static void kdb_msg_write(const char *msg, int msg_len) if (msg_len == 0) return; - if (dbg_io_ops && !dbg_io_ops->is_console) { + if (dbg_io_ops) { const char *cp = msg; int len = msg_len; @@ -562,6 +562,8 @@ static void kdb_msg_write(const char *msg, int msg_len) for_each_console(c) { if (!(c->flags & CON_ENABLED)) continue; + if (c == dbg_io_ops->cons) + continue; /* * Set oops_in_progress to encourage the console drivers to * disregard their internal spin locks: in the current calling -- cgit v1.2.3 From 10652a9e9fe3fbcaca090f99cd3060ac3fee2913 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 Jun 2020 17:22:30 +0200 Subject: Revert "serial: core: Refactor uart_unlock_and_check_sysrq()" This reverts commit da9a5aa3402db0ff3b57216d8dbf2478e1046cae. In order to ease backporting a fix for a sysrq regression, revert this rewrite which was since added on top. The other sysrq helpers now bail out early when sysrq is not enabled; it's better to keep that pattern here as well. Note that the __releases() attribute won't be needed after the follow-on fix either. Fixes: da9a5aa3402d ("serial: core: Refactor uart_unlock_and_check_sysrq()") Cc: stable Signed-off-by: Johan Hovold Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200610152232.16925-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 23 +++++++++++++---------- include/linux/serial_core.h | 3 ++- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 13fb92ae3710..fcdb6bfbe2cf 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -3239,19 +3239,22 @@ int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) } EXPORT_SYMBOL_GPL(uart_prepare_sysrq_char); -void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long flags) -__releases(&port->lock) +void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) { - if (port->has_sysrq) { - int sysrq_ch = port->sysrq_ch; + int sysrq_ch; - port->sysrq_ch = 0; - spin_unlock_irqrestore(&port->lock, flags); - if (sysrq_ch) - handle_sysrq(sysrq_ch); - } else { - spin_unlock_irqrestore(&port->lock, flags); + if (!port->has_sysrq) { + spin_unlock_irqrestore(&port->lock, irqflags); + return; } + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + + spin_unlock_irqrestore(&port->lock, irqflags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); } EXPORT_SYMBOL_GPL(uart_unlock_and_check_sysrq); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 9fd550e7946a..ef4921ddbe97 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -464,7 +464,8 @@ extern void uart_insert_char(struct uart_port *port, unsigned int status, extern int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch); extern int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch); -extern void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long flags); +extern void uart_unlock_and_check_sysrq(struct uart_port *port, + unsigned long irqflags); extern int uart_handle_break(struct uart_port *port); /* -- cgit v1.2.3 From 08d5470308ac3598e7709d08b8979ce6e9de8da2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 Jun 2020 17:22:31 +0200 Subject: serial: core: fix sysrq overhead regression Commit 8e20fc391711 ("serial_core: Move sysrq functions from header file") converted the inline sysrq helpers to exported functions which are now called for every received character, interrupt and break signal also on systems without CONFIG_MAGIC_SYSRQ_SERIAL instead of being optimised away by the compiler. Inlining these helpers again also avoids the function call overhead when CONFIG_MAGIC_SYSRQ_SERIAL is enabled (e.g. when the port is not used as a console). Fixes: 8e20fc391711 ("serial_core: Move sysrq functions from header file") Cc: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Johan Hovold Cc: stable Reviewed-by: Andy Shevchenko Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20200610152232.16925-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 99 +------------------------------------ include/linux/serial_core.h | 103 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 100 insertions(+), 102 deletions(-) (limited to 'include') diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index fcdb6bfbe2cf..abb102e71b14 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -41,8 +41,6 @@ static struct lock_class_key port_lock_key; #define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8) -#define SYSRQ_TIMEOUT (HZ * 5) - static void uart_change_speed(struct tty_struct *tty, struct uart_state *state, struct ktermios *old_termios); static void uart_wait_until_sent(struct tty_struct *tty, int timeout); @@ -3163,7 +3161,7 @@ static DECLARE_WORK(sysrq_enable_work, uart_sysrq_on); * Returns false if @ch is out of enabling sequence and should be * handled some other way, true if @ch was consumed. */ -static bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch) +bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch) { int sysrq_toggle_seq_len = strlen(sysrq_toggle_seq); @@ -3186,102 +3184,9 @@ static bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch) port->sysrq = 0; return true; } -#else -static inline bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch) -{ - return false; -} +EXPORT_SYMBOL_GPL(uart_try_toggle_sysrq); #endif -int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) -{ - if (!IS_ENABLED(CONFIG_MAGIC_SYSRQ_SERIAL)) - return 0; - - if (!port->has_sysrq || !port->sysrq) - return 0; - - if (ch && time_before(jiffies, port->sysrq)) { - if (sysrq_mask()) { - handle_sysrq(ch); - port->sysrq = 0; - return 1; - } - if (uart_try_toggle_sysrq(port, ch)) - return 1; - } - port->sysrq = 0; - - return 0; -} -EXPORT_SYMBOL_GPL(uart_handle_sysrq_char); - -int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) -{ - if (!IS_ENABLED(CONFIG_MAGIC_SYSRQ_SERIAL)) - return 0; - - if (!port->has_sysrq || !port->sysrq) - return 0; - - if (ch && time_before(jiffies, port->sysrq)) { - if (sysrq_mask()) { - port->sysrq_ch = ch; - port->sysrq = 0; - return 1; - } - if (uart_try_toggle_sysrq(port, ch)) - return 1; - } - port->sysrq = 0; - - return 0; -} -EXPORT_SYMBOL_GPL(uart_prepare_sysrq_char); - -void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) -{ - int sysrq_ch; - - if (!port->has_sysrq) { - spin_unlock_irqrestore(&port->lock, irqflags); - return; - } - - sysrq_ch = port->sysrq_ch; - port->sysrq_ch = 0; - - spin_unlock_irqrestore(&port->lock, irqflags); - - if (sysrq_ch) - handle_sysrq(sysrq_ch); -} -EXPORT_SYMBOL_GPL(uart_unlock_and_check_sysrq); - -/* - * We do the SysRQ and SAK checking like this... - */ -int uart_handle_break(struct uart_port *port) -{ - struct uart_state *state = port->state; - - if (port->handle_break) - port->handle_break(port); - - if (port->has_sysrq && uart_console(port)) { - if (!port->sysrq) { - port->sysrq = jiffies + SYSRQ_TIMEOUT; - return 1; - } - port->sysrq = 0; - } - - if (port->flags & UPF_SAK) - do_SAK(state->port.tty); - return 0; -} -EXPORT_SYMBOL_GPL(uart_handle_break); - EXPORT_SYMBOL(uart_write_wakeup); EXPORT_SYMBOL(uart_register_driver); EXPORT_SYMBOL(uart_unregister_driver); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index ef4921ddbe97..03fa7b967103 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -462,11 +462,104 @@ extern void uart_handle_cts_change(struct uart_port *uport, extern void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, unsigned int ch, unsigned int flag); -extern int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch); -extern int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch); -extern void uart_unlock_and_check_sysrq(struct uart_port *port, - unsigned long irqflags); -extern int uart_handle_break(struct uart_port *port); +#ifdef CONFIG_MAGIC_SYSRQ_SERIAL +#define SYSRQ_TIMEOUT (HZ * 5) + +bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch); + +static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) +{ + if (!port->has_sysrq || !port->sysrq) + return 0; + + if (ch && time_before(jiffies, port->sysrq)) { + if (sysrq_mask()) { + handle_sysrq(ch); + port->sysrq = 0; + return 1; + } + if (uart_try_toggle_sysrq(port, ch)) + return 1; + } + port->sysrq = 0; + + return 0; +} + +static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +{ + if (!port->has_sysrq || !port->sysrq) + return 0; + + if (ch && time_before(jiffies, port->sysrq)) { + if (sysrq_mask()) { + port->sysrq_ch = ch; + port->sysrq = 0; + return 1; + } + if (uart_try_toggle_sysrq(port, ch)) + return 1; + } + port->sysrq = 0; + + return 0; +} + +static inline void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + int sysrq_ch; + + if (!port->has_sysrq) { + spin_unlock_irqrestore(&port->lock, irqflags); + return; + } + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + + spin_unlock_irqrestore(&port->lock, irqflags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +} +#else /* CONFIG_MAGIC_SYSRQ_SERIAL */ +static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) +{ + return 0; +} +static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +{ + return 0; +} +static inline void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + spin_unlock_irqrestore(&port->lock, irqflags); +} +#endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ + +/* + * We do the SysRQ and SAK checking like this... + */ +static inline int uart_handle_break(struct uart_port *port) +{ + struct uart_state *state = port->state; + + if (port->handle_break) + port->handle_break(port); + +#ifdef CONFIG_MAGIC_SYSRQ_SERIAL + if (port->has_sysrq && uart_console(port)) { + if (!port->sysrq) { + port->sysrq = jiffies + SYSRQ_TIMEOUT; + return 1; + } + port->sysrq = 0; + } +#endif + if (port->flags & UPF_SAK) + do_SAK(state->port.tty); + return 0; +} /* * UART_ENABLE_MS - determine if port should enable modem status irqs -- cgit v1.2.3 From 225385657b7d81a99e17e04cd01f9ed5bb3109a8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 Jun 2020 17:22:32 +0200 Subject: serial: core: drop redundant sysrq checks The sysrq timestamp will never be set unless port->has_sysrq is set (see uart_handle_break()) so drop the redundant checks that were added by commit 1997e9dfdc84 ("serial_core: Un-ifdef sysrq SUPPORT_SYSRQ"). Signed-off-by: Johan Hovold Reviewed-by: Andy Shevchenko Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20200610152232.16925-4-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 03fa7b967103..791f4844efeb 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -469,7 +469,7 @@ bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch); static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { - if (!port->has_sysrq || !port->sysrq) + if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { @@ -488,7 +488,7 @@ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) { - if (!port->has_sysrq || !port->sysrq) + if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { -- cgit v1.2.3 From 4f311afc2035f7b33d97e69ffaa2e6cd67fca16d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2020 12:14:09 +0200 Subject: sched/core: Fix CONFIG_GCC_PLUGIN_RANDSTRUCT build fail As a temporary build fix, the proper cleanup needs more work. Reported-by: Guenter Roeck Reported-by: Eric Biggers Suggested-by: Eric Biggers Suggested-by: Kees Cook Fixes: a148866489fb ("sched: Replace rq::wake_list") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b62e6aaf28f0..224b5de568e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -654,8 +654,10 @@ struct task_struct { unsigned int ptrace; #ifdef CONFIG_SMP - struct llist_node wake_entry; - unsigned int wake_entry_type; + struct { + struct llist_node wake_entry; + unsigned int wake_entry_type; + }; int on_cpu; #ifdef CONFIG_THREAD_INFO_IN_TASK /* Current CPU: */ -- cgit v1.2.3 From 8c4890d1c3358fb8023d46e1e554c41d54f02878 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jun 2020 12:01:25 +0200 Subject: smp, irq_work: Continue smp_call_function*() and irq_work*() integration Instead of relying on BUG_ON() to ensure the various data structures line up, use a bunch of horrible unions to make it all automatic. Much of the union magic is to ensure irq_work and smp_call_function do not (yet) see the members of their respective data structures change name. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20200622100825.844455025@infradead.org --- include/linux/irq_work.h | 26 ++++++------------- include/linux/sched.h | 5 +--- include/linux/smp.h | 23 ++++++----------- include/linux/smp_types.h | 66 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched/core.c | 6 ++--- kernel/smp.c | 18 ------------- 6 files changed, 86 insertions(+), 58 deletions(-) create mode 100644 include/linux/smp_types.h (limited to 'include') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 2735da5f839e..30823780c192 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -2,7 +2,7 @@ #ifndef _LINUX_IRQ_WORK_H #define _LINUX_IRQ_WORK_H -#include +#include /* * An entry can be in one of four states: @@ -13,24 +13,14 @@ * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed */ -/* flags share CSD_FLAG_ space */ - -#define IRQ_WORK_PENDING BIT(0) -#define IRQ_WORK_BUSY BIT(1) - -/* Doesn't want IPI, wait for tick: */ -#define IRQ_WORK_LAZY BIT(2) -/* Run hard IRQ context, even on RT */ -#define IRQ_WORK_HARD_IRQ BIT(3) - -#define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY) - -/* - * structure shares layout with single_call_data_t. - */ struct irq_work { - struct llist_node llnode; - atomic_t flags; + union { + struct __call_single_node node; + struct { + struct llist_node llnode; + atomic_t flags; + }; + }; void (*func)(struct irq_work *); }; diff --git a/include/linux/sched.h b/include/linux/sched.h index 224b5de568e7..692e327d7455 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -654,11 +654,8 @@ struct task_struct { unsigned int ptrace; #ifdef CONFIG_SMP - struct { - struct llist_node wake_entry; - unsigned int wake_entry_type; - }; int on_cpu; + struct __call_single_node wake_entry; #ifdef CONFIG_THREAD_INFO_IN_TASK /* Current CPU: */ unsigned int cpu; diff --git a/include/linux/smp.h b/include/linux/smp.h index 7ee202ad21a6..80d557ef8a11 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -12,29 +12,22 @@ #include #include #include -#include +#include typedef void (*smp_call_func_t)(void *info); typedef bool (*smp_cond_func_t)(int cpu, void *info); -enum { - CSD_FLAG_LOCK = 0x01, - - /* IRQ_WORK_flags */ - - CSD_TYPE_ASYNC = 0x00, - CSD_TYPE_SYNC = 0x10, - CSD_TYPE_IRQ_WORK = 0x20, - CSD_TYPE_TTWU = 0x30, - CSD_FLAG_TYPE_MASK = 0xF0, -}; - /* * structure shares (partial) layout with struct irq_work */ struct __call_single_data { - struct llist_node llist; - unsigned int flags; + union { + struct __call_single_node node; + struct { + struct llist_node llist; + unsigned int flags; + }; + }; smp_call_func_t func; void *info; }; diff --git a/include/linux/smp_types.h b/include/linux/smp_types.h new file mode 100644 index 000000000000..364b3ae3e41d --- /dev/null +++ b/include/linux/smp_types.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_SMP_TYPES_H +#define __LINUX_SMP_TYPES_H + +#include + +enum { + CSD_FLAG_LOCK = 0x01, + + IRQ_WORK_PENDING = 0x01, + IRQ_WORK_BUSY = 0x02, + IRQ_WORK_LAZY = 0x04, /* No IPI, wait for tick */ + IRQ_WORK_HARD_IRQ = 0x08, /* IRQ context on PREEMPT_RT */ + + IRQ_WORK_CLAIMED = (IRQ_WORK_PENDING | IRQ_WORK_BUSY), + + CSD_TYPE_ASYNC = 0x00, + CSD_TYPE_SYNC = 0x10, + CSD_TYPE_IRQ_WORK = 0x20, + CSD_TYPE_TTWU = 0x30, + + CSD_FLAG_TYPE_MASK = 0xF0, +}; + +/* + * struct __call_single_node is the primary type on + * smp.c:call_single_queue. + * + * flush_smp_call_function_queue() only reads the type from + * __call_single_node::u_flags as a regular load, the above + * (anonymous) enum defines all the bits of this word. + * + * Other bits are not modified until the type is known. + * + * CSD_TYPE_SYNC/ASYNC: + * struct { + * struct llist_node node; + * unsigned int flags; + * smp_call_func_t func; + * void *info; + * }; + * + * CSD_TYPE_IRQ_WORK: + * struct { + * struct llist_node node; + * atomic_t flags; + * void (*func)(struct irq_work *); + * }; + * + * CSD_TYPE_TTWU: + * struct { + * struct llist_node node; + * unsigned int flags; + * }; + * + */ + +struct __call_single_node { + struct llist_node llist; + union { + unsigned int u_flags; + atomic_t a_flags; + }; +}; + +#endif /* __LINUX_SMP_TYPES_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f778067de277..ca5db40392d4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2293,7 +2293,7 @@ void sched_ttwu_pending(void *arg) rq_lock_irqsave(rq, &rf); update_rq_clock(rq); - llist_for_each_entry_safe(p, t, llist, wake_entry) { + llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { if (WARN_ON_ONCE(p->on_cpu)) smp_cond_load_acquire(&p->on_cpu, !VAL); @@ -2329,7 +2329,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); WRITE_ONCE(rq->ttwu_pending, 1); - __smp_call_single_queue(cpu, &p->wake_entry); + __smp_call_single_queue(cpu, &p->wake_entry.llist); } void wake_up_if_idle(int cpu) @@ -2786,7 +2786,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) #endif init_numa_balancing(clone_flags, p); #ifdef CONFIG_SMP - p->wake_entry_type = CSD_TYPE_TTWU; + p->wake_entry.u_flags = CSD_TYPE_TTWU; #endif } diff --git a/kernel/smp.c b/kernel/smp.c index 472c2b274c65..aa17eedff5be 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -669,24 +669,6 @@ void __init smp_init(void) { int num_nodes, num_cpus; - /* - * Ensure struct irq_work layout matches so that - * flush_smp_call_function_queue() can do horrible things. - */ - BUILD_BUG_ON(offsetof(struct irq_work, llnode) != - offsetof(struct __call_single_data, llist)); - BUILD_BUG_ON(offsetof(struct irq_work, func) != - offsetof(struct __call_single_data, func)); - BUILD_BUG_ON(offsetof(struct irq_work, flags) != - offsetof(struct __call_single_data, flags)); - - /* - * Assert the CSD_TYPE_TTWU layout is similar enough - * for task_struct to be on the @call_single_queue. - */ - BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) != - offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist)); - idle_threads_init(); cpuhp_threads_init(); -- cgit v1.2.3 From bfe373f608cf81b7626dfeb904001b0e867c5110 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 28 Apr 2020 09:54:56 +0800 Subject: blk-mq-debugfs: update blk_queue_flag_name[] accordingly for new flags Else there may be magic numbers in /sys/kernel/debug/block/*/state. Signed-off-by: Hou Tao Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 3 +++ include/linux/blkdev.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 15df3a36e9fa..e0b2bc131bf5 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -125,6 +125,9 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(REGISTERED), QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), QUEUE_FLAG_NAME(QUIESCED), + QUEUE_FLAG_NAME(PCI_P2PDMA), + QUEUE_FLAG_NAME(ZONE_RESETALL), + QUEUE_FLAG_NAME(RQ_ALLOC_TIME), }; #undef QUEUE_FLAG_NAME diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8fd900998b4e..57241417ff2f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -590,6 +590,7 @@ struct request_queue { u64 write_hints[BLK_MAX_WRITE_HINTS]; }; +/* Keep blk_queue_flag_name[] in sync with the definitions below */ #define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ #define QUEUE_FLAG_DYING 1 /* queue being torn down */ #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ -- cgit v1.2.3 From 85ca6b17e2bb96b19caac3b02c003d670b66de96 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 28 Jun 2020 17:52:28 +0200 Subject: ASoC: rt5670: Add new gpio1_is_ext_spk_en quirk and enable it on the Lenovo Miix 2 10 The Lenovo Miix 2 10 has a keyboard dock with extra speakers in the dock. Rather then the ACL5672's GPIO1 pin being used as IRQ to the CPU, it is actually used to enable the amplifier for these speakers (the IRQ to the CPU comes directly from the jack-detect switch). Add a quirk for having an ext speaker-amplifier enable pin on GPIO1 and replace the Lenovo Miix 2 10's dmi_system_id table entry's wrong GPIO_DEV quirk (which needs to be renamed to GPIO1_IS_IRQ) with the new RT5670_GPIO1_IS_EXT_SPK_EN quirk, so that we enable the external speaker-amplifier as necessary. Also update the ident field for the dmi_system_id table entry, the Miix models are not Thinkpads. Fixes: 67e03ff3f32f ("ASoC: codecs: rt5670: add Thinkpad Tablet 10 quirk") Signed-off-by: Hans de Goede BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1786723 Link: https://lore.kernel.org/r/20200628155231.71089-4-hdegoede@redhat.com Signed-off-by: Mark Brown --- include/sound/rt5670.h | 1 + sound/soc/codecs/rt5670.c | 71 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 57 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/sound/rt5670.h b/include/sound/rt5670.h index f9024c7a1600..02e1d7778354 100644 --- a/include/sound/rt5670.h +++ b/include/sound/rt5670.h @@ -12,6 +12,7 @@ struct rt5670_platform_data { int jd_mode; bool in2_diff; bool dev_gpio; + bool gpio1_is_ext_spk_en; bool dmic_en; unsigned int dmic1_data_pin; diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index 70fee6849ab0..f21181734170 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -31,18 +31,19 @@ #include "rt5670.h" #include "rt5670-dsp.h" -#define RT5670_DEV_GPIO BIT(0) -#define RT5670_IN2_DIFF BIT(1) -#define RT5670_DMIC_EN BIT(2) -#define RT5670_DMIC1_IN2P BIT(3) -#define RT5670_DMIC1_GPIO6 BIT(4) -#define RT5670_DMIC1_GPIO7 BIT(5) -#define RT5670_DMIC2_INR BIT(6) -#define RT5670_DMIC2_GPIO8 BIT(7) -#define RT5670_DMIC3_GPIO5 BIT(8) -#define RT5670_JD_MODE1 BIT(9) -#define RT5670_JD_MODE2 BIT(10) -#define RT5670_JD_MODE3 BIT(11) +#define RT5670_DEV_GPIO BIT(0) +#define RT5670_IN2_DIFF BIT(1) +#define RT5670_DMIC_EN BIT(2) +#define RT5670_DMIC1_IN2P BIT(3) +#define RT5670_DMIC1_GPIO6 BIT(4) +#define RT5670_DMIC1_GPIO7 BIT(5) +#define RT5670_DMIC2_INR BIT(6) +#define RT5670_DMIC2_GPIO8 BIT(7) +#define RT5670_DMIC3_GPIO5 BIT(8) +#define RT5670_JD_MODE1 BIT(9) +#define RT5670_JD_MODE2 BIT(10) +#define RT5670_JD_MODE3 BIT(11) +#define RT5670_GPIO1_IS_EXT_SPK_EN BIT(12) static unsigned long rt5670_quirk; static unsigned int quirk_override; @@ -1447,6 +1448,33 @@ static int rt5670_hp_event(struct snd_soc_dapm_widget *w, return 0; } +static int rt5670_spk_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct rt5670_priv *rt5670 = snd_soc_component_get_drvdata(component); + + if (!rt5670->pdata.gpio1_is_ext_spk_en) + return 0; + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + regmap_update_bits(rt5670->regmap, RT5670_GPIO_CTRL2, + RT5670_GP1_OUT_MASK, RT5670_GP1_OUT_HI); + break; + + case SND_SOC_DAPM_PRE_PMD: + regmap_update_bits(rt5670->regmap, RT5670_GPIO_CTRL2, + RT5670_GP1_OUT_MASK, RT5670_GP1_OUT_LO); + break; + + default: + return 0; + } + + return 0; +} + static int rt5670_bst1_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { @@ -1860,7 +1888,9 @@ static const struct snd_soc_dapm_widget rt5670_specific_dapm_widgets[] = { }; static const struct snd_soc_dapm_widget rt5672_specific_dapm_widgets[] = { - SND_SOC_DAPM_PGA("SPO Amp", SND_SOC_NOPM, 0, 0, NULL, 0), + SND_SOC_DAPM_PGA_E("SPO Amp", SND_SOC_NOPM, 0, 0, NULL, 0, + rt5670_spk_event, SND_SOC_DAPM_PRE_PMD | + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_OUTPUT("SPOLP"), SND_SOC_DAPM_OUTPUT("SPOLN"), SND_SOC_DAPM_OUTPUT("SPORP"), @@ -2857,14 +2887,14 @@ static const struct dmi_system_id dmi_platform_intel_quirks[] = { }, { .callback = rt5670_quirk_cb, - .ident = "Lenovo Thinkpad Tablet 10", + .ident = "Lenovo Miix 2 10", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Miix 2 10"), }, .driver_data = (unsigned long *)(RT5670_DMIC_EN | RT5670_DMIC1_IN2P | - RT5670_DEV_GPIO | + RT5670_GPIO1_IS_EXT_SPK_EN | RT5670_JD_MODE2), }, { @@ -2924,6 +2954,10 @@ static int rt5670_i2c_probe(struct i2c_client *i2c, rt5670->pdata.dev_gpio = true; dev_info(&i2c->dev, "quirk dev_gpio\n"); } + if (rt5670_quirk & RT5670_GPIO1_IS_EXT_SPK_EN) { + rt5670->pdata.gpio1_is_ext_spk_en = true; + dev_info(&i2c->dev, "quirk GPIO1 is external speaker enable\n"); + } if (rt5670_quirk & RT5670_IN2_DIFF) { rt5670->pdata.in2_diff = true; dev_info(&i2c->dev, "quirk IN2_DIFF\n"); @@ -3023,6 +3057,13 @@ static int rt5670_i2c_probe(struct i2c_client *i2c, RT5670_GP1_PF_MASK, RT5670_GP1_PF_OUT); } + if (rt5670->pdata.gpio1_is_ext_spk_en) { + regmap_update_bits(rt5670->regmap, RT5670_GPIO_CTRL1, + RT5670_GP1_PIN_MASK, RT5670_GP1_PIN_GPIO1); + regmap_update_bits(rt5670->regmap, RT5670_GPIO_CTRL2, + RT5670_GP1_PF_MASK, RT5670_GP1_PF_OUT); + } + if (rt5670->pdata.jd_mode) { regmap_update_bits(rt5670->regmap, RT5670_GLB_CLK, RT5670_SCLK_SRC_MASK, RT5670_SCLK_SRC_RCCLK); -- cgit v1.2.3 From bf64ff4c2aac65d680dc639a511c781cf6b6ec08 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 27 Jun 2020 00:12:24 -0700 Subject: genetlink: get rid of family->attrbuf genl_family_rcv_msg_attrs_parse() reuses the global family->attrbuf when family->parallel_ops is false. However, family->attrbuf is not protected by any lock on the genl_family_rcv_msg_doit() code path. This leads to several different consequences, one of them is UAF, like the following: genl_family_rcv_msg_doit(): genl_start(): genl_family_rcv_msg_attrs_parse() attrbuf = family->attrbuf __nlmsg_parse(attrbuf); genl_family_rcv_msg_attrs_parse() attrbuf = family->attrbuf __nlmsg_parse(attrbuf); info->attrs = attrs; cb->data = info; netlink_unicast_kernel(): consume_skb() genl_lock_dumpit(): genl_dumpit_info(cb)->attrs Note family->attrbuf is an array of pointers to the skb data, once the skb is freed, any dereference of family->attrbuf will be a UAF. Maybe we could serialize the family->attrbuf with genl_mutex too, but that would make the locking more complicated. Instead, we can just get rid of family->attrbuf and always allocate attrbuf from heap like the family->parallel_ops==true code path. This may add some performance overhead but comparing with taking the global genl_mutex, it still looks better. Fixes: 75cdbdd08900 ("net: ieee802154: have genetlink code to parse the attrs during dumpit") Fixes: 057af7071344 ("net: tipc: have genetlink code to parse the attrs during dumpit") Reported-and-tested-by: syzbot+3039ddf6d7b13daf3787@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+80cad1e3cb4c41cde6ff@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+736bcbcb11b60d0c0792@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+520f8704db2b68091d44@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+c96e4dfb32f8987fdeed@syzkaller.appspotmail.com Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/genetlink.h | 2 -- net/netlink/genetlink.c | 48 +++++++++++++----------------------------------- 2 files changed, 13 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 74950663bb00..ad71ed4f55ff 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -41,7 +41,6 @@ struct genl_info; * Note that unbind() will not be called symmetrically if the * generic netlink family is removed while there are still open * sockets. - * @attrbuf: buffer to store parsed attributes (private) * @mcgrps: multicast groups used by this family * @n_mcgrps: number of multicast groups * @mcgrp_offset: starting number of multicast group IDs in this family @@ -66,7 +65,6 @@ struct genl_family { struct genl_info *info); int (*mcast_bind)(struct net *net, int group); void (*mcast_unbind)(struct net *net, int group); - struct nlattr ** attrbuf; /* private */ const struct genl_ops * ops; const struct genl_multicast_group *mcgrps; unsigned int n_ops; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 55ee680e9db1..a914b9365a46 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -351,22 +351,11 @@ int genl_register_family(struct genl_family *family) start = end = GENL_ID_VFS_DQUOT; } - if (family->maxattr && !family->parallel_ops) { - family->attrbuf = kmalloc_array(family->maxattr + 1, - sizeof(struct nlattr *), - GFP_KERNEL); - if (family->attrbuf == NULL) { - err = -ENOMEM; - goto errout_locked; - } - } else - family->attrbuf = NULL; - family->id = idr_alloc_cyclic(&genl_fam_idr, family, start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; - goto errout_free; + goto errout_locked; } err = genl_validate_assign_mc_groups(family); @@ -385,8 +374,6 @@ int genl_register_family(struct genl_family *family) errout_remove: idr_remove(&genl_fam_idr, family->id); -errout_free: - kfree(family->attrbuf); errout_locked: genl_unlock_all(); return err; @@ -419,8 +406,6 @@ int genl_unregister_family(const struct genl_family *family) atomic_read(&genl_sk_destructing_cnt) == 0); genl_unlock(); - kfree(family->attrbuf); - genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); return 0; @@ -485,30 +470,23 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family, if (!family->maxattr) return NULL; - if (family->parallel_ops) { - attrbuf = kmalloc_array(family->maxattr + 1, - sizeof(struct nlattr *), GFP_KERNEL); - if (!attrbuf) - return ERR_PTR(-ENOMEM); - } else { - attrbuf = family->attrbuf; - } + attrbuf = kmalloc_array(family->maxattr + 1, + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) + return ERR_PTR(-ENOMEM); err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, family->policy, validate, extack); if (err) { - if (family->parallel_ops) - kfree(attrbuf); + kfree(attrbuf); return ERR_PTR(err); } return attrbuf; } -static void genl_family_rcv_msg_attrs_free(const struct genl_family *family, - struct nlattr **attrbuf) +static void genl_family_rcv_msg_attrs_free(struct nlattr **attrbuf) { - if (family->parallel_ops) - kfree(attrbuf); + kfree(attrbuf); } struct genl_start_context { @@ -542,7 +520,7 @@ static int genl_start(struct netlink_callback *cb) no_attrs: info = genl_dumpit_info_alloc(); if (!info) { - genl_family_rcv_msg_attrs_free(ctx->family, attrs); + genl_family_rcv_msg_attrs_free(attrs); return -ENOMEM; } info->family = ctx->family; @@ -559,7 +537,7 @@ no_attrs: } if (rc) { - genl_family_rcv_msg_attrs_free(info->family, info->attrs); + genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(info); cb->data = NULL; } @@ -588,7 +566,7 @@ static int genl_lock_done(struct netlink_callback *cb) rc = ops->done(cb); genl_unlock(); } - genl_family_rcv_msg_attrs_free(info->family, info->attrs); + genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(info); return rc; } @@ -601,7 +579,7 @@ static int genl_parallel_done(struct netlink_callback *cb) if (ops->done) rc = ops->done(cb); - genl_family_rcv_msg_attrs_free(info->family, info->attrs); + genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(info); return rc; } @@ -694,7 +672,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family, family->post_doit(ops, skb, &info); out: - genl_family_rcv_msg_attrs_free(family, attrbuf); + genl_family_rcv_msg_attrs_free(attrbuf); return err; } -- cgit v1.2.3 From 3aa91625007807bfca4155df1867a5c924a08662 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Jun 2020 15:03:56 +0200 Subject: dma-mapping: Add a new dma_need_sync API Add a new API to check if calls to dma_sync_single_for_{device,cpu} are required for a given DMA streaming mapping. Signed-off-by: Christoph Hellwig Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200629130359.2690853-2-hch@lst.de --- Documentation/core-api/dma-api.rst | 8 ++++++++ include/linux/dma-direct.h | 1 + include/linux/dma-mapping.h | 5 +++++ kernel/dma/direct.c | 6 ++++++ kernel/dma/mapping.c | 10 ++++++++++ 5 files changed, 30 insertions(+) (limited to 'include') diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst index 2d8d2fed7317..f41620439ef3 100644 --- a/Documentation/core-api/dma-api.rst +++ b/Documentation/core-api/dma-api.rst @@ -204,6 +204,14 @@ Returns the maximum size of a mapping for the device. The size parameter of the mapping functions like dma_map_single(), dma_map_page() and others should not be larger than the returned value. +:: + + bool + dma_need_sync(struct device *dev, dma_addr_t dma_addr); + +Returns %true if dma_sync_single_for_{device,cpu} calls are required to +transfer memory ownership. Returns %false if those calls can be skipped. + :: unsigned long diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 136f984df0d9..8b006730687b 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -87,4 +87,5 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); +bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 78f677cf45ab..a33ed3954ed4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -461,6 +461,7 @@ int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); +bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); #else /* CONFIG_HAS_DMA */ static inline dma_addr_t dma_map_page_attrs(struct device *dev, @@ -571,6 +572,10 @@ static inline size_t dma_max_mapping_size(struct device *dev) { return 0; } +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return false; +} static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 0a4881e59aa7..ecb922a0bfa0 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -530,3 +530,9 @@ size_t dma_direct_max_mapping_size(struct device *dev) return swiotlb_max_mapping_size(dev); return SIZE_MAX; } + +bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return !dev_is_dma_coherent(dev) || + is_swiotlb_buffer(dma_to_phys(dev, dma_addr)); +} diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 98e3d873792e..a8c18c9a796f 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -397,6 +397,16 @@ size_t dma_max_mapping_size(struct device *dev) } EXPORT_SYMBOL_GPL(dma_max_mapping_size); +bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_is_direct(ops)) + return dma_direct_need_sync(dev, dma_addr); + return ops->sync_single_for_cpu || ops->sync_single_for_device; +} +EXPORT_SYMBOL_GPL(dma_need_sync); + unsigned long dma_get_merge_boundary(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); -- cgit v1.2.3 From 91d5b70273267bbae6f5d1fb4cf3510bd31ef9ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Jun 2020 15:03:57 +0200 Subject: xsk: Replace the cheap_dma flag with a dma_need_sync flag Invert the polarity and better name the flag so that the use case is properly documented. Signed-off-by: Christoph Hellwig Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200629130359.2690853-3-hch@lst.de --- include/net/xsk_buff_pool.h | 6 +++--- net/xdp/xsk_buff_pool.c | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index a4ff226505c9..6842990e2712 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -40,7 +40,7 @@ struct xsk_buff_pool { u32 headroom; u32 chunk_size; u32 frame_len; - bool cheap_dma; + bool dma_need_sync; bool unaligned; void *addrs; struct device *dev; @@ -80,7 +80,7 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb) void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb); static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb) { - if (xskb->pool->cheap_dma) + if (!xskb->pool->dma_need_sync) return; xp_dma_sync_for_cpu_slow(xskb); @@ -91,7 +91,7 @@ void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma, static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma, size_t size) { - if (pool->cheap_dma) + if (!pool->dma_need_sync) return; xp_dma_sync_for_device_slow(pool, dma, size); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 540ed75e4482..9fe84c797a70 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -55,7 +55,6 @@ struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks, pool->free_heads_cnt = chunks; pool->headroom = headroom; pool->chunk_size = chunk_size; - pool->cheap_dma = true; pool->unaligned = unaligned; pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM; INIT_LIST_HEAD(&pool->free_list); @@ -195,7 +194,7 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, xp_check_dma_contiguity(pool); pool->dev = dev; - pool->cheap_dma = xp_check_cheap_dma(pool); + pool->dma_need_sync = !xp_check_cheap_dma(pool); return 0; } EXPORT_SYMBOL(xp_dma_map); @@ -280,7 +279,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; xskb->xdp.data_meta = xskb->xdp.data; - if (!pool->cheap_dma) { + if (pool->dma_need_sync) { dma_sync_single_range_for_device(pool->dev, xskb->dma, 0, pool->frame_len, DMA_BIDIRECTIONAL); -- cgit v1.2.3 From 3b7016996c4c44db5d499d98759b82fb714bb912 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 25 Jun 2020 16:13:54 +0200 Subject: flow_dissector: Pull BPF program assignment up to bpf-netns Prepare for using bpf_prog_array to store attached programs by moving out code that updates the attached program out of flow dissector. Managing bpf_prog_array is more involved than updating a single bpf_prog pointer. This will let us do it all from one place, bpf/net_namespace.c, in the subsequent patch. No functional change intended. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200625141357.910330-2-jakub@cloudflare.com --- include/net/flow_dissector.h | 3 ++- kernel/bpf/net_namespace.c | 20 ++++++++++++++++++-- net/core/flow_dissector.c | 13 ++----------- 3 files changed, 22 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index a7eba43fe4e4..4b6e36288ddd 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -372,7 +372,8 @@ flow_dissector_init_keys(struct flow_dissector_key_control *key_control, } #ifdef CONFIG_BPF_SYSCALL -int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog); +int flow_dissector_bpf_prog_attach_check(struct net *net, + struct bpf_prog *prog); #endif /* CONFIG_BPF_SYSCALL */ #endif diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c index 78cf061f8179..b951dab2687f 100644 --- a/kernel/bpf/net_namespace.c +++ b/kernel/bpf/net_namespace.c @@ -189,6 +189,7 @@ int netns_bpf_prog_query(const union bpf_attr *attr, int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { enum netns_bpf_attach_type type; + struct bpf_prog *attached; struct net *net; int ret; @@ -207,12 +208,26 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) switch (type) { case NETNS_BPF_FLOW_DISSECTOR: - ret = flow_dissector_bpf_prog_attach(net, prog); + ret = flow_dissector_bpf_prog_attach_check(net, prog); break; default: ret = -EINVAL; break; } + if (ret) + goto out_unlock; + + attached = rcu_dereference_protected(net->bpf.progs[type], + lockdep_is_held(&netns_bpf_mutex)); + if (attached == prog) { + /* The same program cannot be attached twice */ + ret = -EINVAL; + goto out_unlock; + } + rcu_assign_pointer(net->bpf.progs[type], prog); + if (attached) + bpf_prog_put(attached); + out_unlock: mutex_unlock(&netns_bpf_mutex); @@ -277,7 +292,7 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, switch (type) { case NETNS_BPF_FLOW_DISSECTOR: - err = flow_dissector_bpf_prog_attach(net, link->prog); + err = flow_dissector_bpf_prog_attach_check(net, link->prog); break; default: err = -EINVAL; @@ -286,6 +301,7 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, if (err) goto out_unlock; + rcu_assign_pointer(net->bpf.progs[type], link->prog); net->bpf.links[type] = link; out_unlock: diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d02df0b6d0d9..b57fb1359395 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -70,10 +70,10 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, EXPORT_SYMBOL(skb_flow_dissector_init); #ifdef CONFIG_BPF_SYSCALL -int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) +int flow_dissector_bpf_prog_attach_check(struct net *net, + struct bpf_prog *prog) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; - struct bpf_prog *attached; if (net == &init_net) { /* BPF flow dissector in the root namespace overrides @@ -97,15 +97,6 @@ int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) return -EEXIST; } - attached = rcu_dereference_protected(net->bpf.progs[type], - lockdep_is_held(&netns_bpf_mutex)); - if (attached == prog) - /* The same program cannot be attached twice */ - return -EINVAL; - - rcu_assign_pointer(net->bpf.progs[type], prog); - if (attached) - bpf_prog_put(attached); return 0; } #endif /* CONFIG_BPF_SYSCALL */ -- cgit v1.2.3 From 695c12147a40181fe9221d321c3f2de33c9574ed Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 25 Jun 2020 16:13:55 +0200 Subject: bpf, netns: Keep attached programs in bpf_prog_array Prepare for having multi-prog attachments for new netns attach types by storing programs to run in a bpf_prog_array, which is well suited for iterating over programs and running them in sequence. After this change bpf(PROG_QUERY) may block to allocate memory in bpf_prog_array_copy_to_user() for collected program IDs. This forces a change in how we protect access to the attached program in the query callback. Because bpf_prog_array_copy_to_user() can sleep, we switch from an RCU read lock to holding a mutex that serializes updaters. Because we allow only one BPF flow_dissector program to be attached to netns at all times, the bpf_prog_array pointed by net->bpf.run_array is always either detached (null) or one element long. No functional changes intended. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200625141357.910330-3-jakub@cloudflare.com --- include/net/netns/bpf.h | 5 +- kernel/bpf/net_namespace.c | 120 +++++++++++++++++++++++++++++++-------------- net/core/flow_dissector.c | 19 +++---- 3 files changed, 96 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h index a8dce2a380c8..a5015bda9979 100644 --- a/include/net/netns/bpf.h +++ b/include/net/netns/bpf.h @@ -9,9 +9,12 @@ #include struct bpf_prog; +struct bpf_prog_array; struct netns_bpf { - struct bpf_prog __rcu *progs[MAX_NETNS_BPF_ATTACH_TYPE]; + /* Array of programs to run compiled from progs or links */ + struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; + struct bpf_prog *progs[MAX_NETNS_BPF_ATTACH_TYPE]; struct bpf_link *links[MAX_NETNS_BPF_ATTACH_TYPE]; }; diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c index b951dab2687f..0dba97202357 100644 --- a/kernel/bpf/net_namespace.c +++ b/kernel/bpf/net_namespace.c @@ -33,6 +33,17 @@ static void __net_exit bpf_netns_link_auto_detach(struct bpf_link *link) net_link->net = NULL; } +/* Must be called with netns_bpf_mutex held. */ +static void netns_bpf_run_array_detach(struct net *net, + enum netns_bpf_attach_type type) +{ + struct bpf_prog_array *run_array; + + run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL, + lockdep_is_held(&netns_bpf_mutex)); + bpf_prog_array_free(run_array); +} + static void bpf_netns_link_release(struct bpf_link *link) { struct bpf_netns_link *net_link = @@ -54,8 +65,8 @@ static void bpf_netns_link_release(struct bpf_link *link) if (!net) goto out_unlock; + netns_bpf_run_array_detach(net, type); net->bpf.links[type] = NULL; - RCU_INIT_POINTER(net->bpf.progs[type], NULL); out_unlock: mutex_unlock(&netns_bpf_mutex); @@ -76,6 +87,7 @@ static int bpf_netns_link_update_prog(struct bpf_link *link, struct bpf_netns_link *net_link = container_of(link, struct bpf_netns_link, link); enum netns_bpf_attach_type type = net_link->netns_type; + struct bpf_prog_array *run_array; struct net *net; int ret = 0; @@ -93,8 +105,11 @@ static int bpf_netns_link_update_prog(struct bpf_link *link, goto out_unlock; } + run_array = rcu_dereference_protected(net->bpf.run_array[type], + lockdep_is_held(&netns_bpf_mutex)); + WRITE_ONCE(run_array->items[0].prog, new_prog); + old_prog = xchg(&link->prog, new_prog); - rcu_assign_pointer(net->bpf.progs[type], new_prog); bpf_prog_put(old_prog); out_unlock: @@ -142,14 +157,38 @@ static const struct bpf_link_ops bpf_netns_link_ops = { .show_fdinfo = bpf_netns_link_show_fdinfo, }; +/* Must be called with netns_bpf_mutex held. */ +static int __netns_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr, + struct net *net, + enum netns_bpf_attach_type type) +{ + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); + struct bpf_prog_array *run_array; + u32 prog_cnt = 0, flags = 0; + + run_array = rcu_dereference_protected(net->bpf.run_array[type], + lockdep_is_held(&netns_bpf_mutex)); + if (run_array) + prog_cnt = bpf_prog_array_length(run_array); + + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) + return -EFAULT; + if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) + return -EFAULT; + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) + return 0; + + return bpf_prog_array_copy_to_user(run_array, prog_ids, + attr->query.prog_cnt); +} + int netns_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { - __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); - u32 prog_id, prog_cnt = 0, flags = 0; enum netns_bpf_attach_type type; - struct bpf_prog *attached; struct net *net; + int ret; if (attr->query.query_flags) return -EINVAL; @@ -162,32 +201,17 @@ int netns_bpf_prog_query(const union bpf_attr *attr, if (IS_ERR(net)) return PTR_ERR(net); - rcu_read_lock(); - attached = rcu_dereference(net->bpf.progs[type]); - if (attached) { - prog_cnt = 1; - prog_id = attached->aux->id; - } - rcu_read_unlock(); + mutex_lock(&netns_bpf_mutex); + ret = __netns_bpf_prog_query(attr, uattr, net, type); + mutex_unlock(&netns_bpf_mutex); put_net(net); - - if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) - return -EFAULT; - if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) - return -EFAULT; - - if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) - return 0; - - if (copy_to_user(prog_ids, &prog_id, sizeof(u32))) - return -EFAULT; - - return 0; + return ret; } int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { + struct bpf_prog_array *run_array; enum netns_bpf_attach_type type; struct bpf_prog *attached; struct net *net; @@ -217,14 +241,28 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (ret) goto out_unlock; - attached = rcu_dereference_protected(net->bpf.progs[type], - lockdep_is_held(&netns_bpf_mutex)); + attached = net->bpf.progs[type]; if (attached == prog) { /* The same program cannot be attached twice */ ret = -EINVAL; goto out_unlock; } - rcu_assign_pointer(net->bpf.progs[type], prog); + + run_array = rcu_dereference_protected(net->bpf.run_array[type], + lockdep_is_held(&netns_bpf_mutex)); + if (run_array) { + WRITE_ONCE(run_array->items[0].prog, prog); + } else { + run_array = bpf_prog_array_alloc(1, GFP_KERNEL); + if (!run_array) { + ret = -ENOMEM; + goto out_unlock; + } + run_array->items[0].prog = prog; + rcu_assign_pointer(net->bpf.run_array[type], run_array); + } + + net->bpf.progs[type] = prog; if (attached) bpf_prog_put(attached); @@ -244,11 +282,11 @@ static int __netns_bpf_prog_detach(struct net *net, if (net->bpf.links[type]) return -EINVAL; - attached = rcu_dereference_protected(net->bpf.progs[type], - lockdep_is_held(&netns_bpf_mutex)); + attached = net->bpf.progs[type]; if (!attached) return -ENOENT; - RCU_INIT_POINTER(net->bpf.progs[type], NULL); + netns_bpf_run_array_detach(net, type); + net->bpf.progs[type] = NULL; bpf_prog_put(attached); return 0; } @@ -272,7 +310,7 @@ int netns_bpf_prog_detach(const union bpf_attr *attr) static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, enum netns_bpf_attach_type type) { - struct bpf_prog *prog; + struct bpf_prog_array *run_array; int err; mutex_lock(&netns_bpf_mutex); @@ -283,9 +321,7 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, goto out_unlock; } /* Links are not compatible with attaching prog directly */ - prog = rcu_dereference_protected(net->bpf.progs[type], - lockdep_is_held(&netns_bpf_mutex)); - if (prog) { + if (net->bpf.progs[type]) { err = -EEXIST; goto out_unlock; } @@ -301,7 +337,14 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, if (err) goto out_unlock; - rcu_assign_pointer(net->bpf.progs[type], link->prog); + run_array = bpf_prog_array_alloc(1, GFP_KERNEL); + if (!run_array) { + err = -ENOMEM; + goto out_unlock; + } + run_array->items[0].prog = link->prog; + rcu_assign_pointer(net->bpf.run_array[type], run_array); + net->bpf.links[type] = link; out_unlock: @@ -368,11 +411,12 @@ static void __net_exit netns_bpf_pernet_pre_exit(struct net *net) mutex_lock(&netns_bpf_mutex); for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) { + netns_bpf_run_array_detach(net, type); link = net->bpf.links[type]; if (link) bpf_netns_link_auto_detach(link); - else - __netns_bpf_prog_detach(net, type); + else if (net->bpf.progs[type]) + bpf_prog_put(net->bpf.progs[type]); } mutex_unlock(&netns_bpf_mutex); } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index b57fb1359395..142a8824f0a8 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -86,14 +86,14 @@ int flow_dissector_bpf_prog_attach_check(struct net *net, for_each_net(ns) { if (ns == &init_net) continue; - if (rcu_access_pointer(ns->bpf.progs[type])) + if (rcu_access_pointer(ns->bpf.run_array[type])) return -EEXIST; } } else { /* Make sure root flow dissector is not attached * when attaching to the non-root namespace. */ - if (rcu_access_pointer(init_net.bpf.progs[type])) + if (rcu_access_pointer(init_net.bpf.run_array[type])) return -EEXIST; } @@ -894,7 +894,6 @@ bool __skb_flow_dissect(const struct net *net, struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; - struct bpf_prog *attached = NULL; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; bool mpls_el = false; @@ -951,14 +950,14 @@ bool __skb_flow_dissect(const struct net *net, WARN_ON_ONCE(!net); if (net) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; + struct bpf_prog_array *run_array; rcu_read_lock(); - attached = rcu_dereference(init_net.bpf.progs[type]); + run_array = rcu_dereference(init_net.bpf.run_array[type]); + if (!run_array) + run_array = rcu_dereference(net->bpf.run_array[type]); - if (!attached) - attached = rcu_dereference(net->bpf.progs[type]); - - if (attached) { + if (run_array) { struct bpf_flow_keys flow_keys; struct bpf_flow_dissector ctx = { .flow_keys = &flow_keys, @@ -966,6 +965,7 @@ bool __skb_flow_dissect(const struct net *net, .data_end = data + hlen, }; __be16 n_proto = proto; + struct bpf_prog *prog; if (skb) { ctx.skb = skb; @@ -976,7 +976,8 @@ bool __skb_flow_dissect(const struct net *net, n_proto = skb->protocol; } - ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, + prog = READ_ONCE(run_array->items[0].prog); + ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, hlen, flags); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); -- cgit v1.2.3 From ab53cad90eb10c9991f501ba08904680a074ef3d Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 25 Jun 2020 16:13:56 +0200 Subject: bpf, netns: Keep a list of attached bpf_link's To support multi-prog link-based attachments for new netns attach types, we need to keep track of more than one bpf_link per attach type. Hence, convert net->bpf.links into a list, that currently can be either empty or have just one item. Instead of reusing bpf_prog_list from bpf-cgroup, we link together bpf_netns_link's themselves. This makes list management simpler as we don't have to allocate, initialize, and later release list elements. We can do this because multi-prog attachment will be available only for bpf_link, and we don't need to build a list of programs attached directly and indirectly via links. No functional changes intended. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200625141357.910330-4-jakub@cloudflare.com --- include/net/netns/bpf.h | 2 +- kernel/bpf/net_namespace.c | 42 +++++++++++++++++++++++------------------- 2 files changed, 24 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h index a5015bda9979..0ca6a1b87185 100644 --- a/include/net/netns/bpf.h +++ b/include/net/netns/bpf.h @@ -15,7 +15,7 @@ struct netns_bpf { /* Array of programs to run compiled from progs or links */ struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; struct bpf_prog *progs[MAX_NETNS_BPF_ATTACH_TYPE]; - struct bpf_link *links[MAX_NETNS_BPF_ATTACH_TYPE]; + struct list_head links[MAX_NETNS_BPF_ATTACH_TYPE]; }; #endif /* __NETNS_BPF_H__ */ diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c index 0dba97202357..7a34a8caf954 100644 --- a/kernel/bpf/net_namespace.c +++ b/kernel/bpf/net_namespace.c @@ -19,20 +19,12 @@ struct bpf_netns_link { * with netns_bpf_mutex held. */ struct net *net; + struct list_head node; /* node in list of links attached to net */ }; /* Protects updates to netns_bpf */ DEFINE_MUTEX(netns_bpf_mutex); -/* Must be called with netns_bpf_mutex held. */ -static void __net_exit bpf_netns_link_auto_detach(struct bpf_link *link) -{ - struct bpf_netns_link *net_link = - container_of(link, struct bpf_netns_link, link); - - net_link->net = NULL; -} - /* Must be called with netns_bpf_mutex held. */ static void netns_bpf_run_array_detach(struct net *net, enum netns_bpf_attach_type type) @@ -66,7 +58,7 @@ static void bpf_netns_link_release(struct bpf_link *link) goto out_unlock; netns_bpf_run_array_detach(net, type); - net->bpf.links[type] = NULL; + list_del(&net_link->node); out_unlock: mutex_unlock(&netns_bpf_mutex); @@ -225,7 +217,7 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) mutex_lock(&netns_bpf_mutex); /* Attaching prog directly is not compatible with links */ - if (net->bpf.links[type]) { + if (!list_empty(&net->bpf.links[type])) { ret = -EEXIST; goto out_unlock; } @@ -279,7 +271,7 @@ static int __netns_bpf_prog_detach(struct net *net, struct bpf_prog *attached; /* Progs attached via links cannot be detached */ - if (net->bpf.links[type]) + if (!list_empty(&net->bpf.links[type])) return -EINVAL; attached = net->bpf.progs[type]; @@ -310,13 +302,15 @@ int netns_bpf_prog_detach(const union bpf_attr *attr) static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, enum netns_bpf_attach_type type) { + struct bpf_netns_link *net_link = + container_of(link, struct bpf_netns_link, link); struct bpf_prog_array *run_array; int err; mutex_lock(&netns_bpf_mutex); /* Allow attaching only one prog or link for now */ - if (net->bpf.links[type]) { + if (!list_empty(&net->bpf.links[type])) { err = -E2BIG; goto out_unlock; } @@ -345,7 +339,7 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, run_array->items[0].prog = link->prog; rcu_assign_pointer(net->bpf.run_array[type], run_array); - net->bpf.links[type] = link; + list_add_tail(&net_link->node, &net->bpf.links[type]); out_unlock: mutex_unlock(&netns_bpf_mutex); @@ -404,24 +398,34 @@ out_put_net: return err; } +static int __net_init netns_bpf_pernet_init(struct net *net) +{ + int type; + + for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) + INIT_LIST_HEAD(&net->bpf.links[type]); + + return 0; +} + static void __net_exit netns_bpf_pernet_pre_exit(struct net *net) { enum netns_bpf_attach_type type; - struct bpf_link *link; + struct bpf_netns_link *net_link; mutex_lock(&netns_bpf_mutex); for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) { netns_bpf_run_array_detach(net, type); - link = net->bpf.links[type]; - if (link) - bpf_netns_link_auto_detach(link); - else if (net->bpf.progs[type]) + list_for_each_entry(net_link, &net->bpf.links[type], node) + net_link->net = NULL; /* auto-detach link */ + if (net->bpf.progs[type]) bpf_prog_put(net->bpf.progs[type]); } mutex_unlock(&netns_bpf_mutex); } static struct pernet_operations netns_bpf_pernet_ops __net_initdata = { + .init = netns_bpf_pernet_init, .pre_exit = netns_bpf_pernet_pre_exit, }; -- cgit v1.2.3 From 4ac2add65974e4efafb8d4ccd8fc5660417ea312 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 29 Jun 2020 10:56:26 +0100 Subject: bpf: flow_dissector: Check value of unused flags to BPF_PROG_DETACH Using BPF_PROG_DETACH on a flow dissector program supports neither attach_flags nor attach_bpf_fd. Yet no value is enforced for them. Enforce that attach_flags are zero, and require the current program to be passed via attach_bpf_fd. This allows us to remove the check for CAP_SYS_ADMIN, since userspace can now no longer remove arbitrary flow dissector programs. Fixes: b27f7bb590ba ("flow_dissector: Move out netns_bpf prog callbacks") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200629095630.7933-3-lmb@cloudflare.com --- include/linux/bpf-netns.h | 5 +++-- kernel/bpf/net_namespace.c | 19 +++++++++++++++---- kernel/bpf/syscall.c | 4 +--- 3 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h index 4052d649f36d..47d5b0c708c9 100644 --- a/include/linux/bpf-netns.h +++ b/include/linux/bpf-netns.h @@ -33,7 +33,7 @@ int netns_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); -int netns_bpf_prog_detach(const union bpf_attr *attr); +int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -49,7 +49,8 @@ static inline int netns_bpf_prog_attach(const union bpf_attr *attr, return -EOPNOTSUPP; } -static inline int netns_bpf_prog_detach(const union bpf_attr *attr) +static inline int netns_bpf_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype) { return -EOPNOTSUPP; } diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c index 03045f45afec..3dbc29b6f51d 100644 --- a/kernel/bpf/net_namespace.c +++ b/kernel/bpf/net_namespace.c @@ -269,7 +269,8 @@ out_unlock: /* Must be called with netns_bpf_mutex held. */ static int __netns_bpf_prog_detach(struct net *net, - enum netns_bpf_attach_type type) + enum netns_bpf_attach_type type, + struct bpf_prog *old) { struct bpf_prog *attached; @@ -278,7 +279,7 @@ static int __netns_bpf_prog_detach(struct net *net, return -EINVAL; attached = net->bpf.progs[type]; - if (!attached) + if (!attached || attached != old) return -ENOENT; netns_bpf_run_array_detach(net, type); net->bpf.progs[type] = NULL; @@ -286,19 +287,29 @@ static int __netns_bpf_prog_detach(struct net *net, return 0; } -int netns_bpf_prog_detach(const union bpf_attr *attr) +int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) { enum netns_bpf_attach_type type; + struct bpf_prog *prog; int ret; + if (attr->target_fd) + return -EINVAL; + type = to_netns_bpf_attach_type(attr->attach_type); if (type < 0) return -EINVAL; + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); + if (IS_ERR(prog)) + return PTR_ERR(prog); + mutex_lock(&netns_bpf_mutex); - ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type); + ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog); mutex_unlock(&netns_bpf_mutex); + bpf_prog_put(prog); + return ret; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7d946435587d..28c6ef759037 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2897,9 +2897,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_PROG_TYPE_LIRC_MODE2: return lirc_prog_detach(attr); case BPF_PROG_TYPE_FLOW_DISSECTOR: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - return netns_bpf_prog_detach(attr); + return netns_bpf_prog_detach(attr, ptype); case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SOCK: -- cgit v1.2.3 From bb0de3131f4c60a9bf976681e0fe4d1e55c7a821 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 29 Jun 2020 10:56:28 +0100 Subject: bpf: sockmap: Require attach_bpf_fd when detaching a program The sockmap code currently ignores the value of attach_bpf_fd when detaching a program. This is contrary to the usual behaviour of checking that attach_bpf_fd represents the currently attached program. Ensure that attach_bpf_fd is indeed the currently attached program. It turns out that all sockmap selftests already do this, which indicates that this is unlikely to cause breakage. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200629095630.7933-5-lmb@cloudflare.com --- include/linux/bpf.h | 13 +++++++++++-- include/linux/skmsg.h | 13 +++++++++++++ kernel/bpf/syscall.c | 2 +- net/core/sock_map.c | 50 +++++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 70 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07052d44bca1..9750a1902ee5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1543,13 +1543,16 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ #if defined(CONFIG_BPF_STREAM_PARSER) -int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); +int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, + struct bpf_prog *old, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else static inline int sock_map_prog_update(struct bpf_map *map, - struct bpf_prog *prog, u32 which) + struct bpf_prog *prog, + struct bpf_prog *old, u32 which) { return -EOPNOTSUPP; } @@ -1559,6 +1562,12 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr, { return -EINVAL; } + +static inline int sock_map_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_STREAM_PARSER */ #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 08674cd14d5a..1e9ed840b9fc 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -430,6 +430,19 @@ static inline void psock_set_prog(struct bpf_prog **pprog, bpf_prog_put(prog); } +static inline int psock_replace_prog(struct bpf_prog **pprog, + struct bpf_prog *prog, + struct bpf_prog *old) +{ + if (cmpxchg(pprog, old, prog) != old) + return -ENOENT; + + if (old) + bpf_prog_put(old); + + return 0; +} + static inline void psock_progs_drop(struct sk_psock_progs *progs) { psock_set_prog(&progs->msg_parser, NULL); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 28c6ef759037..a74fce8ce043 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2893,7 +2893,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) switch (ptype) { case BPF_PROG_TYPE_SK_MSG: case BPF_PROG_TYPE_SK_SKB: - return sock_map_get_from_fd(attr, NULL); + return sock_map_prog_detach(attr, ptype); case BPF_PROG_TYPE_LIRC_MODE2: return lirc_prog_detach(attr); case BPF_PROG_TYPE_FLOW_DISSECTOR: diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 58016a5c63ff..0971f17e8e54 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -77,7 +77,42 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - ret = sock_map_prog_update(map, prog, attr->attach_type); + ret = sock_map_prog_update(map, prog, NULL, attr->attach_type); + fdput(f); + return ret; +} + +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) +{ + u32 ufd = attr->target_fd; + struct bpf_prog *prog; + struct bpf_map *map; + struct fd f; + int ret; + + if (attr->attach_flags || attr->replace_bpf_fd) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + prog = bpf_prog_get(attr->attach_bpf_fd); + if (IS_ERR(prog)) { + ret = PTR_ERR(prog); + goto put_map; + } + + if (prog->type != ptype) { + ret = -EINVAL; + goto put_prog; + } + + ret = sock_map_prog_update(map, NULL, prog, attr->attach_type); +put_prog: + bpf_prog_put(prog); +put_map: fdput(f); return ret; } @@ -1206,27 +1241,32 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) } int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - u32 which) + struct bpf_prog *old, u32 which) { struct sk_psock_progs *progs = sock_map_progs(map); + struct bpf_prog **pprog; if (!progs) return -EOPNOTSUPP; switch (which) { case BPF_SK_MSG_VERDICT: - psock_set_prog(&progs->msg_parser, prog); + pprog = &progs->msg_parser; break; case BPF_SK_SKB_STREAM_PARSER: - psock_set_prog(&progs->skb_parser, prog); + pprog = &progs->skb_parser; break; case BPF_SK_SKB_STREAM_VERDICT: - psock_set_prog(&progs->skb_verdict, prog); + pprog = &progs->skb_verdict; break; default: return -EOPNOTSUPP; } + if (old) + return psock_replace_prog(pprog, prog, old); + + psock_set_prog(pprog, prog); return 0; } -- cgit v1.2.3 From e91b48162332480f5840902268108bb7fb7a44c7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Jun 2020 17:32:54 +0200 Subject: task_work: teach task_work_add() to do signal_wake_up() So that the target task will exit the wait_event_interruptible-like loop and call task_work_run() asap. The patch turns "bool notify" into 0,TWA_RESUME,TWA_SIGNAL enum, the new TWA_SIGNAL flag implies signal_wake_up(). However, it needs to avoid the race with recalc_sigpending(), so the patch also adds the new JOBCTL_TASK_WORK bit included in JOBCTL_PENDING_MASK. TODO: once this patch is merged we need to change all current users of task_work_add(notify = true) to use TWA_RESUME. Cc: stable@vger.kernel.org # v5.7 Acked-by: Peter Zijlstra (Intel) Signed-off-by: Oleg Nesterov Signed-off-by: Jens Axboe --- include/linux/sched/jobctl.h | 4 +++- include/linux/task_work.h | 5 ++++- kernel/signal.c | 10 +++++++--- kernel/task_work.c | 16 ++++++++++++++-- 4 files changed, 28 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index fa067de9f1a9..d2b4204ba4d3 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -19,6 +19,7 @@ struct task_struct; #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ +#define JOBCTL_TASK_WORK_BIT 24 /* set by TWA_SIGNAL */ #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) @@ -28,9 +29,10 @@ struct task_struct; #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) +#define JOBCTL_TASK_WORK (1UL << JOBCTL_TASK_WORK_BIT) #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK) extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask); extern void task_clear_jobctl_trapping(struct task_struct *task); diff --git a/include/linux/task_work.h b/include/linux/task_work.h index bd9a6a91c097..0fb93aafa478 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -13,7 +13,10 @@ init_task_work(struct callback_head *twork, task_work_func_t func) twork->func = func; } -int task_work_add(struct task_struct *task, struct callback_head *twork, bool); +#define TWA_RESUME 1 +#define TWA_SIGNAL 2 +int task_work_add(struct task_struct *task, struct callback_head *twork, int); + struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t); void task_work_run(void); diff --git a/kernel/signal.c b/kernel/signal.c index 5ca48cc5da76..ee22ec78fd6d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2529,9 +2529,6 @@ bool get_signal(struct ksignal *ksig) struct signal_struct *signal = current->signal; int signr; - if (unlikely(current->task_works)) - task_work_run(); - if (unlikely(uprobe_deny_signal())) return false; @@ -2544,6 +2541,13 @@ bool get_signal(struct ksignal *ksig) relock: spin_lock_irq(&sighand->siglock); + current->jobctl &= ~JOBCTL_TASK_WORK; + if (unlikely(current->task_works)) { + spin_unlock_irq(&sighand->siglock); + task_work_run(); + goto relock; + } + /* * Every stopped thread goes here after wakeup. Check to see if * we should notify the parent, prepare_signal(SIGCONT) encodes diff --git a/kernel/task_work.c b/kernel/task_work.c index 825f28259a19..5c0848ca1287 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -25,9 +25,10 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ * 0 if succeeds or -ESRCH. */ int -task_work_add(struct task_struct *task, struct callback_head *work, bool notify) +task_work_add(struct task_struct *task, struct callback_head *work, int notify) { struct callback_head *head; + unsigned long flags; do { head = READ_ONCE(task->task_works); @@ -36,8 +37,19 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify) work->next = head; } while (cmpxchg(&task->task_works, head, work) != head); - if (notify) + switch (notify) { + case TWA_RESUME: set_notify_resume(task); + break; + case TWA_SIGNAL: + if (lock_task_sighand(task, &flags)) { + task->jobctl |= JOBCTL_TASK_WORK; + signal_wake_up(task, 0); + unlock_task_sighand(task, &flags); + } + break; + } + return 0; } -- cgit v1.2.3 From c463bb2a8f8d7d97aa414bf7714fc77e9d3b10df Mon Sep 17 00:00:00 2001 From: Merlijn Wajer Date: Tue, 30 Jun 2020 11:47:04 -0700 Subject: Input: add `SW_MACHINE_COVER` This event code represents the state of a removable cover of a device. Value 0 means that the cover is open or removed, value 1 means that the cover is closed. Reviewed-by: Sebastian Reichel Acked-by: Tony Lindgren Signed-off-by: Merlijn Wajer Link: https://lore.kernel.org/r/20200612125402.18393-2-merlijn@wizzup.org Signed-off-by: Dmitry Torokhov --- include/linux/mod_devicetable.h | 2 +- include/uapi/linux/input-event-codes.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index e3596db077dc..04a19e30b168 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -318,7 +318,7 @@ struct pcmcia_device_id { #define INPUT_DEVICE_ID_LED_MAX 0x0f #define INPUT_DEVICE_ID_SND_MAX 0x07 #define INPUT_DEVICE_ID_FF_MAX 0x7f -#define INPUT_DEVICE_ID_SW_MAX 0x0f +#define INPUT_DEVICE_ID_SW_MAX 0x10 #define INPUT_DEVICE_ID_PROP_MAX 0x1f #define INPUT_DEVICE_ID_MATCH_BUS 1 diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index b6a835d37826..0c2e27d28e0a 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -888,7 +888,8 @@ #define SW_LINEIN_INSERT 0x0d /* set = inserted */ #define SW_MUTE_DEVICE 0x0e /* set = device disabled */ #define SW_PEN_INSERTED 0x0f /* set = pen inserted */ -#define SW_MAX 0x0f +#define SW_MACHINE_COVER 0x10 /* set = cover closed */ +#define SW_MAX 0x10 #define SW_CNT (SW_MAX+1) /* -- cgit v1.2.3 From 2606aff916854b61234bf85001be9777bab2d5f8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 29 Jun 2020 19:06:18 -0600 Subject: net: ip_tunnel: add header_ops for layer 3 devices Some devices that take straight up layer 3 packets benefit from having a shared header_ops so that AF_PACKET sockets can inject packets that are recognized. This shared infrastructure will be used by other drivers that currently can't inject packets using AF_PACKET. It also exposes the parser function, as it is useful in standalone form too. Signed-off-by: Jason A. Donenfeld Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 3 +++ net/ipv4/ip_tunnel_core.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 076e5d7db7d3..36025dea7612 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -290,6 +290,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); +extern const struct header_ops ip_tunnel_header_ops; +__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); + struct ip_tunnel_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 181b7a2a0247..f8b419e2475c 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -844,3 +844,21 @@ void ip_tunnel_unneed_metadata(void) static_branch_dec(&ip_tunnel_metadata_cnt); } EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata); + +/* Returns either the correct skb->protocol value, or 0 if invalid. */ +__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb) +{ + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && + ip_hdr(skb)->version == 4) + return htons(ETH_P_IP); + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && + ipv6_hdr(skb)->version == 6) + return htons(ETH_P_IPV6); + return 0; +} +EXPORT_SYMBOL(ip_tunnel_parse_protocol); + +const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol }; +EXPORT_SYMBOL(ip_tunnel_header_ops); -- cgit v1.2.3 From 5396956cc7c6874180c9bfc1ceceb02b739a6a87 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 22 Jun 2020 19:12:48 +0300 Subject: PCI: Make pcie_find_root_port() work for Root Ports Commit 6ae72bfa656e ("PCI: Unify pcie_find_root_port() and pci_find_pcie_root_port()") broke acpi_pci_bridge_d3() because calling pcie_find_root_port() on a Root Port returned NULL when it should return the Root Port, which in turn broke power management of PCIe hierarchies. Rework pcie_find_root_port() so it returns its argument when it is already a Root Port. [bhelgaas: test device only once, test for PCIe] Fixes: 6ae72bfa656e ("PCI: Unify pcie_find_root_port() and pci_find_pcie_root_port()") Link: https://lore.kernel.org/r/20200622161248.51099-1-mika.westerberg@linux.intel.com Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index c79d83304e52..34c1c4f45288 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2169,12 +2169,11 @@ static inline int pci_pcie_type(const struct pci_dev *dev) */ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) { - struct pci_dev *bridge = pci_upstream_bridge(dev); - - while (bridge) { - if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) - return bridge; - bridge = pci_upstream_bridge(bridge); + while (dev) { + if (pci_is_pcie(dev) && + pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) + return dev; + dev = pci_upstream_bridge(dev); } return NULL; -- cgit v1.2.3 From 1e82a62fec613844da9e558f3493540a5b7a7b67 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 30 Jun 2020 11:50:17 -0600 Subject: genetlink: remove genl_bind A potential deadlock can occur during registering or unregistering a new generic netlink family between the main nl_table_lock and the cb_lock where each thread wants the lock held by the other, as demonstrated below. 1) Thread 1 is performing a netlink_bind() operation on a socket. As part of this call, it will call netlink_lock_table(), incrementing the nl_table_users count to 1. 2) Thread 2 is registering (or unregistering) a genl_family via the genl_(un)register_family() API. The cb_lock semaphore will be taken for writing. 3) Thread 1 will call genl_bind() as part of the bind operation to handle subscribing to GENL multicast groups at the request of the user. It will attempt to take the cb_lock semaphore for reading, but it will fail and be scheduled away, waiting for Thread 2 to finish the write. 4) Thread 2 will call netlink_table_grab() during the (un)registration call. However, as Thread 1 has incremented nl_table_users, it will not be able to proceed, and both threads will be stuck waiting for the other. genl_bind() is a noop, unless a genl_family implements the mcast_bind() function to handle setting up family-specific multicast operations. Since no one in-tree uses this functionality as Cong pointed out, simply removing the genl_bind() function will remove the possibility for deadlock, as there is no attempt by Thread 1 above to take the cb_lock semaphore. Fixes: c380d9a7afff ("genetlink: pass multicast bind/unbind to families") Suggested-by: Cong Wang Acked-by: Johannes Berg Reported-by: kernel test robot Signed-off-by: Sean Tranchetti Signed-off-by: David S. Miller --- include/net/genetlink.h | 8 -------- net/netlink/genetlink.c | 49 ------------------------------------------------- 2 files changed, 57 deletions(-) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ad71ed4f55ff..6e5f1e1aa822 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -35,12 +35,6 @@ struct genl_info; * do additional, common, filtering and return an error * @post_doit: called after an operation's doit callback, it may * undo operations done by pre_doit, for example release locks - * @mcast_bind: a socket bound to the given multicast group (which - * is given as the offset into the groups array) - * @mcast_unbind: a socket was unbound from the given multicast group. - * Note that unbind() will not be called symmetrically if the - * generic netlink family is removed while there are still open - * sockets. * @mcgrps: multicast groups used by this family * @n_mcgrps: number of multicast groups * @mcgrp_offset: starting number of multicast group IDs in this family @@ -63,8 +57,6 @@ struct genl_family { void (*post_doit)(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); - int (*mcast_bind)(struct net *net, int group); - void (*mcast_unbind)(struct net *net, int group); const struct genl_ops * ops; const struct genl_multicast_group *mcgrps; unsigned int n_ops; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a914b9365a46..9395ee8a868d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1144,60 +1144,11 @@ static struct genl_family genl_ctrl __ro_after_init = { .netnsok = true, }; -static int genl_bind(struct net *net, int group) -{ - struct genl_family *f; - int err = -ENOENT; - unsigned int id; - - down_read(&cb_lock); - - idr_for_each_entry(&genl_fam_idr, f, id) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; - - if (!f->netnsok && net != &init_net) - err = -ENOENT; - else if (f->mcast_bind) - err = f->mcast_bind(net, fam_grp); - else - err = 0; - break; - } - } - up_read(&cb_lock); - - return err; -} - -static void genl_unbind(struct net *net, int group) -{ - struct genl_family *f; - unsigned int id; - - down_read(&cb_lock); - - idr_for_each_entry(&genl_fam_idr, f, id) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; - - if (f->mcast_unbind) - f->mcast_unbind(net, fam_grp); - break; - } - } - up_read(&cb_lock); -} - static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, - .bind = genl_bind, - .unbind = genl_unbind, }; /* we'll bump the group number right afterwards */ -- cgit v1.2.3 From d7bf2ebebc2bd61ab95e2a8e33541ef282f303d4 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Fri, 3 Jul 2020 22:26:43 +0200 Subject: sched: consistently handle layer3 header accesses in the presence of VLANs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a couple of places in net/sched/ that check skb->protocol and act on the value there. However, in the presence of VLAN tags, the value stored in skb->protocol can be inconsistent based on whether VLAN acceleration is enabled. The commit quoted in the Fixes tag below fixed the users of skb->protocol to use a helper that will always see the VLAN ethertype. However, most of the callers don't actually handle the VLAN ethertype, but expect to find the IP header type in the protocol field. This means that things like changing the ECN field, or parsing diffserv values, stops working if there's a VLAN tag, or if there are multiple nested VLAN tags (QinQ). To fix this, change the helper to take an argument that indicates whether the caller wants to skip the VLAN tags or not. When skipping VLAN tags, we make sure to skip all of them, so behaviour is consistent even in QinQ mode. To make the helper usable from the ECN code, move it to if_vlan.h instead of pkt_sched.h. v3: - Remove empty lines - Move vlan variable definitions inside loop in skb_protocol() - Also use skb_protocol() helper in IP{,6}_ECN_decapsulate() and bpf_skb_ecn_set_ce() v2: - Use eth_type_vlan() helper in skb_protocol() - Also fix code that reads skb->protocol directly - Change a couple of 'if/else if' statements to switch constructs to avoid calling the helper twice Reported-by: Ilya Ponetayev Fixes: d8b9605d2697 ("net: sched: fix skb->protocol use in case of accelerated vlan path") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 28 ++++++++++++++++++++++++++++ include/net/inet_ecn.h | 25 +++++++++++++++++-------- include/net/pkt_sched.h | 11 ----------- net/core/filter.c | 10 +++++++--- net/sched/act_connmark.c | 9 ++++++--- net/sched/act_csum.c | 2 +- net/sched/act_ct.c | 9 ++++----- net/sched/act_ctinfo.c | 9 ++++++--- net/sched/act_mpls.c | 2 +- net/sched/act_skbedit.c | 2 +- net/sched/cls_api.c | 2 +- net/sched/cls_flow.c | 8 ++++---- net/sched/cls_flower.c | 2 +- net/sched/em_ipset.c | 2 +- net/sched/em_ipt.c | 2 +- net/sched/em_meta.c | 2 +- net/sched/sch_cake.c | 4 ++-- net/sched/sch_dsmark.c | 6 +++--- net/sched/sch_teql.c | 2 +- 19 files changed, 86 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b05e855f1ddd..427a5b8597c2 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -308,6 +308,34 @@ static inline bool eth_type_vlan(__be16 ethertype) } } +/* A getter for the SKB protocol field which will handle VLAN tags consistently + * whether VLAN acceleration is enabled or not. + */ +static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) +{ + unsigned int offset = skb_mac_offset(skb) + sizeof(struct ethhdr); + __be16 proto = skb->protocol; + + if (!skip_vlan) + /* VLAN acceleration strips the VLAN header from the skb and + * moves it to skb->vlan_proto + */ + return skb_vlan_tag_present(skb) ? skb->vlan_proto : proto; + + while (eth_type_vlan(proto)) { + struct vlan_hdr vhdr, *vh; + + vh = skb_header_pointer(skb, offset, sizeof(vhdr), &vhdr); + if (!vh) + break; + + proto = vh->h_vlan_encapsulated_proto; + offset += sizeof(vhdr); + } + + return proto; +} + static inline bool vlan_hw_offload_capable(netdev_features_t features, __be16 proto) { diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 0f0d1efe06dd..e1eaf1780288 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -172,7 +173,7 @@ static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) static inline int INET_ECN_set_ce(struct sk_buff *skb) { - switch (skb->protocol) { + switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) @@ -191,7 +192,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) static inline int INET_ECN_set_ect1(struct sk_buff *skb) { - switch (skb->protocol) { + switch (skb_protocol(skb, true)) { case cpu_to_be16(ETH_P_IP): if (skb_network_header(skb) + sizeof(struct iphdr) <= skb_tail_pointer(skb)) @@ -272,12 +273,16 @@ static inline int IP_ECN_decapsulate(const struct iphdr *oiph, { __u8 inner; - if (skb->protocol == htons(ETH_P_IP)) + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) + break; + case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); - else + break; + default: return 0; + } return INET_ECN_decapsulate(skb, oiph->tos, inner); } @@ -287,12 +292,16 @@ static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h, { __u8 inner; - if (skb->protocol == htons(ETH_P_IP)) + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): inner = ip_hdr(skb)->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) + break; + case htons(ETH_P_IPV6): inner = ipv6_get_dsfield(ipv6_hdr(skb)); - else + break; + default: return 0; + } return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner); } diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 9092e697059e..ac8c890a2657 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -136,17 +136,6 @@ static inline void qdisc_run(struct Qdisc *q) } } -static inline __be16 tc_skb_protocol(const struct sk_buff *skb) -{ - /* We need to take extra care in case the skb came via - * vlan accelerated path. In that case, use skb->vlan_proto - * as the original vlan header was already stripped. - */ - if (skb_vlan_tag_present(skb)) - return skb->vlan_proto; - return skb->protocol; -} - /* Calculate maximal size of packet seen by hard_start_xmit routine of this device. */ diff --git a/net/core/filter.c b/net/core/filter.c index 73395384afe2..82e1b5b06167 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5853,12 +5853,16 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) { unsigned int iphdr_len; - if (skb->protocol == cpu_to_be16(ETH_P_IP)) + switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): iphdr_len = sizeof(struct iphdr); - else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) + break; + case cpu_to_be16(ETH_P_IPV6): iphdr_len = sizeof(struct ipv6hdr); - else + break; + default: return 0; + } if (skb_headlen(skb) < iphdr_len) return 0; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 43a243081e7d..f901421b0634 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -43,17 +43,20 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, tcf_lastuse_update(&ca->tcf_tm); bstats_update(&ca->tcf_bstats, skb); - if (skb->protocol == htons(ETH_P_IP)) { + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): if (skb->len < sizeof(struct iphdr)) goto out; proto = NFPROTO_IPV4; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + break; + case htons(ETH_P_IPV6): if (skb->len < sizeof(struct ipv6hdr)) goto out; proto = NFPROTO_IPV6; - } else { + break; + default: goto out; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index cb8608f0a77a..c60674cf25c4 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -587,7 +587,7 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a, goto drop; update_flags = params->update_flags; - protocol = tc_skb_protocol(skb); + protocol = skb_protocol(skb, false); again: switch (protocol) { case cpu_to_be16(ETH_P_IP): diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index e9f3576cbf71..86ed02487467 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -624,7 +624,7 @@ static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) { u8 family = NFPROTO_UNSPEC; - switch (skb->protocol) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): family = NFPROTO_IPV4; break; @@ -748,6 +748,7 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, const struct nf_nat_range2 *range, enum nf_nat_manip_type maniptype) { + __be16 proto = skb_protocol(skb, true); int hooknum, err = NF_ACCEPT; /* See HOOK2MANIP(). */ @@ -759,14 +760,13 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED_REPLY: - if (skb->protocol == htons(ETH_P_IP) && + if (proto == htons(ETH_P_IP) && ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, hooknum)) err = NF_DROP; goto out; - } else if (IS_ENABLED(CONFIG_IPV6) && - skb->protocol == htons(ETH_P_IPV6)) { + } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) { __be16 frag_off; u8 nexthdr = ipv6_hdr(skb)->nexthdr; int hdrlen = ipv6_skip_exthdr(skb, @@ -1550,4 +1550,3 @@ MODULE_AUTHOR("Yossi Kuperman "); MODULE_AUTHOR("Marcelo Ricardo Leitner "); MODULE_DESCRIPTION("Connection tracking action"); MODULE_LICENSE("GPL v2"); - diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 19649623493b..b5042f3ea079 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -96,19 +96,22 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, action = READ_ONCE(ca->tcf_action); wlen = skb_network_offset(skb); - if (tc_skb_protocol(skb) == htons(ETH_P_IP)) { + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen)) goto out; proto = NFPROTO_IPV4; - } else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) { + break; + case htons(ETH_P_IPV6): wlen += sizeof(struct ipv6hdr); if (!pskb_may_pull(skb, wlen)) goto out; proto = NFPROTO_IPV6; - } else { + break; + default: goto out; } diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index be3f215cd027..8118e2640979 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -82,7 +82,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, goto drop; break; case TCA_MPLS_ACT_PUSH: - new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol)); + new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb_protocol(skb, true))); if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len, skb->dev && skb->dev->type == ARPHRD_ETHER)) goto drop; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index b125b2be4467..b2b3faa57294 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -41,7 +41,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, if (params->flags & SKBEDIT_F_INHERITDSFIELD) { int wlen = skb_network_offset(skb); - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen)) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index faa78b7dd962..e62beec0d844 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1538,7 +1538,7 @@ static inline int __tcf_classify(struct sk_buff *skb, reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { - __be16 protocol = tc_skb_protocol(skb); + __be16 protocol = skb_protocol(skb, false); int err; if (tp->protocol != protocol && diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 80ae7b9fa90a..ab53a93b2f2b 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -80,7 +80,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) if (dst) return ntohl(dst); - return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); + return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_proto(const struct sk_buff *skb, @@ -104,7 +104,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, if (flow->ports.ports) return ntohs(flow->ports.dst); - return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); + return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_iif(const struct sk_buff *skb) @@ -151,7 +151,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): @@ -164,7 +164,7 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b2da37286082..e30bd969fc48 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -313,7 +313,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, /* skb_flow_dissect() does not set n_proto in case an unknown * protocol, so do it rather here. */ - skb_key.basic.n_proto = skb->protocol; + skb_key.basic.n_proto = skb_protocol(skb, false); skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key); skb_flow_dissect_ct(skb, &mask->dissector, &skb_key, fl_ct_info_to_flower_map, diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index df00566d327d..c95cf86fb431 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, }; int ret, network_offset; - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): state.pf = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c index 18755d29fd15..3650117da47f 100644 --- a/net/sched/em_ipt.c +++ b/net/sched/em_ipt.c @@ -212,7 +212,7 @@ static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em, struct nf_hook_state state; int ret; - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) return 0; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index d99966a55c84..46254968d390 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -195,7 +195,7 @@ META_COLLECTOR(int_priority) META_COLLECTOR(int_protocol) { /* Let userspace take care of the byte ordering */ - dst->value = tc_skb_protocol(skb); + dst->value = skb_protocol(skb, false); } META_COLLECTOR(int_pkttype) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index ca813697728e..ebaeec1e5c82 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -592,7 +592,7 @@ static bool cake_update_flowkeys(struct flow_keys *keys, bool rev = !skb->_nfct, upd = false; __be32 ip; - if (tc_skb_protocol(skb) != htons(ETH_P_IP)) + if (skb_protocol(skb, true) != htons(ETH_P_IP)) return false; if (!nf_ct_get_tuple_skb(&tuple, skb)) @@ -1557,7 +1557,7 @@ static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash) u16 *buf, buf_; u8 dscp; - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); if (unlikely(!buf)) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 05605b30bef3..2b88710994d7 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (p->set_tc_index) { int wlen = skb_network_offset(skb); - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen) || @@ -303,7 +303,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) index = skb->tc_index & (p->indices - 1); pr_debug("index %d->%d\n", skb->tc_index, index); - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, p->mv[index].value); @@ -320,7 +320,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) */ if (p->mv[index].mask != 0xff || p->mv[index].value) pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(tc_skb_protocol(skb))); + __func__, ntohs(skb_protocol(skb, true))); break; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 689ef6f3ded8..2f1f0a378408 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -239,7 +239,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, n, dev); - err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)), + err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)), haddr, NULL, skb->len); if (err < 0) -- cgit v1.2.3 From 68d237056e007c88031d80900cdba0945121a287 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 30 Jun 2020 10:16:02 +0200 Subject: scatterlist: protect parameters of the sg_table related macros Add brackets to protect parameters of the recently added sg_table related macros from side-effects. Fixes: 709d6d73c756 ("scatterlist: add generic wrappers for iterating over sgtable objects") Signed-off-by: Marek Szyprowski Signed-off-by: Christoph Hellwig --- include/linux/scatterlist.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 4f922afb607a..45cf7b69d852 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -155,7 +155,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, * Loop over each sg element in the given sg_table object. */ #define for_each_sgtable_sg(sgt, sg, i) \ - for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) + for_each_sg((sgt)->sgl, sg, (sgt)->orig_nents, i) /* * Loop over each sg element in the given *DMA mapped* sg_table object. @@ -163,7 +163,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, * of the each element. */ #define for_each_sgtable_dma_sg(sgt, sg, i) \ - for_each_sg(sgt->sgl, sg, sgt->nents, i) + for_each_sg((sgt)->sgl, sg, (sgt)->nents, i) /** * sg_chain - Chain two sglists together @@ -451,7 +451,7 @@ sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) * See also for_each_sg_page(). In each loop it operates on PAGE_SIZE unit. */ #define for_each_sgtable_page(sgt, piter, pgoffset) \ - for_each_sg_page(sgt->sgl, piter, sgt->orig_nents, pgoffset) + for_each_sg_page((sgt)->sgl, piter, (sgt)->orig_nents, pgoffset) /** * for_each_sgtable_dma_page - iterate over the DMA mapped sg_table object @@ -465,7 +465,7 @@ sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) * unit. */ #define for_each_sgtable_dma_page(sgt, dma_iter, pgoffset) \ - for_each_sg_dma_page(sgt->sgl, dma_iter, sgt->nents, pgoffset) + for_each_sg_dma_page((sgt)->sgl, dma_iter, (sgt)->nents, pgoffset) /* -- cgit v1.2.3 From f79a732a8325dfbd570d87f1435019d7e5501c6d Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 29 Jun 2020 19:17:37 +0530 Subject: ALSA: compress: fix partial_drain completion state On partial_drain completion we should be in SNDRV_PCM_STATE_RUNNING state, so set that for partially draining streams in snd_compr_drain_notify() and use a flag for partially draining streams While at it, add locks for stream state change in snd_compr_drain_notify() as well. Fixes: f44f2a5417b2 ("ALSA: compress: fix drain calls blocking other compress functions (v6)") Reviewed-by: Srinivas Kandagatla Tested-by: Srinivas Kandagatla Reviewed-by: Charles Keepax Tested-by: Charles Keepax Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20200629134737.105993-4-vkoul@kernel.org Signed-off-by: Takashi Iwai --- include/sound/compress_driver.h | 10 +++++++++- sound/core/compress_offload.c | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h index 6ce8effa0b12..70cbc5095e72 100644 --- a/include/sound/compress_driver.h +++ b/include/sound/compress_driver.h @@ -66,6 +66,7 @@ struct snd_compr_runtime { * @direction: stream direction, playback/recording * @metadata_set: metadata set flag, true when set * @next_track: has userspace signal next track transition, true when set + * @partial_drain: undergoing partial_drain for stream, true when set * @private_data: pointer to DSP private data * @dma_buffer: allocated buffer if any */ @@ -78,6 +79,7 @@ struct snd_compr_stream { enum snd_compr_direction direction; bool metadata_set; bool next_track; + bool partial_drain; void *private_data; struct snd_dma_buffer dma_buffer; }; @@ -182,7 +184,13 @@ static inline void snd_compr_drain_notify(struct snd_compr_stream *stream) if (snd_BUG_ON(!stream)) return; - stream->runtime->state = SNDRV_PCM_STATE_SETUP; + /* for partial_drain case we are back to running state on success */ + if (stream->partial_drain) { + stream->runtime->state = SNDRV_PCM_STATE_RUNNING; + stream->partial_drain = false; /* clear this flag as well */ + } else { + stream->runtime->state = SNDRV_PCM_STATE_SETUP; + } wake_up(&stream->runtime->sleep); } diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 509290f2efa8..0e53f6f31916 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -764,6 +764,9 @@ static int snd_compr_stop(struct snd_compr_stream *stream) retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_STOP); if (!retval) { + /* clear flags and stop any drain wait */ + stream->partial_drain = false; + stream->metadata_set = false; snd_compr_drain_notify(stream); stream->runtime->total_bytes_available = 0; stream->runtime->total_bytes_transferred = 0; @@ -921,6 +924,7 @@ static int snd_compr_partial_drain(struct snd_compr_stream *stream) if (stream->next_track == false) return -EPERM; + stream->partial_drain = true; retval = stream->ops->trigger(stream, SND_COMPR_TRIGGER_PARTIAL_DRAIN); if (retval) { pr_debug("Partial drain returned failure\n"); -- cgit v1.2.3 From 58f30150ffd6d95efa524ff05bbcee4e95bfa870 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 7 Jul 2020 09:42:37 +0200 Subject: ASoC: core: Remove only the registered component in devm functions The ASoC devm_ functions that register a component (devm_snd_soc_register_component and devm_snd_dmaengine_pcm_register) will clean their component by running snd_soc_unregister_component. snd_soc_unregister_component will then remove all the components for the device that was used to register the component in the first place. However, some drivers register several components (such as a DAI and a dmaengine PCM) on the same device, and if the dmaengine PCM is registered first, then the DAI will be cleaned up first and snd_dmaengine_pcm_unregister will be called next. snd_dmaengine_pcm_unregister will then lookup the dmaengine PCM component on the device, and if there's one unregister that component and release its dmaengine channels. That doesn't happen in practice though since the first call to snd_soc_unregister_component removed all the components, so we never get the chance to release the dmaengine channels. In order to fix this, instead of removing all the components for a given device, we can simply remove the component that was registered in the first place. We should have the same number of component registration than we have components, so it should work just fine. Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20200707074237.287171-1-maxime@cerno.tech Signed-off-by: Mark Brown --- include/sound/soc.h | 2 ++ sound/soc/soc-core.c | 27 +++++++++++++++++++++++++++ sound/soc/soc-devres.c | 8 +++++--- sound/soc/soc-generic-dmaengine-pcm.c | 2 +- 4 files changed, 35 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index fddab504c227..9ad30135b537 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -444,6 +444,8 @@ int devm_snd_soc_register_component(struct device *dev, const struct snd_soc_component_driver *component_driver, struct snd_soc_dai_driver *dai_drv, int num_dai); void snd_soc_unregister_component(struct device *dev); +void snd_soc_unregister_component_by_driver(struct device *dev, + const struct snd_soc_component_driver *component_driver); struct snd_soc_component *snd_soc_lookup_component_nolocked(struct device *dev, const char *driver_name); struct snd_soc_component *snd_soc_lookup_component(struct device *dev, diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 0f30f5aabaa8..2b8abf88ec60 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2572,6 +2572,33 @@ int snd_soc_register_component(struct device *dev, } EXPORT_SYMBOL_GPL(snd_soc_register_component); +/** + * snd_soc_unregister_component_by_driver - Unregister component using a given driver + * from the ASoC core + * + * @dev: The device to unregister + * @component_driver: The component driver to unregister + */ +void snd_soc_unregister_component_by_driver(struct device *dev, + const struct snd_soc_component_driver *component_driver) +{ + struct snd_soc_component *component; + + if (!component_driver) + return; + + mutex_lock(&client_mutex); + component = snd_soc_lookup_component_nolocked(dev, component_driver->name); + if (!component) + goto out; + + snd_soc_del_component_unlocked(component); + +out: + mutex_unlock(&client_mutex); +} +EXPORT_SYMBOL_GPL(snd_soc_unregister_component_by_driver); + /** * snd_soc_unregister_component - Unregister all related component * from the ASoC core diff --git a/sound/soc/soc-devres.c b/sound/soc/soc-devres.c index 11e5d7962370..4534a1c03e8e 100644 --- a/sound/soc/soc-devres.c +++ b/sound/soc/soc-devres.c @@ -48,7 +48,9 @@ EXPORT_SYMBOL_GPL(devm_snd_soc_register_dai); static void devm_component_release(struct device *dev, void *res) { - snd_soc_unregister_component(*(struct device **)res); + const struct snd_soc_component_driver **cmpnt_drv = res; + + snd_soc_unregister_component_by_driver(dev, *cmpnt_drv); } /** @@ -65,7 +67,7 @@ int devm_snd_soc_register_component(struct device *dev, const struct snd_soc_component_driver *cmpnt_drv, struct snd_soc_dai_driver *dai_drv, int num_dai) { - struct device **ptr; + const struct snd_soc_component_driver **ptr; int ret; ptr = devres_alloc(devm_component_release, sizeof(*ptr), GFP_KERNEL); @@ -74,7 +76,7 @@ int devm_snd_soc_register_component(struct device *dev, ret = snd_soc_register_component(dev, cmpnt_drv, dai_drv, num_dai); if (ret == 0) { - *ptr = dev; + *ptr = cmpnt_drv; devres_add(dev, ptr); } else { devres_free(ptr); diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index 80a4e71f2d95..61844403f181 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -478,7 +478,7 @@ void snd_dmaengine_pcm_unregister(struct device *dev) pcm = soc_component_to_pcm(component); - snd_soc_unregister_component(dev); + snd_soc_unregister_component_by_driver(dev, component->driver); dmaengine_pcm_release_chan(pcm); kfree(pcm); } -- cgit v1.2.3 From ad0f75e5f57ccbceec13274e1e242f2b5a6397ed Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 2 Jul 2020 11:52:56 -0700 Subject: cgroup: fix cgroup_sk_alloc() for sk_clone_lock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we clone a socket in sk_clone_lock(), its sk_cgrp_data is copied, so the cgroup refcnt must be taken too. And, unlike the sk_alloc() path, sock_update_netprioidx() is not called here. Therefore, it is safe and necessary to grab the cgroup refcnt even when cgroup_sk_alloc is disabled. sk_clone_lock() is in BH context anyway, the in_interrupt() would terminate this function if called there. And for sk_alloc() skcd->val is always zero. So it's safe to factor out the code to make it more readable. The global variable 'cgroup_sk_alloc_disabled' is used to determine whether to take these reference counts. It is impossible to make the reference counting correct unless we save this bit of information in skcd->val. So, add a new bit there to record whether the socket has already taken the reference counts. This obviously relies on kmalloc() to align cgroup pointers to at least 4 bytes, ARCH_KMALLOC_MINALIGN is certainly larger than that. This bug seems to be introduced since the beginning, commit d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") tried to fix it but not compeletely. It seems not easy to trigger until the recent commit 090e28b229af ("netprio_cgroup: Fix unlimited memory leak of v2 cgroups") was merged. Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") Reported-by: Cameron Berkenpas Reported-by: Peter Geis Reported-by: Lu Fengqi Reported-by: Daniël Sonck Reported-by: Zhang Qiang Tested-by: Cameron Berkenpas Tested-by: Peter Geis Tested-by: Thomas Lamprecht Cc: Daniel Borkmann Cc: Zefan Li Cc: Tejun Heo Cc: Roman Gushchin Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 6 ++++-- include/linux/cgroup.h | 4 +++- kernel/cgroup/cgroup.c | 31 +++++++++++++++++++------------ net/core/sock.c | 2 +- 4 files changed, 27 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 52661155f85f..4f1cd0edc57d 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -790,7 +790,8 @@ struct sock_cgroup_data { union { #ifdef __LITTLE_ENDIAN struct { - u8 is_data; + u8 is_data : 1; + u8 no_refcnt : 1; u8 padding; u16 prioidx; u32 classid; @@ -800,7 +801,8 @@ struct sock_cgroup_data { u32 classid; u16 prioidx; u8 padding; - u8 is_data; + u8 no_refcnt : 1; + u8 is_data : 1; } __packed; #endif u64 val; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4598e4da6b1b..618838c48313 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -822,6 +822,7 @@ extern spinlock_t cgroup_sk_update_lock; void cgroup_sk_alloc_disable(void); void cgroup_sk_alloc(struct sock_cgroup_data *skcd); +void cgroup_sk_clone(struct sock_cgroup_data *skcd); void cgroup_sk_free(struct sock_cgroup_data *skcd); static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) @@ -835,7 +836,7 @@ static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) */ v = READ_ONCE(skcd->val); - if (v & 1) + if (v & 3) return &cgrp_dfl_root.cgrp; return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; @@ -847,6 +848,7 @@ static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) #else /* CONFIG_CGROUP_DATA */ static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} +static inline void cgroup_sk_clone(struct sock_cgroup_data *skcd) {} static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1ea181a58465..dd247747ec14 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6439,18 +6439,8 @@ void cgroup_sk_alloc_disable(void) void cgroup_sk_alloc(struct sock_cgroup_data *skcd) { - if (cgroup_sk_alloc_disabled) - return; - - /* Socket clone path */ - if (skcd->val) { - /* - * We might be cloning a socket which is left in an empty - * cgroup and the cgroup might have already been rmdir'd. - * Don't use cgroup_get_live(). - */ - cgroup_get(sock_cgroup_ptr(skcd)); - cgroup_bpf_get(sock_cgroup_ptr(skcd)); + if (cgroup_sk_alloc_disabled) { + skcd->no_refcnt = 1; return; } @@ -6475,10 +6465,27 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) rcu_read_unlock(); } +void cgroup_sk_clone(struct sock_cgroup_data *skcd) +{ + if (skcd->val) { + if (skcd->no_refcnt) + return; + /* + * We might be cloning a socket which is left in an empty + * cgroup and the cgroup might have already been rmdir'd. + * Don't use cgroup_get_live(). + */ + cgroup_get(sock_cgroup_ptr(skcd)); + cgroup_bpf_get(sock_cgroup_ptr(skcd)); + } +} + void cgroup_sk_free(struct sock_cgroup_data *skcd) { struct cgroup *cgrp = sock_cgroup_ptr(skcd); + if (skcd->no_refcnt) + return; cgroup_bpf_put(cgrp); cgroup_put(cgrp); } diff --git a/net/core/sock.c b/net/core/sock.c index d832c650287c..2e5b7870e5d3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1926,7 +1926,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* sk->sk_memcg will be populated at accept() time */ newsk->sk_memcg = NULL; - cgroup_sk_alloc(&newsk->sk_cgrp_data); + cgroup_sk_clone(&newsk->sk_cgrp_data); rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); -- cgit v1.2.3 From 41da51bce36f44eefc1e3d0f47d18841cbd065ba Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 21 Nov 2019 23:25:07 +0000 Subject: fs: Add IOCB_NOIO flag for generic_file_read_iter Add an IOCB_NOIO flag that indicates to generic_file_read_iter that it shouldn't trigger any filesystem I/O for the actual request or for readahead. This allows to do tentative reads out of the page cache as some filesystems allow, and to take the appropriate locks and retry the reads only if the requested pages are not cached. Signed-off-by: Andreas Gruenbacher --- include/linux/fs.h | 1 + mm/filemap.c | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f881a892ea7..4b7cb76e5837 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -315,6 +315,7 @@ enum rw_hint { #define IOCB_SYNC (1 << 5) #define IOCB_WRITE (1 << 6) #define IOCB_NOWAIT (1 << 7) +#define IOCB_NOIO (1 << 9) struct kiocb { struct file *ki_filp; diff --git a/mm/filemap.c b/mm/filemap.c index f0ae9a6308cb..385759c4ce4b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2028,7 +2028,7 @@ find_page: page = find_get_page(mapping, index); if (!page) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) goto would_block; page_cache_sync_readahead(mapping, ra, filp, @@ -2038,6 +2038,10 @@ find_page: goto no_cached_page; } if (PageReadahead(page)) { + if (iocb->ki_flags & IOCB_NOIO) { + put_page(page); + goto out; + } page_cache_async_readahead(mapping, ra, filp, page, index, last_index - index); @@ -2160,6 +2164,11 @@ page_not_up_to_date_locked: } readpage: + if (iocb->ki_flags & IOCB_NOIO) { + unlock_page(page); + put_page(page); + goto would_block; + } /* * A previous I/O error may have been due to temporary * failures, eg. multipath errors. @@ -2249,9 +2258,19 @@ EXPORT_SYMBOL_GPL(generic_file_buffered_read); * * This is the "read_iter()" routine for all filesystems * that can use the page cache directly. + * + * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall + * be returned when no data can be read without waiting for I/O requests + * to complete; it doesn't prevent readahead. + * + * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O + * requests shall be made for the read or for readahead. When no data + * can be read, -EAGAIN shall be returned. When readahead would be + * triggered, a partial, possibly empty read shall be returned. + * * Return: * * number of bytes copied, even for partial reads - * * negative error code if nothing was read + * * negative error code (or 0 if IOCB_NOIO) if nothing was read */ ssize_t generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) -- cgit v1.2.3 From 394de110a73395de2ca4516b0de435e91b11b604 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Sun, 5 Jul 2020 14:23:49 +0530 Subject: net: Added pointer check for dst->ops->neigh_lookup in dst_neigh_lookup_skb The packets from tunnel devices (eg bareudp) may have only metadata in the dst pointer of skb. Hence a pointer check of neigh_lookup is needed in dst_neigh_lookup_skb Kernel crashes when packets from bareudp device is processed in the kernel neighbour subsytem. [ 133.384484] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 133.385240] #PF: supervisor instruction fetch in kernel mode [ 133.385828] #PF: error_code(0x0010) - not-present page [ 133.386603] PGD 0 P4D 0 [ 133.386875] Oops: 0010 [#1] SMP PTI [ 133.387275] CPU: 0 PID: 5045 Comm: ping Tainted: G W 5.8.0-rc2+ #15 [ 133.388052] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 133.391076] RIP: 0010:0x0 [ 133.392401] Code: Bad RIP value. [ 133.394029] RSP: 0018:ffffb79980003d50 EFLAGS: 00010246 [ 133.396656] RAX: 0000000080000102 RBX: ffff9de2fe0d6600 RCX: ffff9de2fe5e9d00 [ 133.399018] RDX: 0000000000000000 RSI: ffff9de2fe5e9d00 RDI: ffff9de2fc21b400 [ 133.399685] RBP: ffff9de2fe5e9d00 R08: 0000000000000000 R09: 0000000000000000 [ 133.400350] R10: ffff9de2fbc6be22 R11: ffff9de2fe0d6600 R12: ffff9de2fc21b400 [ 133.401010] R13: ffff9de2fe0d6628 R14: 0000000000000001 R15: 0000000000000003 [ 133.401667] FS: 00007fe014918740(0000) GS:ffff9de2fec00000(0000) knlGS:0000000000000000 [ 133.402412] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 133.402948] CR2: ffffffffffffffd6 CR3: 000000003bb72000 CR4: 00000000000006f0 [ 133.403611] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 133.404270] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 133.404933] Call Trace: [ 133.405169] [ 133.405367] __neigh_update+0x5a4/0x8f0 [ 133.405734] arp_process+0x294/0x820 [ 133.406076] ? __netif_receive_skb_core+0x866/0xe70 [ 133.406557] arp_rcv+0x129/0x1c0 [ 133.406882] __netif_receive_skb_one_core+0x95/0xb0 [ 133.407340] process_backlog+0xa7/0x150 [ 133.407705] net_rx_action+0x2af/0x420 [ 133.408457] __do_softirq+0xda/0x2a8 [ 133.408813] asm_call_on_stack+0x12/0x20 [ 133.409290] [ 133.409519] do_softirq_own_stack+0x39/0x50 [ 133.410036] do_softirq+0x50/0x60 [ 133.410401] __local_bh_enable_ip+0x50/0x60 [ 133.410871] ip_finish_output2+0x195/0x530 [ 133.411288] ip_output+0x72/0xf0 [ 133.411673] ? __ip_finish_output+0x1f0/0x1f0 [ 133.412122] ip_send_skb+0x15/0x40 [ 133.412471] raw_sendmsg+0x853/0xab0 [ 133.412855] ? insert_pfn+0xfe/0x270 [ 133.413827] ? vvar_fault+0xec/0x190 [ 133.414772] sock_sendmsg+0x57/0x80 [ 133.415685] __sys_sendto+0xdc/0x160 [ 133.416605] ? syscall_trace_enter+0x1d4/0x2b0 [ 133.417679] ? __audit_syscall_exit+0x1d9/0x280 [ 133.418753] ? __prepare_exit_to_usermode+0x5d/0x1a0 [ 133.419819] __x64_sys_sendto+0x24/0x30 [ 133.420848] do_syscall_64+0x4d/0x90 [ 133.421768] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 133.422833] RIP: 0033:0x7fe013689c03 [ 133.423749] Code: Bad RIP value. [ 133.424624] RSP: 002b:00007ffc7288f418 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 133.425940] RAX: ffffffffffffffda RBX: 000056151fc63720 RCX: 00007fe013689c03 [ 133.427225] RDX: 0000000000000040 RSI: 000056151fc63720 RDI: 0000000000000003 [ 133.428481] RBP: 00007ffc72890b30 R08: 000056151fc60500 R09: 0000000000000010 [ 133.429757] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040 [ 133.431041] R13: 000056151fc636e0 R14: 000056151fc616bc R15: 0000000000000080 [ 133.432481] Modules linked in: mpls_iptunnel act_mirred act_tunnel_key cls_flower sch_ingress veth mpls_router ip_tunnel bareudp ip6_udp_tunnel udp_tunnel macsec udp_diag inet_diag unix_diag af_packet_diag netlink_diag binfmt_misc xt_MASQUERADE iptable_nat xt_addrtype xt_conntrack nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter bridge stp llc ebtable_filter ebtables overlay ip6table_filter ip6_tables iptable_filter sunrpc ext4 mbcache jbd2 pcspkr i2c_piix4 virtio_balloon joydev ip_tables xfs libcrc32c ata_generic qxl pata_acpi drm_ttm_helper ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm ata_piix libata virtio_net net_failover virtio_console failover virtio_blk i2c_core virtio_pci virtio_ring serio_raw floppy virtio dm_mirror dm_region_hash dm_log dm_mod [ 133.444045] CR2: 0000000000000000 [ 133.445082] ---[ end trace f4aeee1958fd1638 ]--- [ 133.446236] RIP: 0010:0x0 [ 133.447180] Code: Bad RIP value. [ 133.448152] RSP: 0018:ffffb79980003d50 EFLAGS: 00010246 [ 133.449363] RAX: 0000000080000102 RBX: ffff9de2fe0d6600 RCX: ffff9de2fe5e9d00 [ 133.450835] RDX: 0000000000000000 RSI: ffff9de2fe5e9d00 RDI: ffff9de2fc21b400 [ 133.452237] RBP: ffff9de2fe5e9d00 R08: 0000000000000000 R09: 0000000000000000 [ 133.453722] R10: ffff9de2fbc6be22 R11: ffff9de2fe0d6600 R12: ffff9de2fc21b400 [ 133.455149] R13: ffff9de2fe0d6628 R14: 0000000000000001 R15: 0000000000000003 [ 133.456520] FS: 00007fe014918740(0000) GS:ffff9de2fec00000(0000) knlGS:0000000000000000 [ 133.458046] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 133.459342] CR2: ffffffffffffffd6 CR3: 000000003bb72000 CR4: 00000000000006f0 [ 133.460782] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 133.462240] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 133.463697] Kernel panic - not syncing: Fatal exception in interrupt [ 133.465226] Kernel Offset: 0xfa00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 133.467025] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fixes: aaa0c23cb901 ("Fix dst_neigh_lookup/dst_neigh_lookup_skb return value handling bug") Signed-off-by: Martin Varghese Signed-off-by: David S. Miller --- include/net/dst.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 07adfacd8088..852d8fb36ab7 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -400,7 +400,15 @@ static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, co static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, struct sk_buff *skb) { - struct neighbour *n = dst->ops->neigh_lookup(dst, skb, NULL); + struct neighbour *n = NULL; + + /* The packets from tunnel devices (eg bareudp) may have only + * metadata in the dst pointer of skb. Hence a pointer check of + * neigh_lookup is needed. + */ + if (dst->ops->neigh_lookup) + n = dst->ops->neigh_lookup(dst, skb, NULL); + return IS_ERR(n) ? NULL : n; } -- cgit v1.2.3 From 469aceddfa3ed16e17ee30533fae45e90f62efd8 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 7 Jul 2020 13:03:25 +0200 Subject: vlan: consolidate VLAN parsing code and limit max parsing depth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Toshiaki pointed out that we now have two very similar functions to extract the L3 protocol number in the presence of VLAN tags. And Daniel pointed out that the unbounded parsing loop makes it possible for maliciously crafted packets to loop through potentially hundreds of tags. Fix both of these issues by consolidating the two parsing functions and limiting the VLAN tag parsing to a max depth of 8 tags. As part of this, switch over __vlan_get_protocol() to use skb_header_pointer() instead of pskb_may_pull(), to avoid the possible side effects of the latter and keep the skb pointer 'const' through all the parsing functions. v2: - Use limit of 8 tags instead of 32 (matching XMIT_RECURSION_LIMIT) Reported-by: Toshiaki Makita Reported-by: Daniel Borkmann Fixes: d7bf2ebebc2b ("sched: consistently handle layer3 header accesses in the presence of VLANs") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 57 +++++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 427a5b8597c2..41a518336673 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -25,6 +25,8 @@ #define VLAN_ETH_DATA_LEN 1500 /* Max. octets in payload */ #define VLAN_ETH_FRAME_LEN 1518 /* Max. octets in frame sans FCS */ +#define VLAN_MAX_DEPTH 8 /* Max. number of nested VLAN tags parsed */ + /* * struct vlan_hdr - vlan header * @h_vlan_TCI: priority and VLAN ID @@ -308,34 +310,6 @@ static inline bool eth_type_vlan(__be16 ethertype) } } -/* A getter for the SKB protocol field which will handle VLAN tags consistently - * whether VLAN acceleration is enabled or not. - */ -static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) -{ - unsigned int offset = skb_mac_offset(skb) + sizeof(struct ethhdr); - __be16 proto = skb->protocol; - - if (!skip_vlan) - /* VLAN acceleration strips the VLAN header from the skb and - * moves it to skb->vlan_proto - */ - return skb_vlan_tag_present(skb) ? skb->vlan_proto : proto; - - while (eth_type_vlan(proto)) { - struct vlan_hdr vhdr, *vh; - - vh = skb_header_pointer(skb, offset, sizeof(vhdr), &vhdr); - if (!vh) - break; - - proto = vh->h_vlan_encapsulated_proto; - offset += sizeof(vhdr); - } - - return proto; -} - static inline bool vlan_hw_offload_capable(netdev_features_t features, __be16 proto) { @@ -605,10 +579,10 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, +static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, int *depth) { - unsigned int vlan_depth = skb->mac_len; + unsigned int vlan_depth = skb->mac_len, parse_depth = VLAN_MAX_DEPTH; /* if type is 802.1Q/AD then the header should already be * present at mac_len - VLAN_HLEN (if mac_len > 0), or at @@ -623,13 +597,12 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, vlan_depth = ETH_HLEN; } do { - struct vlan_hdr *vh; + struct vlan_hdr vhdr, *vh; - if (unlikely(!pskb_may_pull(skb, - vlan_depth + VLAN_HLEN))) + vh = skb_header_pointer(skb, vlan_depth, sizeof(vhdr), &vhdr); + if (unlikely(!vh || !--parse_depth)) return 0; - vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } while (eth_type_vlan(type)); @@ -648,11 +621,25 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 vlan_get_protocol(struct sk_buff *skb) +static inline __be16 vlan_get_protocol(const struct sk_buff *skb) { return __vlan_get_protocol(skb, skb->protocol, NULL); } +/* A getter for the SKB protocol field which will handle VLAN tags consistently + * whether VLAN acceleration is enabled or not. + */ +static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) +{ + if (!skip_vlan) + /* VLAN acceleration strips the VLAN header from the skb and + * moves it to skb->vlan_proto + */ + return skb_vlan_tag_present(skb) ? skb->vlan_proto : skb->protocol; + + return vlan_get_protocol(skb); +} + static inline void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr) { -- cgit v1.2.3 From 61a707c543e2afe3aa7e88f87267c5dafa4b5afa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2020 08:54:16 +0200 Subject: fs: add a __kernel_read helper This is the counterpart to __kernel_write, and skip the rw_verify_area call compared to kernel_read. Signed-off-by: Christoph Hellwig --- fs/read_write.c | 23 +++++++++++++++++++++++ include/linux/fs.h | 1 + 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/fs/read_write.c b/fs/read_write.c index 96e8e354f99b..21c9d90a257e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -430,6 +430,29 @@ ssize_t __vfs_read(struct file *file, char __user *buf, size_t count, return -EINVAL; } +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) +{ + mm_segment_t old_fs = get_fs(); + ssize_t ret; + + if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) + return -EINVAL; + if (!(file->f_mode & FMODE_CAN_READ)) + return -EINVAL; + + if (count > MAX_RW_COUNT) + count = MAX_RW_COUNT; + set_fs(KERNEL_DS); + ret = __vfs_read(file, (void __user *)buf, count, pos); + set_fs(old_fs); + if (ret > 0) { + fsnotify_access(file); + add_rchar(current, ret); + } + inc_syscr(current); + return ret; +} + ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) { mm_segment_t old_fs; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f881a892ea7..22cbe7b2e919 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3033,6 +3033,7 @@ extern int kernel_read_file_from_path_initns(const char *, void **, loff_t *, lo extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, enum kernel_read_file_id); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos); extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *); extern struct file * open_exec(const char *); -- cgit v1.2.3 From 775802c0571fb438cd4f6548a323f9e4cb89f5aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2020 11:17:46 +0200 Subject: fs: remove __vfs_read Fold it into the two callers. Signed-off-by: Christoph Hellwig --- fs/read_write.c | 43 +++++++++++++++++++++---------------------- include/linux/fs.h | 1 - 2 files changed, 21 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/fs/read_write.c b/fs/read_write.c index 42a027719324..4fb797822567 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -419,17 +419,6 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo return ret; } -ssize_t __vfs_read(struct file *file, char __user *buf, size_t count, - loff_t *pos) -{ - if (file->f_op->read) - return file->f_op->read(file, buf, count, pos); - else if (file->f_op->read_iter) - return new_sync_read(file, buf, count, pos); - else - return -EINVAL; -} - ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) { mm_segment_t old_fs = get_fs(); @@ -443,7 +432,12 @@ ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; set_fs(KERNEL_DS); - ret = __vfs_read(file, (void __user *)buf, count, pos); + if (file->f_op->read) + ret = file->f_op->read(file, (void __user *)buf, count, pos); + else if (file->f_op->read_iter) + ret = new_sync_read(file, (void __user *)buf, count, pos); + else + ret = -EINVAL; set_fs(old_fs); if (ret > 0) { fsnotify_access(file); @@ -476,17 +470,22 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) return -EFAULT; ret = rw_verify_area(READ, file, pos, count); - if (!ret) { - if (count > MAX_RW_COUNT) - count = MAX_RW_COUNT; - ret = __vfs_read(file, buf, count, pos); - if (ret > 0) { - fsnotify_access(file); - add_rchar(current, ret); - } - inc_syscr(current); - } + if (ret) + return ret; + if (count > MAX_RW_COUNT) + count = MAX_RW_COUNT; + if (file->f_op->read) + ret = file->f_op->read(file, buf, count, pos); + else if (file->f_op->read_iter) + ret = new_sync_read(file, buf, count, pos); + else + ret = -EINVAL; + if (ret > 0) { + fsnotify_access(file); + add_rchar(current, ret); + } + inc_syscr(current); return ret; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 22cbe7b2e919..0c0ec76b600b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1917,7 +1917,6 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, struct iovec *fast_pointer, struct iovec **ret_pointer); -extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, -- cgit v1.2.3 From dbfb089d360b1cc623c51a2c7cf9b99eff78e0e7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 3 Jul 2020 12:40:33 +0200 Subject: sched: Fix loadavg accounting race The recent commit: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") moved these lines in ttwu(): p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; up before: smp_cond_load_acquire(&p->on_cpu, !VAL); into the 'p->on_rq == 0' block, with the thinking that once we hit schedule() the current task cannot change it's ->state anymore. And while this is true, it is both incorrect and flawed. It is incorrect in that we need at least an ACQUIRE on 'p->on_rq == 0' to avoid weak hardware from re-ordering things for us. This can fairly easily be achieved by relying on the control-dependency already in place. The second problem, which makes the flaw in the original argument, is that while schedule() will not change prev->state, it will read it a number of times (arguably too many times since it's marked volatile). The previous condition 'p->on_cpu == 0' was sufficient because that indicates schedule() has completed, and will no longer read prev->state. So now the trick is to make this same true for the (much) earlier 'prev->on_rq == 0' case. Furthermore, in order to make the ordering stick, the 'prev->on_rq = 0' assignment needs to he a RELEASE, but adding additional ordering to schedule() is an unwelcome proposition at the best of times, doubly so for mere accounting. Luckily we can push the prev->state load up before rq->lock, with the only caveat that we then have to re-read the state after. However, we know that if it changed, we no longer have to worry about the blocking path. This gives us the required ordering, if we block, we did the prev->state load before an (effective) smp_mb() and the p->on_rq store needs not change. With this we end up with the effective ordering: LOAD p->state LOAD-ACQUIRE p->on_rq == 0 MB STORE p->on_rq, 0 STORE p->state, TASK_WAKING which ensures the TASK_WAKING store happens after the prev->state load, and all is well again. Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") Reported-by: Dave Jones Reported-by: Paul Gortmaker Signed-off-by: Peter Zijlstra (Intel) Tested-by: Dave Jones Tested-by: Paul Gortmaker Link: https://lkml.kernel.org/r/20200707102957.GN117543@hirez.programming.kicks-ass.net --- include/linux/sched.h | 4 --- kernel/sched/core.c | 67 +++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 51 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 692e327d7455..683372943093 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -114,10 +114,6 @@ struct task_group; #define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) -#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0 && \ - (task->state & TASK_NOLOAD) == 0) - #ifdef CONFIG_DEBUG_ATOMIC_SLEEP /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ca5db40392d4..950ac45d5480 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1311,9 +1311,6 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) void activate_task(struct rq *rq, struct task_struct *p, int flags) { - if (task_contributes_to_load(p)) - rq->nr_uninterruptible--; - enqueue_task(rq, p, flags); p->on_rq = TASK_ON_RQ_QUEUED; @@ -1323,9 +1320,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) { p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING; - if (task_contributes_to_load(p)) - rq->nr_uninterruptible++; - dequeue_task(rq, p, flags); } @@ -2236,10 +2230,10 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, lockdep_assert_held(&rq->lock); -#ifdef CONFIG_SMP if (p->sched_contributes_to_load) rq->nr_uninterruptible--; +#ifdef CONFIG_SMP if (wake_flags & WF_MIGRATED) en_flags |= ENQUEUE_MIGRATED; #endif @@ -2583,7 +2577,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). */ smp_rmb(); - if (p->on_rq && ttwu_remote(p, wake_flags)) + if (READ_ONCE(p->on_rq) && ttwu_remote(p, wake_flags)) goto unlock; if (p->in_iowait) { @@ -2592,9 +2586,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) } #ifdef CONFIG_SMP - p->sched_contributes_to_load = !!task_contributes_to_load(p); - p->state = TASK_WAKING; - /* * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be * possible to, falsely, observe p->on_cpu == 0. @@ -2613,8 +2604,20 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in * __schedule(). See the comment for smp_mb__after_spinlock(). + * + * Form a control-dep-acquire with p->on_rq == 0 above, to ensure + * schedule()'s deactivate_task() has 'happened' and p will no longer + * care about it's own p->state. See the comment in __schedule(). */ - smp_rmb(); + smp_acquire__after_ctrl_dep(); + + /* + * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq + * == 0), which means we need to do an enqueue, change p->state to + * TASK_WAKING such that we can unlock p->pi_lock before doing the + * enqueue, such as ttwu_queue_wakelist(). + */ + p->state = TASK_WAKING; /* * If the owning (remote) CPU is still in the middle of schedule() with @@ -4097,6 +4100,7 @@ static void __sched notrace __schedule(bool preempt) { struct task_struct *prev, *next; unsigned long *switch_count; + unsigned long prev_state; struct rq_flags rf; struct rq *rq; int cpu; @@ -4113,12 +4117,22 @@ static void __sched notrace __schedule(bool preempt) local_irq_disable(); rcu_note_context_switch(preempt); + /* See deactivate_task() below. */ + prev_state = prev->state; + /* * Make sure that signal_pending_state()->signal_pending() below * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) - * done by the caller to avoid the race with signal_wake_up(). + * done by the caller to avoid the race with signal_wake_up(): + * + * __set_current_state(@state) signal_wake_up() + * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) + * wake_up_state(p, state) + * LOCK rq->lock LOCK p->pi_state + * smp_mb__after_spinlock() smp_mb__after_spinlock() + * if (signal_pending_state()) if (p->state & @state) * - * The membarrier system call requires a full memory barrier + * Also, the membarrier system call requires a full memory barrier * after coming from user-space, before storing to rq->curr. */ rq_lock(rq, &rf); @@ -4129,10 +4143,31 @@ static void __sched notrace __schedule(bool preempt) update_rq_clock(rq); switch_count = &prev->nivcsw; - if (!preempt && prev->state) { - if (signal_pending_state(prev->state, prev)) { + /* + * We must re-load prev->state in case ttwu_remote() changed it + * before we acquired rq->lock. + */ + if (!preempt && prev_state && prev_state == prev->state) { + if (signal_pending_state(prev_state, prev)) { prev->state = TASK_RUNNING; } else { + prev->sched_contributes_to_load = + (prev_state & TASK_UNINTERRUPTIBLE) && + !(prev_state & TASK_NOLOAD) && + !(prev->flags & PF_FROZEN); + + if (prev->sched_contributes_to_load) + rq->nr_uninterruptible++; + + /* + * __schedule() ttwu() + * prev_state = prev->state; if (READ_ONCE(p->on_rq) && ...) + * LOCK rq->lock goto out; + * smp_mb__after_spinlock(); smp_acquire__after_ctrl_dep(); + * p->on_rq = 0; p->state = TASK_WAKING; + * + * After this, schedule() must not care about p->state any more. + */ deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK); if (prev->in_iowait) { -- cgit v1.2.3 From 25612477d20b522a3203707ff23575b99f639fff Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 7 Jul 2020 16:04:37 -0500 Subject: ASoC: soc-dai: set dai_link dpcm_ flags with a helper Add a helper to walk through all the DAIs and set dpcm_playback and dpcm_capture flags based on the DAIs capabilities, and use this helper to avoid setting these flags arbitrarily in generic cards. The commit referenced in the Fixes tag did not introduce the configuration issue but will prevent the card from probing when detecting invalid configurations. Fixes: b73287f0b0745 ('ASoC: soc-pcm: dpcm: fix playback/capture checks') Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20200707210439.115300-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 1 + sound/soc/generic/audio-graph-card.c | 4 ++-- sound/soc/generic/simple-card.c | 4 ++-- sound/soc/soc-dai.c | 38 ++++++++++++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 212257e84fac..71e178c89793 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -161,6 +161,7 @@ void snd_soc_dai_resume(struct snd_soc_dai *dai); int snd_soc_dai_compress_new(struct snd_soc_dai *dai, struct snd_soc_pcm_runtime *rtd, int num); bool snd_soc_dai_stream_valid(struct snd_soc_dai *dai, int stream); +void snd_soc_dai_link_set_capabilities(struct snd_soc_dai_link *dai_link); void snd_soc_dai_action(struct snd_soc_dai *dai, int stream, int action); static inline void snd_soc_dai_activate(struct snd_soc_dai *dai, diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c index 9ad35d9940fe..97b4f5480a31 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -317,8 +317,8 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, if (ret < 0) goto out_put_node; - dai_link->dpcm_playback = 1; - dai_link->dpcm_capture = 1; + snd_soc_dai_link_set_capabilities(dai_link); + dai_link->ops = &graph_ops; dai_link->init = asoc_simple_dai_init; diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index 55e9f8800b3e..04d4d28ed511 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -231,8 +231,8 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, if (ret < 0) goto out_put_node; - dai_link->dpcm_playback = 1; - dai_link->dpcm_capture = 1; + snd_soc_dai_link_set_capabilities(dai_link); + dai_link->ops = &simple_ops; dai_link->init = asoc_simple_dai_init; diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c index b05e18b63a1c..457159975b01 100644 --- a/sound/soc/soc-dai.c +++ b/sound/soc/soc-dai.c @@ -391,6 +391,44 @@ bool snd_soc_dai_stream_valid(struct snd_soc_dai *dai, int dir) return stream->channels_min; } +/* + * snd_soc_dai_link_set_capabilities() - set dai_link properties based on its DAIs + */ +void snd_soc_dai_link_set_capabilities(struct snd_soc_dai_link *dai_link) +{ + struct snd_soc_dai_link_component *cpu; + struct snd_soc_dai_link_component *codec; + struct snd_soc_dai *dai; + bool supported[SNDRV_PCM_STREAM_LAST + 1]; + int direction; + int i; + + for_each_pcm_streams(direction) { + supported[direction] = true; + + for_each_link_cpus(dai_link, i, cpu) { + dai = snd_soc_find_dai(cpu); + if (!dai || !snd_soc_dai_stream_valid(dai, direction)) { + supported[direction] = false; + break; + } + } + if (!supported[direction]) + continue; + for_each_link_codecs(dai_link, i, codec) { + dai = snd_soc_find_dai(codec); + if (!dai || !snd_soc_dai_stream_valid(dai, direction)) { + supported[direction] = false; + break; + } + } + } + + dai_link->dpcm_playback = supported[SNDRV_PCM_STREAM_PLAYBACK]; + dai_link->dpcm_capture = supported[SNDRV_PCM_STREAM_CAPTURE]; +} +EXPORT_SYMBOL_GPL(snd_soc_dai_link_set_capabilities); + void snd_soc_dai_action(struct snd_soc_dai *dai, int stream, int action) { -- cgit v1.2.3 From 6ec4476ac82512f09c94aff5972654b70f3772b2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jul 2020 10:48:35 -0700 Subject: Raise gcc version requirement to 4.9 I realize that we fairly recently raised it to 4.8, but the fact is, 4.9 is a much better minimum version to target. We have a number of workarounds for actual bugs in pre-4.9 gcc versions (including things like internal compiler errors on ARM), but we also have some syntactic workarounds for lacking features. In particular, raising the minimum to 4.9 means that we can now just assume _Generic() exists, which is likely the much better replacement for a lot of very convoluted built-time magic with conditionals on sizeof and/or __builtin_choose_expr() with same_type() etc. Using _Generic also means that you will need to have a very recent version of 'sparse', but thats easy to build yourself, and much less of a hassle than some old gcc version can be. The latest (in a long string) of reasons for minimum compiler version upgrades was commit 5435f73d5c4a ("efi/x86: Fix build with gcc 4"). Ard points out that RHEL 7 uses gcc-4.8, but the people who stay back on old RHEL versions persumably also don't build their own kernels anyway. And maybe they should cross-built or just have a little side affair with a newer compiler? Acked-by: Ard Biesheuvel Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- arch/arm/kernel/asm-offsets.c | 9 --------- arch/mips/include/asm/unroll.h | 7 +++---- include/linux/bits.h | 3 +-- include/linux/compiler-gcc.h | 2 +- include/linux/compiler_types.h | 27 +-------------------------- mm/migrate.c | 13 +------------ tools/include/linux/bits.h | 3 +-- 7 files changed, 8 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index c036a4a2f8e2..a1570c8bab25 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -31,15 +31,6 @@ #if defined(__APCS_26__) #error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32 #endif -/* - * GCC 4.8.0-4.8.2: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58854 - * miscompiles find_get_entry(), and can result in EXT3 and EXT4 - * filesystem corruption (possibly other FS too). - */ -#if defined(GCC_VERSION) && GCC_VERSION >= 40800 && GCC_VERSION < 40803 -#error Your compiler is too buggy; it is known to miscompile kernels -#error and result in filesystem corruption and oopses. -#endif int main(void) { diff --git a/arch/mips/include/asm/unroll.h b/arch/mips/include/asm/unroll.h index c628747d4ecd..8ed660adc84f 100644 --- a/arch/mips/include/asm/unroll.h +++ b/arch/mips/include/asm/unroll.h @@ -19,14 +19,13 @@ \ /* \ * We can't unroll if the number of iterations isn't \ - * compile-time constant. Unfortunately GCC versions \ - * up until 4.6 tend to miss obvious constants & cause \ + * compile-time constant. Unfortunately clang versions \ + * up until 8.0 tend to miss obvious constants & cause \ * this check to fail, even though they go on to \ * generate reasonable code for the switch statement, \ * so we skip the sanity check for those compilers. \ */ \ - BUILD_BUG_ON((CONFIG_GCC_VERSION >= 40700 || \ - CONFIG_CLANG_VERSION >= 80000) && \ + BUILD_BUG_ON((CONFIG_CLANG_VERSION >= 80000) && \ !__builtin_constant_p(times)); \ \ switch (times) { \ diff --git a/include/linux/bits.h b/include/linux/bits.h index 4671fbf28842..7f475d59a097 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -18,8 +18,7 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#if !defined(__ASSEMBLY__) && \ - (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000) +#if !defined(__ASSEMBLY__) #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 1c74464c80c6..0b1dc61f3955 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -11,7 +11,7 @@ + __GNUC_PATCHLEVEL__) /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ -#if GCC_VERSION < 40800 +#if GCC_VERSION < 40900 # error Sorry, your compiler is too old - please upgrade it. #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index c3bf7710f69a..01dd58c74d80 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -252,32 +252,8 @@ struct ftrace_likely_data { * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving * non-scalar types unchanged. */ -#if (defined(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION < 40900) || defined(__CHECKER__) /* - * We build this out of a couple of helper macros in a vain attempt to - * help you keep your lunch down while reading it. - */ -#define __pick_scalar_type(x, type, otherwise) \ - __builtin_choose_expr(__same_type(x, type), (type)0, otherwise) - -/* - * 'char' is not type-compatible with either 'signed char' or 'unsigned char', - * so we include the naked type here as well as the signed/unsigned variants. - */ -#define __pick_integer_type(x, type, otherwise) \ - __pick_scalar_type(x, type, \ - __pick_scalar_type(x, unsigned type, \ - __pick_scalar_type(x, signed type, otherwise))) - -#define __unqual_scalar_typeof(x) typeof( \ - __pick_integer_type(x, char, \ - __pick_integer_type(x, short, \ - __pick_integer_type(x, int, \ - __pick_integer_type(x, long, \ - __pick_integer_type(x, long long, x)))))) -#else -/* - * If supported, prefer C11 _Generic for better compile-times. As above, 'char' + * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' * is not type-compatible with 'signed char', and we define a separate case. */ #define __scalar_type_to_expr_cases(type) \ @@ -293,7 +269,6 @@ struct ftrace_likely_data { __scalar_type_to_expr_cases(long), \ __scalar_type_to_expr_cases(long long), \ default: (x))) -#endif /* Is this type a native word size -- useful for atomic operations */ #define __native_word(t) \ diff --git a/mm/migrate.c b/mm/migrate.c index f37729673558..40cd7016ae6f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1160,22 +1160,11 @@ out: return rc; } -/* - * gcc 4.7 and 4.8 on arm get an ICEs when inlining unmap_and_move(). Work - * around it. - */ -#if defined(CONFIG_ARM) && \ - defined(GCC_VERSION) && GCC_VERSION < 40900 && GCC_VERSION >= 40700 -#define ICE_noinline noinline -#else -#define ICE_noinline -#endif - /* * Obtain the lock on page, remove all ptes and migrate the page * to the newly allocated page in newpage. */ -static ICE_noinline int unmap_and_move(new_page_t get_new_page, +static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page, unsigned long private, struct page *page, int force, enum migrate_mode mode, diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 4671fbf28842..7f475d59a097 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -18,8 +18,7 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#if !defined(__ASSEMBLY__) && \ - (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000) +#if !defined(__ASSEMBLY__) #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ -- cgit v1.2.3 From 160251842cd35a75edfb0a1d76afa3eb674ff40a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Jul 2020 11:49:23 -0700 Subject: kallsyms: Refactor kallsyms_show_value() to take cred In order to perform future tests against the cred saved during open(), switch kallsyms_show_value() to operate on a cred, and have all current callers pass current_cred(). This makes it very obvious where callers are checking the wrong credential in their "read" contexts. These will be fixed in the coming patches. Additionally switch return value to bool, since it is always used as a direct permission check, not a 0-on-success, negative-on-error style function return. Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- include/linux/filter.h | 2 +- include/linux/kallsyms.h | 5 +++-- kernel/kallsyms.c | 17 +++++++++++------ kernel/kprobes.c | 4 ++-- kernel/module.c | 2 +- 5 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 259377723603..55104f6c78e8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -889,7 +889,7 @@ static inline bool bpf_dump_raw_ok(void) /* Reconstruction of call-sites is dependent on kallsyms, * thus make dump the same restriction. */ - return kallsyms_show_value() == 1; + return kallsyms_show_value(current_cred()); } struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 98338dc6b5d2..481273f0c72d 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -18,6 +18,7 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) +struct cred; struct module; static inline int is_kernel_inittext(unsigned long addr) @@ -98,7 +99,7 @@ int lookup_symbol_name(unsigned long addr, char *symname); int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); /* How and when do we show kallsyms values? */ -extern int kallsyms_show_value(void); +extern bool kallsyms_show_value(const struct cred *cred); #else /* !CONFIG_KALLSYMS */ @@ -158,7 +159,7 @@ static inline int lookup_symbol_attrs(unsigned long addr, unsigned long *size, u return -ERANGE; } -static inline int kallsyms_show_value(void) +static inline bool kallsyms_show_value(const struct cred *cred) { return false; } diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 16c8c605f4b0..bb14e64f62a4 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -644,19 +644,20 @@ static inline int kallsyms_for_perf(void) * Otherwise, require CAP_SYSLOG (assuming kptr_restrict isn't set to * block even that). */ -int kallsyms_show_value(void) +bool kallsyms_show_value(const struct cred *cred) { switch (kptr_restrict) { case 0: if (kallsyms_for_perf()) - return 1; + return true; /* fallthrough */ case 1: - if (has_capability_noaudit(current, CAP_SYSLOG)) - return 1; + if (security_capable(cred, &init_user_ns, CAP_SYSLOG, + CAP_OPT_NOAUDIT) == 0) + return true; /* fallthrough */ default: - return 0; + return false; } } @@ -673,7 +674,11 @@ static int kallsyms_open(struct inode *inode, struct file *file) return -ENOMEM; reset_iter(iter, 0); - iter->show_value = kallsyms_show_value(); + /* + * Instead of checking this on every s_show() call, cache + * the result here at open time. + */ + iter->show_value = kallsyms_show_value(file->f_cred); return 0; } diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 4a904cc56d68..d4de217e4a91 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2448,7 +2448,7 @@ static void report_probe(struct seq_file *pi, struct kprobe *p, else kprobe_type = "k"; - if (!kallsyms_show_value()) + if (!kallsyms_show_value(current_cred())) addr = NULL; if (sym) @@ -2540,7 +2540,7 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) * If /proc/kallsyms is not showing kernel address, we won't * show them here either. */ - if (!kallsyms_show_value()) + if (!kallsyms_show_value(current_cred())) seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL, (void *)ent->start_addr); else diff --git a/kernel/module.c b/kernel/module.c index e8a198588f26..a5022ae84e50 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4377,7 +4377,7 @@ static int modules_open(struct inode *inode, struct file *file) if (!err) { struct seq_file *m = file->private_data; - m->private = kallsyms_show_value() ? NULL : (void *)8ul; + m->private = kallsyms_show_value(current_cred()) ? NULL : (void *)8ul; } return err; -- cgit v1.2.3 From 63960260457a02af2a6cb35d75e6bdb17299c882 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Jul 2020 15:45:23 -0700 Subject: bpf: Check correct cred for CAP_SYSLOG in bpf_dump_raw_ok() When evaluating access control over kallsyms visibility, credentials at open() time need to be used, not the "current" creds (though in BPF's case, this has likely always been the same). Plumb access to associated file->f_cred down through bpf_dump_raw_ok() and its callers now that kallsysm_show_value() has been refactored to take struct cred. Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: bpf@vger.kernel.org Cc: stable@vger.kernel.org Fixes: 7105e828c087 ("bpf: allow for correlation of maps and helpers in dump") Signed-off-by: Kees Cook --- include/linux/filter.h | 4 ++-- kernel/bpf/syscall.c | 37 +++++++++++++++++++++---------------- net/core/sysctl_net_core.c | 2 +- 3 files changed, 24 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 55104f6c78e8..0b0144752d78 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -884,12 +884,12 @@ void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); bool bpf_helper_changes_pkt_data(void *func); -static inline bool bpf_dump_raw_ok(void) +static inline bool bpf_dump_raw_ok(const struct cred *cred) { /* Reconstruction of call-sites is dependent on kallsyms, * thus make dump the same restriction. */ - return kallsyms_show_value(current_cred()); + return kallsyms_show_value(cred); } struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8da159936bab..859053ddf05b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3139,7 +3139,8 @@ static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, return NULL; } -static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) +static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, + const struct cred *f_cred) { const struct bpf_map *map; struct bpf_insn *insns; @@ -3165,7 +3166,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) code == (BPF_JMP | BPF_CALL_ARGS)) { if (code == (BPF_JMP | BPF_CALL_ARGS)) insns[i].code = BPF_JMP | BPF_CALL; - if (!bpf_dump_raw_ok()) + if (!bpf_dump_raw_ok(f_cred)) insns[i].imm = 0; continue; } @@ -3221,7 +3222,8 @@ static int set_info_rec_size(struct bpf_prog_info *info) return 0; } -static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, +static int bpf_prog_get_info_by_fd(struct file *file, + struct bpf_prog *prog, const union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -3290,11 +3292,11 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, struct bpf_insn *insns_sanitized; bool fault; - if (prog->blinded && !bpf_dump_raw_ok()) { + if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) { info.xlated_prog_insns = 0; goto done; } - insns_sanitized = bpf_insn_prepare_dump(prog); + insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); if (!insns_sanitized) return -ENOMEM; uinsns = u64_to_user_ptr(info.xlated_prog_insns); @@ -3328,7 +3330,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, } if (info.jited_prog_len && ulen) { - if (bpf_dump_raw_ok()) { + if (bpf_dump_raw_ok(file->f_cred)) { uinsns = u64_to_user_ptr(info.jited_prog_insns); ulen = min_t(u32, info.jited_prog_len, ulen); @@ -3363,7 +3365,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ulen = info.nr_jited_ksyms; info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; if (ulen) { - if (bpf_dump_raw_ok()) { + if (bpf_dump_raw_ok(file->f_cred)) { unsigned long ksym_addr; u64 __user *user_ksyms; u32 i; @@ -3394,7 +3396,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ulen = info.nr_jited_func_lens; info.nr_jited_func_lens = prog->aux->func_cnt ? : 1; if (ulen) { - if (bpf_dump_raw_ok()) { + if (bpf_dump_raw_ok(file->f_cred)) { u32 __user *user_lens; u32 func_len, i; @@ -3451,7 +3453,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, else info.nr_jited_line_info = 0; if (info.nr_jited_line_info && ulen) { - if (bpf_dump_raw_ok()) { + if (bpf_dump_raw_ok(file->f_cred)) { __u64 __user *user_linfo; u32 i; @@ -3497,7 +3499,8 @@ done: return 0; } -static int bpf_map_get_info_by_fd(struct bpf_map *map, +static int bpf_map_get_info_by_fd(struct file *file, + struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -3540,7 +3543,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, return 0; } -static int bpf_btf_get_info_by_fd(struct btf *btf, +static int bpf_btf_get_info_by_fd(struct file *file, + struct btf *btf, const union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -3555,7 +3559,8 @@ static int bpf_btf_get_info_by_fd(struct btf *btf, return btf_get_info_by_fd(btf, attr, uattr); } -static int bpf_link_get_info_by_fd(struct bpf_link *link, +static int bpf_link_get_info_by_fd(struct file *file, + struct bpf_link *link, const union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -3608,15 +3613,15 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, return -EBADFD; if (f.file->f_op == &bpf_prog_fops) - err = bpf_prog_get_info_by_fd(f.file->private_data, attr, + err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr, uattr); else if (f.file->f_op == &bpf_map_fops) - err = bpf_map_get_info_by_fd(f.file->private_data, attr, + err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr, uattr); else if (f.file->f_op == &btf_fops) - err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr); + err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr); else if (f.file->f_op == &bpf_link_fops) - err = bpf_link_get_info_by_fd(f.file->private_data, + err = bpf_link_get_info_by_fd(f.file, f.file->private_data, attr, uattr); else err = -EINVAL; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f93f8ace6c56..6ada114bbcca 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -274,7 +274,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && !ret) { if (jit_enable < 2 || - (jit_enable == 2 && bpf_dump_raw_ok())) { + (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) { *(int *)table->data = jit_enable; if (jit_enable == 2) pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); -- cgit v1.2.3 From 6d5f904904608a9cd32854d7d0a4dd65b27f9935 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Thu, 9 Jul 2020 09:15:29 +0800 Subject: io_uring: export cq overflow status to userspace For those applications which are not willing to use io_uring_enter() to reap and handle cqes, they may completely rely on liburing's io_uring_peek_cqe(), but if cq ring has overflowed, currently because io_uring_peek_cqe() is not aware of this overflow, it won't enter kernel to flush cqes, below test program can reveal this bug: static void test_cq_overflow(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int issued = 0; int ret = 0; do { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); break;; } ret = io_uring_submit(ring); if (ret <= 0) { if (ret != -EBUSY) fprintf(stderr, "sqe submit failed: %d\n", ret); break; } issued++; } while (ret > 0); assert(ret == -EBUSY); printf("issued requests: %d\n", issued); while (issued) { ret = io_uring_peek_cqe(ring, &cqe); if (ret) { if (ret != -EAGAIN) { fprintf(stderr, "peek completion failed: %s\n", strerror(ret)); break; } printf("left requets: %d\n", issued); continue; } io_uring_cqe_seen(ring, cqe); issued--; printf("left requets: %d\n", issued); } } int main(int argc, char *argv[]) { int ret; struct io_uring ring; ret = io_uring_queue_init(16, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } test_cq_overflow(&ring); return 0; } To fix this issue, export cq overflow status to userspace by adding new IORING_SQ_CQ_OVERFLOW flag, then helper functions() in liburing, such as io_uring_peek_cqe, can be aware of this cq overflow and do flush accordingly. Signed-off-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- fs/io_uring.c | 11 +++++++++-- include/uapi/linux/io_uring.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/io_uring.c b/fs/io_uring.c index d37d7ea5ebe5..32e37c38f274 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1274,6 +1274,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) if (cqe) { clear_bit(0, &ctx->sq_check_overflow); clear_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; } spin_unlock_irqrestore(&ctx->completion_lock, flags); io_cqring_ev_posted(ctx); @@ -1311,6 +1312,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) if (list_empty(&ctx->cq_overflow_list)) { set_bit(0, &ctx->sq_check_overflow); set_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; } req->flags |= REQ_F_OVERFLOW; refcount_inc(&req->refs); @@ -6080,9 +6082,9 @@ static int io_sq_thread(void *data) } /* Tell userspace we may need a wakeup call */ + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; - /* make sure to read SQ tail after writing flags */ - smp_mb(); + spin_unlock_irq(&ctx->completion_lock); to_submit = io_sqring_entries(ctx); if (!to_submit || ret == -EBUSY) { @@ -6100,13 +6102,17 @@ static int io_sq_thread(void *data) schedule(); finish_wait(&ctx->sqo_wait, &wait); + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; + spin_unlock_irq(&ctx->completion_lock); ret = 0; continue; } finish_wait(&ctx->sqo_wait, &wait); + spin_lock_irq(&ctx->completion_lock); ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; + spin_unlock_irq(&ctx->completion_lock); } mutex_lock(&ctx->uring_lock); @@ -7488,6 +7494,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, if (list_empty(&ctx->cq_overflow_list)) { clear_bit(0, &ctx->sq_check_overflow); clear_bit(0, &ctx->cq_check_overflow); + ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; } spin_unlock_irq(&ctx->completion_lock); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 92c22699a5a7..7843742b8b74 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -197,6 +197,7 @@ struct io_sqring_offsets { * sq_ring->flags */ #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ +#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */ struct io_cqring_offsets { __u32 head; -- cgit v1.2.3 From a50ca29523b18baea548bdf5df9b4b923c2bb4f6 Mon Sep 17 00:00:00 2001 From: Dave Wang Date: Wed, 8 Jul 2020 22:25:03 -0700 Subject: Input: elan_i2c - add more hardware ID for Lenovo laptops This adds more hardware IDs for Elan touchpads found in various Lenovo laptops. Signed-off-by: Dave Wang Link: https://lore.kernel.org/r/000201d5a8bd$9fead3f0$dfc07bd0$@emc.com.tw Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- include/linux/input/elan-i2c-ids.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/input/elan-i2c-ids.h b/include/linux/input/elan-i2c-ids.h index 1ecb6b45812c..520858d12680 100644 --- a/include/linux/input/elan-i2c-ids.h +++ b/include/linux/input/elan-i2c-ids.h @@ -67,8 +67,15 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN062B", 0 }, { "ELAN062C", 0 }, { "ELAN062D", 0 }, + { "ELAN062E", 0 }, /* Lenovo V340 Whiskey Lake U */ + { "ELAN062F", 0 }, /* Lenovo V340 Comet Lake U */ { "ELAN0631", 0 }, { "ELAN0632", 0 }, + { "ELAN0633", 0 }, /* Lenovo S145 */ + { "ELAN0634", 0 }, /* Lenovo V340 Ice lake */ + { "ELAN0635", 0 }, /* Lenovo V1415-IIL */ + { "ELAN0636", 0 }, /* Lenovo V1415-Dali */ + { "ELAN0637", 0 }, /* Lenovo V1415-IGLR */ { "ELAN1000", 0 }, { } }; -- cgit v1.2.3 From f88814cc2578c121e6edef686365036db72af0ed Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 8 Jul 2020 13:01:57 +0300 Subject: efi/efivars: Expose RT service availability via efivars abstraction Commit bf67fad19e493b ("efi: Use more granular check for availability for variable services") introduced a check into the efivarfs, efi-pstore and other drivers that aborts loading of the module if not all three variable runtime services (GetVariable, SetVariable and GetNextVariable) are supported. However, this results in efivarfs being unavailable entirely if only SetVariable support is missing, which is only needed if you want to make any modifications. Also, efi-pstore and the sysfs EFI variable interface could be backed by another implementation of the 'efivars' abstraction, in which case it is completely irrelevant which services are supported by the EFI firmware. So make the generic 'efivars' abstraction dependent on the availibility of the GetVariable and GetNextVariable EFI runtime services, and add a helper 'efivar_supports_writes()' to find out whether the currently active efivars abstraction supports writes (and wire it up to the availability of SetVariable for the generic one). Then, use the efivar_supports_writes() helper to decide whether to permit efivarfs to be mounted read-write, and whether to enable efi-pstore or the sysfs EFI variable interface altogether. Fixes: bf67fad19e493b ("efi: Use more granular check for availability for variable services") Reported-by: Heinrich Schuchardt Acked-by: Ilias Apalodimas Tested-by: Ilias Apalodimas Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi-pstore.c | 5 +---- drivers/firmware/efi/efi.c | 12 ++++++++---- drivers/firmware/efi/efivars.c | 5 +---- drivers/firmware/efi/vars.c | 6 ++++++ fs/efivarfs/super.c | 6 +++--- include/linux/efi.h | 1 + 6 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index c2f1d4e6630b..feb7fe6f2da7 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -356,10 +356,7 @@ static struct pstore_info efi_pstore_info = { static __init int efivars_pstore_init(void) { - if (!efi_rt_services_supported(EFI_RT_SUPPORTED_VARIABLE_SERVICES)) - return 0; - - if (!efivars_kobject()) + if (!efivars_kobject() || !efivar_supports_writes()) return 0; if (efivars_pstore_disable) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 5114cae4ec97..fdd1db025dbf 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -176,11 +176,13 @@ static struct efivar_operations generic_ops; static int generic_ops_register(void) { generic_ops.get_variable = efi.get_variable; - generic_ops.set_variable = efi.set_variable; - generic_ops.set_variable_nonblocking = efi.set_variable_nonblocking; generic_ops.get_next_variable = efi.get_next_variable; generic_ops.query_variable_store = efi_query_variable_store; + if (efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE)) { + generic_ops.set_variable = efi.set_variable; + generic_ops.set_variable_nonblocking = efi.set_variable_nonblocking; + } return efivars_register(&generic_efivars, &generic_ops, efi_kobj); } @@ -382,7 +384,8 @@ static int __init efisubsys_init(void) return -ENOMEM; } - if (efi_rt_services_supported(EFI_RT_SUPPORTED_VARIABLE_SERVICES)) { + if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE | + EFI_RT_SUPPORTED_GET_NEXT_VARIABLE_NAME)) { efivar_ssdt_load(); error = generic_ops_register(); if (error) @@ -416,7 +419,8 @@ static int __init efisubsys_init(void) err_remove_group: sysfs_remove_group(efi_kobj, &efi_subsys_attr_group); err_unregister: - if (efi_rt_services_supported(EFI_RT_SUPPORTED_VARIABLE_SERVICES)) + if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE | + EFI_RT_SUPPORTED_GET_NEXT_VARIABLE_NAME)) generic_ops_unregister(); err_put: kobject_put(efi_kobj); diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 26528a46d99e..dcea137142b3 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -680,11 +680,8 @@ int efivars_sysfs_init(void) struct kobject *parent_kobj = efivars_kobject(); int error = 0; - if (!efi_rt_services_supported(EFI_RT_SUPPORTED_VARIABLE_SERVICES)) - return -ENODEV; - /* No efivars has been registered yet */ - if (!parent_kobj) + if (!parent_kobj || !efivar_supports_writes()) return 0; printk(KERN_INFO "EFI Variables Facility v%s %s\n", EFIVARS_VERSION, diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 5f2a4d162795..973eef234b36 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -1229,3 +1229,9 @@ out: return rv; } EXPORT_SYMBOL_GPL(efivars_unregister); + +int efivar_supports_writes(void) +{ + return __efivars && __efivars->ops->set_variable; +} +EXPORT_SYMBOL_GPL(efivar_supports_writes); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 12c66f5d92dd..28bb5689333a 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -201,6 +201,9 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_d_op = &efivarfs_d_ops; sb->s_time_gran = 1; + if (!efivar_supports_writes()) + sb->s_flags |= SB_RDONLY; + inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0, true); if (!inode) return -ENOMEM; @@ -252,9 +255,6 @@ static struct file_system_type efivarfs_type = { static __init int efivarfs_init(void) { - if (!efi_rt_services_supported(EFI_RT_SUPPORTED_VARIABLE_SERVICES)) - return -ENODEV; - if (!efivars_kobject()) return -ENODEV; diff --git a/include/linux/efi.h b/include/linux/efi.h index bb35f3305e55..05c47f857383 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -994,6 +994,7 @@ int efivars_register(struct efivars *efivars, int efivars_unregister(struct efivars *efivars); struct kobject *efivars_kobject(void); +int efivar_supports_writes(void); int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head); -- cgit v1.2.3 From c9a368f1c0fbe2e3a21ebf231caeae58b18b2681 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 8 Jul 2020 23:11:10 -0700 Subject: bpf: net: Avoid incorrect bpf_sk_reuseport_detach call bpf_sk_reuseport_detach is currently called when sk->sk_user_data is not NULL. It is incorrect because sk->sk_user_data may not be managed by the bpf's reuseport_array. It has been reported in [1] that, the bpf_sk_reuseport_detach() which is called from udp_lib_unhash() has corrupted the sk_user_data managed by l2tp. This patch solves it by using another bit (defined as SK_USER_DATA_BPF) of the sk_user_data pointer value. It marks that a sk_user_data is managed/owned by BPF. The patch depends on a PTRMASK introduced in commit f1ff5ce2cd5e ("net, sk_msg: Clear sk_user_data pointer on clone if tagged"). [ Note: sk->sk_user_data is used by bpf's reuseport_array only when a sk is added to the bpf's reuseport_array. i.e. doing setsockopt(SO_REUSEPORT) and having "sk->sk_reuseport == 1" alone will not stop sk->sk_user_data being used by other means. ] [1]: https://lore.kernel.org/netdev/20200706121259.GA20199@katalix.com/ Fixes: 5dc4c4b7d4e8 ("bpf: Introduce BPF_MAP_TYPE_REUSEPORT_SOCKARRAY") Reported-by: James Chapman Reported-by: syzbot+9f092552ba9a5efca5df@syzkaller.appspotmail.com Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Tested-by: James Chapman Acked-by: James Chapman Link: https://lore.kernel.org/bpf/20200709061110.4019316-1-kafai@fb.com --- include/net/sock.h | 3 ++- kernel/bpf/reuseport_array.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 3428619faae4..1183507df95b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -533,7 +533,8 @@ enum sk_pacing { * be copied. */ #define SK_USER_DATA_NOCOPY 1UL -#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY) +#define SK_USER_DATA_BPF 2UL /* Managed by BPF */ +#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF) /** * sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index a95bc8d7e812..cae9d505e04a 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -24,7 +24,7 @@ void bpf_sk_reuseport_detach(struct sock *sk) write_lock_bh(&sk->sk_callback_lock); sk_user_data = (uintptr_t)sk->sk_user_data; - if (sk_user_data) { + if (sk_user_data & SK_USER_DATA_BPF) { struct sock __rcu **socks; socks = (void *)(sk_user_data & SK_USER_DATA_PTRMASK); @@ -309,7 +309,8 @@ int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, if (err) goto put_file_unlock; - sk_user_data = (uintptr_t)&array->ptrs[index] | SK_USER_DATA_NOCOPY; + sk_user_data = (uintptr_t)&array->ptrs[index] | SK_USER_DATA_NOCOPY | + SK_USER_DATA_BPF; WRITE_ONCE(nsk->sk_user_data, (void *)sk_user_data); rcu_assign_pointer(array->ptrs[index], nsk); free_osk = osk; -- cgit v1.2.3 From 14b032b8f8fce03a546dcf365454bec8c4a58d7d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 9 Jul 2020 16:28:44 -0700 Subject: cgroup: Fix sock_cgroup_data on big-endian. In order for no_refcnt and is_data to be the lowest order two bits in the 'val' we have to pad out the bitfield of the u8. Fixes: ad0f75e5f57c ("cgroup: fix cgroup_sk_alloc() for sk_clone_lock()") Reported-by: Guenter Roeck Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4f1cd0edc57d..fee0b5547cd0 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -792,6 +792,7 @@ struct sock_cgroup_data { struct { u8 is_data : 1; u8 no_refcnt : 1; + u8 unused : 6; u8 padding; u16 prioidx; u32 classid; @@ -801,6 +802,7 @@ struct sock_cgroup_data { u32 classid; u16 prioidx; u8 padding; + u8 unused : 6; u8 no_refcnt : 1; u8 is_data : 1; } __packed; -- cgit v1.2.3 From 88b3d5c90e9685be54dd5bc441970044020eca76 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 22 Jun 2020 09:03:31 +0300 Subject: net/mlx5e: Fix port buffers cell size value Device unit for port buffers size, xoff_threshold and xon_threshold is cells. Fix a bug in driver where cell unit size was hard-coded to 128 bytes. This hard-coded value is buggy, as it is wrong for some hardware versions. Driver to read cell size from SBCAM register and translate bytes to cell units accordingly. In order to fix the bug, this patch exposes SBCAM (Shared buffer capabilities mask) layout and defines. If SBCAM.cap_cell_size is valid, use it for all bytes to cells calculations. If not valid, fallback to 128. Cell size do not change on the fly per device. Instead of issuing SBCAM access reg command every time such translation is needed, cache it in mlx5e_dcbx as part of mlx5e_dcbnl_initialize(). Pass dcbx.port_buff_cell_sz as a param to every function that needs bytes to cells translation. While fixing the bug, move MLX5E_BUFFER_CELL_SHIFT macro to en_dcbnl.c, as it is only used by that file. Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Eran Ben Elisha Reviewed-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h | 1 + .../ethernet/mellanox/mlx5/core/en/port_buffer.c | 53 ++++++++++++---------- .../ethernet/mellanox/mlx5/core/en/port_buffer.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 19 ++++++++ include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 28 ++++++++++++ 6 files changed, 78 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h index 7be6b2d36b60..9976de8b9047 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h @@ -29,6 +29,7 @@ struct mlx5e_dcbx { bool manual_buffer; u32 cable_len; u32 xoff; + u16 port_buff_cell_sz; }; #define MLX5E_MAX_DSCP (64) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index ae99fac08b53..673f1c82d381 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -34,6 +34,7 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, struct mlx5e_port_buffer *port_buffer) { + u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; struct mlx5_core_dev *mdev = priv->mdev; int sz = MLX5_ST_SZ_BYTES(pbmc_reg); u32 total_used = 0; @@ -57,11 +58,11 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, port_buffer->buffer[i].epsb = MLX5_GET(bufferx_reg, buffer, epsb); port_buffer->buffer[i].size = - MLX5_GET(bufferx_reg, buffer, size) << MLX5E_BUFFER_CELL_SHIFT; + MLX5_GET(bufferx_reg, buffer, size) * port_buff_cell_sz; port_buffer->buffer[i].xon = - MLX5_GET(bufferx_reg, buffer, xon_threshold) << MLX5E_BUFFER_CELL_SHIFT; + MLX5_GET(bufferx_reg, buffer, xon_threshold) * port_buff_cell_sz; port_buffer->buffer[i].xoff = - MLX5_GET(bufferx_reg, buffer, xoff_threshold) << MLX5E_BUFFER_CELL_SHIFT; + MLX5_GET(bufferx_reg, buffer, xoff_threshold) * port_buff_cell_sz; total_used += port_buffer->buffer[i].size; mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i, @@ -73,7 +74,7 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, } port_buffer->port_buffer_size = - MLX5_GET(pbmc_reg, out, port_buffer_size) << MLX5E_BUFFER_CELL_SHIFT; + MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz; port_buffer->spare_buffer_size = port_buffer->port_buffer_size - total_used; @@ -88,9 +89,9 @@ out: static int port_set_buffer(struct mlx5e_priv *priv, struct mlx5e_port_buffer *port_buffer) { + u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; struct mlx5_core_dev *mdev = priv->mdev; int sz = MLX5_ST_SZ_BYTES(pbmc_reg); - void *buffer; void *in; int err; int i; @@ -104,16 +105,18 @@ static int port_set_buffer(struct mlx5e_priv *priv, goto out; for (i = 0; i < MLX5E_MAX_BUFFER; i++) { - buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); - - MLX5_SET(bufferx_reg, buffer, size, - port_buffer->buffer[i].size >> MLX5E_BUFFER_CELL_SHIFT); - MLX5_SET(bufferx_reg, buffer, lossy, - port_buffer->buffer[i].lossy); - MLX5_SET(bufferx_reg, buffer, xoff_threshold, - port_buffer->buffer[i].xoff >> MLX5E_BUFFER_CELL_SHIFT); - MLX5_SET(bufferx_reg, buffer, xon_threshold, - port_buffer->buffer[i].xon >> MLX5E_BUFFER_CELL_SHIFT); + void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); + u64 size = port_buffer->buffer[i].size; + u64 xoff = port_buffer->buffer[i].xoff; + u64 xon = port_buffer->buffer[i].xon; + + do_div(size, port_buff_cell_sz); + do_div(xoff, port_buff_cell_sz); + do_div(xon, port_buff_cell_sz); + MLX5_SET(bufferx_reg, buffer, size, size); + MLX5_SET(bufferx_reg, buffer, lossy, port_buffer->buffer[i].lossy); + MLX5_SET(bufferx_reg, buffer, xoff_threshold, xoff); + MLX5_SET(bufferx_reg, buffer, xon_threshold, xon); } err = mlx5e_port_set_pbmc(mdev, in); @@ -143,7 +146,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) } static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, - u32 xoff, unsigned int max_mtu) + u32 xoff, unsigned int max_mtu, u16 port_buff_cell_sz) { int i; @@ -155,7 +158,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, } if (port_buffer->buffer[i].size < - (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) { + (xoff + max_mtu + port_buff_cell_sz)) { pr_err("buffer_size[%d]=%d is not enough for lossless buffer\n", i, port_buffer->buffer[i].size); return -ENOMEM; @@ -175,6 +178,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, * @pfc_en: current pfc configuration * @buffer: current prio to buffer mapping * @xoff: xoff value + * @port_buff_cell_sz: port buffer cell_size * @port_buffer: port receive buffer configuration * @change: * @@ -189,7 +193,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, * sets change to true if buffer configuration was modified. */ static int update_buffer_lossy(unsigned int max_mtu, - u8 pfc_en, u8 *buffer, u32 xoff, + u8 pfc_en, u8 *buffer, u32 xoff, u16 port_buff_cell_sz, struct mlx5e_port_buffer *port_buffer, bool *change) { @@ -225,7 +229,7 @@ static int update_buffer_lossy(unsigned int max_mtu, } if (changed) { - err = update_xoff_threshold(port_buffer, xoff, max_mtu); + err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz); if (err) return err; @@ -262,6 +266,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u32 *buffer_size, u8 *prio2buffer) { + u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; struct mlx5e_port_buffer port_buffer; u32 xoff = calculate_xoff(priv, mtu); bool update_prio2buffer = false; @@ -282,7 +287,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_CABLE_LEN) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, max_mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz); if (err) return err; } @@ -292,7 +297,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, + err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, port_buff_cell_sz, &port_buffer, &update_buffer); if (err) return err; @@ -304,7 +309,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, port_buff_cell_sz, xoff, &port_buffer, &update_buffer); if (err) return err; @@ -329,7 +334,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, return -EINVAL; update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, max_mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz); if (err) return err; } @@ -337,7 +342,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, /* Need to update buffer configuration if xoff value is changed */ if (!update_buffer && xoff != priv->dcbx.xoff) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, max_mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz); if (err) return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h index 34f55b81a0de..80af7a5ac604 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h @@ -36,7 +36,6 @@ #include "port.h" #define MLX5E_MAX_BUFFER 8 -#define MLX5E_BUFFER_CELL_SHIFT 7 #define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */ #define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index bc102d094bbd..d20243d6a032 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1217,6 +1217,24 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv) return 0; } +#define MLX5E_BUFFER_CELL_SHIFT 7 + +static u16 mlx5e_query_port_buffers_cell_size(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(sbcam_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(sbcam_reg)] = {}; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return (1 << MLX5E_BUFFER_CELL_SHIFT); + + if (mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_SBCAM, 0, 0)) + return (1 << MLX5E_BUFFER_CELL_SHIFT); + + return MLX5_GET(sbcam_reg, out, cap_cell_size); +} + void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) { struct mlx5e_dcbx *dcbx = &priv->dcbx; @@ -1234,6 +1252,7 @@ void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST) priv->dcbx.cap |= DCB_CAP_DCBX_HOST; + priv->dcbx.port_buff_cell_sz = mlx5e_query_port_buffers_cell_size(priv); priv->dcbx.manual_buffer = false; priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 13c0e4556eda..1e6ca716635a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -147,6 +147,7 @@ enum { MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, MLX5_REG_MIRC = 0x9162, + MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ca1887dd0423..073b79eacc99 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9960,6 +9960,34 @@ struct mlx5_ifc_pptb_reg_bits { u8 untagged_buff[0x4]; }; +struct mlx5_ifc_sbcam_reg_bits { + u8 reserved_at_0[0x8]; + u8 feature_group[0x8]; + u8 reserved_at_10[0x8]; + u8 access_reg_group[0x8]; + + u8 reserved_at_20[0x20]; + + u8 sb_access_reg_cap_mask[4][0x20]; + + u8 reserved_at_c0[0x80]; + + u8 sb_feature_cap_mask[4][0x20]; + + u8 reserved_at_1c0[0x40]; + + u8 cap_total_buffer_size[0x20]; + + u8 cap_cell_size[0x10]; + u8 cap_max_pg_buffers[0x8]; + u8 cap_num_pool_supported[0x8]; + + u8 reserved_at_240[0x8]; + u8 cap_sbsr_stat_size[0x8]; + u8 cap_max_tclass_data[0x8]; + u8 cap_max_cpu_ingress_tclass_sb[0x8]; +}; + struct mlx5_ifc_pbmc_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; -- cgit v1.2.3 From 8c080d3a974ad471d8324825851044284f1886c9 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:36:42 +0800 Subject: kgdb: enable arch to support XML packet. The XML packet could be supported by required architecture if the architecture defines CONFIG_HAVE_ARCH_KGDB_QXFER_PKT and implement its own kgdb_arch_handle_qxfer_pkt(). Except for the kgdb_arch_handle_qxfer_pkt(), the architecture also needs to record the feature supported by gdb stub into the kgdb_arch_gdb_stub_feature, and these features will be reported to host gdb when gdb stub receives the qSupported packet. Signed-off-by: Vincent Chen Acked-by: Daniel Thompson Signed-off-by: Palmer Dabbelt --- include/linux/kgdb.h | 11 +++++++++++ kernel/debug/gdbstub.c | 13 +++++++++++++ lib/Kconfig.kgdb | 5 +++++ 3 files changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 529116b0cabe..0e4e3a80d58c 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -176,6 +176,17 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code, char *remcom_out_buffer, struct pt_regs *regs); +/** + * kgdb_arch_handle_qxfer_pkt - Handle architecture specific GDB XML + * packets. + * @remcom_in_buffer: The buffer of the packet we have read. + * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into. + */ + +extern void +kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, + char *remcom_out_buffer); + /** * kgdb_call_nmi_hook - Call kgdb_nmicallback() on the current CPU * @ignored: This parameter is only here to match the prototype. diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 61774aec46b4..a790026e42d0 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -792,6 +792,19 @@ static void gdb_cmd_query(struct kgdb_state *ks) } break; #endif +#ifdef CONFIG_HAVE_ARCH_KGDB_QXFER_PKT + case 'S': + if (!strncmp(remcom_in_buffer, "qSupported:", 11)) + strcpy(remcom_out_buffer, kgdb_arch_gdb_stub_feature); + break; + case 'X': + if (!strncmp(remcom_in_buffer, "qXfer:", 6)) + kgdb_arch_handle_qxfer_pkt(remcom_in_buffer, + remcom_out_buffer); + break; +#endif + default: + break; } } diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index ffa7a76de086..256f2486f9bd 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -3,6 +3,11 @@ config HAVE_ARCH_KGDB bool +# set if architecture has the its kgdb_arch_handle_qxfer_pkt +# function to enable gdb stub to address XML packet sent from GDB. +config HAVE_ARCH_KGDB_QXFER_PKT + bool + menuconfig KGDB bool "KGDB: kernel debugger" depends on HAVE_ARCH_KGDB -- cgit v1.2.3 From def0aa218e6d42231540329e6f5741fdec9e7da4 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:37:25 +0800 Subject: kgdb: Move the extern declaration kgdb_has_hit_break() to generic kgdb.h Currently, only riscv kgdb.c uses the kgdb_has_hit_break() to identify the kgdb breakpoint. It causes other architectures will encounter the "no previous prototype" warnings if the compile option has W=1. Moving the declaration of extern kgdb_has_hit_break() from risc-v kgdb.h to generic kgdb.h to avoid generating these warnings. Signed-off-by: Vincent Chen Acked-by: Daniel Thompson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kgdb.h | 1 - include/linux/kgdb.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index 8177a457caff..f45889bbb965 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -19,7 +19,6 @@ #ifndef __ASSEMBLY__ -extern int kgdb_has_hit_break(unsigned long addr); extern unsigned long kgdb_compiled_break; static inline void arch_kgdb_breakpoint(void) diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 0e4e3a80d58c..477b8b7c908f 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -325,6 +325,7 @@ extern int kgdb_hex2mem(char *buf, char *mem, int count); extern int kgdb_isremovedbreak(unsigned long addr); extern void kgdb_schedule_breakpoint(void); +extern int kgdb_has_hit_break(unsigned long addr); extern int kgdb_handle_exception(int ex_vector, int signo, int err_code, -- cgit v1.2.3 From f794db6841e5480208f0c3a3ac1df445a96b079e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 9 Jul 2020 14:08:51 +0200 Subject: virt: vbox: Fix VBGL_IOCTL_VMMDEV_REQUEST_BIG and _LOG req numbers to match upstream Until this commit the mainline kernel version (this version) of the vboxguest module contained a bug where it defined VBGL_IOCTL_VMMDEV_REQUEST_BIG and VBGL_IOCTL_LOG using _IOC(_IOC_READ | _IOC_WRITE, 'V', ...) instead of _IO(V, ...) as the out of tree VirtualBox upstream version does. Since the VirtualBox userspace bits are always built against VirtualBox upstream's headers, this means that so far the mainline kernel version of the vboxguest module has been failing these 2 ioctls with -ENOTTY. I guess that VBGL_IOCTL_VMMDEV_REQUEST_BIG is never used causing us to not hit that one and sofar the vboxguest driver has failed to actually log any log messages passed it through VBGL_IOCTL_LOG. This commit changes the VBGL_IOCTL_VMMDEV_REQUEST_BIG and VBGL_IOCTL_LOG defines to match the out of tree VirtualBox upstream vboxguest version, while keeping compatibility with the old wrong request defines so as to not break the kernel ABI in case someone has been using the old request defines. Fixes: f6ddd094f579 ("virt: Add vboxguest driver for Virtual Box Guest integration UAPI") Cc: stable@vger.kernel.org Acked-by: Arnd Bergmann Reviewed-by: Arnd Bergmann Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200709120858.63928-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/vboxguest/vboxguest_core.c | 4 +++- drivers/virt/vboxguest/vboxguest_core.h | 15 +++++++++++++++ drivers/virt/vboxguest/vboxguest_linux.c | 3 ++- include/uapi/linux/vboxguest.h | 4 ++-- 4 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/virt/vboxguest/vboxguest_core.c b/drivers/virt/vboxguest/vboxguest_core.c index b690a8a4bf9e..8fab04e76c14 100644 --- a/drivers/virt/vboxguest/vboxguest_core.c +++ b/drivers/virt/vboxguest/vboxguest_core.c @@ -1520,7 +1520,8 @@ int vbg_core_ioctl(struct vbg_session *session, unsigned int req, void *data) /* For VMMDEV_REQUEST hdr->type != VBG_IOCTL_HDR_TYPE_DEFAULT */ if (req_no_size == VBG_IOCTL_VMMDEV_REQUEST(0) || - req == VBG_IOCTL_VMMDEV_REQUEST_BIG) + req == VBG_IOCTL_VMMDEV_REQUEST_BIG || + req == VBG_IOCTL_VMMDEV_REQUEST_BIG_ALT) return vbg_ioctl_vmmrequest(gdev, session, data); if (hdr->type != VBG_IOCTL_HDR_TYPE_DEFAULT) @@ -1558,6 +1559,7 @@ int vbg_core_ioctl(struct vbg_session *session, unsigned int req, void *data) case VBG_IOCTL_HGCM_CALL(0): return vbg_ioctl_hgcm_call(gdev, session, f32bit, data); case VBG_IOCTL_LOG(0): + case VBG_IOCTL_LOG_ALT(0): return vbg_ioctl_log(data); } diff --git a/drivers/virt/vboxguest/vboxguest_core.h b/drivers/virt/vboxguest/vboxguest_core.h index 4188c12b839f..77c3a9c8255d 100644 --- a/drivers/virt/vboxguest/vboxguest_core.h +++ b/drivers/virt/vboxguest/vboxguest_core.h @@ -15,6 +15,21 @@ #include #include "vmmdev.h" +/* + * The mainline kernel version (this version) of the vboxguest module + * contained a bug where it defined VBGL_IOCTL_VMMDEV_REQUEST_BIG and + * VBGL_IOCTL_LOG using _IOC(_IOC_READ | _IOC_WRITE, 'V', ...) instead + * of _IO(V, ...) as the out of tree VirtualBox upstream version does. + * + * These _ALT definitions keep compatibility with the wrong defines the + * mainline kernel version used for a while. + * Note the VirtualBox userspace bits have always been built against + * VirtualBox upstream's headers, so this is likely not necessary. But + * we must never break our ABI so we keep these around to be 100% sure. + */ +#define VBG_IOCTL_VMMDEV_REQUEST_BIG_ALT _IOC(_IOC_READ | _IOC_WRITE, 'V', 3, 0) +#define VBG_IOCTL_LOG_ALT(s) _IOC(_IOC_READ | _IOC_WRITE, 'V', 9, s) + struct vbg_session; /** VBox guest memory balloon. */ diff --git a/drivers/virt/vboxguest/vboxguest_linux.c b/drivers/virt/vboxguest/vboxguest_linux.c index 6e8c0f1c1056..32c2c52f7e84 100644 --- a/drivers/virt/vboxguest/vboxguest_linux.c +++ b/drivers/virt/vboxguest/vboxguest_linux.c @@ -131,7 +131,8 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req, * the need for a bounce-buffer and another copy later on. */ is_vmmdev_req = (req & ~IOCSIZE_MASK) == VBG_IOCTL_VMMDEV_REQUEST(0) || - req == VBG_IOCTL_VMMDEV_REQUEST_BIG; + req == VBG_IOCTL_VMMDEV_REQUEST_BIG || + req == VBG_IOCTL_VMMDEV_REQUEST_BIG_ALT; if (is_vmmdev_req) buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT, diff --git a/include/uapi/linux/vboxguest.h b/include/uapi/linux/vboxguest.h index 9cec58a6a5ea..f79d7abe27db 100644 --- a/include/uapi/linux/vboxguest.h +++ b/include/uapi/linux/vboxguest.h @@ -103,7 +103,7 @@ VMMDEV_ASSERT_SIZE(vbg_ioctl_driver_version_info, 24 + 20); /* IOCTL to perform a VMM Device request larger then 1KB. */ -#define VBG_IOCTL_VMMDEV_REQUEST_BIG _IOC(_IOC_READ | _IOC_WRITE, 'V', 3, 0) +#define VBG_IOCTL_VMMDEV_REQUEST_BIG _IO('V', 3) /** VBG_IOCTL_HGCM_CONNECT data structure. */ @@ -198,7 +198,7 @@ struct vbg_ioctl_log { } u; }; -#define VBG_IOCTL_LOG(s) _IOC(_IOC_READ | _IOC_WRITE, 'V', 9, s) +#define VBG_IOCTL_LOG(s) _IO('V', 9) /** VBG_IOCTL_WAIT_FOR_EVENTS data structure. */ -- cgit v1.2.3 From ec7bd78498f29680f536451fbdf9464e851273ed Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 1 Jul 2020 12:42:58 -0700 Subject: driver core: Rename dev_links_info.defer_sync to defer_hook The defer_sync field is used as a hook to add the device to the deferred_sync list. Rename it so that it's more meaningful for the next patch that'll also use this field as a hook to a deferred_fw_devlink list. Signed-off-by: Saravana Kannan Reviewed-by: Rafael J. Wysocki Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200701194259.3337652-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 22 +++++++++++----------- include/linux/device.h | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/base/core.c b/drivers/base/core.c index 35cc9896eb9e..d1d2cdc3a8d8 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -754,11 +754,11 @@ static void __device_links_queue_sync_state(struct device *dev, */ dev->state_synced = true; - if (WARN_ON(!list_empty(&dev->links.defer_sync))) + if (WARN_ON(!list_empty(&dev->links.defer_hook))) return; get_device(dev); - list_add_tail(&dev->links.defer_sync, list); + list_add_tail(&dev->links.defer_hook, list); } /** @@ -776,8 +776,8 @@ static void device_links_flush_sync_list(struct list_head *list, { struct device *dev, *tmp; - list_for_each_entry_safe(dev, tmp, list, links.defer_sync) { - list_del_init(&dev->links.defer_sync); + list_for_each_entry_safe(dev, tmp, list, links.defer_hook) { + list_del_init(&dev->links.defer_hook); if (dev != dont_lock_dev) device_lock(dev); @@ -815,12 +815,12 @@ void device_links_supplier_sync_state_resume(void) if (defer_sync_state_count) goto out; - list_for_each_entry_safe(dev, tmp, &deferred_sync, links.defer_sync) { + list_for_each_entry_safe(dev, tmp, &deferred_sync, links.defer_hook) { /* * Delete from deferred_sync list before queuing it to - * sync_list because defer_sync is used for both lists. + * sync_list because defer_hook is used for both lists. */ - list_del_init(&dev->links.defer_sync); + list_del_init(&dev->links.defer_hook); __device_links_queue_sync_state(dev, &sync_list); } out: @@ -838,8 +838,8 @@ late_initcall(sync_state_resume_initcall); static void __device_links_supplier_defer_sync(struct device *sup) { - if (list_empty(&sup->links.defer_sync) && dev_has_sync_state(sup)) - list_add_tail(&sup->links.defer_sync, &deferred_sync); + if (list_empty(&sup->links.defer_hook) && dev_has_sync_state(sup)) + list_add_tail(&sup->links.defer_hook, &deferred_sync); } static void device_link_drop_managed(struct device_link *link) @@ -1052,7 +1052,7 @@ void device_links_driver_cleanup(struct device *dev) WRITE_ONCE(link->status, DL_STATE_DORMANT); } - list_del_init(&dev->links.defer_sync); + list_del_init(&dev->links.defer_hook); __device_links_no_driver(dev); device_links_write_unlock(); @@ -2171,7 +2171,7 @@ void device_initialize(struct device *dev) INIT_LIST_HEAD(&dev->links.consumers); INIT_LIST_HEAD(&dev->links.suppliers); INIT_LIST_HEAD(&dev->links.needs_suppliers); - INIT_LIST_HEAD(&dev->links.defer_sync); + INIT_LIST_HEAD(&dev->links.defer_hook); dev->links.status = DL_DEV_NO_DRIVER; } EXPORT_SYMBOL_GPL(device_initialize); diff --git a/include/linux/device.h b/include/linux/device.h index 15460a5ac024..9bb2bc7bb8e3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -433,7 +433,7 @@ enum dl_dev_state { * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. * @needs_suppliers: Hook to global list of devices waiting for suppliers. - * @defer_sync: Hook to global list of devices that have deferred sync_state. + * @defer_hook: Hook to global list of devices that have deferred sync_state. * @need_for_probe: If needs_suppliers is on a list, this indicates if the * suppliers are needed for probe or not. * @status: Driver status information. @@ -442,7 +442,7 @@ struct dev_links_info { struct list_head suppliers; struct list_head consumers; struct list_head needs_suppliers; - struct list_head defer_sync; + struct list_head defer_hook; bool need_for_probe; enum dl_dev_state status; }; -- cgit v1.2.3 From 2451e746478a6a6e981cfa66b62b791ca93b90c8 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 1 Jul 2020 12:42:59 -0700 Subject: driver core: Avoid deferred probe due to fw_devlink_pause/resume() With the earlier patch in this series, all devices that deferred probe due to fw_devlink_pause() will have their probes delayed till the deferred probe thread is kicked off during late_initcall. This will also affect all their consumers. This delayed probing in unnecessary. So this patch just keeps track of the devices that had their probe deferred due to fw_devlink_pause() and attempts to probe them once during fw_devlink_resume(). Fixes: 716a7a259690 ("driver core: fw_devlink: Add support for batching fwnode parsing") Signed-off-by: Saravana Kannan Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200701194259.3337652-4-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 21 +++++++++++++++++++++ include/linux/device.h | 3 ++- 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/base/core.c b/drivers/base/core.c index d1d2cdc3a8d8..05d414e9e8a4 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -50,6 +50,7 @@ static DEFINE_MUTEX(wfs_lock); static LIST_HEAD(deferred_sync); static unsigned int defer_sync_state_count = 1; static unsigned int defer_fw_devlink_count; +static LIST_HEAD(deferred_fw_devlink); static DEFINE_MUTEX(defer_fw_devlink_lock); static bool fw_devlink_is_permissive(void); @@ -1244,6 +1245,12 @@ static void fw_devlink_link_device(struct device *dev) fw_ret = -EAGAIN; } else { fw_ret = -ENODEV; + /* + * defer_hook is not used to add device to deferred_sync list + * until device is bound. Since deferred fw devlink also blocks + * probing, same list hook can be used for deferred_fw_devlink. + */ + list_add_tail(&dev->links.defer_hook, &deferred_fw_devlink); } if (fw_ret == -ENODEV) @@ -1312,6 +1319,9 @@ void fw_devlink_pause(void) */ void fw_devlink_resume(void) { + struct device *dev, *tmp; + LIST_HEAD(probe_list); + mutex_lock(&defer_fw_devlink_lock); if (!defer_fw_devlink_count) { WARN(true, "Unmatched fw_devlink pause/resume!"); @@ -1323,8 +1333,19 @@ void fw_devlink_resume(void) goto out; device_link_add_missing_supplier_links(); + list_splice_tail_init(&deferred_fw_devlink, &probe_list); out: mutex_unlock(&defer_fw_devlink_lock); + + /* + * bus_probe_device() can cause new devices to get added and they'll + * try to grab defer_fw_devlink_lock. So, this needs to be done outside + * the defer_fw_devlink_lock. + */ + list_for_each_entry_safe(dev, tmp, &probe_list, links.defer_hook) { + list_del_init(&dev->links.defer_hook); + bus_probe_device(dev); + } } /* Device links support end. */ diff --git a/include/linux/device.h b/include/linux/device.h index 9bb2bc7bb8e3..5efed864b387 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -433,7 +433,8 @@ enum dl_dev_state { * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. * @needs_suppliers: Hook to global list of devices waiting for suppliers. - * @defer_hook: Hook to global list of devices that have deferred sync_state. + * @defer_hook: Hook to global list of devices that have deferred sync_state or + * deferred fw_devlink. * @need_for_probe: If needs_suppliers is on a list, this indicates if the * suppliers are needed for probe or not. * @status: Driver status information. -- cgit v1.2.3 From b330966f79fb4fdc49183f58db113303695a750f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Jul 2020 14:45:41 +0200 Subject: fuse: reject options on reconfigure via fsconfig(2) Previous patch changed handling of remount/reconfigure to ignore all options, including those that are unknown to the fuse kernel fs. This was done for backward compatibility, but this likely only affects the old mount(2) API. The new fsconfig(2) based reconfiguration could possibly be improved. This would make the new API less of a drop in replacement for the old, OTOH this is a good chance to get rid of some weirdnesses in the old API. Several other behaviors might make sense: 1) unknown options are rejected, known options are ignored 2) unknown options are rejected, known options are rejected if the value is changed, allowed otherwise 3) all options are rejected Prior to the backward compatibility fix to ignore all options all known options were accepted (1), even if they change the value of a mount parameter; fuse_reconfigure() does not look at the config values set by fuse_parse_param(). To fix that we'd need to verify that the value provided is the same as set in the initial configuration (2). The major drawback is that this is much more complex than just rejecting all attempts at changing options (3); i.e. all options signify initial configuration values and don't make sense on reconfigure. This patch opts for (3) with the rationale that no mount options are reconfigurable in fuse. Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 16 ++++++++++------ fs/namespace.c | 1 + include/linux/fs_context.h | 1 + 3 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ba201bf5ffad..bba747520e9b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -477,12 +477,16 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) struct fuse_fs_context *ctx = fc->fs_private; int opt; - /* - * Ignore options coming from mount(MS_REMOUNT) for backward - * compatibility. - */ - if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) - return 0; + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + /* + * Ignore options coming from mount(MS_REMOUNT) for backward + * compatibility. + */ + if (fc->oldapi) + return 0; + + return invalfc(fc, "No changes allowed in reconfigure"); + } opt = fs_parse(fc, fuse_fs_parameters, param, &result); if (opt < 0) diff --git a/fs/namespace.c b/fs/namespace.c index f30ed401cc6d..4a0f600a3328 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2603,6 +2603,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, if (IS_ERR(fc)) return PTR_ERR(fc); + fc->oldapi = true; err = parse_monolithic_mount_data(fc, data); if (!err) { down_write(&sb->s_umount); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 5f24fcbfbfb4..37e1e8f7f08d 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -109,6 +109,7 @@ struct fs_context { enum fs_context_phase phase:8; /* The phase the context is in */ bool need_free:1; /* Need to call ops->free() */ bool global:1; /* Goes into &init_user_ns */ + bool oldapi:1; /* Coming from mount(2) */ }; struct fs_context_operations { -- cgit v1.2.3 From 567f6a6eba0c09e5f502e0290e57651befa8aacb Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Tue, 14 Jul 2020 14:39:25 +0200 Subject: dma-direct: provide function to check physical memory area validity dma_coherent_ok() checks if a physical memory area fits a device's DMA constraints. Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 1 + kernel/dma/direct.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 5184735a0fe8..ab2e20cba951 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -69,6 +69,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, u64 dma_direct_get_required_mask(struct device *dev); gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, u64 *phys_mask); +bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 95866b647581..67f060b86a73 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -70,7 +70,7 @@ gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, return 0; } -static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) +bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { return phys_to_dma_direct(dev, phys) + size - 1 <= min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); -- cgit v1.2.3 From bd024e82e4cd95c7f1a475a55f99871936c2b2db Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 16 Jul 2020 12:28:16 +0100 Subject: asm-generic/mmiowb: Allow mmiowb_set_pending() when preemptible() Although mmiowb() is concerned only with serialising MMIO writes occuring in contexts where a spinlock is held, the call to mmiowb_set_pending() from the MMIO write accessors can occur in preemptible contexts, such as during driver probe() functions where ordering between CPUs is not usually a concern, assuming that the task migration path provides the necessary ordering guarantees. Unfortunately, the default implementation of mmiowb_set_pending() is not preempt-safe, as it makes use of a a per-cpu variable to track its internal state. This has been reported to generate the following splat on riscv: | BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 | caller is regmap_mmio_write32le+0x1c/0x46 | CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.8.0-rc3-hfu+ #1 | Call Trace: | walk_stackframe+0x0/0x7a | dump_stack+0x6e/0x88 | regmap_mmio_write32le+0x18/0x46 | check_preemption_disabled+0xa4/0xaa | regmap_mmio_write32le+0x18/0x46 | regmap_mmio_write+0x26/0x44 | regmap_write+0x28/0x48 | sifive_gpio_probe+0xc0/0x1da Although it's possible to fix the driver in this case, other splats have been seen from other drivers, including the infamous 8250 UART, and so it's better to address this problem in the mmiowb core itself. Fix mmiowb_set_pending() by using the raw_cpu_ptr() to get at the mmiowb state and then only updating the 'mmiowb_pending' field if we are not preemptible (i.e. we have a non-zero nesting count). Cc: Arnd Bergmann Cc: Paul Walmsley Cc: Guo Ren Cc: Michael Ellerman Reported-by: Palmer Dabbelt Reported-by: Emil Renner Berthing Tested-by: Emil Renner Berthing Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20200716112816.7356-1-will@kernel.org Signed-off-by: Will Deacon --- include/asm-generic/mmiowb.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/mmiowb.h b/include/asm-generic/mmiowb.h index 9439ff037b2d..5698fca3bf56 100644 --- a/include/asm-generic/mmiowb.h +++ b/include/asm-generic/mmiowb.h @@ -27,7 +27,7 @@ #include DECLARE_PER_CPU(struct mmiowb_state, __mmiowb_state); -#define __mmiowb_state() this_cpu_ptr(&__mmiowb_state) +#define __mmiowb_state() raw_cpu_ptr(&__mmiowb_state) #else #define __mmiowb_state() arch_mmiowb_state() #endif /* arch_mmiowb_state */ @@ -35,7 +35,9 @@ DECLARE_PER_CPU(struct mmiowb_state, __mmiowb_state); static inline void mmiowb_set_pending(void) { struct mmiowb_state *ms = __mmiowb_state(); - ms->mmiowb_pending = ms->nesting_count; + + if (likely(ms->nesting_count)) + ms->mmiowb_pending = ms->nesting_count; } static inline void mmiowb_spin_lock(void) -- cgit v1.2.3 From 7f5f81406e2b36785f1e25fe5209edd9dd3610d7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Jul 2020 16:37:25 -0700 Subject: rhashtable: drop duplicated word in Drop the doubled word "be" in a comment. Signed-off-by: Randy Dunlap Cc: Thomas Graf Cc: Herbert Xu Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 70ebef866cc8..d3432ee65de7 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -33,7 +33,7 @@ * of two or more hash tables when the rhashtable is being resized. * The end of the chain is marked with a special nulls marks which has * the least significant bit set but otherwise stores the address of - * the hash bucket. This allows us to be be sure we've found the end + * the hash bucket. This allows us to be sure we've found the end * of the right list. * The value stored in the hash bucket has BIT(0) used as a lock bit. * This bit must be atomically set before any changes are made to -- cgit v1.2.3 From de2b41be8fcccb2f5b6c480d35df590476344201 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 21 Jul 2020 11:34:48 +0200 Subject: x86, vmlinux.lds: Page-align end of ..page_aligned sections On x86-32 the idt_table with 256 entries needs only 2048 bytes. It is page-aligned, but the end of the .bss..page_aligned section is not guaranteed to be page-aligned. As a result, objects from other .bss sections may end up on the same 4k page as the idt_table, and will accidentially get mapped read-only during boot, causing unexpected page-faults when the kernel writes to them. This could be worked around by making the objects in the page aligned sections page sized, but that's wrong. Explicit sections which store only page aligned objects have an implicit guarantee that the object is alone in the page in which it is placed. That works for all objects except the last one. That's inconsistent. Enforcing page sized objects for these sections would wreckage memory sanitizers, because the object becomes artificially larger than it should be and out of bound access becomes legit. Align the end of the .bss..page_aligned and .data..page_aligned section on page-size so all objects places in these sections are guaranteed to have their own page. [ tglx: Amended changelog ] Signed-off-by: Joerg Roedel Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200721093448.10417-1-joro@8bytes.org --- arch/x86/kernel/vmlinux.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3bfc8dd8a43d..9a03e5b23135 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -358,6 +358,7 @@ SECTIONS .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; *(.bss..page_aligned) + . = ALIGN(PAGE_SIZE); *(BSS_MAIN) BSS_DECRYPTED . = ALIGN(PAGE_SIZE); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index db600ef218d7..052e0f05a984 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -341,7 +341,8 @@ #define PAGE_ALIGNED_DATA(page_align) \ . = ALIGN(page_align); \ - *(.data..page_aligned) + *(.data..page_aligned) \ + . = ALIGN(page_align); #define READ_MOSTLY_DATA(align) \ . = ALIGN(align); \ @@ -737,7 +738,9 @@ . = ALIGN(bss_align); \ .bss : AT(ADDR(.bss) - LOAD_OFFSET) { \ BSS_FIRST_SECTIONS \ + . = ALIGN(PAGE_SIZE); \ *(.bss..page_aligned) \ + . = ALIGN(PAGE_SIZE); \ *(.dynbss) \ *(BSS_MAIN) \ *(COMMON) \ -- cgit v1.2.3 From aca7ed091117d9b4ce499855c383119afb2819a2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Jul 2020 16:38:15 -0700 Subject: i2c: drop duplicated word in the header file Drop the doubled word "be" in a comment. Signed-off-by: Randy Dunlap Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index b8b8963f8bb9..ee328cf80bd9 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -56,7 +56,7 @@ struct property_entry; * on a bus (or read from them). Apart from two basic transfer functions to * transmit one message at a time, a more complex version can be used to * transmit an arbitrary number of messages without interruption. - * @count must be be less than 64k since msg.len is u16. + * @count must be less than 64k since msg.len is u16. */ int i2c_transfer_buffer_flags(const struct i2c_client *client, char *buf, int count, u16 flags); -- cgit v1.2.3 From 5df96f2b9f58a5d2dc1f30fe7de75e197f2c25f2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 23 Jul 2020 10:42:09 -0400 Subject: dm integrity: fix integrity recalculation that is improperly skipped Commit adc0daad366b62ca1bce3e2958a40b0b71a8b8b3 ("dm: report suspended device during destroy") broke integrity recalculation. The problem is dm_suspended() returns true not only during suspend, but also during resume. So this race condition could occur: 1. dm_integrity_resume calls queue_work(ic->recalc_wq, &ic->recalc_work) 2. integrity_recalc (&ic->recalc_work) preempts the current thread 3. integrity_recalc calls if (unlikely(dm_suspended(ic->ti))) goto unlock_ret; 4. integrity_recalc exits and no recalculating is done. To fix this race condition, add a function dm_post_suspending that is only true during the postsuspend phase and use it instead of dm_suspended(). Signed-off-by: Mikulas Patocka Fixes: adc0daad366b ("dm: report suspended device during destroy") Cc: stable vger kernel org # v4.18+ Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 4 ++-- drivers/md/dm.c | 17 +++++++++++++++++ include/linux/device-mapper.h | 1 + 3 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 81dc5ff08909..a83a1de1e03f 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2420,7 +2420,7 @@ static void integrity_writer(struct work_struct *w) unsigned prev_free_sectors; /* the following test is not needed, but it tests the replay code */ - if (unlikely(dm_suspended(ic->ti)) && !ic->meta_dev) + if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev) return; spin_lock_irq(&ic->endio_wait.lock); @@ -2481,7 +2481,7 @@ static void integrity_recalc(struct work_struct *w) next_chunk: - if (unlikely(dm_suspended(ic->ti))) + if (unlikely(dm_post_suspending(ic->ti))) goto unlock_ret; range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 52449afd58eb..5b9de2f71bb0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -143,6 +143,7 @@ EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr); #define DMF_NOFLUSH_SUSPENDING 5 #define DMF_DEFERRED_REMOVE 6 #define DMF_SUSPENDED_INTERNALLY 7 +#define DMF_POST_SUSPENDING 8 #define DM_NUMA_NODE NUMA_NO_NODE static int dm_numa_node = DM_NUMA_NODE; @@ -2408,6 +2409,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait) if (!dm_suspended_md(md)) { dm_table_presuspend_targets(map); set_bit(DMF_SUSPENDED, &md->flags); + set_bit(DMF_POST_SUSPENDING, &md->flags); dm_table_postsuspend_targets(map); } /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ @@ -2766,7 +2768,9 @@ retry: if (r) goto out_unlock; + set_bit(DMF_POST_SUSPENDING, &md->flags); dm_table_postsuspend_targets(map); + clear_bit(DMF_POST_SUSPENDING, &md->flags); out_unlock: mutex_unlock(&md->suspend_lock); @@ -2863,7 +2867,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE, DMF_SUSPENDED_INTERNALLY); + set_bit(DMF_POST_SUSPENDING, &md->flags); dm_table_postsuspend_targets(map); + clear_bit(DMF_POST_SUSPENDING, &md->flags); } static void __dm_internal_resume(struct mapped_device *md) @@ -3024,6 +3030,11 @@ int dm_suspended_md(struct mapped_device *md) return test_bit(DMF_SUSPENDED, &md->flags); } +static int dm_post_suspending_md(struct mapped_device *md) +{ + return test_bit(DMF_POST_SUSPENDING, &md->flags); +} + int dm_suspended_internally_md(struct mapped_device *md) { return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); @@ -3040,6 +3051,12 @@ int dm_suspended(struct dm_target *ti) } EXPORT_SYMBOL_GPL(dm_suspended); +int dm_post_suspending(struct dm_target *ti) +{ + return dm_post_suspending_md(dm_table_get_md(ti->table)); +} +EXPORT_SYMBOL_GPL(dm_post_suspending); + int dm_noflush_suspending(struct dm_target *ti) { return __noflush_suspending(dm_table_get_md(ti->table)); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 8750f2dc5613..73dec4b5d5be 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -426,6 +426,7 @@ const char *dm_device_name(struct mapped_device *md); int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid); struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct dm_target *ti); +int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); union map_info *dm_get_rq_mapinfo(struct request *rq); -- cgit v1.2.3 From 76be93fc0702322179bb0ea87295d820ee46ad14 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 23 Jul 2020 12:00:06 -0700 Subject: tcp: allow at most one TLP probe per flight Previously TLP may send multiple probes of new data in one flight. This happens when the sender is cwnd limited. After the initial TLP containing new data is sent, the sender receives another ACK that acks partial inflight. It may re-arm another TLP timer to send more, if no further ACK returns before the next TLP timeout (PTO) expires. The sender may send in theory a large amount of TLP until send queue is depleted. This only happens if the sender sees such irregular uncommon ACK pattern. But it is generally undesirable behavior during congestion especially. The original TLP design restrict only one TLP probe per inflight as published in "Reducing Web Latency: the Virtue of Gentle Aggression", SIGCOMM 2013. This patch changes TLP to send at most one probe per inflight. Note that if the sender is app-limited, TLP retransmits old data and did not have this issue. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++-- net/ipv4/tcp_input.c | 11 ++++++----- net/ipv4/tcp_output.c | 13 ++++++++----- 3 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9aac824c523c..a1bbaa1c1a3a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -220,7 +220,9 @@ struct tcp_sock { } rack; u16 advmss; /* Advertised MSS */ u8 compressed_ack; - u8 dup_ack_counter; + u8 dup_ack_counter:2, + tlp_retrans:1, /* TLP is a retransmission */ + unused:5; u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ @@ -243,7 +245,7 @@ struct tcp_sock { save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ syn_smc:1; /* SYN includes SMC */ - u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ + u32 tlp_high_seq; /* snd_nxt at the time of TLP */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9615e72656d1..518f04355fbf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3488,10 +3488,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) } } -/* This routine deals with acks during a TLP episode. - * We mark the end of a TLP episode on receiving TLP dupack or when - * ack is after tlp_high_seq. - * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. +/* This routine deals with acks during a TLP episode and ends an episode by + * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack */ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) { @@ -3500,7 +3498,10 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) if (before(ack, tp->tlp_high_seq)) return; - if (flag & FLAG_DSACKING_ACK) { + if (!tp->tlp_retrans) { + /* TLP of new data has been acknowledged */ + tp->tlp_high_seq = 0; + } else if (flag & FLAG_DSACKING_ACK) { /* This DSACK means original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; } else if (after(ack, tp->tlp_high_seq)) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5f5b2f0b0e60..0bc05d68cd74 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2624,6 +2624,11 @@ void tcp_send_loss_probe(struct sock *sk) int pcount; int mss = tcp_current_mss(sk); + /* At most one outstanding TLP */ + if (tp->tlp_high_seq) + goto rearm_timer; + + tp->tlp_retrans = 0; skb = tcp_send_head(sk); if (skb && tcp_snd_wnd_test(tp, skb, mss)) { pcount = tp->packets_out; @@ -2641,10 +2646,6 @@ void tcp_send_loss_probe(struct sock *sk) return; } - /* At most one outstanding TLP retransmission. */ - if (tp->tlp_high_seq) - goto rearm_timer; - if (skb_still_in_host_queue(sk, skb)) goto rearm_timer; @@ -2666,10 +2667,12 @@ void tcp_send_loss_probe(struct sock *sk) if (__tcp_retransmit_skb(sk, skb, 1)) goto rearm_timer; + tp->tlp_retrans = 1; + +probe_sent: /* Record snd_nxt for loss detection. */ tp->tlp_high_seq = tp->snd_nxt; -probe_sent: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); /* Reset s.t. tcp_rearm_rto will restart timer from now */ inet_csk(sk)->icsk_pending = 0; -- cgit v1.2.3 From 3bef735ad7b7d987069181e7b58588043cbd1509 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 23 Jul 2020 21:15:14 -0700 Subject: vfs/xattr: mm/shmem: kernfs: release simple xattr entry in a right way After commit fdc85222d58e ("kernfs: kvmalloc xattr value instead of kmalloc"), simple xattr entry is allocated with kvmalloc() instead of kmalloc(), so we should release it with kvfree() instead of kfree(). Fixes: fdc85222d58e ("kernfs: kvmalloc xattr value instead of kmalloc") Signed-off-by: Chengguang Xu Signed-off-by: Andrew Morton Acked-by: Hugh Dickins Acked-by: Tejun Heo Cc: Daniel Xu Cc: Chris Down Cc: Andreas Dilger Cc: Greg Kroah-Hartman Cc: Al Viro Cc: [5.7] Link: http://lkml.kernel.org/r/20200704051608.15043-1-cgxu519@mykernel.net Signed-off-by: Linus Torvalds --- include/linux/xattr.h | 3 ++- mm/shmem.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 47eaa34f8761..c5afaf8ca7a2 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -15,6 +15,7 @@ #include #include #include +#include #include struct inode; @@ -94,7 +95,7 @@ static inline void simple_xattrs_free(struct simple_xattrs *xattrs) list_for_each_entry_safe(xattr, node, &xattrs->head, list) { kfree(xattr->name); - kfree(xattr); + kvfree(xattr); } } diff --git a/mm/shmem.c b/mm/shmem.c index a0dbe62f8042..b2abca3f7f33 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3178,7 +3178,7 @@ static int shmem_initxattrs(struct inode *inode, new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len, GFP_KERNEL); if (!new_xattr->name) { - kfree(new_xattr); + kvfree(new_xattr); return -ENOMEM; } -- cgit v1.2.3 From e0b3e0b1a04367fc15c07f44e78361545b55357c Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 23 Jul 2020 21:15:46 -0700 Subject: io-mapping: indicate mapping failure The !ATOMIC_IOMAP version of io_maping_init_wc will always return success, even when the ioremap fails. Since the ATOMIC_IOMAP version returns NULL when the init fails, and callers check for a NULL return on error this is unexpected. During a device probe, where the ioremap failed, a crash can look like this: BUG: unable to handle page fault for address: 0000000000210000 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page Oops: 0002 [#1] PREEMPT SMP CPU: 0 PID: 177 Comm: RIP: 0010:fill_page_dma [i915] gen8_ppgtt_create [i915] i915_ppgtt_create [i915] intel_gt_init [i915] i915_gem_init [i915] i915_driver_probe [i915] pci_device_probe really_probe driver_probe_device The remap failure occurred much earlier in the probe. If it had been propagated, the driver would have exited with an error. Return NULL on ioremap failure. [akpm@linux-foundation.org: detect ioremap_wc() errors earlier] Fixes: cafaf14a5d8f ("io-mapping: Always create a struct to hold metadata about the io-mapping") Signed-off-by: Michael J. Ruhl Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Mike Rapoport Cc: Andy Shevchenko Cc: Chris Wilson Cc: Daniel Vetter Cc: Link: http://lkml.kernel.org/r/20200721171936.81563-1-michael.j.ruhl@intel.com Signed-off-by: Linus Torvalds --- include/linux/io-mapping.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0beaa3eba155..c75e4d3d8833 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -107,9 +107,12 @@ io_mapping_init_wc(struct io_mapping *iomap, resource_size_t base, unsigned long size) { + iomap->iomem = ioremap_wc(base, size); + if (!iomap->iomem) + return NULL; + iomap->base = base; iomap->size = size; - iomap->iomem = ioremap_wc(base, size); #if defined(pgprot_noncached_wc) /* archs can't agree on a name ... */ iomap->prot = pgprot_noncached_wc(PAGE_KERNEL); #elif defined(pgprot_writecombine) -- cgit v1.2.3 From c2b69f24ebd166a13cdc9909b50f33228895998b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 24 Jul 2020 10:50:22 +1000 Subject: flow_offload: Move rhashtable inclusion to the source file I noticed that touching linux/rhashtable.h causes lib/vsprintf.c to be rebuilt. This dependency came through a bogus inclusion in the file net/flow_offload.h. This patch moves it to the right place. This patch also removes a lingering rhashtable inclusion in cls_api created by the same commit. Fixes: 4e481908c51b ("flow_offload: move tc indirect block to...") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/flow_offload.h | 1 - net/core/flow_offload.c | 1 + net/sched/cls_api.c | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 6315324b9dc2..3eaf25f68b79 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -5,7 +5,6 @@ #include #include #include -#include struct flow_match { struct flow_dissector *dissector; diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index b739cfab796e..2076219b8ba5 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -4,6 +4,7 @@ #include #include #include +#include struct flow_rule *flow_rule_alloc(unsigned int num_actions) { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e62beec0d844..4619cb3cb0a8 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3