From bb737bbe48bea9854455cb61ea1dc06e92ce586c Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 20 Apr 2020 17:01:34 +0200 Subject: virtiofs: schedule blocking async replies in separate worker In virtiofs (unlike in regular fuse) processing of async replies is serialized. This can result in a deadlock in rare corner cases when there's a circular dependency between the completion of two or more async replies. Such a deadlock can be reproduced with xfstests:generic/503 if TEST_DIR == SCRATCH_MNT (which is a misconfiguration): - Process A is waiting for page lock in worker thread context and blocked (virtio_fs_requests_done_work()). - Process B is holding page lock and waiting for pending writes to finish (fuse_wait_on_page_writeback()). - Write requests are waiting in virtqueue and can't complete because worker thread is blocked on page lock (process A). Fix this by creating a unique work_struct for each async reply that can block (O_DIRECT read). Fixes: a62a8ef9d97d ("virtio-fs: add virtiofs filesystem") Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 1 + fs/fuse/fuse_i.h | 1 + fs/fuse/virtio_fs.c | 106 +++++++++++++++++++++++++++++++++++----------------- 3 files changed, 73 insertions(+), 35 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 9d67b830fb7a..d400b71b98d5 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -712,6 +712,7 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc, spin_unlock(&io->lock); ia->ap.args.end = fuse_aio_complete_req; + ia->ap.args.may_block = io->should_dirty; err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL); if (err) fuse_aio_complete_req(fc, &ia->ap.args, err); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index ca344bf71404..d7cde216fc87 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -249,6 +249,7 @@ struct fuse_args { bool out_argvar:1; bool page_zeroing:1; bool page_replace:1; + bool may_block:1; struct fuse_in_arg in_args[3]; struct fuse_arg out_args[2]; void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index bade74768903..0c6ef5d3c6ab 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -60,6 +60,12 @@ struct virtio_fs_forget { struct virtio_fs_forget_req req; }; +struct virtio_fs_req_work { + struct fuse_req *req; + struct virtio_fs_vq *fsvq; + struct work_struct done_work; +}; + static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, struct fuse_req *req, bool in_flight); @@ -485,19 +491,67 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) } /* Work function for request completion */ +static void virtio_fs_request_complete(struct fuse_req *req, + struct virtio_fs_vq *fsvq) +{ + struct fuse_pqueue *fpq = &fsvq->fud->pq; + struct fuse_conn *fc = fsvq->fud->fc; + struct fuse_args *args; + struct fuse_args_pages *ap; + unsigned int len, i, thislen; + struct page *page; + + /* + * TODO verify that server properly follows FUSE protocol + * (oh.uniq, oh.len) + */ + args = req->args; + copy_args_from_argbuf(args, req); + + if (args->out_pages && args->page_zeroing) { + len = args->out_args[args->out_numargs - 1].size; + ap = container_of(args, typeof(*ap), args); + for (i = 0; i < ap->num_pages; i++) { + thislen = ap->descs[i].length; + if (len < thislen) { + WARN_ON(ap->descs[i].offset); + page = ap->pages[i]; + zero_user_segment(page, len, thislen); + len = 0; + } else { + len -= thislen; + } + } + } + + spin_lock(&fpq->lock); + clear_bit(FR_SENT, &req->flags); + spin_unlock(&fpq->lock); + + fuse_request_end(fc, req); + spin_lock(&fsvq->lock); + dec_in_flight_req(fsvq); + spin_unlock(&fsvq->lock); +} + +static void virtio_fs_complete_req_work(struct work_struct *work) +{ + struct virtio_fs_req_work *w = + container_of(work, typeof(*w), done_work); + + virtio_fs_request_complete(w->req, w->fsvq); + kfree(w); +} + static void virtio_fs_requests_done_work(struct work_struct *work) { struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, done_work); struct fuse_pqueue *fpq = &fsvq->fud->pq; - struct fuse_conn *fc = fsvq->fud->fc; struct virtqueue *vq = fsvq->vq; struct fuse_req *req; - struct fuse_args_pages *ap; struct fuse_req *next; - struct fuse_args *args; - unsigned int len, i, thislen; - struct page *page; + unsigned int len; LIST_HEAD(reqs); /* Collect completed requests off the virtqueue */ @@ -515,38 +569,20 @@ static void virtio_fs_requests_done_work(struct work_struct *work) /* End requests */ list_for_each_entry_safe(req, next, &reqs, list) { - /* - * TODO verify that server properly follows FUSE protocol - * (oh.uniq, oh.len) - */ - args = req->args; - copy_args_from_argbuf(args, req); - - if (args->out_pages && args->page_zeroing) { - len = args->out_args[args->out_numargs - 1].size; - ap = container_of(args, typeof(*ap), args); - for (i = 0; i < ap->num_pages; i++) { - thislen = ap->descs[i].length; - if (len < thislen) { - WARN_ON(ap->descs[i].offset); - page = ap->pages[i]; - zero_user_segment(page, len, thislen); - len = 0; - } else { - len -= thislen; - } - } - } - - spin_lock(&fpq->lock); - clear_bit(FR_SENT, &req->flags); list_del_init(&req->list); - spin_unlock(&fpq->lock); - fuse_request_end(fc, req); - spin_lock(&fsvq->lock); - dec_in_flight_req(fsvq); - spin_unlock(&fsvq->lock); + /* blocking async request completes in a worker context */ + if (req->args->may_block) { + struct virtio_fs_req_work *w; + + w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL); + INIT_WORK(&w->done_work, virtio_fs_complete_req_work); + w->fsvq = fsvq; + w->req = req; + schedule_work(&w->done_work); + } else { + virtio_fs_request_complete(req, fsvq); + } } } -- cgit v1.2.3 From 0e9fb6f17ad5b386b75451328975a07d7d953c6d Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 19 Aug 2019 09:53:50 +0300 Subject: fuse: BUG_ON correction in fuse_dev_splice_write() commit 963545357202 ("fuse: reduce allocation size for splice_write") changed size of bufs array, so BUG_ON which checks the index of the array shold also be fixed. [SzM: turn BUG_ON into WARN_ON] Fixes: 963545357202 ("fuse: reduce allocation size for splice_write") Signed-off-by: Vasily Averin Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 97eec7522bf2..5c155437a455 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1977,8 +1977,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, struct pipe_buffer *ibuf; struct pipe_buffer *obuf; - BUG_ON(nbuf >= pipe->ring_size); - BUG_ON(tail == head); + if (WARN_ON(nbuf >= count || tail == head)) + goto out_free; + ibuf = &pipe->bufs[tail & mask]; obuf = &bufs[nbuf]; -- cgit v1.2.3 From 75d892588e959573b321895003462d88cae2cff3 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 28 Feb 2020 15:15:24 +0300 Subject: fuse: Update stale comment in queue_interrupt() Fixes: 04ec5af0776e "fuse: export fuse_end_request()" Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/fuse') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 5c155437a455..6bda7d498f1a 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -342,7 +342,7 @@ static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) list_add_tail(&req->intr_entry, &fiq->interrupts); /* * Pairs with smp_mb() implied by test_and_set_bit() - * from request_end(). + * from fuse_request_end(). */ smp_mb(); if (test_bit(FR_FINISHED, &req->flags)) { -- cgit v1.2.3 From 72ef5e52b3f74c0be47b20f5c434b7ecc830cf40 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:35 +0200 Subject: docs: fix broken references to text files Several references got broken due to txt to ReST conversion. Several of them can be automatically fixed with: scripts/documentation-file-ref-check --fix Reviewed-by: Mathieu Poirier # hwtracing/coresight/Kconfig Reviewed-by: Paul E. McKenney # memory-barrier.txt Acked-by: Alex Shi # translations/zh_CN Acked-by: Federico Vaga # translations/it_IT Acked-by: Marc Zyngier # kvm/arm64 Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/6f919ddb83a33b5f2a63b6b5f0575737bb2b36aa.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/memory-barriers.txt | 2 +- Documentation/process/submit-checklist.rst | 2 +- .../translations/it_IT/process/submit-checklist.rst | 2 +- Documentation/translations/ko_KR/memory-barriers.txt | 2 +- Documentation/translations/zh_CN/filesystems/sysfs.txt | 2 +- .../translations/zh_CN/process/submit-checklist.rst | 2 +- Documentation/virt/kvm/arm/pvtime.rst | 2 +- Documentation/virt/kvm/devices/vcpu.rst | 2 +- Documentation/virt/kvm/hypercalls.rst | 4 ++-- arch/powerpc/include/uapi/asm/kvm_para.h | 2 +- drivers/gpu/drm/Kconfig | 2 +- drivers/gpu/drm/drm_ioctl.c | 2 +- drivers/hwtracing/coresight/Kconfig | 2 +- fs/fat/Kconfig | 8 ++++---- fs/fuse/Kconfig | 2 +- fs/fuse/dev.c | 2 +- fs/overlayfs/Kconfig | 6 +++--- include/linux/mm.h | 4 ++-- include/uapi/linux/ethtool_netlink.h | 2 +- include/uapi/rdma/rdma_user_ioctl_cmds.h | 2 +- mm/gup.c | 12 ++++++------ virt/kvm/arm/vgic/vgic-mmio-v3.c | 2 +- virt/kvm/arm/vgic/vgic.h | 4 ++-- 23 files changed, 36 insertions(+), 36 deletions(-) (limited to 'fs/fuse') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index e1c355e84edd..eaabc3134294 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -620,7 +620,7 @@ because the CPUs that the Linux kernel supports don't do writes until they are certain (1) that the write will actually happen, (2) of the location of the write, and (3) of the value to be written. But please carefully read the "CONTROL DEPENDENCIES" section and the -Documentation/RCU/rcu_dereference.txt file: The compiler can and does +Documentation/RCU/rcu_dereference.rst file: The compiler can and does break dependencies in a great many highly creative ways. CPU 1 CPU 2 diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst index 8e56337d422d..3f8e9d5d95c2 100644 --- a/Documentation/process/submit-checklist.rst +++ b/Documentation/process/submit-checklist.rst @@ -107,7 +107,7 @@ and elsewhere regarding submitting Linux kernel patches. and why. 26) If any ioctl's are added by the patch, then also update - ``Documentation/ioctl/ioctl-number.rst``. + ``Documentation/userspace-api/ioctl/ioctl-number.rst``. 27) If your modified source code depends on or uses any of the kernel APIs or features that are related to the following ``Kconfig`` symbols, diff --git a/Documentation/translations/it_IT/process/submit-checklist.rst b/Documentation/translations/it_IT/process/submit-checklist.rst index 995ee69fab11..3e575502690f 100644 --- a/Documentation/translations/it_IT/process/submit-checklist.rst +++ b/Documentation/translations/it_IT/process/submit-checklist.rst @@ -117,7 +117,7 @@ sottomissione delle patch, in particolare sorgenti che ne spieghi la logica: cosa fanno e perché. 25) Se la patch aggiunge nuove chiamate ioctl, allora aggiornate - ``Documentation/ioctl/ioctl-number.rst``. + ``Documentation/userspace-api/ioctl/ioctl-number.rst``. 26) Se il codice che avete modificato dipende o usa una qualsiasi interfaccia o funzionalità del kernel che è associata a uno dei seguenti simboli diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index 2e831ece6e26..e50fe6541335 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -641,7 +641,7 @@ P 는 짝수 번호 캐시 라인에 저장되어 있고, 변수 B 는 홀수 리눅스 커널이 지원하는 CPU 들은 (1) 쓰기가 정말로 일어날지, (2) 쓰기가 어디에 이루어질지, 그리고 (3) 쓰여질 값을 확실히 알기 전까지는 쓰기를 수행하지 않기 때문입니다. 하지만 "컨트롤 의존성" 섹션과 -Documentation/RCU/rcu_dereference.txt 파일을 주의 깊게 읽어 주시기 바랍니다: +Documentation/RCU/rcu_dereference.rst 파일을 주의 깊게 읽어 주시기 바랍니다: 컴파일러는 매우 창의적인 많은 방법으로 종속성을 깰 수 있습니다. CPU 1 CPU 2 diff --git a/Documentation/translations/zh_CN/filesystems/sysfs.txt b/Documentation/translations/zh_CN/filesystems/sysfs.txt index ee1f37da5b23..a15c3ebdfa82 100644 --- a/Documentation/translations/zh_CN/filesystems/sysfs.txt +++ b/Documentation/translations/zh_CN/filesystems/sysfs.txt @@ -281,7 +281,7 @@ drivers/ 包含了每个已为特定总线上的设备而挂载的驱动程序 假定驱动没有跨越多个总线类型)。 fs/ 包含了一个为文件系统设立的目录。现在每个想要导出属性的文件系统必须 -在 fs/ 下创建自己的层次结构(参见Documentation/filesystems/fuse.txt)。 +在 fs/ 下创建自己的层次结构(参见Documentation/filesystems/fuse.rst)。 dev/ 包含两个子目录: char/ 和 block/。在这两个子目录中,有以 : 格式命名的符号链接。这些符号链接指向 sysfs 目录 diff --git a/Documentation/translations/zh_CN/process/submit-checklist.rst b/Documentation/translations/zh_CN/process/submit-checklist.rst index 8738c55e42a2..50386e0e42e7 100644 --- a/Documentation/translations/zh_CN/process/submit-checklist.rst +++ b/Documentation/translations/zh_CN/process/submit-checklist.rst @@ -97,7 +97,7 @@ Linux内核补丁提交清单 24) 所有内存屏障例如 ``barrier()``, ``rmb()``, ``wmb()`` 都需要源代码中的注 释来解释它们正在执行的操作及其原因的逻辑。 -25) 如果补丁添加了任何ioctl,那么也要更新 ``Documentation/ioctl/ioctl-number.rst`` +25) 如果补丁添加了任何ioctl,那么也要更新 ``Documentation/userspace-api/ioctl/ioctl-number.rst`` 26) 如果修改后的源代码依赖或使用与以下 ``Kconfig`` 符号相关的任何内核API或 功能,则在禁用相关 ``Kconfig`` 符号和/或 ``=m`` (如果该选项可用)的情况 diff --git a/Documentation/virt/kvm/arm/pvtime.rst b/Documentation/virt/kvm/arm/pvtime.rst index 2357dd2d8655..687b60d76ca9 100644 --- a/Documentation/virt/kvm/arm/pvtime.rst +++ b/Documentation/virt/kvm/arm/pvtime.rst @@ -76,5 +76,5 @@ It is advisable that one or more 64k pages are set aside for the purpose of these structures and not used for other purposes, this enables the guest to map the region using 64k pages and avoids conflicting attributes with other memory. -For the user space interface see Documentation/virt/kvm/devices/vcpu.txt +For the user space interface see Documentation/virt/kvm/devices/vcpu.rst section "3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL". diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index 9963e680770a..ca374d3fe085 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -110,5 +110,5 @@ Returns: Specifies the base address of the stolen time structure for this VCPU. The base address must be 64 byte aligned and exist within a valid guest memory -region. See Documentation/virt/kvm/arm/pvtime.txt for more information +region. See Documentation/virt/kvm/arm/pvtime.rst for more information including the layout of the stolen time structure. diff --git a/Documentation/virt/kvm/hypercalls.rst b/Documentation/virt/kvm/hypercalls.rst index dbaf207e560d..ed4fddd364ea 100644 --- a/Documentation/virt/kvm/hypercalls.rst +++ b/Documentation/virt/kvm/hypercalls.rst @@ -22,7 +22,7 @@ S390: number in R1. For further information on the S390 diagnose call as supported by KVM, - refer to Documentation/virt/kvm/s390-diag.txt. + refer to Documentation/virt/kvm/s390-diag.rst. PowerPC: It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers. @@ -30,7 +30,7 @@ PowerPC: KVM hypercalls uses 4 byte opcode, that are patched with 'hypercall-instructions' property inside the device tree's /hypervisor node. - For more information refer to Documentation/virt/kvm/ppc-pv.txt + For more information refer to Documentation/virt/kvm/ppc-pv.rst MIPS: KVM hypercalls use the HYPCALL instruction with code 0 and the hypercall diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h index be48c2215fa2..a809b1b44ddf 100644 --- a/arch/powerpc/include/uapi/asm/kvm_para.h +++ b/arch/powerpc/include/uapi/asm/kvm_para.h @@ -31,7 +31,7 @@ * Struct fields are always 32 or 64 bit aligned, depending on them being 32 * or 64 bit wide respectively. * - * See Documentation/virt/kvm/ppc-pv.txt + * See Documentation/virt/kvm/ppc-pv.rst */ struct kvm_vcpu_arch_shared { __u64 scratch1; diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 43594978958e..fb92be7e8aa7 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -161,7 +161,7 @@ config DRM_LOAD_EDID_FIRMWARE monitor are unable to provide appropriate EDID data. Since this feature is provided as a workaround for broken hardware, the default case is N. Details and instructions how to build your own - EDID data are given in Documentation/driver-api/edid.rst. + EDID data are given in Documentation/admin-guide/edid.rst. config DRM_DP_CEC bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support" diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 9e41972c4bbc..c2b8d2a953ae 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -741,7 +741,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = { * }; * * Please make sure that you follow all the best practices from - * ``Documentation/ioctl/botching-up-ioctls.rst``. Note that drm_ioctl() + * ``Documentation/process/botching-up-ioctls.rst``. Note that drm_ioctl() * automatically zero-extends structures, hence make sure you can add more stuff * at the end, i.e. don't put a variable sized array there. * diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig index 83e841be1081..02dbb5ca3bcf 100644 --- a/drivers/hwtracing/coresight/Kconfig +++ b/drivers/hwtracing/coresight/Kconfig @@ -107,7 +107,7 @@ config CORESIGHT_CPU_DEBUG can quickly get to know program counter (PC), secure state, exception level, etc. Before use debugging functionality, platform needs to ensure the clock domain and power domain are enabled - properly, please refer Documentation/trace/coresight-cpu-debug.rst + properly, please refer Documentation/trace/coresight/coresight-cpu-debug.rst for detailed description and the example for usage. config CORESIGHT_CTI diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig index 718163d0c621..ca31993dcb47 100644 --- a/fs/fat/Kconfig +++ b/fs/fat/Kconfig @@ -69,7 +69,7 @@ config VFAT_FS The VFAT support enlarges your kernel by about 10 KB and it only works if you said Y to the "DOS FAT fs support" above. Please read - the file for details. If + the file for details. If unsure, say Y. To compile this as a module, choose M here: the module will be called @@ -82,7 +82,7 @@ config FAT_DEFAULT_CODEPAGE help This option should be set to the codepage of your FAT filesystems. It can be overridden with the "codepage" mount option. - See for more information. + See for more information. config FAT_DEFAULT_IOCHARSET string "Default iocharset for FAT" @@ -96,7 +96,7 @@ config FAT_DEFAULT_IOCHARSET Note that "utf8" is not recommended for FAT filesystems. If unsure, you shouldn't set "utf8" here - select the next option instead if you would like to use UTF-8 encoded file names by default. - See for more information. + See for more information. Enable any character sets you need in File Systems/Native Language Support. @@ -114,4 +114,4 @@ config FAT_DEFAULT_UTF8 Say Y if you use UTF-8 encoding for file names, N otherwise. - See for more information. + See for more information. diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig index eb2a585572dc..774b2618018a 100644 --- a/fs/fuse/Kconfig +++ b/fs/fuse/Kconfig @@ -12,7 +12,7 @@ config FUSE_FS although chances are your distribution already has that library installed if you've installed the "fuse" package itself. - See for more information. + See for more information. See for needed library/utility version. If you want to develop a userspace FS, or if you want to use diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 97eec7522bf2..c7a65cf2bcca 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2081,7 +2081,7 @@ static void end_polls(struct fuse_conn *fc) * The same effect is usually achievable through killing the filesystem daemon * and all users of the filesystem. The exception is the combination of an * asynchronous request and the tricky deadlock (see - * Documentation/filesystems/fuse.txt). + * Documentation/filesystems/fuse.rst). * * Aborting requests under I/O goes as follows: 1: Separate out unlocked * requests, they should be finished off immediately. Locked requests will be diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 714c14c47ca5..dd188c7996b3 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -9,7 +9,7 @@ config OVERLAY_FS 'lower' filesystem is either hidden or, in the case of directories, merged with the 'upper' object. - For more information see Documentation/filesystems/overlayfs.txt + For more information see Documentation/filesystems/overlayfs.rst config OVERLAY_FS_REDIRECT_DIR bool "Overlayfs: turn on redirect directory feature by default" @@ -38,7 +38,7 @@ config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW If backward compatibility is not an issue, then it is safe and recommended to say N here. - For more information, see Documentation/filesystems/overlayfs.txt + For more information, see Documentation/filesystems/overlayfs.rst If unsure, say Y. @@ -103,7 +103,7 @@ config OVERLAY_FS_XINO_AUTO If compatibility with applications that expect 32bit inodes is not an issue, then it is safe and recommended to say Y here. - For more information, see Documentation/filesystems/overlayfs.txt + For more information, see Documentation/filesystems/overlayfs.rst If unsure, say N. diff --git a/include/linux/mm.h b/include/linux/mm.h index 5a323422d783..1f2850465f59 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1219,7 +1219,7 @@ void unpin_user_pages(struct page **pages, unsigned long npages); * used to track the pincount (instead using of the GUP_PIN_COUNTING_BIAS * scheme). * - * For more information, please see Documentation/vm/pin_user_pages.rst. + * For more information, please see Documentation/core-api/pin_user_pages.rst. * * @page: pointer to page to be queried. * @Return: True, if it is likely that the page has been "dma-pinned". @@ -2834,7 +2834,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, * releasing pages: get_user_pages*() pages must be released via put_page(), * while pin_user_pages*() pages must be released via unpin_user_page(). * - * Please see Documentation/vm/pin_user_pages.rst for more information. + * Please see Documentation/core-api/pin_user_pages.rst for more information. */ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 7fde76366ba4..1711e57f7848 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -2,7 +2,7 @@ /* * include/uapi/linux/ethtool_netlink.h - netlink interface for ethtool * - * See Documentation/networking/ethtool-netlink.txt in kernel source tree for + * See Documentation/networking/ethtool-netlink.rst in kernel source tree for * doucumentation of the interface. */ diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h index 7b1ec806f8f9..38ab7accb7be 100644 --- a/include/uapi/rdma/rdma_user_ioctl_cmds.h +++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h @@ -36,7 +36,7 @@ #include #include -/* Documentation/ioctl/ioctl-number.rst */ +/* Documentation/userspace-api/ioctl/ioctl-number.rst */ #define RDMA_IOCTL_MAGIC 0x1b #define RDMA_VERBS_IOCTL \ _IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr) diff --git a/mm/gup.c b/mm/gup.c index 6076df8e04a4..81e4d0b377fd 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2843,9 +2843,9 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast); * the arguments here are identical. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please - * see Documentation/vm/pin_user_pages.rst for further details. + * see Documentation/core-api/pin_user_pages.rst for further details. * - * This is intended for Case 1 (DIO) in Documentation/vm/pin_user_pages.rst. It + * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It * is NOT intended for Case 2 (RDMA: long-term pins). */ int pin_user_pages_fast(unsigned long start, int nr_pages, @@ -2883,9 +2883,9 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast); * the arguments here are identical. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please - * see Documentation/vm/pin_user_pages.rst for details. + * see Documentation/core-api/pin_user_pages.rst for details. * - * This is intended for Case 1 (DIO) in Documentation/vm/pin_user_pages.rst. It + * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It * is NOT intended for Case 2 (RDMA: long-term pins). */ long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, @@ -2919,9 +2919,9 @@ EXPORT_SYMBOL(pin_user_pages_remote); * FOLL_PIN is set. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please - * see Documentation/vm/pin_user_pages.rst for details. + * see Documentation/core-api/pin_user_pages.rst for details. * - * This is intended for Case 1 (DIO) in Documentation/vm/pin_user_pages.rst. It + * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It * is NOT intended for Case 2 (RDMA: long-term pins). */ long pin_user_pages(unsigned long start, unsigned long nr_pages, diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index e72dcc454247..859464fd413f 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -301,7 +301,7 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu, * pending state of interrupt is latched in pending_latch variable. * Userspace will save and restore pending state and line_level * separately. - * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.txt + * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst * for handling of ISPENDR and ICPENDR. */ for (i = 0; i < len * 8; i++) { diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 769e4802645e..64fcd7511110 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -42,7 +42,7 @@ VGIC_AFFINITY_LEVEL(val, 3)) /* - * As per Documentation/virt/kvm/devices/arm-vgic-v3.txt, + * As per Documentation/virt/kvm/devices/arm-vgic-v3.rst, * below macros are defined for CPUREG encoding. */ #define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK 0x000000000000c000 @@ -63,7 +63,7 @@ KVM_REG_ARM_VGIC_SYSREG_OP2_MASK) /* - * As per Documentation/virt/kvm/devices/arm-vgic-its.txt, + * As per Documentation/virt/kvm/devices/arm-vgic-its.rst, * below macros are defined for ITS table entry encoding. */ #define KVM_ITS_CTE_VALID_SHIFT 63 -- cgit v1.2.3 From cf576c58b3a283333fc6e9a7c1c8e5342fa59b97 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Tue, 12 May 2020 10:29:04 +0800 Subject: fuse: invalidate inode attr in writeback cache mode Under writeback mode, inode->i_blocks is not updated, making utils du read st.blocks as 0. For example, when using virtiofs (cache=always & nondax mode) with writeback_cache enabled, writing a new file and check its disk usage with du, du reports 0 usage. # uname -r 5.6.0-rc6+ # mount -t virtiofs virtiofs /mnt/virtiofs # rm -f /mnt/virtiofs/testfile # create new file and do extend write # xfs_io -fc "pwrite 0 4k" /mnt/virtiofs/testfile wrote 4096/4096 bytes at offset 0 4 KiB, 1 ops; 0.0001 sec (28.103 MiB/sec and 7194.2446 ops/sec) # du -k /mnt/virtiofs/testfile 0 <==== disk usage is 0 # stat -c %s,%b /mnt/virtiofs/testfile 4096,0 <==== i_size is correct, but st_blocks is 0 Fix it by invalidating attr in fuse_flush(), so we get up-to-date attr from server on next getattr. Signed-off-by: Eryu Guan Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d400b71b98d5..262c5e20f324 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -445,8 +445,9 @@ static int fuse_flush(struct file *file, fl_owner_t id) if (is_bad_inode(inode)) return -EIO; + err = 0; if (fc->no_flush) - return 0; + goto inval_attr_out; err = write_inode_now(inode, 1); if (err) @@ -475,6 +476,14 @@ static int fuse_flush(struct file *file, fl_owner_t id) fc->no_flush = 1; err = 0; } + +inval_attr_out: + /* + * In memory i_blocks is not maintained by fuse, if writeback cache is + * enabled, i_blocks from cached attr may not be accurate. + */ + if (!err && fc->writeback_cache) + fuse_invalidate_attr(inode); return err; } -- cgit v1.2.3 From 614c026e8a46636198da93ec30719f93975bb26a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 19 May 2020 14:50:37 +0200 Subject: fuse: always flush dirty data on close(2) We want cached data to synced with the userspace filesystem on close(), for example to allow getting correct st_blocks value. Do this regardless of whether the userspace filesystem implements a FLUSH method or not. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 262c5e20f324..4aa750d08d62 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -445,10 +445,6 @@ static int fuse_flush(struct file *file, fl_owner_t id) if (is_bad_inode(inode)) return -EIO; - err = 0; - if (fc->no_flush) - goto inval_attr_out; - err = write_inode_now(inode, 1); if (err) return err; @@ -461,6 +457,10 @@ static int fuse_flush(struct file *file, fl_owner_t id) if (err) return err; + err = 0; + if (fc->no_flush) + goto inval_attr_out; + memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; inarg.lock_owner = fuse_lock_owner_id(fc, id); -- cgit v1.2.3 From 5157da2ca42cbeca01709e29ce7b797cffed2432 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 19 May 2020 14:50:37 +0200 Subject: fuse: always allow query of st_dev Fuse mounts without "allow_other" are off-limits to all non-owners. Yet it makes sense to allow querying st_dev on the root, since this value is provided by the kernel, not the userspace filesystem. Allow statx(2) with a zero request mask to succeed on a fuse mounts for all users. Reported-by: Nikolaus Rath Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs/fuse') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index de1e2fde60bd..26f028bc760b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1689,8 +1689,18 @@ static int fuse_getattr(const struct path *path, struct kstat *stat, struct inode *inode = d_inode(path->dentry); struct fuse_conn *fc = get_fuse_conn(inode); - if (!fuse_allow_current_process(fc)) + if (!fuse_allow_current_process(fc)) { + if (!request_mask) { + /* + * If user explicitly requested *nothing* then don't + * error out, but return st_dev only. + */ + stat->result_mask = 0; + stat->dev = inode->i_sb->s_dev; + return 0; + } return -EACCES; + } return fuse_update_get_attr(inode, NULL, stat, request_mask, flags); } -- cgit v1.2.3 From 7fd3abfa8dd7c08ecacd25b2f9f9e1d3fb642440 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 4 May 2020 14:33:15 -0400 Subject: virtiofs: do not use fuse_fill_super_common() for device installation fuse_fill_super_common() allocates and installs one fuse_device. Hence virtiofs allocates and install all fuse devices by itself except one. This makes logic little twisted. There does not seem to be any real need that why virtiofs can't allocate and install all fuse devices itself. So opt out of fuse device allocation and installation while calling fuse_fill_super_common(). Regular fuse still wants fuse_fill_super_common() to install fuse_device. It needs to prevent against races where two mounters are trying to mount fuse using same fd. In that case one will succeed while other will get -EINVAL. virtiofs does not have this issue because sget_fc() resolves the race w.r.t multiple mounters and only one instance of virtio_fs_fill_super() should be in progress for same filesystem. Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 19 ++++++++++++------- fs/fuse/virtio_fs.c | 9 +++------ 2 files changed, 15 insertions(+), 13 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 95d712d44ca1..6fae66cc096a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1113,7 +1113,7 @@ EXPORT_SYMBOL_GPL(fuse_dev_free); int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) { - struct fuse_dev *fud; + struct fuse_dev *fud = NULL; struct fuse_conn *fc = get_fuse_conn_super(sb); struct inode *root; struct dentry *root_dentry; @@ -1155,9 +1155,12 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) if (sb->s_user_ns != &init_user_ns) sb->s_xattr = fuse_no_acl_xattr_handlers; - fud = fuse_dev_alloc_install(fc); - if (!fud) - goto err; + if (ctx->fudptr) { + err = -ENOMEM; + fud = fuse_dev_alloc_install(fc); + if (!fud) + goto err; + } fc->dev = sb->s_dev; fc->sb = sb; @@ -1191,7 +1194,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) mutex_lock(&fuse_mutex); err = -EINVAL; - if (*ctx->fudptr) + if (ctx->fudptr && *ctx->fudptr) goto err_unlock; err = fuse_ctl_add_conn(fc); @@ -1200,7 +1203,8 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - *ctx->fudptr = fud; + if (ctx->fudptr) + *ctx->fudptr = fud; mutex_unlock(&fuse_mutex); return 0; @@ -1208,7 +1212,8 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) mutex_unlock(&fuse_mutex); dput(root_dentry); err_dev_free: - fuse_dev_free(fud); + if (fud) + fuse_dev_free(fud); err: return err; } diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 0c6ef5d3c6ab..4c4ef5d69298 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1103,7 +1103,7 @@ static int virtio_fs_fill_super(struct super_block *sb) err = -ENOMEM; /* Allocate fuse_dev for hiprio and notification queues */ - for (i = 0; i < VQ_REQUEST; i++) { + for (i = 0; i < fs->nvqs; i++) { struct virtio_fs_vq *fsvq = &fs->vqs[i]; fsvq->fud = fuse_dev_alloc(); @@ -1111,18 +1111,15 @@ static int virtio_fs_fill_super(struct super_block *sb) goto err_free_fuse_devs; } - ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud; + /* virtiofs allocates and installs its own fuse devices */ + ctx.fudptr = NULL; err = fuse_fill_super_common(sb, &ctx); if (err < 0) goto err_free_fuse_devs; - fc = fs->vqs[VQ_REQUEST].fud->fc; - for (i = 0; i < fs->nvqs; i++) { struct virtio_fs_vq *fsvq = &fs->vqs[i]; - if (i == VQ_REQUEST) - continue; /* already initialized */ fuse_dev_install(fsvq->fud, fc); } -- cgit v1.2.3 From 00589386172ac577e64e3d67a3c5d4968174dcad Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 19 May 2020 14:50:37 +0200 Subject: fuse: use dump_page Instead of custom page dumping, use the standard helper. Reported-by: Matthew Wilcox Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6bda7d498f1a..1cfc68f8fea9 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -772,8 +772,7 @@ static int fuse_check_page(struct page *page) 1 << PG_lru | 1 << PG_active | 1 << PG_reclaim))) { - pr_warn("trying to steal weird page\n"); - pr_warn(" page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping); + dump_page(page, "fuse: trying to steal weird page"); return 1; } return 0; -- cgit v1.2.3 From a5005c3cda6eeb6b95645e6cc32f58dafeffc976 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 19 May 2020 14:50:37 +0200 Subject: fuse: fix weird page warning When PageWaiters was added, updating this check was missed. Reported-by: Nikolaus Rath Reported-by: Hugh Dickins Fixes: 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/fuse') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 1cfc68f8fea9..b8962581c699 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -771,7 +771,8 @@ static int fuse_check_page(struct page *page) 1 << PG_uptodate | 1 << PG_lru | 1 << PG_active | - 1 << PG_reclaim))) { + 1 << PG_reclaim | + 1 << PG_waiters))) { dump_page(page, "fuse: trying to steal weird page"); return 1; } -- cgit v1.2.3 From 32f98877c57bee6bc27f443a96f49678a2cd6a50 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 19 May 2020 14:50:37 +0200 Subject: fuse: don't check refcount after stealing page page_count() is unstable. Unless there has been an RCU grace period between when the page was removed from the page cache and now, a speculative reference may exist from the page cache. Reported-by: Matthew Wilcox Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/fuse') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index b8962581c699..ec97cabe51ce 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -764,7 +764,6 @@ static int fuse_check_page(struct page *page) { if (page_mapcount(page) || page->mapping != NULL || - page_count(page) != 1 || (page->flags & PAGE_FLAGS_CHECK_AT_PREP & ~(1 << PG_locked | 1 << PG_referenced | -- cgit v1.2.3 From 5ddd9ced9aef6cfa76af27d384c17c9e2d610ce8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 19 May 2020 14:50:38 +0200 Subject: fuse: update attr_version counter on fuse_notify_inval_inode() A GETATTR request can race with FUSE_NOTIFY_INVAL_INODE, resulting in the attribute cache being updated with stale information after the invalidation. Fix this by bumping the attribute version in fuse_reverse_inval_inode(). Reported-by: Krzysztof Rusek Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/fuse') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6fae66cc096a..5b4aebf5821f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -321,6 +321,8 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, loff_t offset, loff_t len) { + struct fuse_conn *fc = get_fuse_conn_super(sb); + struct fuse_inode *fi; struct inode *inode; pgoff_t pg_start; pgoff_t pg_end; @@ -329,6 +331,11 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, if (!inode) return -ENOENT; + fi = get_fuse_inode(inode); + spin_lock(&fi->lock); + fi->attr_version = atomic64_inc_return(&fc->attr_version); + spin_unlock(&fi->lock); + fuse_invalidate_attr(inode); forget_all_cached_acls(inode); if (offset >= 0) { -- cgit v1.2.3 From 6b2fb79963fbed7db3ef850926d913518fd5c62f Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Thu, 19 Sep 2019 17:11:20 +0300 Subject: fuse: optimize writepages search Re-work fi->writepages, replacing list with rb-tree. This improves performance because kernel fuse iterates through fi->writepages for each writeback page and typical number of entries is about 800 (for 100MB of fuse writeback). Before patch: 10240+0 records in 10240+0 records out 10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s 2 1 0 57445400 40416 6323676 0 0 33 374743 8633 19210 1 8 88 3 0 29.86% [kernel] [k] _raw_spin_lock 26.62% [fuse] [k] fuse_page_is_writeback After patch: 10240+0 records in 10240+0 records out 10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s 2 9 0 53676040 31744 10265984 0 0 64 854790 10956 48387 1 6 88 6 0 23.55% [kernel] [k] copy_user_enhanced_fast_string 9.87% [kernel] [k] __memcpy 3.10% [kernel] [k] _raw_spin_lock Signed-off-by: Maxim Patlasov Signed-off-by: Vasily Averin Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 62 ++++++++++++++++++++++++++++++++++++++++++++------------ fs/fuse/fuse_i.h | 2 +- 2 files changed, 50 insertions(+), 14 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4aa750d08d62..15812a8b3834 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -357,7 +357,7 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) struct fuse_writepage_args { struct fuse_io_args ia; - struct list_head writepages_entry; + struct rb_node writepages_entry; struct list_head queue_entry; struct fuse_writepage_args *next; struct inode *inode; @@ -366,17 +366,23 @@ struct fuse_writepage_args { static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi, pgoff_t idx_from, pgoff_t idx_to) { - struct fuse_writepage_args *wpa; + struct rb_node *n; + + n = fi->writepages.rb_node; - list_for_each_entry(wpa, &fi->writepages, writepages_entry) { + while (n) { + struct fuse_writepage_args *wpa; pgoff_t curr_index; + wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry); WARN_ON(get_fuse_inode(wpa->inode) != fi); curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT; - if (idx_from < curr_index + wpa->ia.ap.num_pages && - curr_index <= idx_to) { + if (idx_from >= curr_index + wpa->ia.ap.num_pages) + n = n->rb_right; + else if (idx_to < curr_index) + n = n->rb_left; + else return wpa; - } } return NULL; } @@ -1624,7 +1630,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct backing_dev_info *bdi = inode_to_bdi(inode); int i; - list_del(&wpa->writepages_entry); + rb_erase(&wpa->writepages_entry, &fi->writepages); for (i = 0; i < ap->num_pages; i++) { dec_wb_stat(&bdi->wb, WB_WRITEBACK); dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP); @@ -1712,6 +1718,36 @@ __acquires(fi->lock) } } +static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa) +{ + pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT; + pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + + WARN_ON(!wpa->ia.ap.num_pages); + while (*p) { + struct fuse_writepage_args *curr; + pgoff_t curr_index; + + parent = *p; + curr = rb_entry(parent, struct fuse_writepage_args, + writepages_entry); + WARN_ON(curr->inode != wpa->inode); + curr_index = curr->ia.write.in.offset >> PAGE_SHIFT; + + if (idx_from >= curr_index + curr->ia.ap.num_pages) + p = &(*p)->rb_right; + else if (idx_to < curr_index) + p = &(*p)->rb_left; + else + return (void) WARN_ON(true); + } + + rb_link_node(&wpa->writepages_entry, parent, p); + rb_insert_color(&wpa->writepages_entry, root); +} + static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, int error) { @@ -1730,7 +1766,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args, wpa->next = next->next; next->next = NULL; next->ia.ff = fuse_file_get(wpa->ia.ff); - list_add(&next->writepages_entry, &fi->writepages); + tree_insert(&fi->writepages, next); /* * Skip fuse_flush_writepages() to make it easy to crop requests @@ -1865,7 +1901,7 @@ static int fuse_writepage_locked(struct page *page) inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); spin_lock(&fi->lock); - list_add(&wpa->writepages_entry, &fi->writepages); + tree_insert(&fi->writepages, wpa); list_add_tail(&wpa->queue_entry, &fi->queued_writes); fuse_flush_writepages(inode); spin_unlock(&fi->lock); @@ -1977,10 +2013,10 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa, WARN_ON(new_ap->num_pages != 0); spin_lock(&fi->lock); - list_del(&new_wpa->writepages_entry); + rb_erase(&new_wpa->writepages_entry, &fi->writepages); old_wpa = fuse_find_writeback(fi, page->index, page->index); if (!old_wpa) { - list_add(&new_wpa->writepages_entry, &fi->writepages); + tree_insert(&fi->writepages, new_wpa); spin_unlock(&fi->lock); return false; } @@ -2095,7 +2131,7 @@ static int fuse_writepages_fill(struct page *page, wpa->inode = inode; spin_lock(&fi->lock); - list_add(&wpa->writepages_entry, &fi->writepages); + tree_insert(&fi->writepages, wpa); spin_unlock(&fi->lock); data->wpa = wpa; @@ -3405,5 +3441,5 @@ void fuse_init_file_inode(struct inode *inode) INIT_LIST_HEAD(&fi->queued_writes); fi->writectr = 0; init_waitqueue_head(&fi->page_waitq); - INIT_LIST_HEAD(&fi->writepages); + fi->writepages = RB_ROOT; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d7cde216fc87..740a8a7d7ae6 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -111,7 +111,7 @@ struct fuse_inode { wait_queue_head_t page_waitq; /* List of writepage requestst (pending or sent) */ - struct list_head writepages; + struct rb_root writepages; }; /* readdir cache (directory only) */ -- cgit v1.2.3 From 2c4656dfd994538176db30ce09c02cc0dfc361ae Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 20 May 2020 11:39:35 +0200 Subject: fuse: fix copy_file_range cache issues a) Dirty cache needs to be written back not just in the writeback_cache case, since the dirty pages may come from memory maps. b) The fuse_writeback_range() helper takes an inclusive interval, so the end position needs to be pos+len-1 instead of pos+len. Fixes: 88bc7d5097a1 ("fuse: add support for copy_file_range()") Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 15812a8b3834..d365008fda44 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3325,13 +3325,11 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; - if (fc->writeback_cache) { - inode_lock(inode_in); - err = fuse_writeback_range(inode_in, pos_in, pos_in + len); - inode_unlock(inode_in); - if (err) - return err; - } + inode_lock(inode_in); + err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1); + inode_unlock(inode_in); + if (err) + return err; inode_lock(inode_out); @@ -3339,11 +3337,9 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (err) goto out; - if (fc->writeback_cache) { - err = fuse_writeback_range(inode_out, pos_out, pos_out + len); - if (err) - goto out; - } + err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1); + if (err) + goto out; if (is_unstable) set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); -- cgit v1.2.3 From 9b46418c40fe910e6537618f9932a8be78a3dd6c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 20 May 2020 11:39:35 +0200 Subject: fuse: copy_file_range should truncate cache After the copy operation completes the cache is not up-to-date. Truncate all pages in the interval that has successfully been copied. Truncating completely copied dirty pages is okay, since the data has been overwritten anyway. Truncating partially copied dirty pages is not okay; add a comment for now. Fixes: 88bc7d5097a1 ("fuse: add support for copy_file_range()") Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d365008fda44..336d1cf72da0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3337,6 +3337,24 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (err) goto out; + /* + * Write out dirty pages in the destination file before sending the COPY + * request to userspace. After the request is completed, truncate off + * pages (including partial ones) from the cache that have been copied, + * since these contain stale data at that point. + * + * This should be mostly correct, but if the COPY writes to partial + * pages (at the start or end) and the parts not covered by the COPY are + * written through a memory map after calling fuse_writeback_range(), + * then these partial page modifications will be lost on truncation. + * + * It is unlikely that someone would rely on such mixed style + * modifications. Yet this does give less guarantees than if the + * copying was performed with write(2). + * + * To fix this a i_mmap_sem style lock could be used to prevent new + * faults while the copy is ongoing. + */ err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1); if (err) goto out; @@ -3360,6 +3378,10 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (err) goto out; + truncate_inode_pages_range(inode_out->i_mapping, + ALIGN_DOWN(pos_out, PAGE_SIZE), + ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); + if (fc->writeback_cache) { fuse_write_update_size(inode_out, pos_out + outarg.size); file_update_time(file_out); -- cgit v1.2.3 From c928f642c29a5ffb02e16f2430b42b876dde69de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 May 2020 17:58:16 +0200 Subject: fs: rename pipe_buf ->steal to ->try_steal And replace the arcane return value convention with a simple bool where true means success and false means failure. [AV: braino fix folded in] Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- drivers/char/virtio_console.c | 2 +- fs/fuse/dev.c | 2 +- fs/pipe.c | 34 ++++++++++++++++------------------ fs/splice.c | 40 ++++++++++++++++++++-------------------- include/linux/pipe_fs_i.h | 34 +++++++++++++++++----------------- kernel/relay.c | 6 +++--- 6 files changed, 58 insertions(+), 60 deletions(-) (limited to 'fs/fuse') diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 3cbaec925606..00c5e3acee46 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -871,7 +871,7 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf, return 0; /* Try lock this page */ - if (pipe_buf_steal(pipe, buf) == 0) { + if (pipe_buf_try_steal(pipe, buf)) { /* Get reference and unlock page for moving */ get_page(buf->page); unlock_page(buf->page); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 97eec7522bf2..7a2b2de87b1f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -805,7 +805,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) if (cs->len != PAGE_SIZE) goto out_fallback; - if (pipe_buf_steal(cs->pipe, buf) != 0) + if (!pipe_buf_try_steal(cs->pipe, buf)) goto out_fallback; newpage = buf->page; diff --git a/fs/pipe.c b/fs/pipe.c index 58890897402a..c7c4fb5f345f 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -140,21 +140,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, put_page(page); } -static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { struct page *page = buf->page; - if (page_count(page) == 1) { - memcg_kmem_uncharge_page(page, 0); - __SetPageLocked(page); - return 0; - } - return 1; + if (page_count(page) != 1) + return false; + memcg_kmem_uncharge_page(page, 0); + __SetPageLocked(page); + return true; } /** - * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer + * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to attempt to steal * @@ -165,8 +164,8 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, * he wishes; the typical use is insertion into a different file * page cache. */ -int generic_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { struct page *page = buf->page; @@ -177,12 +176,11 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe, */ if (page_count(page) == 1) { lock_page(page); - return 0; + return true; } - - return 1; + return false; } -EXPORT_SYMBOL(generic_pipe_buf_steal); +EXPORT_SYMBOL(generic_pipe_buf_try_steal); /** * generic_pipe_buf_get - get a reference to a &struct pipe_buffer @@ -216,9 +214,9 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe, EXPORT_SYMBOL(generic_pipe_buf_release); static const struct pipe_buf_operations anon_pipe_buf_ops = { - .release = anon_pipe_buf_release, - .steal = anon_pipe_buf_steal, - .get = generic_pipe_buf_get, + .release = anon_pipe_buf_release, + .try_steal = anon_pipe_buf_try_steal, + .get = generic_pipe_buf_get, }; /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */ diff --git a/fs/splice.c b/fs/splice.c index bc834073cf74..2c2f45a9afc0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -44,8 +44,8 @@ * addition of remove_mapping(). If success is returned, the caller may * attempt to reuse this page for another destination. */ -static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { struct page *page = buf->page; struct address_space *mapping; @@ -76,7 +76,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, */ if (remove_mapping(mapping, page)) { buf->flags |= PIPE_BUF_FLAG_LRU; - return 0; + return true; } } @@ -86,7 +86,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, */ out_unlock: unlock_page(page); - return 1; + return false; } static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, @@ -139,26 +139,26 @@ error: } const struct pipe_buf_operations page_cache_pipe_buf_ops = { - .confirm = page_cache_pipe_buf_confirm, - .release = page_cache_pipe_buf_release, - .steal = page_cache_pipe_buf_steal, - .get = generic_pipe_buf_get, + .confirm = page_cache_pipe_buf_confirm, + .release = page_cache_pipe_buf_release, + .try_steal = page_cache_pipe_buf_try_steal, + .get = generic_pipe_buf_get, }; -static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +static bool user_page_pipe_buf_try_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) - return 1; + return false; buf->flags |= PIPE_BUF_FLAG_LRU; - return generic_pipe_buf_steal(pipe, buf); + return generic_pipe_buf_try_steal(pipe, buf); } static const struct pipe_buf_operations user_page_pipe_buf_ops = { - .release = page_cache_pipe_buf_release, - .steal = user_page_pipe_buf_steal, - .get = generic_pipe_buf_get, + .release = page_cache_pipe_buf_release, + .try_steal = user_page_pipe_buf_try_steal, + .get = generic_pipe_buf_get, }; static void wakeup_pipe_readers(struct pipe_inode_info *pipe) @@ -330,15 +330,15 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, EXPORT_SYMBOL(generic_file_splice_read); const struct pipe_buf_operations default_pipe_buf_ops = { - .release = generic_pipe_buf_release, - .steal = generic_pipe_buf_steal, - .get = generic_pipe_buf_get, + .release = generic_pipe_buf_release, + .try_steal = generic_pipe_buf_try_steal, + .get = generic_pipe_buf_get, }; /* Pipe buffer operations for a socket and similar. */ const struct pipe_buf_operations nosteal_pipe_buf_ops = { - .release = generic_pipe_buf_release, - .get = generic_pipe_buf_get, + .release = generic_pipe_buf_release, + .get = generic_pipe_buf_get, }; EXPORT_SYMBOL(nosteal_pipe_buf_ops); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 7c057daa0931..0c31b9461262 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -70,11 +70,11 @@ struct pipe_inode_info { * Note on the nesting of these functions: * * ->confirm() - * ->steal() + * ->try_steal() * - * That is, ->steal() must be called on a confirmed buffer. - * See below for the meaning of each operation. Also see kerneldoc - * in fs/pipe.c for the pipe and generic variants of these hooks. + * That is, ->try_steal() must be called on a confirmed buffer. See below for + * the meaning of each operation. Also see the kerneldoc in fs/pipe.c for the + * pipe and generic variants of these hooks. */ struct pipe_buf_operations { /* @@ -94,13 +94,13 @@ struct pipe_buf_operations { /* * Attempt to take ownership of the pipe buffer and its contents. - * ->steal() returns 0 for success, in which case the contents - * of the pipe (the buf->page) is locked and now completely owned - * by the caller. The page may then be transferred to a different - * mapping, the most often used case is insertion into different - * file address space cache. + * ->try_steal() returns %true for success, in which case the contents + * of the pipe (the buf->page) is locked and now completely owned by the + * caller. The page may then be transferred to a different mapping, the + * most often used case is insertion into different file address space + * cache. */ - int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); + bool (*try_steal)(struct pipe_inode_info *, struct pipe_buffer *); /* * Get a reference to the pipe buffer. @@ -201,16 +201,16 @@ static inline int pipe_buf_confirm(struct pipe_inode_info *pipe, } /** - * pipe_buf_steal - attempt to take ownership of a pipe_buffer + * pipe_buf_try_steal - attempt to take ownership of a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to attempt to steal */ -static inline int pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { - if (!buf->ops->steal) - return 1; - return buf->ops->steal(pipe, buf); + if (!buf->ops->try_steal) + return false; + return buf->ops->try_steal(pipe, buf); } /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual @@ -234,7 +234,7 @@ void free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); -int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); +bool generic_pipe_buf_try_steal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); extern const struct pipe_buf_operations nosteal_pipe_buf_ops; diff --git a/kernel/relay.c b/kernel/relay.c index c5ece4c2b04d..5c2263f3f857 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1177,9 +1177,9 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe, } static const struct pipe_buf_operations relay_pipe_buf_ops = { - .release = relay_pipe_buf_release, - .steal = generic_pipe_buf_steal, - .get = generic_pipe_buf_get, + .release = relay_pipe_buf_release, + .try_steal = generic_pipe_buf_try_steal, + .get = generic_pipe_buf_get, }; static void relay_page_release(struct splice_pipe_desc *spd, unsigned int i) -- cgit v1.2.3 From 76a0294eb19b5f909b119500e60d72ef41cc4d8e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 1 Jun 2020 21:47:31 -0700 Subject: fuse: convert from readpages to readahead Implement the new readahead operation in fuse by using __readahead_batch() to fill the array of pages in fuse_args_pages directly. This lets us inline fuse_readpages_fill() into fuse_readahead(). [willy@infradead.org: build fix] Link: http://lkml.kernel.org/r/20200415025938.GB5820@bombadil.infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: Dave Chinner Reviewed-by: William Kucharski Acked-by: Miklos Szeredi Cc: Chao Yu Cc: Christoph Hellwig Cc: Cong Wang Cc: Darrick J. Wong Cc: Eric Biggers Cc: Gao Xiang Cc: Jaegeuk Kim Cc: John Hubbard Cc: Joseph Qi Cc: Junxiao Bi Cc: Michal Hocko Cc: Zi Yan Cc: Johannes Thumshirn Link: http://lkml.kernel.org/r/20200414150233.24495-25-willy@infradead.org Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 100 ++++++++++++++++----------------------------------------- 1 file changed, 28 insertions(+), 72 deletions(-) (limited to 'fs/fuse') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 9d67b830fb7a..bac51c32d660 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -915,84 +915,40 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) fuse_readpages_end(fc, &ap->args, err); } -struct fuse_fill_data { - struct fuse_io_args *ia; - struct file *file; - struct inode *inode; - unsigned int nr_pages; - unsigned int max_pages; -}; - -static int fuse_readpages_fill(void *_data, struct page *page) +static void fuse_readahead(struct readahead_control *rac) { - struct fuse_fill_data *data = _data; - struct fuse_io_args *ia = data->ia; - struct fuse_args_pages *ap = &ia->ap; - struct inode *inode = data->inode; + struct inode *inode = rac->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); + unsigned int i, max_pages, nr_pages = 0; - fuse_wait_on_page_writeback(inode, page->index); - - if (ap->num_pages && - (ap->num_pages == fc->max_pages || - (ap->num_pages + 1) * PAGE_SIZE > fc->max_read || - ap->pages[ap->num_pages - 1]->index + 1 != page->index)) { - data->max_pages = min_t(unsigned int, data->nr_pages, - fc->max_pages); - fuse_send_readpages(ia, data->file); - data->ia = ia = fuse_io_alloc(NULL, data->max_pages); - if (!ia) { - unlock_page(page); - return -ENOMEM; - } - ap = &ia->ap; - } - - if (WARN_ON(ap->num_pages >= data->max_pages)) { - unlock_page(page); - fuse_io_free(ia); - return -EIO; - } - - get_page(page); - ap->pages[ap->num_pages] = page; - ap->descs[ap->num_pages].length = PAGE_SIZE; - ap->num_pages++; - data->nr_pages--; - return 0; -} - -static int fuse_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - struct inode *inode = mapping->host; - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_fill_data data; - int err; - - err = -EIO; if (is_bad_inode(inode)) - goto out; + return; - data.file = file; - data.inode = inode; - data.nr_pages = nr_pages; - data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages); -; - data.ia = fuse_io_alloc(NULL, data.max_pages); - err = -ENOMEM; - if (!data.ia) - goto out; + max_pages = min_t(unsigned int, fc->max_pages, + fc->max_read / PAGE_SIZE); - err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); - if (!err) { - if (data.ia->ap.num_pages) - fuse_send_readpages(data.ia, file); - else - fuse_io_free(data.ia); + for (;;) { + struct fuse_io_args *ia; + struct fuse_args_pages *ap; + + nr_pages = readahead_count(rac) - nr_pages; + if (nr_pages > max_pages) + nr_pages = max_pages; + if (nr_pages == 0) + break; + ia = fuse_io_alloc(NULL, nr_pages); + if (!ia) + return; + ap = &ia->ap; + nr_pages = __readahead_batch(rac, ap->pages, nr_pages); + for (i = 0; i < nr_pages; i++) { + fuse_wait_on_page_writeback(inode, + readahead_index(rac) + i); + ap->descs[i].length = PAGE_SIZE; + } + ap->num_pages = nr_pages; + fuse_send_readpages(ia, rac->file); } -out: - return err; } static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -3373,10 +3329,10 @@ static const struct file_operations fuse_file_operations = { static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, + .readahead = fuse_readahead, .writepage = fuse_writepage, .writepages = fuse_writepages, .launder_page = fuse_launder_page, - .readpages = fuse_readpages, .set_page_dirty = __set_page_dirty_nobuffers, .bmap = fuse_bmap, .direct_IO = fuse_direct_IO, -- cgit v1.2.3 From 6058eaec816f29fbe33c9d35694614c9a4ed75ba Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 Jun 2020 16:02:40 -0700 Subject: mm: fold and remove lru_cache_add_anon() and lru_cache_add_file() They're the same function, and for the purpose of all callers they are equivalent to lru_cache_add(). [akpm@linux-foundation.org: fix it for local_lock changes] Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Reviewed-by: Rik van Riel Acked-by: Michal Hocko Acked-by: Minchan Kim Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20200520232525.798933-5-hannes@cmpxchg.org Signed-off-by: Linus Torvalds --- fs/cifs/file.c | 10 +++++----- fs/fuse/dev.c | 2 +- include/linux/swap.h | 2 -- mm/khugepaged.c | 8 ++------ mm/memory.c | 2 +- mm/shmem.c | 6 +++--- mm/swap.c | 42 ++++++++++-------------------------------- mm/swap_state.c | 2 +- 8 files changed, 23 insertions(+), 51 deletions(-) (limited to 'fs/fuse') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 75ddce8ef456..17a4f49c34f5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4162,7 +4162,7 @@ cifs_readv_complete(struct work_struct *work) for (i = 0; i < rdata->nr_pages; i++) { struct page *page = rdata->pages[i]; - lru_cache_add_file(page); + lru_cache_add(page); if (rdata->result == 0 || (rdata->result == -EAGAIN && got_bytes)) { @@ -4232,7 +4232,7 @@ readpages_fill_pages(struct TCP_Server_Info *server, * fill them until the writes are flushed. */ zero_user(page, 0, PAGE_SIZE); - lru_cache_add_file(page); + lru_cache_add(page); flush_dcache_page(page); SetPageUptodate(page); unlock_page(page); @@ -4242,7 +4242,7 @@ readpages_fill_pages(struct TCP_Server_Info *server, continue; } else { /* no need to hold page hostage */ - lru_cache_add_file(page); + lru_cache_add(page); unlock_page(page); put_page(page); rdata->pages[i] = NULL; @@ -4437,7 +4437,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, /* best to give up if we're out of mem */ list_for_each_entry_safe(page, tpage, &tmplist, lru) { list_del(&page->lru); - lru_cache_add_file(page); + lru_cache_add(page); unlock_page(page); put_page(page); } @@ -4475,7 +4475,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, add_credits_and_wake_if(server, &rdata->credits, 0); for (i = 0; i < rdata->nr_pages; i++) { page = rdata->pages[i]; - lru_cache_add_file(page); + lru_cache_add(page); unlock_page(page); put_page(page); } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c7a65cf2bcca..a01540b22122 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -840,7 +840,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) get_page(newpage); if (!(buf->flags & PIPE_BUF_FLAG_LRU)) - lru_cache_add_file(newpage); + lru_cache_add(newpage); err = 0; spin_lock(&cs->req->waitq.lock); diff --git a/include/linux/swap.h b/include/linux/swap.h index 6cea1eb97d45..217bc8e13feb 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -335,8 +335,6 @@ extern unsigned long nr_free_pagecache_pages(void); /* linux/mm/swap.c */ extern void lru_cache_add(struct page *); -extern void lru_cache_add_anon(struct page *page); -extern void lru_cache_add_file(struct page *page); extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); extern void activate_page(struct page *); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index f29038c485e0..3f032487825b 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1879,13 +1879,9 @@ xa_unlocked: SetPageUptodate(new_page); page_ref_add(new_page, HPAGE_PMD_NR - 1); - - if (is_shmem) { + if (is_shmem) set_page_dirty(new_page); - lru_cache_add_anon(new_page); - } else { - lru_cache_add_file(new_page); - } + lru_cache_add(new_page); /* * Remove pte page tables, so we can re-fault the page as huge. diff --git a/mm/memory.c b/mm/memory.c index d50d8b498af5..3431e76d0e75 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3139,7 +3139,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (err) goto out_page; - lru_cache_add_anon(page); + lru_cache_add(page); swap_readpage(page, true); } } else { diff --git a/mm/shmem.c b/mm/shmem.c index e83de27ce8f4..ea95a3e46fbb 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1609,7 +1609,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, */ oldpage = newpage; } else { - lru_cache_add_anon(newpage); + lru_cache_add(newpage); *pagep = newpage; } @@ -1860,7 +1860,7 @@ alloc_nohuge: charge_mm); if (error) goto unacct; - lru_cache_add_anon(page); + lru_cache_add(page); spin_lock_irq(&info->lock); info->alloced += compound_nr(page); @@ -2376,7 +2376,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, if (!pte_none(*dst_pte)) goto out_release_unlock; - lru_cache_add_anon(page); + lru_cache_add(page); spin_lock_irq(&info->lock); info->alloced++; diff --git a/mm/swap.c b/mm/swap.c index f7026f72aca9..6196d792c952 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -424,37 +424,6 @@ void mark_page_accessed(struct page *page) } EXPORT_SYMBOL(mark_page_accessed); -static void __lru_cache_add(struct page *page) -{ - struct pagevec *pvec; - - local_lock(&lru_pvecs.lock); - pvec = this_cpu_ptr(&lru_pvecs.lru_add); - get_page(page); - if (!pagevec_add(pvec, page) || PageCompound(page)) - __pagevec_lru_add(pvec); - local_unlock(&lru_pvecs.lock); -} - -/** - * lru_cache_add_anon - add a page to the page lists - * @page: the page to add - */ -void lru_cache_add_anon(struct page *page) -{ - if (PageActive(page)) - ClearPageActive(page); - __lru_cache_add(page); -} - -void lru_cache_add_file(struct page *page) -{ - if (PageActive(page)) - ClearPageActive(page); - __lru_cache_add(page); -} -EXPORT_SYMBOL(lru_cache_add_file); - /** * lru_cache_add - add a page to a page list * @page: the page to be added to the LRU. @@ -466,10 +435,19 @@ EXPORT_SYMBOL(lru_cache_add_file); */ void lru_cache_add(struct page *page) { + struct pagevec *pvec; + VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); VM_BUG_ON_PAGE(PageLRU(page), page); - __lru_cache_add(page); + + get_page(page); + local_lock(&lru_pvecs.lock); + pvec = this_cpu_ptr(&lru_pvecs.lru_add); + if (!pagevec_add(pvec, page) || PageCompound(page)) + __pagevec_lru_add(pvec); + local_unlock(&lru_pvecs.lock); } +EXPORT_SYMBOL(lru_cache_add); /** * lru_cache_add_active_or_unevictable diff --git a/mm/swap_state.c b/mm/swap_state.c index ab0462819a5b..fa089002125f 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -442,7 +442,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, /* Caller will initiate read into locked page */ SetPageWorkingset(page); - lru_cache_add_anon(page); + lru_cache_add(page); *new_page_allocated = true; return page; -- cgit v1.2.3