From 826d7bc9f013d01e92997883d2fd0c25f4af1f1c Mon Sep 17 00:00:00 2001
From: Konstantin Khorenko <khorenko@virtuozzo.com>
Date: Fri, 8 Jun 2018 17:27:11 +0300
Subject: fs/lock: skip lock owner pid translation in case we are in
 init_pid_ns

If the flock owner process is dead and its pid has been already freed,
pid translation won't work, but we still want to show flock owner pid
number when inspecting /proc/$PID/fdinfo/$FD in init pidns.

Reproducer:
process A	process A1	process A2
fork()--------->
exit()		open()
		flock()
		fork()--------->
		exit()		sleep()

Before the patch:
================
(root@vz7)/: cat /proc/${PID_A2}/fdinfo/3
pos:    4
flags:  02100002
mnt_id: 257
lock:   (root@vz7)/:

After the patch:
===============
(root@vz7)/:cat /proc/${PID_A2}/fdinfo/3
pos:    4
flags:  02100002
mnt_id: 295
lock:   1: FLOCK  ADVISORY  WRITE ${PID_A1} b6:f8a61:529946 0 EOF

Fixes: 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks")
Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
Acked-by: Andrey Vagin <avagin@openvz.org>
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index 05e211be8684..bfee5b7f2862 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2072,6 +2072,13 @@ static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
 		return -1;
 	if (IS_REMOTELCK(fl))
 		return fl->fl_pid;
+	/*
+	 * If the flock owner process is dead and its pid has been already
+	 * freed, the translation below won't work, but we still want to show
+	 * flock owner pid number in init pidns.
+	 */
+	if (ns == &init_pid_ns)
+		return (pid_t)fl->fl_pid;
 
 	rcu_read_lock();
 	pid = find_pid_ns(fl->fl_pid, &init_pid_ns);
-- 
cgit v1.2.3


From 1cf8e5de4055f85383405a21a0a7c3c4348bf2ed Mon Sep 17 00:00:00 2001
From: Konstantin Khorenko <khorenko@virtuozzo.com>
Date: Fri, 8 Jun 2018 17:27:12 +0300
Subject: fs/lock: show locks taken by processes from another pidns

Currently if we face a lock taken by a process invisible in the current
pidns we skip the lock completely, but this

1) makes the output not that nice
    (root@vz7)/: cat /proc/${PID_A2}/fdinfo/3
    pos:    4
    flags:  02100002
    mnt_id: 257
    lock:   (root@vz7)/:

2) makes it more difficult to debug issues with leaked flocks
   if you get error on lock, but don't see any locks in /proc/$id/fdinfo/$file

Let's show information about such locks again as previously, but
show zero in the owner pid field.

After the patch:
===============
(root@vz7)/:cat /proc/${PID_A2}/fdinfo/3
pos:    4
flags:  02100002
mnt_id: 295
lock:   1: FLOCK  ADVISORY  WRITE 0 b6:f8a61:529946 0 EOF

Fixes: 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks")
Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
Acked-by: Andrey Vagin <avagin@openvz.org>
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/locks.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index bfee5b7f2862..e533623e2e99 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2633,12 +2633,10 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 
 	fl_pid = locks_translate_pid(fl, proc_pidns);
 	/*
-	 * If there isn't a fl_pid don't display who is waiting on
-	 * the lock if we are called from locks_show, or if we are
-	 * called from __show_fd_info - skip lock entirely
+	 * If lock owner is dead (and pid is freed) or not visible in current
+	 * pidns, zero is shown as a pid value. Check lock info from
+	 * init_pid_ns to get saved lock pid value.
 	 */
-	if (fl_pid == 0)
-		return;
 
 	if (fl->fl_file != NULL)
 		inode = locks_inode(fl->fl_file);
-- 
cgit v1.2.3


From 707c62352909a9be2453ae073e160de59d2d4a1f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 15 Jun 2018 15:20:42 -0700
Subject: configfs: use kvasprintf() instead of open-coding it

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/configfs/item.c | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 88f266efc09b..99d491cd01f9 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -64,7 +64,6 @@ static void config_item_init(struct config_item *item)
  */
 int config_item_set_name(struct config_item *item, const char *fmt, ...)
 {
-	int error = 0;
 	int limit = CONFIGFS_ITEM_NAME_LEN;
 	int need;
 	va_list args;
@@ -79,25 +78,11 @@ int config_item_set_name(struct config_item *item, const char *fmt, ...)
 	if (need < limit)
 		name = item->ci_namebuf;
 	else {
-		/*
-		 * Need more space? Allocate it and try again
-		 */
-		limit = need + 1;
-		name = kmalloc(limit, GFP_KERNEL);
-		if (!name) {
-			error = -ENOMEM;
-			goto Done;
-		}
 		va_start(args, fmt);
-		need = vsnprintf(name, limit, fmt, args);
+		name = kvasprintf(GFP_KERNEL, fmt, args);
 		va_end(args);
-
-		/* Still? Give up. */
-		if (need >= limit) {
-			kfree(name);
-			error = -EFAULT;
-			goto Done;
-		}
+		if (!name)
+			return -EFAULT;
 	}
 
 	/* Free the old name, if necessary. */
@@ -106,8 +91,7 @@ int config_item_set_name(struct config_item *item, const char *fmt, ...)
 
 	/* Now, set the new name */
 	item->ci_name = name;
- Done:
-	return error;
+	return 0;
 }
 EXPORT_SYMBOL(config_item_set_name);
 
-- 
cgit v1.2.3


From bd646104ac5a6bf8bdddaeaf4e441f5d439ded96 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 19 Jun 2018 17:51:02 +0200
Subject: jfs: use time64_t for otime

The file creation time in the inode uses time_t which is defined
differently on 32-bit and 64-bit architectures and deprecated. The
representation in the inode uses an unsigned 32-bit number, but this
gets wrapped around after year 2038 when assigned to a time_t.

This changes the type to time64_t, so we can support the full range of
timestamps between 1970 and 2106 on 32-bit systems like we do on 64-bit
systems already, and matching what we do for the atime/ctime/mtime stamps
since the introduction of 64-bit timestamps in VFS.

Note: the otime stamp is not actually used anywhere at the moment in
the kernel, it is just set when writing a file, so none of this really
makes a difference unless we implement setting the btime field in the
getattr() callback.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
---
 fs/jfs/jfs_incore.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 1f26d1910409..d5c46f86b2ef 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -43,7 +43,7 @@ struct jfs_inode_info {
 	pxd_t	ixpxd;		/* inode extent descriptor	*/
 	dxd_t	acl;		/* dxd describing acl	*/
 	dxd_t	ea;		/* dxd describing ea	*/
-	time_t	otime;		/* time created	*/
+	time64_t otime;		/* time created	*/
 	uint	next_index;	/* next available directory entry index */
 	int	acltype;	/* Type of ACL	*/
 	short	btorder;	/* access order	*/
-- 
cgit v1.2.3


From a6d639da63aeb838d5c0b7dc50598f2eac4014a0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Jun 2018 15:10:55 -0700
Subject: fs: factor out a __generic_write_end helper

Factor out the bits of the buffer.c based write_end implementations that
don't know about buffer_heads, so they can be reused by other implementations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/buffer.c   | 67 +++++++++++++++++++++++++++++++----------------------------
 fs/internal.h |  2 ++
 2 files changed, 37 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index cabc045f483d..aba2a948b235 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2076,6 +2076,40 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
 }
 EXPORT_SYMBOL(block_write_begin);
 
+int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
+		struct page *page)
+{
+	loff_t old_size = inode->i_size;
+	bool i_size_changed = false;
+
+	/*
+	 * No need to use i_size_read() here, the i_size cannot change under us
+	 * because we hold i_rwsem.
+	 *
+	 * But it's important to update i_size while still holding page lock:
+	 * page writeout could otherwise come in and zero beyond i_size.
+	 */
+	if (pos + copied > inode->i_size) {
+		i_size_write(inode, pos + copied);
+		i_size_changed = true;
+	}
+
+	unlock_page(page);
+	put_page(page);
+
+	if (old_size < pos)
+		pagecache_isize_extended(inode, old_size, pos);
+	/*
+	 * Don't mark the inode dirty under page lock. First, it unnecessarily
+	 * makes the holding time of page lock longer. Second, it forces lock
+	 * ordering of page lock and transaction start for journaling
+	 * filesystems.
+	 */
+	if (i_size_changed)
+		mark_inode_dirty(inode);
+	return copied;
+}
+
 int block_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata)
@@ -2116,39 +2150,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata)
 {
-	struct inode *inode = mapping->host;
-	loff_t old_size = inode->i_size;
-	int i_size_changed = 0;
-
 	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
-
-	/*
-	 * No need to use i_size_read() here, the i_size
-	 * cannot change under us because we hold i_mutex.
-	 *
-	 * But it's important to update i_size while still holding page lock:
-	 * page writeout could otherwise come in and zero beyond i_size.
-	 */
-	if (pos+copied > inode->i_size) {
-		i_size_write(inode, pos+copied);
-		i_size_changed = 1;
-	}
-
-	unlock_page(page);
-	put_page(page);
-
-	if (old_size < pos)
-		pagecache_isize_extended(inode, old_size, pos);
-	/*
-	 * Don't mark the inode dirty under page lock. First, it unnecessarily
-	 * makes the holding time of page lock longer. Second, it forces lock
-	 * ordering of page lock and transaction start for journaling
-	 * filesystems.
-	 */
-	if (i_size_changed)
-		mark_inode_dirty(inode);
-
-	return copied;
+	return __generic_write_end(mapping->host, pos, copied, page);
 }
 EXPORT_SYMBOL(generic_write_end);
 
diff --git a/fs/internal.h b/fs/internal.h
index 980d005b21b4..4a18bdbd2214 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -43,6 +43,8 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
 extern void guard_bio_eod(int rw, struct bio *bio);
 extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
 		get_block_t *get_block, struct iomap *iomap);
+int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
+		struct page *page);
 
 /*
  * char_dev.c
-- 
cgit v1.2.3


From 3d7b6b21f6c590c4d70b311bbdd78a214637c9c7 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Jun 2018 15:10:55 -0700
Subject: iomap: mark newly allocated buffer heads as new

In iomap_to_bh, not only mark buffer heads in IOMAP_UNWRITTEN maps as
new, but also buffer heads in IOMAP_MAPPED maps with the IOMAP_F_NEW
flag set.  This will be used by filesystems like gfs2, which allocate
blocks in iomap->begin.

Minor corrections to the comment for IOMAP_UNWRITTEN maps.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/buffer.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index aba2a948b235..c8c2b7d8b8d6 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1900,15 +1900,16 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
 		break;
 	case IOMAP_UNWRITTEN:
 		/*
-		 * For unwritten regions, we always need to ensure that
-		 * sub-block writes cause the regions in the block we are not
-		 * writing to are zeroed. Set the buffer as new to ensure this.
+		 * For unwritten regions, we always need to ensure that regions
+		 * in the block we are not writing to are zeroed. Mark the
+		 * buffer as new to ensure this.
 		 */
 		set_buffer_new(bh);
 		set_buffer_unwritten(bh);
 		/* FALLTHRU */
 	case IOMAP_MAPPED:
-		if (offset >= i_size_read(inode))
+		if ((iomap->flags & IOMAP_F_NEW) ||
+		    offset >= i_size_read(inode))
 			set_buffer_new(bh);
 		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
 				inode->i_blkbits;
-- 
cgit v1.2.3


From ebf00be37de35788cad72f4f20b4a39e30c0be4a Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Jun 2018 15:10:55 -0700
Subject: iomap: complete partial direct I/O writes synchronously

According to xfstest generic/240, applications seem to expect direct I/O
writes to either complete as a whole or to fail; short direct I/O writes
are apparently not appreciated.  This means that when only part of an
asynchronous direct I/O write succeeds, we can either fail the entire
write, or we can wait for the partial write to complete and retry the
remaining write as buffered I/O.  The old __blockdev_direct_IO helper
has code for waiting for partial writes to complete; the new
iomap_dio_rw iomap helper does not.

The above mentioned fallback mode is needed for gfs2, which doesn't
allow block allocations under direct I/O to avoid taking cluster-wide
exclusive locks.  As a consequence, an asynchronous direct I/O write to
a file range that contains a hole will result in a short write.  In that
case, wait for the short write to complete to allow gfs2 to recover.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/iomap.c b/fs/iomap.c
index 77397b5a96ef..9c454459a1e9 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -811,6 +811,7 @@ struct iomap_dio {
 	atomic_t		ref;
 	unsigned		flags;
 	int			error;
+	bool			wait_for_completion;
 
 	union {
 		/* used during submission and for synchronous completion: */
@@ -914,9 +915,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 		iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
 
 	if (atomic_dec_and_test(&dio->ref)) {
-		if (is_sync_kiocb(dio->iocb)) {
+		if (dio->wait_for_completion) {
 			struct task_struct *waiter = dio->submit.waiter;
-
 			WRITE_ONCE(dio->submit.waiter, NULL);
 			wake_up_process(waiter);
 		} else if (dio->flags & IOMAP_DIO_WRITE) {
@@ -1131,13 +1131,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	dio->end_io = end_io;
 	dio->error = 0;
 	dio->flags = 0;
+	dio->wait_for_completion = is_sync_kiocb(iocb);
 
 	dio->submit.iter = iter;
-	if (is_sync_kiocb(iocb)) {
-		dio->submit.waiter = current;
-		dio->submit.cookie = BLK_QC_T_NONE;
-		dio->submit.last_queue = NULL;
-	}
+	dio->submit.waiter = current;
+	dio->submit.cookie = BLK_QC_T_NONE;
+	dio->submit.last_queue = NULL;
 
 	if (iov_iter_rw(iter) == READ) {
 		if (pos >= dio->i_size)
@@ -1187,7 +1186,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		dio_warn_stale_pagecache(iocb->ki_filp);
 	ret = 0;
 
-	if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+	if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion &&
 	    !inode->i_sb->s_dio_done_wq) {
 		ret = sb_init_dio_done_wq(inode->i_sb);
 		if (ret < 0)
@@ -1202,8 +1201,10 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 				iomap_dio_actor);
 		if (ret <= 0) {
 			/* magic error code to fall back to buffered I/O */
-			if (ret == -ENOTBLK)
+			if (ret == -ENOTBLK) {
+				dio->wait_for_completion = true;
 				ret = 0;
+			}
 			break;
 		}
 		pos += ret;
@@ -1224,7 +1225,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		dio->flags &= ~IOMAP_DIO_NEED_SYNC;
 
 	if (!atomic_dec_and_test(&dio->ref)) {
-		if (!is_sync_kiocb(iocb))
+		if (!dio->wait_for_completion)
 			return -EIOCBQUEUED;
 
 		for (;;) {
-- 
cgit v1.2.3


From 19e0c58f6552638c86395f0717210326fdf14fd2 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Jun 2018 15:10:56 -0700
Subject: iomap: generic inline data handling

Add generic inline data handling by adding a pointer to the inline data
region to struct iomap.  When handling a buffered IOMAP_INLINE write,
iomap_write_begin will copy the current inline data from the inline data
region into the page cache, and iomap_write_end will copy the changes in
the page cache back to the inline data region.

This doesn't cover inline data reads and direct I/O yet because so far,
we have no users.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[hch: small cleanups to better fit in with other iomap work]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 62 +++++++++++++++++++++++++++++++++++++++++++++------
 include/linux/iomap.h |  1 +
 2 files changed, 56 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/iomap.c b/fs/iomap.c
index 9c454459a1e9..4aecd7c5dbd8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -103,6 +103,26 @@ iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+static void
+iomap_read_inline_data(struct inode *inode, struct page *page,
+		struct iomap *iomap)
+{
+	size_t size = i_size_read(inode);
+	void *addr;
+
+	if (PageUptodate(page))
+		return;
+
+	BUG_ON(page->index);
+	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+	addr = kmap_atomic(page);
+	memcpy(addr, iomap->inline_data, size);
+	memset(addr + size, 0, PAGE_SIZE - size);
+	kunmap_atomic(addr);
+	SetPageUptodate(page);
+}
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
@@ -133,7 +153,11 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	if (!page)
 		return -ENOMEM;
 
-	status = __block_write_begin_int(page, pos, len, NULL, iomap);
+	if (iomap->type == IOMAP_INLINE)
+		iomap_read_inline_data(inode, page, iomap);
+	else
+		status = __block_write_begin_int(page, pos, len, NULL, iomap);
+
 	if (unlikely(status)) {
 		unlock_page(page);
 		put_page(page);
@@ -146,14 +170,37 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	return status;
 }
 
+static int
+iomap_write_end_inline(struct inode *inode, struct page *page,
+		struct iomap *iomap, loff_t pos, unsigned copied)
+{
+	void *addr;
+
+	WARN_ON_ONCE(!PageUptodate(page));
+	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+	addr = kmap_atomic(page);
+	memcpy(iomap->inline_data + pos, addr + pos, copied);
+	kunmap_atomic(addr);
+
+	mark_inode_dirty(inode);
+	__generic_write_end(inode, pos, copied, page);
+	return copied;
+}
+
 static int
 iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
-		unsigned copied, struct page *page)
+		unsigned copied, struct page *page, struct iomap *iomap)
 {
 	int ret;
 
-	ret = generic_write_end(NULL, inode->i_mapping, pos, len,
-			copied, page, NULL);
+	if (iomap->type == IOMAP_INLINE) {
+		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
+	} else {
+		ret = generic_write_end(NULL, inode->i_mapping, pos, len,
+				copied, page, NULL);
+	}
+
 	if (ret < len)
 		iomap_write_failed(inode, pos, len);
 	return ret;
@@ -208,7 +255,8 @@ again:
 
 		flush_dcache_page(page);
 
-		status = iomap_write_end(inode, pos, bytes, copied, page);
+		status = iomap_write_end(inode, pos, bytes, copied, page,
+				iomap);
 		if (unlikely(status < 0))
 			break;
 		copied = status;
@@ -302,7 +350,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 		WARN_ON_ONCE(!PageUptodate(page));
 
-		status = iomap_write_end(inode, pos, bytes, bytes, page);
+		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
 		if (unlikely(status <= 0)) {
 			if (WARN_ON_ONCE(status == 0))
 				return -EIO;
@@ -354,7 +402,7 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
 	zero_user(page, offset, bytes);
 	mark_page_accessed(page);
 
-	return iomap_write_end(inode, pos, bytes, bytes, page);
+	return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
 }
 
 static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index a044a824da85..10d6cff7f69a 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -55,6 +55,7 @@ struct iomap {
 	u16			flags;	/* flags for mapping */
 	struct block_device	*bdev;	/* block device for I/O */
 	struct dax_device	*dax_dev; /* dax_dev for dax operations */
+	void			*inline_data;
 };
 
 /*
-- 
cgit v1.2.3


From 63899c6f8851c32214b19390254fa1ae90b582df Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Jun 2018 15:10:56 -0700
Subject: iomap: add a page_done callback

This will be used by gfs2 to attach data to transactions for the journaled
data mode.  But the concept is generic enough that we might be able to
use it for other purposes like encryption/integrity post-processing in the
future.

Based on a patch from Andreas Gruenbacher.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 3 +++
 include/linux/iomap.h | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/iomap.c b/fs/iomap.c
index 4aecd7c5dbd8..a1f71e64ea49 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -201,6 +201,9 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 				copied, page, NULL);
 	}
 
+	if (iomap->page_done)
+		iomap->page_done(inode, pos, copied, page, iomap);
+
 	if (ret < len)
 		iomap_write_failed(inode, pos, len);
 	return ret;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 10d6cff7f69a..45f43865b0f0 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -9,6 +9,7 @@ struct fiemap_extent_info;
 struct inode;
 struct iov_iter;
 struct kiocb;
+struct page;
 struct vm_area_struct;
 struct vm_fault;
 
@@ -56,6 +57,14 @@ struct iomap {
 	struct block_device	*bdev;	/* block device for I/O */
 	struct dax_device	*dax_dev; /* dax_dev for dax operations */
 	void			*inline_data;
+
+	/*
+	 * Called when finished processing a page in the mapping returned in
+	 * this iomap.  At least for now this is only supported in the buffered
+	 * write path.
+	 */
+	void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
+			struct page *page, struct iomap *iomap);
 };
 
 /*
-- 
cgit v1.2.3


From 72b4daa241295440f98e80ae21294a67b27ca091 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Jun 2018 15:10:57 -0700
Subject: iomap: add an iomap-based readpage and readpages implementation

Simply use iomap_apply to iterate over the file and submit a bio for
each non-uptodate but mapped region and zero everything else.  Note that
as-is this can not be used for file systems with a blocksize smaller than
the page size, but that support will be added later.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 214 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/iomap.h |   3 +
 2 files changed, 216 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/iomap.c b/fs/iomap.c
index a1f71e64ea49..4f10c6b1cf6d 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2010 Red Hat, Inc.
- * Copyright (c) 2016 Christoph Hellwig.
+ * Copyright (c) 2016-2018 Christoph Hellwig.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
 #include <linux/mm.h>
+#include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
@@ -123,6 +124,217 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
 	SetPageUptodate(page);
 }
 
+static void
+iomap_read_end_io(struct bio *bio)
+{
+	int error = blk_status_to_errno(bio->bi_status);
+	struct bio_vec *bvec;
+	int i;
+
+	bio_for_each_segment_all(bvec, bio, i)
+		page_endio(bvec->bv_page, false, error);
+	bio_put(bio);
+}
+
+struct iomap_readpage_ctx {
+	struct page		*cur_page;
+	bool			cur_page_in_bio;
+	bool			is_readahead;
+	struct bio		*bio;
+	struct list_head	*pages;
+};
+
+static loff_t
+iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+		struct iomap *iomap)
+{
+	struct iomap_readpage_ctx *ctx = data;
+	struct page *page = ctx->cur_page;
+	unsigned poff = pos & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	bool is_contig = false;
+	sector_t sector;
+
+	/* we don't support blocksize < PAGE_SIZE quite yet. */
+	WARN_ON_ONCE(pos != page_offset(page));
+	WARN_ON_ONCE(plen != PAGE_SIZE);
+
+	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
+		zero_user(page, poff, plen);
+		SetPageUptodate(page);
+		goto done;
+	}
+
+	ctx->cur_page_in_bio = true;
+
+	/*
+	 * Try to merge into a previous segment if we can.
+	 */
+	sector = iomap_sector(iomap, pos);
+	if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
+		if (__bio_try_merge_page(ctx->bio, page, plen, poff))
+			goto done;
+		is_contig = true;
+	}
+
+	if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
+		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
+		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+		if (ctx->bio)
+			submit_bio(ctx->bio);
+
+		if (ctx->is_readahead) /* same as readahead_gfp_mask */
+			gfp |= __GFP_NORETRY | __GFP_NOWARN;
+		ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
+		ctx->bio->bi_opf = REQ_OP_READ;
+		if (ctx->is_readahead)
+			ctx->bio->bi_opf |= REQ_RAHEAD;
+		ctx->bio->bi_iter.bi_sector = sector;
+		bio_set_dev(ctx->bio, iomap->bdev);
+		ctx->bio->bi_end_io = iomap_read_end_io;
+	}
+
+	__bio_add_page(ctx->bio, page, plen, poff);
+done:
+	return plen;
+}
+
+int
+iomap_readpage(struct page *page, const struct iomap_ops *ops)
+{
+	struct iomap_readpage_ctx ctx = { .cur_page = page };
+	struct inode *inode = page->mapping->host;
+	unsigned poff;
+	loff_t ret;
+
+	WARN_ON_ONCE(page_has_buffers(page));
+
+	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
+		ret = iomap_apply(inode, page_offset(page) + poff,
+				PAGE_SIZE - poff, 0, ops, &ctx,
+				iomap_readpage_actor);
+		if (ret <= 0) {
+			WARN_ON_ONCE(ret == 0);
+			SetPageError(page);
+			break;
+		}
+	}
+
+	if (ctx.bio) {
+		submit_bio(ctx.bio);
+		WARN_ON_ONCE(!ctx.cur_page_in_bio);
+	} else {
+		WARN_ON_ONCE(ctx.cur_page_in_bio);
+		unlock_page(page);
+	}
+
+	/*
+	 * Just like mpage_readpages and block_read_full_page we always
+	 * return 0 and just mark the page as PageError on errors.  This
+	 * should be cleaned up all through the stack eventually.
+	 */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_readpage);
+
+static struct page *
+iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
+		loff_t length, loff_t *done)
+{
+	while (!list_empty(pages)) {
+		struct page *page = lru_to_page(pages);
+
+		if (page_offset(page) >= (u64)pos + length)
+			break;
+
+		list_del(&page->lru);
+		if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
+				GFP_NOFS))
+			return page;
+
+		/*
+		 * If we already have a page in the page cache at index we are
+		 * done.  Upper layers don't care if it is uptodate after the
+		 * readpages call itself as every page gets checked again once
+		 * actually needed.
+		 */
+		*done += PAGE_SIZE;
+		put_page(page);
+	}
+
+	return NULL;
+}
+
+static loff_t
+iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
+		void *data, struct iomap *iomap)
+{
+	struct iomap_readpage_ctx *ctx = data;
+	loff_t done, ret;
+
+	for (done = 0; done < length; done += ret) {
+		if (ctx->cur_page && ((pos + done) & (PAGE_SIZE - 1)) == 0) {
+			if (!ctx->cur_page_in_bio)
+				unlock_page(ctx->cur_page);
+			put_page(ctx->cur_page);
+			ctx->cur_page = NULL;
+		}
+		if (!ctx->cur_page) {
+			ctx->cur_page = iomap_next_page(inode, ctx->pages,
+					pos, length, &done);
+			if (!ctx->cur_page)
+				break;
+			ctx->cur_page_in_bio = false;
+		}
+		ret = iomap_readpage_actor(inode, pos + done, length - done,
+				ctx, iomap);
+	}
+
+	return done;
+}
+
+int
+iomap_readpages(struct address_space *mapping, struct list_head *pages,
+		unsigned nr_pages, const struct iomap_ops *ops)
+{
+	struct iomap_readpage_ctx ctx = {
+		.pages		= pages,
+		.is_readahead	= true,
+	};
+	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
+	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
+	loff_t length = last - pos + PAGE_SIZE, ret = 0;
+
+	while (length > 0) {
+		ret = iomap_apply(mapping->host, pos, length, 0, ops,
+				&ctx, iomap_readpages_actor);
+		if (ret <= 0) {
+			WARN_ON_ONCE(ret == 0);
+			goto done;
+		}
+		pos += ret;
+		length -= ret;
+	}
+	ret = 0;
+done:
+	if (ctx.bio)
+		submit_bio(ctx.bio);
+	if (ctx.cur_page) {
+		if (!ctx.cur_page_in_bio)
+			unlock_page(ctx.cur_page);
+		put_page(ctx.cur_page);
+	}
+
+	/*
+	 * Check that we didn't lose a page due to the arcane calling
+	 * conventions..
+	 */
+	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iomap_readpages);
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 1f36523d448a..30d314407f66 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -99,6 +99,9 @@ struct iomap_ops {
 
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
+int iomap_readpage(struct page *page, const struct iomap_ops *ops);
+int iomap_readpages(struct address_space *mapping, struct list_head *pages,
+		unsigned nr_pages, const struct iomap_ops *ops);
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
-- 
cgit v1.2.3


From c03cea42149de56fbae2301d7123daaa2cfe80e2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Jun 2018 15:10:58 -0700
Subject: iomap: add initial support for writes without buffer heads

For now just limited to blocksize == PAGE_SIZE, where we can simply read
in the full page in write begin, and just set the whole page dirty after
copying data into it.  This code is enabled by default and XFS will now
be fed pages without buffer heads in ->writepage and ->writepages.

If a file system sets the IOMAP_F_BUFFER_HEAD flag on the iomap the old
path will still be used; this both helps the transition in XFS and
prepares for the gfs2 migration to the iomap infrastructure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 115 +++++++++++++++++++++++++++++++++++++++++++++++---
 fs/xfs/xfs_iomap.c    |   6 ++-
 include/linux/iomap.h |   2 +
 3 files changed, 114 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/iomap.c b/fs/iomap.c
index 4f10c6b1cf6d..2ebff76039b5 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -348,6 +348,48 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 		truncate_pagecache_range(inode, max(pos, i_size), pos + len);
 }
 
+static int
+iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
+		unsigned poff, unsigned plen, unsigned from, unsigned to,
+		struct iomap *iomap)
+{
+	struct bio_vec bvec;
+	struct bio bio;
+
+	if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
+		zero_user_segments(page, poff, from, to, poff + plen);
+		return 0;
+	}
+
+	bio_init(&bio, &bvec, 1);
+	bio.bi_opf = REQ_OP_READ;
+	bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
+	bio_set_dev(&bio, iomap->bdev);
+	__bio_add_page(&bio, page, plen, poff);
+	return submit_bio_wait(&bio);
+}
+
+static int
+__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
+		struct page *page, struct iomap *iomap)
+{
+	loff_t block_size = i_blocksize(inode);
+	loff_t block_start = pos & ~(block_size - 1);
+	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
+	unsigned poff = block_start & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
+	unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
+
+	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+
+	if (PageUptodate(page))
+		return 0;
+	if (from <= poff && to >= poff + plen)
+		return 0;
+	return iomap_read_page_sync(inode, block_start, page,
+			poff, plen, from, to, iomap);
+}
+
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, struct iomap *iomap)
@@ -367,9 +409,10 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 
 	if (iomap->type == IOMAP_INLINE)
 		iomap_read_inline_data(inode, page, iomap);
-	else
+	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
 		status = __block_write_begin_int(page, pos, len, NULL, iomap);
-
+	else
+		status = __iomap_write_begin(inode, pos, len, page, iomap);
 	if (unlikely(status)) {
 		unlock_page(page);
 		put_page(page);
@@ -382,6 +425,57 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	return status;
 }
 
+int
+iomap_set_page_dirty(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+	int newly_dirty;
+
+	if (unlikely(!mapping))
+		return !TestSetPageDirty(page);
+
+	/*
+	 * Lock out page->mem_cgroup migration to keep PageDirty
+	 * synchronized with per-memcg dirty page counters.
+	 */
+	lock_page_memcg(page);
+	newly_dirty = !TestSetPageDirty(page);
+	if (newly_dirty)
+		__set_page_dirty(page, mapping, 0);
+	unlock_page_memcg(page);
+
+	if (newly_dirty)
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	return newly_dirty;
+}
+EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
+
+static int
+__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
+		unsigned copied, struct page *page, struct iomap *iomap)
+{
+	flush_dcache_page(page);
+
+	/*
+	 * The blocks that were entirely written will now be uptodate, so we
+	 * don't have to worry about a readpage reading them and overwriting a
+	 * partial write.  However if we have encountered a short write and only
+	 * partially written into a block, it will not be marked uptodate, so a
+	 * readpage might come in and destroy our partial write.
+	 *
+	 * Do the simplest thing, and just treat any short write to a non
+	 * uptodate page as a zero-length write, and force the caller to redo
+	 * the whole thing.
+	 */
+	if (unlikely(copied < len && !PageUptodate(page))) {
+		copied = 0;
+	} else {
+		SetPageUptodate(page);
+		iomap_set_page_dirty(page);
+	}
+	return __generic_write_end(inode, pos, copied, page);
+}
+
 static int
 iomap_write_end_inline(struct inode *inode, struct page *page,
 		struct iomap *iomap, loff_t pos, unsigned copied)
@@ -408,9 +502,11 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 
 	if (iomap->type == IOMAP_INLINE) {
 		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
-	} else {
+	} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
 		ret = generic_write_end(NULL, inode->i_mapping, pos, len,
 				copied, page, NULL);
+	} else {
+		ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
 	}
 
 	if (iomap->page_done)
@@ -703,11 +799,16 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 	struct page *page = data;
 	int ret;
 
-	ret = __block_write_begin_int(page, pos, length, NULL, iomap);
-	if (ret)
-		return ret;
+	if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+		if (ret)
+			return ret;
+		block_commit_write(page, 0, length);
+	} else {
+		WARN_ON_ONCE(!PageUptodate(page));
+		WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+	}
 
-	block_commit_write(page, 0, length);
 	return length;
 }
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 49f5492eed3b..8a3613d576af 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -626,7 +626,7 @@ retry:
 	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
 	 * them out if the write happens to fail.
 	 */
-	iomap->flags = IOMAP_F_NEW;
+	iomap->flags |= IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1019,6 +1019,8 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
+	iomap->flags |= IOMAP_F_BUFFER_HEAD;
+
 	if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
 			!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		/* Reserve delalloc blocks for regular writeback. */
@@ -1119,7 +1121,7 @@ xfs_file_iomap_begin(
 	if (error)
 		return error;
 
-	iomap->flags = IOMAP_F_NEW;
+	iomap->flags |= IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 
 out_finish:
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 30d314407f66..5eb9ca8d7ce5 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -30,6 +30,7 @@ struct vm_fault;
  */
 #define IOMAP_F_NEW		0x01	/* blocks have been newly allocated */
 #define IOMAP_F_DIRTY		0x02	/* uncommitted metadata */
+#define IOMAP_F_BUFFER_HEAD	0x04	/* file system requires buffer heads */
 
 /*
  * Flags that only need to be reported for IOMAP_REPORT requests:
@@ -102,6 +103,7 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
 int iomap_readpages(struct address_space *mapping, struct list_head *pages,
 		unsigned nr_pages, const struct iomap_ops *ops);
+int iomap_set_page_dirty(struct page *page);
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
-- 
cgit v1.2.3


From bfc18e389c7a09fbbbed6bf4032396685b14246e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:04 +0100
Subject: atomics/treewide: Rename __atomic_add_unless() =>
 atomic_fetch_add_unless()

While __atomic_add_unless() was originally intended as a building-block
for atomic_add_unless(), it's now used in a number of places around the
kernel. It's the only common atomic operation named __atomic*(), rather
than atomic_*(), and for consistency it would be better named
atomic_fetch_add_unless().

This lack of consistency is slightly confusing, and gets in the way of
scripting atomics. Given that, let's clean things up and promote it to
an official part of the atomics API, in the form of
atomic_fetch_add_unless().

This patch converts definitions and invocations over to the new name,
including the instrumented version, using the following script:

  ----
  git grep -w __atomic_add_unless | while read line; do
  sed -i '{s/\<__atomic_add_unless\>/atomic_fetch_add_unless/}' "${line%%:*}";
  done
  git grep -w __arch_atomic_add_unless | while read line; do
  sed -i '{s/\<__arch_atomic_add_unless\>/arch_atomic_fetch_add_unless/}' "${line%%:*}";
  done
  ----

Note that we do not have atomic{64,_long}_fetch_add_unless(), which will
be introduced by later patches.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Palmer Dabbelt <palmer@sifive.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-2-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           | 4 ++--
 arch/arc/include/asm/atomic.h             | 4 ++--
 arch/arm/include/asm/atomic.h             | 4 ++--
 arch/arm64/include/asm/atomic.h           | 2 +-
 arch/h8300/include/asm/atomic.h           | 2 +-
 arch/hexagon/include/asm/atomic.h         | 4 ++--
 arch/ia64/include/asm/atomic.h            | 2 +-
 arch/m68k/include/asm/atomic.h            | 2 +-
 arch/mips/include/asm/atomic.h            | 4 ++--
 arch/openrisc/include/asm/atomic.h        | 4 ++--
 arch/parisc/include/asm/atomic.h          | 4 ++--
 arch/powerpc/include/asm/atomic.h         | 8 ++++----
 arch/riscv/include/asm/atomic.h           | 4 ++--
 arch/s390/include/asm/atomic.h            | 2 +-
 arch/sh/include/asm/atomic.h              | 4 ++--
 arch/sparc/include/asm/atomic_32.h        | 2 +-
 arch/sparc/include/asm/atomic_64.h        | 2 +-
 arch/sparc/lib/atomic32.c                 | 4 ++--
 arch/x86/include/asm/atomic.h             | 4 ++--
 arch/xtensa/include/asm/atomic.h          | 4 ++--
 drivers/block/rbd.c                       | 2 +-
 drivers/infiniband/core/rdma_core.c       | 2 +-
 fs/afs/rxrpc.c                            | 2 +-
 include/asm-generic/atomic-instrumented.h | 4 ++--
 include/asm-generic/atomic.h              | 4 ++--
 include/linux/atomic.h                    | 2 +-
 kernel/bpf/syscall.c                      | 4 ++--
 net/rxrpc/call_object.c                   | 2 +-
 net/rxrpc/conn_object.c                   | 4 ++--
 net/rxrpc/local_object.c                  | 2 +-
 net/rxrpc/peer_object.c                   | 2 +-
 31 files changed, 50 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 767bfdd42992..392b15a4dd4f 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -206,7 +206,7 @@ ATOMIC_OPS(xor, xor)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -214,7 +214,7 @@ ATOMIC_OPS(xor, xor)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, new, old;
 	smp_mb();
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 11859287c52a..67121b5ff3a3 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -309,7 +309,7 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 #undef ATOMIC_OP
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -317,7 +317,7 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v
  */
-#define __atomic_add_unless(v, a, u)					\
+#define atomic_fetch_add_unless(v, a, u)					\
 ({									\
 	int c, old;							\
 									\
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 66d0e215a773..9d56d0727c9b 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -130,7 +130,7 @@ static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
 }
 #define atomic_cmpxchg_relaxed		atomic_cmpxchg_relaxed
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int oldval, newval;
 	unsigned long tmp;
@@ -215,7 +215,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index c0235e0ff849..264d20339f74 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -125,7 +125,7 @@
 #define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
 #define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
 #define atomic_add_negative(i, v)	(atomic_add_return((i), (v)) < 0)
-#define __atomic_add_unless(v, a, u)	___atomic_add_unless(v, a, u,)
+#define atomic_fetch_add_unless(v, a, u)	___atomic_add_unless(v, a, u,)
 #define atomic_andnot			atomic_andnot
 
 /*
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index b174dec099bf..5c856887fdf2 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -94,7 +94,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
 	h8300flags flags;
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index fb3dfb2a667e..287aa9f394f3 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -164,7 +164,7 @@ ATOMIC_OPS(xor)
 #undef ATOMIC_OP
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer to value
  * @a: amount to add
  * @u: unless value is equal to u
@@ -173,7 +173,7 @@ ATOMIC_OPS(xor)
  *
  */
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int __oldval;
 	register int tmp;
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 2524fb60fbc2..9d2ddde5f9d5 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -215,7 +215,7 @@ ATOMIC64_FETCH_OP(xor, ^)
 	(cmpxchg(&((v)->counter), old, new))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index e993e2860ee1..8022d9ea1213 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -211,7 +211,7 @@ static inline int atomic_add_negative(int i, atomic_t *v)
 	return c != 0;
 }
 
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 0ab176bdb8e8..02fc1553cf9b 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -275,7 +275,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -283,7 +283,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h
index 146e1660f00e..b589fac39b92 100644
--- a/arch/openrisc/include/asm/atomic.h
+++ b/arch/openrisc/include/asm/atomic.h
@@ -100,7 +100,7 @@ ATOMIC_OP(xor)
  *
  * This is often used through atomic_inc_not_zero()
  */
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int old, tmp;
 
@@ -119,7 +119,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 
 	return old;
 }
-#define __atomic_add_unless	__atomic_add_unless
+#define atomic_fetch_add_unless	atomic_fetch_add_unless
 
 #include <asm-generic/atomic.h>
 
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 88bae6676c9b..7748abced766 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -78,7 +78,7 @@ static __inline__ int atomic_read(const atomic_t *v)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -86,7 +86,7 @@ static __inline__ int atomic_read(const atomic_t *v)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 682b3e6a1e21..1483261080a1 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -218,7 +218,7 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
 #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -226,13 +226,13 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int t;
 
 	__asm__ __volatile__ (
 	PPC_ATOMIC_ENTRY_BARRIER
-"1:	lwarx	%0,0,%1		# __atomic_add_unless\n\
+"1:	lwarx	%0,0,%1		# atomic_fetch_add_unless\n\
 	cmpw	0,%0,%3 \n\
 	beq	2f \n\
 	add	%0,%2,%0 \n"
@@ -538,7 +538,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 
 	__asm__ __volatile__ (
 	PPC_ATOMIC_ENTRY_BARRIER
-"1:	ldarx	%0,0,%1		# __atomic_add_unless\n\
+"1:	ldarx	%0,0,%1		# atomic_fetch_add_unless\n\
 	cmpd	0,%0,%3 \n\
 	beq	2f \n\
 	add	%0,%2,%0 \n"
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 855115ace98c..739e810c857e 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -332,7 +332,7 @@ ATOMIC_OP(dec_and_test, dec, ==, 0, 64)
 #undef ATOMIC_OP
 
 /* This is required to provide a full barrier on success. */
-static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
        int prev, rc;
 
@@ -381,7 +381,7 @@ static __always_inline int atomic64_add_unless(atomic64_t *v, long a, long u)
  */
 static __always_inline int atomic_inc_not_zero(atomic_t *v)
 {
-        return __atomic_add_unless(v, 1, 0);
+        return atomic_fetch_add_unless(v, 1, 0);
 }
 
 #ifndef CONFIG_GENERIC_ATOMIC64
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 4b55532f15c4..c2858cdd8c29 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -90,7 +90,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return __atomic_cmpxchg(&v->counter, old, new);
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 0fd0099f43cc..ef45931ebac5 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -46,7 +46,7 @@
 #define atomic_cmpxchg(v, o, n)		(cmpxchg(&((v)->counter), (o), (n)))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -54,7 +54,7 @@
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index d13ce517f4b9..a58f4b43bcc7 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -27,7 +27,7 @@ int atomic_fetch_or(int, atomic_t *);
 int atomic_fetch_xor(int, atomic_t *);
 int atomic_cmpxchg(atomic_t *, int, int);
 int atomic_xchg(atomic_t *, int);
-int __atomic_add_unless(atomic_t *, int, int);
+int atomic_fetch_add_unless(atomic_t *, int, int);
 void atomic_set(atomic_t *, int);
 
 #define atomic_set_release(v, i)	atomic_set((v), (i))
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 28db058d471b..f416fd3d2708 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -89,7 +89,7 @@ static inline int atomic_xchg(atomic_t *v, int new)
 	return xchg(&v->counter, new);
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 465a901a0ada..281fa634bb1a 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -95,7 +95,7 @@ int atomic_cmpxchg(atomic_t *v, int old, int new)
 }
 EXPORT_SYMBOL(atomic_cmpxchg);
 
-int __atomic_add_unless(atomic_t *v, int a, int u)
+int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
 	unsigned long flags;
@@ -107,7 +107,7 @@ int __atomic_add_unless(atomic_t *v, int a, int u)
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
 	return ret;
 }
-EXPORT_SYMBOL(__atomic_add_unless);
+EXPORT_SYMBOL(atomic_fetch_add_unless);
 
 /* Atomic operations are already serializing */
 void atomic_set(atomic_t *v, int i)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 0db6bec95489..84ed0bd76aef 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -254,7 +254,7 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
 }
 
 /**
- * __arch_atomic_add_unless - add unless the number is already a given value
+ * arch_atomic_fetch_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -262,7 +262,7 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
  * Atomically adds @a to @v, so long as @v was not already @u.
  * Returns the old value of @v.
  */
-static __always_inline int __arch_atomic_add_unless(atomic_t *v, int a, int u)
+static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c = arch_atomic_read(v);
 
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index e7a23f2a519a..4188e56c06c9 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -275,7 +275,7 @@ ATOMIC_OPS(xor)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -283,7 +283,7 @@ ATOMIC_OPS(xor)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index fa0729c1e776..d81c653b9bf6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -61,7 +61,7 @@ static int atomic_inc_return_safe(atomic_t *v)
 {
 	unsigned int counter;
 
-	counter = (unsigned int)__atomic_add_unless(v, 1, 0);
+	counter = (unsigned int)atomic_fetch_add_unless(v, 1, 0);
 	if (counter <= (unsigned int)INT_MAX)
 		return (int)counter;
 
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index a6e904973ba8..475910ffbcb6 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -121,7 +121,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
 	 * this lock.
 	 */
 	if (!exclusive)
-		return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
+		return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
 			-EBUSY : 0;
 
 	/* lock is either WRITE or DESTROY - should be exclusive */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a1b18082991b..183cc5418722 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -648,7 +648,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
 	trace_afs_notify_call(rxcall, call);
 	call->need_attention = true;
 
-	u = __atomic_add_unless(&call->usage, 1, 0);
+	u = atomic_fetch_add_unless(&call->usage, 1, 0);
 	if (u != 0) {
 		trace_afs_call(call, afs_call_trace_wake, u,
 			       atomic_read(&call->net->nr_outstanding_calls),
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index ec07f23678ea..b8b14cc2df6c 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -84,10 +84,10 @@ static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 ne
 }
 #endif
 
-static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	kasan_check_write(v, sizeof(*v));
-	return __arch_atomic_add_unless(v, a, u);
+	return arch_atomic_fetch_add_unless(v, a, u);
 }
 
 
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index abe6dd9ca2a8..10051ed6d088 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -221,8 +221,8 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
-#ifndef __atomic_add_unless
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+#ifndef atomic_fetch_add_unless
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 01ce3997cb42..9cc982936675 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -530,7 +530,7 @@
  */
 static inline int atomic_add_unless(atomic_t *v, int a, int u)
 {
-	return __atomic_add_unless(v, a, u) != u;
+	return atomic_fetch_add_unless(v, a, u) != u;
 }
 
 /**
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35dc466641f2..f12db70d3bf3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -575,7 +575,7 @@ static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
 {
 	int refold;
 
-	refold = __atomic_add_unless(&map->refcnt, 1, 0);
+	refold = atomic_fetch_add_unless(&map->refcnt, 1, 0);
 
 	if (refold >= BPF_MAX_REFCNT) {
 		__bpf_map_put(map, false);
@@ -1142,7 +1142,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
 {
 	int refold;
 
-	refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);
+	refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0);
 
 	if (refold >= BPF_MAX_REFCNT) {
 		__bpf_prog_put(prog, false);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index f6734d8cb01a..9486293fef5c 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -415,7 +415,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
 bool rxrpc_queue_call(struct rxrpc_call *call)
 {
 	const void *here = __builtin_return_address(0);
-	int n = __atomic_add_unless(&call->usage, 1, 0);
+	int n = atomic_fetch_add_unless(&call->usage, 1, 0);
 	if (n == 0)
 		return false;
 	if (rxrpc_queue_work(&call->processor))
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 4c77a78a252a..77440a356b14 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -266,7 +266,7 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn)
 bool rxrpc_queue_conn(struct rxrpc_connection *conn)
 {
 	const void *here = __builtin_return_address(0);
-	int n = __atomic_add_unless(&conn->usage, 1, 0);
+	int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
 	if (n == 0)
 		return false;
 	if (rxrpc_queue_work(&conn->processor))
@@ -309,7 +309,7 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
 	const void *here = __builtin_return_address(0);
 
 	if (conn) {
-		int n = __atomic_add_unless(&conn->usage, 1, 0);
+		int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
 		if (n > 0)
 			trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here);
 		else
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index b493e6b62740..777c3ed4cfc0 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -305,7 +305,7 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
 	const void *here = __builtin_return_address(0);
 
 	if (local) {
-		int n = __atomic_add_unless(&local->usage, 1, 0);
+		int n = atomic_fetch_add_unless(&local->usage, 1, 0);
 		if (n > 0)
 			trace_rxrpc_local(local, rxrpc_local_got, n + 1, here);
 		else
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 1b7e8107b3ae..1cf3b408017a 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -406,7 +406,7 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
 	const void *here = __builtin_return_address(0);
 
 	if (peer) {
-		int n = __atomic_add_unless(&peer->usage, 1, 0);
+		int n = atomic_fetch_add_unless(&peer->usage, 1, 0);
 		if (n > 0)
 			trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here);
 		else
-- 
cgit v1.2.3


From f85c10e24ab9fd8ccb6de3d6061a3110ff3581df Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Wed, 13 Jun 2018 08:52:47 -0500
Subject: gfs2: eliminate rs_inum and reduce the size of gfs2 inodes

Before this patch, block reservations kept track of the inode
number. At one point, that was a valid thing to do. However, since
we made the reservation a part of the inode (rather than a pointer
to a separate allocated object) the reservation can determine the
inode number by using container_of. This saves us a little memory
in our inode.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Reviewed-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/incore.h     | 1 -
 fs/gfs2/rgrp.c       | 5 +++--
 fs/gfs2/trace_gfs2.h | 3 ++-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index d2ad817e089f..e9cd2cc292d3 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -295,7 +295,6 @@ struct gfs2_blkreserv {
 	struct rb_node rs_node;       /* link to other block reservations */
 	struct gfs2_rbm rs_rbm;       /* Start of reservation */
 	u32 rs_free;                  /* how many blocks are still free */
-	u64 rs_inum;                  /* Inode number for reservation */
 };
 
 /*
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6bc5cfe710d1..7a001f6e8aee 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -607,8 +607,10 @@ int gfs2_rsqa_alloc(struct gfs2_inode *ip)
 
 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
 {
+	struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);
+
 	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
-		       (unsigned long long)rs->rs_inum,
+		       (unsigned long long)ip->i_no_addr,
 		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
 		       rs->rs_rbm.offset, rs->rs_free);
 }
@@ -1528,7 +1530,6 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
 	if (ret == 0) {
 		rs->rs_rbm = rbm;
 		rs->rs_free = extlen;
-		rs->rs_inum = ip->i_no_addr;
 		rs_insert(ip);
 	} else {
 		if (goal == rgd->rd_last_alloc + rgd->rd_data0)
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index cb10b95efe0f..e0025258107a 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -606,7 +606,8 @@ TRACE_EVENT(gfs2_rs,
 		__entry->rd_addr	= rs->rs_rbm.rgd->rd_addr;
 		__entry->rd_free_clone	= rs->rs_rbm.rgd->rd_free_clone;
 		__entry->rd_reserved	= rs->rs_rbm.rgd->rd_reserved;
-		__entry->inum		= rs->rs_inum;
+		__entry->inum		= container_of(rs, struct gfs2_inode,
+						       i_res)->i_no_addr;
 		__entry->start		= gfs2_rbm_to_block(&rs->rs_rbm);
 		__entry->free		= rs->rs_free;
 		__entry->func		= func;
-- 
cgit v1.2.3


From 9e1a9ecd13b9bb421c88135b178577caf4d54f6a Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 7 Jun 2018 11:56:46 +0100
Subject: gfs2: Don't withdraw under a spin lock

In two places, the gfs2_io_error_bh macro is called while holding the
sd_ail_lock spin lock.  This isn't allowed because gfs2_io_error_bh
withdraws the filesystem, which can sleep because it issues a uevent.
To fix that, add a gfs2_io_error_bh_wd macro that does withdraw the
filesystem and change gfs2_io_error_bh to not withdraw the filesystem.
In those places where the new gfs2_io_error_bh is used, withdraw the
filesystem after releasing sd_ail_lock.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Reviewed-by: Andrew Price <anprice@redhat.com>
---
 fs/gfs2/log.c     | 26 +++++++++++++++++++-------
 fs/gfs2/lops.c    |  2 +-
 fs/gfs2/meta_io.c |  4 ++--
 fs/gfs2/util.c    | 38 ++++++++++++++++++++------------------
 fs/gfs2/util.h    | 10 +++++++---
 5 files changed, 49 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 0248835625f1..a767fad02386 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -92,7 +92,8 @@ static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
 
 static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
 			       struct writeback_control *wbc,
-			       struct gfs2_trans *tr)
+			       struct gfs2_trans *tr,
+			       bool *withdraw)
 __releases(&sdp->sd_ail_lock)
 __acquires(&sdp->sd_ail_lock)
 {
@@ -107,8 +108,10 @@ __acquires(&sdp->sd_ail_lock)
 		gfs2_assert(sdp, bd->bd_tr == tr);
 
 		if (!buffer_busy(bh)) {
-			if (!buffer_uptodate(bh))
+			if (!buffer_uptodate(bh)) {
 				gfs2_io_error_bh(sdp, bh);
+				*withdraw = true;
+			}
 			list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
 			continue;
 		}
@@ -148,6 +151,7 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
 	struct list_head *head = &sdp->sd_ail1_list;
 	struct gfs2_trans *tr;
 	struct blk_plug plug;
+	bool withdraw = false;
 
 	trace_gfs2_ail_flush(sdp, wbc, 1);
 	blk_start_plug(&plug);
@@ -156,11 +160,13 @@ restart:
 	list_for_each_entry_reverse(tr, head, tr_list) {
 		if (wbc->nr_to_write <= 0)
 			break;
-		if (gfs2_ail1_start_one(sdp, wbc, tr))
+		if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw))
 			goto restart;
 	}
 	spin_unlock(&sdp->sd_ail_lock);
 	blk_finish_plug(&plug);
+	if (withdraw)
+		gfs2_lm_withdraw(sdp, NULL);
 	trace_gfs2_ail_flush(sdp, wbc, 0);
 }
 
@@ -188,7 +194,8 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
  *
  */
 
-static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
+static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+				bool *withdraw)
 {
 	struct gfs2_bufdata *bd, *s;
 	struct buffer_head *bh;
@@ -199,11 +206,12 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 		gfs2_assert(sdp, bd->bd_tr == tr);
 		if (buffer_busy(bh))
 			continue;
-		if (!buffer_uptodate(bh))
+		if (!buffer_uptodate(bh)) {
 			gfs2_io_error_bh(sdp, bh);
+			*withdraw = true;
+		}
 		list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
 	}
-
 }
 
 /**
@@ -218,10 +226,11 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
 	struct gfs2_trans *tr, *s;
 	int oldest_tr = 1;
 	int ret;
+	bool withdraw = false;
 
 	spin_lock(&sdp->sd_ail_lock);
 	list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
-		gfs2_ail1_empty_one(sdp, tr);
+		gfs2_ail1_empty_one(sdp, tr, &withdraw);
 		if (list_empty(&tr->tr_ail1_list) && oldest_tr)
 			list_move(&tr->tr_list, &sdp->sd_ail2_list);
 		else
@@ -230,6 +239,9 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
 	ret = list_empty(&sdp->sd_ail1_list);
 	spin_unlock(&sdp->sd_ail_lock);
 
+	if (withdraw)
+		gfs2_lm_withdraw(sdp, "fatal: I/O error(s)\n");
+
 	return ret;
 }
 
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4d6567990baf..f2567f958d00 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -49,7 +49,7 @@ void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
 	if (test_set_buffer_pinned(bh))
 		gfs2_assert_withdraw(sdp, 0);
 	if (!buffer_uptodate(bh))
-		gfs2_io_error_bh(sdp, bh);
+		gfs2_io_error_bh_wd(sdp, bh);
 	bd = bh->b_private;
 	/* If this buffer is in the AIL and it has already been written
 	 * to in-place disk block, remove it from the AIL.
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 52de1036d9f9..be9c0bf697fe 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -293,7 +293,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 	if (unlikely(!buffer_uptodate(bh))) {
 		struct gfs2_trans *tr = current->journal_info;
 		if (tr && test_bit(TR_TOUCHED, &tr->tr_flags))
-			gfs2_io_error_bh(sdp, bh);
+			gfs2_io_error_bh_wd(sdp, bh);
 		brelse(bh);
 		*bhp = NULL;
 		return -EIO;
@@ -320,7 +320,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
 	if (!buffer_uptodate(bh)) {
 		struct gfs2_trans *tr = current->journal_info;
 		if (tr && test_bit(TR_TOUCHED, &tr->tr_flags))
-			gfs2_io_error_bh(sdp, bh);
+			gfs2_io_error_bh_wd(sdp, bh);
 		return -EIO;
 	}
 	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 763d659db91b..59c811de0dc7 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -46,14 +46,16 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
 	    test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
 		return 0;
 
-	va_start(args, fmt);
+	if (fmt) {
+		va_start(args, fmt);
 
-	vaf.fmt = fmt;
-	vaf.va = &args;
+		vaf.fmt = fmt;
+		vaf.va = &args;
 
-	fs_err(sdp, "%pV", &vaf);
+		fs_err(sdp, "%pV", &vaf);
 
-	va_end(args);
+		va_end(args);
+	}
 
 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
 		fs_err(sdp, "about to withdraw this file system\n");
@@ -246,21 +248,21 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
 }
 
 /**
- * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw
- * Returns: -1 if this call withdrew the machine,
- *          0 if it was already withdrawn
+ * gfs2_io_error_bh_i - Flag a buffer I/O error
+ * @withdraw: withdraw the filesystem
  */
 
-int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
-		       const char *function, char *file, unsigned int line)
+void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
+			const char *function, char *file, unsigned int line,
+			bool withdraw)
 {
-	int rv;
-	rv = gfs2_lm_withdraw(sdp,
-			      "fatal: I/O error\n"
-			      "  block = %llu\n"
-			      "  function = %s, file = %s, line = %u\n",
-			      (unsigned long long)bh->b_blocknr,
-			      function, file, line);
-	return rv;
+	fs_err(sdp,
+	       "fatal: I/O error\n"
+	       "  block = %llu\n"
+	       "  function = %s, file = %s, line = %u\n",
+	       (unsigned long long)bh->b_blocknr,
+	       function, file, line);
+	if (withdraw)
+		gfs2_lm_withdraw(sdp, NULL);
 }
 
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 3926f95a6eb7..96ac4aba4738 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -136,11 +136,15 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
 gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
 
 
-int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
-		       const char *function, char *file, unsigned int line);
+void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
+			const char *function, char *file, unsigned int line,
+			bool withdraw);
+
+#define gfs2_io_error_bh_wd(sdp, bh) \
+gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true);
 
 #define gfs2_io_error_bh(sdp, bh) \
-gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__);
+gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false);
 
 
 extern struct kmem_cache *gfs2_glock_cachep;
-- 
cgit v1.2.3


From 00251a16d7f9eb380437b402def05cd7c1b16c09 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 18 Jun 2018 16:34:59 +0100
Subject: gfs2: Minor clarification to __gfs2_punch_hole

Rename end_off to end_len to make the code less confusing.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/bmap.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index ed6699705c13..c7287afeeef5 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2154,11 +2154,11 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
 		if (error)
 			goto out;
 	} else {
-		unsigned int start_off, end_off, blocksize;
+		unsigned int start_off, end_len, blocksize;
 
 		blocksize = i_blocksize(inode);
 		start_off = offset & (blocksize - 1);
-		end_off = (offset + length) & (blocksize - 1);
+		end_len = (offset + length) & (blocksize - 1);
 		if (start_off) {
 			unsigned int len = length;
 			if (length > blocksize - start_off)
@@ -2167,11 +2167,11 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
 			if (error)
 				goto out;
 			if (start_off + length < blocksize)
-				end_off = 0;
+				end_len = 0;
 		}
-		if (end_off) {
+		if (end_len) {
 			error = gfs2_block_zero_range(inode,
-				offset + length - end_off, end_off);
+				offset + length - end_len, end_len);
 			if (error)
 				goto out;
 		}
-- 
cgit v1.2.3


From ee9c7f9ae3d4fb9fb5c9cacbe3880d5dd66feb16 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 20 Jun 2018 15:15:24 -0500
Subject: gfs2: call ktime_get_coarse_real_ts64() directly

current_kernel_time64() is now just a deprecated wrapper around
ktime_get_coarse_real_ts64(), so let's just call that directly.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index a767fad02386..ee20ea42e7b5 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -701,7 +701,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
 	hash = ~crc32(~0, lh, LH_V1_SIZE);
 	lh->lh_hash = cpu_to_be32(hash);
 
-	tv = current_kernel_time64();
+	ktime_get_coarse_real_ts64(&tv);
 	lh->lh_nsec = cpu_to_be32(tv.tv_nsec);
 	lh->lh_sec = cpu_to_be64(tv.tv_sec);
 	addr = gfs2_log_bmap(sdp);
-- 
cgit v1.2.3


From 63a67a926e214dac94e29147c0f3d11499f655a1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Jun 2018 17:16:44 -0400
Subject: kill dentry_update_name_case()

the last user is gone

Spotted-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 27 ---------------------------
 include/linux/dcache.h |  2 --
 2 files changed, 29 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 0e8e5de3c48a..d9323af88ed0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2676,33 +2676,6 @@ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
 }
 EXPORT_SYMBOL(d_exact_alias);
 
-/**
- * dentry_update_name_case - update case insensitive dentry with a new name
- * @dentry: dentry to be updated
- * @name: new name
- *
- * Update a case insensitive dentry with new case of name.
- *
- * dentry must have been returned by d_lookup with name @name. Old and new
- * name lengths must match (ie. no d_compare which allows mismatched name
- * lengths).
- *
- * Parent inode i_mutex must be held over d_lookup and into this call (to
- * keep renames and concurrent inserts, and readdir(2) away).
- */
-void dentry_update_name_case(struct dentry *dentry, const struct qstr *name)
-{
-	BUG_ON(!inode_is_locked(dentry->d_parent->d_inode));
-	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
-
-	spin_lock(&dentry->d_lock);
-	write_seqcount_begin(&dentry->d_seq);
-	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
-	write_seqcount_end(&dentry->d_seq);
-	spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL(dentry_update_name_case);
-
 static void swap_names(struct dentry *dentry, struct dentry *target)
 {
 	if (unlikely(dname_external(target))) {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 66c6e17e61e5..cee70bf207fc 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -271,8 +271,6 @@ extern void d_rehash(struct dentry *);
  
 extern void d_add(struct dentry *, struct inode *);
 
-extern void dentry_update_name_case(struct dentry *, const struct qstr *);
-
 /* used for rename() and baskets */
 extern void d_move(struct dentry *, struct dentry *);
 extern void d_exchange(struct dentry *, struct dentry *);
-- 
cgit v1.2.3


From 50f307401113350a8d46ca45cace07a92c9bedc8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Jun 2018 20:27:29 -0400
Subject: hostfs_lookup: switch to d_splice_alias()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs_kern.c | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2597b290c2a5..444c7b170359 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -610,33 +610,21 @@ static struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
 	int err;
 
 	inode = hostfs_iget(ino->i_sb);
-	if (IS_ERR(inode)) {
-		err = PTR_ERR(inode);
+	if (IS_ERR(inode))
 		goto out;
-	}
 
 	err = -ENOMEM;
 	name = dentry_name(dentry);
-	if (name == NULL)
-		goto out_put;
-
-	err = read_name(inode, name);
-
-	__putname(name);
-	if (err == -ENOENT) {
+	if (name) {
+		err = read_name(inode, name);
+		__putname(name);
+	}
+	if (err) {
 		iput(inode);
-		inode = NULL;
+		inode = (err == -ENOENT) ? NULL : ERR_PTR(err);
 	}
-	else if (err)
-		goto out_put;
-
-	d_add(dentry, inode);
-	return NULL;
-
- out_put:
-	iput(inode);
  out:
-	return ERR_PTR(err);
+	return d_splice_alias(inode, dentry);
 }
 
 static int hostfs_link(struct dentry *to, struct inode *ino,
-- 
cgit v1.2.3


From 6ff84735070276d72af716e21c3214ee20d60e70 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 16 Jun 2018 22:11:44 -0700
Subject: time: Change types to new y2038 safe __kernel_itimerspec

The timer_settime/timer_gettime and timerfd_settime/timerfd_gettime APIs
use struct itimerspec at the user interface layer.  struct itimerspec is
not y2038-safe.  Change these interfaces to use the y2038-safe struct
__kernel_itimerspec instead.  This will help define new syscalls when
32-bit architectures select CONFIG_64BIT_TIME.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: viro@zeniv.linux.org.uk
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-api@vger.kernel.org
Cc: y2038@lists.linaro.org
Link: https://lkml.kernel.org/r/20180617051144.29756-4-deepa.kernel@gmail.com
---
 fs/timerfd.c               |  8 ++++----
 include/linux/syscalls.h   | 10 +++++-----
 kernel/time/posix-timers.c | 12 +++++++-----
 3 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/timerfd.c b/fs/timerfd.c
index d84a2bee4f82..8bb926253f88 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -533,8 +533,8 @@ static int do_timerfd_gettime(int ufd, struct itimerspec64 *t)
 }
 
 SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
-		const struct itimerspec __user *, utmr,
-		struct itimerspec __user *, otmr)
+		const struct __kernel_itimerspec __user *, utmr,
+		struct __kernel_itimerspec __user *, otmr)
 {
 	struct itimerspec64 new, old;
 	int ret;
@@ -550,7 +550,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 	return ret;
 }
 
-SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
+SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr)
 {
 	struct itimerspec64 kotmr;
 	int ret = do_timerfd_gettime(ufd, &kotmr);
@@ -559,7 +559,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
 	return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 		const struct compat_itimerspec __user *, utmr,
 		struct compat_itimerspec __user *, otmr)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 73810808cdf2..38b9ec152024 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -501,9 +501,9 @@ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
 /* fs/timerfd.c */
 asmlinkage long sys_timerfd_create(int clockid, int flags);
 asmlinkage long sys_timerfd_settime(int ufd, int flags,
-				    const struct itimerspec __user *utmr,
-				    struct itimerspec __user *otmr);
-asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
+				    const struct __kernel_itimerspec __user *utmr,
+				    struct __kernel_itimerspec __user *otmr);
+asmlinkage long sys_timerfd_gettime(int ufd, struct __kernel_itimerspec __user *otmr);
 
 /* fs/utimes.c */
 asmlinkage long sys_utimensat(int dfd, const char __user *filename,
@@ -568,10 +568,10 @@ asmlinkage long sys_timer_create(clockid_t which_clock,
 				 struct sigevent __user *timer_event_spec,
 				 timer_t __user * created_timer_id);
 asmlinkage long sys_timer_gettime(timer_t timer_id,
-				struct itimerspec __user *setting);
+				struct __kernel_itimerspec __user *setting);
 asmlinkage long sys_timer_getoverrun(timer_t timer_id);
 asmlinkage long sys_timer_settime(timer_t timer_id, int flags,
-				const struct itimerspec __user *new_setting,
+				const struct __kernel_itimerspec __user *new_setting,
 				struct itimerspec __user *old_setting);
 asmlinkage long sys_timer_delete(timer_t timer_id);
 asmlinkage long sys_clock_settime(clockid_t which_clock,
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index fcf90a10c43a..80d59333c76e 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -743,7 +743,7 @@ static int do_timer_gettime(timer_t timer_id,  struct itimerspec64 *setting)
 
 /* Get the time remaining on a POSIX.1b interval timer. */
 SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
-		struct itimerspec __user *, setting)
+		struct __kernel_itimerspec __user *, setting)
 {
 	struct itimerspec64 cur_setting;
 
@@ -755,7 +755,8 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 	return ret;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
+
 COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 		       struct compat_itimerspec __user *, setting)
 {
@@ -768,6 +769,7 @@ COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 	}
 	return ret;
 }
+
 #endif
 
 /*
@@ -906,8 +908,8 @@ retry:
 
 /* Set a POSIX.1b interval timer */
 SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
-		const struct itimerspec __user *, new_setting,
-		struct itimerspec __user *, old_setting)
+		const struct __kernel_itimerspec __user *, new_setting,
+		struct __kernel_itimerspec __user *, old_setting)
 {
 	struct itimerspec64 new_spec, old_spec;
 	struct itimerspec64 *rtn = old_setting ? &old_spec : NULL;
@@ -927,7 +929,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
 	return error;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
 		       struct compat_itimerspec __user *, new,
 		       struct compat_itimerspec __user *, old)
-- 
cgit v1.2.3


From b0c6108ecf64fce3987f80c1610472a56d480d9a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 16 May 2018 10:55:01 -0400
Subject: nfs_instantiate(): prevent multiple aliases for directory inode

Since NFS allows open-by-fhandle, we have to cope with the possibility
of mkdir vs. open-by-guessed-handle races.  A local filesystem could
decide what the inumber of the new object will be and insert a locked
inode with that inumber into icache _before_ the on-disk data structures
begin to look good and unlock it only once it has a dentry alias, so
that open-by-handle coming first would quietly fail and mkdir coming
first would have open-by-handle grab its dentry.

For NFS it's a non-starter - the icache key is server-supplied fhandle
and we do not get that until the object has been fully created on server.
We really have to deal with the possibility that open-by-handle gets
the in-core inode and attaches a dentry to it before mkdir does.

Solution: let nfs_mkdir() use d_splice_alias() to catch those.  We can
	* get an error.  Just return it to our caller.
	* get NULL - no preexisting dentry aliases, we'd just done what
d_add() would've done.  Success.
	* get a reference to preexisting alias.  In that case the alias
had been moved in place of nfs_mkdir() argument (and hashed there), while
nfs_mkdir() argument is left unhashed negative.  Which is just fine for
->mkdir() callers, all we need is to release the reference we'd got from
d_splice_alias() and report success.

Cc: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/dir.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7a9c14426855..df6fd4e5b068 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1641,6 +1641,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 	struct dentry *parent = dget_parent(dentry);
 	struct inode *dir = d_inode(parent);
 	struct inode *inode;
+	struct dentry *d;
 	int error = -EACCES;
 
 	d_drop(dentry);
@@ -1662,10 +1663,12 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 			goto out_error;
 	}
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
-	error = PTR_ERR(inode);
-	if (IS_ERR(inode))
+	d = d_splice_alias(inode, dentry);
+	if (IS_ERR(d)) {
+		error = PTR_ERR(d);
 		goto out_error;
-	d_add(dentry, inode);
+	}
+	dput(d);
 out:
 	dput(parent);
 	return 0;
-- 
cgit v1.2.3


From e876c445df4009d7b1ebdd2a92ca23566ca05440 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Jun 2018 20:33:59 -0400
Subject: hpfs: fix an inode leak in lookup, switch to d_splice_alias()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hpfs/dir.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index c83ece7facc5..d85230c84ef2 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -244,6 +244,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in
 	result = iget_locked(dir->i_sb, ino);
 	if (!result) {
 		hpfs_error(dir->i_sb, "hpfs_lookup: can't get inode");
+		result = ERR_PTR(-ENOMEM);
 		goto bail1;
 	}
 	if (result->i_state & I_NEW) {
@@ -266,6 +267,8 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in
 
 	if (de->has_acl || de->has_xtd_perm) if (!sb_rdonly(dir->i_sb)) {
 		hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures");
+		iput(result);
+		result = ERR_PTR(-EINVAL);
 		goto bail1;
 	}
 
@@ -301,29 +304,17 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in
 		}
 	}
 
+bail1:
 	hpfs_brelse4(&qbh);
 
 	/*
 	 * Made it.
 	 */
 
-	end:
-	end_add:
+end:
+end_add:
 	hpfs_unlock(dir->i_sb);
-	d_add(dentry, result);
-	return NULL;
-
-	/*
-	 * Didn't.
-	 */
-	bail1:
-	
-	hpfs_brelse4(&qbh);
-	
-	/*bail:*/
-
-	hpfs_unlock(dir->i_sb);
-	return ERR_PTR(-ENOENT);
+	return d_splice_alias(result, dentry);
 }
 
 const struct file_operations hpfs_dir_ops =
-- 
cgit v1.2.3


From 1823342a1f2b47a4e6f5667f67cd28ab6bc4d6cd Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 1 Jul 2018 13:56:54 -0700
Subject: configfs: replace strncpy with memcpy

gcc 8.1.0 complains:

fs/configfs/symlink.c:67:3: warning:
	'strncpy' output truncated before terminating nul copying as many
	bytes from a string as its length
fs/configfs/symlink.c: In function 'configfs_get_link':
fs/configfs/symlink.c:63:13: note: length computed here

Using strncpy() is indeed less than perfect since the length of data to
be copied has already been determined with strlen(). Replace strncpy()
with memcpy() to address the warning and optimize the code a little.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/configfs/symlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 78ffc2699993..a5c54af861f7 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -64,7 +64,7 @@ static void fill_item_path(struct config_item * item, char * buffer, int length)
 
 		/* back up enough to print this bus id with '/' */
 		length -= cur;
-		strncpy(buffer + length,config_item_name(p),cur);
+		memcpy(buffer + length, config_item_name(p), cur);
 		*(buffer + --length) = '/';
 	}
 }
-- 
cgit v1.2.3


From d505a96a3b16f46455035dc0296bc2da6014e163 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sun, 24 Jun 2018 10:43:49 +0100
Subject: gfs2: Further iomap cleanups

In gfs2_iomap_alloc, set the type of newly allocated extents to
IOMAP_MAPPED so that iomap_to_bh will set the bh states correctly:
otherwise, the bhs would not be marked as mapped, confusing
__mpage_writepage.  This means that we need to check for the IOMAP_F_NEW
flag in fallocate_chunk now.

Further clean up gfs2_iomap_get and implement gfs2_stuffed_iomap here
directly.  For reads beyond the end of the file, return holes instead of
failing with -ENOENT so that we can get rid of that special case in
gfs2_block_map.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/bmap.c | 74 +++++++++++++++++++++++++++++++++-------------------------
 fs/gfs2/file.c |  2 +-
 2 files changed, 43 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index ed6699705c13..33ee93344d18 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -750,6 +750,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 		}
 	} while (iomap->addr == IOMAP_NULL_ADDR);
 
+	iomap->type = IOMAP_MAPPED;
 	iomap->length = (u64)dblks << inode->i_blkbits;
 	ip->i_height = mp->mp_fheight;
 	gfs2_add_inode_blocks(&ip->i_inode, alloced);
@@ -759,17 +760,6 @@ out:
 	return ret;
 }
 
-static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-
-	iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
-		      sizeof(struct gfs2_dinode);
-	iomap->offset = 0;
-	iomap->length = i_size_read(inode);
-	iomap->type = IOMAP_INLINE;
-}
-
 #define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
 
 /**
@@ -789,37 +779,61 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	loff_t size = i_size_read(inode);
 	__be64 *ptr;
 	sector_t lblock;
 	sector_t lblock_stop;
 	int ret;
 	int eob;
 	u64 len;
-	struct buffer_head *bh;
+	struct buffer_head *dibh = NULL, *bh;
 	u8 height;
 
 	if (!length)
 		return -EINVAL;
 
+	down_read(&ip->i_rw_mutex);
+
+	ret = gfs2_meta_inode_buffer(ip, &dibh);
+	if (ret)
+		goto unlock;
+
 	if (gfs2_is_stuffed(ip)) {
-		if (flags & IOMAP_REPORT) {
-			if (pos >= i_size_read(inode))
-				return -ENOENT;
-			gfs2_stuffed_iomap(inode, iomap);
-			return 0;
+		if (flags & IOMAP_WRITE) {
+			loff_t max_size = gfs2_max_stuffed_size(ip);
+
+			if (pos + length > max_size)
+				goto unstuff;
+			iomap->length = max_size;
+		} else {
+			if (pos >= size) {
+				if (flags & IOMAP_REPORT) {
+					ret = -ENOENT;
+					goto unlock;
+				} else {
+					/* report a hole */
+					iomap->offset = pos;
+					iomap->length = length;
+					goto do_alloc;
+				}
+			}
+			iomap->length = size;
 		}
-		BUG_ON(!(flags & IOMAP_WRITE));
+		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
+			      sizeof(struct gfs2_dinode);
+		iomap->type = IOMAP_INLINE;
+		goto out;
 	}
+
+unstuff:
 	lblock = pos >> inode->i_blkbits;
 	iomap->offset = lblock << inode->i_blkbits;
 	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
 	len = lblock_stop - lblock + 1;
+	iomap->length = len << inode->i_blkbits;
 
-	down_read(&ip->i_rw_mutex);
-
-	ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]);
-	if (ret)
-		goto unlock;
+	get_bh(dibh);
+	mp->mp_bh[0] = dibh;
 
 	height = ip->i_height;
 	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
@@ -853,21 +867,23 @@ out:
 	iomap->bdev = inode->i_sb->s_bdev;
 unlock:
 	up_read(&ip->i_rw_mutex);
+	if (dibh)
+		brelse(dibh);
 	return ret;
 
 do_alloc:
 	iomap->addr = IOMAP_NULL_ADDR;
-	iomap->length = len << inode->i_blkbits;
 	iomap->type = IOMAP_HOLE;
-	iomap->flags = 0;
 	if (flags & IOMAP_REPORT) {
-		loff_t size = i_size_read(inode);
 		if (pos >= size)
 			ret = -ENOENT;
 		else if (height == ip->i_height)
 			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
 		else
 			iomap->length = size - pos;
+	} else if (!(flags & IOMAP_WRITE)) {
+		if (pos < size && height == ip->i_height)
+			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
 	}
 	goto out;
 }
@@ -941,12 +957,6 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
 	} else {
 		ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
 		release_metapath(&mp);
-
-		/* Return unmapped buffer beyond the end of file. */
-		if (ret == -ENOENT) {
-			ret = 0;
-			goto out;
-		}
 	}
 	if (ret)
 		goto out;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 7137db7b0119..6f6bbfbff13d 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -754,7 +754,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 		if (error)
 			goto out;
 		offset = iomap.offset + iomap.length;
-		if (iomap.type != IOMAP_HOLE)
+		if (!(iomap.flags & IOMAP_F_NEW))
 			continue;
 		error = sb_issue_zeroout(sb, iomap.addr >> inode->i_blkbits,
 					 iomap.length >> inode->i_blkbits,
-- 
cgit v1.2.3


From 64bc06bb32ee9cf458f432097113c8b495d75757 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sun, 24 Jun 2018 15:04:04 +0100
Subject: gfs2: iomap buffered write support

With the traditional page-based writes, blocks are allocated separately
for each page written to.  With iomap writes, we can allocate a lot more
blocks at once, with a fraction of the allocation overhead for each
page.

Split calculating the number of blocks that can be allocated at a given
position (gfs2_alloc_size) off from gfs2_iomap_alloc: that size
determines the number of blocks to allocate and reserve in the journal.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/aops.c |  20 ++--
 fs/gfs2/aops.h |  19 ++++
 fs/gfs2/bmap.c | 298 +++++++++++++++++++++++++++++++++++++++++++++++++++------
 fs/gfs2/file.c |  44 +++++++--
 4 files changed, 338 insertions(+), 43 deletions(-)
 create mode 100644 fs/gfs2/aops.h

(limited to 'fs')

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 35f5ee23566d..ecfbca9c88ff 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -22,6 +22,7 @@
 #include <linux/backing-dev.h>
 #include <linux/uio.h>
 #include <trace/events/writeback.h>
+#include <linux/sched/signal.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -36,10 +37,11 @@
 #include "super.h"
 #include "util.h"
 #include "glops.h"
+#include "aops.h"
 
 
-static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
-				   unsigned int from, unsigned int len)
+void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
+			    unsigned int from, unsigned int len)
 {
 	struct buffer_head *head = page_buffers(page);
 	unsigned int bsize = head->b_size;
@@ -462,7 +464,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping,
  * Returns: errno
  */
 
-static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
+int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 {
 	struct buffer_head *dibh;
 	u64 dsize = i_size_read(&ip->i_inode);
@@ -776,7 +778,7 @@ out_uninit:
  * adjust_fs_space - Adjusts the free space available due to gfs2_grow
  * @inode: the rindex inode
  */
-static void adjust_fs_space(struct inode *inode)
+void adjust_fs_space(struct inode *inode)
 {
 	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@@ -822,11 +824,11 @@ out:
  * This copies the data from the page into the inode block after
  * the inode data structure itself.
  *
- * Returns: errno
+ * Returns: copied bytes or errno
  */
-static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
-				  loff_t pos, unsigned copied,
-				  struct page *page)
+int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
+			   loff_t pos, unsigned copied,
+			   struct page *page)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	u64 to = pos + copied;
@@ -865,7 +867,7 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
  * The main write_end function for GFS2. We just put our locking around the VFS
  * provided functions.
  *
- * Returns: errno
+ * Returns: copied bytes or errno
  */
 
 static int gfs2_write_end(struct file *file, struct address_space *mapping,
diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
new file mode 100644
index 000000000000..fa8e5d0144dd
--- /dev/null
+++ b/fs/gfs2/aops.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Red Hat, Inc.  All rights reserved.
+ */
+
+#ifndef __AOPS_DOT_H__
+#define __AOPS_DOT_H__
+
+#include "incore.h"
+
+extern int stuffed_readpage(struct gfs2_inode *ip, struct page *page);
+extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
+				  loff_t pos, unsigned copied,
+				  struct page *page);
+extern void adjust_fs_space(struct inode *inode);
+extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
+				   unsigned int from, unsigned int len);
+
+#endif /* __AOPS_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 33ee93344d18..9a699c0a5df1 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -28,6 +28,7 @@
 #include "trans.h"
 #include "dir.h"
 #include "util.h"
+#include "aops.h"
 #include "trace_gfs2.h"
 
 /* This doesn't need to be that large as max 64 bit pointers in a 4k
@@ -41,6 +42,8 @@ struct metapath {
 	int mp_aheight; /* actual height (lookup height) */
 };
 
+static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
+
 /**
  * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
  * @ip: the inode
@@ -389,7 +392,7 @@ static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
 	return mp->mp_aheight - x - 1;
 }
 
-static inline void release_metapath(struct metapath *mp)
+static void release_metapath(struct metapath *mp)
 {
 	int i;
 
@@ -397,6 +400,7 @@ static inline void release_metapath(struct metapath *mp)
 		if (mp->mp_bh[i] == NULL)
 			break;
 		brelse(mp->mp_bh[i]);
+		mp->mp_bh[i] = NULL;
 	}
 }
 
@@ -609,11 +613,13 @@ enum alloc_state {
  *  ii) Indirect blocks to fill in lower part of the metadata tree
  * iii) Data blocks
  *
- * The function is in two parts. The first part works out the total
- * number of blocks which we need. The second part does the actual
- * allocation asking for an extent at a time (if enough contiguous free
- * blocks are available, there will only be one request per bmap call)
- * and uses the state machine to initialise the blocks in order.
+ * This function is called after gfs2_iomap_get, which works out the
+ * total number of blocks which we need via gfs2_alloc_size.
+ *
+ * We then do the actual allocation asking for an extent at a time (if
+ * enough contiguous free blocks are available, there will only be one
+ * allocation request per call) and use the state machine to initialise
+ * the blocks in order.
  *
  * Right now, this function will allocate at most one indirect block
  * worth of data -- with a default block size of 4K, that's slightly
@@ -633,39 +639,26 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 	struct buffer_head *dibh = mp->mp_bh[0];
 	u64 bn;
 	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
-	unsigned dblks = 0;
-	unsigned ptrs_per_blk;
+	size_t dblks = iomap->length >> inode->i_blkbits;
 	const unsigned end_of_metadata = mp->mp_fheight - 1;
 	int ret;
 	enum alloc_state state;
 	__be64 *ptr;
 	__be64 zero_bn = 0;
-	size_t maxlen = iomap->length >> inode->i_blkbits;
 
 	BUG_ON(mp->mp_aheight < 1);
 	BUG_ON(dibh == NULL);
+	BUG_ON(dblks < 1);
 
 	gfs2_trans_add_meta(ip->i_gl, dibh);
 
 	down_write(&ip->i_rw_mutex);
 
 	if (mp->mp_fheight == mp->mp_aheight) {
-		struct buffer_head *bh;
-		int eob;
-
-		/* Bottom indirect block exists, find unalloced extent size */
-		ptr = metapointer(end_of_metadata, mp);
-		bh = mp->mp_bh[end_of_metadata];
-		dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
-					   maxlen, &eob);
-		BUG_ON(dblks < 1);
+		/* Bottom indirect block exists */
 		state = ALLOC_DATA;
 	} else {
 		/* Need to allocate indirect blocks */
-		ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
-			sdp->sd_diptrs;
-		dblks = min(maxlen, (size_t)(ptrs_per_blk -
-					     mp->mp_list[end_of_metadata]));
 		if (mp->mp_fheight == ip->i_height) {
 			/* Writing into existing tree, extend tree down */
 			iblks = mp->mp_fheight - mp->mp_aheight;
@@ -762,6 +755,50 @@ out:
 
 #define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
 
+/**
+ * gfs2_alloc_size - Compute the maximum allocation size
+ * @inode: The inode
+ * @mp: The metapath
+ * @size: Requested size in blocks
+ *
+ * Compute the maximum size of the next allocation at @mp.
+ *
+ * Returns: size in blocks
+ */
+static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	const __be64 *first, *ptr, *end;
+
+	/*
+	 * For writes to stuffed files, this function is called twice via
+	 * gfs2_iomap_get, before and after unstuffing. The size we return the
+	 * first time needs to be large enough to get the reservation and
+	 * allocation sizes right.  The size we return the second time must
+	 * be exact or else gfs2_iomap_alloc won't do the right thing.
+	 */
+
+	if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
+		unsigned int maxsize = mp->mp_fheight > 1 ?
+			sdp->sd_inptrs : sdp->sd_diptrs;
+		maxsize -= mp->mp_list[mp->mp_fheight - 1];
+		if (size > maxsize)
+			size = maxsize;
+		return size;
+	}
+
+	first = metapointer(ip->i_height - 1, mp);
+	end = metaend(ip->i_height - 1, mp);
+	if (end - first > size)
+		end = first + size;
+	for (ptr = first; ptr < end; ptr++) {
+		if (*ptr)
+			break;
+	}
+	return ptr - first;
+}
+
 /**
  * gfs2_iomap_get - Map blocks from an inode to disk blocks
  * @inode: The inode
@@ -797,6 +834,7 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
 	ret = gfs2_meta_inode_buffer(ip, &dibh);
 	if (ret)
 		goto unlock;
+	iomap->private = dibh;
 
 	if (gfs2_is_stuffed(ip)) {
 		if (flags & IOMAP_WRITE) {
@@ -822,6 +860,7 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
 		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
 			      sizeof(struct gfs2_dinode);
 		iomap->type = IOMAP_INLINE;
+		iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
 		goto out;
 	}
 
@@ -867,7 +906,7 @@ out:
 	iomap->bdev = inode->i_sb->s_bdev;
 unlock:
 	up_read(&ip->i_rw_mutex);
-	if (dibh)
+	if (ret && dibh)
 		brelse(dibh);
 	return ret;
 
@@ -881,13 +920,168 @@ do_alloc:
 			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
 		else
 			iomap->length = size - pos;
-	} else if (!(flags & IOMAP_WRITE)) {
+	} else if (flags & IOMAP_WRITE) {
+		u64 alloc_size;
+
+		len = gfs2_alloc_size(inode, mp, len);
+		alloc_size = len << inode->i_blkbits;
+		if (alloc_size < iomap->length)
+			iomap->length = alloc_size;
+	} else {
 		if (pos < size && height == ip->i_height)
 			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
 	}
 	goto out;
 }
 
+static int gfs2_write_lock(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	int error;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+	error = gfs2_glock_nq(&ip->i_gh);
+	if (error)
+		goto out_uninit;
+	if (&ip->i_inode == sdp->sd_rindex) {
+		struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
+
+		error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
+					   GL_NOCACHE, &m_ip->i_gh);
+		if (error)
+			goto out_unlock;
+	}
+	return 0;
+
+out_unlock:
+	gfs2_glock_dq(&ip->i_gh);
+out_uninit:
+	gfs2_holder_uninit(&ip->i_gh);
+	return error;
+}
+
+static void gfs2_write_unlock(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+
+	if (&ip->i_inode == sdp->sd_rindex) {
+		struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
+
+		gfs2_glock_dq_uninit(&m_ip->i_gh);
+	}
+	gfs2_glock_dq_uninit(&ip->i_gh);
+}
+
+static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos,
+				unsigned copied, struct page *page,
+				struct iomap *iomap)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+
+	gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
+}
+
+static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
+				  loff_t length, unsigned flags,
+				  struct iomap *iomap)
+{
+	struct metapath mp = { .mp_aheight = 1, };
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
+	bool unstuff, alloc_required;
+	int ret;
+
+	ret = gfs2_write_lock(inode);
+	if (ret)
+		return ret;
+
+	unstuff = gfs2_is_stuffed(ip) &&
+		  pos + length > gfs2_max_stuffed_size(ip);
+
+	ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+	if (ret)
+		goto out_release;
+
+	alloc_required = unstuff || iomap->type == IOMAP_HOLE;
+
+	if (alloc_required || gfs2_is_jdata(ip))
+		gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
+				       &ind_blocks);
+
+	if (alloc_required) {
+		struct gfs2_alloc_parms ap = {
+			.target = data_blocks + ind_blocks
+		};
+
+		ret = gfs2_quota_lock_check(ip, &ap);
+		if (ret)
+			goto out_release;
+
+		ret = gfs2_inplace_reserve(ip, &ap);
+		if (ret)
+			goto out_qunlock;
+	}
+
+	rblocks = RES_DINODE + ind_blocks;
+	if (gfs2_is_jdata(ip))
+		rblocks += data_blocks;
+	if (ind_blocks || data_blocks)
+		rblocks += RES_STATFS + RES_QUOTA;
+	if (inode == sdp->sd_rindex)
+		rblocks += 2 * RES_STATFS;
+	if (alloc_required)
+		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
+
+	ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits);
+	if (ret)
+		goto out_trans_fail;
+
+	if (unstuff) {
+		ret = gfs2_unstuff_dinode(ip, NULL);
+		if (ret)
+			goto out_trans_end;
+		release_metapath(&mp);
+		brelse(iomap->private);
+		iomap->private = NULL;
+		ret = gfs2_iomap_get(inode, iomap->offset, iomap->length,
+				     flags, iomap, &mp);
+		if (ret)
+			goto out_trans_end;
+	}
+
+	if (iomap->type == IOMAP_HOLE) {
+		ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+		if (ret) {
+			gfs2_trans_end(sdp);
+			gfs2_inplace_release(ip);
+			punch_hole(ip, iomap->offset, iomap->length);
+			goto out_qunlock;
+		}
+	}
+	release_metapath(&mp);
+	if (gfs2_is_jdata(ip))
+		iomap->page_done = gfs2_iomap_journaled_page_done;
+	return 0;
+
+out_trans_end:
+	gfs2_trans_end(sdp);
+out_trans_fail:
+	if (alloc_required)
+		gfs2_inplace_release(ip);
+out_qunlock:
+	if (alloc_required)
+		gfs2_quota_unlock(ip);
+out_release:
+	if (iomap->private)
+		brelse(iomap->private);
+	release_metapath(&mp);
+	gfs2_write_unlock(inode);
+	return ret;
+}
+
 static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 			    unsigned flags, struct iomap *iomap)
 {
@@ -897,10 +1091,7 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 
 	trace_gfs2_iomap_start(ip, pos, length, flags);
 	if (flags & IOMAP_WRITE) {
-		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
-		if (!ret && iomap->type == IOMAP_HOLE)
-			ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
-		release_metapath(&mp);
+		ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
 	} else {
 		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
 		release_metapath(&mp);
@@ -909,8 +1100,59 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 	return ret;
 }
 
+static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+			  ssize_t written, unsigned flags, struct iomap *iomap)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct gfs2_trans *tr = current->journal_info;
+	struct buffer_head *dibh = iomap->private;
+
+	if (!(flags & IOMAP_WRITE))
+		goto out;
+
+	if (iomap->type != IOMAP_INLINE) {
+		gfs2_ordered_add_inode(ip);
+
+		if (tr->tr_num_buf_new)
+			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+		else
+			gfs2_trans_add_meta(ip->i_gl, dibh);
+	}
+
+	if (inode == sdp->sd_rindex) {
+		adjust_fs_space(inode);
+		sdp->sd_rindex_uptodate = 0;
+	}
+
+	gfs2_trans_end(sdp);
+	gfs2_inplace_release(ip);
+
+	if (length != written && (iomap->flags & IOMAP_F_NEW)) {
+		/* Deallocate blocks that were just allocated. */
+		loff_t blockmask = i_blocksize(inode) - 1;
+		loff_t end = (pos + length) & ~blockmask;
+
+		pos = (pos + written + blockmask) & ~blockmask;
+		if (pos < end) {
+			truncate_pagecache_range(inode, pos, end - 1);
+			punch_hole(ip, pos, end - pos);
+		}
+	}
+
+	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
+		gfs2_quota_unlock(ip);
+	gfs2_write_unlock(inode);
+
+out:
+	if (dibh)
+		brelse(dibh);
+	return 0;
+}
+
 const struct iomap_ops gfs2_iomap_ops = {
 	.iomap_begin = gfs2_iomap_begin,
+	.iomap_end = gfs2_iomap_end,
 };
 
 /**
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6f6bbfbff13d..16dd395479a5 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -26,10 +26,12 @@
 #include <linux/dlm.h>
 #include <linux/dlm_plock.h>
 #include <linux/delay.h>
+#include <linux/backing-dev.h>
 
 #include "gfs2.h"
 #include "incore.h"
 #include "bmap.h"
+#include "aops.h"
 #include "dir.h"
 #include "glock.h"
 #include "glops.h"
@@ -691,9 +693,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
 /**
  * gfs2_file_write_iter - Perform a write to a file
  * @iocb: The io context
- * @iov: The data to write
- * @nr_segs: Number of @iov segments
- * @pos: The file position
+ * @from: The data to write
  *
  * We have to do a lock/unlock here to refresh the inode size for
  * O_APPEND writes, otherwise we can land up writing at the wrong
@@ -705,8 +705,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
 static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
-	struct gfs2_inode *ip = GFS2_I(file_inode(file));
-	int ret;
+	struct inode *inode = file_inode(file);
+	struct gfs2_inode *ip = GFS2_I(inode);
+	ssize_t ret;
 
 	ret = gfs2_rsqa_alloc(ip);
 	if (ret)
@@ -723,7 +724,38 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		gfs2_glock_dq_uninit(&gh);
 	}
 
-	return generic_file_write_iter(iocb, from);
+	if (iocb->ki_flags & IOCB_DIRECT)
+		return generic_file_write_iter(iocb, from);
+
+	inode_lock(inode);
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+
+	/* We can write back this queue in page reclaim */
+	current->backing_dev_info = inode_to_bdi(inode);
+
+	ret = file_remove_privs(file);
+	if (ret)
+		goto out2;
+
+	ret = file_update_time(file);
+	if (ret)
+		goto out2;
+
+	ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+
+out2:
+	current->backing_dev_info = NULL;
+out:
+	inode_unlock(inode);
+	if (likely(ret > 0)) {
+		iocb->ki_pos += ret;
+
+		/* Handle various SYNC-type writes */
+		ret = generic_write_sync(iocb, ret);
+	}
+	return ret;
 }
 
 static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
-- 
cgit v1.2.3


From bcfe94139a45fae128844558d6e27a0258860a90 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Fri, 11 May 2018 17:44:19 +0100
Subject: gfs2: gfs2_extent_length cleanup

Now that gfs2_extent_length is no longer used for determining the size
of a hole and is always called with an upper size limit, the function
can be simplified.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/bmap.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 9a699c0a5df1..8b5876e19ecf 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -406,22 +406,17 @@ static void release_metapath(struct metapath *mp)
 
 /**
  * gfs2_extent_length - Returns length of an extent of blocks
- * @start: Start of the buffer
- * @len: Length of the buffer in bytes
- * @ptr: Current position in the buffer
- * @limit: Max extent length to return (0 = unlimited)
+ * @bh: The metadata block
+ * @ptr: Current position in @bh
+ * @limit: Max extent length to return
  * @eob: Set to 1 if we hit "end of block"
  *
- * If the first block is zero (unallocated) it will return the number of
- * unallocated blocks in the extent, otherwise it will return the number
- * of contiguous blocks in the extent.
- *
  * Returns: The length of the extent (minimum of one block)
  */
 
-static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
+static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
 {
-	const __be64 *end = (start + len);
+	const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
 	const __be64 *first = ptr;
 	u64 d = be64_to_cpu(*ptr);
 
@@ -430,14 +425,11 @@ static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __b
 		ptr++;
 		if (ptr >= end)
 			break;
-		if (limit && --limit == 0)
-			break;
-		if (d)
-			d++;
+		d++;
 	} while(be64_to_cpu(*ptr) == d);
 	if (ptr >= end)
 		*eob = 1;
-	return (ptr - first);
+	return ptr - first;
 }
 
 typedef const __be64 *(*gfs2_metadata_walker)(
@@ -893,7 +885,7 @@ unstuff:
 		goto do_alloc;
 
 	bh = mp->mp_bh[ip->i_height - 1];
-	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob);
+	len = gfs2_extent_length(bh, ptr, len, &eob);
 
 	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
 	iomap->length = len << inode->i_blkbits;
-- 
cgit v1.2.3


From 967bcc91b044936e85dbb5848952dc1335a846f4 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Jun 2018 15:08:02 +0100
Subject: gfs2: iomap direct I/O support

The page unmapping previously done in gfs2_direct_IO is now done
generically in iomap_dio_rw.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/aops.c | 100 +------------------------------------------
 fs/gfs2/bmap.c |  14 +++++-
 fs/gfs2/file.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 136 insertions(+), 110 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ecfbca9c88ff..1054cc4a96db 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -84,12 +84,6 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
 	return 0;
 }
 
-static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
-				 struct buffer_head *bh_result, int create)
-{
-	return gfs2_block_map(inode, lblock, bh_result, 0);
-}
-
 /**
  * gfs2_writepage_common - Common bits of writepage
  * @page: The page to be written
@@ -1024,96 +1018,6 @@ out:
 		try_to_release_page(page, 0);
 }
 
-/**
- * gfs2_ok_for_dio - check that dio is valid on this file
- * @ip: The inode
- * @offset: The offset at which we are reading or writing
- *
- * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
- *          1 (to accept the i/o request)
- */
-static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset)
-{
-	/*
-	 * Should we return an error here? I can't see that O_DIRECT for
-	 * a stuffed file makes any sense. For now we'll silently fall
-	 * back to buffered I/O
-	 */
-	if (gfs2_is_stuffed(ip))
-		return 0;
-
-	if (offset >= i_size_read(&ip->i_inode))
-		return 0;
-	return 1;
-}
-
-
-
-static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
-	struct address_space *mapping = inode->i_mapping;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	loff_t offset = iocb->ki_pos;
-	struct gfs2_holder gh;
-	int rv;
-
-	/*
-	 * Deferred lock, even if its a write, since we do no allocation
-	 * on this path. All we need change is atime, and this lock mode
-	 * ensures that other nodes have flushed their buffered read caches
-	 * (i.e. their page cache entries for this inode). We do not,
-	 * unfortunately have the option of only flushing a range like
-	 * the VFS does.
-	 */
-	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
-	rv = gfs2_glock_nq(&gh);
-	if (rv)
-		goto out_uninit;
-	rv = gfs2_ok_for_dio(ip, offset);
-	if (rv != 1)
-		goto out; /* dio not valid, fall back to buffered i/o */
-
-	/*
-	 * Now since we are holding a deferred (CW) lock at this point, you
-	 * might be wondering why this is ever needed. There is a case however
-	 * where we've granted a deferred local lock against a cached exclusive
-	 * glock. That is ok provided all granted local locks are deferred, but
-	 * it also means that it is possible to encounter pages which are
-	 * cached and possibly also mapped. So here we check for that and sort
-	 * them out ahead of the dio. The glock state machine will take care of
-	 * everything else.
-	 *
-	 * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
-	 * the first place, mapping->nr_pages will always be zero.
-	 */
-	if (mapping->nrpages) {
-		loff_t lstart = offset & ~(PAGE_SIZE - 1);
-		loff_t len = iov_iter_count(iter);
-		loff_t end = PAGE_ALIGN(offset + len) - 1;
-
-		rv = 0;
-		if (len == 0)
-			goto out;
-		if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
-			unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
-		rv = filemap_write_and_wait_range(mapping, lstart, end);
-		if (rv)
-			goto out;
-		if (iov_iter_rw(iter) == WRITE)
-			truncate_inode_pages_range(mapping, lstart, end);
-	}
-
-	rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
-				  gfs2_get_block_direct, NULL, NULL, 0);
-out:
-	gfs2_glock_dq(&gh);
-out_uninit:
-	gfs2_holder_uninit(&gh);
-	return rv;
-}
-
 /**
  * gfs2_releasepage - free the metadata associated with a page
  * @page: the page that's being released
@@ -1194,7 +1098,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
 	.bmap = gfs2_bmap,
 	.invalidatepage = gfs2_invalidatepage,
 	.releasepage = gfs2_releasepage,
-	.direct_IO = gfs2_direct_IO,
+	.direct_IO = noop_direct_IO,
 	.migratepage = buffer_migrate_page,
 	.is_partially_uptodate = block_is_partially_uptodate,
 	.error_remove_page = generic_error_remove_page,
@@ -1211,7 +1115,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
 	.bmap = gfs2_bmap,
 	.invalidatepage = gfs2_invalidatepage,
 	.releasepage = gfs2_releasepage,
-	.direct_IO = gfs2_direct_IO,
+	.direct_IO = noop_direct_IO,
 	.migratepage = buffer_migrate_page,
 	.is_partially_uptodate = block_is_partially_uptodate,
 	.error_remove_page = generic_error_remove_page,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8b5876e19ecf..29391090d5b7 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -915,6 +915,9 @@ do_alloc:
 	} else if (flags & IOMAP_WRITE) {
 		u64 alloc_size;
 
+		if (flags & IOMAP_DIRECT)
+			goto out;  /* (see gfs2_file_direct_write) */
+
 		len = gfs2_alloc_size(inode, mp, len);
 		alloc_size = len << inode->i_blkbits;
 		if (alloc_size < iomap->length)
@@ -1082,11 +1085,18 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 	int ret;
 
 	trace_gfs2_iomap_start(ip, pos, length, flags);
-	if (flags & IOMAP_WRITE) {
+	if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
 		ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
 	} else {
 		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
 		release_metapath(&mp);
+		/*
+		 * Silently fall back to buffered I/O for stuffed files or if
+		 * we've hit a hole (see gfs2_file_direct_write).
+		 */
+		if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) &&
+		    iomap->type != IOMAP_MAPPED)
+			ret = -ENOTBLK;
 	}
 	trace_gfs2_iomap_end(ip, iomap, ret);
 	return ret;
@@ -1100,7 +1110,7 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
 	struct gfs2_trans *tr = current->journal_info;
 	struct buffer_head *dibh = iomap->private;
 
-	if (!(flags & IOMAP_WRITE))
+	if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
 		goto out;
 
 	if (iomap->type != IOMAP_INLINE) {
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 16dd395479a5..89280515169e 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -690,6 +690,85 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
 	return ret ? ret : ret1;
 }
 
+static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct file *file = iocb->ki_filp;
+	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
+	size_t count = iov_iter_count(to);
+	struct gfs2_holder gh;
+	ssize_t ret;
+
+	if (!count)
+		return 0; /* skip atime */
+
+	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
+	ret = gfs2_glock_nq(&gh);
+	if (ret)
+		goto out_uninit;
+
+	/* fall back to buffered I/O for stuffed files */
+	ret = -ENOTBLK;
+	if (gfs2_is_stuffed(ip))
+		goto out;
+
+	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL);
+
+out:
+	gfs2_glock_dq(&gh);
+out_uninit:
+	gfs2_holder_uninit(&gh);
+	return ret;
+}
+
+static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	size_t len = iov_iter_count(from);
+	loff_t offset = iocb->ki_pos;
+	struct gfs2_holder gh;
+	ssize_t ret;
+
+	/*
+	 * Deferred lock, even if it's a write, since we do no allocation on
+	 * this path. All we need to change is the atime, and this lock mode
+	 * ensures that other nodes have flushed their buffered read caches
+	 * (i.e. their page cache entries for this inode). We do not,
+	 * unfortunately, have the option of only flushing a range like the
+	 * VFS does.
+	 */
+	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
+	ret = gfs2_glock_nq(&gh);
+	if (ret)
+		goto out_uninit;
+
+	/* Silently fall back to buffered I/O when writing beyond EOF */
+	if (offset + len > i_size_read(&ip->i_inode))
+		goto out;
+
+	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL);
+
+out:
+	gfs2_glock_dq(&gh);
+out_uninit:
+	gfs2_holder_uninit(&gh);
+	return ret;
+}
+
+static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	ssize_t ret;
+
+	if (iocb->ki_flags & IOCB_DIRECT) {
+		ret = gfs2_file_direct_read(iocb, to);
+		if (likely(ret != -ENOTBLK))
+			return ret;
+		iocb->ki_flags &= ~IOCB_DIRECT;
+	}
+	return generic_file_read_iter(iocb, to);
+}
+
 /**
  * gfs2_file_write_iter - Perform a write to a file
  * @iocb: The io context
@@ -707,7 +786,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	struct gfs2_inode *ip = GFS2_I(inode);
-	ssize_t ret;
+	ssize_t written = 0, ret;
 
 	ret = gfs2_rsqa_alloc(ip);
 	if (ret)
@@ -724,9 +803,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		gfs2_glock_dq_uninit(&gh);
 	}
 
-	if (iocb->ki_flags & IOCB_DIRECT)
-		return generic_file_write_iter(iocb, from);
-
 	inode_lock(inode);
 	ret = generic_write_checks(iocb, from);
 	if (ret <= 0)
@@ -743,19 +819,55 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (ret)
 		goto out2;
 
-	ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+	if (iocb->ki_flags & IOCB_DIRECT) {
+		struct address_space *mapping = file->f_mapping;
+		loff_t pos, endbyte;
+		ssize_t buffered;
+
+		written = gfs2_file_direct_write(iocb, from);
+		if (written < 0 || !iov_iter_count(from))
+			goto out2;
+
+		ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+		if (unlikely(ret < 0))
+			goto out2;
+		buffered = ret;
+
+		/*
+		 * We need to ensure that the page cache pages are written to
+		 * disk and invalidated to preserve the expected O_DIRECT
+		 * semantics.
+		 */
+		pos = iocb->ki_pos;
+		endbyte = pos + buffered - 1;
+		ret = filemap_write_and_wait_range(mapping, pos, endbyte);
+		if (!ret) {
+			iocb->ki_pos += buffered;
+			written += buffered;
+			invalidate_mapping_pages(mapping,
+						 pos >> PAGE_SHIFT,
+						 endbyte >> PAGE_SHIFT);
+		} else {
+			/*
+			 * We don't know how much we wrote, so just return
+			 * the number of bytes which were direct-written
+			 */
+		}
+	} else {
+		ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+		if (likely(ret > 0))
+			iocb->ki_pos += ret;
+	}
 
 out2:
 	current->backing_dev_info = NULL;
 out:
 	inode_unlock(inode);
 	if (likely(ret > 0)) {
-		iocb->ki_pos += ret;
-
 		/* Handle various SYNC-type writes */
 		ret = generic_write_sync(iocb, ret);
 	}
-	return ret;
+	return written ? written : ret;
 }
 
 static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
@@ -1157,7 +1269,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
 
 const struct file_operations gfs2_file_fops = {
 	.llseek		= gfs2_llseek,
-	.read_iter	= generic_file_read_iter,
+	.read_iter	= gfs2_file_read_iter,
 	.write_iter	= gfs2_file_write_iter,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.mmap		= gfs2_mmap,
@@ -1187,7 +1299,7 @@ const struct file_operations gfs2_dir_fops = {
 
 const struct file_operations gfs2_file_fops_nolock = {
 	.llseek		= gfs2_llseek,
-	.read_iter	= generic_file_read_iter,
+	.read_iter	= gfs2_file_read_iter,
 	.write_iter	= gfs2_file_write_iter,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.mmap		= gfs2_mmap,
-- 
cgit v1.2.3


From 025d0e7f73c6a9cc3ca2fe7de821792a8f3269bf Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 19 Mar 2018 23:10:52 +0000
Subject: gfs2: Remove gfs2_write_{begin,end}

Now that generic_file_write_iter is no longer used, there are no
remaining users of these address space operations.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/aops.c | 213 ---------------------------------------------------------
 1 file changed, 213 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 1054cc4a96db..cc80fd71f3dd 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -639,135 +639,6 @@ out_uninit:
 	return ret;
 }
 
-/**
- * gfs2_write_begin - Begin to write to a file
- * @file: The file to write to
- * @mapping: The mapping in which to write
- * @pos: The file offset at which to start writing
- * @len: Length of the write
- * @flags: Various flags
- * @pagep: Pointer to return the page
- * @fsdata: Pointer to return fs data (unused by GFS2)
- *
- * Returns: errno
- */
-
-static int gfs2_write_begin(struct file *file, struct address_space *mapping,
-			    loff_t pos, unsigned len, unsigned flags,
-			    struct page **pagep, void **fsdata)
-{
-	struct gfs2_inode *ip = GFS2_I(mapping->host);
-	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
-	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
-	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
-	unsigned requested = 0;
-	int alloc_required;
-	int error = 0;
-	pgoff_t index = pos >> PAGE_SHIFT;
-	unsigned from = pos & (PAGE_SIZE - 1);
-	struct page *page;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
-	error = gfs2_glock_nq(&ip->i_gh);
-	if (unlikely(error))
-		goto out_uninit;
-	if (&ip->i_inode == sdp->sd_rindex) {
-		error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
-					   GL_NOCACHE, &m_ip->i_gh);
-		if (unlikely(error)) {
-			gfs2_glock_dq(&ip->i_gh);
-			goto out_uninit;
-		}
-	}
-
-	alloc_required = gfs2_write_alloc_required(ip, pos, len);
-
-	if (alloc_required || gfs2_is_jdata(ip))
-		gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
-
-	if (alloc_required) {
-		struct gfs2_alloc_parms ap = { .aflags = 0, };
-		requested = data_blocks + ind_blocks;
-		ap.target = requested;
-		error = gfs2_quota_lock_check(ip, &ap);
-		if (error)
-			goto out_unlock;
-
-		error = gfs2_inplace_reserve(ip, &ap);
-		if (error)
-			goto out_qunlock;
-	}
-
-	rblocks = RES_DINODE + ind_blocks;
-	if (gfs2_is_jdata(ip))
-		rblocks += data_blocks ? data_blocks : 1;
-	if (ind_blocks || data_blocks)
-		rblocks += RES_STATFS + RES_QUOTA;
-	if (&ip->i_inode == sdp->sd_rindex)
-		rblocks += 2 * RES_STATFS;
-	if (alloc_required)
-		rblocks += gfs2_rg_blocks(ip, requested);
-
-	error = gfs2_trans_begin(sdp, rblocks,
-				 PAGE_SIZE/sdp->sd_sb.sb_bsize);
-	if (error)
-		goto out_trans_fail;
-
-	error = -ENOMEM;
-	flags |= AOP_FLAG_NOFS;
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	*pagep = page;
-	if (unlikely(!page))
-		goto out_endtrans;
-
-	if (gfs2_is_stuffed(ip)) {
-		error = 0;
-		if (pos + len > gfs2_max_stuffed_size(ip)) {
-			error = gfs2_unstuff_dinode(ip, page);
-			if (error == 0)
-				goto prepare_write;
-		} else if (!PageUptodate(page)) {
-			error = stuffed_readpage(ip, page);
-		}
-		goto out;
-	}
-
-prepare_write:
-	error = __block_write_begin(page, from, len, gfs2_block_map);
-out:
-	if (error == 0)
-		return 0;
-
-	unlock_page(page);
-	put_page(page);
-
-	gfs2_trans_end(sdp);
-	if (alloc_required) {
-		gfs2_inplace_release(ip);
-		if (pos + len > ip->i_inode.i_size)
-			gfs2_trim_blocks(&ip->i_inode);
-	}
-	goto out_qunlock;
-
-out_endtrans:
-	gfs2_trans_end(sdp);
-out_trans_fail:
-	if (alloc_required)
-		gfs2_inplace_release(ip);
-out_qunlock:
-	if (alloc_required)
-		gfs2_quota_unlock(ip);
-out_unlock:
-	if (&ip->i_inode == sdp->sd_rindex) {
-		gfs2_glock_dq(&m_ip->i_gh);
-		gfs2_holder_uninit(&m_ip->i_gh);
-	}
-	gfs2_glock_dq(&ip->i_gh);
-out_uninit:
-	gfs2_holder_uninit(&ip->i_gh);
-	return error;
-}
-
 /**
  * adjust_fs_space - Adjusts the free space available due to gfs2_grow
  * @inode: the rindex inode
@@ -848,84 +719,6 @@ int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
 	return copied;
 }
 
-/**
- * gfs2_write_end
- * @file: The file to write to
- * @mapping: The address space to write to
- * @pos: The file position
- * @len: The length of the data
- * @copied: How much was actually copied by the VFS
- * @page: The page that has been written
- * @fsdata: The fsdata (unused in GFS2)
- *
- * The main write_end function for GFS2. We just put our locking around the VFS
- * provided functions.
- *
- * Returns: copied bytes or errno
- */
-
-static int gfs2_write_end(struct file *file, struct address_space *mapping,
-			  loff_t pos, unsigned len, unsigned copied,
-			  struct page *page, void *fsdata)
-{
-	struct inode *inode = page->mapping->host;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
-	struct buffer_head *dibh;
-	int ret;
-	struct gfs2_trans *tr = current->journal_info;
-	BUG_ON(!tr);
-
-	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
-
-	ret = gfs2_meta_inode_buffer(ip, &dibh);
-	if (unlikely(ret))
-		goto out;
-
-	if (gfs2_is_stuffed(ip)) {
-		ret = gfs2_stuffed_write_end(inode, dibh, pos, copied, page);
-		page = NULL;
-		goto out2;
-	}
-
-	if (gfs2_is_jdata(ip))
-		gfs2_page_add_databufs(ip, page, pos & ~PAGE_MASK, len);
-	else
-		gfs2_ordered_add_inode(ip);
-
-	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	page = NULL;
-	if (tr->tr_num_buf_new)
-		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
-	else
-		gfs2_trans_add_meta(ip->i_gl, dibh);
-
-out2:
-	if (inode == sdp->sd_rindex) {
-		adjust_fs_space(inode);
-		sdp->sd_rindex_uptodate = 0;
-	}
-
-	brelse(dibh);
-out:
-	if (page) {
-		unlock_page(page);
-		put_page(page);
-	}
-	gfs2_trans_end(sdp);
-	gfs2_inplace_release(ip);
-	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
-		gfs2_quota_unlock(ip);
-	if (inode == sdp->sd_rindex) {
-		gfs2_glock_dq(&m_ip->i_gh);
-		gfs2_holder_uninit(&m_ip->i_gh);
-	}
-	gfs2_glock_dq(&ip->i_gh);
-	gfs2_holder_uninit(&ip->i_gh);
-	return ret;
-}
-
 /**
  * jdata_set_page_dirty - Page dirtying function
  * @page: The page to dirty
@@ -1093,8 +886,6 @@ static const struct address_space_operations gfs2_writeback_aops = {
 	.writepages = gfs2_writepages,
 	.readpage = gfs2_readpage,
 	.readpages = gfs2_readpages,
-	.write_begin = gfs2_write_begin,
-	.write_end = gfs2_write_end,
 	.bmap = gfs2_bmap,
 	.invalidatepage = gfs2_invalidatepage,
 	.releasepage = gfs2_releasepage,
@@ -1109,8 +900,6 @@ static const struct address_space_operations gfs2_ordered_aops = {
 	.writepages = gfs2_writepages,
 	.readpage = gfs2_readpage,
 	.readpages = gfs2_readpages,
-	.write_begin = gfs2_write_begin,
-	.write_end = gfs2_write_end,
 	.set_page_dirty = __set_page_dirty_buffers,
 	.bmap = gfs2_bmap,
 	.invalidatepage = gfs2_invalidatepage,
@@ -1126,8 +915,6 @@ static const struct address_space_operations gfs2_jdata_aops = {
 	.writepages = gfs2_jdata_writepages,
 	.readpage = gfs2_readpage,
 	.readpages = gfs2_readpages,
-	.write_begin = gfs2_write_begin,
-	.write_end = gfs2_write_end,
 	.set_page_dirty = jdata_set_page_dirty,
 	.bmap = gfs2_bmap,
 	.invalidatepage = gfs2_invalidatepage,
-- 
cgit v1.2.3


From 09230435dffdb13de507e5e40b524b0069fc5c7b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 3 Jul 2018 09:07:46 -0700
Subject: iomap: refactor iomap_dio_actor

Split the function up into two helpers for the bio based I/O and hole
case, and a small helper to call the two.  This separates the code a
little better in preparation for supporting I/O to inline data.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 88 +++++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 52 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/iomap.c b/fs/iomap.c
index 2ebff76039b5..4d8ff0f5ecc9 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1327,10 +1327,9 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
 }
 
 static loff_t
-iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap)
+iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
+		struct iomap_dio *dio, struct iomap *iomap)
 {
-	struct iomap_dio *dio = data;
 	unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
 	unsigned int fs_block_size = i_blocksize(inode), pad;
 	unsigned int align = iov_iter_alignment(dio->submit.iter);
@@ -1344,41 +1343,27 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 	if ((pos | length | align) & ((1 << blkbits) - 1))
 		return -EINVAL;
 
-	switch (iomap->type) {
-	case IOMAP_HOLE:
-		if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
-			return -EIO;
-		/*FALLTHRU*/
-	case IOMAP_UNWRITTEN:
-		if (!(dio->flags & IOMAP_DIO_WRITE)) {
-			length = iov_iter_zero(length, dio->submit.iter);
-			dio->size += length;
-			return length;
-		}
+	if (iomap->type == IOMAP_UNWRITTEN) {
 		dio->flags |= IOMAP_DIO_UNWRITTEN;
 		need_zeroout = true;
-		break;
-	case IOMAP_MAPPED:
-		if (iomap->flags & IOMAP_F_SHARED)
-			dio->flags |= IOMAP_DIO_COW;
-		if (iomap->flags & IOMAP_F_NEW) {
-			need_zeroout = true;
-		} else {
-			/*
-			 * Use a FUA write if we need datasync semantics, this
-			 * is a pure data IO that doesn't require any metadata
-			 * updates and the underlying device supports FUA. This
-			 * allows us to avoid cache flushes on IO completion.
-			 */
-			if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
-			    (dio->flags & IOMAP_DIO_WRITE_FUA) &&
-			    blk_queue_fua(bdev_get_queue(iomap->bdev)))
-				use_fua = true;
-		}
-		break;
-	default:
-		WARN_ON_ONCE(1);
-		return -EIO;
+	}
+
+	if (iomap->flags & IOMAP_F_SHARED)
+		dio->flags |= IOMAP_DIO_COW;
+
+	if (iomap->flags & IOMAP_F_NEW) {
+		need_zeroout = true;
+	} else {
+		/*
+		 * Use a FUA write if we need datasync semantics, this
+		 * is a pure data IO that doesn't require any metadata
+		 * updates and the underlying device supports FUA. This
+		 * allows us to avoid cache flushes on IO completion.
+		 */
+		if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
+		    (dio->flags & IOMAP_DIO_WRITE_FUA) &&
+		    blk_queue_fua(bdev_get_queue(iomap->bdev)))
+			use_fua = true;
 	}
 
 	/*
@@ -1457,6 +1442,37 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 	return copied;
 }
 
+static loff_t
+iomap_dio_hole_actor(loff_t length, struct iomap_dio *dio)
+{
+	length = iov_iter_zero(length, dio->submit.iter);
+	dio->size += length;
+	return length;
+}
+
+static loff_t
+iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
+		void *data, struct iomap *iomap)
+{
+	struct iomap_dio *dio = data;
+
+	switch (iomap->type) {
+	case IOMAP_HOLE:
+		if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
+			return -EIO;
+		return iomap_dio_hole_actor(length, dio);
+	case IOMAP_UNWRITTEN:
+		if (!(dio->flags & IOMAP_DIO_WRITE))
+			return iomap_dio_hole_actor(length, dio);
+		return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
+	case IOMAP_MAPPED:
+		return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
+	default:
+		WARN_ON_ONCE(1);
+		return -EIO;
+	}
+}
+
 /*
  * iomap_dio_rw() always completes O_[D]SYNC writes regardless of whether the IO
  * is being issued as AIO or not.  This allows us to optimise pure data writes
-- 
cgit v1.2.3


From ec181f6782d8e8c22aa2c3462a20195cfa214ec6 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 3 Jul 2018 09:07:47 -0700
Subject: iomap: support direct I/O to inline data

Add support for reading from and writing to inline data to iomap_dio_rw.
This saves filesystems from having to implement fallback code for this
case.

The inline data is actually cached in the inode, so the I/O is only
direct in the sense that it doesn't go through the page cache.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'fs')

diff --git a/fs/iomap.c b/fs/iomap.c
index 4d8ff0f5ecc9..98a1fdd5c091 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1450,6 +1450,33 @@ iomap_dio_hole_actor(loff_t length, struct iomap_dio *dio)
 	return length;
 }
 
+static loff_t
+iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
+		struct iomap_dio *dio, struct iomap *iomap)
+{
+	struct iov_iter *iter = dio->submit.iter;
+	size_t copied;
+
+	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+	if (dio->flags & IOMAP_DIO_WRITE) {
+		loff_t size = inode->i_size;
+
+		if (pos > size)
+			memset(iomap->inline_data + size, 0, pos - size);
+		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
+		if (copied) {
+			if (pos + copied > size)
+				i_size_write(inode, pos + copied);
+			mark_inode_dirty(inode);
+		}
+	} else {
+		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
+	}
+	dio->size += copied;
+	return copied;
+}
+
 static loff_t
 iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 		void *data, struct iomap *iomap)
@@ -1467,6 +1494,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 		return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
 	case IOMAP_MAPPED:
 		return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
+	case IOMAP_INLINE:
+		return iomap_dio_inline_actor(inode, pos, length, dio, iomap);
 	default:
 		WARN_ON_ONCE(1);
 		return -EIO;
-- 
cgit v1.2.3


From 806a1477b10a153cd01ee7ccba8ca2492df3e0b2 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 3 Jul 2018 09:07:47 -0700
Subject: iomap: add inline data support to iomap_readpage_actor

Just copy the inline data into the page using the existing helper.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/iomap.c b/fs/iomap.c
index 98a1fdd5c091..13cdcf33e6c0 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -155,6 +155,12 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	bool is_contig = false;
 	sector_t sector;
 
+	if (iomap->type == IOMAP_INLINE) {
+		WARN_ON_ONCE(poff);
+		iomap_read_inline_data(inode, page, iomap);
+		return PAGE_SIZE;
+	}
+
 	/* we don't support blocksize < PAGE_SIZE quite yet. */
 	WARN_ON_ONCE(pos != page_offset(page));
 	WARN_ON_ONCE(plen != PAGE_SIZE);
-- 
cgit v1.2.3


From 3fae17468a96f3a397dff633be82a42d89fb1b91 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 3 Jul 2018 08:08:34 -0700
Subject: fs: Fix attr.c kernel-doc

A couple of minor warnings.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/attr.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/attr.c b/fs/attr.c
index e3d53bf12240..d22e8187477f 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -120,7 +120,6 @@ EXPORT_SYMBOL(setattr_prepare);
  * inode_newsize_ok - may this inode be truncated to a given size
  * @inode:	the inode to be truncated
  * @offset:	the new size to assign to the inode
- * @Returns:	0 on success, -ve errno on failure
  *
  * inode_newsize_ok must be called with i_mutex held.
  *
@@ -130,6 +129,8 @@ EXPORT_SYMBOL(setattr_prepare);
  * returned. @inode must be a file (not directory), with appropriate
  * permissions to allow truncate (inode_newsize_ok does NOT check these
  * conditions).
+ *
+ * Return: 0 on success, -ve errno on failure
  */
 int inode_newsize_ok(const struct inode *inode, loff_t offset)
 {
@@ -205,7 +206,7 @@ EXPORT_SYMBOL(setattr_copy);
 /**
  * notify_change - modify attributes of a filesytem object
  * @dentry:	object affected
- * @iattr:	new attributes
+ * @attr:	new attributes
  * @delegated_inode: returns inode, if the inode is delegated
  *
  * The caller must hold the i_mutex on the affected object.
-- 
cgit v1.2.3


From 03f8c41c73da849ec2b73aa678ce6380e8318920 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 21 Jun 2018 07:22:12 -0500
Subject: gfs2: Stop messing with ip->i_rgd in the rlist code

In the resource group list code, keep the last resource group added in
the last position in the array.  Check against that instead of messing
with ip->i_rgd.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/rgrp.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7a001f6e8aee..ecdc4cb5b6ad 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -2559,19 +2559,35 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
 	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
 		return;
 
-	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
-		rgd = ip->i_rgd;
-	else
+	/*
+	 * The resource group last accessed is kept in the last position.
+	 */
+
+	if (rlist->rl_rgrps) {
+		rgd = rlist->rl_rgd[rlist->rl_rgrps - 1];
+		if (rgrp_contains_block(rgd, block))
+			return;
 		rgd = gfs2_blk2rgrpd(sdp, block, 1);
+	} else {
+		rgd = ip->i_rgd;
+		if (!rgd || !rgrp_contains_block(rgd, block))
+			rgd = gfs2_blk2rgrpd(sdp, block, 1);
+	}
+
 	if (!rgd) {
-		fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
+		fs_err(sdp, "rlist_add: no rgrp for block %llu\n",
+		       (unsigned long long)block);
 		return;
 	}
 	ip->i_rgd = rgd;
 
-	for (x = 0; x < rlist->rl_rgrps; x++)
-		if (rlist->rl_rgd[x] == rgd)
+	for (x = 0; x < rlist->rl_rgrps; x++) {
+		if (rlist->rl_rgd[x] == rgd) {
+			swap(rlist->rl_rgd[x],
+			     rlist->rl_rgd[rlist->rl_rgrps - 1]);
 			return;
+		}
+	}
 
 	if (rlist->rl_rgrps == rlist->rl_space) {
 		new_space = rlist->rl_space + 10;
-- 
cgit v1.2.3


From b7eba890a228f591fea2889b901267ba5de7839b Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 21 Jun 2018 07:42:37 -0500
Subject: gfs2: Eliminate redundant ip->i_rgd

GFS2 remembers the last rgrp used for allocations in ip->i_rgd.
However, block allocations are made by way of a reservations structure,
ip->i_res, which keeps the last rgrp in ip->i_res.rs_rbm.rgd, and ip->i_rgd
is kept in sync with ip->i_res.rs_rbm.rgd, so it's redundant.  Get rid of
ip->i_rgd and just use ip->i_res.rs_rbm.rgd in its place.

Based on patches by Robert Peterson.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/incore.h |  1 -
 fs/gfs2/rgrp.c   | 13 ++++++-------
 fs/gfs2/super.c  |  1 -
 fs/gfs2/trans.h  |  6 ++++--
 4 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e9cd2cc292d3..b50908211b69 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -397,7 +397,6 @@ struct gfs2_inode {
 	struct gfs2_holder i_gh; /* for prepare/commit_write only */
 	struct gfs2_qadata *i_qadata; /* quota allocation data */
 	struct gfs2_blkreserv i_res; /* rgrp multi-block reservation */
-	struct gfs2_rgrpd *i_rgd;
 	u64 i_goal;	/* goal block for allocations */
 	struct rw_semaphore i_rw_mutex;
 	struct list_head i_ordered;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ecdc4cb5b6ad..60c86532782e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1992,8 +1992,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 		return -EINVAL;
 	if (gfs2_rs_active(rs)) {
 		begin = rs->rs_rbm.rgd;
-	} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
-		rs->rs_rbm.rgd = begin = ip->i_rgd;
+	} else if (rs->rs_rbm.rgd &&
+		   rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) {
+		begin = rs->rs_rbm.rgd;
 	} else {
 		check_and_update_goal(ip);
 		rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
@@ -2057,8 +2058,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
 		    (loops == 2 && ap->min_target &&
 		     rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
-			ip->i_rgd = rs->rs_rbm.rgd;
-			ap->allowed = ip->i_rgd->rd_free_clone;
+			ap->allowed = rs->rs_rbm.rgd->rd_free_clone;
 			return 0;
 		}
 check_rgrp:
@@ -2336,7 +2336,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head *dibh;
-	struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
+	struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, };
 	unsigned int ndata;
 	u64 block; /* block, within the file system scope */
 	int error;
@@ -2569,7 +2569,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
 			return;
 		rgd = gfs2_blk2rgrpd(sdp, block, 1);
 	} else {
-		rgd = ip->i_rgd;
+		rgd = ip->i_res.rs_rbm.rgd;
 		if (!rgd || !rgrp_contains_block(rgd, block))
 			rgd = gfs2_blk2rgrpd(sdp, block, 1);
 	}
@@ -2579,7 +2579,6 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
 		       (unsigned long long)block);
 		return;
 	}
-	ip->i_rgd = rgd;
 
 	for (x = 0; x < rlist->rl_rgrps; x++) {
 		if (rlist->rl_rgd[x] == rgd) {
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index cf5c7f3080d2..685dc6fff5ed 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1729,7 +1729,6 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
 	if (ip) {
 		ip->i_flags = 0;
 		ip->i_gl = NULL;
-		ip->i_rgd = NULL;
 		memset(&ip->i_res, 0, sizeof(ip->i_res));
 		RB_CLEAR_NODE(&ip->i_res.rs_node);
 		ip->i_rahead = 0;
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 1e6e7da25a17..ad70087d0597 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -30,9 +30,11 @@ struct gfs2_glock;
  * block, or all of the blocks in the rg, whichever is smaller */
 static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested)
 {
-	if (requested < ip->i_rgd->rd_length)
+	struct gfs2_rgrpd *rgd = ip->i_res.rs_rbm.rgd;
+
+	if (requested < rgd->rd_length)
 		return requested + 1;
-	return ip->i_rgd->rd_length;
+	return rgd->rd_length;
 }
 
 extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
-- 
cgit v1.2.3


From e8cff84faa4ddb6716caed085f515fbb1d856099 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 11:24:21 -0400
Subject: fold security_file_free() into file_free()

.. and the call of file_free() in case of security_file_alloc() failure
in get_empty_filp() should be simply file_free_rcu() - no point in
rcu-delays there, anyway.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index 7ec0b3e5f05d..eee7cf629e52 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -51,6 +51,7 @@ static void file_free_rcu(struct rcu_head *head)
 
 static inline void file_free(struct file *f)
 {
+	security_file_free(f);
 	percpu_counter_dec(&nr_files);
 	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
 }
@@ -123,11 +124,10 @@ struct file *get_empty_filp(void)
 	if (unlikely(!f))
 		return ERR_PTR(-ENOMEM);
 
-	percpu_counter_inc(&nr_files);
 	f->f_cred = get_cred(cred);
 	error = security_file_alloc(f);
 	if (unlikely(error)) {
-		file_free(f);
+		file_free_rcu(&f->f_u.fu_rcuhead);
 		return ERR_PTR(error);
 	}
 
@@ -137,6 +137,7 @@ struct file *get_empty_filp(void)
 	mutex_init(&f->f_pos_lock);
 	eventpoll_init_file(f);
 	/* f->f_version: 0 */
+	percpu_counter_inc(&nr_files);
 	return f;
 
 over:
@@ -207,7 +208,6 @@ static void __fput(struct file *file)
 	}
 	if (file->f_op->release)
 		file->f_op->release(inode, file);
-	security_file_free(file);
 	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
 		     !(file->f_mode & FMODE_PATH))) {
 		cdev_put(inode->i_cdev);
@@ -302,10 +302,8 @@ EXPORT_SYMBOL(fput);
 
 void put_filp(struct file *file)
 {
-	if (atomic_long_dec_and_test(&file->f_count)) {
-		security_file_free(file);
+	if (atomic_long_dec_and_test(&file->f_count))
 		file_free(file);
-	}
 }
 
 void __init files_init(void)
-- 
cgit v1.2.3


From 19f391eb05b8b005f2907ddc8f284487b446abf3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 11:19:32 -0400
Subject: turn filp_clone_open() into inline wrapper for dentry_open()

it's exactly the same thing as
	dentry_open(&file->f_path, file->f_flags, file->f_cred)

... and rename it to file_clone_open(), while we are at it.
'filp' naming convention is bogus; sure, it's "file pointer",
but we generally don't do that kind of Hungarian notation.
Some of the instances have too many callers to touch, but this
one has only two, so let's sanitize it while we can...

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/gpu/drm/drm_lease.c |  2 +-
 fs/binfmt_misc.c            |  2 +-
 fs/open.c                   | 20 --------------------
 include/linux/fs.h          |  5 ++++-
 4 files changed, 6 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index d638c0fb3418..b54fb78a283c 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -553,7 +553,7 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev,
 
 	/* Clone the lessor file to create a new file for us */
 	DRM_DEBUG_LEASE("Allocating lease file\n");
-	lessee_file = filp_clone_open(lessor_file);
+	lessee_file = file_clone_open(lessor_file);
 	if (IS_ERR(lessee_file)) {
 		ret = PTR_ERR(lessee_file);
 		goto out_lessee;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 4b5fff31ef27..aa4a7a23ff99 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -205,7 +205,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		goto error;
 
 	if (fmt->flags & MISC_FMT_OPEN_FILE) {
-		interp_file = filp_clone_open(fmt->interp_file);
+		interp_file = file_clone_open(fmt->interp_file);
 		if (!IS_ERR(interp_file))
 			deny_write_access(interp_file);
 	} else {
diff --git a/fs/open.c b/fs/open.c
index d0e955b558ad..76c56966e297 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1063,26 +1063,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
 }
 EXPORT_SYMBOL(file_open_root);
 
-struct file *filp_clone_open(struct file *oldfile)
-{
-	struct file *file;
-	int retval;
-
-	file = get_empty_filp();
-	if (IS_ERR(file))
-		return file;
-
-	file->f_flags = oldfile->f_flags;
-	retval = vfs_open(&oldfile->f_path, file, oldfile->f_cred);
-	if (retval) {
-		put_filp(file);
-		return ERR_PTR(retval);
-	}
-
-	return file;
-}
-EXPORT_SYMBOL(filp_clone_open);
-
 long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
 {
 	struct open_flags op;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index aa9b4c169ed2..c4ca4c9c1130 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2422,7 +2422,10 @@ extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int, umode_t);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
-extern struct file *filp_clone_open(struct file *);
+static inline struct file *file_clone_open(struct file *file)
+{
+	return dentry_open(&file->f_path, file->f_flags, file->f_cred);
+}
 extern int filp_close(struct file *, fl_owner_t id);
 
 extern struct filename *getname_flags(const char __user *, int, int *);
-- 
cgit v1.2.3


From b10a4a9f7695335bd2bb19bffdda7fbefbc6581f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 02:29:58 -0400
Subject: create_pipe_files(): use fput() if allocation of the second file
 fails

... just use put_pipe_info() to get the pipe->files down to 1 and let
fput()-called pipe_release() do freeing.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index bb0840e234f3..9405e455f5b1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -771,8 +771,9 @@ int create_pipe_files(struct file **res, int flags)
 
 	res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
 	if (IS_ERR(res[0])) {
-		err = PTR_ERR(res[0]);
-		goto err_file;
+		put_pipe_info(inode, inode->i_pipe);
+		fput(f);
+		return PTR_ERR(res[0]);
 	}
 
 	path_get(&path);
@@ -781,8 +782,6 @@ int create_pipe_files(struct file **res, int flags)
 	res[1] = f;
 	return 0;
 
-err_file:
-	put_filp(f);
 err_dentry:
 	free_pipe_info(inode->i_pipe);
 	path_put(&path);
-- 
cgit v1.2.3


From 6b4e8085c0004382b985a5c005c685073630e746 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 8 Jul 2018 21:45:07 -0400
Subject: make sure do_dentry_open() won't return positive as an error

An ->open() instance really, really should not be doing that.  There are
a lot of places e.g. around atomic_open() that could be confused by that,
so let's catch that early.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 76c56966e297..530da965e369 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -812,6 +812,8 @@ static int do_dentry_open(struct file *f,
 	return 0;
 
 cleanup_all:
+	if (WARN_ON_ONCE(error > 0))
+		error = -EINVAL;
 	fops_put(f->f_op);
 	if (f->f_mode & FMODE_WRITER) {
 		put_write_access(inode);
-- 
cgit v1.2.3


From 8b2e77c1634935aefc9a4e2aacadda9cd30b78d7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:25:56 -0700
Subject: xfs: use iomap for blocksize == PAGE_SIZE readpage and readpages

For file systems with a block size that equals the page size we never do
partial reads, so we can use the buffer_head-less iomap versions of
readpage and readpages without conflicting with the buffer_head structures
created later in write_begin.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 8eb3ba3d4d00..85e1a625d42a 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1401,6 +1401,8 @@ xfs_vm_readpage(
 	struct page		*page)
 {
 	trace_xfs_vm_readpage(page->mapping->host, 1);
+	if (i_blocksize(page->mapping->host) == PAGE_SIZE)
+		return iomap_readpage(page, &xfs_iomap_ops);
 	return mpage_readpage(page, xfs_get_blocks);
 }
 
@@ -1412,6 +1414,8 @@ xfs_vm_readpages(
 	unsigned		nr_pages)
 {
 	trace_xfs_vm_readpages(mapping->host, nr_pages);
+	if (i_blocksize(mapping->host) == PAGE_SIZE)
+		return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops);
 	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
 }
 
-- 
cgit v1.2.3


From 036257213836e13f08d1b28e528ba99e4ed3fc68 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:25:57 -0700
Subject: xfs: simplify xfs_aops_discard_page

Instead of looking at the buffer heads to see if a block is delalloc just
call xfs_bmap_punch_delalloc_range on the whole page - this will leave
any non-delalloc block intact and handle the iteration for us.  As a side
effect one more place stops caring about buffer heads and we can remove the
xfs_check_page_type function entirely.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 85 ++++++-------------------------------------------------
 1 file changed, 9 insertions(+), 76 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 85e1a625d42a..9b1a17b4cacb 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -710,49 +710,6 @@ xfs_map_at_offset(
 	clear_buffer_unwritten(bh);
 }
 
-/*
- * Test if a given page contains at least one buffer of a given @type.
- * If @check_all_buffers is true, then we walk all the buffers in the page to
- * try to find one of the type passed in. If it is not set, then the caller only
- * needs to check the first buffer on the page for a match.
- */
-STATIC bool
-xfs_check_page_type(
-	struct page		*page,
-	unsigned int		type,
-	bool			check_all_buffers)
-{
-	struct buffer_head	*bh;
-	struct buffer_head	*head;
-
-	if (PageWriteback(page))
-		return false;
-	if (!page->mapping)
-		return false;
-	if (!page_has_buffers(page))
-		return false;
-
-	bh = head = page_buffers(page);
-	do {
-		if (buffer_unwritten(bh)) {
-			if (type == XFS_IO_UNWRITTEN)
-				return true;
-		} else if (buffer_delay(bh)) {
-			if (type == XFS_IO_DELALLOC)
-				return true;
-		} else if (buffer_dirty(bh) && buffer_mapped(bh)) {
-			if (type == XFS_IO_OVERWRITE)
-				return true;
-		}
-
-		/* If we are only checking the first buffer, we are done now. */
-		if (!check_all_buffers)
-			break;
-	} while ((bh = bh->b_this_page) != head);
-
-	return false;
-}
-
 STATIC void
 xfs_vm_invalidatepage(
 	struct page		*page,
@@ -784,9 +741,6 @@ xfs_vm_invalidatepage(
  * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
  * truncation without a transaction as there is no space left for block
  * reservation (typically why we see a ENOSPC in writeback).
- *
- * This is not a performance critical path, so for now just do the punching a
- * buffer head at a time.
  */
 STATIC void
 xfs_aops_discard_page(
@@ -794,47 +748,26 @@ xfs_aops_discard_page(
 {
 	struct inode		*inode = page->mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct buffer_head	*bh, *head;
+	struct xfs_mount	*mp = ip->i_mount;
 	loff_t			offset = page_offset(page);
+	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, offset);
+	int			error;
 
-	if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
-		goto out_invalidate;
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+	if (XFS_FORCED_SHUTDOWN(mp))
 		goto out_invalidate;
 
-	xfs_alert(ip->i_mount,
+	xfs_alert(mp,
 		"page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
 			page, ip->i_ino, offset);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	bh = head = page_buffers(page);
-	do {
-		int		error;
-		xfs_fileoff_t	start_fsb;
-
-		if (!buffer_delay(bh))
-			goto next_buffer;
-
-		start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-		error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
-		if (error) {
-			/* something screwed, just bail */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_alert(ip->i_mount,
-			"page discard unable to remove delalloc mapping.");
-			}
-			break;
-		}
-next_buffer:
-		offset += i_blocksize(inode);
-
-	} while ((bh = bh->b_this_page) != head);
-
+	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+			PAGE_SIZE / i_blocksize(inode));
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	if (error && !XFS_FORCED_SHUTDOWN(mp))
+		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
 out_invalidate:
 	xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
-	return;
 }
 
 static int
-- 
cgit v1.2.3


From d43801775766972535a0f4b78af65d0c13055e36 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:25:57 -0700
Subject: xfs: move locking into xfs_bmap_punch_delalloc_range

Both callers want the same locking, so do it only once.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c      | 2 --
 fs/xfs/xfs_bmap_util.c | 9 +++++----
 fs/xfs/xfs_iomap.c     | 3 ---
 3 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 9b1a17b4cacb..5c549e983d69 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -760,10 +760,8 @@ xfs_aops_discard_page(
 		"page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
 			page, ip->i_ino, offset);
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
 			PAGE_SIZE / i_blocksize(inode));
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	if (error && !XFS_FORCED_SHUTDOWN(mp))
 		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
 out_invalidate:
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 83b1e8c6c18f..da561882c349 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -702,16 +702,15 @@ xfs_bmap_punch_delalloc_range(
 	struct xfs_iext_cursor	icur;
 	int			error = 0;
 
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
 		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
 		if (error)
-			return error;
+			goto out_unlock;
 	}
 
 	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
-		return 0;
+		goto out_unlock;
 
 	while (got.br_startoff + got.br_blockcount > start_fsb) {
 		del = got;
@@ -735,6 +734,8 @@ xfs_bmap_punch_delalloc_range(
 			break;
 	}
 
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index e08a84d9ee72..10c54fc7d1b4 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1204,11 +1204,8 @@ xfs_file_iomap_end_delalloc(
 		truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
 					 XFS_FSB_TO_B(mp, end_fsb) - 1);
 
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
 					       end_fsb - start_fsb);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
 		if (error && !XFS_FORCED_SHUTDOWN(mp)) {
 			xfs_alert(mp, "%s: unable to clean up ino %lld",
 				__func__, ip->i_ino);
-- 
cgit v1.2.3


From 91cdfd1761659f338e673aca72af3d0d50b88847 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:25:58 -0700
Subject: xfs: do not set the page uptodate in xfs_writepage_map

We already track the page uptodate status based on the buffer uptodate
status, which is updated whenever reading or zeroing blocks.

This code has been there since a ptool commit in 2002, which
claims to:

    "merge" the 2.4 fsx fix for block size < page size to 2.5.  This needed
    major changes to actually fit.

and isn't present in other writepage implementations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 5c549e983d69..df80a383ccd8 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -849,7 +849,6 @@ xfs_writepage_map(
 	uint64_t		offset;
 	int			error = 0;
 	int			count = 0;
-	int			uptodate = 1;
 	unsigned int		new_type;
 
 	bh = head = page_buffers(page);
@@ -857,8 +856,6 @@ xfs_writepage_map(
 	do {
 		if (offset >= end_offset)
 			break;
-		if (!buffer_uptodate(bh))
-			uptodate = 0;
 
 		/*
 		 * set_page_dirty dirties all buffers in a page, independent
@@ -922,9 +919,6 @@ xfs_writepage_map(
 
 	} while (offset += len, ((bh = bh->b_this_page) != head));
 
-	if (uptodate && bh == head)
-		SetPageUptodate(page);
-
 	ASSERT(wpc->ioend || list_empty(&submit_list));
 
 out:
-- 
cgit v1.2.3


From c57371a16d074bb4eafe6b73f29360085ecb2064 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:25:58 -0700
Subject: xfs: don't clear imap_valid for a non-uptodate buffers

Finding a buffer that isn't uptodate doesn't invalidate the mapping for
any given block.  The last_sector check will already take care of starting
another ioend as soon as we find any non-uptodate buffer, and if the current
mapping doesn't include the next uptodate buffer the xfs_imap_valid check
will take care of it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index df80a383ccd8..1d1cb917cc6e 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -863,10 +863,8 @@ xfs_writepage_map(
 		 * meaningless for holes (!mapped && uptodate), so skip
 		 * buffers covering holes here.
 		 */
-		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
-			wpc->imap_valid = false;
+		if (!buffer_mapped(bh) && buffer_uptodate(bh))
 			continue;
-		}
 
 		if (buffer_unwritten(bh))
 			new_type = XFS_IO_UNWRITTEN;
@@ -879,11 +877,8 @@ xfs_writepage_map(
 				ASSERT(buffer_mapped(bh));
 			/*
 			 * This buffer is not uptodate and will not be
-			 * written to disk.  Ensure that we will put any
-			 * subsequent writeable buffers into a new
-			 * ioend.
+			 * written to disk.
 			 */
-			wpc->imap_valid = false;
 			continue;
 		}
 
-- 
cgit v1.2.3


From a7b28f72ab90fe7a2f438360df5f6fda4237afdc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:25:59 -0700
Subject: xfs: don't use XFS_BMAPI_IGSTATE in xfs_map_blocks

We want to be able to use the extent state as a reliable indicator for
the type of I/O, and stop using the buffer head state.  For this we
need to stop using the XFS_BMAPI_IGSTATE so that we don't see merged
extents of different types.

Based on a patch from Dave Chinner.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1d1cb917cc6e..6b6150683343 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -373,7 +373,6 @@ xfs_map_blocks(
 	ssize_t			count = i_blocksize(inode);
 	xfs_fileoff_t		offset_fsb, end_fsb;
 	int			error = 0;
-	int			bmapi_flags = XFS_BMAPI_ENTIRE;
 	int			nimaps = 1;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -393,8 +392,6 @@ xfs_map_blocks(
 		return 0;
 
 	ASSERT(type != XFS_IO_COW);
-	if (type == XFS_IO_UNWRITTEN)
-		bmapi_flags |= XFS_BMAPI_IGSTATE;
 
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
@@ -406,7 +403,7 @@ xfs_map_blocks(
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
-				imap, &nimaps, bmapi_flags);
+				imap, &nimaps, XFS_BMAPI_ENTIRE);
 	/*
 	 * Truncate an overwrite extent if there's a pending CoW
 	 * reservation before the end of this extent.  This forces us
-- 
cgit v1.2.3


From fca8c805425c0d9435097a6c780e95332e54613a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:25:59 -0700
Subject: xfs: remove xfs_reflink_trim_irec_to_next_cow

We already have to check for overlapping COW extents every time we
come back to a page in xfs_writepage_map / xfs_map_cow, so this
additional trim is not required.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c    |  7 -------
 fs/xfs/xfs_reflink.c | 33 ---------------------------------
 fs/xfs/xfs_reflink.h |  2 --
 fs/xfs/xfs_trace.h   |  1 -
 4 files changed, 43 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 6b6150683343..08605432c497 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -404,13 +404,6 @@ xfs_map_blocks(
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
 				imap, &nimaps, XFS_BMAPI_ENTIRE);
-	/*
-	 * Truncate an overwrite extent if there's a pending CoW
-	 * reservation before the end of this extent.  This forces us
-	 * to come back to writepage to take care of the CoW.
-	 */
-	if (nimaps && type == XFS_IO_OVERWRITE)
-		xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 	if (error)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 592fb2071a03..22c11b98ab26 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -500,39 +500,6 @@ xfs_reflink_find_cow_mapping(
 	return true;
 }
 
-/*
- * Trim an extent to end at the next CoW reservation past offset_fsb.
- */
-void
-xfs_reflink_trim_irec_to_next_cow(
-	struct xfs_inode		*ip,
-	xfs_fileoff_t			offset_fsb,
-	struct xfs_bmbt_irec		*imap)
-{
-	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
-	struct xfs_bmbt_irec		got;
-	struct xfs_iext_cursor		icur;
-
-	if (!xfs_is_reflink_inode(ip))
-		return;
-
-	/* Find the extent in the CoW fork. */
-	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
-		return;
-
-	/* This is the extent before; try sliding up one. */
-	if (got.br_startoff < offset_fsb) {
-		if (!xfs_iext_next_extent(ifp, &icur, &got))
-			return;
-	}
-
-	if (got.br_startoff >= imap->br_startoff + imap->br_blockcount)
-		return;
-
-	imap->br_blockcount = got.br_startoff - imap->br_startoff;
-	trace_xfs_reflink_trim_irec(ip, imap);
-}
-
 /*
  * Cancel CoW reservations for some block range of an inode.
  *
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 1532827ba911..6f9f98894abc 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -20,8 +20,6 @@ extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
 extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
 		struct xfs_bmbt_irec *imap);
-extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
-		xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap);
 
 extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
 		struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 972d45d28097..a5b01529ecf6 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3216,7 +3216,6 @@ DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
 
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write);
 DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
-DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
 
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
-- 
cgit v1.2.3


From 5c665e5b5af6b8ad3e38ee73cb495ec695bcf589 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:25:59 -0700
Subject: xfs: remove xfs_map_cow

We can handle the existing cow mapping case as a special case directly
in xfs_writepage_map, and share code for allocating delalloc blocks
with regular I/O in xfs_map_blocks.  This means we need to always
call xfs_map_blocks for reflink inodes, but we can still skip most of
the work if it turns out that there is no COW mapping overlapping the
current block.

As a subtle detail we need to start caching holes in the wpc to deal
with the case of COW reservations beyond EOF.  But we'll need that
infrastructure later anyway, so this is no big deal.

Based on a patch from Dave Chinner.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 195 +++++++++++++++++++++++++++---------------------------
 fs/xfs/xfs_aops.h |   4 +-
 2 files changed, 100 insertions(+), 99 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 08605432c497..65454a4f4d93 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -363,70 +363,107 @@ xfs_end_bio(
 
 STATIC int
 xfs_map_blocks(
+	struct xfs_writepage_ctx *wpc,
 	struct inode		*inode,
-	loff_t			offset,
-	struct xfs_bmbt_irec	*imap,
-	int			type)
+	loff_t			offset)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			count = i_blocksize(inode);
-	xfs_fileoff_t		offset_fsb, end_fsb;
+	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
+	struct xfs_bmbt_irec	imap;
+	int			whichfork = XFS_DATA_FORK;
 	int			error = 0;
 	int			nimaps = 1;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	/*
-	 * Truncate can race with writeback since writeback doesn't take the
-	 * iolock and truncate decreases the file size before it starts
-	 * truncating the pages between new_size and old_size.  Therefore, we
-	 * can end up in the situation where writeback gets a CoW fork mapping
-	 * but the truncate makes the mapping invalid and we end up in here
-	 * trying to get a new mapping.  Bail out here so that we simply never
-	 * get a valid mapping and so we drop the write altogether.  The page
-	 * truncation will kill the contents anyway.
-	 */
-	if (type == XFS_IO_COW && offset > i_size_read(inode))
-		return 0;
-
-	ASSERT(type != XFS_IO_COW);
-
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 
+	if (xfs_is_reflink_inode(ip) &&
+	    xfs_reflink_find_cow_mapping(ip, offset, &imap)) {
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+		/*
+		 * Truncate can race with writeback since writeback doesn't
+		 * take the iolock and truncate decreases the file size before
+		 * it starts truncating the pages between new_size and old_size.
+		 * Therefore, we can end up in the situation where writeback
+		 * gets a CoW fork mapping but the truncate makes the mapping
+		 * invalid and we end up in here trying to get a new mapping.
+		 * bail out here so that we simply never get a valid mapping
+		 * and so we drop the write altogether.  The page truncation
+		 * will kill the contents anyway.
+		 */
+		if (offset > i_size_read(inode)) {
+			wpc->io_type = XFS_IO_HOLE;
+			return 0;
+		}
+		whichfork = XFS_COW_FORK;
+		wpc->io_type = XFS_IO_COW;
+		goto allocate_blocks;
+	}
+
+	/*
+	 * Map valid and no COW extent in the way?  We're done.
+	 */
+	if (wpc->imap_valid) {
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+		return 0;
+	}
+
+	/*
+	 * If we don't have a valid map, now it's time to get a new one for this
+	 * offset.  This will convert delayed allocations (including COW ones)
+	 * into real extents.
+	 */
 	if (offset > mp->m_super->s_maxbytes - count)
 		count = mp->m_super->s_maxbytes - offset;
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
-				imap, &nimaps, XFS_BMAPI_ENTIRE);
+				&imap, &nimaps, XFS_BMAPI_ENTIRE);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
 	if (error)
 		return error;
 
-	if (type == XFS_IO_DELALLOC &&
-	    (!nimaps || isnullstartblock(imap->br_startblock))) {
-		error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset,
-				imap);
-		if (!error)
-			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
-		return error;
+	if (!nimaps) {
+		/*
+		 * Lookup returns no match? Beyond eof? regardless,
+		 * return it as a hole so we don't write it
+		 */
+		imap.br_startoff = offset_fsb;
+		imap.br_blockcount = end_fsb - offset_fsb;
+		imap.br_startblock = HOLESTARTBLOCK;
+		wpc->io_type = XFS_IO_HOLE;
+	} else if (imap.br_startblock == HOLESTARTBLOCK) {
+		/* landed in a hole */
+		wpc->io_type = XFS_IO_HOLE;
 	}
 
+	if (wpc->io_type == XFS_IO_DELALLOC &&
+	    (!nimaps || isnullstartblock(imap.br_startblock)))
+		goto allocate_blocks;
+
 #ifdef DEBUG
-	if (type == XFS_IO_UNWRITTEN) {
+	if (wpc->io_type == XFS_IO_UNWRITTEN) {
 		ASSERT(nimaps);
-		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+		ASSERT(imap.br_startblock != HOLESTARTBLOCK);
+		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
 	}
 #endif
-	if (nimaps)
-		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
+	wpc->imap = imap;
+	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
+	return 0;
+allocate_blocks:
+	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
+	if (error)
+		return error;
+	wpc->imap = imap;
+	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
 	return 0;
 }
 
@@ -758,56 +795,6 @@ out_invalidate:
 	xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
 }
 
-static int
-xfs_map_cow(
-	struct xfs_writepage_ctx *wpc,
-	struct inode		*inode,
-	loff_t			offset,
-	unsigned int		*new_type)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_bmbt_irec	imap;
-	bool			is_cow = false;
-	int			error;
-
-	/*
-	 * If we already have a valid COW mapping keep using it.
-	 */
-	if (wpc->io_type == XFS_IO_COW) {
-		wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset);
-		if (wpc->imap_valid) {
-			*new_type = XFS_IO_COW;
-			return 0;
-		}
-	}
-
-	/*
-	 * Else we need to check if there is a COW mapping at this offset.
-	 */
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-	if (!is_cow)
-		return 0;
-
-	/*
-	 * And if the COW mapping has a delayed extent here we need to
-	 * allocate real space for it now.
-	 */
-	if (isnullstartblock(imap.br_startblock)) {
-		error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
-				&imap);
-		if (error)
-			return error;
-	}
-
-	wpc->io_type = *new_type = XFS_IO_COW;
-	wpc->imap_valid = true;
-	wpc->imap = imap;
-	return 0;
-}
-
 /*
  * We implement an immediate ioend submission policy here to avoid needing to
  * chain multiple ioends and hence nest mempool allocations which can violate
@@ -836,7 +823,7 @@ xfs_writepage_map(
 	struct xfs_ioend	*ioend, *next;
 	struct buffer_head	*bh, *head;
 	ssize_t			len = i_blocksize(inode);
-	uint64_t		offset;
+	uint64_t		offset;	/* file offset of page */
 	int			error = 0;
 	int			count = 0;
 	unsigned int		new_type;
@@ -872,10 +859,13 @@ xfs_writepage_map(
 			continue;
 		}
 
-		if (xfs_is_reflink_inode(XFS_I(inode))) {
-			error = xfs_map_cow(wpc, inode, offset, &new_type);
-			if (error)
-				goto out;
+		/*
+		 * If we already have a valid COW mapping keep using it.
+		 */
+		if (wpc->io_type == XFS_IO_COW &&
+		    xfs_imap_valid(inode, &wpc->imap, offset)) {
+			wpc->imap_valid = true;
+			new_type = XFS_IO_COW;
 		}
 
 		if (wpc->io_type != new_type) {
@@ -886,22 +876,31 @@ xfs_writepage_map(
 		if (wpc->imap_valid)
 			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
 							 offset);
-		if (!wpc->imap_valid) {
-			error = xfs_map_blocks(inode, offset, &wpc->imap,
-					     wpc->io_type);
+
+		/*
+		 * COW fork blocks can overlap data fork blocks even if the
+		 * blocks aren't shared. COW I/O always takes precedent, so we
+		 * must always check for overlap on reflink inodes unless the
+		 * mapping is already a COW one.
+		 */
+		if (!wpc->imap_valid ||
+		    (xfs_is_reflink_inode(XFS_I(inode)) &&
+		     wpc->io_type != XFS_IO_COW)) {
+			error = xfs_map_blocks(wpc, inode, offset);
 			if (error)
 				goto out;
 			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
 							 offset);
 		}
-		if (wpc->imap_valid) {
-			lock_buffer(bh);
-			if (wpc->io_type != XFS_IO_OVERWRITE)
-				xfs_map_at_offset(inode, bh, &wpc->imap, offset);
-			xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
-			count++;
-		}
 
+		if (!wpc->imap_valid || wpc->io_type == XFS_IO_HOLE)
+			continue;
+
+		lock_buffer(bh);
+		if (wpc->io_type != XFS_IO_OVERWRITE)
+			xfs_map_at_offset(inode, bh, &wpc->imap, offset);
+		xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
+		count++;
 	} while (offset += len, ((bh = bh->b_this_page) != head));
 
 	ASSERT(wpc->ioend || list_empty(&submit_list));
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 25bc6d4a1231..9af867951a10 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -17,6 +17,7 @@ enum {
 	XFS_IO_UNWRITTEN,	/* covers allocated but uninitialized data */
 	XFS_IO_OVERWRITE,	/* covers already allocated extent */
 	XFS_IO_COW,		/* covers copy-on-write extent */
+	XFS_IO_HOLE,		/* covers region without any block allocation */
 };
 
 #define XFS_IO_TYPES \
@@ -24,7 +25,8 @@ enum {
 	{ XFS_IO_DELALLOC,		"delalloc" }, \
 	{ XFS_IO_UNWRITTEN,		"unwritten" }, \
 	{ XFS_IO_OVERWRITE,		"overwrite" }, \
-	{ XFS_IO_COW,			"CoW" }
+	{ XFS_IO_COW,			"CoW" }, \
+	{ XFS_IO_HOLE,			"hole" }
 
 /*
  * Structure for buffered I/O completions.
-- 
cgit v1.2.3


From 6a4c95013608120b2d88be67c6871cb6b86aa5d6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:00 -0700
Subject: xfs: rename the offset variable in xfs_writepage_map

Calling it file_offset makes the usage more clear, especially with
a new poffset variable that will be added soon for the offset inside
the page.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 65454a4f4d93..4dc5fcff226e 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -823,15 +823,15 @@ xfs_writepage_map(
 	struct xfs_ioend	*ioend, *next;
 	struct buffer_head	*bh, *head;
 	ssize_t			len = i_blocksize(inode);
-	uint64_t		offset;	/* file offset of page */
+	uint64_t		file_offset;	/* file offset of page */
 	int			error = 0;
 	int			count = 0;
 	unsigned int		new_type;
 
 	bh = head = page_buffers(page);
-	offset = page_offset(page);
+	file_offset = page_offset(page);
 	do {
-		if (offset >= end_offset)
+		if (file_offset >= end_offset)
 			break;
 
 		/*
@@ -863,7 +863,7 @@ xfs_writepage_map(
 		 * If we already have a valid COW mapping keep using it.
 		 */
 		if (wpc->io_type == XFS_IO_COW &&
-		    xfs_imap_valid(inode, &wpc->imap, offset)) {
+		    xfs_imap_valid(inode, &wpc->imap, file_offset)) {
 			wpc->imap_valid = true;
 			new_type = XFS_IO_COW;
 		}
@@ -875,7 +875,7 @@ xfs_writepage_map(
 
 		if (wpc->imap_valid)
 			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
-							 offset);
+							 file_offset);
 
 		/*
 		 * COW fork blocks can overlap data fork blocks even if the
@@ -886,11 +886,11 @@ xfs_writepage_map(
 		if (!wpc->imap_valid ||
 		    (xfs_is_reflink_inode(XFS_I(inode)) &&
 		     wpc->io_type != XFS_IO_COW)) {
-			error = xfs_map_blocks(wpc, inode, offset);
+			error = xfs_map_blocks(wpc, inode, file_offset);
 			if (error)
 				goto out;
 			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
-							 offset);
+							 file_offset);
 		}
 
 		if (!wpc->imap_valid || wpc->io_type == XFS_IO_HOLE)
@@ -898,10 +898,10 @@ xfs_writepage_map(
 
 		lock_buffer(bh);
 		if (wpc->io_type != XFS_IO_OVERWRITE)
-			xfs_map_at_offset(inode, bh, &wpc->imap, offset);
-		xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
+			xfs_map_at_offset(inode, bh, &wpc->imap, file_offset);
+		xfs_add_to_ioend(inode, bh, file_offset, wpc, wbc, &submit_list);
 		count++;
-	} while (offset += len, ((bh = bh->b_this_page) != head));
+	} while (file_offset += len, ((bh = bh->b_this_page) != head));
 
 	ASSERT(wpc->ioend || list_empty(&submit_list));
 
-- 
cgit v1.2.3


From e2f6ad4624dfbde3a6c42c0cfbfc5553d93c3cae Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 11 Jul 2018 22:26:00 -0700
Subject: xfs: make xfs_writepage_map extent map centric

xfs_writepage_map() iterates over the bufferheads on a page to decide
what sort of IO to do and what actions to take.  However, when it comes
to reflink and deciding when it needs to execute a COW operation, we no
longer look at the bufferhead state but instead we ignore that and look
up internal state held in the COW fork extent list.

This means xfs_writepage_map() is somewhat confused. It does stuff, then
ignores it, then tries to handle the impedance mismatch by shovelling the
results inside the existing mapping code.  It works, but it's a bit of a
mess and it makes it hard to fix the cached map bug that the writepage
code currently has.

To unify the two different mechanisms, we first have to choose a direction.
That's already been set - we're de-emphasising bufferheads so they are no
longer a control structure as we need to do that to allow for eventual
removal.  Hence we need to move away from looking at bufferhead state to
determine what operations we need to perform.

We can't completely get rid of bufferheads yet - they do contain some
state that is absolutely necessary, such as whether that part of the page
contains valid data or not (buffer_uptodate()).  Other state in the
bufferhead is redundant:

	BH_dirty - the page is dirty, so we can ignore this and just
		write it
	BH_delay - we have delalloc extent info in the DATA fork extent
		tree
	BH_unwritten - same as BH_delay
	BH_mapped - indicates we've already used it once for IO and it is
		mapped to a disk address. Needs to be ignored for COW
		blocks.

The BH_mapped flag is an interesting case - it's supposed to indicate that
it's already mapped to disk and so we can just use it "as is".  In theory,
we don't even have to do an extent lookup to find where to write it to,
but we have to do that anyway to determine we are actually writing over a
valid extent.  Hence it's not even serving the purpose of avoiding an
extent lookup during writeback, and so we can pretty much ignore it.
Especially as we have to ignore it for COW operations...

Therefore, use the extent map as the source of information to tell us
what actions we need to take and what sort of IO we should perform.  The
first step is to have xfs_map_blocks() set the io type according to what
it looks up.  This means it can easily handle both normal overwrite and
COW cases.  The only thing we also need to add is the ability to return
hole mappings.

We need to return and cache hole mappings now for the case of multiple
blocks per page.  We no longer use the BH_mapped to indicate a block over
a hole, so we have to get that info from xfs_map_blocks().  We cache it so
that holes that span two pages don't need separate lookups.  This allows us
to avoid ever doing write IO over a hole, too.

Now that we have xfs_map_blocks() returning both a cached map and the type
of IO we need to perform, we can rewrite xfs_writepage_map() to drop all
the bufferhead control. It's also much simplified because it doesn't need
to explicitly handle COW operations.  Instead of iterating bufferheads, it
iterates blocks within the page and then looks up what per-block state is
required from the appropriate bufferhead.  It then validates the cached
map, and if it's not valid, we get a new map.  If we don't get a valid map
or it's over a hole, we skip the block.

At this point, we have to remap the bufferhead via xfs_map_at_offset().
As previously noted, we had to do this even if the buffer was already
mapped as the mapping would be stale for XFS_IO_DELALLOC, XFS_IO_UNWRITTEN
and XFS_IO_COW IO types.  With xfs_map_blocks() now controlling the type,
even XFS_IO_OVERWRITE types need remapping, as converted-but-not-yet-
written delalloc extents beyond EOF can be reported as XFS_IO_OVERWRITE.
Bufferheads that span such regions still need their BH_Delay flags cleared
and their block numbers calculated, so we now unconditionally map each
bufferhead before submission.

But wait! There's more - remember the old "treat unwritten extents as
holes on read" hack?  Yeah, that means we can have a dirty page with
unmapped, unwritten bufferheads that contain data!  What makes these so
special is that the unwritten "hole" bufferheads do not have a valid block
device pointer, so if we attempt to write them xfs_add_to_ioend() blows
up. So we make xfs_map_at_offset() do the "realtime or data device"
lookup from the inode and ignore what was or wasn't put into the
bufferhead when the buffer was instantiated.

The astute reader will have realised by now that this code treats
unwritten extents in multiple-blocks-per-page situations differently.
If we get any combination of unwritten blocks on a dirty page that contain
valid data in the page, we're going to convert them to real extents.  This
can actually be a win, because it means that pages with interleaving
unwritten and written blocks will get converted to a single written extent
with zeros replacing the interspersed unwritten blocks.  This is actually
good for reducing extent list and conversion overhead, and it means we
issue a contiguous IO instead of lots of little ones.  The downside is
that we use up a little extra IO bandwidth.  Neither of these seem like a
bad thing given that spinning disks are seek sensitive, and SSDs/pmem have
bandwidth to burn and the lower IO latency/CPU overhead of fewer, larger
IOs will result in better performance on them...

As a result of all this, the only state we actually care about from the
bufferhead is a single flag - BH_Uptodate. We still use the bufferhead to
pass some information to the bio via xfs_add_to_ioend(), but that is
trivial to separate and pass explicitly.  This means we really only need
1 bit of state per block per page from the buffered write path in the
writeback path.  Everything else we do with the bufferhead is purely to
make the buffered IO front end continue to work correctly. i.e we've
pretty much marginalised bufferheads in the writeback path completely.

Signed-off-By: Dave Chinner <dchinner@redhat.com>
[hch: forward port, refactor and split off bits into other commits]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 88 +++++++++++++++++++++++--------------------------------
 1 file changed, 36 insertions(+), 52 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4dc5fcff226e..815b0b29438b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -442,19 +442,19 @@ xfs_map_blocks(
 	} else if (imap.br_startblock == HOLESTARTBLOCK) {
 		/* landed in a hole */
 		wpc->io_type = XFS_IO_HOLE;
-	}
-
-	if (wpc->io_type == XFS_IO_DELALLOC &&
-	    (!nimaps || isnullstartblock(imap.br_startblock)))
-		goto allocate_blocks;
+	} else {
+		if (isnullstartblock(imap.br_startblock)) {
+			/* got a delalloc extent */
+			wpc->io_type = XFS_IO_DELALLOC;
+			goto allocate_blocks;
+		}
 
-#ifdef DEBUG
-	if (wpc->io_type == XFS_IO_UNWRITTEN) {
-		ASSERT(nimaps);
-		ASSERT(imap.br_startblock != HOLESTARTBLOCK);
-		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+		if (imap.br_state == XFS_EXT_UNWRITTEN)
+			wpc->io_type = XFS_IO_UNWRITTEN;
+		else
+			wpc->io_type = XFS_IO_OVERWRITE;
 	}
-#endif
+
 	wpc->imap = imap;
 	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
 	return 0;
@@ -735,6 +735,14 @@ xfs_map_at_offset(
 	set_buffer_mapped(bh);
 	clear_buffer_delay(bh);
 	clear_buffer_unwritten(bh);
+
+	/*
+	 * If this is a realtime file, data may be on a different device.
+	 * to that pointed to from the buffer_head b_bdev currently. We can't
+	 * trust that the bufferhead has a already been mapped correctly, so
+	 * set the bdev now.
+	 */
+	bh->b_bdev = xfs_find_bdev_for_inode(inode);
 }
 
 STATIC void
@@ -821,58 +829,35 @@ xfs_writepage_map(
 {
 	LIST_HEAD(submit_list);
 	struct xfs_ioend	*ioend, *next;
-	struct buffer_head	*bh, *head;
+	struct buffer_head	*bh;
 	ssize_t			len = i_blocksize(inode);
 	uint64_t		file_offset;	/* file offset of page */
+	unsigned		poffset;	/* offset into page */
 	int			error = 0;
 	int			count = 0;
-	unsigned int		new_type;
 
-	bh = head = page_buffers(page);
+	/*
+	 * Walk the blocks on the page, and if we run off the end of the current
+	 * map or find the current map invalid, grab a new one.  We only use
+	 * bufferheads here to check per-block state - they no longer control
+	 * the iteration through the page. This allows us to replace the
+	 * bufferhead with some other state tracking mechanism in future.
+	 */
 	file_offset = page_offset(page);
-	do {
+	bh = page_buffers(page);
+	for (poffset = 0;
+	     poffset < PAGE_SIZE;
+	     poffset += len, file_offset += len, bh = bh->b_this_page) {
+		/* past the range we are writing, so nothing more to write. */
 		if (file_offset >= end_offset)
 			break;
 
-		/*
-		 * set_page_dirty dirties all buffers in a page, independent
-		 * of their state.  The dirty state however is entirely
-		 * meaningless for holes (!mapped && uptodate), so skip
-		 * buffers covering holes here.
-		 */
-		if (!buffer_mapped(bh) && buffer_uptodate(bh))
-			continue;
-
-		if (buffer_unwritten(bh))
-			new_type = XFS_IO_UNWRITTEN;
-		else if (buffer_delay(bh))
-			new_type = XFS_IO_DELALLOC;
-		else if (buffer_uptodate(bh))
-			new_type = XFS_IO_OVERWRITE;
-		else {
+		if (!buffer_uptodate(bh)) {
 			if (PageUptodate(page))
 				ASSERT(buffer_mapped(bh));
-			/*
-			 * This buffer is not uptodate and will not be
-			 * written to disk.
-			 */
 			continue;
 		}
 
-		/*
-		 * If we already have a valid COW mapping keep using it.
-		 */
-		if (wpc->io_type == XFS_IO_COW &&
-		    xfs_imap_valid(inode, &wpc->imap, file_offset)) {
-			wpc->imap_valid = true;
-			new_type = XFS_IO_COW;
-		}
-
-		if (wpc->io_type != new_type) {
-			wpc->io_type = new_type;
-			wpc->imap_valid = false;
-		}
-
 		if (wpc->imap_valid)
 			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
 							 file_offset);
@@ -897,11 +882,10 @@ xfs_writepage_map(
 			continue;
 
 		lock_buffer(bh);
-		if (wpc->io_type != XFS_IO_OVERWRITE)
-			xfs_map_at_offset(inode, bh, &wpc->imap, file_offset);
+		xfs_map_at_offset(inode, bh, &wpc->imap, file_offset);
 		xfs_add_to_ioend(inode, bh, file_offset, wpc, wbc, &submit_list);
 		count++;
-	} while (file_offset += len, ((bh = bh->b_this_page) != head));
+	}
 
 	ASSERT(wpc->ioend || list_empty(&submit_list));
 
-- 
cgit v1.2.3


From c3a2f9fff1bbe3cfc2070b2e9f1f0ad0d5ccda91 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:01 -0700
Subject: xfs: remove the now unused XFS_BMAPI_IGSTATE flag

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 6 ++----
 fs/xfs/libxfs/xfs_bmap.h | 3 ---
 2 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7205268b30bc..68ea1f4b9c3f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3788,8 +3788,7 @@ xfs_bmapi_update_map(
 		   mval[-1].br_startblock != HOLESTARTBLOCK &&
 		   mval->br_startblock == mval[-1].br_startblock +
 					  mval[-1].br_blockcount &&
-		   ((flags & XFS_BMAPI_IGSTATE) ||
-			mval[-1].br_state == mval->br_state)) {
+		   mval[-1].br_state == mval->br_state) {
 		ASSERT(mval->br_startoff ==
 		       mval[-1].br_startoff + mval[-1].br_blockcount);
 		mval[-1].br_blockcount += mval->br_blockcount;
@@ -3834,7 +3833,7 @@ xfs_bmapi_read(
 
 	ASSERT(*nmap >= 1);
 	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
-			   XFS_BMAPI_IGSTATE|XFS_BMAPI_COWFORK)));
+			   XFS_BMAPI_COWFORK)));
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
 
 	if (unlikely(XFS_TEST_ERROR(
@@ -4279,7 +4278,6 @@ xfs_bmapi_write(
 
 	ASSERT(*nmap >= 1);
 	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
-	ASSERT(!(flags & XFS_BMAPI_IGSTATE));
 	ASSERT(tp != NULL ||
 	       (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
 			(XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 9b49ddf99c41..44639588d1c7 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -68,8 +68,6 @@ struct xfs_extent_free_item
 #define XFS_BMAPI_METADATA	0x002	/* mapping metadata not user data */
 #define XFS_BMAPI_ATTRFORK	0x004	/* use attribute fork not data */
 #define XFS_BMAPI_PREALLOC	0x008	/* preallocation op: unwritten space */
-#define XFS_BMAPI_IGSTATE	0x010	/* Ignore state - */
-					/* combine contig. space */
 #define XFS_BMAPI_CONTIG	0x020	/* must allocate only one extent */
 /*
  * unwritten extent conversion - this needs write cache flushing and no additional
@@ -116,7 +114,6 @@ struct xfs_extent_free_item
 	{ XFS_BMAPI_METADATA,	"METADATA" }, \
 	{ XFS_BMAPI_ATTRFORK,	"ATTRFORK" }, \
 	{ XFS_BMAPI_PREALLOC,	"PREALLOC" }, \
-	{ XFS_BMAPI_IGSTATE,	"IGSTATE" }, \
 	{ XFS_BMAPI_CONTIG,	"CONTIG" }, \
 	{ XFS_BMAPI_CONVERT,	"CONVERT" }, \
 	{ XFS_BMAPI_ZERO,	"ZERO" }, \
-- 
cgit v1.2.3


From 060d4eaa0bf30a8fc2d189e4d4922f6e9027857b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:01 -0700
Subject: xfs: remove xfs_reflink_find_cow_mapping

We only have one caller left, and open coding the simple extent list
lookup in it allows us to make the code both more understandable and
reuse calculations and variables already present.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c    | 19 +++++++++++++------
 fs/xfs/xfs_reflink.c | 30 ------------------------------
 fs/xfs/xfs_reflink.h |  2 --
 fs/xfs/xfs_trace.h   |  1 -
 4 files changed, 13 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 815b0b29438b..5c5d8c832dcc 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -370,9 +370,10 @@ xfs_map_blocks(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			count = i_blocksize(inode);
-	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
+	xfs_fileoff_t		offset_fsb, end_fsb;
 	struct xfs_bmbt_irec	imap;
 	int			whichfork = XFS_DATA_FORK;
+	struct xfs_iext_cursor	icur;
 	int			error = 0;
 	int			nimaps = 1;
 
@@ -384,8 +385,18 @@ xfs_map_blocks(
 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 
+	if (offset > mp->m_super->s_maxbytes - count)
+		count = mp->m_super->s_maxbytes - offset;
+	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
+	/*
+	 * Check if this is offset is covered by a COW extents, and if yes use
+	 * it directly instead of looking up anything in the data fork.
+	 */
 	if (xfs_is_reflink_inode(ip) &&
-	    xfs_reflink_find_cow_mapping(ip, offset, &imap)) {
+	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
+	    imap.br_startoff <= offset_fsb) {
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 		/*
 		 * Truncate can race with writeback since writeback doesn't
@@ -420,10 +431,6 @@ xfs_map_blocks(
 	 * offset.  This will convert delayed allocations (including COW ones)
 	 * into real extents.
 	 */
-	if (offset > mp->m_super->s_maxbytes - count)
-		count = mp->m_super->s_maxbytes - offset;
-	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
 				&imap, &nimaps, XFS_BMAPI_ENTIRE);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 22c11b98ab26..49e4913fa779 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -470,36 +470,6 @@ out:
 	return error;
 }
 
-/*
- * Find the CoW reservation for a given byte offset of a file.
- */
-bool
-xfs_reflink_find_cow_mapping(
-	struct xfs_inode		*ip,
-	xfs_off_t			offset,
-	struct xfs_bmbt_irec		*imap)
-{
-	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
-	xfs_fileoff_t			offset_fsb;
-	struct xfs_bmbt_irec		got;
-	struct xfs_iext_cursor		icur;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
-
-	if (!xfs_is_reflink_inode(ip))
-		return false;
-	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
-		return false;
-	if (got.br_startoff > offset_fsb)
-		return false;
-
-	trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE,
-			&got);
-	*imap = got;
-	return true;
-}
-
 /*
  * Cancel CoW reservations for some block range of an inode.
  *
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 6f9f98894abc..c585ad9552b2 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -18,8 +18,6 @@ extern int xfs_reflink_allocate_cow(struct xfs_inode *ip,
 		struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode);
 extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
-extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
-		struct xfs_bmbt_irec *imap);
 
 extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
 		struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a5b01529ecf6..1af123df19b5 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3215,7 +3215,6 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
 DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
 
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write);
-DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
 
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
-- 
cgit v1.2.3


From 3345746ef38bb794ae9d4d0762adf151e452663e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:02 -0700
Subject: xfs: simplify xfs_map_blocks by using xfs_iext_lookup_extent directly

xfs_bmapi_read adds zero value in xfs_map_blocks.  Replace it with a
direct call to the low-level extent lookup function.

Note that we now always pass a 0 length to the trace points as we ask
for an unspecified len.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 5c5d8c832dcc..0bfcc2d06658 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -375,7 +375,6 @@ xfs_map_blocks(
 	int			whichfork = XFS_DATA_FORK;
 	struct xfs_iext_cursor	icur;
 	int			error = 0;
-	int			nimaps = 1;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
@@ -431,24 +430,16 @@ xfs_map_blocks(
 	 * offset.  This will convert delayed allocations (including COW ones)
 	 * into real extents.
 	 */
-	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
-				&imap, &nimaps, XFS_BMAPI_ENTIRE);
+	if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
+		imap.br_startoff = end_fsb;	/* fake a hole past EOF */
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	if (error)
-		return error;
 
-	if (!nimaps) {
-		/*
-		 * Lookup returns no match? Beyond eof? regardless,
-		 * return it as a hole so we don't write it
-		 */
+	if (imap.br_startoff > offset_fsb) {
+		/* landed in a hole or beyond EOF */
+		imap.br_blockcount = imap.br_startoff - offset_fsb;
 		imap.br_startoff = offset_fsb;
-		imap.br_blockcount = end_fsb - offset_fsb;
 		imap.br_startblock = HOLESTARTBLOCK;
 		wpc->io_type = XFS_IO_HOLE;
-	} else if (imap.br_startblock == HOLESTARTBLOCK) {
-		/* landed in a hole */
-		wpc->io_type = XFS_IO_HOLE;
 	} else {
 		if (isnullstartblock(imap.br_startblock)) {
 			/* got a delalloc extent */
-- 
cgit v1.2.3


From 889c65b3f60af4c840896478fc6151363ffa279f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:02 -0700
Subject: xfs: remove the imap_valid flag

Simplify the way we check for a valid imap - we know we have a valid
mapping after xfs_map_blocks returned successfully, and we know we can
call xfs_imap_valid on any imap, as it will always fail on a
zero-initialized map.

We can also remove the xfs_imap_valid function and fold it into
xfs_map_blocks now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 89 ++++++++++++++++++++++++-------------------------------
 1 file changed, 38 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0bfcc2d06658..09092f10cff3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -30,7 +30,6 @@
  */
 struct xfs_writepage_ctx {
 	struct xfs_bmbt_irec    imap;
-	bool			imap_valid;
 	unsigned int		io_type;
 	struct xfs_ioend	*ioend;
 	sector_t		last_block;
@@ -370,15 +369,47 @@ xfs_map_blocks(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			count = i_blocksize(inode);
-	xfs_fileoff_t		offset_fsb, end_fsb;
+	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
 	struct xfs_bmbt_irec	imap;
 	int			whichfork = XFS_DATA_FORK;
 	struct xfs_iext_cursor	icur;
+	bool			imap_valid;
 	int			error = 0;
 
+	/*
+	 * We have to make sure the cached mapping is within EOF to protect
+	 * against eofblocks trimming on file release leaving us with a stale
+	 * mapping. Otherwise, a page for a subsequent file extending buffered
+	 * write could get picked up by this writeback cycle and written to the
+	 * wrong blocks.
+	 *
+	 * Note that what we really want here is a generic mapping invalidation
+	 * mechanism to protect us from arbitrary extent modifying contexts, not
+	 * just eofblocks.
+	 */
+	xfs_trim_extent_eof(&wpc->imap, ip);
+
+	/*
+	 * COW fork blocks can overlap data fork blocks even if the blocks
+	 * aren't shared.  COW I/O always takes precedent, so we must always
+	 * check for overlap on reflink inodes unless the mapping is already a
+	 * COW one.
+	 */
+	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
+		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
+	if (imap_valid &&
+	    (!xfs_is_reflink_inode(ip) || wpc->io_type == XFS_IO_COW))
+		return 0;
+
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
+	/*
+	 * If we don't have a valid map, now it's time to get a new one for this
+	 * offset.  This will convert delayed allocations (including COW ones)
+	 * into real extents.  If we return without a valid map, it means we
+	 * landed in a hole and we skip the block.
+	 */
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
@@ -387,7 +418,6 @@ xfs_map_blocks(
 	if (offset > mp->m_super->s_maxbytes - count)
 		count = mp->m_super->s_maxbytes - offset;
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
 	/*
 	 * Check if this is offset is covered by a COW extents, and if yes use
@@ -420,7 +450,7 @@ xfs_map_blocks(
 	/*
 	 * Map valid and no COW extent in the way?  We're done.
 	 */
-	if (wpc->imap_valid) {
+	if (imap_valid) {
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 		return 0;
 	}
@@ -465,31 +495,6 @@ allocate_blocks:
 	return 0;
 }
 
-STATIC bool
-xfs_imap_valid(
-	struct inode		*inode,
-	struct xfs_bmbt_irec	*imap,
-	xfs_off_t		offset)
-{
-	offset >>= inode->i_blkbits;
-
-	/*
-	 * We have to make sure the cached mapping is within EOF to protect
-	 * against eofblocks trimming on file release leaving us with a stale
-	 * mapping. Otherwise, a page for a subsequent file extending buffered
-	 * write could get picked up by this writeback cycle and written to the
-	 * wrong blocks.
-	 *
-	 * Note that what we really want here is a generic mapping invalidation
-	 * mechanism to protect us from arbitrary extent modifying contexts, not
-	 * just eofblocks.
-	 */
-	xfs_trim_extent_eof(imap, XFS_I(inode));
-
-	return offset >= imap->br_startoff &&
-		offset < imap->br_startoff + imap->br_blockcount;
-}
-
 STATIC void
 xfs_start_buffer_writeback(
 	struct buffer_head	*bh)
@@ -856,27 +861,10 @@ xfs_writepage_map(
 			continue;
 		}
 
-		if (wpc->imap_valid)
-			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
-							 file_offset);
-
-		/*
-		 * COW fork blocks can overlap data fork blocks even if the
-		 * blocks aren't shared. COW I/O always takes precedent, so we
-		 * must always check for overlap on reflink inodes unless the
-		 * mapping is already a COW one.
-		 */
-		if (!wpc->imap_valid ||
-		    (xfs_is_reflink_inode(XFS_I(inode)) &&
-		     wpc->io_type != XFS_IO_COW)) {
-			error = xfs_map_blocks(wpc, inode, file_offset);
-			if (error)
-				goto out;
-			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
-							 file_offset);
-		}
-
-		if (!wpc->imap_valid || wpc->io_type == XFS_IO_HOLE)
+		error = xfs_map_blocks(wpc, inode, file_offset);
+		if (error)
+			break;
+		if (wpc->io_type == XFS_IO_HOLE)
 			continue;
 
 		lock_buffer(bh);
@@ -887,7 +875,6 @@ xfs_writepage_map(
 
 	ASSERT(wpc->ioend || list_empty(&submit_list));
 
-out:
 	/*
 	 * On error, we have to fail the ioend here because we have locked
 	 * buffers in the ioend. If we don't do this, we'll deadlock
-- 
cgit v1.2.3


From 3faed667644d787c3cf6f977f80bac7a013eb045 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:02 -0700
Subject: xfs: don't look at buffer heads in xfs_add_to_ioend

Calculate all information for the bio based on the passed in information
without requiring a buffer_head structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 68 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 32 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 09092f10cff3..6402e4323031 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -32,7 +32,6 @@ struct xfs_writepage_ctx {
 	struct xfs_bmbt_irec    imap;
 	unsigned int		io_type;
 	struct xfs_ioend	*ioend;
-	sector_t		last_block;
 };
 
 void
@@ -534,11 +533,6 @@ xfs_start_page_writeback(
 	unlock_page(page);
 }
 
-static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
-{
-	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
-}
-
 /*
  * Submit the bio for an ioend. We are passed an ioend with a bio attached to
  * it, and we submit that bio. The ioend may be used for multiple bio
@@ -604,27 +598,20 @@ xfs_submit_ioend(
 	return 0;
 }
 
-static void
-xfs_init_bio_from_bh(
-	struct bio		*bio,
-	struct buffer_head	*bh)
-{
-	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-	bio_set_dev(bio, bh->b_bdev);
-}
-
 static struct xfs_ioend *
 xfs_alloc_ioend(
 	struct inode		*inode,
 	unsigned int		type,
 	xfs_off_t		offset,
-	struct buffer_head	*bh)
+	struct block_device	*bdev,
+	sector_t		sector)
 {
 	struct xfs_ioend	*ioend;
 	struct bio		*bio;
 
 	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
-	xfs_init_bio_from_bh(bio, bh);
+	bio_set_dev(bio, bdev);
+	bio->bi_iter.bi_sector = sector;
 
 	ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
 	INIT_LIST_HEAD(&ioend->io_list);
@@ -649,13 +636,14 @@ static void
 xfs_chain_bio(
 	struct xfs_ioend	*ioend,
 	struct writeback_control *wbc,
-	struct buffer_head	*bh)
+	struct block_device	*bdev,
+	sector_t		sector)
 {
 	struct bio *new;
 
 	new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
-	xfs_init_bio_from_bh(new, bh);
-
+	bio_set_dev(new, bdev);
+	new->bi_iter.bi_sector = sector;
 	bio_chain(ioend->io_bio, new);
 	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
 	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
@@ -665,39 +653,45 @@ xfs_chain_bio(
 }
 
 /*
- * Test to see if we've been building up a completion structure for
- * earlier buffers -- if so, we try to append to this ioend if we
- * can, otherwise we finish off any current ioend and start another.
- * Return the ioend we finished off so that the caller can submit it
- * once it has finished processing the dirty page.
+ * Test to see if we have an existing ioend structure that we could append to
+ * first, otherwise finish off the current ioend and start another.
  */
 STATIC void
 xfs_add_to_ioend(
 	struct inode		*inode,
-	struct buffer_head	*bh,
 	xfs_off_t		offset,
+	struct page		*page,
 	struct xfs_writepage_ctx *wpc,
 	struct writeback_control *wbc,
 	struct list_head	*iolist)
 {
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
+	unsigned		len = i_blocksize(inode);
+	unsigned		poff = offset & (PAGE_SIZE - 1);
+	sector_t		sector;
+
+	sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) +
+		((offset - XFS_FSB_TO_B(mp, wpc->imap.br_startoff)) >> 9);
+
 	if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
-	    bh->b_blocknr != wpc->last_block + 1 ||
+	    sector != bio_end_sector(wpc->ioend->io_bio) ||
 	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
 		if (wpc->ioend)
 			list_add(&wpc->ioend->io_list, iolist);
-		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
+		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset,
+				bdev, sector);
 	}
 
 	/*
-	 * If the buffer doesn't fit into the bio we need to allocate a new
-	 * one.  This shouldn't happen more than once for a given buffer.
+	 * If the block doesn't fit into the bio we need to allocate a new
+	 * one.  This shouldn't happen more than once for a given block.
 	 */
-	while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
-		xfs_chain_bio(wpc->ioend, wbc, bh);
+	while (bio_add_page(wpc->ioend->io_bio, page, len, poff) != len)
+		xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
 
-	wpc->ioend->io_size += bh->b_size;
-	wpc->last_block = bh->b_blocknr;
-	xfs_start_buffer_writeback(bh);
+	wpc->ioend->io_size += len;
 }
 
 STATIC void
@@ -869,7 +863,9 @@ xfs_writepage_map(
 
 		lock_buffer(bh);
 		xfs_map_at_offset(inode, bh, &wpc->imap, file_offset);
-		xfs_add_to_ioend(inode, bh, file_offset, wpc, wbc, &submit_list);
+		xfs_add_to_ioend(inode, file_offset, page, wpc, wbc,
+				&submit_list);
+		xfs_start_buffer_writeback(bh);
 		count++;
 	}
 
-- 
cgit v1.2.3


From 6d465e895343225e3ad35fe10d7b3e9f2f18faec Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:03 -0700
Subject: xfs: move all writeback buffer_head manipulation into
 xfs_map_at_offset

This keeps it in a single place so it can be made optional more easily.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 6402e4323031..71b4ca60ff40 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -494,21 +494,6 @@ allocate_blocks:
 	return 0;
 }
 
-STATIC void
-xfs_start_buffer_writeback(
-	struct buffer_head	*bh)
-{
-	ASSERT(buffer_mapped(bh));
-	ASSERT(buffer_locked(bh));
-	ASSERT(!buffer_delay(bh));
-	ASSERT(!buffer_unwritten(bh));
-
-	bh->b_end_io = NULL;
-	set_buffer_async_write(bh);
-	set_buffer_uptodate(bh);
-	clear_buffer_dirty(bh);
-}
-
 STATIC void
 xfs_start_page_writeback(
 	struct page		*page,
@@ -728,6 +713,7 @@ xfs_map_at_offset(
 	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
 	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
+	lock_buffer(bh);
 	xfs_map_buffer(inode, bh, imap, offset);
 	set_buffer_mapped(bh);
 	clear_buffer_delay(bh);
@@ -740,6 +726,10 @@ xfs_map_at_offset(
 	 * set the bdev now.
 	 */
 	bh->b_bdev = xfs_find_bdev_for_inode(inode);
+	bh->b_end_io = NULL;
+	set_buffer_async_write(bh);
+	set_buffer_uptodate(bh);
+	clear_buffer_dirty(bh);
 }
 
 STATIC void
@@ -861,11 +851,9 @@ xfs_writepage_map(
 		if (wpc->io_type == XFS_IO_HOLE)
 			continue;
 
-		lock_buffer(bh);
 		xfs_map_at_offset(inode, bh, &wpc->imap, file_offset);
 		xfs_add_to_ioend(inode, file_offset, page, wpc, wbc,
 				&submit_list);
-		xfs_start_buffer_writeback(bh);
 		count++;
 	}
 
-- 
cgit v1.2.3


From 1b65d3dd2d5e938b035b2ad73d0b47f35b5ef9a0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:03 -0700
Subject: xfs: remove xfs_start_page_writeback

This helper only has two callers, one of them with a constant error
argument.  Remove it to make pending changes to the code a little easier.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 46 ++++++++++++++++++++--------------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 71b4ca60ff40..af9224ea4ebf 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -494,30 +494,6 @@ allocate_blocks:
 	return 0;
 }
 
-STATIC void
-xfs_start_page_writeback(
-	struct page		*page,
-	int			clear_dirty)
-{
-	ASSERT(PageLocked(page));
-	ASSERT(!PageWriteback(page));
-
-	/*
-	 * if the page was not fully cleaned, we need to ensure that the higher
-	 * layers come back to it correctly. That means we need to keep the page
-	 * dirty, and for WB_SYNC_ALL writeback we need to ensure the
-	 * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to
-	 * write this page in this writeback sweep will be made.
-	 */
-	if (clear_dirty) {
-		clear_page_dirty_for_io(page);
-		set_page_writeback(page);
-	} else
-		set_page_writeback_keepwrite(page);
-
-	unlock_page(page);
-}
-
 /*
  * Submit the bio for an ioend. We are passed an ioend with a bio attached to
  * it, and we submit that bio. The ioend may be used for multiple bio
@@ -858,6 +834,8 @@ xfs_writepage_map(
 	}
 
 	ASSERT(wpc->ioend || list_empty(&submit_list));
+	ASSERT(PageLocked(page));
+	ASSERT(!PageWriteback(page));
 
 	/*
 	 * On error, we have to fail the ioend here because we have locked
@@ -877,7 +855,21 @@ xfs_writepage_map(
 	 * treated correctly on error.
 	 */
 	if (count) {
-		xfs_start_page_writeback(page, !error);
+		/*
+		 * If the page was not fully cleaned, we need to ensure that the
+		 * higher layers come back to it correctly.  That means we need
+		 * to keep the page dirty, and for WB_SYNC_ALL writeback we need
+		 * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed
+		 * so another attempt to write this page in this writeback sweep
+		 * will be made.
+		 */
+		if (error) {
+			set_page_writeback_keepwrite(page);
+		} else {
+			clear_page_dirty_for_io(page);
+			set_page_writeback(page);
+		}
+		unlock_page(page);
 
 		/*
 		 * Preserve the original error if there was one, otherwise catch
@@ -902,7 +894,9 @@ xfs_writepage_map(
 		 * race with a partial page truncate on a sub-page block sized
 		 * filesystem. In that case we need to mark the page clean.
 		 */
-		xfs_start_page_writeback(page, 1);
+		clear_page_dirty_for_io(page);
+		set_page_writeback(page);
+		unlock_page(page);
 		end_page_writeback(page);
 	}
 
-- 
cgit v1.2.3


From 8e1f065bea1b1c128c92ef7e386779a23cd5d342 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:04 -0700
Subject: xfs: refactor the tail of xfs_writepage_map

Rejuggle how we deal with the error vs non-error and the ioend vs no-ioend
cases to keep the fast path streamlined and the duplicate code at a
minimum.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c | 65 +++++++++++++++++++++++++++----------------------------
 1 file changed, 32 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index af9224ea4ebf..c8e0d3055153 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -854,7 +854,14 @@ xfs_writepage_map(
 	 * submission of outstanding ioends on the writepage context so they are
 	 * treated correctly on error.
 	 */
-	if (count) {
+	if (unlikely(error)) {
+		if (!count) {
+			xfs_aops_discard_page(page);
+			ClearPageUptodate(page);
+			unlock_page(page);
+			goto done;
+		}
+
 		/*
 		 * If the page was not fully cleaned, we need to ensure that the
 		 * higher layers come back to it correctly.  That means we need
@@ -863,43 +870,35 @@ xfs_writepage_map(
 		 * so another attempt to write this page in this writeback sweep
 		 * will be made.
 		 */
-		if (error) {
-			set_page_writeback_keepwrite(page);
-		} else {
-			clear_page_dirty_for_io(page);
-			set_page_writeback(page);
-		}
-		unlock_page(page);
-
-		/*
-		 * Preserve the original error if there was one, otherwise catch
-		 * submission errors here and propagate into subsequent ioend
-		 * submissions.
-		 */
-		list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
-			int error2;
-
-			list_del_init(&ioend->io_list);
-			error2 = xfs_submit_ioend(wbc, ioend, error);
-			if (error2 && !error)
-				error = error2;
-		}
-	} else if (error) {
-		xfs_aops_discard_page(page);
-		ClearPageUptodate(page);
-		unlock_page(page);
+		set_page_writeback_keepwrite(page);
 	} else {
-		/*
-		 * We can end up here with no error and nothing to write if we
-		 * race with a partial page truncate on a sub-page block sized
-		 * filesystem. In that case we need to mark the page clean.
-		 */
 		clear_page_dirty_for_io(page);
 		set_page_writeback(page);
-		unlock_page(page);
-		end_page_writeback(page);
 	}
 
+	unlock_page(page);
+
+	/*
+	 * Preserve the original error if there was one, otherwise catch
+	 * submission errors here and propagate into subsequent ioend
+	 * submissions.
+	 */
+	list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
+		int error2;
+
+		list_del_init(&ioend->io_list);
+		error2 = xfs_submit_ioend(wbc, ioend, error);
+		if (error2 && !error)
+			error = error2;
+	}
+
+	/*
+	 * We can end up here with no error and nothing to write if we race with
+	 * a partial page truncate on a sub-page block sized filesystem.
+	 */
+	if (!count)
+		end_page_writeback(page);
+done:
 	mapping_set_error(page->mapping, error);
 	return error;
 }
-- 
cgit v1.2.3


From ac8ee54669c07e6b38b4cae13a65f5ec06a11323 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:04 -0700
Subject: xfs: allow writeback on pages without buffer heads

Disable the IOMAP_F_BUFFER_HEAD flag on file systems with a block size
equal to the page size, and deal with pages without buffer heads in
writeback.  Thanks to the previous refactoring this is basically trivial
now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c  | 51 +++++++++++++++++++++++++++++++++++++--------------
 fs/xfs/xfs_iomap.c |  3 ++-
 2 files changed, 39 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index c8e0d3055153..0058f9893705 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -79,6 +79,19 @@ xfs_find_daxdev_for_inode(
 		return mp->m_ddev_targp->bt_daxdev;
 }
 
+static void
+xfs_finish_page_writeback(
+	struct inode		*inode,
+	struct bio_vec		*bvec,
+	int			error)
+{
+	if (error) {
+		SetPageError(bvec->bv_page);
+		mapping_set_error(inode->i_mapping, -EIO);
+	}
+	end_page_writeback(bvec->bv_page);
+}
+
 /*
  * We're now finished for good with this page.  Update the page state via the
  * associated buffer_heads, paying attention to the start and end offsets that
@@ -91,7 +104,7 @@ xfs_find_daxdev_for_inode(
  * and buffers potentially freed after every call to end_buffer_async_write.
  */
 static void
-xfs_finish_page_writeback(
+xfs_finish_buffer_writeback(
 	struct inode		*inode,
 	struct bio_vec		*bvec,
 	int			error)
@@ -166,9 +179,12 @@ xfs_destroy_ioend(
 			next = bio->bi_private;
 
 		/* walk each page on bio, ending page IO on them */
-		bio_for_each_segment_all(bvec, bio, i)
-			xfs_finish_page_writeback(inode, bvec, error);
-
+		bio_for_each_segment_all(bvec, bio, i) {
+			if (page_has_buffers(bvec->bv_page))
+				xfs_finish_buffer_writeback(inode, bvec, error);
+			else
+				xfs_finish_page_writeback(inode, bvec, error);
+		}
 		bio_put(bio);
 	}
 
@@ -792,13 +808,16 @@ xfs_writepage_map(
 {
 	LIST_HEAD(submit_list);
 	struct xfs_ioend	*ioend, *next;
-	struct buffer_head	*bh;
+	struct buffer_head	*bh = NULL;
 	ssize_t			len = i_blocksize(inode);
 	uint64_t		file_offset;	/* file offset of page */
 	unsigned		poffset;	/* offset into page */
 	int			error = 0;
 	int			count = 0;
 
+	if (page_has_buffers(page))
+		bh = page_buffers(page);
+
 	/*
 	 * Walk the blocks on the page, and if we run off the end of the current
 	 * map or find the current map invalid, grab a new one.  We only use
@@ -806,28 +825,34 @@ xfs_writepage_map(
 	 * the iteration through the page. This allows us to replace the
 	 * bufferhead with some other state tracking mechanism in future.
 	 */
-	file_offset = page_offset(page);
-	bh = page_buffers(page);
-	for (poffset = 0;
+	for (poffset = 0, file_offset = page_offset(page);
 	     poffset < PAGE_SIZE;
-	     poffset += len, file_offset += len, bh = bh->b_this_page) {
+	     poffset += len, file_offset += len) {
 		/* past the range we are writing, so nothing more to write. */
 		if (file_offset >= end_offset)
 			break;
 
-		if (!buffer_uptodate(bh)) {
+		if (bh && !buffer_uptodate(bh)) {
 			if (PageUptodate(page))
 				ASSERT(buffer_mapped(bh));
+			bh = bh->b_this_page;
 			continue;
 		}
 
 		error = xfs_map_blocks(wpc, inode, file_offset);
 		if (error)
 			break;
-		if (wpc->io_type == XFS_IO_HOLE)
+
+		if (wpc->io_type == XFS_IO_HOLE) {
+			if (bh)
+				bh = bh->b_this_page;
 			continue;
+		}
 
-		xfs_map_at_offset(inode, bh, &wpc->imap, file_offset);
+		if (bh) {
+			xfs_map_at_offset(inode, bh, &wpc->imap, file_offset);
+			bh = bh->b_this_page;
+		}
 		xfs_add_to_ioend(inode, file_offset, page, wpc, wbc,
 				&submit_list);
 		count++;
@@ -925,8 +950,6 @@ xfs_do_writepage(
 
 	trace_xfs_writepage(inode, page, 0, 0);
 
-	ASSERT(page_has_buffers(page));
-
 	/*
 	 * Refuse to write the page out if we are called from reclaim context.
 	 *
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 10c54fc7d1b4..7fe42a126ec1 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1032,7 +1032,8 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	iomap->flags |= IOMAP_F_BUFFER_HEAD;
+	if (i_blocksize(inode) < PAGE_SIZE)
+		iomap->flags |= IOMAP_F_BUFFER_HEAD;
 
 	if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
 			!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
-- 
cgit v1.2.3


From 9dc55f1389f9569acf9659e58dd836a9c70df217 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:05 -0700
Subject: iomap: add support for sub-pagesize buffered I/O without buffer heads

After already supporting a simple implementation of buffered writes for
the blocksize == PAGE_SIZE case in the last commit, this adds full support
even for smaller block sizes.  There are three bits of per-block
information in the buffer_head structure that really matter for the iomap
read and write path:

 - uptodate status (BH_uptodate)
 - marked as currently under read I/O (BH_Async_Read)
 - marked as currently under write I/O (BH_Async_Write)

Instead of having new per-block structures this now adds a per-page
structure called struct iomap_page to track this information in a slightly
different form:

 - a bitmap for the per-block uptodate status.  For worst case of a 64k
   page size system this bitmap needs to contain 128 bits.  For the
   typical 4k page size case it only needs 8 bits, although we still
   need a full unsigned long due to the way the atomic bitmap API works.
 - two atomic_t counters are used to track the outstanding read and write
   counts

There is quite a bit of boilerplate code as the buffered I/O path uses
various helper methods, but the actual code is very straightforward.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 280 ++++++++++++++++++++++++++++++++++++++++++++++----
 include/linux/iomap.h |  31 ++++++
 2 files changed, 290 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/iomap.c b/fs/iomap.c
index 13cdcf33e6c0..07501a647d13 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -17,6 +17,7 @@
 #include <linux/iomap.h>
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
+#include <linux/migrate.h>
 #include <linux/mm.h>
 #include <linux/mm_inline.h>
 #include <linux/swap.h>
@@ -104,6 +105,138 @@ iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+static struct iomap_page *
+iomap_page_create(struct inode *inode, struct page *page)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+
+	if (iop || i_blocksize(inode) == PAGE_SIZE)
+		return iop;
+
+	iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
+	atomic_set(&iop->read_count, 0);
+	atomic_set(&iop->write_count, 0);
+	bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
+	set_page_private(page, (unsigned long)iop);
+	SetPagePrivate(page);
+	return iop;
+}
+
+static void
+iomap_page_release(struct page *page)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+
+	if (!iop)
+		return;
+	WARN_ON_ONCE(atomic_read(&iop->read_count));
+	WARN_ON_ONCE(atomic_read(&iop->write_count));
+	ClearPagePrivate(page);
+	set_page_private(page, 0);
+	kfree(iop);
+}
+
+/*
+ * Calculate the range inside the page that we actually need to read.
+ */
+static void
+iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
+		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
+{
+	unsigned block_bits = inode->i_blkbits;
+	unsigned block_size = (1 << block_bits);
+	unsigned poff = *pos & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	unsigned first = poff >> block_bits;
+	unsigned last = (poff + plen - 1) >> block_bits;
+	unsigned end = (i_size_read(inode) & (PAGE_SIZE - 1)) >> block_bits;
+
+	/*
+	 * If the block size is smaller than the page size we need to check the
+	 * per-block uptodate status and adjust the offset and length if needed
+	 * to avoid reading in already uptodate ranges.
+	 */
+	if (iop) {
+		unsigned int i;
+
+		/* move forward for each leading block marked uptodate */
+		for (i = first; i <= last; i++) {
+			if (!test_bit(i, iop->uptodate))
+				break;
+			*pos += block_size;
+			poff += block_size;
+			plen -= block_size;
+			first++;
+		}
+
+		/* truncate len if we find any trailing uptodate block(s) */
+		for ( ; i <= last; i++) {
+			if (test_bit(i, iop->uptodate)) {
+				plen -= (last - i + 1) * block_size;
+				last = i - 1;
+				break;
+			}
+		}
+	}
+
+	/*
+	 * If the extent spans the block that contains the i_size we need to
+	 * handle both halves separately so that we properly zero data in the
+	 * page cache for blocks that are entirely outside of i_size.
+	 */
+	if (first <= end && last > end)
+		plen -= (last - end) * block_size;
+
+	*offp = poff;
+	*lenp = plen;
+}
+
+static void
+iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+	struct inode *inode = page->mapping->host;
+	unsigned first = off >> inode->i_blkbits;
+	unsigned last = (off + len - 1) >> inode->i_blkbits;
+	unsigned int i;
+	bool uptodate = true;
+
+	if (iop) {
+		for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) {
+			if (i >= first && i <= last)
+				set_bit(i, iop->uptodate);
+			else if (!test_bit(i, iop->uptodate))
+				uptodate = false;
+		}
+	}
+
+	if (uptodate && !PageError(page))
+		SetPageUptodate(page);
+}
+
+static void
+iomap_read_finish(struct iomap_page *iop, struct page *page)
+{
+	if (!iop || atomic_dec_and_test(&iop->read_count))
+		unlock_page(page);
+}
+
+static void
+iomap_read_page_end_io(struct bio_vec *bvec, int error)
+{
+	struct page *page = bvec->bv_page;
+	struct iomap_page *iop = to_iomap_page(page);
+
+	if (unlikely(error)) {
+		ClearPageUptodate(page);
+		SetPageError(page);
+	} else {
+		iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
+	}
+
+	iomap_read_finish(iop, page);
+}
+
 static void
 iomap_read_inline_data(struct inode *inode, struct page *page,
 		struct iomap *iomap)
@@ -132,7 +265,7 @@ iomap_read_end_io(struct bio *bio)
 	int i;
 
 	bio_for_each_segment_all(bvec, bio, i)
-		page_endio(bvec->bv_page, false, error);
+		iomap_read_page_end_io(bvec, error);
 	bio_put(bio);
 }
 
@@ -150,9 +283,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 {
 	struct iomap_readpage_ctx *ctx = data;
 	struct page *page = ctx->cur_page;
-	unsigned poff = pos & (PAGE_SIZE - 1);
-	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	struct iomap_page *iop = iomap_page_create(inode, page);
 	bool is_contig = false;
+	loff_t orig_pos = pos;
+	unsigned poff, plen;
 	sector_t sector;
 
 	if (iomap->type == IOMAP_INLINE) {
@@ -161,13 +295,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		return PAGE_SIZE;
 	}
 
-	/* we don't support blocksize < PAGE_SIZE quite yet. */
-	WARN_ON_ONCE(pos != page_offset(page));
-	WARN_ON_ONCE(plen != PAGE_SIZE);
+	/* zero post-eof blocks as the page may be mapped */
+	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
+	if (plen == 0)
+		goto done;
 
 	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
 		zero_user(page, poff, plen);
-		SetPageUptodate(page);
+		iomap_set_range_uptodate(page, poff, plen);
 		goto done;
 	}
 
@@ -183,6 +318,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		is_contig = true;
 	}
 
+	/*
+	 * If we start a new segment we need to increase the read count, and we
+	 * need to do so before submitting any previous full bio to make sure
+	 * that we don't prematurely unlock the page.
+	 */
+	if (iop)
+		atomic_inc(&iop->read_count);
+
 	if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
 		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
 		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -203,7 +346,13 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 	__bio_add_page(ctx->bio, page, plen, poff);
 done:
-	return plen;
+	/*
+	 * Move the caller beyond our range so that it keeps making progress.
+	 * For that we have to include any leading non-uptodate ranges, but
+	 * we can skip trailing ones as they will be handled in the next
+	 * iteration.
+	 */
+	return pos - orig_pos + plen;
 }
 
 int
@@ -214,8 +363,6 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
 	unsigned poff;
 	loff_t ret;
 
-	WARN_ON_ONCE(page_has_buffers(page));
-
 	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
 		ret = iomap_apply(inode, page_offset(page) + poff,
 				PAGE_SIZE - poff, 0, ops, &ctx,
@@ -341,6 +488,84 @@ done:
 }
 EXPORT_SYMBOL_GPL(iomap_readpages);
 
+int
+iomap_is_partially_uptodate(struct page *page, unsigned long from,
+		unsigned long count)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+	struct inode *inode = page->mapping->host;
+	unsigned first = from >> inode->i_blkbits;
+	unsigned last = (from + count - 1) >> inode->i_blkbits;
+	unsigned i;
+
+	if (iop) {
+		for (i = first; i <= last; i++)
+			if (!test_bit(i, iop->uptodate))
+				return 0;
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
+
+int
+iomap_releasepage(struct page *page, gfp_t gfp_mask)
+{
+	/*
+	 * mm accommodates an old ext3 case where clean pages might not have had
+	 * the dirty bit cleared. Thus, it can send actual dirty pages to
+	 * ->releasepage() via shrink_active_list(), skip those here.
+	 */
+	if (PageDirty(page) || PageWriteback(page))
+		return 0;
+	iomap_page_release(page);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(iomap_releasepage);
+
+void
+iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
+{
+	/*
+	 * If we are invalidating the entire page, clear the dirty state from it
+	 * and release it to avoid unnecessary buildup of the LRU.
+	 */
+	if (offset == 0 && len == PAGE_SIZE) {
+		WARN_ON_ONCE(PageWriteback(page));
+		cancel_dirty_page(page);
+		iomap_page_release(page);
+	}
+}
+EXPORT_SYMBOL_GPL(iomap_invalidatepage);
+
+#ifdef CONFIG_MIGRATION
+int
+iomap_migrate_page(struct address_space *mapping, struct page *newpage,
+		struct page *page, enum migrate_mode mode)
+{
+	int ret;
+
+	ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+	if (ret != MIGRATEPAGE_SUCCESS)
+		return ret;
+
+	if (page_has_private(page)) {
+		ClearPagePrivate(page);
+		set_page_private(newpage, page_private(page));
+		set_page_private(page, 0);
+		SetPagePrivate(newpage);
+	}
+
+	if (mode != MIGRATE_SYNC_NO_COPY)
+		migrate_page_copy(newpage, page);
+	else
+		migrate_page_states(newpage, page);
+	return MIGRATEPAGE_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(iomap_migrate_page);
+#endif /* CONFIG_MIGRATION */
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
@@ -364,6 +589,7 @@ iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
 
 	if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
 		zero_user_segments(page, poff, from, to, poff + plen);
+		iomap_set_range_uptodate(page, poff, plen);
 		return 0;
 	}
 
@@ -379,21 +605,33 @@ static int
 __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
 		struct page *page, struct iomap *iomap)
 {
+	struct iomap_page *iop = iomap_page_create(inode, page);
 	loff_t block_size = i_blocksize(inode);
 	loff_t block_start = pos & ~(block_size - 1);
 	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
-	unsigned poff = block_start & (PAGE_SIZE - 1);
-	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
-	unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
-
-	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+	unsigned from = pos & (PAGE_SIZE - 1), to = from + len, poff, plen;
+	int status = 0;
 
 	if (PageUptodate(page))
 		return 0;
-	if (from <= poff && to >= poff + plen)
-		return 0;
-	return iomap_read_page_sync(inode, block_start, page,
-			poff, plen, from, to, iomap);
+
+	do {
+		iomap_adjust_read_range(inode, iop, &block_start,
+				block_end - block_start, &poff, &plen);
+		if (plen == 0)
+			break;
+
+		if ((from > poff && from < poff + plen) ||
+		    (to > poff && to < poff + plen)) {
+			status = iomap_read_page_sync(inode, block_start, page,
+					poff, plen, from, to, iomap);
+			if (status)
+				break;
+		}
+
+	} while ((block_start += plen) < block_end);
+
+	return status;
 }
 
 static int
@@ -476,7 +714,7 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 	if (unlikely(copied < len && !PageUptodate(page))) {
 		copied = 0;
 	} else {
-		SetPageUptodate(page);
+		iomap_set_range_uptodate(page, pos & (PAGE_SIZE - 1), len);
 		iomap_set_page_dirty(page);
 	}
 	return __generic_write_end(inode, pos, copied, page);
@@ -812,7 +1050,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 		block_commit_write(page, 0, length);
 	} else {
 		WARN_ON_ONCE(!PageUptodate(page));
-		WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+		iomap_page_create(inode, page);
 	}
 
 	return length;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 5eb9ca8d7ce5..3555d54bf79a 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -2,6 +2,9 @@
 #ifndef LINUX_IOMAP_H
 #define LINUX_IOMAP_H 1
 
+#include <linux/atomic.h>
+#include <linux/bitmap.h>
+#include <linux/mm.h>
 #include <linux/types.h>
 
 struct address_space;
@@ -98,12 +101,40 @@ struct iomap_ops {
 			ssize_t written, unsigned flags, struct iomap *iomap);
 };
 
+/*
+ * Structure allocated for each page when block size < PAGE_SIZE to track
+ * sub-page uptodate status and I/O completions.
+ */
+struct iomap_page {
+	atomic_t		read_count;
+	atomic_t		write_count;
+	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
+};
+
+static inline struct iomap_page *to_iomap_page(struct page *page)
+{
+	if (page_has_private(page))
+		return (struct iomap_page *)page_private(page);
+	return NULL;
+}
+
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
 int iomap_readpages(struct address_space *mapping, struct list_head *pages,
 		unsigned nr_pages, const struct iomap_ops *ops);
 int iomap_set_page_dirty(struct page *page);
+int iomap_is_partially_uptodate(struct page *page, unsigned long from,
+		unsigned long count);
+int iomap_releasepage(struct page *page, gfp_t gfp_mask);
+void iomap_invalidatepage(struct page *page, unsigned int offset,
+		unsigned int len);
+#ifdef CONFIG_MIGRATION
+int iomap_migrate_page(struct address_space *mapping, struct page *newpage,
+		struct page *page, enum migrate_mode mode);
+#else
+#define iomap_migrate_page NULL
+#endif
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
-- 
cgit v1.2.3


From 82cb14175e7ddb08721fb1d766195cd10dad791a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:05 -0700
Subject: xfs: add support for sub-pagesize writeback without buffer_heads

Switch to using the iomap_page structure for checking sub-page uptodate
status and track sub-page I/O completion status, and remove large
quantities of boilerplate code working around buffer heads.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c  | 492 +++++++----------------------------------------------
 fs/xfs/xfs_buf.h   |   1 -
 fs/xfs/xfs_iomap.c |   3 -
 fs/xfs/xfs_super.c |   2 +-
 fs/xfs/xfs_trace.h |  18 +-
 5 files changed, 61 insertions(+), 455 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0058f9893705..bae88ac1101d 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -20,9 +20,6 @@
 #include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_reflink.h"
-#include <linux/gfp.h>
-#include <linux/mpage.h>
-#include <linux/pagevec.h>
 #include <linux/writeback.h>
 
 /*
@@ -34,25 +31,6 @@ struct xfs_writepage_ctx {
 	struct xfs_ioend	*ioend;
 };
 
-void
-xfs_count_page_state(
-	struct page		*page,
-	int			*delalloc,
-	int			*unwritten)
-{
-	struct buffer_head	*bh, *head;
-
-	*delalloc = *unwritten = 0;
-
-	bh = head = page_buffers(page);
-	do {
-		if (buffer_unwritten(bh))
-			(*unwritten) = 1;
-		else if (buffer_delay(bh))
-			(*delalloc) = 1;
-	} while ((bh = bh->b_this_page) != head);
-}
-
 struct block_device *
 xfs_find_bdev_for_inode(
 	struct inode		*inode)
@@ -85,67 +63,17 @@ xfs_finish_page_writeback(
 	struct bio_vec		*bvec,
 	int			error)
 {
+	struct iomap_page	*iop = to_iomap_page(bvec->bv_page);
+
 	if (error) {
 		SetPageError(bvec->bv_page);
 		mapping_set_error(inode->i_mapping, -EIO);
 	}
-	end_page_writeback(bvec->bv_page);
-}
 
-/*
- * We're now finished for good with this page.  Update the page state via the
- * associated buffer_heads, paying attention to the start and end offsets that
- * we need to process on the page.
- *
- * Note that we open code the action in end_buffer_async_write here so that we
- * only have to iterate over the buffers attached to the page once.  This is not
- * only more efficient, but also ensures that we only calls end_page_writeback
- * at the end of the iteration, and thus avoids the pitfall of having the page
- * and buffers potentially freed after every call to end_buffer_async_write.
- */
-static void
-xfs_finish_buffer_writeback(
-	struct inode		*inode,
-	struct bio_vec		*bvec,
-	int			error)
-{
-	struct buffer_head	*head = page_buffers(bvec->bv_page), *bh = head;
-	bool			busy = false;
-	unsigned int		off = 0;
-	unsigned long		flags;
-
-	ASSERT(bvec->bv_offset < PAGE_SIZE);
-	ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
-	ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE);
-	ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
-
-	local_irq_save(flags);
-	bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
-	do {
-		if (off >= bvec->bv_offset &&
-		    off < bvec->bv_offset + bvec->bv_len) {
-			ASSERT(buffer_async_write(bh));
-			ASSERT(bh->b_end_io == NULL);
-
-			if (error) {
-				mark_buffer_write_io_error(bh);
-				clear_buffer_uptodate(bh);
-				SetPageError(bvec->bv_page);
-			} else {
-				set_buffer_uptodate(bh);
-			}
-			clear_buffer_async_write(bh);
-			unlock_buffer(bh);
-		} else if (buffer_async_write(bh)) {
-			ASSERT(buffer_locked(bh));
-			busy = true;
-		}
-		off += bh->b_size;
-	} while ((bh = bh->b_this_page) != head);
-	bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
-	local_irq_restore(flags);
+	ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
+	ASSERT(!iop || atomic_read(&iop->write_count) > 0);
 
-	if (!busy)
+	if (!iop || atomic_dec_and_test(&iop->write_count))
 		end_page_writeback(bvec->bv_page);
 }
 
@@ -179,12 +107,8 @@ xfs_destroy_ioend(
 			next = bio->bi_private;
 
 		/* walk each page on bio, ending page IO on them */
-		bio_for_each_segment_all(bvec, bio, i) {
-			if (page_has_buffers(bvec->bv_page))
-				xfs_finish_buffer_writeback(inode, bvec, error);
-			else
-				xfs_finish_page_writeback(inode, bvec, error);
-		}
+		bio_for_each_segment_all(bvec, bio, i)
+			xfs_finish_page_writeback(inode, bvec, error);
 		bio_put(bio);
 	}
 
@@ -638,6 +562,7 @@ xfs_add_to_ioend(
 	struct inode		*inode,
 	xfs_off_t		offset,
 	struct page		*page,
+	struct iomap_page	*iop,
 	struct xfs_writepage_ctx *wpc,
 	struct writeback_control *wbc,
 	struct list_head	*iolist)
@@ -661,100 +586,37 @@ xfs_add_to_ioend(
 				bdev, sector);
 	}
 
-	/*
-	 * If the block doesn't fit into the bio we need to allocate a new
-	 * one.  This shouldn't happen more than once for a given block.
-	 */
-	while (bio_add_page(wpc->ioend->io_bio, page, len, poff) != len)
-		xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
+	if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) {
+		if (iop)
+			atomic_inc(&iop->write_count);
+		if (bio_full(wpc->ioend->io_bio))
+			xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
+		__bio_add_page(wpc->ioend->io_bio, page, len, poff);
+	}
 
 	wpc->ioend->io_size += len;
 }
 
-STATIC void
-xfs_map_buffer(
-	struct inode		*inode,
-	struct buffer_head	*bh,
-	struct xfs_bmbt_irec	*imap,
-	xfs_off_t		offset)
-{
-	sector_t		bn;
-	struct xfs_mount	*m = XFS_I(inode)->i_mount;
-	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
-	xfs_daddr_t		iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
-
-	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
-	bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
-	      ((offset - iomap_offset) >> inode->i_blkbits);
-
-	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
-
-	bh->b_blocknr = bn;
-	set_buffer_mapped(bh);
-}
-
-STATIC void
-xfs_map_at_offset(
-	struct inode		*inode,
-	struct buffer_head	*bh,
-	struct xfs_bmbt_irec	*imap,
-	xfs_off_t		offset)
-{
-	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
-	lock_buffer(bh);
-	xfs_map_buffer(inode, bh, imap, offset);
-	set_buffer_mapped(bh);
-	clear_buffer_delay(bh);
-	clear_buffer_unwritten(bh);
-
-	/*
-	 * If this is a realtime file, data may be on a different device.
-	 * to that pointed to from the buffer_head b_bdev currently. We can't
-	 * trust that the bufferhead has a already been mapped correctly, so
-	 * set the bdev now.
-	 */
-	bh->b_bdev = xfs_find_bdev_for_inode(inode);
-	bh->b_end_io = NULL;
-	set_buffer_async_write(bh);
-	set_buffer_uptodate(bh);
-	clear_buffer_dirty(bh);
-}
-
 STATIC void
 xfs_vm_invalidatepage(
 	struct page		*page,
 	unsigned int		offset,
 	unsigned int		length)
 {
-	trace_xfs_invalidatepage(page->mapping->host, page, offset,
-				 length);
-
-	/*
-	 * If we are invalidating the entire page, clear the dirty state from it
-	 * so that we can check for attempts to release dirty cached pages in
-	 * xfs_vm_releasepage().
-	 */
-	if (offset == 0 && length >= PAGE_SIZE)
-		cancel_dirty_page(page);
-	block_invalidatepage(page, offset, length);
+	trace_xfs_invalidatepage(page->mapping->host, page, offset, length);
+	iomap_invalidatepage(page, offset, length);
 }
 
 /*
- * If the page has delalloc buffers on it, we need to punch them out before we
- * invalidate the page. If we don't, we leave a stale delalloc mapping on the
- * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
- * is done on that same region - the delalloc extent is returned when none is
- * supposed to be there.
+ * If the page has delalloc blocks on it, we need to punch them out before we
+ * invalidate the page.  If we don't, we leave a stale delalloc mapping on the
+ * inode that can trip up a later direct I/O read operation on the same region.
  *
- * We prevent this by truncating away the delalloc regions on the page before
- * invalidating it. Because they are delalloc, we can do this without needing a
- * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
- * truncation without a transaction as there is no space left for block
- * reservation (typically why we see a ENOSPC in writeback).
+ * We prevent this by truncating away the delalloc regions on the page.  Because
+ * they are delalloc, we can do this without needing a transaction. Indeed - if
+ * we get ENOSPC errors, we have to be able to do this truncation without a
+ * transaction as there is no space left for block reservation (typically why we
+ * see a ENOSPC in writeback).
  */
 STATIC void
 xfs_aops_discard_page(
@@ -786,7 +648,7 @@ out_invalidate:
  * We implement an immediate ioend submission policy here to avoid needing to
  * chain multiple ioends and hence nest mempool allocations which can violate
  * forward progress guarantees we need to provide. The current ioend we are
- * adding buffers to is cached on the writepage context, and if the new buffer
+ * adding blocks to is cached on the writepage context, and if the new block
  * does not append to the cached ioend it will create a new ioend and cache that
  * instead.
  *
@@ -807,54 +669,33 @@ xfs_writepage_map(
 	uint64_t		end_offset)
 {
 	LIST_HEAD(submit_list);
+	struct iomap_page	*iop = to_iomap_page(page);
+	unsigned		len = i_blocksize(inode);
 	struct xfs_ioend	*ioend, *next;
-	struct buffer_head	*bh = NULL;
-	ssize_t			len = i_blocksize(inode);
 	uint64_t		file_offset;	/* file offset of page */
-	unsigned		poffset;	/* offset into page */
-	int			error = 0;
-	int			count = 0;
+	int			error = 0, count = 0, i;
 
-	if (page_has_buffers(page))
-		bh = page_buffers(page);
+	ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
+	ASSERT(!iop || atomic_read(&iop->write_count) == 0);
 
 	/*
-	 * Walk the blocks on the page, and if we run off the end of the current
-	 * map or find the current map invalid, grab a new one.  We only use
-	 * bufferheads here to check per-block state - they no longer control
-	 * the iteration through the page. This allows us to replace the
-	 * bufferhead with some other state tracking mechanism in future.
+	 * Walk through the page to find areas to write back. If we run off the
+	 * end of the current map or find the current map invalid, grab a new
+	 * one.
 	 */
-	for (poffset = 0, file_offset = page_offset(page);
-	     poffset < PAGE_SIZE;
-	     poffset += len, file_offset += len) {
-		/* past the range we are writing, so nothing more to write. */
-		if (file_offset >= end_offset)
-			break;
-
-		if (bh && !buffer_uptodate(bh)) {
-			if (PageUptodate(page))
-				ASSERT(buffer_mapped(bh));
-			bh = bh->b_this_page;
+	for (i = 0, file_offset = page_offset(page);
+	     i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset;
+	     i++, file_offset += len) {
+		if (iop && !test_bit(i, iop->uptodate))
 			continue;
-		}
 
 		error = xfs_map_blocks(wpc, inode, file_offset);
 		if (error)
 			break;
-
-		if (wpc->io_type == XFS_IO_HOLE) {
-			if (bh)
-				bh = bh->b_this_page;
+		if (wpc->io_type == XFS_IO_HOLE)
 			continue;
-		}
-
-		if (bh) {
-			xfs_map_at_offset(inode, bh, &wpc->imap, file_offset);
-			bh = bh->b_this_page;
-		}
-		xfs_add_to_ioend(inode, file_offset, page, wpc, wbc,
-				&submit_list);
+		xfs_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
+				 &submit_list);
 		count++;
 	}
 
@@ -863,21 +704,18 @@ xfs_writepage_map(
 	ASSERT(!PageWriteback(page));
 
 	/*
-	 * On error, we have to fail the ioend here because we have locked
-	 * buffers in the ioend. If we don't do this, we'll deadlock
-	 * invalidating the page as that tries to lock the buffers on the page.
-	 * Also, because we may have set pages under writeback, we have to make
-	 * sure we run IO completion to mark the error state of the IO
-	 * appropriately, so we can't cancel the ioend directly here. That means
-	 * we have to mark this page as under writeback if we included any
-	 * buffers from it in the ioend chain so that completion treats it
-	 * correctly.
+	 * On error, we have to fail the ioend here because we may have set
+	 * pages under writeback, we have to make sure we run IO completion to
+	 * mark the error state of the IO appropriately, so we can't cancel the
+	 * ioend directly here.  That means we have to mark this page as under
+	 * writeback if we included any blocks from it in the ioend chain so
+	 * that completion treats it correctly.
 	 *
 	 * If we didn't include the page in the ioend, the on error we can
 	 * simply discard and unlock it as there are no other users of the page
-	 * or it's buffers right now. The caller will still need to trigger
-	 * submission of outstanding ioends on the writepage context so they are
-	 * treated correctly on error.
+	 * now.  The caller will still need to trigger submission of outstanding
+	 * ioends on the writepage context so they are treated correctly on
+	 * error.
 	 */
 	if (unlikely(error)) {
 		if (!count) {
@@ -918,8 +756,8 @@ xfs_writepage_map(
 	}
 
 	/*
-	 * We can end up here with no error and nothing to write if we race with
-	 * a partial page truncate on a sub-page block sized filesystem.
+	 * We can end up here with no error and nothing to write only if we race
+	 * with a partial page truncate on a sub-page block sized filesystem.
 	 */
 	if (!count)
 		end_page_writeback(page);
@@ -934,7 +772,6 @@ done:
  * For delalloc space on the page we need to allocate space and flush it.
  * For unwritten space on the page we need to start the conversion to
  * regular allocated space.
- * For any other dirty buffer heads on the page we should flush them.
  */
 STATIC int
 xfs_do_writepage(
@@ -1088,166 +925,13 @@ xfs_dax_writepages(
 			xfs_find_bdev_for_inode(mapping->host), wbc);
 }
 
-/*
- * Called to move a page into cleanable state - and from there
- * to be released. The page should already be clean. We always
- * have buffer heads in this call.
- *
- * Returns 1 if the page is ok to release, 0 otherwise.
- */
 STATIC int
 xfs_vm_releasepage(
 	struct page		*page,
 	gfp_t			gfp_mask)
 {
-	int			delalloc, unwritten;
-
 	trace_xfs_releasepage(page->mapping->host, page, 0, 0);
-
-	/*
-	 * mm accommodates an old ext3 case where clean pages might not have had
-	 * the dirty bit cleared. Thus, it can send actual dirty pages to
-	 * ->releasepage() via shrink_active_list(). Conversely,
-	 * block_invalidatepage() can send pages that are still marked dirty but
-	 * otherwise have invalidated buffers.
-	 *
-	 * We want to release the latter to avoid unnecessary buildup of the
-	 * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages
-	 * that are entirely invalidated and need to be released.  Hence the
-	 * only time we should get dirty pages here is through
-	 * shrink_active_list() and so we can simply skip those now.
-	 *
-	 * warn if we've left any lingering delalloc/unwritten buffers on clean
-	 * or invalidated pages we are about to release.
-	 */
-	if (PageDirty(page))
-		return 0;
-
-	xfs_count_page_state(page, &delalloc, &unwritten);
-
-	if (WARN_ON_ONCE(delalloc))
-		return 0;
-	if (WARN_ON_ONCE(unwritten))
-		return 0;
-
-	return try_to_free_buffers(page);
-}
-
-/*
- * If this is O_DIRECT or the mpage code calling tell them how large the mapping
- * is, so that we can avoid repeated get_blocks calls.
- *
- * If the mapping spans EOF, then we have to break the mapping up as the mapping
- * for blocks beyond EOF must be marked new so that sub block regions can be
- * correctly zeroed. We can't do this for mappings within EOF unless the mapping
- * was just allocated or is unwritten, otherwise the callers would overwrite
- * existing data with zeros. Hence we have to split the mapping into a range up
- * to and including EOF, and a second mapping for beyond EOF.
- */
-static void
-xfs_map_trim_size(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	struct xfs_bmbt_irec	*imap,
-	xfs_off_t		offset,
-	ssize_t			size)
-{
-	xfs_off_t		mapping_size;
-
-	mapping_size = imap->br_startoff + imap->br_blockcount - iblock;
-	mapping_size <<= inode->i_blkbits;
-
-	ASSERT(mapping_size > 0);
-	if (mapping_size > size)
-		mapping_size = size;
-	if (offset < i_size_read(inode) &&
-	    (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) {
-		/* limit mapping to block that spans EOF */
-		mapping_size = roundup_64(i_size_read(inode) - offset,
-					  i_blocksize(inode));
-	}
-	if (mapping_size > LONG_MAX)
-		mapping_size = LONG_MAX;
-
-	bh_result->b_size = mapping_size;
-}
-
-static int
-xfs_get_blocks(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		offset_fsb, end_fsb;
-	int			error = 0;
-	int			lockmode = 0;
-	struct xfs_bmbt_irec	imap;
-	int			nimaps = 1;
-	xfs_off_t		offset;
-	ssize_t			size;
-
-	BUG_ON(create);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -EIO;
-
-	offset = (xfs_off_t)iblock << inode->i_blkbits;
-	ASSERT(bh_result->b_size >= i_blocksize(inode));
-	size = bh_result->b_size;
-
-	if (offset >= i_size_read(inode))
-		return 0;
-
-	/*
-	 * Direct I/O is usually done on preallocated files, so try getting
-	 * a block mapping without an exclusive lock first.
-	 */
-	lockmode = xfs_ilock_data_map_shared(ip);
-
-	ASSERT(offset <= mp->m_super->s_maxbytes);
-	if (offset > mp->m_super->s_maxbytes - size)
-		size = mp->m_super->s_maxbytes - offset;
-	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-
-	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
-			&nimaps, 0);
-	if (error)
-		goto out_unlock;
-	if (!nimaps) {
-		trace_xfs_get_blocks_notfound(ip, offset, size);
-		goto out_unlock;
-	}
-
-	trace_xfs_get_blocks_found(ip, offset, size,
-		imap.br_state == XFS_EXT_UNWRITTEN ?
-			XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
-	xfs_iunlock(ip, lockmode);
-
-	/* trim mapping down to size requested */
-	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
-
-	/*
-	 * For unwritten extents do not report a disk address in the buffered
-	 * read case (treat as if we're reading into a hole).
-	 */
-	if (xfs_bmap_is_real_extent(&imap))
-		xfs_map_buffer(inode, bh_result, &imap, offset);
-
-	/*
-	 * If this is a realtime file, data may be on a different device.
-	 * to that pointed to from the buffer_head b_bdev currently.
-	 */
-	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
-	return 0;
-
-out_unlock:
-	xfs_iunlock(ip, lockmode);
-	return error;
+	return iomap_releasepage(page, gfp_mask);
 }
 
 STATIC sector_t
@@ -1279,9 +963,7 @@ xfs_vm_readpage(
 	struct page		*page)
 {
 	trace_xfs_vm_readpage(page->mapping->host, 1);
-	if (i_blocksize(page->mapping->host) == PAGE_SIZE)
-		return iomap_readpage(page, &xfs_iomap_ops);
-	return mpage_readpage(page, xfs_get_blocks);
+	return iomap_readpage(page, &xfs_iomap_ops);
 }
 
 STATIC int
@@ -1292,65 +974,7 @@ xfs_vm_readpages(
 	unsigned		nr_pages)
 {
 	trace_xfs_vm_readpages(mapping->host, nr_pages);
-	if (i_blocksize(mapping->host) == PAGE_SIZE)
-		return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops);
-	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
-}
-
-/*
- * This is basically a copy of __set_page_dirty_buffers() with one
- * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
- * dirty, we'll never be able to clean them because we don't write buffers
- * beyond EOF, and that means we can't invalidate pages that span EOF
- * that have been marked dirty. Further, the dirty state can leak into
- * the file interior if the file is extended, resulting in all sorts of
- * bad things happening as the state does not match the underlying data.
- *
- * XXX: this really indicates that bufferheads in XFS need to die. Warts like
- * this only exist because of bufferheads and how the generic code manages them.
- */
-STATIC int
-xfs_vm_set_page_dirty(
-	struct page		*page)
-{
-	struct address_space	*mapping = page->mapping;
-	struct inode		*inode = mapping->host;
-	loff_t			end_offset;
-	loff_t			offset;
-	int			newly_dirty;
-
-	if (unlikely(!mapping))
-		return !TestSetPageDirty(page);
-
-	end_offset = i_size_read(inode);
-	offset = page_offset(page);
-
-	spin_lock(&mapping->private_lock);
-	if (page_has_buffers(page)) {
-		struct buffer_head *head = page_buffers(page);
-		struct buffer_head *bh = head;
-
-		do {
-			if (offset < end_offset)
-				set_buffer_dirty(bh);
-			bh = bh->b_this_page;
-			offset += i_blocksize(inode);
-		} while (bh != head);
-	}
-	/*
-	 * Lock out page->mem_cgroup migration to keep PageDirty
-	 * synchronized with per-memcg dirty page counters.
-	 */
-	lock_page_memcg(page);
-	newly_dirty = !TestSetPageDirty(page);
-	spin_unlock(&mapping->private_lock);
-
-	if (newly_dirty)
-		__set_page_dirty(page, mapping, 1);
-	unlock_page_memcg(page);
-	if (newly_dirty)
-		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-	return newly_dirty;
+	return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops);
 }
 
 static int
@@ -1368,13 +992,13 @@ const struct address_space_operations xfs_address_space_operations = {
 	.readpages		= xfs_vm_readpages,
 	.writepage		= xfs_vm_writepage,
 	.writepages		= xfs_vm_writepages,
-	.set_page_dirty		= xfs_vm_set_page_dirty,
+	.set_page_dirty		= iomap_set_page_dirty,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= noop_direct_IO,
-	.migratepage		= buffer_migrate_page,
-	.is_partially_uptodate  = block_is_partially_uptodate,
+	.migratepage		= iomap_migrate_page,
+	.is_partially_uptodate  = iomap_is_partially_uptodate,
 	.error_remove_page	= generic_error_remove_page,
 	.swap_activate		= xfs_iomap_swapfile_activate,
 };
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index d24dbd4dac39..6ddf1907fc7a 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -12,7 +12,6 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/dax.h>
-#include <linux/buffer_head.h>
 #include <linux/uio.h>
 #include <linux/list_lru.h>
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7fe42a126ec1..778b8c850de3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1032,9 +1032,6 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if (i_blocksize(inode) < PAGE_SIZE)
-		iomap->flags |= IOMAP_F_BUFFER_HEAD;
-
 	if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
 			!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		/* Reserve delalloc blocks for regular writeback. */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 9d791f158dfe..f9f8dc490d3d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1860,7 +1860,7 @@ MODULE_ALIAS_FS("xfs");
 STATIC int __init
 xfs_init_zones(void)
 {
-	if (bioset_init(&xfs_ioend_bioset, 4 * MAX_BUF_PER_PAGE,
+	if (bioset_init(&xfs_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
 			offsetof(struct xfs_ioend, io_inline_bio),
 			BIOSET_NEED_BVECS))
 		goto out;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 1af123df19b5..7f4c7071e7ed 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1153,33 +1153,23 @@ DECLARE_EVENT_CLASS(xfs_page_class,
 		__field(loff_t, size)
 		__field(unsigned long, offset)
 		__field(unsigned int, length)
-		__field(int, delalloc)
-		__field(int, unwritten)
 	),
 	TP_fast_assign(
-		int delalloc = -1, unwritten = -1;
-
-		if (page_has_buffers(page))
-			xfs_count_page_state(page, &delalloc, &unwritten);
 		__entry->dev = inode->i_sb->s_dev;
 		__entry->ino = XFS_I(inode)->i_ino;
 		__entry->pgoff = page_offset(page);
 		__entry->size = i_size_read(inode);
 		__entry->offset = off;
 		__entry->length = len;
-		__entry->delalloc = delalloc;
-		__entry->unwritten = unwritten;
 	),
 	TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
-		  "length %x delalloc %d unwritten %d",
+		  "length %x",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->pgoff,
 		  __entry->size,
 		  __entry->offset,
-		  __entry->length,
-		  __entry->delalloc,
-		  __entry->unwritten)
+		  __entry->length)
 )
 
 #define DEFINE_PAGE_EVENT(name)		\
@@ -1263,9 +1253,6 @@ DEFINE_EVENT(xfs_imap_class, name,	\
 	TP_ARGS(ip, offset, count, type, irec))
 DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
 DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
 DEFINE_IOMAP_EVENT(xfs_iomap_found);
 
@@ -1304,7 +1291,6 @@ DEFINE_EVENT(xfs_simple_io_class, name,	\
 	TP_ARGS(ip, offset, count))
 DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
 DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
-DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
 DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
 DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof);
 DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write);
-- 
cgit v1.2.3


From 98c1a7c0ece345dc8e34c0c85703e4df9fdfd071 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:06 -0700
Subject: xfs: update my copyrights for the writeback and iomap code

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c  | 1 +
 fs/xfs/xfs_iomap.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index bae88ac1101d..f4d3252236c1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2016-2018 Christoph Hellwig.
  * All Rights Reserved.
  */
 #include "xfs.h"
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 778b8c850de3..fb9746cc7338 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * Copyright (c) 2016 Christoph Hellwig.
+ * Copyright (c) 2016-2018 Christoph Hellwig.
  * All Rights Reserved.
  */
 #include <linux/iomap.h>
-- 
cgit v1.2.3


From 8a749386498327c98c2792e19eeeaa21dafc6954 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:06 -0700
Subject: xfs: cow unwritten conversion uses uninitialized dfops

A couple of COW fork unwritten extent conversion helpers pass an
uninitialized dfops pointer to xfs_bmapi_write(). This does not
cause problems because conversion does not use a transaction or the
dfops structure for the COW fork.  Drop the uninitialized usage of
dfops in these codepaths and pass NULL along to xfs_bmapi_write()
instead.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_reflink.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 49e4913fa779..33845009bec5 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -312,8 +312,7 @@ xfs_reflink_convert_cow_extent(
 	struct xfs_inode		*ip,
 	struct xfs_bmbt_irec		*imap,
 	xfs_fileoff_t			offset_fsb,
-	xfs_filblks_t			count_fsb,
-	struct xfs_defer_ops		*dfops)
+	xfs_filblks_t			count_fsb)
 {
 	xfs_fsblock_t			first_block = NULLFSBLOCK;
 	int				nimaps = 1;
@@ -327,7 +326,7 @@ xfs_reflink_convert_cow_extent(
 		return 0;
 	return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block,
-			0, imap, &nimaps, dfops);
+			0, imap, &nimaps, NULL);
 }
 
 /* Convert all of the unwritten CoW extents in a file's range to real ones. */
@@ -342,7 +341,6 @@ xfs_reflink_convert_cow(
 	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
 	xfs_filblks_t		count_fsb = end_fsb - offset_fsb;
 	struct xfs_bmbt_irec	imap;
-	struct xfs_defer_ops	dfops;
 	xfs_fsblock_t		first_block = NULLFSBLOCK;
 	int			nimaps = 1, error = 0;
 
@@ -352,7 +350,7 @@ xfs_reflink_convert_cow(
 	error = xfs_bmapi_write(NULL, ip, offset_fsb, count_fsb,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT |
 			XFS_BMAPI_CONVERT_ONLY, &first_block, 0, &imap, &nimaps,
-			&dfops);
+			NULL);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
@@ -458,8 +456,7 @@ retry:
 	if (nimaps == 0)
 		return -ENOSPC;
 convert:
-	return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb,
-			&dfops);
+	return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb);
 out_bmap_cancel:
 	xfs_defer_cancel(&dfops);
 	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
-- 
cgit v1.2.3


From 6aa6718439ac6907b3c34cbe7ac2c71408f84f6e Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:07 -0700
Subject: xfs: rename xfs_trans ->t_agfl_dfops to ->t_dfops

The ->t_agfl_dfops field is currently used to defer agfl block frees
from associated transaction contexts. While all known problematic
contexts have already been updated to use ->t_agfl_dfops, the
broader goal is to defer agfl frees from all callers that already use a
deferred operations structure. Further, the transaction field
facilitates a good amount of code clean up where the transaction and
dfops have historically been passed down through the stack
separately.

Rename the field to something more generic to prepare to use it as
such throughout XFS. This patch does not change behavior.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c |  4 ++--
 fs/xfs/libxfs/xfs_defer.c |  8 ++++----
 fs/xfs/xfs_inode.c        | 10 +++++-----
 fs/xfs/xfs_symlink.c      |  2 +-
 fs/xfs/xfs_trans.c        |  6 +++---
 fs/xfs/xfs_trans.h        |  2 +-
 6 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index eef466260d43..5b1607d76fe9 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2323,8 +2323,8 @@ xfs_alloc_fix_freelist(
 			goto out_agbp_relse;
 
 		/* defer agfl frees if dfops is provided */
-		if (tp->t_agfl_dfops) {
-			xfs_defer_agfl_block(mp, tp->t_agfl_dfops, args->agno,
+		if (tp->t_dfops) {
+			xfs_defer_agfl_block(mp, tp->t_dfops, args->agno,
 					     bno, &targs.oinfo);
 		} else {
 			error = xfs_free_agfl_block(tp, args->agno, bno, agbp,
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index c3e5bffda4f5..560a7d178c1e 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -350,9 +350,9 @@ xfs_defer_finish(
 	 * Note that this code can go away once all dfops users attach to the
 	 * associated tp.
 	 */
-	ASSERT(!(*tp)->t_agfl_dfops || ((*tp)->t_agfl_dfops == dop));
-	orig_dop = (*tp)->t_agfl_dfops;
-	(*tp)->t_agfl_dfops = dop;
+	ASSERT(!(*tp)->t_dfops || ((*tp)->t_dfops == dop));
+	orig_dop = (*tp)->t_dfops;
+	(*tp)->t_dfops = dop;
 
 	/* Until we run out of pending work to finish... */
 	while (xfs_defer_has_unfinished_work(dop)) {
@@ -425,7 +425,7 @@ xfs_defer_finish(
 	}
 
 out:
-	(*tp)->t_agfl_dfops = orig_dop;
+	(*tp)->t_dfops = orig_dop;
 	if (error)
 		trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error);
 	else
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5df4de666cc1..9c5fbc94cf14 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1196,7 +1196,7 @@ xfs_create(
 	unlock_dp_on_error = true;
 
 	xfs_defer_init(&dfops, &first_block);
-	tp->t_agfl_dfops = &dfops;
+	tp->t_dfops = &dfops;
 
 	/*
 	 * Reserve disk quota and the inode.
@@ -1452,7 +1452,7 @@ xfs_link(
 	}
 
 	xfs_defer_init(&dfops, &first_block);
-	tp->t_agfl_dfops = &dfops;
+	tp->t_dfops = &dfops;
 
 	/*
 	 * Handle initial link state of O_TMPFILE inode
@@ -1813,7 +1813,7 @@ xfs_inactive_ifree(
 	xfs_trans_ijoin(tp, ip, 0);
 
 	xfs_defer_init(&dfops, &first_block);
-	tp->t_agfl_dfops = &dfops;
+	tp->t_dfops = &dfops;
 	error = xfs_ifree(tp, ip, &dfops);
 	if (error) {
 		/*
@@ -2659,7 +2659,7 @@ xfs_remove(
 		goto out_trans_cancel;
 
 	xfs_defer_init(&dfops, &first_block);
-	tp->t_agfl_dfops = &dfops;
+	tp->t_dfops = &dfops;
 	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
 					&first_block, &dfops, resblks);
 	if (error) {
@@ -3027,7 +3027,7 @@ xfs_rename(
 	}
 
 	xfs_defer_init(&dfops, &first_block);
-	tp->t_agfl_dfops = &dfops;
+	tp->t_dfops = &dfops;
 
 	/* RENAME_EXCHANGE is unique from here on. */
 	if (flags & RENAME_EXCHANGE)
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 3783afcb68d2..44335bdebea2 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -247,7 +247,7 @@ xfs_symlink(
 	 * bmapi or the directory create code.
 	 */
 	xfs_defer_init(&dfops, &first_block);
-	tp->t_agfl_dfops = &dfops;
+	tp->t_dfops = &dfops;
 
 	/*
 	 * Allocate an inode for the symlink.
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 524f543c5b82..630993387517 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -118,7 +118,7 @@ xfs_trans_dup(
 	ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
 	tp->t_rtx_res = tp->t_rtx_res_used;
 	ntp->t_pflags = tp->t_pflags;
-	ntp->t_agfl_dfops = tp->t_agfl_dfops;
+	ntp->t_dfops = tp->t_dfops;
 
 	xfs_trans_dup_dqinfo(tp, ntp);
 
@@ -914,8 +914,8 @@ __xfs_trans_commit(
 	int			error = 0;
 	int			sync = tp->t_flags & XFS_TRANS_SYNC;
 
-	ASSERT(!tp->t_agfl_dfops ||
-	       !xfs_defer_has_unfinished_work(tp->t_agfl_dfops) || regrant);
+	ASSERT(!tp->t_dfops ||
+	       !xfs_defer_has_unfinished_work(tp->t_dfops) || regrant);
 
 	trace_xfs_trans_commit(tp, _RET_IP_);
 
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 6526314f0b8f..d8a695c57103 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -105,7 +105,7 @@ typedef struct xfs_trans {
 	struct xlog_ticket	*t_ticket;	/* log mgr ticket */
 	struct xfs_mount	*t_mountp;	/* ptr to fs mount struct */
 	struct xfs_dquot_acct   *t_dqinfo;	/* acctg info for dquots */
-	struct xfs_defer_ops	*t_agfl_dfops;	/* optional agfl fixup dfops */
+	struct xfs_defer_ops	*t_dfops;	/* dfops reference */
 	unsigned int		t_flags;	/* misc flags */
 	int64_t			t_icount_delta;	/* superblock icount change */
 	int64_t			t_ifree_delta;	/* superblock ifree change */
-- 
cgit v1.2.3


From 0e0417f3e54668cec9f677850130e6fbb64edcf2 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:07 -0700
Subject: xfs: remove dfops parameter from ifree call stack

The inode free callchain starting in xfs_inactive_ifree() already
associates its dfops with the transaction. It still passes the dfops
on the stack down through xfs_difree_inobt(), however.

Clean up the call stack and reference dfops directly from the
transaction. This patch does not change behavior.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_ialloc.c | 6 ++----
 fs/xfs/libxfs/xfs_ialloc.h | 1 -
 fs/xfs/xfs_inode.c         | 9 ++++-----
 fs/xfs/xfs_inode.h         | 3 +--
 4 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 0d968e8143aa..c38d14106b53 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1915,7 +1915,6 @@ xfs_difree_inobt(
 	struct xfs_trans		*tp,
 	struct xfs_buf			*agbp,
 	xfs_agino_t			agino,
-	struct xfs_defer_ops		*dfops,
 	struct xfs_icluster		*xic,
 	struct xfs_inobt_rec_incore	*orec)
 {
@@ -2003,7 +2002,7 @@ xfs_difree_inobt(
 			goto error0;
 		}
 
-		xfs_difree_inode_chunk(mp, agno, &rec, dfops);
+		xfs_difree_inode_chunk(mp, agno, &rec, tp->t_dfops);
 	} else {
 		xic->deleted = false;
 
@@ -2148,7 +2147,6 @@ int
 xfs_difree(
 	struct xfs_trans	*tp,		/* transaction pointer */
 	xfs_ino_t		inode,		/* inode to be freed */
-	struct xfs_defer_ops	*dfops,		/* extents to free */
 	struct xfs_icluster	*xic)	/* cluster info if deleted */
 {
 	/* REFERENCED */
@@ -2200,7 +2198,7 @@ xfs_difree(
 	/*
 	 * Fix up the inode allocation btree.
 	 */
-	error = xfs_difree_inobt(mp, tp, agbp, agino, dfops, xic, &rec);
+	error = xfs_difree_inobt(mp, tp, agbp, agino, xic, &rec);
 	if (error)
 		goto error0;
 
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 90b09c5f163b..e936b7cc9389 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -82,7 +82,6 @@ int					/* error */
 xfs_difree(
 	struct xfs_trans *tp,		/* transaction pointer */
 	xfs_ino_t	inode,		/* inode to be freed */
-	struct xfs_defer_ops *dfops,	/* extents to free */
 	struct xfs_icluster *ifree);	/* cluster info if deleted */
 
 /*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 9c5fbc94cf14..f5649a023d46 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1814,7 +1814,7 @@ xfs_inactive_ifree(
 
 	xfs_defer_init(&dfops, &first_block);
 	tp->t_dfops = &dfops;
-	error = xfs_ifree(tp, ip, &dfops);
+	error = xfs_ifree(tp, ip);
 	if (error) {
 		/*
 		 * If we fail to free the inode, shut down.  The cancel
@@ -2445,9 +2445,8 @@ xfs_ifree_local_data(
  */
 int
 xfs_ifree(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	struct xfs_defer_ops	*dfops)
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip)
 {
 	int			error;
 	struct xfs_icluster	xic = { 0 };
@@ -2466,7 +2465,7 @@ xfs_ifree(
 	if (error)
 		return error;
 
-	error = xfs_difree(tp, ip->i_ino, dfops, &xic);
+	error = xfs_difree(tp, ip->i_ino, &xic);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 2ed63a49e890..b1f0e8394f3b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -415,8 +415,7 @@ uint		xfs_ilock_data_map_shared(struct xfs_inode *);
 uint		xfs_ilock_attr_map_shared(struct xfs_inode *);
 
 uint		xfs_ip2xflags(struct xfs_inode *);
-int		xfs_ifree(struct xfs_trans *, xfs_inode_t *,
-			   struct xfs_defer_ops *);
+int		xfs_ifree(struct xfs_trans *, struct xfs_inode *);
 int		xfs_itruncate_extents_flags(struct xfs_trans **,
 				struct xfs_inode *, int, xfs_fsize_t, int);
 void		xfs_iext_realloc(xfs_inode_t *, int, int);
-- 
cgit v1.2.3


From c9cfdb381172174ade2445e7b468f1be550b1a44 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:08 -0700
Subject: xfs: remove dfops param from high level dirname calls

All callers of the directory create, rename and remove interfaces
already associate the dfops with the transaction. Drop the dfops
parameters in these calls in preparation for further cleanups in the
layers below. This patch does not change behavior.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_dir2.c | 17 ++++++++--------
 fs/xfs/libxfs/xfs_dir2.h |  9 +++------
 fs/xfs/xfs_inode.c       | 50 ++++++++++++++++++++++--------------------------
 fs/xfs/xfs_symlink.c     |  2 +-
 4 files changed, 36 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 59169aff30fe..c98250f0de50 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -244,7 +244,6 @@ xfs_dir_createname(
 	struct xfs_name		*name,
 	xfs_ino_t		inum,		/* new entry inode number */
 	xfs_fsblock_t		*first,		/* bmap's firstblock */
-	struct xfs_defer_ops	*dfops,		/* bmap's freeblock list */
 	xfs_extlen_t		total)		/* bmap's total block count */
 {
 	struct xfs_da_args	*args;
@@ -252,6 +251,8 @@ xfs_dir_createname(
 	int			v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
+	ASSERT(tp->t_dfops || !first);
+
 	if (inum) {
 		rval = xfs_dir_ino_validate(tp->t_mountp, inum);
 		if (rval)
@@ -270,11 +271,11 @@ xfs_dir_createname(
 	args->hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args->inumber = inum;
 	args->dp = dp;
-	args->firstblock = first;
-	args->dfops = dfops;
 	args->total = total;
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
+	args->dfops = tp->t_dfops;
+	args->firstblock = first;
 	args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
 	if (!inum)
 		args->op_flags |= XFS_DA_OP_JUSTCHECK;
@@ -421,7 +422,6 @@ xfs_dir_removename(
 	struct xfs_name	*name,
 	xfs_ino_t	ino,
 	xfs_fsblock_t	*first,		/* bmap's firstblock */
-	struct xfs_defer_ops	*dfops,		/* bmap's freeblock list */
 	xfs_extlen_t	total)		/* bmap's total block count */
 {
 	struct xfs_da_args *args;
@@ -429,6 +429,7 @@ xfs_dir_removename(
 	int		v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
+	ASSERT(tp->t_dfops);
 	XFS_STATS_INC(dp->i_mount, xs_dir_remove);
 
 	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
@@ -443,10 +444,10 @@ xfs_dir_removename(
 	args->inumber = ino;
 	args->dp = dp;
 	args->firstblock = first;
-	args->dfops = dfops;
 	args->total = total;
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
+	args->dfops = tp->t_dfops;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
 		rval = xfs_dir2_sf_removename(args);
@@ -483,7 +484,6 @@ xfs_dir_replace(
 	struct xfs_name	*name,		/* name of entry to replace */
 	xfs_ino_t	inum,		/* new inode number */
 	xfs_fsblock_t	*first,		/* bmap's firstblock */
-	struct xfs_defer_ops	*dfops,		/* bmap's freeblock list */
 	xfs_extlen_t	total)		/* bmap's total block count */
 {
 	struct xfs_da_args *args;
@@ -491,6 +491,7 @@ xfs_dir_replace(
 	int		v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
+	ASSERT(tp->t_dfops);
 
 	rval = xfs_dir_ino_validate(tp->t_mountp, inum);
 	if (rval)
@@ -508,10 +509,10 @@ xfs_dir_replace(
 	args->inumber = inum;
 	args->dp = dp;
 	args->firstblock = first;
-	args->dfops = dfops;
 	args->total = total;
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
+	args->dfops = tp->t_dfops;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
 		rval = xfs_dir2_sf_replace(args);
@@ -547,7 +548,7 @@ xfs_dir_canenter(
 	xfs_inode_t	*dp,
 	struct xfs_name	*name)		/* name of entry to add */
 {
-	return xfs_dir_createname(tp, dp, name, 0, NULL, NULL, 0);
+	return xfs_dir_createname(tp, dp, name, 0, NULL, 0);
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index ed385316c7dc..f203aebc07ed 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -118,19 +118,16 @@ extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_inode *pdp);
 extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name, xfs_ino_t inum,
-				xfs_fsblock_t *first,
-				struct xfs_defer_ops *dfops, xfs_extlen_t tot);
+				xfs_fsblock_t *first, xfs_extlen_t tot);
 extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name, xfs_ino_t *inum,
 				struct xfs_name *ci_name);
 extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name, xfs_ino_t ino,
-				xfs_fsblock_t *first,
-				struct xfs_defer_ops *dfops, xfs_extlen_t tot);
+				xfs_fsblock_t *first, xfs_extlen_t tot);
 extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name, xfs_ino_t inum,
-				xfs_fsblock_t *first,
-				struct xfs_defer_ops *dfops, xfs_extlen_t tot);
+				xfs_fsblock_t *first, xfs_extlen_t tot);
 extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name);
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f5649a023d46..e1bc686b70b4 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1225,8 +1225,8 @@ xfs_create(
 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
 	unlock_dp_on_error = false;
 
-	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
-					&first_block, &dfops, resblks ?
+	error = xfs_dir_createname(tp, dp, name, ip->i_ino, &first_block,
+				   resblks ?
 					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
 	if (error) {
 		ASSERT(error != -ENOSPC);
@@ -1464,7 +1464,7 @@ xfs_link(
 	}
 
 	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
-					&first_block, &dfops, resblks);
+				   &first_block, resblks);
 	if (error)
 		goto error_return;
 	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2659,8 +2659,8 @@ xfs_remove(
 
 	xfs_defer_init(&dfops, &first_block);
 	tp->t_dfops = &dfops;
-	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
-					&first_block, &dfops, resblks);
+	error = xfs_dir_removename(tp, dp, name, ip->i_ino, &first_block,
+				   resblks);
 	if (error) {
 		ASSERT(error != -ENOENT);
 		goto out_bmap_cancel;
@@ -2748,9 +2748,9 @@ xfs_sort_for_rename(
 
 static int
 xfs_finish_rename(
-	struct xfs_trans	*tp,
-	struct xfs_defer_ops	*dfops)
+	struct xfs_trans	*tp)
 {
+	struct xfs_defer_ops	*dfops = tp->t_dfops;
 	int			error;
 
 	/*
@@ -2784,7 +2784,6 @@ xfs_cross_rename(
 	struct xfs_inode	*dp2,
 	struct xfs_name		*name2,
 	struct xfs_inode	*ip2,
-	struct xfs_defer_ops	*dfops,
 	xfs_fsblock_t		*first_block,
 	int			spaceres)
 {
@@ -2794,16 +2793,14 @@ xfs_cross_rename(
 	int		dp2_flags = 0;
 
 	/* Swap inode number for dirent in first parent */
-	error = xfs_dir_replace(tp, dp1, name1,
-				ip2->i_ino,
-				first_block, dfops, spaceres);
+	error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, first_block,
+				spaceres);
 	if (error)
 		goto out_trans_abort;
 
 	/* Swap inode number for dirent in second parent */
-	error = xfs_dir_replace(tp, dp2, name2,
-				ip1->i_ino,
-				first_block, dfops, spaceres);
+	error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, first_block,
+				spaceres);
 	if (error)
 		goto out_trans_abort;
 
@@ -2818,7 +2815,7 @@ xfs_cross_rename(
 		if (S_ISDIR(VFS_I(ip2)->i_mode)) {
 			error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
 						dp1->i_ino, first_block,
-						dfops, spaceres);
+						spaceres);
 			if (error)
 				goto out_trans_abort;
 
@@ -2845,7 +2842,7 @@ xfs_cross_rename(
 		if (S_ISDIR(VFS_I(ip1)->i_mode)) {
 			error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
 						dp2->i_ino, first_block,
-						dfops, spaceres);
+						spaceres);
 			if (error)
 				goto out_trans_abort;
 
@@ -2884,10 +2881,10 @@ xfs_cross_rename(
 	}
 	xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 	xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
-	return xfs_finish_rename(tp, dfops);
+	return xfs_finish_rename(tp);
 
 out_trans_abort:
-	xfs_defer_cancel(dfops);
+	xfs_defer_cancel(tp->t_dfops);
 	xfs_trans_cancel(tp);
 	return error;
 }
@@ -3032,7 +3029,7 @@ xfs_rename(
 	if (flags & RENAME_EXCHANGE)
 		return xfs_cross_rename(tp, src_dp, src_name, src_ip,
 					target_dp, target_name, target_ip,
-					&dfops, &first_block, spaceres);
+					&first_block, spaceres);
 
 	/*
 	 * Set up the target.
@@ -3054,7 +3051,7 @@ xfs_rename(
 		 */
 		error = xfs_dir_createname(tp, target_dp, target_name,
 						src_ip->i_ino, &first_block,
-						&dfops, spaceres);
+						spaceres);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -3093,8 +3090,7 @@ xfs_rename(
 		 * name at the destination directory, remove it first.
 		 */
 		error = xfs_dir_replace(tp, target_dp, target_name,
-					src_ip->i_ino,
-					&first_block, &dfops, spaceres);
+					src_ip->i_ino, &first_block, spaceres);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -3128,8 +3124,8 @@ xfs_rename(
 		 * directory.
 		 */
 		error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
-					target_dp->i_ino,
-					&first_block, &dfops, spaceres);
+					target_dp->i_ino, &first_block,
+					spaceres);
 		ASSERT(error != -EEXIST);
 		if (error)
 			goto out_bmap_cancel;
@@ -3168,10 +3164,10 @@ xfs_rename(
 	 */
 	if (wip) {
 		error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
-					&first_block, &dfops, spaceres);
+					&first_block, spaceres);
 	} else
 		error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
-					   &first_block, &dfops, spaceres);
+					   &first_block, spaceres);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -3206,7 +3202,7 @@ xfs_rename(
 	if (new_parent)
 		xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
 
-	error = xfs_finish_rename(tp, &dfops);
+	error = xfs_finish_rename(tp);
 	if (wip)
 		IRELE(wip);
 	return error;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 44335bdebea2..e347a3db018f 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -339,7 +339,7 @@ xfs_symlink(
 	 * Create the directory entry for the symlink.
 	 */
 	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
-					&first_block, &dfops, resblks);
+				   &first_block, resblks);
 	if (error)
 		goto out_bmap_cancel;
 	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-- 
cgit v1.2.3


From 813d08cb6de0af43da40393aebff914d2de9e50e Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:09 -0700
Subject: xfs: use ->t_dfops for recovery of [b|c]ui log items

Log recovery passes down a central dfops structure to recovery
handlers for bui and cui log items. Each of these handlers allocates
and commits a transaction and defers any remaining operations to be
completed by the main recovery sequence.

Since dfops outlives the transaction in this context, set and clear
->t_dfops appropriately so that the *_finish_item() paths and below
(i.e., xfs_bmapi*()) can expect to find the dfops in the transaction
without the transaction being committed with the dfops attached.
This is required because transaction commit expects that an
associated dfops is finished, while in this context the dfops may be
populated at commit time.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_item.c     | 8 ++++++++
 fs/xfs/xfs_refcount_item.c | 8 ++++++++
 2 files changed, 16 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 956ebd583e27..478bfc798861 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -441,6 +441,7 @@ xfs_bui_recover(
 			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
 	if (error)
 		return error;
+	tp->t_dfops = dfops;
 	budp = xfs_trans_get_bud(tp, buip);
 
 	/* Grab the inode. */
@@ -487,6 +488,12 @@ xfs_bui_recover(
 	}
 
 	set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
+	/*
+	 * Recovery finishes all deferred ops once intent processing is
+	 * complete. Reset the trans reference because commit expects a finished
+	 * dfops or none at all.
+	 */
+	tp->t_dfops = NULL;
 	error = xfs_trans_commit(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	IRELE(ip);
@@ -494,6 +501,7 @@ xfs_bui_recover(
 	return error;
 
 err_inode:
+	tp->t_dfops = NULL;
 	xfs_trans_cancel(tp);
 	if (ip) {
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 472a73e9d331..2064c689bc72 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -452,6 +452,7 @@ xfs_cui_recover(
 			mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
 	if (error)
 		return error;
+	tp->t_dfops = dfops;
 	cudp = xfs_trans_get_cud(tp, cuip);
 
 	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
@@ -514,11 +515,18 @@ xfs_cui_recover(
 
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
 	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
+	/*
+	 * Recovery finishes all deferred ops once intent processing is
+	 * complete. Reset the trans reference because commit expects a finished
+	 * dfops or none at all.
+	 */
+	tp->t_dfops = NULL;
 	error = xfs_trans_commit(tp);
 	return error;
 
 abort_error:
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
+	tp->t_dfops = NULL;
 	xfs_trans_cancel(tp);
 	return error;
 }
-- 
cgit v1.2.3


From 40d03ac6aa2bebe05190462734690472310167e4 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:09 -0700
Subject: xfs: use ->t_dfops for attr set/remove operations

Attach the local dfops to the transaction allocated for xattr add
and remove operations. Add an earlier initialization in
xfs_attr_remove() to ensure the structure is valid if it remains
unused at transaction commit time.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c | 11 +++++++----
 fs/xfs/libxfs/xfs_bmap.c |  3 ++-
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 99590f61d624..cc23c269f2bf 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -254,6 +254,8 @@ xfs_attr_set(
 			rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
 	if (error)
 		return error;
+	xfs_defer_init(&dfops, &firstblock);
+	args.trans->t_dfops = &dfops;
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
@@ -315,7 +317,6 @@ xfs_attr_set(
 		 * It won't fit in the shortform, transform to a leaf block.
 		 * GROT: another possible req'mt for a double-split btree op.
 		 */
-		xfs_defer_init(args.dfops, args.firstblock);
 		error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
 		if (error)
 			goto out_defer_cancel;
@@ -325,9 +326,9 @@ xfs_attr_set(
 		 * buffer and run into problems with the write verifier.
 		 */
 		xfs_trans_bhold(args.trans, leaf_bp);
-		xfs_defer_bjoin(args.dfops, leaf_bp);
-		xfs_defer_ijoin(args.dfops, dp);
-		error = xfs_defer_finish(&args.trans, args.dfops);
+		xfs_defer_bjoin(&dfops, leaf_bp);
+		xfs_defer_ijoin(&dfops, dp);
+		error = xfs_defer_finish(&args.trans, &dfops);
 		if (error)
 			goto out_defer_cancel;
 
@@ -429,6 +430,8 @@ xfs_attr_remove(
 			&args.trans);
 	if (error)
 		return error;
+	xfs_defer_init(&dfops, &firstblock);
+	args.trans->t_dfops = &dfops;
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 	/*
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 68ea1f4b9c3f..bc73375769a0 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1057,6 +1057,8 @@ xfs_bmap_add_attrfork(
 			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
 	if (error)
 		return error;
+	xfs_defer_init(&dfops, &firstblock);
+	tp->t_dfops = &dfops;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
@@ -1104,7 +1106,6 @@ xfs_bmap_add_attrfork(
 	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
 	ip->i_afp->if_flags = XFS_IFEXTENTS;
 	logflags = 0;
-	xfs_defer_init(&dfops, &firstblock);
 	switch (ip->i_d.di_format) {
 	case XFS_DINODE_FMT_LOCAL:
 		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops,
-- 
cgit v1.2.3


From 0bd6207f836446f9d472f342a0ba3c21fe5e986f Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:10 -0700
Subject: xfs: remove dfops param in attr fork add path

Now that the attribute fork add tx carries dfops along with the
transaction, it is unnecessary to pass it down the stack. Remove the
dfops parameter and access ->t_dfops directly where necessary. This
patch does not change behavior.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index bc73375769a0..7a414e7fbbb8 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -918,7 +918,6 @@ xfs_bmap_add_attrfork_btree(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_inode_t		*ip,		/* incore inode pointer */
 	xfs_fsblock_t		*firstblock,	/* first block allocated */
-	struct xfs_defer_ops	*dfops,		/* blocks to free at commit */
 	int			*flags)		/* inode logging flags */
 {
 	xfs_btree_cur_t		*cur;		/* btree cursor */
@@ -931,7 +930,7 @@ xfs_bmap_add_attrfork_btree(
 		*flags |= XFS_ILOG_DBROOT;
 	else {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
-		cur->bc_private.b.dfops = dfops;
+		cur->bc_private.b.dfops = tp->t_dfops;
 		cur->bc_private.b.firstblock = *firstblock;
 		error = xfs_bmbt_lookup_first(cur, &stat);
 		if (error)
@@ -962,7 +961,6 @@ xfs_bmap_add_attrfork_extents(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_inode_t		*ip,		/* incore inode pointer */
 	xfs_fsblock_t		*firstblock,	/* first block allocated */
-	struct xfs_defer_ops	*dfops,		/* blocks to free at commit */
 	int			*flags)		/* inode logging flags */
 {
 	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
@@ -971,7 +969,7 @@ xfs_bmap_add_attrfork_extents(
 	if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
 		return 0;
 	cur = NULL;
-	error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, &cur, 0,
+	error = xfs_bmap_extents_to_btree(tp, ip, firstblock, tp->t_dfops, &cur, 0,
 		flags, XFS_DATA_FORK);
 	if (cur) {
 		cur->bc_private.b.allocated = 0;
@@ -997,7 +995,6 @@ xfs_bmap_add_attrfork_local(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_inode_t		*ip,		/* incore inode pointer */
 	xfs_fsblock_t		*firstblock,	/* first block allocated */
-	struct xfs_defer_ops	*dfops,		/* blocks to free at commit */
 	int			*flags)		/* inode logging flags */
 {
 	xfs_da_args_t		dargs;		/* args for dir/attr code */
@@ -1010,7 +1007,7 @@ xfs_bmap_add_attrfork_local(
 		dargs.geo = ip->i_mount->m_dir_geo;
 		dargs.dp = ip;
 		dargs.firstblock = firstblock;
-		dargs.dfops = dfops;
+		dargs.dfops = tp->t_dfops;
 		dargs.total = dargs.geo->fsbcount;
 		dargs.whichfork = XFS_DATA_FORK;
 		dargs.trans = tp;
@@ -1108,16 +1105,16 @@ xfs_bmap_add_attrfork(
 	logflags = 0;
 	switch (ip->i_d.di_format) {
 	case XFS_DINODE_FMT_LOCAL:
-		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops,
-			&logflags);
+		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock,
+						    &logflags);
 		break;
 	case XFS_DINODE_FMT_EXTENTS:
 		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
-			&dfops, &logflags);
+						      &logflags);
 		break;
 	case XFS_DINODE_FMT_BTREE:
-		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &dfops,
-			&logflags);
+		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock,
+						    &logflags);
 		break;
 	default:
 		error = 0;
-- 
cgit v1.2.3


From d76e6ce8ed6943941c0c964d8ae43fdaefdcbb5a Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:10 -0700
Subject: xfs: use ->t_dfops in extent split tx and remove param

Attach the local dfops to ->t_dfops of the extent split transaction.
Since this is the only caller of xfs_bmap_split_extent_at(), remove
the dfops parameter as well.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7a414e7fbbb8..459a29cb17c2 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5919,8 +5919,7 @@ xfs_bmap_split_extent_at(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	xfs_fileoff_t		split_fsb,
-	xfs_fsblock_t		*firstfsb,
-	struct xfs_defer_ops	*dfops)
+	xfs_fsblock_t		*firstfsb)
 {
 	int				whichfork = XFS_DATA_FORK;
 	struct xfs_btree_cur		*cur = NULL;
@@ -5970,7 +5969,7 @@ xfs_bmap_split_extent_at(
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = *firstfsb;
-		cur->bc_private.b.dfops = dfops;
+		cur->bc_private.b.dfops = tp->t_dfops;
 		cur->bc_private.b.flags = 0;
 		error = xfs_bmbt_lookup_eq(cur, &got, &i);
 		if (error)
@@ -6014,7 +6013,7 @@ xfs_bmap_split_extent_at(
 		int tmp_logflags; /* partial log flag return val */
 
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, dfops,
+		error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, tp->t_dfops,
 				&cur, 0, &tmp_logflags, whichfork);
 		logflags |= tmp_logflags;
 	}
@@ -6046,14 +6045,14 @@ xfs_bmap_split_extent(
 			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
 	if (error)
 		return error;
+	xfs_defer_init(&dfops, &firstfsb);
+	tp->t_dfops = &dfops;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
-	xfs_defer_init(&dfops, &firstfsb);
-
 	error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
-			&firstfsb, &dfops);
+					 &firstfsb);
 	if (error)
 		goto out;
 
-- 
cgit v1.2.3


From 32a9b7c65cfc7d9283055fc9d61e6393ee2aa984 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:11 -0700
Subject: xfs: replace xfs_da_args->dfops accesses with ->t_dfops and remove

Now that xfs_da_args->dfops is always assigned from a ->t_dfops
pointer (or one that is immediately attached), replace all
downstream accesses of the former with the latter and remove the
field from struct xfs_da_args.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c        | 103 +++++++++++++++++++++-------------------
 fs/xfs/libxfs/xfs_attr_leaf.c   |  24 +++++-----
 fs/xfs/libxfs/xfs_attr_remote.c |  23 ++++-----
 fs/xfs/libxfs/xfs_bmap.c        |   1 -
 fs/xfs/libxfs/xfs_da_btree.c    |  19 ++++----
 fs/xfs/libxfs/xfs_da_btree.h    |   1 -
 fs/xfs/libxfs/xfs_dir2.c        |  65 ++++++++++++-------------
 7 files changed, 117 insertions(+), 119 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index cc23c269f2bf..a14ab9b2669e 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -220,7 +220,6 @@ xfs_attr_set(
 	args.value = value;
 	args.valuelen = valuelen;
 	args.firstblock = &firstblock;
-	args.dfops = &dfops;
 	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
 	args.total = xfs_attr_calc_size(&args, &local);
 
@@ -407,7 +406,6 @@ xfs_attr_remove(
 		return error;
 
 	args.firstblock = &firstblock;
-	args.dfops = &dfops;
 
 	/*
 	 * we have no control over the attribute names that userspace passes us
@@ -539,11 +537,12 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
  * if bmap_one_block() says there is only one block (ie: no remote blks).
  */
 STATIC int
-xfs_attr_leaf_addname(xfs_da_args_t *args)
+xfs_attr_leaf_addname(
+	struct xfs_da_args	*args)
 {
-	xfs_inode_t *dp;
-	struct xfs_buf *bp;
-	int retval, error, forkoff;
+	struct xfs_inode	*dp;
+	struct xfs_buf		*bp;
+	int			retval, error, forkoff;
 
 	trace_xfs_attr_leaf_addname(args);
 
@@ -601,12 +600,12 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 		 * Commit that transaction so that the node_addname() call
 		 * can manage its own transactions.
 		 */
-		xfs_defer_init(args->dfops, args->firstblock);
+		xfs_defer_init(args->trans->t_dfops, args->firstblock);
 		error = xfs_attr3_leaf_to_node(args);
 		if (error)
 			goto out_defer_cancel;
-		xfs_defer_ijoin(args->dfops, dp);
-		error = xfs_defer_finish(&args->trans, args->dfops);
+		xfs_defer_ijoin(args->trans->t_dfops, dp);
+		error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
 		if (error)
 			goto out_defer_cancel;
 
@@ -690,13 +689,13 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 		 * If the result is small enough, shrink it all into the inode.
 		 */
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-			xfs_defer_init(args->dfops, args->firstblock);
+			xfs_defer_init(args->trans->t_dfops, args->firstblock);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
 				goto out_defer_cancel;
-			xfs_defer_ijoin(args->dfops, dp);
-			error = xfs_defer_finish(&args->trans, args->dfops);
+			xfs_defer_ijoin(args->trans->t_dfops, dp);
+			error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
 			if (error)
 				goto out_defer_cancel;
 		}
@@ -714,7 +713,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 	}
 	return error;
 out_defer_cancel:
-	xfs_defer_cancel(args->dfops);
+	xfs_defer_cancel(args->trans->t_dfops);
 	return error;
 }
 
@@ -725,11 +724,12 @@ out_defer_cancel:
  * if bmap_one_block() says there is only one block (ie: no remote blks).
  */
 STATIC int
-xfs_attr_leaf_removename(xfs_da_args_t *args)
+xfs_attr_leaf_removename(
+	struct xfs_da_args	*args)
 {
-	xfs_inode_t *dp;
-	struct xfs_buf *bp;
-	int error, forkoff;
+	struct xfs_inode	*dp;
+	struct xfs_buf		*bp;
+	int			error, forkoff;
 
 	trace_xfs_attr_leaf_removename(args);
 
@@ -754,19 +754,19 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
 	 * If the result is small enough, shrink it all into the inode.
 	 */
 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-		xfs_defer_init(args->dfops, args->firstblock);
+		xfs_defer_init(args->trans->t_dfops, args->firstblock);
 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 		/* bp is gone due to xfs_da_shrink_inode */
 		if (error)
 			goto out_defer_cancel;
-		xfs_defer_ijoin(args->dfops, dp);
-		error = xfs_defer_finish(&args->trans, args->dfops);
+		xfs_defer_ijoin(args->trans->t_dfops, dp);
+		error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
 		if (error)
 			goto out_defer_cancel;
 	}
 	return 0;
 out_defer_cancel:
-	xfs_defer_cancel(args->dfops);
+	xfs_defer_cancel(args->trans->t_dfops);
 	return error;
 }
 
@@ -817,13 +817,14 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
  * add a whole extra layer of confusion on top of that.
  */
 STATIC int
-xfs_attr_node_addname(xfs_da_args_t *args)
+xfs_attr_node_addname(
+	struct xfs_da_args	*args)
 {
-	xfs_da_state_t *state;
-	xfs_da_state_blk_t *blk;
-	xfs_inode_t *dp;
-	xfs_mount_t *mp;
-	int retval, error;
+	struct xfs_da_state	*state;
+	struct xfs_da_state_blk	*blk;
+	struct xfs_inode	*dp;
+	struct xfs_mount	*mp;
+	int			retval, error;
 
 	trace_xfs_attr_node_addname(args);
 
@@ -882,12 +883,13 @@ restart:
 			 */
 			xfs_da_state_free(state);
 			state = NULL;
-			xfs_defer_init(args->dfops, args->firstblock);
+			xfs_defer_init(args->trans->t_dfops, args->firstblock);
 			error = xfs_attr3_leaf_to_node(args);
 			if (error)
 				goto out_defer_cancel;
-			xfs_defer_ijoin(args->dfops, dp);
-			error = xfs_defer_finish(&args->trans, args->dfops);
+			xfs_defer_ijoin(args->trans->t_dfops, dp);
+			error = xfs_defer_finish(&args->trans,
+						 args->trans->t_dfops);
 			if (error)
 				goto out_defer_cancel;
 
@@ -908,12 +910,12 @@ restart:
 		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
 		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
 		 */
-		xfs_defer_init(args->dfops, args->firstblock);
+		xfs_defer_init(args->trans->t_dfops, args->firstblock);
 		error = xfs_da3_split(state);
 		if (error)
 			goto out_defer_cancel;
-		xfs_defer_ijoin(args->dfops, dp);
-		error = xfs_defer_finish(&args->trans, args->dfops);
+		xfs_defer_ijoin(args->trans->t_dfops, dp);
+		error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
 		if (error)
 			goto out_defer_cancel;
 	} else {
@@ -1006,12 +1008,12 @@ restart:
 		 * Check to see if the tree needs to be collapsed.
 		 */
 		if (retval && (state->path.active > 1)) {
-			xfs_defer_init(args->dfops, args->firstblock);
+			xfs_defer_init(args->trans->t_dfops, args->firstblock);
 			error = xfs_da3_join(state);
 			if (error)
 				goto out_defer_cancel;
-			xfs_defer_ijoin(args->dfops, dp);
-			error = xfs_defer_finish(&args->trans, args->dfops);
+			xfs_defer_ijoin(args->trans->t_dfops, dp);
+			error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
 			if (error)
 				goto out_defer_cancel;
 		}
@@ -1040,7 +1042,7 @@ out:
 		return error;
 	return retval;
 out_defer_cancel:
-	xfs_defer_cancel(args->dfops);
+	xfs_defer_cancel(args->trans->t_dfops);
 	goto out;
 }
 
@@ -1052,13 +1054,14 @@ out_defer_cancel:
  * the root node (a special case of an intermediate node).
  */
 STATIC int
-xfs_attr_node_removename(xfs_da_args_t *args)
+xfs_attr_node_removename(
+	struct xfs_da_args	*args)
 {
-	xfs_da_state_t *state;
-	xfs_da_state_blk_t *blk;
-	xfs_inode_t *dp;
-	struct xfs_buf *bp;
-	int retval, error, forkoff;
+	struct xfs_da_state	*state;
+	struct xfs_da_state_blk	*blk;
+	struct xfs_inode	*dp;
+	struct xfs_buf		*bp;
+	int			retval, error, forkoff;
 
 	trace_xfs_attr_node_removename(args);
 
@@ -1130,12 +1133,12 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 	 * Check to see if the tree needs to be collapsed.
 	 */
 	if (retval && (state->path.active > 1)) {
-		xfs_defer_init(args->dfops, args->firstblock);
+		xfs_defer_init(args->trans->t_dfops, args->firstblock);
 		error = xfs_da3_join(state);
 		if (error)
 			goto out_defer_cancel;
-		xfs_defer_ijoin(args->dfops, dp);
-		error = xfs_defer_finish(&args->trans, args->dfops);
+		xfs_defer_ijoin(args->trans->t_dfops, dp);
+		error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
 		if (error)
 			goto out_defer_cancel;
 		/*
@@ -1162,13 +1165,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 			goto out;
 
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-			xfs_defer_init(args->dfops, args->firstblock);
+			xfs_defer_init(args->trans->t_dfops, args->firstblock);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
 				goto out_defer_cancel;
-			xfs_defer_ijoin(args->dfops, dp);
-			error = xfs_defer_finish(&args->trans, args->dfops);
+			xfs_defer_ijoin(args->trans->t_dfops, dp);
+			error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
 			if (error)
 				goto out_defer_cancel;
 		} else
@@ -1180,7 +1183,7 @@ out:
 	xfs_da_state_free(state);
 	return error;
 out_defer_cancel:
-	xfs_defer_cancel(args->dfops);
+	xfs_defer_cancel(args->trans->t_dfops);
 	goto out;
 }
 
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 76e90046731c..c131469db0f1 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -747,18 +747,18 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
  */
 int
 xfs_attr_shortform_to_leaf(
-	struct xfs_da_args	*args,
-	struct xfs_buf		**leaf_bp)
+	struct xfs_da_args		*args,
+	struct xfs_buf			**leaf_bp)
 {
-	xfs_inode_t *dp;
-	xfs_attr_shortform_t *sf;
-	xfs_attr_sf_entry_t *sfe;
-	xfs_da_args_t nargs;
-	char *tmpbuffer;
-	int error, i, size;
-	xfs_dablk_t blkno;
-	struct xfs_buf *bp;
-	xfs_ifork_t *ifp;
+	struct xfs_inode		*dp;
+	struct xfs_attr_shortform	*sf;
+	struct xfs_attr_sf_entry	*sfe;
+	struct xfs_da_args		nargs;
+	char				*tmpbuffer;
+	int				error, i, size;
+	xfs_dablk_t			blkno;
+	struct xfs_buf			*bp;
+	struct xfs_ifork		*ifp;
 
 	trace_xfs_attr_sf_to_leaf(args);
 
@@ -803,7 +803,6 @@ xfs_attr_shortform_to_leaf(
 	nargs.dp = dp;
 	nargs.geo = args->geo;
 	nargs.firstblock = args->firstblock;
-	nargs.dfops = args->dfops;
 	nargs.total = args->total;
 	nargs.whichfork = XFS_ATTR_FORK;
 	nargs.trans = args->trans;
@@ -1007,7 +1006,6 @@ xfs_attr3_leaf_to_shortform(
 	nargs.geo = args->geo;
 	nargs.dp = dp;
 	nargs.firstblock = args->firstblock;
-	nargs.dfops = args->dfops;
 	nargs.total = args->total;
 	nargs.whichfork = XFS_ATTR_FORK;
 	nargs.trans = args->trans;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index bf2e0371149b..577c4c372265 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -480,15 +480,16 @@ xfs_attr_rmtval_set(
 		 * extent and then crash then the block may not contain the
 		 * correct metadata after log recovery occurs.
 		 */
-		xfs_defer_init(args->dfops, args->firstblock);
+		xfs_defer_init(args->trans->t_dfops, args->firstblock);
 		nmap = 1;
 		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
 				  blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
-				  args->total, &map, &nmap, args->dfops);
+				  args->total, &map, &nmap,
+				  args->trans->t_dfops);
 		if (error)
 			goto out_defer_cancel;
-		xfs_defer_ijoin(args->dfops, dp);
-		error = xfs_defer_finish(&args->trans, args->dfops);
+		xfs_defer_ijoin(args->trans->t_dfops, dp);
+		error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
 		if (error)
 			goto out_defer_cancel;
 
@@ -522,7 +523,7 @@ xfs_attr_rmtval_set(
 
 		ASSERT(blkcnt > 0);
 
-		xfs_defer_init(args->dfops, args->firstblock);
+		xfs_defer_init(args->trans->t_dfops, args->firstblock);
 		nmap = 1;
 		error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
 				       blkcnt, &map, &nmap,
@@ -557,7 +558,7 @@ xfs_attr_rmtval_set(
 	ASSERT(valuelen == 0);
 	return 0;
 out_defer_cancel:
-	xfs_defer_cancel(args->dfops);
+	xfs_defer_cancel(args->trans->t_dfops);
 	args->trans = NULL;
 	return error;
 }
@@ -626,14 +627,14 @@ xfs_attr_rmtval_remove(
 	blkcnt = args->rmtblkcnt;
 	done = 0;
 	while (!done) {
-		xfs_defer_init(args->dfops, args->firstblock);
+		xfs_defer_init(args->trans->t_dfops, args->firstblock);
 		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
 				    XFS_BMAPI_ATTRFORK, 1, args->firstblock,
-				    args->dfops, &done);
+				    args->trans->t_dfops, &done);
 		if (error)
 			goto out_defer_cancel;
-		xfs_defer_ijoin(args->dfops, args->dp);
-		error = xfs_defer_finish(&args->trans, args->dfops);
+		xfs_defer_ijoin(args->trans->t_dfops, args->dp);
+		error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
 		if (error)
 			goto out_defer_cancel;
 
@@ -646,7 +647,7 @@ xfs_attr_rmtval_remove(
 	}
 	return 0;
 out_defer_cancel:
-	xfs_defer_cancel(args->dfops);
+	xfs_defer_cancel(args->trans->t_dfops);
 	args->trans = NULL;
 	return error;
 }
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 459a29cb17c2..c65aeb088552 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1007,7 +1007,6 @@ xfs_bmap_add_attrfork_local(
 		dargs.geo = ip->i_mount->m_dir_geo;
 		dargs.dp = ip;
 		dargs.firstblock = firstblock;
-		dargs.dfops = tp->t_dfops;
 		dargs.total = dargs.geo->fsbcount;
 		dargs.whichfork = XFS_DATA_FORK;
 		dargs.trans = tp;
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 8a301402bbc4..d2048e4e93ec 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2063,7 +2063,7 @@ xfs_da_grow_inode_int(
 	error = xfs_bmapi_write(tp, dp, *bno, count,
 			xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
 			args->firstblock, args->total, &map, &nmap,
-			args->dfops);
+			args->trans->t_dfops);
 	if (error)
 		return error;
 
@@ -2086,7 +2086,8 @@ xfs_da_grow_inode_int(
 			error = xfs_bmapi_write(tp, dp, b, c,
 					xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
 					args->firstblock, args->total,
-					&mapp[mapi], &nmap, args->dfops);
+					&mapp[mapi], &nmap,
+					args->trans->t_dfops);
 			if (error)
 				goto out_free_map;
 			if (nmap < 1)
@@ -2375,13 +2376,13 @@ done:
  */
 int
 xfs_da_shrink_inode(
-	xfs_da_args_t	*args,
-	xfs_dablk_t	dead_blkno,
-	struct xfs_buf	*dead_buf)
+	struct xfs_da_args	*args,
+	xfs_dablk_t		dead_blkno,
+	struct xfs_buf		*dead_buf)
 {
-	xfs_inode_t *dp;
-	int done, error, w, count;
-	xfs_trans_t *tp;
+	struct xfs_inode	*dp;
+	int			done, error, w, count;
+	struct xfs_trans	*tp;
 
 	trace_xfs_da_shrink_inode(args);
 
@@ -2396,7 +2397,7 @@ xfs_da_shrink_inode(
 		 */
 		error = xfs_bunmapi(tp, dp, dead_blkno, count,
 				    xfs_bmapi_aflag(w), 0, args->firstblock,
-				    args->dfops, &done);
+				    args->trans->t_dfops, &done);
 		if (error == -ENOSPC) {
 			if (w != XFS_DATA_FORK)
 				break;
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 28260073ae71..6b8a04f3f162 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -58,7 +58,6 @@ typedef struct xfs_da_args {
 	xfs_ino_t	inumber;	/* input/output inode number */
 	struct xfs_inode *dp;		/* directory inode to manipulate */
 	xfs_fsblock_t	*firstblock;	/* ptr to firstblock for bmap calls */
-	struct xfs_defer_ops *dfops;	/* ptr to freelist for bmap_finish */
 	struct xfs_trans *trans;	/* current trans (changes over time) */
 	xfs_extlen_t	total;		/* total blocks needed, for 1st bmap */
 	int		whichfork;	/* data or attribute fork */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index c98250f0de50..b21f55a11f35 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -239,8 +239,8 @@ xfs_dir_init(
  */
 int
 xfs_dir_createname(
-	xfs_trans_t		*tp,
-	xfs_inode_t		*dp,
+	struct xfs_trans	*tp,
+	struct xfs_inode	*dp,
 	struct xfs_name		*name,
 	xfs_ino_t		inum,		/* new entry inode number */
 	xfs_fsblock_t		*first,		/* bmap's firstblock */
@@ -274,7 +274,6 @@ xfs_dir_createname(
 	args->total = total;
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
-	args->dfops = tp->t_dfops;
 	args->firstblock = first;
 	args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
 	if (!inum)
@@ -417,16 +416,16 @@ out_free:
  */
 int
 xfs_dir_removename(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*dp,
-	struct xfs_name	*name,
-	xfs_ino_t	ino,
-	xfs_fsblock_t	*first,		/* bmap's firstblock */
-	xfs_extlen_t	total)		/* bmap's total block count */
+	struct xfs_trans	*tp,
+	struct xfs_inode	*dp,
+	struct xfs_name		*name,
+	xfs_ino_t		ino,
+	xfs_fsblock_t		*first,		/* bmap's firstblock */
+	xfs_extlen_t		total)		/* bmap's total block count */
 {
-	struct xfs_da_args *args;
-	int		rval;
-	int		v;		/* type-checking value */
+	struct xfs_da_args	*args;
+	int			rval;
+	int			v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
 	ASSERT(tp->t_dfops);
@@ -447,7 +446,6 @@ xfs_dir_removename(
 	args->total = total;
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
-	args->dfops = tp->t_dfops;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
 		rval = xfs_dir2_sf_removename(args);
@@ -479,16 +477,16 @@ out_free:
  */
 int
 xfs_dir_replace(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*dp,
-	struct xfs_name	*name,		/* name of entry to replace */
-	xfs_ino_t	inum,		/* new inode number */
-	xfs_fsblock_t	*first,		/* bmap's firstblock */
-	xfs_extlen_t	total)		/* bmap's total block count */
+	struct xfs_trans	*tp,
+	struct xfs_inode	*dp,
+	struct xfs_name		*name,		/* name of entry to replace */
+	xfs_ino_t		inum,		/* new inode number */
+	xfs_fsblock_t		*first,		/* bmap's firstblock */
+	xfs_extlen_t		total)		/* bmap's total block count */
 {
-	struct xfs_da_args *args;
-	int		rval;
-	int		v;		/* type-checking value */
+	struct xfs_da_args	*args;
+	int			rval;
+	int			v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
 	ASSERT(tp->t_dfops);
@@ -512,7 +510,6 @@ xfs_dir_replace(
 	args->total = total;
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
-	args->dfops = tp->t_dfops;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
 		rval = xfs_dir2_sf_replace(args);
@@ -646,17 +643,17 @@ xfs_dir2_isleaf(
  */
 int
 xfs_dir2_shrink_inode(
-	xfs_da_args_t	*args,
-	xfs_dir2_db_t	db,
-	struct xfs_buf	*bp)
+	struct xfs_da_args	*args,
+	xfs_dir2_db_t		db,
+	struct xfs_buf		*bp)
 {
-	xfs_fileoff_t	bno;		/* directory file offset */
-	xfs_dablk_t	da;		/* directory file offset */
-	int		done;		/* bunmap is finished */
-	xfs_inode_t	*dp;
-	int		error;
-	xfs_mount_t	*mp;
-	xfs_trans_t	*tp;
+	xfs_fileoff_t		bno;		/* directory file offset */
+	xfs_dablk_t		da;		/* directory file offset */
+	int			done;		/* bunmap is finished */
+	struct xfs_inode	*dp;
+	int			error;
+	struct xfs_mount	*mp;
+	struct xfs_trans	*tp;
 
 	trace_xfs_dir2_shrink_inode(args, db);
 
@@ -667,7 +664,7 @@ xfs_dir2_shrink_inode(
 
 	/* Unmap the fsblock(s). */
 	error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0,
-			    args->firstblock, args->dfops, &done);
+			    args->firstblock, args->trans->t_dfops, &done);
 	if (error) {
 		/*
 		 * ENOSPC actually can happen if we're in a removename with no
-- 
cgit v1.2.3


From 2ba13721257e2c76e90e8d83dfd9ba91c434355d Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:11 -0700
Subject: xfs: use ->t_dfops in dqalloc transaction

xfs_dquot_disk_alloc() receives a transaction from the caller and
passes a local dfops along to xfs_bmapi_write(). If we attach this
dfops to the transaction, we have to make sure to clear it before
returning to avoid invalid access of stack memory.

Since xfs_qm_dqread_alloc() is the only caller, pull dfops into the
caller and attach it to the transaction to eliminate this pattern
entirely.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_dquot.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 0973a0423bed..aa62f8b17376 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -286,8 +286,8 @@ xfs_dquot_disk_alloc(
 	struct xfs_buf		**bpp)
 {
 	struct xfs_bmbt_irec	map;
-	struct xfs_defer_ops	dfops;
-	struct xfs_mount	*mp = (*tpp)->t_mountp;
+	struct xfs_trans	*tp = *tpp;
+	struct xfs_mount	*mp = tp->t_mountp;
 	struct xfs_buf		*bp;
 	struct xfs_inode	*quotip = xfs_quota_inode(mp, dqp->dq_flags);
 	xfs_fsblock_t		firstblock;
@@ -296,7 +296,8 @@ xfs_dquot_disk_alloc(
 
 	trace_xfs_dqalloc(dqp);
 
-	xfs_defer_init(&dfops, &firstblock);
+	xfs_defer_init(tp->t_dfops, &firstblock);
+
 	xfs_ilock(quotip, XFS_ILOCK_EXCL);
 	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
 		/*
@@ -308,11 +309,11 @@ xfs_dquot_disk_alloc(
 	}
 
 	/* Create the block mapping. */
-	xfs_trans_ijoin(*tpp, quotip, XFS_ILOCK_EXCL);
-	error = xfs_bmapi_write(*tpp, quotip, dqp->q_fileoffset,
+	xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
+	error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset,
 			XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
 			&firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
-			&map, &nmaps, &dfops);
+			&map, &nmaps, tp->t_dfops);
 	if (error)
 		goto error0;
 	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -326,7 +327,7 @@ xfs_dquot_disk_alloc(
 	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
 
 	/* now we can just get the buffer (there's nothing to read yet) */
-	bp = xfs_trans_get_buf(*tpp, mp->m_ddev_targp, dqp->q_blkno,
+	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno,
 			mp->m_quotainfo->qi_dqchunklen, 0);
 	if (!bp) {
 		error = -ENOMEM;
@@ -338,7 +339,7 @@ xfs_dquot_disk_alloc(
 	 * Make a chunk of dquots out of this buffer and log
 	 * the entire thing.
 	 */
-	xfs_qm_init_dquot_blk(*tpp, mp, be32_to_cpu(dqp->q_core.d_id),
+	xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
 			      dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
 	xfs_buf_set_ref(bp, XFS_DQUOT_REF);
 
@@ -364,14 +365,15 @@ xfs_dquot_disk_alloc(
 	 * is responsible for unlocking any buffer passed back, either
 	 * manually or by committing the transaction.
 	 */
-	xfs_trans_bhold(*tpp, bp);
-	error = xfs_defer_bjoin(&dfops, bp);
+	xfs_trans_bhold(tp, bp);
+	error = xfs_defer_bjoin(tp->t_dfops, bp);
 	if (error) {
-		xfs_trans_bhold_release(*tpp, bp);
-		xfs_trans_brelse(*tpp, bp);
+		xfs_trans_bhold_release(tp, bp);
+		xfs_trans_brelse(tp, bp);
 		goto error1;
 	}
-	error = xfs_defer_finish(tpp, &dfops);
+	error = xfs_defer_finish(tpp, tp->t_dfops);
+	tp = *tpp;
 	if (error) {
 		xfs_buf_relse(bp);
 		goto error1;
@@ -380,7 +382,7 @@ xfs_dquot_disk_alloc(
 	return 0;
 
 error1:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 error0:
 	return error;
 }
@@ -538,13 +540,17 @@ xfs_qm_dqread_alloc(
 	struct xfs_buf		**bpp)
 {
 	struct xfs_trans	*tp;
+	struct xfs_defer_ops	dfops;
 	struct xfs_buf		*bp;
+	xfs_fsblock_t		firstblock;
 	int			error;
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
 			XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
 	if (error)
 		goto err;
+	xfs_defer_init(&dfops, &firstblock);
+	tp->t_dfops = &dfops;
 
 	error = xfs_dquot_disk_alloc(&tp, dqp, &bp);
 	if (error)
-- 
cgit v1.2.3


From 175d1a013eaf35c802e0195d87ac2a8df83ad42a Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:12 -0700
Subject: xfs: use ->t_dfops for all xfs_bmapi_write() callers

Attach ->t_dfops for all remaining callers of xfs_bmapi_write().
This prepares the latter to no longer require a separate dfops
parameter.

Note that xfs_symlink() already uses ->t_dfops. Fix up the local
references for consistency.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c |  5 +++--
 fs/xfs/xfs_iomap.c     | 21 ++++++++++++---------
 fs/xfs/xfs_reflink.c   |  7 ++++---
 fs/xfs/xfs_rtalloc.c   |  7 ++++---
 fs/xfs/xfs_symlink.c   |  6 +++---
 5 files changed, 26 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index da561882c349..4a698dab85d2 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -973,16 +973,17 @@ xfs_alloc_file_space(
 		xfs_trans_ijoin(tp, ip, 0);
 
 		xfs_defer_init(&dfops, &firstfsb);
+		tp->t_dfops = &dfops;
 		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
 					allocatesize_fsb, alloc_type, &firstfsb,
-					resblks, imapp, &nimaps, &dfops);
+					resblks, imapp, &nimaps, tp->t_dfops);
 		if (error)
 			goto error0;
 
 		/*
 		 * Complete the transaction
 		 */
-		error = xfs_defer_finish(&tp, &dfops);
+		error = xfs_defer_finish(&tp, tp->t_dfops);
 		if (error)
 			goto error0;
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fb9746cc7338..7c0b858f6723 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -255,17 +255,18 @@ xfs_iomap_write_direct(
 	 * caller gave to us.
 	 */
 	xfs_defer_init(&dfops, &firstfsb);
+	tp->t_dfops = &dfops;
 	nimaps = 1;
 	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
 				bmapi_flags, &firstfsb, resblks, imap,
-				&nimaps, &dfops);
+				&nimaps, tp->t_dfops);
 	if (error)
 		goto out_bmap_cancel;
 
 	/*
 	 * Complete the transaction
 	 */
-	error = xfs_defer_finish(&tp, &dfops);
+	error = xfs_defer_finish(&tp, tp->t_dfops);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -289,7 +290,7 @@ out_unlock:
 	return error;
 
 out_bmap_cancel:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
 out_trans_cancel:
 	xfs_trans_cancel(tp);
@@ -717,6 +718,7 @@ xfs_iomap_write_allocate(
 			xfs_trans_ijoin(tp, ip, 0);
 
 			xfs_defer_init(&dfops, &first_block);
+			tp->t_dfops = &dfops;
 
 			/*
 			 * it is possible that the extents have changed since
@@ -772,11 +774,11 @@ xfs_iomap_write_allocate(
 			error = xfs_bmapi_write(tp, ip, map_start_fsb,
 						count_fsb, flags, &first_block,
 						nres, imap, &nimaps,
-						&dfops);
+						tp->t_dfops);
 			if (error)
 				goto trans_cancel;
 
-			error = xfs_defer_finish(&tp, &dfops);
+			error = xfs_defer_finish(&tp, tp->t_dfops);
 			if (error)
 				goto trans_cancel;
 
@@ -810,7 +812,7 @@ xfs_iomap_write_allocate(
 	}
 
 trans_cancel:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 	xfs_trans_cancel(tp);
 error0:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -878,10 +880,11 @@ xfs_iomap_write_unwritten(
 		 * Modify the unwritten extent state of the buffer.
 		 */
 		xfs_defer_init(&dfops, &firstfsb);
+		tp->t_dfops = &dfops;
 		nimaps = 1;
 		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
 					XFS_BMAPI_CONVERT, &firstfsb, resblks,
-					&imap, &nimaps, &dfops);
+					&imap, &nimaps, tp->t_dfops);
 		if (error)
 			goto error_on_bmapi_transaction;
 
@@ -901,7 +904,7 @@ xfs_iomap_write_unwritten(
 			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 		}
 
-		error = xfs_defer_finish(&tp, &dfops);
+		error = xfs_defer_finish(&tp, tp->t_dfops);
 		if (error)
 			goto error_on_bmapi_transaction;
 
@@ -928,7 +931,7 @@ xfs_iomap_write_unwritten(
 	return 0;
 
 error_on_bmapi_transaction:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 	xfs_trans_cancel(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 33845009bec5..3c28e7ff7365 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -429,19 +429,20 @@ retry:
 	xfs_trans_ijoin(tp, ip, 0);
 
 	xfs_defer_init(&dfops, &first_block);
+	tp->t_dfops = &dfops;
 	nimaps = 1;
 
 	/* Allocate the entire reservation as unwritten blocks. */
 	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
-			resblks, imap, &nimaps, &dfops);
+			resblks, imap, &nimaps, tp->t_dfops);
 	if (error)
 		goto out_bmap_cancel;
 
 	xfs_inode_set_cowblocks_tag(ip);
 
 	/* Finish up. */
-	error = xfs_defer_finish(&tp, &dfops);
+	error = xfs_defer_finish(&tp, tp->t_dfops);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -458,7 +459,7 @@ retry:
 convert:
 	return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb);
 out_bmap_cancel:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
 			XFS_QMOPT_RES_REGBLKS);
 out:
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 329d4d26c13e..312d410d91fb 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -788,13 +788,14 @@ xfs_growfs_rt_alloc(
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
 		xfs_defer_init(&dfops, &firstblock);
+		tp->t_dfops = &dfops;
 		/*
 		 * Allocate blocks to the bitmap file.
 		 */
 		nmap = 1;
 		error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
 					XFS_BMAPI_METADATA, &firstblock,
-					resblks, &map, &nmap, &dfops);
+					resblks, &map, &nmap, tp->t_dfops);
 		if (!error && nmap < 1)
 			error = -ENOSPC;
 		if (error)
@@ -802,7 +803,7 @@ xfs_growfs_rt_alloc(
 		/*
 		 * Free any blocks freed up in the transaction, then commit.
 		 */
-		error = xfs_defer_finish(&tp, &dfops);
+		error = xfs_defer_finish(&tp, tp->t_dfops);
 		if (error)
 			goto out_bmap_cancel;
 		error = xfs_trans_commit(tp);
@@ -855,7 +856,7 @@ xfs_growfs_rt_alloc(
 	return 0;
 
 out_bmap_cancel:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 out_trans_cancel:
 	xfs_trans_cancel(tp);
 	return error;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index e347a3db018f..6825a31727d1 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -291,7 +291,7 @@ xfs_symlink(
 
 		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
 				  XFS_BMAPI_METADATA, &first_block, resblks,
-				  mval, &nmaps, &dfops);
+				  mval, &nmaps, tp->t_dfops);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -354,7 +354,7 @@ xfs_symlink(
 		xfs_trans_set_sync(tp);
 	}
 
-	error = xfs_defer_finish(&tp, &dfops);
+	error = xfs_defer_finish(&tp, tp->t_dfops);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -370,7 +370,7 @@ xfs_symlink(
 	return 0;
 
 out_bmap_cancel:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 out_trans_cancel:
 	xfs_trans_cancel(tp);
 out_release_inode:
-- 
cgit v1.2.3


From 6e702a5dcbe1d38cf479931dd2e2a3da884143f0 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:12 -0700
Subject: xfs: remove xfs_bmapi_write() dfops param

Now that all callers use ->t_dfops, the xfs_bmapi_write() dfops
parameter is no longer necessary. Remove it and access ->t_dfops
directly. This patch does not change behavior.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr_remote.c |  3 +--
 fs/xfs/libxfs/xfs_bmap.c        | 10 +++++-----
 fs/xfs/libxfs/xfs_bmap.h        |  3 +--
 fs/xfs/libxfs/xfs_da_btree.c    |  6 ++----
 fs/xfs/xfs_bmap_util.c          |  2 +-
 fs/xfs/xfs_dquot.c              |  2 +-
 fs/xfs/xfs_iomap.c              |  7 +++----
 fs/xfs/xfs_reflink.c            |  8 ++++----
 fs/xfs/xfs_rtalloc.c            |  2 +-
 fs/xfs/xfs_symlink.c            |  2 +-
 10 files changed, 20 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 577c4c372265..1f2bc86a28ed 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -484,8 +484,7 @@ xfs_attr_rmtval_set(
 		nmap = 1;
 		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
 				  blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
-				  args->total, &map, &nmap,
-				  args->trans->t_dfops);
+				  args->total, &map, &nmap);
 		if (error)
 			goto out_defer_cancel;
 		xfs_defer_ijoin(args->trans->t_dfops, dp);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index c65aeb088552..02e72c195c4f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4245,8 +4245,7 @@ xfs_bmapi_write(
 						   controls a.g. for allocs */
 	xfs_extlen_t		total,		/* total blocks needed */
 	struct xfs_bmbt_irec	*mval,		/* output: map values */
-	int			*nmap,		/* i/o: mval size/count */
-	struct xfs_defer_ops	*dfops)		/* i/o: list extents to free */
+	int			*nmap)		/* i/o: mval size/count */
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_ifork	*ifp;
@@ -4337,7 +4336,7 @@ xfs_bmapi_write(
 	bma.ip = ip;
 	bma.total = total;
 	bma.datatype = 0;
-	bma.dfops = dfops;
+	bma.dfops = tp ? tp->t_dfops : NULL;
 	bma.firstblock = firstblock;
 
 	while (bno < end && n < *nmap) {
@@ -4414,8 +4413,9 @@ xfs_bmapi_write(
 			 * the refcount btree for orphan recovery.
 			 */
 			if (whichfork == XFS_COW_FORK) {
-				error = xfs_refcount_alloc_cow_extent(mp, dfops,
-						bma.blkno, bma.length);
+				error = xfs_refcount_alloc_cow_extent(mp,
+						tp->t_dfops, bma.blkno,
+						bma.length);
 				if (error)
 					goto error0;
 			}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 44639588d1c7..fc86cc218c58 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -203,8 +203,7 @@ int	xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
 int	xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t len, int flags,
 		xfs_fsblock_t *firstblock, xfs_extlen_t total,
-		struct xfs_bmbt_irec *mval, int *nmap,
-		struct xfs_defer_ops *dfops);
+		struct xfs_bmbt_irec *mval, int *nmap);
 int	__xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t *rlen, int flags,
 		xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index d2048e4e93ec..e43f1dda02e4 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2062,8 +2062,7 @@ xfs_da_grow_inode_int(
 	ASSERT(args->firstblock != NULL);
 	error = xfs_bmapi_write(tp, dp, *bno, count,
 			xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
-			args->firstblock, args->total, &map, &nmap,
-			args->trans->t_dfops);
+			args->firstblock, args->total, &map, &nmap);
 	if (error)
 		return error;
 
@@ -2086,8 +2085,7 @@ xfs_da_grow_inode_int(
 			error = xfs_bmapi_write(tp, dp, b, c,
 					xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
 					args->firstblock, args->total,
-					&mapp[mapi], &nmap,
-					args->trans->t_dfops);
+					&mapp[mapi], &nmap);
 			if (error)
 				goto out_free_map;
 			if (nmap < 1)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 4a698dab85d2..cf54da3187d5 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -976,7 +976,7 @@ xfs_alloc_file_space(
 		tp->t_dfops = &dfops;
 		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
 					allocatesize_fsb, alloc_type, &firstfsb,
-					resblks, imapp, &nimaps, tp->t_dfops);
+					resblks, imapp, &nimaps);
 		if (error)
 			goto error0;
 
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index aa62f8b17376..1ef38e1df679 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -313,7 +313,7 @@ xfs_dquot_disk_alloc(
 	error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset,
 			XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
 			&firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
-			&map, &nmaps, tp->t_dfops);
+			&map, &nmaps);
 	if (error)
 		goto error0;
 	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7c0b858f6723..0c736c938f52 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -259,7 +259,7 @@ xfs_iomap_write_direct(
 	nimaps = 1;
 	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
 				bmapi_flags, &firstfsb, resblks, imap,
-				&nimaps, tp->t_dfops);
+				&nimaps);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -773,8 +773,7 @@ xfs_iomap_write_allocate(
 			 */
 			error = xfs_bmapi_write(tp, ip, map_start_fsb,
 						count_fsb, flags, &first_block,
-						nres, imap, &nimaps,
-						tp->t_dfops);
+						nres, imap, &nimaps);
 			if (error)
 				goto trans_cancel;
 
@@ -884,7 +883,7 @@ xfs_iomap_write_unwritten(
 		nimaps = 1;
 		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
 					XFS_BMAPI_CONVERT, &firstfsb, resblks,
-					&imap, &nimaps, tp->t_dfops);
+					&imap, &nimaps);
 		if (error)
 			goto error_on_bmapi_transaction;
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 3c28e7ff7365..9f3f144bf9ff 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -326,7 +326,7 @@ xfs_reflink_convert_cow_extent(
 		return 0;
 	return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block,
-			0, imap, &nimaps, NULL);
+			0, imap, &nimaps);
 }
 
 /* Convert all of the unwritten CoW extents in a file's range to real ones. */
@@ -349,8 +349,8 @@ xfs_reflink_convert_cow(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	error = xfs_bmapi_write(NULL, ip, offset_fsb, count_fsb,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT |
-			XFS_BMAPI_CONVERT_ONLY, &first_block, 0, &imap, &nimaps,
-			NULL);
+			XFS_BMAPI_CONVERT_ONLY, &first_block, 0, &imap,
+			&nimaps);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
@@ -435,7 +435,7 @@ retry:
 	/* Allocate the entire reservation as unwritten blocks. */
 	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
-			resblks, imap, &nimaps, tp->t_dfops);
+			resblks, imap, &nimaps);
 	if (error)
 		goto out_bmap_cancel;
 
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 312d410d91fb..1c7d1238ff3b 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -795,7 +795,7 @@ xfs_growfs_rt_alloc(
 		nmap = 1;
 		error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
 					XFS_BMAPI_METADATA, &firstblock,
-					resblks, &map, &nmap, tp->t_dfops);
+					resblks, &map, &nmap);
 		if (!error && nmap < 1)
 			error = -ENOSPC;
 		if (error)
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 6825a31727d1..2b6bcfd39c14 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -291,7 +291,7 @@ xfs_symlink(
 
 		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
 				  XFS_BMAPI_METADATA, &first_block, resblks,
-				  mval, &nmaps, tp->t_dfops);
+				  mval, &nmaps);
 		if (error)
 			goto out_bmap_cancel;
 
-- 
cgit v1.2.3


From 4bcfa613a0582a9992a6c2af82273bd770103d12 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:13 -0700
Subject: xfs: use ->t_dfops for all xfs_bunmapi() callers

Use ->t_dfops for all remaining xfs_bunmapi() callers. This prepares
the latter to no longer require a dfops parameter.

Note that xfs_itruncate_extents_flags() associates a local dfops
with a transaction provided by the caller. Since there are multiple
callers, save the original ->t_dfops, point it at the local dfops,
and restore it before the function returns to avoid exposing stack
memory to the caller.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c |  9 +++++----
 fs/xfs/xfs_inode.c     | 12 ++++++++----
 fs/xfs/xfs_reflink.c   | 27 +++++++++++++++------------
 fs/xfs/xfs_symlink.c   |  9 +++++----
 4 files changed, 33 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index cf54da3187d5..76f28b823866 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1044,13 +1044,14 @@ xfs_unmap_extent(
 	xfs_trans_ijoin(tp, ip, 0);
 
 	xfs_defer_init(&dfops, &firstfsb);
+	tp->t_dfops = &dfops;
 	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
-			&dfops, done);
+			tp->t_dfops, done);
 	if (error)
 		goto out_bmap_cancel;
 
-	xfs_defer_ijoin(&dfops, ip);
-	error = xfs_defer_finish(&tp, &dfops);
+	xfs_defer_ijoin(tp->t_dfops, ip);
+	error = xfs_defer_finish(&tp, tp->t_dfops);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -1060,7 +1061,7 @@ out_unlock:
 	return error;
 
 out_bmap_cancel:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 out_trans_cancel:
 	xfs_trans_cancel(tp);
 	goto out_unlock;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e1bc686b70b4..539d96201666 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1545,6 +1545,7 @@ xfs_itruncate_extents_flags(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp = *tpp;
+	struct xfs_defer_ops	*odfops = tp->t_dfops;
 	struct xfs_defer_ops	dfops;
 	xfs_fsblock_t		first_block;
 	xfs_fileoff_t		first_unmap_block;
@@ -1584,9 +1585,10 @@ xfs_itruncate_extents_flags(
 	unmap_len = last_block - first_unmap_block + 1;
 	while (!done) {
 		xfs_defer_init(&dfops, &first_block);
+		tp->t_dfops = &dfops;
 		error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
 				    XFS_ITRUNC_MAX_EXTENTS, &first_block,
-				    &dfops, &done);
+				    tp->t_dfops, &done);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -1594,8 +1596,8 @@ xfs_itruncate_extents_flags(
 		 * Duplicate the transaction that has the permanent
 		 * reservation and commit the old transaction.
 		 */
-		xfs_defer_ijoin(&dfops, ip);
-		error = xfs_defer_finish(&tp, &dfops);
+		xfs_defer_ijoin(tp->t_dfops, ip);
+		error = xfs_defer_finish(&tp, tp->t_dfops);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -1623,6 +1625,8 @@ xfs_itruncate_extents_flags(
 	trace_xfs_itruncate_extents_end(ip, new_size);
 
 out:
+	/* ->t_dfops points to local stack, don't leak it! */
+	tp->t_dfops = odfops;
 	*tpp = tp;
 	return error;
 out_bmap_cancel:
@@ -1631,7 +1635,7 @@ out_bmap_cancel:
 	 * the transaction can be properly aborted.  We just need to make sure
 	 * we're not holding any resources that we were not when we came in.
 	 */
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 	goto out;
 }
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 9f3f144bf9ff..c119cd33766e 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -700,9 +700,10 @@ xfs_reflink_end_cow(
 
 		/* Unmap the old blocks in the data fork. */
 		xfs_defer_init(&dfops, &firstfsb);
+		tp->t_dfops = &dfops;
 		rlen = del.br_blockcount;
 		error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
-				&firstfsb, &dfops);
+				&firstfsb, tp->t_dfops);
 		if (error)
 			goto out_defer;
 
@@ -714,13 +715,14 @@ xfs_reflink_end_cow(
 		trace_xfs_reflink_cow_remap(ip, &del);
 
 		/* Free the CoW orphan record. */
-		error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops,
+		error = xfs_refcount_free_cow_extent(tp->t_mountp, tp->t_dfops,
 				del.br_startblock, del.br_blockcount);
 		if (error)
 			goto out_defer;
 
 		/* Map the new blocks into the data fork. */
-		error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del);
+		error = xfs_bmap_map_extent(tp->t_mountp, tp->t_dfops, ip,
+					    &del);
 		if (error)
 			goto out_defer;
 
@@ -731,8 +733,8 @@ xfs_reflink_end_cow(
 		/* Remove the mapping from the CoW fork. */
 		xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
 
-		xfs_defer_ijoin(&dfops, ip);
-		error = xfs_defer_finish(&tp, &dfops);
+		xfs_defer_ijoin(tp->t_dfops, ip);
+		error = xfs_defer_finish(&tp, tp->t_dfops);
 		if (error)
 			goto out_defer;
 		if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -750,7 +752,7 @@ prev_extent:
 	return 0;
 
 out_defer:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 out_cancel:
 	xfs_trans_cancel(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1049,8 +1051,9 @@ xfs_reflink_remap_extent(
 	rlen = unmap_len;
 	while (rlen) {
 		xfs_defer_init(&dfops, &firstfsb);
+		tp->t_dfops = &dfops;
 		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1,
-				&firstfsb, &dfops);
+				&firstfsb, tp->t_dfops);
 		if (error)
 			goto out_defer;
 
@@ -1071,12 +1074,12 @@ xfs_reflink_remap_extent(
 				uirec.br_blockcount, uirec.br_startblock);
 
 		/* Update the refcount tree */
-		error = xfs_refcount_increase_extent(mp, &dfops, &uirec);
+		error = xfs_refcount_increase_extent(mp, tp->t_dfops, &uirec);
 		if (error)
 			goto out_defer;
 
 		/* Map the new blocks into the data fork. */
-		error = xfs_bmap_map_extent(mp, &dfops, ip, &uirec);
+		error = xfs_bmap_map_extent(mp, tp->t_dfops, ip, &uirec);
 		if (error)
 			goto out_defer;
 
@@ -1097,8 +1100,8 @@ xfs_reflink_remap_extent(
 
 next_extent:
 		/* Process all the deferred stuff. */
-		xfs_defer_ijoin(&dfops, ip);
-		error = xfs_defer_finish(&tp, &dfops);
+		xfs_defer_ijoin(tp->t_dfops, ip);
+		error = xfs_defer_finish(&tp, tp->t_dfops);
 		if (error)
 			goto out_defer;
 	}
@@ -1110,7 +1113,7 @@ next_extent:
 	return 0;
 
 out_defer:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 out_cancel:
 	xfs_trans_cancel(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 2b6bcfd39c14..290ae13d4673 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -444,6 +444,7 @@ xfs_inactive_symlink_rmt(
 	 */
 	done = 0;
 	xfs_defer_init(&dfops, &first_block);
+	tp->t_dfops = &dfops;
 	nmaps = ARRAY_SIZE(mval);
 	error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),
 				mval, &nmaps, 0);
@@ -466,15 +467,15 @@ xfs_inactive_symlink_rmt(
 	 * Unmap the dead block(s) to the dfops.
 	 */
 	error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps,
-			    &first_block, &dfops, &done);
+			    &first_block, tp->t_dfops, &done);
 	if (error)
 		goto error_bmap_cancel;
 	ASSERT(done);
 	/*
 	 * Commit the first transaction.  This logs the EFI and the inode.
 	 */
-	xfs_defer_ijoin(&dfops, ip);
-	error = xfs_defer_finish(&tp, &dfops);
+	xfs_defer_ijoin(tp->t_dfops, ip);
+	error = xfs_defer_finish(&tp, tp->t_dfops);
 	if (error)
 		goto error_bmap_cancel;
 
@@ -499,7 +500,7 @@ xfs_inactive_symlink_rmt(
 	return 0;
 
 error_bmap_cancel:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 error_trans_cancel:
 	xfs_trans_cancel(tp);
 error_unlock:
-- 
cgit v1.2.3


From ccd9d91148780a5e979ac00bce67c2155fb6378f Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:13 -0700
Subject: xfs: remove xfs_bunmapi() dfops param

Now that all xfs_bunmapi() callers use ->t_dfops, remove the
unnecessary parameter and access ->t_dfops directly. This patch does
not change behavior.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr_remote.c |  2 +-
 fs/xfs/libxfs/xfs_bmap.c        | 20 +++++++++-----------
 fs/xfs/libxfs/xfs_bmap.h        |  5 ++---
 fs/xfs/libxfs/xfs_da_btree.c    |  2 +-
 fs/xfs/libxfs/xfs_dir2.c        |  2 +-
 fs/xfs/xfs_bmap_util.c          |  2 +-
 fs/xfs/xfs_inode.c              |  2 +-
 fs/xfs/xfs_reflink.c            |  5 ++---
 fs/xfs/xfs_symlink.c            |  3 +--
 9 files changed, 19 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 1f2bc86a28ed..179259fd1b5e 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -629,7 +629,7 @@ xfs_attr_rmtval_remove(
 		xfs_defer_init(args->trans->t_dfops, args->firstblock);
 		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
 				    XFS_BMAPI_ATTRFORK, 1, args->firstblock,
-				    args->trans->t_dfops, &done);
+				    &done);
 		if (error)
 			goto out_defer_cancel;
 		xfs_defer_ijoin(args->trans->t_dfops, args->dp);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 02e72c195c4f..9c778e50ad19 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5143,26 +5143,26 @@ done:
  */
 int						/* error */
 __xfs_bunmapi(
-	xfs_trans_t		*tp,		/* transaction pointer */
+	struct xfs_trans	*tp,		/* transaction pointer */
 	struct xfs_inode	*ip,		/* incore inode */
 	xfs_fileoff_t		start,		/* first file offset deleted */
 	xfs_filblks_t		*rlen,		/* i/o: amount remaining */
 	int			flags,		/* misc flags */
 	xfs_extnum_t		nexts,		/* number of extents max */
-	xfs_fsblock_t		*firstblock,	/* first allocated block
+	xfs_fsblock_t		*firstblock)	/* first allocated block
 						   controls a.g. for allocs */
-	struct xfs_defer_ops	*dfops)		/* i/o: deferred updates */
 {
-	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
-	xfs_bmbt_irec_t		del;		/* extent being deleted */
+	struct xfs_defer_ops	*dfops = tp ? tp->t_dfops : NULL;
+	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
+	struct xfs_bmbt_irec	del;		/* extent being deleted */
 	int			error;		/* error return value */
 	xfs_extnum_t		extno;		/* extent number in list */
-	xfs_bmbt_irec_t		got;		/* current extent record */
+	struct xfs_bmbt_irec	got;		/* current extent record */
 	xfs_ifork_t		*ifp;		/* inode fork pointer */
 	int			isrt;		/* freeing in rt area */
 	int			logflags;	/* transaction logging flags */
 	xfs_extlen_t		mod;		/* rt extent offset */
-	xfs_mount_t		*mp;		/* mount structure */
+	struct xfs_mount	*mp;		/* mount structure */
 	int			tmp_logflags;	/* partial logging flags */
 	int			wasdel;		/* was a delayed alloc extent */
 	int			whichfork;	/* data or attribute fork */
@@ -5516,13 +5516,11 @@ xfs_bunmapi(
 	int			flags,
 	xfs_extnum_t		nexts,
 	xfs_fsblock_t		*firstblock,
-	struct xfs_defer_ops	*dfops,
 	int			*done)
 {
 	int			error;
 
-	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts, firstblock,
-			dfops);
+	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts, firstblock);
 	*done = (len == 0);
 	return error;
 }
@@ -6193,7 +6191,7 @@ xfs_bmap_finish_one(
 		break;
 	case XFS_BMAP_UNMAP:
 		error = __xfs_bunmapi(tp, ip, startoff, blockcount,
-				XFS_BMAPI_REMAP, 1, &firstfsb, dfops);
+				XFS_BMAPI_REMAP, 1, &firstfsb);
 		break;
 	default:
 		ASSERT(0);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index fc86cc218c58..a83906ec6141 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -206,12 +206,11 @@ int	xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
 		struct xfs_bmbt_irec *mval, int *nmap);
 int	__xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t *rlen, int flags,
-		xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
-		struct xfs_defer_ops *dfops);
+		xfs_extnum_t nexts, xfs_fsblock_t *firstblock);
 int	xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t len, int flags,
 		xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
-		struct xfs_defer_ops *dfops, int *done);
+		int *done);
 int	xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
 		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got,
 		struct xfs_bmbt_irec *del);
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e43f1dda02e4..68a72e3d9f53 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2395,7 +2395,7 @@ xfs_da_shrink_inode(
 		 */
 		error = xfs_bunmapi(tp, dp, dead_blkno, count,
 				    xfs_bmapi_aflag(w), 0, args->firstblock,
-				    args->trans->t_dfops, &done);
+				    &done);
 		if (error == -ENOSPC) {
 			if (w != XFS_DATA_FORK)
 				break;
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index b21f55a11f35..781dc63d305d 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -664,7 +664,7 @@ xfs_dir2_shrink_inode(
 
 	/* Unmap the fsblock(s). */
 	error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0,
-			    args->firstblock, args->trans->t_dfops, &done);
+			    args->firstblock, &done);
 	if (error) {
 		/*
 		 * ENOSPC actually can happen if we're in a removename with no
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 76f28b823866..6c02cd264045 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1046,7 +1046,7 @@ xfs_unmap_extent(
 	xfs_defer_init(&dfops, &firstfsb);
 	tp->t_dfops = &dfops;
 	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
-			tp->t_dfops, done);
+			    done);
 	if (error)
 		goto out_bmap_cancel;
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 539d96201666..f456df2e1394 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1588,7 +1588,7 @@ xfs_itruncate_extents_flags(
 		tp->t_dfops = &dfops;
 		error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
 				    XFS_ITRUNC_MAX_EXTENTS, &first_block,
-				    tp->t_dfops, &done);
+				    &done);
 		if (error)
 			goto out_bmap_cancel;
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index c119cd33766e..0ac0706c98e8 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -703,7 +703,7 @@ xfs_reflink_end_cow(
 		tp->t_dfops = &dfops;
 		rlen = del.br_blockcount;
 		error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
-				&firstfsb, tp->t_dfops);
+				&firstfsb);
 		if (error)
 			goto out_defer;
 
@@ -1052,8 +1052,7 @@ xfs_reflink_remap_extent(
 	while (rlen) {
 		xfs_defer_init(&dfops, &firstfsb);
 		tp->t_dfops = &dfops;
-		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1,
-				&firstfsb, tp->t_dfops);
+		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1, &firstfsb);
 		if (error)
 			goto out_defer;
 
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 290ae13d4673..a54f095c1409 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -466,8 +466,7 @@ xfs_inactive_symlink_rmt(
 	/*
 	 * Unmap the dead block(s) to the dfops.
 	 */
-	error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps,
-			    &first_block, tp->t_dfops, &done);
+	error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, &first_block, &done);
 	if (error)
 		goto error_bmap_cancel;
 	ASSERT(done);
-- 
cgit v1.2.3


From ff3edf255da7a1ceb0fb2cb7f195fc27edd0091d Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:14 -0700
Subject: xfs: remove xfs_bmapi_remap() dfops param

All xfs_bmapi_remap() callers already use ->t_dfops. Note that the
deferred completion context always sets ->t_dfops itself when the
caller has not already done so. Remove the unnecessary parameter and
access ->t_dfops directly.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 8 ++++----
 fs/xfs/libxfs/xfs_bmap.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 9c778e50ad19..236e773073cf 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4511,7 +4511,6 @@ xfs_bmapi_remap(
 	xfs_fileoff_t		bno,
 	xfs_filblks_t		len,
 	xfs_fsblock_t		startblock,
-	struct xfs_defer_ops	*dfops,
 	int			flags)
 {
 	struct xfs_mount	*mp = ip->i_mount;
@@ -4561,7 +4560,7 @@ xfs_bmapi_remap(
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = firstblock;
-		cur->bc_private.b.dfops = dfops;
+		cur->bc_private.b.dfops = tp->t_dfops;
 		cur->bc_private.b.flags = 0;
 	}
 
@@ -4574,7 +4573,7 @@ xfs_bmapi_remap(
 		got.br_state = XFS_EXT_NORM;
 
 	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
-			&cur, &got, &firstblock, dfops, &logflags, flags);
+			&cur, &got, &firstblock, tp->t_dfops, &logflags, flags);
 	if (error)
 		goto error0;
 
@@ -6185,8 +6184,9 @@ xfs_bmap_finish_one(
 
 	switch (type) {
 	case XFS_BMAP_MAP:
+		ASSERT(dfops == tp->t_dfops);
 		error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
-				startblock, dfops, 0);
+				startblock, 0);
 		*blockcount = 0;
 		break;
 	case XFS_BMAP_UNMAP:
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index a83906ec6141..2728e98e991a 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -284,6 +284,6 @@ xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork,
 
 int	xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock,
-		struct xfs_defer_ops *dfops, int flags);
+		int flags);
 
 #endif	/* __XFS_BMAP_H__ */
-- 
cgit v1.2.3


From 3e3673e3029c1dedf75a1688a5203d9550adf490 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:14 -0700
Subject: xfs: remove struct xfs_bmalloca dfops field

Now that bma.dfops is only assigned from ->t_dfops, replace all
accesses to the former with the latter and remove the unnecessary
field. This patch does not change behavior.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 30 +++++++++++++++---------------
 fs/xfs/libxfs/xfs_bmap.h |  1 -
 fs/xfs/xfs_filestream.c  |  3 ++-
 3 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 236e773073cf..e24c54799aae 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1806,7 +1806,7 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-					bma->firstblock, bma->dfops,
+					bma->firstblock, bma->tp->t_dfops,
 					&bma->cur, 1, &tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
@@ -1884,7 +1884,7 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-				bma->firstblock, bma->dfops, &bma->cur, 1,
+				bma->firstblock, bma->tp->t_dfops, &bma->cur, 1,
 				&tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
@@ -1965,8 +1965,8 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-					bma->firstblock, bma->dfops, &bma->cur,
-					1, &tmp_rval, whichfork);
+					bma->firstblock, bma->tp->t_dfops,
+					&bma->cur, 1, &tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
 				goto done;
@@ -1991,7 +1991,7 @@ xfs_bmap_add_extent_delay_real(
 
 	/* add reverse mapping unless caller opted out */
 	if (!(bma->flags & XFS_BMAPI_NORMAP)) {
-		error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip,
+		error = xfs_rmap_map_extent(mp, bma->tp->t_dfops, bma->ip,
 				whichfork, new);
 		if (error)
 			goto done;
@@ -2003,7 +2003,7 @@ xfs_bmap_add_extent_delay_real(
 
 		ASSERT(bma->cur == NULL);
 		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-				bma->firstblock, bma->dfops, &bma->cur,
+				bma->firstblock, bma->tp->t_dfops, &bma->cur,
 				da_old > 0, &tmp_logflags, whichfork);
 		bma->logflags |= tmp_logflags;
 		if (error)
@@ -3480,7 +3480,7 @@ xfs_bmap_btalloc(
 			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
 		if (error)
 			return error;
-	} else if (ap->dfops->dop_low) {
+	} else if (ap->tp->t_dfops->dop_low) {
 		if (xfs_inode_is_filestream(ap->ip))
 			args.type = XFS_ALLOCTYPE_FIRST_AG;
 		else
@@ -3515,7 +3515,7 @@ xfs_bmap_btalloc(
 	 * is >= the stripe unit and the allocation offset is
 	 * at the end of file.
 	 */
-	if (!ap->dfops->dop_low && ap->aeof) {
+	if (!ap->tp->t_dfops->dop_low && ap->aeof) {
 		if (!ap->offset) {
 			args.alignment = stripe_align;
 			atype = args.type;
@@ -3607,7 +3607,7 @@ xfs_bmap_btalloc(
 		args.total = ap->minlen;
 		if ((error = xfs_alloc_vextent(&args)))
 			return error;
-		ap->dfops->dop_low = true;
+		ap->tp->t_dfops->dop_low = true;
 	}
 	if (args.fsbno != NULLFSBLOCK) {
 		/*
@@ -4082,7 +4082,7 @@ xfs_bmapi_allocate(
 	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
 		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
 		bma->cur->bc_private.b.firstblock = *bma->firstblock;
-		bma->cur->bc_private.b.dfops = bma->dfops;
+		bma->cur->bc_private.b.dfops = bma->tp->t_dfops;
 	}
 	/*
 	 * Bump the number of extents we've allocated
@@ -4118,8 +4118,8 @@ xfs_bmapi_allocate(
 	else
 		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
 				whichfork, &bma->icur, &bma->cur, &bma->got,
-				bma->firstblock, bma->dfops, &bma->logflags,
-				bma->flags);
+				bma->firstblock, bma->tp->t_dfops,
+				&bma->logflags, bma->flags);
 
 	bma->logflags |= tmp_logflags;
 	if (error)
@@ -4171,7 +4171,7 @@ xfs_bmapi_convert_unwritten(
 		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
 					bma->ip, whichfork);
 		bma->cur->bc_private.b.firstblock = *bma->firstblock;
-		bma->cur->bc_private.b.dfops = bma->dfops;
+		bma->cur->bc_private.b.dfops = bma->tp->t_dfops;
 	}
 	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
 				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
@@ -4189,7 +4189,7 @@ xfs_bmapi_convert_unwritten(
 
 	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
 			&bma->icur, &bma->cur, mval, bma->firstblock,
-			bma->dfops, &tmp_logflags);
+			bma->tp->t_dfops, &tmp_logflags);
 	/*
 	 * Log the inode core unconditionally in the unwritten extent conversion
 	 * path because the conversion might not have done so (e.g., if the
@@ -4336,8 +4336,8 @@ xfs_bmapi_write(
 	bma.ip = ip;
 	bma.total = total;
 	bma.datatype = 0;
-	bma.dfops = tp ? tp->t_dfops : NULL;
 	bma.firstblock = firstblock;
+	ASSERT(!tp || tp->t_dfops);
 
 	while (bno < end && n < *nmap) {
 		bool			need_alloc = false, wasdelay = false;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 2728e98e991a..560235603444 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -20,7 +20,6 @@ extern kmem_zone_t	*xfs_bmap_free_item_zone;
  */
 struct xfs_bmalloca {
 	xfs_fsblock_t		*firstblock; /* i/o first block allocated */
-	struct xfs_defer_ops	*dfops;	/* bmap freelist */
 	struct xfs_trans	*tp;	/* transaction pointer */
 	struct xfs_inode	*ip;	/* incore inode pointer */
 	struct xfs_bmbt_irec	prev;	/* extent before the new one */
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 2d2c5ab9143c..56a3999cefae 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -19,6 +19,7 @@
 #include "xfs_filestream.h"
 #include "xfs_trace.h"
 #include "xfs_ag_resv.h"
+#include "xfs_trans.h"
 
 struct xfs_fstrm_item {
 	struct xfs_mru_cache_elem	mru;
@@ -377,7 +378,7 @@ xfs_filestream_new_ag(
 
 	if (xfs_alloc_is_userdata(ap->datatype))
 		flags |= XFS_PICK_USERDATA;
-	if (ap->dfops->dop_low)
+	if (ap->tp->t_dfops->dop_low)
 		flags |= XFS_PICK_LOWSPACE;
 
 	err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
-- 
cgit v1.2.3


From f4a9cf97faf4adb27e4e105beda420bb5253c502 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:15 -0700
Subject: xfs: use ->t_dfops for collapse/insert range operations

Use ->t_dfops for the collapse and insert range transactions. These
are the only callers of the respective bmap helpers, so replace the
unnecessary dfops parameters with direct accesses to ->t_dfops.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 16 +++++++---------
 fs/xfs/libxfs/xfs_bmap.h |  6 ++----
 fs/xfs/xfs_bmap_util.c   | 14 ++++++++------
 3 files changed, 17 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index e24c54799aae..89a21dc7a7cb 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5683,8 +5683,7 @@ xfs_bmap_collapse_extents(
 	xfs_fileoff_t		*next_fsb,
 	xfs_fileoff_t		offset_shift_fsb,
 	bool			*done,
-	xfs_fsblock_t		*firstblock,
-	struct xfs_defer_ops	*dfops)
+	xfs_fsblock_t		*firstblock)
 {
 	int			whichfork = XFS_DATA_FORK;
 	struct xfs_mount	*mp = ip->i_mount;
@@ -5718,7 +5717,7 @@ xfs_bmap_collapse_extents(
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = *firstblock;
-		cur->bc_private.b.dfops = dfops;
+		cur->bc_private.b.dfops = tp->t_dfops;
 		cur->bc_private.b.flags = 0;
 	}
 
@@ -5739,7 +5738,7 @@ xfs_bmap_collapse_extents(
 		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
 			error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
 					&icur, &got, &prev, cur, &logflags,
-					dfops);
+					tp->t_dfops);
 			if (error)
 				goto del_cursor;
 			goto done;
@@ -5752,7 +5751,7 @@ xfs_bmap_collapse_extents(
 	}
 
 	error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
-			&logflags, dfops, new_startoff);
+			&logflags, tp->t_dfops, new_startoff);
 	if (error)
 		goto del_cursor;
 
@@ -5806,8 +5805,7 @@ xfs_bmap_insert_extents(
 	xfs_fileoff_t		offset_shift_fsb,
 	bool			*done,
 	xfs_fileoff_t		stop_fsb,
-	xfs_fsblock_t		*firstblock,
-	struct xfs_defer_ops	*dfops)
+	xfs_fsblock_t		*firstblock)
 {
 	int			whichfork = XFS_DATA_FORK;
 	struct xfs_mount	*mp = ip->i_mount;
@@ -5841,7 +5839,7 @@ xfs_bmap_insert_extents(
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = *firstblock;
-		cur->bc_private.b.dfops = dfops;
+		cur->bc_private.b.dfops = tp->t_dfops;
 		cur->bc_private.b.flags = 0;
 	}
 
@@ -5884,7 +5882,7 @@ xfs_bmap_insert_extents(
 	}
 
 	error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
-			&logflags, dfops, new_startoff);
+			&logflags, tp->t_dfops, new_startoff);
 	if (error)
 		goto del_cursor;
 
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 560235603444..83180c7cf3ee 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -219,14 +219,12 @@ void	xfs_bmap_del_extent_cow(struct xfs_inode *ip,
 uint	xfs_default_attroffset(struct xfs_inode *ip);
 int	xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
-		bool *done, xfs_fsblock_t *firstblock,
-		struct xfs_defer_ops *dfops);
+		bool *done, xfs_fsblock_t *firstblock);
 int	xfs_bmap_can_insert_extents(struct xfs_inode *ip, xfs_fileoff_t off,
 		xfs_fileoff_t shift);
 int	xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
-		bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
-		struct xfs_defer_ops *dfops);
+		bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock);
 int	xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
 int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
 		xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 6c02cd264045..cd5d410acc73 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1348,12 +1348,13 @@ xfs_collapse_file_space(
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
 		xfs_defer_init(&dfops, &first_block);
+		tp->t_dfops = &dfops;
 		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
-				&done, &first_block, &dfops);
+				&done, &first_block);
 		if (error)
 			goto out_bmap_cancel;
 
-		error = xfs_defer_finish(&tp, &dfops);
+		error = xfs_defer_finish(&tp, tp->t_dfops);
 		if (error)
 			goto out_bmap_cancel;
 		error = xfs_trans_commit(tp);
@@ -1362,7 +1363,7 @@ xfs_collapse_file_space(
 	return error;
 
 out_bmap_cancel:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 out_trans_cancel:
 	xfs_trans_cancel(tp);
 	return error;
@@ -1427,12 +1428,13 @@ xfs_insert_file_space(
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 		xfs_defer_init(&dfops, &first_block);
+		tp->t_dfops = &dfops;
 		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
-				&done, stop_fsb, &first_block, &dfops);
+				&done, stop_fsb, &first_block);
 		if (error)
 			goto out_bmap_cancel;
 
-		error = xfs_defer_finish(&tp, &dfops);
+		error = xfs_defer_finish(&tp, tp->t_dfops);
 		if (error)
 			goto out_bmap_cancel;
 		error = xfs_trans_commit(tp);
@@ -1441,7 +1443,7 @@ xfs_insert_file_space(
 	return error;
 
 out_bmap_cancel:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 	xfs_trans_cancel(tp);
 	return error;
 }
-- 
cgit v1.2.3


From 81ba8f3e947c3c53beb535c7f29fe402429cae37 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:16 -0700
Subject: xfs: remove dfops param from internal bmap extent helpers

All callers of the various bmap extent helpers now use ->t_dfops.
Remove the unnecessary dfops params and access ->t_dfops directly.
This patch does not change behavior.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 92 +++++++++++++++++++++++-------------------------
 1 file changed, 44 insertions(+), 48 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 89a21dc7a7cb..66655973c229 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -644,25 +644,24 @@ xfs_bmap_btree_to_extents(
  */
 STATIC int					/* error */
 xfs_bmap_extents_to_btree(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_inode_t		*ip,		/* incore inode pointer */
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode pointer */
 	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
-	struct xfs_defer_ops	*dfops,		/* blocks freed in xaction */
-	xfs_btree_cur_t		**curp,		/* cursor returned to caller */
+	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
 	int			wasdel,		/* converting a delayed alloc */
 	int			*logflagsp,	/* inode logging flags */
 	int			whichfork)	/* data or attr fork */
 {
 	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
-	xfs_buf_t		*abp;		/* buffer for ablock */
-	xfs_alloc_arg_t		args;		/* allocation arguments */
-	xfs_bmbt_rec_t		*arp;		/* child record pointer */
+	struct xfs_buf		*abp;		/* buffer for ablock */
+	struct xfs_alloc_arg	args;		/* allocation arguments */
+	struct xfs_bmbt_rec	*arp;		/* child record pointer */
 	struct xfs_btree_block	*block;		/* btree root block */
-	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
+	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
 	int			error;		/* error return value */
-	xfs_ifork_t		*ifp;		/* inode fork pointer */
-	xfs_bmbt_key_t		*kp;		/* root block key pointer */
-	xfs_mount_t		*mp;		/* mount structure */
+	struct xfs_ifork	*ifp;		/* inode fork pointer */
+	struct xfs_bmbt_key	*kp;		/* root block key pointer */
+	struct xfs_mount	*mp;		/* mount structure */
 	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
 	struct xfs_iext_cursor	icur;
 	struct xfs_bmbt_irec	rec;
@@ -691,7 +690,7 @@ xfs_bmap_extents_to_btree(
 	 */
 	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 	cur->bc_private.b.firstblock = *firstblock;
-	cur->bc_private.b.dfops = dfops;
+	cur->bc_private.b.dfops = tp->t_dfops;
 	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
 	/*
 	 * Convert to a btree with two levels, one record in root.
@@ -705,7 +704,7 @@ xfs_bmap_extents_to_btree(
 	if (*firstblock == NULLFSBLOCK) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
-	} else if (dfops->dop_low) {
+	} else if (tp->t_dfops->dop_low) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.fsbno = *firstblock;
 	} else {
@@ -958,8 +957,8 @@ error0:
  */
 STATIC int					/* error */
 xfs_bmap_add_attrfork_extents(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_inode_t		*ip,		/* incore inode pointer */
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode pointer */
 	xfs_fsblock_t		*firstblock,	/* first block allocated */
 	int			*flags)		/* inode logging flags */
 {
@@ -969,8 +968,8 @@ xfs_bmap_add_attrfork_extents(
 	if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
 		return 0;
 	cur = NULL;
-	error = xfs_bmap_extents_to_btree(tp, ip, firstblock, tp->t_dfops, &cur, 0,
-		flags, XFS_DATA_FORK);
+	error = xfs_bmap_extents_to_btree(tp, ip, firstblock, &cur, 0, flags,
+					  XFS_DATA_FORK);
 	if (cur) {
 		cur->bc_private.b.allocated = 0;
 		xfs_btree_del_cursor(cur,
@@ -1806,8 +1805,8 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-					bma->firstblock, bma->tp->t_dfops,
-					&bma->cur, 1, &tmp_rval, whichfork);
+					bma->firstblock, &bma->cur, 1,
+					&tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
 				goto done;
@@ -1884,8 +1883,8 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-				bma->firstblock, bma->tp->t_dfops, &bma->cur, 1,
-				&tmp_rval, whichfork);
+				bma->firstblock, &bma->cur, 1, &tmp_rval,
+				whichfork);
 			rval |= tmp_rval;
 			if (error)
 				goto done;
@@ -1965,8 +1964,8 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-					bma->firstblock, bma->tp->t_dfops,
-					&bma->cur, 1, &tmp_rval, whichfork);
+					bma->firstblock, &bma->cur, 1,
+					&tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
 				goto done;
@@ -2003,8 +2002,8 @@ xfs_bmap_add_extent_delay_real(
 
 		ASSERT(bma->cur == NULL);
 		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-				bma->firstblock, bma->tp->t_dfops, &bma->cur,
-				da_old > 0, &tmp_logflags, whichfork);
+				bma->firstblock, &bma->cur, da_old > 0,
+				&tmp_logflags, whichfork);
 		bma->logflags |= tmp_logflags;
 		if (error)
 			goto done;
@@ -2044,7 +2043,6 @@ xfs_bmap_add_extent_unwritten_real(
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
-	struct xfs_defer_ops	*dfops,	/* list of extents to be freed */
 	int			*logflagsp) /* inode logging flags */
 {
 	xfs_btree_cur_t		*cur;	/* btree cursor */
@@ -2058,6 +2056,7 @@ xfs_bmap_add_extent_unwritten_real(
 	int			state = xfs_bmap_fork_to_state(whichfork);
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_bmbt_irec	old;
+	struct xfs_defer_ops	*dfops = tp ? tp->t_dfops : NULL;
 
 	*logflagsp = 0;
 
@@ -2485,8 +2484,8 @@ xfs_bmap_add_extent_unwritten_real(
 		int	tmp_logflags;	/* partial log flag return val */
 
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
-				0, &tmp_logflags, whichfork);
+		error = xfs_bmap_extents_to_btree(tp, ip, first, &cur, 0,
+				&tmp_logflags, whichfork);
 		*logflagsp |= tmp_logflags;
 		if (error)
 			goto done;
@@ -2658,7 +2657,6 @@ xfs_bmap_add_extent_hole_real(
 	struct xfs_btree_cur	**curp,
 	struct xfs_bmbt_irec	*new,
 	xfs_fsblock_t		*first,
-	struct xfs_defer_ops	*dfops,
 	int			*logflagsp,
 	int			flags)
 {
@@ -2839,7 +2837,8 @@ xfs_bmap_add_extent_hole_real(
 
 	/* add reverse mapping unless caller opted out */
 	if (!(flags & XFS_BMAPI_NORMAP)) {
-		error = xfs_rmap_map_extent(mp, dfops, ip, whichfork, new);
+		error = xfs_rmap_map_extent(mp, tp->t_dfops, ip, whichfork,
+				new);
 		if (error)
 			goto done;
 	}
@@ -2849,8 +2848,8 @@ xfs_bmap_add_extent_hole_real(
 		int	tmp_logflags;	/* partial log flag return val */
 
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, curp,
-				0, &tmp_logflags, whichfork);
+		error = xfs_bmap_extents_to_btree(tp, ip, first, curp, 0,
+				&tmp_logflags, whichfork);
 		*logflagsp |= tmp_logflags;
 		cur = *curp;
 		if (error)
@@ -4118,8 +4117,7 @@ xfs_bmapi_allocate(
 	else
 		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
 				whichfork, &bma->icur, &bma->cur, &bma->got,
-				bma->firstblock, bma->tp->t_dfops,
-				&bma->logflags, bma->flags);
+				bma->firstblock, &bma->logflags, bma->flags);
 
 	bma->logflags |= tmp_logflags;
 	if (error)
@@ -4189,7 +4187,7 @@ xfs_bmapi_convert_unwritten(
 
 	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
 			&bma->icur, &bma->cur, mval, bma->firstblock,
-			bma->tp->t_dfops, &tmp_logflags);
+			&tmp_logflags);
 	/*
 	 * Log the inode core unconditionally in the unwritten extent conversion
 	 * path because the conversion might not have done so (e.g., if the
@@ -4573,7 +4571,7 @@ xfs_bmapi_remap(
 		got.br_state = XFS_EXT_NORM;
 
 	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
-			&cur, &got, &firstblock, tp->t_dfops, &logflags, flags);
+			&cur, &got, &firstblock, &logflags, flags);
 	if (error)
 		goto error0;
 
@@ -4892,7 +4890,6 @@ xfs_bmap_del_extent_real(
 	xfs_inode_t		*ip,	/* incore inode pointer */
 	xfs_trans_t		*tp,	/* current transaction pointer */
 	struct xfs_iext_cursor	*icur,
-	struct xfs_defer_ops	*dfops,	/* list of extents to be freed */
 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
 	xfs_bmbt_irec_t		*del,	/* data to remove from extents */
 	int			*logflagsp, /* inode logging flags */
@@ -4915,6 +4912,7 @@ xfs_bmap_del_extent_real(
 	uint			qfield;	/* quota field to update */
 	int			state = xfs_bmap_fork_to_state(whichfork);
 	struct xfs_bmbt_irec	old;
+	struct xfs_defer_ops	*dfops = tp ? tp->t_dfops : NULL;
 
 	mp = ip->i_mount;
 	XFS_STATS_INC(mp, xs_del_exlist);
@@ -5341,7 +5339,7 @@ __xfs_bunmapi(
 			del.br_state = XFS_EXT_UNWRITTEN;
 			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
 					whichfork, &icur, &cur, &del,
-					firstblock, dfops, &logflags);
+					firstblock, &logflags);
 			if (error)
 				goto error0;
 			goto nodelete;
@@ -5398,8 +5396,7 @@ __xfs_bunmapi(
 				prev.br_state = XFS_EXT_UNWRITTEN;
 				error = xfs_bmap_add_extent_unwritten_real(tp,
 						ip, whichfork, &icur, &cur,
-						&prev, firstblock, dfops,
-						&logflags);
+						&prev, firstblock, &logflags);
 				if (error)
 					goto error0;
 				goto nodelete;
@@ -5408,8 +5405,7 @@ __xfs_bunmapi(
 				del.br_state = XFS_EXT_UNWRITTEN;
 				error = xfs_bmap_add_extent_unwritten_real(tp,
 						ip, whichfork, &icur, &cur,
-						&del, firstblock, dfops,
-						&logflags);
+						&del, firstblock, &logflags);
 				if (error)
 					goto error0;
 				goto nodelete;
@@ -5421,8 +5417,8 @@ delete:
 			error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
 					&got, &del);
 		} else {
-			error = xfs_bmap_del_extent_real(ip, tp, &icur, dfops,
-					cur, &del, &tmp_logflags, whichfork,
+			error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
+					&del, &tmp_logflags, whichfork,
 					flags);
 			logflags |= tmp_logflags;
 		}
@@ -5456,8 +5452,8 @@ nodelete:
 	 */
 	if (xfs_bmap_needs_btree(ip, whichfork)) {
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops,
-			&cur, 0, &tmp_logflags, whichfork);
+		error = xfs_bmap_extents_to_btree(tp, ip, firstblock, &cur, 0,
+				&tmp_logflags, whichfork);
 		logflags |= tmp_logflags;
 		if (error)
 			goto error0;
@@ -6007,8 +6003,8 @@ xfs_bmap_split_extent_at(
 		int tmp_logflags; /* partial log flag return val */
 
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, tp->t_dfops,
-				&cur, 0, &tmp_logflags, whichfork);
+		error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, &cur, 0,
+				&tmp_logflags, whichfork);
 		logflags |= tmp_logflags;
 	}
 
-- 
cgit v1.2.3


From 42b394a92562b464e9ef81954ca93930c037a51b Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:16 -0700
Subject: xfs: remove xfs_btree_cur bmbt dfops field

All assignments of xfs_btree_cur.bc_private.b.dfops originate from
->t_dfops. Replace accesses of the former with the latter and remove
the unnecessary field. This patch does not change behavior.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c       | 12 +-----------
 fs/xfs/libxfs/xfs_bmap_btree.c |  9 +++------
 fs/xfs/libxfs/xfs_btree.h      |  1 -
 3 files changed, 4 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 66655973c229..32d8d87b7582 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -624,7 +624,7 @@ xfs_bmap_btree_to_extents(
 	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 		return error;
 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
-	xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo);
+	xfs_bmap_add_free(mp, cur->bc_tp->t_dfops, cbno, 1, &oinfo);
 	ip->i_d.di_nblocks--;
 	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(tp, cbp);
@@ -690,7 +690,6 @@ xfs_bmap_extents_to_btree(
 	 */
 	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 	cur->bc_private.b.firstblock = *firstblock;
-	cur->bc_private.b.dfops = tp->t_dfops;
 	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
 	/*
 	 * Convert to a btree with two levels, one record in root.
@@ -929,7 +928,6 @@ xfs_bmap_add_attrfork_btree(
 		*flags |= XFS_ILOG_DBROOT;
 	else {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
-		cur->bc_private.b.dfops = tp->t_dfops;
 		cur->bc_private.b.firstblock = *firstblock;
 		error = xfs_bmbt_lookup_first(cur, &stat);
 		if (error)
@@ -4081,7 +4079,6 @@ xfs_bmapi_allocate(
 	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
 		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
 		bma->cur->bc_private.b.firstblock = *bma->firstblock;
-		bma->cur->bc_private.b.dfops = bma->tp->t_dfops;
 	}
 	/*
 	 * Bump the number of extents we've allocated
@@ -4169,7 +4166,6 @@ xfs_bmapi_convert_unwritten(
 		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
 					bma->ip, whichfork);
 		bma->cur->bc_private.b.firstblock = *bma->firstblock;
-		bma->cur->bc_private.b.dfops = bma->tp->t_dfops;
 	}
 	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
 				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
@@ -4558,7 +4554,6 @@ xfs_bmapi_remap(
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = firstblock;
-		cur->bc_private.b.dfops = tp->t_dfops;
 		cur->bc_private.b.flags = 0;
 	}
 
@@ -5149,7 +5144,6 @@ __xfs_bunmapi(
 	xfs_fsblock_t		*firstblock)	/* first allocated block
 						   controls a.g. for allocs */
 {
-	struct xfs_defer_ops	*dfops = tp ? tp->t_dfops : NULL;
 	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
 	struct xfs_bmbt_irec	del;		/* extent being deleted */
 	int			error;		/* error return value */
@@ -5223,7 +5217,6 @@ __xfs_bunmapi(
 		ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = *firstblock;
-		cur->bc_private.b.dfops = dfops;
 		cur->bc_private.b.flags = 0;
 	} else
 		cur = NULL;
@@ -5713,7 +5706,6 @@ xfs_bmap_collapse_extents(
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = *firstblock;
-		cur->bc_private.b.dfops = tp->t_dfops;
 		cur->bc_private.b.flags = 0;
 	}
 
@@ -5835,7 +5827,6 @@ xfs_bmap_insert_extents(
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = *firstblock;
-		cur->bc_private.b.dfops = tp->t_dfops;
 		cur->bc_private.b.flags = 0;
 	}
 
@@ -5959,7 +5950,6 @@ xfs_bmap_split_extent_at(
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = *firstfsb;
-		cur->bc_private.b.dfops = tp->t_dfops;
 		cur->bc_private.b.flags = 0;
 		error = xfs_bmbt_lookup_eq(cur, &got, &i);
 		if (error)
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index e1a2d9ceb615..e8b01af09db5 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -176,7 +176,6 @@ xfs_bmbt_dup_cursor(
 	 * since init cursor doesn't get them.
 	 */
 	new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
-	new->bc_private.b.dfops = cur->bc_private.b.dfops;
 	new->bc_private.b.flags = cur->bc_private.b.flags;
 
 	return new;
@@ -189,7 +188,6 @@ xfs_bmbt_update_cursor(
 {
 	ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
 	       (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
-	ASSERT(dst->bc_private.b.dfops == src->bc_private.b.dfops);
 
 	dst->bc_private.b.allocated += src->bc_private.b.allocated;
 	dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
@@ -230,7 +228,7 @@ xfs_bmbt_alloc_block(
 		 * block allocation here and corrupt the filesystem.
 		 */
 		args.minleft = args.tp->t_blk_res;
-	} else if (cur->bc_private.b.dfops->dop_low) {
+	} else if (cur->bc_tp->t_dfops->dop_low) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
 	} else {
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -257,7 +255,7 @@ xfs_bmbt_alloc_block(
 		error = xfs_alloc_vextent(&args);
 		if (error)
 			goto error0;
-		cur->bc_private.b.dfops->dop_low = true;
+		cur->bc_tp->t_dfops->dop_low = true;
 	}
 	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
 		*stat = 0;
@@ -293,7 +291,7 @@ xfs_bmbt_free_block(
 	struct xfs_owner_info	oinfo;
 
 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork);
-	xfs_bmap_add_free(mp, cur->bc_private.b.dfops, fsbno, 1, &oinfo);
+	xfs_bmap_add_free(mp, cur->bc_tp->t_dfops, fsbno, 1, &oinfo);
 	ip->i_d.di_nblocks--;
 
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -565,7 +563,6 @@ xfs_bmbt_init_cursor(
 	cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
 	cur->bc_private.b.ip = ip;
 	cur->bc_private.b.firstblock = NULLFSBLOCK;
-	cur->bc_private.b.dfops = NULL;
 	cur->bc_private.b.allocated = 0;
 	cur->bc_private.b.flags = 0;
 	cur->bc_private.b.whichfork = whichfork;
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 0a4fdf7f11a7..cc94ac765dec 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -215,7 +215,6 @@ typedef struct xfs_btree_cur
 		} a;
 		struct {			/* needed for BMAP */
 			struct xfs_inode *ip;	/* pointer to our inode */
-			struct xfs_defer_ops *dfops;	/* deferred updates */
 			xfs_fsblock_t	firstblock;	/* 1st blk allocated */
 			int		allocated;	/* count of alloced */
 			short		forksize;	/* fork's inode space */
-- 
cgit v1.2.3


From ed7ef8e55c6f24ae4347b5bda89e00af475ebc89 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:17 -0700
Subject: xfs: remove unused btree cursor bc_private.a.dfops field

The xfs_btree_cur.bc_private.a.dfops field is only ever initialized
by the refcountbt cursor init function. The only caller of that
function that passes a non-NULL dfops runs in deferred completion
context, where the dfops is already attached to ->t_dfops.

In addition to that, the only actual reference of a.dfops is the
cursor duplication function, which means the field is effectively
unused.

Remove the dfops field from the bc_private.a struct. Any future users
can acquire the dfops from the transaction. This patch does not
change behavior.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_btree.h          | 1 -
 fs/xfs/libxfs/xfs_refcount.c       | 4 ++--
 fs/xfs/libxfs/xfs_refcount_btree.c | 7 ++-----
 fs/xfs/libxfs/xfs_refcount_btree.h | 4 ++--
 fs/xfs/scrub/common.c              | 2 +-
 fs/xfs/xfs_fsmap.c                 | 2 +-
 fs/xfs/xfs_reflink.c               | 2 +-
 7 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index cc94ac765dec..b986a8fc8d40 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -209,7 +209,6 @@ typedef struct xfs_btree_cur
 	union {
 		struct {			/* needed for BNO, CNT, INO */
 			struct xfs_buf	*agbp;	/* agf/agi buffer pointer */
-			struct xfs_defer_ops *dfops;	/* deferred updates */
 			xfs_agnumber_t	agno;	/* ag number */
 			union xfs_btree_cur_private	priv;
 		} a;
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 9dda6fd0bb13..8e330a196060 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1132,7 +1132,7 @@ xfs_refcount_finish_one(
 		if (!agbp)
 			return -EFSCORRUPTED;
 
-		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, dfops);
+		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
 		if (!rcur) {
 			error = -ENOMEM;
 			goto out_cur;
@@ -1666,7 +1666,7 @@ xfs_refcount_recover_cow_leftovers(
 		error = -ENOMEM;
 		goto out_trans;
 	}
-	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
+	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
 
 	/* Find all the leftover CoW staging extents. */
 	memset(&low, 0, sizeof(low));
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index b71937982c5b..393aa88f93db 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -27,8 +27,7 @@ xfs_refcountbt_dup_cursor(
 	struct xfs_btree_cur	*cur)
 {
 	return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp,
-			cur->bc_private.a.agbp, cur->bc_private.a.agno,
-			cur->bc_private.a.dfops);
+			cur->bc_private.a.agbp, cur->bc_private.a.agno);
 }
 
 STATIC void
@@ -323,8 +322,7 @@ xfs_refcountbt_init_cursor(
 	struct xfs_mount	*mp,
 	struct xfs_trans	*tp,
 	struct xfs_buf		*agbp,
-	xfs_agnumber_t		agno,
-	struct xfs_defer_ops	*dfops)
+	xfs_agnumber_t		agno)
 {
 	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
 	struct xfs_btree_cur	*cur;
@@ -344,7 +342,6 @@ xfs_refcountbt_init_cursor(
 
 	cur->bc_private.a.agbp = agbp;
 	cur->bc_private.a.agno = agno;
-	cur->bc_private.a.dfops = dfops;
 	cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
 
 	cur->bc_private.a.priv.refc.nr_ops = 0;
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h
index d2852b6e1fa8..801c2c7732fd 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.h
+++ b/fs/xfs/libxfs/xfs_refcount_btree.h
@@ -44,8 +44,8 @@ struct xfs_mount;
 		 ((index) - 1) * sizeof(xfs_refcount_ptr_t)))
 
 extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp,
-		struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agnumber_t agno,
-		struct xfs_defer_ops *dfops);
+		struct xfs_trans *tp, struct xfs_buf *agbp,
+		xfs_agnumber_t agno);
 extern int xfs_refcountbt_maxrecs(int blocklen, bool leaf);
 extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp);
 
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 70e70c69f83f..385fa5b9c878 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -499,7 +499,7 @@ xfs_scrub_ag_btcur_init(
 	/* Set up a refcountbt cursor for cross-referencing. */
 	if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) {
 		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
-				sa->agf_bp, agno, NULL);
+				sa->agf_bp, agno);
 		if (!sa->refc_cur)
 			goto err;
 	}
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index c7157bc48bd1..297d7ce2901e 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -214,7 +214,7 @@ xfs_getfsmap_is_shared(
 	/* Are there any shared blocks here? */
 	flen = 0;
 	cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp,
-			info->agno, NULL);
+			info->agno);
 
 	error = xfs_refcount_find_shared(cur, rec->rm_startblock,
 			rec->rm_blockcount, &fbno, &flen, false);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 0ac0706c98e8..90457c2a7569 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -157,7 +157,7 @@ xfs_reflink_find_shared(
 	if (!agbp)
 		return -ENOMEM;
 
-	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
+	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
 
 	error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen,
 			find_end_of_shared);
-- 
cgit v1.2.3


From 7a7943c7e09546d4cc78b9756de23fd395b2cba4 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:17 -0700
Subject: xfs: use ->t_dfops for rmap extent swap operations

xfs_swap_extent_rmap() uses a local dfops instance with a
transaction from the caller. Since there is only one caller, pull
the dfops structure into the caller and attach it to the
transaction. This avoids the need to clear ->t_dfops to prevent
invalid stack memory access.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index cd5d410acc73..6bea8df348bc 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1571,6 +1571,8 @@ xfs_swap_extent_rmap(
 	struct xfs_inode		*ip,
 	struct xfs_inode		*tip)
 {
+	struct xfs_trans		*tp = *tpp;
+	struct xfs_mount		*mp = tp->t_mountp;
 	struct xfs_bmbt_irec		irec;
 	struct xfs_bmbt_irec		uirec;
 	struct xfs_bmbt_irec		tirec;
@@ -1578,7 +1580,6 @@ xfs_swap_extent_rmap(
 	xfs_fileoff_t			end_fsb;
 	xfs_filblks_t			count_fsb;
 	xfs_fsblock_t			firstfsb;
-	struct xfs_defer_ops		dfops;
 	int				error;
 	xfs_filblks_t			ilen;
 	xfs_filblks_t			rlen;
@@ -1614,7 +1615,7 @@ xfs_swap_extent_rmap(
 
 		/* Unmap the old blocks in the source file. */
 		while (tirec.br_blockcount) {
-			xfs_defer_init(&dfops, &firstfsb);
+			xfs_defer_init(tp->t_dfops, &firstfsb);
 			trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
 
 			/* Read extent from the source file */
@@ -1636,31 +1637,32 @@ xfs_swap_extent_rmap(
 			trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);
 
 			/* Remove the mapping from the donor file. */
-			error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops,
-					tip, &uirec);
+			error = xfs_bmap_unmap_extent(mp, tp->t_dfops, tip,
+					&uirec);
 			if (error)
 				goto out_defer;
 
 			/* Remove the mapping from the source file. */
-			error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops,
-					ip, &irec);
+			error = xfs_bmap_unmap_extent(mp, tp->t_dfops, ip,
+					&irec);
 			if (error)
 				goto out_defer;
 
 			/* Map the donor file's blocks into the source file. */
-			error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops,
-					ip, &uirec);
+			error = xfs_bmap_map_extent(mp, tp->t_dfops, ip,
+					&uirec);
 			if (error)
 				goto out_defer;
 
 			/* Map the source file's blocks into the donor file. */
-			error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops,
-					tip, &irec);
+			error = xfs_bmap_map_extent(mp, tp->t_dfops, tip,
+					&irec);
 			if (error)
 				goto out_defer;
 
-			xfs_defer_ijoin(&dfops, ip);
-			error = xfs_defer_finish(tpp, &dfops);
+			xfs_defer_ijoin(tp->t_dfops, ip);
+			error = xfs_defer_finish(tpp, tp->t_dfops);
+			tp = *tpp;
 			if (error)
 				goto out_defer;
 
@@ -1680,7 +1682,7 @@ xfs_swap_extent_rmap(
 	return 0;
 
 out_defer:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 out:
 	trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
 	tip->i_d.di_flags2 = tip_flags2;
@@ -1847,6 +1849,7 @@ xfs_swap_extents(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
+	struct xfs_defer_ops	dfops;
 	struct xfs_bstat	*sbp = &sxp->sx_stat;
 	int			src_log_flags, target_log_flags;
 	int			error = 0;
@@ -1854,6 +1857,7 @@ xfs_swap_extents(
 	struct xfs_ifork	*cowfp;
 	uint64_t		f;
 	int			resblks = 0;
+	xfs_fsblock_t		firstfsb;
 
 	/*
 	 * Lock the inodes against other IO, page faults and truncate to
@@ -1916,6 +1920,8 @@ xfs_swap_extents(
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
 	if (error)
 		goto out_unlock;
+	xfs_defer_init(&dfops, &firstfsb);
+	tp->t_dfops = &dfops;
 
 	/*
 	 * Lock and join the inodes to the tansaction so that transaction commit
-- 
cgit v1.2.3


From 27356a063a8572b2d3bf57d92b0a12399478958a Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:18 -0700
Subject: xfs: use ->t_dfops in cancel cow blocks operation

Use ->t_dfops of the transaction from the caller. Restore the original
->t_dfops before we return so that the transaction is not left pointing
at local stack memory.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_reflink.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 90457c2a7569..8ea4ba0e45d8 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -490,6 +490,7 @@ xfs_reflink_cancel_cow_blocks(
 	struct xfs_iext_cursor		icur;
 	xfs_fsblock_t			firstfsb;
 	struct xfs_defer_ops		dfops;
+	struct xfs_defer_ops		*odfops = (*tpp)->t_dfops;
 	int				error = 0;
 
 	if (!xfs_is_reflink_inode(ip))
@@ -517,23 +518,24 @@ xfs_reflink_cancel_cow_blocks(
 				break;
 		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
 			xfs_defer_init(&dfops, &firstfsb);
+			(*tpp)->t_dfops = &dfops;
 
 			/* Free the CoW orphan record. */
 			error = xfs_refcount_free_cow_extent(ip->i_mount,
-					&dfops, del.br_startblock,
+					(*tpp)->t_dfops, del.br_startblock,
 					del.br_blockcount);
 			if (error)
 				break;
 
-			xfs_bmap_add_free(ip->i_mount, &dfops,
+			xfs_bmap_add_free(ip->i_mount, (*tpp)->t_dfops,
 					del.br_startblock, del.br_blockcount,
 					NULL);
 
 			/* Roll the transaction */
-			xfs_defer_ijoin(&dfops, ip);
-			error = xfs_defer_finish(tpp, &dfops);
+			xfs_defer_ijoin((*tpp)->t_dfops, ip);
+			error = xfs_defer_finish(tpp, (*tpp)->t_dfops);
 			if (error) {
-				xfs_defer_cancel(&dfops);
+				xfs_defer_cancel((*tpp)->t_dfops);
 				break;
 			}
 
@@ -558,7 +560,7 @@ next_extent:
 	/* clear tag if cow fork is emptied */
 	if (!ifp->if_bytes)
 		xfs_inode_clear_cowblocks_tag(ip);
-
+	(*tpp)->t_dfops = odfops;
 	return error;
 }
 
-- 
cgit v1.2.3


From d5669ed58175f85d2c13e914c5c4e2bd3647d893 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:18 -0700
Subject: xfs: use ->t_dfops in reflink cow recover path

Use ->t_dfops of the leftover COW reservation cleanup transaction.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_refcount.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 8e330a196060..df67821fb5f4 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1692,18 +1692,19 @@ xfs_refcount_recover_cow_leftovers(
 
 		/* Free the orphan record */
 		xfs_defer_init(&dfops, &fsb);
+		tp->t_dfops = &dfops;
 		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
 		fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
-		error = xfs_refcount_free_cow_extent(mp, &dfops, fsb,
+		error = xfs_refcount_free_cow_extent(mp, tp->t_dfops, fsb,
 				rr->rr_rrec.rc_blockcount);
 		if (error)
 			goto out_defer;
 
 		/* Free the block. */
-		xfs_bmap_add_free(mp, &dfops, fsb,
+		xfs_bmap_add_free(mp, tp->t_dfops, fsb,
 				rr->rr_rrec.rc_blockcount, NULL);
 
-		error = xfs_defer_finish(&tp, &dfops);
+		error = xfs_defer_finish(&tp, tp->t_dfops);
 		if (error)
 			goto out_defer;
 
@@ -1717,7 +1718,7 @@ xfs_refcount_recover_cow_leftovers(
 
 	return error;
 out_defer:
-	xfs_defer_cancel(&dfops);
+	xfs_defer_cancel(tp->t_dfops);
 out_trans:
 	xfs_trans_cancel(tp);
 out_free:
-- 
cgit v1.2.3


From bcd2c9f33559764e0d306e226a8aa88bc2e1e6fb Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:19 -0700
Subject: xfs: refactor dfops init to attach to transaction

Most callers of xfs_defer_init() immediately attach the dfops
structure to a transaction. Add a transaction parameter to eliminate
much of this boilerplate code. This also helps self-document the
fact that many codepaths now expect a dfops pointer implicitly via
xfs_trans->t_dfops.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c        | 26 ++++++++++++++------------
 fs/xfs/libxfs/xfs_attr_remote.c |  6 +++---
 fs/xfs/libxfs/xfs_bmap.c        |  6 ++----
 fs/xfs/libxfs/xfs_defer.c       |  9 ++++++++-
 fs/xfs/libxfs/xfs_defer.h       |  3 ++-
 fs/xfs/libxfs/xfs_refcount.c    |  3 +--
 fs/xfs/xfs_bmap_util.c          | 17 ++++++-----------
 fs/xfs/xfs_dquot.c              |  5 ++---
 fs/xfs/xfs_inode.c              | 18 ++++++------------
 fs/xfs/xfs_iomap.c              |  9 +++------
 fs/xfs/xfs_log_recover.c        |  2 +-
 fs/xfs/xfs_reflink.c            | 12 ++++--------
 fs/xfs/xfs_rtalloc.c            |  3 +--
 fs/xfs/xfs_symlink.c            |  6 ++----
 14 files changed, 55 insertions(+), 70 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index a14ab9b2669e..8a7e2c0308c4 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -253,8 +253,7 @@ xfs_attr_set(
 			rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
 	if (error)
 		return error;
-	xfs_defer_init(&dfops, &firstblock);
-	args.trans->t_dfops = &dfops;
+	xfs_defer_init(args.trans, &dfops, &firstblock);
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
@@ -428,8 +427,7 @@ xfs_attr_remove(
 			&args.trans);
 	if (error)
 		return error;
-	xfs_defer_init(&dfops, &firstblock);
-	args.trans->t_dfops = &dfops;
+	xfs_defer_init(args.trans, &dfops, &firstblock);
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 	/*
@@ -600,7 +598,7 @@ xfs_attr_leaf_addname(
 		 * Commit that transaction so that the node_addname() call
 		 * can manage its own transactions.
 		 */
-		xfs_defer_init(args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
 		error = xfs_attr3_leaf_to_node(args);
 		if (error)
 			goto out_defer_cancel;
@@ -689,7 +687,8 @@ xfs_attr_leaf_addname(
 		 * If the result is small enough, shrink it all into the inode.
 		 */
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-			xfs_defer_init(args->trans->t_dfops, args->firstblock);
+			xfs_defer_init(NULL, args->trans->t_dfops,
+				       args->firstblock);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
@@ -754,7 +753,7 @@ xfs_attr_leaf_removename(
 	 * If the result is small enough, shrink it all into the inode.
 	 */
 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-		xfs_defer_init(args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 		/* bp is gone due to xfs_da_shrink_inode */
 		if (error)
@@ -883,7 +882,8 @@ restart:
 			 */
 			xfs_da_state_free(state);
 			state = NULL;
-			xfs_defer_init(args->trans->t_dfops, args->firstblock);
+			xfs_defer_init(NULL, args->trans->t_dfops,
+				       args->firstblock);
 			error = xfs_attr3_leaf_to_node(args);
 			if (error)
 				goto out_defer_cancel;
@@ -910,7 +910,7 @@ restart:
 		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
 		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
 		 */
-		xfs_defer_init(args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
 		error = xfs_da3_split(state);
 		if (error)
 			goto out_defer_cancel;
@@ -1008,7 +1008,8 @@ restart:
 		 * Check to see if the tree needs to be collapsed.
 		 */
 		if (retval && (state->path.active > 1)) {
-			xfs_defer_init(args->trans->t_dfops, args->firstblock);
+			xfs_defer_init(NULL, args->trans->t_dfops,
+				       args->firstblock);
 			error = xfs_da3_join(state);
 			if (error)
 				goto out_defer_cancel;
@@ -1133,7 +1134,7 @@ xfs_attr_node_removename(
 	 * Check to see if the tree needs to be collapsed.
 	 */
 	if (retval && (state->path.active > 1)) {
-		xfs_defer_init(args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
 		error = xfs_da3_join(state);
 		if (error)
 			goto out_defer_cancel;
@@ -1165,7 +1166,8 @@ xfs_attr_node_removename(
 			goto out;
 
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-			xfs_defer_init(args->trans->t_dfops, args->firstblock);
+			xfs_defer_init(NULL, args->trans->t_dfops,
+				       args->firstblock);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 179259fd1b5e..ab7c2755ad8c 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -480,7 +480,7 @@ xfs_attr_rmtval_set(
 		 * extent and then crash then the block may not contain the
 		 * correct metadata after log recovery occurs.
 		 */
-		xfs_defer_init(args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
 		nmap = 1;
 		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
 				  blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
@@ -522,7 +522,7 @@ xfs_attr_rmtval_set(
 
 		ASSERT(blkcnt > 0);
 
-		xfs_defer_init(args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
 		nmap = 1;
 		error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
 				       blkcnt, &map, &nmap,
@@ -626,7 +626,7 @@ xfs_attr_rmtval_remove(
 	blkcnt = args->rmtblkcnt;
 	done = 0;
 	while (!done) {
-		xfs_defer_init(args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
 		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
 				    XFS_BMAPI_ATTRFORK, 1, args->firstblock,
 				    &done);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 32d8d87b7582..dfff840c79f9 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1050,8 +1050,7 @@ xfs_bmap_add_attrfork(
 			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
 	if (error)
 		return error;
-	xfs_defer_init(&dfops, &firstblock);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &firstblock);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
@@ -6025,8 +6024,7 @@ xfs_bmap_split_extent(
 			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
 	if (error)
 		return error;
-	xfs_defer_init(&dfops, &firstfsb);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &firstfsb);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 560a7d178c1e..6b25a9436829 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -523,12 +523,19 @@ xfs_defer_init_op_type(
 /* Initialize a deferred operation. */
 void
 xfs_defer_init(
+	struct xfs_trans		*tp,
 	struct xfs_defer_ops		*dop,
 	xfs_fsblock_t			*fbp)
 {
+	struct xfs_mount		*mp = NULL;
+
 	memset(dop, 0, sizeof(struct xfs_defer_ops));
 	*fbp = NULLFSBLOCK;
 	INIT_LIST_HEAD(&dop->dop_intake);
 	INIT_LIST_HEAD(&dop->dop_pending);
-	trace_xfs_defer_init(NULL, dop, _RET_IP_);
+	if (tp) {
+		tp->t_dfops = dop;
+		mp = tp->t_mountp;
+	}
+	trace_xfs_defer_init(mp, dop, _RET_IP_);
 }
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index a02b2b748b6d..56eaaac31df5 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -63,7 +63,8 @@ void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
 		struct list_head *h);
 int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop);
 void xfs_defer_cancel(struct xfs_defer_ops *dop);
-void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
+void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop,
+		    xfs_fsblock_t *fbp);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
 int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index df67821fb5f4..8dc380574cd8 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1691,8 +1691,7 @@ xfs_refcount_recover_cow_leftovers(
 		trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);
 
 		/* Free the orphan record */
-		xfs_defer_init(&dfops, &fsb);
-		tp->t_dfops = &dfops;
+		xfs_defer_init(tp, &dfops, &fsb);
 		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
 		fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
 		error = xfs_refcount_free_cow_extent(mp, tp->t_dfops, fsb,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 6bea8df348bc..1259e599158d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -972,8 +972,7 @@ xfs_alloc_file_space(
 
 		xfs_trans_ijoin(tp, ip, 0);
 
-		xfs_defer_init(&dfops, &firstfsb);
-		tp->t_dfops = &dfops;
+		xfs_defer_init(tp, &dfops, &firstfsb);
 		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
 					allocatesize_fsb, alloc_type, &firstfsb,
 					resblks, imapp, &nimaps);
@@ -1043,8 +1042,7 @@ xfs_unmap_extent(
 
 	xfs_trans_ijoin(tp, ip, 0);
 
-	xfs_defer_init(&dfops, &firstfsb);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &firstfsb);
 	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
 			    done);
 	if (error)
@@ -1347,8 +1345,7 @@ xfs_collapse_file_space(
 			goto out_trans_cancel;
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
-		xfs_defer_init(&dfops, &first_block);
-		tp->t_dfops = &dfops;
+		xfs_defer_init(tp, &dfops, &first_block);
 		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
 				&done, &first_block);
 		if (error)
@@ -1427,8 +1424,7 @@ xfs_insert_file_space(
 
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-		xfs_defer_init(&dfops, &first_block);
-		tp->t_dfops = &dfops;
+		xfs_defer_init(tp, &dfops, &first_block);
 		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
 				&done, stop_fsb, &first_block);
 		if (error)
@@ -1615,7 +1611,7 @@ xfs_swap_extent_rmap(
 
 		/* Unmap the old blocks in the source file. */
 		while (tirec.br_blockcount) {
-			xfs_defer_init(tp->t_dfops, &firstfsb);
+			xfs_defer_init(tp, tp->t_dfops, &firstfsb);
 			trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
 
 			/* Read extent from the source file */
@@ -1920,8 +1916,7 @@ xfs_swap_extents(
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
 	if (error)
 		goto out_unlock;
-	xfs_defer_init(&dfops, &firstfsb);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &firstfsb);
 
 	/*
 	 * Lock and join the inodes to the tansaction so that transaction commit
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 1ef38e1df679..c698e7f6f744 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -296,7 +296,7 @@ xfs_dquot_disk_alloc(
 
 	trace_xfs_dqalloc(dqp);
 
-	xfs_defer_init(tp->t_dfops, &firstblock);
+	xfs_defer_init(tp, tp->t_dfops, &firstblock);
 
 	xfs_ilock(quotip, XFS_ILOCK_EXCL);
 	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
@@ -549,8 +549,7 @@ xfs_qm_dqread_alloc(
 			XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
 	if (error)
 		goto err;
-	xfs_defer_init(&dfops, &firstblock);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &firstblock);
 
 	error = xfs_dquot_disk_alloc(&tp, dqp, &bp);
 	if (error)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f456df2e1394..04e17234e5d7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1195,8 +1195,7 @@ xfs_create(
 	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
 	unlock_dp_on_error = true;
 
-	xfs_defer_init(&dfops, &first_block);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &first_block);
 
 	/*
 	 * Reserve disk quota and the inode.
@@ -1451,8 +1450,7 @@ xfs_link(
 			goto error_return;
 	}
 
-	xfs_defer_init(&dfops, &first_block);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &first_block);
 
 	/*
 	 * Handle initial link state of O_TMPFILE inode
@@ -1584,8 +1582,7 @@ xfs_itruncate_extents_flags(
 	ASSERT(first_unmap_block < last_block);
 	unmap_len = last_block - first_unmap_block + 1;
 	while (!done) {
-		xfs_defer_init(&dfops, &first_block);
-		tp->t_dfops = &dfops;
+		xfs_defer_init(tp, &dfops, &first_block);
 		error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
 				    XFS_ITRUNC_MAX_EXTENTS, &first_block,
 				    &done);
@@ -1816,8 +1813,7 @@ xfs_inactive_ifree(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
 
-	xfs_defer_init(&dfops, &first_block);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &first_block);
 	error = xfs_ifree(tp, ip);
 	if (error) {
 		/*
@@ -2661,8 +2657,7 @@ xfs_remove(
 	if (error)
 		goto out_trans_cancel;
 
-	xfs_defer_init(&dfops, &first_block);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &first_block);
 	error = xfs_dir_removename(tp, dp, name, ip->i_ino, &first_block,
 				   resblks);
 	if (error) {
@@ -3026,8 +3021,7 @@ xfs_rename(
 		goto out_trans_cancel;
 	}
 
-	xfs_defer_init(&dfops, &first_block);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &first_block);
 
 	/* RENAME_EXCHANGE is unique from here on. */
 	if (flags & RENAME_EXCHANGE)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 0c736c938f52..777c349607b3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -254,8 +254,7 @@ xfs_iomap_write_direct(
 	 * From this point onwards we overwrite the imap pointer that the
 	 * caller gave to us.
 	 */
-	xfs_defer_init(&dfops, &firstfsb);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &firstfsb);
 	nimaps = 1;
 	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
 				bmapi_flags, &firstfsb, resblks, imap,
@@ -717,8 +716,7 @@ xfs_iomap_write_allocate(
 			xfs_ilock(ip, XFS_ILOCK_EXCL);
 			xfs_trans_ijoin(tp, ip, 0);
 
-			xfs_defer_init(&dfops, &first_block);
-			tp->t_dfops = &dfops;
+			xfs_defer_init(tp, &dfops, &first_block);
 
 			/*
 			 * it is possible that the extents have changed since
@@ -878,8 +876,7 @@ xfs_iomap_write_unwritten(
 		/*
 		 * Modify the unwritten extent state of the buffer.
 		 */
-		xfs_defer_init(&dfops, &firstfsb);
-		tp->t_dfops = &dfops;
+		xfs_defer_init(tp, &dfops, &firstfsb);
 		nimaps = 1;
 		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
 					XFS_BMAPI_CONVERT, &firstfsb, resblks,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b181b5f57a19..940eb30e0271 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4902,7 +4902,7 @@ xlog_recover_process_intents(
 #if defined(DEBUG) || defined(XFS_WARN)
 	last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
 #endif
-	xfs_defer_init(&dfops, &firstfsb);
+	xfs_defer_init(NULL, &dfops, &firstfsb);
 	while (lip != NULL) {
 		/*
 		 * We're done when we see something other than an intent.
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 8ea4ba0e45d8..bef780171962 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -428,8 +428,7 @@ retry:
 
 	xfs_trans_ijoin(tp, ip, 0);
 
-	xfs_defer_init(&dfops, &first_block);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &first_block);
 	nimaps = 1;
 
 	/* Allocate the entire reservation as unwritten blocks. */
@@ -517,8 +516,7 @@ xfs_reflink_cancel_cow_blocks(
 			if (error)
 				break;
 		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
-			xfs_defer_init(&dfops, &firstfsb);
-			(*tpp)->t_dfops = &dfops;
+			xfs_defer_init(*tpp, &dfops, &firstfsb);
 
 			/* Free the CoW orphan record. */
 			error = xfs_refcount_free_cow_extent(ip->i_mount,
@@ -701,8 +699,7 @@ xfs_reflink_end_cow(
 			goto prev_extent;
 
 		/* Unmap the old blocks in the data fork. */
-		xfs_defer_init(&dfops, &firstfsb);
-		tp->t_dfops = &dfops;
+		xfs_defer_init(tp, &dfops, &firstfsb);
 		rlen = del.br_blockcount;
 		error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
 				&firstfsb);
@@ -1052,8 +1049,7 @@ xfs_reflink_remap_extent(
 	/* Unmap the old blocks in the data fork. */
 	rlen = unmap_len;
 	while (rlen) {
-		xfs_defer_init(&dfops, &firstfsb);
-		tp->t_dfops = &dfops;
+		xfs_defer_init(tp, &dfops, &firstfsb);
 		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1, &firstfsb);
 		if (error)
 			goto out_defer;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 1c7d1238ff3b..c102b0d26bc1 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -787,8 +787,7 @@ xfs_growfs_rt_alloc(
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
-		xfs_defer_init(&dfops, &firstblock);
-		tp->t_dfops = &dfops;
+		xfs_defer_init(tp, &dfops, &firstblock);
 		/*
 		 * Allocate blocks to the bitmap file.
 		 */
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index a54f095c1409..e50e97308f81 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -246,8 +246,7 @@ xfs_symlink(
 	 * Initialize the bmap freelist prior to calling either
 	 * bmapi or the directory create code.
 	 */
-	xfs_defer_init(&dfops, &first_block);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &first_block);
 
 	/*
 	 * Allocate an inode for the symlink.
@@ -443,8 +442,7 @@ xfs_inactive_symlink_rmt(
 	 * Find the block(s) so we can inval and unmap them.
 	 */
 	done = 0;
-	xfs_defer_init(&dfops, &first_block);
-	tp->t_dfops = &dfops;
+	xfs_defer_init(tp, &dfops, &first_block);
 	nmaps = ARRAY_SIZE(mval);
 	error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),
 				mval, &nmaps, 0);
-- 
cgit v1.2.3


From 3ae2d89174e4ba581093320afb48421ca95191d2 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:19 -0700
Subject: xfs: allow null firstblock in xfs_bmapi_write() when tp is null

xfs_bmapi_write() always expects a valid firstblock pointer. It
immediately dereferences the pointer to help determine how to
initialize the bma.minleft field. The remaining accesses are
related to modifying btree format forks, which is only relevant for
!COW fork callers.

The reflink code passes a NULL transaction to xfs_bmapi_write() in a
couple of places that do COW fork unwritten conversion. The purpose of
the firstblock field is to track the first block allocation in the
current transaction, so technically firstblock should not be
required for these callers either.

Tweak xfs_bmapi_write() to initialize the bma correctly without
accessing the firstblock pointer if no transaction is provided in
the first place. Update the reflink callers to pass NULL instead of
otherwise unused firstblock references.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 2 +-
 fs/xfs/xfs_reflink.c     | 9 +++------
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index dfff840c79f9..0b476a8e751c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4302,7 +4302,7 @@ xfs_bmapi_write(
 
 	XFS_STATS_INC(mp, xs_blk_mapw);
 
-	if (*firstblock == NULLFSBLOCK) {
+	if (!tp || *firstblock == NULLFSBLOCK) {
 		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
 			bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
 		else
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index bef780171962..b1bc2eb54a14 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -314,7 +314,6 @@ xfs_reflink_convert_cow_extent(
 	xfs_fileoff_t			offset_fsb,
 	xfs_filblks_t			count_fsb)
 {
-	xfs_fsblock_t			first_block = NULLFSBLOCK;
 	int				nimaps = 1;
 
 	if (imap->br_state == XFS_EXT_NORM)
@@ -325,8 +324,8 @@ xfs_reflink_convert_cow_extent(
 	if (imap->br_blockcount == 0)
 		return 0;
 	return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount,
-			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block,
-			0, imap, &nimaps);
+			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, NULL, 0, imap,
+			&nimaps);
 }
 
 /* Convert all of the unwritten CoW extents in a file's range to real ones. */
@@ -341,7 +340,6 @@ xfs_reflink_convert_cow(
 	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
 	xfs_filblks_t		count_fsb = end_fsb - offset_fsb;
 	struct xfs_bmbt_irec	imap;
-	xfs_fsblock_t		first_block = NULLFSBLOCK;
 	int			nimaps = 1, error = 0;
 
 	ASSERT(count != 0);
@@ -349,8 +347,7 @@ xfs_reflink_convert_cow(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	error = xfs_bmapi_write(NULL, ip, offset_fsb, count_fsb,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT |
-			XFS_BMAPI_CONVERT_ONLY, &first_block, 0, &imap,
-			&nimaps);
+			XFS_BMAPI_CONVERT_ONLY, NULL, 0, &imap, &nimaps);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
-- 
cgit v1.2.3


From bba59c5e4b38e160c6be25b2f4fe36ebc84f53df Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:20 -0700
Subject: xfs: add firstblock field to xfs_trans

A firstblock var is typically allocated and initialized along with
xfs_defer_ops structures and passed around independent from the
associated transaction. To facilitate combining the two, add an
optional ->t_firstblock field to xfs_trans that can be used in place
of an on-stack variable.

The firstblock value follows the lifetime of the transaction, so
initialize it on allocation and when a transaction rolls.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_trans.c | 2 ++
 fs/xfs/xfs_trans.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 630993387517..de00f79ff698 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -100,6 +100,7 @@ xfs_trans_dup(
 	ntp->t_mountp = tp->t_mountp;
 	INIT_LIST_HEAD(&ntp->t_items);
 	INIT_LIST_HEAD(&ntp->t_busy);
+	ntp->t_firstblock = NULLFSBLOCK;
 
 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
 	ASSERT(tp->t_ticket != NULL);
@@ -273,6 +274,7 @@ xfs_trans_alloc(
 	tp->t_mountp = mp;
 	INIT_LIST_HEAD(&tp->t_items);
 	INIT_LIST_HEAD(&tp->t_busy);
+	tp->t_firstblock = NULLFSBLOCK;
 
 	error = xfs_trans_reserve(tp, resp, blocks, rtextents);
 	if (error) {
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index d8a695c57103..37fdacc690c7 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -102,6 +102,7 @@ typedef struct xfs_trans {
 	unsigned int		t_blk_res_used;	/* # of resvd blocks used */
 	unsigned int		t_rtx_res;	/* # of rt extents resvd */
 	unsigned int		t_rtx_res_used;	/* # of resvd rt extents used */
+	xfs_fsblock_t		t_firstblock;	/* first block allocated */
 	struct xlog_ticket	*t_ticket;	/* log mgr ticket */
 	struct xfs_mount	*t_mountp;	/* ptr to fs mount struct */
 	struct xfs_dquot_acct   *t_dqinfo;	/* acctg info for dquots */
-- 
cgit v1.2.3


From f16dea54b789aad464ae4f34caea9f8d81052729 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:20 -0700
Subject: xfs: use ->t_firstblock in dir ops

Callers of the xfs_dir_*() functions currently pass an on-stack
firstblock variable. While the dirops infrastructure carries a
pointer to this variable, it never rolls the transaction and so it
is safe to use ->t_firstblock instead.

Fix up the various xfs_dir_*() callers to use ->t_firstblock. Also
remove the unnecessary parameter for xfs_cross_rename().

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_inode.c   | 42 +++++++++++++++++++-----------------------
 fs/xfs/xfs_symlink.c |  9 ++++-----
 2 files changed, 23 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 04e17234e5d7..6a3fe2d3df6c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1143,7 +1143,6 @@ xfs_create(
 	struct xfs_trans	*tp = NULL;
 	int			error;
 	struct xfs_defer_ops	dfops;
-	xfs_fsblock_t		first_block;
 	bool                    unlock_dp_on_error = false;
 	prid_t			prid;
 	struct xfs_dquot	*udqp = NULL;
@@ -1195,7 +1194,7 @@ xfs_create(
 	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
 	unlock_dp_on_error = true;
 
-	xfs_defer_init(tp, &dfops, &first_block);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 
 	/*
 	 * Reserve disk quota and the inode.
@@ -1224,7 +1223,7 @@ xfs_create(
 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
 	unlock_dp_on_error = false;
 
-	error = xfs_dir_createname(tp, dp, name, ip->i_ino, &first_block,
+	error = xfs_dir_createname(tp, dp, name, ip->i_ino, &tp->t_firstblock,
 				   resblks ?
 					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
 	if (error) {
@@ -1401,7 +1400,6 @@ xfs_link(
 	xfs_trans_t		*tp;
 	int			error;
 	struct xfs_defer_ops	dfops;
-	xfs_fsblock_t           first_block;
 	int			resblks;
 
 	trace_xfs_link(tdp, target_name);
@@ -1450,7 +1448,7 @@ xfs_link(
 			goto error_return;
 	}
 
-	xfs_defer_init(tp, &dfops, &first_block);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 
 	/*
 	 * Handle initial link state of O_TMPFILE inode
@@ -1462,7 +1460,7 @@ xfs_link(
 	}
 
 	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
-				   &first_block, resblks);
+				   &tp->t_firstblock, resblks);
 	if (error)
 		goto error_return;
 	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2577,7 +2575,6 @@ xfs_remove(
 	int			is_dir = S_ISDIR(VFS_I(ip)->i_mode);
 	int                     error = 0;
 	struct xfs_defer_ops	dfops;
-	xfs_fsblock_t           first_block;
 	uint			resblks;
 
 	trace_xfs_remove(dp, name);
@@ -2657,8 +2654,8 @@ xfs_remove(
 	if (error)
 		goto out_trans_cancel;
 
-	xfs_defer_init(tp, &dfops, &first_block);
-	error = xfs_dir_removename(tp, dp, name, ip->i_ino, &first_block,
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	error = xfs_dir_removename(tp, dp, name, ip->i_ino, &tp->t_firstblock,
 				   resblks);
 	if (error) {
 		ASSERT(error != -ENOENT);
@@ -2783,7 +2780,6 @@ xfs_cross_rename(
 	struct xfs_inode	*dp2,
 	struct xfs_name		*name2,
 	struct xfs_inode	*ip2,
-	xfs_fsblock_t		*first_block,
 	int			spaceres)
 {
 	int		error = 0;
@@ -2792,13 +2788,13 @@ xfs_cross_rename(
 	int		dp2_flags = 0;
 
 	/* Swap inode number for dirent in first parent */
-	error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, first_block,
+	error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, &tp->t_firstblock,
 				spaceres);
 	if (error)
 		goto out_trans_abort;
 
 	/* Swap inode number for dirent in second parent */
-	error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, first_block,
+	error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, &tp->t_firstblock,
 				spaceres);
 	if (error)
 		goto out_trans_abort;
@@ -2813,7 +2809,7 @@ xfs_cross_rename(
 
 		if (S_ISDIR(VFS_I(ip2)->i_mode)) {
 			error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
-						dp1->i_ino, first_block,
+						dp1->i_ino, &tp->t_firstblock,
 						spaceres);
 			if (error)
 				goto out_trans_abort;
@@ -2840,7 +2836,7 @@ xfs_cross_rename(
 
 		if (S_ISDIR(VFS_I(ip1)->i_mode)) {
 			error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
-						dp2->i_ino, first_block,
+						dp2->i_ino, &tp->t_firstblock,
 						spaceres);
 			if (error)
 				goto out_trans_abort;
@@ -2939,7 +2935,6 @@ xfs_rename(
 	struct xfs_mount	*mp = src_dp->i_mount;
 	struct xfs_trans	*tp;
 	struct xfs_defer_ops	dfops;
-	xfs_fsblock_t		first_block;
 	struct xfs_inode	*wip = NULL;		/* whiteout inode */
 	struct xfs_inode	*inodes[__XFS_SORT_INODES];
 	int			num_inodes = __XFS_SORT_INODES;
@@ -3021,13 +3016,13 @@ xfs_rename(
 		goto out_trans_cancel;
 	}
 
-	xfs_defer_init(tp, &dfops, &first_block);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 
 	/* RENAME_EXCHANGE is unique from here on. */
 	if (flags & RENAME_EXCHANGE)
 		return xfs_cross_rename(tp, src_dp, src_name, src_ip,
 					target_dp, target_name, target_ip,
-					&first_block, spaceres);
+					spaceres);
 
 	/*
 	 * Set up the target.
@@ -3048,8 +3043,8 @@ xfs_rename(
 		 * to account for the ".." reference from the new entry.
 		 */
 		error = xfs_dir_createname(tp, target_dp, target_name,
-						src_ip->i_ino, &first_block,
-						spaceres);
+					   src_ip->i_ino, &tp->t_firstblock,
+					   spaceres);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -3088,7 +3083,8 @@ xfs_rename(
 		 * name at the destination directory, remove it first.
 		 */
 		error = xfs_dir_replace(tp, target_dp, target_name,
-					src_ip->i_ino, &first_block, spaceres);
+					src_ip->i_ino, &tp->t_firstblock,
+					spaceres);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -3122,7 +3118,7 @@ xfs_rename(
 		 * directory.
 		 */
 		error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
-					target_dp->i_ino, &first_block,
+					target_dp->i_ino, &tp->t_firstblock,
 					spaceres);
 		ASSERT(error != -EEXIST);
 		if (error)
@@ -3162,10 +3158,10 @@ xfs_rename(
 	 */
 	if (wip) {
 		error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
-					&first_block, spaceres);
+					&tp->t_firstblock, spaceres);
 	} else
 		error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
-					   &first_block, spaceres);
+					   &tp->t_firstblock, spaceres);
 	if (error)
 		goto out_bmap_cancel;
 
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index e50e97308f81..8ddc7f1147dc 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -164,7 +164,6 @@ xfs_symlink(
 	int			error = 0;
 	int			pathlen;
 	struct xfs_defer_ops	dfops;
-	xfs_fsblock_t		first_block;
 	bool                    unlock_dp_on_error = false;
 	xfs_fileoff_t		first_fsb;
 	xfs_filblks_t		fs_blocks;
@@ -246,7 +245,7 @@ xfs_symlink(
 	 * Initialize the bmap freelist prior to calling either
 	 * bmapi or the directory create code.
 	 */
-	xfs_defer_init(tp, &dfops, &first_block);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 
 	/*
 	 * Allocate an inode for the symlink.
@@ -289,8 +288,8 @@ xfs_symlink(
 		nmaps = XFS_SYMLINK_MAPS;
 
 		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
-				  XFS_BMAPI_METADATA, &first_block, resblks,
-				  mval, &nmaps);
+				  XFS_BMAPI_METADATA, &tp->t_firstblock,
+				  resblks, mval, &nmaps);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -338,7 +337,7 @@ xfs_symlink(
 	 * Create the directory entry for the symlink.
 	 */
 	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
-				   &first_block, resblks);
+				   &tp->t_firstblock, resblks);
 	if (error)
 		goto out_bmap_cancel;
 	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-- 
cgit v1.2.3


From 381eee69f862d38bef468e91517e37fc53f60885 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:21 -0700
Subject: xfs: remove firstblock param from xfs dir ops

All callers of the xfs_dir_*() functions pass ->t_firstblock as the
firstblock parameter. Drop the parameter and access ->t_firstblock
directly.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_dir2.c | 12 ++++--------
 fs/xfs/libxfs/xfs_dir2.h |  6 +++---
 fs/xfs/xfs_inode.c       | 32 ++++++++++++--------------------
 fs/xfs/xfs_symlink.c     |  3 +--
 4 files changed, 20 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 781dc63d305d..a3983e3eb64a 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -243,7 +243,6 @@ xfs_dir_createname(
 	struct xfs_inode	*dp,
 	struct xfs_name		*name,
 	xfs_ino_t		inum,		/* new entry inode number */
-	xfs_fsblock_t		*first,		/* bmap's firstblock */
 	xfs_extlen_t		total)		/* bmap's total block count */
 {
 	struct xfs_da_args	*args;
@@ -251,7 +250,6 @@ xfs_dir_createname(
 	int			v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
-	ASSERT(tp->t_dfops || !first);
 
 	if (inum) {
 		rval = xfs_dir_ino_validate(tp->t_mountp, inum);
@@ -274,7 +272,7 @@ xfs_dir_createname(
 	args->total = total;
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
-	args->firstblock = first;
+	args->firstblock = &tp->t_firstblock;
 	args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
 	if (!inum)
 		args->op_flags |= XFS_DA_OP_JUSTCHECK;
@@ -420,7 +418,6 @@ xfs_dir_removename(
 	struct xfs_inode	*dp,
 	struct xfs_name		*name,
 	xfs_ino_t		ino,
-	xfs_fsblock_t		*first,		/* bmap's firstblock */
 	xfs_extlen_t		total)		/* bmap's total block count */
 {
 	struct xfs_da_args	*args;
@@ -442,7 +439,7 @@ xfs_dir_removename(
 	args->hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args->inumber = ino;
 	args->dp = dp;
-	args->firstblock = first;
+	args->firstblock = &tp->t_firstblock;
 	args->total = total;
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
@@ -481,7 +478,6 @@ xfs_dir_replace(
 	struct xfs_inode	*dp,
 	struct xfs_name		*name,		/* name of entry to replace */
 	xfs_ino_t		inum,		/* new inode number */
-	xfs_fsblock_t		*first,		/* bmap's firstblock */
 	xfs_extlen_t		total)		/* bmap's total block count */
 {
 	struct xfs_da_args	*args;
@@ -506,7 +502,7 @@ xfs_dir_replace(
 	args->hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args->inumber = inum;
 	args->dp = dp;
-	args->firstblock = first;
+	args->firstblock = &tp->t_firstblock;
 	args->total = total;
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
@@ -545,7 +541,7 @@ xfs_dir_canenter(
 	xfs_inode_t	*dp,
 	struct xfs_name	*name)		/* name of entry to add */
 {
-	return xfs_dir_createname(tp, dp, name, 0, NULL, 0);
+	return xfs_dir_createname(tp, dp, name, 0, 0);
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index f203aebc07ed..ba5acd03de94 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -118,16 +118,16 @@ extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_inode *pdp);
 extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name, xfs_ino_t inum,
-				xfs_fsblock_t *first, xfs_extlen_t tot);
+				xfs_extlen_t tot);
 extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name, xfs_ino_t *inum,
 				struct xfs_name *ci_name);
 extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name, xfs_ino_t ino,
-				xfs_fsblock_t *first, xfs_extlen_t tot);
+				xfs_extlen_t tot);
 extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name, xfs_ino_t inum,
-				xfs_fsblock_t *first, xfs_extlen_t tot);
+				xfs_extlen_t tot);
 extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
 				struct xfs_name *name);
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 6a3fe2d3df6c..ab1fd696500c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1223,7 +1223,7 @@ xfs_create(
 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
 	unlock_dp_on_error = false;
 
-	error = xfs_dir_createname(tp, dp, name, ip->i_ino, &tp->t_firstblock,
+	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
 				   resblks ?
 					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
 	if (error) {
@@ -1460,7 +1460,7 @@ xfs_link(
 	}
 
 	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
-				   &tp->t_firstblock, resblks);
+				   resblks);
 	if (error)
 		goto error_return;
 	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2655,8 +2655,7 @@ xfs_remove(
 		goto out_trans_cancel;
 
 	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
-	error = xfs_dir_removename(tp, dp, name, ip->i_ino, &tp->t_firstblock,
-				   resblks);
+	error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks);
 	if (error) {
 		ASSERT(error != -ENOENT);
 		goto out_bmap_cancel;
@@ -2788,14 +2787,12 @@ xfs_cross_rename(
 	int		dp2_flags = 0;
 
 	/* Swap inode number for dirent in first parent */
-	error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, &tp->t_firstblock,
-				spaceres);
+	error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres);
 	if (error)
 		goto out_trans_abort;
 
 	/* Swap inode number for dirent in second parent */
-	error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, &tp->t_firstblock,
-				spaceres);
+	error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres);
 	if (error)
 		goto out_trans_abort;
 
@@ -2809,8 +2806,7 @@ xfs_cross_rename(
 
 		if (S_ISDIR(VFS_I(ip2)->i_mode)) {
 			error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
-						dp1->i_ino, &tp->t_firstblock,
-						spaceres);
+						dp1->i_ino, spaceres);
 			if (error)
 				goto out_trans_abort;
 
@@ -2836,8 +2832,7 @@ xfs_cross_rename(
 
 		if (S_ISDIR(VFS_I(ip1)->i_mode)) {
 			error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
-						dp2->i_ino, &tp->t_firstblock,
-						spaceres);
+						dp2->i_ino, spaceres);
 			if (error)
 				goto out_trans_abort;
 
@@ -3043,8 +3038,7 @@ xfs_rename(
 		 * to account for the ".." reference from the new entry.
 		 */
 		error = xfs_dir_createname(tp, target_dp, target_name,
-					   src_ip->i_ino, &tp->t_firstblock,
-					   spaceres);
+					   src_ip->i_ino, spaceres);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -3083,8 +3077,7 @@ xfs_rename(
 		 * name at the destination directory, remove it first.
 		 */
 		error = xfs_dir_replace(tp, target_dp, target_name,
-					src_ip->i_ino, &tp->t_firstblock,
-					spaceres);
+					src_ip->i_ino, spaceres);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -3118,8 +3111,7 @@ xfs_rename(
 		 * directory.
 		 */
 		error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
-					target_dp->i_ino, &tp->t_firstblock,
-					spaceres);
+					target_dp->i_ino, spaceres);
 		ASSERT(error != -EEXIST);
 		if (error)
 			goto out_bmap_cancel;
@@ -3158,10 +3150,10 @@ xfs_rename(
 	 */
 	if (wip) {
 		error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
-					&tp->t_firstblock, spaceres);
+					spaceres);
 	} else
 		error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
-					   &tp->t_firstblock, spaceres);
+					   spaceres);
 	if (error)
 		goto out_bmap_cancel;
 
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 8ddc7f1147dc..583ca83353f7 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -336,8 +336,7 @@ xfs_symlink(
 	/*
 	 * Create the directory entry for the symlink.
 	 */
-	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
-				   &tp->t_firstblock, resblks);
+	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, resblks);
 	if (error)
 		goto out_bmap_cancel;
 	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-- 
cgit v1.2.3


From 825d75cd8c1b53883dd8c2fe1d8833c371b08074 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:21 -0700
Subject: xfs: use ->t_firstblock in attrfork add

Note that this codepath is a user of struct xfs_da_args. Switch it
over to ->t_firstblock in preparation for removing
xfs_da_args.firstblock.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 33 +++++++++++++--------------------
 1 file changed, 13 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 0b476a8e751c..da73c1a011d3 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -915,7 +915,6 @@ STATIC int					/* error */
 xfs_bmap_add_attrfork_btree(
 	xfs_trans_t		*tp,		/* transaction pointer */
 	xfs_inode_t		*ip,		/* incore inode pointer */
-	xfs_fsblock_t		*firstblock,	/* first block allocated */
 	int			*flags)		/* inode logging flags */
 {
 	xfs_btree_cur_t		*cur;		/* btree cursor */
@@ -928,7 +927,7 @@ xfs_bmap_add_attrfork_btree(
 		*flags |= XFS_ILOG_DBROOT;
 	else {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
-		cur->bc_private.b.firstblock = *firstblock;
+		cur->bc_private.b.firstblock = tp->t_firstblock;
 		error = xfs_bmbt_lookup_first(cur, &stat);
 		if (error)
 			goto error0;
@@ -940,7 +939,7 @@ xfs_bmap_add_attrfork_btree(
 			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 			return -ENOSPC;
 		}
-		*firstblock = cur->bc_private.b.firstblock;
+		tp->t_firstblock = cur->bc_private.b.firstblock;
 		cur->bc_private.b.allocated = 0;
 		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 	}
@@ -957,7 +956,6 @@ STATIC int					/* error */
 xfs_bmap_add_attrfork_extents(
 	struct xfs_trans	*tp,		/* transaction pointer */
 	struct xfs_inode	*ip,		/* incore inode pointer */
-	xfs_fsblock_t		*firstblock,	/* first block allocated */
 	int			*flags)		/* inode logging flags */
 {
 	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
@@ -966,8 +964,8 @@ xfs_bmap_add_attrfork_extents(
 	if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
 		return 0;
 	cur = NULL;
-	error = xfs_bmap_extents_to_btree(tp, ip, firstblock, &cur, 0, flags,
-					  XFS_DATA_FORK);
+	error = xfs_bmap_extents_to_btree(tp, ip, &tp->t_firstblock, &cur, 0,
+					  flags, XFS_DATA_FORK);
 	if (cur) {
 		cur->bc_private.b.allocated = 0;
 		xfs_btree_del_cursor(cur,
@@ -989,12 +987,11 @@ xfs_bmap_add_attrfork_extents(
  */
 STATIC int					/* error */
 xfs_bmap_add_attrfork_local(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_inode_t		*ip,		/* incore inode pointer */
-	xfs_fsblock_t		*firstblock,	/* first block allocated */
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode pointer */
 	int			*flags)		/* inode logging flags */
 {
-	xfs_da_args_t		dargs;		/* args for dir/attr code */
+	struct xfs_da_args	dargs;		/* args for dir/attr code */
 
 	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
 		return 0;
@@ -1003,7 +1000,7 @@ xfs_bmap_add_attrfork_local(
 		memset(&dargs, 0, sizeof(dargs));
 		dargs.geo = ip->i_mount->m_dir_geo;
 		dargs.dp = ip;
-		dargs.firstblock = firstblock;
+		dargs.firstblock = &tp->t_firstblock;
 		dargs.total = dargs.geo->fsbcount;
 		dargs.whichfork = XFS_DATA_FORK;
 		dargs.trans = tp;
@@ -1011,7 +1008,7 @@ xfs_bmap_add_attrfork_local(
 	}
 
 	if (S_ISLNK(VFS_I(ip)->i_mode))
-		return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
+		return xfs_bmap_local_to_extents(tp, ip, &tp->t_firstblock, 1,
 						 flags, XFS_DATA_FORK,
 						 xfs_symlink_local_to_remote);
 
@@ -1030,7 +1027,6 @@ xfs_bmap_add_attrfork(
 	int			size,		/* space new attribute needs */
 	int			rsvd)		/* xact may use reserved blks */
 {
-	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
 	struct xfs_defer_ops	dfops;		/* freed extent records */
 	xfs_mount_t		*mp;		/* mount structure */
 	xfs_trans_t		*tp;		/* transaction pointer */
@@ -1050,7 +1046,7 @@ xfs_bmap_add_attrfork(
 			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
 	if (error)
 		return error;
-	xfs_defer_init(tp, &dfops, &firstblock);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
@@ -1100,16 +1096,13 @@ xfs_bmap_add_attrfork(
 	logflags = 0;
 	switch (ip->i_d.di_format) {
 	case XFS_DINODE_FMT_LOCAL:
-		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock,
-						    &logflags);
+		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
 		break;
 	case XFS_DINODE_FMT_EXTENTS:
-		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
-						      &logflags);
+		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
 		break;
 	case XFS_DINODE_FMT_BTREE:
-		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock,
-						    &logflags);
+		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
 		break;
 	default:
 		error = 0;
-- 
cgit v1.2.3


From 766139032f95bb41031f6de9c2ee0538bd035229 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:22 -0700
Subject: xfs: use ->t_firstblock in xattr ops

Similar to the dirops code, the xattr code uses an on-stack
firstblock variable for the various operations. This code rolls the
underlying transaction in various places, however, which means we
cannot simply replace the local firstblock vars with ->t_firstblock.
Doing so (without further changes) would invalidate the memory
pointed to by xfs_da_args.firstblock as soon as the first
transaction rolls.

To avoid this problem, remove xfs_da_args.firstblock and replace all
such accesses with ->t_firstblock at the same time. This ensures
that accesses to the current firstblock always occur through the
current transaction rather than a potentially invalid xfs_da_args
pointer.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c        | 37 ++++++++++++++++++-------------------
 fs/xfs/libxfs/xfs_attr_leaf.c   |  2 --
 fs/xfs/libxfs/xfs_attr_remote.c | 18 +++++++++++-------
 fs/xfs/libxfs/xfs_bmap.c        |  1 -
 fs/xfs/libxfs/xfs_da_btree.c    |  7 +++----
 fs/xfs/libxfs/xfs_da_btree.h    |  1 -
 fs/xfs/libxfs/xfs_dir2.c        |  5 +----
 7 files changed, 33 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 8a7e2c0308c4..153d2e29f872 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -204,7 +204,6 @@ xfs_attr_set(
 	struct xfs_da_args	args;
 	struct xfs_defer_ops	dfops;
 	struct xfs_trans_res	tres;
-	xfs_fsblock_t		firstblock;
 	int			rsvd = (flags & ATTR_ROOT) != 0;
 	int			error, err2, local;
 
@@ -219,7 +218,6 @@ xfs_attr_set(
 
 	args.value = value;
 	args.valuelen = valuelen;
-	args.firstblock = &firstblock;
 	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
 	args.total = xfs_attr_calc_size(&args, &local);
 
@@ -253,7 +251,7 @@ xfs_attr_set(
 			rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
 	if (error)
 		return error;
-	xfs_defer_init(args.trans, &dfops, &firstblock);
+	xfs_defer_init(args.trans, &dfops, &args.trans->t_firstblock);
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
@@ -392,7 +390,6 @@ xfs_attr_remove(
 	struct xfs_mount	*mp = dp->i_mount;
 	struct xfs_da_args	args;
 	struct xfs_defer_ops	dfops;
-	xfs_fsblock_t		firstblock;
 	int			error;
 
 	XFS_STATS_INC(mp, xs_attr_remove);
@@ -404,8 +401,6 @@ xfs_attr_remove(
 	if (error)
 		return error;
 
-	args.firstblock = &firstblock;
-
 	/*
 	 * we have no control over the attribute names that userspace passes us
 	 * to remove, so we have to allow the name lookup prior to attribute
@@ -427,7 +422,7 @@ xfs_attr_remove(
 			&args.trans);
 	if (error)
 		return error;
-	xfs_defer_init(args.trans, &dfops, &firstblock);
+	xfs_defer_init(args.trans, &dfops, &args.trans->t_firstblock);
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 	/*
@@ -598,7 +593,8 @@ xfs_attr_leaf_addname(
 		 * Commit that transaction so that the node_addname() call
 		 * can manage its own transactions.
 		 */
-		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops,
+			       &args->trans->t_firstblock);
 		error = xfs_attr3_leaf_to_node(args);
 		if (error)
 			goto out_defer_cancel;
@@ -687,8 +683,8 @@ xfs_attr_leaf_addname(
 		 * If the result is small enough, shrink it all into the inode.
 		 */
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-			xfs_defer_init(NULL, args->trans->t_dfops,
-				       args->firstblock);
+			xfs_defer_init(args->trans, args->trans->t_dfops,
+				       &args->trans->t_firstblock);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
@@ -753,7 +749,8 @@ xfs_attr_leaf_removename(
 	 * If the result is small enough, shrink it all into the inode.
 	 */
 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops,
+			       &args->trans->t_firstblock);
 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 		/* bp is gone due to xfs_da_shrink_inode */
 		if (error)
@@ -882,8 +879,8 @@ restart:
 			 */
 			xfs_da_state_free(state);
 			state = NULL;
-			xfs_defer_init(NULL, args->trans->t_dfops,
-				       args->firstblock);
+			xfs_defer_init(args->trans, args->trans->t_dfops,
+				       &args->trans->t_firstblock);
 			error = xfs_attr3_leaf_to_node(args);
 			if (error)
 				goto out_defer_cancel;
@@ -910,7 +907,8 @@ restart:
 		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
 		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
 		 */
-		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops,
+			       &args->trans->t_firstblock);
 		error = xfs_da3_split(state);
 		if (error)
 			goto out_defer_cancel;
@@ -1008,8 +1006,8 @@ restart:
 		 * Check to see if the tree needs to be collapsed.
 		 */
 		if (retval && (state->path.active > 1)) {
-			xfs_defer_init(NULL, args->trans->t_dfops,
-				       args->firstblock);
+			xfs_defer_init(args->trans, args->trans->t_dfops,
+				       &args->trans->t_firstblock);
 			error = xfs_da3_join(state);
 			if (error)
 				goto out_defer_cancel;
@@ -1134,7 +1132,8 @@ xfs_attr_node_removename(
 	 * Check to see if the tree needs to be collapsed.
 	 */
 	if (retval && (state->path.active > 1)) {
-		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops,
+			       &args->trans->t_firstblock);
 		error = xfs_da3_join(state);
 		if (error)
 			goto out_defer_cancel;
@@ -1166,8 +1165,8 @@ xfs_attr_node_removename(
 			goto out;
 
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-			xfs_defer_init(NULL, args->trans->t_dfops,
-				       args->firstblock);
+			xfs_defer_init(args->trans, args->trans->t_dfops,
+				       &args->trans->t_firstblock);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index c131469db0f1..251304f3bc5d 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -802,7 +802,6 @@ xfs_attr_shortform_to_leaf(
 	memset((char *)&nargs, 0, sizeof(nargs));
 	nargs.dp = dp;
 	nargs.geo = args->geo;
-	nargs.firstblock = args->firstblock;
 	nargs.total = args->total;
 	nargs.whichfork = XFS_ATTR_FORK;
 	nargs.trans = args->trans;
@@ -1005,7 +1004,6 @@ xfs_attr3_leaf_to_shortform(
 	memset((char *)&nargs, 0, sizeof(nargs));
 	nargs.geo = args->geo;
 	nargs.dp = dp;
-	nargs.firstblock = args->firstblock;
 	nargs.total = args->total;
 	nargs.whichfork = XFS_ATTR_FORK;
 	nargs.trans = args->trans;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index ab7c2755ad8c..205098aeb4bc 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -480,11 +480,13 @@ xfs_attr_rmtval_set(
 		 * extent and then crash then the block may not contain the
 		 * correct metadata after log recovery occurs.
 		 */
-		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops,
+			       &args->trans->t_firstblock);
 		nmap = 1;
 		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
-				  blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
-				  args->total, &map, &nmap);
+				  blkcnt, XFS_BMAPI_ATTRFORK,
+				  &args->trans->t_firstblock, args->total, &map,
+				  &nmap);
 		if (error)
 			goto out_defer_cancel;
 		xfs_defer_ijoin(args->trans->t_dfops, dp);
@@ -522,7 +524,8 @@ xfs_attr_rmtval_set(
 
 		ASSERT(blkcnt > 0);
 
-		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops,
+			       &args->trans->t_firstblock);
 		nmap = 1;
 		error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
 				       blkcnt, &map, &nmap,
@@ -626,10 +629,11 @@ xfs_attr_rmtval_remove(
 	blkcnt = args->rmtblkcnt;
 	done = 0;
 	while (!done) {
-		xfs_defer_init(NULL, args->trans->t_dfops, args->firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops,
+			       &args->trans->t_firstblock);
 		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
-				    XFS_BMAPI_ATTRFORK, 1, args->firstblock,
-				    &done);
+				    XFS_BMAPI_ATTRFORK, 1,
+				    &args->trans->t_firstblock, &done);
 		if (error)
 			goto out_defer_cancel;
 		xfs_defer_ijoin(args->trans->t_dfops, args->dp);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index da73c1a011d3..6f9b2cddb933 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1000,7 +1000,6 @@ xfs_bmap_add_attrfork_local(
 		memset(&dargs, 0, sizeof(dargs));
 		dargs.geo = ip->i_mount->m_dir_geo;
 		dargs.dp = ip;
-		dargs.firstblock = &tp->t_firstblock;
 		dargs.total = dargs.geo->fsbcount;
 		dargs.whichfork = XFS_DATA_FORK;
 		dargs.trans = tp;
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 68a72e3d9f53..2f2be86c10dc 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2059,10 +2059,9 @@ xfs_da_grow_inode_int(
 	 * Try mapping it in one filesystem block.
 	 */
 	nmap = 1;
-	ASSERT(args->firstblock != NULL);
 	error = xfs_bmapi_write(tp, dp, *bno, count,
 			xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
-			args->firstblock, args->total, &map, &nmap);
+			&tp->t_firstblock, args->total, &map, &nmap);
 	if (error)
 		return error;
 
@@ -2084,7 +2083,7 @@ xfs_da_grow_inode_int(
 			c = (int)(*bno + count - b);
 			error = xfs_bmapi_write(tp, dp, b, c,
 					xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
-					args->firstblock, args->total,
+					&tp->t_firstblock, args->total,
 					&mapp[mapi], &nmap);
 			if (error)
 				goto out_free_map;
@@ -2394,7 +2393,7 @@ xfs_da_shrink_inode(
 		 * the last block to the place we want to kill.
 		 */
 		error = xfs_bunmapi(tp, dp, dead_blkno, count,
-				    xfs_bmapi_aflag(w), 0, args->firstblock,
+				    xfs_bmapi_aflag(w), 0, &tp->t_firstblock,
 				    &done);
 		if (error == -ENOSPC) {
 			if (w != XFS_DATA_FORK)
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 6b8a04f3f162..59e290ef334f 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -57,7 +57,6 @@ typedef struct xfs_da_args {
 	xfs_dahash_t	hashval;	/* hash value of name */
 	xfs_ino_t	inumber;	/* input/output inode number */
 	struct xfs_inode *dp;		/* directory inode to manipulate */
-	xfs_fsblock_t	*firstblock;	/* ptr to firstblock for bmap calls */
 	struct xfs_trans *trans;	/* current trans (changes over time) */
 	xfs_extlen_t	total;		/* total blocks needed, for 1st bmap */
 	int		whichfork;	/* data or attribute fork */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index a3983e3eb64a..5db73d96b99e 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -272,7 +272,6 @@ xfs_dir_createname(
 	args->total = total;
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
-	args->firstblock = &tp->t_firstblock;
 	args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
 	if (!inum)
 		args->op_flags |= XFS_DA_OP_JUSTCHECK;
@@ -439,7 +438,6 @@ xfs_dir_removename(
 	args->hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args->inumber = ino;
 	args->dp = dp;
-	args->firstblock = &tp->t_firstblock;
 	args->total = total;
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
@@ -502,7 +500,6 @@ xfs_dir_replace(
 	args->hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args->inumber = inum;
 	args->dp = dp;
-	args->firstblock = &tp->t_firstblock;
 	args->total = total;
 	args->whichfork = XFS_DATA_FORK;
 	args->trans = tp;
@@ -660,7 +657,7 @@ xfs_dir2_shrink_inode(
 
 	/* Unmap the fsblock(s). */
 	error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0,
-			    args->firstblock, &done);
+			    &tp->t_firstblock, &done);
 	if (error) {
 		/*
 		 * ENOSPC actually can happen if we're in a removename with no
-- 
cgit v1.2.3


From 650919f13182e8deeeeaeb580570afb0cdf8bd0d Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:23 -0700
Subject: xfs: use ->t_firstblock for all xfs_bmapi_write() callers

Convert all xfs_bmapi_write() users to ->t_firstblock.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c |  8 ++++----
 fs/xfs/xfs_dquot.c     |  5 ++---
 fs/xfs/xfs_iomap.c     | 20 +++++++++-----------
 fs/xfs/xfs_reflink.c   |  7 +++----
 fs/xfs/xfs_rtalloc.c   |  5 ++---
 5 files changed, 20 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 1259e599158d..dd563f4a82e8 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -873,7 +873,6 @@ xfs_alloc_file_space(
 	xfs_filblks_t		allocatesize_fsb;
 	xfs_extlen_t		extsz, temp;
 	xfs_fileoff_t		startoffset_fsb;
-	xfs_fsblock_t		firstfsb;
 	int			nimaps;
 	int			quota_flag;
 	int			rt;
@@ -972,10 +971,11 @@ xfs_alloc_file_space(
 
 		xfs_trans_ijoin(tp, ip, 0);
 
-		xfs_defer_init(tp, &dfops, &firstfsb);
+		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
-					allocatesize_fsb, alloc_type, &firstfsb,
-					resblks, imapp, &nimaps);
+					allocatesize_fsb, alloc_type,
+					&tp->t_firstblock, resblks, imapp,
+					&nimaps);
 		if (error)
 			goto error0;
 
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index c698e7f6f744..2fc5e21373be 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -290,13 +290,12 @@ xfs_dquot_disk_alloc(
 	struct xfs_mount	*mp = tp->t_mountp;
 	struct xfs_buf		*bp;
 	struct xfs_inode	*quotip = xfs_quota_inode(mp, dqp->dq_flags);
-	xfs_fsblock_t		firstblock;
 	int			nmaps = 1;
 	int			error;
 
 	trace_xfs_dqalloc(dqp);
 
-	xfs_defer_init(tp, tp->t_dfops, &firstblock);
+	xfs_defer_init(tp, tp->t_dfops, &tp->t_firstblock);
 
 	xfs_ilock(quotip, XFS_ILOCK_EXCL);
 	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
@@ -312,7 +311,7 @@ xfs_dquot_disk_alloc(
 	xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
 	error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset,
 			XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
-			&firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
+			&tp->t_firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
 			&map, &nmaps);
 	if (error)
 		goto error0;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 777c349607b3..a2b302ba40a8 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -152,7 +152,6 @@ xfs_iomap_write_direct(
 	xfs_fileoff_t	offset_fsb;
 	xfs_fileoff_t	last_fsb;
 	xfs_filblks_t	count_fsb, resaligned;
-	xfs_fsblock_t	firstfsb;
 	xfs_extlen_t	extsz;
 	int		nimaps;
 	int		quota_flag;
@@ -254,10 +253,10 @@ xfs_iomap_write_direct(
 	 * From this point onwards we overwrite the imap pointer that the
 	 * caller gave to us.
 	 */
-	xfs_defer_init(tp, &dfops, &firstfsb);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 	nimaps = 1;
 	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
-				bmapi_flags, &firstfsb, resblks, imap,
+				bmapi_flags, &tp->t_firstblock, resblks, imap,
 				&nimaps);
 	if (error)
 		goto out_bmap_cancel;
@@ -665,7 +664,6 @@ xfs_iomap_write_allocate(
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_fileoff_t	offset_fsb, last_block;
 	xfs_fileoff_t	end_fsb, map_start_fsb;
-	xfs_fsblock_t	first_block;
 	struct xfs_defer_ops	dfops;
 	xfs_filblks_t	count_fsb;
 	xfs_trans_t	*tp;
@@ -716,7 +714,7 @@ xfs_iomap_write_allocate(
 			xfs_ilock(ip, XFS_ILOCK_EXCL);
 			xfs_trans_ijoin(tp, ip, 0);
 
-			xfs_defer_init(tp, &dfops, &first_block);
+			xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 
 			/*
 			 * it is possible that the extents have changed since
@@ -770,8 +768,9 @@ xfs_iomap_write_allocate(
 			 * pointer that the caller gave to us.
 			 */
 			error = xfs_bmapi_write(tp, ip, map_start_fsb,
-						count_fsb, flags, &first_block,
-						nres, imap, &nimaps);
+						count_fsb, flags,
+						&tp->t_firstblock, nres, imap,
+						&nimaps);
 			if (error)
 				goto trans_cancel;
 
@@ -827,7 +826,6 @@ xfs_iomap_write_unwritten(
 	xfs_fileoff_t	offset_fsb;
 	xfs_filblks_t	count_fsb;
 	xfs_filblks_t	numblks_fsb;
-	xfs_fsblock_t	firstfsb;
 	int		nimaps;
 	xfs_trans_t	*tp;
 	xfs_bmbt_irec_t imap;
@@ -876,11 +874,11 @@ xfs_iomap_write_unwritten(
 		/*
 		 * Modify the unwritten extent state of the buffer.
 		 */
-		xfs_defer_init(tp, &dfops, &firstfsb);
+		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		nimaps = 1;
 		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
-					XFS_BMAPI_CONVERT, &firstfsb, resblks,
-					&imap, &nimaps);
+					XFS_BMAPI_CONVERT, &tp->t_firstblock,
+					resblks, &imap, &nimaps);
 		if (error)
 			goto error_on_bmapi_transaction;
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index b1bc2eb54a14..d0397622be9f 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -366,7 +366,6 @@ xfs_reflink_allocate_cow(
 	struct xfs_bmbt_irec	got;
 	struct xfs_defer_ops	dfops;
 	struct xfs_trans	*tp = NULL;
-	xfs_fsblock_t		first_block;
 	int			nimaps, error = 0;
 	bool			trimmed;
 	xfs_filblks_t		resaligned;
@@ -425,13 +424,13 @@ retry:
 
 	xfs_trans_ijoin(tp, ip, 0);
 
-	xfs_defer_init(tp, &dfops, &first_block);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 	nimaps = 1;
 
 	/* Allocate the entire reservation as unwritten blocks. */
 	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
-			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
-			resblks, imap, &nimaps);
+			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC,
+			&tp->t_firstblock, resblks, imap, &nimaps);
 	if (error)
 		goto out_bmap_cancel;
 
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index c102b0d26bc1..1c894ea2abca 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -761,7 +761,6 @@ xfs_growfs_rt_alloc(
 	struct xfs_buf		*bp;	/* temporary buffer for zeroing */
 	xfs_daddr_t		d;		/* disk block address */
 	int			error;		/* error return value */
-	xfs_fsblock_t		firstblock;/* first block allocated in xaction */
 	struct xfs_defer_ops	dfops;		/* list of freed blocks */
 	xfs_fsblock_t		fsbno;		/* filesystem block for bno */
 	struct xfs_bmbt_irec	map;		/* block map output */
@@ -787,13 +786,13 @@ xfs_growfs_rt_alloc(
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
-		xfs_defer_init(tp, &dfops, &firstblock);
+		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		/*
 		 * Allocate blocks to the bitmap file.
 		 */
 		nmap = 1;
 		error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
-					XFS_BMAPI_METADATA, &firstblock,
+					XFS_BMAPI_METADATA, &tp->t_firstblock,
 					resblks, &map, &nmap);
 		if (!error && nmap < 1)
 			error = -ENOSPC;
-- 
cgit v1.2.3


From 372837978d90d1c563315192196735c09623a5d6 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:23 -0700
Subject: xfs: use ->t_firstblock for all xfs_bunmapi() callers

Convert all xfs_bunmapi() callers to ->t_firstblock.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 12 ++----------
 fs/xfs/xfs_bmap_util.c   |  7 +++----
 fs/xfs/xfs_inode.c       |  5 ++---
 fs/xfs/xfs_reflink.c     | 11 +++++------
 fs/xfs/xfs_symlink.c     |  6 +++---
 5 files changed, 15 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 6f9b2cddb933..572c8d0c40db 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -6132,17 +6132,9 @@ xfs_bmap_finish_one(
 	xfs_filblks_t			*blockcount,
 	xfs_exntst_t			state)
 {
-	xfs_fsblock_t			firstfsb;
 	int				error = 0;
 
-	/*
-	 * firstfsb is tied to the transaction lifetime and is used to
-	 * ensure correct AG locking order and schedule work item
-	 * continuations.  XFS_BUI_MAX_FAST_EXTENTS (== 1) restricts us
-	 * to only making one bmap call per transaction, so it should
-	 * be safe to have it as a local variable here.
-	 */
-	firstfsb = NULLFSBLOCK;
+	ASSERT(tp->t_firstblock == NULLFSBLOCK);
 
 	trace_xfs_bmap_deferred(tp->t_mountp,
 			XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
@@ -6165,7 +6157,7 @@ xfs_bmap_finish_one(
 		break;
 	case XFS_BMAP_UNMAP:
 		error = __xfs_bunmapi(tp, ip, startoff, blockcount,
-				XFS_BMAPI_REMAP, 1, &firstfsb);
+				XFS_BMAPI_REMAP, 1, &tp->t_firstblock);
 		break;
 	default:
 		ASSERT(0);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index dd563f4a82e8..0b2b52854061 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1024,7 +1024,6 @@ xfs_unmap_extent(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
 	struct xfs_defer_ops	dfops;
-	xfs_fsblock_t		firstfsb;
 	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
 	int			error;
 
@@ -1042,9 +1041,9 @@ xfs_unmap_extent(
 
 	xfs_trans_ijoin(tp, ip, 0);
 
-	xfs_defer_init(tp, &dfops, &firstfsb);
-	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
-			    done);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2,
+			    &tp->t_firstblock, done);
 	if (error)
 		goto out_bmap_cancel;
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ab1fd696500c..3cdfd795a50c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1543,7 +1543,6 @@ xfs_itruncate_extents_flags(
 	struct xfs_trans	*tp = *tpp;
 	struct xfs_defer_ops	*odfops = tp->t_dfops;
 	struct xfs_defer_ops	dfops;
-	xfs_fsblock_t		first_block;
 	xfs_fileoff_t		first_unmap_block;
 	xfs_fileoff_t		last_block;
 	xfs_filblks_t		unmap_len;
@@ -1580,9 +1579,9 @@ xfs_itruncate_extents_flags(
 	ASSERT(first_unmap_block < last_block);
 	unmap_len = last_block - first_unmap_block + 1;
 	while (!done) {
-		xfs_defer_init(tp, &dfops, &first_block);
+		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
-				    XFS_ITRUNC_MAX_EXTENTS, &first_block,
+				    XFS_ITRUNC_MAX_EXTENTS, &tp->t_firstblock,
 				    &done);
 		if (error)
 			goto out_bmap_cancel;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index d0397622be9f..83c02f6b1d02 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -627,7 +627,6 @@ xfs_reflink_end_cow(
 	struct xfs_trans		*tp;
 	xfs_fileoff_t			offset_fsb;
 	xfs_fileoff_t			end_fsb;
-	xfs_fsblock_t			firstfsb;
 	struct xfs_defer_ops		dfops;
 	int				error;
 	unsigned int			resblks;
@@ -695,10 +694,10 @@ xfs_reflink_end_cow(
 			goto prev_extent;
 
 		/* Unmap the old blocks in the data fork. */
-		xfs_defer_init(tp, &dfops, &firstfsb);
+		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		rlen = del.br_blockcount;
 		error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
-				&firstfsb);
+				&tp->t_firstblock);
 		if (error)
 			goto out_defer;
 
@@ -1002,7 +1001,6 @@ xfs_reflink_remap_extent(
 	struct xfs_mount	*mp = ip->i_mount;
 	bool			real_extent = xfs_bmap_is_real_extent(irec);
 	struct xfs_trans	*tp;
-	xfs_fsblock_t		firstfsb;
 	unsigned int		resblks;
 	struct xfs_defer_ops	dfops;
 	struct xfs_bmbt_irec	uirec;
@@ -1045,8 +1043,9 @@ xfs_reflink_remap_extent(
 	/* Unmap the old blocks in the data fork. */
 	rlen = unmap_len;
 	while (rlen) {
-		xfs_defer_init(tp, &dfops, &firstfsb);
-		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1, &firstfsb);
+		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1,
+				      &tp->t_firstblock);
 		if (error)
 			goto out_defer;
 
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 583ca83353f7..18d9b4d301e5 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -400,7 +400,6 @@ xfs_inactive_symlink_rmt(
 	xfs_buf_t	*bp;
 	int		done;
 	int		error;
-	xfs_fsblock_t	first_block;
 	struct xfs_defer_ops	dfops;
 	int		i;
 	xfs_mount_t	*mp;
@@ -440,7 +439,7 @@ xfs_inactive_symlink_rmt(
 	 * Find the block(s) so we can inval and unmap them.
 	 */
 	done = 0;
-	xfs_defer_init(tp, &dfops, &first_block);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 	nmaps = ARRAY_SIZE(mval);
 	error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),
 				mval, &nmaps, 0);
@@ -462,7 +461,8 @@ xfs_inactive_symlink_rmt(
 	/*
 	 * Unmap the dead block(s) to the dfops.
 	 */
-	error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, &first_block, &done);
+	error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, &tp->t_firstblock,
+			    &done);
 	if (error)
 		goto error_bmap_cancel;
 	ASSERT(done);
-- 
cgit v1.2.3


From 580c4ff9484ac3395ad48b1118b269a6d68c9318 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:24 -0700
Subject: xfs: use ->t_firstblock in xfs_bmapi_remap()

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 572c8d0c40db..e37e1319d733 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4501,7 +4501,6 @@ xfs_bmapi_remap(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_ifork	*ifp;
 	struct xfs_btree_cur	*cur = NULL;
-	xfs_fsblock_t		firstblock = NULLFSBLOCK;
 	struct xfs_bmbt_irec	got;
 	struct xfs_iext_cursor	icur;
 	int			whichfork = xfs_bmapi_whichfork(flags);
@@ -4544,7 +4543,7 @@ xfs_bmapi_remap(
 
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-		cur->bc_private.b.firstblock = firstblock;
+		cur->bc_private.b.firstblock = tp->t_firstblock;
 		cur->bc_private.b.flags = 0;
 	}
 
@@ -4557,7 +4556,7 @@ xfs_bmapi_remap(
 		got.br_state = XFS_EXT_NORM;
 
 	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
-			&cur, &got, &firstblock, &logflags, flags);
+			&cur, &got, &tp->t_firstblock, &logflags, flags);
 	if (error)
 		goto error0;
 
-- 
cgit v1.2.3


From d0a9d795729945fc7eea77387af7780a5a0ec4c5 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:24 -0700
Subject: xfs: use ->t_firstblock in insert/collapse range

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 0b2b52854061..d98f6e3065db 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1311,7 +1311,6 @@ xfs_collapse_file_space(
 	struct xfs_trans	*tp;
 	int			error;
 	struct xfs_defer_ops	dfops;
-	xfs_fsblock_t		first_block;
 	xfs_fileoff_t		next_fsb = XFS_B_TO_FSB(mp, offset + len);
 	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
 	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
@@ -1344,9 +1343,9 @@ xfs_collapse_file_space(
 			goto out_trans_cancel;
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
-		xfs_defer_init(tp, &dfops, &first_block);
+		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
-				&done, &first_block);
+				&done, &tp->t_firstblock);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -1387,7 +1386,6 @@ xfs_insert_file_space(
 	struct xfs_trans	*tp;
 	int			error;
 	struct xfs_defer_ops	dfops;
-	xfs_fsblock_t		first_block;
 	xfs_fileoff_t		stop_fsb = XFS_B_TO_FSB(mp, offset);
 	xfs_fileoff_t		next_fsb = NULLFSBLOCK;
 	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
@@ -1423,9 +1421,9 @@ xfs_insert_file_space(
 
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-		xfs_defer_init(tp, &dfops, &first_block);
+		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
-				&done, stop_fsb, &first_block);
+				&done, stop_fsb, &tp->t_firstblock);
 		if (error)
 			goto out_bmap_cancel;
 
-- 
cgit v1.2.3


From a7beabeae221db2118a51f6948239d63b84499ca Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:25 -0700
Subject: xfs: remove xfs_bmapi_write() firstblock param

All callers pass ->t_firstblock from the current transaction.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr_remote.c |  3 +--
 fs/xfs/libxfs/xfs_bmap.c        | 18 +++++-------------
 fs/xfs/libxfs/xfs_bmap.h        |  3 +--
 fs/xfs/libxfs/xfs_da_btree.c    |  5 ++---
 fs/xfs/xfs_bmap_util.c          |  5 ++---
 fs/xfs/xfs_dquot.c              |  3 +--
 fs/xfs/xfs_iomap.c              | 10 ++++------
 fs/xfs/xfs_reflink.c            |  6 +++---
 fs/xfs/xfs_rtalloc.c            |  4 ++--
 fs/xfs/xfs_symlink.c            |  3 +--
 10 files changed, 22 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 205098aeb4bc..2db9ef186e05 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -484,8 +484,7 @@ xfs_attr_rmtval_set(
 			       &args->trans->t_firstblock);
 		nmap = 1;
 		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
-				  blkcnt, XFS_BMAPI_ATTRFORK,
-				  &args->trans->t_firstblock, args->total, &map,
+				  blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map,
 				  &nmap);
 		if (error)
 			goto out_defer_cancel;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index e37e1319d733..ca30e972288a 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4212,12 +4212,6 @@ xfs_bmapi_convert_unwritten(
  * extent state if necessary.  Details behaviour is controlled by the flags
  * parameter.  Only allocates blocks from a single allocation group, to avoid
  * locking problems.
- *
- * The returned value in "firstblock" from the first call in a transaction
- * must be remembered and presented to subsequent calls in "firstblock".
- * An upper bound for the number of blocks to be allocated is supplied to
- * the first call in "total"; if no allocation group has that many free
- * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
  */
 int
 xfs_bmapi_write(
@@ -4226,8 +4220,6 @@ xfs_bmapi_write(
 	xfs_fileoff_t		bno,		/* starting file offs. mapped */
 	xfs_filblks_t		len,		/* length to map in file */
 	int			flags,		/* XFS_BMAPI_... */
-	xfs_fsblock_t		*firstblock,	/* first allocated block
-						   controls a.g. for allocs */
 	xfs_extlen_t		total,		/* total blocks needed */
 	struct xfs_bmbt_irec	*mval,		/* output: map values */
 	int			*nmap)		/* i/o: mval size/count */
@@ -4294,7 +4286,7 @@ xfs_bmapi_write(
 
 	XFS_STATS_INC(mp, xs_blk_mapw);
 
-	if (!tp || *firstblock == NULLFSBLOCK) {
+	if (!tp || tp->t_firstblock == NULLFSBLOCK) {
 		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
 			bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
 		else
@@ -4321,7 +4313,7 @@ xfs_bmapi_write(
 	bma.ip = ip;
 	bma.total = total;
 	bma.datatype = 0;
-	bma.firstblock = firstblock;
+	bma.firstblock = &tp->t_firstblock;
 	ASSERT(!tp || tp->t_dfops);
 
 	while (bno < end && n < *nmap) {
@@ -4474,11 +4466,11 @@ error0:
 
 	if (bma.cur) {
 		if (!error) {
-			ASSERT(*firstblock == NULLFSBLOCK ||
-			       XFS_FSB_TO_AGNO(mp, *firstblock) <=
+			ASSERT(tp->t_firstblock == NULLFSBLOCK ||
+			       XFS_FSB_TO_AGNO(mp, tp->t_firstblock) <=
 			       XFS_FSB_TO_AGNO(mp,
 				       bma.cur->bc_private.b.firstblock));
-			*firstblock = bma.cur->bc_private.b.firstblock;
+			tp->t_firstblock = bma.cur->bc_private.b.firstblock;
 		}
 		xfs_btree_del_cursor(bma.cur,
 			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 83180c7cf3ee..88c2b5dc499e 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -201,8 +201,7 @@ int	xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
 		int *nmap, int flags);
 int	xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t len, int flags,
-		xfs_fsblock_t *firstblock, xfs_extlen_t total,
-		struct xfs_bmbt_irec *mval, int *nmap);
+		xfs_extlen_t total, struct xfs_bmbt_irec *mval, int *nmap);
 int	__xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t *rlen, int flags,
 		xfs_extnum_t nexts, xfs_fsblock_t *firstblock);
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 2f2be86c10dc..fe4a192696ae 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2061,7 +2061,7 @@ xfs_da_grow_inode_int(
 	nmap = 1;
 	error = xfs_bmapi_write(tp, dp, *bno, count,
 			xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
-			&tp->t_firstblock, args->total, &map, &nmap);
+			args->total, &map, &nmap);
 	if (error)
 		return error;
 
@@ -2083,8 +2083,7 @@ xfs_da_grow_inode_int(
 			c = (int)(*bno + count - b);
 			error = xfs_bmapi_write(tp, dp, b, c,
 					xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
-					&tp->t_firstblock, args->total,
-					&mapp[mapi], &nmap);
+					args->total, &mapp[mapi], &nmap);
 			if (error)
 				goto out_free_map;
 			if (nmap < 1)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index d98f6e3065db..4f4b1d3fb898 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -973,9 +973,8 @@ xfs_alloc_file_space(
 
 		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
-					allocatesize_fsb, alloc_type,
-					&tp->t_firstblock, resblks, imapp,
-					&nimaps);
+					allocatesize_fsb, alloc_type, resblks,
+					imapp, &nimaps);
 		if (error)
 			goto error0;
 
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 2fc5e21373be..84359eeb20f4 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -311,8 +311,7 @@ xfs_dquot_disk_alloc(
 	xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
 	error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset,
 			XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
-			&tp->t_firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
-			&map, &nmaps);
+			XFS_QM_DQALLOC_SPACE_RES(mp), &map, &nmaps);
 	if (error)
 		goto error0;
 	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index a2b302ba40a8..0ae822538a63 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -256,8 +256,7 @@ xfs_iomap_write_direct(
 	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 	nimaps = 1;
 	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
-				bmapi_flags, &tp->t_firstblock, resblks, imap,
-				&nimaps);
+				bmapi_flags, resblks, imap, &nimaps);
 	if (error)
 		goto out_bmap_cancel;
 
@@ -768,8 +767,7 @@ xfs_iomap_write_allocate(
 			 * pointer that the caller gave to us.
 			 */
 			error = xfs_bmapi_write(tp, ip, map_start_fsb,
-						count_fsb, flags,
-						&tp->t_firstblock, nres, imap,
+						count_fsb, flags, nres, imap,
 						&nimaps);
 			if (error)
 				goto trans_cancel;
@@ -877,8 +875,8 @@ xfs_iomap_write_unwritten(
 		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		nimaps = 1;
 		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
-					XFS_BMAPI_CONVERT, &tp->t_firstblock,
-					resblks, &imap, &nimaps);
+					XFS_BMAPI_CONVERT, resblks, &imap,
+					&nimaps);
 		if (error)
 			goto error_on_bmapi_transaction;
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 83c02f6b1d02..7010f3453c29 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -324,7 +324,7 @@ xfs_reflink_convert_cow_extent(
 	if (imap->br_blockcount == 0)
 		return 0;
 	return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount,
-			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, NULL, 0, imap,
+			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, 0, imap,
 			&nimaps);
 }
 
@@ -347,7 +347,7 @@ xfs_reflink_convert_cow(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	error = xfs_bmapi_write(NULL, ip, offset_fsb, count_fsb,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT |
-			XFS_BMAPI_CONVERT_ONLY, NULL, 0, &imap, &nimaps);
+			XFS_BMAPI_CONVERT_ONLY, 0, &imap, &nimaps);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
@@ -430,7 +430,7 @@ retry:
 	/* Allocate the entire reservation as unwritten blocks. */
 	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
 			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC,
-			&tp->t_firstblock, resblks, imap, &nimaps);
+			resblks, imap, &nimaps);
 	if (error)
 		goto out_bmap_cancel;
 
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 1c894ea2abca..edd949376a51 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -792,8 +792,8 @@ xfs_growfs_rt_alloc(
 		 */
 		nmap = 1;
 		error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
-					XFS_BMAPI_METADATA, &tp->t_firstblock,
-					resblks, &map, &nmap);
+					XFS_BMAPI_METADATA, resblks, &map,
+					&nmap);
 		if (!error && nmap < 1)
 			error = -ENOSPC;
 		if (error)
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 18d9b4d301e5..94301b63525f 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -288,8 +288,7 @@ xfs_symlink(
 		nmaps = XFS_SYMLINK_MAPS;
 
 		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
-				  XFS_BMAPI_METADATA, &tp->t_firstblock,
-				  resblks, mval, &nmaps);
+				  XFS_BMAPI_METADATA, resblks, mval, &nmaps);
 		if (error)
 			goto out_bmap_cancel;
 
-- 
cgit v1.2.3


From 2af528425342dc8f696b28693c5e61587cd72b43 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:25 -0700
Subject: xfs: remove xfs_bunmapi() firstblock param

All callers pass ->t_firstblock from the current transaction.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr_remote.c |  3 +--
 fs/xfs/libxfs/xfs_bmap.c        | 25 ++++++++++++-------------
 fs/xfs/libxfs/xfs_bmap.h        |  5 ++---
 fs/xfs/libxfs/xfs_da_btree.c    |  3 +--
 fs/xfs/libxfs/xfs_dir2.c        |  3 +--
 fs/xfs/xfs_bmap_util.c          |  3 +--
 fs/xfs/xfs_inode.c              |  3 +--
 fs/xfs/xfs_reflink.c            |  6 ++----
 fs/xfs/xfs_symlink.c            |  3 +--
 9 files changed, 22 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 2db9ef186e05..f02c705965ff 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -631,8 +631,7 @@ xfs_attr_rmtval_remove(
 		xfs_defer_init(args->trans, args->trans->t_dfops,
 			       &args->trans->t_firstblock);
 		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
-				    XFS_BMAPI_ATTRFORK, 1,
-				    &args->trans->t_firstblock, &done);
+				    XFS_BMAPI_ATTRFORK, 1, &done);
 		if (error)
 			goto out_defer_cancel;
 		xfs_defer_ijoin(args->trans->t_dfops, args->dp);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index ca30e972288a..619d3adc5923 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5122,9 +5122,7 @@ __xfs_bunmapi(
 	xfs_fileoff_t		start,		/* first file offset deleted */
 	xfs_filblks_t		*rlen,		/* i/o: amount remaining */
 	int			flags,		/* misc flags */
-	xfs_extnum_t		nexts,		/* number of extents max */
-	xfs_fsblock_t		*firstblock)	/* first allocated block
-						   controls a.g. for allocs */
+	xfs_extnum_t		nexts)		/* number of extents max */
 {
 	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
 	struct xfs_bmbt_irec	del;		/* extent being deleted */
@@ -5198,7 +5196,7 @@ __xfs_bunmapi(
 	if (ifp->if_flags & XFS_IFBROOT) {
 		ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-		cur->bc_private.b.firstblock = *firstblock;
+		cur->bc_private.b.firstblock = tp->t_firstblock;
 		cur->bc_private.b.flags = 0;
 	} else
 		cur = NULL;
@@ -5314,7 +5312,7 @@ __xfs_bunmapi(
 			del.br_state = XFS_EXT_UNWRITTEN;
 			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
 					whichfork, &icur, &cur, &del,
-					firstblock, &logflags);
+					&tp->t_firstblock, &logflags);
 			if (error)
 				goto error0;
 			goto nodelete;
@@ -5371,7 +5369,8 @@ __xfs_bunmapi(
 				prev.br_state = XFS_EXT_UNWRITTEN;
 				error = xfs_bmap_add_extent_unwritten_real(tp,
 						ip, whichfork, &icur, &cur,
-						&prev, firstblock, &logflags);
+						&prev, &tp->t_firstblock,
+						&logflags);
 				if (error)
 					goto error0;
 				goto nodelete;
@@ -5380,7 +5379,8 @@ __xfs_bunmapi(
 				del.br_state = XFS_EXT_UNWRITTEN;
 				error = xfs_bmap_add_extent_unwritten_real(tp,
 						ip, whichfork, &icur, &cur,
-						&del, firstblock, &logflags);
+						&del, &tp->t_firstblock,
+						&logflags);
 				if (error)
 					goto error0;
 				goto nodelete;
@@ -5427,8 +5427,8 @@ nodelete:
 	 */
 	if (xfs_bmap_needs_btree(ip, whichfork)) {
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, firstblock, &cur, 0,
-				&tmp_logflags, whichfork);
+		error = xfs_bmap_extents_to_btree(tp, ip, &tp->t_firstblock,
+				&cur, 0, &tmp_logflags, whichfork);
 		logflags |= tmp_logflags;
 		if (error)
 			goto error0;
@@ -5467,7 +5467,7 @@ error0:
 		xfs_trans_log_inode(tp, ip, logflags);
 	if (cur) {
 		if (!error) {
-			*firstblock = cur->bc_private.b.firstblock;
+			tp->t_firstblock = cur->bc_private.b.firstblock;
 			cur->bc_private.b.allocated = 0;
 		}
 		xfs_btree_del_cursor(cur,
@@ -5485,12 +5485,11 @@ xfs_bunmapi(
 	xfs_filblks_t		len,
 	int			flags,
 	xfs_extnum_t		nexts,
-	xfs_fsblock_t		*firstblock,
 	int			*done)
 {
 	int			error;
 
-	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts, firstblock);
+	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
 	*done = (len == 0);
 	return error;
 }
@@ -6148,7 +6147,7 @@ xfs_bmap_finish_one(
 		break;
 	case XFS_BMAP_UNMAP:
 		error = __xfs_bunmapi(tp, ip, startoff, blockcount,
-				XFS_BMAPI_REMAP, 1, &tp->t_firstblock);
+				XFS_BMAPI_REMAP, 1);
 		break;
 	default:
 		ASSERT(0);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 88c2b5dc499e..108a3073d658 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -204,11 +204,10 @@ int	xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_extlen_t total, struct xfs_bmbt_irec *mval, int *nmap);
 int	__xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t *rlen, int flags,
-		xfs_extnum_t nexts, xfs_fsblock_t *firstblock);
+		xfs_extnum_t nexts);
 int	xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t len, int flags,
-		xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
-		int *done);
+		xfs_extnum_t nexts, int *done);
 int	xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
 		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got,
 		struct xfs_bmbt_irec *del);
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index fe4a192696ae..9efbd2038ffb 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2392,8 +2392,7 @@ xfs_da_shrink_inode(
 		 * the last block to the place we want to kill.
 		 */
 		error = xfs_bunmapi(tp, dp, dead_blkno, count,
-				    xfs_bmapi_aflag(w), 0, &tp->t_firstblock,
-				    &done);
+				    xfs_bmapi_aflag(w), 0, &done);
 		if (error == -ENOSPC) {
 			if (w != XFS_DATA_FORK)
 				break;
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 5db73d96b99e..4ea1fddb126f 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -656,8 +656,7 @@ xfs_dir2_shrink_inode(
 	da = xfs_dir2_db_to_da(args->geo, db);
 
 	/* Unmap the fsblock(s). */
-	error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0,
-			    &tp->t_firstblock, &done);
+	error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0, &done);
 	if (error) {
 		/*
 		 * ENOSPC actually can happen if we're in a removename with no
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 4f4b1d3fb898..f225707c89be 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1041,8 +1041,7 @@ xfs_unmap_extent(
 	xfs_trans_ijoin(tp, ip, 0);
 
 	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
-	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2,
-			    &tp->t_firstblock, done);
+	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, done);
 	if (error)
 		goto out_bmap_cancel;
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3cdfd795a50c..5e1eaa26435c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1581,8 +1581,7 @@ xfs_itruncate_extents_flags(
 	while (!done) {
 		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
-				    XFS_ITRUNC_MAX_EXTENTS, &tp->t_firstblock,
-				    &done);
+				    XFS_ITRUNC_MAX_EXTENTS, &done);
 		if (error)
 			goto out_bmap_cancel;
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 7010f3453c29..2972efeee5cc 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -696,8 +696,7 @@ xfs_reflink_end_cow(
 		/* Unmap the old blocks in the data fork. */
 		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		rlen = del.br_blockcount;
-		error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
-				&tp->t_firstblock);
+		error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1);
 		if (error)
 			goto out_defer;
 
@@ -1044,8 +1043,7 @@ xfs_reflink_remap_extent(
 	rlen = unmap_len;
 	while (rlen) {
 		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
-		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1,
-				      &tp->t_firstblock);
+		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1);
 		if (error)
 			goto out_defer;
 
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 94301b63525f..a3dc552a5b97 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -460,8 +460,7 @@ xfs_inactive_symlink_rmt(
 	/*
 	 * Unmap the dead block(s) to the dfops.
 	 */
-	error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, &tp->t_firstblock,
-			    &done);
+	error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, &done);
 	if (error)
 		goto error_bmap_cancel;
 	ASSERT(done);
-- 
cgit v1.2.3


From 333f950c89a17018f812eae13daaa2a404c413c1 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:27 -0700
Subject: xfs: remove bmap insert/collapse firstblock param

The only callers pass ->t_firstblock.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 10 ++++------
 fs/xfs/libxfs/xfs_bmap.h |  4 ++--
 fs/xfs/xfs_bmap_util.c   |  4 ++--
 3 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 619d3adc5923..5b4bee4645df 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5652,8 +5652,7 @@ xfs_bmap_collapse_extents(
 	struct xfs_inode	*ip,
 	xfs_fileoff_t		*next_fsb,
 	xfs_fileoff_t		offset_shift_fsb,
-	bool			*done,
-	xfs_fsblock_t		*firstblock)
+	bool			*done)
 {
 	int			whichfork = XFS_DATA_FORK;
 	struct xfs_mount	*mp = ip->i_mount;
@@ -5686,7 +5685,7 @@ xfs_bmap_collapse_extents(
 
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-		cur->bc_private.b.firstblock = *firstblock;
+		cur->bc_private.b.firstblock = tp->t_firstblock;
 		cur->bc_private.b.flags = 0;
 	}
 
@@ -5773,8 +5772,7 @@ xfs_bmap_insert_extents(
 	xfs_fileoff_t		*next_fsb,
 	xfs_fileoff_t		offset_shift_fsb,
 	bool			*done,
-	xfs_fileoff_t		stop_fsb,
-	xfs_fsblock_t		*firstblock)
+	xfs_fileoff_t		stop_fsb)
 {
 	int			whichfork = XFS_DATA_FORK;
 	struct xfs_mount	*mp = ip->i_mount;
@@ -5807,7 +5805,7 @@ xfs_bmap_insert_extents(
 
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-		cur->bc_private.b.firstblock = *firstblock;
+		cur->bc_private.b.firstblock = tp->t_firstblock;
 		cur->bc_private.b.flags = 0;
 	}
 
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 108a3073d658..3cad9c3e3bda 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -217,12 +217,12 @@ void	xfs_bmap_del_extent_cow(struct xfs_inode *ip,
 uint	xfs_default_attroffset(struct xfs_inode *ip);
 int	xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
-		bool *done, xfs_fsblock_t *firstblock);
+		bool *done);
 int	xfs_bmap_can_insert_extents(struct xfs_inode *ip, xfs_fileoff_t off,
 		xfs_fileoff_t shift);
 int	xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
-		bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock);
+		bool *done, xfs_fileoff_t stop_fsb);
 int	xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
 int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
 		xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f225707c89be..e9907bbe9529 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1343,7 +1343,7 @@ xfs_collapse_file_space(
 
 		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
-				&done, &tp->t_firstblock);
+				&done);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -1421,7 +1421,7 @@ xfs_insert_file_space(
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
-				&done, stop_fsb, &tp->t_firstblock);
+				&done, stop_fsb);
 		if (error)
 			goto out_bmap_cancel;
 
-- 
cgit v1.2.3


From 4b77a088d781b53d263c37e75222439297b410e5 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:27 -0700
Subject: xfs: use ->t_firstblock in bmap extent split

Also remove the unnecessary xfs_bmap_split_extent_at() parameter.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 5b4bee4645df..6e8254599fab 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5878,8 +5878,7 @@ STATIC int
 xfs_bmap_split_extent_at(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
-	xfs_fileoff_t		split_fsb,
-	xfs_fsblock_t		*firstfsb)
+	xfs_fileoff_t		split_fsb)
 {
 	int				whichfork = XFS_DATA_FORK;
 	struct xfs_btree_cur		*cur = NULL;
@@ -5928,7 +5927,7 @@ xfs_bmap_split_extent_at(
 
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-		cur->bc_private.b.firstblock = *firstfsb;
+		cur->bc_private.b.firstblock = tp->t_firstblock;
 		cur->bc_private.b.flags = 0;
 		error = xfs_bmbt_lookup_eq(cur, &got, &i);
 		if (error)
@@ -5972,8 +5971,8 @@ xfs_bmap_split_extent_at(
 		int tmp_logflags; /* partial log flag return val */
 
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, &cur, 0,
-				&tmp_logflags, whichfork);
+		error = xfs_bmap_extents_to_btree(tp, ip, &tp->t_firstblock,
+				&cur, 0, &tmp_logflags, whichfork);
 		logflags |= tmp_logflags;
 	}
 
@@ -5997,20 +5996,18 @@ xfs_bmap_split_extent(
 	struct xfs_mount        *mp = ip->i_mount;
 	struct xfs_trans        *tp;
 	struct xfs_defer_ops    dfops;
-	xfs_fsblock_t           firstfsb;
 	int                     error;
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
 			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
 	if (error)
 		return error;
-	xfs_defer_init(tp, &dfops, &firstfsb);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
-	error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
-					 &firstfsb);
+	error = xfs_bmap_split_extent_at(tp, ip, split_fsb);
 	if (error)
 		goto out;
 
-- 
cgit v1.2.3


From 94c07b4dba01481740ce893d05a71578150b8f0b Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:28 -0700
Subject: xfs: remove xfs_bmalloca firstblock field

The xfs_bmalloca.firstblock field carries the firstblock value from
the transaction into the bmap infrastructure. It's initialized in
one place from ->t_firstblock, so drop the field and access
->t_firstblock directly throughout the bmap code.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 44 +++++++++++++++++++++++---------------------
 fs/xfs/libxfs/xfs_bmap.h |  1 -
 2 files changed, 23 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 6e8254599fab..815600d39b03 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1794,7 +1794,7 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-					bma->firstblock, &bma->cur, 1,
+					&bma->tp->t_firstblock, &bma->cur, 1,
 					&tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
@@ -1872,7 +1872,7 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-				bma->firstblock, &bma->cur, 1, &tmp_rval,
+				&bma->tp->t_firstblock, &bma->cur, 1, &tmp_rval,
 				whichfork);
 			rval |= tmp_rval;
 			if (error)
@@ -1953,7 +1953,7 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-					bma->firstblock, &bma->cur, 1,
+					&bma->tp->t_firstblock, &bma->cur, 1,
 					&tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
@@ -1991,7 +1991,7 @@ xfs_bmap_add_extent_delay_real(
 
 		ASSERT(bma->cur == NULL);
 		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-				bma->firstblock, &bma->cur, da_old > 0,
+				&bma->tp->t_firstblock, &bma->cur, da_old > 0,
 				&tmp_logflags, whichfork);
 		bma->logflags |= tmp_logflags;
 		if (error)
@@ -3056,10 +3056,11 @@ xfs_bmap_adjacent(
 		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
 
 	mp = ap->ip->i_mount;
-	nullfb = *ap->firstblock == NULLFSBLOCK;
+	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
 	rt = XFS_IS_REALTIME_INODE(ap->ip) &&
 		xfs_alloc_is_userdata(ap->datatype);
-	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
+	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
+							ap->tp->t_firstblock);
 	/*
 	 * If allocating at eof, and there's a previous real block,
 	 * try to use its last block as our starting point.
@@ -3417,8 +3418,9 @@ xfs_bmap_btalloc(
 	}
 
 
-	nullfb = *ap->firstblock == NULLFSBLOCK;
-	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
+	nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
+	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
+							ap->tp->t_firstblock);
 	if (nullfb) {
 		if (xfs_alloc_is_userdata(ap->datatype) &&
 		    xfs_inode_is_filestream(ap->ip)) {
@@ -3429,7 +3431,7 @@ xfs_bmap_btalloc(
 			ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
 		}
 	} else
-		ap->blkno = *ap->firstblock;
+		ap->blkno = ap->tp->t_firstblock;
 
 	xfs_bmap_adjacent(ap);
 
@@ -3440,7 +3442,7 @@ xfs_bmap_btalloc(
 	if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
 		;
 	else
-		ap->blkno = *ap->firstblock;
+		ap->blkno = ap->tp->t_firstblock;
 	/*
 	 * Normal allocation, done through xfs_alloc_vextent.
 	 */
@@ -3453,7 +3455,7 @@ xfs_bmap_btalloc(
 
 	/* Trim the allocation back to the maximum an AG can fit. */
 	args.maxlen = min(ap->length, mp->m_ag_max_usable);
-	args.firstblock = *ap->firstblock;
+	args.firstblock = ap->tp->t_firstblock;
 	blen = 0;
 	if (nullfb) {
 		/*
@@ -3602,13 +3604,13 @@ xfs_bmap_btalloc(
 		 * check the allocation happened at the same or higher AG than
 		 * the first block that was allocated.
 		 */
-		ASSERT(*ap->firstblock == NULLFSBLOCK ||
-		       XFS_FSB_TO_AGNO(mp, *ap->firstblock) <=
+		ASSERT(ap->tp->t_firstblock == NULLFSBLOCK ||
+		       XFS_FSB_TO_AGNO(mp, ap->tp->t_firstblock) <=
 		       XFS_FSB_TO_AGNO(mp, args.fsbno));
 
 		ap->blkno = args.fsbno;
-		if (*ap->firstblock == NULLFSBLOCK)
-			*ap->firstblock = args.fsbno;
+		if (ap->tp->t_firstblock == NULLFSBLOCK)
+			ap->tp->t_firstblock = args.fsbno;
 		ASSERT(nullfb || fb_agno <= args.agno);
 		ap->length = args.len;
 		/*
@@ -4064,12 +4066,12 @@ xfs_bmapi_allocate(
 		return error;
 
 	if (bma->cur)
-		bma->cur->bc_private.b.firstblock = *bma->firstblock;
+		bma->cur->bc_private.b.firstblock = bma->tp->t_firstblock;
 	if (bma->blkno == NULLFSBLOCK)
 		return 0;
 	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
 		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
-		bma->cur->bc_private.b.firstblock = *bma->firstblock;
+		bma->cur->bc_private.b.firstblock = bma->tp->t_firstblock;
 	}
 	/*
 	 * Bump the number of extents we've allocated
@@ -4105,7 +4107,8 @@ xfs_bmapi_allocate(
 	else
 		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
 				whichfork, &bma->icur, &bma->cur, &bma->got,
-				bma->firstblock, &bma->logflags, bma->flags);
+				&bma->tp->t_firstblock, &bma->logflags,
+				bma->flags);
 
 	bma->logflags |= tmp_logflags;
 	if (error)
@@ -4156,7 +4159,7 @@ xfs_bmapi_convert_unwritten(
 	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
 		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
 					bma->ip, whichfork);
-		bma->cur->bc_private.b.firstblock = *bma->firstblock;
+		bma->cur->bc_private.b.firstblock = bma->tp->t_firstblock;
 	}
 	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
 				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
@@ -4173,7 +4176,7 @@ xfs_bmapi_convert_unwritten(
 	}
 
 	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
-			&bma->icur, &bma->cur, mval, bma->firstblock,
+			&bma->icur, &bma->cur, mval, &bma->tp->t_firstblock,
 			&tmp_logflags);
 	/*
 	 * Log the inode core unconditionally in the unwritten extent conversion
@@ -4313,7 +4316,6 @@ xfs_bmapi_write(
 	bma.ip = ip;
 	bma.total = total;
 	bma.datatype = 0;
-	bma.firstblock = &tp->t_firstblock;
 	ASSERT(!tp || tp->t_dfops);
 
 	while (bno < end && n < *nmap) {
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 3cad9c3e3bda..2e8555c1229a 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -19,7 +19,6 @@ extern kmem_zone_t	*xfs_bmap_free_item_zone;
  * Argument structure for xfs_bmap_alloc.
  */
 struct xfs_bmalloca {
-	xfs_fsblock_t		*firstblock; /* i/o first block allocated */
 	struct xfs_trans	*tp;	/* transaction pointer */
 	struct xfs_inode	*ip;	/* incore inode pointer */
 	struct xfs_bmbt_irec	prev;	/* extent before the new one */
-- 
cgit v1.2.3


From 92f9da30f57bdb653ee46f26df2d51484b27c7f0 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:28 -0700
Subject: xfs: remove bmap extent add helper firstblock params

The add extent helpers all receive firstblock via ->t_firstblock.
Drop the parameter and access it directly.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 815600d39b03..cb2a4cde4c3f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -2031,7 +2031,6 @@ xfs_bmap_add_extent_unwritten_real(
 	struct xfs_iext_cursor	*icur,
 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
 	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
-	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
 	int			*logflagsp) /* inode logging flags */
 {
 	xfs_btree_cur_t		*cur;	/* btree cursor */
@@ -2473,8 +2472,8 @@ xfs_bmap_add_extent_unwritten_real(
 		int	tmp_logflags;	/* partial log flag return val */
 
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, first, &cur, 0,
-				&tmp_logflags, whichfork);
+		error = xfs_bmap_extents_to_btree(tp, ip, &tp->t_firstblock,
+				&cur, 0, &tmp_logflags, whichfork);
 		*logflagsp |= tmp_logflags;
 		if (error)
 			goto done;
@@ -2645,7 +2644,6 @@ xfs_bmap_add_extent_hole_real(
 	struct xfs_iext_cursor	*icur,
 	struct xfs_btree_cur	**curp,
 	struct xfs_bmbt_irec	*new,
-	xfs_fsblock_t		*first,
 	int			*logflagsp,
 	int			flags)
 {
@@ -2837,8 +2835,8 @@ xfs_bmap_add_extent_hole_real(
 		int	tmp_logflags;	/* partial log flag return val */
 
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, first, curp, 0,
-				&tmp_logflags, whichfork);
+		error = xfs_bmap_extents_to_btree(tp, ip, &tp->t_firstblock,
+				curp, 0, &tmp_logflags, whichfork);
 		*logflagsp |= tmp_logflags;
 		cur = *curp;
 		if (error)
@@ -4107,8 +4105,7 @@ xfs_bmapi_allocate(
 	else
 		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
 				whichfork, &bma->icur, &bma->cur, &bma->got,
-				&bma->tp->t_firstblock, &bma->logflags,
-				bma->flags);
+				&bma->logflags, bma->flags);
 
 	bma->logflags |= tmp_logflags;
 	if (error)
@@ -4176,8 +4173,7 @@ xfs_bmapi_convert_unwritten(
 	}
 
 	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
-			&bma->icur, &bma->cur, mval, &bma->tp->t_firstblock,
-			&tmp_logflags);
+			&bma->icur, &bma->cur, mval, &tmp_logflags);
 	/*
 	 * Log the inode core unconditionally in the unwritten extent conversion
 	 * path because the conversion might not have done so (e.g., if the
@@ -4550,7 +4546,7 @@ xfs_bmapi_remap(
 		got.br_state = XFS_EXT_NORM;
 
 	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
-			&cur, &got, &tp->t_firstblock, &logflags, flags);
+			&cur, &got, &logflags, flags);
 	if (error)
 		goto error0;
 
@@ -5314,7 +5310,7 @@ __xfs_bunmapi(
 			del.br_state = XFS_EXT_UNWRITTEN;
 			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
 					whichfork, &icur, &cur, &del,
-					&tp->t_firstblock, &logflags);
+					&logflags);
 			if (error)
 				goto error0;
 			goto nodelete;
@@ -5371,8 +5367,7 @@ __xfs_bunmapi(
 				prev.br_state = XFS_EXT_UNWRITTEN;
 				error = xfs_bmap_add_extent_unwritten_real(tp,
 						ip, whichfork, &icur, &cur,
-						&prev, &tp->t_firstblock,
-						&logflags);
+						&prev, &logflags);
 				if (error)
 					goto error0;
 				goto nodelete;
@@ -5381,8 +5376,7 @@ __xfs_bunmapi(
 				del.br_state = XFS_EXT_UNWRITTEN;
 				error = xfs_bmap_add_extent_unwritten_real(tp,
 						ip, whichfork, &icur, &cur,
-						&del, &tp->t_firstblock,
-						&logflags);
+						&del, &logflags);
 				if (error)
 					goto error0;
 				goto nodelete;
-- 
cgit v1.2.3


From 280253d213fb735b565532be2836f94cf574260d Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:29 -0700
Subject: xfs: remove bmap format helpers firstblock params

The bmap format helpers receive firstblock via ->t_firstblock. Drop
the param and access it directly.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 63 ++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index cb2a4cde4c3f..183450f1df19 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -646,7 +646,6 @@ STATIC int					/* error */
 xfs_bmap_extents_to_btree(
 	struct xfs_trans	*tp,		/* transaction pointer */
 	struct xfs_inode	*ip,		/* incore inode pointer */
-	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
 	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
 	int			wasdel,		/* converting a delayed alloc */
 	int			*logflagsp,	/* inode logging flags */
@@ -689,7 +688,7 @@ xfs_bmap_extents_to_btree(
 	 * Need a cursor.  Can't allocate until bb_level is filled in.
 	 */
 	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-	cur->bc_private.b.firstblock = *firstblock;
+	cur->bc_private.b.firstblock = tp->t_firstblock;
 	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
 	/*
 	 * Convert to a btree with two levels, one record in root.
@@ -699,16 +698,16 @@ xfs_bmap_extents_to_btree(
 	args.tp = tp;
 	args.mp = mp;
 	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
-	args.firstblock = *firstblock;
-	if (*firstblock == NULLFSBLOCK) {
+	args.firstblock = tp->t_firstblock;
+	if (tp->t_firstblock == NULLFSBLOCK) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
 	} else if (tp->t_dfops->dop_low) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
-		args.fsbno = *firstblock;
+		args.fsbno = tp->t_firstblock;
 	} else {
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
-		args.fsbno = *firstblock;
+		args.fsbno = tp->t_firstblock;
 	}
 	args.minlen = args.maxlen = args.prod = 1;
 	args.wasdel = wasdel;
@@ -731,9 +730,9 @@ xfs_bmap_extents_to_btree(
 	/*
 	 * Allocation can't fail, the space was reserved.
 	 */
-	ASSERT(*firstblock == NULLFSBLOCK ||
-	       args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
-	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
+	ASSERT(tp->t_firstblock == NULLFSBLOCK ||
+	       args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
+	tp->t_firstblock = cur->bc_private.b.firstblock = args.fsbno;
 	cur->bc_private.b.allocated++;
 	ip->i_d.di_nblocks++;
 	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
@@ -810,7 +809,6 @@ STATIC int				/* error */
 xfs_bmap_local_to_extents(
 	xfs_trans_t	*tp,		/* transaction pointer */
 	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
 	xfs_extlen_t	total,		/* total blocks needed by transaction */
 	int		*logflagsp,	/* inode logging flags */
 	int		whichfork,
@@ -848,16 +846,16 @@ xfs_bmap_local_to_extents(
 	args.tp = tp;
 	args.mp = ip->i_mount;
 	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
-	args.firstblock = *firstblock;
+	args.firstblock = tp->t_firstblock;
 	/*
 	 * Allocate a block.  We know we need only one, since the
 	 * file currently fits in an inode.
 	 */
-	if (*firstblock == NULLFSBLOCK) {
+	if (tp->t_firstblock == NULLFSBLOCK) {
 		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
 		args.type = XFS_ALLOCTYPE_START_BNO;
 	} else {
-		args.fsbno = *firstblock;
+		args.fsbno = tp->t_firstblock;
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
 	}
 	args.total = total;
@@ -869,7 +867,7 @@ xfs_bmap_local_to_extents(
 	/* Can't fail, the space was reserved. */
 	ASSERT(args.fsbno != NULLFSBLOCK);
 	ASSERT(args.len == 1);
-	*firstblock = args.fsbno;
+	tp->t_firstblock = args.fsbno;
 	bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
 
 	/*
@@ -964,8 +962,8 @@ xfs_bmap_add_attrfork_extents(
 	if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
 		return 0;
 	cur = NULL;
-	error = xfs_bmap_extents_to_btree(tp, ip, &tp->t_firstblock, &cur, 0,
-					  flags, XFS_DATA_FORK);
+	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
+					  XFS_DATA_FORK);
 	if (cur) {
 		cur->bc_private.b.allocated = 0;
 		xfs_btree_del_cursor(cur,
@@ -1007,8 +1005,8 @@ xfs_bmap_add_attrfork_local(
 	}
 
 	if (S_ISLNK(VFS_I(ip)->i_mode))
-		return xfs_bmap_local_to_extents(tp, ip, &tp->t_firstblock, 1,
-						 flags, XFS_DATA_FORK,
+		return xfs_bmap_local_to_extents(tp, ip, 1, flags,
+						 XFS_DATA_FORK,
 						 xfs_symlink_local_to_remote);
 
 	/* should only be called for types that support local format data */
@@ -1794,8 +1792,7 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-					&bma->tp->t_firstblock, &bma->cur, 1,
-					&tmp_rval, whichfork);
+					&bma->cur, 1, &tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
 				goto done;
@@ -1872,8 +1869,7 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-				&bma->tp->t_firstblock, &bma->cur, 1, &tmp_rval,
-				whichfork);
+				&bma->cur, 1, &tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
 				goto done;
@@ -1953,8 +1949,7 @@ xfs_bmap_add_extent_delay_real(
 
 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-					&bma->tp->t_firstblock, &bma->cur, 1,
-					&tmp_rval, whichfork);
+					&bma->cur, 1, &tmp_rval, whichfork);
 			rval |= tmp_rval;
 			if (error)
 				goto done;
@@ -1991,8 +1986,8 @@ xfs_bmap_add_extent_delay_real(
 
 		ASSERT(bma->cur == NULL);
 		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-				&bma->tp->t_firstblock, &bma->cur, da_old > 0,
-				&tmp_logflags, whichfork);
+				&bma->cur, da_old > 0, &tmp_logflags,
+				whichfork);
 		bma->logflags |= tmp_logflags;
 		if (error)
 			goto done;
@@ -2472,8 +2467,8 @@ xfs_bmap_add_extent_unwritten_real(
 		int	tmp_logflags;	/* partial log flag return val */
 
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, &tp->t_firstblock,
-				&cur, 0, &tmp_logflags, whichfork);
+		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
+				&tmp_logflags, whichfork);
 		*logflagsp |= tmp_logflags;
 		if (error)
 			goto done;
@@ -2835,8 +2830,8 @@ xfs_bmap_add_extent_hole_real(
 		int	tmp_logflags;	/* partial log flag return val */
 
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, &tp->t_firstblock,
-				curp, 0, &tmp_logflags, whichfork);
+		error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
+				&tmp_logflags, whichfork);
 		*logflagsp |= tmp_logflags;
 		cur = *curp;
 		if (error)
@@ -5423,8 +5418,8 @@ nodelete:
 	 */
 	if (xfs_bmap_needs_btree(ip, whichfork)) {
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, &tp->t_firstblock,
-				&cur, 0, &tmp_logflags, whichfork);
+		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
+				&tmp_logflags, whichfork);
 		logflags |= tmp_logflags;
 		if (error)
 			goto error0;
@@ -5967,8 +5962,8 @@ xfs_bmap_split_extent_at(
 		int tmp_logflags; /* partial log flag return val */
 
 		ASSERT(cur == NULL);
-		error = xfs_bmap_extents_to_btree(tp, ip, &tp->t_firstblock,
-				&cur, 0, &tmp_logflags, whichfork);
+		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
+				&tmp_logflags, whichfork);
 		logflags |= tmp_logflags;
 	}
 
-- 
cgit v1.2.3


From cf612de732cb6ef626019ca085406d183f0a055a Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:29 -0700
Subject: xfs: remove xfs_btree_cur private firstblock field

The bmbt cursor private structure has a firstblock field that is
used to maintain locking order on bmbt allocations. The field holds
an actual firstblock value (as opposed to a pointer), so it is
initialized on cursor creation, updated on allocation and then the
value is transferred back to the source before the cursor is
destroyed.

This value is always transferred from and back to the ->t_firstblock
field. Since xfs_btree_cur already carries a reference to the
transaction, we can remove this field from xfs_btree_cur and the
associated copying. The bmbt allocations will update the value in
the transaction directly.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c       | 28 +++-------------------------
 fs/xfs/libxfs/xfs_bmap_btree.c | 10 ++++------
 fs/xfs/libxfs/xfs_btree.h      |  1 -
 3 files changed, 7 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 183450f1df19..8a1e6890a64b 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -688,7 +688,6 @@ xfs_bmap_extents_to_btree(
 	 * Need a cursor.  Can't allocate until bb_level is filled in.
 	 */
 	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-	cur->bc_private.b.firstblock = tp->t_firstblock;
 	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
 	/*
 	 * Convert to a btree with two levels, one record in root.
@@ -732,7 +731,7 @@ xfs_bmap_extents_to_btree(
 	 */
 	ASSERT(tp->t_firstblock == NULLFSBLOCK ||
 	       args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
-	tp->t_firstblock = cur->bc_private.b.firstblock = args.fsbno;
+	tp->t_firstblock = args.fsbno;
 	cur->bc_private.b.allocated++;
 	ip->i_d.di_nblocks++;
 	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
@@ -925,7 +924,6 @@ xfs_bmap_add_attrfork_btree(
 		*flags |= XFS_ILOG_DBROOT;
 	else {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
-		cur->bc_private.b.firstblock = tp->t_firstblock;
 		error = xfs_bmbt_lookup_first(cur, &stat);
 		if (error)
 			goto error0;
@@ -937,7 +935,6 @@ xfs_bmap_add_attrfork_btree(
 			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 			return -ENOSPC;
 		}
-		tp->t_firstblock = cur->bc_private.b.firstblock;
 		cur->bc_private.b.allocated = 0;
 		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 	}
@@ -4058,14 +4055,10 @@ xfs_bmapi_allocate(
 	if (error)
 		return error;
 
-	if (bma->cur)
-		bma->cur->bc_private.b.firstblock = bma->tp->t_firstblock;
 	if (bma->blkno == NULLFSBLOCK)
 		return 0;
-	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
+	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur)
 		bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
-		bma->cur->bc_private.b.firstblock = bma->tp->t_firstblock;
-	}
 	/*
 	 * Bump the number of extents we've allocated
 	 * in this call.
@@ -4151,7 +4144,6 @@ xfs_bmapi_convert_unwritten(
 	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
 		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
 					bma->ip, whichfork);
-		bma->cur->bc_private.b.firstblock = bma->tp->t_firstblock;
 	}
 	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
 				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
@@ -4458,13 +4450,6 @@ error0:
 		xfs_trans_log_inode(tp, ip, bma.logflags);
 
 	if (bma.cur) {
-		if (!error) {
-			ASSERT(tp->t_firstblock == NULLFSBLOCK ||
-			       XFS_FSB_TO_AGNO(mp, tp->t_firstblock) <=
-			       XFS_FSB_TO_AGNO(mp,
-				       bma.cur->bc_private.b.firstblock));
-			tp->t_firstblock = bma.cur->bc_private.b.firstblock;
-		}
 		xfs_btree_del_cursor(bma.cur,
 			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
 	}
@@ -4528,7 +4513,6 @@ xfs_bmapi_remap(
 
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-		cur->bc_private.b.firstblock = tp->t_firstblock;
 		cur->bc_private.b.flags = 0;
 	}
 
@@ -5189,7 +5173,6 @@ __xfs_bunmapi(
 	if (ifp->if_flags & XFS_IFBROOT) {
 		ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-		cur->bc_private.b.firstblock = tp->t_firstblock;
 		cur->bc_private.b.flags = 0;
 	} else
 		cur = NULL;
@@ -5457,10 +5440,8 @@ error0:
 	if (logflags)
 		xfs_trans_log_inode(tp, ip, logflags);
 	if (cur) {
-		if (!error) {
-			tp->t_firstblock = cur->bc_private.b.firstblock;
+		if (!error)
 			cur->bc_private.b.allocated = 0;
-		}
 		xfs_btree_del_cursor(cur,
 			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
 	}
@@ -5676,7 +5657,6 @@ xfs_bmap_collapse_extents(
 
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-		cur->bc_private.b.firstblock = tp->t_firstblock;
 		cur->bc_private.b.flags = 0;
 	}
 
@@ -5796,7 +5776,6 @@ xfs_bmap_insert_extents(
 
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-		cur->bc_private.b.firstblock = tp->t_firstblock;
 		cur->bc_private.b.flags = 0;
 	}
 
@@ -5918,7 +5897,6 @@ xfs_bmap_split_extent_at(
 
 	if (ifp->if_flags & XFS_IFBROOT) {
 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-		cur->bc_private.b.firstblock = tp->t_firstblock;
 		cur->bc_private.b.flags = 0;
 		error = xfs_bmbt_lookup_eq(cur, &got, &i);
 		if (error)
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index e8b01af09db5..8a9b98b11e34 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -175,7 +175,6 @@ xfs_bmbt_dup_cursor(
 	 * Copy the firstblock, dfops, and flags values,
 	 * since init cursor doesn't get them.
 	 */
-	new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
 	new->bc_private.b.flags = cur->bc_private.b.flags;
 
 	return new;
@@ -186,11 +185,11 @@ xfs_bmbt_update_cursor(
 	struct xfs_btree_cur	*src,
 	struct xfs_btree_cur	*dst)
 {
-	ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
+	ASSERT((dst->bc_tp->t_firstblock != NULLFSBLOCK) ||
 	       (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
 
 	dst->bc_private.b.allocated += src->bc_private.b.allocated;
-	dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
+	dst->bc_tp->t_firstblock = src->bc_tp->t_firstblock;
 
 	src->bc_private.b.allocated = 0;
 }
@@ -208,7 +207,7 @@ xfs_bmbt_alloc_block(
 	memset(&args, 0, sizeof(args));
 	args.tp = cur->bc_tp;
 	args.mp = cur->bc_mp;
-	args.fsbno = cur->bc_private.b.firstblock;
+	args.fsbno = cur->bc_tp->t_firstblock;
 	args.firstblock = args.fsbno;
 	xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino,
 			cur->bc_private.b.whichfork);
@@ -263,7 +262,7 @@ xfs_bmbt_alloc_block(
 	}
 
 	ASSERT(args.len == 1);
-	cur->bc_private.b.firstblock = args.fsbno;
+	cur->bc_tp->t_firstblock = args.fsbno;
 	cur->bc_private.b.allocated++;
 	cur->bc_private.b.ip->i_d.di_nblocks++;
 	xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
@@ -562,7 +561,6 @@ xfs_bmbt_init_cursor(
 
 	cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
 	cur->bc_private.b.ip = ip;
-	cur->bc_private.b.firstblock = NULLFSBLOCK;
 	cur->bc_private.b.allocated = 0;
 	cur->bc_private.b.flags = 0;
 	cur->bc_private.b.whichfork = whichfork;
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index b986a8fc8d40..503615f4d729 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -214,7 +214,6 @@ typedef struct xfs_btree_cur
 		} a;
 		struct {			/* needed for BMAP */
 			struct xfs_inode *ip;	/* pointer to our inode */
-			xfs_fsblock_t	firstblock;	/* 1st blk allocated */
 			int		allocated;	/* count of alloced */
 			short		forksize;	/* fork's inode space */
 			char		whichfork;	/* data or attr fork */
-- 
cgit v1.2.3


From 64396ff2c25b2cd8156948a64ae0da5ff962e3f2 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:30 -0700
Subject: xfs: remove xfs_alloc_arg firstblock field

The xfs_alloc_arg.firstblock field is used to control the starting
agno for an allocation. The structure already carries a pointer to
the transaction, which carries the current firstblock value.

Remove the field and access ->t_firstblock directly in the
allocation code.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c          | 20 ++++++++++----------
 fs/xfs/libxfs/xfs_alloc.h          |  1 -
 fs/xfs/libxfs/xfs_bmap.c           |  3 ---
 fs/xfs/libxfs/xfs_bmap_btree.c     |  1 -
 fs/xfs/libxfs/xfs_refcount_btree.c |  1 -
 fs/xfs/xfs_trace.h                 |  2 +-
 6 files changed, 11 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 5b1607d76fe9..bd6d8aeea825 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2783,16 +2783,16 @@ xfs_alloc_read_agf(
  */
 int				/* error */
 xfs_alloc_vextent(
-	xfs_alloc_arg_t	*args)	/* allocation argument structure */
+	struct xfs_alloc_arg	*args)	/* allocation argument structure */
 {
-	xfs_agblock_t	agsize;	/* allocation group size */
-	int		error;
-	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
-	xfs_mount_t	*mp;	/* mount structure pointer */
-	xfs_agnumber_t	sagno;	/* starting allocation group number */
-	xfs_alloctype_t	type;	/* input allocation type */
-	int		bump_rotor = 0;
-	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */
+	xfs_agblock_t		agsize;	/* allocation group size */
+	int			error;
+	int			flags;	/* XFS_ALLOC_FLAG_... locking flags */
+	struct xfs_mount	*mp;	/* mount structure pointer */
+	xfs_agnumber_t		sagno;	/* starting allocation group number */
+	xfs_alloctype_t		type;	/* input allocation type */
+	int			bump_rotor = 0;
+	xfs_agnumber_t		rotorstep = xfs_rotorstep; /* inode32 agf stepper */
 
 	mp = args->mp;
 	type = args->otype = args->type;
@@ -2913,7 +2913,7 @@ xfs_alloc_vextent(
 			* locking of AGF, which might cause deadlock.
 			*/
 			if (++(args->agno) == mp->m_sb.sb_agcount) {
-				if (args->firstblock != NULLFSBLOCK)
+				if (args->tp->t_firstblock != NULLFSBLOCK)
 					args->agno = sagno;
 				else
 					args->agno = 0;
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index e716c993ac4c..00cd5ec4cb6b 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -74,7 +74,6 @@ typedef struct xfs_alloc_arg {
 	int		datatype;	/* mask defining data type treatment */
 	char		wasdel;		/* set if allocation was prev delayed */
 	char		wasfromfl;	/* set if allocation is from freelist */
-	xfs_fsblock_t	firstblock;	/* io first block allocated */
 	struct xfs_owner_info	oinfo;	/* owner of blocks being allocated */
 	enum xfs_ag_resv_type	resv;	/* block reservation to use */
 } xfs_alloc_arg_t;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 8a1e6890a64b..12be9ad888c3 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -697,7 +697,6 @@ xfs_bmap_extents_to_btree(
 	args.tp = tp;
 	args.mp = mp;
 	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
-	args.firstblock = tp->t_firstblock;
 	if (tp->t_firstblock == NULLFSBLOCK) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
@@ -845,7 +844,6 @@ xfs_bmap_local_to_extents(
 	args.tp = tp;
 	args.mp = ip->i_mount;
 	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
-	args.firstblock = tp->t_firstblock;
 	/*
 	 * Allocate a block.  We know we need only one, since the
 	 * file currently fits in an inode.
@@ -3445,7 +3443,6 @@ xfs_bmap_btalloc(
 
 	/* Trim the allocation back to the maximum an AG can fit. */
 	args.maxlen = min(ap->length, mp->m_ag_max_usable);
-	args.firstblock = ap->tp->t_firstblock;
 	blen = 0;
 	if (nullfb) {
 		/*
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 8a9b98b11e34..628ed82ca286 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -208,7 +208,6 @@ xfs_bmbt_alloc_block(
 	args.tp = cur->bc_tp;
 	args.mp = cur->bc_mp;
 	args.fsbno = cur->bc_tp->t_firstblock;
-	args.firstblock = args.fsbno;
 	xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino,
 			cur->bc_private.b.whichfork);
 
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 393aa88f93db..26d2300ed865 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -70,7 +70,6 @@ xfs_refcountbt_alloc_block(
 	args.type = XFS_ALLOCTYPE_NEAR_BNO;
 	args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno,
 			xfs_refc_block(args.mp));
-	args.firstblock = args.fsbno;
 	xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_REFC);
 	args.minlen = args.maxlen = args.prod = 1;
 	args.resv = XFS_AG_RESV_METADATA;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7f4c7071e7ed..9d741571b61e 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1590,7 +1590,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
 		__entry->wasfromfl = args->wasfromfl;
 		__entry->resv = args->resv;
 		__entry->datatype = args->datatype;
-		__entry->firstblock = args->firstblock;
+		__entry->firstblock = args->tp->t_firstblock;
 	),
 	TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
 		  "prod %u minleft %u total %u alignment %u minalignslop %u "
-- 
cgit v1.2.3


From 058529c5f51cd680eddbc6c42f56d490e290dd78 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:30 -0700
Subject: xfs: use ->t_firstblock in dq alloc

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_dquot.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 84359eeb20f4..3b61b4d266b4 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -540,14 +540,13 @@ xfs_qm_dqread_alloc(
 	struct xfs_trans	*tp;
 	struct xfs_defer_ops	dfops;
 	struct xfs_buf		*bp;
-	xfs_fsblock_t		firstblock;
 	int			error;
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
 			XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
 	if (error)
 		goto err;
-	xfs_defer_init(tp, &dfops, &firstblock);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 
 	error = xfs_dquot_disk_alloc(&tp, dqp, &bp);
 	if (error)
-- 
cgit v1.2.3


From fb91f4b5d6187996796f4a9989decdf6ead12851 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:31 -0700
Subject: xfs: replace no-op firstblock init with ->t_firstblock

xfs_refcount_recover_cow_leftovers() has no need for a firstblock
variable and so passes an unrelated xfs_fsblock_t to
xfs_defer_init() to avoid declaring one. Replace this no-op
initialization with ->t_firstblock. This will be optimized away by
the removal of the xfs_defer_init() firstblock param.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_refcount.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 8dc380574cd8..d81c17aac710 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1691,7 +1691,7 @@ xfs_refcount_recover_cow_leftovers(
 		trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);
 
 		/* Free the orphan record */
-		xfs_defer_init(tp, &dfops, &fsb);
+		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
 		fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
 		error = xfs_refcount_free_cow_extent(mp, tp->t_dfops, fsb,
-- 
cgit v1.2.3


From 381d592848721cb1b82b4ea9f57b46cf4a4a6973 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:31 -0700
Subject: xfs: use ->t_firstblock in reflink cow block cancel

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_reflink.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 2972efeee5cc..891214242118 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -483,7 +483,6 @@ xfs_reflink_cancel_cow_blocks(
 	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	struct xfs_bmbt_irec		got, del;
 	struct xfs_iext_cursor		icur;
-	xfs_fsblock_t			firstfsb;
 	struct xfs_defer_ops		dfops;
 	struct xfs_defer_ops		*odfops = (*tpp)->t_dfops;
 	int				error = 0;
@@ -512,7 +511,7 @@ xfs_reflink_cancel_cow_blocks(
 			if (error)
 				break;
 		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
-			xfs_defer_init(*tpp, &dfops, &firstfsb);
+			xfs_defer_init(*tpp, &dfops, &(*tpp)->t_firstblock);
 
 			/* Free the CoW orphan record. */
 			error = xfs_refcount_free_cow_extent(ip->i_mount,
-- 
cgit v1.2.3


From f537538921872c772aa743d8b58b69e7729c73aa Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:32 -0700
Subject: xfs: use ->t_firstblock in extent swap

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index e9907bbe9529..765859843606 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1570,7 +1570,6 @@ xfs_swap_extent_rmap(
 	xfs_fileoff_t			offset_fsb;
 	xfs_fileoff_t			end_fsb;
 	xfs_filblks_t			count_fsb;
-	xfs_fsblock_t			firstfsb;
 	int				error;
 	xfs_filblks_t			ilen;
 	xfs_filblks_t			rlen;
@@ -1606,7 +1605,7 @@ xfs_swap_extent_rmap(
 
 		/* Unmap the old blocks in the source file. */
 		while (tirec.br_blockcount) {
-			xfs_defer_init(tp, tp->t_dfops, &firstfsb);
+			xfs_defer_init(tp, tp->t_dfops, &tp->t_firstblock);
 			trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
 
 			/* Read extent from the source file */
@@ -1848,7 +1847,6 @@ xfs_swap_extents(
 	struct xfs_ifork	*cowfp;
 	uint64_t		f;
 	int			resblks = 0;
-	xfs_fsblock_t		firstfsb;
 
 	/*
 	 * Lock the inodes against other IO, page faults and truncate to
@@ -1911,7 +1909,7 @@ xfs_swap_extents(
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
 	if (error)
 		goto out_unlock;
-	xfs_defer_init(tp, &dfops, &firstfsb);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 
 	/*
 	 * Lock and join the inodes to the tansaction so that transaction commit
-- 
cgit v1.2.3


From 9c3bf5da80efc1d502c7ef55ea5b77628e341510 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:32 -0700
Subject: xfs: use ->t_firstblock in inode inactivate

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5e1eaa26435c..48d22134b06f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1772,7 +1772,6 @@ xfs_inactive_ifree(
 	struct xfs_inode *ip)
 {
 	struct xfs_defer_ops	dfops;
-	xfs_fsblock_t		first_block;
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
 	int			error;
@@ -1809,7 +1808,7 @@ xfs_inactive_ifree(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
 
-	xfs_defer_init(tp, &dfops, &first_block);
+	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
 	error = xfs_ifree(tp, ip);
 	if (error) {
 		/*
-- 
cgit v1.2.3


From 5fdd97944ee5ae0fcdd88227224d0c2c87aa6db9 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:33 -0700
Subject: xfs: remove xfs_defer_init() firstblock param

All but one caller of xfs_defer_init() pass in the ->t_firstblock
of the associated transaction. The one outlier is
xlog_recover_process_intents(), which simply passes a dummy value
because a valid pointer is required. That dummy firstblock variable
can simply be removed.

At this point we could remove the xfs_defer_init() firstblock
parameter and initialize ->t_firstblock directly. Even that is not
necessary, however, because ->t_firstblock is automatically
reinitialized in the new transaction on a transaction roll. Since
xfs_defer_init() should never occur more than once on a particular
transaction (since the corresponding finish will roll it), replace
the reinit from xfs_defer_init() with an assert that verifies the
transaction has a NULLFSBLOCK firstblock.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c        | 28 ++++++++++------------------
 fs/xfs/libxfs/xfs_attr_remote.c |  9 +++------
 fs/xfs/libxfs/xfs_bmap.c        |  4 ++--
 fs/xfs/libxfs/xfs_defer.c       |  5 ++---
 fs/xfs/libxfs/xfs_defer.h       |  3 +--
 fs/xfs/libxfs/xfs_refcount.c    |  2 +-
 fs/xfs/xfs_bmap_util.c          | 12 ++++++------
 fs/xfs/xfs_dquot.c              |  4 ++--
 fs/xfs/xfs_inode.c              | 12 ++++++------
 fs/xfs/xfs_iomap.c              |  6 +++---
 fs/xfs/xfs_log_recover.c        |  3 +--
 fs/xfs/xfs_reflink.c            |  8 ++++----
 fs/xfs/xfs_rtalloc.c            |  2 +-
 fs/xfs/xfs_symlink.c            |  4 ++--
 14 files changed, 44 insertions(+), 58 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 153d2e29f872..927d4c968f9a 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -251,7 +251,7 @@ xfs_attr_set(
 			rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
 	if (error)
 		return error;
-	xfs_defer_init(args.trans, &dfops, &args.trans->t_firstblock);
+	xfs_defer_init(args.trans, &dfops);
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
@@ -422,7 +422,7 @@ xfs_attr_remove(
 			&args.trans);
 	if (error)
 		return error;
-	xfs_defer_init(args.trans, &dfops, &args.trans->t_firstblock);
+	xfs_defer_init(args.trans, &dfops);
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 	/*
@@ -593,8 +593,7 @@ xfs_attr_leaf_addname(
 		 * Commit that transaction so that the node_addname() call
 		 * can manage its own transactions.
 		 */
-		xfs_defer_init(args->trans, args->trans->t_dfops,
-			       &args->trans->t_firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops);
 		error = xfs_attr3_leaf_to_node(args);
 		if (error)
 			goto out_defer_cancel;
@@ -683,8 +682,7 @@ xfs_attr_leaf_addname(
 		 * If the result is small enough, shrink it all into the inode.
 		 */
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-			xfs_defer_init(args->trans, args->trans->t_dfops,
-				       &args->trans->t_firstblock);
+			xfs_defer_init(args->trans, args->trans->t_dfops);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
@@ -749,8 +747,7 @@ xfs_attr_leaf_removename(
 	 * If the result is small enough, shrink it all into the inode.
 	 */
 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-		xfs_defer_init(args->trans, args->trans->t_dfops,
-			       &args->trans->t_firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops);
 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 		/* bp is gone due to xfs_da_shrink_inode */
 		if (error)
@@ -879,8 +876,7 @@ restart:
 			 */
 			xfs_da_state_free(state);
 			state = NULL;
-			xfs_defer_init(args->trans, args->trans->t_dfops,
-				       &args->trans->t_firstblock);
+			xfs_defer_init(args->trans, args->trans->t_dfops);
 			error = xfs_attr3_leaf_to_node(args);
 			if (error)
 				goto out_defer_cancel;
@@ -907,8 +903,7 @@ restart:
 		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
 		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
 		 */
-		xfs_defer_init(args->trans, args->trans->t_dfops,
-			       &args->trans->t_firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops);
 		error = xfs_da3_split(state);
 		if (error)
 			goto out_defer_cancel;
@@ -1006,8 +1001,7 @@ restart:
 		 * Check to see if the tree needs to be collapsed.
 		 */
 		if (retval && (state->path.active > 1)) {
-			xfs_defer_init(args->trans, args->trans->t_dfops,
-				       &args->trans->t_firstblock);
+			xfs_defer_init(args->trans, args->trans->t_dfops);
 			error = xfs_da3_join(state);
 			if (error)
 				goto out_defer_cancel;
@@ -1132,8 +1126,7 @@ xfs_attr_node_removename(
 	 * Check to see if the tree needs to be collapsed.
 	 */
 	if (retval && (state->path.active > 1)) {
-		xfs_defer_init(args->trans, args->trans->t_dfops,
-			       &args->trans->t_firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops);
 		error = xfs_da3_join(state);
 		if (error)
 			goto out_defer_cancel;
@@ -1165,8 +1158,7 @@ xfs_attr_node_removename(
 			goto out;
 
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-			xfs_defer_init(args->trans, args->trans->t_dfops,
-				       &args->trans->t_firstblock);
+			xfs_defer_init(args->trans, args->trans->t_dfops);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index f02c705965ff..7841e6255129 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -480,8 +480,7 @@ xfs_attr_rmtval_set(
 		 * extent and then crash then the block may not contain the
 		 * correct metadata after log recovery occurs.
 		 */
-		xfs_defer_init(args->trans, args->trans->t_dfops,
-			       &args->trans->t_firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops);
 		nmap = 1;
 		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
 				  blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map,
@@ -523,8 +522,7 @@ xfs_attr_rmtval_set(
 
 		ASSERT(blkcnt > 0);
 
-		xfs_defer_init(args->trans, args->trans->t_dfops,
-			       &args->trans->t_firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops);
 		nmap = 1;
 		error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
 				       blkcnt, &map, &nmap,
@@ -628,8 +626,7 @@ xfs_attr_rmtval_remove(
 	blkcnt = args->rmtblkcnt;
 	done = 0;
 	while (!done) {
-		xfs_defer_init(args->trans, args->trans->t_dfops,
-			       &args->trans->t_firstblock);
+		xfs_defer_init(args->trans, args->trans->t_dfops);
 		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
 				    XFS_BMAPI_ATTRFORK, 1, &done);
 		if (error)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 12be9ad888c3..7b93b1e16ad9 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1038,7 +1038,7 @@ xfs_bmap_add_attrfork(
 			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
 	if (error)
 		return error;
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
@@ -5968,7 +5968,7 @@ xfs_bmap_split_extent(
 			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
 	if (error)
 		return error;
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 6b25a9436829..2713e2d808a7 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -524,16 +524,15 @@ xfs_defer_init_op_type(
 void
 xfs_defer_init(
 	struct xfs_trans		*tp,
-	struct xfs_defer_ops		*dop,
-	xfs_fsblock_t			*fbp)
+	struct xfs_defer_ops		*dop)
 {
 	struct xfs_mount		*mp = NULL;
 
 	memset(dop, 0, sizeof(struct xfs_defer_ops));
-	*fbp = NULLFSBLOCK;
 	INIT_LIST_HEAD(&dop->dop_intake);
 	INIT_LIST_HEAD(&dop->dop_pending);
 	if (tp) {
+		ASSERT(tp->t_firstblock == NULLFSBLOCK);
 		tp->t_dfops = dop;
 		mp = tp->t_mountp;
 	}
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 56eaaac31df5..c17c9deda995 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -63,8 +63,7 @@ void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
 		struct list_head *h);
 int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop);
 void xfs_defer_cancel(struct xfs_defer_ops *dop);
-void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop,
-		    xfs_fsblock_t *fbp);
+void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
 int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index d81c17aac710..2ecfb0518580 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1691,7 +1691,7 @@ xfs_refcount_recover_cow_leftovers(
 		trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);
 
 		/* Free the orphan record */
-		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+		xfs_defer_init(tp, &dfops);
 		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
 		fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
 		error = xfs_refcount_free_cow_extent(mp, tp->t_dfops, fsb,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 765859843606..d3a314fd721f 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -971,7 +971,7 @@ xfs_alloc_file_space(
 
 		xfs_trans_ijoin(tp, ip, 0);
 
-		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+		xfs_defer_init(tp, &dfops);
 		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
 					allocatesize_fsb, alloc_type, resblks,
 					imapp, &nimaps);
@@ -1040,7 +1040,7 @@ xfs_unmap_extent(
 
 	xfs_trans_ijoin(tp, ip, 0);
 
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, done);
 	if (error)
 		goto out_bmap_cancel;
@@ -1341,7 +1341,7 @@ xfs_collapse_file_space(
 			goto out_trans_cancel;
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
-		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+		xfs_defer_init(tp, &dfops);
 		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
 				&done);
 		if (error)
@@ -1419,7 +1419,7 @@ xfs_insert_file_space(
 
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+		xfs_defer_init(tp, &dfops);
 		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
 				&done, stop_fsb);
 		if (error)
@@ -1605,7 +1605,7 @@ xfs_swap_extent_rmap(
 
 		/* Unmap the old blocks in the source file. */
 		while (tirec.br_blockcount) {
-			xfs_defer_init(tp, tp->t_dfops, &tp->t_firstblock);
+			xfs_defer_init(tp, tp->t_dfops);
 			trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
 
 			/* Read extent from the source file */
@@ -1909,7 +1909,7 @@ xfs_swap_extents(
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
 	if (error)
 		goto out_unlock;
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 
 	/*
 	 * Lock and join the inodes to the tansaction so that transaction commit
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 3b61b4d266b4..c53de34c9ae5 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -295,7 +295,7 @@ xfs_dquot_disk_alloc(
 
 	trace_xfs_dqalloc(dqp);
 
-	xfs_defer_init(tp, tp->t_dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, tp->t_dfops);
 
 	xfs_ilock(quotip, XFS_ILOCK_EXCL);
 	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
@@ -546,7 +546,7 @@ xfs_qm_dqread_alloc(
 			XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
 	if (error)
 		goto err;
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 
 	error = xfs_dquot_disk_alloc(&tp, dqp, &bp);
 	if (error)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 48d22134b06f..7b2694d3901a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1194,7 +1194,7 @@ xfs_create(
 	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
 	unlock_dp_on_error = true;
 
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 
 	/*
 	 * Reserve disk quota and the inode.
@@ -1448,7 +1448,7 @@ xfs_link(
 			goto error_return;
 	}
 
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 
 	/*
 	 * Handle initial link state of O_TMPFILE inode
@@ -1579,7 +1579,7 @@ xfs_itruncate_extents_flags(
 	ASSERT(first_unmap_block < last_block);
 	unmap_len = last_block - first_unmap_block + 1;
 	while (!done) {
-		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+		xfs_defer_init(tp, &dfops);
 		error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
 				    XFS_ITRUNC_MAX_EXTENTS, &done);
 		if (error)
@@ -1808,7 +1808,7 @@ xfs_inactive_ifree(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
 
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 	error = xfs_ifree(tp, ip);
 	if (error) {
 		/*
@@ -2651,7 +2651,7 @@ xfs_remove(
 	if (error)
 		goto out_trans_cancel;
 
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 	error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks);
 	if (error) {
 		ASSERT(error != -ENOENT);
@@ -3008,7 +3008,7 @@ xfs_rename(
 		goto out_trans_cancel;
 	}
 
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 
 	/* RENAME_EXCHANGE is unique from here on. */
 	if (flags & RENAME_EXCHANGE)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 0ae822538a63..756694219f77 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -253,7 +253,7 @@ xfs_iomap_write_direct(
 	 * From this point onwards we overwrite the imap pointer that the
 	 * caller gave to us.
 	 */
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 	nimaps = 1;
 	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
 				bmapi_flags, resblks, imap, &nimaps);
@@ -713,7 +713,7 @@ xfs_iomap_write_allocate(
 			xfs_ilock(ip, XFS_ILOCK_EXCL);
 			xfs_trans_ijoin(tp, ip, 0);
 
-			xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+			xfs_defer_init(tp, &dfops);
 
 			/*
 			 * it is possible that the extents have changed since
@@ -872,7 +872,7 @@ xfs_iomap_write_unwritten(
 		/*
 		 * Modify the unwritten extent state of the buffer.
 		 */
-		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+		xfs_defer_init(tp, &dfops);
 		nimaps = 1;
 		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
 					XFS_BMAPI_CONVERT, resblks, &imap,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 940eb30e0271..8317023293a5 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4890,7 +4890,6 @@ xlog_recover_process_intents(
 	struct xfs_ail_cursor	cur;
 	struct xfs_log_item	*lip;
 	struct xfs_ail		*ailp;
-	xfs_fsblock_t		firstfsb;
 	int			error = 0;
 #if defined(DEBUG) || defined(XFS_WARN)
 	xfs_lsn_t		last_lsn;
@@ -4902,7 +4901,7 @@ xlog_recover_process_intents(
 #if defined(DEBUG) || defined(XFS_WARN)
 	last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
 #endif
-	xfs_defer_init(NULL, &dfops, &firstfsb);
+	xfs_defer_init(NULL, &dfops);
 	while (lip != NULL) {
 		/*
 		 * We're done when we see something other than an intent.
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 891214242118..3143889097f1 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -424,7 +424,7 @@ retry:
 
 	xfs_trans_ijoin(tp, ip, 0);
 
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 	nimaps = 1;
 
 	/* Allocate the entire reservation as unwritten blocks. */
@@ -511,7 +511,7 @@ xfs_reflink_cancel_cow_blocks(
 			if (error)
 				break;
 		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
-			xfs_defer_init(*tpp, &dfops, &(*tpp)->t_firstblock);
+			xfs_defer_init(*tpp, &dfops);
 
 			/* Free the CoW orphan record. */
 			error = xfs_refcount_free_cow_extent(ip->i_mount,
@@ -693,7 +693,7 @@ xfs_reflink_end_cow(
 			goto prev_extent;
 
 		/* Unmap the old blocks in the data fork. */
-		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+		xfs_defer_init(tp, &dfops);
 		rlen = del.br_blockcount;
 		error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1);
 		if (error)
@@ -1041,7 +1041,7 @@ xfs_reflink_remap_extent(
 	/* Unmap the old blocks in the data fork. */
 	rlen = unmap_len;
 	while (rlen) {
-		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+		xfs_defer_init(tp, &dfops);
 		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1);
 		if (error)
 			goto out_defer;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index edd949376a51..bc471d42a968 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -786,7 +786,7 @@ xfs_growfs_rt_alloc(
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
-		xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+		xfs_defer_init(tp, &dfops);
 		/*
 		 * Allocate blocks to the bitmap file.
 		 */
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index a3dc552a5b97..d1ab0afa2723 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -245,7 +245,7 @@ xfs_symlink(
 	 * Initialize the bmap freelist prior to calling either
 	 * bmapi or the directory create code.
 	 */
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 
 	/*
 	 * Allocate an inode for the symlink.
@@ -438,7 +438,7 @@ xfs_inactive_symlink_rmt(
 	 * Find the block(s) so we can inval and unmap them.
 	 */
 	done = 0;
-	xfs_defer_init(tp, &dfops, &tp->t_firstblock);
+	xfs_defer_init(tp, &dfops);
 	nmaps = ARRAY_SIZE(mval);
 	error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),
 				mval, &nmaps, 0);
-- 
cgit v1.2.3


From eaebb515f1a4728565c482b13dfb17273dc4ddc8 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:34 -0700
Subject: xfs: refactor buffer submission into a common helper

Sync and async buffer submission both do generally similar things
with a couple of odd exceptions. Refactor the core buffer submission
code into a common helper to isolate buffer submission from
completion handling of synchronous buffer I/O.

This patch does not change behavior. It is a step towards support
for using synchronous buffer I/O via synchronous delwri queue
submission.

Designed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_buf.c   | 85 ++++++++++++++++++++++++------------------------------
 fs/xfs/xfs_trace.h |  1 -
 2 files changed, 37 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e9c058e3761c..7b0f7c79cd62 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1458,22 +1458,20 @@ _xfs_buf_ioapply(
  * a call to this function unless the caller holds an additional reference
  * itself.
  */
-void
-xfs_buf_submit(
+static int
+__xfs_buf_submit(
 	struct xfs_buf	*bp)
 {
 	trace_xfs_buf_submit(bp, _RET_IP_);
 
 	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
-	ASSERT(bp->b_flags & XBF_ASYNC);
 
 	/* on shutdown we stale and complete the buffer immediately */
 	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
 		xfs_buf_ioerror(bp, -EIO);
 		bp->b_flags &= ~XBF_DONE;
 		xfs_buf_stale(bp);
-		xfs_buf_ioend(bp);
-		return;
+		return -EIO;
 	}
 
 	if (bp->b_flags & XBF_WRITE)
@@ -1482,23 +1480,14 @@ xfs_buf_submit(
 	/* clear the internal error state to avoid spurious errors */
 	bp->b_io_error = 0;
 
-	/*
-	 * The caller's reference is released during I/O completion.
-	 * This occurs some time after the last b_io_remaining reference is
-	 * released, so after we drop our Io reference we have to have some
-	 * other reference to ensure the buffer doesn't go away from underneath
-	 * us. Take a direct reference to ensure we have safe access to the
-	 * buffer until we are finished with it.
-	 */
-	xfs_buf_hold(bp);
-
 	/*
 	 * Set the count to 1 initially, this will stop an I/O completion
 	 * callout which happens before we have started all the I/O from calling
 	 * xfs_buf_ioend too early.
 	 */
 	atomic_set(&bp->b_io_remaining, 1);
-	xfs_buf_ioacct_inc(bp);
+	if (bp->b_flags & XBF_ASYNC)
+		xfs_buf_ioacct_inc(bp);
 	_xfs_buf_ioapply(bp);
 
 	/*
@@ -1507,14 +1496,39 @@ xfs_buf_submit(
 	 * that we don't return to the caller with completion still pending.
 	 */
 	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
-		if (bp->b_error)
+		if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
 			xfs_buf_ioend(bp);
 		else
 			xfs_buf_ioend_async(bp);
 	}
 
-	xfs_buf_rele(bp);
+	return 0;
+}
+
+void
+xfs_buf_submit(
+	struct xfs_buf	*bp)
+{
+	int		error;
+
+	ASSERT(bp->b_flags & XBF_ASYNC);
+
+	/*
+	 * The caller's reference is released during I/O completion.
+	 * This occurs some time after the last b_io_remaining reference is
+	 * released, so after we drop our Io reference we have to have some
+	 * other reference to ensure the buffer doesn't go away from underneath
+	 * us. Take a direct reference to ensure we have safe access to the
+	 * buffer until we are finished with it.
+	 */
+	xfs_buf_hold(bp);
+
+	error = __xfs_buf_submit(bp);
+	if (error)
+		xfs_buf_ioend(bp);
+
 	/* Note: it is not safe to reference bp now we've dropped our ref */
+	xfs_buf_rele(bp);
 }
 
 /*
@@ -1526,22 +1540,7 @@ xfs_buf_submit_wait(
 {
 	int		error;
 
-	trace_xfs_buf_submit_wait(bp, _RET_IP_);
-
-	ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC)));
-
-	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
-		xfs_buf_ioerror(bp, -EIO);
-		xfs_buf_stale(bp);
-		bp->b_flags &= ~XBF_DONE;
-		return -EIO;
-	}
-
-	if (bp->b_flags & XBF_WRITE)
-		xfs_buf_wait_unpin(bp);
-
-	/* clear the internal error state to avoid spurious errors */
-	bp->b_io_error = 0;
+	ASSERT(!(bp->b_flags & XBF_ASYNC));
 
 	/*
 	 * For synchronous IO, the IO does not inherit the submitters reference
@@ -1551,20 +1550,9 @@ xfs_buf_submit_wait(
 	 */
 	xfs_buf_hold(bp);
 
-	/*
-	 * Set the count to 1 initially, this will stop an I/O completion
-	 * callout which happens before we have started all the I/O from calling
-	 * xfs_buf_ioend too early.
-	 */
-	atomic_set(&bp->b_io_remaining, 1);
-	_xfs_buf_ioapply(bp);
-
-	/*
-	 * make sure we run completion synchronously if it raced with us and is
-	 * already complete.
-	 */
-	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
-		xfs_buf_ioend(bp);
+	error = __xfs_buf_submit(bp);
+	if (error)
+		goto out;
 
 	/* wait for completion before gathering the error from the buffer */
 	trace_xfs_buf_iowait(bp, _RET_IP_);
@@ -1572,6 +1560,7 @@ xfs_buf_submit_wait(
 	trace_xfs_buf_iowait_done(bp, _RET_IP_);
 	error = bp->b_error;
 
+out:
 	/*
 	 * all done now, we can release the hold that keeps the buffer
 	 * referenced for the entire IO.
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 9d741571b61e..b668fc127aa7 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -310,7 +310,6 @@ DEFINE_BUF_EVENT(xfs_buf_hold);
 DEFINE_BUF_EVENT(xfs_buf_rele);
 DEFINE_BUF_EVENT(xfs_buf_iodone);
 DEFINE_BUF_EVENT(xfs_buf_submit);
-DEFINE_BUF_EVENT(xfs_buf_submit_wait);
 DEFINE_BUF_EVENT(xfs_buf_lock);
 DEFINE_BUF_EVENT(xfs_buf_lock_done);
 DEFINE_BUF_EVENT(xfs_buf_trylock_fail);
-- 
cgit v1.2.3


From e339dd8d8b045399e918c6737b2cc435b21a451e Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:34 -0700
Subject: xfs: use sync buffer I/O for sync delwri queue submission

If a buffer is delwri queued while it sits on a delwri queue wait
list, the queue operation sets _XBF_DELWRI_Q without changing the
state of ->b_list. This occurs, for example, if another thread beats the
current delwri waiter thread to the buffer lock after I/O
completion. Once the waiter acquires the lock, it removes the buffer
from the wait list and leaves a buffer with _XBF_DELWRI_Q set but
not populated on a list. This results in a lost buffer submission
and in turn can result in assert failures due to _XBF_DELWRI_Q being
set on buffer reclaim or filesystem lockups if the buffer happens to
cover an item in the AIL.

This problem has been reproduced by repeated iterations of xfs/305
on high CPU count (28xcpu) systems with limited memory (~1GB). Dirty
dquot reclaim races with an xfsaild push of a separate dquot backed
by the same buffer such that the buffer sits on the reclaim wait
list at the time xfsaild attempts to queue it. Since the latter
dquot has been flush locked but the underlying buffer not submitted
for I/O, the dquot pins the AIL and causes the filesystem to
livelock.

This race is essentially made possible by the buffer lock cycle
involved with waiting on a synchronous delwri queue submission.
Close the race by using synchronous buffer I/O for respective delwri
queue submission. This means the buffer remains locked across the
I/O and so is inaccessible from other contexts while in the
intermediate wait list state. The sync buffer I/O wait mechanism is
factored into a helper such that sync delwri buffer submission and
serialization are batched operations.

Designed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_buf.c | 80 +++++++++++++++++++++++++++++---------------------------
 1 file changed, 41 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 7b0f7c79cd62..ef234847b4e6 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1531,6 +1531,20 @@ xfs_buf_submit(
 	xfs_buf_rele(bp);
 }
 
+/*
+ * Wait for I/O completion of a sync buffer and return the I/O error code.
+ */
+static int
+xfs_buf_iowait(
+	struct xfs_buf	*bp)
+{
+	trace_xfs_buf_iowait(bp, _RET_IP_);
+	wait_for_completion(&bp->b_iowait);
+	trace_xfs_buf_iowait_done(bp, _RET_IP_);
+
+	return bp->b_error;
+}
+
 /*
  * Synchronous buffer IO submission path, read or write.
  */
@@ -1553,12 +1567,7 @@ xfs_buf_submit_wait(
 	error = __xfs_buf_submit(bp);
 	if (error)
 		goto out;
-
-	/* wait for completion before gathering the error from the buffer */
-	trace_xfs_buf_iowait(bp, _RET_IP_);
-	wait_for_completion(&bp->b_iowait);
-	trace_xfs_buf_iowait_done(bp, _RET_IP_);
-	error = bp->b_error;
+	error = xfs_buf_iowait(bp);
 
 out:
 	/*
@@ -1961,16 +1970,11 @@ xfs_buf_cmp(
 }
 
 /*
- * submit buffers for write.
- *
- * When we have a large buffer list, we do not want to hold all the buffers
- * locked while we block on the request queue waiting for IO dispatch. To avoid
- * this problem, we lock and submit buffers in groups of 50, thereby minimising
- * the lock hold times for lists which may contain thousands of objects.
- *
- * To do this, we sort the buffer list before we walk the list to lock and
- * submit buffers, and we plug and unplug around each group of buffers we
- * submit.
+ * Submit buffers for write. If wait_list is specified, the buffers are
+ * submitted using sync I/O and placed on the wait list such that the caller can
+ * iowait each buffer. Otherwise async I/O is used and the buffers are released
+ * at I/O completion time. In either case, buffers remain locked until I/O
+ * completes and the buffer is released from the queue.
  */
 static int
 xfs_buf_delwri_submit_buffers(
@@ -2012,21 +2016,22 @@ xfs_buf_delwri_submit_buffers(
 		trace_xfs_buf_delwri_split(bp, _RET_IP_);
 
 		/*
-		 * We do all IO submission async. This means if we need
-		 * to wait for IO completion we need to take an extra
-		 * reference so the buffer is still valid on the other
-		 * side. We need to move the buffer onto the io_list
-		 * at this point so the caller can still access it.
+		 * If we have a wait list, each buffer (and associated delwri
+		 * queue reference) transfers to it and is submitted
+		 * synchronously. Otherwise, drop the buffer from the delwri
+		 * queue and submit async.
 		 */
 		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL);
-		bp->b_flags |= XBF_WRITE | XBF_ASYNC;
+		bp->b_flags |= XBF_WRITE;
 		if (wait_list) {
-			xfs_buf_hold(bp);
+			bp->b_flags &= ~XBF_ASYNC;
 			list_move_tail(&bp->b_list, wait_list);
-		} else
+			__xfs_buf_submit(bp);
+		} else {
+			bp->b_flags |= XBF_ASYNC;
 			list_del_init(&bp->b_list);
-
-		xfs_buf_submit(bp);
+			xfs_buf_submit(bp);
+		}
 	}
 	blk_finish_plug(&plug);
 
@@ -2073,9 +2078,11 @@ xfs_buf_delwri_submit(
 
 		list_del_init(&bp->b_list);
 
-		/* locking the buffer will wait for async IO completion. */
-		xfs_buf_lock(bp);
-		error2 = bp->b_error;
+		/*
+		 * Wait on the locked buffer, check for errors and unlock and
+		 * release the delwri queue reference.
+		 */
+		error2 = xfs_buf_iowait(bp);
 		xfs_buf_relse(bp);
 		if (!error)
 			error = error2;
@@ -2121,23 +2128,18 @@ xfs_buf_delwri_pushbuf(
 
 	/*
 	 * Delwri submission clears the DELWRI_Q buffer flag and returns with
-	 * the buffer on the wait list with an associated reference. Rather than
+	 * the buffer on the wait list with the original reference. Rather than
 	 * bounce the buffer from a local wait list back to the original list
 	 * after I/O completion, reuse the original list as the wait list.
 	 */
 	xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);
 
 	/*
-	 * The buffer is now under I/O and wait listed as during typical delwri
-	 * submission. Lock the buffer to wait for I/O completion. Rather than
-	 * remove the buffer from the wait list and release the reference, we
-	 * want to return with the buffer queued to the original list. The
-	 * buffer already sits on the original list with a wait list reference,
-	 * however. If we let the queue inherit that wait list reference, all we
-	 * need to do is reset the DELWRI_Q flag.
+	 * The buffer is now locked, under I/O and wait listed on the original
+	 * delwri queue. Wait for I/O completion, restore the DELWRI_Q flag and
+	 * return with the buffer unlocked and on the original queue.
 	 */
-	xfs_buf_lock(bp);
-	error = bp->b_error;
+	error = xfs_buf_iowait(bp);
 	bp->b_flags |= _XBF_DELWRI_Q;
 	xfs_buf_unlock(bp);
 
-- 
cgit v1.2.3


From 6af88cda007695af003a1cd41f077c826aa59b97 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:35 -0700
Subject: xfs: combine [a]sync buffer submission apis

The buffer I/O submission path consists of separate function calls
per type. The buffer I/O type is already controlled via buffer
state (XBF_ASYNC), however, so there is no real need for separate
submission functions.

Combine the buffer submission functions into a single function that
processes the buffer appropriately based on XBF_ASYNC. Retain an
internal helper with a conditional wait parameter to continue to
support batched !XBF_ASYNC submission/completion required by delwri
queues.

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_buf.c         | 72 ++++++++++++++++--------------------------------
 fs/xfs/xfs_buf.h         | 10 +++++--
 fs/xfs/xfs_log_recover.c |  4 +--
 3 files changed, 33 insertions(+), 53 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ef234847b4e6..be79dd25b8cf 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -757,11 +757,7 @@ _xfs_buf_read(
 	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
 	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
 
-	if (flags & XBF_ASYNC) {
-		xfs_buf_submit(bp);
-		return 0;
-	}
-	return xfs_buf_submit_wait(bp);
+	return xfs_buf_submit(bp);
 }
 
 xfs_buf_t *
@@ -846,7 +842,7 @@ xfs_buf_read_uncached(
 	bp->b_flags |= XBF_READ;
 	bp->b_ops = ops;
 
-	xfs_buf_submit_wait(bp);
+	xfs_buf_submit(bp);
 	if (bp->b_error) {
 		int	error = bp->b_error;
 		xfs_buf_relse(bp);
@@ -1249,7 +1245,7 @@ xfs_bwrite(
 	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
 			 XBF_WRITE_FAIL | XBF_DONE);
 
-	error = xfs_buf_submit_wait(bp);
+	error = xfs_buf_submit(bp);
 	if (error) {
 		xfs_force_shutdown(bp->b_target->bt_mount,
 				   SHUTDOWN_META_IO_ERROR);
@@ -1459,7 +1455,7 @@ _xfs_buf_ioapply(
  * itself.
  */
 static int
-__xfs_buf_submit(
+__xfs_buf_submit_common(
 	struct xfs_buf	*bp)
 {
 	trace_xfs_buf_submit(bp, _RET_IP_);
@@ -1505,32 +1501,6 @@ __xfs_buf_submit(
 	return 0;
 }
 
-void
-xfs_buf_submit(
-	struct xfs_buf	*bp)
-{
-	int		error;
-
-	ASSERT(bp->b_flags & XBF_ASYNC);
-
-	/*
-	 * The caller's reference is released during I/O completion.
-	 * This occurs some time after the last b_io_remaining reference is
-	 * released, so after we drop our Io reference we have to have some
-	 * other reference to ensure the buffer doesn't go away from underneath
-	 * us. Take a direct reference to ensure we have safe access to the
-	 * buffer until we are finished with it.
-	 */
-	xfs_buf_hold(bp);
-
-	error = __xfs_buf_submit(bp);
-	if (error)
-		xfs_buf_ioend(bp);
-
-	/* Note: it is not safe to reference bp now we've dropped our ref */
-	xfs_buf_rele(bp);
-}
-
 /*
  * Wait for I/O completion of a sync buffer and return the I/O error code.
  */
@@ -1538,6 +1508,8 @@ static int
 xfs_buf_iowait(
 	struct xfs_buf	*bp)
 {
+	ASSERT(!(bp->b_flags & XBF_ASYNC));
+
 	trace_xfs_buf_iowait(bp, _RET_IP_);
 	wait_for_completion(&bp->b_iowait);
 	trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1549,30 +1521,33 @@ xfs_buf_iowait(
  * Synchronous buffer IO submission path, read or write.
  */
 int
-xfs_buf_submit_wait(
-	struct xfs_buf	*bp)
+__xfs_buf_submit(
+	struct xfs_buf	*bp,
+	bool		wait)
 {
 	int		error;
 
-	ASSERT(!(bp->b_flags & XBF_ASYNC));
-
 	/*
-	 * For synchronous IO, the IO does not inherit the submitters reference
-	 * count, nor the buffer lock. Hence we cannot release the reference we
-	 * are about to take until we've waited for all IO completion to occur,
-	 * including any xfs_buf_ioend_async() work that may be pending.
+	 * Grab a reference so the buffer does not go away underneath us. For
+	 * async buffers, I/O completion drops the callers reference, which
+	 * could occur before submission returns.
 	 */
 	xfs_buf_hold(bp);
 
-	error = __xfs_buf_submit(bp);
-	if (error)
+	error = __xfs_buf_submit_common(bp);
+	if (error) {
+		if (bp->b_flags & XBF_ASYNC)
+			xfs_buf_ioend(bp);
 		goto out;
-	error = xfs_buf_iowait(bp);
+	}
 
+	if (wait)
+		error = xfs_buf_iowait(bp);
 out:
 	/*
-	 * all done now, we can release the hold that keeps the buffer
-	 * referenced for the entire IO.
+	 * Release the hold that keeps the buffer referenced for the entire
+	 * I/O. Note that if the buffer is async, it is not safe to reference
+	 * after this release.
 	 */
 	xfs_buf_rele(bp);
 	return error;
@@ -2026,12 +2001,11 @@ xfs_buf_delwri_submit_buffers(
 		if (wait_list) {
 			bp->b_flags &= ~XBF_ASYNC;
 			list_move_tail(&bp->b_list, wait_list);
-			__xfs_buf_submit(bp);
 		} else {
 			bp->b_flags |= XBF_ASYNC;
 			list_del_init(&bp->b_list);
-			xfs_buf_submit(bp);
 		}
+		__xfs_buf_submit(bp, false);
 	}
 	blk_finish_plug(&plug);
 
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 6ddf1907fc7a..f04613181ca1 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -297,8 +297,14 @@ extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
 		xfs_failaddr_t failaddr);
 #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
 extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
-extern void xfs_buf_submit(struct xfs_buf *bp);
-extern int xfs_buf_submit_wait(struct xfs_buf *bp);
+
+extern int __xfs_buf_submit(struct xfs_buf *bp, bool);
+static inline int xfs_buf_submit(struct xfs_buf *bp)
+{
+	bool wait = bp->b_flags & XBF_ASYNC ? false : true;
+	return __xfs_buf_submit(bp, wait);
+}
+
 extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
 				xfs_buf_rw_t);
 #define xfs_buf_zero(bp, off, len) \
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8317023293a5..cbac943896f4 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -196,7 +196,7 @@ xlog_bread_noalign(
 	bp->b_io_length = nbblks;
 	bp->b_error = 0;
 
-	error = xfs_buf_submit_wait(bp);
+	error = xfs_buf_submit(bp);
 	if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
 		xfs_buf_ioerror_alert(bp, __func__);
 	return error;
@@ -5706,7 +5706,7 @@ xlog_do_recover(
 	bp->b_flags |= XBF_READ;
 	bp->b_ops = &xfs_sb_buf_ops;
 
-	error = xfs_buf_submit_wait(bp);
+	error = xfs_buf_submit(bp);
 	if (error) {
 		if (!XFS_FORCED_SHUTDOWN(mp)) {
 			xfs_buf_ioerror_alert(bp, __func__);
-- 
cgit v1.2.3


From bb00b6f1e2b9699f6140849ab3d6a252b130f44e Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 11 Jul 2018 22:26:35 -0700
Subject: xfs: kill __xfs_buf_submit_common()

Now that there is only one caller, fold the common submission helper
into __xfs_buf_submit().

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_buf.c | 83 +++++++++++++++++++++++---------------------------------
 1 file changed, 34 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index be79dd25b8cf..c641c7fa1a03 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1449,15 +1449,34 @@ _xfs_buf_ioapply(
 }
 
 /*
- * Asynchronous IO submission path. This transfers the buffer lock ownership and
- * the current reference to the IO. It is not safe to reference the buffer after
- * a call to this function unless the caller holds an additional reference
- * itself.
+ * Wait for I/O completion of a sync buffer and return the I/O error code.
  */
 static int
-__xfs_buf_submit_common(
+xfs_buf_iowait(
 	struct xfs_buf	*bp)
 {
+	ASSERT(!(bp->b_flags & XBF_ASYNC));
+
+	trace_xfs_buf_iowait(bp, _RET_IP_);
+	wait_for_completion(&bp->b_iowait);
+	trace_xfs_buf_iowait_done(bp, _RET_IP_);
+
+	return bp->b_error;
+}
+
+/*
+ * Buffer I/O submission path, read or write. Asynchronous submission transfers
+ * the buffer lock ownership and the current reference to the IO. It is not
+ * safe to reference the buffer after a call to this function unless the caller
+ * holds an additional reference itself.
+ */
+int
+__xfs_buf_submit(
+	struct xfs_buf	*bp,
+	bool		wait)
+{
+	int		error = 0;
+
 	trace_xfs_buf_submit(bp, _RET_IP_);
 
 	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
@@ -1467,9 +1486,18 @@ __xfs_buf_submit_common(
 		xfs_buf_ioerror(bp, -EIO);
 		bp->b_flags &= ~XBF_DONE;
 		xfs_buf_stale(bp);
+		if (bp->b_flags & XBF_ASYNC)
+			xfs_buf_ioend(bp);
 		return -EIO;
 	}
 
+	/*
+	 * Grab a reference so the buffer does not go away underneath us. For
+	 * async buffers, I/O completion drops the callers reference, which
+	 * could occur before submission returns.
+	 */
+	xfs_buf_hold(bp);
+
 	if (bp->b_flags & XBF_WRITE)
 		xfs_buf_wait_unpin(bp);
 
@@ -1498,52 +1526,9 @@ __xfs_buf_submit_common(
 			xfs_buf_ioend_async(bp);
 	}
 
-	return 0;
-}
-
-/*
- * Wait for I/O completion of a sync buffer and return the I/O error code.
- */
-static int
-xfs_buf_iowait(
-	struct xfs_buf	*bp)
-{
-	ASSERT(!(bp->b_flags & XBF_ASYNC));
-
-	trace_xfs_buf_iowait(bp, _RET_IP_);
-	wait_for_completion(&bp->b_iowait);
-	trace_xfs_buf_iowait_done(bp, _RET_IP_);
-
-	return bp->b_error;
-}
-
-/*
- * Synchronous buffer IO submission path, read or write.
- */
-int
-__xfs_buf_submit(
-	struct xfs_buf	*bp,
-	bool		wait)
-{
-	int		error;
-
-	/*
-	 * Grab a reference so the buffer does not go away underneath us. For
-	 * async buffers, I/O completion drops the callers reference, which
-	 * could occur before submission returns.
-	 */
-	xfs_buf_hold(bp);
-
-	error = __xfs_buf_submit_common(bp);
-	if (error) {
-		if (bp->b_flags & XBF_ASYNC)
-			xfs_buf_ioend(bp);
-		goto out;
-	}
-
 	if (wait)
 		error = xfs_buf_iowait(bp);
-out:
+
 	/*
 	 * Release the hold that keeps the buffer referenced for the entire
 	 * I/O. Note that if the buffer is async, it is not safe to reference
-- 
cgit v1.2.3


From a4722a643fbb9e1466491fbe5a3c44591805dcc8 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 11 Jul 2018 22:26:36 -0700
Subject: xfs: remove unused iolock arg from xfs_break_dax_layouts

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_file.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a3e7767a5715..6b31f41eafa2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -735,7 +735,6 @@ xfs_wait_dax_page(
 static int
 xfs_break_dax_layouts(
 	struct inode		*inode,
-	uint			iolock,
 	bool			*did_unlock)
 {
 	struct page		*page;
@@ -766,7 +765,7 @@ xfs_break_layouts(
 		retry = false;
 		switch (reason) {
 		case BREAK_UNMAP:
-			error = xfs_break_dax_layouts(inode, *iolock, &retry);
+			error = xfs_break_dax_layouts(inode, &retry);
 			if (error || retry)
 				break;
 			/* fall through */
-- 
cgit v1.2.3


From efe803277364a621348b679058222d644e113c9c Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Wed, 11 Jul 2018 22:26:36 -0700
Subject: xfs: Initialize variables in xfs_alloc_get_rec before using them

Make sure we initialize *bno and *len before jumping to the out_bad_rec
label; otherwise we risk calling xfs_warn() with uninitialized variables.

Coverity: 100898
Coverity: 1437081
Coverity: 1437129
Coverity: 1437191
Coverity: 1437201
Coverity: 1437212
Coverity: 1437341
Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index bd6d8aeea825..3c3f2d5119ea 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -223,12 +223,13 @@ xfs_alloc_get_rec(
 	error = xfs_btree_get_rec(cur, &rec, stat);
 	if (error || !(*stat))
 		return error;
-	if (rec->alloc.ar_blockcount == 0)
-		goto out_bad_rec;
 
 	*bno = be32_to_cpu(rec->alloc.ar_startblock);
 	*len = be32_to_cpu(rec->alloc.ar_blockcount);
 
+	if (*len == 0)
+		goto out_bad_rec;
+
 	/* check for valid extent range, including overflow */
 	if (!xfs_verify_agbno(mp, agno, *bno))
 		goto out_bad_rec;
-- 
cgit v1.2.3


From 9b54bf9d6a5b30e2cc22b18793e9a4158c5b4882 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 11 Jul 2018 14:56:51 +0100
Subject: kernel: add kcompat_sys_{f,}statfs64()

Using this helper allows us to avoid the in-kernel calls to the
compat_sys_{f,}statfs64() syscalls, as are necessary for parameter
mangling in arm64's compat handling.

Following the example of ksys_* functions, kcompat_sys_* functions are
intended to be a drop-in replacement for their compat_sys_*
counterparts, with the same calling convention.

This is necessary to enable conversion of arm64's syscall handling to
use pt_regs wrappers.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 fs/statfs.c            | 14 ++++++++++++--
 include/linux/compat.h | 11 +++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/statfs.c b/fs/statfs.c
index 5b2a24f0f263..f0216629621d 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -335,7 +335,7 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
 	return 0;
 }
 
-COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz, struct compat_statfs64 __user * buf)
 {
 	struct kstatfs tmp;
 	int error;
@@ -349,7 +349,12 @@ COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, s
 	return error;
 }
 
-COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+{
+	return kcompat_sys_statfs64(pathname, sz, buf);
+}
+
+int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user * buf)
 {
 	struct kstatfs tmp;
 	int error;
@@ -363,6 +368,11 @@ COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct co
 	return error;
 }
 
+COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+{
+	return kcompat_sys_fstatfs64(fd, sz, buf);
+}
+
 /*
  * This is a copy of sys_ustat, just dealing with a structure layout.
  * Given how simple this syscall is that apporach is more maintainable
diff --git a/include/linux/compat.h b/include/linux/compat.h
index c68acc47da57..43f4ed44c5d5 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -1028,6 +1028,17 @@ static inline struct compat_timeval ns_to_compat_timeval(s64 nsec)
 	return ctv;
 }
 
+/*
+ * Kernel code should not call compat syscalls (i.e., compat_sys_xyzyyz())
+ * directly.  Instead, use one of the functions which work equivalently, such
+ * as the kcompat_sys_xyzyyz() functions prototyped below.
+ */
+
+int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz,
+		     struct compat_statfs64 __user * buf);
+int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
+			  struct compat_statfs64 __user * buf);
+
 #else /* !CONFIG_COMPAT */
 
 #define is_compat_task() (0)
-- 
cgit v1.2.3


From c9c554f21490bbc96cc554f80024d27d09670480 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 11 Jul 2018 14:19:04 -0400
Subject: alloc_file(): switch to passing O_... flags instead of FMODE_... mode

... so that it could set both ->f_flags and ->f_mode, without callers
having to set ->f_flags manually.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/misc/cxl/api.c          |  3 +--
 drivers/scsi/cxlflash/ocxl_hw.c |  3 +--
 fs/aio.c                        |  8 ++------
 fs/anon_inodes.c                |  3 +--
 fs/file_table.c                 | 17 +++++++++--------
 fs/hugetlbfs/inode.c            |  3 +--
 fs/pipe.c                       |  8 ++++----
 include/linux/file.h            |  2 +-
 ipc/shm.c                       |  8 ++++----
 mm/memfd.c                      |  2 +-
 mm/shmem.c                      |  3 +--
 net/socket.c                    |  3 +--
 12 files changed, 27 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 6b16946f9b05..0b5cb6cf91a0 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -102,12 +102,11 @@ static struct file *cxl_getfile(const char *name,
 	path.mnt = mntget(cxl_vfs_mount);
 	d_instantiate(path.dentry, inode);
 
-	file = alloc_file(&path, OPEN_FMODE(flags), fops);
+	file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops);
 	if (IS_ERR(file)) {
 		path_put(&path);
 		goto err_fs;
 	}
-	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
 	file->private_data = priv;
 
 	return file;
diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c
index 497a68389461..99bb393a8a34 100644
--- a/drivers/scsi/cxlflash/ocxl_hw.c
+++ b/drivers/scsi/cxlflash/ocxl_hw.c
@@ -129,7 +129,7 @@ static struct file *ocxlflash_getfile(struct device *dev, const char *name,
 	path.mnt = mntget(ocxlflash_vfs_mount);
 	d_instantiate(path.dentry, inode);
 
-	file = alloc_file(&path, OPEN_FMODE(flags), fops);
+	file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops);
 	if (IS_ERR(file)) {
 		rc = PTR_ERR(file);
 		dev_err(dev, "%s: alloc_file failed rc=%d\n",
@@ -138,7 +138,6 @@ static struct file *ocxlflash_getfile(struct device *dev, const char *name,
 		goto err3;
 	}
 
-	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
 	file->private_data = priv;
 out:
 	return file;
diff --git a/fs/aio.c b/fs/aio.c
index e1d20124ec0e..9eea53887d6c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -234,13 +234,9 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 	path.mnt = mntget(aio_mnt);
 
 	d_instantiate(path.dentry, inode);
-	file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
-	if (IS_ERR(file)) {
+	file = alloc_file(&path, O_RDWR, &aio_ring_fops);
+	if (IS_ERR(file))
 		path_put(&path);
-		return file;
-	}
-
-	file->f_flags = O_RDWR;
 	return file;
 }
 
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 3168ee4e77f4..6b235ab1df6c 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -102,12 +102,11 @@ struct file *anon_inode_getfile(const char *name,
 
 	d_instantiate(path.dentry, anon_inode_inode);
 
-	file = alloc_file(&path, OPEN_FMODE(flags), fops);
+	file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops);
 	if (IS_ERR(file))
 		goto err_dput;
 	file->f_mapping = anon_inode_inode->i_mapping;
 
-	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
 	file->private_data = priv;
 
 	return file;
diff --git a/fs/file_table.c b/fs/file_table.c
index eee7cf629e52..086c3f5ec31a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -153,10 +153,10 @@ over:
  * alloc_file - allocate and initialize a 'struct file'
  *
  * @path: the (dentry, vfsmount) pair for the new file
- * @mode: the mode with which the new file will be opened
+ * @flags: O_... flags with which the new file will be opened
  * @fop: the 'struct file_operations' for the new file
  */
-struct file *alloc_file(const struct path *path, fmode_t mode,
+struct file *alloc_file(const struct path *path, int flags,
 		const struct file_operations *fop)
 {
 	struct file *file;
@@ -165,19 +165,20 @@ struct file *alloc_file(const struct path *path, fmode_t mode,
 	if (IS_ERR(file))
 		return file;
 
+	file->f_mode = OPEN_FMODE(flags);
+	file->f_flags = flags;
 	file->f_path = *path;
 	file->f_inode = path->dentry->d_inode;
 	file->f_mapping = path->dentry->d_inode->i_mapping;
 	file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
-	if ((mode & FMODE_READ) &&
+	if ((file->f_mode & FMODE_READ) &&
 	     likely(fop->read || fop->read_iter))
-		mode |= FMODE_CAN_READ;
-	if ((mode & FMODE_WRITE) &&
+		file->f_mode |= FMODE_CAN_READ;
+	if ((file->f_mode & FMODE_WRITE) &&
 	     likely(fop->write || fop->write_iter))
-		mode |= FMODE_CAN_WRITE;
-	file->f_mode = mode;
+		file->f_mode |= FMODE_CAN_WRITE;
 	file->f_op = fop;
-	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(path->dentry->d_inode);
 	return file;
 }
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d508c7844681..71aed47422e2 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1375,8 +1375,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 	inode->i_size = size;
 	clear_nlink(inode);
 
-	file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
-			&hugetlbfs_file_operations);
+	file = alloc_file(&path, O_RDWR, &hugetlbfs_file_operations);
 	if (IS_ERR(file))
 		goto out_dentry; /* inode is already attached */
 
diff --git a/fs/pipe.c b/fs/pipe.c
index 9405e455f5b1..1909422e5a78 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -760,16 +760,17 @@ int create_pipe_files(struct file **res, int flags)
 
 	d_instantiate(path.dentry, inode);
 
-	f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
+	f = alloc_file(&path, O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
+			&pipefifo_fops);
 	if (IS_ERR(f)) {
 		err = PTR_ERR(f);
 		goto err_dentry;
 	}
 
-	f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
 	f->private_data = inode->i_pipe;
 
-	res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
+	res[0] = alloc_file(&path, O_RDONLY | (flags & O_NONBLOCK),
+			&pipefifo_fops);
 	if (IS_ERR(res[0])) {
 		put_pipe_info(inode, inode->i_pipe);
 		fput(f);
@@ -778,7 +779,6 @@ int create_pipe_files(struct file **res, int flags)
 
 	path_get(&path);
 	res[0]->private_data = inode->i_pipe;
-	res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
 	res[1] = f;
 	return 0;
 
diff --git a/include/linux/file.h b/include/linux/file.h
index 279720db984a..6d34a1262b31 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -18,7 +18,7 @@ struct file_operations;
 struct vfsmount;
 struct dentry;
 struct path;
-extern struct file *alloc_file(const struct path *, fmode_t mode,
+extern struct file *alloc_file(const struct path *, int flags,
 	const struct file_operations *fop);
 
 static inline void fput_light(struct file *file, int fput_needed)
diff --git a/ipc/shm.c b/ipc/shm.c
index 051a3e1fb8df..c702abd578a7 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1362,7 +1362,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 	struct ipc_namespace *ns;
 	struct shm_file_data *sfd;
 	struct path path;
-	fmode_t f_mode;
+	int f_flags;
 	unsigned long populate = 0;
 
 	err = -EINVAL;
@@ -1395,11 +1395,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 	if (shmflg & SHM_RDONLY) {
 		prot = PROT_READ;
 		acc_mode = S_IRUGO;
-		f_mode = FMODE_READ;
+		f_flags = O_RDONLY;
 	} else {
 		prot = PROT_READ | PROT_WRITE;
 		acc_mode = S_IRUGO | S_IWUGO;
-		f_mode = FMODE_READ | FMODE_WRITE;
+		f_flags = O_RDWR;
 	}
 	if (shmflg & SHM_EXEC) {
 		prot |= PROT_EXEC;
@@ -1449,7 +1449,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 		goto out_nattch;
 	}
 
-	file = alloc_file(&path, f_mode,
+	file = alloc_file(&path, f_flags,
 			  is_file_hugepages(shp->shm_file) ?
 				&shm_file_operations_huge :
 				&shm_file_operations);
diff --git a/mm/memfd.c b/mm/memfd.c
index 27069518e3c5..2bb5e257080e 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -326,7 +326,7 @@ SYSCALL_DEFINE2(memfd_create,
 		goto err_fd;
 	}
 	file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
-	file->f_flags |= O_RDWR | O_LARGEFILE;
+	file->f_flags |= O_LARGEFILE;
 
 	if (flags & MFD_ALLOW_SEALING) {
 		file_seals = memfd_file_seals_ptr(file);
diff --git a/mm/shmem.c b/mm/shmem.c
index 2cab84403055..84844e52bf24 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3942,8 +3942,7 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l
 	if (IS_ERR(res))
 		goto put_path;
 
-	res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
-		  &shmem_file_operations);
+	res = alloc_file(&path, O_RDWR, &shmem_file_operations);
 	if (IS_ERR(res))
 		goto put_path;
 
diff --git a/net/socket.c b/net/socket.c
index 8a109012608a..2cdbe8f71b7f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -411,7 +411,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 
 	d_instantiate(path.dentry, SOCK_INODE(sock));
 
-	file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
+	file = alloc_file(&path, O_RDWR | (flags & O_NONBLOCK),
 		  &socket_file_ops);
 	if (IS_ERR(file)) {
 		/* drop dentry, keep inode for a bit */
@@ -423,7 +423,6 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 	}
 
 	sock->file = file;
-	file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 	file->private_data = sock;
 	return file;
 }
-- 
cgit v1.2.3


From 6de37b6dc085e7c5e092b69289af66876526da44 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 10 Jul 2018 13:12:05 -0400
Subject: pass creds to get_empty_filp(), make sure dentry_open() passes the
 right creds

... and rename get_empty_filp() to alloc_empty_file().

dentry_open() gets creds as argument, but the only thing that sees those is
security_file_open() - file->f_cred still ends up with current_cred().  For
almost all callers it's the same thing, but there are several broken cases.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c | 5 ++---
 fs/internal.h   | 2 +-
 fs/namei.c      | 2 +-
 fs/open.c       | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index 086c3f5ec31a..76cfa4c43e13 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -101,9 +101,8 @@ int proc_nr_files(struct ctl_table *table, int write,
  * done, you will imbalance int the mount's writer count
  * and a warning at __fput() time.
  */
-struct file *get_empty_filp(void)
+struct file *alloc_empty_file(const struct cred *cred)
 {
-	const struct cred *cred = current_cred();
 	static long old_max;
 	struct file *f;
 	int error;
@@ -161,7 +160,7 @@ struct file *alloc_file(const struct path *path, int flags,
 {
 	struct file *file;
 
-	file = get_empty_filp();
+	file = alloc_empty_file(current_cred());
 	if (IS_ERR(file))
 		return file;
 
diff --git a/fs/internal.h b/fs/internal.h
index 5645b4ebf494..66473bf388e4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -93,7 +93,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
 /*
  * file_table.c
  */
-extern struct file *get_empty_filp(void);
+extern struct file *alloc_empty_file(const struct cred *);
 
 /*
  * super.c
diff --git a/fs/namei.c b/fs/namei.c
index 734cef54fdf8..af2ec1803f57 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3513,7 +3513,7 @@ static struct file *path_openat(struct nameidata *nd,
 	int opened = 0;
 	int error;
 
-	file = get_empty_filp();
+	file = alloc_empty_file(current_cred());
 	if (IS_ERR(file))
 		return file;
 
diff --git a/fs/open.c b/fs/open.c
index 530da965e369..0061f9ea044d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -921,7 +921,7 @@ struct file *dentry_open(const struct path *path, int flags,
 	/* We must always pass in a valid mount pointer. */
 	BUG_ON(!path->mnt);
 
-	f = get_empty_filp();
+	f = alloc_empty_file(cred);
 	if (!IS_ERR(f)) {
 		f->f_flags = flags;
 		error = vfs_open(path, f, cred);
-- 
cgit v1.2.3


From ea73ea7279884ba80896d4ea0f0443bf48b9e311 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 11 Jul 2018 15:00:04 -0400
Subject: pass ->f_flags value to alloc_empty_file()

... and have it set the f_flags-derived part of ->f_mode.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c | 8 ++++----
 fs/internal.h   | 2 +-
 fs/namei.c      | 4 +---
 fs/open.c       | 8 +++-----
 4 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index 76cfa4c43e13..705f486f7007 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -101,7 +101,7 @@ int proc_nr_files(struct ctl_table *table, int write,
  * done, you will imbalance int the mount's writer count
  * and a warning at __fput() time.
  */
-struct file *alloc_empty_file(const struct cred *cred)
+struct file *alloc_empty_file(int flags, const struct cred *cred)
 {
 	static long old_max;
 	struct file *f;
@@ -135,6 +135,8 @@ struct file *alloc_empty_file(const struct cred *cred)
 	spin_lock_init(&f->f_lock);
 	mutex_init(&f->f_pos_lock);
 	eventpoll_init_file(f);
+	f->f_flags = flags;
+	f->f_mode = OPEN_FMODE(flags);
 	/* f->f_version: 0 */
 	percpu_counter_inc(&nr_files);
 	return f;
@@ -160,12 +162,10 @@ struct file *alloc_file(const struct path *path, int flags,
 {
 	struct file *file;
 
-	file = alloc_empty_file(current_cred());
+	file = alloc_empty_file(flags, current_cred());
 	if (IS_ERR(file))
 		return file;
 
-	file->f_mode = OPEN_FMODE(flags);
-	file->f_flags = flags;
 	file->f_path = *path;
 	file->f_inode = path->dentry->d_inode;
 	file->f_mapping = path->dentry->d_inode->i_mapping;
diff --git a/fs/internal.h b/fs/internal.h
index 66473bf388e4..661c314aba30 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -93,7 +93,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
 /*
  * file_table.c
  */
-extern struct file *alloc_empty_file(const struct cred *);
+extern struct file *alloc_empty_file(int, const struct cred *);
 
 /*
  * super.c
diff --git a/fs/namei.c b/fs/namei.c
index af2ec1803f57..223925e30adb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3513,12 +3513,10 @@ static struct file *path_openat(struct nameidata *nd,
 	int opened = 0;
 	int error;
 
-	file = alloc_empty_file(current_cred());
+	file = alloc_empty_file(op->open_flag, current_cred());
 	if (IS_ERR(file))
 		return file;
 
-	file->f_flags = op->open_flag;
-
 	if (unlikely(file->f_flags & __O_TMPFILE)) {
 		error = do_tmpfile(nd, flags, op, file, &opened);
 		goto out2;
diff --git a/fs/open.c b/fs/open.c
index 0061f9ea044d..15d2c3ab91ff 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -742,9 +742,6 @@ static int do_dentry_open(struct file *f,
 	static const struct file_operations empty_fops = {};
 	int error;
 
-	f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
-				FMODE_PREAD | FMODE_PWRITE;
-
 	path_get(&f->f_path);
 	f->f_inode = inode;
 	f->f_mapping = inode->i_mapping;
@@ -788,6 +785,8 @@ static int do_dentry_open(struct file *f,
 	if (error)
 		goto cleanup_all;
 
+	/* normally all 3 are set; ->open() can clear them if needed */
+	f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
 	if (!open)
 		open = f->f_op->open;
 	if (open) {
@@ -921,9 +920,8 @@ struct file *dentry_open(const struct path *path, int flags,
 	/* We must always pass in a valid mount pointer. */
 	BUG_ON(!path->mnt);
 
-	f = alloc_empty_file(cred);
+	f = alloc_empty_file(flags, cred);
 	if (!IS_ERR(f)) {
-		f->f_flags = flags;
 		error = vfs_open(path, f, cred);
 		if (!error) {
 			/* from now on we need fput() to dispose of f */
-- 
cgit v1.2.3


From ae2bb293a3e8adbc54d08cede5afc22929030c03 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 10 Jul 2018 13:22:28 -0400
Subject: get rid of cred argument of vfs_open() and do_dentry_open()

always equal to ->f_cred

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h |  2 +-
 fs/namei.c    |  4 ++--
 fs/open.c     | 15 ++++++---------
 3 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/internal.h b/fs/internal.h
index 661c314aba30..baeab53aeaff 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -126,7 +126,7 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
 		int flag);
 
 extern int open_check_o_direct(struct file *f);
-extern int vfs_open(const struct path *, struct file *, const struct cred *);
+extern int vfs_open(const struct path *, struct file *);
 
 /*
  * inode.c
diff --git a/fs/namei.c b/fs/namei.c
index 223925e30adb..3cf02804d5ff 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3396,7 +3396,7 @@ finish_open_created:
 	if (error)
 		goto out;
 	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
-	error = vfs_open(&nd->path, file, current_cred());
+	error = vfs_open(&nd->path, file);
 	if (error)
 		goto out;
 	*opened |= FILE_OPENED;
@@ -3499,7 +3499,7 @@ static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
 	int error = path_lookupat(nd, flags, &path);
 	if (!error) {
 		audit_inode(nd->name, path.dentry, 0);
-		error = vfs_open(&path, file, current_cred());
+		error = vfs_open(&path, file);
 		path_put(&path);
 	}
 	return error;
diff --git a/fs/open.c b/fs/open.c
index 15d2c3ab91ff..0a9f00b7f3d5 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -736,8 +736,7 @@ int open_check_o_direct(struct file *f)
 
 static int do_dentry_open(struct file *f,
 			  struct inode *inode,
-			  int (*open)(struct inode *, struct file *),
-			  const struct cred *cred)
+			  int (*open)(struct inode *, struct file *))
 {
 	static const struct file_operations empty_fops = {};
 	int error;
@@ -777,7 +776,7 @@ static int do_dentry_open(struct file *f,
 		goto cleanup_all;
 	}
 
-	error = security_file_open(f, cred);
+	error = security_file_open(f, f->f_cred);
 	if (error)
 		goto cleanup_all;
 
@@ -855,8 +854,7 @@ int finish_open(struct file *file, struct dentry *dentry,
 	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
 
 	file->f_path.dentry = dentry;
-	error = do_dentry_open(file, d_backing_inode(dentry), open,
-			       current_cred());
+	error = do_dentry_open(file, d_backing_inode(dentry), open);
 	if (!error)
 		*opened |= FILE_OPENED;
 
@@ -897,8 +895,7 @@ EXPORT_SYMBOL(file_path);
  * @file: newly allocated file with f_flag initialized
  * @cred: credentials to use
  */
-int vfs_open(const struct path *path, struct file *file,
-	     const struct cred *cred)
+int vfs_open(const struct path *path, struct file *file)
 {
 	struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0);
 
@@ -906,7 +903,7 @@ int vfs_open(const struct path *path, struct file *file,
 		return PTR_ERR(dentry);
 
 	file->f_path = *path;
-	return do_dentry_open(file, d_backing_inode(dentry), NULL, cred);
+	return do_dentry_open(file, d_backing_inode(dentry), NULL);
 }
 
 struct file *dentry_open(const struct path *path, int flags,
@@ -922,7 +919,7 @@ struct file *dentry_open(const struct path *path, int flags,
 
 	f = alloc_empty_file(flags, cred);
 	if (!IS_ERR(f)) {
-		error = vfs_open(path, f, cred);
+		error = vfs_open(path, f);
 		if (!error) {
 			/* from now on we need fput() to dispose of f */
 			error = open_check_o_direct(f);
-- 
cgit v1.2.3


From e3f20ae21079ecac282df65d83865c5771f4bca0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 10 Jul 2018 13:25:29 -0400
Subject: security_file_open(): lose cred argument

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c                | 2 +-
 include/linux/security.h | 5 ++---
 security/security.c      | 4 ++--
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 0a9f00b7f3d5..4c65edefa487 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -776,7 +776,7 @@ static int do_dentry_open(struct file *f,
 		goto cleanup_all;
 	}
 
-	error = security_file_open(f, f->f_cred);
+	error = security_file_open(f);
 	if (error)
 		goto cleanup_all;
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 63030c85ee19..88d30fc975e7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -309,7 +309,7 @@ void security_file_set_fowner(struct file *file);
 int security_file_send_sigiotask(struct task_struct *tsk,
 				 struct fown_struct *fown, int sig);
 int security_file_receive(struct file *file);
-int security_file_open(struct file *file, const struct cred *cred);
+int security_file_open(struct file *file);
 int security_task_alloc(struct task_struct *task, unsigned long clone_flags);
 void security_task_free(struct task_struct *task);
 int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
@@ -858,8 +858,7 @@ static inline int security_file_receive(struct file *file)
 	return 0;
 }
 
-static inline int security_file_open(struct file *file,
-				     const struct cred *cred)
+static inline int security_file_open(struct file *file)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 68f46d849abe..235b35f58a65 100644
--- a/security/security.c
+++ b/security/security.c
@@ -970,11 +970,11 @@ int security_file_receive(struct file *file)
 	return call_int_hook(file_receive, 0, file);
 }
 
-int security_file_open(struct file *file, const struct cred *cred)
+int security_file_open(struct file *file)
 {
 	int ret;
 
-	ret = call_int_hook(file_open, 0, file, cred);
+	ret = call_int_hook(file_open, 0, file, file->f_cred);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From f5d11409e61dadf1f9af91b22bbedc28a60a2e2c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 02:35:08 -0400
Subject: introduce FMODE_OPENED

basically, "is that instance set up enough for regular fput(), or
do we want put_filp() for that one".

NOTE: the only alloc_file() caller that could be followed by put_filp()
is in arch/ia64/kernel/perfmon.c, which is (Kconfig-level) broken.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c    | 1 +
 fs/open.c          | 3 ++-
 include/linux/fs.h | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index 705f486f7007..d664d10acfeb 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -176,6 +176,7 @@ struct file *alloc_file(const struct path *path, int flags,
 	if ((file->f_mode & FMODE_WRITE) &&
 	     likely(fop->write || fop->write_iter))
 		file->f_mode |= FMODE_CAN_WRITE;
+	file->f_mode |= FMODE_OPENED;
 	file->f_op = fop;
 	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(path->dentry->d_inode);
diff --git a/fs/open.c b/fs/open.c
index 4c65edefa487..f3c6cb6a57b9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -749,7 +749,7 @@ static int do_dentry_open(struct file *f,
 	f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
 
 	if (unlikely(f->f_flags & O_PATH)) {
-		f->f_mode = FMODE_PATH;
+		f->f_mode = FMODE_PATH | FMODE_OPENED;
 		f->f_op = &empty_fops;
 		return 0;
 	}
@@ -793,6 +793,7 @@ static int do_dentry_open(struct file *f,
 		if (error)
 			goto cleanup_all;
 	}
+	f->f_mode |= FMODE_OPENED;
 	if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(inode);
 	if ((f->f_mode & FMODE_READ) &&
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c4ca4c9c1130..05f34726e29c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -148,6 +148,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* Has write method(s) */
 #define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)
 
+#define FMODE_OPENED		((__force fmode_t)0x80000)
+
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
 
-- 
cgit v1.2.3


From 4d27f3266f14e4d1d13125ce32cb49a40f3122c3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 11:14:39 -0400
Subject: fold put_filp() into fput()

Just check FMODE_OPENED in __fput() and be done with that...

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 15 +++++----------
 fs/namei.c           |  4 ++--
 fs/open.c            | 11 +++--------
 include/linux/file.h |  1 -
 4 files changed, 10 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index d664d10acfeb..9b70ed2bbc4e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -192,6 +192,9 @@ static void __fput(struct file *file)
 	struct vfsmount *mnt = file->f_path.mnt;
 	struct inode *inode = file->f_inode;
 
+	if (unlikely(!(file->f_mode & FMODE_OPENED)))
+		goto out;
+
 	might_sleep();
 
 	fsnotify_close(file);
@@ -221,12 +224,10 @@ static void __fput(struct file *file)
 		put_write_access(inode);
 		__mnt_drop_write(mnt);
 	}
-	file->f_path.dentry = NULL;
-	file->f_path.mnt = NULL;
-	file->f_inode = NULL;
-	file_free(file);
 	dput(dentry);
 	mntput(mnt);
+out:
+	file_free(file);
 }
 
 static LLIST_HEAD(delayed_fput_list);
@@ -301,12 +302,6 @@ void __fput_sync(struct file *file)
 
 EXPORT_SYMBOL(fput);
 
-void put_filp(struct file *file)
-{
-	if (atomic_long_dec_and_test(&file->f_count))
-		file_free(file);
-}
-
 void __init files_init(void)
 {
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
diff --git a/fs/namei.c b/fs/namei.c
index 3cf02804d5ff..503c4b968415 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3531,7 +3531,7 @@ static struct file *path_openat(struct nameidata *nd,
 
 	s = path_init(nd, flags);
 	if (IS_ERR(s)) {
-		put_filp(file);
+		fput(file);
 		return ERR_CAST(s);
 	}
 	while (!(error = link_path_walk(s, nd)) &&
@@ -3547,7 +3547,7 @@ static struct file *path_openat(struct nameidata *nd,
 out2:
 	if (!(opened & FILE_OPENED)) {
 		BUG_ON(!error);
-		put_filp(file);
+		fput(file);
 	}
 	if (unlikely(error)) {
 		if (error == -EOPENSTALE) {
diff --git a/fs/open.c b/fs/open.c
index f3c6cb6a57b9..3d09b823f12b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -921,15 +921,10 @@ struct file *dentry_open(const struct path *path, int flags,
 	f = alloc_empty_file(flags, cred);
 	if (!IS_ERR(f)) {
 		error = vfs_open(path, f);
-		if (!error) {
-			/* from now on we need fput() to dispose of f */
+		if (!error)
 			error = open_check_o_direct(f);
-			if (error) {
-				fput(f);
-				f = ERR_PTR(error);
-			}
-		} else { 
-			put_filp(f);
+		if (error) {
+			fput(f);
 			f = ERR_PTR(error);
 		}
 	}
diff --git a/include/linux/file.h b/include/linux/file.h
index 6d34a1262b31..aed45d69811e 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -78,7 +78,6 @@ extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
 extern int replace_fd(unsigned fd, struct file *file, unsigned flags);
 extern void set_close_on_exec(unsigned int fd, int flag);
 extern bool get_close_on_exec(unsigned int fd);
-extern void put_filp(struct file *);
 extern int get_unused_fd_flags(unsigned flags);
 extern void put_unused_fd(unsigned int fd);
 
-- 
cgit v1.2.3


From 7c1c01ec20d61ef52dba9b6f85435e53449bea71 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 12:56:55 -0400
Subject: lift fput() on late failures into path_openat()

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 503c4b968415..bb77f6cc3ea8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3407,8 +3407,6 @@ opened:
 	if (!error && will_truncate)
 		error = handle_truncate(file);
 out:
-	if (unlikely(error) && (*opened & FILE_OPENED))
-		fput(file);
 	if (unlikely(error > 0)) {
 		WARN_ON(1);
 		error = -EINVAL;
@@ -3484,8 +3482,6 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
 	if (error)
 		goto out2;
 	error = open_check_o_direct(file);
-	if (error)
-		fput(file);
 out2:
 	mnt_drop_write(path.mnt);
 out:
@@ -3545,20 +3541,20 @@ static struct file *path_openat(struct nameidata *nd,
 	}
 	terminate_walk(nd);
 out2:
-	if (!(opened & FILE_OPENED)) {
-		BUG_ON(!error);
-		fput(file);
+	if (likely(!error)) {
+		if (likely(opened & FILE_OPENED))
+			return file;
+		WARN_ON(1);
+		error = -EINVAL;
 	}
-	if (unlikely(error)) {
-		if (error == -EOPENSTALE) {
-			if (flags & LOOKUP_RCU)
-				error = -ECHILD;
-			else
-				error = -ESTALE;
-		}
-		file = ERR_PTR(error);
+	fput(file);
+	if (error == -EOPENSTALE) {
+		if (flags & LOOKUP_RCU)
+			error = -ECHILD;
+		else
+			error = -ESTALE;
 	}
-	return file;
+	return ERR_PTR(error);
 }
 
 struct file *do_filp_open(int dfd, struct filename *pathname,
-- 
cgit v1.2.3


From 69527c554f82d4bca4b154ccc06ad1554806bdc0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 13:01:49 -0400
Subject: now we can fold open_check_o_direct() into do_dentry_open()

These checks are better off in do_dentry_open(); the reason we couldn't
put them there used to be that callers couldn't tell what kind of cleanup
would do_dentry_open() failure call for.  Now that we have FMODE_OPENED,
cleanup is the same in all cases - it's simply fput().  So let's fold
that into do_dentry_open(), as Christoph's patch tried to.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h |  1 -
 fs/namei.c    |  7 +------
 fs/open.c     | 17 +++++------------
 3 files changed, 6 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/internal.h b/fs/internal.h
index baeab53aeaff..52a346903748 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,7 +125,6 @@ int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
 int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
 		int flag);
 
-extern int open_check_o_direct(struct file *f);
 extern int vfs_open(const struct path *, struct file *);
 
 /*
diff --git a/fs/namei.c b/fs/namei.c
index bb77f6cc3ea8..d152cc05fdc3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3401,9 +3401,7 @@ finish_open_created:
 		goto out;
 	*opened |= FILE_OPENED;
 opened:
-	error = open_check_o_direct(file);
-	if (!error)
-		error = ima_file_check(file, op->acc_mode, *opened);
+	error = ima_file_check(file, op->acc_mode, *opened);
 	if (!error && will_truncate)
 		error = handle_truncate(file);
 out:
@@ -3479,9 +3477,6 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
 		goto out2;
 	file->f_path.mnt = path.mnt;
 	error = finish_open(file, child, NULL, opened);
-	if (error)
-		goto out2;
-	error = open_check_o_direct(file);
 out2:
 	mnt_drop_write(path.mnt);
 out:
diff --git a/fs/open.c b/fs/open.c
index 3d09b823f12b..ee893240d199 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -724,16 +724,6 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
 	return ksys_fchown(fd, user, group);
 }
 
-int open_check_o_direct(struct file *f)
-{
-	/* NB: we're sure to have correct a_ops only after f_op->open */
-	if (f->f_flags & O_DIRECT) {
-		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
-			return -EINVAL;
-	}
-	return 0;
-}
-
 static int do_dentry_open(struct file *f,
 			  struct inode *inode,
 			  int (*open)(struct inode *, struct file *))
@@ -808,6 +798,11 @@ static int do_dentry_open(struct file *f,
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
 
+	/* NB: we're sure to have correct a_ops only after f_op->open */
+	if (f->f_flags & O_DIRECT) {
+		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
+			return -EINVAL;
+	}
 	return 0;
 
 cleanup_all:
@@ -921,8 +916,6 @@ struct file *dentry_open(const struct path *path, int flags,
 	f = alloc_empty_file(flags, cred);
 	if (!IS_ERR(f)) {
 		error = vfs_open(path, f);
-		if (!error)
-			error = open_check_o_direct(f);
 		if (error) {
 			fput(f);
 			f = ERR_PTR(error);
-- 
cgit v1.2.3


From aad888f828fec1e7160b67f122172e7ab7f82e03 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 12:58:04 -0400
Subject: switch all remaining checks for FILE_OPENED to FMODE_OPENED

... and don't bother with setting FILE_OPENED at all.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/gfs2/inode.c | 2 +-
 fs/namei.c      | 7 ++-----
 fs/open.c       | 9 ++-------
 3 files changed, 5 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index feda55f67050..67c588edf8d8 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1250,7 +1250,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
 	if (d != NULL)
 		dentry = d;
 	if (d_really_is_positive(dentry)) {
-		if (!(*opened & FILE_OPENED))
+		if (!(file->f_mode & FMODE_OPENED))
 			return finish_no_open(file, d);
 		dput(d);
 		return 0;
diff --git a/fs/namei.c b/fs/namei.c
index d152cc05fdc3..8a1ae074c1c1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3395,11 +3395,10 @@ finish_open_created:
 	error = may_open(&nd->path, acc_mode, open_flag);
 	if (error)
 		goto out;
-	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
+	BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
 	error = vfs_open(&nd->path, file);
 	if (error)
 		goto out;
-	*opened |= FILE_OPENED;
 opened:
 	error = ima_file_check(file, op->acc_mode, *opened);
 	if (!error && will_truncate)
@@ -3515,8 +3514,6 @@ static struct file *path_openat(struct nameidata *nd,
 
 	if (unlikely(file->f_flags & O_PATH)) {
 		error = do_o_path(nd, flags, file);
-		if (!error)
-			opened |= FILE_OPENED;
 		goto out2;
 	}
 
@@ -3537,7 +3534,7 @@ static struct file *path_openat(struct nameidata *nd,
 	terminate_walk(nd);
 out2:
 	if (likely(!error)) {
-		if (likely(opened & FILE_OPENED))
+		if (likely(file->f_mode & FMODE_OPENED))
 			return file;
 		WARN_ON(1);
 		error = -EINVAL;
diff --git a/fs/open.c b/fs/open.c
index ee893240d199..d2030a3c5c52 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -846,15 +846,10 @@ int finish_open(struct file *file, struct dentry *dentry,
 		int (*open)(struct inode *, struct file *),
 		int *opened)
 {
-	int error;
-	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
+	BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
 
 	file->f_path.dentry = dentry;
-	error = do_dentry_open(file, d_backing_inode(dentry), open);
-	if (!error)
-		*opened |= FILE_OPENED;
-
-	return error;
+	return do_dentry_open(file, d_backing_inode(dentry), open);
 }
 EXPORT_SYMBOL(finish_open);
 
-- 
cgit v1.2.3


From 73a09dd94377e4b186b300bd5461920710c7c3d5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 13:22:02 -0400
Subject: introduce FMODE_CREATED and switch to it

Parallel to FILE_CREATED, goes into ->f_mode instead of *opened.
NFS is a bit of a wart here - it doesn't have file at the point
where FILE_CREATED used to be set, so we need to propagate it
there (for now).  IMA is another one (here and everywhere)...

Note that this needs do_dentry_open() to leave old bits in ->f_mode
alone - we want it to preserve FMODE_CREATED if it had been already
set (no other bit can be there).

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_inode.c      |  2 +-
 fs/9p/vfs_inode_dotl.c |  2 +-
 fs/ceph/file.c         |  2 +-
 fs/cifs/dir.c          |  2 +-
 fs/fuse/dir.c          |  2 +-
 fs/gfs2/inode.c        |  2 +-
 fs/namei.c             | 15 ++++++++-------
 fs/nfs/dir.c           |  5 ++++-
 fs/nfs/nfs4proc.c      |  2 +-
 include/linux/fs.h     |  1 +
 10 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 42e102e2e74a..566929792480 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -925,7 +925,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
 		v9fs_cache_inode_set_cookie(d_inode(dentry), file);
 
-	*opened |= FILE_CREATED;
+	file->f_mode |= FMODE_CREATED;
 out:
 	dput(res);
 	return err;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 7f6ae21a27b3..ee65db5c7eb0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -358,7 +358,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	file->private_data = ofid;
 	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
 		v9fs_cache_inode_set_cookie(inode, file);
-	*opened |= FILE_CREATED;
+	file->f_mode |= FMODE_CREATED;
 out:
 	v9fs_put_acl(dacl, pacl);
 	dput(res);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ad0bed99b1d5..38a63fff7903 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -507,7 +507,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 		dout("atomic_open finish_open on dn %p\n", dn);
 		if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
 			ceph_init_inode_acls(d_inode(dentry), &acls);
-			*opened |= FILE_CREATED;
+			file->f_mode |= FMODE_CREATED;
 		}
 		err = finish_open(file, dentry, ceph_open, opened);
 	}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ddae52bd1993..21d7e393900e 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -539,7 +539,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	}
 
 	if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
-		*opened |= FILE_CREATED;
+		file->f_mode |= FMODE_CREATED;
 
 	rc = finish_open(file, direntry, generic_file_open, opened);
 	if (rc) {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 56231b31f806..d4bdcf51e6cb 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -508,7 +508,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 		goto no_open;
 
 	/* Only creates */
-	*opened |= FILE_CREATED;
+	file->f_mode |= FMODE_CREATED;
 
 	if (fc->no_create)
 		goto mknod;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 67c588edf8d8..4aba00a6004b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -767,7 +767,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	mark_inode_dirty(inode);
 	d_instantiate(dentry, inode);
 	if (file) {
-		*opened |= FILE_CREATED;
+		file->f_mode |= FMODE_CREATED;
 		error = finish_open(file, dentry, gfs2_open_common, opened);
 	}
 	gfs2_glock_dq_uninit(ghs);
diff --git a/fs/namei.c b/fs/namei.c
index 8a1ae074c1c1..4bd7cc0d7522 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3061,7 +3061,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 		 * permission here.
 		 */
 		int acc_mode = op->acc_mode;
-		if (*opened & FILE_CREATED) {
+		if (file->f_mode & FMODE_CREATED) {
 			WARN_ON(!(open_flag & O_CREAT));
 			fsnotify_create(dir, dentry);
 			acc_mode = 0;
@@ -3077,7 +3077,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 				dput(dentry);
 				dentry = file->f_path.dentry;
 			}
-			if (*opened & FILE_CREATED)
+			if (file->f_mode & FMODE_CREATED)
 				fsnotify_create(dir, dentry);
 			if (unlikely(d_is_negative(dentry))) {
 				error = -ENOENT;
@@ -3126,7 +3126,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
 	if (unlikely(IS_DEADDIR(dir_inode)))
 		return -ENOENT;
 
-	*opened &= ~FILE_CREATED;
+	file->f_mode &= ~FMODE_CREATED;
 	dentry = d_lookup(dir, &nd->last);
 	for (;;) {
 		if (!dentry) {
@@ -3211,7 +3211,7 @@ no_open:
 
 	/* Negative dentry, just create the file */
 	if (!dentry->d_inode && (open_flag & O_CREAT)) {
-		*opened |= FILE_CREATED;
+		file->f_mode |= FMODE_CREATED;
 		audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
 		if (!dir_inode->i_op->create) {
 			error = -EACCES;
@@ -3318,7 +3318,7 @@ static int do_last(struct nameidata *nd,
 		if (error)
 			goto out;
 
-		if ((*opened & FILE_CREATED) ||
+		if ((file->f_mode & FMODE_CREATED) ||
 		    !S_ISREG(file_inode(file)->i_mode))
 			will_truncate = false;
 
@@ -3326,7 +3326,7 @@ static int do_last(struct nameidata *nd,
 		goto opened;
 	}
 
-	if (*opened & FILE_CREATED) {
+	if (file->f_mode & FMODE_CREATED) {
 		/* Don't check for write permission, don't truncate */
 		open_flag &= ~O_TRUNC;
 		will_truncate = false;
@@ -3400,7 +3400,8 @@ finish_open_created:
 	if (error)
 		goto out;
 opened:
-	error = ima_file_check(file, op->acc_mode, *opened);
+	error = ima_file_check(file, op->acc_mode,
+				file->f_mode & FMODE_CREATED ? FILE_CREATED : 0);
 	if (!error && will_truncate)
 		error = handle_truncate(file);
 out:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7a9c14426855..0ac50983fc4e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1461,6 +1461,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	struct inode *inode;
 	unsigned int lookup_flags = 0;
 	bool switched = false;
+	int created = 0;
 	int err;
 
 	/* Expect a negative dentry */
@@ -1521,7 +1522,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		goto out;
 
 	trace_nfs_atomic_open_enter(dir, ctx, open_flags);
-	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened);
+	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
+	if (created)
+		file->f_mode |= FMODE_CREATED;
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ed45090e4df6..2c4df0ffbca1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2951,7 +2951,7 @@ static int _nfs4_do_open(struct inode *dir,
 		}
 	}
 	if (opened && opendata->file_created)
-		*opened |= FILE_CREATED;
+		*opened = 1;
 
 	if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) {
 		*ctx_th = opendata->f_attr.mdsthreshold;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 05f34726e29c..ca668c7e48a7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -149,6 +149,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)
 
 #define FMODE_OPENED		((__force fmode_t)0x80000)
+#define FMODE_CREATED		((__force fmode_t)0x100000)
 
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
-- 
cgit v1.2.3


From 6035a27b25ab9dadc8c3d5c5df5eae3fca62fc95 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 13:40:10 -0400
Subject: IMA: don't propagate opened through the entire thing

just check ->f_mode in ima_appraise_measurement()

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                            |  3 +--
 fs/nfsd/vfs.c                         |  2 +-
 include/linux/ima.h                   |  4 ++--
 security/integrity/ima/ima.h          |  4 ++--
 security/integrity/ima/ima_appraise.c |  4 ++--
 security/integrity/ima/ima_main.c     | 16 ++++++++--------
 6 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 4bd7cc0d7522..d2aeb282ed05 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3400,8 +3400,7 @@ finish_open_created:
 	if (error)
 		goto out;
 opened:
-	error = ima_file_check(file, op->acc_mode,
-				file->f_mode & FMODE_CREATED ? FILE_CREATED : 0);
+	error = ima_file_check(file, op->acc_mode);
 	if (!error && will_truncate)
 		error = handle_truncate(file);
 out:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b0555d7d8200..55a099e47ba2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -763,7 +763,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 		goto out_nfserr;
 	}
 
-	host_err = ima_file_check(file, may_flags, 0);
+	host_err = ima_file_check(file, may_flags);
 	if (host_err) {
 		fput(file);
 		goto out_nfserr;
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0e4647e0eb60..d9ba3fc363b7 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -16,7 +16,7 @@ struct linux_binprm;
 
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
-extern int ima_file_check(struct file *file, int mask, int opened);
+extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern int ima_read_file(struct file *file, enum kernel_read_file_id id);
@@ -34,7 +34,7 @@ static inline int ima_bprm_check(struct linux_binprm *bprm)
 	return 0;
 }
 
-static inline int ima_file_check(struct file *file, int mask, int opened)
+static inline int ima_file_check(struct file *file, int mask)
 {
 	return 0;
 }
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 354bb5716ce3..e4c1a236976c 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -238,7 +238,7 @@ int ima_appraise_measurement(enum ima_hooks func,
 			     struct integrity_iint_cache *iint,
 			     struct file *file, const unsigned char *filename,
 			     struct evm_ima_xattr_data *xattr_value,
-			     int xattr_len, int opened);
+			     int xattr_len);
 int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func);
 void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
 enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
@@ -254,7 +254,7 @@ static inline int ima_appraise_measurement(enum ima_hooks func,
 					   struct file *file,
 					   const unsigned char *filename,
 					   struct evm_ima_xattr_data *xattr_value,
-					   int xattr_len, int opened)
+					   int xattr_len)
 {
 	return INTEGRITY_UNKNOWN;
 }
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 8bd7a0733e51..deec1804a00a 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -212,7 +212,7 @@ int ima_appraise_measurement(enum ima_hooks func,
 			     struct integrity_iint_cache *iint,
 			     struct file *file, const unsigned char *filename,
 			     struct evm_ima_xattr_data *xattr_value,
-			     int xattr_len, int opened)
+			     int xattr_len)
 {
 	static const char op[] = "appraise_data";
 	const char *cause = "unknown";
@@ -231,7 +231,7 @@ int ima_appraise_measurement(enum ima_hooks func,
 		cause = iint->flags & IMA_DIGSIG_REQUIRED ?
 				"IMA-signature-required" : "missing-hash";
 		status = INTEGRITY_NOLABEL;
-		if (opened & FILE_CREATED)
+		if (file->f_mode & FMODE_CREATED)
 			iint->flags |= IMA_NEW_FILE;
 		if ((iint->flags & IMA_NEW_FILE) &&
 		    (!(iint->flags & IMA_DIGSIG_REQUIRED) ||
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index dca44cf7838e..b286f37712d5 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -168,7 +168,7 @@ void ima_file_free(struct file *file)
 
 static int process_measurement(struct file *file, const struct cred *cred,
 			       u32 secid, char *buf, loff_t size, int mask,
-			       enum ima_hooks func, int opened)
+			       enum ima_hooks func)
 {
 	struct inode *inode = file_inode(file);
 	struct integrity_iint_cache *iint = NULL;
@@ -294,7 +294,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
 	if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) {
 		inode_lock(inode);
 		rc = ima_appraise_measurement(func, iint, file, pathname,
-					      xattr_value, xattr_len, opened);
+					      xattr_value, xattr_len);
 		inode_unlock(inode);
 	}
 	if (action & IMA_AUDIT)
@@ -338,7 +338,7 @@ int ima_file_mmap(struct file *file, unsigned long prot)
 	if (file && (prot & PROT_EXEC)) {
 		security_task_getsecid(current, &secid);
 		return process_measurement(file, current_cred(), secid, NULL,
-					   0, MAY_EXEC, MMAP_CHECK, 0);
+					   0, MAY_EXEC, MMAP_CHECK);
 	}
 
 	return 0;
@@ -364,13 +364,13 @@ int ima_bprm_check(struct linux_binprm *bprm)
 
 	security_task_getsecid(current, &secid);
 	ret = process_measurement(bprm->file, current_cred(), secid, NULL, 0,
-				  MAY_EXEC, BPRM_CHECK, 0);
+				  MAY_EXEC, BPRM_CHECK);
 	if (ret)
 		return ret;
 
 	security_cred_getsecid(bprm->cred, &secid);
 	return process_measurement(bprm->file, bprm->cred, secid, NULL, 0,
-				   MAY_EXEC, CREDS_CHECK, 0);
+				   MAY_EXEC, CREDS_CHECK);
 }
 
 /**
@@ -383,14 +383,14 @@ int ima_bprm_check(struct linux_binprm *bprm)
  * On success return 0.  On integrity appraisal error, assuming the file
  * is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
  */
-int ima_file_check(struct file *file, int mask, int opened)
+int ima_file_check(struct file *file, int mask)
 {
 	u32 secid;
 
 	security_task_getsecid(current, &secid);
 	return process_measurement(file, current_cred(), secid, NULL, 0,
 				   mask & (MAY_READ | MAY_WRITE | MAY_EXEC |
-					   MAY_APPEND), FILE_CHECK, opened);
+					   MAY_APPEND), FILE_CHECK);
 }
 EXPORT_SYMBOL_GPL(ima_file_check);
 
@@ -493,7 +493,7 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size,
 	func = read_idmap[read_id] ?: FILE_CHECK;
 	security_task_getsecid(current, &secid);
 	return process_measurement(file, current_cred(), secid, buf, size,
-				   MAY_READ, func, 0);
+				   MAY_READ, func);
 }
 
 static int __init init_ima(void)
-- 
cgit v1.2.3


From be12af3ef5e61ebc44d065e121424ac605d7bb8e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 11:44:56 -0400
Subject: getting rid of 'opened' argument of ->atomic_open() - part 1

'opened' argument of finish_open() is unused.  Kill it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_inode.c      | 2 +-
 fs/9p/vfs_inode_dotl.c | 2 +-
 fs/ceph/file.c         | 2 +-
 fs/cifs/dir.c          | 2 +-
 fs/fuse/dir.c          | 2 +-
 fs/gfs2/inode.c        | 6 +++---
 fs/namei.c             | 2 +-
 fs/nfs/dir.c           | 2 +-
 fs/open.c              | 3 +--
 include/linux/fs.h     | 3 +--
 10 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 566929792480..7b6ff3275d9c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -917,7 +917,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		v9inode->writeback_fid = (void *) inode_fid;
 	}
 	mutex_unlock(&v9inode->v_mutex);
-	err = finish_open(file, dentry, generic_file_open, opened);
+	err = finish_open(file, dentry, generic_file_open);
 	if (err)
 		goto error;
 
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index ee65db5c7eb0..c6939b7cb18c 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -352,7 +352,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	}
 	mutex_unlock(&v9inode->v_mutex);
 	/* Since we are opening a file, assign the open fid to the file */
-	err = finish_open(file, dentry, generic_file_open, opened);
+	err = finish_open(file, dentry, generic_file_open);
 	if (err)
 		goto err_clunk_old_fid;
 	file->private_data = ofid;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 38a63fff7903..38b28cb2fac1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -509,7 +509,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 			ceph_init_inode_acls(d_inode(dentry), &acls);
 			file->f_mode |= FMODE_CREATED;
 		}
-		err = finish_open(file, dentry, ceph_open, opened);
+		err = finish_open(file, dentry, ceph_open);
 	}
 out_req:
 	if (!req->r_err && req->r_target_inode)
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 21d7e393900e..891bfd62e67a 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -541,7 +541,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
 		file->f_mode |= FMODE_CREATED;
 
-	rc = finish_open(file, direntry, generic_file_open, opened);
+	rc = finish_open(file, direntry, generic_file_open);
 	if (rc) {
 		if (server->ops->close)
 			server->ops->close(xid, tcon, &fid);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d4bdcf51e6cb..a5b1f5ff8cb7 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -469,7 +469,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	d_instantiate(entry, inode);
 	fuse_change_entry_timeout(entry, &outentry);
 	fuse_invalidate_attr(dir);
-	err = finish_open(file, entry, generic_file_open, opened);
+	err = finish_open(file, entry, generic_file_open);
 	if (err) {
 		fuse_sync_release(ff, flags);
 	} else {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 4aba00a6004b..59f695e96d63 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -626,7 +626,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		error = 0;
 		if (file) {
 			if (S_ISREG(inode->i_mode))
-				error = finish_open(file, dentry, gfs2_open_common, opened);
+				error = finish_open(file, dentry, gfs2_open_common);
 			else
 				error = finish_no_open(file, NULL);
 		}
@@ -768,7 +768,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	d_instantiate(dentry, inode);
 	if (file) {
 		file->f_mode |= FMODE_CREATED;
-		error = finish_open(file, dentry, gfs2_open_common, opened);
+		error = finish_open(file, dentry, gfs2_open_common);
 	}
 	gfs2_glock_dq_uninit(ghs);
 	gfs2_glock_dq_uninit(ghs + 1);
@@ -866,7 +866,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
 		return d;
 	}
 	if (file && S_ISREG(inode->i_mode))
-		error = finish_open(file, dentry, gfs2_open_common, opened);
+		error = finish_open(file, dentry, gfs2_open_common);
 
 	gfs2_glock_dq_uninit(&gh);
 	if (error) {
diff --git a/fs/namei.c b/fs/namei.c
index d2aeb282ed05..117b118853f2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3475,7 +3475,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
 	if (error)
 		goto out2;
 	file->f_path.mnt = path.mnt;
-	error = finish_open(file, child, NULL, opened);
+	error = finish_open(file, child, NULL);
 out2:
 	mnt_drop_write(path.mnt);
 out:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 0ac50983fc4e..22176a3818d5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1439,7 +1439,7 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
 {
 	int err;
 
-	err = finish_open(file, dentry, do_open, opened);
+	err = finish_open(file, dentry, do_open);
 	if (err)
 		goto out;
 	if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
diff --git a/fs/open.c b/fs/open.c
index d2030a3c5c52..dbaac9efc7fc 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -843,8 +843,7 @@ cleanup_file:
  * Returns zero on success or -errno if the open failed.
  */
 int finish_open(struct file *file, struct dentry *dentry,
-		int (*open)(struct inode *, struct file *),
-		int *opened)
+		int (*open)(struct inode *, struct file *))
 {
 	BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ca668c7e48a7..70be3e4c26ac 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2441,8 +2441,7 @@ enum {
 	FILE_OPENED = 2
 };
 extern int finish_open(struct file *file, struct dentry *dentry,
-			int (*open)(struct inode *, struct file *),
-			int *opened);
+			int (*open)(struct inode *, struct file *));
 extern int finish_no_open(struct file *file, struct dentry *dentry);
 
 /* fs/ioctl.c */
-- 
cgit v1.2.3


From b452a458caaa95d02b74897d35e87aa080122f07 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 13:06:28 -0400
Subject: getting rid of 'opened' argument of ->atomic_open() - part 2

__gfs2_lookup(), gfs2_create_inode(), nfs_finish_open() and fuse_create_open()
don't need 'opened' anymore.  Get rid of that argument in those.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fuse/dir.c   |  4 ++--
 fs/gfs2/inode.c | 19 +++++++++----------
 fs/nfs/dir.c    |  5 ++---
 3 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index a5b1f5ff8cb7..b8d7e9d423c8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -399,7 +399,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
  */
 static int fuse_create_open(struct inode *dir, struct dentry *entry,
 			    struct file *file, unsigned flags,
-			    umode_t mode, int *opened)
+			    umode_t mode)
 {
 	int err;
 	struct inode *inode;
@@ -513,7 +513,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 	if (fc->no_create)
 		goto mknod;
 
-	err = fuse_create_open(dir, entry, file, flags, mode, opened);
+	err = fuse_create_open(dir, entry, file, flags, mode);
 	if (err == -ENOSYS) {
 		fc->no_create = 1;
 		goto mknod;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 59f695e96d63..15e2a8a3b917 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -580,7 +580,7 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
 static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 			     struct file *file,
 			     umode_t mode, dev_t dev, const char *symname,
-			     unsigned int size, int excl, int *opened)
+			     unsigned int size, int excl)
 {
 	const struct qstr *name = &dentry->d_name;
 	struct posix_acl *default_acl, *acl;
@@ -822,7 +822,7 @@ fail:
 static int gfs2_create(struct inode *dir, struct dentry *dentry,
 		       umode_t mode, bool excl)
 {
-	return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl, NULL);
+	return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl);
 }
 
 /**
@@ -830,14 +830,13 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
  * @dir: The directory inode
  * @dentry: The dentry of the new inode
  * @file: File to be opened
- * @opened: atomic_open flags
  *
  *
  * Returns: errno
  */
 
 static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
-				    struct file *file, int *opened)
+				    struct file *file)
 {
 	struct inode *inode;
 	struct dentry *d;
@@ -879,7 +878,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
 static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
 				  unsigned flags)
 {
-	return __gfs2_lookup(dir, dentry, NULL, NULL);
+	return __gfs2_lookup(dir, dentry, NULL);
 }
 
 /**
@@ -1189,7 +1188,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 	if (size >= gfs2_max_stuffed_size(GFS2_I(dir)))
 		return -ENAMETOOLONG;
 
-	return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0, NULL);
+	return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0);
 }
 
 /**
@@ -1204,7 +1203,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir));
-	return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0, NULL);
+	return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0);
 }
 
 /**
@@ -1219,7 +1218,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 		      dev_t dev)
 {
-	return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0, NULL);
+	return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0);
 }
 
 /**
@@ -1244,7 +1243,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
 	if (!d_in_lookup(dentry))
 		goto skip_lookup;
 
-	d = __gfs2_lookup(dir, dentry, file, opened);
+	d = __gfs2_lookup(dir, dentry, file);
 	if (IS_ERR(d))
 		return PTR_ERR(d);
 	if (d != NULL)
@@ -1262,7 +1261,7 @@ skip_lookup:
 	if (!(flags & O_CREAT))
 		return -ENOENT;
 
-	return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl, opened);
+	return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl);
 }
 
 /*
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 22176a3818d5..71ae3cc3e53a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1434,8 +1434,7 @@ static int do_open(struct inode *inode, struct file *filp)
 
 static int nfs_finish_open(struct nfs_open_context *ctx,
 			   struct dentry *dentry,
-			   struct file *file, unsigned open_flags,
-			   int *opened)
+			   struct file *file, unsigned open_flags)
 {
 	int err;
 
@@ -1549,7 +1548,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		goto out;
 	}
 
-	err = nfs_finish_open(ctx, ctx->dentry, file, open_flags, opened);
+	err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
 	trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
 	put_nfs_open_context(ctx);
 out:
-- 
cgit v1.2.3


From 44907d79002466049fdbb8ef15730d185e0808b4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 13:32:02 -0400
Subject: get rid of 'opened' argument of ->atomic_open() - part 3

now it can be done...

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_inode.c      | 3 +--
 fs/9p/vfs_inode_dotl.c | 3 +--
 fs/bad_inode.c         | 2 +-
 fs/ceph/file.c         | 3 +--
 fs/ceph/super.h        | 3 +--
 fs/cifs/cifsfs.h       | 3 +--
 fs/cifs/dir.c          | 3 +--
 fs/fuse/dir.c          | 2 +-
 fs/gfs2/inode.c        | 3 +--
 fs/namei.c             | 3 +--
 fs/nfs/dir.c           | 2 +-
 fs/nfs/nfs4_fs.h       | 2 +-
 include/linux/fs.h     | 2 +-
 13 files changed, 13 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7b6ff3275d9c..85ff859d3af5 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -859,8 +859,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 
 static int
 v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
-		     struct file *file, unsigned flags, umode_t mode,
-		     int *opened)
+		     struct file *file, unsigned flags, umode_t mode)
 {
 	int err;
 	u32 perm;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index c6939b7cb18c..4823e1c46999 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -241,8 +241,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 
 static int
 v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
-			  struct file *file, unsigned flags, umode_t omode,
-			  int *opened)
+			  struct file *file, unsigned flags, umode_t omode)
 {
 	int err = 0;
 	kgid_t gid;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 125e8bbd22a2..8035d2a44561 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -134,7 +134,7 @@ static int bad_inode_update_time(struct inode *inode, struct timespec64 *time,
 
 static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry,
 				 struct file *file, unsigned int open_flag,
-				 umode_t create_mode, int *opened)
+				 umode_t create_mode)
 {
 	return -EIO;
 }
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 38b28cb2fac1..e2679e8a2535 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -429,8 +429,7 @@ out:
  * file or symlink, return 1 so the VFS can retry.
  */
 int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
-		     struct file *file, unsigned flags, umode_t mode,
-		     int *opened)
+		     struct file *file, unsigned flags, umode_t mode)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a7077a0c989f..971328b99ede 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1025,8 +1025,7 @@ extern const struct file_operations ceph_file_fops;
 extern int ceph_renew_caps(struct inode *inode);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
-			    struct file *file, unsigned flags, umode_t mode,
-			    int *opened);
+			    struct file *file, unsigned flags, umode_t mode);
 extern int ceph_release(struct inode *inode, struct file *filp);
 extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
 				  char *data, size_t len);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 5f0231803431..f3a78efc3109 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -65,8 +65,7 @@ extern struct inode *cifs_root_iget(struct super_block *);
 extern int cifs_create(struct inode *, struct dentry *, umode_t,
 		       bool excl);
 extern int cifs_atomic_open(struct inode *, struct dentry *,
-			    struct file *, unsigned, umode_t,
-			    int *);
+			    struct file *, unsigned, umode_t);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
 				  unsigned int);
 extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 891bfd62e67a..3713d22b95a7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -465,8 +465,7 @@ out_err:
 
 int
 cifs_atomic_open(struct inode *inode, struct dentry *direntry,
-		 struct file *file, unsigned oflags, umode_t mode,
-		 int *opened)
+		 struct file *file, unsigned oflags, umode_t mode)
 {
 	int rc;
 	unsigned int xid;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b8d7e9d423c8..c979329311c8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -489,7 +489,7 @@ out_err:
 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 			    struct file *file, unsigned flags,
-			    umode_t mode, int *opened)
+			    umode_t mode)
 {
 	int err;
 	struct fuse_conn *fc = get_fuse_conn(dir);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 15e2a8a3b917..648f0ca1ad57 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1228,14 +1228,13 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
  * @file: The proposed new struct file
  * @flags: open flags
  * @mode: File mode
- * @opened: Flag to say whether the file has been opened or not
  *
  * Returns: error code or 0 for success
  */
 
 static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
 			    struct file *file, unsigned flags,
-			    umode_t mode, int *opened)
+			    umode_t mode)
 {
 	struct dentry *d;
 	bool excl = !!(flags & O_EXCL);
diff --git a/fs/namei.c b/fs/namei.c
index 117b118853f2..1da272bf8ed3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3052,8 +3052,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 	file->f_path.dentry = DENTRY_NOT_SET;
 	file->f_path.mnt = nd->path.mnt;
 	error = dir->i_op->atomic_open(dir, dentry, file,
-				       open_to_namei_flags(open_flag),
-				       mode, opened);
+				       open_to_namei_flags(open_flag), mode);
 	d_lookup_done(dentry);
 	if (!error) {
 		/*
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 71ae3cc3e53a..f447b1a24350 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1451,7 +1451,7 @@ out:
 
 int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		    struct file *file, unsigned open_flags,
-		    umode_t mode, int *opened)
+		    umode_t mode)
 {
 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 	struct nfs_open_context *ctx;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 137e18abb7e7..51beb6e38c90 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -258,7 +258,7 @@ extern const struct dentry_operations nfs4_dentry_operations;
 
 /* dir.c */
 int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
-		    unsigned, umode_t, int *);
+		    unsigned, umode_t);
 
 /* super.c */
 extern struct file_system_type nfs4_fs_type;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 70be3e4c26ac..c25896b30e9f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1781,7 +1781,7 @@ struct inode_operations {
 	int (*update_time)(struct inode *, struct timespec64 *, int);
 	int (*atomic_open)(struct inode *, struct dentry *,
 			   struct file *, unsigned open_flag,
-			   umode_t create_mode, int *opened);
+			   umode_t create_mode);
 	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
 	int (*set_acl)(struct inode *, struct posix_acl *, int);
 } ____cacheline_aligned;
-- 
cgit v1.2.3


From 3ec2eef116e900099edc6d31a1a0423166e2906d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 13:43:47 -0400
Subject: get rid of 'opened' in path_openat() and the helpers downstream

unused now

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 1da272bf8ed3..6def3e148f90 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3036,8 +3036,7 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m
 static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 			struct path *path, struct file *file,
 			const struct open_flags *op,
-			int open_flag, umode_t mode,
-			int *opened)
+			int open_flag, umode_t mode)
 {
 	struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
 	struct inode *dir =  nd->path.dentry->d_inode;
@@ -3105,14 +3104,11 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
  * specified then a negative dentry may be returned.
  *
  * An error code is returned otherwise.
- *
- * FILE_CREATE will be set in @*opened if the dentry was created and will be
- * cleared otherwise prior to returning.
  */
 static int lookup_open(struct nameidata *nd, struct path *path,
 			struct file *file,
 			const struct open_flags *op,
-			bool got_write, int *opened)
+			bool got_write)
 {
 	struct dentry *dir = nd->path.dentry;
 	struct inode *dir_inode = dir->d_inode;
@@ -3187,7 +3183,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
 
 	if (dir_inode->i_op->atomic_open) {
 		error = atomic_open(nd, dentry, path, file, op, open_flag,
-				    mode, opened);
+				    mode);
 		if (unlikely(error == -ENOENT) && create_error)
 			error = create_error;
 		return error;
@@ -3240,8 +3236,7 @@ out_dput:
  * Handle the last step of open()
  */
 static int do_last(struct nameidata *nd,
-		   struct file *file, const struct open_flags *op,
-		   int *opened)
+		   struct file *file, const struct open_flags *op)
 {
 	struct dentry *dir = nd->path.dentry;
 	int open_flag = op->open_flag;
@@ -3307,7 +3302,7 @@ static int do_last(struct nameidata *nd,
 		inode_lock(dir->d_inode);
 	else
 		inode_lock_shared(dir->d_inode);
-	error = lookup_open(nd, &path, file, op, got_write, opened);
+	error = lookup_open(nd, &path, file, op, got_write);
 	if (open_flag & O_CREAT)
 		inode_unlock(dir->d_inode);
 	else
@@ -3452,7 +3447,7 @@ EXPORT_SYMBOL(vfs_tmpfile);
 
 static int do_tmpfile(struct nameidata *nd, unsigned flags,
 		const struct open_flags *op,
-		struct file *file, int *opened)
+		struct file *file)
 {
 	struct dentry *child;
 	struct path path;
@@ -3499,7 +3494,6 @@ static struct file *path_openat(struct nameidata *nd,
 {
 	const char *s;
 	struct file *file;
-	int opened = 0;
 	int error;
 
 	file = alloc_empty_file(op->open_flag, current_cred());
@@ -3507,7 +3501,7 @@ static struct file *path_openat(struct nameidata *nd,
 		return file;
 
 	if (unlikely(file->f_flags & __O_TMPFILE)) {
-		error = do_tmpfile(nd, flags, op, file, &opened);
+		error = do_tmpfile(nd, flags, op, file);
 		goto out2;
 	}
 
@@ -3522,7 +3516,7 @@ static struct file *path_openat(struct nameidata *nd,
 		return ERR_CAST(s);
 	}
 	while (!(error = link_path_walk(s, nd)) &&
-		(error = do_last(nd, file, op, &opened)) > 0) {
+		(error = do_last(nd, file, op)) > 0) {
 		nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
 		s = trailing_symlink(nd);
 		if (IS_ERR(s)) {
-- 
cgit v1.2.3


From 64e1ac4d46f9f5d8284aefb97e1b550dbb26abe8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 19:17:52 -0400
Subject: ->atomic_open(): return 0 in all success cases

FMODE_OPENED can be used to distinguish "successful open" from the
"called finish_no_open(), do it yourself" cases.  Since finish_no_open()
has been adjusted, no changes in the instances were actually needed.
The caller has been adjusted.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 30 +++++++++++++++---------------
 fs/open.c  |  4 ++--
 2 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 6def3e148f90..e213e7bf028a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3054,21 +3054,21 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 				       open_to_namei_flags(open_flag), mode);
 	d_lookup_done(dentry);
 	if (!error) {
-		/*
-		 * We didn't have the inode before the open, so check open
-		 * permission here.
-		 */
-		int acc_mode = op->acc_mode;
-		if (file->f_mode & FMODE_CREATED) {
-			WARN_ON(!(open_flag & O_CREAT));
-			fsnotify_create(dir, dentry);
-			acc_mode = 0;
-		}
-		error = may_open(&file->f_path, acc_mode, open_flag);
-		if (WARN_ON(error > 0))
-			error = -EINVAL;
-	} else if (error > 0) {
-		if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
+		if (file->f_mode & FMODE_OPENED) {
+			/*
+			 * We didn't have the inode before the open, so check open
+			 * permission here.
+			 */
+			int acc_mode = op->acc_mode;
+			if (file->f_mode & FMODE_CREATED) {
+				WARN_ON(!(open_flag & O_CREAT));
+				fsnotify_create(dir, dentry);
+				acc_mode = 0;
+			}
+			error = may_open(&file->f_path, acc_mode, open_flag);
+			if (WARN_ON(error > 0))
+				error = -EINVAL;
+		} else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
 			error = -EIO;
 		} else {
 			if (file->f_path.dentry) {
diff --git a/fs/open.c b/fs/open.c
index dbaac9efc7fc..d98e19239bb7 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -863,13 +863,13 @@ EXPORT_SYMBOL(finish_open);
  * NB: unlike finish_open() this function does consume the dentry reference and
  * the caller need not dput() it.
  *
- * Returns "1" which must be the return value of ->atomic_open() after having
+ * Returns "0" which must be the return value of ->atomic_open() after having
  * called this function.
  */
 int finish_no_open(struct file *file, struct dentry *dentry)
 {
 	file->f_path.dentry = dentry;
-	return 1;
+	return 0;
 }
 EXPORT_SYMBOL(finish_no_open);
 
-- 
cgit v1.2.3


From 00a07c1591a3fb3d71f7083361eab4a2444938a3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 19:30:20 -0400
Subject: switch atomic_open() and lookup_open() to returning 0 in all success
 cases

caller can tell "opened" from "open it yourself" by looking at ->f_mode.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index e213e7bf028a..8311dce1c649 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3027,9 +3027,9 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m
  * Returns 0 if successful.  The file will have been created and attached to
  * @file by the filesystem calling finish_open().
  *
- * Returns 1 if the file was looked up only or didn't need creating.  The
- * caller will need to perform the open themselves.  @path will have been
- * updated to point to the new dentry.  This may be negative.
+ * If the file was looked up only or didn't need creating, FMODE_OPENED won't
+ * be set.  The caller will need to perform the open themselves.  @path will
+ * have been updated to point to the new dentry.  This may be negative.
  *
  * Returns an error code otherwise.
  */
@@ -3082,7 +3082,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 			} else {
 				path->dentry = dentry;
 				path->mnt = nd->path.mnt;
-				return 1;
+				return 0;
 			}
 		}
 	}
@@ -3093,17 +3093,17 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 /*
  * Look up and maybe create and open the last component.
  *
- * Must be called with i_mutex held on parent.
- *
- * Returns 0 if the file was successfully atomically created (if necessary) and
- * opened.  In this case the file will be returned attached to @file.
+ * Must be called with parent locked (exclusive in O_CREAT case).
  *
- * Returns 1 if the file was not completely opened at this time, though lookups
- * and creations will have been performed and the dentry returned in @path will
- * be positive upon return if O_CREAT was specified.  If O_CREAT wasn't
- * specified then a negative dentry may be returned.
+ * Returns 0 on success, that is, if
+ *  the file was successfully atomically created (if necessary) and opened, or
+ *  the file was not completely opened at this time, though lookups and
+ *  creations were performed.
+ * These cases are distinguished by presence of FMODE_OPENED on file->f_mode.
+ * In the latter case dentry returned in @path might be negative if O_CREAT
+ * hadn't been specified.
  *
- * An error code is returned otherwise.
+ * An error code is returned on failure.
  */
 static int lookup_open(struct nameidata *nd, struct path *path,
 			struct file *file,
@@ -3225,7 +3225,7 @@ no_open:
 out_no_open:
 	path->dentry = dentry;
 	path->mnt = nd->path.mnt;
-	return 1;
+	return 0;
 
 out_dput:
 	dput(dentry);
@@ -3308,10 +3308,10 @@ static int do_last(struct nameidata *nd,
 	else
 		inode_unlock_shared(dir->d_inode);
 
-	if (error <= 0) {
-		if (error)
-			goto out;
+	if (error)
+		goto out;
 
+	if (file->f_mode & FMODE_OPENED) {
 		if ((file->f_mode & FMODE_CREATED) ||
 		    !S_ISREG(file_inode(file)->i_mode))
 			will_truncate = false;
-- 
cgit v1.2.3


From d93aa9d82aea80b80f225dbf9c7986df444d8106 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2018 09:40:05 -0400
Subject: new wrapper: alloc_file_pseudo()

takes inode, vfsmount, name, O_... flags and file_operations and
either returns a new struct file (in which case inode reference we
held is consumed) or returns ERR_PTR(), in which case no refcounts
are altered.

converted aio_private_file() and sock_alloc_file() to it

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c             | 20 ++++----------------
 fs/file_table.c      | 27 +++++++++++++++++++++++++++
 include/linux/file.h |  3 +++
 net/socket.c         | 28 +++++-----------------------
 4 files changed, 39 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 9eea53887d6c..c3a8bac16374 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -215,9 +215,7 @@ static const struct address_space_operations aio_ctx_aops;
 
 static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 {
-	struct qstr this = QSTR_INIT("[aio]", 5);
 	struct file *file;
-	struct path path;
 	struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
@@ -226,27 +224,17 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 	inode->i_mapping->private_data = ctx;
 	inode->i_size = PAGE_SIZE * nr_pages;
 
-	path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
-	if (!path.dentry) {
-		iput(inode);
-		return ERR_PTR(-ENOMEM);
-	}
-	path.mnt = mntget(aio_mnt);
-
-	d_instantiate(path.dentry, inode);
-	file = alloc_file(&path, O_RDWR, &aio_ring_fops);
+	file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
+				O_RDWR, &aio_ring_fops);
 	if (IS_ERR(file))
-		path_put(&path);
+		iput(inode);
 	return file;
 }
 
 static struct dentry *aio_mount(struct file_system_type *fs_type,
 				int flags, const char *dev_name, void *data)
 {
-	static const struct dentry_operations ops = {
-		.d_dname	= simple_dname,
-	};
-	struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops,
+	struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL,
 					   AIO_RING_MAGIC);
 
 	if (!IS_ERR(root))
diff --git a/fs/file_table.c b/fs/file_table.c
index 9b70ed2bbc4e..6b3723909342 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -184,6 +184,33 @@ struct file *alloc_file(const struct path *path, int flags,
 }
 EXPORT_SYMBOL(alloc_file);
 
+struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
+				const char *name, int flags,
+				const struct file_operations *fops)
+{
+	static const struct dentry_operations anon_ops = {
+		.d_dname = simple_dname
+	};
+	struct qstr this = QSTR_INIT(name, strlen(name));
+	struct path path;
+	struct file *file;
+
+	path.dentry = d_alloc_pseudo(mnt->mnt_sb, &this);
+	if (!path.dentry)
+		return ERR_PTR(-ENOMEM);
+	if (!mnt->mnt_sb->s_d_op)
+		d_set_d_op(path.dentry, &anon_ops);
+	path.mnt = mntget(mnt);
+	d_instantiate(path.dentry, inode);
+	file = alloc_file(&path, flags, fops);
+	if (IS_ERR(file)) {
+		ihold(inode);
+		path_put(&path);
+	}
+	return file;
+}
+EXPORT_SYMBOL(alloc_file_pseudo);
+
 /* the real guts of fput() - releasing the last reference to file
  */
 static void __fput(struct file *file)
diff --git a/include/linux/file.h b/include/linux/file.h
index aed45d69811e..5b25388f2f79 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -17,9 +17,12 @@ extern void fput(struct file *);
 struct file_operations;
 struct vfsmount;
 struct dentry;
+struct inode;
 struct path;
 extern struct file *alloc_file(const struct path *, int flags,
 	const struct file_operations *fop);
+extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *,
+	const char *, int flags, const struct file_operations *);
 
 static inline void fput_light(struct file *file, int fput_needed)
 {
diff --git a/net/socket.c b/net/socket.c
index 2cdbe8f71b7f..4cf3568caf9f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -391,33 +391,15 @@ static struct file_system_type sock_fs_type = {
 
 struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 {
-	struct qstr name = { .name = "" };
-	struct path path;
 	struct file *file;
 
-	if (dname) {
-		name.name = dname;
-		name.len = strlen(name.name);
-	} else if (sock->sk) {
-		name.name = sock->sk->sk_prot_creator->name;
-		name.len = strlen(name.name);
-	}
-	path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
-	if (unlikely(!path.dentry)) {
-		sock_release(sock);
-		return ERR_PTR(-ENOMEM);
-	}
-	path.mnt = mntget(sock_mnt);
-
-	d_instantiate(path.dentry, SOCK_INODE(sock));
+	if (!dname)
+		dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
 
-	file = alloc_file(&path, O_RDWR | (flags & O_NONBLOCK),
-		  &socket_file_ops);
+	file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
+				O_RDWR | (flags & O_NONBLOCK),
+				&socket_file_ops);
 	if (IS_ERR(file)) {
-		/* drop dentry, keep inode for a bit */
-		ihold(d_inode(path.dentry));
-		path_put(&path);
-		/* ... and now kill it properly */
 		sock_release(sock);
 		return file;
 	}
-- 
cgit v1.2.3


From e68375c850b0d5699a27bb598317a3274913824b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2018 09:50:46 -0400
Subject: hugetlb_file_setup(): switch to alloc_file_pseudo()

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hugetlbfs/inode.c | 53 ++++++++++++++++------------------------------------
 1 file changed, 16 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 71aed47422e2..87605c73361b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1308,10 +1308,6 @@ static int get_hstate_idx(int page_size_log)
 	return h - hstates;
 }
 
-static const struct dentry_operations anon_ops = {
-	.d_dname = simple_dname
-};
-
 /*
  * Note that size should be aligned to proper hugepage size in caller side,
  * otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
@@ -1320,19 +1316,18 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 				vm_flags_t acctflag, struct user_struct **user,
 				int creat_flags, int page_size_log)
 {
-	struct file *file = ERR_PTR(-ENOMEM);
 	struct inode *inode;
-	struct path path;
-	struct super_block *sb;
-	struct qstr quick_string;
+	struct vfsmount *mnt;
 	int hstate_idx;
+	struct file *file;
 
 	hstate_idx = get_hstate_idx(page_size_log);
 	if (hstate_idx < 0)
 		return ERR_PTR(-ENODEV);
 
 	*user = NULL;
-	if (!hugetlbfs_vfsmount[hstate_idx])
+	mnt = hugetlbfs_vfsmount[hstate_idx];
+	if (!mnt)
 		return ERR_PTR(-ENOENT);
 
 	if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
@@ -1348,44 +1343,28 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 		}
 	}
 
-	sb = hugetlbfs_vfsmount[hstate_idx]->mnt_sb;
-	quick_string.name = name;
-	quick_string.len = strlen(quick_string.name);
-	quick_string.hash = 0;
-	path.dentry = d_alloc_pseudo(sb, &quick_string);
-	if (!path.dentry)
-		goto out_shm_unlock;
-
-	d_set_d_op(path.dentry, &anon_ops);
-	path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]);
 	file = ERR_PTR(-ENOSPC);
-	inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0);
+	inode = hugetlbfs_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0);
 	if (!inode)
-		goto out_dentry;
+		goto out;
 	if (creat_flags == HUGETLB_SHMFS_INODE)
 		inode->i_flags |= S_PRIVATE;
 
-	file = ERR_PTR(-ENOMEM);
-	if (hugetlb_reserve_pages(inode, 0,
-			size >> huge_page_shift(hstate_inode(inode)), NULL,
-			acctflag))
-		goto out_inode;
-
-	d_instantiate(path.dentry, inode);
 	inode->i_size = size;
 	clear_nlink(inode);
 
-	file = alloc_file(&path, O_RDWR, &hugetlbfs_file_operations);
-	if (IS_ERR(file))
-		goto out_dentry; /* inode is already attached */
-
-	return file;
+	if (hugetlb_reserve_pages(inode, 0,
+			size >> huge_page_shift(hstate_inode(inode)), NULL,
+			acctflag))
+		file = ERR_PTR(-ENOMEM);
+	else
+		file = alloc_file_pseudo(inode, mnt, name, O_RDWR,
+					&hugetlbfs_file_operations);
+	if (!IS_ERR(file))
+		return file;
 
-out_inode:
 	iput(inode);
-out_dentry:
-	path_put(&path);
-out_shm_unlock:
+out:
 	if (*user) {
 		user_shm_unlock(size, *user);
 		*user = NULL;
-- 
cgit v1.2.3


From 52c91f8b3b1f5f69e47f7f65f76066d0c940b191 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2018 09:58:23 -0400
Subject: anon_inode_getfile(): switch to alloc_file_pseudo()

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/anon_inodes.c | 29 ++++++-----------------------
 1 file changed, 6 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 6b235ab1df6c..91262c34b797 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -71,8 +71,6 @@ struct file *anon_inode_getfile(const char *name,
 				const struct file_operations *fops,
 				void *priv, int flags)
 {
-	struct qstr this;
-	struct path path;
 	struct file *file;
 
 	if (IS_ERR(anon_inode_inode))
@@ -81,39 +79,24 @@ struct file *anon_inode_getfile(const char *name,
 	if (fops->owner && !try_module_get(fops->owner))
 		return ERR_PTR(-ENOENT);
 
-	/*
-	 * Link the inode to a directory entry by creating a unique name
-	 * using the inode sequence number.
-	 */
-	file = ERR_PTR(-ENOMEM);
-	this.name = name;
-	this.len = strlen(name);
-	this.hash = 0;
-	path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
-	if (!path.dentry)
-		goto err_module;
-
-	path.mnt = mntget(anon_inode_mnt);
 	/*
 	 * We know the anon_inode inode count is always greater than zero,
 	 * so ihold() is safe.
 	 */
 	ihold(anon_inode_inode);
-
-	d_instantiate(path.dentry, anon_inode_inode);
-
-	file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops);
+	file = alloc_file_pseudo(anon_inode_inode, anon_inode_mnt, name,
+				 flags & (O_ACCMODE | O_NONBLOCK), fops);
 	if (IS_ERR(file))
-		goto err_dput;
+		goto err;
+
 	file->f_mapping = anon_inode_inode->i_mapping;
 
 	file->private_data = priv;
 
 	return file;
 
-err_dput:
-	path_put(&path);
-err_module:
+err:
+	iput(anon_inode_inode);
 	module_put(fops->owner);
 	return file;
 }
-- 
cgit v1.2.3


From 152b6372c90630ef6787334e84cdddbcf8beb241 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2018 10:05:18 -0400
Subject: create_pipe_files(): switch the first allocation to
 alloc_file_pseudo()

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c | 34 ++++++++--------------------------
 1 file changed, 8 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 1909422e5a78..9701bd3458d1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -744,32 +744,24 @@ fail_inode:
 
 int create_pipe_files(struct file **res, int flags)
 {
-	int err;
 	struct inode *inode = get_pipe_inode();
 	struct file *f;
-	struct path path;
 
 	if (!inode)
 		return -ENFILE;
 
-	err = -ENOMEM;
-	path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &empty_name);
-	if (!path.dentry)
-		goto err_inode;
-	path.mnt = mntget(pipe_mnt);
-
-	d_instantiate(path.dentry, inode);
-
-	f = alloc_file(&path, O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
-			&pipefifo_fops);
+	f = alloc_file_pseudo(inode, pipe_mnt, "",
+				O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
+				&pipefifo_fops);
 	if (IS_ERR(f)) {
-		err = PTR_ERR(f);
-		goto err_dentry;
+		free_pipe_info(inode->i_pipe);
+		iput(inode);
+		return PTR_ERR(f);
 	}
 
 	f->private_data = inode->i_pipe;
 
-	res[0] = alloc_file(&path, O_RDONLY | (flags & O_NONBLOCK),
+	res[0] = alloc_file(&f->f_path, O_RDONLY | (flags & O_NONBLOCK),
 			&pipefifo_fops);
 	if (IS_ERR(res[0])) {
 		put_pipe_info(inode, inode->i_pipe);
@@ -777,20 +769,10 @@ int create_pipe_files(struct file **res, int flags)
 		return PTR_ERR(res[0]);
 	}
 
-	path_get(&path);
+	path_get(&f->f_path);
 	res[0]->private_data = inode->i_pipe;
 	res[1] = f;
 	return 0;
-
-err_dentry:
-	free_pipe_info(inode->i_pipe);
-	path_put(&path);
-	return err;
-
-err_inode:
-	free_pipe_info(inode->i_pipe);
-	iput(inode);
-	return err;
 }
 
 static int __do_pipe_flags(int *fd, struct file **files, int flags)
-- 
cgit v1.2.3


From 183266f26f45a47958afb5c9aa1b3d4651e2eb8c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 17 Jun 2018 14:15:10 -0400
Subject: new helper: alloc_file_clone()

alloc_file_clone(old_file, mode, ops): create a new struct file with
->f_path equal to that of old_file.  pipe converted.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 11 +++++++++++
 fs/pipe.c            |  6 ++----
 include/linux/file.h |  2 ++
 3 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index 6b3723909342..78b067ddb386 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -211,6 +211,17 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
 }
 EXPORT_SYMBOL(alloc_file_pseudo);
 
+struct file *alloc_file_clone(struct file *base, int flags,
+				const struct file_operations *fops)
+{
+	struct file *f = alloc_file(&base->f_path, flags, fops);
+	if (!IS_ERR(f)) {
+		path_get(&f->f_path);
+		f->f_mapping = base->f_mapping;
+	}
+	return f;
+}
+
 /* the real guts of fput() - releasing the last reference to file
  */
 static void __fput(struct file *file)
diff --git a/fs/pipe.c b/fs/pipe.c
index 9701bd3458d1..5f50070774bc 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -761,15 +761,13 @@ int create_pipe_files(struct file **res, int flags)
 
 	f->private_data = inode->i_pipe;
 
-	res[0] = alloc_file(&f->f_path, O_RDONLY | (flags & O_NONBLOCK),
-			&pipefifo_fops);
+	res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
+				  &pipefifo_fops);
 	if (IS_ERR(res[0])) {
 		put_pipe_info(inode, inode->i_pipe);
 		fput(f);
 		return PTR_ERR(res[0]);
 	}
-
-	path_get(&f->f_path);
 	res[0]->private_data = inode->i_pipe;
 	res[1] = f;
 	return 0;
diff --git a/include/linux/file.h b/include/linux/file.h
index 5b25388f2f79..60914843c737 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -23,6 +23,8 @@ extern struct file *alloc_file(const struct path *, int flags,
 	const struct file_operations *fop);
 extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *,
 	const char *, int flags, const struct file_operations *);
+extern struct file *alloc_file_clone(struct file *, int flags,
+	const struct file_operations *);
 
 static inline void fput_light(struct file *file, int fput_needed)
 {
-- 
cgit v1.2.3


From ee1904ba44bd4a242b453e8fe179b374906da173 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 17 Jun 2018 14:21:27 -0400
Subject: make alloc_file() static

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 3 +--
 include/linux/file.h | 2 --
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index 78b067ddb386..d6eccd04d703 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -157,7 +157,7 @@ over:
  * @flags: O_... flags with which the new file will be opened
  * @fop: the 'struct file_operations' for the new file
  */
-struct file *alloc_file(const struct path *path, int flags,
+static struct file *alloc_file(const struct path *path, int flags,
 		const struct file_operations *fop)
 {
 	struct file *file;
@@ -182,7 +182,6 @@ struct file *alloc_file(const struct path *path, int flags,
 		i_readcount_inc(path->dentry->d_inode);
 	return file;
 }
-EXPORT_SYMBOL(alloc_file);
 
 struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
 				const char *name, int flags,
diff --git a/include/linux/file.h b/include/linux/file.h
index 60914843c737..6b2fb032416c 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -19,8 +19,6 @@ struct vfsmount;
 struct dentry;
 struct inode;
 struct path;
-extern struct file *alloc_file(const struct path *, int flags,
-	const struct file_operations *fop);
 extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *,
 	const char *, int flags, const struct file_operations *);
 extern struct file *alloc_file_clone(struct file *, int flags,
-- 
cgit v1.2.3


From edc2b1da779887c74ade799574efc5819906598f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 16:27:23 -0400
Subject: make path_init() unconditionally paired with terminate_walk()

including the failure exits

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 8311dce1c649..1b5c58ad4113 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2125,12 +2125,15 @@ OK:
 	}
 }
 
+/* must be paired with terminate_walk() */
 static const char *path_init(struct nameidata *nd, unsigned flags)
 {
 	const char *s = nd->name->name;
 
 	if (!*s)
 		flags &= ~LOOKUP_RCU;
+	if (flags & LOOKUP_RCU)
+		rcu_read_lock();
 
 	nd->last_type = LAST_ROOT; /* if there are only slashes... */
 	nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
@@ -2143,7 +2146,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
 		nd->path = nd->root;
 		nd->inode = inode;
 		if (flags & LOOKUP_RCU) {
-			rcu_read_lock();
 			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
 			nd->root_seq = nd->seq;
 			nd->m_seq = read_seqbegin(&mount_lock);
@@ -2159,21 +2161,15 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
 
 	nd->m_seq = read_seqbegin(&mount_lock);
 	if (*s == '/') {
-		if (flags & LOOKUP_RCU)
-			rcu_read_lock();
 		set_root(nd);
 		if (likely(!nd_jump_root(nd)))
 			return s;
-		nd->root.mnt = NULL;
-		rcu_read_unlock();
 		return ERR_PTR(-ECHILD);
 	} else if (nd->dfd == AT_FDCWD) {
 		if (flags & LOOKUP_RCU) {
 			struct fs_struct *fs = current->fs;
 			unsigned seq;
 
-			rcu_read_lock();
-
 			do {
 				seq = read_seqcount_begin(&fs->seq);
 				nd->path = fs->pwd;
@@ -2195,16 +2191,13 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
 
 		dentry = f.file->f_path.dentry;
 
-		if (*s) {
-			if (!d_can_lookup(dentry)) {
-				fdput(f);
-				return ERR_PTR(-ENOTDIR);
-			}
+		if (*s && unlikely(!d_can_lookup(dentry))) {
+			fdput(f);
+			return ERR_PTR(-ENOTDIR);
 		}
 
 		nd->path = f.file->f_path;
 		if (flags & LOOKUP_RCU) {
-			rcu_read_lock();
 			nd->inode = nd->path.dentry->d_inode;
 			nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
 		} else {
@@ -2272,8 +2265,10 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
 	const char *s = path_init(nd, flags);
 	int err;
 
-	if (IS_ERR(s))
+	if (IS_ERR(s)) {
+		terminate_walk(nd);
 		return PTR_ERR(s);
+	}
 
 	if (unlikely(flags & LOOKUP_DOWN)) {
 		err = handle_lookup_down(nd);
@@ -2337,8 +2332,10 @@ static int path_parentat(struct nameidata *nd, unsigned flags,
 {
 	const char *s = path_init(nd, flags);
 	int err;
-	if (IS_ERR(s))
+	if (IS_ERR(s)) {
+		terminate_walk(nd);
 		return PTR_ERR(s);
+	}
 	err = link_path_walk(s, nd);
 	if (!err)
 		err = complete_walk(nd);
@@ -2666,8 +2663,10 @@ path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path)
 {
 	const char *s = path_init(nd, flags);
 	int err;
-	if (IS_ERR(s))
+	if (IS_ERR(s)) {
+		terminate_walk(nd);
 		return PTR_ERR(s);
+	}
 	while (!(err = link_path_walk(s, nd)) &&
 		(err = mountpoint_last(nd)) > 0) {
 		s = trailing_symlink(nd);
@@ -3512,6 +3511,7 @@ static struct file *path_openat(struct nameidata *nd,
 
 	s = path_init(nd, flags);
 	if (IS_ERR(s)) {
+		terminate_walk(nd);
 		fput(file);
 		return ERR_CAST(s);
 	}
-- 
cgit v1.2.3


From 9b5858e99ae1cfb60dc00461cfc7bd4dd077d7d7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 16:33:23 -0400
Subject: allow link_path_walk() to take ERR_PTR()

There is a check for IS_ERR(name) immediately upstream of each call
of link_path_walk(name, nd), with positives treated as if link_path_walk()
failed with PTR_ERR(name).  Taking that check into link_path_walk() itself
simplifies things nicely.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 38 +++++---------------------------------
 1 file changed, 5 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 1b5c58ad4113..22535f133200 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2028,6 +2028,8 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 {
 	int err;
 
+	if (IS_ERR(name))
+		return PTR_ERR(name);
 	while (*name=='/')
 		name++;
 	if (!*name)
@@ -2265,12 +2267,7 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
 	const char *s = path_init(nd, flags);
 	int err;
 
-	if (IS_ERR(s)) {
-		terminate_walk(nd);
-		return PTR_ERR(s);
-	}
-
-	if (unlikely(flags & LOOKUP_DOWN)) {
+	if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) {
 		err = handle_lookup_down(nd);
 		if (unlikely(err < 0)) {
 			terminate_walk(nd);
@@ -2281,10 +2278,6 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
 	while (!(err = link_path_walk(s, nd))
 		&& ((err = lookup_last(nd)) > 0)) {
 		s = trailing_symlink(nd);
-		if (IS_ERR(s)) {
-			err = PTR_ERR(s);
-			break;
-		}
 	}
 	if (!err)
 		err = complete_walk(nd);
@@ -2331,12 +2324,7 @@ static int path_parentat(struct nameidata *nd, unsigned flags,
 				struct path *parent)
 {
 	const char *s = path_init(nd, flags);
-	int err;
-	if (IS_ERR(s)) {
-		terminate_walk(nd);
-		return PTR_ERR(s);
-	}
-	err = link_path_walk(s, nd);
+	int err = link_path_walk(s, nd);
 	if (!err)
 		err = complete_walk(nd);
 	if (!err) {
@@ -2663,17 +2651,10 @@ path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path)
 {
 	const char *s = path_init(nd, flags);
 	int err;
-	if (IS_ERR(s)) {
-		terminate_walk(nd);
-		return PTR_ERR(s);
-	}
+
 	while (!(err = link_path_walk(s, nd)) &&
 		(err = mountpoint_last(nd)) > 0) {
 		s = trailing_symlink(nd);
-		if (IS_ERR(s)) {
-			err = PTR_ERR(s);
-			break;
-		}
 	}
 	if (!err) {
 		*path = nd->path;
@@ -3510,19 +3491,10 @@ static struct file *path_openat(struct nameidata *nd,
 	}
 
 	s = path_init(nd, flags);
-	if (IS_ERR(s)) {
-		terminate_walk(nd);
-		fput(file);
-		return ERR_CAST(s);
-	}
 	while (!(error = link_path_walk(s, nd)) &&
 		(error = do_last(nd, file, op)) > 0) {
 		nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
 		s = trailing_symlink(nd);
-		if (IS_ERR(s)) {
-			error = PTR_ERR(s);
-			break;
-		}
 	}
 	terminate_walk(nd);
 out2:
-- 
cgit v1.2.3


From 5f336e722cc961be94d264d96b90c92888fffae1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 16:38:06 -0400
Subject: few more cleanups of link_path_walk() callers

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 22535f133200..278e494bcbd2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2269,10 +2269,8 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
 
 	if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) {
 		err = handle_lookup_down(nd);
-		if (unlikely(err < 0)) {
-			terminate_walk(nd);
-			return err;
-		}
+		if (unlikely(err < 0))
+			s = ERR_PTR(err);
 	}
 
 	while (!(err = link_path_walk(s, nd))
@@ -3472,7 +3470,6 @@ static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
 static struct file *path_openat(struct nameidata *nd,
 			const struct open_flags *op, unsigned flags)
 {
-	const char *s;
 	struct file *file;
 	int error;
 
@@ -3482,22 +3479,17 @@ static struct file *path_openat(struct nameidata *nd,
 
 	if (unlikely(file->f_flags & __O_TMPFILE)) {
 		error = do_tmpfile(nd, flags, op, file);
-		goto out2;
-	}
-
-	if (unlikely(file->f_flags & O_PATH)) {
+	} else if (unlikely(file->f_flags & O_PATH)) {
 		error = do_o_path(nd, flags, file);
-		goto out2;
-	}
-
-	s = path_init(nd, flags);
-	while (!(error = link_path_walk(s, nd)) &&
-		(error = do_last(nd, file, op)) > 0) {
-		nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
-		s = trailing_symlink(nd);
+	} else {
+		const char *s = path_init(nd, flags);
+		while (!(error = link_path_walk(s, nd)) &&
+			(error = do_last(nd, file, op)) > 0) {
+			nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
+			s = trailing_symlink(nd);
+		}
+		terminate_walk(nd);
 	}
-	terminate_walk(nd);
-out2:
 	if (likely(!error)) {
 		if (likely(file->f_mode & FMODE_OPENED))
 			return file;
-- 
cgit v1.2.3


From cc57c07343bd071cdf1915a91a24ab7d40c9b590 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Sun, 15 Jul 2018 18:16:17 -0500
Subject: configfs: fix registered group removal

This patch fixes a bug where configfs_register_group had added
a group in a tree, and userspace has done a rmdir on a dir somewhere
above that group and we hit a kernel crash. The problem is configfs_rmdir
will detach everything under it and unlink groups on the default_groups
list. It will not unlink groups added with configfs_register_group so when
configfs_unregister_group is called to drop its references to the group/items
we crash when we try to access the freed dentries.

The patch just adds a check for if a rmdir has been done above
us and if so just does the unlink part of unregistration.

Sorry if you are getting this multiple times. I thought I sent
this to some of you and lkml, but I do not see it.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/configfs/dir.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 577cff24707b..39843fa7e11b 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1777,6 +1777,16 @@ void configfs_unregister_group(struct config_group *group)
 	struct dentry *dentry = group->cg_item.ci_dentry;
 	struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
 
+	mutex_lock(&subsys->su_mutex);
+	if (!group->cg_item.ci_parent->ci_group) {
+		/*
+		 * The parent has already been unlinked and detached
+		 * due to a rmdir.
+		 */
+		goto unlink_group;
+	}
+	mutex_unlock(&subsys->su_mutex);
+
 	inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
 	spin_lock(&configfs_dirent_lock);
 	configfs_detach_prep(dentry, NULL);
@@ -1791,6 +1801,7 @@ void configfs_unregister_group(struct config_group *group)
 	dput(dentry);
 
 	mutex_lock(&subsys->su_mutex);
+unlink_group:
 	unlink_group(group);
 	mutex_unlock(&subsys->su_mutex);
 }
-- 
cgit v1.2.3


From fa248de98a6beb9ceaec5059041d65f87ac438b4 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 17 Jul 2018 14:24:11 -0700
Subject: xfs: don't assume a left rmap when allocating a new rmap

The original rmap code assumed that there would always be at least one
rmap in the rmapbt (the AG sb/agf/agi) and so errored out if it didn't
find one.  This assumption isn't true for the rmapbt repair function
(and it won't be true for realtime rmap either), so remove the check and
just deal with the situation.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
---
 fs/xfs/libxfs/xfs_rmap.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index d4460b0d2d81..8b2a2f81d110 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -753,19 +753,19 @@ xfs_rmap_map(
 			&have_lt);
 	if (error)
 		goto out_error;
-	XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error);
-
-	error = xfs_rmap_get_rec(cur, &ltrec, &have_lt);
-	if (error)
-		goto out_error;
-	XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error);
-	trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
-			cur->bc_private.a.agno, ltrec.rm_startblock,
-			ltrec.rm_blockcount, ltrec.rm_owner,
-			ltrec.rm_offset, ltrec.rm_flags);
+	if (have_lt) {
+		error = xfs_rmap_get_rec(cur, &ltrec, &have_lt);
+		if (error)
+			goto out_error;
+		XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error);
+		trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
+				cur->bc_private.a.agno, ltrec.rm_startblock,
+				ltrec.rm_blockcount, ltrec.rm_owner,
+				ltrec.rm_offset, ltrec.rm_flags);
 
-	if (!xfs_rmap_is_mergeable(&ltrec, owner, flags))
-		have_lt = 0;
+		if (!xfs_rmap_is_mergeable(&ltrec, owner, flags))
+			have_lt = 0;
+	}
 
 	XFS_WANT_CORRUPTED_GOTO(mp,
 		have_lt == 0 ||
-- 
cgit v1.2.3


From 1d5bebbafc73d82e5af003cdd2bf8ee5741cd1df Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Wed, 11 Jul 2018 22:26:38 -0700
Subject: xfs_attr_leaf: use swap macro in xfs_attr3_leaf_rebalance

Make use of the swap macro and remove some unnecessary variables.
This makes the code easier to read and maintain. Also, reduces the
stack usage.

This code was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr_leaf.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 251304f3bc5d..8ff287979a8c 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -1566,17 +1566,10 @@ xfs_attr3_leaf_rebalance(
 	 */
 	swap = 0;
 	if (xfs_attr3_leaf_order(blk1->bp, &ichdr1, blk2->bp, &ichdr2)) {
-		struct xfs_da_state_blk	*tmp_blk;
-		struct xfs_attr3_icleaf_hdr tmp_ichdr;
+		swap(blk1, blk2);
 
-		tmp_blk = blk1;
-		blk1 = blk2;
-		blk2 = tmp_blk;
-
-		/* struct copies to swap them rather than reconverting */
-		tmp_ichdr = ichdr1;
-		ichdr1 = ichdr2;
-		ichdr2 = tmp_ichdr;
+		/* swap structures rather than reconverting them */
+		swap(ichdr1, ichdr2);
 
 		leaf1 = blk1->bp->b_addr;
 		leaf2 = blk2->bp->b_addr;
-- 
cgit v1.2.3


From 897992b7e3505659fda57887223bd6bfe163c12f Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Wed, 11 Jul 2018 22:26:38 -0700
Subject: xfs_bmap_util: use swap macro

Make use of the swap macro and remove some unnecessary variables.
This makes the code easier to read and maintain. Also, reduces the
stack usage.

This code was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 28 +++++-----------------------
 1 file changed, 5 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index d3a314fd721f..1b78c20de7bd 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1688,7 +1688,6 @@ xfs_swap_extent_forks(
 	int			*src_log_flags,
 	int			*target_log_flags)
 {
-	struct xfs_ifork	tempifp, *ifp, *tifp;
 	xfs_filblks_t		aforkblks = 0;
 	xfs_filblks_t		taforkblks = 0;
 	xfs_extnum_t		junk;
@@ -1730,11 +1729,7 @@ xfs_swap_extent_forks(
 	/*
 	 * Swap the data forks of the inodes
 	 */
-	ifp = &ip->i_df;
-	tifp = &tip->i_df;
-	tempifp = *ifp;		/* struct copy */
-	*ifp = *tifp;		/* struct copy */
-	*tifp = tempifp;	/* struct copy */
+	swap(ip->i_df, tip->i_df);
 
 	/*
 	 * Fix the on-disk inode values
@@ -1743,13 +1738,8 @@ xfs_swap_extent_forks(
 	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
 	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
 
-	tmp = (uint64_t) ip->i_d.di_nextents;
-	ip->i_d.di_nextents = tip->i_d.di_nextents;
-	tip->i_d.di_nextents = tmp;
-
-	tmp = (uint64_t) ip->i_d.di_format;
-	ip->i_d.di_format = tip->i_d.di_format;
-	tip->i_d.di_format = tmp;
+	swap(ip->i_d.di_nextents, tip->i_d.di_nextents);
+	swap(ip->i_d.di_format, tip->i_d.di_format);
 
 	/*
 	 * The extents in the source inode could still contain speculative
@@ -1844,7 +1834,6 @@ xfs_swap_extents(
 	int			src_log_flags, target_log_flags;
 	int			error = 0;
 	int			lock_flags;
-	struct xfs_ifork	*cowfp;
 	uint64_t		f;
 	int			resblks = 0;
 
@@ -1986,18 +1975,11 @@ xfs_swap_extents(
 
 	/* Swap the cow forks. */
 	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
-		xfs_extnum_t	extnum;
-
 		ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS);
 		ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS);
 
-		extnum = ip->i_cnextents;
-		ip->i_cnextents = tip->i_cnextents;
-		tip->i_cnextents = extnum;
-
-		cowfp = ip->i_cowfp;
-		ip->i_cowfp = tip->i_cowfp;
-		tip->i_cowfp = cowfp;
+		swap(ip->i_cnextents, tip->i_cnextents);
+		swap(ip->i_cowfp, tip->i_cowfp);
 
 		if (ip->i_cowfp && ip->i_cowfp->if_bytes)
 			xfs_inode_set_cowblocks_tag(ip);
-- 
cgit v1.2.3


From e4e542a683c16945533d700339a0aec261d39f34 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 17 Jul 2018 14:25:01 -0700
Subject: xfs: use swap macro in xfs_dir2_leafn_rebalance

Make use of the swap macro and remove unnecessary variable *tmp*. This
makes the code easier to read and maintain. Also, slightly refactor some
code.

This code was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_dir2_node.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 2daf874969ab..f1bb3434f51c 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -1012,7 +1012,7 @@ xfs_dir2_leafn_rebalance(
 	int			oldstale;	/* old count of stale leaves */
 #endif
 	int			oldsum;		/* old total leaf count */
-	int			swap;		/* swapped leaf blocks */
+	int			swap_blocks;	/* swapped leaf blocks */
 	struct xfs_dir2_leaf_entry *ents1;
 	struct xfs_dir2_leaf_entry *ents2;
 	struct xfs_dir3_icleaf_hdr hdr1;
@@ -1023,13 +1023,10 @@ xfs_dir2_leafn_rebalance(
 	/*
 	 * If the block order is wrong, swap the arguments.
 	 */
-	if ((swap = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp))) {
-		xfs_da_state_blk_t	*tmp;	/* temp for block swap */
+	swap_blocks = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp);
+	if (swap_blocks)
+		swap(blk1, blk2);
 
-		tmp = blk1;
-		blk1 = blk2;
-		blk2 = tmp;
-	}
 	leaf1 = blk1->bp->b_addr;
 	leaf2 = blk2->bp->b_addr;
 	dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1);
@@ -1093,11 +1090,11 @@ xfs_dir2_leafn_rebalance(
 	 * Mark whether we're inserting into the old or new leaf.
 	 */
 	if (hdr1.count < hdr2.count)
-		state->inleaf = swap;
+		state->inleaf = swap_blocks;
 	else if (hdr1.count > hdr2.count)
-		state->inleaf = !swap;
+		state->inleaf = !swap_blocks;
 	else
-		state->inleaf = swap ^ (blk1->index <= hdr1.count);
+		state->inleaf = swap_blocks ^ (blk1->index <= hdr1.count);
 	/*
 	 * Adjust the expected index for insertion.
 	 */
-- 
cgit v1.2.3


From 5089eafffba2ed444789e5d25c7c0dfd62595713 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Tue, 17 Jul 2018 14:25:20 -0700
Subject: libxfs: Fix a couple of sparse complaints

No significant changes, just silence a couple of sparse errors.

Using cpu_to_be32(NULLAGINO), the NULLAGINO constant will be encoded in
BE as a constant, avoiding the BE -> CPU conversion on every iteration of
the loop that would be needed if be32_to_cpu(agi->agi_unlinked[i]) were
used instead.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_ag_resv.h | 2 +-
 fs/xfs/libxfs/xfs_ialloc.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
index 4619b554ee90..dc953fc84b2f 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.h
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -28,7 +28,7 @@ xfs_ag_resv_rmapbt_alloc(
 	struct xfs_mount	*mp,
 	xfs_agnumber_t		agno)
 {
-	struct xfs_alloc_arg	args = {0};
+	struct xfs_alloc_arg	args = { NULL };
 	struct xfs_perag	*pag;
 
 	args.len = 1;
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index c38d14106b53..811d36afd024 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2537,7 +2537,7 @@ xfs_agi_verify(
 		return __this_address;
 
 	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
-		if (agi->agi_unlinked[i] == NULLAGINO)
+		if (agi->agi_unlinked[i] == cpu_to_be32(NULLAGINO))
 			continue;
 		if (!xfs_verify_ino(mp, be32_to_cpu(agi->agi_unlinked[i])))
 			return __this_address;
-- 
cgit v1.2.3


From c4592b9c37889c2850b0edadcff063d5097f1cb9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Jul 2018 16:47:16 +0200
Subject: jffs2: use 64-bit intermediate timestamps

The VFS now uses timespec64 timestamps consistently, but jffs2 still
converts them to 32-bit numbers on the storage medium. As the helper
functions for the conversion (get_seconds() and timespec_to_timespec64())
are now deprecated, let's change them over to the more modern
replacements.

This keeps the traditional interpretation of those values, where
the on-disk 32-bit numbers are taken to be negative numbers, i.e.
dates before 1970, on 32-bit machines, but future numbers past 2038
on 64-bit machines.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 fs/jffs2/dir.c      | 32 ++++++++++++++++----------------
 fs/jffs2/file.c     |  6 +++---
 fs/jffs2/fs.c       | 12 ++++++------
 fs/jffs2/os-linux.h |  3 ++-
 4 files changed, 27 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index b2944f9218f7..f20cff1194bb 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -201,7 +201,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
 	if (ret)
 		goto fail;
 
-	dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->ctime)));
+	dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime));
 
 	jffs2_free_raw_inode(ri);
 
@@ -227,14 +227,14 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
 	struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i);
 	struct jffs2_inode_info *dead_f = JFFS2_INODE_INFO(d_inode(dentry));
 	int ret;
-	uint32_t now = get_seconds();
+	uint32_t now = JFFS2_NOW();
 
 	ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
 			      dentry->d_name.len, dead_f, now);
 	if (dead_f->inocache)
 		set_nlink(d_inode(dentry), dead_f->inocache->pino_nlink);
 	if (!ret)
-		dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(now));
+		dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
 	return ret;
 }
 /***********************************************************************/
@@ -260,7 +260,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
 	type = (d_inode(old_dentry)->i_mode & S_IFMT) >> 12;
 	if (!type) type = DT_REG;
 
-	now = get_seconds();
+	now = JFFS2_NOW();
 	ret = jffs2_do_link(c, dir_f, f->inocache->ino, type, dentry->d_name.name, dentry->d_name.len, now);
 
 	if (!ret) {
@@ -268,7 +268,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
 		set_nlink(d_inode(old_dentry), ++f->inocache->pino_nlink);
 		mutex_unlock(&f->sem);
 		d_instantiate(dentry, d_inode(old_dentry));
-		dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(now));
+		dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
 		ihold(d_inode(old_dentry));
 	}
 	return ret;
@@ -400,7 +400,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 	rd->pino = cpu_to_je32(dir_i->i_ino);
 	rd->version = cpu_to_je32(++dir_f->highest_version);
 	rd->ino = cpu_to_je32(inode->i_ino);
-	rd->mctime = cpu_to_je32(get_seconds());
+	rd->mctime = cpu_to_je32(JFFS2_NOW());
 	rd->nsize = namelen;
 	rd->type = DT_LNK;
 	rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
@@ -418,7 +418,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 		goto fail;
 	}
 
-	dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(rd->mctime)));
+	dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
 
 	jffs2_free_raw_dirent(rd);
 
@@ -543,7 +543,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
 	rd->pino = cpu_to_je32(dir_i->i_ino);
 	rd->version = cpu_to_je32(++dir_f->highest_version);
 	rd->ino = cpu_to_je32(inode->i_ino);
-	rd->mctime = cpu_to_je32(get_seconds());
+	rd->mctime = cpu_to_je32(JFFS2_NOW());
 	rd->nsize = namelen;
 	rd->type = DT_DIR;
 	rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
@@ -561,7 +561,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
 		goto fail;
 	}
 
-	dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(rd->mctime)));
+	dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
 	inc_nlink(dir_i);
 
 	jffs2_free_raw_dirent(rd);
@@ -588,7 +588,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(dentry));
 	struct jffs2_full_dirent *fd;
 	int ret;
-	uint32_t now = get_seconds();
+	uint32_t now = JFFS2_NOW();
 
 	for (fd = f->dents ; fd; fd = fd->next) {
 		if (fd->ino)
@@ -598,7 +598,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
 	ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
 			      dentry->d_name.len, f, now);
 	if (!ret) {
-		dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(now));
+		dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
 		clear_nlink(d_inode(dentry));
 		drop_nlink(dir_i);
 	}
@@ -712,7 +712,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
 	rd->pino = cpu_to_je32(dir_i->i_ino);
 	rd->version = cpu_to_je32(++dir_f->highest_version);
 	rd->ino = cpu_to_je32(inode->i_ino);
-	rd->mctime = cpu_to_je32(get_seconds());
+	rd->mctime = cpu_to_je32(JFFS2_NOW());
 	rd->nsize = namelen;
 
 	/* XXX: This is ugly. */
@@ -733,7 +733,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
 		goto fail;
 	}
 
-	dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(rd->mctime)));
+	dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
 
 	jffs2_free_raw_dirent(rd);
 
@@ -797,7 +797,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
 	type = (d_inode(old_dentry)->i_mode & S_IFMT) >> 12;
 	if (!type) type = DT_REG;
 
-	now = get_seconds();
+	now = JFFS2_NOW();
 	ret = jffs2_do_link(c, JFFS2_INODE_INFO(new_dir_i),
 			    d_inode(old_dentry)->i_ino, type,
 			    new_dentry->d_name.name, new_dentry->d_name.len, now);
@@ -853,14 +853,14 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
 		 * caller won't do it on its own since we are returning an error.
 		 */
 		d_invalidate(new_dentry);
-		new_dir_i->i_mtime = new_dir_i->i_ctime = timespec_to_timespec64(ITIME(now));
+		new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
 		return ret;
 	}
 
 	if (d_is_dir(old_dentry))
 		drop_nlink(old_dir_i);
 
-	new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = timespec_to_timespec64(ITIME(now));
+	new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = ITIME(now);
 
 	return 0;
 }
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 481afd4c2e1a..7d8654a1472e 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -175,7 +175,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 		ri.uid = cpu_to_je16(i_uid_read(inode));
 		ri.gid = cpu_to_je16(i_gid_read(inode));
 		ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs));
-		ri.atime = ri.ctime = ri.mtime = cpu_to_je32(get_seconds());
+		ri.atime = ri.ctime = ri.mtime = cpu_to_je32(JFFS2_NOW());
 		ri.offset = cpu_to_je32(inode->i_size);
 		ri.dsize = cpu_to_je32(pageofs - inode->i_size);
 		ri.csize = cpu_to_je32(0);
@@ -283,7 +283,7 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
 	ri->uid = cpu_to_je16(i_uid_read(inode));
 	ri->gid = cpu_to_je16(i_gid_read(inode));
 	ri->isize = cpu_to_je32((uint32_t)inode->i_size);
-	ri->atime = ri->ctime = ri->mtime = cpu_to_je32(get_seconds());
+	ri->atime = ri->ctime = ri->mtime = cpu_to_je32(JFFS2_NOW());
 
 	/* In 2.4, it was already kmapped by generic_file_write(). Doesn't
 	   hurt to do it again. The alternative is ifdefs, which are ugly. */
@@ -308,7 +308,7 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
 			inode->i_size = pos + writtenlen;
 			inode->i_blocks = (inode->i_size + 511) >> 9;
 
-			inode->i_ctime = inode->i_mtime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->ctime)));
+			inode->i_ctime = inode->i_mtime = ITIME(je32_to_cpu(ri->ctime));
 		}
 	}
 
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 0ecfb8ea38cd..eab04eca95a3 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -146,9 +146,9 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 		return PTR_ERR(new_metadata);
 	}
 	/* It worked. Update the inode */
-	inode->i_atime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->atime)));
-	inode->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->ctime)));
-	inode->i_mtime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->mtime)));
+	inode->i_atime = ITIME(je32_to_cpu(ri->atime));
+	inode->i_ctime = ITIME(je32_to_cpu(ri->ctime));
+	inode->i_mtime = ITIME(je32_to_cpu(ri->mtime));
 	inode->i_mode = jemode_to_cpu(ri->mode);
 	i_uid_write(inode, je16_to_cpu(ri->uid));
 	i_gid_write(inode, je16_to_cpu(ri->gid));
@@ -280,9 +280,9 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
 	i_uid_write(inode, je16_to_cpu(latest_node.uid));
 	i_gid_write(inode, je16_to_cpu(latest_node.gid));
 	inode->i_size = je32_to_cpu(latest_node.isize);
-	inode->i_atime = timespec_to_timespec64(ITIME(je32_to_cpu(latest_node.atime)));
-	inode->i_mtime = timespec_to_timespec64(ITIME(je32_to_cpu(latest_node.mtime)));
-	inode->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(latest_node.ctime)));
+	inode->i_atime = ITIME(je32_to_cpu(latest_node.atime));
+	inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
+	inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
 
 	set_nlink(inode, f->inocache->pino_nlink);
 
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index c2fbec19c616..acbe1f722f2d 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -31,7 +31,8 @@ struct kvec;
 #define JFFS2_F_I_GID(f) (i_gid_read(OFNI_EDONI_2SFFJ(f)))
 #define JFFS2_F_I_RDEV(f) (OFNI_EDONI_2SFFJ(f)->i_rdev)
 
-#define ITIME(sec) ((struct timespec){sec, 0})
+#define ITIME(sec) ((struct timespec64){(int32_t)sec, 0})
+#define JFFS2_NOW() (ktime_get_real_seconds())
 #define I_SEC(tv) ((tv).tv_sec)
 #define JFFS2_F_I_CTIME(f) (OFNI_EDONI_2SFFJ(f)->i_ctime.tv_sec)
 #define JFFS2_F_I_MTIME(f) (OFNI_EDONI_2SFFJ(f)->i_mtime.tv_sec)
-- 
cgit v1.2.3


From 5f7a01e222635cba7e4889ad4ebd891835e8b2eb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Jul 2018 16:47:17 +0200
Subject: jffs2: use unsigned 32-bit timestamps consistently

Most users of jffs2 are 32-bit systems that traditionally only support
timestamps using a 32-bit signed time_t, in the range from years 1902 to
2038. On 64-bit systems, jffs2 however interpreted the same timestamps
as unsigned values, reading back negative times (before 1970) as times
between 2038 and 2106.

Now that Linux supports 64-bit inode timestamps even on 32-bit systems,
let's use the second interpretation everywhere to allow jffs2 to be
used on 32-bit systems beyond 2038 without a fundamental change to the
inode format.

This has a slight risk of regressions, when existing files with timestamps
before 1970 are present in file system images and are now interpreted
as future time stamps. I considered moving the wraparound point a bit,
e.g. to 1960, in order to deal with timestamps that ended up on Dec 31,
1969 due to incorrect timezone handling. However, this would complicate
the implementation unnecessarily, so I went with the simplest possible
method of extending the timestamps.

Writing files with timestamps before 1970 or after 2106 now results
in those times being clamped in the file system.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 fs/jffs2/os-linux.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index acbe1f722f2d..a2dbbb3f4c74 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -31,13 +31,13 @@ struct kvec;
 #define JFFS2_F_I_GID(f) (i_gid_read(OFNI_EDONI_2SFFJ(f)))
 #define JFFS2_F_I_RDEV(f) (OFNI_EDONI_2SFFJ(f)->i_rdev)
 
-#define ITIME(sec) ((struct timespec64){(int32_t)sec, 0})
-#define JFFS2_NOW() (ktime_get_real_seconds())
-#define I_SEC(tv) ((tv).tv_sec)
-#define JFFS2_F_I_CTIME(f) (OFNI_EDONI_2SFFJ(f)->i_ctime.tv_sec)
-#define JFFS2_F_I_MTIME(f) (OFNI_EDONI_2SFFJ(f)->i_mtime.tv_sec)
-#define JFFS2_F_I_ATIME(f) (OFNI_EDONI_2SFFJ(f)->i_atime.tv_sec)
-
+#define JFFS2_CLAMP_TIME(t) ((uint32_t)clamp_t(time64_t, (t), 0, U32_MAX))
+#define ITIME(sec) ((struct timespec64){sec, 0})
+#define JFFS2_NOW() JFFS2_CLAMP_TIME(ktime_get_real_seconds())
+#define I_SEC(tv) JFFS2_CLAMP_TIME((tv).tv_sec)
+#define JFFS2_F_I_CTIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_ctime)
+#define JFFS2_F_I_MTIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_mtime)
+#define JFFS2_F_I_ATIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_atime)
 #define sleep_on_spinunlock(wq, s)				\
 	do {							\
 		DECLARE_WAITQUEUE(__wait, current);		\
-- 
cgit v1.2.3


From 3f289dcb4b265416a57ca79cf4a324060bb09060 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 18 Jul 2018 04:47:36 -0700
Subject: block: make bdev_ops->rw_page() take a REQ_OP instead of bool

c11f0c0b5bb9 ("block/mm: make bdev_ops->rw_page() take a bool for
read/write") replaced @op with boolean @is_write, which limited the
amount of information going into ->rw_page() and more importantly
page_endio(), which removed the need to expose block internals to mm.

Unfortunately, we want to track discards separately and @is_write
isn't enough information.  This patch updates bdev_ops->rw_page() to
take REQ_OP instead but leaves page_endio() to take bool @is_write.
This allows the block part of operations to have enough information
while not leaking it to mm.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/brd.c           | 14 +++++++-------
 drivers/block/zram/zram_drv.c | 16 ++++++++--------
 drivers/nvdimm/btt.c          | 12 ++++++------
 drivers/nvdimm/pmem.c         | 13 ++++++-------
 fs/block_dev.c                |  6 ++++--
 fs/mpage.c                    |  4 ++--
 include/linux/blkdev.h        |  2 +-
 7 files changed, 34 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index bb976598ee43..df8103dd40ac 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -254,20 +254,20 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
  * Process a single bvec of a bio.
  */
 static int brd_do_bvec(struct brd_device *brd, struct page *page,
-			unsigned int len, unsigned int off, bool is_write,
+			unsigned int len, unsigned int off, unsigned int op,
 			sector_t sector)
 {
 	void *mem;
 	int err = 0;
 
-	if (is_write) {
+	if (op_is_write(op)) {
 		err = copy_to_brd_setup(brd, sector, len);
 		if (err)
 			goto out;
 	}
 
 	mem = kmap_atomic(page);
-	if (!is_write) {
+	if (!op_is_write(op)) {
 		copy_from_brd(mem + off, brd, sector, len);
 		flush_dcache_page(page);
 	} else {
@@ -296,7 +296,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
 		int err;
 
 		err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
-					op_is_write(bio_op(bio)), sector);
+				  bio_op(bio), sector);
 		if (err)
 			goto io_error;
 		sector += len >> SECTOR_SHIFT;
@@ -310,15 +310,15 @@ io_error:
 }
 
 static int brd_rw_page(struct block_device *bdev, sector_t sector,
-		       struct page *page, bool is_write)
+		       struct page *page, unsigned int op)
 {
 	struct brd_device *brd = bdev->bd_disk->private_data;
 	int err;
 
 	if (PageTransHuge(page))
 		return -ENOTSUPP;
-	err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
-	page_endio(page, is_write, err);
+	err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector);
+	page_endio(page, op_is_write(op), err);
 	return err;
 }
 
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 7436b2d27fa3..78c29044684a 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1274,17 +1274,17 @@ static void zram_bio_discard(struct zram *zram, u32 index,
  * Returns 1 if IO request was successfully submitted.
  */
 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
-			int offset, bool is_write, struct bio *bio)
+			int offset, unsigned int op, struct bio *bio)
 {
 	unsigned long start_time = jiffies;
-	int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
+	int rw_acct = op_is_write(op) ? REQ_OP_WRITE : REQ_OP_READ;
 	struct request_queue *q = zram->disk->queue;
 	int ret;
 
 	generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT,
 			&zram->disk->part0);
 
-	if (!is_write) {
+	if (!op_is_write(op)) {
 		atomic64_inc(&zram->stats.num_reads);
 		ret = zram_bvec_read(zram, bvec, index, offset, bio);
 		flush_dcache_page(bvec->bv_page);
@@ -1300,7 +1300,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 	zram_slot_unlock(zram, index);
 
 	if (unlikely(ret < 0)) {
-		if (!is_write)
+		if (!op_is_write(op))
 			atomic64_inc(&zram->stats.failed_reads);
 		else
 			atomic64_inc(&zram->stats.failed_writes);
@@ -1338,7 +1338,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
 			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
 							unwritten);
 			if (zram_bvec_rw(zram, &bv, index, offset,
-					op_is_write(bio_op(bio)), bio) < 0)
+					 bio_op(bio), bio) < 0)
 				goto out;
 
 			bv.bv_offset += bv.bv_len;
@@ -1390,7 +1390,7 @@ static void zram_slot_free_notify(struct block_device *bdev,
 }
 
 static int zram_rw_page(struct block_device *bdev, sector_t sector,
-		       struct page *page, bool is_write)
+		       struct page *page, unsigned int op)
 {
 	int offset, ret;
 	u32 index;
@@ -1414,7 +1414,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
 	bv.bv_len = PAGE_SIZE;
 	bv.bv_offset = 0;
 
-	ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
+	ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
 out:
 	/*
 	 * If I/O fails, just return error(ie, non-zero) without
@@ -1429,7 +1429,7 @@ out:
 
 	switch (ret) {
 	case 0:
-		page_endio(page, is_write, 0);
+		page_endio(page, op_is_write(op), 0);
 		break;
 	case 1:
 		ret = 0;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 85de8053aa34..0360c015f658 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1423,11 +1423,11 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 
 static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
 			struct page *page, unsigned int len, unsigned int off,
-			bool is_write, sector_t sector)
+			unsigned int op, sector_t sector)
 {
 	int ret;
 
-	if (!is_write) {
+	if (!op_is_write(op)) {
 		ret = btt_read_pg(btt, bip, page, off, sector, len);
 		flush_dcache_page(page);
 	} else {
@@ -1464,7 +1464,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 		}
 
 		err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
-				  op_is_write(bio_op(bio)), iter.bi_sector);
+				  bio_op(bio), iter.bi_sector);
 		if (err) {
 			dev_err(&btt->nd_btt->dev,
 					"io error in %s sector %lld, len %d,\n",
@@ -1483,16 +1483,16 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 }
 
 static int btt_rw_page(struct block_device *bdev, sector_t sector,
-		struct page *page, bool is_write)
+		struct page *page, unsigned int op)
 {
 	struct btt *btt = bdev->bd_disk->private_data;
 	int rc;
 	unsigned int len;
 
 	len = hpage_nr_pages(page) * PAGE_SIZE;
-	rc = btt_do_bvec(btt, NULL, page, len, 0, is_write, sector);
+	rc = btt_do_bvec(btt, NULL, page, len, 0, op, sector);
 	if (rc == 0)
-		page_endio(page, is_write, 0);
+		page_endio(page, op_is_write(op), 0);
 
 	return rc;
 }
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8b1fd7f1a224..dd17acd8fe68 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -120,7 +120,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
 }
 
 static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
-			unsigned int len, unsigned int off, bool is_write,
+			unsigned int len, unsigned int off, unsigned int op,
 			sector_t sector)
 {
 	blk_status_t rc = BLK_STS_OK;
@@ -131,7 +131,7 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
 		bad_pmem = true;
 
-	if (!is_write) {
+	if (!op_is_write(op)) {
 		if (unlikely(bad_pmem))
 			rc = BLK_STS_IOERR;
 		else {
@@ -180,8 +180,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 	do_acct = nd_iostat_start(bio, &start);
 	bio_for_each_segment(bvec, bio, iter) {
 		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
-				bvec.bv_offset, op_is_write(bio_op(bio)),
-				iter.bi_sector);
+				bvec.bv_offset, bio_op(bio), iter.bi_sector);
 		if (rc) {
 			bio->bi_status = rc;
 			break;
@@ -198,13 +197,13 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 }
 
 static int pmem_rw_page(struct block_device *bdev, sector_t sector,
-		       struct page *page, bool is_write)
+		       struct page *page, unsigned int op)
 {
 	struct pmem_device *pmem = bdev->bd_queue->queuedata;
 	blk_status_t rc;
 
 	rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
-			  0, is_write, sector);
+			  0, op, sector);
 
 	/*
 	 * The ->rw_page interface is subtle and tricky.  The core
@@ -213,7 +212,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	 * caused by double completion.
 	 */
 	if (rc == 0)
-		page_endio(page, is_write, 0);
+		page_endio(page, op_is_write(op), 0);
 
 	return blk_status_to_errno(rc);
 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0dd87aaeb39a..496fb51a1e1a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -665,7 +665,8 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
 	result = blk_queue_enter(bdev->bd_queue, 0);
 	if (result)
 		return result;
-	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false);
+	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
+			      REQ_OP_READ);
 	blk_queue_exit(bdev->bd_queue);
 	return result;
 }
@@ -703,7 +704,8 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
 		return result;
 
 	set_page_writeback(page);
-	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true);
+	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
+			      REQ_OP_WRITE);
 	if (result) {
 		end_page_writeback(page);
 	} else {
diff --git a/fs/mpage.c b/fs/mpage.c
index b7e7f570733a..b73638db9866 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -51,8 +51,8 @@ static void mpage_end_io(struct bio *bio)
 
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
-		page_endio(page, op_is_write(bio_op(bio)),
-				blk_status_to_errno(bio->bi_status));
+		page_endio(page, bio_op(bio),
+			   blk_status_to_errno(bio->bi_status));
 	}
 
 	bio_put(bio);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1939ed95f936..331a6cb8805f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1943,7 +1943,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
 struct block_device_operations {
 	int (*open) (struct block_device *, fmode_t);
 	void (*release) (struct gendisk *, fmode_t);
-	int (*rw_page)(struct block_device *, sector_t, struct page *, bool);
+	int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	unsigned int (*check_events) (struct gendisk *disk,
-- 
cgit v1.2.3


From dbae2c551377b6533a00c11fc7ede370100ab404 Mon Sep 17 00:00:00 2001
From: Michael Callahan <michaelcallahan@fb.com>
Date: Wed, 18 Jul 2018 04:47:38 -0700
Subject: block: Define and use STAT_READ and STAT_WRITE

Add defines for STAT_READ and STAT_WRITE for indexing the partition
stat entries. This clarifies some fs/ code which has hardcoded 1 for
STAT_WRITE and will make it easier to extend the stats with additional
fields.

tj: Refreshed on top of v4.17.

Signed-off-by: Michael Callahan <michaelcallahan@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c             | 16 ++++++++--------
 block/partition-generic.c | 16 ++++++++--------
 fs/ext4/super.c           |  5 +++--
 fs/ext4/sysfs.c           |  6 ++++--
 fs/f2fs/f2fs.h            |  2 +-
 fs/f2fs/super.c           |  3 ++-
 include/linux/blk_types.h |  7 +++++++
 include/linux/genhd.h     | 13 +++++++------
 8 files changed, 40 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/block/genhd.c b/block/genhd.c
index f1543a45e73b..0711a800d0d4 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1337,14 +1337,14 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   "%u %lu %lu %lu %u %u %u %u\n",
 			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 			   disk_name(gp, hd->partno, buf),
-			   part_stat_read(hd, ios[READ]),
-			   part_stat_read(hd, merges[READ]),
-			   part_stat_read(hd, sectors[READ]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
-			   part_stat_read(hd, ios[WRITE]),
-			   part_stat_read(hd, merges[WRITE]),
-			   part_stat_read(hd, sectors[WRITE]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
+			   part_stat_read(hd, ios[STAT_READ]),
+			   part_stat_read(hd, merges[STAT_READ]),
+			   part_stat_read(hd, sectors[STAT_READ]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_READ])),
+			   part_stat_read(hd, ios[STAT_WRITE]),
+			   part_stat_read(hd, merges[STAT_WRITE]),
+			   part_stat_read(hd, sectors[STAT_WRITE]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])),
 			   inflight[0],
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
 			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 3dcfd4ec0e11..0ddb06722162 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -132,14 +132,14 @@ ssize_t part_stat_show(struct device *dev,
 		"%8lu %8lu %8llu %8u "
 		"%8u %8u %8u"
 		"\n",
-		part_stat_read(p, ios[READ]),
-		part_stat_read(p, merges[READ]),
-		(unsigned long long)part_stat_read(p, sectors[READ]),
-		jiffies_to_msecs(part_stat_read(p, ticks[READ])),
-		part_stat_read(p, ios[WRITE]),
-		part_stat_read(p, merges[WRITE]),
-		(unsigned long long)part_stat_read(p, sectors[WRITE]),
-		jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
+		part_stat_read(p, ios[STAT_READ]),
+		part_stat_read(p, merges[STAT_READ]),
+		(unsigned long long)part_stat_read(p, sectors[STAT_READ]),
+		jiffies_to_msecs(part_stat_read(p, ticks[STAT_READ])),
+		part_stat_read(p, ios[STAT_WRITE]),
+		part_stat_read(p, merges[STAT_WRITE]),
+		(unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
+		jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])),
 		inflight[0],
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
 		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ba2396a7bd04..4b8aef989552 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3514,7 +3514,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_sb_block = sb_block;
 	if (sb->s_bdev->bd_part)
 		sbi->s_sectors_written_start =
-			part_stat_read(sb->s_bdev->bd_part, sectors[1]);
+			part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]);
 
 	/* Cleanup superblock name */
 	strreplace(sb->s_id, '/', '!');
@@ -4824,7 +4824,8 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 	if (sb->s_bdev->bd_part)
 		es->s_kbytes_written =
 			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
-			    ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
+			    ((part_stat_read(sb->s_bdev->bd_part,
+					     sectors[STAT_WRITE]) -
 			      EXT4_SB(sb)->s_sectors_written_start) >> 1));
 	else
 		es->s_kbytes_written =
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index f34da0bb8f17..2be9ad790017 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -56,7 +56,8 @@ static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
 	if (!sb->s_bdev->bd_part)
 		return snprintf(buf, PAGE_SIZE, "0\n");
 	return snprintf(buf, PAGE_SIZE, "%lu\n",
-			(part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
+			(part_stat_read(sb->s_bdev->bd_part,
+					sectors[STAT_WRITE]) -
 			 sbi->s_sectors_written_start) >> 1);
 }
 
@@ -68,7 +69,8 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
 		return snprintf(buf, PAGE_SIZE, "0\n");
 	return snprintf(buf, PAGE_SIZE, "%llu\n",
 			(unsigned long long)(sbi->s_kbytes_written +
-			((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
+			((part_stat_read(sb->s_bdev->bd_part,
+					 sectors[STAT_WRITE]) -
 			  EXT4_SB(sb)->s_sectors_written_start) >> 1)));
 }
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4d8b1de83143..6799c3fc44e3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1304,7 +1304,7 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
  * and the return value is in kbytes. s is of struct f2fs_sb_info.
  */
 #define BD_PART_WRITTEN(s)						 \
-(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[1]) -		 \
+(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[STAT_WRITE]) -   \
 		(s)->sectors_written_start) >> 1)
 
 static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3995e926ba3a..17bcff789c08 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2882,7 +2882,8 @@ try_onemore:
 	/* For write statistics */
 	if (sb->s_bdev->bd_part)
 		sbi->sectors_written_start =
-			(u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]);
+			(u64)part_stat_read(sb->s_bdev->bd_part,
+					    sectors[STAT_WRITE]);
 
 	/* Read accumulated write IO statistics if exists */
 	seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index e13449a379a1..d2b44de56bc1 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -357,6 +357,13 @@ enum req_flag_bits {
 #define REQ_NOMERGE_FLAGS \
 	(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)
 
+enum stat_group {
+	STAT_READ,
+	STAT_WRITE,
+
+	NR_STAT_GROUPS
+};
+
 #define bio_op(bio) \
 	((bio)->bi_opf & REQ_OP_MASK)
 #define req_op(req) \
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 19f36fa10995..a75445446974 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/percpu-refcount.h>
 #include <linux/uuid.h>
+#include <linux/blk_types.h>
 
 #ifdef CONFIG_BLOCK
 
@@ -82,10 +83,10 @@ struct partition {
 } __attribute__((packed));
 
 struct disk_stats {
-	unsigned long sectors[2];	/* READs and WRITEs */
-	unsigned long ios[2];
-	unsigned long merges[2];
-	unsigned long ticks[2];
+	unsigned long sectors[NR_STAT_GROUPS];
+	unsigned long ios[NR_STAT_GROUPS];
+	unsigned long merges[NR_STAT_GROUPS];
+	unsigned long ticks[NR_STAT_GROUPS];
 	unsigned long io_ticks;
 	unsigned long time_in_queue;
 };
@@ -354,8 +355,8 @@ static inline void free_part_stats(struct hd_struct *part)
 #endif /* CONFIG_SMP */
 
 #define part_stat_read_accum(part, field)				\
-	(part_stat_read(part, field[0]) +				\
-	 part_stat_read(part, field[1]))
+	(part_stat_read(part, field[STAT_READ]) +			\
+	 part_stat_read(part, field[STAT_WRITE]))
 
 #define part_stat_add(cpu, part, field, addnd)	do {			\
 	__part_stat_add((cpu), (part), field, addnd);			\
-- 
cgit v1.2.3


From 2f819db565e82e5f73cd42b39925098986693378 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@mips.com>
Date: Tue, 15 May 2018 23:32:45 +0100
Subject: binfmt_elf: Respect error return from `regset->active'

The regset API documented in <linux/regset.h> defines -ENODEV as the
result of the `->active' handler to be used where the feature requested
is not available on the hardware found.  However code handling core file
note generation in `fill_thread_core_info' interprets any non-zero
result from the `->active' handler as the regset requested being active.
Consequently processing continues (and hopefully gracefully fails later
on) rather than being abandoned right away for the regset requested.

Fix the problem then by making the code proceed only if a positive
result is returned from the `->active' handler.

Signed-off-by: Maciej W. Rozycki <macro@mips.com>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Fixes: 4206d3aa1978 ("elf core dump: notes user_regset")
Patchwork: https://patchwork.linux-mips.org/patch/19332/
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---
 fs/binfmt_elf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0ac456b52bdd..21b47c36309b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1752,7 +1752,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 		const struct user_regset *regset = &view->regsets[i];
 		do_thread_regset_writeback(t->task, regset);
 		if (regset->core_note_type && regset->get &&
-		    (!regset->active || regset->active(t->task, regset))) {
+		    (!regset->active || regset->active(t->task, regset) > 0)) {
 			int ret;
 			size_t size = regset_size(t->task, regset);
 			void *data = kmalloc(size, GFP_KERNEL);
-- 
cgit v1.2.3


From f2df5da66262f429cbb0d5da0e72ada6a6345f28 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 19 Jul 2018 17:35:51 -0400
Subject: fold generic_readlink() into its only caller

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 36 ++++++++++++------------------------
 1 file changed, 12 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 734cef54fdf8..bb6c6e4f59e2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4712,29 +4712,6 @@ out:
 	return len;
 }
 
-/*
- * A helper for ->readlink().  This should be used *ONLY* for symlinks that
- * have ->get_link() not calling nd_jump_link().  Using (or not using) it
- * for any given inode is up to filesystem.
- */
-static int generic_readlink(struct dentry *dentry, char __user *buffer,
-			    int buflen)
-{
-	DEFINE_DELAYED_CALL(done);
-	struct inode *inode = d_inode(dentry);
-	const char *link = inode->i_link;
-	int res;
-
-	if (!link) {
-		link = inode->i_op->get_link(dentry, inode, &done);
-		if (IS_ERR(link))
-			return PTR_ERR(link);
-	}
-	res = readlink_copy(buffer, buflen, link);
-	do_delayed_call(&done);
-	return res;
-}
-
 /**
  * vfs_readlink - copy symlink body into userspace buffer
  * @dentry: dentry on which to get symbolic link
@@ -4748,6 +4725,9 @@ static int generic_readlink(struct dentry *dentry, char __user *buffer,
 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 {
 	struct inode *inode = d_inode(dentry);
+	DEFINE_DELAYED_CALL(done);
+	const char *link;
+	int res;
 
 	if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
 		if (unlikely(inode->i_op->readlink))
@@ -4761,7 +4741,15 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 		spin_unlock(&inode->i_lock);
 	}
 
-	return generic_readlink(dentry, buffer, buflen);
+	link = inode->i_link;
+	if (!link) {
+		link = inode->i_op->get_link(dentry, inode, &done);
+		if (IS_ERR(link))
+			return PTR_ERR(link);
+	}
+	res = readlink_copy(buffer, buflen, link);
+	do_delayed_call(&done);
+	return res;
 }
 EXPORT_SYMBOL(vfs_readlink);
 
-- 
cgit v1.2.3


From 488dee96bb62f0b3d9e678cf42574034d5b033a5 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 20 Jul 2018 21:56:47 +0000
Subject: kernfs: allow creating kernfs objects with arbitrary uid/gid

This change allows creating kernfs files and directories with arbitrary
uid/gid instead of always using GLOBAL_ROOT_UID/GID by extending
kernfs_create_dir_ns() and kernfs_create_file_ns() with uid/gid arguments.
The "simple" kernfs_create_file() and kernfs_create_dir() are left alone
and always create objects belonging to the global root.

When creating symlinks, ownership (uid/gid) is taken from the target
kernfs object.

Co-Developed-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c |  4 +++-
 fs/kernfs/dir.c                          | 29 ++++++++++++++++++++++++++---
 fs/kernfs/file.c                         |  8 ++++++--
 fs/kernfs/inode.c                        |  2 +-
 fs/kernfs/kernfs-internal.h              |  2 ++
 fs/kernfs/symlink.c                      | 11 ++++++++++-
 fs/sysfs/dir.c                           |  4 +++-
 fs/sysfs/file.c                          |  5 +++--
 include/linux/kernfs.h                   | 28 +++++++++++++++++++---------
 kernel/cgroup/cgroup.c                   |  4 +++-
 10 files changed, 76 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 749856a2e736..9af1a21265d3 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -146,6 +146,7 @@ static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
 	int ret;
 
 	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 				  0, rft->kf_ops, rft, NULL, NULL);
 	if (IS_ERR(kn))
 		return PTR_ERR(kn);
@@ -1503,7 +1504,8 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
 	struct kernfs_node *kn;
 	int ret = 0;
 
-	kn = __kernfs_create_file(parent_kn, name, 0444, 0,
+	kn = __kernfs_create_file(parent_kn, name, 0444,
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
 				  &kf_mondata_ops, priv, NULL, NULL);
 	if (IS_ERR(kn))
 		return PTR_ERR(kn);
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index d66cc0777303..4ca0b5c18192 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -619,6 +619,7 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
 
 static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 					     const char *name, umode_t mode,
+					     kuid_t uid, kgid_t gid,
 					     unsigned flags)
 {
 	struct kernfs_node *kn;
@@ -661,8 +662,22 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 	kn->mode = mode;
 	kn->flags = flags;
 
+	if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) {
+		struct iattr iattr = {
+			.ia_valid = ATTR_UID | ATTR_GID,
+			.ia_uid = uid,
+			.ia_gid = gid,
+		};
+
+		ret = __kernfs_setattr(kn, &iattr);
+		if (ret < 0)
+			goto err_out3;
+	}
+
 	return kn;
 
+ err_out3:
+	idr_remove(&root->ino_idr, kn->id.ino);
  err_out2:
 	kmem_cache_free(kernfs_node_cache, kn);
  err_out1:
@@ -672,11 +687,13 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 
 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 				    const char *name, umode_t mode,
+				    kuid_t uid, kgid_t gid,
 				    unsigned flags)
 {
 	struct kernfs_node *kn;
 
-	kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags);
+	kn = __kernfs_new_node(kernfs_root(parent),
+			       name, mode, uid, gid, flags);
 	if (kn) {
 		kernfs_get(parent);
 		kn->parent = parent;
@@ -946,6 +963,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 	root->next_generation = 1;
 
 	kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
+			       GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 			       KERNFS_DIR);
 	if (!kn) {
 		idr_destroy(&root->ino_idr);
@@ -984,6 +1002,8 @@ void kernfs_destroy_root(struct kernfs_root *root)
  * @parent: parent in which to create a new directory
  * @name: name of the new directory
  * @mode: mode of the new directory
+ * @uid: uid of the new directory
+ * @gid: gid of the new directory
  * @priv: opaque data associated with the new directory
  * @ns: optional namespace tag of the directory
  *
@@ -991,13 +1011,15 @@ void kernfs_destroy_root(struct kernfs_root *root)
  */
 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
 					 const char *name, umode_t mode,
+					 kuid_t uid, kgid_t gid,
 					 void *priv, const void *ns)
 {
 	struct kernfs_node *kn;
 	int rc;
 
 	/* allocate */
-	kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR);
+	kn = kernfs_new_node(parent, name, mode | S_IFDIR,
+			     uid, gid, KERNFS_DIR);
 	if (!kn)
 		return ERR_PTR(-ENOMEM);
 
@@ -1028,7 +1050,8 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
 	int rc;
 
 	/* allocate */
-	kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR);
+	kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR,
+			     GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR);
 	if (!kn)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 2015d8c45e4a..dbf5bc250bfd 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -965,6 +965,8 @@ const struct file_operations kernfs_file_fops = {
  * @parent: directory to create the file in
  * @name: name of the file
  * @mode: mode of the file
+ * @uid: uid of the file
+ * @gid: gid of the file
  * @size: size of the file
  * @ops: kernfs operations for the file
  * @priv: private data for the file
@@ -975,7 +977,8 @@ const struct file_operations kernfs_file_fops = {
  */
 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
 					 const char *name,
-					 umode_t mode, loff_t size,
+					 umode_t mode, kuid_t uid, kgid_t gid,
+					 loff_t size,
 					 const struct kernfs_ops *ops,
 					 void *priv, const void *ns,
 					 struct lock_class_key *key)
@@ -986,7 +989,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
 
 	flags = KERNFS_FILE;
 
-	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags);
+	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
+			     uid, gid, flags);
 	if (!kn)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 3d73fe9d56e2..80cebcd94c90 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -63,7 +63,7 @@ out_unlock:
 	return ret;
 }
 
-static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
+int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
 {
 	struct kernfs_iattrs *attrs;
 	struct iattr *iattrs;
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 0f260dcca177..3d83b114bb08 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -90,6 +90,7 @@ int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr);
 int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
 		       u32 request_mask, unsigned int query_flags);
 ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size);
+int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
 
 /*
  * dir.c
@@ -104,6 +105,7 @@ void kernfs_put_active(struct kernfs_node *kn);
 int kernfs_add_one(struct kernfs_node *kn);
 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 				    const char *name, umode_t mode,
+				    kuid_t uid, kgid_t gid,
 				    unsigned flags);
 struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
 						    unsigned int ino);
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 08ccabd7047f..5ffed48f3d0e 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -21,6 +21,7 @@
  * @target: target node for the symlink to point to
  *
  * Returns the created node on success, ERR_PTR() value on error.
+ * Ownership of the link matches ownership of the target.
  */
 struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
 				       const char *name,
@@ -28,8 +29,16 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
 {
 	struct kernfs_node *kn;
 	int error;
+	kuid_t uid = GLOBAL_ROOT_UID;
+	kgid_t gid = GLOBAL_ROOT_GID;
 
-	kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK);
+	if (target->iattr) {
+		uid = target->iattr->ia_iattr.ia_uid;
+		gid = target->iattr->ia_iattr.ia_gid;
+	}
+
+	kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, uid, gid,
+			     KERNFS_LINK);
 	if (!kn)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 58eba92a0e41..e39b884f0867 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -52,7 +52,9 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 		return -ENOENT;
 
 	kn = kernfs_create_dir_ns(parent, kobject_name(kobj),
-				  S_IRWXU | S_IRUGO | S_IXUGO, kobj, ns);
+				  S_IRWXU | S_IRUGO | S_IXUGO,
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  kobj, ns);
 	if (IS_ERR(kn)) {
 		if (PTR_ERR(kn) == -EEXIST)
 			sysfs_warn_dup(parent, kobject_name(kobj));
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 5c13f29bfcdb..513fa691ecbd 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -302,8 +302,9 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 	if (!attr->ignore_lockdep)
 		key = attr->key ?: (struct lock_class_key *)&attr->skey;
 #endif
-	kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops,
-				  (void *)attr, ns, key);
+	kn = __kernfs_create_file(parent, attr->name,
+				  mode & 0777, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  size, ops, (void *)attr, ns, key);
 	if (IS_ERR(kn)) {
 		if (PTR_ERR(kn) == -EEXIST)
 			sysfs_warn_dup(parent, attr->name);
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index ab25c8b6d9e3..814643f7ee52 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -15,6 +15,7 @@
 #include <linux/lockdep.h>
 #include <linux/rbtree.h>
 #include <linux/atomic.h>
+#include <linux/uidgid.h>
 #include <linux/wait.h>
 
 struct file;
@@ -325,12 +326,14 @@ void kernfs_destroy_root(struct kernfs_root *root);
 
 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
 					 const char *name, umode_t mode,
+					 kuid_t uid, kgid_t gid,
 					 void *priv, const void *ns);
 struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
 					    const char *name);
 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
-					 const char *name,
-					 umode_t mode, loff_t size,
+					 const char *name, umode_t mode,
+					 kuid_t uid, kgid_t gid,
+					 loff_t size,
 					 const struct kernfs_ops *ops,
 					 void *priv, const void *ns,
 					 struct lock_class_key *key);
@@ -415,12 +418,14 @@ static inline void kernfs_destroy_root(struct kernfs_root *root) { }
 
 static inline struct kernfs_node *
 kernfs_create_dir_ns(struct kernfs_node *parent, const char *name,
-		     umode_t mode, void *priv, const void *ns)
+		     umode_t mode, kuid_t uid, kgid_t gid,
+		     void *priv, const void *ns)
 { return ERR_PTR(-ENOSYS); }
 
 static inline struct kernfs_node *
 __kernfs_create_file(struct kernfs_node *parent, const char *name,
-		     umode_t mode, loff_t size, const struct kernfs_ops *ops,
+		     umode_t mode, kuid_t uid, kgid_t gid,
+		     loff_t size, const struct kernfs_ops *ops,
 		     void *priv, const void *ns, struct lock_class_key *key)
 { return ERR_PTR(-ENOSYS); }
 
@@ -498,12 +503,15 @@ static inline struct kernfs_node *
 kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
 		  void *priv)
 {
-	return kernfs_create_dir_ns(parent, name, mode, priv, NULL);
+	return kernfs_create_dir_ns(parent, name, mode,
+				    GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				    priv, NULL);
 }
 
 static inline struct kernfs_node *
 kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
-		      umode_t mode, loff_t size, const struct kernfs_ops *ops,
+		      umode_t mode, kuid_t uid, kgid_t gid,
+		      loff_t size, const struct kernfs_ops *ops,
 		      void *priv, const void *ns)
 {
 	struct lock_class_key *key = NULL;
@@ -511,15 +519,17 @@ kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	key = (struct lock_class_key *)&ops->lockdep_key;
 #endif
-	return __kernfs_create_file(parent, name, mode, size, ops, priv, ns,
-				    key);
+	return __kernfs_create_file(parent, name, mode, uid, gid,
+				    size, ops, priv, ns, key);
 }
 
 static inline struct kernfs_node *
 kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode,
 		   loff_t size, const struct kernfs_ops *ops, void *priv)
 {
-	return kernfs_create_file_ns(parent, name, mode, size, ops, priv, NULL);
+	return kernfs_create_file_ns(parent, name, mode,
+				     GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				     size, ops, priv, NULL);
 }
 
 static inline int kernfs_remove_by_name(struct kernfs_node *parent,
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 077370bf8964..35cf3d71f8aa 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3557,7 +3557,9 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
 	key = &cft->lockdep_key;
 #endif
 	kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
-				  cgroup_file_mode(cft), 0, cft->kf_ops, cft,
+				  cgroup_file_mode(cft),
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  0, cft->kf_ops, cft,
 				  NULL, key);
 	if (IS_ERR(kn))
 		return PTR_ERR(kn);
-- 
cgit v1.2.3


From 5f81880d5204ee2388fd9a75bb850ccd526885b7 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 20 Jul 2018 21:56:48 +0000
Subject: sysfs, kobject: allow creating kobject belonging to arbitrary users

Normally kobjects and their sysfs representation belong to global root,
however it is not necessarily the case for objects in separate namespaces.
For example, objects in a separate network namespace logically belong to
the container's root and not the global root.

This change lays the groundwork for allowing ownership of network namespace
objects to be transferred to the container's root user by defining a
get_ownership() callback in the ktype structure and using it in sysfs code
to retrieve the desired uid/gid when creating sysfs objects for a given kobject.

Co-Developed-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/sysfs/dir.c          |  7 +++++--
 fs/sysfs/file.c         | 32 ++++++++++++++++++++------------
 fs/sysfs/group.c        | 23 +++++++++++++++++------
 fs/sysfs/sysfs.h        |  5 ++---
 include/linux/kobject.h |  4 ++++
 lib/kobject.c           | 19 +++++++++++++++++++
 6 files changed, 67 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e39b884f0867..feeae8081c22 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -40,6 +40,8 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
 int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 {
 	struct kernfs_node *parent, *kn;
+	kuid_t uid;
+	kgid_t gid;
 
 	BUG_ON(!kobj);
 
@@ -51,9 +53,10 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 	if (!parent)
 		return -ENOENT;
 
+	kobject_get_ownership(kobj, &uid, &gid);
+
 	kn = kernfs_create_dir_ns(parent, kobject_name(kobj),
-				  S_IRWXU | S_IRUGO | S_IXUGO,
-				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  S_IRWXU | S_IRUGO | S_IXUGO, uid, gid,
 				  kobj, ns);
 	if (IS_ERR(kn)) {
 		if (PTR_ERR(kn) == -EEXIST)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 513fa691ecbd..fa46216523cf 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -245,7 +245,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = {
 
 int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 			   const struct attribute *attr, bool is_bin,
-			   umode_t mode, const void *ns)
+			   umode_t mode, kuid_t uid, kgid_t gid, const void *ns)
 {
 	struct lock_class_key *key = NULL;
 	const struct kernfs_ops *ops;
@@ -302,8 +302,8 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 	if (!attr->ignore_lockdep)
 		key = attr->key ?: (struct lock_class_key *)&attr->skey;
 #endif
-	kn = __kernfs_create_file(parent, attr->name,
-				  mode & 0777, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+
+	kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid,
 				  size, ops, (void *)attr, ns, key);
 	if (IS_ERR(kn)) {
 		if (PTR_ERR(kn) == -EEXIST)
@@ -313,12 +313,6 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 	return 0;
 }
 
-int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
-		   bool is_bin)
-{
-	return sysfs_add_file_mode_ns(parent, attr, is_bin, attr->mode, NULL);
-}
-
 /**
  * sysfs_create_file_ns - create an attribute file for an object with custom ns
  * @kobj: object we're creating for
@@ -328,9 +322,14 @@ int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
 int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
 			 const void *ns)
 {
+	kuid_t uid;
+	kgid_t gid;
+
 	BUG_ON(!kobj || !kobj->sd || !attr);
 
-	return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns);
+	kobject_get_ownership(kobj, &uid, &gid);
+	return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode,
+				      uid, gid, ns);
 
 }
 EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
@@ -359,6 +358,8 @@ int sysfs_add_file_to_group(struct kobject *kobj,
 		const struct attribute *attr, const char *group)
 {
 	struct kernfs_node *parent;
+	kuid_t uid;
+	kgid_t gid;
 	int error;
 
 	if (group) {
@@ -371,7 +372,9 @@ int sysfs_add_file_to_group(struct kobject *kobj,
 	if (!parent)
 		return -ENOENT;
 
-	error = sysfs_add_file(parent, attr, false);
+	kobject_get_ownership(kobj, &uid, &gid);
+	error = sysfs_add_file_mode_ns(parent, attr, false,
+				       attr->mode, uid, gid, NULL);
 	kernfs_put(parent);
 
 	return error;
@@ -487,9 +490,14 @@ EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
 int sysfs_create_bin_file(struct kobject *kobj,
 			  const struct bin_attribute *attr)
 {
+	kuid_t uid;
+	kgid_t gid;
+
 	BUG_ON(!kobj || !kobj->sd || !attr);
 
-	return sysfs_add_file(kobj->sd, &attr->attr, true);
+	kobject_get_ownership(kobj, &uid, &gid);
+	return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true,
+				      attr->attr.mode, uid, gid, NULL);
 }
 EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
 
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 4802ec0e1e3a..c7a716c4acc9 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -31,6 +31,7 @@ static void remove_files(struct kernfs_node *parent,
 }
 
 static int create_files(struct kernfs_node *parent, struct kobject *kobj,
+			kuid_t uid, kgid_t gid,
 			const struct attribute_group *grp, int update)
 {
 	struct attribute *const *attr;
@@ -60,7 +61,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
 
 			mode &= SYSFS_PREALLOC | 0664;
 			error = sysfs_add_file_mode_ns(parent, *attr, false,
-						       mode, NULL);
+						       mode, uid, gid, NULL);
 			if (unlikely(error))
 				break;
 		}
@@ -90,7 +91,8 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
 			mode &= SYSFS_PREALLOC | 0664;
 			error = sysfs_add_file_mode_ns(parent,
 					&(*bin_attr)->attr, true,
-					mode, NULL);
+					mode,
+					uid, gid, NULL);
 			if (error)
 				break;
 		}
@@ -106,6 +108,8 @@ static int internal_create_group(struct kobject *kobj, int update,
 				 const struct attribute_group *grp)
 {
 	struct kernfs_node *kn;
+	kuid_t uid;
+	kgid_t gid;
 	int error;
 
 	BUG_ON(!kobj || (!update && !kobj->sd));
@@ -118,9 +122,11 @@ static int internal_create_group(struct kobject *kobj, int update,
 			kobj->name, grp->name ?: "");
 		return -EINVAL;
 	}
+	kobject_get_ownership(kobj, &uid, &gid);
 	if (grp->name) {
-		kn = kernfs_create_dir(kobj->sd, grp->name,
-				       S_IRWXU | S_IRUGO | S_IXUGO, kobj);
+		kn = kernfs_create_dir_ns(kobj->sd, grp->name,
+					  S_IRWXU | S_IRUGO | S_IXUGO,
+					  uid, gid, kobj, NULL);
 		if (IS_ERR(kn)) {
 			if (PTR_ERR(kn) == -EEXIST)
 				sysfs_warn_dup(kobj->sd, grp->name);
@@ -129,7 +135,7 @@ static int internal_create_group(struct kobject *kobj, int update,
 	} else
 		kn = kobj->sd;
 	kernfs_get(kn);
-	error = create_files(kn, kobj, grp, update);
+	error = create_files(kn, kobj, uid, gid, grp, update);
 	if (error) {
 		if (grp->name)
 			kernfs_remove(kn);
@@ -281,6 +287,8 @@ int sysfs_merge_group(struct kobject *kobj,
 		       const struct attribute_group *grp)
 {
 	struct kernfs_node *parent;
+	kuid_t uid;
+	kgid_t gid;
 	int error = 0;
 	struct attribute *const *attr;
 	int i;
@@ -289,8 +297,11 @@ int sysfs_merge_group(struct kobject *kobj,
 	if (!parent)
 		return -ENOENT;
 
+	kobject_get_ownership(kobj, &uid, &gid);
+
 	for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr))
-		error = sysfs_add_file(parent, *attr, false);
+		error = sysfs_add_file_mode_ns(parent, *attr, false,
+					       (*attr)->mode, uid, gid, NULL);
 	if (error) {
 		while (--i >= 0)
 			kernfs_remove_by_name(parent, (*--attr)->name);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d098e015fcc9..0050cc0c0236 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -27,11 +27,10 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name);
 /*
  * file.c
  */
-int sysfs_add_file(struct kernfs_node *parent,
-		   const struct attribute *attr, bool is_bin);
 int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 			   const struct attribute *attr, bool is_bin,
-			   umode_t amode, const void *ns);
+			   umode_t amode, kuid_t uid, kgid_t gid,
+			   const void *ns);
 
 /*
  * symlink.c
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 7f6f93c3df9c..b49ff230beba 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -26,6 +26,7 @@
 #include <linux/wait.h>
 #include <linux/atomic.h>
 #include <linux/workqueue.h>
+#include <linux/uidgid.h>
 
 #define UEVENT_HELPER_PATH_LEN		256
 #define UEVENT_NUM_ENVP			32	/* number of env pointers */
@@ -114,6 +115,8 @@ extern struct kobject * __must_check kobject_get_unless_zero(
 extern void kobject_put(struct kobject *kobj);
 
 extern const void *kobject_namespace(struct kobject *kobj);
+extern void kobject_get_ownership(struct kobject *kobj,
+				  kuid_t *uid, kgid_t *gid);
 extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);
 
 struct kobj_type {
@@ -122,6 +125,7 @@ struct kobj_type {
 	struct attribute **default_attrs;
 	const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
 	const void *(*namespace)(struct kobject *kobj);
+	void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid);
 };
 
 struct kobj_uevent_env {
diff --git a/lib/kobject.c b/lib/kobject.c
index 18989b5b3b56..f2dc1f756007 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -35,6 +35,25 @@ const void *kobject_namespace(struct kobject *kobj)
 	return kobj->ktype->namespace(kobj);
 }
 
+/**
+ * kobject_get_ownership - get sysfs ownership data for @kobj
+ * @kobj: kobject in question
+ * @uid: kernel user ID for sysfs objects
+ * @gid: kernel group ID for sysfs objects
+ *
+ * Returns initial uid/gid pair that should be used when creating sysfs
+ * representation of given kobject. Normally used to adjust ownership of
+ * objects in a container.
+ */
+void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+{
+	*uid = GLOBAL_ROOT_UID;
+	*gid = GLOBAL_ROOT_GID;
+
+	if (kobj->ktype->get_ownership)
+		kobj->ktype->get_ownership(kobj, uid, gid);
+}
+
 /*
  * populate_dir - populate directory with attributes.
  * @kobj: object we're working on.
-- 
cgit v1.2.3


From c4326563d9abe86ad54474f9e9142bd2663eede5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 20 Jul 2018 10:47:26 +0900
Subject: efivars: Call guid_parse() against guid_t type of variable

uuid_le_to_bin() is a deprecated API. Taking into consideration that the
variable in which we store the parsed data is of type guid_t, switch to
guid_parse() for the sake of consistency.

While here, add error checking to it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20180720014726.24031-10-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/efivarfs/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 71fccccf317e..8c6ab6c95727 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -86,7 +86,9 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
 	/* length of the variable name itself: remove GUID and separator */
 	namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1;
 
-	uuid_le_to_bin(dentry->d_name.name + namelen + 1, &var->var.VendorGuid);
+	err = guid_parse(dentry->d_name.name + namelen + 1, &var->var.VendorGuid);
+	if (err)
+		goto out;
 
 	if (efivar_variable_is_removable(var->var.VendorGuid,
 					 dentry->d_name.name, namelen))
-- 
cgit v1.2.3


From 65cfcc3897d7715a878b9f59736e7527ca27514f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 19 Jul 2018 12:24:55 -0700
Subject: xfs: check leaf attribute block freemap in verifier

Check the leaf attribute freemap when we're verifying the block.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_attr_leaf.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 8ff287979a8c..088ffcd22fa2 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -244,6 +244,8 @@ xfs_attr3_leaf_verify(
 	struct xfs_attr_leafblock	*leaf = bp->b_addr;
 	struct xfs_perag		*pag = bp->b_pag;
 	struct xfs_attr_leaf_entry	*entries;
+	uint16_t			end;
+	int				i;
 
 	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
 
@@ -289,6 +291,26 @@ xfs_attr3_leaf_verify(
 	/* XXX: need to range check rest of attr header values */
 	/* XXX: hash order check? */
 
+	/*
+	 * Quickly check the freemap information.  Attribute data has to be
+	 * aligned to 4-byte boundaries, and likewise for the free space.
+	 */
+	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
+		if (ichdr.freemap[i].base > mp->m_attr_geo->blksize)
+			return __this_address;
+		if (ichdr.freemap[i].base & 0x3)
+			return __this_address;
+		if (ichdr.freemap[i].size > mp->m_attr_geo->blksize)
+			return __this_address;
+		if (ichdr.freemap[i].size & 0x3)
+			return __this_address;
+		end = ichdr.freemap[i].base + ichdr.freemap[i].size;
+		if (end < ichdr.freemap[i].base)
+			return __this_address;
+		if (end > mp->m_attr_geo->blksize)
+			return __this_address;
+	}
+
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 81b549aa626b650bbf00423c084c0fd5581169b9 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 19 Jul 2018 12:25:47 -0700
Subject: xfs: return from _defer_finish with a clean transaction

The following assertion was seen on generic/051:

XFS: Assertion failed: tp->t_firstblock == NULLFSBLOCK, file: fs/xfs/libxfs5
------------[ cut here ]------------
kernel BUG at fs/xfs/xfs_message.c:102!
invalid opcode: 0000 [#1] SMP PTI
CPU: 2 PID: 20757 Comm: fsstress Not tainted 4.18.0-rc4+ #3969
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.1-1 04/01/4
RIP: 0010:assfail+0x23/0x30
Code: c3 66 0f 1f 44 00 00 48 89 f1 41 89 d0 48 c7 c6 88 e0 8c 82 48 89 fa
RSP: 0018:ffff88012dc43c08 EFLAGS: 00010202
RAX: 0000000000000000 RBX: ffff88012dc43ca0 RCX: 0000000000000000
RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffff828480eb
RBP: ffff88012aa92758 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: f000000000000000 R12: 0000000000000000
R13: ffff88012dc43d48 R14: ffff88013092e7e8 R15: 0000000000000014
FS:  00007f8d689b8e80(0000) GS:ffff88013fd00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f8d689c7000 CR3: 000000012ba6a000 CR4: 00000000000006e0
Call Trace:
 xfs_defer_init+0xff/0x160
 xfs_reflink_remap_extent+0x31b/0xa00
 xfs_reflink_remap_blocks+0xec/0x4a0
 xfs_reflink_remap_range+0x3a1/0x650
 xfs_file_dedupe_range+0x39/0x50
 vfs_dedupe_file_range+0x218/0x260
 do_vfs_ioctl+0x262/0x6a0
 ? __se_sys_newfstat+0x3c/0x60
 ksys_ioctl+0x35/0x60
 __x64_sys_ioctl+0x11/0x20
 do_syscall_64+0x4b/0x190
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

The root cause of the assertion failure is that xfs_defer_finish doesn't
roll the transaction after processing all the deferred items.  Therefore
it returns a dirty transaction to the caller, which leaves the caller at
risk of exceeding the transaction reservation if it logs more items.

Brian Foster's patchset to move the defer_ops firstblock into the
transaction requires t_firstblock == NULLFSBLOCK upon defer_ops
initialization, which is how this was noticed at all.

Reported-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_defer.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 2713e2d808a7..c4b0eaeb5190 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -424,6 +424,12 @@ xfs_defer_finish(
 			cleanup_fn(*tp, state, error);
 	}
 
+	/*
+	 * Roll the transaction once more to avoid returning to the caller
+	 * with a dirty transaction.
+	 */
+	if ((*tp)->t_flags & XFS_TRANS_DIRTY)
+		error = xfs_defer_trans_roll(tp, dop);
 out:
 	(*tp)->t_dfops = orig_dop;
 	if (error)
-- 
cgit v1.2.3


From 0b04b6b875b32f2b32263ba46d54d001e05724f9 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 19 Jul 2018 12:26:31 -0700
Subject: xfs: trivial xfs_btree_del_cursor cleanups

The error argument to xfs_btree_del_cursor already understands the
"nonzero for error" semantics, so remove pointless error testing in the
callers and pass it directly.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_bmap.c         | 24 ++++++++----------------
 fs/xfs/libxfs/xfs_bmap_btree.c   |  2 +-
 fs/xfs/libxfs/xfs_ialloc.c       |  2 +-
 fs/xfs/libxfs/xfs_ialloc_btree.c |  2 +-
 fs/xfs/libxfs/xfs_refcount.c     |  2 +-
 fs/xfs/libxfs/xfs_rmap.c         | 18 +++---------------
 fs/xfs/scrub/bmap.c              |  5 ++---
 fs/xfs/scrub/repair.c            |  2 +-
 fs/xfs/xfs_discard.c             |  2 +-
 fs/xfs/xfs_fsmap.c               |  2 +-
 fs/xfs/xfs_itable.c              |  6 ++----
 fs/xfs/xfs_reflink.c             |  2 +-
 12 files changed, 23 insertions(+), 46 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7b93b1e16ad9..6bc0cdff488e 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -961,8 +961,7 @@ xfs_bmap_add_attrfork_extents(
 					  XFS_DATA_FORK);
 	if (cur) {
 		cur->bc_private.b.allocated = 0;
-		xfs_btree_del_cursor(cur,
-			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+		xfs_btree_del_cursor(cur, error);
 	}
 	return error;
 }
@@ -4447,8 +4446,7 @@ error0:
 		xfs_trans_log_inode(tp, ip, bma.logflags);
 
 	if (bma.cur) {
-		xfs_btree_del_cursor(bma.cur,
-			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+		xfs_btree_del_cursor(bma.cur, error);
 	}
 	if (!error)
 		xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
@@ -4542,10 +4540,8 @@ error0:
 
 	if (logflags)
 		xfs_trans_log_inode(tp, ip, logflags);
-	if (cur) {
-		xfs_btree_del_cursor(cur,
-				error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
-	}
+	if (cur)
+		xfs_btree_del_cursor(cur, error);
 	return error;
 }
 
@@ -5439,8 +5435,7 @@ error0:
 	if (cur) {
 		if (!error)
 			cur->bc_private.b.allocated = 0;
-		xfs_btree_del_cursor(cur,
-			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+		xfs_btree_del_cursor(cur, error);
 	}
 	return error;
 }
@@ -5700,8 +5695,7 @@ done:
 	*next_fsb = got.br_startoff;
 del_cursor:
 	if (cur)
-		xfs_btree_del_cursor(cur,
-			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+		xfs_btree_del_cursor(cur, error);
 	if (logflags)
 		xfs_trans_log_inode(tp, ip, logflags);
 	return error;
@@ -5828,8 +5822,7 @@ xfs_bmap_insert_extents(
 	*next_fsb = got.br_startoff;
 del_cursor:
 	if (cur)
-		xfs_btree_del_cursor(cur,
-			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+		xfs_btree_del_cursor(cur, error);
 	if (logflags)
 		xfs_trans_log_inode(tp, ip, logflags);
 	return error;
@@ -5945,8 +5938,7 @@ xfs_bmap_split_extent_at(
 del_cursor:
 	if (cur) {
 		cur->bc_private.b.allocated = 0;
-		xfs_btree_del_cursor(cur,
-				error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+		xfs_btree_del_cursor(cur, error);
 	}
 
 	if (logflags)
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 628ed82ca286..01489714a253 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -639,7 +639,7 @@ xfs_bmbt_change_owner(
 	cur->bc_private.b.flags |= XFS_BTCUR_BPRV_INVALID_OWNER;
 
 	error = xfs_btree_change_owner(cur, new_owner, buffer_list);
-	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cur, error);
 	return error;
 }
 
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 811d36afd024..295304ad1bc1 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2258,7 +2258,7 @@ xfs_imap_lookup(
 	}
 
 	xfs_trans_brelse(tp, agbp);
-	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cur, error);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index a5237afec5ab..735a33252eb2 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -566,7 +566,7 @@ xfs_inobt_count_blocks(
 
 	cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum);
 	error = xfs_btree_count_blocks(cur, tree_blocks);
-	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cur, error);
 	xfs_buf_relse(agbp);
 
 	return error;
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 2ecfb0518580..a2dfae67ade1 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1067,7 +1067,7 @@ xfs_refcount_finish_one_cleanup(
 	if (rcur == NULL)
 		return;
 	agbp = rcur->bc_private.a.agbp;
-	xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(rcur, error);
 	if (error)
 		xfs_trans_brelse(tp, agbp);
 }
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 8b2a2f81d110..fb266fa2cc45 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -670,14 +670,8 @@ xfs_rmap_free(
 	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
 
 	error = xfs_rmap_unmap(cur, bno, len, false, oinfo);
-	if (error)
-		goto out_error;
 
-	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-	return 0;
-
-out_error:
-	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	xfs_btree_del_cursor(cur, error);
 	return error;
 }
 
@@ -912,14 +906,8 @@ xfs_rmap_alloc(
 
 	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
 	error = xfs_rmap_map(cur, bno, len, false, oinfo);
-	if (error)
-		goto out_error;
 
-	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-	return 0;
-
-out_error:
-	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	xfs_btree_del_cursor(cur, error);
 	return error;
 }
 
@@ -2156,7 +2144,7 @@ xfs_rmap_finish_one_cleanup(
 	if (rcur == NULL)
 		return;
 	agbp = rcur->bc_private.a.agbp;
-	xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(rcur, error);
 	if (error)
 		xfs_trans_brelse(tp, agbp);
 }
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 3d08589f5c60..ebbfab173e97 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -404,8 +404,7 @@ xfs_scrub_bmap_btree(
 	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
 	error = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_rec, &oinfo, info);
-	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR :
-					  XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cur, error);
 	return error;
 }
 
@@ -514,7 +513,7 @@ xfs_scrub_bmap_check_ag_rmaps(
 	if (error == XFS_BTREE_QUERY_RANGE_ABORT)
 		error = 0;
 
-	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cur, error);
 out_agf:
 	xfs_trans_brelse(sc->tp, agf);
 	return error;
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 326be4e8b71e..35c589a04fac 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -1009,7 +1009,7 @@ xfs_repair_find_ag_btree_roots(
 
 	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
 	error = xfs_rmap_query_all(cur, xfs_repair_findroot_rmap, &ri);
-	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cur, error);
 
 	return error;
 }
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 678a5fcd7576..93f07edafd81 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -128,7 +128,7 @@ next_extent:
 	}
 
 out_del_cursor:
-	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cur, error);
 	xfs_buf_relse(agbp);
 out_put_perag:
 	xfs_perag_put(pag);
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 297d7ce2901e..3d76a9e35870 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -219,7 +219,7 @@ xfs_getfsmap_is_shared(
 	error = xfs_refcount_find_shared(cur, rec->rm_startblock,
 			rec->rm_blockcount, &fbno, &flen, false);
 
-	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cur, error);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 24f4f1c555b5..65810827a8d0 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -458,8 +458,7 @@ xfs_bulkstat(
 		 * pending error, then we are done.
 		 */
 del_cursor:
-		xfs_btree_del_cursor(cur, error ?
-					  XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+		xfs_btree_del_cursor(cur, error);
 		xfs_buf_relse(agbp);
 		if (error)
 			break;
@@ -632,8 +631,7 @@ next_ag:
 
 	kmem_free(buffer);
 	if (cur)
-		xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
-					   XFS_BTREE_NOERROR));
+		xfs_btree_del_cursor(cur, error);
 	if (agbp)
 		xfs_buf_relse(agbp);
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 3143889097f1..406f79d44153 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -162,7 +162,7 @@ xfs_reflink_find_shared(
 	error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen,
 			find_end_of_shared);
 
-	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cur, error);
 
 	xfs_trans_brelse(tp, agbp);
 	return error;
-- 
cgit v1.2.3


From ef97ef26d263fb65f0c7446a10cf93201dc0388c Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 19 Jul 2018 12:29:10 -0700
Subject: xfs: clean up xfs_btree_del_cursor callers

Less trivial cleanups of the error argument to xfs_btree_del_cursor;
these require some minor code refactoring.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_refcount.c | 11 +++--------
 fs/xfs/scrub/repair.c        |  7 +++----
 2 files changed, 6 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index a2dfae67ade1..9ef1f440a6f2 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1675,11 +1675,11 @@ xfs_refcount_recover_cow_leftovers(
 	high.rc.rc_startblock = -1U;
 	error = xfs_btree_query_range(cur, &low, &high,
 			xfs_refcount_recover_extent, &debris);
-	if (error)
-		goto out_cursor;
-	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cur, error);
 	xfs_trans_brelse(tp, agbp);
 	xfs_trans_cancel(tp);
+	if (error)
+		goto out_free;
 
 	/* Now iterate the list to free the leftovers */
 	list_for_each_entry_safe(rr, n, &debris, rr_list) {
@@ -1727,11 +1727,6 @@ out_free:
 		kmem_free(rr);
 	}
 	return error;
-
-out_cursor:
-	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-	xfs_trans_brelse(tp, agbp);
-	goto out_trans;
 }
 
 /* Is there a record covering a given extent? */
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 35c589a04fac..ea39e2bdc96a 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -747,9 +747,9 @@ xfs_repair_dispose_btree_block(
 
 	/* Can we find any other rmappings? */
 	error = xfs_rmap_has_other_keys(cur, agbno, 1, oinfo, &has_other_rmap);
+	xfs_btree_del_cursor(cur, error);
 	if (error)
-		goto out_cur;
-	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+		goto out_free;
 
 	/*
 	 * If there are other rmappings, this block is cross linked and must
@@ -779,8 +779,7 @@ xfs_repair_dispose_btree_block(
 		return xfs_trans_roll_inode(&sc->tp, sc->ip);
 	return xfs_repair_roll_ag_trans(sc);
 
-out_cur:
-	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+out_free:
 	if (agf_bp != sc->sa.agf_bp)
 		xfs_trans_brelse(sc->tp, agf_bp);
 	return error;
-- 
cgit v1.2.3


From c517b3aa02cff1dd688aa783b748e06c8aee1285 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 19 Jul 2018 12:29:11 -0700
Subject: xfs: shorten xfs_scrub_ prefix

Shorten all the metadata checking xfs_scrub_ prefixes to xchk_.  After
this, the only xfs_scrub* symbols are the ones that pertain to both
scrub and repair.  Whitespace damage will be fixed in a subsequent
patch.  There are no functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/scrub/agheader.c | 318 ++++++++++++++++++++++++------------------------
 fs/xfs/scrub/alloc.c    |  56 ++++-----
 fs/xfs/scrub/attr.c     | 106 ++++++++--------
 fs/xfs/scrub/bmap.c     | 176 +++++++++++++--------------
 fs/xfs/scrub/btree.c    | 166 ++++++++++++-------------
 fs/xfs/scrub/btree.h    |  22 ++--
 fs/xfs/scrub/common.c   | 172 +++++++++++++-------------
 fs/xfs/scrub/common.h   | 102 ++++++++--------
 fs/xfs/scrub/dabtree.c  | 124 +++++++++----------
 fs/xfs/scrub/dabtree.h  |  14 +--
 fs/xfs/scrub/dir.c      | 170 +++++++++++++-------------
 fs/xfs/scrub/ialloc.c   | 138 ++++++++++-----------
 fs/xfs/scrub/inode.c    | 160 ++++++++++++------------
 fs/xfs/scrub/parent.c   |  62 +++++-----
 fs/xfs/scrub/quota.c    |  64 +++++-----
 fs/xfs/scrub/refcount.c | 114 ++++++++---------
 fs/xfs/scrub/repair.c   |   4 +-
 fs/xfs/scrub/rmap.c     |  78 ++++++------
 fs/xfs/scrub/rtbitmap.c |  30 ++---
 fs/xfs/scrub/scrub.c    | 142 ++++++++++-----------
 fs/xfs/scrub/scrub.h    |  82 ++++++-------
 fs/xfs/scrub/symlink.c  |  14 +--
 fs/xfs/scrub/trace.c    |   2 +-
 fs/xfs/scrub/trace.h    |  70 +++++------
 24 files changed, 1196 insertions(+), 1190 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 9bb0745f1ad2..c0625ec16d63 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -28,7 +28,7 @@
 
 /* Cross-reference with the other btrees. */
 STATIC void
-xfs_scrub_superblock_xref(
+xchk_superblock_xref(
 	struct xfs_scrub_context	*sc,
 	struct xfs_buf			*bp)
 {
@@ -43,15 +43,15 @@ xfs_scrub_superblock_xref(
 
 	agbno = XFS_SB_BLOCK(mp);
 
-	error = xfs_scrub_ag_init(sc, agno, &sc->sa);
-	if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error))
+	error = xchk_ag_init(sc, agno, &sc->sa);
+	if (!xchk_xref_process_error(sc, agno, agbno, &error))
 		return;
 
-	xfs_scrub_xref_is_used_space(sc, agbno, 1);
-	xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+	xchk_xref_is_used_space(sc, agbno, 1);
+	xchk_xref_is_not_inode_chunk(sc, agbno, 1);
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
-	xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
-	xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+	xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+	xchk_xref_is_not_shared(sc, agbno, 1);
 
 	/* scrub teardown will take care of sc->sa for us */
 }
@@ -65,7 +65,7 @@ xfs_scrub_superblock_xref(
  * sb 0 is ok and we can use its information to check everything else.
  */
 int
-xfs_scrub_superblock(
+xchk_superblock(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
@@ -98,7 +98,7 @@ xfs_scrub_superblock(
 	default:
 		break;
 	}
-	if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error))
+	if (!xchk_process_error(sc, agno, XFS_SB_BLOCK(mp), &error))
 		return error;
 
 	sb = XFS_BUF_TO_SBP(bp);
@@ -110,46 +110,46 @@ xfs_scrub_superblock(
 	 * checked.
 	 */
 	if (sb->sb_blocksize != cpu_to_be32(mp->m_sb.sb_blocksize))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_dblocks != cpu_to_be64(mp->m_sb.sb_dblocks))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_rblocks != cpu_to_be64(mp->m_sb.sb_rblocks))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_rextents != cpu_to_be64(mp->m_sb.sb_rextents))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (!uuid_equal(&sb->sb_uuid, &mp->m_sb.sb_uuid))
-		xfs_scrub_block_set_preen(sc, bp);
+		xchk_block_set_preen(sc, bp);
 
 	if (sb->sb_logstart != cpu_to_be64(mp->m_sb.sb_logstart))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_rootino != cpu_to_be64(mp->m_sb.sb_rootino))
-		xfs_scrub_block_set_preen(sc, bp);
+		xchk_block_set_preen(sc, bp);
 
 	if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino))
-		xfs_scrub_block_set_preen(sc, bp);
+		xchk_block_set_preen(sc, bp);
 
 	if (sb->sb_rsumino != cpu_to_be64(mp->m_sb.sb_rsumino))
-		xfs_scrub_block_set_preen(sc, bp);
+		xchk_block_set_preen(sc, bp);
 
 	if (sb->sb_rextsize != cpu_to_be32(mp->m_sb.sb_rextsize))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_agblocks != cpu_to_be32(mp->m_sb.sb_agblocks))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_agcount != cpu_to_be32(mp->m_sb.sb_agcount))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_rbmblocks != cpu_to_be32(mp->m_sb.sb_rbmblocks))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_logblocks != cpu_to_be32(mp->m_sb.sb_logblocks))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	/* Check sb_versionnum bits that are set at mkfs time. */
 	vernum_mask = cpu_to_be16(~XFS_SB_VERSION_OKBITS |
@@ -163,7 +163,7 @@ xfs_scrub_superblock(
 				  XFS_SB_VERSION_DIRV2BIT);
 	if ((sb->sb_versionnum & vernum_mask) !=
 	    (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	/* Check sb_versionnum bits that can be set after mkfs time. */
 	vernum_mask = cpu_to_be16(XFS_SB_VERSION_ATTRBIT |
@@ -171,40 +171,40 @@ xfs_scrub_superblock(
 				  XFS_SB_VERSION_QUOTABIT);
 	if ((sb->sb_versionnum & vernum_mask) !=
 	    (cpu_to_be16(mp->m_sb.sb_versionnum) & vernum_mask))
-		xfs_scrub_block_set_preen(sc, bp);
+		xchk_block_set_preen(sc, bp);
 
 	if (sb->sb_sectsize != cpu_to_be16(mp->m_sb.sb_sectsize))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_inodesize != cpu_to_be16(mp->m_sb.sb_inodesize))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_inopblock != cpu_to_be16(mp->m_sb.sb_inopblock))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (memcmp(sb->sb_fname, mp->m_sb.sb_fname, sizeof(sb->sb_fname)))
-		xfs_scrub_block_set_preen(sc, bp);
+		xchk_block_set_preen(sc, bp);
 
 	if (sb->sb_blocklog != mp->m_sb.sb_blocklog)
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_sectlog != mp->m_sb.sb_sectlog)
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_inodelog != mp->m_sb.sb_inodelog)
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_inopblog != mp->m_sb.sb_inopblog)
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_agblklog != mp->m_sb.sb_agblklog)
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_rextslog != mp->m_sb.sb_rextslog)
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_imax_pct != mp->m_sb.sb_imax_pct)
-		xfs_scrub_block_set_preen(sc, bp);
+		xchk_block_set_preen(sc, bp);
 
 	/*
 	 * Skip the summary counters since we track them in memory anyway.
@@ -212,10 +212,10 @@ xfs_scrub_superblock(
 	 */
 
 	if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino))
-		xfs_scrub_block_set_preen(sc, bp);
+		xchk_block_set_preen(sc, bp);
 
 	if (sb->sb_gquotino != cpu_to_be64(mp->m_sb.sb_gquotino))
-		xfs_scrub_block_set_preen(sc, bp);
+		xchk_block_set_preen(sc, bp);
 
 	/*
 	 * Skip the quota flags since repair will force quotacheck.
@@ -223,46 +223,46 @@ xfs_scrub_superblock(
 	 */
 
 	if (sb->sb_flags != mp->m_sb.sb_flags)
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_shared_vn != mp->m_sb.sb_shared_vn)
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_inoalignmt != cpu_to_be32(mp->m_sb.sb_inoalignmt))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_unit != cpu_to_be32(mp->m_sb.sb_unit))
-		xfs_scrub_block_set_preen(sc, bp);
+		xchk_block_set_preen(sc, bp);
 
 	if (sb->sb_width != cpu_to_be32(mp->m_sb.sb_width))
-		xfs_scrub_block_set_preen(sc, bp);
+		xchk_block_set_preen(sc, bp);
 
 	if (sb->sb_dirblklog != mp->m_sb.sb_dirblklog)
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_logsectlog != mp->m_sb.sb_logsectlog)
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_logsectsize != cpu_to_be16(mp->m_sb.sb_logsectsize))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (sb->sb_logsunit != cpu_to_be32(mp->m_sb.sb_logsunit))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	/* Do we see any invalid bits in sb_features2? */
 	if (!xfs_sb_version_hasmorebits(&mp->m_sb)) {
 		if (sb->sb_features2 != 0)
-			xfs_scrub_block_set_corrupt(sc, bp);
+			xchk_block_set_corrupt(sc, bp);
 	} else {
 		v2_ok = XFS_SB_VERSION2_OKBITS;
 		if (XFS_SB_VERSION_NUM(&mp->m_sb) >= XFS_SB_VERSION_5)
 			v2_ok |= XFS_SB_VERSION2_CRCBIT;
 
 		if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok)))
-			xfs_scrub_block_set_corrupt(sc, bp);
+			xchk_block_set_corrupt(sc, bp);
 
 		if (sb->sb_features2 != sb->sb_bad_features2)
-			xfs_scrub_block_set_preen(sc, bp);
+			xchk_block_set_preen(sc, bp);
 	}
 
 	/* Check sb_features2 flags that are set at mkfs time. */
@@ -272,26 +272,26 @@ xfs_scrub_superblock(
 				    XFS_SB_VERSION2_FTYPE);
 	if ((sb->sb_features2 & features_mask) !=
 	    (cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	/* Check sb_features2 flags that can be set after mkfs time. */
 	features_mask = cpu_to_be32(XFS_SB_VERSION2_ATTR2BIT);
 	if ((sb->sb_features2 & features_mask) !=
 	    (cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
 	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
 		/* all v5 fields must be zero */
 		if (memchr_inv(&sb->sb_features_compat, 0,
 				sizeof(struct xfs_dsb) -
 				offsetof(struct xfs_dsb, sb_features_compat)))
-			xfs_scrub_block_set_corrupt(sc, bp);
+			xchk_block_set_corrupt(sc, bp);
 	} else {
 		/* Check compat flags; all are set at mkfs time. */
 		features_mask = cpu_to_be32(XFS_SB_FEAT_COMPAT_UNKNOWN);
 		if ((sb->sb_features_compat & features_mask) !=
 		    (cpu_to_be32(mp->m_sb.sb_features_compat) & features_mask))
-			xfs_scrub_block_set_corrupt(sc, bp);
+			xchk_block_set_corrupt(sc, bp);
 
 		/* Check ro compat flags; all are set at mkfs time. */
 		features_mask = cpu_to_be32(XFS_SB_FEAT_RO_COMPAT_UNKNOWN |
@@ -301,7 +301,7 @@ xfs_scrub_superblock(
 		if ((sb->sb_features_ro_compat & features_mask) !=
 		    (cpu_to_be32(mp->m_sb.sb_features_ro_compat) &
 		     features_mask))
-			xfs_scrub_block_set_corrupt(sc, bp);
+			xchk_block_set_corrupt(sc, bp);
 
 		/* Check incompat flags; all are set at mkfs time. */
 		features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_UNKNOWN |
@@ -311,22 +311,22 @@ xfs_scrub_superblock(
 		if ((sb->sb_features_incompat & features_mask) !=
 		    (cpu_to_be32(mp->m_sb.sb_features_incompat) &
 		     features_mask))
-			xfs_scrub_block_set_corrupt(sc, bp);
+			xchk_block_set_corrupt(sc, bp);
 
 		/* Check log incompat flags; all are set at mkfs time. */
 		features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN);
 		if ((sb->sb_features_log_incompat & features_mask) !=
 		    (cpu_to_be32(mp->m_sb.sb_features_log_incompat) &
 		     features_mask))
-			xfs_scrub_block_set_corrupt(sc, bp);
+			xchk_block_set_corrupt(sc, bp);
 
 		/* Don't care about sb_crc */
 
 		if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align))
-			xfs_scrub_block_set_corrupt(sc, bp);
+			xchk_block_set_corrupt(sc, bp);
 
 		if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino))
-			xfs_scrub_block_set_preen(sc, bp);
+			xchk_block_set_preen(sc, bp);
 
 		/* Don't care about sb_lsn */
 	}
@@ -334,15 +334,15 @@ xfs_scrub_superblock(
 	if (xfs_sb_version_hasmetauuid(&mp->m_sb)) {
 		/* The metadata UUID must be the same for all supers */
 		if (!uuid_equal(&sb->sb_meta_uuid, &mp->m_sb.sb_meta_uuid))
-			xfs_scrub_block_set_corrupt(sc, bp);
+			xchk_block_set_corrupt(sc, bp);
 	}
 
 	/* Everything else must be zero. */
 	if (memchr_inv(sb + 1, 0,
 			BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 
-	xfs_scrub_superblock_xref(sc, bp);
+	xchk_superblock_xref(sc, bp);
 
 	return error;
 }
@@ -351,7 +351,7 @@ xfs_scrub_superblock(
 
 /* Tally freespace record lengths. */
 STATIC int
-xfs_scrub_agf_record_bno_lengths(
+xchk_agf_record_bno_lengths(
 	struct xfs_btree_cur		*cur,
 	struct xfs_alloc_rec_incore	*rec,
 	void				*priv)
@@ -364,7 +364,7 @@ xfs_scrub_agf_record_bno_lengths(
 
 /* Check agf_freeblks */
 static inline void
-xfs_scrub_agf_xref_freeblks(
+xchk_agf_xref_freeblks(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_agf			*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
@@ -375,16 +375,16 @@ xfs_scrub_agf_xref_freeblks(
 		return;
 
 	error = xfs_alloc_query_all(sc->sa.bno_cur,
-			xfs_scrub_agf_record_bno_lengths, &blocks);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
+			xchk_agf_record_bno_lengths, &blocks);
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.bno_cur))
 		return;
 	if (blocks != be32_to_cpu(agf->agf_freeblks))
-		xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+		xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
 }
 
 /* Cross reference the AGF with the cntbt (freespace by length btree) */
 static inline void
-xfs_scrub_agf_xref_cntbt(
+xchk_agf_xref_cntbt(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_agf			*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
@@ -398,25 +398,25 @@ xfs_scrub_agf_xref_cntbt(
 
 	/* Any freespace at all? */
 	error = xfs_alloc_lookup_le(sc->sa.cnt_cur, 0, -1U, &have);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.cnt_cur))
 		return;
 	if (!have) {
 		if (agf->agf_freeblks != be32_to_cpu(0))
-			xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+			xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
 		return;
 	}
 
 	/* Check agf_longest */
 	error = xfs_alloc_get_rec(sc->sa.cnt_cur, &agbno, &blocks, &have);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.cnt_cur))
 		return;
 	if (!have || blocks != be32_to_cpu(agf->agf_longest))
-		xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+		xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
 }
 
 /* Check the btree block counts in the AGF against the btrees. */
 STATIC void
-xfs_scrub_agf_xref_btreeblks(
+xchk_agf_xref_btreeblks(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_agf			*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
@@ -428,11 +428,11 @@ xfs_scrub_agf_xref_btreeblks(
 	/* Check agf_rmap_blocks; set up for agf_btreeblks check */
 	if (sc->sa.rmap_cur) {
 		error = xfs_btree_count_blocks(sc->sa.rmap_cur, &blocks);
-		if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+		if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
 			return;
 		btreeblks = blocks - 1;
 		if (blocks != be32_to_cpu(agf->agf_rmap_blocks))
-			xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+			xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
 	} else {
 		btreeblks = 0;
 	}
@@ -447,22 +447,22 @@ xfs_scrub_agf_xref_btreeblks(
 
 	/* Check agf_btreeblks */
 	error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.bno_cur))
 		return;
 	btreeblks += blocks - 1;
 
 	error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.cnt_cur))
 		return;
 	btreeblks += blocks - 1;
 
 	if (btreeblks != be32_to_cpu(agf->agf_btreeblks))
-		xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+		xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
 }
 
 /* Check agf_refcount_blocks against tree size */
 static inline void
-xfs_scrub_agf_xref_refcblks(
+xchk_agf_xref_refcblks(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_agf			*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
@@ -473,15 +473,15 @@ xfs_scrub_agf_xref_refcblks(
 		return;
 
 	error = xfs_btree_count_blocks(sc->sa.refc_cur, &blocks);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
 		return;
 	if (blocks != be32_to_cpu(agf->agf_refcount_blocks))
-		xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+		xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
 }
 
 /* Cross-reference with the other btrees. */
 STATIC void
-xfs_scrub_agf_xref(
+xchk_agf_xref(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_owner_info		oinfo;
@@ -494,26 +494,26 @@ xfs_scrub_agf_xref(
 
 	agbno = XFS_AGF_BLOCK(mp);
 
-	error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+	error = xchk_ag_btcur_init(sc, &sc->sa);
 	if (error)
 		return;
 
-	xfs_scrub_xref_is_used_space(sc, agbno, 1);
-	xfs_scrub_agf_xref_freeblks(sc);
-	xfs_scrub_agf_xref_cntbt(sc);
-	xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+	xchk_xref_is_used_space(sc, agbno, 1);
+	xchk_agf_xref_freeblks(sc);
+	xchk_agf_xref_cntbt(sc);
+	xchk_xref_is_not_inode_chunk(sc, agbno, 1);
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
-	xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
-	xfs_scrub_agf_xref_btreeblks(sc);
-	xfs_scrub_xref_is_not_shared(sc, agbno, 1);
-	xfs_scrub_agf_xref_refcblks(sc);
+	xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+	xchk_agf_xref_btreeblks(sc);
+	xchk_xref_is_not_shared(sc, agbno, 1);
+	xchk_agf_xref_refcblks(sc);
 
 	/* scrub teardown will take care of sc->sa for us */
 }
 
 /* Scrub the AGF. */
 int
-xfs_scrub_agf(
+xchk_agf(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
@@ -529,54 +529,54 @@ xfs_scrub_agf(
 	int				error = 0;
 
 	agno = sc->sa.agno = sc->sm->sm_agno;
-	error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp,
+	error = xchk_ag_read_headers(sc, agno, &sc->sa.agi_bp,
 			&sc->sa.agf_bp, &sc->sa.agfl_bp);
-	if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error))
+	if (!xchk_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error))
 		goto out;
-	xfs_scrub_buffer_recheck(sc, sc->sa.agf_bp);
+	xchk_buffer_recheck(sc, sc->sa.agf_bp);
 
 	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
 
 	/* Check the AG length */
 	eoag = be32_to_cpu(agf->agf_length);
 	if (eoag != xfs_ag_block_count(mp, agno))
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 
 	/* Check the AGF btree roots and levels */
 	agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
 	if (!xfs_verify_agbno(mp, agno, agbno))
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 
 	agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
 	if (!xfs_verify_agbno(mp, agno, agbno))
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 
 	level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
 	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 
 	level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
 	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
 		agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
 		if (!xfs_verify_agbno(mp, agno, agbno))
-			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+			xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 
 		level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
 		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
-			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+			xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 	}
 
 	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
 		agbno = be32_to_cpu(agf->agf_refcount_root);
 		if (!xfs_verify_agbno(mp, agno, agbno))
-			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+			xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 
 		level = be32_to_cpu(agf->agf_refcount_level);
 		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
-			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+			xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 	}
 
 	/* Check the AGFL counters */
@@ -588,16 +588,16 @@ xfs_scrub_agf(
 	else
 		fl_count = xfs_agfl_size(mp) - agfl_first + agfl_last + 1;
 	if (agfl_count != 0 && fl_count != agfl_count)
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 
-	xfs_scrub_agf_xref(sc);
+	xchk_agf_xref(sc);
 out:
 	return error;
 }
 
 /* AGFL */
 
-struct xfs_scrub_agfl_info {
+struct xchk_agfl_info {
 	struct xfs_owner_info		oinfo;
 	unsigned int			sz_entries;
 	unsigned int			nr_entries;
@@ -607,7 +607,7 @@ struct xfs_scrub_agfl_info {
 
 /* Cross-reference with the other btrees. */
 STATIC void
-xfs_scrub_agfl_block_xref(
+xchk_agfl_block_xref(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			agbno,
 	struct xfs_owner_info		*oinfo)
@@ -615,20 +615,20 @@ xfs_scrub_agfl_block_xref(
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
 
-	xfs_scrub_xref_is_used_space(sc, agbno, 1);
-	xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
-	xfs_scrub_xref_is_owned_by(sc, agbno, 1, oinfo);
-	xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+	xchk_xref_is_used_space(sc, agbno, 1);
+	xchk_xref_is_not_inode_chunk(sc, agbno, 1);
+	xchk_xref_is_owned_by(sc, agbno, 1, oinfo);
+	xchk_xref_is_not_shared(sc, agbno, 1);
 }
 
 /* Scrub an AGFL block. */
 STATIC int
-xfs_scrub_agfl_block(
+xchk_agfl_block(
 	struct xfs_mount		*mp,
 	xfs_agblock_t			agbno,
 	void				*priv)
 {
-	struct xfs_scrub_agfl_info	*sai = priv;
+	struct xchk_agfl_info		*sai = priv;
 	struct xfs_scrub_context	*sc = sai->sc;
 	xfs_agnumber_t			agno = sc->sa.agno;
 
@@ -636,9 +636,9 @@ xfs_scrub_agfl_block(
 	    sai->nr_entries < sai->sz_entries)
 		sai->entries[sai->nr_entries++] = agbno;
 	else
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agfl_bp);
 
-	xfs_scrub_agfl_block_xref(sc, agbno, priv);
+	xchk_agfl_block_xref(sc, agbno, priv);
 
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return XFS_BTREE_QUERY_RANGE_ABORT;
@@ -647,7 +647,7 @@ xfs_scrub_agfl_block(
 }
 
 static int
-xfs_scrub_agblock_cmp(
+xchk_agblock_cmp(
 	const void		*pa,
 	const void		*pb)
 {
@@ -659,7 +659,7 @@ xfs_scrub_agblock_cmp(
 
 /* Cross-reference with the other btrees. */
 STATIC void
-xfs_scrub_agfl_xref(
+xchk_agfl_xref(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_owner_info		oinfo;
@@ -672,15 +672,15 @@ xfs_scrub_agfl_xref(
 
 	agbno = XFS_AGFL_BLOCK(mp);
 
-	error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+	error = xchk_ag_btcur_init(sc, &sc->sa);
 	if (error)
 		return;
 
-	xfs_scrub_xref_is_used_space(sc, agbno, 1);
-	xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+	xchk_xref_is_used_space(sc, agbno, 1);
+	xchk_xref_is_not_inode_chunk(sc, agbno, 1);
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
-	xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
-	xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+	xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+	xchk_xref_is_not_shared(sc, agbno, 1);
 
 	/*
 	 * Scrub teardown will take care of sc->sa for us.  Leave sc->sa
@@ -690,10 +690,10 @@ xfs_scrub_agfl_xref(
 
 /* Scrub the AGFL. */
 int
-xfs_scrub_agfl(
+xchk_agfl(
 	struct xfs_scrub_context	*sc)
 {
-	struct xfs_scrub_agfl_info	sai;
+	struct xchk_agfl_info		sai;
 	struct xfs_agf			*agf;
 	xfs_agnumber_t			agno;
 	unsigned int			agflcount;
@@ -701,15 +701,15 @@ xfs_scrub_agfl(
 	int				error;
 
 	agno = sc->sa.agno = sc->sm->sm_agno;
-	error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp,
+	error = xchk_ag_read_headers(sc, agno, &sc->sa.agi_bp,
 			&sc->sa.agf_bp, &sc->sa.agfl_bp);
-	if (!xfs_scrub_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error))
+	if (!xchk_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error))
 		goto out;
 	if (!sc->sa.agf_bp)
 		return -EFSCORRUPTED;
-	xfs_scrub_buffer_recheck(sc, sc->sa.agfl_bp);
+	xchk_buffer_recheck(sc, sc->sa.agfl_bp);
 
-	xfs_scrub_agfl_xref(sc);
+	xchk_agfl_xref(sc);
 
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		goto out;
@@ -718,7 +718,7 @@ xfs_scrub_agfl(
 	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
 	agflcount = be32_to_cpu(agf->agf_flcount);
 	if (agflcount > xfs_agfl_size(sc->mp)) {
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 		goto out;
 	}
 	memset(&sai, 0, sizeof(sai));
@@ -734,7 +734,7 @@ xfs_scrub_agfl(
 	/* Check the blocks in the AGFL. */
 	xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG);
 	error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
-			sc->sa.agfl_bp, xfs_scrub_agfl_block, &sai);
+			sc->sa.agfl_bp, xchk_agfl_block, &sai);
 	if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
 		error = 0;
 		goto out_free;
@@ -743,16 +743,16 @@ xfs_scrub_agfl(
 		goto out_free;
 
 	if (agflcount != sai.nr_entries) {
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 		goto out_free;
 	}
 
 	/* Sort entries, check for duplicates. */
 	sort(sai.entries, sai.nr_entries, sizeof(sai.entries[0]),
-			xfs_scrub_agblock_cmp, NULL);
+			xchk_agblock_cmp, NULL);
 	for (i = 1; i < sai.nr_entries; i++) {
 		if (sai.entries[i] == sai.entries[i - 1]) {
-			xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+			xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 			break;
 		}
 	}
@@ -767,7 +767,7 @@ out:
 
 /* Check agi_count/agi_freecount */
 static inline void
-xfs_scrub_agi_xref_icounts(
+xchk_agi_xref_icounts(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_agi			*agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
@@ -779,16 +779,16 @@ xfs_scrub_agi_xref_icounts(
 		return;
 
 	error = xfs_ialloc_count_inodes(sc->sa.ino_cur, &icount, &freecount);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.ino_cur))
 		return;
 	if (be32_to_cpu(agi->agi_count) != icount ||
 	    be32_to_cpu(agi->agi_freecount) != freecount)
-		xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agi_bp);
+		xchk_block_xref_set_corrupt(sc, sc->sa.agi_bp);
 }
 
 /* Cross-reference with the other btrees. */
 STATIC void
-xfs_scrub_agi_xref(
+xchk_agi_xref(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_owner_info		oinfo;
@@ -801,23 +801,23 @@ xfs_scrub_agi_xref(
 
 	agbno = XFS_AGI_BLOCK(mp);
 
-	error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+	error = xchk_ag_btcur_init(sc, &sc->sa);
 	if (error)
 		return;
 
-	xfs_scrub_xref_is_used_space(sc, agbno, 1);
-	xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
-	xfs_scrub_agi_xref_icounts(sc);
+	xchk_xref_is_used_space(sc, agbno, 1);
+	xchk_xref_is_not_inode_chunk(sc, agbno, 1);
+	xchk_agi_xref_icounts(sc);
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
-	xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
-	xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+	xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+	xchk_xref_is_not_shared(sc, agbno, 1);
 
 	/* scrub teardown will take care of sc->sa for us */
 }
 
 /* Scrub the AGI. */
 int
-xfs_scrub_agi(
+xchk_agi(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
@@ -834,36 +834,36 @@ xfs_scrub_agi(
 	int				error = 0;
 
 	agno = sc->sa.agno = sc->sm->sm_agno;
-	error = xfs_scrub_ag_read_headers(sc, agno, &sc->sa.agi_bp,
+	error = xchk_ag_read_headers(sc, agno, &sc->sa.agi_bp,
 			&sc->sa.agf_bp, &sc->sa.agfl_bp);
-	if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
+	if (!xchk_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
 		goto out;
-	xfs_scrub_buffer_recheck(sc, sc->sa.agi_bp);
+	xchk_buffer_recheck(sc, sc->sa.agi_bp);
 
 	agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
 
 	/* Check the AG length */
 	eoag = be32_to_cpu(agi->agi_length);
 	if (eoag != xfs_ag_block_count(mp, agno))
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 
 	/* Check btree roots and levels */
 	agbno = be32_to_cpu(agi->agi_root);
 	if (!xfs_verify_agbno(mp, agno, agbno))
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 
 	level = be32_to_cpu(agi->agi_level);
 	if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 
 	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
 		agbno = be32_to_cpu(agi->agi_free_root);
 		if (!xfs_verify_agbno(mp, agno, agbno))
-			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+			xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 
 		level = be32_to_cpu(agi->agi_free_level);
 		if (level <= 0 || level > XFS_BTREE_MAXLEVELS)
-			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+			xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 	}
 
 	/* Check inode counters */
@@ -871,16 +871,16 @@ xfs_scrub_agi(
 	icount = be32_to_cpu(agi->agi_count);
 	if (icount > last_agino - first_agino + 1 ||
 	    icount < be32_to_cpu(agi->agi_freecount))
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 
 	/* Check inode pointers */
 	agino = be32_to_cpu(agi->agi_newino);
 	if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino))
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 
 	agino = be32_to_cpu(agi->agi_dirino);
 	if (agino != NULLAGINO && !xfs_verify_agino(mp, agno, agino))
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 
 	/* Check unlinked inode buckets */
 	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
@@ -888,13 +888,13 @@ xfs_scrub_agi(
 		if (agino == NULLAGINO)
 			continue;
 		if (!xfs_verify_agino(mp, agno, agino))
-			xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+			xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 	}
 
 	if (agi->agi_pad32 != cpu_to_be32(0))
-		xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+		xchk_block_set_corrupt(sc, sc->sa.agi_bp);
 
-	xfs_scrub_agi_xref(sc);
+	xchk_agi_xref(sc);
 out:
 	return error;
 }
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 50e4f7fa06f0..1f6e3a6a1fdd 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -28,11 +28,11 @@
  * Set us up to scrub free space btrees.
  */
 int
-xfs_scrub_setup_ag_allocbt(
+xchk_setup_ag_allocbt(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
-	return xfs_scrub_setup_ag_btree(sc, ip, false);
+	return xchk_setup_ag_btree(sc, ip, false);
 }
 
 /* Free space btree scrubber. */
@@ -41,7 +41,7 @@ xfs_scrub_setup_ag_allocbt(
  * bnobt/cntbt record, respectively.
  */
 STATIC void
-xfs_scrub_allocbt_xref_other(
+xchk_allocbt_xref_other(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
@@ -56,32 +56,32 @@ xfs_scrub_allocbt_xref_other(
 		pcur = &sc->sa.cnt_cur;
 	else
 		pcur = &sc->sa.bno_cur;
-	if (!*pcur || xfs_scrub_skip_xref(sc->sm))
+	if (!*pcur || xchk_skip_xref(sc->sm))
 		return;
 
 	error = xfs_alloc_lookup_le(*pcur, agbno, len, &has_otherrec);
-	if (!xfs_scrub_should_check_xref(sc, &error, pcur))
+	if (!xchk_should_check_xref(sc, &error, pcur))
 		return;
 	if (!has_otherrec) {
-		xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+		xchk_btree_xref_set_corrupt(sc, *pcur, 0);
 		return;
 	}
 
 	error = xfs_alloc_get_rec(*pcur, &fbno, &flen, &has_otherrec);
-	if (!xfs_scrub_should_check_xref(sc, &error, pcur))
+	if (!xchk_should_check_xref(sc, &error, pcur))
 		return;
 	if (!has_otherrec) {
-		xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+		xchk_btree_xref_set_corrupt(sc, *pcur, 0);
 		return;
 	}
 
 	if (fbno != agbno || flen != len)
-		xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+		xchk_btree_xref_set_corrupt(sc, *pcur, 0);
 }
 
 /* Cross-reference with the other btrees. */
 STATIC void
-xfs_scrub_allocbt_xref(
+xchk_allocbt_xref(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
@@ -89,16 +89,16 @@ xfs_scrub_allocbt_xref(
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
 
-	xfs_scrub_allocbt_xref_other(sc, agbno, len);
-	xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
-	xfs_scrub_xref_has_no_owner(sc, agbno, len);
-	xfs_scrub_xref_is_not_shared(sc, agbno, len);
+	xchk_allocbt_xref_other(sc, agbno, len);
+	xchk_xref_is_not_inode_chunk(sc, agbno, len);
+	xchk_xref_has_no_owner(sc, agbno, len);
+	xchk_xref_is_not_shared(sc, agbno, len);
 }
 
 /* Scrub a bnobt/cntbt record. */
 STATIC int
-xfs_scrub_allocbt_rec(
-	struct xfs_scrub_btree		*bs,
+xchk_allocbt_rec(
+	struct xchk_btree		*bs,
 	union xfs_btree_rec		*rec)
 {
 	struct xfs_mount		*mp = bs->cur->bc_mp;
@@ -113,16 +113,16 @@ xfs_scrub_allocbt_rec(
 	if (bno + len <= bno ||
 	    !xfs_verify_agbno(mp, agno, bno) ||
 	    !xfs_verify_agbno(mp, agno, bno + len - 1))
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
-	xfs_scrub_allocbt_xref(bs->sc, bno, len);
+	xchk_allocbt_xref(bs->sc, bno, len);
 
 	return error;
 }
 
 /* Scrub the freespace btrees for some AG. */
 STATIC int
-xfs_scrub_allocbt(
+xchk_allocbt(
 	struct xfs_scrub_context	*sc,
 	xfs_btnum_t			which)
 {
@@ -131,26 +131,26 @@ xfs_scrub_allocbt(
 
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
 	cur = which == XFS_BTNUM_BNO ? sc->sa.bno_cur : sc->sa.cnt_cur;
-	return xfs_scrub_btree(sc, cur, xfs_scrub_allocbt_rec, &oinfo, NULL);
+	return xchk_btree(sc, cur, xchk_allocbt_rec, &oinfo, NULL);
 }
 
 int
-xfs_scrub_bnobt(
+xchk_bnobt(
 	struct xfs_scrub_context	*sc)
 {
-	return xfs_scrub_allocbt(sc, XFS_BTNUM_BNO);
+	return xchk_allocbt(sc, XFS_BTNUM_BNO);
 }
 
 int
-xfs_scrub_cntbt(
+xchk_cntbt(
 	struct xfs_scrub_context	*sc)
 {
-	return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT);
+	return xchk_allocbt(sc, XFS_BTNUM_CNT);
 }
 
 /* xref check that the extent is not free */
 void
-xfs_scrub_xref_is_used_space(
+xchk_xref_is_used_space(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
@@ -158,12 +158,12 @@ xfs_scrub_xref_is_used_space(
 	bool				is_freesp;
 	int				error;
 
-	if (!sc->sa.bno_cur || xfs_scrub_skip_xref(sc->sm))
+	if (!sc->sa.bno_cur || xchk_skip_xref(sc->sm))
 		return;
 
 	error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.bno_cur))
 		return;
 	if (is_freesp)
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0);
 }
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index de51cf8a8516..0068bebddf3e 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -32,7 +32,7 @@
 
 /* Set us up to scrub an inode's extended attributes. */
 int
-xfs_scrub_setup_xattr(
+xchk_setup_xattr(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
@@ -50,12 +50,12 @@ xfs_scrub_setup_xattr(
 	if (!sc->buf)
 		return -ENOMEM;
 
-	return xfs_scrub_setup_inode_contents(sc, ip, 0);
+	return xchk_setup_inode_contents(sc, ip, 0);
 }
 
 /* Extended Attributes */
 
-struct xfs_scrub_xattr {
+struct xchk_xattr {
 	struct xfs_attr_list_context	context;
 	struct xfs_scrub_context	*sc;
 };
@@ -69,22 +69,22 @@ struct xfs_scrub_xattr {
  * or if we get more or less data than we expected.
  */
 static void
-xfs_scrub_xattr_listent(
+xchk_xattr_listent(
 	struct xfs_attr_list_context	*context,
 	int				flags,
 	unsigned char			*name,
 	int				namelen,
 	int				valuelen)
 {
-	struct xfs_scrub_xattr		*sx;
+	struct xchk_xattr		*sx;
 	struct xfs_da_args		args = { NULL };
 	int				error = 0;
 
-	sx = container_of(context, struct xfs_scrub_xattr, context);
+	sx = container_of(context, struct xchk_xattr, context);
 
 	if (flags & XFS_ATTR_INCOMPLETE) {
 		/* Incomplete attr key, just mark the inode for preening. */
-		xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino);
+		xchk_ino_set_preen(sx->sc, context->dp->i_ino);
 		return;
 	}
 
@@ -106,11 +106,11 @@ xfs_scrub_xattr_listent(
 	error = xfs_attr_get_ilocked(context->dp, &args);
 	if (error == -EEXIST)
 		error = 0;
-	if (!xfs_scrub_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno,
+	if (!xchk_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno,
 			&error))
 		goto fail_xref;
 	if (args.valuelen != valuelen)
-		xfs_scrub_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK,
+		xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK,
 					     args.blkno);
 fail_xref:
 	if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
@@ -126,7 +126,7 @@ fail_xref:
  * the smallest address
  */
 STATIC bool
-xfs_scrub_xattr_set_map(
+xchk_xattr_set_map(
 	struct xfs_scrub_context	*sc,
 	unsigned long			*map,
 	unsigned int			start,
@@ -154,7 +154,7 @@ xfs_scrub_xattr_set_map(
  * attr freemap has problems or points to used space.
  */
 STATIC bool
-xfs_scrub_xattr_check_freemap(
+xchk_xattr_check_freemap(
 	struct xfs_scrub_context	*sc,
 	unsigned long			*map,
 	struct xfs_attr3_icleaf_hdr	*leafhdr)
@@ -168,7 +168,7 @@ xfs_scrub_xattr_check_freemap(
 	freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize);
 	bitmap_zero(freemap, mapsize);
 	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
-		if (!xfs_scrub_xattr_set_map(sc, freemap,
+		if (!xchk_xattr_set_map(sc, freemap,
 				leafhdr->freemap[i].base,
 				leafhdr->freemap[i].size))
 			return false;
@@ -184,8 +184,8 @@ xfs_scrub_xattr_check_freemap(
  * Returns the number of bytes used for the name/value data.
  */
 STATIC void
-xfs_scrub_xattr_entry(
-	struct xfs_scrub_da_btree	*ds,
+xchk_xattr_entry(
+	struct xchk_da_btree		*ds,
 	int				level,
 	char				*buf_end,
 	struct xfs_attr_leafblock	*leaf,
@@ -204,17 +204,17 @@ xfs_scrub_xattr_entry(
 	unsigned int			namesize;
 
 	if (ent->pad2 != 0)
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 
 	/* Hash values in order? */
 	if (be32_to_cpu(ent->hashval) < *last_hashval)
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 	*last_hashval = be32_to_cpu(ent->hashval);
 
 	nameidx = be16_to_cpu(ent->nameidx);
 	if (nameidx < leafhdr->firstused ||
 	    nameidx >= mp->m_attr_geo->blksize) {
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 		return;
 	}
 
@@ -225,27 +225,27 @@ xfs_scrub_xattr_entry(
 				be16_to_cpu(lentry->valuelen));
 		name_end = (char *)lentry + namesize;
 		if (lentry->namelen == 0)
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 	} else {
 		rentry = xfs_attr3_leaf_name_remote(leaf, idx);
 		namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
 		name_end = (char *)rentry + namesize;
 		if (rentry->namelen == 0 || rentry->valueblk == 0)
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 	}
 	if (name_end > buf_end)
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 
-	if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, nameidx, namesize))
-		xfs_scrub_da_set_corrupt(ds, level);
+	if (!xchk_xattr_set_map(ds->sc, usedmap, nameidx, namesize))
+		xchk_da_set_corrupt(ds, level);
 	if (!(ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 		*usedbytes += namesize;
 }
 
 /* Scrub an attribute leaf. */
 STATIC int
-xfs_scrub_xattr_block(
-	struct xfs_scrub_da_btree	*ds,
+xchk_xattr_block(
+	struct xchk_da_btree		*ds,
 	int				level)
 {
 	struct xfs_attr3_icleaf_hdr	leafhdr;
@@ -275,10 +275,10 @@ xfs_scrub_xattr_block(
 
 		if (leaf->hdr.pad1 != 0 || leaf->hdr.pad2 != 0 ||
 		    leaf->hdr.info.hdr.pad != 0)
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 	} else {
 		if (leaf->hdr.pad1 != 0 || leaf->hdr.info.pad != 0)
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 	}
 
 	/* Check the leaf header */
@@ -286,44 +286,44 @@ xfs_scrub_xattr_block(
 	hdrsize = xfs_attr3_leaf_hdr_size(leaf);
 
 	if (leafhdr.usedbytes > mp->m_attr_geo->blksize)
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 	if (leafhdr.firstused > mp->m_attr_geo->blksize)
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 	if (leafhdr.firstused < hdrsize)
-		xfs_scrub_da_set_corrupt(ds, level);
-	if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, 0, hdrsize))
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
+	if (!xchk_xattr_set_map(ds->sc, usedmap, 0, hdrsize))
+		xchk_da_set_corrupt(ds, level);
 
 	if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		goto out;
 
 	entries = xfs_attr3_leaf_entryp(leaf);
 	if ((char *)&entries[leafhdr.count] > (char *)leaf + leafhdr.firstused)
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 
 	buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
 	for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
 		/* Mark the leaf entry itself. */
 		off = (char *)ent - (char *)leaf;
-		if (!xfs_scrub_xattr_set_map(ds->sc, usedmap, off,
+		if (!xchk_xattr_set_map(ds->sc, usedmap, off,
 				sizeof(xfs_attr_leaf_entry_t))) {
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 			goto out;
 		}
 
 		/* Check the entry and nameval. */
-		xfs_scrub_xattr_entry(ds, level, buf_end, leaf, &leafhdr,
+		xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr,
 				usedmap, ent, i, &usedbytes, &last_hashval);
 
 		if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 			goto out;
 	}
 
-	if (!xfs_scrub_xattr_check_freemap(ds->sc, usedmap, &leafhdr))
-		xfs_scrub_da_set_corrupt(ds, level);
+	if (!xchk_xattr_check_freemap(ds->sc, usedmap, &leafhdr))
+		xchk_da_set_corrupt(ds, level);
 
 	if (leafhdr.usedbytes != usedbytes)
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 
 out:
 	return 0;
@@ -331,8 +331,8 @@ out:
 
 /* Scrub a attribute btree record. */
 STATIC int
-xfs_scrub_xattr_rec(
-	struct xfs_scrub_da_btree	*ds,
+xchk_xattr_rec(
+	struct xchk_da_btree		*ds,
 	int				level,
 	void				*rec)
 {
@@ -352,14 +352,14 @@ xfs_scrub_xattr_rec(
 	blk = &ds->state->path.blk[level];
 
 	/* Check the whole block, if necessary. */
-	error = xfs_scrub_xattr_block(ds, level);
+	error = xchk_xattr_block(ds, level);
 	if (error)
 		goto out;
 	if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		goto out;
 
 	/* Check the hash of the entry. */
-	error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval);
+	error = xchk_da_btree_hash(ds, level, &ent->hashval);
 	if (error)
 		goto out;
 
@@ -368,7 +368,7 @@ xfs_scrub_xattr_rec(
 	hdrsize = xfs_attr3_leaf_hdr_size(bp->b_addr);
 	nameidx = be16_to_cpu(ent->nameidx);
 	if (nameidx < hdrsize || nameidx >= mp->m_attr_geo->blksize) {
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 		goto out;
 	}
 
@@ -377,12 +377,12 @@ xfs_scrub_xattr_rec(
 	badflags = ~(XFS_ATTR_LOCAL | XFS_ATTR_ROOT | XFS_ATTR_SECURE |
 			XFS_ATTR_INCOMPLETE);
 	if ((ent->flags & badflags) != 0)
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 	if (ent->flags & XFS_ATTR_LOCAL) {
 		lentry = (struct xfs_attr_leaf_name_local *)
 				(((char *)bp->b_addr) + nameidx);
 		if (lentry->namelen <= 0) {
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 			goto out;
 		}
 		calc_hash = xfs_da_hashname(lentry->nameval, lentry->namelen);
@@ -390,13 +390,13 @@ xfs_scrub_xattr_rec(
 		rentry = (struct xfs_attr_leaf_name_remote *)
 				(((char *)bp->b_addr) + nameidx);
 		if (rentry->namelen <= 0) {
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 			goto out;
 		}
 		calc_hash = xfs_da_hashname(rentry->name, rentry->namelen);
 	}
 	if (calc_hash != hash)
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 
 out:
 	return error;
@@ -404,10 +404,10 @@ out:
 
 /* Scrub the extended attribute metadata. */
 int
-xfs_scrub_xattr(
+xchk_xattr(
 	struct xfs_scrub_context	*sc)
 {
-	struct xfs_scrub_xattr		sx;
+	struct xchk_xattr		sx;
 	struct attrlist_cursor_kern	cursor = { 0 };
 	xfs_dablk_t			last_checked = -1U;
 	int				error = 0;
@@ -417,7 +417,7 @@ xfs_scrub_xattr(
 
 	memset(&sx, 0, sizeof(sx));
 	/* Check attribute tree structure */
-	error = xfs_scrub_da_btree(sc, XFS_ATTR_FORK, xfs_scrub_xattr_rec,
+	error = xchk_da_btree(sc, XFS_ATTR_FORK, xchk_xattr_rec,
 			&last_checked);
 	if (error)
 		goto out;
@@ -429,7 +429,7 @@ xfs_scrub_xattr(
 	sx.context.dp = sc->ip;
 	sx.context.cursor = &cursor;
 	sx.context.resynch = 1;
-	sx.context.put_listent = xfs_scrub_xattr_listent;
+	sx.context.put_listent = xchk_xattr_listent;
 	sx.context.tp = sc->tp;
 	sx.context.flags = ATTR_INCOMPLETE;
 	sx.sc = sc;
@@ -438,7 +438,7 @@ xfs_scrub_xattr(
 	 * Look up every xattr in this file by name.
 	 *
 	 * Use the backend implementation of xfs_attr_list to call
-	 * xfs_scrub_xattr_listent on every attribute key in this inode.
+	 * xchk_xattr_listent on every attribute key in this inode.
 	 * In other words, we use the same iterator/callback mechanism
 	 * that listattr uses to scrub extended attributes, though in our
 	 * _listent function, we check the value of the attribute.
@@ -451,7 +451,7 @@ xfs_scrub_xattr(
 	 * locking order.
 	 */
 	error = xfs_attr_list_int_ilocked(&sx.context);
-	if (!xfs_scrub_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
+	if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
 		goto out;
 out:
 	return error;
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index ebbfab173e97..19cfbd3910a2 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -33,13 +33,13 @@
 
 /* Set us up with an inode's bmap. */
 int
-xfs_scrub_setup_inode_bmap(
+xchk_setup_inode_bmap(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
 	int				error;
 
-	error = xfs_scrub_get_inode(sc, ip);
+	error = xchk_get_inode(sc, ip);
 	if (error)
 		goto out;
 
@@ -60,7 +60,7 @@ xfs_scrub_setup_inode_bmap(
 	}
 
 	/* Got the inode, lock it and we're ready to go. */
-	error = xfs_scrub_trans_alloc(sc, 0);
+	error = xchk_trans_alloc(sc, 0);
 	if (error)
 		goto out;
 	sc->ilock_flags |= XFS_ILOCK_EXCL;
@@ -78,7 +78,7 @@ out:
  * is in btree format.
  */
 
-struct xfs_scrub_bmap_info {
+struct xchk_bmap_info {
 	struct xfs_scrub_context	*sc;
 	xfs_fileoff_t			lastoff;
 	bool				is_rt;
@@ -88,8 +88,8 @@ struct xfs_scrub_bmap_info {
 
 /* Look for a corresponding rmap for this irec. */
 static inline bool
-xfs_scrub_bmap_get_rmap(
-	struct xfs_scrub_bmap_info	*info,
+xchk_bmap_get_rmap(
+	struct xchk_bmap_info		*info,
 	struct xfs_bmbt_irec		*irec,
 	xfs_agblock_t			agbno,
 	uint64_t			owner,
@@ -120,7 +120,7 @@ xfs_scrub_bmap_get_rmap(
 	if (info->is_shared) {
 		error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
 				owner, offset, rflags, rmap, &has_rmap);
-		if (!xfs_scrub_should_check_xref(info->sc, &error,
+		if (!xchk_should_check_xref(info->sc, &error,
 				&info->sc->sa.rmap_cur))
 			return false;
 		goto out;
@@ -131,28 +131,28 @@ xfs_scrub_bmap_get_rmap(
 	 */
 	error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner,
 			offset, rflags, &has_rmap);
-	if (!xfs_scrub_should_check_xref(info->sc, &error,
+	if (!xchk_should_check_xref(info->sc, &error,
 			&info->sc->sa.rmap_cur))
 		return false;
 	if (!has_rmap)
 		goto out;
 
 	error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap);
-	if (!xfs_scrub_should_check_xref(info->sc, &error,
+	if (!xchk_should_check_xref(info->sc, &error,
 			&info->sc->sa.rmap_cur))
 		return false;
 
 out:
 	if (!has_rmap)
-		xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
 			irec->br_startoff);
 	return has_rmap;
 }
 
 /* Make sure that we have rmapbt records for this extent. */
 STATIC void
-xfs_scrub_bmap_xref_rmap(
-	struct xfs_scrub_bmap_info	*info,
+xchk_bmap_xref_rmap(
+	struct xchk_bmap_info		*info,
 	struct xfs_bmbt_irec		*irec,
 	xfs_agblock_t			agbno)
 {
@@ -160,7 +160,7 @@ xfs_scrub_bmap_xref_rmap(
 	unsigned long long		rmap_end;
 	uint64_t			owner;
 
-	if (!info->sc->sa.rmap_cur || xfs_scrub_skip_xref(info->sc->sm))
+	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
 		return;
 
 	if (info->whichfork == XFS_COW_FORK)
@@ -169,14 +169,14 @@ xfs_scrub_bmap_xref_rmap(
 		owner = info->sc->ip->i_ino;
 
 	/* Find the rmap record for this irec. */
-	if (!xfs_scrub_bmap_get_rmap(info, irec, agbno, owner, &rmap))
+	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
 		return;
 
 	/* Check the rmap. */
 	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
 	if (rmap.rm_startblock > agbno ||
 	    agbno + irec->br_blockcount > rmap_end)
-		xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 
 	/*
@@ -189,12 +189,12 @@ xfs_scrub_bmap_xref_rmap(
 				rmap.rm_blockcount;
 		if (rmap.rm_offset > irec->br_startoff ||
 		    irec->br_startoff + irec->br_blockcount > rmap_end)
-			xfs_scrub_fblock_xref_set_corrupt(info->sc,
+			xchk_fblock_xref_set_corrupt(info->sc,
 					info->whichfork, irec->br_startoff);
 	}
 
 	if (rmap.rm_owner != owner)
-		xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 
 	/*
@@ -207,22 +207,22 @@ xfs_scrub_bmap_xref_rmap(
 	if (owner != XFS_RMAP_OWN_COW &&
 	    irec->br_state == XFS_EXT_UNWRITTEN &&
 	    !(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
-		xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 
 	if (info->whichfork == XFS_ATTR_FORK &&
 	    !(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
-		xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
-		xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 }
 
 /* Cross-reference a single rtdev extent record. */
 STATIC void
-xfs_scrub_bmap_rt_extent_xref(
-	struct xfs_scrub_bmap_info	*info,
+xchk_bmap_rt_extent_xref(
+	struct xchk_bmap_info		*info,
 	struct xfs_inode		*ip,
 	struct xfs_btree_cur		*cur,
 	struct xfs_bmbt_irec		*irec)
@@ -230,14 +230,14 @@ xfs_scrub_bmap_rt_extent_xref(
 	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
 
-	xfs_scrub_xref_is_used_rt_space(info->sc, irec->br_startblock,
+	xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
 			irec->br_blockcount);
 }
 
 /* Cross-reference a single datadev extent record. */
 STATIC void
-xfs_scrub_bmap_extent_xref(
-	struct xfs_scrub_bmap_info	*info,
+xchk_bmap_extent_xref(
+	struct xchk_bmap_info		*info,
 	struct xfs_inode		*ip,
 	struct xfs_btree_cur		*cur,
 	struct xfs_bmbt_irec		*irec)
@@ -255,38 +255,38 @@ xfs_scrub_bmap_extent_xref(
 	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
 	len = irec->br_blockcount;
 
-	error = xfs_scrub_ag_init(info->sc, agno, &info->sc->sa);
-	if (!xfs_scrub_fblock_process_error(info->sc, info->whichfork,
+	error = xchk_ag_init(info->sc, agno, &info->sc->sa);
+	if (!xchk_fblock_process_error(info->sc, info->whichfork,
 			irec->br_startoff, &error))
 		return;
 
-	xfs_scrub_xref_is_used_space(info->sc, agbno, len);
-	xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len);
-	xfs_scrub_bmap_xref_rmap(info, irec, agbno);
+	xchk_xref_is_used_space(info->sc, agbno, len);
+	xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
+	xchk_bmap_xref_rmap(info, irec, agbno);
 	switch (info->whichfork) {
 	case XFS_DATA_FORK:
 		if (xfs_is_reflink_inode(info->sc->ip))
 			break;
 		/* fall through */
 	case XFS_ATTR_FORK:
-		xfs_scrub_xref_is_not_shared(info->sc, agbno,
+		xchk_xref_is_not_shared(info->sc, agbno,
 				irec->br_blockcount);
 		break;
 	case XFS_COW_FORK:
-		xfs_scrub_xref_is_cow_staging(info->sc, agbno,
+		xchk_xref_is_cow_staging(info->sc, agbno,
 				irec->br_blockcount);
 		break;
 	}
 
-	xfs_scrub_ag_free(info->sc, &info->sc->sa);
+	xchk_ag_free(info->sc, &info->sc->sa);
 }
 
 /* Scrub a single extent record. */
 STATIC int
-xfs_scrub_bmap_extent(
+xchk_bmap_extent(
 	struct xfs_inode		*ip,
 	struct xfs_btree_cur		*cur,
-	struct xfs_scrub_bmap_info	*info,
+	struct xchk_bmap_info		*info,
 	struct xfs_bmbt_irec		*irec)
 {
 	struct xfs_mount		*mp = info->sc->mp;
@@ -302,12 +302,12 @@ xfs_scrub_bmap_extent(
 	 * from the incore list, for which there is no ordering check.
 	 */
 	if (irec->br_startoff < info->lastoff)
-		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 
 	/* There should never be a "hole" extent in either extent list. */
 	if (irec->br_startblock == HOLESTARTBLOCK)
-		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 
 	/*
@@ -315,40 +315,40 @@ xfs_scrub_bmap_extent(
 	 * in-core extent scan, and we should never see these in the bmbt.
 	 */
 	if (isnullstartblock(irec->br_startblock))
-		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 
 	/* Make sure the extent points to a valid place. */
 	if (irec->br_blockcount > MAXEXTLEN)
-		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 	if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
-		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 	end = irec->br_startblock + irec->br_blockcount - 1;
 	if (info->is_rt &&
 	    (!xfs_verify_rtbno(mp, irec->br_startblock) ||
 	     !xfs_verify_rtbno(mp, end)))
-		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 	if (!info->is_rt &&
 	    (!xfs_verify_fsbno(mp, irec->br_startblock) ||
 	     !xfs_verify_fsbno(mp, end) ||
 	     XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
 				XFS_FSB_TO_AGNO(mp, end)))
-		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 
 	/* We don't allow unwritten extents on attr forks. */
 	if (irec->br_state == XFS_EXT_UNWRITTEN &&
 	    info->whichfork == XFS_ATTR_FORK)
-		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 
 	if (info->is_rt)
-		xfs_scrub_bmap_rt_extent_xref(info, ip, cur, irec);
+		xchk_bmap_rt_extent_xref(info, ip, cur, irec);
 	else
-		xfs_scrub_bmap_extent_xref(info, ip, cur, irec);
+		xchk_bmap_extent_xref(info, ip, cur, irec);
 
 	info->lastoff = irec->br_startoff + irec->br_blockcount;
 	return error;
@@ -356,12 +356,12 @@ xfs_scrub_bmap_extent(
 
 /* Scrub a bmbt record. */
 STATIC int
-xfs_scrub_bmapbt_rec(
-	struct xfs_scrub_btree		*bs,
+xchk_bmapbt_rec(
+	struct xchk_btree		*bs,
 	union xfs_btree_rec		*rec)
 {
 	struct xfs_bmbt_irec		irec;
-	struct xfs_scrub_bmap_info	*info = bs->private;
+	struct xchk_bmap_info		*info = bs->private;
 	struct xfs_inode		*ip = bs->cur->bc_private.b.ip;
 	struct xfs_buf			*bp = NULL;
 	struct xfs_btree_block		*block;
@@ -378,22 +378,22 @@ xfs_scrub_bmapbt_rec(
 			block = xfs_btree_get_block(bs->cur, i, &bp);
 			owner = be64_to_cpu(block->bb_u.l.bb_owner);
 			if (owner != ip->i_ino)
-				xfs_scrub_fblock_set_corrupt(bs->sc,
+				xchk_fblock_set_corrupt(bs->sc,
 						info->whichfork, 0);
 		}
 	}
 
 	/* Set up the in-core record and scrub it. */
 	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
-	return xfs_scrub_bmap_extent(ip, bs->cur, info, &irec);
+	return xchk_bmap_extent(ip, bs->cur, info, &irec);
 }
 
 /* Scan the btree records. */
 STATIC int
-xfs_scrub_bmap_btree(
+xchk_bmap_btree(
 	struct xfs_scrub_context	*sc,
 	int				whichfork,
-	struct xfs_scrub_bmap_info	*info)
+	struct xchk_bmap_info		*info)
 {
 	struct xfs_owner_info		oinfo;
 	struct xfs_mount		*mp = sc->mp;
@@ -403,12 +403,12 @@ xfs_scrub_bmap_btree(
 
 	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
-	error = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_rec, &oinfo, info);
+	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
 	xfs_btree_del_cursor(cur, error);
 	return error;
 }
 
-struct xfs_scrub_bmap_check_rmap_info {
+struct xchk_bmap_check_rmap_info {
 	struct xfs_scrub_context	*sc;
 	int				whichfork;
 	struct xfs_iext_cursor		icur;
@@ -416,13 +416,13 @@ struct xfs_scrub_bmap_check_rmap_info {
 
 /* Can we find bmaps that fit this rmap? */
 STATIC int
-xfs_scrub_bmap_check_rmap(
+xchk_bmap_check_rmap(
 	struct xfs_btree_cur		*cur,
 	struct xfs_rmap_irec		*rec,
 	void				*priv)
 {
 	struct xfs_bmbt_irec		irec;
-	struct xfs_scrub_bmap_check_rmap_info	*sbcri = priv;
+	struct xchk_bmap_check_rmap_info	*sbcri = priv;
 	struct xfs_ifork		*ifp;
 	struct xfs_scrub_context	*sc = sbcri->sc;
 	bool				have_map;
@@ -439,14 +439,14 @@ xfs_scrub_bmap_check_rmap(
 	/* Now look up the bmbt record. */
 	ifp = XFS_IFORK_PTR(sc->ip, sbcri->whichfork);
 	if (!ifp) {
-		xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
 				rec->rm_offset);
 		goto out;
 	}
 	have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
 			&sbcri->icur, &irec);
 	if (!have_map)
-		xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
 				rec->rm_offset);
 	/*
 	 * bmap extent record lengths are constrained to 2^21 blocks in length
@@ -457,14 +457,14 @@ xfs_scrub_bmap_check_rmap(
 	 */
 	while (have_map) {
 		if (irec.br_startoff != rec->rm_offset)
-			xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
 					rec->rm_offset);
 		if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
 				cur->bc_private.a.agno, rec->rm_startblock))
-			xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
 					rec->rm_offset);
 		if (irec.br_blockcount > rec->rm_blockcount)
-			xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
 					rec->rm_offset);
 		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 			break;
@@ -475,7 +475,7 @@ xfs_scrub_bmap_check_rmap(
 			break;
 		have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
 		if (!have_map)
-			xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
 					rec->rm_offset);
 	}
 
@@ -487,12 +487,12 @@ out:
 
 /* Make sure each rmap has a corresponding bmbt entry. */
 STATIC int
-xfs_scrub_bmap_check_ag_rmaps(
+xchk_bmap_check_ag_rmaps(
 	struct xfs_scrub_context	*sc,
 	int				whichfork,
 	xfs_agnumber_t			agno)
 {
-	struct xfs_scrub_bmap_check_rmap_info	sbcri;
+	struct xchk_bmap_check_rmap_info	sbcri;
 	struct xfs_btree_cur		*cur;
 	struct xfs_buf			*agf;
 	int				error;
@@ -509,7 +509,7 @@ xfs_scrub_bmap_check_ag_rmaps(
 
 	sbcri.sc = sc;
 	sbcri.whichfork = whichfork;
-	error = xfs_rmap_query_all(cur, xfs_scrub_bmap_check_rmap, &sbcri);
+	error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
 	if (error == XFS_BTREE_QUERY_RANGE_ABORT)
 		error = 0;
 
@@ -521,7 +521,7 @@ out_agf:
 
 /* Make sure each rmap has a corresponding bmbt entry. */
 STATIC int
-xfs_scrub_bmap_check_rmaps(
+xchk_bmap_check_rmaps(
 	struct xfs_scrub_context	*sc,
 	int				whichfork)
 {
@@ -561,7 +561,7 @@ xfs_scrub_bmap_check_rmaps(
 		return 0;
 
 	for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) {
-		error = xfs_scrub_bmap_check_ag_rmaps(sc, whichfork, agno);
+		error = xchk_bmap_check_ag_rmaps(sc, whichfork, agno);
 		if (error)
 			return error;
 		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
@@ -578,12 +578,12 @@ xfs_scrub_bmap_check_rmaps(
  * Then we unconditionally scan the incore extent cache.
  */
 STATIC int
-xfs_scrub_bmap(
+xchk_bmap(
 	struct xfs_scrub_context	*sc,
 	int				whichfork)
 {
 	struct xfs_bmbt_irec		irec;
-	struct xfs_scrub_bmap_info	info = { NULL };
+	struct xchk_bmap_info		info = { NULL };
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_inode		*ip = sc->ip;
 	struct xfs_ifork		*ifp;
@@ -605,7 +605,7 @@ xfs_scrub_bmap(
 			goto out;
 		/* No CoW forks on non-reflink inodes/filesystems. */
 		if (!xfs_is_reflink_inode(ip)) {
-			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
+			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
 			goto out;
 		}
 		break;
@@ -614,7 +614,7 @@ xfs_scrub_bmap(
 			goto out_check_rmap;
 		if (!xfs_sb_version_hasattr(&mp->m_sb) &&
 		    !xfs_sb_version_hasattr2(&mp->m_sb))
-			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
+			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
 		break;
 	default:
 		ASSERT(whichfork == XFS_DATA_FORK);
@@ -630,22 +630,22 @@ xfs_scrub_bmap(
 		goto out;
 	case XFS_DINODE_FMT_EXTENTS:
 		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-			xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
+			xchk_fblock_set_corrupt(sc, whichfork, 0);
 			goto out;
 		}
 		break;
 	case XFS_DINODE_FMT_BTREE:
 		if (whichfork == XFS_COW_FORK) {
-			xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
+			xchk_fblock_set_corrupt(sc, whichfork, 0);
 			goto out;
 		}
 
-		error = xfs_scrub_bmap_btree(sc, whichfork, &info);
+		error = xchk_bmap_btree(sc, whichfork, &info);
 		if (error)
 			goto out;
 		break;
 	default:
-		xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
+		xchk_fblock_set_corrupt(sc, whichfork, 0);
 		goto out;
 	}
 
@@ -655,37 +655,37 @@ xfs_scrub_bmap(
 	/* Now try to scrub the in-memory extent list. */
         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
 		error = xfs_iread_extents(sc->tp, ip, whichfork);
-		if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error))
+		if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
 			goto out;
 	}
 
 	/* Find the offset of the last extent in the mapping. */
 	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
-	if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error))
+	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
 		goto out;
 
 	/* Scrub extent records. */
 	info.lastoff = 0;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	for_each_xfs_iext(ifp, &icur, &irec) {
-		if (xfs_scrub_should_terminate(sc, &error) ||
+		if (xchk_should_terminate(sc, &error) ||
 		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 			break;
 		if (isnullstartblock(irec.br_startblock))
 			continue;
 		if (irec.br_startoff >= endoff) {
-			xfs_scrub_fblock_set_corrupt(sc, whichfork,
+			xchk_fblock_set_corrupt(sc, whichfork,
 					irec.br_startoff);
 			goto out;
 		}
-		error = xfs_scrub_bmap_extent(ip, NULL, &info, &irec);
+		error = xchk_bmap_extent(ip, NULL, &info, &irec);
 		if (error)
 			goto out;
 	}
 
 out_check_rmap:
-	error = xfs_scrub_bmap_check_rmaps(sc, whichfork);
-	if (!xfs_scrub_fblock_xref_process_error(sc, whichfork, 0, &error))
+	error = xchk_bmap_check_rmaps(sc, whichfork);
+	if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
 		goto out;
 out:
 	return error;
@@ -693,27 +693,27 @@ out:
 
 /* Scrub an inode's data fork. */
 int
-xfs_scrub_bmap_data(
+xchk_bmap_data(
 	struct xfs_scrub_context	*sc)
 {
-	return xfs_scrub_bmap(sc, XFS_DATA_FORK);
+	return xchk_bmap(sc, XFS_DATA_FORK);
 }
 
 /* Scrub an inode's attr fork. */
 int
-xfs_scrub_bmap_attr(
+xchk_bmap_attr(
 	struct xfs_scrub_context	*sc)
 {
-	return xfs_scrub_bmap(sc, XFS_ATTR_FORK);
+	return xchk_bmap(sc, XFS_ATTR_FORK);
 }
 
 /* Scrub an inode's CoW fork. */
 int
-xfs_scrub_bmap_cow(
+xchk_bmap_cow(
 	struct xfs_scrub_context	*sc)
 {
 	if (!xfs_is_reflink_inode(sc->ip))
 		return -ENOENT;
 
-	return xfs_scrub_bmap(sc, XFS_COW_FORK);
+	return xchk_bmap(sc, XFS_COW_FORK);
 }
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index 5b472045f036..30fe9a147959 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -29,7 +29,7 @@
  * operational errors in common.c.
  */
 static bool
-__xfs_scrub_btree_process_error(
+__xchk_btree_process_error(
 	struct xfs_scrub_context	*sc,
 	struct xfs_btree_cur		*cur,
 	int				level,
@@ -43,7 +43,7 @@ __xfs_scrub_btree_process_error(
 	switch (*error) {
 	case -EDEADLOCK:
 		/* Used to restart an op with deadlock avoidance. */
-		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
+		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
 		break;
 	case -EFSBADCRC:
 	case -EFSCORRUPTED:
@@ -53,10 +53,10 @@ __xfs_scrub_btree_process_error(
 		/* fall through */
 	default:
 		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
-			trace_xfs_scrub_ifork_btree_op_error(sc, cur, level,
+			trace_xchk_ifork_btree_op_error(sc, cur, level,
 					*error, ret_ip);
 		else
-			trace_xfs_scrub_btree_op_error(sc, cur, level,
+			trace_xchk_btree_op_error(sc, cur, level,
 					*error, ret_ip);
 		break;
 	}
@@ -64,30 +64,30 @@ __xfs_scrub_btree_process_error(
 }
 
 bool
-xfs_scrub_btree_process_error(
+xchk_btree_process_error(
 	struct xfs_scrub_context	*sc,
 	struct xfs_btree_cur		*cur,
 	int				level,
 	int				*error)
 {
-	return __xfs_scrub_btree_process_error(sc, cur, level, error,
+	return __xchk_btree_process_error(sc, cur, level, error,
 			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
 }
 
 bool
-xfs_scrub_btree_xref_process_error(
+xchk_btree_xref_process_error(
 	struct xfs_scrub_context	*sc,
 	struct xfs_btree_cur		*cur,
 	int				level,
 	int				*error)
 {
-	return __xfs_scrub_btree_process_error(sc, cur, level, error,
+	return __xchk_btree_process_error(sc, cur, level, error,
 			XFS_SCRUB_OFLAG_XFAIL, __return_address);
 }
 
 /* Record btree block corruption. */
 static void
-__xfs_scrub_btree_set_corrupt(
+__xchk_btree_set_corrupt(
 	struct xfs_scrub_context	*sc,
 	struct xfs_btree_cur		*cur,
 	int				level,
@@ -97,30 +97,30 @@ __xfs_scrub_btree_set_corrupt(
 	sc->sm->sm_flags |= errflag;
 
 	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
-		trace_xfs_scrub_ifork_btree_error(sc, cur, level,
+		trace_xchk_ifork_btree_error(sc, cur, level,
 				ret_ip);
 	else
-		trace_xfs_scrub_btree_error(sc, cur, level,
+		trace_xchk_btree_error(sc, cur, level,
 				ret_ip);
 }
 
 void
-xfs_scrub_btree_set_corrupt(
+xchk_btree_set_corrupt(
 	struct xfs_scrub_context	*sc,
 	struct xfs_btree_cur		*cur,
 	int				level)
 {
-	__xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
+	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
 			__return_address);
 }
 
 void
-xfs_scrub_btree_xref_set_corrupt(
+xchk_btree_xref_set_corrupt(
 	struct xfs_scrub_context	*sc,
 	struct xfs_btree_cur		*cur,
 	int				level)
 {
-	__xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
+	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
 			__return_address);
 }
 
@@ -129,8 +129,8 @@ xfs_scrub_btree_xref_set_corrupt(
  * keys.
  */
 STATIC void
-xfs_scrub_btree_rec(
-	struct xfs_scrub_btree	*bs)
+xchk_btree_rec(
+	struct xchk_btree	*bs)
 {
 	struct xfs_btree_cur	*cur = bs->cur;
 	union xfs_btree_rec	*rec;
@@ -144,11 +144,11 @@ xfs_scrub_btree_rec(
 	block = xfs_btree_get_block(cur, 0, &bp);
 	rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
 
-	trace_xfs_scrub_btree_rec(bs->sc, cur, 0);
+	trace_xchk_btree_rec(bs->sc, cur, 0);
 
 	/* If this isn't the first record, are they in order? */
 	if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
-		xfs_scrub_btree_set_corrupt(bs->sc, cur, 0);
+		xchk_btree_set_corrupt(bs->sc, cur, 0);
 	bs->firstrec = false;
 	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
 
@@ -160,7 +160,7 @@ xfs_scrub_btree_rec(
 	keyblock = xfs_btree_get_block(cur, 1, &bp);
 	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
 	if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
-		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
+		xchk_btree_set_corrupt(bs->sc, cur, 1);
 
 	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
 		return;
@@ -169,7 +169,7 @@ xfs_scrub_btree_rec(
 	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
 	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
 	if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
-		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
+		xchk_btree_set_corrupt(bs->sc, cur, 1);
 }
 
 /*
@@ -177,8 +177,8 @@ xfs_scrub_btree_rec(
  * keys.
  */
 STATIC void
-xfs_scrub_btree_key(
-	struct xfs_scrub_btree	*bs,
+xchk_btree_key(
+	struct xchk_btree	*bs,
 	int			level)
 {
 	struct xfs_btree_cur	*cur = bs->cur;
@@ -191,12 +191,12 @@ xfs_scrub_btree_key(
 	block = xfs_btree_get_block(cur, level, &bp);
 	key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
 
-	trace_xfs_scrub_btree_key(bs->sc, cur, level);
+	trace_xchk_btree_key(bs->sc, cur, level);
 
 	/* If this isn't the first key, are they in order? */
 	if (!bs->firstkey[level] &&
 	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
-		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
+		xchk_btree_set_corrupt(bs->sc, cur, level);
 	bs->firstkey[level] = false;
 	memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);
 
@@ -207,7 +207,7 @@ xfs_scrub_btree_key(
 	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
 	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
 	if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
-		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
+		xchk_btree_set_corrupt(bs->sc, cur, level);
 
 	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
 		return;
@@ -216,7 +216,7 @@ xfs_scrub_btree_key(
 	key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
 	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
 	if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
-		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
+		xchk_btree_set_corrupt(bs->sc, cur, level);
 }
 
 /*
@@ -224,8 +224,8 @@ xfs_scrub_btree_key(
  * Callers do not need to set the corrupt flag.
  */
 static bool
-xfs_scrub_btree_ptr_ok(
-	struct xfs_scrub_btree		*bs,
+xchk_btree_ptr_ok(
+	struct xchk_btree		*bs,
 	int				level,
 	union xfs_btree_ptr		*ptr)
 {
@@ -242,15 +242,15 @@ xfs_scrub_btree_ptr_ok(
 	else
 		res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
 	if (!res)
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
 
 	return res;
 }
 
 /* Check that a btree block's sibling matches what we expect it. */
 STATIC int
-xfs_scrub_btree_block_check_sibling(
-	struct xfs_scrub_btree		*bs,
+xchk_btree_block_check_sibling(
+	struct xchk_btree		*bs,
 	int				level,
 	int				direction,
 	union xfs_btree_ptr		*sibling)
@@ -264,7 +264,7 @@ xfs_scrub_btree_block_check_sibling(
 	int				error;
 
 	error = xfs_btree_dup_cursor(cur, &ncur);
-	if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error) ||
+	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
 	    !ncur)
 		return error;
 
@@ -278,7 +278,7 @@ xfs_scrub_btree_block_check_sibling(
 		else
 			error = xfs_btree_decrement(ncur, level + 1, &success);
 		if (error == 0 && success)
-			xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
+			xchk_btree_set_corrupt(bs->sc, cur, level);
 		error = 0;
 		goto out;
 	}
@@ -288,23 +288,23 @@ xfs_scrub_btree_block_check_sibling(
 		error = xfs_btree_increment(ncur, level + 1, &success);
 	else
 		error = xfs_btree_decrement(ncur, level + 1, &success);
-	if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error))
+	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
 		goto out;
 	if (!success) {
-		xfs_scrub_btree_set_corrupt(bs->sc, cur, level + 1);
+		xchk_btree_set_corrupt(bs->sc, cur, level + 1);
 		goto out;
 	}
 
 	/* Compare upper level pointer to sibling pointer. */
 	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
 	pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
-	if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp))
+	if (!xchk_btree_ptr_ok(bs, level + 1, pp))
 		goto out;
 	if (pbp)
-		xfs_scrub_buffer_recheck(bs->sc, pbp);
+		xchk_buffer_recheck(bs->sc, pbp);
 
 	if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
-		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
+		xchk_btree_set_corrupt(bs->sc, cur, level);
 out:
 	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
 	return error;
@@ -312,8 +312,8 @@ out:
 
 /* Check the siblings of a btree block. */
 STATIC int
-xfs_scrub_btree_block_check_siblings(
-	struct xfs_scrub_btree		*bs,
+xchk_btree_block_check_siblings(
+	struct xchk_btree		*bs,
 	struct xfs_btree_block		*block)
 {
 	struct xfs_btree_cur		*cur = bs->cur;
@@ -330,7 +330,7 @@ xfs_scrub_btree_block_check_siblings(
 	if (level == cur->bc_nlevels - 1) {
 		if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
 		    !xfs_btree_ptr_is_null(cur, &rightsib))
-			xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
+			xchk_btree_set_corrupt(bs->sc, cur, level);
 		goto out;
 	}
 
@@ -339,10 +339,10 @@ xfs_scrub_btree_block_check_siblings(
 	 * parent level pointers?
 	 * (These function absorbs error codes for us.)
 	 */
-	error = xfs_scrub_btree_block_check_sibling(bs, level, -1, &leftsib);
+	error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
 	if (error)
 		return error;
-	error = xfs_scrub_btree_block_check_sibling(bs, level, 1, &rightsib);
+	error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
 	if (error)
 		return error;
 out:
@@ -360,8 +360,8 @@ struct check_owner {
  * an rmap record for it.
  */
 STATIC int
-xfs_scrub_btree_check_block_owner(
-	struct xfs_scrub_btree		*bs,
+xchk_btree_check_block_owner(
+	struct xchk_btree		*bs,
 	int				level,
 	xfs_daddr_t			daddr)
 {
@@ -380,13 +380,13 @@ xfs_scrub_btree_check_block_owner(
 
 	init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
 	if (init_sa) {
-		error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa);
-		if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur,
+		error = xchk_ag_init(bs->sc, agno, &bs->sc->sa);
+		if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
 				level, &error))
 			return error;
 	}
 
-	xfs_scrub_xref_is_used_space(bs->sc, agbno, 1);
+	xchk_xref_is_used_space(bs->sc, agbno, 1);
 	/*
 	 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
 	 * have to nullify it (to shut down further block owner checks) if
@@ -395,20 +395,20 @@ xfs_scrub_btree_check_block_owner(
 	if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
 		bs->cur = NULL;
 
-	xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
+	xchk_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
 	if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
 		bs->cur = NULL;
 
 	if (init_sa)
-		xfs_scrub_ag_free(bs->sc, &bs->sc->sa);
+		xchk_ag_free(bs->sc, &bs->sc->sa);
 
 	return error;
 }
 
 /* Check the owner of a btree block. */
 STATIC int
-xfs_scrub_btree_check_owner(
-	struct xfs_scrub_btree		*bs,
+xchk_btree_check_owner(
+	struct xchk_btree		*bs,
 	int				level,
 	struct xfs_buf			*bp)
 {
@@ -437,7 +437,7 @@ xfs_scrub_btree_check_owner(
 		return 0;
 	}
 
-	return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
+	return xchk_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
 }
 
 /*
@@ -445,8 +445,8 @@ xfs_scrub_btree_check_owner(
  * special blocks that don't require that.
  */
 STATIC void
-xfs_scrub_btree_check_minrecs(
-	struct xfs_scrub_btree	*bs,
+xchk_btree_check_minrecs(
+	struct xchk_btree	*bs,
 	int			level,
 	struct xfs_btree_block	*block)
 {
@@ -475,7 +475,7 @@ xfs_scrub_btree_check_minrecs(
 	if (level >= ok_level)
 		return;
 
-	xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
+	xchk_btree_set_corrupt(bs->sc, bs->cur, level);
 }
 
 /*
@@ -483,8 +483,8 @@ xfs_scrub_btree_check_minrecs(
  * and buffer pointers (if applicable) if they're ok to use.
  */
 STATIC int
-xfs_scrub_btree_get_block(
-	struct xfs_scrub_btree		*bs,
+xchk_btree_get_block(
+	struct xchk_btree		*bs,
 	int				level,
 	union xfs_btree_ptr		*pp,
 	struct xfs_btree_block		**pblock,
@@ -497,7 +497,7 @@ xfs_scrub_btree_get_block(
 	*pbp = NULL;
 
 	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
-	if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, level, &error) ||
+	if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
 	    !*pblock)
 		return error;
 
@@ -509,19 +509,19 @@ xfs_scrub_btree_get_block(
 		failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
 				 level, *pbp);
 	if (failed_at) {
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
 		return 0;
 	}
 	if (*pbp)
-		xfs_scrub_buffer_recheck(bs->sc, *pbp);
+		xchk_buffer_recheck(bs->sc, *pbp);
 
-	xfs_scrub_btree_check_minrecs(bs, level, *pblock);
+	xchk_btree_check_minrecs(bs, level, *pblock);
 
 	/*
 	 * Check the block's owner; this function absorbs error codes
 	 * for us.
 	 */
-	error = xfs_scrub_btree_check_owner(bs, level, *pbp);
+	error = xchk_btree_check_owner(bs, level, *pbp);
 	if (error)
 		return error;
 
@@ -529,7 +529,7 @@ xfs_scrub_btree_get_block(
 	 * Check the block's siblings; this function absorbs error codes
 	 * for us.
 	 */
-	return xfs_scrub_btree_block_check_siblings(bs, *pblock);
+	return xchk_btree_block_check_siblings(bs, *pblock);
 }
 
 /*
@@ -537,8 +537,8 @@ xfs_scrub_btree_get_block(
  * in the parent block.
  */
 STATIC void
-xfs_scrub_btree_block_keys(
-	struct xfs_scrub_btree		*bs,
+xchk_btree_block_keys(
+	struct xchk_btree		*bs,
 	int				level,
 	struct xfs_btree_block		*block)
 {
@@ -562,7 +562,7 @@ xfs_scrub_btree_block_keys(
 			parent_block);
 
 	if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
-		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
+		xchk_btree_set_corrupt(bs->sc, cur, 1);
 
 	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
 		return;
@@ -573,7 +573,7 @@ xfs_scrub_btree_block_keys(
 			parent_block);
 
 	if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
-		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
+		xchk_btree_set_corrupt(bs->sc, cur, 1);
 }
 
 /*
@@ -582,14 +582,14 @@ xfs_scrub_btree_block_keys(
  * so that the caller can verify individual records.
  */
 int
-xfs_scrub_btree(
+xchk_btree(
 	struct xfs_scrub_context	*sc,
 	struct xfs_btree_cur		*cur,
-	xfs_scrub_btree_rec_fn		scrub_fn,
+	xchk_btree_rec_fn		scrub_fn,
 	struct xfs_owner_info		*oinfo,
 	void				*private)
 {
-	struct xfs_scrub_btree		bs = { NULL };
+	struct xchk_btree		bs = { NULL };
 	union xfs_btree_ptr		ptr;
 	union xfs_btree_ptr		*pp;
 	union xfs_btree_rec		*recp;
@@ -614,7 +614,7 @@ xfs_scrub_btree(
 
 	/* Don't try to check a tree with a height we can't handle. */
 	if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
-		xfs_scrub_btree_set_corrupt(sc, cur, 0);
+		xchk_btree_set_corrupt(sc, cur, 0);
 		goto out;
 	}
 
@@ -624,9 +624,9 @@ xfs_scrub_btree(
 	 */
 	level = cur->bc_nlevels - 1;
 	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
-	if (!xfs_scrub_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
+	if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
 		goto out;
-	error = xfs_scrub_btree_get_block(&bs, level, &ptr, &block, &bp);
+	error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp);
 	if (error || !block)
 		goto out;
 
@@ -639,7 +639,7 @@ xfs_scrub_btree(
 			/* End of leaf, pop back towards the root. */
 			if (cur->bc_ptrs[level] >
 			    be16_to_cpu(block->bb_numrecs)) {
-				xfs_scrub_btree_block_keys(&bs, level, block);
+				xchk_btree_block_keys(&bs, level, block);
 				if (level < cur->bc_nlevels - 1)
 					cur->bc_ptrs[level + 1]++;
 				level++;
@@ -647,14 +647,14 @@ xfs_scrub_btree(
 			}
 
 			/* Records in order for scrub? */
-			xfs_scrub_btree_rec(&bs);
+			xchk_btree_rec(&bs);
 
 			/* Call out to the record checker. */
 			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
 			error = bs.scrub_rec(&bs, recp);
 			if (error)
 				break;
-			if (xfs_scrub_should_terminate(sc, &error) ||
+			if (xchk_should_terminate(sc, &error) ||
 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 				break;
 
@@ -664,7 +664,7 @@ xfs_scrub_btree(
 
 		/* End of node, pop back towards the root. */
 		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
-			xfs_scrub_btree_block_keys(&bs, level, block);
+			xchk_btree_block_keys(&bs, level, block);
 			if (level < cur->bc_nlevels - 1)
 				cur->bc_ptrs[level + 1]++;
 			level++;
@@ -672,16 +672,16 @@ xfs_scrub_btree(
 		}
 
 		/* Keys in order for scrub? */
-		xfs_scrub_btree_key(&bs, level);
+		xchk_btree_key(&bs, level);
 
 		/* Drill another level deeper. */
 		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
-		if (!xfs_scrub_btree_ptr_ok(&bs, level, pp)) {
+		if (!xchk_btree_ptr_ok(&bs, level, pp)) {
 			cur->bc_ptrs[level]++;
 			continue;
 		}
 		level--;
-		error = xfs_scrub_btree_get_block(&bs, level, pp, &block, &bp);
+		error = xchk_btree_get_block(&bs, level, pp, &block, &bp);
 		if (error || !block)
 			goto out;
 
@@ -692,7 +692,7 @@ out:
 	/* Process deferred owner checks on btree blocks. */
 	list_for_each_entry_safe(co, n, &bs.to_check, list) {
 		if (!error && bs.cur)
-			error = xfs_scrub_btree_check_block_owner(&bs,
+			error = xchk_btree_check_block_owner(&bs,
 					co->level, co->daddr);
 		list_del(&co->list);
 		kmem_free(co);
diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h
index 956627500f2c..598ac04a6c3e 100644
--- a/fs/xfs/scrub/btree.h
+++ b/fs/xfs/scrub/btree.h
@@ -9,32 +9,32 @@
 /* btree scrub */
 
 /* Check for btree operation errors. */
-bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc,
+bool xchk_btree_process_error(struct xfs_scrub_context *sc,
 		struct xfs_btree_cur *cur, int level, int *error);
 
 /* Check for btree xref operation errors. */
-bool xfs_scrub_btree_xref_process_error(struct xfs_scrub_context *sc,
+bool xchk_btree_xref_process_error(struct xfs_scrub_context *sc,
 				struct xfs_btree_cur *cur, int level,
 				int *error);
 
 /* Check for btree corruption. */
-void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc,
+void xchk_btree_set_corrupt(struct xfs_scrub_context *sc,
 		struct xfs_btree_cur *cur, int level);
 
 /* Check for btree xref discrepancies. */
-void xfs_scrub_btree_xref_set_corrupt(struct xfs_scrub_context *sc,
+void xchk_btree_xref_set_corrupt(struct xfs_scrub_context *sc,
 		struct xfs_btree_cur *cur, int level);
 
-struct xfs_scrub_btree;
-typedef int (*xfs_scrub_btree_rec_fn)(
-	struct xfs_scrub_btree	*bs,
+struct xchk_btree;
+typedef int (*xchk_btree_rec_fn)(
+	struct xchk_btree	*bs,
 	union xfs_btree_rec	*rec);
 
-struct xfs_scrub_btree {
+struct xchk_btree {
 	/* caller-provided scrub state */
 	struct xfs_scrub_context	*sc;
 	struct xfs_btree_cur		*cur;
-	xfs_scrub_btree_rec_fn		scrub_rec;
+	xchk_btree_rec_fn		scrub_rec;
 	struct xfs_owner_info		*oinfo;
 	void				*private;
 
@@ -45,8 +45,8 @@ struct xfs_scrub_btree {
 	bool				firstkey[XFS_BTREE_MAXLEVELS];
 	struct list_head		to_check;
 };
-int xfs_scrub_btree(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
-		    xfs_scrub_btree_rec_fn scrub_fn,
+int xchk_btree(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+		    xchk_btree_rec_fn scrub_fn,
 		    struct xfs_owner_info *oinfo, void *private);
 
 #endif /* __XFS_SCRUB_BTREE_H__ */
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 385fa5b9c878..5c3d4e7c6166 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -68,7 +68,7 @@
 
 /* Check for operational errors. */
 static bool
-__xfs_scrub_process_error(
+__xchk_process_error(
 	struct xfs_scrub_context	*sc,
 	xfs_agnumber_t			agno,
 	xfs_agblock_t			bno,
@@ -81,7 +81,7 @@ __xfs_scrub_process_error(
 		return true;
 	case -EDEADLOCK:
 		/* Used to restart an op with deadlock avoidance. */
-		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
+		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
 		break;
 	case -EFSBADCRC:
 	case -EFSCORRUPTED:
@@ -90,7 +90,7 @@ __xfs_scrub_process_error(
 		*error = 0;
 		/* fall through */
 	default:
-		trace_xfs_scrub_op_error(sc, agno, bno, *error,
+		trace_xchk_op_error(sc, agno, bno, *error,
 				ret_ip);
 		break;
 	}
@@ -98,30 +98,30 @@ __xfs_scrub_process_error(
 }
 
 bool
-xfs_scrub_process_error(
+xchk_process_error(
 	struct xfs_scrub_context	*sc,
 	xfs_agnumber_t			agno,
 	xfs_agblock_t			bno,
 	int				*error)
 {
-	return __xfs_scrub_process_error(sc, agno, bno, error,
+	return __xchk_process_error(sc, agno, bno, error,
 			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
 }
 
 bool
-xfs_scrub_xref_process_error(
+xchk_xref_process_error(
 	struct xfs_scrub_context	*sc,
 	xfs_agnumber_t			agno,
 	xfs_agblock_t			bno,
 	int				*error)
 {
-	return __xfs_scrub_process_error(sc, agno, bno, error,
+	return __xchk_process_error(sc, agno, bno, error,
 			XFS_SCRUB_OFLAG_XFAIL, __return_address);
 }
 
 /* Check for operational errors for a file offset. */
 static bool
-__xfs_scrub_fblock_process_error(
+__xchk_fblock_process_error(
 	struct xfs_scrub_context	*sc,
 	int				whichfork,
 	xfs_fileoff_t			offset,
@@ -134,7 +134,7 @@ __xfs_scrub_fblock_process_error(
 		return true;
 	case -EDEADLOCK:
 		/* Used to restart an op with deadlock avoidance. */
-		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
+		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
 		break;
 	case -EFSBADCRC:
 	case -EFSCORRUPTED:
@@ -143,7 +143,7 @@ __xfs_scrub_fblock_process_error(
 		*error = 0;
 		/* fall through */
 	default:
-		trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error,
+		trace_xchk_file_op_error(sc, whichfork, offset, *error,
 				ret_ip);
 		break;
 	}
@@ -151,24 +151,24 @@ __xfs_scrub_fblock_process_error(
 }
 
 bool
-xfs_scrub_fblock_process_error(
+xchk_fblock_process_error(
 	struct xfs_scrub_context	*sc,
 	int				whichfork,
 	xfs_fileoff_t			offset,
 	int				*error)
 {
-	return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
+	return __xchk_fblock_process_error(sc, whichfork, offset, error,
 			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
 }
 
 bool
-xfs_scrub_fblock_xref_process_error(
+xchk_fblock_xref_process_error(
 	struct xfs_scrub_context	*sc,
 	int				whichfork,
 	xfs_fileoff_t			offset,
 	int				*error)
 {
-	return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
+	return __xchk_fblock_process_error(sc, whichfork, offset, error,
 			XFS_SCRUB_OFLAG_XFAIL, __return_address);
 }
 
@@ -186,12 +186,12 @@ xfs_scrub_fblock_xref_process_error(
 
 /* Record a block which could be optimized. */
 void
-xfs_scrub_block_set_preen(
+xchk_block_set_preen(
 	struct xfs_scrub_context	*sc,
 	struct xfs_buf			*bp)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
-	trace_xfs_scrub_block_preen(sc, bp->b_bn, __return_address);
+	trace_xchk_block_preen(sc, bp->b_bn, __return_address);
 }
 
 /*
@@ -200,32 +200,32 @@ xfs_scrub_block_set_preen(
  * the block location of the inode record itself.
  */
 void
-xfs_scrub_ino_set_preen(
+xchk_ino_set_preen(
 	struct xfs_scrub_context	*sc,
 	xfs_ino_t			ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
-	trace_xfs_scrub_ino_preen(sc, ino, __return_address);
+	trace_xchk_ino_preen(sc, ino, __return_address);
 }
 
 /* Record a corrupt block. */
 void
-xfs_scrub_block_set_corrupt(
+xchk_block_set_corrupt(
 	struct xfs_scrub_context	*sc,
 	struct xfs_buf			*bp)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
-	trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
+	trace_xchk_block_error(sc, bp->b_bn, __return_address);
 }
 
 /* Record a corruption while cross-referencing. */
 void
-xfs_scrub_block_xref_set_corrupt(
+xchk_block_xref_set_corrupt(
 	struct xfs_scrub_context	*sc,
 	struct xfs_buf			*bp)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
-	trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
+	trace_xchk_block_error(sc, bp->b_bn, __return_address);
 }
 
 /*
@@ -234,44 +234,44 @@ xfs_scrub_block_xref_set_corrupt(
  * inode record itself.
  */
 void
-xfs_scrub_ino_set_corrupt(
+xchk_ino_set_corrupt(
 	struct xfs_scrub_context	*sc,
 	xfs_ino_t			ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
-	trace_xfs_scrub_ino_error(sc, ino, __return_address);
+	trace_xchk_ino_error(sc, ino, __return_address);
 }
 
 /* Record a corruption while cross-referencing with an inode. */
 void
-xfs_scrub_ino_xref_set_corrupt(
+xchk_ino_xref_set_corrupt(
 	struct xfs_scrub_context	*sc,
 	xfs_ino_t			ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
-	trace_xfs_scrub_ino_error(sc, ino, __return_address);
+	trace_xchk_ino_error(sc, ino, __return_address);
 }
 
 /* Record corruption in a block indexed by a file fork. */
 void
-xfs_scrub_fblock_set_corrupt(
+xchk_fblock_set_corrupt(
 	struct xfs_scrub_context	*sc,
 	int				whichfork,
 	xfs_fileoff_t			offset)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
-	trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
+	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
 }
 
 /* Record a corruption while cross-referencing a fork block. */
 void
-xfs_scrub_fblock_xref_set_corrupt(
+xchk_fblock_xref_set_corrupt(
 	struct xfs_scrub_context	*sc,
 	int				whichfork,
 	xfs_fileoff_t			offset)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
-	trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
+	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
 }
 
 /*
@@ -279,32 +279,32 @@ xfs_scrub_fblock_xref_set_corrupt(
  * incorrect.
  */
 void
-xfs_scrub_ino_set_warning(
+xchk_ino_set_warning(
 	struct xfs_scrub_context	*sc,
 	xfs_ino_t			ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
-	trace_xfs_scrub_ino_warning(sc, ino, __return_address);
+	trace_xchk_ino_warning(sc, ino, __return_address);
 }
 
 /* Warn about a block indexed by a file fork that needs review. */
 void
-xfs_scrub_fblock_set_warning(
+xchk_fblock_set_warning(
 	struct xfs_scrub_context	*sc,
 	int				whichfork,
 	xfs_fileoff_t			offset)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
-	trace_xfs_scrub_fblock_warning(sc, whichfork, offset, __return_address);
+	trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
 }
 
 /* Signal an incomplete scrub. */
 void
-xfs_scrub_set_incomplete(
+xchk_set_incomplete(
 	struct xfs_scrub_context	*sc)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
-	trace_xfs_scrub_incomplete(sc, __return_address);
+	trace_xchk_incomplete(sc, __return_address);
 }
 
 /*
@@ -312,18 +312,18 @@ xfs_scrub_set_incomplete(
  * at least according to the reverse mapping data.
  */
 
-struct xfs_scrub_rmap_ownedby_info {
+struct xchk_rmap_ownedby_info {
 	struct xfs_owner_info	*oinfo;
 	xfs_filblks_t		*blocks;
 };
 
 STATIC int
-xfs_scrub_count_rmap_ownedby_irec(
+xchk_count_rmap_ownedby_irec(
 	struct xfs_btree_cur			*cur,
 	struct xfs_rmap_irec			*rec,
 	void					*priv)
 {
-	struct xfs_scrub_rmap_ownedby_info	*sroi = priv;
+	struct xchk_rmap_ownedby_info		*sroi = priv;
 	bool					irec_attr;
 	bool					oinfo_attr;
 
@@ -344,19 +344,19 @@ xfs_scrub_count_rmap_ownedby_irec(
  * The caller should pass us an rmapbt cursor.
  */
 int
-xfs_scrub_count_rmap_ownedby_ag(
+xchk_count_rmap_ownedby_ag(
 	struct xfs_scrub_context		*sc,
 	struct xfs_btree_cur			*cur,
 	struct xfs_owner_info			*oinfo,
 	xfs_filblks_t				*blocks)
 {
-	struct xfs_scrub_rmap_ownedby_info	sroi;
+	struct xchk_rmap_ownedby_info		sroi;
 
 	sroi.oinfo = oinfo;
 	*blocks = 0;
 	sroi.blocks = blocks;
 
-	return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec,
+	return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
 			&sroi);
 }
 
@@ -392,12 +392,12 @@ want_ag_read_header_failure(
 /*
  * Grab all the headers for an AG.
  *
- * The headers should be released by xfs_scrub_ag_free, but as a fail
+ * The headers should be released by xchk_ag_free, but as a fail
  * safe we attach all the buffers we grab to the scrub transaction so
  * they'll all be freed when we cancel it.
  */
 int
-xfs_scrub_ag_read_headers(
+xchk_ag_read_headers(
 	struct xfs_scrub_context	*sc,
 	xfs_agnumber_t			agno,
 	struct xfs_buf			**agi,
@@ -425,8 +425,8 @@ out:
 
 /* Release all the AG btree cursors. */
 void
-xfs_scrub_ag_btcur_free(
-	struct xfs_scrub_ag		*sa)
+xchk_ag_btcur_free(
+	struct xchk_ag		*sa)
 {
 	if (sa->refc_cur)
 		xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
@@ -451,9 +451,9 @@ xfs_scrub_ag_btcur_free(
 
 /* Initialize all the btree cursors for an AG. */
 int
-xfs_scrub_ag_btcur_init(
+xchk_ag_btcur_init(
 	struct xfs_scrub_context	*sc,
-	struct xfs_scrub_ag		*sa)
+	struct xchk_ag		*sa)
 {
 	struct xfs_mount		*mp = sc->mp;
 	xfs_agnumber_t			agno = sa->agno;
@@ -511,11 +511,11 @@ err:
 
 /* Release the AG header context and btree cursors. */
 void
-xfs_scrub_ag_free(
+xchk_ag_free(
 	struct xfs_scrub_context	*sc,
-	struct xfs_scrub_ag		*sa)
+	struct xchk_ag		*sa)
 {
-	xfs_scrub_ag_btcur_free(sa);
+	xchk_ag_btcur_free(sa);
 	if (sa->agfl_bp) {
 		xfs_trans_brelse(sc->tp, sa->agfl_bp);
 		sa->agfl_bp = NULL;
@@ -543,30 +543,30 @@ xfs_scrub_ag_free(
  * transaction ourselves.
  */
 int
-xfs_scrub_ag_init(
+xchk_ag_init(
 	struct xfs_scrub_context	*sc,
 	xfs_agnumber_t			agno,
-	struct xfs_scrub_ag		*sa)
+	struct xchk_ag		*sa)
 {
 	int				error;
 
 	sa->agno = agno;
-	error = xfs_scrub_ag_read_headers(sc, agno, &sa->agi_bp,
+	error = xchk_ag_read_headers(sc, agno, &sa->agi_bp,
 			&sa->agf_bp, &sa->agfl_bp);
 	if (error)
 		return error;
 
-	return xfs_scrub_ag_btcur_init(sc, sa);
+	return xchk_ag_btcur_init(sc, sa);
 }
 
 /*
  * Grab the per-ag structure if we haven't already gotten it.  Teardown of the
- * xfs_scrub_ag will release it for us.
+ * xchk_ag will release it for us.
  */
 void
-xfs_scrub_perag_get(
+xchk_perag_get(
 	struct xfs_mount	*mp,
-	struct xfs_scrub_ag	*sa)
+	struct xchk_ag	*sa)
 {
 	if (!sa->pag)
 		sa->pag = xfs_perag_get(mp, sa->agno);
@@ -585,7 +585,7 @@ xfs_scrub_perag_get(
  * the metadata object.
  */
 int
-xfs_scrub_trans_alloc(
+xchk_trans_alloc(
 	struct xfs_scrub_context	*sc,
 	uint				resblks)
 {
@@ -598,19 +598,19 @@ xfs_scrub_trans_alloc(
 
 /* Set us up with a transaction and an empty context. */
 int
-xfs_scrub_setup_fs(
+xchk_setup_fs(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
 	uint				resblks;
 
 	resblks = xfs_repair_calc_ag_resblks(sc);
-	return xfs_scrub_trans_alloc(sc, resblks);
+	return xchk_trans_alloc(sc, resblks);
 }
 
 /* Set us up with AG headers and btree cursors. */
 int
-xfs_scrub_setup_ag_btree(
+xchk_setup_ag_btree(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip,
 	bool				force_log)
@@ -625,21 +625,21 @@ xfs_scrub_setup_ag_btree(
 	 * document why they need to do so.
 	 */
 	if (force_log) {
-		error = xfs_scrub_checkpoint_log(mp);
+		error = xchk_checkpoint_log(mp);
 		if (error)
 			return error;
 	}
 
-	error = xfs_scrub_setup_fs(sc, ip);
+	error = xchk_setup_fs(sc, ip);
 	if (error)
 		return error;
 
-	return xfs_scrub_ag_init(sc, sc->sm->sm_agno, &sc->sa);
+	return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
 }
 
 /* Push everything out of the log onto disk. */
 int
-xfs_scrub_checkpoint_log(
+xchk_checkpoint_log(
 	struct xfs_mount	*mp)
 {
 	int			error;
@@ -657,7 +657,7 @@ xfs_scrub_checkpoint_log(
  * The inode is not locked.
  */
 int
-xfs_scrub_get_inode(
+xchk_get_inode(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip_in)
 {
@@ -704,7 +704,7 @@ xfs_scrub_get_inode(
 		error = -EFSCORRUPTED;
 		/* fall through */
 	default:
-		trace_xfs_scrub_op_error(sc,
+		trace_xchk_op_error(sc,
 				XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
 				XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
 				error, __return_address);
@@ -721,21 +721,21 @@ xfs_scrub_get_inode(
 
 /* Set us up to scrub a file's contents. */
 int
-xfs_scrub_setup_inode_contents(
+xchk_setup_inode_contents(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip,
 	unsigned int			resblks)
 {
 	int				error;
 
-	error = xfs_scrub_get_inode(sc, ip);
+	error = xchk_get_inode(sc, ip);
 	if (error)
 		return error;
 
 	/* Got the inode, lock it and we're ready to go. */
 	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
 	xfs_ilock(sc->ip, sc->ilock_flags);
-	error = xfs_scrub_trans_alloc(sc, resblks);
+	error = xchk_trans_alloc(sc, resblks);
 	if (error)
 		goto out;
 	sc->ilock_flags |= XFS_ILOCK_EXCL;
@@ -752,13 +752,13 @@ out:
  * the cursor and skip the check.
  */
 bool
-xfs_scrub_should_check_xref(
+xchk_should_check_xref(
 	struct xfs_scrub_context	*sc,
 	int				*error,
 	struct xfs_btree_cur		**curpp)
 {
 	/* No point in xref if we already know we're corrupt. */
-	if (xfs_scrub_skip_xref(sc->sm))
+	if (xchk_skip_xref(sc->sm))
 		return false;
 
 	if (*error == 0)
@@ -775,7 +775,7 @@ xfs_scrub_should_check_xref(
 	}
 
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
-	trace_xfs_scrub_xref_error(sc, *error, __return_address);
+	trace_xchk_xref_error(sc, *error, __return_address);
 
 	/*
 	 * Errors encountered during cross-referencing with another
@@ -787,25 +787,25 @@ xfs_scrub_should_check_xref(
 
 /* Run the structure verifiers on in-memory buffers to detect bad memory. */
 void
-xfs_scrub_buffer_recheck(
+xchk_buffer_recheck(
 	struct xfs_scrub_context	*sc,
 	struct xfs_buf			*bp)
 {
 	xfs_failaddr_t			fa;
 
 	if (bp->b_ops == NULL) {
-		xfs_scrub_block_set_corrupt(sc, bp);
+		xchk_block_set_corrupt(sc, bp);
 		return;
 	}
 	if (bp->b_ops->verify_struct == NULL) {
-		xfs_scrub_set_incomplete(sc);
+		xchk_set_incomplete(sc);
 		return;
 	}
 	fa = bp->b_ops->verify_struct(bp);
 	if (!fa)
 		return;
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
-	trace_xfs_scrub_block_error(sc, bp->b_bn, fa);
+	trace_xchk_block_error(sc, bp->b_bn, fa);
 }
 
 /*
@@ -813,7 +813,7 @@ xfs_scrub_buffer_recheck(
  * pointed to by sc->ip and the ILOCK must be held.
  */
 int
-xfs_scrub_metadata_inode_forks(
+xchk_metadata_inode_forks(
 	struct xfs_scrub_context	*sc)
 {
 	__u32				smtype;
@@ -825,26 +825,26 @@ xfs_scrub_metadata_inode_forks(
 
 	/* Metadata inodes don't live on the rt device. */
 	if (sc->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
-		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
+		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
 		return 0;
 	}
 
 	/* They should never participate in reflink. */
 	if (xfs_is_reflink_inode(sc->ip)) {
-		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
+		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
 		return 0;
 	}
 
 	/* They also should never have extended attributes. */
 	if (xfs_inode_hasattr(sc->ip)) {
-		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
+		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
 		return 0;
 	}
 
 	/* Invoke the data fork scrubber. */
 	smtype = sc->sm->sm_type;
 	sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
-	error = xfs_scrub_bmap_data(sc);
+	error = xchk_bmap_data(sc);
 	sc->sm->sm_type = smtype;
 	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 		return error;
@@ -853,11 +853,11 @@ xfs_scrub_metadata_inode_forks(
 	if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
 		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
 				&shared);
-		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
+		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
 				&error))
 			return error;
 		if (shared)
-			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
+			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
 	}
 
 	return error;
@@ -871,7 +871,7 @@ xfs_scrub_metadata_inode_forks(
  * we can't.
  */
 int
-xfs_scrub_ilock_inverted(
+xchk_ilock_inverted(
 	struct xfs_inode	*ip,
 	uint			lock_mode)
 {
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 2172bd5361e2..5881cb2ecc26 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -12,7 +12,7 @@
  * Note that we're careful not to make any judgements about *error.
  */
 static inline bool
-xfs_scrub_should_terminate(
+xchk_should_terminate(
 	struct xfs_scrub_context	*sc,
 	int				*error)
 {
@@ -24,121 +24,121 @@ xfs_scrub_should_terminate(
 	return false;
 }
 
-int xfs_scrub_trans_alloc(struct xfs_scrub_context *sc, uint resblks);
-bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+int xchk_trans_alloc(struct xfs_scrub_context *sc, uint resblks);
+bool xchk_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
 		xfs_agblock_t bno, int *error);
-bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork,
+bool xchk_fblock_process_error(struct xfs_scrub_context *sc, int whichfork,
 		xfs_fileoff_t offset, int *error);
 
-bool xfs_scrub_xref_process_error(struct xfs_scrub_context *sc,
+bool xchk_xref_process_error(struct xfs_scrub_context *sc,
 		xfs_agnumber_t agno, xfs_agblock_t bno, int *error);
-bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc,
+bool xchk_fblock_xref_process_error(struct xfs_scrub_context *sc,
 		int whichfork, xfs_fileoff_t offset, int *error);
 
-void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc,
+void xchk_block_set_preen(struct xfs_scrub_context *sc,
 		struct xfs_buf *bp);
-void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino);
+void xchk_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino);
 
-void xfs_scrub_block_set_corrupt(struct xfs_scrub_context *sc,
+void xchk_block_set_corrupt(struct xfs_scrub_context *sc,
 		struct xfs_buf *bp);
-void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino);
-void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork,
+void xchk_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino);
+void xchk_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork,
 		xfs_fileoff_t offset);
 
-void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc,
+void xchk_block_xref_set_corrupt(struct xfs_scrub_context *sc,
 		struct xfs_buf *bp);
-void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc,
+void xchk_ino_xref_set_corrupt(struct xfs_scrub_context *sc,
 		xfs_ino_t ino);
-void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc,
+void xchk_fblock_xref_set_corrupt(struct xfs_scrub_context *sc,
 		int whichfork, xfs_fileoff_t offset);
 
-void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino);
-void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
+void xchk_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino);
+void xchk_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
 		xfs_fileoff_t offset);
 
-void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc);
-int xfs_scrub_checkpoint_log(struct xfs_mount *mp);
+void xchk_set_incomplete(struct xfs_scrub_context *sc);
+int xchk_checkpoint_log(struct xfs_mount *mp);
 
 /* Are we set up for a cross-referencing check? */
-bool xfs_scrub_should_check_xref(struct xfs_scrub_context *sc, int *error,
+bool xchk_should_check_xref(struct xfs_scrub_context *sc, int *error,
 			   struct xfs_btree_cur **curpp);
 
 /* Setup functions */
-int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip);
-int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc,
+int xchk_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip);
+int xchk_setup_ag_allocbt(struct xfs_scrub_context *sc,
 			       struct xfs_inode *ip);
-int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc,
+int xchk_setup_ag_iallocbt(struct xfs_scrub_context *sc,
 				struct xfs_inode *ip);
-int xfs_scrub_setup_ag_rmapbt(struct xfs_scrub_context *sc,
+int xchk_setup_ag_rmapbt(struct xfs_scrub_context *sc,
 			      struct xfs_inode *ip);
-int xfs_scrub_setup_ag_refcountbt(struct xfs_scrub_context *sc,
+int xchk_setup_ag_refcountbt(struct xfs_scrub_context *sc,
 				  struct xfs_inode *ip);
-int xfs_scrub_setup_inode(struct xfs_scrub_context *sc,
+int xchk_setup_inode(struct xfs_scrub_context *sc,
 			  struct xfs_inode *ip);
-int xfs_scrub_setup_inode_bmap(struct xfs_scrub_context *sc,
+int xchk_setup_inode_bmap(struct xfs_scrub_context *sc,
 			       struct xfs_inode *ip);
-int xfs_scrub_setup_inode_bmap_data(struct xfs_scrub_context *sc,
+int xchk_setup_inode_bmap_data(struct xfs_scrub_context *sc,
 				    struct xfs_inode *ip);
-int xfs_scrub_setup_directory(struct xfs_scrub_context *sc,
+int xchk_setup_directory(struct xfs_scrub_context *sc,
 			      struct xfs_inode *ip);
-int xfs_scrub_setup_xattr(struct xfs_scrub_context *sc,
+int xchk_setup_xattr(struct xfs_scrub_context *sc,
 			  struct xfs_inode *ip);
-int xfs_scrub_setup_symlink(struct xfs_scrub_context *sc,
+int xchk_setup_symlink(struct xfs_scrub_context *sc,
 			    struct xfs_inode *ip);
-int xfs_scrub_setup_parent(struct xfs_scrub_context *sc,
+int xchk_setup_parent(struct xfs_scrub_context *sc,
 			   struct xfs_inode *ip);
 #ifdef CONFIG_XFS_RT
-int xfs_scrub_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip);
+int xchk_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip);
 #else
 static inline int
-xfs_scrub_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip)
+xchk_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip)
 {
 	return -ENOENT;
 }
 #endif
 #ifdef CONFIG_XFS_QUOTA
-int xfs_scrub_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip);
+int xchk_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip);
 #else
 static inline int
-xfs_scrub_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip)
+xchk_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip)
 {
 	return -ENOENT;
 }
 #endif
 
-void xfs_scrub_ag_free(struct xfs_scrub_context *sc, struct xfs_scrub_ag *sa);
-int xfs_scrub_ag_init(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
-		      struct xfs_scrub_ag *sa);
-void xfs_scrub_perag_get(struct xfs_mount *mp, struct xfs_scrub_ag *sa);
-int xfs_scrub_ag_read_headers(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+void xchk_ag_free(struct xfs_scrub_context *sc, struct xchk_ag *sa);
+int xchk_ag_init(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+		      struct xchk_ag *sa);
+void xchk_perag_get(struct xfs_mount *mp, struct xchk_ag *sa);
+int xchk_ag_read_headers(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
 			      struct xfs_buf **agi, struct xfs_buf **agf,
 			      struct xfs_buf **agfl);
-void xfs_scrub_ag_btcur_free(struct xfs_scrub_ag *sa);
-int xfs_scrub_ag_btcur_init(struct xfs_scrub_context *sc,
-			    struct xfs_scrub_ag *sa);
-int xfs_scrub_count_rmap_ownedby_ag(struct xfs_scrub_context *sc,
+void xchk_ag_btcur_free(struct xchk_ag *sa);
+int xchk_ag_btcur_init(struct xfs_scrub_context *sc,
+			    struct xchk_ag *sa);
+int xchk_count_rmap_ownedby_ag(struct xfs_scrub_context *sc,
 				    struct xfs_btree_cur *cur,
 				    struct xfs_owner_info *oinfo,
 				    xfs_filblks_t *blocks);
 
-int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc,
+int xchk_setup_ag_btree(struct xfs_scrub_context *sc,
 			     struct xfs_inode *ip, bool force_log);
-int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in);
-int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc,
+int xchk_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in);
+int xchk_setup_inode_contents(struct xfs_scrub_context *sc,
 				   struct xfs_inode *ip, unsigned int resblks);
-void xfs_scrub_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp);
+void xchk_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp);
 
 /*
  * Don't bother cross-referencing if we already found corruption or cross
  * referencing discrepancies.
  */
-static inline bool xfs_scrub_skip_xref(struct xfs_scrub_metadata *sm)
+static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
 {
 	return sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
 			       XFS_SCRUB_OFLAG_XCORRUPT);
 }
 
-int xfs_scrub_metadata_inode_forks(struct xfs_scrub_context *sc);
-int xfs_scrub_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
+int xchk_metadata_inode_forks(struct xfs_scrub_context *sc);
+int xchk_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
 
 #endif	/* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index d700c4d4d4ef..fee80f6ddfd7 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -35,8 +35,8 @@
  * operational errors in common.c.
  */
 bool
-xfs_scrub_da_process_error(
-	struct xfs_scrub_da_btree	*ds,
+xchk_da_process_error(
+	struct xchk_da_btree		*ds,
 	int				level,
 	int				*error)
 {
@@ -48,7 +48,7 @@ xfs_scrub_da_process_error(
 	switch (*error) {
 	case -EDEADLOCK:
 		/* Used to restart an op with deadlock avoidance. */
-		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
+		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
 		break;
 	case -EFSBADCRC:
 	case -EFSCORRUPTED:
@@ -57,7 +57,7 @@ xfs_scrub_da_process_error(
 		*error = 0;
 		/* fall through */
 	default:
-		trace_xfs_scrub_file_op_error(sc, ds->dargs.whichfork,
+		trace_xchk_file_op_error(sc, ds->dargs.whichfork,
 				xfs_dir2_da_to_db(ds->dargs.geo,
 					ds->state->path.blk[level].blkno),
 				*error, __return_address);
@@ -71,15 +71,15 @@ xfs_scrub_da_process_error(
  * operational errors in common.c.
  */
 void
-xfs_scrub_da_set_corrupt(
-	struct xfs_scrub_da_btree	*ds,
+xchk_da_set_corrupt(
+	struct xchk_da_btree		*ds,
 	int				level)
 {
 	struct xfs_scrub_context	*sc = ds->sc;
 
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 
-	trace_xfs_scrub_fblock_error(sc, ds->dargs.whichfork,
+	trace_xchk_fblock_error(sc, ds->dargs.whichfork,
 			xfs_dir2_da_to_db(ds->dargs.geo,
 				ds->state->path.blk[level].blkno),
 			__return_address);
@@ -87,8 +87,8 @@ xfs_scrub_da_set_corrupt(
 
 /* Find an entry at a certain level in a da btree. */
 STATIC void *
-xfs_scrub_da_btree_entry(
-	struct xfs_scrub_da_btree	*ds,
+xchk_da_btree_entry(
+	struct xchk_da_btree		*ds,
 	int				level,
 	int				rec)
 {
@@ -123,8 +123,8 @@ xfs_scrub_da_btree_entry(
 
 /* Scrub a da btree hash (key). */
 int
-xfs_scrub_da_btree_hash(
-	struct xfs_scrub_da_btree	*ds,
+xchk_da_btree_hash(
+	struct xchk_da_btree		*ds,
 	int				level,
 	__be32				*hashp)
 {
@@ -136,7 +136,7 @@ xfs_scrub_da_btree_hash(
 	/* Is this hash in order? */
 	hash = be32_to_cpu(*hashp);
 	if (hash < ds->hashes[level])
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 	ds->hashes[level] = hash;
 
 	if (level == 0)
@@ -144,10 +144,10 @@ xfs_scrub_da_btree_hash(
 
 	/* Is this hash no larger than the parent hash? */
 	blks = ds->state->path.blk;
-	entry = xfs_scrub_da_btree_entry(ds, level - 1, blks[level - 1].index);
+	entry = xchk_da_btree_entry(ds, level - 1, blks[level - 1].index);
 	parent_hash = be32_to_cpu(entry->hashval);
 	if (parent_hash < hash)
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 
 	return 0;
 }
@@ -157,13 +157,13 @@ xfs_scrub_da_btree_hash(
  * pointer.
  */
 STATIC bool
-xfs_scrub_da_btree_ptr_ok(
-	struct xfs_scrub_da_btree	*ds,
+xchk_da_btree_ptr_ok(
+	struct xchk_da_btree		*ds,
 	int				level,
 	xfs_dablk_t			blkno)
 {
 	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 		return false;
 	}
 
@@ -176,7 +176,7 @@ xfs_scrub_da_btree_ptr_ok(
  * leaf1, we must multiplex the verifiers.
  */
 static void
-xfs_scrub_da_btree_read_verify(
+xchk_da_btree_read_verify(
 	struct xfs_buf		*bp)
 {
 	struct xfs_da_blkinfo	*info = bp->b_addr;
@@ -198,7 +198,7 @@ xfs_scrub_da_btree_read_verify(
 	}
 }
 static void
-xfs_scrub_da_btree_write_verify(
+xchk_da_btree_write_verify(
 	struct xfs_buf		*bp)
 {
 	struct xfs_da_blkinfo	*info = bp->b_addr;
@@ -220,7 +220,7 @@ xfs_scrub_da_btree_write_verify(
 	}
 }
 static void *
-xfs_scrub_da_btree_verify(
+xchk_da_btree_verify(
 	struct xfs_buf		*bp)
 {
 	struct xfs_da_blkinfo	*info = bp->b_addr;
@@ -236,17 +236,17 @@ xfs_scrub_da_btree_verify(
 	}
 }
 
-static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = {
-	.name = "xfs_scrub_da_btree",
-	.verify_read = xfs_scrub_da_btree_read_verify,
-	.verify_write = xfs_scrub_da_btree_write_verify,
-	.verify_struct = xfs_scrub_da_btree_verify,
+static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
+	.name = "xchk_da_btree",
+	.verify_read = xchk_da_btree_read_verify,
+	.verify_write = xchk_da_btree_write_verify,
+	.verify_struct = xchk_da_btree_verify,
 };
 
 /* Check a block's sibling. */
 STATIC int
-xfs_scrub_da_btree_block_check_sibling(
-	struct xfs_scrub_da_btree	*ds,
+xchk_da_btree_block_check_sibling(
+	struct xchk_da_btree		*ds,
 	int				level,
 	int				direction,
 	xfs_dablk_t			sibling)
@@ -265,7 +265,7 @@ xfs_scrub_da_btree_block_check_sibling(
 		error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
 				direction, false, &retval);
 		if (error == 0 && retval == 0)
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 		error = 0;
 		goto out;
 	}
@@ -273,19 +273,19 @@ xfs_scrub_da_btree_block_check_sibling(
 	/* Move the alternate cursor one block in the direction given. */
 	error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
 			direction, false, &retval);
-	if (!xfs_scrub_da_process_error(ds, level, &error))
+	if (!xchk_da_process_error(ds, level, &error))
 		return error;
 	if (retval) {
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 		return error;
 	}
 	if (ds->state->altpath.blk[level].bp)
-		xfs_scrub_buffer_recheck(ds->sc,
+		xchk_buffer_recheck(ds->sc,
 				ds->state->altpath.blk[level].bp);
 
 	/* Compare upper level pointer to sibling pointer. */
 	if (ds->state->altpath.blk[level].blkno != sibling)
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 	xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp);
 out:
 	return error;
@@ -293,8 +293,8 @@ out:
 
 /* Check a block's sibling pointers. */
 STATIC int
-xfs_scrub_da_btree_block_check_siblings(
-	struct xfs_scrub_da_btree	*ds,
+xchk_da_btree_block_check_siblings(
+	struct xchk_da_btree		*ds,
 	int				level,
 	struct xfs_da_blkinfo		*hdr)
 {
@@ -308,7 +308,7 @@ xfs_scrub_da_btree_block_check_siblings(
 	/* Top level blocks should not have sibling pointers. */
 	if (level == 0) {
 		if (forw != 0 || back != 0)
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 		return 0;
 	}
 
@@ -316,10 +316,10 @@ xfs_scrub_da_btree_block_check_siblings(
 	 * Check back (left) and forw (right) pointers.  These functions
 	 * absorb error codes for us.
 	 */
-	error = xfs_scrub_da_btree_block_check_sibling(ds, level, 0, back);
+	error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
 	if (error)
 		goto out;
-	error = xfs_scrub_da_btree_block_check_sibling(ds, level, 1, forw);
+	error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
 
 out:
 	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
@@ -328,8 +328,8 @@ out:
 
 /* Load a dir/attribute block from a btree. */
 STATIC int
-xfs_scrub_da_btree_block(
-	struct xfs_scrub_da_btree	*ds,
+xchk_da_btree_block(
+	struct xchk_da_btree		*ds,
 	int				level,
 	xfs_dablk_t			blkno)
 {
@@ -355,17 +355,17 @@ xfs_scrub_da_btree_block(
 
 	/* Check the pointer. */
 	blk->blkno = blkno;
-	if (!xfs_scrub_da_btree_ptr_ok(ds, level, blkno))
+	if (!xchk_da_btree_ptr_ok(ds, level, blkno))
 		goto out_nobuf;
 
 	/* Read the buffer. */
 	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2,
 			&blk->bp, dargs->whichfork,
-			&xfs_scrub_da_btree_buf_ops);
-	if (!xfs_scrub_da_process_error(ds, level, &error))
+			&xchk_da_btree_buf_ops);
+	if (!xchk_da_process_error(ds, level, &error))
 		goto out_nobuf;
 	if (blk->bp)
-		xfs_scrub_buffer_recheck(ds->sc, blk->bp);
+		xchk_buffer_recheck(ds->sc, blk->bp);
 
 	/*
 	 * We didn't find a dir btree root block, which means that
@@ -378,7 +378,7 @@ xfs_scrub_da_btree_block(
 
 	/* It's /not/ ok for attr trees not to have a da btree. */
 	if (blk->bp == NULL) {
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 		goto out_nobuf;
 	}
 
@@ -388,17 +388,17 @@ xfs_scrub_da_btree_block(
 
 	/* We only started zeroing the header on v5 filesystems. */
 	if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 
 	/* Check the owner. */
 	if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
 		owner = be64_to_cpu(hdr3->owner);
 		if (owner != ip->i_ino)
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 	}
 
 	/* Check the siblings. */
-	error = xfs_scrub_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
+	error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
 	if (error)
 		goto out;
 
@@ -411,7 +411,7 @@ xfs_scrub_da_btree_block(
 		blk->magic = XFS_ATTR_LEAF_MAGIC;
 		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
 		if (ds->tree_level != 0)
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 		break;
 	case XFS_DIR2_LEAFN_MAGIC:
 	case XFS_DIR3_LEAFN_MAGIC:
@@ -420,7 +420,7 @@ xfs_scrub_da_btree_block(
 		blk->magic = XFS_DIR2_LEAFN_MAGIC;
 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
 		if (ds->tree_level != 0)
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 		break;
 	case XFS_DIR2_LEAF1_MAGIC:
 	case XFS_DIR3_LEAF1_MAGIC:
@@ -429,7 +429,7 @@ xfs_scrub_da_btree_block(
 		blk->magic = XFS_DIR2_LEAF1_MAGIC;
 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
 		if (ds->tree_level != 0)
-			xfs_scrub_da_set_corrupt(ds, level);
+			xchk_da_set_corrupt(ds, level);
 		break;
 	case XFS_DA_NODE_MAGIC:
 	case XFS_DA3_NODE_MAGIC:
@@ -443,13 +443,13 @@ xfs_scrub_da_btree_block(
 		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
 		if (level == 0) {
 			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
-				xfs_scrub_da_set_corrupt(ds, level);
+				xchk_da_set_corrupt(ds, level);
 				goto out_freebp;
 			}
 			ds->tree_level = nodehdr.level;
 		} else {
 			if (ds->tree_level != nodehdr.level) {
-				xfs_scrub_da_set_corrupt(ds, level);
+				xchk_da_set_corrupt(ds, level);
 				goto out_freebp;
 			}
 		}
@@ -457,7 +457,7 @@ xfs_scrub_da_btree_block(
 		/* XXX: Check hdr3.pad32 once we know how to fix it. */
 		break;
 	default:
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 		goto out_freebp;
 	}
 
@@ -473,13 +473,13 @@ out_nobuf:
 
 /* Visit all nodes and leaves of a da btree. */
 int
-xfs_scrub_da_btree(
+xchk_da_btree(
 	struct xfs_scrub_context	*sc,
 	int				whichfork,
-	xfs_scrub_da_btree_rec_fn	scrub_fn,
+	xchk_da_btree_rec_fn	scrub_fn,
 	void				*private)
 {
-	struct xfs_scrub_da_btree	ds = {};
+	struct xchk_da_btree		ds = {};
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_da_state_blk		*blks;
 	struct xfs_da_node_entry	*key;
@@ -517,7 +517,7 @@ xfs_scrub_da_btree(
 
 	/* Find the root of the da tree, if present. */
 	blks = ds.state->path.blk;
-	error = xfs_scrub_da_btree_block(&ds, level, blkno);
+	error = xchk_da_btree_block(&ds, level, blkno);
 	if (error)
 		goto out_state;
 	/*
@@ -542,12 +542,12 @@ xfs_scrub_da_btree(
 			}
 
 			/* Dispatch record scrubbing. */
-			rec = xfs_scrub_da_btree_entry(&ds, level,
+			rec = xchk_da_btree_entry(&ds, level,
 					blks[level].index);
 			error = scrub_fn(&ds, level, rec);
 			if (error)
 				break;
-			if (xfs_scrub_should_terminate(sc, &error) ||
+			if (xchk_should_terminate(sc, &error) ||
 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 				break;
 
@@ -566,8 +566,8 @@ xfs_scrub_da_btree(
 		}
 
 		/* Hashes in order for scrub? */
-		key = xfs_scrub_da_btree_entry(&ds, level, blks[level].index);
-		error = xfs_scrub_da_btree_hash(&ds, level, &key->hashval);
+		key = xchk_da_btree_entry(&ds, level, blks[level].index);
+		error = xchk_da_btree_hash(&ds, level, &key->hashval);
 		if (error)
 			goto out;
 
@@ -575,7 +575,7 @@ xfs_scrub_da_btree(
 		blkno = be32_to_cpu(key->before);
 		level++;
 		ds.tree_level--;
-		error = xfs_scrub_da_btree_block(&ds, level, blkno);
+		error = xchk_da_btree_block(&ds, level, blkno);
 		if (error)
 			goto out;
 		if (blks[level].bp == NULL)
diff --git a/fs/xfs/scrub/dabtree.h b/fs/xfs/scrub/dabtree.h
index 365f9f0019e6..80e4af0e2589 100644
--- a/fs/xfs/scrub/dabtree.h
+++ b/fs/xfs/scrub/dabtree.h
@@ -8,7 +8,7 @@
 
 /* dir/attr btree */
 
-struct xfs_scrub_da_btree {
+struct xchk_da_btree {
 	struct xfs_da_args		dargs;
 	xfs_dahash_t			hashes[XFS_DA_NODE_MAXDEPTH];
 	int				maxrecs[XFS_DA_NODE_MAXDEPTH];
@@ -28,18 +28,18 @@ struct xfs_scrub_da_btree {
 	int				tree_level;
 };
 
-typedef int (*xfs_scrub_da_btree_rec_fn)(struct xfs_scrub_da_btree *ds,
+typedef int (*xchk_da_btree_rec_fn)(struct xchk_da_btree *ds,
 		int level, void *rec);
 
 /* Check for da btree operation errors. */
-bool xfs_scrub_da_process_error(struct xfs_scrub_da_btree *ds, int level, int *error);
+bool xchk_da_process_error(struct xchk_da_btree *ds, int level, int *error);
 
 /* Check for da btree corruption. */
-void xfs_scrub_da_set_corrupt(struct xfs_scrub_da_btree *ds, int level);
+void xchk_da_set_corrupt(struct xchk_da_btree *ds, int level);
 
-int xfs_scrub_da_btree_hash(struct xfs_scrub_da_btree *ds, int level,
+int xchk_da_btree_hash(struct xchk_da_btree *ds, int level,
 			    __be32 *hashp);
-int xfs_scrub_da_btree(struct xfs_scrub_context *sc, int whichfork,
-		       xfs_scrub_da_btree_rec_fn scrub_fn, void *private);
+int xchk_da_btree(struct xfs_scrub_context *sc, int whichfork,
+		       xchk_da_btree_rec_fn scrub_fn, void *private);
 
 #endif /* __XFS_SCRUB_DABTREE_H__ */
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 86324775fc9b..2ac07bb73478 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -31,18 +31,18 @@
 
 /* Set us up to scrub directories. */
 int
-xfs_scrub_setup_directory(
+xchk_setup_directory(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
-	return xfs_scrub_setup_inode_contents(sc, ip, 0);
+	return xchk_setup_inode_contents(sc, ip, 0);
 }
 
 /* Directories */
 
 /* Scrub a directory entry. */
 
-struct xfs_scrub_dir_ctx {
+struct xchk_dir_ctx {
 	/* VFS fill-directory iterator */
 	struct dir_context		dir_iter;
 
@@ -51,8 +51,8 @@ struct xfs_scrub_dir_ctx {
 
 /* Check that an inode's mode matches a given DT_ type. */
 STATIC int
-xfs_scrub_dir_check_ftype(
-	struct xfs_scrub_dir_ctx	*sdc,
+xchk_dir_check_ftype(
+	struct xchk_dir_ctx		*sdc,
 	xfs_fileoff_t			offset,
 	xfs_ino_t			inum,
 	int				dtype)
@@ -64,7 +64,7 @@ xfs_scrub_dir_check_ftype(
 
 	if (!xfs_sb_version_hasftype(&mp->m_sb)) {
 		if (dtype != DT_UNKNOWN && dtype != DT_DIR)
-			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
+			xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 					offset);
 		goto out;
 	}
@@ -78,7 +78,7 @@ xfs_scrub_dir_check_ftype(
 	 * inodes can trigger immediate inactive cleanup of the inode.
 	 */
 	error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip);
-	if (!xfs_scrub_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset,
+	if (!xchk_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset,
 			&error))
 		goto out;
 
@@ -86,7 +86,7 @@ xfs_scrub_dir_check_ftype(
 	ino_dtype = xfs_dir3_get_dtype(mp,
 			xfs_mode_to_ftype(VFS_I(ip)->i_mode));
 	if (ino_dtype != dtype)
-		xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
 	iput(VFS_I(ip));
 out:
 	return error;
@@ -101,7 +101,7 @@ out:
  * we can look up this filename.  Finally, we check the ftype.
  */
 STATIC int
-xfs_scrub_dir_actor(
+xchk_dir_actor(
 	struct dir_context		*dir_iter,
 	const char			*name,
 	int				namelen,
@@ -111,13 +111,13 @@ xfs_scrub_dir_actor(
 {
 	struct xfs_mount		*mp;
 	struct xfs_inode		*ip;
-	struct xfs_scrub_dir_ctx	*sdc;
+	struct xchk_dir_ctx		*sdc;
 	struct xfs_name			xname;
 	xfs_ino_t			lookup_ino;
 	xfs_dablk_t			offset;
 	int				error = 0;
 
-	sdc = container_of(dir_iter, struct xfs_scrub_dir_ctx, dir_iter);
+	sdc = container_of(dir_iter, struct xchk_dir_ctx, dir_iter);
 	ip = sdc->sc->ip;
 	mp = ip->i_mount;
 	offset = xfs_dir2_db_to_da(mp->m_dir_geo,
@@ -125,17 +125,17 @@ xfs_scrub_dir_actor(
 
 	/* Does this inode number make sense? */
 	if (!xfs_verify_dir_ino(mp, ino)) {
-		xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
 		goto out;
 	}
 
 	if (!strncmp(".", name, namelen)) {
 		/* If this is "." then check that the inum matches the dir. */
 		if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
-			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
+			xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 					offset);
 		if (ino != ip->i_ino)
-			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
+			xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 					offset);
 	} else if (!strncmp("..", name, namelen)) {
 		/*
@@ -143,10 +143,10 @@ xfs_scrub_dir_actor(
 		 * matches this dir.
 		 */
 		if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
-			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
+			xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 					offset);
 		if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino)
-			xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
+			xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 					offset);
 	}
 
@@ -156,23 +156,23 @@ xfs_scrub_dir_actor(
 	xname.type = XFS_DIR3_FT_UNKNOWN;
 
 	error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL);
-	if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset,
+	if (!xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset,
 			&error))
 		goto out;
 	if (lookup_ino != ino) {
-		xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
 		goto out;
 	}
 
 	/* Verify the file type.  This function absorbs error codes. */
-	error = xfs_scrub_dir_check_ftype(sdc, offset, lookup_ino, type);
+	error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type);
 	if (error)
 		goto out;
 out:
 	/*
 	 * A negative error code returned here is supposed to cause the
 	 * dir_emit caller (xfs_readdir) to abort the directory iteration
-	 * and return zero to xfs_scrub_directory.
+	 * and return zero to xchk_directory.
 	 */
 	if (error == 0 && sdc->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return -EFSCORRUPTED;
@@ -181,8 +181,8 @@ out:
 
 /* Scrub a directory btree record. */
 STATIC int
-xfs_scrub_dir_rec(
-	struct xfs_scrub_da_btree	*ds,
+xchk_dir_rec(
+	struct xchk_da_btree		*ds,
 	int				level,
 	void				*rec)
 {
@@ -203,7 +203,7 @@ xfs_scrub_dir_rec(
 	int				error;
 
 	/* Check the hash of the entry. */
-	error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval);
+	error = xchk_da_btree_hash(ds, level, &ent->hashval);
 	if (error)
 		goto out;
 
@@ -218,18 +218,18 @@ xfs_scrub_dir_rec(
 	rec_bno = xfs_dir2_db_to_da(mp->m_dir_geo, db);
 
 	if (rec_bno >= mp->m_dir_geo->leafblk) {
-		xfs_scrub_da_set_corrupt(ds, level);
+		xchk_da_set_corrupt(ds, level);
 		goto out;
 	}
 	error = xfs_dir3_data_read(ds->dargs.trans, dp, rec_bno, -2, &bp);
-	if (!xfs_scrub_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno,
+	if (!xchk_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno,
 			&error))
 		goto out;
 	if (!bp) {
-		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+		xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
 		goto out;
 	}
-	xfs_scrub_buffer_recheck(ds->sc, bp);
+	xchk_buffer_recheck(ds->sc, bp);
 
 	if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		goto out_relse;
@@ -240,7 +240,7 @@ xfs_scrub_dir_rec(
 	p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr);
 	endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr);
 	if (!endp) {
-		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+		xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
 		goto out_relse;
 	}
 	while (p < endp) {
@@ -258,7 +258,7 @@ xfs_scrub_dir_rec(
 		p += mp->m_dir_inode_ops->data_entsize(dep->namelen);
 	}
 	if (p >= endp) {
-		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+		xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
 		goto out_relse;
 	}
 
@@ -267,14 +267,14 @@ xfs_scrub_dir_rec(
 	hash = be32_to_cpu(ent->hashval);
 	tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent));
 	if (!xfs_verify_dir_ino(mp, ino) || tag != off)
-		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+		xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
 	if (dent->namelen == 0) {
-		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+		xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
 		goto out_relse;
 	}
 	calc_hash = xfs_da_hashname(dent->name, dent->namelen);
 	if (calc_hash != hash)
-		xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+		xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
 
 out_relse:
 	xfs_trans_brelse(ds->dargs.trans, bp);
@@ -288,7 +288,7 @@ out:
  * shortest, and that there aren't any bogus entries.
  */
 STATIC void
-xfs_scrub_directory_check_free_entry(
+xchk_directory_check_free_entry(
 	struct xfs_scrub_context	*sc,
 	xfs_dablk_t			lblk,
 	struct xfs_dir2_data_free	*bf,
@@ -308,12 +308,12 @@ xfs_scrub_directory_check_free_entry(
 			return;
 
 	/* Unused entry should be in the bestfrees but wasn't found. */
-	xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+	xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 }
 
 /* Check free space info in a directory data block. */
 STATIC int
-xfs_scrub_directory_data_bestfree(
+xchk_directory_data_bestfree(
 	struct xfs_scrub_context	*sc,
 	xfs_dablk_t			lblk,
 	bool				is_block)
@@ -339,15 +339,15 @@ xfs_scrub_directory_data_bestfree(
 	if (is_block) {
 		/* dir block format */
 		if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET))
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 		error = xfs_dir3_block_read(sc->tp, sc->ip, &bp);
 	} else {
 		/* dir data format */
 		error = xfs_dir3_data_read(sc->tp, sc->ip, lblk, -1, &bp);
 	}
-	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
+	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
 		goto out;
-	xfs_scrub_buffer_recheck(sc, bp);
+	xchk_buffer_recheck(sc, bp);
 
 	/* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */
 
@@ -362,7 +362,7 @@ xfs_scrub_directory_data_bestfree(
 		if (offset == 0)
 			continue;
 		if (offset >= mp->m_dir_geo->blksize) {
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 			goto out_buf;
 		}
 		dup = (struct xfs_dir2_data_unused *)(bp->b_addr + offset);
@@ -372,13 +372,13 @@ xfs_scrub_directory_data_bestfree(
 		if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) ||
 		    be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) ||
 		    tag != ((char *)dup - (char *)bp->b_addr)) {
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 			goto out_buf;
 		}
 
 		/* bestfree records should be ordered largest to smallest */
 		if (smallest_bestfree < be16_to_cpu(dfp->length)) {
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 			goto out_buf;
 		}
 
@@ -400,7 +400,7 @@ xfs_scrub_directory_data_bestfree(
 			dep = (struct xfs_dir2_data_entry *)ptr;
 			newlen = d_ops->data_entsize(dep->namelen);
 			if (newlen <= 0) {
-				xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
+				xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
 						lblk);
 				goto out_buf;
 			}
@@ -411,7 +411,7 @@ xfs_scrub_directory_data_bestfree(
 		/* Spot check this free entry */
 		tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
 		if (tag != ((char *)dup - (char *)bp->b_addr)) {
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 			goto out_buf;
 		}
 
@@ -419,14 +419,14 @@ xfs_scrub_directory_data_bestfree(
 		 * Either this entry is a bestfree or it's smaller than
 		 * any of the bestfrees.
 		 */
-		xfs_scrub_directory_check_free_entry(sc, lblk, bf, dup);
+		xchk_directory_check_free_entry(sc, lblk, bf, dup);
 		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 			goto out_buf;
 
 		/* Move on. */
 		newlen = be16_to_cpu(dup->length);
 		if (newlen <= 0) {
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 			goto out_buf;
 		}
 		ptr += newlen;
@@ -436,11 +436,11 @@ xfs_scrub_directory_data_bestfree(
 
 	/* We're required to fill all the space. */
 	if (ptr != endptr)
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 
 	/* Did we see at least as many free slots as there are bestfrees? */
 	if (nr_frees < nr_bestfrees)
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 out_buf:
 	xfs_trans_brelse(sc->tp, bp);
 out:
@@ -454,7 +454,7 @@ out:
  * array is in order.
  */
 STATIC void
-xfs_scrub_directory_check_freesp(
+xchk_directory_check_freesp(
 	struct xfs_scrub_context	*sc,
 	xfs_dablk_t			lblk,
 	struct xfs_buf			*dbp,
@@ -465,15 +465,15 @@ xfs_scrub_directory_check_freesp(
 	dfp = sc->ip->d_ops->data_bestfree_p(dbp->b_addr);
 
 	if (len != be16_to_cpu(dfp->length))
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 
 	if (len > 0 && be16_to_cpu(dfp->offset) == 0)
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 }
 
 /* Check free space info in a directory leaf1 block. */
 STATIC int
-xfs_scrub_directory_leaf1_bestfree(
+xchk_directory_leaf1_bestfree(
 	struct xfs_scrub_context	*sc,
 	struct xfs_da_args		*args,
 	xfs_dablk_t			lblk)
@@ -497,9 +497,9 @@ xfs_scrub_directory_leaf1_bestfree(
 
 	/* Read the free space block. */
 	error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp);
-	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
+	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
 		goto out;
-	xfs_scrub_buffer_recheck(sc, bp);
+	xchk_buffer_recheck(sc, bp);
 
 	leaf = bp->b_addr;
 	d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
@@ -512,7 +512,7 @@ xfs_scrub_directory_leaf1_bestfree(
 		struct xfs_dir3_leaf_hdr	*hdr3 = bp->b_addr;
 
 		if (hdr3->pad != cpu_to_be32(0))
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 	}
 
 	/*
@@ -520,19 +520,19 @@ xfs_scrub_directory_leaf1_bestfree(
 	 * blocks that can fit under i_size.
 	 */
 	if (bestcount != xfs_dir2_byte_to_db(geo, sc->ip->i_d.di_size)) {
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 		goto out;
 	}
 
 	/* Is the leaf count even remotely sane? */
 	if (leafhdr.count > d_ops->leaf_max_ents(geo)) {
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 		goto out;
 	}
 
 	/* Leaves and bests don't overlap in leaf format. */
 	if ((char *)&ents[leafhdr.count] > (char *)bestp) {
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 		goto out;
 	}
 
@@ -540,13 +540,13 @@ xfs_scrub_directory_leaf1_bestfree(
 	for (i = 0; i < leafhdr.count; i++) {
 		hash = be32_to_cpu(ents[i].hashval);
 		if (i > 0 && lasthash > hash)
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 		lasthash = hash;
 		if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
 			stale++;
 	}
 	if (leafhdr.stale != stale)
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		goto out;
 
@@ -557,10 +557,10 @@ xfs_scrub_directory_leaf1_bestfree(
 			continue;
 		error = xfs_dir3_data_read(sc->tp, sc->ip,
 				i * args->geo->fsbcount, -1, &dbp);
-		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk,
+		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
 				&error))
 			break;
-		xfs_scrub_directory_check_freesp(sc, lblk, dbp, best);
+		xchk_directory_check_freesp(sc, lblk, dbp, best);
 		xfs_trans_brelse(sc->tp, dbp);
 		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 			goto out;
@@ -571,7 +571,7 @@ out:
 
 /* Check free space info in a directory freespace block. */
 STATIC int
-xfs_scrub_directory_free_bestfree(
+xchk_directory_free_bestfree(
 	struct xfs_scrub_context	*sc,
 	struct xfs_da_args		*args,
 	xfs_dablk_t			lblk)
@@ -587,15 +587,15 @@ xfs_scrub_directory_free_bestfree(
 
 	/* Read the free space block */
 	error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp);
-	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
+	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
 		goto out;
-	xfs_scrub_buffer_recheck(sc, bp);
+	xchk_buffer_recheck(sc, bp);
 
 	if (xfs_sb_version_hascrc(&sc->mp->m_sb)) {
 		struct xfs_dir3_free_hdr	*hdr3 = bp->b_addr;
 
 		if (hdr3->pad != cpu_to_be32(0))
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 	}
 
 	/* Check all the entries. */
@@ -610,22 +610,22 @@ xfs_scrub_directory_free_bestfree(
 		error = xfs_dir3_data_read(sc->tp, sc->ip,
 				(freehdr.firstdb + i) * args->geo->fsbcount,
 				-1, &dbp);
-		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk,
+		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
 				&error))
 			break;
-		xfs_scrub_directory_check_freesp(sc, lblk, dbp, best);
+		xchk_directory_check_freesp(sc, lblk, dbp, best);
 		xfs_trans_brelse(sc->tp, dbp);
 	}
 
 	if (freehdr.nused + stale != freehdr.nvalid)
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 out:
 	return error;
 }
 
 /* Check free space information in directories. */
 STATIC int
-xfs_scrub_directory_blocks(
+xchk_directory_blocks(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_bmbt_irec		got;
@@ -656,7 +656,7 @@ xfs_scrub_directory_blocks(
 	args.geo = mp->m_dir_geo;
 	args.trans = sc->tp;
 	error = xfs_dir2_isblock(&args, &is_block);
-	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
+	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
 		goto out;
 
 	/* Iterate all the data extents in the directory... */
@@ -666,7 +666,7 @@ xfs_scrub_directory_blocks(
 		if (is_block &&
 		    (got.br_startoff > 0 ||
 		     got.br_blockcount != args.geo->fsbcount)) {
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
 					got.br_startoff);
 			break;
 		}
@@ -690,7 +690,7 @@ xfs_scrub_directory_blocks(
 				args.geo->fsbcount);
 		     lblk < got.br_startoff + got.br_blockcount;
 		     lblk += args.geo->fsbcount) {
-			error = xfs_scrub_directory_data_bestfree(sc, lblk,
+			error = xchk_directory_data_bestfree(sc, lblk,
 					is_block);
 			if (error)
 				goto out;
@@ -709,10 +709,10 @@ xfs_scrub_directory_blocks(
 	    got.br_blockcount == args.geo->fsbcount &&
 	    !xfs_iext_next_extent(ifp, &icur, &got)) {
 		if (is_block) {
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 			goto out;
 		}
-		error = xfs_scrub_directory_leaf1_bestfree(sc, &args,
+		error = xchk_directory_leaf1_bestfree(sc, &args,
 				leaf_lblk);
 		if (error)
 			goto out;
@@ -731,11 +731,11 @@ xfs_scrub_directory_blocks(
 		 */
 		lblk = got.br_startoff;
 		if (lblk & ~0xFFFFFFFFULL) {
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 			goto out;
 		}
 		if (is_block) {
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
 			goto out;
 		}
 
@@ -754,7 +754,7 @@ xfs_scrub_directory_blocks(
 				args.geo->fsbcount);
 		     lblk < got.br_startoff + got.br_blockcount;
 		     lblk += args.geo->fsbcount) {
-			error = xfs_scrub_directory_free_bestfree(sc, &args,
+			error = xchk_directory_free_bestfree(sc, &args,
 					lblk);
 			if (error)
 				goto out;
@@ -769,11 +769,11 @@ out:
 
 /* Scrub a whole directory. */
 int
-xfs_scrub_directory(
+xchk_directory(
 	struct xfs_scrub_context	*sc)
 {
-	struct xfs_scrub_dir_ctx	sdc = {
-		.dir_iter.actor = xfs_scrub_dir_actor,
+	struct xchk_dir_ctx		sdc = {
+		.dir_iter.actor = xchk_dir_actor,
 		.dir_iter.pos = 0,
 		.sc = sc,
 	};
@@ -786,12 +786,12 @@ xfs_scrub_directory(
 
 	/* Plausible size? */
 	if (sc->ip->i_d.di_size < xfs_dir2_sf_hdr_size(0)) {
-		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
+		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
 		goto out;
 	}
 
 	/* Check directory tree structure */
-	error = xfs_scrub_da_btree(sc, XFS_DATA_FORK, xfs_scrub_dir_rec, NULL);
+	error = xchk_da_btree(sc, XFS_DATA_FORK, xchk_dir_rec, NULL);
 	if (error)
 		return error;
 
@@ -799,7 +799,7 @@ xfs_scrub_directory(
 		return error;
 
 	/* Check the freespace. */
-	error = xfs_scrub_directory_blocks(sc);
+	error = xchk_directory_blocks(sc);
 	if (error)
 		return error;
 
@@ -816,7 +816,7 @@ xfs_scrub_directory(
 	/*
 	 * Look up every name in this directory by hash.
 	 *
-	 * Use the xfs_readdir function to call xfs_scrub_dir_actor on
+	 * Use the xfs_readdir function to call xchk_dir_actor on
 	 * every directory entry in this directory.  In _actor, we check
 	 * the name, inode number, and ftype (if applicable) of the
 	 * entry.  xfs_readdir uses the VFS filldir functions to provide
@@ -834,7 +834,7 @@ xfs_scrub_directory(
 	xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
 	while (true) {
 		error = xfs_readdir(sc->tp, sc->ip, &sdc.dir_iter, bufsize);
-		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
+		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
 				&error))
 			goto out;
 		if (oldpos == sdc.dir_iter.pos)
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 13d43d108574..69d652b7299c 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -35,11 +35,11 @@
  * try again after forcing logged inode cores out to disk.
  */
 int
-xfs_scrub_setup_ag_iallocbt(
+xchk_setup_ag_iallocbt(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
-	return xfs_scrub_setup_ag_btree(sc, ip, sc->try_harder);
+	return xchk_setup_ag_btree(sc, ip, sc->try_harder);
 }
 
 /* Inode btree scrubber. */
@@ -50,7 +50,7 @@ xfs_scrub_setup_ag_iallocbt(
  * we have a record or not depending on freecount.
  */
 static inline void
-xfs_scrub_iallocbt_chunk_xref_other(
+xchk_iallocbt_chunk_xref_other(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inobt_rec_incore	*irec,
 	xfs_agino_t			agino)
@@ -66,16 +66,16 @@ xfs_scrub_iallocbt_chunk_xref_other(
 	if (!(*pcur))
 		return;
 	error = xfs_ialloc_has_inode_record(*pcur, agino, agino, &has_irec);
-	if (!xfs_scrub_should_check_xref(sc, &error, pcur))
+	if (!xchk_should_check_xref(sc, &error, pcur))
 		return;
 	if (((irec->ir_freecount > 0 && !has_irec) ||
 	     (irec->ir_freecount == 0 && has_irec)))
-		xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+		xchk_btree_xref_set_corrupt(sc, *pcur, 0);
 }
 
 /* Cross-reference with the other btrees. */
 STATIC void
-xfs_scrub_iallocbt_chunk_xref(
+xchk_iallocbt_chunk_xref(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inobt_rec_incore	*irec,
 	xfs_agino_t			agino,
@@ -87,17 +87,17 @@ xfs_scrub_iallocbt_chunk_xref(
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
 
-	xfs_scrub_xref_is_used_space(sc, agbno, len);
-	xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino);
+	xchk_xref_is_used_space(sc, agbno, len);
+	xchk_iallocbt_chunk_xref_other(sc, irec, agino);
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
-	xfs_scrub_xref_is_owned_by(sc, agbno, len, &oinfo);
-	xfs_scrub_xref_is_not_shared(sc, agbno, len);
+	xchk_xref_is_owned_by(sc, agbno, len, &oinfo);
+	xchk_xref_is_not_shared(sc, agbno, len);
 }
 
 /* Is this chunk worth checking? */
 STATIC bool
-xfs_scrub_iallocbt_chunk(
-	struct xfs_scrub_btree		*bs,
+xchk_iallocbt_chunk(
+	struct xchk_btree		*bs,
 	struct xfs_inobt_rec_incore	*irec,
 	xfs_agino_t			agino,
 	xfs_extlen_t			len)
@@ -110,16 +110,16 @@ xfs_scrub_iallocbt_chunk(
 	if (bno + len <= bno ||
 	    !xfs_verify_agbno(mp, agno, bno) ||
 	    !xfs_verify_agbno(mp, agno, bno + len - 1))
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
-	xfs_scrub_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len);
+	xchk_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len);
 
 	return true;
 }
 
 /* Count the number of free inodes. */
 static unsigned int
-xfs_scrub_iallocbt_freecount(
+xchk_iallocbt_freecount(
 	xfs_inofree_t			freemask)
 {
 	BUILD_BUG_ON(sizeof(freemask) != sizeof(__u64));
@@ -128,8 +128,8 @@ xfs_scrub_iallocbt_freecount(
 
 /* Check a particular inode with ir_free. */
 STATIC int
-xfs_scrub_iallocbt_check_cluster_freemask(
-	struct xfs_scrub_btree		*bs,
+xchk_iallocbt_check_cluster_freemask(
+	struct xchk_btree		*bs,
 	xfs_ino_t			fsino,
 	xfs_agino_t			chunkino,
 	xfs_agino_t			clusterino,
@@ -143,14 +143,14 @@ xfs_scrub_iallocbt_check_cluster_freemask(
 	bool				inuse;
 	int				error = 0;
 
-	if (xfs_scrub_should_terminate(bs->sc, &error))
+	if (xchk_should_terminate(bs->sc, &error))
 		return error;
 
 	dip = xfs_buf_offset(bp, clusterino * mp->m_sb.sb_inodesize);
 	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
 	    (dip->di_version >= 3 &&
 	     be64_to_cpu(dip->di_ino) != fsino + clusterino)) {
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 		goto out;
 	}
 
@@ -175,15 +175,15 @@ xfs_scrub_iallocbt_check_cluster_freemask(
 		freemask_ok = inode_is_free ^ inuse;
 	}
 	if (!freemask_ok)
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 out:
 	return 0;
 }
 
 /* Make sure the free mask is consistent with what the inodes think. */
 STATIC int
-xfs_scrub_iallocbt_check_freemask(
-	struct xfs_scrub_btree		*bs,
+xchk_iallocbt_check_freemask(
+	struct xchk_btree		*bs,
 	struct xfs_inobt_rec_incore	*irec)
 {
 	struct xfs_owner_info		oinfo;
@@ -223,18 +223,18 @@ xfs_scrub_iallocbt_check_freemask(
 		/* The whole cluster must be a hole or not a hole. */
 		ir_holemask = (irec->ir_holemask & holemask);
 		if (ir_holemask != holemask && ir_holemask != 0) {
-			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 			continue;
 		}
 
 		/* If any part of this is a hole, skip it. */
 		if (ir_holemask) {
-			xfs_scrub_xref_is_not_owned_by(bs->sc, agbno,
+			xchk_xref_is_not_owned_by(bs->sc, agbno,
 					blks_per_cluster, &oinfo);
 			continue;
 		}
 
-		xfs_scrub_xref_is_owned_by(bs->sc, agbno, blks_per_cluster,
+		xchk_xref_is_owned_by(bs->sc, agbno, blks_per_cluster,
 				&oinfo);
 
 		/* Grab the inode cluster buffer. */
@@ -245,13 +245,13 @@ xfs_scrub_iallocbt_check_freemask(
 
 		error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
 				&dip, &bp, 0, 0);
-		if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, 0,
+		if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0,
 				&error))
 			continue;
 
 		/* Which inodes are free? */
 		for (clusterino = 0; clusterino < nr_inodes; clusterino++) {
-			error = xfs_scrub_iallocbt_check_cluster_freemask(bs,
+			error = xchk_iallocbt_check_cluster_freemask(bs,
 					fsino, chunkino, clusterino, irec, bp);
 			if (error) {
 				xfs_trans_brelse(bs->cur->bc_tp, bp);
@@ -267,8 +267,8 @@ xfs_scrub_iallocbt_check_freemask(
 
 /* Scrub an inobt/finobt record. */
 STATIC int
-xfs_scrub_iallocbt_rec(
-	struct xfs_scrub_btree		*bs,
+xchk_iallocbt_rec(
+	struct xchk_btree		*bs,
 	union xfs_btree_rec		*rec)
 {
 	struct xfs_mount		*mp = bs->cur->bc_mp;
@@ -289,18 +289,18 @@ xfs_scrub_iallocbt_rec(
 
 	if (irec.ir_count > XFS_INODES_PER_CHUNK ||
 	    irec.ir_freecount > XFS_INODES_PER_CHUNK)
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
 	real_freecount = irec.ir_freecount +
 			(XFS_INODES_PER_CHUNK - irec.ir_count);
-	if (real_freecount != xfs_scrub_iallocbt_freecount(irec.ir_free))
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+	if (real_freecount != xchk_iallocbt_freecount(irec.ir_free))
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
 	agino = irec.ir_startino;
 	/* Record has to be properly aligned within the AG. */
 	if (!xfs_verify_agino(mp, agno, agino) ||
 	    !xfs_verify_agino(mp, agno, agino + XFS_INODES_PER_CHUNK - 1)) {
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 		goto out;
 	}
 
@@ -308,7 +308,7 @@ xfs_scrub_iallocbt_rec(
 	agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino);
 	if ((agbno & (xfs_ialloc_cluster_alignment(mp) - 1)) ||
 	    (agbno & (xfs_icluster_size_fsb(mp) - 1)))
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
 	*inode_blocks += XFS_B_TO_FSB(mp,
 			irec.ir_count * mp->m_sb.sb_inodesize);
@@ -318,9 +318,9 @@ xfs_scrub_iallocbt_rec(
 		len = XFS_B_TO_FSB(mp,
 				XFS_INODES_PER_CHUNK * mp->m_sb.sb_inodesize);
 		if (irec.ir_count != XFS_INODES_PER_CHUNK)
-			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
-		if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len))
+		if (!xchk_iallocbt_chunk(bs, &irec, agino, len))
 			goto out;
 		goto check_freemask;
 	}
@@ -333,12 +333,12 @@ xfs_scrub_iallocbt_rec(
 	holes = ~xfs_inobt_irec_to_allocmask(&irec);
 	if ((holes & irec.ir_free) != holes ||
 	    irec.ir_freecount > irec.ir_count)
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
 	for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; i++) {
 		if (holemask & 1)
 			holecount += XFS_INODES_PER_HOLEMASK_BIT;
-		else if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len))
+		else if (!xchk_iallocbt_chunk(bs, &irec, agino, len))
 			break;
 		holemask >>= 1;
 		agino += XFS_INODES_PER_HOLEMASK_BIT;
@@ -346,10 +346,10 @@ xfs_scrub_iallocbt_rec(
 
 	if (holecount > XFS_INODES_PER_CHUNK ||
 	    holecount + irec.ir_count != XFS_INODES_PER_CHUNK)
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
 check_freemask:
-	error = xfs_scrub_iallocbt_check_freemask(bs, &irec);
+	error = xchk_iallocbt_check_freemask(bs, &irec);
 	if (error)
 		goto out;
 
@@ -362,7 +362,7 @@ out:
  * Don't bother if we're missing btree cursors, as we're already corrupt.
  */
 STATIC void
-xfs_scrub_iallocbt_xref_rmap_btreeblks(
+xchk_iallocbt_xref_rmap_btreeblks(
 	struct xfs_scrub_context	*sc,
 	int				which)
 {
@@ -374,27 +374,27 @@ xfs_scrub_iallocbt_xref_rmap_btreeblks(
 
 	if (!sc->sa.ino_cur || !sc->sa.rmap_cur ||
 	    (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur) ||
-	    xfs_scrub_skip_xref(sc->sm))
+	    xchk_skip_xref(sc->sm))
 		return;
 
 	/* Check that we saw as many inobt blocks as the rmap says. */
 	error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks);
-	if (!xfs_scrub_process_error(sc, 0, 0, &error))
+	if (!xchk_process_error(sc, 0, 0, &error))
 		return;
 
 	if (sc->sa.fino_cur) {
 		error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks);
-		if (!xfs_scrub_process_error(sc, 0, 0, &error))
+		if (!xchk_process_error(sc, 0, 0, &error))
 			return;
 	}
 
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
-	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
+	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
 			&blocks);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
 		return;
 	if (blocks != inobt_blocks + finobt_blocks)
-		xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
+		xchk_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
 }
 
 /*
@@ -402,7 +402,7 @@ xfs_scrub_iallocbt_xref_rmap_btreeblks(
  * the rmap says are owned by inodes.
  */
 STATIC void
-xfs_scrub_iallocbt_xref_rmap_inodes(
+xchk_iallocbt_xref_rmap_inodes(
 	struct xfs_scrub_context	*sc,
 	int				which,
 	xfs_filblks_t			inode_blocks)
@@ -411,22 +411,22 @@ xfs_scrub_iallocbt_xref_rmap_inodes(
 	xfs_filblks_t			blocks;
 	int				error;
 
-	if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm))
+	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
 		return;
 
 	/* Check that we saw as many inode blocks as the rmap knows about. */
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
-	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
+	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
 			&blocks);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
 		return;
 	if (blocks != inode_blocks)
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
 }
 
 /* Scrub the inode btrees for some AG. */
 STATIC int
-xfs_scrub_iallocbt(
+xchk_iallocbt(
 	struct xfs_scrub_context	*sc,
 	xfs_btnum_t			which)
 {
@@ -437,12 +437,12 @@ xfs_scrub_iallocbt(
 
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
 	cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
-	error = xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo,
+	error = xchk_btree(sc, cur, xchk_iallocbt_rec, &oinfo,
 			&inode_blocks);
 	if (error)
 		return error;
 
-	xfs_scrub_iallocbt_xref_rmap_btreeblks(sc, which);
+	xchk_iallocbt_xref_rmap_btreeblks(sc, which);
 
 	/*
 	 * If we're scrubbing the inode btree, inode_blocks is the number of
@@ -452,28 +452,28 @@ xfs_scrub_iallocbt(
 	 * to inode chunks with free inodes.
 	 */
 	if (which == XFS_BTNUM_INO)
-		xfs_scrub_iallocbt_xref_rmap_inodes(sc, which, inode_blocks);
+		xchk_iallocbt_xref_rmap_inodes(sc, which, inode_blocks);
 
 	return error;
 }
 
 int
-xfs_scrub_inobt(
+xchk_inobt(
 	struct xfs_scrub_context	*sc)
 {
-	return xfs_scrub_iallocbt(sc, XFS_BTNUM_INO);
+	return xchk_iallocbt(sc, XFS_BTNUM_INO);
 }
 
 int
-xfs_scrub_finobt(
+xchk_finobt(
 	struct xfs_scrub_context	*sc)
 {
-	return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO);
+	return xchk_iallocbt(sc, XFS_BTNUM_FINO);
 }
 
 /* See if an inode btree has (or doesn't have) an inode chunk record. */
 static inline void
-xfs_scrub_xref_inode_check(
+xchk_xref_inode_check(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len,
@@ -483,33 +483,33 @@ xfs_scrub_xref_inode_check(
 	bool				has_inodes;
 	int				error;
 
-	if (!(*icur) || xfs_scrub_skip_xref(sc->sm))
+	if (!(*icur) || xchk_skip_xref(sc->sm))
 		return;
 
 	error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes);
-	if (!xfs_scrub_should_check_xref(sc, &error, icur))
+	if (!xchk_should_check_xref(sc, &error, icur))
 		return;
 	if (has_inodes != should_have_inodes)
-		xfs_scrub_btree_xref_set_corrupt(sc, *icur, 0);
+		xchk_btree_xref_set_corrupt(sc, *icur, 0);
 }
 
 /* xref check that the extent is not covered by inodes */
 void
-xfs_scrub_xref_is_not_inode_chunk(
+xchk_xref_is_not_inode_chunk(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
 {
-	xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false);
-	xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false);
+	xchk_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false);
+	xchk_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false);
 }
 
 /* xref check that the extent is covered by inodes */
 void
-xfs_scrub_xref_is_inode_chunk(
+xchk_xref_is_inode_chunk(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
 {
-	xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true);
+	xchk_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true);
 }
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 7a6208505980..d85fbec39e52 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -37,7 +37,7 @@
  * the goal.
  */
 int
-xfs_scrub_setup_inode(
+xchk_setup_inode(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
@@ -47,13 +47,13 @@ xfs_scrub_setup_inode(
 	 * Try to get the inode.  If the verifiers fail, we try again
 	 * in raw mode.
 	 */
-	error = xfs_scrub_get_inode(sc, ip);
+	error = xchk_get_inode(sc, ip);
 	switch (error) {
 	case 0:
 		break;
 	case -EFSCORRUPTED:
 	case -EFSBADCRC:
-		return xfs_scrub_trans_alloc(sc, 0);
+		return xchk_trans_alloc(sc, 0);
 	default:
 		return error;
 	}
@@ -61,7 +61,7 @@ xfs_scrub_setup_inode(
 	/* Got the inode, lock it and we're ready to go. */
 	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
 	xfs_ilock(sc->ip, sc->ilock_flags);
-	error = xfs_scrub_trans_alloc(sc, 0);
+	error = xchk_trans_alloc(sc, 0);
 	if (error)
 		goto out;
 	sc->ilock_flags |= XFS_ILOCK_EXCL;
@@ -76,7 +76,7 @@ out:
 
 /* Validate di_extsize hint. */
 STATIC void
-xfs_scrub_inode_extsize(
+xchk_inode_extsize(
 	struct xfs_scrub_context	*sc,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino,
@@ -88,7 +88,7 @@ xfs_scrub_inode_extsize(
 	fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize),
 			mode, flags);
 	if (fa)
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 }
 
 /*
@@ -98,7 +98,7 @@ xfs_scrub_inode_extsize(
  * These functions must be kept in sync with each other.
  */
 STATIC void
-xfs_scrub_inode_cowextsize(
+xchk_inode_cowextsize(
 	struct xfs_scrub_context	*sc,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino,
@@ -112,12 +112,12 @@ xfs_scrub_inode_cowextsize(
 			be32_to_cpu(dip->di_cowextsize), mode, flags,
 			flags2);
 	if (fa)
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 }
 
 /* Make sure the di_flags make sense for the inode. */
 STATIC void
-xfs_scrub_inode_flags(
+xchk_inode_flags(
 	struct xfs_scrub_context	*sc,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino,
@@ -157,12 +157,12 @@ xfs_scrub_inode_flags(
 
 	return;
 bad:
-	xfs_scrub_ino_set_corrupt(sc, ino);
+	xchk_ino_set_corrupt(sc, ino);
 }
 
 /* Make sure the di_flags2 make sense for the inode. */
 STATIC void
-xfs_scrub_inode_flags2(
+xchk_inode_flags2(
 	struct xfs_scrub_context	*sc,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino,
@@ -200,12 +200,12 @@ xfs_scrub_inode_flags2(
 
 	return;
 bad:
-	xfs_scrub_ino_set_corrupt(sc, ino);
+	xchk_ino_set_corrupt(sc, ino);
 }
 
 /* Scrub all the ondisk inode fields. */
 STATIC void
-xfs_scrub_dinode(
+xchk_dinode(
 	struct xfs_scrub_context	*sc,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino)
@@ -237,7 +237,7 @@ xfs_scrub_dinode(
 		/* mode is recognized */
 		break;
 	default:
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 		break;
 	}
 
@@ -248,22 +248,22 @@ xfs_scrub_dinode(
 		 * We autoconvert v1 inodes into v2 inodes on writeout,
 		 * so just mark this inode for preening.
 		 */
-		xfs_scrub_ino_set_preen(sc, ino);
+		xchk_ino_set_preen(sc, ino);
 		break;
 	case 2:
 	case 3:
 		if (dip->di_onlink != 0)
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 
 		if (dip->di_mode == 0 && sc->ip)
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 
 		if (dip->di_projid_hi != 0 &&
 		    !xfs_sb_version_hasprojid32bit(&mp->m_sb))
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 		break;
 	default:
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 		return;
 	}
 
@@ -273,40 +273,40 @@ xfs_scrub_dinode(
 	 */
 	if (dip->di_uid == cpu_to_be32(-1U) ||
 	    dip->di_gid == cpu_to_be32(-1U))
-		xfs_scrub_ino_set_warning(sc, ino);
+		xchk_ino_set_warning(sc, ino);
 
 	/* di_format */
 	switch (dip->di_format) {
 	case XFS_DINODE_FMT_DEV:
 		if (!S_ISCHR(mode) && !S_ISBLK(mode) &&
 		    !S_ISFIFO(mode) && !S_ISSOCK(mode))
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 		break;
 	case XFS_DINODE_FMT_LOCAL:
 		if (!S_ISDIR(mode) && !S_ISLNK(mode))
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 		break;
 	case XFS_DINODE_FMT_EXTENTS:
 		if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode))
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 		break;
 	case XFS_DINODE_FMT_BTREE:
 		if (!S_ISREG(mode) && !S_ISDIR(mode))
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 		break;
 	case XFS_DINODE_FMT_UUID:
 	default:
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 		break;
 	}
 
 	/* di_[amc]time.nsec */
 	if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 	if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 	if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 
 	/*
 	 * di_size.  xfs_dinode_verify checks for things that screw up
@@ -315,19 +315,19 @@ xfs_scrub_dinode(
 	 */
 	isize = be64_to_cpu(dip->di_size);
 	if (isize & (1ULL << 63))
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 
 	/* Devices, fifos, and sockets must have zero size */
 	if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0)
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 
 	/* Directories can't be larger than the data section size (32G) */
 	if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE))
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 
 	/* Symlinks can't be larger than SYMLINK_MAXLEN */
 	if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN))
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 
 	/*
 	 * Warn if the running kernel can't handle the kinds of offsets
@@ -336,7 +336,7 @@ xfs_scrub_dinode(
 	 * overly large offsets, flag the inode for admin review.
 	 */
 	if (isize >= mp->m_super->s_maxbytes)
-		xfs_scrub_ino_set_warning(sc, ino);
+		xchk_ino_set_warning(sc, ino);
 
 	/* di_nblocks */
 	if (flags2 & XFS_DIFLAG2_REFLINK) {
@@ -351,15 +351,15 @@ xfs_scrub_dinode(
 		 */
 		if (be64_to_cpu(dip->di_nblocks) >=
 		    mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks)
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 	} else {
 		if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks)
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 	}
 
-	xfs_scrub_inode_flags(sc, dip, ino, mode, flags);
+	xchk_inode_flags(sc, dip, ino, mode, flags);
 
-	xfs_scrub_inode_extsize(sc, dip, ino, mode, flags);
+	xchk_inode_extsize(sc, dip, ino, mode, flags);
 
 	/* di_nextents */
 	nextents = be32_to_cpu(dip->di_nextents);
@@ -367,31 +367,31 @@ xfs_scrub_dinode(
 	switch (dip->di_format) {
 	case XFS_DINODE_FMT_EXTENTS:
 		if (nextents > fork_recs)
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 		break;
 	case XFS_DINODE_FMT_BTREE:
 		if (nextents <= fork_recs)
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 		break;
 	default:
 		if (nextents != 0)
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 		break;
 	}
 
 	/* di_forkoff */
 	if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 	if (dip->di_anextents != 0 && dip->di_forkoff == 0)
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 	if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 
 	/* di_aformat */
 	if (dip->di_aformat != XFS_DINODE_FMT_LOCAL &&
 	    dip->di_aformat != XFS_DINODE_FMT_EXTENTS &&
 	    dip->di_aformat != XFS_DINODE_FMT_BTREE)
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 
 	/* di_anextents */
 	nextents = be16_to_cpu(dip->di_anextents);
@@ -399,22 +399,22 @@ xfs_scrub_dinode(
 	switch (dip->di_aformat) {
 	case XFS_DINODE_FMT_EXTENTS:
 		if (nextents > fork_recs)
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 		break;
 	case XFS_DINODE_FMT_BTREE:
 		if (nextents <= fork_recs)
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 		break;
 	default:
 		if (nextents != 0)
-			xfs_scrub_ino_set_corrupt(sc, ino);
+			xchk_ino_set_corrupt(sc, ino);
 	}
 
 	if (dip->di_version >= 3) {
 		if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
-			xfs_scrub_ino_set_corrupt(sc, ino);
-		xfs_scrub_inode_flags2(sc, dip, ino, mode, flags, flags2);
-		xfs_scrub_inode_cowextsize(sc, dip, ino, mode, flags,
+			xchk_ino_set_corrupt(sc, ino);
+		xchk_inode_flags2(sc, dip, ino, mode, flags, flags2);
+		xchk_inode_cowextsize(sc, dip, ino, mode, flags,
 				flags2);
 	}
 }
@@ -425,7 +425,7 @@ xfs_scrub_dinode(
  * IGET_UNTRUSTED, which checks the inobt for us.
  */
 static void
-xfs_scrub_inode_xref_finobt(
+xchk_inode_xref_finobt(
 	struct xfs_scrub_context	*sc,
 	xfs_ino_t			ino)
 {
@@ -434,7 +434,7 @@ xfs_scrub_inode_xref_finobt(
 	int				has_record;
 	int				error;
 
-	if (!sc->sa.fino_cur || xfs_scrub_skip_xref(sc->sm))
+	if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm))
 		return;
 
 	agino = XFS_INO_TO_AGINO(sc->mp, ino);
@@ -445,12 +445,12 @@ xfs_scrub_inode_xref_finobt(
 	 */
 	error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE,
 			&has_record);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
 	    !has_record)
 		return;
 
 	error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
 	    !has_record)
 		return;
 
@@ -463,12 +463,12 @@ xfs_scrub_inode_xref_finobt(
 		return;
 
 	if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0);
 }
 
 /* Cross reference the inode fields with the forks. */
 STATIC void
-xfs_scrub_inode_xref_bmap(
+xchk_inode_xref_bmap(
 	struct xfs_scrub_context	*sc,
 	struct xfs_dinode		*dip)
 {
@@ -477,32 +477,32 @@ xfs_scrub_inode_xref_bmap(
 	xfs_filblks_t			acount;
 	int				error;
 
-	if (xfs_scrub_skip_xref(sc->sm))
+	if (xchk_skip_xref(sc->sm))
 		return;
 
 	/* Walk all the extents to check nextents/naextents/nblocks. */
 	error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
 			&nextents, &count);
-	if (!xfs_scrub_should_check_xref(sc, &error, NULL))
+	if (!xchk_should_check_xref(sc, &error, NULL))
 		return;
 	if (nextents < be32_to_cpu(dip->di_nextents))
-		xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino);
+		xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
 
 	error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
 			&nextents, &acount);
-	if (!xfs_scrub_should_check_xref(sc, &error, NULL))
+	if (!xchk_should_check_xref(sc, &error, NULL))
 		return;
 	if (nextents != be16_to_cpu(dip->di_anextents))
-		xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino);
+		xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
 
 	/* Check nblocks against the inode. */
 	if (count + acount != be64_to_cpu(dip->di_nblocks))
-		xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino);
+		xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
 }
 
 /* Cross-reference with the other btrees. */
 STATIC void
-xfs_scrub_inode_xref(
+xchk_inode_xref(
 	struct xfs_scrub_context	*sc,
 	xfs_ino_t			ino,
 	struct xfs_dinode		*dip)
@@ -518,18 +518,18 @@ xfs_scrub_inode_xref(
 	agno = XFS_INO_TO_AGNO(sc->mp, ino);
 	agbno = XFS_INO_TO_AGBNO(sc->mp, ino);
 
-	error = xfs_scrub_ag_init(sc, agno, &sc->sa);
-	if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error))
+	error = xchk_ag_init(sc, agno, &sc->sa);
+	if (!xchk_xref_process_error(sc, agno, agbno, &error))
 		return;
 
-	xfs_scrub_xref_is_used_space(sc, agbno, 1);
-	xfs_scrub_inode_xref_finobt(sc, ino);
+	xchk_xref_is_used_space(sc, agbno, 1);
+	xchk_inode_xref_finobt(sc, ino);
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
-	xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
-	xfs_scrub_xref_is_not_shared(sc, agbno, 1);
-	xfs_scrub_inode_xref_bmap(sc, dip);
+	xchk_xref_is_owned_by(sc, agbno, 1, &oinfo);
+	xchk_xref_is_not_shared(sc, agbno, 1);
+	xchk_inode_xref_bmap(sc, dip);
 
-	xfs_scrub_ag_free(sc, &sc->sa);
+	xchk_ag_free(sc, &sc->sa);
 }
 
 /*
@@ -539,7 +539,7 @@ xfs_scrub_inode_xref(
  * reflink filesystem.
  */
 static void
-xfs_scrub_inode_check_reflink_iflag(
+xchk_inode_check_reflink_iflag(
 	struct xfs_scrub_context	*sc,
 	xfs_ino_t			ino)
 {
@@ -552,18 +552,18 @@ xfs_scrub_inode_check_reflink_iflag(
 
 	error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
 			&has_shared);
-	if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
+	if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
 			XFS_INO_TO_AGBNO(mp, ino), &error))
 		return;
 	if (xfs_is_reflink_inode(sc->ip) && !has_shared)
-		xfs_scrub_ino_set_preen(sc, ino);
+		xchk_ino_set_preen(sc, ino);
 	else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
-		xfs_scrub_ino_set_corrupt(sc, ino);
+		xchk_ino_set_corrupt(sc, ino);
 }
 
 /* Scrub an inode. */
 int
-xfs_scrub_inode(
+xchk_inode(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_dinode		di;
@@ -575,13 +575,13 @@ xfs_scrub_inode(
 	 * and a NULL inode, so flag the corruption error and return.
 	 */
 	if (!sc->ip) {
-		xfs_scrub_ino_set_corrupt(sc, sc->sm->sm_ino);
+		xchk_ino_set_corrupt(sc, sc->sm->sm_ino);
 		return 0;
 	}
 
 	/* Scrub the inode core. */
 	xfs_inode_to_disk(sc->ip, &di, 0);
-	xfs_scrub_dinode(sc, &di, sc->ip->i_ino);
+	xchk_dinode(sc, &di, sc->ip->i_ino);
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		goto out;
 
@@ -591,9 +591,9 @@ xfs_scrub_inode(
 	 * we scrubbed the dinode.
 	 */
 	if (S_ISREG(VFS_I(sc->ip)->i_mode))
-		xfs_scrub_inode_check_reflink_iflag(sc, sc->ip->i_ino);
+		xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino);
 
-	xfs_scrub_inode_xref(sc, sc->ip->i_ino, &di);
+	xchk_inode_xref(sc, sc->ip->i_ino, &di);
 out:
 	return error;
 }
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index e2bda58c32f0..0a78d8411f23 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -27,18 +27,18 @@
 
 /* Set us up to scrub parents. */
 int
-xfs_scrub_setup_parent(
+xchk_setup_parent(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
-	return xfs_scrub_setup_inode_contents(sc, ip, 0);
+	return xchk_setup_inode_contents(sc, ip, 0);
 }
 
 /* Parent pointers */
 
 /* Look for an entry in a parent pointing to this inode. */
 
-struct xfs_scrub_parent_ctx {
+struct xchk_parent_ctx {
 	struct dir_context		dc;
 	xfs_ino_t			ino;
 	xfs_nlink_t			nlink;
@@ -46,7 +46,7 @@ struct xfs_scrub_parent_ctx {
 
 /* Look for a single entry in a directory pointing to an inode. */
 STATIC int
-xfs_scrub_parent_actor(
+xchk_parent_actor(
 	struct dir_context		*dc,
 	const char			*name,
 	int				namelen,
@@ -54,9 +54,9 @@ xfs_scrub_parent_actor(
 	u64				ino,
 	unsigned			type)
 {
-	struct xfs_scrub_parent_ctx	*spc;
+	struct xchk_parent_ctx		*spc;
 
-	spc = container_of(dc, struct xfs_scrub_parent_ctx, dc);
+	spc = container_of(dc, struct xchk_parent_ctx, dc);
 	if (spc->ino == ino)
 		spc->nlink++;
 	return 0;
@@ -64,13 +64,13 @@ xfs_scrub_parent_actor(
 
 /* Count the number of dentries in the parent dir that point to this inode. */
 STATIC int
-xfs_scrub_parent_count_parent_dentries(
+xchk_parent_count_parent_dentries(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*parent,
 	xfs_nlink_t			*nlink)
 {
-	struct xfs_scrub_parent_ctx	spc = {
-		.dc.actor = xfs_scrub_parent_actor,
+	struct xchk_parent_ctx		spc = {
+		.dc.actor = xchk_parent_actor,
 		.dc.pos = 0,
 		.ino = sc->ip->i_ino,
 		.nlink = 0,
@@ -120,7 +120,7 @@ out:
  * entry pointing back to the inode being scrubbed.
  */
 STATIC int
-xfs_scrub_parent_validate(
+xchk_parent_validate(
 	struct xfs_scrub_context	*sc,
 	xfs_ino_t			dnum,
 	bool				*try_again)
@@ -138,7 +138,7 @@ xfs_scrub_parent_validate(
 
 	/* '..' must not point to ourselves. */
 	if (sc->ip->i_ino == dnum) {
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 		goto out;
 	}
 
@@ -165,13 +165,13 @@ xfs_scrub_parent_validate(
 	error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp);
 	if (error == -EINVAL) {
 		error = -EFSCORRUPTED;
-		xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
+		xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
 		goto out;
 	}
-	if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
+	if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
 		goto out;
 	if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 		goto out_rele;
 	}
 
@@ -183,12 +183,12 @@ xfs_scrub_parent_validate(
 	 * the child inodes.
 	 */
 	if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
-		error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
-		if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
+		error = xchk_parent_count_parent_dentries(sc, dp, &nlink);
+		if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
 				&error))
 			goto out_unlock;
 		if (nlink != expected_nlink)
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 		goto out_unlock;
 	}
 
@@ -200,18 +200,18 @@ xfs_scrub_parent_validate(
 	 */
 	xfs_iunlock(sc->ip, sc->ilock_flags);
 	sc->ilock_flags = 0;
-	error = xfs_scrub_ilock_inverted(dp, XFS_IOLOCK_SHARED);
+	error = xchk_ilock_inverted(dp, XFS_IOLOCK_SHARED);
 	if (error)
 		goto out_rele;
 
 	/* Go looking for our dentry. */
-	error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
-	if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
+	error = xchk_parent_count_parent_dentries(sc, dp, &nlink);
+	if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
 		goto out_unlock;
 
 	/* Drop the parent lock, relock this inode. */
 	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
-	error = xfs_scrub_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL);
+	error = xchk_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL);
 	if (error)
 		goto out_rele;
 	sc->ilock_flags = XFS_IOLOCK_EXCL;
@@ -225,7 +225,7 @@ xfs_scrub_parent_validate(
 
 	/* Look up '..' to see if the inode changed. */
 	error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
-	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
 		goto out_rele;
 
 	/* Drat, parent changed.  Try again! */
@@ -241,7 +241,7 @@ xfs_scrub_parent_validate(
 	 * for us in the parent.
 	 */
 	if (nlink != expected_nlink)
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 	return error;
 
 out_unlock:
@@ -254,7 +254,7 @@ out:
 
 /* Scrub a parent pointer. */
 int
-xfs_scrub_parent(
+xchk_parent(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
@@ -272,7 +272,7 @@ xfs_scrub_parent(
 
 	/* We're not a special inode, are we? */
 	if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) {
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 		goto out;
 	}
 
@@ -288,10 +288,10 @@ xfs_scrub_parent(
 
 	/* Look up '..' */
 	error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
-	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
 		goto out;
 	if (!xfs_verify_dir_ino(mp, dnum)) {
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 		goto out;
 	}
 
@@ -299,12 +299,12 @@ xfs_scrub_parent(
 	if (sc->ip == mp->m_rootip) {
 		if (sc->ip->i_ino != mp->m_sb.sb_rootino ||
 		    sc->ip->i_ino != dnum)
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 		goto out;
 	}
 
 	do {
-		error = xfs_scrub_parent_validate(sc, dnum, &try_again);
+		error = xchk_parent_validate(sc, dnum, &try_again);
 		if (error)
 			goto out;
 	} while (try_again && ++tries < 20);
@@ -314,7 +314,7 @@ xfs_scrub_parent(
 	 * incomplete.  Userspace can decide if it wants to try again.
 	 */
 	if (try_again && tries == 20)
-		xfs_scrub_set_incomplete(sc);
+		xchk_set_incomplete(sc);
 out:
 	/*
 	 * If we failed to lock the parent inode even after a retry, just mark
@@ -322,7 +322,7 @@ out:
 	 */
 	if (sc->try_harder && error == -EDEADLOCK) {
 		error = 0;
-		xfs_scrub_set_incomplete(sc);
+		xchk_set_incomplete(sc);
 	}
 	return error;
 }
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 6ff906aa0a3b..d1b52dd7efcd 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -30,7 +30,7 @@
 
 /* Convert a scrub type code to a DQ flag, or return 0 if error. */
 static inline uint
-xfs_scrub_quota_to_dqtype(
+xchk_quota_to_dqtype(
 	struct xfs_scrub_context	*sc)
 {
 	switch (sc->sm->sm_type) {
@@ -47,7 +47,7 @@ xfs_scrub_quota_to_dqtype(
 
 /* Set us up to scrub a quota. */
 int
-xfs_scrub_setup_quota(
+xchk_setup_quota(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
@@ -57,14 +57,14 @@ xfs_scrub_setup_quota(
 	if (!XFS_IS_QUOTA_RUNNING(sc->mp) || !XFS_IS_QUOTA_ON(sc->mp))
 		return -ENOENT;
 
-	dqtype = xfs_scrub_quota_to_dqtype(sc);
+	dqtype = xchk_quota_to_dqtype(sc);
 	if (dqtype == 0)
 		return -EINVAL;
 	sc->has_quotaofflock = true;
 	mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock);
 	if (!xfs_this_quota_on(sc->mp, dqtype))
 		return -ENOENT;
-	error = xfs_scrub_setup_fs(sc, ip);
+	error = xchk_setup_fs(sc, ip);
 	if (error)
 		return error;
 	sc->ip = xfs_quota_inode(sc->mp, dqtype);
@@ -75,19 +75,19 @@ xfs_scrub_setup_quota(
 
 /* Quotas. */
 
-struct xfs_scrub_quota_info {
+struct xchk_quota_info {
 	struct xfs_scrub_context	*sc;
 	xfs_dqid_t			last_id;
 };
 
 /* Scrub the fields in an individual quota item. */
 STATIC int
-xfs_scrub_quota_item(
+xchk_quota_item(
 	struct xfs_dquot		*dq,
 	uint				dqtype,
 	void				*priv)
 {
-	struct xfs_scrub_quota_info	*sqi = priv;
+	struct xchk_quota_info		*sqi = priv;
 	struct xfs_scrub_context	*sc = sqi->sc;
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_disk_dquot		*d = &dq->q_core;
@@ -111,16 +111,16 @@ xfs_scrub_quota_item(
 	 */
 	offset = id / qi->qi_dqperchunk;
 	if (id && id <= sqi->last_id)
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
 
 	sqi->last_id = id;
 
 	/* Did we get the dquot type we wanted? */
 	if (dqtype != (d->d_flags & XFS_DQ_ALLTYPES))
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
 
 	if (d->d_pad0 != cpu_to_be32(0) || d->d_pad != cpu_to_be16(0))
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
 
 	/* Check the limits. */
 	bhard = be64_to_cpu(d->d_blk_hardlimit);
@@ -140,19 +140,19 @@ xfs_scrub_quota_item(
 	 * the hard limit.
 	 */
 	if (bhard > mp->m_sb.sb_dblocks)
-		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
 	if (bsoft > bhard)
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
 
 	if (ihard > mp->m_maxicount)
-		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
 	if (isoft > ihard)
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
 
 	if (rhard > mp->m_sb.sb_rblocks)
-		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
 	if (rsoft > rhard)
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
 
 	/* Check the resource counts. */
 	bcount = be64_to_cpu(d->d_bcount);
@@ -167,15 +167,15 @@ xfs_scrub_quota_item(
 	 */
 	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
 		if (mp->m_sb.sb_dblocks < bcount)
-			xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK,
+			xchk_fblock_set_warning(sc, XFS_DATA_FORK,
 					offset);
 	} else {
 		if (mp->m_sb.sb_dblocks < bcount)
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
 					offset);
 	}
 	if (icount > fs_icount || rcount > mp->m_sb.sb_rblocks)
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
 
 	/*
 	 * We can violate the hard limits if the admin suddenly sets a
@@ -183,18 +183,18 @@ xfs_scrub_quota_item(
 	 * admin review.
 	 */
 	if (id != 0 && bhard != 0 && bcount > bhard)
-		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
 	if (id != 0 && ihard != 0 && icount > ihard)
-		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
 	if (id != 0 && rhard != 0 && rcount > rhard)
-		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+		xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
 
 	return 0;
 }
 
 /* Check the quota's data fork. */
 STATIC int
-xfs_scrub_quota_data_fork(
+xchk_quota_data_fork(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_bmbt_irec		irec = { 0 };
@@ -205,7 +205,7 @@ xfs_scrub_quota_data_fork(
 	int				error = 0;
 
 	/* Invoke the fork scrubber. */
-	error = xfs_scrub_metadata_inode_forks(sc);
+	error = xchk_metadata_inode_forks(sc);
 	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 		return error;
 
@@ -213,7 +213,7 @@ xfs_scrub_quota_data_fork(
 	max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk;
 	ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
 	for_each_xfs_iext(ifp, &icur, &irec) {
-		if (xfs_scrub_should_terminate(sc, &error))
+		if (xchk_should_terminate(sc, &error))
 			break;
 		/*
 		 * delalloc extents or blocks mapped above the highest
@@ -222,7 +222,7 @@ xfs_scrub_quota_data_fork(
 		if (isnullstartblock(irec.br_startblock) ||
 		    irec.br_startoff > max_dqid_off ||
 		    irec.br_startoff + irec.br_blockcount - 1 > max_dqid_off) {
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
 					irec.br_startoff);
 			break;
 		}
@@ -233,19 +233,19 @@ xfs_scrub_quota_data_fork(
 
 /* Scrub all of a quota type's items. */
 int
-xfs_scrub_quota(
+xchk_quota(
 	struct xfs_scrub_context	*sc)
 {
-	struct xfs_scrub_quota_info	sqi;
+	struct xchk_quota_info		sqi;
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_quotainfo		*qi = mp->m_quotainfo;
 	uint				dqtype;
 	int				error = 0;
 
-	dqtype = xfs_scrub_quota_to_dqtype(sc);
+	dqtype = xchk_quota_to_dqtype(sc);
 
 	/* Look for problem extents. */
-	error = xfs_scrub_quota_data_fork(sc);
+	error = xchk_quota_data_fork(sc);
 	if (error)
 		goto out;
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
@@ -260,10 +260,10 @@ xfs_scrub_quota(
 	sc->ilock_flags = 0;
 	sqi.sc = sc;
 	sqi.last_id = 0;
-	error = xfs_qm_dqiterate(mp, dqtype, xfs_scrub_quota_item, &sqi);
+	error = xfs_qm_dqiterate(mp, dqtype, xchk_quota_item, &sqi);
 	sc->ilock_flags = XFS_ILOCK_EXCL;
 	xfs_ilock(sc->ip, sc->ilock_flags);
-	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK,
+	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK,
 			sqi.last_id * qi->qi_dqperchunk, &error))
 		goto out;
 
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 607a9faa8ecc..274febc49b23 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -28,11 +28,11 @@
  * Set us up to scrub reference count btrees.
  */
 int
-xfs_scrub_setup_ag_refcountbt(
+xchk_setup_ag_refcountbt(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
-	return xfs_scrub_setup_ag_btree(sc, ip, false);
+	return xchk_setup_ag_btree(sc, ip, false);
 }
 
 /* Reference count btree scrubber. */
@@ -73,12 +73,12 @@ xfs_scrub_setup_ag_refcountbt(
  * If the refcount is correct, all the check conditions in the algorithm
  * should always hold true.  If not, the refcount is incorrect.
  */
-struct xfs_scrub_refcnt_frag {
+struct xchk_refcnt_frag {
 	struct list_head		list;
 	struct xfs_rmap_irec		rm;
 };
 
-struct xfs_scrub_refcnt_check {
+struct xchk_refcnt_check {
 	struct xfs_scrub_context	*sc;
 	struct list_head		fragments;
 
@@ -99,18 +99,18 @@ struct xfs_scrub_refcnt_check {
  * fragments as the refcountbt says we should have.
  */
 STATIC int
-xfs_scrub_refcountbt_rmap_check(
+xchk_refcountbt_rmap_check(
 	struct xfs_btree_cur		*cur,
 	struct xfs_rmap_irec		*rec,
 	void				*priv)
 {
-	struct xfs_scrub_refcnt_check	*refchk = priv;
-	struct xfs_scrub_refcnt_frag	*frag;
+	struct xchk_refcnt_check	*refchk = priv;
+	struct xchk_refcnt_frag		*frag;
 	xfs_agblock_t			rm_last;
 	xfs_agblock_t			rc_last;
 	int				error = 0;
 
-	if (xfs_scrub_should_terminate(refchk->sc, &error))
+	if (xchk_should_terminate(refchk->sc, &error))
 		return error;
 
 	rm_last = rec->rm_startblock + rec->rm_blockcount - 1;
@@ -118,7 +118,7 @@ xfs_scrub_refcountbt_rmap_check(
 
 	/* Confirm that a single-owner refc extent is a CoW stage. */
 	if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) {
-		xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0);
+		xchk_btree_xref_set_corrupt(refchk->sc, cur, 0);
 		return 0;
 	}
 
@@ -135,7 +135,7 @@ xfs_scrub_refcountbt_rmap_check(
 		 * is healthy each rmap_irec we see will be in agbno order
 		 * so we don't need insertion sort here.
 		 */
-		frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag),
+		frag = kmem_alloc(sizeof(struct xchk_refcnt_frag),
 				KM_MAYFAIL);
 		if (!frag)
 			return -ENOMEM;
@@ -154,12 +154,12 @@ xfs_scrub_refcountbt_rmap_check(
  * we have a refcountbt error.
  */
 STATIC void
-xfs_scrub_refcountbt_process_rmap_fragments(
-	struct xfs_scrub_refcnt_check	*refchk)
+xchk_refcountbt_process_rmap_fragments(
+	struct xchk_refcnt_check	*refchk)
 {
 	struct list_head		worklist;
-	struct xfs_scrub_refcnt_frag	*frag;
-	struct xfs_scrub_refcnt_frag	*n;
+	struct xchk_refcnt_frag		*frag;
+	struct xchk_refcnt_frag		*n;
 	xfs_agblock_t			bno;
 	xfs_agblock_t			rbno;
 	xfs_agblock_t			next_rbno;
@@ -277,13 +277,13 @@ done:
 
 /* Use the rmap entries covering this extent to verify the refcount. */
 STATIC void
-xfs_scrub_refcountbt_xref_rmap(
+xchk_refcountbt_xref_rmap(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			bno,
 	xfs_extlen_t			len,
 	xfs_nlink_t			refcount)
 {
-	struct xfs_scrub_refcnt_check	refchk = {
+	struct xchk_refcnt_check	refchk = {
 		.sc = sc,
 		.bno = bno,
 		.len = len,
@@ -292,11 +292,11 @@ xfs_scrub_refcountbt_xref_rmap(
 	};
 	struct xfs_rmap_irec		low;
 	struct xfs_rmap_irec		high;
-	struct xfs_scrub_refcnt_frag	*frag;
-	struct xfs_scrub_refcnt_frag	*n;
+	struct xchk_refcnt_frag		*frag;
+	struct xchk_refcnt_frag		*n;
 	int				error;
 
-	if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm))
+	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
 		return;
 
 	/* Cross-reference with the rmapbt to confirm the refcount. */
@@ -307,13 +307,13 @@ xfs_scrub_refcountbt_xref_rmap(
 
 	INIT_LIST_HEAD(&refchk.fragments);
 	error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
-			&xfs_scrub_refcountbt_rmap_check, &refchk);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+			&xchk_refcountbt_rmap_check, &refchk);
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
 		goto out_free;
 
-	xfs_scrub_refcountbt_process_rmap_fragments(&refchk);
+	xchk_refcountbt_process_rmap_fragments(&refchk);
 	if (refcount != refchk.seen)
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
 
 out_free:
 	list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
@@ -324,7 +324,7 @@ out_free:
 
 /* Cross-reference with the other btrees. */
 STATIC void
-xfs_scrub_refcountbt_xref(
+xchk_refcountbt_xref(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len,
@@ -333,15 +333,15 @@ xfs_scrub_refcountbt_xref(
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
 
-	xfs_scrub_xref_is_used_space(sc, agbno, len);
-	xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
-	xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount);
+	xchk_xref_is_used_space(sc, agbno, len);
+	xchk_xref_is_not_inode_chunk(sc, agbno, len);
+	xchk_refcountbt_xref_rmap(sc, agbno, len, refcount);
 }
 
 /* Scrub a refcountbt record. */
 STATIC int
-xfs_scrub_refcountbt_rec(
-	struct xfs_scrub_btree		*bs,
+xchk_refcountbt_rec(
+	struct xchk_btree		*bs,
 	union xfs_btree_rec		*rec)
 {
 	struct xfs_mount		*mp = bs->cur->bc_mp;
@@ -360,7 +360,7 @@ xfs_scrub_refcountbt_rec(
 	/* Only CoW records can have refcount == 1. */
 	has_cowflag = (bno & XFS_REFC_COW_START);
 	if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 	if (has_cowflag)
 		(*cow_blocks) += len;
 
@@ -369,19 +369,19 @@ xfs_scrub_refcountbt_rec(
 	if (bno + len <= bno ||
 	    !xfs_verify_agbno(mp, agno, bno) ||
 	    !xfs_verify_agbno(mp, agno, bno + len - 1))
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
 	if (refcount == 0)
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
-	xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount);
+	xchk_refcountbt_xref(bs->sc, bno, len, refcount);
 
 	return error;
 }
 
 /* Make sure we have as many refc blocks as the rmap says. */
 STATIC void
-xfs_scrub_refcount_xref_rmap(
+xchk_refcount_xref_rmap(
 	struct xfs_scrub_context	*sc,
 	struct xfs_owner_info		*oinfo,
 	xfs_filblks_t			cow_blocks)
@@ -390,33 +390,33 @@ xfs_scrub_refcount_xref_rmap(
 	xfs_filblks_t			blocks;
 	int				error;
 
-	if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm))
+	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
 		return;
 
 	/* Check that we saw as many refcbt blocks as the rmap knows about. */
 	error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
-	if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
+	if (!xchk_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
 		return;
-	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
+	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
 			&blocks);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
 		return;
 	if (blocks != refcbt_blocks)
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
 
 	/* Check that we saw as many cow blocks as the rmap knows about. */
 	xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW);
-	error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
+	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
 			&blocks);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
 		return;
 	if (blocks != cow_blocks)
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
 }
 
 /* Scrub the refcount btree for some AG. */
 int
-xfs_scrub_refcountbt(
+xchk_refcountbt(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_owner_info		oinfo;
@@ -424,19 +424,19 @@ xfs_scrub_refcountbt(
 	int				error;
 
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
-	error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
+	error = xchk_btree(sc, sc->sa.refc_cur, xchk_refcountbt_rec,
 			&oinfo, &cow_blocks);
 	if (error)
 		return error;
 
-	xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks);
+	xchk_refcount_xref_rmap(sc, &oinfo, cow_blocks);
 
 	return 0;
 }
 
 /* xref check that a cow staging extent is marked in the refcountbt. */
 void
-xfs_scrub_xref_is_cow_staging(
+xchk_xref_is_cow_staging(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
@@ -446,35 +446,35 @@ xfs_scrub_xref_is_cow_staging(
 	int				has_refcount;
 	int				error;
 
-	if (!sc->sa.refc_cur || xfs_scrub_skip_xref(sc->sm))
+	if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
 		return;
 
 	/* Find the CoW staging extent. */
 	error = xfs_refcount_lookup_le(sc->sa.refc_cur,
 			agbno + XFS_REFC_COW_START, &has_refcount);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
 		return;
 	if (!has_refcount) {
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
 		return;
 	}
 
 	error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
 		return;
 	if (!has_refcount) {
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
 		return;
 	}
 
 	/* CoW flag must be set, refcount must be 1. */
 	has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START);
 	if (!has_cowflag || rc.rc_refcount != 1)
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
 
 	/* Must be at least as long as what was passed in */
 	if (rc.rc_blockcount < len)
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
 }
 
 /*
@@ -482,7 +482,7 @@ xfs_scrub_xref_is_cow_staging(
  * can have multiple owners.
  */
 void
-xfs_scrub_xref_is_not_shared(
+xchk_xref_is_not_shared(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
@@ -490,12 +490,12 @@ xfs_scrub_xref_is_not_shared(
 	bool				shared;
 	int				error;
 
-	if (!sc->sa.refc_cur || xfs_scrub_skip_xref(sc->sm))
+	if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
 		return;
 
 	error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
 		return;
 	if (shared)
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
 }
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index ea39e2bdc96a..5eccd89c64a8 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -50,7 +50,7 @@ xfs_repair_attempt(
 
 	trace_xfs_repair_attempt(ip, sc->sm, error);
 
-	xfs_scrub_ag_btcur_free(&sc->sa);
+	xchk_ag_btcur_free(&sc->sa);
 
 	/* Repair whatever's broken. */
 	ASSERT(sc->ops->repair);
@@ -110,7 +110,7 @@ xfs_repair_probe(
 {
 	int				error = 0;
 
-	if (xfs_scrub_should_terminate(sc, &error))
+	if (xchk_should_terminate(sc, &error))
 		return error;
 
 	return 0;
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index c6d763236ba7..4b75fc2f31f3 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -29,18 +29,18 @@
  * Set us up to scrub reverse mapping btrees.
  */
 int
-xfs_scrub_setup_ag_rmapbt(
+xchk_setup_ag_rmapbt(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
-	return xfs_scrub_setup_ag_btree(sc, ip, false);
+	return xchk_setup_ag_btree(sc, ip, false);
 }
 
 /* Reverse-mapping scrubber. */
 
 /* Cross-reference a rmap against the refcount btree. */
 STATIC void
-xfs_scrub_rmapbt_xref_refc(
+xchk_rmapbt_xref_refc(
 	struct xfs_scrub_context	*sc,
 	struct xfs_rmap_irec		*irec)
 {
@@ -52,7 +52,7 @@ xfs_scrub_rmapbt_xref_refc(
 	bool				is_unwritten;
 	int				error;
 
-	if (!sc->sa.refc_cur || xfs_scrub_skip_xref(sc->sm))
+	if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
 		return;
 
 	non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner);
@@ -63,15 +63,15 @@ xfs_scrub_rmapbt_xref_refc(
 	/* If this is shared, must be a data fork extent. */
 	error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock,
 			irec->rm_blockcount, &fbno, &flen, false);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
 		return;
 	if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten))
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
 }
 
 /* Cross-reference with the other btrees. */
 STATIC void
-xfs_scrub_rmapbt_xref(
+xchk_rmapbt_xref(
 	struct xfs_scrub_context	*sc,
 	struct xfs_rmap_irec		*irec)
 {
@@ -81,22 +81,22 @@ xfs_scrub_rmapbt_xref(
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
 
-	xfs_scrub_xref_is_used_space(sc, agbno, len);
+	xchk_xref_is_used_space(sc, agbno, len);
 	if (irec->rm_owner == XFS_RMAP_OWN_INODES)
-		xfs_scrub_xref_is_inode_chunk(sc, agbno, len);
+		xchk_xref_is_inode_chunk(sc, agbno, len);
 	else
-		xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
+		xchk_xref_is_not_inode_chunk(sc, agbno, len);
 	if (irec->rm_owner == XFS_RMAP_OWN_COW)
-		xfs_scrub_xref_is_cow_staging(sc, irec->rm_startblock,
+		xchk_xref_is_cow_staging(sc, irec->rm_startblock,
 				irec->rm_blockcount);
 	else
-		xfs_scrub_rmapbt_xref_refc(sc, irec);
+		xchk_rmapbt_xref_refc(sc, irec);
 }
 
 /* Scrub an rmapbt record. */
 STATIC int
-xfs_scrub_rmapbt_rec(
-	struct xfs_scrub_btree		*bs,
+xchk_rmapbt_rec(
+	struct xchk_btree		*bs,
 	union xfs_btree_rec		*rec)
 {
 	struct xfs_mount		*mp = bs->cur->bc_mp;
@@ -109,12 +109,12 @@ xfs_scrub_rmapbt_rec(
 	int				error;
 
 	error = xfs_rmap_btrec_to_irec(rec, &irec);
-	if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error))
+	if (!xchk_btree_process_error(bs->sc, bs->cur, 0, &error))
 		goto out;
 
 	/* Check extent. */
 	if (irec.rm_startblock + irec.rm_blockcount <= irec.rm_startblock)
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
 	if (irec.rm_owner == XFS_RMAP_OWN_FS) {
 		/*
@@ -124,7 +124,7 @@ xfs_scrub_rmapbt_rec(
 		 */
 		if (irec.rm_startblock != 0 ||
 		    irec.rm_blockcount != XFS_AGFL_BLOCK(mp) + 1)
-			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 	} else {
 		/*
 		 * Otherwise we must point somewhere past the static metadata
@@ -133,7 +133,7 @@ xfs_scrub_rmapbt_rec(
 		if (!xfs_verify_agbno(mp, agno, irec.rm_startblock) ||
 		    !xfs_verify_agbno(mp, agno, irec.rm_startblock +
 				irec.rm_blockcount - 1))
-			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 	}
 
 	/* Check flags. */
@@ -143,47 +143,47 @@ xfs_scrub_rmapbt_rec(
 	is_unwritten = irec.rm_flags & XFS_RMAP_UNWRITTEN;
 
 	if (is_bmbt && irec.rm_offset != 0)
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
 	if (non_inode && irec.rm_offset != 0)
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
 	if (is_unwritten && (is_bmbt || non_inode || is_attr))
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
 	if (non_inode && (is_bmbt || is_unwritten || is_attr))
-		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 
 	if (!non_inode) {
 		if (!xfs_verify_ino(mp, irec.rm_owner))
-			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 	} else {
 		/* Non-inode owner within the magic values? */
 		if (irec.rm_owner <= XFS_RMAP_OWN_MIN ||
 		    irec.rm_owner > XFS_RMAP_OWN_FS)
-			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
 	}
 
-	xfs_scrub_rmapbt_xref(bs->sc, &irec);
+	xchk_rmapbt_xref(bs->sc, &irec);
 out:
 	return error;
 }
 
 /* Scrub the rmap btree for some AG. */
 int
-xfs_scrub_rmapbt(
+xchk_rmapbt(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_owner_info		oinfo;
 
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
-	return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec,
+	return xchk_btree(sc, sc->sa.rmap_cur, xchk_rmapbt_rec,
 			&oinfo, NULL);
 }
 
 /* xref check that the extent is owned by a given owner */
 static inline void
-xfs_scrub_xref_check_owner(
+xchk_xref_check_owner(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			bno,
 	xfs_extlen_t			len,
@@ -193,42 +193,42 @@ xfs_scrub_xref_check_owner(
 	bool				has_rmap;
 	int				error;
 
-	if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm))
+	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
 		return;
 
 	error = xfs_rmap_record_exists(sc->sa.rmap_cur, bno, len, oinfo,
 			&has_rmap);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
 		return;
 	if (has_rmap != should_have_rmap)
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
 }
 
 /* xref check that the extent is owned by a given owner */
 void
-xfs_scrub_xref_is_owned_by(
+xchk_xref_is_owned_by(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			bno,
 	xfs_extlen_t			len,
 	struct xfs_owner_info		*oinfo)
 {
-	xfs_scrub_xref_check_owner(sc, bno, len, oinfo, true);
+	xchk_xref_check_owner(sc, bno, len, oinfo, true);
 }
 
 /* xref check that the extent is not owned by a given owner */
 void
-xfs_scrub_xref_is_not_owned_by(
+xchk_xref_is_not_owned_by(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			bno,
 	xfs_extlen_t			len,
 	struct xfs_owner_info		*oinfo)
 {
-	xfs_scrub_xref_check_owner(sc, bno, len, oinfo, false);
+	xchk_xref_check_owner(sc, bno, len, oinfo, false);
 }
 
 /* xref check that the extent has no reverse mapping at all */
 void
-xfs_scrub_xref_has_no_owner(
+xchk_xref_has_no_owner(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			bno,
 	xfs_extlen_t			len)
@@ -236,12 +236,12 @@ xfs_scrub_xref_has_no_owner(
 	bool				has_rmap;
 	int				error;
 
-	if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm))
+	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
 		return;
 
 	error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap);
-	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
 		return;
 	if (has_rmap)
-		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
 }
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 1f86e02a07ca..3f0fc83562ae 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -25,13 +25,13 @@
 
 /* Set us up with the realtime metadata locked. */
 int
-xfs_scrub_setup_rt(
+xchk_setup_rt(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
 	int				error;
 
-	error = xfs_scrub_setup_fs(sc, ip);
+	error = xchk_setup_fs(sc, ip);
 	if (error)
 		return error;
 
@@ -46,7 +46,7 @@ xfs_scrub_setup_rt(
 
 /* Scrub a free extent record from the realtime bitmap. */
 STATIC int
-xfs_scrub_rtbitmap_rec(
+xchk_rtbitmap_rec(
 	struct xfs_trans		*tp,
 	struct xfs_rtalloc_rec		*rec,
 	void				*priv)
@@ -61,24 +61,24 @@ xfs_scrub_rtbitmap_rec(
 	if (startblock + blockcount <= startblock ||
 	    !xfs_verify_rtbno(sc->mp, startblock) ||
 	    !xfs_verify_rtbno(sc->mp, startblock + blockcount - 1))
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 	return 0;
 }
 
 /* Scrub the realtime bitmap. */
 int
-xfs_scrub_rtbitmap(
+xchk_rtbitmap(
 	struct xfs_scrub_context	*sc)
 {
 	int				error;
 
 	/* Invoke the fork scrubber. */
-	error = xfs_scrub_metadata_inode_forks(sc);
+	error = xchk_metadata_inode_forks(sc);
 	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 		return error;
 
-	error = xfs_rtalloc_query_all(sc->tp, xfs_scrub_rtbitmap_rec, sc);
-	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+	error = xfs_rtalloc_query_all(sc->tp, xchk_rtbitmap_rec, sc);
+	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
 		goto out;
 
 out:
@@ -87,7 +87,7 @@ out:
 
 /* Scrub the realtime summary. */
 int
-xfs_scrub_rtsummary(
+xchk_rtsummary(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_inode		*rsumip = sc->mp->m_rsumip;
@@ -107,12 +107,12 @@ xfs_scrub_rtsummary(
 	xfs_ilock(sc->ip, sc->ilock_flags);
 
 	/* Invoke the fork scrubber. */
-	error = xfs_scrub_metadata_inode_forks(sc);
+	error = xchk_metadata_inode_forks(sc);
 	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 		goto out;
 
 	/* XXX: implement this some day */
-	xfs_scrub_set_incomplete(sc);
+	xchk_set_incomplete(sc);
 out:
 	/* Switch back to the rtbitmap inode and lock flags. */
 	xfs_iunlock(sc->ip, sc->ilock_flags);
@@ -124,7 +124,7 @@ out:
 
 /* xref check that the extent is not free in the rtbitmap */
 void
-xfs_scrub_xref_is_used_rt_space(
+xchk_xref_is_used_rt_space(
 	struct xfs_scrub_context	*sc,
 	xfs_rtblock_t			fsbno,
 	xfs_extlen_t			len)
@@ -135,7 +135,7 @@ xfs_scrub_xref_is_used_rt_space(
 	bool				is_free;
 	int				error;
 
-	if (xfs_scrub_skip_xref(sc->sm))
+	if (xchk_skip_xref(sc->sm))
 		return;
 
 	startext = fsbno;
@@ -147,10 +147,10 @@ xfs_scrub_xref_is_used_rt_space(
 	xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
 	error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, startext, extcount,
 			&is_free);
-	if (!xfs_scrub_should_check_xref(sc, &error, NULL))
+	if (!xchk_should_check_xref(sc, &error, NULL))
 		goto out_unlock;
 	if (is_free)
-		xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino);
+		xchk_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino);
 out_unlock:
 	xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
 }
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 58ae76b3a421..ead97ab91a3c 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -131,6 +131,12 @@
  * optimize the structure so that the rebuild knows what to do.  The
  * second check evaluates the completeness of the repair; that is what
  * is reported to userspace.
+ *
+ * A quick note on symbol prefixes:
+ * - "xfs_" are general XFS symbols.
+ * - "xchk_" are symbols related to metadata checking.
+ * - "xrep_" are symbols related to metadata repair.
+ * - "xfs_scrub_" are symbols that tie online fsck to the rest of XFS.
  */
 
 /*
@@ -144,12 +150,12 @@
  * supported by the running kernel.
  */
 static int
-xfs_scrub_probe(
+xchk_probe(
 	struct xfs_scrub_context	*sc)
 {
 	int				error = 0;
 
-	if (xfs_scrub_should_terminate(sc, &error))
+	if (xchk_should_terminate(sc, &error))
 		return error;
 
 	return 0;
@@ -159,12 +165,12 @@ xfs_scrub_probe(
 
 /* Free all the resources and finish the transactions. */
 STATIC int
-xfs_scrub_teardown(
+xchk_teardown(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip_in,
 	int				error)
 {
-	xfs_scrub_ag_free(sc, &sc->sa);
+	xchk_ag_free(sc, &sc->sa);
 	if (sc->tp) {
 		if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
 			error = xfs_trans_commit(sc->tp);
@@ -191,165 +197,165 @@ xfs_scrub_teardown(
 
 /* Scrubbing dispatch. */
 
-static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
+static const struct xchk_meta_ops meta_scrub_ops[] = {
 	[XFS_SCRUB_TYPE_PROBE] = {	/* ioctl presence test */
 		.type	= ST_NONE,
-		.setup	= xfs_scrub_setup_fs,
-		.scrub	= xfs_scrub_probe,
+		.setup	= xchk_setup_fs,
+		.scrub	= xchk_probe,
 		.repair = xfs_repair_probe,
 	},
 	[XFS_SCRUB_TYPE_SB] = {		/* superblock */
 		.type	= ST_PERAG,
-		.setup	= xfs_scrub_setup_fs,
-		.scrub	= xfs_scrub_superblock,
+		.setup	= xchk_setup_fs,
+		.scrub	= xchk_superblock,
 		.repair	= xfs_repair_superblock,
 	},
 	[XFS_SCRUB_TYPE_AGF] = {	/* agf */
 		.type	= ST_PERAG,
-		.setup	= xfs_scrub_setup_fs,
-		.scrub	= xfs_scrub_agf,
+		.setup	= xchk_setup_fs,
+		.scrub	= xchk_agf,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
 		.type	= ST_PERAG,
-		.setup	= xfs_scrub_setup_fs,
-		.scrub	= xfs_scrub_agfl,
+		.setup	= xchk_setup_fs,
+		.scrub	= xchk_agfl,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_AGI] = {	/* agi */
 		.type	= ST_PERAG,
-		.setup	= xfs_scrub_setup_fs,
-		.scrub	= xfs_scrub_agi,
+		.setup	= xchk_setup_fs,
+		.scrub	= xchk_agi,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_BNOBT] = {	/* bnobt */
 		.type	= ST_PERAG,
-		.setup	= xfs_scrub_setup_ag_allocbt,
-		.scrub	= xfs_scrub_bnobt,
+		.setup	= xchk_setup_ag_allocbt,
+		.scrub	= xchk_bnobt,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_CNTBT] = {	/* cntbt */
 		.type	= ST_PERAG,
-		.setup	= xfs_scrub_setup_ag_allocbt,
-		.scrub	= xfs_scrub_cntbt,
+		.setup	= xchk_setup_ag_allocbt,
+		.scrub	= xchk_cntbt,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_INOBT] = {	/* inobt */
 		.type	= ST_PERAG,
-		.setup	= xfs_scrub_setup_ag_iallocbt,
-		.scrub	= xfs_scrub_inobt,
+		.setup	= xchk_setup_ag_iallocbt,
+		.scrub	= xchk_inobt,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_FINOBT] = {	/* finobt */
 		.type	= ST_PERAG,
-		.setup	= xfs_scrub_setup_ag_iallocbt,
-		.scrub	= xfs_scrub_finobt,
+		.setup	= xchk_setup_ag_iallocbt,
+		.scrub	= xchk_finobt,
 		.has	= xfs_sb_version_hasfinobt,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_RMAPBT] = {	/* rmapbt */
 		.type	= ST_PERAG,
-		.setup	= xfs_scrub_setup_ag_rmapbt,
-		.scrub	= xfs_scrub_rmapbt,
+		.setup	= xchk_setup_ag_rmapbt,
+		.scrub	= xchk_rmapbt,
 		.has	= xfs_sb_version_hasrmapbt,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_REFCNTBT] = {	/* refcountbt */
 		.type	= ST_PERAG,
-		.setup	= xfs_scrub_setup_ag_refcountbt,
-		.scrub	= xfs_scrub_refcountbt,
+		.setup	= xchk_setup_ag_refcountbt,
+		.scrub	= xchk_refcountbt,
 		.has	= xfs_sb_version_hasreflink,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_INODE] = {	/* inode record */
 		.type	= ST_INODE,
-		.setup	= xfs_scrub_setup_inode,
-		.scrub	= xfs_scrub_inode,
+		.setup	= xchk_setup_inode,
+		.scrub	= xchk_inode,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_BMBTD] = {	/* inode data fork */
 		.type	= ST_INODE,
-		.setup	= xfs_scrub_setup_inode_bmap,
-		.scrub	= xfs_scrub_bmap_data,
+		.setup	= xchk_setup_inode_bmap,
+		.scrub	= xchk_bmap_data,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_BMBTA] = {	/* inode attr fork */
 		.type	= ST_INODE,
-		.setup	= xfs_scrub_setup_inode_bmap,
-		.scrub	= xfs_scrub_bmap_attr,
+		.setup	= xchk_setup_inode_bmap,
+		.scrub	= xchk_bmap_attr,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_BMBTC] = {	/* inode CoW fork */
 		.type	= ST_INODE,
-		.setup	= xfs_scrub_setup_inode_bmap,
-		.scrub	= xfs_scrub_bmap_cow,
+		.setup	= xchk_setup_inode_bmap,
+		.scrub	= xchk_bmap_cow,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_DIR] = {	/* directory */
 		.type	= ST_INODE,
-		.setup	= xfs_scrub_setup_directory,
-		.scrub	= xfs_scrub_directory,
+		.setup	= xchk_setup_directory,
+		.scrub	= xchk_directory,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_XATTR] = {	/* extended attributes */
 		.type	= ST_INODE,
-		.setup	= xfs_scrub_setup_xattr,
-		.scrub	= xfs_scrub_xattr,
+		.setup	= xchk_setup_xattr,
+		.scrub	= xchk_xattr,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_SYMLINK] = {	/* symbolic link */
 		.type	= ST_INODE,
-		.setup	= xfs_scrub_setup_symlink,
-		.scrub	= xfs_scrub_symlink,
+		.setup	= xchk_setup_symlink,
+		.scrub	= xchk_symlink,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_PARENT] = {	/* parent pointers */
 		.type	= ST_INODE,
-		.setup	= xfs_scrub_setup_parent,
-		.scrub	= xfs_scrub_parent,
+		.setup	= xchk_setup_parent,
+		.scrub	= xchk_parent,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_RTBITMAP] = {	/* realtime bitmap */
 		.type	= ST_FS,
-		.setup	= xfs_scrub_setup_rt,
-		.scrub	= xfs_scrub_rtbitmap,
+		.setup	= xchk_setup_rt,
+		.scrub	= xchk_rtbitmap,
 		.has	= xfs_sb_version_hasrealtime,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_RTSUM] = {	/* realtime summary */
 		.type	= ST_FS,
-		.setup	= xfs_scrub_setup_rt,
-		.scrub	= xfs_scrub_rtsummary,
+		.setup	= xchk_setup_rt,
+		.scrub	= xchk_rtsummary,
 		.has	= xfs_sb_version_hasrealtime,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_UQUOTA] = {	/* user quota */
 		.type	= ST_FS,
-		.setup	= xfs_scrub_setup_quota,
-		.scrub	= xfs_scrub_quota,
+		.setup	= xchk_setup_quota,
+		.scrub	= xchk_quota,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_GQUOTA] = {	/* group quota */
 		.type	= ST_FS,
-		.setup	= xfs_scrub_setup_quota,
-		.scrub	= xfs_scrub_quota,
+		.setup	= xchk_setup_quota,
+		.scrub	= xchk_quota,
 		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_PQUOTA] = {	/* project quota */
 		.type	= ST_FS,
-		.setup	= xfs_scrub_setup_quota,
-		.scrub	= xfs_scrub_quota,
+		.setup	= xchk_setup_quota,
+		.scrub	= xchk_quota,
 		.repair	= xfs_repair_notsupported,
 	},
 };
 
 /* This isn't a stable feature, warn once per day. */
 static inline void
-xfs_scrub_experimental_warning(
+xchk_experimental_warning(
 	struct xfs_mount	*mp)
 {
 	static struct ratelimit_state scrub_warning = RATELIMIT_STATE_INIT(
-			"xfs_scrub_warning", 86400 * HZ, 1);
+			"xchk_warning", 86400 * HZ, 1);
 	ratelimit_set_flags(&scrub_warning, RATELIMIT_MSG_ON_RELEASE);
 
 	if (__ratelimit(&scrub_warning))
@@ -358,12 +364,12 @@ xfs_scrub_experimental_warning(
 }
 
 static int
-xfs_scrub_validate_inputs(
+xchk_validate_inputs(
 	struct xfs_mount		*mp,
 	struct xfs_scrub_metadata	*sm)
 {
 	int				error;
-	const struct xfs_scrub_meta_ops	*ops;
+	const struct xchk_meta_ops	*ops;
 
 	error = -EINVAL;
 	/* Check our inputs. */
@@ -441,7 +447,7 @@ out:
 }
 
 #ifdef CONFIG_XFS_ONLINE_REPAIR
-static inline void xfs_scrub_postmortem(struct xfs_scrub_context *sc)
+static inline void xchk_postmortem(struct xfs_scrub_context *sc)
 {
 	/*
 	 * Userspace asked us to repair something, we repaired it, rescanned
@@ -454,7 +460,7 @@ static inline void xfs_scrub_postmortem(struct xfs_scrub_context *sc)
 		xfs_repair_failure(sc->mp);
 }
 #else
-static inline void xfs_scrub_postmortem(struct xfs_scrub_context *sc)
+static inline void xchk_postmortem(struct xfs_scrub_context *sc)
 {
 	/*
 	 * Userspace asked us to scrub something, it's broken, and we have no
@@ -480,9 +486,9 @@ xfs_scrub_metadata(
 	int				error = 0;
 
 	BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
-		(sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR));
+		(sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR));
 
-	trace_xfs_scrub_start(ip, sm, error);
+	trace_xchk_start(ip, sm, error);
 
 	/* Forbidden if we are shut down or mounted norecovery. */
 	error = -ESHUTDOWN;
@@ -492,11 +498,11 @@ xfs_scrub_metadata(
 	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
 		goto out;
 
-	error = xfs_scrub_validate_inputs(mp, sm);
+	error = xchk_validate_inputs(mp, sm);
 	if (error)
 		goto out;
 
-	xfs_scrub_experimental_warning(mp);
+	xchk_experimental_warning(mp);
 
 retry_op:
 	/* Set up for the operation. */
@@ -518,7 +524,7 @@ retry_op:
 		 * Tear down everything we hold, then set up again with
 		 * preparation for worst-case scenarios.
 		 */
-		error = xfs_scrub_teardown(&sc, ip, 0);
+		error = xchk_teardown(&sc, ip, 0);
 		if (error)
 			goto out;
 		try_harder = true;
@@ -553,7 +559,7 @@ retry_op:
 		if (error == -EAGAIN) {
 			if (sc.try_harder)
 				try_harder = true;
-			error = xfs_scrub_teardown(&sc, ip, 0);
+			error = xchk_teardown(&sc, ip, 0);
 			if (error) {
 				xfs_repair_failure(mp);
 				goto out;
@@ -563,11 +569,11 @@ retry_op:
 	}
 
 out_nofix:
-	xfs_scrub_postmortem(&sc);
+	xchk_postmortem(&sc);
 out_teardown:
-	error = xfs_scrub_teardown(&sc, ip, error);
+	error = xchk_teardown(&sc, ip, error);
 out:
-	trace_xfs_scrub_done(ip, sm, error);
+	trace_xchk_done(ip, sm, error);
 	if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
 		sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 		error = 0;
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index b295edd5fc0e..0f59a47c4bb0 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -9,14 +9,14 @@
 struct xfs_scrub_context;
 
 /* Type info and names for the scrub types. */
-enum xfs_scrub_type {
+enum xchk_type {
 	ST_NONE = 1,	/* disabled */
 	ST_PERAG,	/* per-AG metadata */
 	ST_FS,		/* per-FS metadata */
 	ST_INODE,	/* per-inode metadata */
 };
 
-struct xfs_scrub_meta_ops {
+struct xchk_meta_ops {
 	/* Acquire whatever resources are needed for the operation. */
 	int		(*setup)(struct xfs_scrub_context *,
 				 struct xfs_inode *);
@@ -31,11 +31,11 @@ struct xfs_scrub_meta_ops {
 	bool		(*has)(struct xfs_sb *);
 
 	/* type describing required/allowed inputs */
-	enum xfs_scrub_type	type;
+	enum xchk_type	type;
 };
 
 /* Buffer pointers and btree cursors for an entire AG. */
-struct xfs_scrub_ag {
+struct xchk_ag {
 	xfs_agnumber_t			agno;
 	struct xfs_perag		*pag;
 
@@ -57,7 +57,7 @@ struct xfs_scrub_context {
 	/* General scrub state. */
 	struct xfs_mount		*mp;
 	struct xfs_scrub_metadata	*sm;
-	const struct xfs_scrub_meta_ops	*ops;
+	const struct xchk_meta_ops	*ops;
 	struct xfs_trans		*tp;
 	struct xfs_inode		*ip;
 	void				*buf;
@@ -66,78 +66,78 @@ struct xfs_scrub_context {
 	bool				has_quotaofflock;
 
 	/* State tracking for single-AG operations. */
-	struct xfs_scrub_ag		sa;
+	struct xchk_ag		sa;
 };
 
 /* Metadata scrubbers */
-int xfs_scrub_tester(struct xfs_scrub_context *sc);
-int xfs_scrub_superblock(struct xfs_scrub_context *sc);
-int xfs_scrub_agf(struct xfs_scrub_context *sc);
-int xfs_scrub_agfl(struct xfs_scrub_context *sc);
-int xfs_scrub_agi(struct xfs_scrub_context *sc);
-int xfs_scrub_bnobt(struct xfs_scrub_context *sc);
-int xfs_scrub_cntbt(struct xfs_scrub_context *sc);
-int xfs_scrub_inobt(struct xfs_scrub_context *sc);
-int xfs_scrub_finobt(struct xfs_scrub_context *sc);
-int xfs_scrub_rmapbt(struct xfs_scrub_context *sc);
-int xfs_scrub_refcountbt(struct xfs_scrub_context *sc);
-int xfs_scrub_inode(struct xfs_scrub_context *sc);
-int xfs_scrub_bmap_data(struct xfs_scrub_context *sc);
-int xfs_scrub_bmap_attr(struct xfs_scrub_context *sc);
-int xfs_scrub_bmap_cow(struct xfs_scrub_context *sc);
-int xfs_scrub_directory(struct xfs_scrub_context *sc);
-int xfs_scrub_xattr(struct xfs_scrub_context *sc);
-int xfs_scrub_symlink(struct xfs_scrub_context *sc);
-int xfs_scrub_parent(struct xfs_scrub_context *sc);
+int xchk_tester(struct xfs_scrub_context *sc);
+int xchk_superblock(struct xfs_scrub_context *sc);
+int xchk_agf(struct xfs_scrub_context *sc);
+int xchk_agfl(struct xfs_scrub_context *sc);
+int xchk_agi(struct xfs_scrub_context *sc);
+int xchk_bnobt(struct xfs_scrub_context *sc);
+int xchk_cntbt(struct xfs_scrub_context *sc);
+int xchk_inobt(struct xfs_scrub_context *sc);
+int xchk_finobt(struct xfs_scrub_context *sc);
+int xchk_rmapbt(struct xfs_scrub_context *sc);
+int xchk_refcountbt(struct xfs_scrub_context *sc);
+int xchk_inode(struct xfs_scrub_context *sc);
+int xchk_bmap_data(struct xfs_scrub_context *sc);
+int xchk_bmap_attr(struct xfs_scrub_context *sc);
+int xchk_bmap_cow(struct xfs_scrub_context *sc);
+int xchk_directory(struct xfs_scrub_context *sc);
+int xchk_xattr(struct xfs_scrub_context *sc);
+int xchk_symlink(struct xfs_scrub_context *sc);
+int xchk_parent(struct xfs_scrub_context *sc);
 #ifdef CONFIG_XFS_RT
-int xfs_scrub_rtbitmap(struct xfs_scrub_context *sc);
-int xfs_scrub_rtsummary(struct xfs_scrub_context *sc);
+int xchk_rtbitmap(struct xfs_scrub_context *sc);
+int xchk_rtsummary(struct xfs_scrub_context *sc);
 #else
 static inline int
-xfs_scrub_rtbitmap(struct xfs_scrub_context *sc)
+xchk_rtbitmap(struct xfs_scrub_context *sc)
 {
 	return -ENOENT;
 }
 static inline int
-xfs_scrub_rtsummary(struct xfs_scrub_context *sc)
+xchk_rtsummary(struct xfs_scrub_context *sc)
 {
 	return -ENOENT;
 }
 #endif
 #ifdef CONFIG_XFS_QUOTA
-int xfs_scrub_quota(struct xfs_scrub_context *sc);
+int xchk_quota(struct xfs_scrub_context *sc);
 #else
 static inline int
-xfs_scrub_quota(struct xfs_scrub_context *sc)
+xchk_quota(struct xfs_scrub_context *sc)
 {
 	return -ENOENT;
 }
 #endif
 
 /* cross-referencing helpers */
-void xfs_scrub_xref_is_used_space(struct xfs_scrub_context *sc,
+void xchk_xref_is_used_space(struct xfs_scrub_context *sc,
 		xfs_agblock_t agbno, xfs_extlen_t len);
-void xfs_scrub_xref_is_not_inode_chunk(struct xfs_scrub_context *sc,
+void xchk_xref_is_not_inode_chunk(struct xfs_scrub_context *sc,
 		xfs_agblock_t agbno, xfs_extlen_t len);
-void xfs_scrub_xref_is_inode_chunk(struct xfs_scrub_context *sc,
+void xchk_xref_is_inode_chunk(struct xfs_scrub_context *sc,
 		xfs_agblock_t agbno, xfs_extlen_t len);
-void xfs_scrub_xref_is_owned_by(struct xfs_scrub_context *sc,
+void xchk_xref_is_owned_by(struct xfs_scrub_context *sc,
 		xfs_agblock_t agbno, xfs_extlen_t len,
 		struct xfs_owner_info *oinfo);
-void xfs_scrub_xref_is_not_owned_by(struct xfs_scrub_context *sc,
+void xchk_xref_is_not_owned_by(struct xfs_scrub_context *sc,
 		xfs_agblock_t agbno, xfs_extlen_t len,
 		struct xfs_owner_info *oinfo);
-void xfs_scrub_xref_has_no_owner(struct xfs_scrub_context *sc,
+void xchk_xref_has_no_owner(struct xfs_scrub_context *sc,
 		xfs_agblock_t agbno, xfs_extlen_t len);
-void xfs_scrub_xref_is_cow_staging(struct xfs_scrub_context *sc,
+void xchk_xref_is_cow_staging(struct xfs_scrub_context *sc,
 		xfs_agblock_t bno, xfs_extlen_t len);
-void xfs_scrub_xref_is_not_shared(struct xfs_scrub_context *sc,
+void xchk_xref_is_not_shared(struct xfs_scrub_context *sc,
 		xfs_agblock_t bno, xfs_extlen_t len);
 #ifdef CONFIG_XFS_RT
-void xfs_scrub_xref_is_used_rt_space(struct xfs_scrub_context *sc,
+void xchk_xref_is_used_rt_space(struct xfs_scrub_context *sc,
 		xfs_rtblock_t rtbno, xfs_extlen_t len);
 #else
-# define xfs_scrub_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
+# define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
 #endif
 
 #endif	/* __XFS_SCRUB_SCRUB_H__ */
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
index 570a89812116..e2a288e34337 100644
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -25,7 +25,7 @@
 
 /* Set us up to scrub a symbolic link. */
 int
-xfs_scrub_setup_symlink(
+xchk_setup_symlink(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
@@ -34,13 +34,13 @@ xfs_scrub_setup_symlink(
 	if (!sc->buf)
 		return -ENOMEM;
 
-	return xfs_scrub_setup_inode_contents(sc, ip, 0);
+	return xchk_setup_inode_contents(sc, ip, 0);
 }
 
 /* Symbolic links. */
 
 int
-xfs_scrub_symlink(
+xchk_symlink(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_inode		*ip = sc->ip;
@@ -55,7 +55,7 @@ xfs_scrub_symlink(
 
 	/* Plausible size? */
 	if (len > XFS_SYMLINK_MAXLEN || len <= 0) {
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 		goto out;
 	}
 
@@ -63,16 +63,16 @@ xfs_scrub_symlink(
 	if (ifp->if_flags & XFS_IFINLINE) {
 		if (len > XFS_IFORK_DSIZE(ip) ||
 		    len > strnlen(ifp->if_u1.if_data, XFS_IFORK_DSIZE(ip)))
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 		goto out;
 	}
 
 	/* Remote symlink; must read the contents. */
 	error = xfs_readlink_bmap_ilocked(sc->ip, sc->buf);
-	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
 		goto out;
 	if (strnlen(sc->buf, XFS_SYMLINK_MAXLEN) < len)
-		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 out:
 	return error;
 }
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 7c76d8b5cb05..1ef1202a1e45 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -22,7 +22,7 @@
 
 /* Figure out which block the btree cursor was pointing to. */
 static inline xfs_fsblock_t
-xfs_scrub_btree_cur_fsbno(
+xchk_btree_cur_fsbno(
 	struct xfs_btree_cur		*cur,
 	int				level)
 {
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index cec3e5ece5a1..11967d3942ac 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -12,7 +12,7 @@
 #include <linux/tracepoint.h>
 #include "xfs_bit.h"
 
-DECLARE_EVENT_CLASS(xfs_scrub_class,
+DECLARE_EVENT_CLASS(xchk_class,
 	TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
 		 int error),
 	TP_ARGS(ip, sm, error),
@@ -47,18 +47,18 @@ DECLARE_EVENT_CLASS(xfs_scrub_class,
 		  __entry->error)
 )
 #define DEFINE_SCRUB_EVENT(name) \
-DEFINE_EVENT(xfs_scrub_class, name, \
+DEFINE_EVENT(xchk_class, name, \
 	TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm, \
 		 int error), \
 	TP_ARGS(ip, sm, error))
 
-DEFINE_SCRUB_EVENT(xfs_scrub_start);
-DEFINE_SCRUB_EVENT(xfs_scrub_done);
-DEFINE_SCRUB_EVENT(xfs_scrub_deadlock_retry);
+DEFINE_SCRUB_EVENT(xchk_start);
+DEFINE_SCRUB_EVENT(xchk_done);
+DEFINE_SCRUB_EVENT(xchk_deadlock_retry);
 DEFINE_SCRUB_EVENT(xfs_repair_attempt);
 DEFINE_SCRUB_EVENT(xfs_repair_done);
 
-TRACE_EVENT(xfs_scrub_op_error,
+TRACE_EVENT(xchk_op_error,
 	TP_PROTO(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
 		 xfs_agblock_t bno, int error, void *ret_ip),
 	TP_ARGS(sc, agno, bno, error, ret_ip),
@@ -87,7 +87,7 @@ TRACE_EVENT(xfs_scrub_op_error,
 		  __entry->ret_ip)
 );
 
-TRACE_EVENT(xfs_scrub_file_op_error,
+TRACE_EVENT(xchk_file_op_error,
 	TP_PROTO(struct xfs_scrub_context *sc, int whichfork,
 		 xfs_fileoff_t offset, int error, void *ret_ip),
 	TP_ARGS(sc, whichfork, offset, error, ret_ip),
@@ -119,7 +119,7 @@ TRACE_EVENT(xfs_scrub_file_op_error,
 		  __entry->ret_ip)
 );
 
-DECLARE_EVENT_CLASS(xfs_scrub_block_error_class,
+DECLARE_EVENT_CLASS(xchk_block_error_class,
 	TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, void *ret_ip),
 	TP_ARGS(sc, daddr, ret_ip),
 	TP_STRUCT__entry(
@@ -153,15 +153,15 @@ DECLARE_EVENT_CLASS(xfs_scrub_block_error_class,
 )
 
 #define DEFINE_SCRUB_BLOCK_ERROR_EVENT(name) \
-DEFINE_EVENT(xfs_scrub_block_error_class, name, \
+DEFINE_EVENT(xchk_block_error_class, name, \
 	TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, \
 		 void *ret_ip), \
 	TP_ARGS(sc, daddr, ret_ip))
 
-DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_error);
-DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_preen);
+DEFINE_SCRUB_BLOCK_ERROR_EVENT(xchk_block_error);
+DEFINE_SCRUB_BLOCK_ERROR_EVENT(xchk_block_preen);
 
-DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class,
+DECLARE_EVENT_CLASS(xchk_ino_error_class,
 	TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, void *ret_ip),
 	TP_ARGS(sc, ino, ret_ip),
 	TP_STRUCT__entry(
@@ -184,16 +184,16 @@ DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class,
 )
 
 #define DEFINE_SCRUB_INO_ERROR_EVENT(name) \
-DEFINE_EVENT(xfs_scrub_ino_error_class, name, \
+DEFINE_EVENT(xchk_ino_error_class, name, \
 	TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, \
 		 void *ret_ip), \
 	TP_ARGS(sc, ino, ret_ip))
 
-DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_error);
-DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_preen);
-DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_warning);
+DEFINE_SCRUB_INO_ERROR_EVENT(xchk_ino_error);
+DEFINE_SCRUB_INO_ERROR_EVENT(xchk_ino_preen);
+DEFINE_SCRUB_INO_ERROR_EVENT(xchk_ino_warning);
 
-DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class,
+DECLARE_EVENT_CLASS(xchk_fblock_error_class,
 	TP_PROTO(struct xfs_scrub_context *sc, int whichfork,
 		 xfs_fileoff_t offset, void *ret_ip),
 	TP_ARGS(sc, whichfork, offset, ret_ip),
@@ -223,15 +223,15 @@ DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class,
 );
 
 #define DEFINE_SCRUB_FBLOCK_ERROR_EVENT(name) \
-DEFINE_EVENT(xfs_scrub_fblock_error_class, name, \
+DEFINE_EVENT(xchk_fblock_error_class, name, \
 	TP_PROTO(struct xfs_scrub_context *sc, int whichfork, \
 		 xfs_fileoff_t offset, void *ret_ip), \
 	TP_ARGS(sc, whichfork, offset, ret_ip))
 
-DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xfs_scrub_fblock_error);
-DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xfs_scrub_fblock_warning);
+DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_error);
+DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_warning);
 
-TRACE_EVENT(xfs_scrub_incomplete,
+TRACE_EVENT(xchk_incomplete,
 	TP_PROTO(struct xfs_scrub_context *sc, void *ret_ip),
 	TP_ARGS(sc, ret_ip),
 	TP_STRUCT__entry(
@@ -250,7 +250,7 @@ TRACE_EVENT(xfs_scrub_incomplete,
 		  __entry->ret_ip)
 );
 
-TRACE_EVENT(xfs_scrub_btree_op_error,
+TRACE_EVENT(xchk_btree_op_error,
 	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
 		 int level, int error, void *ret_ip),
 	TP_ARGS(sc, cur, level, error, ret_ip),
@@ -266,7 +266,7 @@ TRACE_EVENT(xfs_scrub_btree_op_error,
 		__field(void *, ret_ip)
 	),
 	TP_fast_assign(
-		xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
+		xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level);
 
 		__entry->dev = sc->mp->m_super->s_dev;
 		__entry->type = sc->sm->sm_type;
@@ -290,7 +290,7 @@ TRACE_EVENT(xfs_scrub_btree_op_error,
 		  __entry->ret_ip)
 );
 
-TRACE_EVENT(xfs_scrub_ifork_btree_op_error,
+TRACE_EVENT(xchk_ifork_btree_op_error,
 	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
 		 int level, int error, void *ret_ip),
 	TP_ARGS(sc, cur, level, error, ret_ip),
@@ -308,7 +308,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_op_error,
 		__field(void *, ret_ip)
 	),
 	TP_fast_assign(
-		xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
+		xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level);
 		__entry->dev = sc->mp->m_super->s_dev;
 		__entry->ino = sc->ip->i_ino;
 		__entry->whichfork = cur->bc_private.b.whichfork;
@@ -335,7 +335,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_op_error,
 		  __entry->ret_ip)
 );
 
-TRACE_EVENT(xfs_scrub_btree_error,
+TRACE_EVENT(xchk_btree_error,
 	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
 		 int level, void *ret_ip),
 	TP_ARGS(sc, cur, level, ret_ip),
@@ -350,7 +350,7 @@ TRACE_EVENT(xfs_scrub_btree_error,
 		__field(void *, ret_ip)
 	),
 	TP_fast_assign(
-		xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
+		xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level);
 		__entry->dev = sc->mp->m_super->s_dev;
 		__entry->type = sc->sm->sm_type;
 		__entry->btnum = cur->bc_btnum;
@@ -371,7 +371,7 @@ TRACE_EVENT(xfs_scrub_btree_error,
 		  __entry->ret_ip)
 );
 
-TRACE_EVENT(xfs_scrub_ifork_btree_error,
+TRACE_EVENT(xchk_ifork_btree_error,
 	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
 		 int level, void *ret_ip),
 	TP_ARGS(sc, cur, level, ret_ip),
@@ -388,7 +388,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_error,
 		__field(void *, ret_ip)
 	),
 	TP_fast_assign(
-		xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
+		xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level);
 		__entry->dev = sc->mp->m_super->s_dev;
 		__entry->ino = sc->ip->i_ino;
 		__entry->whichfork = cur->bc_private.b.whichfork;
@@ -413,7 +413,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_error,
 		  __entry->ret_ip)
 );
 
-DECLARE_EVENT_CLASS(xfs_scrub_sbtree_class,
+DECLARE_EVENT_CLASS(xchk_sbtree_class,
 	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
 		 int level),
 	TP_ARGS(sc, cur, level),
@@ -428,7 +428,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_sbtree_class,
 		__field(int, ptr)
 	),
 	TP_fast_assign(
-		xfs_fsblock_t fsbno = xfs_scrub_btree_cur_fsbno(cur, level);
+		xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level);
 
 		__entry->dev = sc->mp->m_super->s_dev;
 		__entry->type = sc->sm->sm_type;
@@ -450,15 +450,15 @@ DECLARE_EVENT_CLASS(xfs_scrub_sbtree_class,
 		  __entry->ptr)
 )
 #define DEFINE_SCRUB_SBTREE_EVENT(name) \
-DEFINE_EVENT(xfs_scrub_sbtree_class, name, \
+DEFINE_EVENT(xchk_sbtree_class, name, \
 	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, \
 		 int level), \
 	TP_ARGS(sc, cur, level))
 
-DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec);
-DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key);
+DEFINE_SCRUB_SBTREE_EVENT(xchk_btree_rec);
+DEFINE_SCRUB_SBTREE_EVENT(xchk_btree_key);
 
-TRACE_EVENT(xfs_scrub_xref_error,
+TRACE_EVENT(xchk_xref_error,
 	TP_PROTO(struct xfs_scrub_context *sc, int error, void *ret_ip),
 	TP_ARGS(sc, error, ret_ip),
 	TP_STRUCT__entry(
-- 
cgit v1.2.3


From b5e2196e9c7217387bab2ab4231ad9f4585f55c5 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 19 Jul 2018 12:29:11 -0700
Subject: xfs: shorten xfs_repair_ prefix to xrep_

Shorten all the metadata repair xfs_repair_* symbols to xrep_.
Whitespace damage will be fixed by a subsequent patch.  There are no
functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/scrub/agheader_repair.c |   2 +-
 fs/xfs/scrub/common.c          |   2 +-
 fs/xfs/scrub/repair.c          | 164 ++++++++++++++++++++---------------------
 fs/xfs/scrub/repair.h          |  74 +++++++++----------
 fs/xfs/scrub/scrub.c           |  54 +++++++-------
 fs/xfs/scrub/trace.h           |  40 +++++-----
 6 files changed, 168 insertions(+), 168 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 117eedac53df..631940f3ca4d 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -28,7 +28,7 @@
 
 /* Repair the superblock. */
 int
-xfs_repair_superblock(
+xrep_superblock(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 5c3d4e7c6166..d40bba9e7596 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -604,7 +604,7 @@ xchk_setup_fs(
 {
 	uint				resblks;
 
-	resblks = xfs_repair_calc_ag_resblks(sc);
+	resblks = xrep_calc_ag_resblks(sc);
 	return xchk_trans_alloc(sc, resblks);
 }
 
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 5eccd89c64a8..7e6a56a5f59d 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -41,21 +41,21 @@
  * and will set *fixed to true if it thinks it repaired anything.
  */
 int
-xfs_repair_attempt(
+xrep_attempt(
 	struct xfs_inode		*ip,
 	struct xfs_scrub_context	*sc,
 	bool				*fixed)
 {
 	int				error = 0;
 
-	trace_xfs_repair_attempt(ip, sc->sm, error);
+	trace_xrep_attempt(ip, sc->sm, error);
 
 	xchk_ag_btcur_free(&sc->sa);
 
 	/* Repair whatever's broken. */
 	ASSERT(sc->ops->repair);
 	error = sc->ops->repair(sc);
-	trace_xfs_repair_done(ip, sc->sm, error);
+	trace_xrep_done(ip, sc->sm, error);
 	switch (error) {
 	case 0:
 		/*
@@ -93,7 +93,7 @@ xfs_repair_attempt(
  * structure to track rate limiting information.
  */
 void
-xfs_repair_failure(
+xrep_failure(
 	struct xfs_mount		*mp)
 {
 	xfs_alert_ratelimited(mp,
@@ -105,7 +105,7 @@ xfs_repair_failure(
  * given mountpoint.
  */
 int
-xfs_repair_probe(
+xrep_probe(
 	struct xfs_scrub_context	*sc)
 {
 	int				error = 0;
@@ -121,7 +121,7 @@ xfs_repair_probe(
  * the btree cursors.
  */
 int
-xfs_repair_roll_ag_trans(
+xrep_roll_ag_trans(
 	struct xfs_scrub_context	*sc)
 {
 	int				error;
@@ -162,7 +162,7 @@ out_release:
  * in AG reservations) to construct a whole btree.
  */
 bool
-xfs_repair_ag_has_space(
+xrep_ag_has_space(
 	struct xfs_perag		*pag,
 	xfs_extlen_t			nr_blocks,
 	enum xfs_ag_resv_type		type)
@@ -178,7 +178,7 @@ xfs_repair_ag_has_space(
  * any type of per-AG btree.
  */
 xfs_extlen_t
-xfs_repair_calc_ag_resblks(
+xrep_calc_ag_resblks(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
@@ -231,7 +231,7 @@ xfs_repair_calc_ag_resblks(
 	}
 	xfs_perag_put(pag);
 
-	trace_xfs_repair_calc_ag_resblks(mp, sm->sm_agno, icount, aglen,
+	trace_xrep_calc_ag_resblks(mp, sm->sm_agno, icount, aglen,
 			freelen, usedlen);
 
 	/*
@@ -270,7 +270,7 @@ xfs_repair_calc_ag_resblks(
 		rmapbt_sz = 0;
 	}
 
-	trace_xfs_repair_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz,
+	trace_xrep_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz,
 			inobt_sz, rmapbt_sz, refcbt_sz);
 
 	return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
@@ -278,7 +278,7 @@ xfs_repair_calc_ag_resblks(
 
 /* Allocate a block in an AG. */
 int
-xfs_repair_alloc_ag_block(
+xrep_alloc_ag_block(
 	struct xfs_scrub_context	*sc,
 	struct xfs_owner_info		*oinfo,
 	xfs_fsblock_t			*fsbno,
@@ -329,7 +329,7 @@ xfs_repair_alloc_ag_block(
 
 /* Initialize a new AG btree root block with zero entries. */
 int
-xfs_repair_init_btblock(
+xrep_init_btblock(
 	struct xfs_scrub_context	*sc,
 	xfs_fsblock_t			fsb,
 	struct xfs_buf			**bpp,
@@ -340,7 +340,7 @@ xfs_repair_init_btblock(
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_buf			*bp;
 
-	trace_xfs_repair_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
+	trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
 			XFS_FSB_TO_AGBNO(mp, fsb), btnum);
 
 	ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno);
@@ -384,19 +384,19 @@ xfs_repair_init_btblock(
 
 /* Collect a dead btree extent for later disposal. */
 int
-xfs_repair_collect_btree_extent(
+xrep_collect_btree_extent(
 	struct xfs_scrub_context	*sc,
-	struct xfs_repair_extent_list	*exlist,
+	struct xrep_extent_list		*exlist,
 	xfs_fsblock_t			fsbno,
 	xfs_extlen_t			len)
 {
-	struct xfs_repair_extent	*rex;
+	struct xrep_extent		*rex;
 
-	trace_xfs_repair_collect_btree_extent(sc->mp,
+	trace_xrep_collect_btree_extent(sc->mp,
 			XFS_FSB_TO_AGNO(sc->mp, fsbno),
 			XFS_FSB_TO_AGBNO(sc->mp, fsbno), len);
 
-	rex = kmem_alloc(sizeof(struct xfs_repair_extent), KM_MAYFAIL);
+	rex = kmem_alloc(sizeof(struct xrep_extent), KM_MAYFAIL);
 	if (!rex)
 		return -ENOMEM;
 
@@ -414,14 +414,14 @@ xfs_repair_collect_btree_extent(
  * Therefore, free all the memory associated with the list so we can die.
  */
 void
-xfs_repair_cancel_btree_extents(
+xrep_cancel_btree_extents(
 	struct xfs_scrub_context	*sc,
-	struct xfs_repair_extent_list	*exlist)
+	struct xrep_extent_list		*exlist)
 {
-	struct xfs_repair_extent	*rex;
-	struct xfs_repair_extent	*n;
+	struct xrep_extent		*rex;
+	struct xrep_extent		*n;
 
-	for_each_xfs_repair_extent_safe(rex, n, exlist) {
+	for_each_xrep_extent_safe(rex, n, exlist) {
 		list_del(&rex->list);
 		kmem_free(rex);
 	}
@@ -429,16 +429,16 @@ xfs_repair_cancel_btree_extents(
 
 /* Compare two btree extents. */
 static int
-xfs_repair_btree_extent_cmp(
+xrep_btree_extent_cmp(
 	void				*priv,
 	struct list_head		*a,
 	struct list_head		*b)
 {
-	struct xfs_repair_extent	*ap;
-	struct xfs_repair_extent	*bp;
+	struct xrep_extent		*ap;
+	struct xrep_extent		*bp;
 
-	ap = container_of(a, struct xfs_repair_extent, list);
-	bp = container_of(b, struct xfs_repair_extent, list);
+	ap = container_of(a, struct xrep_extent, list);
+	bp = container_of(b, struct xrep_extent, list);
 
 	if (ap->fsbno > bp->fsbno)
 		return 1;
@@ -462,15 +462,15 @@ xfs_repair_btree_extent_cmp(
 #define LEFT_ALIGNED	(1 << 0)
 #define RIGHT_ALIGNED	(1 << 1)
 int
-xfs_repair_subtract_extents(
+xrep_subtract_extents(
 	struct xfs_scrub_context	*sc,
-	struct xfs_repair_extent_list	*exlist,
-	struct xfs_repair_extent_list	*sublist)
+	struct xrep_extent_list		*exlist,
+	struct xrep_extent_list		*sublist)
 {
 	struct list_head		*lp;
-	struct xfs_repair_extent	*ex;
-	struct xfs_repair_extent	*newex;
-	struct xfs_repair_extent	*subex;
+	struct xrep_extent		*ex;
+	struct xrep_extent		*newex;
+	struct xrep_extent		*subex;
 	xfs_fsblock_t			sub_fsb;
 	xfs_extlen_t			sub_len;
 	int				state;
@@ -480,8 +480,8 @@ xfs_repair_subtract_extents(
 		return 0;
 	ASSERT(!list_empty(&sublist->list));
 
-	list_sort(NULL, &exlist->list, xfs_repair_btree_extent_cmp);
-	list_sort(NULL, &sublist->list, xfs_repair_btree_extent_cmp);
+	list_sort(NULL, &exlist->list, xrep_btree_extent_cmp);
+	list_sort(NULL, &sublist->list, xrep_btree_extent_cmp);
 
 	/*
 	 * Now that we've sorted both lists, we iterate exlist once, rolling
@@ -491,11 +491,11 @@ xfs_repair_subtract_extents(
 	 * list traversal is similar to merge sort, but we're deleting
 	 * instead.  In this manner we avoid O(n^2) operations.
 	 */
-	subex = list_first_entry(&sublist->list, struct xfs_repair_extent,
+	subex = list_first_entry(&sublist->list, struct xrep_extent,
 			list);
 	lp = exlist->list.next;
 	while (lp != &exlist->list) {
-		ex = list_entry(lp, struct xfs_repair_extent, list);
+		ex = list_entry(lp, struct xrep_extent, list);
 
 		/*
 		 * Advance subex and/or ex until we find a pair that
@@ -548,7 +548,7 @@ xfs_repair_subtract_extents(
 			 * Deleting from the middle: add the new right extent
 			 * and then shrink the left extent.
 			 */
-			newex = kmem_alloc(sizeof(struct xfs_repair_extent),
+			newex = kmem_alloc(sizeof(struct xrep_extent),
 					KM_MAYFAIL);
 			if (!newex) {
 				error = -ENOMEM;
@@ -619,12 +619,12 @@ out:
  * is not intended for use with file data repairs; we have bunmapi for that.
  */
 int
-xfs_repair_invalidate_blocks(
+xrep_invalidate_blocks(
 	struct xfs_scrub_context	*sc,
-	struct xfs_repair_extent_list	*exlist)
+	struct xrep_extent_list		*exlist)
 {
-	struct xfs_repair_extent	*rex;
-	struct xfs_repair_extent	*n;
+	struct xrep_extent		*rex;
+	struct xrep_extent		*n;
 	struct xfs_buf			*bp;
 	xfs_fsblock_t			fsbno;
 	xfs_agblock_t			i;
@@ -637,7 +637,7 @@ xfs_repair_invalidate_blocks(
 	 * because we never own those; and if we can't TRYLOCK the buffer we
 	 * assume it's owned by someone else.
 	 */
-	for_each_xfs_repair_extent_safe(rex, n, exlist) {
+	for_each_xrep_extent_safe(rex, n, exlist) {
 		for (fsbno = rex->fsbno, i = rex->len; i > 0; fsbno++, i--) {
 			/* Skip AG headers and post-EOFS blocks */
 			if (!xfs_verify_fsbno(sc->mp, fsbno))
@@ -657,7 +657,7 @@ xfs_repair_invalidate_blocks(
 
 /* Ensure the freelist is the correct size. */
 int
-xfs_repair_fix_freelist(
+xrep_fix_freelist(
 	struct xfs_scrub_context	*sc,
 	bool				can_shrink)
 {
@@ -677,7 +677,7 @@ xfs_repair_fix_freelist(
  * Put a block back on the AGFL.
  */
 STATIC int
-xfs_repair_put_freelist(
+xrep_put_freelist(
 	struct xfs_scrub_context	*sc,
 	xfs_agblock_t			agbno)
 {
@@ -685,7 +685,7 @@ xfs_repair_put_freelist(
 	int				error;
 
 	/* Make sure there's space on the freelist. */
-	error = xfs_repair_fix_freelist(sc, true);
+	error = xrep_fix_freelist(sc, true);
 	if (error)
 		return error;
 
@@ -713,7 +713,7 @@ xfs_repair_put_freelist(
 
 /* Dispose of a single metadata block. */
 STATIC int
-xfs_repair_dispose_btree_block(
+xrep_dispose_btree_block(
 	struct xfs_scrub_context	*sc,
 	xfs_fsblock_t			fsbno,
 	struct xfs_owner_info		*oinfo,
@@ -767,7 +767,7 @@ xfs_repair_dispose_btree_block(
 	if (has_other_rmap)
 		error = xfs_rmap_free(sc->tp, agf_bp, agno, agbno, 1, oinfo);
 	else if (resv == XFS_AG_RESV_AGFL)
-		error = xfs_repair_put_freelist(sc, agbno);
+		error = xrep_put_freelist(sc, agbno);
 	else
 		error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv);
 	if (agf_bp != sc->sa.agf_bp)
@@ -777,7 +777,7 @@ xfs_repair_dispose_btree_block(
 
 	if (sc->ip)
 		return xfs_trans_roll_inode(&sc->tp, sc->ip);
-	return xfs_repair_roll_ag_trans(sc);
+	return xrep_roll_ag_trans(sc);
 
 out_free:
 	if (agf_bp != sc->sa.agf_bp)
@@ -787,29 +787,29 @@ out_free:
 
 /* Dispose of btree blocks from an old per-AG btree. */
 int
-xfs_repair_reap_btree_extents(
+xrep_reap_btree_extents(
 	struct xfs_scrub_context	*sc,
-	struct xfs_repair_extent_list	*exlist,
+	struct xrep_extent_list		*exlist,
 	struct xfs_owner_info		*oinfo,
 	enum xfs_ag_resv_type		type)
 {
-	struct xfs_repair_extent	*rex;
-	struct xfs_repair_extent	*n;
+	struct xrep_extent		*rex;
+	struct xrep_extent		*n;
 	int				error = 0;
 
 	ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));
 
 	/* Dispose of every block from the old btree. */
-	for_each_xfs_repair_extent_safe(rex, n, exlist) {
+	for_each_xrep_extent_safe(rex, n, exlist) {
 		ASSERT(sc->ip != NULL ||
 		       XFS_FSB_TO_AGNO(sc->mp, rex->fsbno) == sc->sa.agno);
 
-		trace_xfs_repair_dispose_btree_extent(sc->mp,
+		trace_xrep_dispose_btree_extent(sc->mp,
 				XFS_FSB_TO_AGNO(sc->mp, rex->fsbno),
 				XFS_FSB_TO_AGBNO(sc->mp, rex->fsbno), rex->len);
 
 		for (; rex->len > 0; rex->len--, rex->fsbno++) {
-			error = xfs_repair_dispose_btree_block(sc, rex->fsbno,
+			error = xrep_dispose_btree_block(sc, rex->fsbno,
 					oinfo, type);
 			if (error)
 				goto out;
@@ -819,7 +819,7 @@ xfs_repair_reap_btree_extents(
 	}
 
 out:
-	xfs_repair_cancel_btree_extents(sc, exlist);
+	xrep_cancel_btree_extents(sc, exlist);
 	return error;
 }
 
@@ -831,12 +831,12 @@ out:
  * btree roots.  This is not guaranteed to work if the AG is heavily damaged
  * or the rmap data are corrupt.
  *
- * Callers of xfs_repair_find_ag_btree_roots must lock the AGF and AGFL
+ * Callers of xrep_find_ag_btree_roots must lock the AGF and AGFL
  * buffers if the AGF is being rebuilt; or the AGF and AGI buffers if the
  * AGI is being rebuilt.  It must maintain these locks until it's safe for
  * other threads to change the btrees' shapes.  The caller provides
  * information about the btrees to look for by passing in an array of
- * xfs_repair_find_ag_btree with the (rmap owner, buf_ops, magic) fields set.
+ * xrep_find_ag_btree with the (rmap owner, buf_ops, magic) fields set.
  * The (root, height) fields will be set on return if anything is found.  The
  * last element of the array should have a NULL buf_ops to mark the end of the
  * array.
@@ -850,16 +850,16 @@ out:
  * should be the roots.
  */
 
-struct xfs_repair_findroot {
+struct xrep_findroot {
 	struct xfs_scrub_context	*sc;
 	struct xfs_buf			*agfl_bp;
 	struct xfs_agf			*agf;
-	struct xfs_repair_find_ag_btree	*btree_info;
+	struct xrep_find_ag_btree	*btree_info;
 };
 
 /* See if our block is in the AGFL. */
 STATIC int
-xfs_repair_findroot_agfl_walk(
+xrep_findroot_agfl_walk(
 	struct xfs_mount		*mp,
 	xfs_agblock_t			bno,
 	void				*priv)
@@ -871,9 +871,9 @@ xfs_repair_findroot_agfl_walk(
 
 /* Does this block match the btree information passed in? */
 STATIC int
-xfs_repair_findroot_block(
-	struct xfs_repair_findroot	*ri,
-	struct xfs_repair_find_ag_btree	*fab,
+xrep_findroot_block(
+	struct xrep_findroot		*ri,
+	struct xrep_find_ag_btree	*fab,
 	uint64_t			owner,
 	xfs_agblock_t			agbno,
 	bool				*found_it)
@@ -894,7 +894,7 @@ xfs_repair_findroot_block(
 	 */
 	if (owner == XFS_RMAP_OWN_AG) {
 		error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
-				xfs_repair_findroot_agfl_walk, &agbno);
+				xrep_findroot_agfl_walk, &agbno);
 		if (error == XFS_BTREE_QUERY_RANGE_ABORT)
 			return 0;
 		if (error)
@@ -932,7 +932,7 @@ xfs_repair_findroot_block(
 	fab->height = xfs_btree_get_level(btblock) + 1;
 	*found_it = true;
 
-	trace_xfs_repair_findroot_block(mp, ri->sc->sa.agno, agbno,
+	trace_xrep_findroot_block(mp, ri->sc->sa.agno, agbno,
 			be32_to_cpu(btblock->bb_magic), fab->height - 1);
 out:
 	xfs_trans_brelse(ri->sc->tp, bp);
@@ -944,13 +944,13 @@ out:
  * looking for?
  */
 STATIC int
-xfs_repair_findroot_rmap(
+xrep_findroot_rmap(
 	struct xfs_btree_cur		*cur,
 	struct xfs_rmap_irec		*rec,
 	void				*priv)
 {
-	struct xfs_repair_findroot	*ri = priv;
-	struct xfs_repair_find_ag_btree	*fab;
+	struct xrep_findroot		*ri = priv;
+	struct xrep_find_ag_btree	*fab;
 	xfs_agblock_t			b;
 	bool				found_it;
 	int				error = 0;
@@ -965,7 +965,7 @@ xfs_repair_findroot_rmap(
 		for (fab = ri->btree_info; fab->buf_ops; fab++) {
 			if (rec->rm_owner != fab->rmap_owner)
 				continue;
-			error = xfs_repair_findroot_block(ri, fab,
+			error = xrep_findroot_block(ri, fab,
 					rec->rm_owner, rec->rm_startblock + b,
 					&found_it);
 			if (error)
@@ -980,15 +980,15 @@ xfs_repair_findroot_rmap(
 
 /* Find the roots of the per-AG btrees described in btree_info. */
 int
-xfs_repair_find_ag_btree_roots(
+xrep_find_ag_btree_roots(
 	struct xfs_scrub_context	*sc,
 	struct xfs_buf			*agf_bp,
-	struct xfs_repair_find_ag_btree	*btree_info,
+	struct xrep_find_ag_btree	*btree_info,
 	struct xfs_buf			*agfl_bp)
 {
 	struct xfs_mount		*mp = sc->mp;
-	struct xfs_repair_findroot	ri;
-	struct xfs_repair_find_ag_btree	*fab;
+	struct xrep_findroot		ri;
+	struct xrep_find_ag_btree	*fab;
 	struct xfs_btree_cur		*cur;
 	int				error;
 
@@ -1007,7 +1007,7 @@ xfs_repair_find_ag_btree_roots(
 	}
 
 	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
-	error = xfs_rmap_query_all(cur, xfs_repair_findroot_rmap, &ri);
+	error = xfs_rmap_query_all(cur, xrep_findroot_rmap, &ri);
 	xfs_btree_del_cursor(cur, error);
 
 	return error;
@@ -1015,7 +1015,7 @@ xfs_repair_find_ag_btree_roots(
 
 /* Force a quotacheck the next time we mount. */
 void
-xfs_repair_force_quotacheck(
+xrep_force_quotacheck(
 	struct xfs_scrub_context	*sc,
 	uint				dqtype)
 {
@@ -1043,7 +1043,7 @@ xfs_repair_force_quotacheck(
  * repair corruptions in the quota metadata.
  */
 int
-xfs_repair_ino_dqattach(
+xrep_ino_dqattach(
 	struct xfs_scrub_context	*sc)
 {
 	int				error;
@@ -1057,11 +1057,11 @@ xfs_repair_ino_dqattach(
 "inode %llu repair encountered quota error %d, quotacheck forced.",
 				(unsigned long long)sc->ip->i_ino, error);
 		if (XFS_IS_UQUOTA_ON(sc->mp) && !sc->ip->i_udquot)
-			xfs_repair_force_quotacheck(sc, XFS_DQ_USER);
+			xrep_force_quotacheck(sc, XFS_DQ_USER);
 		if (XFS_IS_GQUOTA_ON(sc->mp) && !sc->ip->i_gdquot)
-			xfs_repair_force_quotacheck(sc, XFS_DQ_GROUP);
+			xrep_force_quotacheck(sc, XFS_DQ_GROUP);
 		if (XFS_IS_PQUOTA_ON(sc->mp) && !sc->ip->i_pdquot)
-			xfs_repair_force_quotacheck(sc, XFS_DQ_PROJ);
+			xrep_force_quotacheck(sc, XFS_DQ_PROJ);
 		/* fall through */
 	case -ESRCH:
 		error = 0;
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index ef47826b6725..60d81294797b 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -6,7 +6,7 @@
 #ifndef __XFS_SCRUB_REPAIR_H__
 #define __XFS_SCRUB_REPAIR_H__
 
-static inline int xfs_repair_notsupported(struct xfs_scrub_context *sc)
+static inline int xrep_notsupported(struct xfs_scrub_context *sc)
 {
 	return -EOPNOTSUPP;
 }
@@ -15,55 +15,55 @@ static inline int xfs_repair_notsupported(struct xfs_scrub_context *sc)
 
 /* Repair helpers */
 
-int xfs_repair_attempt(struct xfs_inode *ip, struct xfs_scrub_context *sc,
+int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub_context *sc,
 		bool *fixed);
-void xfs_repair_failure(struct xfs_mount *mp);
-int xfs_repair_roll_ag_trans(struct xfs_scrub_context *sc);
-bool xfs_repair_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
+void xrep_failure(struct xfs_mount *mp);
+int xrep_roll_ag_trans(struct xfs_scrub_context *sc);
+bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
 		enum xfs_ag_resv_type type);
-xfs_extlen_t xfs_repair_calc_ag_resblks(struct xfs_scrub_context *sc);
-int xfs_repair_alloc_ag_block(struct xfs_scrub_context *sc,
+xfs_extlen_t xrep_calc_ag_resblks(struct xfs_scrub_context *sc);
+int xrep_alloc_ag_block(struct xfs_scrub_context *sc,
 		struct xfs_owner_info *oinfo, xfs_fsblock_t *fsbno,
 		enum xfs_ag_resv_type resv);
-int xfs_repair_init_btblock(struct xfs_scrub_context *sc, xfs_fsblock_t fsb,
+int xrep_init_btblock(struct xfs_scrub_context *sc, xfs_fsblock_t fsb,
 		struct xfs_buf **bpp, xfs_btnum_t btnum,
 		const struct xfs_buf_ops *ops);
 
-struct xfs_repair_extent {
+struct xrep_extent {
 	struct list_head		list;
 	xfs_fsblock_t			fsbno;
 	xfs_extlen_t			len;
 };
 
-struct xfs_repair_extent_list {
+struct xrep_extent_list {
 	struct list_head		list;
 };
 
 static inline void
-xfs_repair_init_extent_list(
-	struct xfs_repair_extent_list	*exlist)
+xrep_init_extent_list(
+	struct xrep_extent_list		*exlist)
 {
 	INIT_LIST_HEAD(&exlist->list);
 }
 
-#define for_each_xfs_repair_extent_safe(rbe, n, exlist) \
+#define for_each_xrep_extent_safe(rbe, n, exlist) \
 	list_for_each_entry_safe((rbe), (n), &(exlist)->list, list)
-int xfs_repair_collect_btree_extent(struct xfs_scrub_context *sc,
-		struct xfs_repair_extent_list *btlist, xfs_fsblock_t fsbno,
+int xrep_collect_btree_extent(struct xfs_scrub_context *sc,
+		struct xrep_extent_list *btlist, xfs_fsblock_t fsbno,
 		xfs_extlen_t len);
-void xfs_repair_cancel_btree_extents(struct xfs_scrub_context *sc,
-		struct xfs_repair_extent_list *btlist);
-int xfs_repair_subtract_extents(struct xfs_scrub_context *sc,
-		struct xfs_repair_extent_list *exlist,
-		struct xfs_repair_extent_list *sublist);
-int xfs_repair_fix_freelist(struct xfs_scrub_context *sc, bool can_shrink);
-int xfs_repair_invalidate_blocks(struct xfs_scrub_context *sc,
-		struct xfs_repair_extent_list *btlist);
-int xfs_repair_reap_btree_extents(struct xfs_scrub_context *sc,
-		struct xfs_repair_extent_list *exlist,
+void xrep_cancel_btree_extents(struct xfs_scrub_context *sc,
+		struct xrep_extent_list *btlist);
+int xrep_subtract_extents(struct xfs_scrub_context *sc,
+		struct xrep_extent_list *exlist,
+		struct xrep_extent_list *sublist);
+int xrep_fix_freelist(struct xfs_scrub_context *sc, bool can_shrink);
+int xrep_invalidate_blocks(struct xfs_scrub_context *sc,
+		struct xrep_extent_list *btlist);
+int xrep_reap_btree_extents(struct xfs_scrub_context *sc,
+		struct xrep_extent_list *exlist,
 		struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
 
-struct xfs_repair_find_ag_btree {
+struct xrep_find_ag_btree {
 	/* in: rmap owner of the btree we're looking for */
 	uint64_t			rmap_owner;
 
@@ -78,21 +78,21 @@ struct xfs_repair_find_ag_btree {
 	unsigned int			height;
 };
 
-int xfs_repair_find_ag_btree_roots(struct xfs_scrub_context *sc,
+int xrep_find_ag_btree_roots(struct xfs_scrub_context *sc,
 		struct xfs_buf *agf_bp,
-		struct xfs_repair_find_ag_btree *btree_info,
+		struct xrep_find_ag_btree *btree_info,
 		struct xfs_buf *agfl_bp);
-void xfs_repair_force_quotacheck(struct xfs_scrub_context *sc, uint dqtype);
-int xfs_repair_ino_dqattach(struct xfs_scrub_context *sc);
+void xrep_force_quotacheck(struct xfs_scrub_context *sc, uint dqtype);
+int xrep_ino_dqattach(struct xfs_scrub_context *sc);
 
 /* Metadata repairers */
 
-int xfs_repair_probe(struct xfs_scrub_context *sc);
-int xfs_repair_superblock(struct xfs_scrub_context *sc);
+int xrep_probe(struct xfs_scrub_context *sc);
+int xrep_superblock(struct xfs_scrub_context *sc);
 
 #else
 
-static inline int xfs_repair_attempt(
+static inline int xrep_attempt(
 	struct xfs_inode		*ip,
 	struct xfs_scrub_context	*sc,
 	bool				*fixed)
@@ -100,18 +100,18 @@ static inline int xfs_repair_attempt(
 	return -EOPNOTSUPP;
 }
 
-static inline void xfs_repair_failure(struct xfs_mount *mp) {}
+static inline void xrep_failure(struct xfs_mount *mp) {}
 
 static inline xfs_extlen_t
-xfs_repair_calc_ag_resblks(
+xrep_calc_ag_resblks(
 	struct xfs_scrub_context	*sc)
 {
 	ASSERT(!(sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR));
 	return 0;
 }
 
-#define xfs_repair_probe		xfs_repair_notsupported
-#define xfs_repair_superblock		xfs_repair_notsupported
+#define xrep_probe			xrep_notsupported
+#define xrep_superblock			xrep_notsupported
 
 #endif /* CONFIG_XFS_ONLINE_REPAIR */
 
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index ead97ab91a3c..a6efede6e430 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -202,150 +202,150 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
 		.type	= ST_NONE,
 		.setup	= xchk_setup_fs,
 		.scrub	= xchk_probe,
-		.repair = xfs_repair_probe,
+		.repair = xrep_probe,
 	},
 	[XFS_SCRUB_TYPE_SB] = {		/* superblock */
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_fs,
 		.scrub	= xchk_superblock,
-		.repair	= xfs_repair_superblock,
+		.repair	= xrep_superblock,
 	},
 	[XFS_SCRUB_TYPE_AGF] = {	/* agf */
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_fs,
 		.scrub	= xchk_agf,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_fs,
 		.scrub	= xchk_agfl,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_AGI] = {	/* agi */
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_fs,
 		.scrub	= xchk_agi,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_BNOBT] = {	/* bnobt */
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_ag_allocbt,
 		.scrub	= xchk_bnobt,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_CNTBT] = {	/* cntbt */
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_ag_allocbt,
 		.scrub	= xchk_cntbt,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_INOBT] = {	/* inobt */
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_ag_iallocbt,
 		.scrub	= xchk_inobt,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_FINOBT] = {	/* finobt */
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_ag_iallocbt,
 		.scrub	= xchk_finobt,
 		.has	= xfs_sb_version_hasfinobt,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_RMAPBT] = {	/* rmapbt */
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_ag_rmapbt,
 		.scrub	= xchk_rmapbt,
 		.has	= xfs_sb_version_hasrmapbt,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_REFCNTBT] = {	/* refcountbt */
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_ag_refcountbt,
 		.scrub	= xchk_refcountbt,
 		.has	= xfs_sb_version_hasreflink,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_INODE] = {	/* inode record */
 		.type	= ST_INODE,
 		.setup	= xchk_setup_inode,
 		.scrub	= xchk_inode,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_BMBTD] = {	/* inode data fork */
 		.type	= ST_INODE,
 		.setup	= xchk_setup_inode_bmap,
 		.scrub	= xchk_bmap_data,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_BMBTA] = {	/* inode attr fork */
 		.type	= ST_INODE,
 		.setup	= xchk_setup_inode_bmap,
 		.scrub	= xchk_bmap_attr,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_BMBTC] = {	/* inode CoW fork */
 		.type	= ST_INODE,
 		.setup	= xchk_setup_inode_bmap,
 		.scrub	= xchk_bmap_cow,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_DIR] = {	/* directory */
 		.type	= ST_INODE,
 		.setup	= xchk_setup_directory,
 		.scrub	= xchk_directory,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_XATTR] = {	/* extended attributes */
 		.type	= ST_INODE,
 		.setup	= xchk_setup_xattr,
 		.scrub	= xchk_xattr,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_SYMLINK] = {	/* symbolic link */
 		.type	= ST_INODE,
 		.setup	= xchk_setup_symlink,
 		.scrub	= xchk_symlink,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_PARENT] = {	/* parent pointers */
 		.type	= ST_INODE,
 		.setup	= xchk_setup_parent,
 		.scrub	= xchk_parent,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_RTBITMAP] = {	/* realtime bitmap */
 		.type	= ST_FS,
 		.setup	= xchk_setup_rt,
 		.scrub	= xchk_rtbitmap,
 		.has	= xfs_sb_version_hasrealtime,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_RTSUM] = {	/* realtime summary */
 		.type	= ST_FS,
 		.setup	= xchk_setup_rt,
 		.scrub	= xchk_rtsummary,
 		.has	= xfs_sb_version_hasrealtime,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_UQUOTA] = {	/* user quota */
 		.type	= ST_FS,
 		.setup	= xchk_setup_quota,
 		.scrub	= xchk_quota,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_GQUOTA] = {	/* group quota */
 		.type	= ST_FS,
 		.setup	= xchk_setup_quota,
 		.scrub	= xchk_quota,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 	[XFS_SCRUB_TYPE_PQUOTA] = {	/* project quota */
 		.type	= ST_FS,
 		.setup	= xchk_setup_quota,
 		.scrub	= xchk_quota,
-		.repair	= xfs_repair_notsupported,
+		.repair	= xrep_notsupported,
 	},
 };
 
@@ -457,7 +457,7 @@ static inline void xchk_postmortem(struct xfs_scrub_context *sc)
 	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
 	    (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
 				 XFS_SCRUB_OFLAG_XCORRUPT)))
-		xfs_repair_failure(sc->mp);
+		xrep_failure(sc->mp);
 }
 #else
 static inline void xchk_postmortem(struct xfs_scrub_context *sc)
@@ -555,13 +555,13 @@ retry_op:
 		 * If it's broken, userspace wants us to fix it, and we haven't
 		 * already tried to fix it, then attempt a repair.
 		 */
-		error = xfs_repair_attempt(ip, &sc, &already_fixed);
+		error = xrep_attempt(ip, &sc, &already_fixed);
 		if (error == -EAGAIN) {
 			if (sc.try_harder)
 				try_harder = true;
 			error = xchk_teardown(&sc, ip, 0);
 			if (error) {
-				xfs_repair_failure(mp);
+				xrep_failure(mp);
 				goto out;
 			}
 			goto retry_op;
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 11967d3942ac..96f3edda3e91 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -55,8 +55,8 @@ DEFINE_EVENT(xchk_class, name, \
 DEFINE_SCRUB_EVENT(xchk_start);
 DEFINE_SCRUB_EVENT(xchk_done);
 DEFINE_SCRUB_EVENT(xchk_deadlock_retry);
-DEFINE_SCRUB_EVENT(xfs_repair_attempt);
-DEFINE_SCRUB_EVENT(xfs_repair_done);
+DEFINE_SCRUB_EVENT(xrep_attempt);
+DEFINE_SCRUB_EVENT(xrep_done);
 
 TRACE_EVENT(xchk_op_error,
 	TP_PROTO(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
@@ -483,7 +483,7 @@ TRACE_EVENT(xchk_xref_error,
 /* repair tracepoints */
 #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
 
-DECLARE_EVENT_CLASS(xfs_repair_extent_class,
+DECLARE_EVENT_CLASS(xrep_extent_class,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
 		 xfs_agblock_t agbno, xfs_extlen_t len),
 	TP_ARGS(mp, agno, agbno, len),
@@ -506,15 +506,15 @@ DECLARE_EVENT_CLASS(xfs_repair_extent_class,
 		  __entry->len)
 );
 #define DEFINE_REPAIR_EXTENT_EVENT(name) \
-DEFINE_EVENT(xfs_repair_extent_class, name, \
+DEFINE_EVENT(xrep_extent_class, name, \
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
 		 xfs_agblock_t agbno, xfs_extlen_t len), \
 	TP_ARGS(mp, agno, agbno, len))
-DEFINE_REPAIR_EXTENT_EVENT(xfs_repair_dispose_btree_extent);
-DEFINE_REPAIR_EXTENT_EVENT(xfs_repair_collect_btree_extent);
-DEFINE_REPAIR_EXTENT_EVENT(xfs_repair_agfl_insert);
+DEFINE_REPAIR_EXTENT_EVENT(xrep_dispose_btree_extent);
+DEFINE_REPAIR_EXTENT_EVENT(xrep_collect_btree_extent);
+DEFINE_REPAIR_EXTENT_EVENT(xrep_agfl_insert);
 
-DECLARE_EVENT_CLASS(xfs_repair_rmap_class,
+DECLARE_EVENT_CLASS(xrep_rmap_class,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
 		 xfs_agblock_t agbno, xfs_extlen_t len,
 		 uint64_t owner, uint64_t offset, unsigned int flags),
@@ -547,17 +547,17 @@ DECLARE_EVENT_CLASS(xfs_repair_rmap_class,
 		  __entry->flags)
 );
 #define DEFINE_REPAIR_RMAP_EVENT(name) \
-DEFINE_EVENT(xfs_repair_rmap_class, name, \
+DEFINE_EVENT(xrep_rmap_class, name, \
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
 		 xfs_agblock_t agbno, xfs_extlen_t len, \
 		 uint64_t owner, uint64_t offset, unsigned int flags), \
 	TP_ARGS(mp, agno, agbno, len, owner, offset, flags))
-DEFINE_REPAIR_RMAP_EVENT(xfs_repair_alloc_extent_fn);
-DEFINE_REPAIR_RMAP_EVENT(xfs_repair_ialloc_extent_fn);
-DEFINE_REPAIR_RMAP_EVENT(xfs_repair_rmap_extent_fn);
-DEFINE_REPAIR_RMAP_EVENT(xfs_repair_bmap_extent_fn);
+DEFINE_REPAIR_RMAP_EVENT(xrep_alloc_extent_fn);
+DEFINE_REPAIR_RMAP_EVENT(xrep_ialloc_extent_fn);
+DEFINE_REPAIR_RMAP_EVENT(xrep_rmap_extent_fn);
+DEFINE_REPAIR_RMAP_EVENT(xrep_bmap_extent_fn);
 
-TRACE_EVENT(xfs_repair_refcount_extent_fn,
+TRACE_EVENT(xrep_refcount_extent_fn,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
 		 struct xfs_refcount_irec *irec),
 	TP_ARGS(mp, agno, irec),
@@ -583,7 +583,7 @@ TRACE_EVENT(xfs_repair_refcount_extent_fn,
 		  __entry->refcount)
 )
 
-TRACE_EVENT(xfs_repair_init_btblock,
+TRACE_EVENT(xrep_init_btblock,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
 		 xfs_btnum_t btnum),
 	TP_ARGS(mp, agno, agbno, btnum),
@@ -605,7 +605,7 @@ TRACE_EVENT(xfs_repair_init_btblock,
 		  __entry->agbno,
 		  __entry->btnum)
 )
-TRACE_EVENT(xfs_repair_findroot_block,
+TRACE_EVENT(xrep_findroot_block,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
 		 uint32_t magic, uint16_t level),
 	TP_ARGS(mp, agno, agbno, magic, level),
@@ -630,7 +630,7 @@ TRACE_EVENT(xfs_repair_findroot_block,
 		  __entry->magic,
 		  __entry->level)
 )
-TRACE_EVENT(xfs_repair_calc_ag_resblks,
+TRACE_EVENT(xrep_calc_ag_resblks,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
 		 xfs_agino_t icount, xfs_agblock_t aglen, xfs_agblock_t freelen,
 		 xfs_agblock_t usedlen),
@@ -659,7 +659,7 @@ TRACE_EVENT(xfs_repair_calc_ag_resblks,
 		  __entry->freelen,
 		  __entry->usedlen)
 )
-TRACE_EVENT(xfs_repair_calc_ag_resblks_btsize,
+TRACE_EVENT(xrep_calc_ag_resblks_btsize,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
 		 xfs_agblock_t bnobt_sz, xfs_agblock_t inobt_sz,
 		 xfs_agblock_t rmapbt_sz, xfs_agblock_t refcbt_sz),
@@ -688,7 +688,7 @@ TRACE_EVENT(xfs_repair_calc_ag_resblks_btsize,
 		  __entry->rmapbt_sz,
 		  __entry->refcbt_sz)
 )
-TRACE_EVENT(xfs_repair_reset_counters,
+TRACE_EVENT(xrep_reset_counters,
 	TP_PROTO(struct xfs_mount *mp),
 	TP_ARGS(mp),
 	TP_STRUCT__entry(
@@ -701,7 +701,7 @@ TRACE_EVENT(xfs_repair_reset_counters,
 		  MAJOR(__entry->dev), MINOR(__entry->dev))
 )
 
-TRACE_EVENT(xfs_repair_ialloc_insert,
+TRACE_EVENT(xrep_ialloc_insert,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
 		 xfs_agino_t startino, uint16_t holemask, uint8_t count,
 		 uint8_t freecount, uint64_t freemask),
-- 
cgit v1.2.3


From 1d8a748a8aa94a7da8f3d4fac1892037890d3cff Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 19 Jul 2018 12:29:12 -0700
Subject: xfs: shorten struct xfs_scrub_context to struct xfs_scrub

Shorten the name of the online fsck context structure.  Whitespace
damage will be fixed by a subsequent patch.  There are no functional
changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/scrub/agheader.c        | 32 +++++++--------
 fs/xfs/scrub/agheader_repair.c |  2 +-
 fs/xfs/scrub/alloc.c           | 14 +++----
 fs/xfs/scrub/attr.c            | 10 ++---
 fs/xfs/scrub/bmap.c            | 22 +++++------
 fs/xfs/scrub/btree.c           | 14 +++----
 fs/xfs/scrub/btree.h           | 12 +++---
 fs/xfs/scrub/common.c          | 62 ++++++++++++++---------------
 fs/xfs/scrub/common.h          | 88 +++++++++++++++++++++---------------------
 fs/xfs/scrub/dabtree.c         |  6 +--
 fs/xfs/scrub/dabtree.h         |  4 +-
 fs/xfs/scrub/dir.c             | 18 ++++-----
 fs/xfs/scrub/ialloc.c          | 22 +++++------
 fs/xfs/scrub/inode.c           | 22 +++++------
 fs/xfs/scrub/parent.c          |  8 ++--
 fs/xfs/scrub/quota.c           | 12 +++---
 fs/xfs/scrub/refcount.c        | 16 ++++----
 fs/xfs/scrub/repair.c          | 36 ++++++++---------
 fs/xfs/scrub/repair.h          | 38 +++++++++---------
 fs/xfs/scrub/rmap.c            | 16 ++++----
 fs/xfs/scrub/rtbitmap.c        | 10 ++---
 fs/xfs/scrub/scrub.c           | 10 ++---
 fs/xfs/scrub/scrub.h           | 78 ++++++++++++++++++-------------------
 fs/xfs/scrub/symlink.c         |  4 +-
 fs/xfs/scrub/trace.h           | 32 +++++++--------
 25 files changed, 294 insertions(+), 294 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index c0625ec16d63..14ba4189ae8f 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -29,7 +29,7 @@
 /* Cross-reference with the other btrees. */
 STATIC void
 xchk_superblock_xref(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_buf			*bp)
 {
 	struct xfs_owner_info		oinfo;
@@ -66,7 +66,7 @@ xchk_superblock_xref(
  */
 int
 xchk_superblock(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_buf			*bp;
@@ -365,7 +365,7 @@ xchk_agf_record_bno_lengths(
 /* Check agf_freeblks */
 static inline void
 xchk_agf_xref_freeblks(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_agf			*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
 	xfs_extlen_t			blocks = 0;
@@ -385,7 +385,7 @@ xchk_agf_xref_freeblks(
 /* Cross reference the AGF with the cntbt (freespace by length btree) */
 static inline void
 xchk_agf_xref_cntbt(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_agf			*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
 	xfs_agblock_t			agbno;
@@ -417,7 +417,7 @@ xchk_agf_xref_cntbt(
 /* Check the btree block counts in the AGF against the btrees. */
 STATIC void
 xchk_agf_xref_btreeblks(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_agf			*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
 	struct xfs_mount		*mp = sc->mp;
@@ -463,7 +463,7 @@ xchk_agf_xref_btreeblks(
 /* Check agf_refcount_blocks against tree size */
 static inline void
 xchk_agf_xref_refcblks(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_agf			*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
 	xfs_agblock_t			blocks;
@@ -482,7 +482,7 @@ xchk_agf_xref_refcblks(
 /* Cross-reference with the other btrees. */
 STATIC void
 xchk_agf_xref(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_owner_info		oinfo;
 	struct xfs_mount		*mp = sc->mp;
@@ -514,7 +514,7 @@ xchk_agf_xref(
 /* Scrub the AGF. */
 int
 xchk_agf(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_agf			*agf;
@@ -602,13 +602,13 @@ struct xchk_agfl_info {
 	unsigned int			sz_entries;
 	unsigned int			nr_entries;
 	xfs_agblock_t			*entries;
-	struct xfs_scrub_context	*sc;
+	struct xfs_scrub	*sc;
 };
 
 /* Cross-reference with the other btrees. */
 STATIC void
 xchk_agfl_block_xref(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			agbno,
 	struct xfs_owner_info		*oinfo)
 {
@@ -629,7 +629,7 @@ xchk_agfl_block(
 	void				*priv)
 {
 	struct xchk_agfl_info		*sai = priv;
-	struct xfs_scrub_context	*sc = sai->sc;
+	struct xfs_scrub	*sc = sai->sc;
 	xfs_agnumber_t			agno = sc->sa.agno;
 
 	if (xfs_verify_agbno(mp, agno, agbno) &&
@@ -660,7 +660,7 @@ xchk_agblock_cmp(
 /* Cross-reference with the other btrees. */
 STATIC void
 xchk_agfl_xref(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_owner_info		oinfo;
 	struct xfs_mount		*mp = sc->mp;
@@ -691,7 +691,7 @@ xchk_agfl_xref(
 /* Scrub the AGFL. */
 int
 xchk_agfl(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xchk_agfl_info		sai;
 	struct xfs_agf			*agf;
@@ -768,7 +768,7 @@ out:
 /* Check agi_count/agi_freecount */
 static inline void
 xchk_agi_xref_icounts(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_agi			*agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
 	xfs_agino_t			icount;
@@ -789,7 +789,7 @@ xchk_agi_xref_icounts(
 /* Cross-reference with the other btrees. */
 STATIC void
 xchk_agi_xref(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_owner_info		oinfo;
 	struct xfs_mount		*mp = sc->mp;
@@ -818,7 +818,7 @@ xchk_agi_xref(
 /* Scrub the AGI. */
 int
 xchk_agi(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_agi			*agi;
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 631940f3ca4d..2457968482f8 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -29,7 +29,7 @@
 /* Repair the superblock. */
 int
 xrep_superblock(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_buf			*bp;
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 1f6e3a6a1fdd..653d80b3aa39 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -29,7 +29,7 @@
  */
 int
 xchk_setup_ag_allocbt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	return xchk_setup_ag_btree(sc, ip, false);
@@ -42,7 +42,7 @@ xchk_setup_ag_allocbt(
  */
 STATIC void
 xchk_allocbt_xref_other(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
 {
@@ -82,7 +82,7 @@ xchk_allocbt_xref_other(
 /* Cross-reference with the other btrees. */
 STATIC void
 xchk_allocbt_xref(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
 {
@@ -123,7 +123,7 @@ xchk_allocbt_rec(
 /* Scrub the freespace btrees for some AG. */
 STATIC int
 xchk_allocbt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_btnum_t			which)
 {
 	struct xfs_owner_info		oinfo;
@@ -136,14 +136,14 @@ xchk_allocbt(
 
 int
 xchk_bnobt(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	return xchk_allocbt(sc, XFS_BTNUM_BNO);
 }
 
 int
 xchk_cntbt(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	return xchk_allocbt(sc, XFS_BTNUM_CNT);
 }
@@ -151,7 +151,7 @@ xchk_cntbt(
 /* xref check that the extent is not free */
 void
 xchk_xref_is_used_space(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
 {
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 0068bebddf3e..6650fb3010b6 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -33,7 +33,7 @@
 /* Set us up to scrub an inode's extended attributes. */
 int
 xchk_setup_xattr(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	size_t				sz;
@@ -57,7 +57,7 @@ xchk_setup_xattr(
 
 struct xchk_xattr {
 	struct xfs_attr_list_context	context;
-	struct xfs_scrub_context	*sc;
+	struct xfs_scrub	*sc;
 };
 
 /*
@@ -127,7 +127,7 @@ fail_xref:
  */
 STATIC bool
 xchk_xattr_set_map(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	unsigned long			*map,
 	unsigned int			start,
 	unsigned int			len)
@@ -155,7 +155,7 @@ xchk_xattr_set_map(
  */
 STATIC bool
 xchk_xattr_check_freemap(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	unsigned long			*map,
 	struct xfs_attr3_icleaf_hdr	*leafhdr)
 {
@@ -405,7 +405,7 @@ out:
 /* Scrub the extended attribute metadata. */
 int
 xchk_xattr(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xchk_xattr		sx;
 	struct attrlist_cursor_kern	cursor = { 0 };
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 19cfbd3910a2..0e5166232b15 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -34,7 +34,7 @@
 /* Set us up with an inode's bmap. */
 int
 xchk_setup_inode_bmap(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	int				error;
@@ -79,7 +79,7 @@ out:
  */
 
 struct xchk_bmap_info {
-	struct xfs_scrub_context	*sc;
+	struct xfs_scrub	*sc;
 	xfs_fileoff_t			lastoff;
 	bool				is_rt;
 	bool				is_shared;
@@ -391,7 +391,7 @@ xchk_bmapbt_rec(
 /* Scan the btree records. */
 STATIC int
 xchk_bmap_btree(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				whichfork,
 	struct xchk_bmap_info		*info)
 {
@@ -409,7 +409,7 @@ xchk_bmap_btree(
 }
 
 struct xchk_bmap_check_rmap_info {
-	struct xfs_scrub_context	*sc;
+	struct xfs_scrub	*sc;
 	int				whichfork;
 	struct xfs_iext_cursor		icur;
 };
@@ -424,7 +424,7 @@ xchk_bmap_check_rmap(
 	struct xfs_bmbt_irec		irec;
 	struct xchk_bmap_check_rmap_info	*sbcri = priv;
 	struct xfs_ifork		*ifp;
-	struct xfs_scrub_context	*sc = sbcri->sc;
+	struct xfs_scrub	*sc = sbcri->sc;
 	bool				have_map;
 
 	/* Is this even the right fork? */
@@ -488,7 +488,7 @@ out:
 /* Make sure each rmap has a corresponding bmbt entry. */
 STATIC int
 xchk_bmap_check_ag_rmaps(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				whichfork,
 	xfs_agnumber_t			agno)
 {
@@ -522,7 +522,7 @@ out_agf:
 /* Make sure each rmap has a corresponding bmbt entry. */
 STATIC int
 xchk_bmap_check_rmaps(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				whichfork)
 {
 	loff_t				size;
@@ -579,7 +579,7 @@ xchk_bmap_check_rmaps(
  */
 STATIC int
 xchk_bmap(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				whichfork)
 {
 	struct xfs_bmbt_irec		irec;
@@ -694,7 +694,7 @@ out:
 /* Scrub an inode's data fork. */
 int
 xchk_bmap_data(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	return xchk_bmap(sc, XFS_DATA_FORK);
 }
@@ -702,7 +702,7 @@ xchk_bmap_data(
 /* Scrub an inode's attr fork. */
 int
 xchk_bmap_attr(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	return xchk_bmap(sc, XFS_ATTR_FORK);
 }
@@ -710,7 +710,7 @@ xchk_bmap_attr(
 /* Scrub an inode's CoW fork. */
 int
 xchk_bmap_cow(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	if (!xfs_is_reflink_inode(sc->ip))
 		return -ENOENT;
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index 30fe9a147959..c4e1dce8c5b3 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -30,7 +30,7 @@
  */
 static bool
 __xchk_btree_process_error(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_btree_cur		*cur,
 	int				level,
 	int				*error,
@@ -65,7 +65,7 @@ __xchk_btree_process_error(
 
 bool
 xchk_btree_process_error(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_btree_cur		*cur,
 	int				level,
 	int				*error)
@@ -76,7 +76,7 @@ xchk_btree_process_error(
 
 bool
 xchk_btree_xref_process_error(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_btree_cur		*cur,
 	int				level,
 	int				*error)
@@ -88,7 +88,7 @@ xchk_btree_xref_process_error(
 /* Record btree block corruption. */
 static void
 __xchk_btree_set_corrupt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_btree_cur		*cur,
 	int				level,
 	__u32				errflag,
@@ -106,7 +106,7 @@ __xchk_btree_set_corrupt(
 
 void
 xchk_btree_set_corrupt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_btree_cur		*cur,
 	int				level)
 {
@@ -116,7 +116,7 @@ xchk_btree_set_corrupt(
 
 void
 xchk_btree_xref_set_corrupt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_btree_cur		*cur,
 	int				level)
 {
@@ -583,7 +583,7 @@ xchk_btree_block_keys(
  */
 int
 xchk_btree(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_btree_cur		*cur,
 	xchk_btree_rec_fn		scrub_fn,
 	struct xfs_owner_info		*oinfo,
diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h
index 598ac04a6c3e..a0b74b515b9b 100644
--- a/fs/xfs/scrub/btree.h
+++ b/fs/xfs/scrub/btree.h
@@ -9,20 +9,20 @@
 /* btree scrub */
 
 /* Check for btree operation errors. */
-bool xchk_btree_process_error(struct xfs_scrub_context *sc,
+bool xchk_btree_process_error(struct xfs_scrub *sc,
 		struct xfs_btree_cur *cur, int level, int *error);
 
 /* Check for btree xref operation errors. */
-bool xchk_btree_xref_process_error(struct xfs_scrub_context *sc,
+bool xchk_btree_xref_process_error(struct xfs_scrub *sc,
 				struct xfs_btree_cur *cur, int level,
 				int *error);
 
 /* Check for btree corruption. */
-void xchk_btree_set_corrupt(struct xfs_scrub_context *sc,
+void xchk_btree_set_corrupt(struct xfs_scrub *sc,
 		struct xfs_btree_cur *cur, int level);
 
 /* Check for btree xref discrepancies. */
-void xchk_btree_xref_set_corrupt(struct xfs_scrub_context *sc,
+void xchk_btree_xref_set_corrupt(struct xfs_scrub *sc,
 		struct xfs_btree_cur *cur, int level);
 
 struct xchk_btree;
@@ -32,7 +32,7 @@ typedef int (*xchk_btree_rec_fn)(
 
 struct xchk_btree {
 	/* caller-provided scrub state */
-	struct xfs_scrub_context	*sc;
+	struct xfs_scrub	*sc;
 	struct xfs_btree_cur		*cur;
 	xchk_btree_rec_fn		scrub_rec;
 	struct xfs_owner_info		*oinfo;
@@ -45,7 +45,7 @@ struct xchk_btree {
 	bool				firstkey[XFS_BTREE_MAXLEVELS];
 	struct list_head		to_check;
 };
-int xchk_btree(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+int xchk_btree(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
 		    xchk_btree_rec_fn scrub_fn,
 		    struct xfs_owner_info *oinfo, void *private);
 
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index d40bba9e7596..ed9195116556 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -69,7 +69,7 @@
 /* Check for operational errors. */
 static bool
 __xchk_process_error(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agnumber_t			agno,
 	xfs_agblock_t			bno,
 	int				*error,
@@ -99,7 +99,7 @@ __xchk_process_error(
 
 bool
 xchk_process_error(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agnumber_t			agno,
 	xfs_agblock_t			bno,
 	int				*error)
@@ -110,7 +110,7 @@ xchk_process_error(
 
 bool
 xchk_xref_process_error(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agnumber_t			agno,
 	xfs_agblock_t			bno,
 	int				*error)
@@ -122,7 +122,7 @@ xchk_xref_process_error(
 /* Check for operational errors for a file offset. */
 static bool
 __xchk_fblock_process_error(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				whichfork,
 	xfs_fileoff_t			offset,
 	int				*error,
@@ -152,7 +152,7 @@ __xchk_fblock_process_error(
 
 bool
 xchk_fblock_process_error(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				whichfork,
 	xfs_fileoff_t			offset,
 	int				*error)
@@ -163,7 +163,7 @@ xchk_fblock_process_error(
 
 bool
 xchk_fblock_xref_process_error(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				whichfork,
 	xfs_fileoff_t			offset,
 	int				*error)
@@ -187,7 +187,7 @@ xchk_fblock_xref_process_error(
 /* Record a block which could be optimized. */
 void
 xchk_block_set_preen(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_buf			*bp)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
@@ -201,7 +201,7 @@ xchk_block_set_preen(
  */
 void
 xchk_ino_set_preen(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_ino_t			ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
@@ -211,7 +211,7 @@ xchk_ino_set_preen(
 /* Record a corrupt block. */
 void
 xchk_block_set_corrupt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_buf			*bp)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
@@ -221,7 +221,7 @@ xchk_block_set_corrupt(
 /* Record a corruption while cross-referencing. */
 void
 xchk_block_xref_set_corrupt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_buf			*bp)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
@@ -235,7 +235,7 @@ xchk_block_xref_set_corrupt(
  */
 void
 xchk_ino_set_corrupt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_ino_t			ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
@@ -245,7 +245,7 @@ xchk_ino_set_corrupt(
 /* Record a corruption while cross-referencing with an inode. */
 void
 xchk_ino_xref_set_corrupt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_ino_t			ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
@@ -255,7 +255,7 @@ xchk_ino_xref_set_corrupt(
 /* Record corruption in a block indexed by a file fork. */
 void
 xchk_fblock_set_corrupt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				whichfork,
 	xfs_fileoff_t			offset)
 {
@@ -266,7 +266,7 @@ xchk_fblock_set_corrupt(
 /* Record a corruption while cross-referencing a fork block. */
 void
 xchk_fblock_xref_set_corrupt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				whichfork,
 	xfs_fileoff_t			offset)
 {
@@ -280,7 +280,7 @@ xchk_fblock_xref_set_corrupt(
  */
 void
 xchk_ino_set_warning(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_ino_t			ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
@@ -290,7 +290,7 @@ xchk_ino_set_warning(
 /* Warn about a block indexed by a file fork that needs review. */
 void
 xchk_fblock_set_warning(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				whichfork,
 	xfs_fileoff_t			offset)
 {
@@ -301,7 +301,7 @@ xchk_fblock_set_warning(
 /* Signal an incomplete scrub. */
 void
 xchk_set_incomplete(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
 	trace_xchk_incomplete(sc, __return_address);
@@ -345,7 +345,7 @@ xchk_count_rmap_ownedby_irec(
  */
 int
 xchk_count_rmap_ownedby_ag(
-	struct xfs_scrub_context		*sc,
+	struct xfs_scrub		*sc,
 	struct xfs_btree_cur			*cur,
 	struct xfs_owner_info			*oinfo,
 	xfs_filblks_t				*blocks)
@@ -371,7 +371,7 @@ xchk_count_rmap_ownedby_ag(
 /* Decide if we want to return an AG header read failure. */
 static inline bool
 want_ag_read_header_failure(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	unsigned int			type)
 {
 	/* Return all AG header read failures when scanning btrees. */
@@ -398,7 +398,7 @@ want_ag_read_header_failure(
  */
 int
 xchk_ag_read_headers(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agnumber_t			agno,
 	struct xfs_buf			**agi,
 	struct xfs_buf			**agf,
@@ -452,7 +452,7 @@ xchk_ag_btcur_free(
 /* Initialize all the btree cursors for an AG. */
 int
 xchk_ag_btcur_init(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xchk_ag		*sa)
 {
 	struct xfs_mount		*mp = sc->mp;
@@ -512,7 +512,7 @@ err:
 /* Release the AG header context and btree cursors. */
 void
 xchk_ag_free(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xchk_ag		*sa)
 {
 	xchk_ag_btcur_free(sa);
@@ -544,7 +544,7 @@ xchk_ag_free(
  */
 int
 xchk_ag_init(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agnumber_t			agno,
 	struct xchk_ag		*sa)
 {
@@ -586,7 +586,7 @@ xchk_perag_get(
  */
 int
 xchk_trans_alloc(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	uint				resblks)
 {
 	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
@@ -599,7 +599,7 @@ xchk_trans_alloc(
 /* Set us up with a transaction and an empty context. */
 int
 xchk_setup_fs(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	uint				resblks;
@@ -611,7 +611,7 @@ xchk_setup_fs(
 /* Set us up with AG headers and btree cursors. */
 int
 xchk_setup_ag_btree(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip,
 	bool				force_log)
 {
@@ -658,7 +658,7 @@ xchk_checkpoint_log(
  */
 int
 xchk_get_inode(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip_in)
 {
 	struct xfs_imap			imap;
@@ -722,7 +722,7 @@ xchk_get_inode(
 /* Set us up to scrub a file's contents. */
 int
 xchk_setup_inode_contents(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip,
 	unsigned int			resblks)
 {
@@ -753,7 +753,7 @@ out:
  */
 bool
 xchk_should_check_xref(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				*error,
 	struct xfs_btree_cur		**curpp)
 {
@@ -788,7 +788,7 @@ xchk_should_check_xref(
 /* Run the structure verifiers on in-memory buffers to detect bad memory. */
 void
 xchk_buffer_recheck(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_buf			*bp)
 {
 	xfs_failaddr_t			fa;
@@ -814,7 +814,7 @@ xchk_buffer_recheck(
  */
 int
 xchk_metadata_inode_forks(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	__u32				smtype;
 	bool				shared;
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 5881cb2ecc26..c321230d32dc 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -13,7 +13,7 @@
  */
 static inline bool
 xchk_should_terminate(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				*error)
 {
 	if (fatal_signal_pending(current)) {
@@ -24,109 +24,109 @@ xchk_should_terminate(
 	return false;
 }
 
-int xchk_trans_alloc(struct xfs_scrub_context *sc, uint resblks);
-bool xchk_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks);
+bool xchk_process_error(struct xfs_scrub *sc, xfs_agnumber_t agno,
 		xfs_agblock_t bno, int *error);
-bool xchk_fblock_process_error(struct xfs_scrub_context *sc, int whichfork,
+bool xchk_fblock_process_error(struct xfs_scrub *sc, int whichfork,
 		xfs_fileoff_t offset, int *error);
 
-bool xchk_xref_process_error(struct xfs_scrub_context *sc,
+bool xchk_xref_process_error(struct xfs_scrub *sc,
 		xfs_agnumber_t agno, xfs_agblock_t bno, int *error);
-bool xchk_fblock_xref_process_error(struct xfs_scrub_context *sc,
+bool xchk_fblock_xref_process_error(struct xfs_scrub *sc,
 		int whichfork, xfs_fileoff_t offset, int *error);
 
-void xchk_block_set_preen(struct xfs_scrub_context *sc,
+void xchk_block_set_preen(struct xfs_scrub *sc,
 		struct xfs_buf *bp);
-void xchk_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino);
+void xchk_ino_set_preen(struct xfs_scrub *sc, xfs_ino_t ino);
 
-void xchk_block_set_corrupt(struct xfs_scrub_context *sc,
+void xchk_block_set_corrupt(struct xfs_scrub *sc,
 		struct xfs_buf *bp);
-void xchk_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino);
-void xchk_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork,
+void xchk_ino_set_corrupt(struct xfs_scrub *sc, xfs_ino_t ino);
+void xchk_fblock_set_corrupt(struct xfs_scrub *sc, int whichfork,
 		xfs_fileoff_t offset);
 
-void xchk_block_xref_set_corrupt(struct xfs_scrub_context *sc,
+void xchk_block_xref_set_corrupt(struct xfs_scrub *sc,
 		struct xfs_buf *bp);
-void xchk_ino_xref_set_corrupt(struct xfs_scrub_context *sc,
+void xchk_ino_xref_set_corrupt(struct xfs_scrub *sc,
 		xfs_ino_t ino);
-void xchk_fblock_xref_set_corrupt(struct xfs_scrub_context *sc,
+void xchk_fblock_xref_set_corrupt(struct xfs_scrub *sc,
 		int whichfork, xfs_fileoff_t offset);
 
-void xchk_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino);
-void xchk_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
+void xchk_ino_set_warning(struct xfs_scrub *sc, xfs_ino_t ino);
+void xchk_fblock_set_warning(struct xfs_scrub *sc, int whichfork,
 		xfs_fileoff_t offset);
 
-void xchk_set_incomplete(struct xfs_scrub_context *sc);
+void xchk_set_incomplete(struct xfs_scrub *sc);
 int xchk_checkpoint_log(struct xfs_mount *mp);
 
 /* Are we set up for a cross-referencing check? */
-bool xchk_should_check_xref(struct xfs_scrub_context *sc, int *error,
+bool xchk_should_check_xref(struct xfs_scrub *sc, int *error,
 			   struct xfs_btree_cur **curpp);
 
 /* Setup functions */
-int xchk_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip);
-int xchk_setup_ag_allocbt(struct xfs_scrub_context *sc,
+int xchk_setup_fs(struct xfs_scrub *sc, struct xfs_inode *ip);
+int xchk_setup_ag_allocbt(struct xfs_scrub *sc,
 			       struct xfs_inode *ip);
-int xchk_setup_ag_iallocbt(struct xfs_scrub_context *sc,
+int xchk_setup_ag_iallocbt(struct xfs_scrub *sc,
 				struct xfs_inode *ip);
-int xchk_setup_ag_rmapbt(struct xfs_scrub_context *sc,
+int xchk_setup_ag_rmapbt(struct xfs_scrub *sc,
 			      struct xfs_inode *ip);
-int xchk_setup_ag_refcountbt(struct xfs_scrub_context *sc,
+int xchk_setup_ag_refcountbt(struct xfs_scrub *sc,
 				  struct xfs_inode *ip);
-int xchk_setup_inode(struct xfs_scrub_context *sc,
+int xchk_setup_inode(struct xfs_scrub *sc,
 			  struct xfs_inode *ip);
-int xchk_setup_inode_bmap(struct xfs_scrub_context *sc,
+int xchk_setup_inode_bmap(struct xfs_scrub *sc,
 			       struct xfs_inode *ip);
-int xchk_setup_inode_bmap_data(struct xfs_scrub_context *sc,
+int xchk_setup_inode_bmap_data(struct xfs_scrub *sc,
 				    struct xfs_inode *ip);
-int xchk_setup_directory(struct xfs_scrub_context *sc,
+int xchk_setup_directory(struct xfs_scrub *sc,
 			      struct xfs_inode *ip);
-int xchk_setup_xattr(struct xfs_scrub_context *sc,
+int xchk_setup_xattr(struct xfs_scrub *sc,
 			  struct xfs_inode *ip);
-int xchk_setup_symlink(struct xfs_scrub_context *sc,
+int xchk_setup_symlink(struct xfs_scrub *sc,
 			    struct xfs_inode *ip);
-int xchk_setup_parent(struct xfs_scrub_context *sc,
+int xchk_setup_parent(struct xfs_scrub *sc,
 			   struct xfs_inode *ip);
 #ifdef CONFIG_XFS_RT
-int xchk_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip);
+int xchk_setup_rt(struct xfs_scrub *sc, struct xfs_inode *ip);
 #else
 static inline int
-xchk_setup_rt(struct xfs_scrub_context *sc, struct xfs_inode *ip)
+xchk_setup_rt(struct xfs_scrub *sc, struct xfs_inode *ip)
 {
 	return -ENOENT;
 }
 #endif
 #ifdef CONFIG_XFS_QUOTA
-int xchk_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip);
+int xchk_setup_quota(struct xfs_scrub *sc, struct xfs_inode *ip);
 #else
 static inline int
-xchk_setup_quota(struct xfs_scrub_context *sc, struct xfs_inode *ip)
+xchk_setup_quota(struct xfs_scrub *sc, struct xfs_inode *ip)
 {
 	return -ENOENT;
 }
 #endif
 
-void xchk_ag_free(struct xfs_scrub_context *sc, struct xchk_ag *sa);
-int xchk_ag_init(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+void xchk_ag_free(struct xfs_scrub *sc, struct xchk_ag *sa);
+int xchk_ag_init(struct xfs_scrub *sc, xfs_agnumber_t agno,
 		      struct xchk_ag *sa);
 void xchk_perag_get(struct xfs_mount *mp, struct xchk_ag *sa);
-int xchk_ag_read_headers(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+int xchk_ag_read_headers(struct xfs_scrub *sc, xfs_agnumber_t agno,
 			      struct xfs_buf **agi, struct xfs_buf **agf,
 			      struct xfs_buf **agfl);
 void xchk_ag_btcur_free(struct xchk_ag *sa);
-int xchk_ag_btcur_init(struct xfs_scrub_context *sc,
+int xchk_ag_btcur_init(struct xfs_scrub *sc,
 			    struct xchk_ag *sa);
-int xchk_count_rmap_ownedby_ag(struct xfs_scrub_context *sc,
+int xchk_count_rmap_ownedby_ag(struct xfs_scrub *sc,
 				    struct xfs_btree_cur *cur,
 				    struct xfs_owner_info *oinfo,
 				    xfs_filblks_t *blocks);
 
-int xchk_setup_ag_btree(struct xfs_scrub_context *sc,
+int xchk_setup_ag_btree(struct xfs_scrub *sc,
 			     struct xfs_inode *ip, bool force_log);
-int xchk_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in);
-int xchk_setup_inode_contents(struct xfs_scrub_context *sc,
+int xchk_get_inode(struct xfs_scrub *sc, struct xfs_inode *ip_in);
+int xchk_setup_inode_contents(struct xfs_scrub *sc,
 				   struct xfs_inode *ip, unsigned int resblks);
-void xchk_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp);
+void xchk_buffer_recheck(struct xfs_scrub *sc, struct xfs_buf *bp);
 
 /*
  * Don't bother cross-referencing if we already found corruption or cross
@@ -138,7 +138,7 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
 			       XFS_SCRUB_OFLAG_XCORRUPT);
 }
 
-int xchk_metadata_inode_forks(struct xfs_scrub_context *sc);
+int xchk_metadata_inode_forks(struct xfs_scrub *sc);
 int xchk_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
 
 #endif	/* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index fee80f6ddfd7..7fc12d540ea6 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -40,7 +40,7 @@ xchk_da_process_error(
 	int				level,
 	int				*error)
 {
-	struct xfs_scrub_context	*sc = ds->sc;
+	struct xfs_scrub	*sc = ds->sc;
 
 	if (*error == 0)
 		return true;
@@ -75,7 +75,7 @@ xchk_da_set_corrupt(
 	struct xchk_da_btree		*ds,
 	int				level)
 {
-	struct xfs_scrub_context	*sc = ds->sc;
+	struct xfs_scrub	*sc = ds->sc;
 
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 
@@ -474,7 +474,7 @@ out_nobuf:
 /* Visit all nodes and leaves of a da btree. */
 int
 xchk_da_btree(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				whichfork,
 	xchk_da_btree_rec_fn	scrub_fn,
 	void				*private)
diff --git a/fs/xfs/scrub/dabtree.h b/fs/xfs/scrub/dabtree.h
index 80e4af0e2589..a15c03389e8f 100644
--- a/fs/xfs/scrub/dabtree.h
+++ b/fs/xfs/scrub/dabtree.h
@@ -13,7 +13,7 @@ struct xchk_da_btree {
 	xfs_dahash_t			hashes[XFS_DA_NODE_MAXDEPTH];
 	int				maxrecs[XFS_DA_NODE_MAXDEPTH];
 	struct xfs_da_state		*state;
-	struct xfs_scrub_context	*sc;
+	struct xfs_scrub	*sc;
 	void				*private;
 
 	/*
@@ -39,7 +39,7 @@ void xchk_da_set_corrupt(struct xchk_da_btree *ds, int level);
 
 int xchk_da_btree_hash(struct xchk_da_btree *ds, int level,
 			    __be32 *hashp);
-int xchk_da_btree(struct xfs_scrub_context *sc, int whichfork,
+int xchk_da_btree(struct xfs_scrub *sc, int whichfork,
 		       xchk_da_btree_rec_fn scrub_fn, void *private);
 
 #endif /* __XFS_SCRUB_DABTREE_H__ */
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 2ac07bb73478..194a3ef69a9f 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -32,7 +32,7 @@
 /* Set us up to scrub directories. */
 int
 xchk_setup_directory(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	return xchk_setup_inode_contents(sc, ip, 0);
@@ -46,7 +46,7 @@ struct xchk_dir_ctx {
 	/* VFS fill-directory iterator */
 	struct dir_context		dir_iter;
 
-	struct xfs_scrub_context	*sc;
+	struct xfs_scrub	*sc;
 };
 
 /* Check that an inode's mode matches a given DT_ type. */
@@ -289,7 +289,7 @@ out:
  */
 STATIC void
 xchk_directory_check_free_entry(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_dablk_t			lblk,
 	struct xfs_dir2_data_free	*bf,
 	struct xfs_dir2_data_unused	*dup)
@@ -314,7 +314,7 @@ xchk_directory_check_free_entry(
 /* Check free space info in a directory data block. */
 STATIC int
 xchk_directory_data_bestfree(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_dablk_t			lblk,
 	bool				is_block)
 {
@@ -455,7 +455,7 @@ out:
  */
 STATIC void
 xchk_directory_check_freesp(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_dablk_t			lblk,
 	struct xfs_buf			*dbp,
 	unsigned int			len)
@@ -474,7 +474,7 @@ xchk_directory_check_freesp(
 /* Check free space info in a directory leaf1 block. */
 STATIC int
 xchk_directory_leaf1_bestfree(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_da_args		*args,
 	xfs_dablk_t			lblk)
 {
@@ -572,7 +572,7 @@ out:
 /* Check free space info in a directory freespace block. */
 STATIC int
 xchk_directory_free_bestfree(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_da_args		*args,
 	xfs_dablk_t			lblk)
 {
@@ -626,7 +626,7 @@ out:
 /* Check free space information in directories. */
 STATIC int
 xchk_directory_blocks(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_bmbt_irec		got;
 	struct xfs_da_args		args;
@@ -770,7 +770,7 @@ out:
 /* Scrub a whole directory. */
 int
 xchk_directory(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xchk_dir_ctx		sdc = {
 		.dir_iter.actor = xchk_dir_actor,
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 69d652b7299c..6df8eba9f52b 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -36,7 +36,7 @@
  */
 int
 xchk_setup_ag_iallocbt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	return xchk_setup_ag_btree(sc, ip, sc->try_harder);
@@ -51,7 +51,7 @@ xchk_setup_ag_iallocbt(
  */
 static inline void
 xchk_iallocbt_chunk_xref_other(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inobt_rec_incore	*irec,
 	xfs_agino_t			agino)
 {
@@ -76,7 +76,7 @@ xchk_iallocbt_chunk_xref_other(
 /* Cross-reference with the other btrees. */
 STATIC void
 xchk_iallocbt_chunk_xref(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inobt_rec_incore	*irec,
 	xfs_agino_t			agino,
 	xfs_agblock_t			agbno,
@@ -363,7 +363,7 @@ out:
  */
 STATIC void
 xchk_iallocbt_xref_rmap_btreeblks(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				which)
 {
 	struct xfs_owner_info		oinfo;
@@ -403,7 +403,7 @@ xchk_iallocbt_xref_rmap_btreeblks(
  */
 STATIC void
 xchk_iallocbt_xref_rmap_inodes(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	int				which,
 	xfs_filblks_t			inode_blocks)
 {
@@ -427,7 +427,7 @@ xchk_iallocbt_xref_rmap_inodes(
 /* Scrub the inode btrees for some AG. */
 STATIC int
 xchk_iallocbt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_btnum_t			which)
 {
 	struct xfs_btree_cur		*cur;
@@ -459,14 +459,14 @@ xchk_iallocbt(
 
 int
 xchk_inobt(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	return xchk_iallocbt(sc, XFS_BTNUM_INO);
 }
 
 int
 xchk_finobt(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	return xchk_iallocbt(sc, XFS_BTNUM_FINO);
 }
@@ -474,7 +474,7 @@ xchk_finobt(
 /* See if an inode btree has (or doesn't have) an inode chunk record. */
 static inline void
 xchk_xref_inode_check(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len,
 	struct xfs_btree_cur		**icur,
@@ -496,7 +496,7 @@ xchk_xref_inode_check(
 /* xref check that the extent is not covered by inodes */
 void
 xchk_xref_is_not_inode_chunk(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
 {
@@ -507,7 +507,7 @@ xchk_xref_is_not_inode_chunk(
 /* xref check that the extent is covered by inodes */
 void
 xchk_xref_is_inode_chunk(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
 {
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index d85fbec39e52..6cc027983c13 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -38,7 +38,7 @@
  */
 int
 xchk_setup_inode(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	int				error;
@@ -77,7 +77,7 @@ out:
 /* Validate di_extsize hint. */
 STATIC void
 xchk_inode_extsize(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino,
 	uint16_t			mode,
@@ -99,7 +99,7 @@ xchk_inode_extsize(
  */
 STATIC void
 xchk_inode_cowextsize(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino,
 	uint16_t			mode,
@@ -118,7 +118,7 @@ xchk_inode_cowextsize(
 /* Make sure the di_flags make sense for the inode. */
 STATIC void
 xchk_inode_flags(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino,
 	uint16_t			mode,
@@ -163,7 +163,7 @@ bad:
 /* Make sure the di_flags2 make sense for the inode. */
 STATIC void
 xchk_inode_flags2(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino,
 	uint16_t			mode,
@@ -206,7 +206,7 @@ bad:
 /* Scrub all the ondisk inode fields. */
 STATIC void
 xchk_dinode(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino)
 {
@@ -426,7 +426,7 @@ xchk_dinode(
  */
 static void
 xchk_inode_xref_finobt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_ino_t			ino)
 {
 	struct xfs_inobt_rec_incore	rec;
@@ -469,7 +469,7 @@ xchk_inode_xref_finobt(
 /* Cross reference the inode fields with the forks. */
 STATIC void
 xchk_inode_xref_bmap(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_dinode		*dip)
 {
 	xfs_extnum_t			nextents;
@@ -503,7 +503,7 @@ xchk_inode_xref_bmap(
 /* Cross-reference with the other btrees. */
 STATIC void
 xchk_inode_xref(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_ino_t			ino,
 	struct xfs_dinode		*dip)
 {
@@ -540,7 +540,7 @@ xchk_inode_xref(
  */
 static void
 xchk_inode_check_reflink_iflag(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_ino_t			ino)
 {
 	struct xfs_mount		*mp = sc->mp;
@@ -564,7 +564,7 @@ xchk_inode_check_reflink_iflag(
 /* Scrub an inode. */
 int
 xchk_inode(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_dinode		di;
 	int				error = 0;
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 0a78d8411f23..808459ad0c35 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -28,7 +28,7 @@
 /* Set us up to scrub parents. */
 int
 xchk_setup_parent(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	return xchk_setup_inode_contents(sc, ip, 0);
@@ -65,7 +65,7 @@ xchk_parent_actor(
 /* Count the number of dentries in the parent dir that point to this inode. */
 STATIC int
 xchk_parent_count_parent_dentries(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*parent,
 	xfs_nlink_t			*nlink)
 {
@@ -121,7 +121,7 @@ out:
  */
 STATIC int
 xchk_parent_validate(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_ino_t			dnum,
 	bool				*try_again)
 {
@@ -255,7 +255,7 @@ out:
 /* Scrub a parent pointer. */
 int
 xchk_parent(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
 	xfs_ino_t			dnum;
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index d1b52dd7efcd..309ebeecfa5d 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -31,7 +31,7 @@
 /* Convert a scrub type code to a DQ flag, or return 0 if error. */
 static inline uint
 xchk_quota_to_dqtype(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	switch (sc->sm->sm_type) {
 	case XFS_SCRUB_TYPE_UQUOTA:
@@ -48,7 +48,7 @@ xchk_quota_to_dqtype(
 /* Set us up to scrub a quota. */
 int
 xchk_setup_quota(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	uint				dqtype;
@@ -76,7 +76,7 @@ xchk_setup_quota(
 /* Quotas. */
 
 struct xchk_quota_info {
-	struct xfs_scrub_context	*sc;
+	struct xfs_scrub	*sc;
 	xfs_dqid_t			last_id;
 };
 
@@ -88,7 +88,7 @@ xchk_quota_item(
 	void				*priv)
 {
 	struct xchk_quota_info		*sqi = priv;
-	struct xfs_scrub_context	*sc = sqi->sc;
+	struct xfs_scrub	*sc = sqi->sc;
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_disk_dquot		*d = &dq->q_core;
 	struct xfs_quotainfo		*qi = mp->m_quotainfo;
@@ -195,7 +195,7 @@ xchk_quota_item(
 /* Check the quota's data fork. */
 STATIC int
 xchk_quota_data_fork(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_bmbt_irec		irec = { 0 };
 	struct xfs_iext_cursor		icur;
@@ -234,7 +234,7 @@ xchk_quota_data_fork(
 /* Scrub all of a quota type's items. */
 int
 xchk_quota(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xchk_quota_info		sqi;
 	struct xfs_mount		*mp = sc->mp;
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 274febc49b23..c1162d408987 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -29,7 +29,7 @@
  */
 int
 xchk_setup_ag_refcountbt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	return xchk_setup_ag_btree(sc, ip, false);
@@ -79,7 +79,7 @@ struct xchk_refcnt_frag {
 };
 
 struct xchk_refcnt_check {
-	struct xfs_scrub_context	*sc;
+	struct xfs_scrub	*sc;
 	struct list_head		fragments;
 
 	/* refcount extent we're examining */
@@ -278,7 +278,7 @@ done:
 /* Use the rmap entries covering this extent to verify the refcount. */
 STATIC void
 xchk_refcountbt_xref_rmap(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			bno,
 	xfs_extlen_t			len,
 	xfs_nlink_t			refcount)
@@ -325,7 +325,7 @@ out_free:
 /* Cross-reference with the other btrees. */
 STATIC void
 xchk_refcountbt_xref(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len,
 	xfs_nlink_t			refcount)
@@ -382,7 +382,7 @@ xchk_refcountbt_rec(
 /* Make sure we have as many refc blocks as the rmap says. */
 STATIC void
 xchk_refcount_xref_rmap(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_owner_info		*oinfo,
 	xfs_filblks_t			cow_blocks)
 {
@@ -417,7 +417,7 @@ xchk_refcount_xref_rmap(
 /* Scrub the refcount btree for some AG. */
 int
 xchk_refcountbt(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_owner_info		oinfo;
 	xfs_agblock_t			cow_blocks = 0;
@@ -437,7 +437,7 @@ xchk_refcountbt(
 /* xref check that a cow staging extent is marked in the refcountbt. */
 void
 xchk_xref_is_cow_staging(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
 {
@@ -483,7 +483,7 @@ xchk_xref_is_cow_staging(
  */
 void
 xchk_xref_is_not_shared(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
 {
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 7e6a56a5f59d..29debd5649ac 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -43,7 +43,7 @@
 int
 xrep_attempt(
 	struct xfs_inode		*ip,
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	bool				*fixed)
 {
 	int				error = 0;
@@ -106,7 +106,7 @@ xrep_failure(
  */
 int
 xrep_probe(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	int				error = 0;
 
@@ -122,7 +122,7 @@ xrep_probe(
  */
 int
 xrep_roll_ag_trans(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	int				error;
 
@@ -179,7 +179,7 @@ xrep_ag_has_space(
  */
 xfs_extlen_t
 xrep_calc_ag_resblks(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_scrub_metadata	*sm = sc->sm;
@@ -279,7 +279,7 @@ xrep_calc_ag_resblks(
 /* Allocate a block in an AG. */
 int
 xrep_alloc_ag_block(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_owner_info		*oinfo,
 	xfs_fsblock_t			*fsbno,
 	enum xfs_ag_resv_type		resv)
@@ -330,7 +330,7 @@ xrep_alloc_ag_block(
 /* Initialize a new AG btree root block with zero entries. */
 int
 xrep_init_btblock(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_fsblock_t			fsb,
 	struct xfs_buf			**bpp,
 	xfs_btnum_t			btnum,
@@ -385,7 +385,7 @@ xrep_init_btblock(
 /* Collect a dead btree extent for later disposal. */
 int
 xrep_collect_btree_extent(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xrep_extent_list		*exlist,
 	xfs_fsblock_t			fsbno,
 	xfs_extlen_t			len)
@@ -415,7 +415,7 @@ xrep_collect_btree_extent(
  */
 void
 xrep_cancel_btree_extents(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xrep_extent_list		*exlist)
 {
 	struct xrep_extent		*rex;
@@ -463,7 +463,7 @@ xrep_btree_extent_cmp(
 #define RIGHT_ALIGNED	(1 << 1)
 int
 xrep_subtract_extents(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xrep_extent_list		*exlist,
 	struct xrep_extent_list		*sublist)
 {
@@ -620,7 +620,7 @@ out:
  */
 int
 xrep_invalidate_blocks(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xrep_extent_list		*exlist)
 {
 	struct xrep_extent		*rex;
@@ -658,7 +658,7 @@ xrep_invalidate_blocks(
 /* Ensure the freelist is the correct size. */
 int
 xrep_fix_freelist(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	bool				can_shrink)
 {
 	struct xfs_alloc_arg		args = {0};
@@ -678,7 +678,7 @@ xrep_fix_freelist(
  */
 STATIC int
 xrep_put_freelist(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			agbno)
 {
 	struct xfs_owner_info		oinfo;
@@ -714,7 +714,7 @@ xrep_put_freelist(
 /* Dispose of a single metadata block. */
 STATIC int
 xrep_dispose_btree_block(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_fsblock_t			fsbno,
 	struct xfs_owner_info		*oinfo,
 	enum xfs_ag_resv_type		resv)
@@ -788,7 +788,7 @@ out_free:
 /* Dispose of btree blocks from an old per-AG btree. */
 int
 xrep_reap_btree_extents(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xrep_extent_list		*exlist,
 	struct xfs_owner_info		*oinfo,
 	enum xfs_ag_resv_type		type)
@@ -851,7 +851,7 @@ out:
  */
 
 struct xrep_findroot {
-	struct xfs_scrub_context	*sc;
+	struct xfs_scrub	*sc;
 	struct xfs_buf			*agfl_bp;
 	struct xfs_agf			*agf;
 	struct xrep_find_ag_btree	*btree_info;
@@ -981,7 +981,7 @@ xrep_findroot_rmap(
 /* Find the roots of the per-AG btrees described in btree_info. */
 int
 xrep_find_ag_btree_roots(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_buf			*agf_bp,
 	struct xrep_find_ag_btree	*btree_info,
 	struct xfs_buf			*agfl_bp)
@@ -1016,7 +1016,7 @@ xrep_find_ag_btree_roots(
 /* Force a quotacheck the next time we mount. */
 void
 xrep_force_quotacheck(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	uint				dqtype)
 {
 	uint				flag;
@@ -1044,7 +1044,7 @@ xrep_force_quotacheck(
  */
 int
 xrep_ino_dqattach(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	int				error;
 
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 60d81294797b..677f4b73b5ec 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -6,7 +6,7 @@
 #ifndef __XFS_SCRUB_REPAIR_H__
 #define __XFS_SCRUB_REPAIR_H__
 
-static inline int xrep_notsupported(struct xfs_scrub_context *sc)
+static inline int xrep_notsupported(struct xfs_scrub *sc)
 {
 	return -EOPNOTSUPP;
 }
@@ -15,17 +15,17 @@ static inline int xrep_notsupported(struct xfs_scrub_context *sc)
 
 /* Repair helpers */
 
-int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub_context *sc,
+int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub *sc,
 		bool *fixed);
 void xrep_failure(struct xfs_mount *mp);
-int xrep_roll_ag_trans(struct xfs_scrub_context *sc);
+int xrep_roll_ag_trans(struct xfs_scrub *sc);
 bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
 		enum xfs_ag_resv_type type);
-xfs_extlen_t xrep_calc_ag_resblks(struct xfs_scrub_context *sc);
-int xrep_alloc_ag_block(struct xfs_scrub_context *sc,
+xfs_extlen_t xrep_calc_ag_resblks(struct xfs_scrub *sc);
+int xrep_alloc_ag_block(struct xfs_scrub *sc,
 		struct xfs_owner_info *oinfo, xfs_fsblock_t *fsbno,
 		enum xfs_ag_resv_type resv);
-int xrep_init_btblock(struct xfs_scrub_context *sc, xfs_fsblock_t fsb,
+int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb,
 		struct xfs_buf **bpp, xfs_btnum_t btnum,
 		const struct xfs_buf_ops *ops);
 
@@ -48,18 +48,18 @@ xrep_init_extent_list(
 
 #define for_each_xrep_extent_safe(rbe, n, exlist) \
 	list_for_each_entry_safe((rbe), (n), &(exlist)->list, list)
-int xrep_collect_btree_extent(struct xfs_scrub_context *sc,
+int xrep_collect_btree_extent(struct xfs_scrub *sc,
 		struct xrep_extent_list *btlist, xfs_fsblock_t fsbno,
 		xfs_extlen_t len);
-void xrep_cancel_btree_extents(struct xfs_scrub_context *sc,
+void xrep_cancel_btree_extents(struct xfs_scrub *sc,
 		struct xrep_extent_list *btlist);
-int xrep_subtract_extents(struct xfs_scrub_context *sc,
+int xrep_subtract_extents(struct xfs_scrub *sc,
 		struct xrep_extent_list *exlist,
 		struct xrep_extent_list *sublist);
-int xrep_fix_freelist(struct xfs_scrub_context *sc, bool can_shrink);
-int xrep_invalidate_blocks(struct xfs_scrub_context *sc,
+int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink);
+int xrep_invalidate_blocks(struct xfs_scrub *sc,
 		struct xrep_extent_list *btlist);
-int xrep_reap_btree_extents(struct xfs_scrub_context *sc,
+int xrep_reap_btree_extents(struct xfs_scrub *sc,
 		struct xrep_extent_list *exlist,
 		struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
 
@@ -78,23 +78,23 @@ struct xrep_find_ag_btree {
 	unsigned int			height;
 };
 
-int xrep_find_ag_btree_roots(struct xfs_scrub_context *sc,
+int xrep_find_ag_btree_roots(struct xfs_scrub *sc,
 		struct xfs_buf *agf_bp,
 		struct xrep_find_ag_btree *btree_info,
 		struct xfs_buf *agfl_bp);
-void xrep_force_quotacheck(struct xfs_scrub_context *sc, uint dqtype);
-int xrep_ino_dqattach(struct xfs_scrub_context *sc);
+void xrep_force_quotacheck(struct xfs_scrub *sc, uint dqtype);
+int xrep_ino_dqattach(struct xfs_scrub *sc);
 
 /* Metadata repairers */
 
-int xrep_probe(struct xfs_scrub_context *sc);
-int xrep_superblock(struct xfs_scrub_context *sc);
+int xrep_probe(struct xfs_scrub *sc);
+int xrep_superblock(struct xfs_scrub *sc);
 
 #else
 
 static inline int xrep_attempt(
 	struct xfs_inode		*ip,
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	bool				*fixed)
 {
 	return -EOPNOTSUPP;
@@ -104,7 +104,7 @@ static inline void xrep_failure(struct xfs_mount *mp) {}
 
 static inline xfs_extlen_t
 xrep_calc_ag_resblks(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	ASSERT(!(sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR));
 	return 0;
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 4b75fc2f31f3..dc9c91a706ff 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -30,7 +30,7 @@
  */
 int
 xchk_setup_ag_rmapbt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	return xchk_setup_ag_btree(sc, ip, false);
@@ -41,7 +41,7 @@ xchk_setup_ag_rmapbt(
 /* Cross-reference a rmap against the refcount btree. */
 STATIC void
 xchk_rmapbt_xref_refc(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_rmap_irec		*irec)
 {
 	xfs_agblock_t			fbno;
@@ -72,7 +72,7 @@ xchk_rmapbt_xref_refc(
 /* Cross-reference with the other btrees. */
 STATIC void
 xchk_rmapbt_xref(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_rmap_irec		*irec)
 {
 	xfs_agblock_t			agbno = irec->rm_startblock;
@@ -172,7 +172,7 @@ out:
 /* Scrub the rmap btree for some AG. */
 int
 xchk_rmapbt(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_owner_info		oinfo;
 
@@ -184,7 +184,7 @@ xchk_rmapbt(
 /* xref check that the extent is owned by a given owner */
 static inline void
 xchk_xref_check_owner(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			bno,
 	xfs_extlen_t			len,
 	struct xfs_owner_info		*oinfo,
@@ -207,7 +207,7 @@ xchk_xref_check_owner(
 /* xref check that the extent is owned by a given owner */
 void
 xchk_xref_is_owned_by(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			bno,
 	xfs_extlen_t			len,
 	struct xfs_owner_info		*oinfo)
@@ -218,7 +218,7 @@ xchk_xref_is_owned_by(
 /* xref check that the extent is not owned by a given owner */
 void
 xchk_xref_is_not_owned_by(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			bno,
 	xfs_extlen_t			len,
 	struct xfs_owner_info		*oinfo)
@@ -229,7 +229,7 @@ xchk_xref_is_not_owned_by(
 /* xref check that the extent has no reverse mapping at all */
 void
 xchk_xref_has_no_owner(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_agblock_t			bno,
 	xfs_extlen_t			len)
 {
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 3f0fc83562ae..653a809bba34 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -26,7 +26,7 @@
 /* Set us up with the realtime metadata locked. */
 int
 xchk_setup_rt(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	int				error;
@@ -51,7 +51,7 @@ xchk_rtbitmap_rec(
 	struct xfs_rtalloc_rec		*rec,
 	void				*priv)
 {
-	struct xfs_scrub_context	*sc = priv;
+	struct xfs_scrub	*sc = priv;
 	xfs_rtblock_t			startblock;
 	xfs_rtblock_t			blockcount;
 
@@ -68,7 +68,7 @@ xchk_rtbitmap_rec(
 /* Scrub the realtime bitmap. */
 int
 xchk_rtbitmap(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	int				error;
 
@@ -88,7 +88,7 @@ out:
 /* Scrub the realtime summary. */
 int
 xchk_rtsummary(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_inode		*rsumip = sc->mp->m_rsumip;
 	struct xfs_inode		*old_ip = sc->ip;
@@ -125,7 +125,7 @@ out:
 /* xref check that the extent is not free in the rtbitmap */
 void
 xchk_xref_is_used_rt_space(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	xfs_rtblock_t			fsbno,
 	xfs_extlen_t			len)
 {
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index a6efede6e430..b3c6420ccae5 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -151,7 +151,7 @@
  */
 static int
 xchk_probe(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	int				error = 0;
 
@@ -166,7 +166,7 @@ xchk_probe(
 /* Free all the resources and finish the transactions. */
 STATIC int
 xchk_teardown(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip_in,
 	int				error)
 {
@@ -447,7 +447,7 @@ out:
 }
 
 #ifdef CONFIG_XFS_ONLINE_REPAIR
-static inline void xchk_postmortem(struct xfs_scrub_context *sc)
+static inline void xchk_postmortem(struct xfs_scrub *sc)
 {
 	/*
 	 * Userspace asked us to repair something, we repaired it, rescanned
@@ -460,7 +460,7 @@ static inline void xchk_postmortem(struct xfs_scrub_context *sc)
 		xrep_failure(sc->mp);
 }
 #else
-static inline void xchk_postmortem(struct xfs_scrub_context *sc)
+static inline void xchk_postmortem(struct xfs_scrub *sc)
 {
 	/*
 	 * Userspace asked us to scrub something, it's broken, and we have no
@@ -479,7 +479,7 @@ xfs_scrub_metadata(
 	struct xfs_inode		*ip,
 	struct xfs_scrub_metadata	*sm)
 {
-	struct xfs_scrub_context	sc;
+	struct xfs_scrub	sc;
 	struct xfs_mount		*mp = ip->i_mount;
 	bool				try_harder = false;
 	bool				already_fixed = false;
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 0f59a47c4bb0..47c75d2f28da 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -6,7 +6,7 @@
 #ifndef __XFS_SCRUB_SCRUB_H__
 #define __XFS_SCRUB_SCRUB_H__
 
-struct xfs_scrub_context;
+struct xfs_scrub;
 
 /* Type info and names for the scrub types. */
 enum xchk_type {
@@ -18,14 +18,14 @@ enum xchk_type {
 
 struct xchk_meta_ops {
 	/* Acquire whatever resources are needed for the operation. */
-	int		(*setup)(struct xfs_scrub_context *,
+	int		(*setup)(struct xfs_scrub *,
 				 struct xfs_inode *);
 
 	/* Examine metadata for errors. */
-	int		(*scrub)(struct xfs_scrub_context *);
+	int		(*scrub)(struct xfs_scrub *);
 
 	/* Repair or optimize the metadata. */
-	int		(*repair)(struct xfs_scrub_context *);
+	int		(*repair)(struct xfs_scrub *);
 
 	/* Decide if we even have this piece of metadata. */
 	bool		(*has)(struct xfs_sb *);
@@ -53,7 +53,7 @@ struct xchk_ag {
 	struct xfs_btree_cur		*refc_cur;
 };
 
-struct xfs_scrub_context {
+struct xfs_scrub {
 	/* General scrub state. */
 	struct xfs_mount		*mp;
 	struct xfs_scrub_metadata	*sm;
@@ -70,71 +70,71 @@ struct xfs_scrub_context {
 };
 
 /* Metadata scrubbers */
-int xchk_tester(struct xfs_scrub_context *sc);
-int xchk_superblock(struct xfs_scrub_context *sc);
-int xchk_agf(struct xfs_scrub_context *sc);
-int xchk_agfl(struct xfs_scrub_context *sc);
-int xchk_agi(struct xfs_scrub_context *sc);
-int xchk_bnobt(struct xfs_scrub_context *sc);
-int xchk_cntbt(struct xfs_scrub_context *sc);
-int xchk_inobt(struct xfs_scrub_context *sc);
-int xchk_finobt(struct xfs_scrub_context *sc);
-int xchk_rmapbt(struct xfs_scrub_context *sc);
-int xchk_refcountbt(struct xfs_scrub_context *sc);
-int xchk_inode(struct xfs_scrub_context *sc);
-int xchk_bmap_data(struct xfs_scrub_context *sc);
-int xchk_bmap_attr(struct xfs_scrub_context *sc);
-int xchk_bmap_cow(struct xfs_scrub_context *sc);
-int xchk_directory(struct xfs_scrub_context *sc);
-int xchk_xattr(struct xfs_scrub_context *sc);
-int xchk_symlink(struct xfs_scrub_context *sc);
-int xchk_parent(struct xfs_scrub_context *sc);
+int xchk_tester(struct xfs_scrub *sc);
+int xchk_superblock(struct xfs_scrub *sc);
+int xchk_agf(struct xfs_scrub *sc);
+int xchk_agfl(struct xfs_scrub *sc);
+int xchk_agi(struct xfs_scrub *sc);
+int xchk_bnobt(struct xfs_scrub *sc);
+int xchk_cntbt(struct xfs_scrub *sc);
+int xchk_inobt(struct xfs_scrub *sc);
+int xchk_finobt(struct xfs_scrub *sc);
+int xchk_rmapbt(struct xfs_scrub *sc);
+int xchk_refcountbt(struct xfs_scrub *sc);
+int xchk_inode(struct xfs_scrub *sc);
+int xchk_bmap_data(struct xfs_scrub *sc);
+int xchk_bmap_attr(struct xfs_scrub *sc);
+int xchk_bmap_cow(struct xfs_scrub *sc);
+int xchk_directory(struct xfs_scrub *sc);
+int xchk_xattr(struct xfs_scrub *sc);
+int xchk_symlink(struct xfs_scrub *sc);
+int xchk_parent(struct xfs_scrub *sc);
 #ifdef CONFIG_XFS_RT
-int xchk_rtbitmap(struct xfs_scrub_context *sc);
-int xchk_rtsummary(struct xfs_scrub_context *sc);
+int xchk_rtbitmap(struct xfs_scrub *sc);
+int xchk_rtsummary(struct xfs_scrub *sc);
 #else
 static inline int
-xchk_rtbitmap(struct xfs_scrub_context *sc)
+xchk_rtbitmap(struct xfs_scrub *sc)
 {
 	return -ENOENT;
 }
 static inline int
-xchk_rtsummary(struct xfs_scrub_context *sc)
+xchk_rtsummary(struct xfs_scrub *sc)
 {
 	return -ENOENT;
 }
 #endif
 #ifdef CONFIG_XFS_QUOTA
-int xchk_quota(struct xfs_scrub_context *sc);
+int xchk_quota(struct xfs_scrub *sc);
 #else
 static inline int
-xchk_quota(struct xfs_scrub_context *sc)
+xchk_quota(struct xfs_scrub *sc)
 {
 	return -ENOENT;
 }
 #endif
 
 /* cross-referencing helpers */
-void xchk_xref_is_used_space(struct xfs_scrub_context *sc,
+void xchk_xref_is_used_space(struct xfs_scrub *sc,
 		xfs_agblock_t agbno, xfs_extlen_t len);
-void xchk_xref_is_not_inode_chunk(struct xfs_scrub_context *sc,
+void xchk_xref_is_not_inode_chunk(struct xfs_scrub *sc,
 		xfs_agblock_t agbno, xfs_extlen_t len);
-void xchk_xref_is_inode_chunk(struct xfs_scrub_context *sc,
+void xchk_xref_is_inode_chunk(struct xfs_scrub *sc,
 		xfs_agblock_t agbno, xfs_extlen_t len);
-void xchk_xref_is_owned_by(struct xfs_scrub_context *sc,
+void xchk_xref_is_owned_by(struct xfs_scrub *sc,
 		xfs_agblock_t agbno, xfs_extlen_t len,
 		struct xfs_owner_info *oinfo);
-void xchk_xref_is_not_owned_by(struct xfs_scrub_context *sc,
+void xchk_xref_is_not_owned_by(struct xfs_scrub *sc,
 		xfs_agblock_t agbno, xfs_extlen_t len,
 		struct xfs_owner_info *oinfo);
-void xchk_xref_has_no_owner(struct xfs_scrub_context *sc,
+void xchk_xref_has_no_owner(struct xfs_scrub *sc,
 		xfs_agblock_t agbno, xfs_extlen_t len);
-void xchk_xref_is_cow_staging(struct xfs_scrub_context *sc,
+void xchk_xref_is_cow_staging(struct xfs_scrub *sc,
 		xfs_agblock_t bno, xfs_extlen_t len);
-void xchk_xref_is_not_shared(struct xfs_scrub_context *sc,
+void xchk_xref_is_not_shared(struct xfs_scrub *sc,
 		xfs_agblock_t bno, xfs_extlen_t len);
 #ifdef CONFIG_XFS_RT
-void xchk_xref_is_used_rt_space(struct xfs_scrub_context *sc,
+void xchk_xref_is_used_rt_space(struct xfs_scrub *sc,
 		xfs_rtblock_t rtbno, xfs_extlen_t len);
 #else
 # define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
index e2a288e34337..56c6347e9482 100644
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -26,7 +26,7 @@
 /* Set us up to scrub a symbolic link. */
 int
 xchk_setup_symlink(
-	struct xfs_scrub_context	*sc,
+	struct xfs_scrub	*sc,
 	struct xfs_inode		*ip)
 {
 	/* Allocate the buffer without the inode lock held. */
@@ -41,7 +41,7 @@ xchk_setup_symlink(
 
 int
 xchk_symlink(
-	struct xfs_scrub_context	*sc)
+	struct xfs_scrub	*sc)
 {
 	struct xfs_inode		*ip = sc->ip;
 	struct xfs_ifork		*ifp;
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 96f3edda3e91..93db22c39b51 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -59,7 +59,7 @@ DEFINE_SCRUB_EVENT(xrep_attempt);
 DEFINE_SCRUB_EVENT(xrep_done);
 
 TRACE_EVENT(xchk_op_error,
-	TP_PROTO(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+	TP_PROTO(struct xfs_scrub *sc, xfs_agnumber_t agno,
 		 xfs_agblock_t bno, int error, void *ret_ip),
 	TP_ARGS(sc, agno, bno, error, ret_ip),
 	TP_STRUCT__entry(
@@ -88,7 +88,7 @@ TRACE_EVENT(xchk_op_error,
 );
 
 TRACE_EVENT(xchk_file_op_error,
-	TP_PROTO(struct xfs_scrub_context *sc, int whichfork,
+	TP_PROTO(struct xfs_scrub *sc, int whichfork,
 		 xfs_fileoff_t offset, int error, void *ret_ip),
 	TP_ARGS(sc, whichfork, offset, error, ret_ip),
 	TP_STRUCT__entry(
@@ -120,7 +120,7 @@ TRACE_EVENT(xchk_file_op_error,
 );
 
 DECLARE_EVENT_CLASS(xchk_block_error_class,
-	TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, void *ret_ip),
+	TP_PROTO(struct xfs_scrub *sc, xfs_daddr_t daddr, void *ret_ip),
 	TP_ARGS(sc, daddr, ret_ip),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
@@ -154,7 +154,7 @@ DECLARE_EVENT_CLASS(xchk_block_error_class,
 
 #define DEFINE_SCRUB_BLOCK_ERROR_EVENT(name) \
 DEFINE_EVENT(xchk_block_error_class, name, \
-	TP_PROTO(struct xfs_scrub_context *sc, xfs_daddr_t daddr, \
+	TP_PROTO(struct xfs_scrub *sc, xfs_daddr_t daddr, \
 		 void *ret_ip), \
 	TP_ARGS(sc, daddr, ret_ip))
 
@@ -162,7 +162,7 @@ DEFINE_SCRUB_BLOCK_ERROR_EVENT(xchk_block_error);
 DEFINE_SCRUB_BLOCK_ERROR_EVENT(xchk_block_preen);
 
 DECLARE_EVENT_CLASS(xchk_ino_error_class,
-	TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, void *ret_ip),
+	TP_PROTO(struct xfs_scrub *sc, xfs_ino_t ino, void *ret_ip),
 	TP_ARGS(sc, ino, ret_ip),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
@@ -185,7 +185,7 @@ DECLARE_EVENT_CLASS(xchk_ino_error_class,
 
 #define DEFINE_SCRUB_INO_ERROR_EVENT(name) \
 DEFINE_EVENT(xchk_ino_error_class, name, \
-	TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, \
+	TP_PROTO(struct xfs_scrub *sc, xfs_ino_t ino, \
 		 void *ret_ip), \
 	TP_ARGS(sc, ino, ret_ip))
 
@@ -194,7 +194,7 @@ DEFINE_SCRUB_INO_ERROR_EVENT(xchk_ino_preen);
 DEFINE_SCRUB_INO_ERROR_EVENT(xchk_ino_warning);
 
 DECLARE_EVENT_CLASS(xchk_fblock_error_class,
-	TP_PROTO(struct xfs_scrub_context *sc, int whichfork,
+	TP_PROTO(struct xfs_scrub *sc, int whichfork,
 		 xfs_fileoff_t offset, void *ret_ip),
 	TP_ARGS(sc, whichfork, offset, ret_ip),
 	TP_STRUCT__entry(
@@ -224,7 +224,7 @@ DECLARE_EVENT_CLASS(xchk_fblock_error_class,
 
 #define DEFINE_SCRUB_FBLOCK_ERROR_EVENT(name) \
 DEFINE_EVENT(xchk_fblock_error_class, name, \
-	TP_PROTO(struct xfs_scrub_context *sc, int whichfork, \
+	TP_PROTO(struct xfs_scrub *sc, int whichfork, \
 		 xfs_fileoff_t offset, void *ret_ip), \
 	TP_ARGS(sc, whichfork, offset, ret_ip))
 
@@ -232,7 +232,7 @@ DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_error);
 DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_warning);
 
 TRACE_EVENT(xchk_incomplete,
-	TP_PROTO(struct xfs_scrub_context *sc, void *ret_ip),
+	TP_PROTO(struct xfs_scrub *sc, void *ret_ip),
 	TP_ARGS(sc, ret_ip),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
@@ -251,7 +251,7 @@ TRACE_EVENT(xchk_incomplete,
 );
 
 TRACE_EVENT(xchk_btree_op_error,
-	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+	TP_PROTO(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
 		 int level, int error, void *ret_ip),
 	TP_ARGS(sc, cur, level, error, ret_ip),
 	TP_STRUCT__entry(
@@ -291,7 +291,7 @@ TRACE_EVENT(xchk_btree_op_error,
 );
 
 TRACE_EVENT(xchk_ifork_btree_op_error,
-	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+	TP_PROTO(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
 		 int level, int error, void *ret_ip),
 	TP_ARGS(sc, cur, level, error, ret_ip),
 	TP_STRUCT__entry(
@@ -336,7 +336,7 @@ TRACE_EVENT(xchk_ifork_btree_op_error,
 );
 
 TRACE_EVENT(xchk_btree_error,
-	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+	TP_PROTO(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
 		 int level, void *ret_ip),
 	TP_ARGS(sc, cur, level, ret_ip),
 	TP_STRUCT__entry(
@@ -372,7 +372,7 @@ TRACE_EVENT(xchk_btree_error,
 );
 
 TRACE_EVENT(xchk_ifork_btree_error,
-	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+	TP_PROTO(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
 		 int level, void *ret_ip),
 	TP_ARGS(sc, cur, level, ret_ip),
 	TP_STRUCT__entry(
@@ -414,7 +414,7 @@ TRACE_EVENT(xchk_ifork_btree_error,
 );
 
 DECLARE_EVENT_CLASS(xchk_sbtree_class,
-	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+	TP_PROTO(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
 		 int level),
 	TP_ARGS(sc, cur, level),
 	TP_STRUCT__entry(
@@ -451,7 +451,7 @@ DECLARE_EVENT_CLASS(xchk_sbtree_class,
 )
 #define DEFINE_SCRUB_SBTREE_EVENT(name) \
 DEFINE_EVENT(xchk_sbtree_class, name, \
-	TP_PROTO(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur, \
+	TP_PROTO(struct xfs_scrub *sc, struct xfs_btree_cur *cur, \
 		 int level), \
 	TP_ARGS(sc, cur, level))
 
@@ -459,7 +459,7 @@ DEFINE_SCRUB_SBTREE_EVENT(xchk_btree_rec);
 DEFINE_SCRUB_SBTREE_EVENT(xchk_btree_key);
 
 TRACE_EVENT(xchk_xref_error,
-	TP_PROTO(struct xfs_scrub_context *sc, int error, void *ret_ip),
+	TP_PROTO(struct xfs_scrub *sc, int error, void *ret_ip),
 	TP_ARGS(sc, error, ret_ip),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
-- 
cgit v1.2.3


From 032d91f9820f6d241dc5584c27a668cfd377aaf0 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 19 Jul 2018 12:29:12 -0700
Subject: xfs: fix indentation and other whitespace problems in scrub/repair

Now that we've shortened everything, fix up all the indentation and
whitespace problems.  There are no functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/scrub/agheader.c        | 172 ++++++++++++++++++++---------------------
 fs/xfs/scrub/agheader_repair.c |   8 +-
 fs/xfs/scrub/alloc.c           |  48 ++++++------
 fs/xfs/scrub/attr.c            |  20 ++---
 fs/xfs/scrub/bmap.c            | 150 +++++++++++++++++------------------
 fs/xfs/scrub/btree.c           | 172 ++++++++++++++++++++---------------------
 fs/xfs/scrub/btree.h           |  25 +++---
 fs/xfs/scrub/common.c          | 158 ++++++++++++++++++-------------------
 fs/xfs/scrub/common.h          |  23 +++---
 fs/xfs/scrub/dabtree.c         |  56 +++++++-------
 fs/xfs/scrub/dabtree.h         |  21 +++--
 fs/xfs/scrub/dir.c             |  88 ++++++++++-----------
 fs/xfs/scrub/ialloc.c          |  58 +++++++-------
 fs/xfs/scrub/inode.c           | 102 ++++++++++++------------
 fs/xfs/scrub/parent.c          |  60 +++++++-------
 fs/xfs/scrub/quota.c           |  68 ++++++++--------
 fs/xfs/scrub/refcount.c        |  70 ++++++++---------
 fs/xfs/scrub/repair.c          | 148 +++++++++++++++++------------------
 fs/xfs/scrub/repair.h          |  28 +++----
 fs/xfs/scrub/rmap.c            |  78 +++++++++----------
 fs/xfs/scrub/rtbitmap.c        |  38 ++++-----
 fs/xfs/scrub/scrub.c           |   8 +-
 fs/xfs/scrub/scrub.h           |  62 +++++++--------
 fs/xfs/scrub/symlink.c         |  10 +--
 fs/xfs/scrub/trace.c           |   4 +-
 25 files changed, 832 insertions(+), 843 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 14ba4189ae8f..3068a9382feb 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -30,13 +30,13 @@
 STATIC void
 xchk_superblock_xref(
 	struct xfs_scrub	*sc,
-	struct xfs_buf			*bp)
+	struct xfs_buf		*bp)
 {
-	struct xfs_owner_info		oinfo;
-	struct xfs_mount		*mp = sc->mp;
-	xfs_agnumber_t			agno = sc->sm->sm_agno;
-	xfs_agblock_t			agbno;
-	int				error;
+	struct xfs_owner_info	oinfo;
+	struct xfs_mount	*mp = sc->mp;
+	xfs_agnumber_t		agno = sc->sm->sm_agno;
+	xfs_agblock_t		agbno;
+	int			error;
 
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
@@ -68,14 +68,14 @@ int
 xchk_superblock(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_mount		*mp = sc->mp;
-	struct xfs_buf			*bp;
-	struct xfs_dsb			*sb;
-	xfs_agnumber_t			agno;
-	uint32_t			v2_ok;
-	__be32				features_mask;
-	int				error;
-	__be16				vernum_mask;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_buf		*bp;
+	struct xfs_dsb		*sb;
+	xfs_agnumber_t		agno;
+	uint32_t		v2_ok;
+	__be32			features_mask;
+	int			error;
+	__be16			vernum_mask;
 
 	agno = sc->sm->sm_agno;
 	if (agno == 0)
@@ -367,9 +367,9 @@ static inline void
 xchk_agf_xref_freeblks(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_agf			*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
-	xfs_extlen_t			blocks = 0;
-	int				error;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+	xfs_extlen_t		blocks = 0;
+	int			error;
 
 	if (!sc->sa.bno_cur)
 		return;
@@ -387,11 +387,11 @@ static inline void
 xchk_agf_xref_cntbt(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_agf			*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
-	xfs_agblock_t			agbno;
-	xfs_extlen_t			blocks;
-	int				have;
-	int				error;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+	xfs_agblock_t		agbno;
+	xfs_extlen_t		blocks;
+	int			have;
+	int			error;
 
 	if (!sc->sa.cnt_cur)
 		return;
@@ -419,11 +419,11 @@ STATIC void
 xchk_agf_xref_btreeblks(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_agf			*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
-	struct xfs_mount		*mp = sc->mp;
-	xfs_agblock_t			blocks;
-	xfs_agblock_t			btreeblks;
-	int				error;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+	struct xfs_mount	*mp = sc->mp;
+	xfs_agblock_t		blocks;
+	xfs_agblock_t		btreeblks;
+	int			error;
 
 	/* Check agf_rmap_blocks; set up for agf_btreeblks check */
 	if (sc->sa.rmap_cur) {
@@ -465,9 +465,9 @@ static inline void
 xchk_agf_xref_refcblks(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_agf			*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
-	xfs_agblock_t			blocks;
-	int				error;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+	xfs_agblock_t		blocks;
+	int			error;
 
 	if (!sc->sa.refc_cur)
 		return;
@@ -484,10 +484,10 @@ STATIC void
 xchk_agf_xref(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_owner_info		oinfo;
-	struct xfs_mount		*mp = sc->mp;
-	xfs_agblock_t			agbno;
-	int				error;
+	struct xfs_owner_info	oinfo;
+	struct xfs_mount	*mp = sc->mp;
+	xfs_agblock_t		agbno;
+	int			error;
 
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
@@ -516,17 +516,17 @@ int
 xchk_agf(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_mount		*mp = sc->mp;
-	struct xfs_agf			*agf;
-	xfs_agnumber_t			agno;
-	xfs_agblock_t			agbno;
-	xfs_agblock_t			eoag;
-	xfs_agblock_t			agfl_first;
-	xfs_agblock_t			agfl_last;
-	xfs_agblock_t			agfl_count;
-	xfs_agblock_t			fl_count;
-	int				level;
-	int				error = 0;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_agf		*agf;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	xfs_agblock_t		eoag;
+	xfs_agblock_t		agfl_first;
+	xfs_agblock_t		agfl_last;
+	xfs_agblock_t		agfl_count;
+	xfs_agblock_t		fl_count;
+	int			level;
+	int			error = 0;
 
 	agno = sc->sa.agno = sc->sm->sm_agno;
 	error = xchk_ag_read_headers(sc, agno, &sc->sa.agi_bp,
@@ -598,10 +598,10 @@ out:
 /* AGFL */
 
 struct xchk_agfl_info {
-	struct xfs_owner_info		oinfo;
-	unsigned int			sz_entries;
-	unsigned int			nr_entries;
-	xfs_agblock_t			*entries;
+	struct xfs_owner_info	oinfo;
+	unsigned int		sz_entries;
+	unsigned int		nr_entries;
+	xfs_agblock_t		*entries;
 	struct xfs_scrub	*sc;
 };
 
@@ -609,8 +609,8 @@ struct xchk_agfl_info {
 STATIC void
 xchk_agfl_block_xref(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			agbno,
-	struct xfs_owner_info		*oinfo)
+	xfs_agblock_t		agbno,
+	struct xfs_owner_info	*oinfo)
 {
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
@@ -624,13 +624,13 @@ xchk_agfl_block_xref(
 /* Scrub an AGFL block. */
 STATIC int
 xchk_agfl_block(
-	struct xfs_mount		*mp,
-	xfs_agblock_t			agbno,
-	void				*priv)
+	struct xfs_mount	*mp,
+	xfs_agblock_t		agbno,
+	void			*priv)
 {
-	struct xchk_agfl_info		*sai = priv;
+	struct xchk_agfl_info	*sai = priv;
 	struct xfs_scrub	*sc = sai->sc;
-	xfs_agnumber_t			agno = sc->sa.agno;
+	xfs_agnumber_t		agno = sc->sa.agno;
 
 	if (xfs_verify_agbno(mp, agno, agbno) &&
 	    sai->nr_entries < sai->sz_entries)
@@ -662,10 +662,10 @@ STATIC void
 xchk_agfl_xref(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_owner_info		oinfo;
-	struct xfs_mount		*mp = sc->mp;
-	xfs_agblock_t			agbno;
-	int				error;
+	struct xfs_owner_info	oinfo;
+	struct xfs_mount	*mp = sc->mp;
+	xfs_agblock_t		agbno;
+	int			error;
 
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
@@ -693,12 +693,12 @@ int
 xchk_agfl(
 	struct xfs_scrub	*sc)
 {
-	struct xchk_agfl_info		sai;
-	struct xfs_agf			*agf;
-	xfs_agnumber_t			agno;
-	unsigned int			agflcount;
-	unsigned int			i;
-	int				error;
+	struct xchk_agfl_info	sai;
+	struct xfs_agf		*agf;
+	xfs_agnumber_t		agno;
+	unsigned int		agflcount;
+	unsigned int		i;
+	int			error;
 
 	agno = sc->sa.agno = sc->sm->sm_agno;
 	error = xchk_ag_read_headers(sc, agno, &sc->sa.agi_bp,
@@ -770,10 +770,10 @@ static inline void
 xchk_agi_xref_icounts(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_agi			*agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
-	xfs_agino_t			icount;
-	xfs_agino_t			freecount;
-	int				error;
+	struct xfs_agi		*agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
+	xfs_agino_t		icount;
+	xfs_agino_t		freecount;
+	int			error;
 
 	if (!sc->sa.ino_cur)
 		return;
@@ -791,10 +791,10 @@ STATIC void
 xchk_agi_xref(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_owner_info		oinfo;
-	struct xfs_mount		*mp = sc->mp;
-	xfs_agblock_t			agbno;
-	int				error;
+	struct xfs_owner_info	oinfo;
+	struct xfs_mount	*mp = sc->mp;
+	xfs_agblock_t		agbno;
+	int			error;
 
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
@@ -820,18 +820,18 @@ int
 xchk_agi(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_mount		*mp = sc->mp;
-	struct xfs_agi			*agi;
-	xfs_agnumber_t			agno;
-	xfs_agblock_t			agbno;
-	xfs_agblock_t			eoag;
-	xfs_agino_t			agino;
-	xfs_agino_t			first_agino;
-	xfs_agino_t			last_agino;
-	xfs_agino_t			icount;
-	int				i;
-	int				level;
-	int				error = 0;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_agi		*agi;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	xfs_agblock_t		eoag;
+	xfs_agino_t		agino;
+	xfs_agino_t		first_agino;
+	xfs_agino_t		last_agino;
+	xfs_agino_t		icount;
+	int			i;
+	int			level;
+	int			error = 0;
 
 	agno = sc->sa.agno = sc->sm->sm_agno;
 	error = xchk_ag_read_headers(sc, agno, &sc->sa.agi_bp,
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 2457968482f8..1e96621ece3a 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -31,10 +31,10 @@ int
 xrep_superblock(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_mount		*mp = sc->mp;
-	struct xfs_buf			*bp;
-	xfs_agnumber_t			agno;
-	int				error;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_buf		*bp;
+	xfs_agnumber_t		agno;
+	int			error;
 
 	/* Don't try to repair AG 0's sb; let xfs_repair deal with it. */
 	agno = sc->sm->sm_agno;
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 653d80b3aa39..036b5c7021eb 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -30,7 +30,7 @@
 int
 xchk_setup_ag_allocbt(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
 	return xchk_setup_ag_btree(sc, ip, false);
 }
@@ -43,14 +43,14 @@ xchk_setup_ag_allocbt(
 STATIC void
 xchk_allocbt_xref_other(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			agbno,
-	xfs_extlen_t			len)
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len)
 {
-	struct xfs_btree_cur		**pcur;
-	xfs_agblock_t			fbno;
-	xfs_extlen_t			flen;
-	int				has_otherrec;
-	int				error;
+	struct xfs_btree_cur	**pcur;
+	xfs_agblock_t		fbno;
+	xfs_extlen_t		flen;
+	int			has_otherrec;
+	int			error;
 
 	if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT)
 		pcur = &sc->sa.cnt_cur;
@@ -83,8 +83,8 @@ xchk_allocbt_xref_other(
 STATIC void
 xchk_allocbt_xref(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			agbno,
-	xfs_extlen_t			len)
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len)
 {
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
@@ -98,14 +98,14 @@ xchk_allocbt_xref(
 /* Scrub a bnobt/cntbt record. */
 STATIC int
 xchk_allocbt_rec(
-	struct xchk_btree		*bs,
-	union xfs_btree_rec		*rec)
+	struct xchk_btree	*bs,
+	union xfs_btree_rec	*rec)
 {
-	struct xfs_mount		*mp = bs->cur->bc_mp;
-	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
-	xfs_agblock_t			bno;
-	xfs_extlen_t			len;
-	int				error = 0;
+	struct xfs_mount	*mp = bs->cur->bc_mp;
+	xfs_agnumber_t		agno = bs->cur->bc_private.a.agno;
+	xfs_agblock_t		bno;
+	xfs_extlen_t		len;
+	int			error = 0;
 
 	bno = be32_to_cpu(rec->alloc.ar_startblock);
 	len = be32_to_cpu(rec->alloc.ar_blockcount);
@@ -124,10 +124,10 @@ xchk_allocbt_rec(
 STATIC int
 xchk_allocbt(
 	struct xfs_scrub	*sc,
-	xfs_btnum_t			which)
+	xfs_btnum_t		which)
 {
-	struct xfs_owner_info		oinfo;
-	struct xfs_btree_cur		*cur;
+	struct xfs_owner_info	oinfo;
+	struct xfs_btree_cur	*cur;
 
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
 	cur = which == XFS_BTNUM_BNO ? sc->sa.bno_cur : sc->sa.cnt_cur;
@@ -152,11 +152,11 @@ xchk_cntbt(
 void
 xchk_xref_is_used_space(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			agbno,
-	xfs_extlen_t			len)
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len)
 {
-	bool				is_freesp;
-	int				error;
+	bool			is_freesp;
+	int			error;
 
 	if (!sc->sa.bno_cur || xchk_skip_xref(sc->sm))
 		return;
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 6650fb3010b6..81d5e90547a1 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -34,9 +34,9 @@
 int
 xchk_setup_xattr(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
-	size_t				sz;
+	size_t			sz;
 
 	/*
 	 * Allocate the buffer without the inode lock held.  We need enough
@@ -57,7 +57,7 @@ xchk_setup_xattr(
 
 struct xchk_xattr {
 	struct xfs_attr_list_context	context;
-	struct xfs_scrub	*sc;
+	struct xfs_scrub		*sc;
 };
 
 /*
@@ -128,12 +128,12 @@ fail_xref:
 STATIC bool
 xchk_xattr_set_map(
 	struct xfs_scrub	*sc,
-	unsigned long			*map,
-	unsigned int			start,
-	unsigned int			len)
+	unsigned long		*map,
+	unsigned int		start,
+	unsigned int		len)
 {
-	unsigned int			mapsize = sc->mp->m_attr_geo->blksize;
-	bool				ret = true;
+	unsigned int		mapsize = sc->mp->m_attr_geo->blksize;
+	bool			ret = true;
 
 	if (start >= mapsize)
 		return false;
@@ -155,7 +155,7 @@ xchk_xattr_set_map(
  */
 STATIC bool
 xchk_xattr_check_freemap(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	unsigned long			*map,
 	struct xfs_attr3_icleaf_hdr	*leafhdr)
 {
@@ -405,7 +405,7 @@ out:
 /* Scrub the extended attribute metadata. */
 int
 xchk_xattr(
-	struct xfs_scrub	*sc)
+	struct xfs_scrub		*sc)
 {
 	struct xchk_xattr		sx;
 	struct attrlist_cursor_kern	cursor = { 0 };
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 0e5166232b15..e1d11f3223e3 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -35,9 +35,9 @@
 int
 xchk_setup_inode_bmap(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
-	int				error;
+	int			error;
 
 	error = xchk_get_inode(sc, ip);
 	if (error)
@@ -80,25 +80,25 @@ out:
 
 struct xchk_bmap_info {
 	struct xfs_scrub	*sc;
-	xfs_fileoff_t			lastoff;
-	bool				is_rt;
-	bool				is_shared;
-	int				whichfork;
+	xfs_fileoff_t		lastoff;
+	bool			is_rt;
+	bool			is_shared;
+	int			whichfork;
 };
 
 /* Look for a corresponding rmap for this irec. */
 static inline bool
 xchk_bmap_get_rmap(
-	struct xchk_bmap_info		*info,
-	struct xfs_bmbt_irec		*irec,
-	xfs_agblock_t			agbno,
-	uint64_t			owner,
-	struct xfs_rmap_irec		*rmap)
+	struct xchk_bmap_info	*info,
+	struct xfs_bmbt_irec	*irec,
+	xfs_agblock_t		agbno,
+	uint64_t		owner,
+	struct xfs_rmap_irec	*rmap)
 {
-	xfs_fileoff_t			offset;
-	unsigned int			rflags = 0;
-	int				has_rmap;
-	int				error;
+	xfs_fileoff_t		offset;
+	unsigned int		rflags = 0;
+	int			has_rmap;
+	int			error;
 
 	if (info->whichfork == XFS_ATTR_FORK)
 		rflags |= XFS_RMAP_ATTR_FORK;
@@ -152,13 +152,13 @@ out:
 /* Make sure that we have rmapbt records for this extent. */
 STATIC void
 xchk_bmap_xref_rmap(
-	struct xchk_bmap_info		*info,
-	struct xfs_bmbt_irec		*irec,
-	xfs_agblock_t			agbno)
+	struct xchk_bmap_info	*info,
+	struct xfs_bmbt_irec	*irec,
+	xfs_agblock_t		agbno)
 {
-	struct xfs_rmap_irec		rmap;
-	unsigned long long		rmap_end;
-	uint64_t			owner;
+	struct xfs_rmap_irec	rmap;
+	unsigned long long	rmap_end;
+	uint64_t		owner;
 
 	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
 		return;
@@ -222,10 +222,10 @@ xchk_bmap_xref_rmap(
 /* Cross-reference a single rtdev extent record. */
 STATIC void
 xchk_bmap_rt_extent_xref(
-	struct xchk_bmap_info		*info,
-	struct xfs_inode		*ip,
-	struct xfs_btree_cur		*cur,
-	struct xfs_bmbt_irec		*irec)
+	struct xchk_bmap_info	*info,
+	struct xfs_inode	*ip,
+	struct xfs_btree_cur	*cur,
+	struct xfs_bmbt_irec	*irec)
 {
 	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
@@ -237,16 +237,16 @@ xchk_bmap_rt_extent_xref(
 /* Cross-reference a single datadev extent record. */
 STATIC void
 xchk_bmap_extent_xref(
-	struct xchk_bmap_info		*info,
-	struct xfs_inode		*ip,
-	struct xfs_btree_cur		*cur,
-	struct xfs_bmbt_irec		*irec)
+	struct xchk_bmap_info	*info,
+	struct xfs_inode	*ip,
+	struct xfs_btree_cur	*cur,
+	struct xfs_bmbt_irec	*irec)
 {
-	struct xfs_mount		*mp = info->sc->mp;
-	xfs_agnumber_t			agno;
-	xfs_agblock_t			agbno;
-	xfs_extlen_t			len;
-	int				error;
+	struct xfs_mount	*mp = info->sc->mp;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	xfs_extlen_t		len;
+	int			error;
 
 	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
@@ -284,15 +284,15 @@ xchk_bmap_extent_xref(
 /* Scrub a single extent record. */
 STATIC int
 xchk_bmap_extent(
-	struct xfs_inode		*ip,
-	struct xfs_btree_cur		*cur,
-	struct xchk_bmap_info		*info,
-	struct xfs_bmbt_irec		*irec)
+	struct xfs_inode	*ip,
+	struct xfs_btree_cur	*cur,
+	struct xchk_bmap_info	*info,
+	struct xfs_bmbt_irec	*irec)
 {
-	struct xfs_mount		*mp = info->sc->mp;
-	struct xfs_buf			*bp = NULL;
-	xfs_filblks_t			end;
-	int				error = 0;
+	struct xfs_mount	*mp = info->sc->mp;
+	struct xfs_buf		*bp = NULL;
+	xfs_filblks_t		end;
+	int			error = 0;
 
 	if (cur)
 		xfs_btree_get_block(cur, 0, &bp);
@@ -357,16 +357,16 @@ xchk_bmap_extent(
 /* Scrub a bmbt record. */
 STATIC int
 xchk_bmapbt_rec(
-	struct xchk_btree		*bs,
-	union xfs_btree_rec		*rec)
+	struct xchk_btree	*bs,
+	union xfs_btree_rec	*rec)
 {
-	struct xfs_bmbt_irec		irec;
-	struct xchk_bmap_info		*info = bs->private;
-	struct xfs_inode		*ip = bs->cur->bc_private.b.ip;
-	struct xfs_buf			*bp = NULL;
-	struct xfs_btree_block		*block;
-	uint64_t			owner;
-	int				i;
+	struct xfs_bmbt_irec	irec;
+	struct xchk_bmap_info	*info = bs->private;
+	struct xfs_inode	*ip = bs->cur->bc_private.b.ip;
+	struct xfs_buf		*bp = NULL;
+	struct xfs_btree_block	*block;
+	uint64_t		owner;
+	int			i;
 
 	/*
 	 * Check the owners of the btree blocks up to the level below
@@ -392,14 +392,14 @@ xchk_bmapbt_rec(
 STATIC int
 xchk_bmap_btree(
 	struct xfs_scrub	*sc,
-	int				whichfork,
-	struct xchk_bmap_info		*info)
+	int			whichfork,
+	struct xchk_bmap_info	*info)
 {
-	struct xfs_owner_info		oinfo;
-	struct xfs_mount		*mp = sc->mp;
-	struct xfs_inode		*ip = sc->ip;
-	struct xfs_btree_cur		*cur;
-	int				error;
+	struct xfs_owner_info	oinfo;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_inode	*ip = sc->ip;
+	struct xfs_btree_cur	*cur;
+	int			error;
 
 	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
@@ -410,8 +410,8 @@ xchk_bmap_btree(
 
 struct xchk_bmap_check_rmap_info {
 	struct xfs_scrub	*sc;
-	int				whichfork;
-	struct xfs_iext_cursor		icur;
+	int			whichfork;
+	struct xfs_iext_cursor	icur;
 };
 
 /* Can we find bmaps that fit this rmap? */
@@ -424,7 +424,7 @@ xchk_bmap_check_rmap(
 	struct xfs_bmbt_irec		irec;
 	struct xchk_bmap_check_rmap_info	*sbcri = priv;
 	struct xfs_ifork		*ifp;
-	struct xfs_scrub	*sc = sbcri->sc;
+	struct xfs_scrub		*sc = sbcri->sc;
 	bool				have_map;
 
 	/* Is this even the right fork? */
@@ -488,7 +488,7 @@ out:
 /* Make sure each rmap has a corresponding bmbt entry. */
 STATIC int
 xchk_bmap_check_ag_rmaps(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	int				whichfork,
 	xfs_agnumber_t			agno)
 {
@@ -523,11 +523,11 @@ out_agf:
 STATIC int
 xchk_bmap_check_rmaps(
 	struct xfs_scrub	*sc,
-	int				whichfork)
+	int			whichfork)
 {
-	loff_t				size;
-	xfs_agnumber_t			agno;
-	int				error;
+	loff_t			size;
+	xfs_agnumber_t		agno;
+	int			error;
 
 	if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) ||
 	    whichfork == XFS_COW_FORK ||
@@ -580,16 +580,16 @@ xchk_bmap_check_rmaps(
 STATIC int
 xchk_bmap(
 	struct xfs_scrub	*sc,
-	int				whichfork)
+	int			whichfork)
 {
-	struct xfs_bmbt_irec		irec;
-	struct xchk_bmap_info		info = { NULL };
-	struct xfs_mount		*mp = sc->mp;
-	struct xfs_inode		*ip = sc->ip;
-	struct xfs_ifork		*ifp;
-	xfs_fileoff_t			endoff;
-	struct xfs_iext_cursor		icur;
-	int				error = 0;
+	struct xfs_bmbt_irec	irec;
+	struct xchk_bmap_info	info = { NULL };
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_inode	*ip = sc->ip;
+	struct xfs_ifork	*ifp;
+	xfs_fileoff_t		endoff;
+	struct xfs_iext_cursor	icur;
+	int			error = 0;
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index c4e1dce8c5b3..4ae959f7ad2c 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -31,11 +31,11 @@
 static bool
 __xchk_btree_process_error(
 	struct xfs_scrub	*sc,
-	struct xfs_btree_cur		*cur,
-	int				level,
-	int				*error,
-	__u32				errflag,
-	void				*ret_ip)
+	struct xfs_btree_cur	*cur,
+	int			level,
+	int			*error,
+	__u32			errflag,
+	void			*ret_ip)
 {
 	if (*error == 0)
 		return true;
@@ -66,9 +66,9 @@ __xchk_btree_process_error(
 bool
 xchk_btree_process_error(
 	struct xfs_scrub	*sc,
-	struct xfs_btree_cur		*cur,
-	int				level,
-	int				*error)
+	struct xfs_btree_cur	*cur,
+	int			level,
+	int			*error)
 {
 	return __xchk_btree_process_error(sc, cur, level, error,
 			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
@@ -77,9 +77,9 @@ xchk_btree_process_error(
 bool
 xchk_btree_xref_process_error(
 	struct xfs_scrub	*sc,
-	struct xfs_btree_cur		*cur,
-	int				level,
-	int				*error)
+	struct xfs_btree_cur	*cur,
+	int			level,
+	int			*error)
 {
 	return __xchk_btree_process_error(sc, cur, level, error,
 			XFS_SCRUB_OFLAG_XFAIL, __return_address);
@@ -89,10 +89,10 @@ xchk_btree_xref_process_error(
 static void
 __xchk_btree_set_corrupt(
 	struct xfs_scrub	*sc,
-	struct xfs_btree_cur		*cur,
-	int				level,
-	__u32				errflag,
-	void				*ret_ip)
+	struct xfs_btree_cur	*cur,
+	int			level,
+	__u32			errflag,
+	void			*ret_ip)
 {
 	sc->sm->sm_flags |= errflag;
 
@@ -107,8 +107,8 @@ __xchk_btree_set_corrupt(
 void
 xchk_btree_set_corrupt(
 	struct xfs_scrub	*sc,
-	struct xfs_btree_cur		*cur,
-	int				level)
+	struct xfs_btree_cur	*cur,
+	int			level)
 {
 	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
 			__return_address);
@@ -117,8 +117,8 @@ xchk_btree_set_corrupt(
 void
 xchk_btree_xref_set_corrupt(
 	struct xfs_scrub	*sc,
-	struct xfs_btree_cur		*cur,
-	int				level)
+	struct xfs_btree_cur	*cur,
+	int			level)
 {
 	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
 			__return_address);
@@ -225,11 +225,11 @@ xchk_btree_key(
  */
 static bool
 xchk_btree_ptr_ok(
-	struct xchk_btree		*bs,
-	int				level,
-	union xfs_btree_ptr		*ptr)
+	struct xchk_btree	*bs,
+	int			level,
+	union xfs_btree_ptr	*ptr)
 {
-	bool				res;
+	bool			res;
 
 	/* A btree rooted in an inode has no block pointer to the root. */
 	if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
@@ -250,18 +250,18 @@ xchk_btree_ptr_ok(
 /* Check that a btree block's sibling matches what we expect it. */
 STATIC int
 xchk_btree_block_check_sibling(
-	struct xchk_btree		*bs,
-	int				level,
-	int				direction,
-	union xfs_btree_ptr		*sibling)
+	struct xchk_btree	*bs,
+	int			level,
+	int			direction,
+	union xfs_btree_ptr	*sibling)
 {
-	struct xfs_btree_cur		*cur = bs->cur;
-	struct xfs_btree_block		*pblock;
-	struct xfs_buf			*pbp;
-	struct xfs_btree_cur		*ncur = NULL;
-	union xfs_btree_ptr		*pp;
-	int				success;
-	int				error;
+	struct xfs_btree_cur	*cur = bs->cur;
+	struct xfs_btree_block	*pblock;
+	struct xfs_buf		*pbp;
+	struct xfs_btree_cur	*ncur = NULL;
+	union xfs_btree_ptr	*pp;
+	int			success;
+	int			error;
 
 	error = xfs_btree_dup_cursor(cur, &ncur);
 	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
@@ -313,14 +313,14 @@ out:
 /* Check the siblings of a btree block. */
 STATIC int
 xchk_btree_block_check_siblings(
-	struct xchk_btree		*bs,
-	struct xfs_btree_block		*block)
+	struct xchk_btree	*bs,
+	struct xfs_btree_block	*block)
 {
-	struct xfs_btree_cur		*cur = bs->cur;
-	union xfs_btree_ptr		leftsib;
-	union xfs_btree_ptr		rightsib;
-	int				level;
-	int				error = 0;
+	struct xfs_btree_cur	*cur = bs->cur;
+	union xfs_btree_ptr	leftsib;
+	union xfs_btree_ptr	rightsib;
+	int			level;
+	int			error = 0;
 
 	xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
 	xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
@@ -361,15 +361,15 @@ struct check_owner {
  */
 STATIC int
 xchk_btree_check_block_owner(
-	struct xchk_btree		*bs,
-	int				level,
-	xfs_daddr_t			daddr)
+	struct xchk_btree	*bs,
+	int			level,
+	xfs_daddr_t		daddr)
 {
-	xfs_agnumber_t			agno;
-	xfs_agblock_t			agbno;
-	xfs_btnum_t			btnum;
-	bool				init_sa;
-	int				error = 0;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	xfs_btnum_t		btnum;
+	bool			init_sa;
+	int			error = 0;
 
 	if (!bs->cur)
 		return 0;
@@ -408,12 +408,12 @@ xchk_btree_check_block_owner(
 /* Check the owner of a btree block. */
 STATIC int
 xchk_btree_check_owner(
-	struct xchk_btree		*bs,
-	int				level,
-	struct xfs_buf			*bp)
+	struct xchk_btree	*bs,
+	int			level,
+	struct xfs_buf		*bp)
 {
-	struct xfs_btree_cur		*cur = bs->cur;
-	struct check_owner		*co;
+	struct xfs_btree_cur	*cur = bs->cur;
+	struct check_owner	*co;
 
 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL)
 		return 0;
@@ -484,14 +484,14 @@ xchk_btree_check_minrecs(
  */
 STATIC int
 xchk_btree_get_block(
-	struct xchk_btree		*bs,
-	int				level,
-	union xfs_btree_ptr		*pp,
-	struct xfs_btree_block		**pblock,
-	struct xfs_buf			**pbp)
+	struct xchk_btree	*bs,
+	int			level,
+	union xfs_btree_ptr	*pp,
+	struct xfs_btree_block	**pblock,
+	struct xfs_buf		**pbp)
 {
-	void				*failed_at;
-	int				error;
+	xfs_failaddr_t		failed_at;
+	int			error;
 
 	*pblock = NULL;
 	*pbp = NULL;
@@ -538,17 +538,17 @@ xchk_btree_get_block(
  */
 STATIC void
 xchk_btree_block_keys(
-	struct xchk_btree		*bs,
-	int				level,
-	struct xfs_btree_block		*block)
+	struct xchk_btree	*bs,
+	int			level,
+	struct xfs_btree_block	*block)
 {
-	union xfs_btree_key		block_keys;
-	struct xfs_btree_cur		*cur = bs->cur;
-	union xfs_btree_key		*high_bk;
-	union xfs_btree_key		*parent_keys;
-	union xfs_btree_key		*high_pk;
-	struct xfs_btree_block		*parent_block;
-	struct xfs_buf			*bp;
+	union xfs_btree_key	block_keys;
+	struct xfs_btree_cur	*cur = bs->cur;
+	union xfs_btree_key	*high_bk;
+	union xfs_btree_key	*parent_keys;
+	union xfs_btree_key	*high_pk;
+	struct xfs_btree_block	*parent_block;
+	struct xfs_buf		*bp;
 
 	if (level >= cur->bc_nlevels - 1)
 		return;
@@ -584,22 +584,22 @@ xchk_btree_block_keys(
 int
 xchk_btree(
 	struct xfs_scrub	*sc,
-	struct xfs_btree_cur		*cur,
-	xchk_btree_rec_fn		scrub_fn,
-	struct xfs_owner_info		*oinfo,
-	void				*private)
+	struct xfs_btree_cur	*cur,
+	xchk_btree_rec_fn	scrub_fn,
+	struct xfs_owner_info	*oinfo,
+	void			*private)
 {
-	struct xchk_btree		bs = { NULL };
-	union xfs_btree_ptr		ptr;
-	union xfs_btree_ptr		*pp;
-	union xfs_btree_rec		*recp;
-	struct xfs_btree_block		*block;
-	int				level;
-	struct xfs_buf			*bp;
-	struct check_owner		*co;
-	struct check_owner		*n;
-	int				i;
-	int				error = 0;
+	struct xchk_btree	bs = { NULL };
+	union xfs_btree_ptr	ptr;
+	union xfs_btree_ptr	*pp;
+	union xfs_btree_rec	*recp;
+	struct xfs_btree_block	*block;
+	int			level;
+	struct xfs_buf		*bp;
+	struct check_owner	*co;
+	struct check_owner	*n;
+	int			i;
+	int			error = 0;
 
 	/* Initialize scrub state */
 	bs.cur = cur;
diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h
index a0b74b515b9b..aada763cd006 100644
--- a/fs/xfs/scrub/btree.h
+++ b/fs/xfs/scrub/btree.h
@@ -14,8 +14,7 @@ bool xchk_btree_process_error(struct xfs_scrub *sc,
 
 /* Check for btree xref operation errors. */
 bool xchk_btree_xref_process_error(struct xfs_scrub *sc,
-				struct xfs_btree_cur *cur, int level,
-				int *error);
+		struct xfs_btree_cur *cur, int level, int *error);
 
 /* Check for btree corruption. */
 void xchk_btree_set_corrupt(struct xfs_scrub *sc,
@@ -33,20 +32,20 @@ typedef int (*xchk_btree_rec_fn)(
 struct xchk_btree {
 	/* caller-provided scrub state */
 	struct xfs_scrub	*sc;
-	struct xfs_btree_cur		*cur;
-	xchk_btree_rec_fn		scrub_rec;
-	struct xfs_owner_info		*oinfo;
-	void				*private;
+	struct xfs_btree_cur	*cur;
+	xchk_btree_rec_fn	scrub_rec;
+	struct xfs_owner_info	*oinfo;
+	void			*private;
 
 	/* internal scrub state */
-	union xfs_btree_rec		lastrec;
-	bool				firstrec;
-	union xfs_btree_key		lastkey[XFS_BTREE_MAXLEVELS];
-	bool				firstkey[XFS_BTREE_MAXLEVELS];
-	struct list_head		to_check;
+	union xfs_btree_rec	lastrec;
+	bool			firstrec;
+	union xfs_btree_key	lastkey[XFS_BTREE_MAXLEVELS];
+	bool			firstkey[XFS_BTREE_MAXLEVELS];
+	struct list_head	to_check;
 };
 int xchk_btree(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
-		    xchk_btree_rec_fn scrub_fn,
-		    struct xfs_owner_info *oinfo, void *private);
+		xchk_btree_rec_fn scrub_fn, struct xfs_owner_info *oinfo,
+		void *private);
 
 #endif /* __XFS_SCRUB_BTREE_H__ */
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index ed9195116556..baac08304a5a 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -70,11 +70,11 @@
 static bool
 __xchk_process_error(
 	struct xfs_scrub	*sc,
-	xfs_agnumber_t			agno,
-	xfs_agblock_t			bno,
-	int				*error,
-	__u32				errflag,
-	void				*ret_ip)
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		bno,
+	int			*error,
+	__u32			errflag,
+	void			*ret_ip)
 {
 	switch (*error) {
 	case 0:
@@ -100,9 +100,9 @@ __xchk_process_error(
 bool
 xchk_process_error(
 	struct xfs_scrub	*sc,
-	xfs_agnumber_t			agno,
-	xfs_agblock_t			bno,
-	int				*error)
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		bno,
+	int			*error)
 {
 	return __xchk_process_error(sc, agno, bno, error,
 			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
@@ -111,9 +111,9 @@ xchk_process_error(
 bool
 xchk_xref_process_error(
 	struct xfs_scrub	*sc,
-	xfs_agnumber_t			agno,
-	xfs_agblock_t			bno,
-	int				*error)
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		bno,
+	int			*error)
 {
 	return __xchk_process_error(sc, agno, bno, error,
 			XFS_SCRUB_OFLAG_XFAIL, __return_address);
@@ -123,11 +123,11 @@ xchk_xref_process_error(
 static bool
 __xchk_fblock_process_error(
 	struct xfs_scrub	*sc,
-	int				whichfork,
-	xfs_fileoff_t			offset,
-	int				*error,
-	__u32				errflag,
-	void				*ret_ip)
+	int			whichfork,
+	xfs_fileoff_t		offset,
+	int			*error,
+	__u32			errflag,
+	void			*ret_ip)
 {
 	switch (*error) {
 	case 0:
@@ -153,9 +153,9 @@ __xchk_fblock_process_error(
 bool
 xchk_fblock_process_error(
 	struct xfs_scrub	*sc,
-	int				whichfork,
-	xfs_fileoff_t			offset,
-	int				*error)
+	int			whichfork,
+	xfs_fileoff_t		offset,
+	int			*error)
 {
 	return __xchk_fblock_process_error(sc, whichfork, offset, error,
 			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
@@ -164,9 +164,9 @@ xchk_fblock_process_error(
 bool
 xchk_fblock_xref_process_error(
 	struct xfs_scrub	*sc,
-	int				whichfork,
-	xfs_fileoff_t			offset,
-	int				*error)
+	int			whichfork,
+	xfs_fileoff_t		offset,
+	int			*error)
 {
 	return __xchk_fblock_process_error(sc, whichfork, offset, error,
 			XFS_SCRUB_OFLAG_XFAIL, __return_address);
@@ -188,7 +188,7 @@ xchk_fblock_xref_process_error(
 void
 xchk_block_set_preen(
 	struct xfs_scrub	*sc,
-	struct xfs_buf			*bp)
+	struct xfs_buf		*bp)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
 	trace_xchk_block_preen(sc, bp->b_bn, __return_address);
@@ -202,7 +202,7 @@ xchk_block_set_preen(
 void
 xchk_ino_set_preen(
 	struct xfs_scrub	*sc,
-	xfs_ino_t			ino)
+	xfs_ino_t		ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
 	trace_xchk_ino_preen(sc, ino, __return_address);
@@ -212,7 +212,7 @@ xchk_ino_set_preen(
 void
 xchk_block_set_corrupt(
 	struct xfs_scrub	*sc,
-	struct xfs_buf			*bp)
+	struct xfs_buf		*bp)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 	trace_xchk_block_error(sc, bp->b_bn, __return_address);
@@ -222,7 +222,7 @@ xchk_block_set_corrupt(
 void
 xchk_block_xref_set_corrupt(
 	struct xfs_scrub	*sc,
-	struct xfs_buf			*bp)
+	struct xfs_buf		*bp)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
 	trace_xchk_block_error(sc, bp->b_bn, __return_address);
@@ -236,7 +236,7 @@ xchk_block_xref_set_corrupt(
 void
 xchk_ino_set_corrupt(
 	struct xfs_scrub	*sc,
-	xfs_ino_t			ino)
+	xfs_ino_t		ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 	trace_xchk_ino_error(sc, ino, __return_address);
@@ -246,7 +246,7 @@ xchk_ino_set_corrupt(
 void
 xchk_ino_xref_set_corrupt(
 	struct xfs_scrub	*sc,
-	xfs_ino_t			ino)
+	xfs_ino_t		ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
 	trace_xchk_ino_error(sc, ino, __return_address);
@@ -256,8 +256,8 @@ xchk_ino_xref_set_corrupt(
 void
 xchk_fblock_set_corrupt(
 	struct xfs_scrub	*sc,
-	int				whichfork,
-	xfs_fileoff_t			offset)
+	int			whichfork,
+	xfs_fileoff_t		offset)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
@@ -267,8 +267,8 @@ xchk_fblock_set_corrupt(
 void
 xchk_fblock_xref_set_corrupt(
 	struct xfs_scrub	*sc,
-	int				whichfork,
-	xfs_fileoff_t			offset)
+	int			whichfork,
+	xfs_fileoff_t		offset)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
 	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
@@ -281,7 +281,7 @@ xchk_fblock_xref_set_corrupt(
 void
 xchk_ino_set_warning(
 	struct xfs_scrub	*sc,
-	xfs_ino_t			ino)
+	xfs_ino_t		ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
 	trace_xchk_ino_warning(sc, ino, __return_address);
@@ -291,8 +291,8 @@ xchk_ino_set_warning(
 void
 xchk_fblock_set_warning(
 	struct xfs_scrub	*sc,
-	int				whichfork,
-	xfs_fileoff_t			offset)
+	int			whichfork,
+	xfs_fileoff_t		offset)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
 	trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
@@ -319,13 +319,13 @@ struct xchk_rmap_ownedby_info {
 
 STATIC int
 xchk_count_rmap_ownedby_irec(
-	struct xfs_btree_cur			*cur,
-	struct xfs_rmap_irec			*rec,
-	void					*priv)
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
 {
-	struct xchk_rmap_ownedby_info		*sroi = priv;
-	bool					irec_attr;
-	bool					oinfo_attr;
+	struct xchk_rmap_ownedby_info	*sroi = priv;
+	bool				irec_attr;
+	bool				oinfo_attr;
 
 	irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
 	oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
@@ -346,11 +346,11 @@ xchk_count_rmap_ownedby_irec(
 int
 xchk_count_rmap_ownedby_ag(
 	struct xfs_scrub		*sc,
-	struct xfs_btree_cur			*cur,
-	struct xfs_owner_info			*oinfo,
-	xfs_filblks_t				*blocks)
+	struct xfs_btree_cur		*cur,
+	struct xfs_owner_info		*oinfo,
+	xfs_filblks_t			*blocks)
 {
-	struct xchk_rmap_ownedby_info		sroi;
+	struct xchk_rmap_ownedby_info	sroi;
 
 	sroi.oinfo = oinfo;
 	*blocks = 0;
@@ -372,7 +372,7 @@ xchk_count_rmap_ownedby_ag(
 static inline bool
 want_ag_read_header_failure(
 	struct xfs_scrub	*sc,
-	unsigned int			type)
+	unsigned int		type)
 {
 	/* Return all AG header read failures when scanning btrees. */
 	if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
@@ -399,13 +399,13 @@ want_ag_read_header_failure(
 int
 xchk_ag_read_headers(
 	struct xfs_scrub	*sc,
-	xfs_agnumber_t			agno,
-	struct xfs_buf			**agi,
-	struct xfs_buf			**agf,
-	struct xfs_buf			**agfl)
+	xfs_agnumber_t		agno,
+	struct xfs_buf		**agi,
+	struct xfs_buf		**agf,
+	struct xfs_buf		**agfl)
 {
-	struct xfs_mount		*mp = sc->mp;
-	int				error;
+	struct xfs_mount	*mp = sc->mp;
+	int			error;
 
 	error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi);
 	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
@@ -455,8 +455,8 @@ xchk_ag_btcur_init(
 	struct xfs_scrub	*sc,
 	struct xchk_ag		*sa)
 {
-	struct xfs_mount		*mp = sc->mp;
-	xfs_agnumber_t			agno = sa->agno;
+	struct xfs_mount	*mp = sc->mp;
+	xfs_agnumber_t		agno = sa->agno;
 
 	if (sa->agf_bp) {
 		/* Set up a bnobt cursor for cross-referencing. */
@@ -545,10 +545,10 @@ xchk_ag_free(
 int
 xchk_ag_init(
 	struct xfs_scrub	*sc,
-	xfs_agnumber_t			agno,
+	xfs_agnumber_t		agno,
 	struct xchk_ag		*sa)
 {
-	int				error;
+	int			error;
 
 	sa->agno = agno;
 	error = xchk_ag_read_headers(sc, agno, &sa->agi_bp,
@@ -566,7 +566,7 @@ xchk_ag_init(
 void
 xchk_perag_get(
 	struct xfs_mount	*mp,
-	struct xchk_ag	*sa)
+	struct xchk_ag		*sa)
 {
 	if (!sa->pag)
 		sa->pag = xfs_perag_get(mp, sa->agno);
@@ -587,7 +587,7 @@ xchk_perag_get(
 int
 xchk_trans_alloc(
 	struct xfs_scrub	*sc,
-	uint				resblks)
+	uint			resblks)
 {
 	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
 		return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
@@ -600,9 +600,9 @@ xchk_trans_alloc(
 int
 xchk_setup_fs(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
-	uint				resblks;
+	uint			resblks;
 
 	resblks = xrep_calc_ag_resblks(sc);
 	return xchk_trans_alloc(sc, resblks);
@@ -612,11 +612,11 @@ xchk_setup_fs(
 int
 xchk_setup_ag_btree(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip,
-	bool				force_log)
+	struct xfs_inode	*ip,
+	bool			force_log)
 {
-	struct xfs_mount		*mp = sc->mp;
-	int				error;
+	struct xfs_mount	*mp = sc->mp;
+	int			error;
 
 	/*
 	 * If the caller asks us to checkpont the log, do so.  This
@@ -659,12 +659,12 @@ xchk_checkpoint_log(
 int
 xchk_get_inode(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip_in)
+	struct xfs_inode	*ip_in)
 {
-	struct xfs_imap			imap;
-	struct xfs_mount		*mp = sc->mp;
-	struct xfs_inode		*ip = NULL;
-	int				error;
+	struct xfs_imap		imap;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_inode	*ip = NULL;
+	int			error;
 
 	/* We want to scan the inode we already had opened. */
 	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
@@ -723,10 +723,10 @@ xchk_get_inode(
 int
 xchk_setup_inode_contents(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip,
-	unsigned int			resblks)
+	struct xfs_inode	*ip,
+	unsigned int		resblks)
 {
-	int				error;
+	int			error;
 
 	error = xchk_get_inode(sc, ip);
 	if (error)
@@ -754,8 +754,8 @@ out:
 bool
 xchk_should_check_xref(
 	struct xfs_scrub	*sc,
-	int				*error,
-	struct xfs_btree_cur		**curpp)
+	int			*error,
+	struct xfs_btree_cur	**curpp)
 {
 	/* No point in xref if we already know we're corrupt. */
 	if (xchk_skip_xref(sc->sm))
@@ -789,9 +789,9 @@ xchk_should_check_xref(
 void
 xchk_buffer_recheck(
 	struct xfs_scrub	*sc,
-	struct xfs_buf			*bp)
+	struct xfs_buf		*bp)
 {
-	xfs_failaddr_t			fa;
+	xfs_failaddr_t		fa;
 
 	if (bp->b_ops == NULL) {
 		xchk_block_set_corrupt(sc, bp);
@@ -816,9 +816,9 @@ int
 xchk_metadata_inode_forks(
 	struct xfs_scrub	*sc)
 {
-	__u32				smtype;
-	bool				shared;
-	int				error;
+	__u32			smtype;
+	bool			shared;
+	int			error;
 
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return 0;
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index c321230d32dc..2d4324d12f9a 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -108,24 +108,21 @@ xchk_setup_quota(struct xfs_scrub *sc, struct xfs_inode *ip)
 
 void xchk_ag_free(struct xfs_scrub *sc, struct xchk_ag *sa);
 int xchk_ag_init(struct xfs_scrub *sc, xfs_agnumber_t agno,
-		      struct xchk_ag *sa);
+		struct xchk_ag *sa);
 void xchk_perag_get(struct xfs_mount *mp, struct xchk_ag *sa);
 int xchk_ag_read_headers(struct xfs_scrub *sc, xfs_agnumber_t agno,
-			      struct xfs_buf **agi, struct xfs_buf **agf,
-			      struct xfs_buf **agfl);
+		struct xfs_buf **agi, struct xfs_buf **agf,
+		struct xfs_buf **agfl);
 void xchk_ag_btcur_free(struct xchk_ag *sa);
-int xchk_ag_btcur_init(struct xfs_scrub *sc,
-			    struct xchk_ag *sa);
-int xchk_count_rmap_ownedby_ag(struct xfs_scrub *sc,
-				    struct xfs_btree_cur *cur,
-				    struct xfs_owner_info *oinfo,
-				    xfs_filblks_t *blocks);
+int xchk_ag_btcur_init(struct xfs_scrub *sc, struct xchk_ag *sa);
+int xchk_count_rmap_ownedby_ag(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
+		struct xfs_owner_info *oinfo, xfs_filblks_t *blocks);
 
-int xchk_setup_ag_btree(struct xfs_scrub *sc,
-			     struct xfs_inode *ip, bool force_log);
+int xchk_setup_ag_btree(struct xfs_scrub *sc, struct xfs_inode *ip,
+		bool force_log);
 int xchk_get_inode(struct xfs_scrub *sc, struct xfs_inode *ip_in);
-int xchk_setup_inode_contents(struct xfs_scrub *sc,
-				   struct xfs_inode *ip, unsigned int resblks);
+int xchk_setup_inode_contents(struct xfs_scrub *sc, struct xfs_inode *ip,
+		unsigned int resblks);
 void xchk_buffer_recheck(struct xfs_scrub *sc, struct xfs_buf *bp);
 
 /*
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 7fc12d540ea6..f1260b4bfdee 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -36,9 +36,9 @@
  */
 bool
 xchk_da_process_error(
-	struct xchk_da_btree		*ds,
-	int				level,
-	int				*error)
+	struct xchk_da_btree	*ds,
+	int			level,
+	int			*error)
 {
 	struct xfs_scrub	*sc = ds->sc;
 
@@ -72,8 +72,8 @@ xchk_da_process_error(
  */
 void
 xchk_da_set_corrupt(
-	struct xchk_da_btree		*ds,
-	int				level)
+	struct xchk_da_btree	*ds,
+	int			level)
 {
 	struct xfs_scrub	*sc = ds->sc;
 
@@ -88,13 +88,13 @@ xchk_da_set_corrupt(
 /* Find an entry at a certain level in a da btree. */
 STATIC void *
 xchk_da_btree_entry(
-	struct xchk_da_btree		*ds,
-	int				level,
-	int				rec)
+	struct xchk_da_btree	*ds,
+	int			level,
+	int			rec)
 {
-	char				*ents;
-	struct xfs_da_state_blk		*blk;
-	void				*baddr;
+	char			*ents;
+	struct xfs_da_state_blk	*blk;
+	void			*baddr;
 
 	/* Dispatch the entry finding function. */
 	blk = &ds->state->path.blk[level];
@@ -158,9 +158,9 @@ xchk_da_btree_hash(
  */
 STATIC bool
 xchk_da_btree_ptr_ok(
-	struct xchk_da_btree		*ds,
-	int				level,
-	xfs_dablk_t			blkno)
+	struct xchk_da_btree	*ds,
+	int			level,
+	xfs_dablk_t		blkno)
 {
 	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
 		xchk_da_set_corrupt(ds, level);
@@ -246,13 +246,13 @@ static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
 /* Check a block's sibling. */
 STATIC int
 xchk_da_btree_block_check_sibling(
-	struct xchk_da_btree		*ds,
-	int				level,
-	int				direction,
-	xfs_dablk_t			sibling)
+	struct xchk_da_btree	*ds,
+	int			level,
+	int			direction,
+	xfs_dablk_t		sibling)
 {
-	int				retval;
-	int				error;
+	int			retval;
+	int			error;
 
 	memcpy(&ds->state->altpath, &ds->state->path,
 			sizeof(ds->state->altpath));
@@ -294,13 +294,13 @@ out:
 /* Check a block's sibling pointers. */
 STATIC int
 xchk_da_btree_block_check_siblings(
-	struct xchk_da_btree		*ds,
-	int				level,
-	struct xfs_da_blkinfo		*hdr)
+	struct xchk_da_btree	*ds,
+	int			level,
+	struct xfs_da_blkinfo	*hdr)
 {
-	xfs_dablk_t			forw;
-	xfs_dablk_t			back;
-	int				error = 0;
+	xfs_dablk_t		forw;
+	xfs_dablk_t		back;
+	int			error = 0;
 
 	forw = be32_to_cpu(hdr->forw);
 	back = be32_to_cpu(hdr->back);
@@ -474,9 +474,9 @@ out_nobuf:
 /* Visit all nodes and leaves of a da btree. */
 int
 xchk_da_btree(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	int				whichfork,
-	xchk_da_btree_rec_fn	scrub_fn,
+	xchk_da_btree_rec_fn		scrub_fn,
 	void				*private)
 {
 	struct xchk_da_btree		ds = {};
diff --git a/fs/xfs/scrub/dabtree.h b/fs/xfs/scrub/dabtree.h
index a15c03389e8f..cb3f0003245b 100644
--- a/fs/xfs/scrub/dabtree.h
+++ b/fs/xfs/scrub/dabtree.h
@@ -9,12 +9,12 @@
 /* dir/attr btree */
 
 struct xchk_da_btree {
-	struct xfs_da_args		dargs;
-	xfs_dahash_t			hashes[XFS_DA_NODE_MAXDEPTH];
-	int				maxrecs[XFS_DA_NODE_MAXDEPTH];
-	struct xfs_da_state		*state;
+	struct xfs_da_args	dargs;
+	xfs_dahash_t		hashes[XFS_DA_NODE_MAXDEPTH];
+	int			maxrecs[XFS_DA_NODE_MAXDEPTH];
+	struct xfs_da_state	*state;
 	struct xfs_scrub	*sc;
-	void				*private;
+	void			*private;
 
 	/*
 	 * Lowest and highest directory block address in which we expect
@@ -22,10 +22,10 @@ struct xchk_da_btree {
 	 * (presumably) means between LEAF_OFFSET and FREE_OFFSET; for
 	 * attributes there is no limit.
 	 */
-	xfs_dablk_t			lowest;
-	xfs_dablk_t			highest;
+	xfs_dablk_t		lowest;
+	xfs_dablk_t		highest;
 
-	int				tree_level;
+	int			tree_level;
 };
 
 typedef int (*xchk_da_btree_rec_fn)(struct xchk_da_btree *ds,
@@ -37,9 +37,8 @@ bool xchk_da_process_error(struct xchk_da_btree *ds, int level, int *error);
 /* Check for da btree corruption. */
 void xchk_da_set_corrupt(struct xchk_da_btree *ds, int level);
 
-int xchk_da_btree_hash(struct xchk_da_btree *ds, int level,
-			    __be32 *hashp);
+int xchk_da_btree_hash(struct xchk_da_btree *ds, int level, __be32 *hashp);
 int xchk_da_btree(struct xfs_scrub *sc, int whichfork,
-		       xchk_da_btree_rec_fn scrub_fn, void *private);
+		xchk_da_btree_rec_fn scrub_fn, void *private);
 
 #endif /* __XFS_SCRUB_DABTREE_H__ */
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 194a3ef69a9f..f58709052b03 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -33,7 +33,7 @@
 int
 xchk_setup_directory(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
 	return xchk_setup_inode_contents(sc, ip, 0);
 }
@@ -44,7 +44,7 @@ xchk_setup_directory(
 
 struct xchk_dir_ctx {
 	/* VFS fill-directory iterator */
-	struct dir_context		dir_iter;
+	struct dir_context	dir_iter;
 
 	struct xfs_scrub	*sc;
 };
@@ -52,15 +52,15 @@ struct xchk_dir_ctx {
 /* Check that an inode's mode matches a given DT_ type. */
 STATIC int
 xchk_dir_check_ftype(
-	struct xchk_dir_ctx		*sdc,
-	xfs_fileoff_t			offset,
-	xfs_ino_t			inum,
-	int				dtype)
+	struct xchk_dir_ctx	*sdc,
+	xfs_fileoff_t		offset,
+	xfs_ino_t		inum,
+	int			dtype)
 {
-	struct xfs_mount		*mp = sdc->sc->mp;
-	struct xfs_inode		*ip;
-	int				ino_dtype;
-	int				error = 0;
+	struct xfs_mount	*mp = sdc->sc->mp;
+	struct xfs_inode	*ip;
+	int			ino_dtype;
+	int			error = 0;
 
 	if (!xfs_sb_version_hasftype(&mp->m_sb)) {
 		if (dtype != DT_UNKNOWN && dtype != DT_DIR)
@@ -102,20 +102,20 @@ out:
  */
 STATIC int
 xchk_dir_actor(
-	struct dir_context		*dir_iter,
-	const char			*name,
-	int				namelen,
-	loff_t				pos,
-	u64				ino,
-	unsigned			type)
+	struct dir_context	*dir_iter,
+	const char		*name,
+	int			namelen,
+	loff_t			pos,
+	u64			ino,
+	unsigned		type)
 {
-	struct xfs_mount		*mp;
-	struct xfs_inode		*ip;
-	struct xchk_dir_ctx		*sdc;
-	struct xfs_name			xname;
-	xfs_ino_t			lookup_ino;
-	xfs_dablk_t			offset;
-	int				error = 0;
+	struct xfs_mount	*mp;
+	struct xfs_inode	*ip;
+	struct xchk_dir_ctx	*sdc;
+	struct xfs_name		xname;
+	xfs_ino_t		lookup_ino;
+	xfs_dablk_t		offset;
+	int			error = 0;
 
 	sdc = container_of(dir_iter, struct xchk_dir_ctx, dir_iter);
 	ip = sdc->sc->ip;
@@ -289,7 +289,7 @@ out:
  */
 STATIC void
 xchk_directory_check_free_entry(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	xfs_dablk_t			lblk,
 	struct xfs_dir2_data_free	*bf,
 	struct xfs_dir2_data_unused	*dup)
@@ -314,7 +314,7 @@ xchk_directory_check_free_entry(
 /* Check free space info in a directory data block. */
 STATIC int
 xchk_directory_data_bestfree(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	xfs_dablk_t			lblk,
 	bool				is_block)
 {
@@ -455,7 +455,7 @@ out:
  */
 STATIC void
 xchk_directory_check_freesp(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	xfs_dablk_t			lblk,
 	struct xfs_buf			*dbp,
 	unsigned int			len)
@@ -474,7 +474,7 @@ xchk_directory_check_freesp(
 /* Check free space info in a directory leaf1 block. */
 STATIC int
 xchk_directory_leaf1_bestfree(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	struct xfs_da_args		*args,
 	xfs_dablk_t			lblk)
 {
@@ -572,7 +572,7 @@ out:
 /* Check free space info in a directory freespace block. */
 STATIC int
 xchk_directory_free_bestfree(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	struct xfs_da_args		*args,
 	xfs_dablk_t			lblk)
 {
@@ -628,18 +628,18 @@ STATIC int
 xchk_directory_blocks(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_bmbt_irec		got;
-	struct xfs_da_args		args;
-	struct xfs_ifork		*ifp;
-	struct xfs_mount		*mp = sc->mp;
-	xfs_fileoff_t			leaf_lblk;
-	xfs_fileoff_t			free_lblk;
-	xfs_fileoff_t			lblk;
-	struct xfs_iext_cursor		icur;
-	xfs_dablk_t			dabno;
-	bool				found;
-	int				is_block = 0;
-	int				error;
+	struct xfs_bmbt_irec	got;
+	struct xfs_da_args	args;
+	struct xfs_ifork	*ifp;
+	struct xfs_mount	*mp = sc->mp;
+	xfs_fileoff_t		leaf_lblk;
+	xfs_fileoff_t		free_lblk;
+	xfs_fileoff_t		lblk;
+	struct xfs_iext_cursor	icur;
+	xfs_dablk_t		dabno;
+	bool			found;
+	int			is_block = 0;
+	int			error;
 
 	/* Ignore local format directories. */
 	if (sc->ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
@@ -772,14 +772,14 @@ int
 xchk_directory(
 	struct xfs_scrub	*sc)
 {
-	struct xchk_dir_ctx		sdc = {
+	struct xchk_dir_ctx	sdc = {
 		.dir_iter.actor = xchk_dir_actor,
 		.dir_iter.pos = 0,
 		.sc = sc,
 	};
-	size_t				bufsize;
-	loff_t				oldpos;
-	int				error = 0;
+	size_t			bufsize;
+	loff_t			oldpos;
+	int			error = 0;
 
 	if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
 		return -ENOENT;
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 6df8eba9f52b..224dba937492 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -37,7 +37,7 @@
 int
 xchk_setup_ag_iallocbt(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
 	return xchk_setup_ag_btree(sc, ip, sc->try_harder);
 }
@@ -51,7 +51,7 @@ xchk_setup_ag_iallocbt(
  */
 static inline void
 xchk_iallocbt_chunk_xref_other(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	struct xfs_inobt_rec_incore	*irec,
 	xfs_agino_t			agino)
 {
@@ -76,7 +76,7 @@ xchk_iallocbt_chunk_xref_other(
 /* Cross-reference with the other btrees. */
 STATIC void
 xchk_iallocbt_chunk_xref(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	struct xfs_inobt_rec_incore	*irec,
 	xfs_agino_t			agino,
 	xfs_agblock_t			agbno,
@@ -364,13 +364,13 @@ out:
 STATIC void
 xchk_iallocbt_xref_rmap_btreeblks(
 	struct xfs_scrub	*sc,
-	int				which)
+	int			which)
 {
-	struct xfs_owner_info		oinfo;
-	xfs_filblks_t			blocks;
-	xfs_extlen_t			inobt_blocks = 0;
-	xfs_extlen_t			finobt_blocks = 0;
-	int				error;
+	struct xfs_owner_info	oinfo;
+	xfs_filblks_t		blocks;
+	xfs_extlen_t		inobt_blocks = 0;
+	xfs_extlen_t		finobt_blocks = 0;
+	int			error;
 
 	if (!sc->sa.ino_cur || !sc->sa.rmap_cur ||
 	    (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur) ||
@@ -404,12 +404,12 @@ xchk_iallocbt_xref_rmap_btreeblks(
 STATIC void
 xchk_iallocbt_xref_rmap_inodes(
 	struct xfs_scrub	*sc,
-	int				which,
-	xfs_filblks_t			inode_blocks)
+	int			which,
+	xfs_filblks_t		inode_blocks)
 {
-	struct xfs_owner_info		oinfo;
-	xfs_filblks_t			blocks;
-	int				error;
+	struct xfs_owner_info	oinfo;
+	xfs_filblks_t		blocks;
+	int			error;
 
 	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
 		return;
@@ -428,12 +428,12 @@ xchk_iallocbt_xref_rmap_inodes(
 STATIC int
 xchk_iallocbt(
 	struct xfs_scrub	*sc,
-	xfs_btnum_t			which)
+	xfs_btnum_t		which)
 {
-	struct xfs_btree_cur		*cur;
-	struct xfs_owner_info		oinfo;
-	xfs_filblks_t			inode_blocks = 0;
-	int				error;
+	struct xfs_btree_cur	*cur;
+	struct xfs_owner_info	oinfo;
+	xfs_filblks_t		inode_blocks = 0;
+	int			error;
 
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
 	cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
@@ -475,13 +475,13 @@ xchk_finobt(
 static inline void
 xchk_xref_inode_check(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			agbno,
-	xfs_extlen_t			len,
-	struct xfs_btree_cur		**icur,
-	bool				should_have_inodes)
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len,
+	struct xfs_btree_cur	**icur,
+	bool			should_have_inodes)
 {
-	bool				has_inodes;
-	int				error;
+	bool			has_inodes;
+	int			error;
 
 	if (!(*icur) || xchk_skip_xref(sc->sm))
 		return;
@@ -497,8 +497,8 @@ xchk_xref_inode_check(
 void
 xchk_xref_is_not_inode_chunk(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			agbno,
-	xfs_extlen_t			len)
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len)
 {
 	xchk_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false);
 	xchk_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false);
@@ -508,8 +508,8 @@ xchk_xref_is_not_inode_chunk(
 void
 xchk_xref_is_inode_chunk(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			agbno,
-	xfs_extlen_t			len)
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len)
 {
 	xchk_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true);
 }
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 6cc027983c13..5b3b177c0fc9 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -39,9 +39,9 @@
 int
 xchk_setup_inode(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
-	int				error;
+	int			error;
 
 	/*
 	 * Try to get the inode.  If the verifiers fail, we try again
@@ -78,12 +78,12 @@ out:
 STATIC void
 xchk_inode_extsize(
 	struct xfs_scrub	*sc,
-	struct xfs_dinode		*dip,
-	xfs_ino_t			ino,
-	uint16_t			mode,
-	uint16_t			flags)
+	struct xfs_dinode	*dip,
+	xfs_ino_t		ino,
+	uint16_t		mode,
+	uint16_t		flags)
 {
-	xfs_failaddr_t			fa;
+	xfs_failaddr_t		fa;
 
 	fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize),
 			mode, flags);
@@ -100,13 +100,13 @@ xchk_inode_extsize(
 STATIC void
 xchk_inode_cowextsize(
 	struct xfs_scrub	*sc,
-	struct xfs_dinode		*dip,
-	xfs_ino_t			ino,
-	uint16_t			mode,
-	uint16_t			flags,
-	uint64_t			flags2)
+	struct xfs_dinode	*dip,
+	xfs_ino_t		ino,
+	uint16_t		mode,
+	uint16_t		flags,
+	uint64_t		flags2)
 {
-	xfs_failaddr_t			fa;
+	xfs_failaddr_t		fa;
 
 	fa = xfs_inode_validate_cowextsize(sc->mp,
 			be32_to_cpu(dip->di_cowextsize), mode, flags,
@@ -119,12 +119,12 @@ xchk_inode_cowextsize(
 STATIC void
 xchk_inode_flags(
 	struct xfs_scrub	*sc,
-	struct xfs_dinode		*dip,
-	xfs_ino_t			ino,
-	uint16_t			mode,
-	uint16_t			flags)
+	struct xfs_dinode	*dip,
+	xfs_ino_t		ino,
+	uint16_t		mode,
+	uint16_t		flags)
 {
-	struct xfs_mount		*mp = sc->mp;
+	struct xfs_mount	*mp = sc->mp;
 
 	if (flags & ~XFS_DIFLAG_ANY)
 		goto bad;
@@ -164,13 +164,13 @@ bad:
 STATIC void
 xchk_inode_flags2(
 	struct xfs_scrub	*sc,
-	struct xfs_dinode		*dip,
-	xfs_ino_t			ino,
-	uint16_t			mode,
-	uint16_t			flags,
-	uint64_t			flags2)
+	struct xfs_dinode	*dip,
+	xfs_ino_t		ino,
+	uint16_t		mode,
+	uint16_t		flags,
+	uint64_t		flags2)
 {
-	struct xfs_mount		*mp = sc->mp;
+	struct xfs_mount	*mp = sc->mp;
 
 	if (flags2 & ~XFS_DIFLAG2_ANY)
 		goto bad;
@@ -207,16 +207,16 @@ bad:
 STATIC void
 xchk_dinode(
 	struct xfs_scrub	*sc,
-	struct xfs_dinode		*dip,
-	xfs_ino_t			ino)
+	struct xfs_dinode	*dip,
+	xfs_ino_t		ino)
 {
-	struct xfs_mount		*mp = sc->mp;
-	size_t				fork_recs;
-	unsigned long long		isize;
-	uint64_t			flags2;
-	uint32_t			nextents;
-	uint16_t			flags;
-	uint16_t			mode;
+	struct xfs_mount	*mp = sc->mp;
+	size_t			fork_recs;
+	unsigned long long	isize;
+	uint64_t		flags2;
+	uint32_t		nextents;
+	uint16_t		flags;
+	uint16_t		mode;
 
 	flags = be16_to_cpu(dip->di_flags);
 	if (dip->di_version >= 3)
@@ -426,7 +426,7 @@ xchk_dinode(
  */
 static void
 xchk_inode_xref_finobt(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	xfs_ino_t			ino)
 {
 	struct xfs_inobt_rec_incore	rec;
@@ -470,12 +470,12 @@ xchk_inode_xref_finobt(
 STATIC void
 xchk_inode_xref_bmap(
 	struct xfs_scrub	*sc,
-	struct xfs_dinode		*dip)
+	struct xfs_dinode	*dip)
 {
-	xfs_extnum_t			nextents;
-	xfs_filblks_t			count;
-	xfs_filblks_t			acount;
-	int				error;
+	xfs_extnum_t		nextents;
+	xfs_filblks_t		count;
+	xfs_filblks_t		acount;
+	int			error;
 
 	if (xchk_skip_xref(sc->sm))
 		return;
@@ -504,13 +504,13 @@ xchk_inode_xref_bmap(
 STATIC void
 xchk_inode_xref(
 	struct xfs_scrub	*sc,
-	xfs_ino_t			ino,
-	struct xfs_dinode		*dip)
+	xfs_ino_t		ino,
+	struct xfs_dinode	*dip)
 {
-	struct xfs_owner_info		oinfo;
-	xfs_agnumber_t			agno;
-	xfs_agblock_t			agbno;
-	int				error;
+	struct xfs_owner_info	oinfo;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	int			error;
 
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
@@ -541,11 +541,11 @@ xchk_inode_xref(
 static void
 xchk_inode_check_reflink_iflag(
 	struct xfs_scrub	*sc,
-	xfs_ino_t			ino)
+	xfs_ino_t		ino)
 {
-	struct xfs_mount		*mp = sc->mp;
-	bool				has_shared;
-	int				error;
+	struct xfs_mount	*mp = sc->mp;
+	bool			has_shared;
+	int			error;
 
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return;
@@ -566,8 +566,8 @@ int
 xchk_inode(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_dinode		di;
-	int				error = 0;
+	struct xfs_dinode	di;
+	int			error = 0;
 
 	/*
 	 * If sc->ip is NULL, that means that the setup function called
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 808459ad0c35..aacb0284c48a 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -29,7 +29,7 @@
 int
 xchk_setup_parent(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
 	return xchk_setup_inode_contents(sc, ip, 0);
 }
@@ -39,22 +39,22 @@ xchk_setup_parent(
 /* Look for an entry in a parent pointing to this inode. */
 
 struct xchk_parent_ctx {
-	struct dir_context		dc;
-	xfs_ino_t			ino;
-	xfs_nlink_t			nlink;
+	struct dir_context	dc;
+	xfs_ino_t		ino;
+	xfs_nlink_t		nlink;
 };
 
 /* Look for a single entry in a directory pointing to an inode. */
 STATIC int
 xchk_parent_actor(
-	struct dir_context		*dc,
-	const char			*name,
-	int				namelen,
-	loff_t				pos,
-	u64				ino,
-	unsigned			type)
+	struct dir_context	*dc,
+	const char		*name,
+	int			namelen,
+	loff_t			pos,
+	u64			ino,
+	unsigned		type)
 {
-	struct xchk_parent_ctx		*spc;
+	struct xchk_parent_ctx	*spc;
 
 	spc = container_of(dc, struct xchk_parent_ctx, dc);
 	if (spc->ino == ino)
@@ -66,19 +66,19 @@ xchk_parent_actor(
 STATIC int
 xchk_parent_count_parent_dentries(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*parent,
-	xfs_nlink_t			*nlink)
+	struct xfs_inode	*parent,
+	xfs_nlink_t		*nlink)
 {
-	struct xchk_parent_ctx		spc = {
+	struct xchk_parent_ctx	spc = {
 		.dc.actor = xchk_parent_actor,
 		.dc.pos = 0,
 		.ino = sc->ip->i_ino,
 		.nlink = 0,
 	};
-	size_t				bufsize;
-	loff_t				oldpos;
-	uint				lock_mode;
-	int				error = 0;
+	size_t			bufsize;
+	loff_t			oldpos;
+	uint			lock_mode;
+	int			error = 0;
 
 	/*
 	 * If there are any blocks, read-ahead block 0 as we're almost
@@ -122,14 +122,14 @@ out:
 STATIC int
 xchk_parent_validate(
 	struct xfs_scrub	*sc,
-	xfs_ino_t			dnum,
-	bool				*try_again)
+	xfs_ino_t		dnum,
+	bool			*try_again)
 {
-	struct xfs_mount		*mp = sc->mp;
-	struct xfs_inode		*dp = NULL;
-	xfs_nlink_t			expected_nlink;
-	xfs_nlink_t			nlink;
-	int				error = 0;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_inode	*dp = NULL;
+	xfs_nlink_t		expected_nlink;
+	xfs_nlink_t		nlink;
+	int			error = 0;
 
 	*try_again = false;
 
@@ -257,11 +257,11 @@ int
 xchk_parent(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_mount		*mp = sc->mp;
-	xfs_ino_t			dnum;
-	bool				try_again;
-	int				tries = 0;
-	int				error = 0;
+	struct xfs_mount	*mp = sc->mp;
+	xfs_ino_t		dnum;
+	bool			try_again;
+	int			tries = 0;
+	int			error = 0;
 
 	/*
 	 * If we're a directory, check that the '..' link points up to
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 309ebeecfa5d..782d582d3edd 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -49,10 +49,10 @@ xchk_quota_to_dqtype(
 int
 xchk_setup_quota(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
-	uint				dqtype;
-	int				error;
+	uint			dqtype;
+	int			error;
 
 	if (!XFS_IS_QUOTA_RUNNING(sc->mp) || !XFS_IS_QUOTA_ON(sc->mp))
 		return -ENOENT;
@@ -77,33 +77,33 @@ xchk_setup_quota(
 
 struct xchk_quota_info {
 	struct xfs_scrub	*sc;
-	xfs_dqid_t			last_id;
+	xfs_dqid_t		last_id;
 };
 
 /* Scrub the fields in an individual quota item. */
 STATIC int
 xchk_quota_item(
-	struct xfs_dquot		*dq,
-	uint				dqtype,
-	void				*priv)
+	struct xfs_dquot	*dq,
+	uint			dqtype,
+	void			*priv)
 {
-	struct xchk_quota_info		*sqi = priv;
+	struct xchk_quota_info	*sqi = priv;
 	struct xfs_scrub	*sc = sqi->sc;
-	struct xfs_mount		*mp = sc->mp;
-	struct xfs_disk_dquot		*d = &dq->q_core;
-	struct xfs_quotainfo		*qi = mp->m_quotainfo;
-	xfs_fileoff_t			offset;
-	unsigned long long		bsoft;
-	unsigned long long		isoft;
-	unsigned long long		rsoft;
-	unsigned long long		bhard;
-	unsigned long long		ihard;
-	unsigned long long		rhard;
-	unsigned long long		bcount;
-	unsigned long long		icount;
-	unsigned long long		rcount;
-	xfs_ino_t			fs_icount;
-	xfs_dqid_t			id = be32_to_cpu(d->d_id);
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_disk_dquot	*d = &dq->q_core;
+	struct xfs_quotainfo	*qi = mp->m_quotainfo;
+	xfs_fileoff_t		offset;
+	unsigned long long	bsoft;
+	unsigned long long	isoft;
+	unsigned long long	rsoft;
+	unsigned long long	bhard;
+	unsigned long long	ihard;
+	unsigned long long	rhard;
+	unsigned long long	bcount;
+	unsigned long long	icount;
+	unsigned long long	rcount;
+	xfs_ino_t		fs_icount;
+	xfs_dqid_t		id = be32_to_cpu(d->d_id);
 
 	/*
 	 * Except for the root dquot, the actual dquot we got must either have
@@ -197,12 +197,12 @@ STATIC int
 xchk_quota_data_fork(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_bmbt_irec		irec = { 0 };
-	struct xfs_iext_cursor		icur;
-	struct xfs_quotainfo		*qi = sc->mp->m_quotainfo;
-	struct xfs_ifork		*ifp;
-	xfs_fileoff_t			max_dqid_off;
-	int				error = 0;
+	struct xfs_bmbt_irec	irec = { 0 };
+	struct xfs_iext_cursor	icur;
+	struct xfs_quotainfo	*qi = sc->mp->m_quotainfo;
+	struct xfs_ifork	*ifp;
+	xfs_fileoff_t		max_dqid_off;
+	int			error = 0;
 
 	/* Invoke the fork scrubber. */
 	error = xchk_metadata_inode_forks(sc);
@@ -236,11 +236,11 @@ int
 xchk_quota(
 	struct xfs_scrub	*sc)
 {
-	struct xchk_quota_info		sqi;
-	struct xfs_mount		*mp = sc->mp;
-	struct xfs_quotainfo		*qi = mp->m_quotainfo;
-	uint				dqtype;
-	int				error = 0;
+	struct xchk_quota_info	sqi;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_quotainfo	*qi = mp->m_quotainfo;
+	uint			dqtype;
+	int			error = 0;
 
 	dqtype = xchk_quota_to_dqtype(sc);
 
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index c1162d408987..e8c82b026083 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -30,7 +30,7 @@
 int
 xchk_setup_ag_refcountbt(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
 	return xchk_setup_ag_btree(sc, ip, false);
 }
@@ -74,21 +74,21 @@ xchk_setup_ag_refcountbt(
  * should always hold true.  If not, the refcount is incorrect.
  */
 struct xchk_refcnt_frag {
-	struct list_head		list;
-	struct xfs_rmap_irec		rm;
+	struct list_head	list;
+	struct xfs_rmap_irec	rm;
 };
 
 struct xchk_refcnt_check {
 	struct xfs_scrub	*sc;
-	struct list_head		fragments;
+	struct list_head	fragments;
 
 	/* refcount extent we're examining */
-	xfs_agblock_t			bno;
-	xfs_extlen_t			len;
-	xfs_nlink_t			refcount;
+	xfs_agblock_t		bno;
+	xfs_extlen_t		len;
+	xfs_nlink_t		refcount;
 
 	/* number of owners seen */
-	xfs_nlink_t			seen;
+	xfs_nlink_t		seen;
 };
 
 /*
@@ -278,7 +278,7 @@ done:
 /* Use the rmap entries covering this extent to verify the refcount. */
 STATIC void
 xchk_refcountbt_xref_rmap(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	xfs_agblock_t			bno,
 	xfs_extlen_t			len,
 	xfs_nlink_t			refcount)
@@ -326,9 +326,9 @@ out_free:
 STATIC void
 xchk_refcountbt_xref(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			agbno,
-	xfs_extlen_t			len,
-	xfs_nlink_t			refcount)
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len,
+	xfs_nlink_t		refcount)
 {
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
@@ -341,17 +341,17 @@ xchk_refcountbt_xref(
 /* Scrub a refcountbt record. */
 STATIC int
 xchk_refcountbt_rec(
-	struct xchk_btree		*bs,
-	union xfs_btree_rec		*rec)
+	struct xchk_btree	*bs,
+	union xfs_btree_rec	*rec)
 {
-	struct xfs_mount		*mp = bs->cur->bc_mp;
-	xfs_agblock_t			*cow_blocks = bs->private;
-	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
-	xfs_agblock_t			bno;
-	xfs_extlen_t			len;
-	xfs_nlink_t			refcount;
-	bool				has_cowflag;
-	int				error = 0;
+	struct xfs_mount	*mp = bs->cur->bc_mp;
+	xfs_agblock_t		*cow_blocks = bs->private;
+	xfs_agnumber_t		agno = bs->cur->bc_private.a.agno;
+	xfs_agblock_t		bno;
+	xfs_extlen_t		len;
+	xfs_nlink_t		refcount;
+	bool			has_cowflag;
+	int			error = 0;
 
 	bno = be32_to_cpu(rec->refc.rc_startblock);
 	len = be32_to_cpu(rec->refc.rc_blockcount);
@@ -383,12 +383,12 @@ xchk_refcountbt_rec(
 STATIC void
 xchk_refcount_xref_rmap(
 	struct xfs_scrub	*sc,
-	struct xfs_owner_info		*oinfo,
-	xfs_filblks_t			cow_blocks)
+	struct xfs_owner_info	*oinfo,
+	xfs_filblks_t		cow_blocks)
 {
-	xfs_extlen_t			refcbt_blocks = 0;
-	xfs_filblks_t			blocks;
-	int				error;
+	xfs_extlen_t		refcbt_blocks = 0;
+	xfs_filblks_t		blocks;
+	int			error;
 
 	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
 		return;
@@ -419,9 +419,9 @@ int
 xchk_refcountbt(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_owner_info		oinfo;
-	xfs_agblock_t			cow_blocks = 0;
-	int				error;
+	struct xfs_owner_info	oinfo;
+	xfs_agblock_t		cow_blocks = 0;
+	int			error;
 
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
 	error = xchk_btree(sc, sc->sa.refc_cur, xchk_refcountbt_rec,
@@ -437,7 +437,7 @@ xchk_refcountbt(
 /* xref check that a cow staging extent is marked in the refcountbt. */
 void
 xchk_xref_is_cow_staging(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	xfs_agblock_t			agbno,
 	xfs_extlen_t			len)
 {
@@ -484,11 +484,11 @@ xchk_xref_is_cow_staging(
 void
 xchk_xref_is_not_shared(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			agbno,
-	xfs_extlen_t			len)
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len)
 {
-	bool				shared;
-	int				error;
+	bool			shared;
+	int			error;
 
 	if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
 		return;
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 29debd5649ac..5de1cac424ec 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -42,11 +42,11 @@
  */
 int
 xrep_attempt(
-	struct xfs_inode		*ip,
+	struct xfs_inode	*ip,
 	struct xfs_scrub	*sc,
-	bool				*fixed)
+	bool			*fixed)
 {
-	int				error = 0;
+	int			error = 0;
 
 	trace_xrep_attempt(ip, sc->sm, error);
 
@@ -94,7 +94,7 @@ xrep_attempt(
  */
 void
 xrep_failure(
-	struct xfs_mount		*mp)
+	struct xfs_mount	*mp)
 {
 	xfs_alert_ratelimited(mp,
 "Corruption not fixed during online repair.  Unmount and run xfs_repair.");
@@ -108,7 +108,7 @@ int
 xrep_probe(
 	struct xfs_scrub	*sc)
 {
-	int				error = 0;
+	int			error = 0;
 
 	if (xchk_should_terminate(sc, &error))
 		return error;
@@ -124,7 +124,7 @@ int
 xrep_roll_ag_trans(
 	struct xfs_scrub	*sc)
 {
-	int				error;
+	int			error;
 
 	/* Keep the AG header buffers locked so we can keep going. */
 	xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
@@ -163,9 +163,9 @@ out_release:
  */
 bool
 xrep_ag_has_space(
-	struct xfs_perag		*pag,
-	xfs_extlen_t			nr_blocks,
-	enum xfs_ag_resv_type		type)
+	struct xfs_perag	*pag,
+	xfs_extlen_t		nr_blocks,
+	enum xfs_ag_resv_type	type)
 {
 	return  !xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) &&
 		!xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA) &&
@@ -179,7 +179,7 @@ xrep_ag_has_space(
  */
 xfs_extlen_t
 xrep_calc_ag_resblks(
-	struct xfs_scrub	*sc)
+	struct xfs_scrub		*sc)
 {
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_scrub_metadata	*sm = sc->sm;
@@ -280,13 +280,13 @@ xrep_calc_ag_resblks(
 int
 xrep_alloc_ag_block(
 	struct xfs_scrub	*sc,
-	struct xfs_owner_info		*oinfo,
-	xfs_fsblock_t			*fsbno,
-	enum xfs_ag_resv_type		resv)
+	struct xfs_owner_info	*oinfo,
+	xfs_fsblock_t		*fsbno,
+	enum xfs_ag_resv_type	resv)
 {
-	struct xfs_alloc_arg		args = {0};
-	xfs_agblock_t			bno;
-	int				error;
+	struct xfs_alloc_arg	args = {0};
+	xfs_agblock_t		bno;
+	int			error;
 
 	switch (resv) {
 	case XFS_AG_RESV_AGFL:
@@ -330,7 +330,7 @@ xrep_alloc_ag_block(
 /* Initialize a new AG btree root block with zero entries. */
 int
 xrep_init_btblock(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	xfs_fsblock_t			fsb,
 	struct xfs_buf			**bpp,
 	xfs_btnum_t			btnum,
@@ -386,11 +386,11 @@ xrep_init_btblock(
 int
 xrep_collect_btree_extent(
 	struct xfs_scrub	*sc,
-	struct xrep_extent_list		*exlist,
-	xfs_fsblock_t			fsbno,
-	xfs_extlen_t			len)
+	struct xrep_extent_list	*exlist,
+	xfs_fsblock_t		fsbno,
+	xfs_extlen_t		len)
 {
-	struct xrep_extent		*rex;
+	struct xrep_extent	*rex;
 
 	trace_xrep_collect_btree_extent(sc->mp,
 			XFS_FSB_TO_AGNO(sc->mp, fsbno),
@@ -416,10 +416,10 @@ xrep_collect_btree_extent(
 void
 xrep_cancel_btree_extents(
 	struct xfs_scrub	*sc,
-	struct xrep_extent_list		*exlist)
+	struct xrep_extent_list	*exlist)
 {
-	struct xrep_extent		*rex;
-	struct xrep_extent		*n;
+	struct xrep_extent	*rex;
+	struct xrep_extent	*n;
 
 	for_each_xrep_extent_safe(rex, n, exlist) {
 		list_del(&rex->list);
@@ -430,12 +430,12 @@ xrep_cancel_btree_extents(
 /* Compare two btree extents. */
 static int
 xrep_btree_extent_cmp(
-	void				*priv,
-	struct list_head		*a,
-	struct list_head		*b)
+	void			*priv,
+	struct list_head	*a,
+	struct list_head	*b)
 {
-	struct xrep_extent		*ap;
-	struct xrep_extent		*bp;
+	struct xrep_extent	*ap;
+	struct xrep_extent	*bp;
 
 	ap = container_of(a, struct xrep_extent, list);
 	bp = container_of(b, struct xrep_extent, list);
@@ -464,17 +464,17 @@ xrep_btree_extent_cmp(
 int
 xrep_subtract_extents(
 	struct xfs_scrub	*sc,
-	struct xrep_extent_list		*exlist,
-	struct xrep_extent_list		*sublist)
+	struct xrep_extent_list	*exlist,
+	struct xrep_extent_list	*sublist)
 {
-	struct list_head		*lp;
-	struct xrep_extent		*ex;
-	struct xrep_extent		*newex;
-	struct xrep_extent		*subex;
-	xfs_fsblock_t			sub_fsb;
-	xfs_extlen_t			sub_len;
-	int				state;
-	int				error = 0;
+	struct list_head	*lp;
+	struct xrep_extent	*ex;
+	struct xrep_extent	*newex;
+	struct xrep_extent	*subex;
+	xfs_fsblock_t		sub_fsb;
+	xfs_extlen_t		sub_len;
+	int			state;
+	int			error = 0;
 
 	if (list_empty(&exlist->list) || list_empty(&sublist->list))
 		return 0;
@@ -621,13 +621,13 @@ out:
 int
 xrep_invalidate_blocks(
 	struct xfs_scrub	*sc,
-	struct xrep_extent_list		*exlist)
+	struct xrep_extent_list	*exlist)
 {
-	struct xrep_extent		*rex;
-	struct xrep_extent		*n;
-	struct xfs_buf			*bp;
-	xfs_fsblock_t			fsbno;
-	xfs_agblock_t			i;
+	struct xrep_extent	*rex;
+	struct xrep_extent	*n;
+	struct xfs_buf		*bp;
+	xfs_fsblock_t		fsbno;
+	xfs_agblock_t		i;
 
 	/*
 	 * For each block in each extent, see if there's an incore buffer for
@@ -659,9 +659,9 @@ xrep_invalidate_blocks(
 int
 xrep_fix_freelist(
 	struct xfs_scrub	*sc,
-	bool				can_shrink)
+	bool			can_shrink)
 {
-	struct xfs_alloc_arg		args = {0};
+	struct xfs_alloc_arg	args = {0};
 
 	args.mp = sc->mp;
 	args.tp = sc->tp;
@@ -679,10 +679,10 @@ xrep_fix_freelist(
 STATIC int
 xrep_put_freelist(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			agbno)
+	xfs_agblock_t		agbno)
 {
-	struct xfs_owner_info		oinfo;
-	int				error;
+	struct xfs_owner_info	oinfo;
+	int			error;
 
 	/* Make sure there's space on the freelist. */
 	error = xrep_fix_freelist(sc, true);
@@ -715,16 +715,16 @@ xrep_put_freelist(
 STATIC int
 xrep_dispose_btree_block(
 	struct xfs_scrub	*sc,
-	xfs_fsblock_t			fsbno,
-	struct xfs_owner_info		*oinfo,
-	enum xfs_ag_resv_type		resv)
+	xfs_fsblock_t		fsbno,
+	struct xfs_owner_info	*oinfo,
+	enum xfs_ag_resv_type	resv)
 {
-	struct xfs_btree_cur		*cur;
-	struct xfs_buf			*agf_bp = NULL;
-	xfs_agnumber_t			agno;
-	xfs_agblock_t			agbno;
-	bool				has_other_rmap;
-	int				error;
+	struct xfs_btree_cur	*cur;
+	struct xfs_buf		*agf_bp = NULL;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	bool			has_other_rmap;
+	int			error;
 
 	agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
 	agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
@@ -789,13 +789,13 @@ out_free:
 int
 xrep_reap_btree_extents(
 	struct xfs_scrub	*sc,
-	struct xrep_extent_list		*exlist,
-	struct xfs_owner_info		*oinfo,
-	enum xfs_ag_resv_type		type)
+	struct xrep_extent_list	*exlist,
+	struct xfs_owner_info	*oinfo,
+	enum xfs_ag_resv_type	type)
 {
-	struct xrep_extent		*rex;
-	struct xrep_extent		*n;
-	int				error = 0;
+	struct xrep_extent	*rex;
+	struct xrep_extent	*n;
+	int			error = 0;
 
 	ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));
 
@@ -851,7 +851,7 @@ out:
  */
 
 struct xrep_findroot {
-	struct xfs_scrub	*sc;
+	struct xfs_scrub		*sc;
 	struct xfs_buf			*agfl_bp;
 	struct xfs_agf			*agf;
 	struct xrep_find_ag_btree	*btree_info;
@@ -860,11 +860,11 @@ struct xrep_findroot {
 /* See if our block is in the AGFL. */
 STATIC int
 xrep_findroot_agfl_walk(
-	struct xfs_mount		*mp,
-	xfs_agblock_t			bno,
-	void				*priv)
+	struct xfs_mount	*mp,
+	xfs_agblock_t		bno,
+	void			*priv)
 {
-	xfs_agblock_t			*agbno = priv;
+	xfs_agblock_t		*agbno = priv;
 
 	return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0;
 }
@@ -981,7 +981,7 @@ xrep_findroot_rmap(
 /* Find the roots of the per-AG btrees described in btree_info. */
 int
 xrep_find_ag_btree_roots(
-	struct xfs_scrub	*sc,
+	struct xfs_scrub		*sc,
 	struct xfs_buf			*agf_bp,
 	struct xrep_find_ag_btree	*btree_info,
 	struct xfs_buf			*agfl_bp)
@@ -1017,9 +1017,9 @@ xrep_find_ag_btree_roots(
 void
 xrep_force_quotacheck(
 	struct xfs_scrub	*sc,
-	uint				dqtype)
+	uint			dqtype)
 {
-	uint				flag;
+	uint			flag;
 
 	flag = xfs_quota_chkd_flag(dqtype);
 	if (!(flag & sc->mp->m_qflags))
@@ -1046,7 +1046,7 @@ int
 xrep_ino_dqattach(
 	struct xfs_scrub	*sc)
 {
-	int				error;
+	int			error;
 
 	error = xfs_qm_dqattach_locked(sc->ip, false);
 	switch (error) {
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 677f4b73b5ec..91355f6b0087 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -15,33 +15,31 @@ static inline int xrep_notsupported(struct xfs_scrub *sc)
 
 /* Repair helpers */
 
-int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub *sc,
-		bool *fixed);
+int xrep_attempt(struct xfs_inode *ip, struct xfs_scrub *sc, bool *fixed);
 void xrep_failure(struct xfs_mount *mp);
 int xrep_roll_ag_trans(struct xfs_scrub *sc);
 bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
 		enum xfs_ag_resv_type type);
 xfs_extlen_t xrep_calc_ag_resblks(struct xfs_scrub *sc);
-int xrep_alloc_ag_block(struct xfs_scrub *sc,
-		struct xfs_owner_info *oinfo, xfs_fsblock_t *fsbno,
-		enum xfs_ag_resv_type resv);
+int xrep_alloc_ag_block(struct xfs_scrub *sc, struct xfs_owner_info *oinfo,
+		xfs_fsblock_t *fsbno, enum xfs_ag_resv_type resv);
 int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb,
 		struct xfs_buf **bpp, xfs_btnum_t btnum,
 		const struct xfs_buf_ops *ops);
 
 struct xrep_extent {
-	struct list_head		list;
-	xfs_fsblock_t			fsbno;
-	xfs_extlen_t			len;
+	struct list_head	list;
+	xfs_fsblock_t		fsbno;
+	xfs_extlen_t		len;
 };
 
 struct xrep_extent_list {
-	struct list_head		list;
+	struct list_head	list;
 };
 
 static inline void
 xrep_init_extent_list(
-	struct xrep_extent_list		*exlist)
+	struct xrep_extent_list	*exlist)
 {
 	INIT_LIST_HEAD(&exlist->list);
 }
@@ -78,10 +76,8 @@ struct xrep_find_ag_btree {
 	unsigned int			height;
 };
 
-int xrep_find_ag_btree_roots(struct xfs_scrub *sc,
-		struct xfs_buf *agf_bp,
-		struct xrep_find_ag_btree *btree_info,
-		struct xfs_buf *agfl_bp);
+int xrep_find_ag_btree_roots(struct xfs_scrub *sc, struct xfs_buf *agf_bp,
+		struct xrep_find_ag_btree *btree_info, struct xfs_buf *agfl_bp);
 void xrep_force_quotacheck(struct xfs_scrub *sc, uint dqtype);
 int xrep_ino_dqattach(struct xfs_scrub *sc);
 
@@ -93,9 +89,9 @@ int xrep_superblock(struct xfs_scrub *sc);
 #else
 
 static inline int xrep_attempt(
-	struct xfs_inode		*ip,
+	struct xfs_inode	*ip,
 	struct xfs_scrub	*sc,
-	bool				*fixed)
+	bool			*fixed)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index dc9c91a706ff..5e293c129813 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -31,7 +31,7 @@
 int
 xchk_setup_ag_rmapbt(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
 	return xchk_setup_ag_btree(sc, ip, false);
 }
@@ -42,15 +42,15 @@ xchk_setup_ag_rmapbt(
 STATIC void
 xchk_rmapbt_xref_refc(
 	struct xfs_scrub	*sc,
-	struct xfs_rmap_irec		*irec)
+	struct xfs_rmap_irec	*irec)
 {
-	xfs_agblock_t			fbno;
-	xfs_extlen_t			flen;
-	bool				non_inode;
-	bool				is_bmbt;
-	bool				is_attr;
-	bool				is_unwritten;
-	int				error;
+	xfs_agblock_t		fbno;
+	xfs_extlen_t		flen;
+	bool			non_inode;
+	bool			is_bmbt;
+	bool			is_attr;
+	bool			is_unwritten;
+	int			error;
 
 	if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
 		return;
@@ -73,10 +73,10 @@ xchk_rmapbt_xref_refc(
 STATIC void
 xchk_rmapbt_xref(
 	struct xfs_scrub	*sc,
-	struct xfs_rmap_irec		*irec)
+	struct xfs_rmap_irec	*irec)
 {
-	xfs_agblock_t			agbno = irec->rm_startblock;
-	xfs_extlen_t			len = irec->rm_blockcount;
+	xfs_agblock_t		agbno = irec->rm_startblock;
+	xfs_extlen_t		len = irec->rm_blockcount;
 
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return;
@@ -96,17 +96,17 @@ xchk_rmapbt_xref(
 /* Scrub an rmapbt record. */
 STATIC int
 xchk_rmapbt_rec(
-	struct xchk_btree		*bs,
-	union xfs_btree_rec		*rec)
+	struct xchk_btree	*bs,
+	union xfs_btree_rec	*rec)
 {
-	struct xfs_mount		*mp = bs->cur->bc_mp;
-	struct xfs_rmap_irec		irec;
-	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
-	bool				non_inode;
-	bool				is_unwritten;
-	bool				is_bmbt;
-	bool				is_attr;
-	int				error;
+	struct xfs_mount	*mp = bs->cur->bc_mp;
+	struct xfs_rmap_irec	irec;
+	xfs_agnumber_t		agno = bs->cur->bc_private.a.agno;
+	bool			non_inode;
+	bool			is_unwritten;
+	bool			is_bmbt;
+	bool			is_attr;
+	int			error;
 
 	error = xfs_rmap_btrec_to_irec(rec, &irec);
 	if (!xchk_btree_process_error(bs->sc, bs->cur, 0, &error))
@@ -174,7 +174,7 @@ int
 xchk_rmapbt(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_owner_info		oinfo;
+	struct xfs_owner_info	oinfo;
 
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
 	return xchk_btree(sc, sc->sa.rmap_cur, xchk_rmapbt_rec,
@@ -185,13 +185,13 @@ xchk_rmapbt(
 static inline void
 xchk_xref_check_owner(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			bno,
-	xfs_extlen_t			len,
-	struct xfs_owner_info		*oinfo,
-	bool				should_have_rmap)
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	struct xfs_owner_info	*oinfo,
+	bool			should_have_rmap)
 {
-	bool				has_rmap;
-	int				error;
+	bool			has_rmap;
+	int			error;
 
 	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
 		return;
@@ -208,9 +208,9 @@ xchk_xref_check_owner(
 void
 xchk_xref_is_owned_by(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			bno,
-	xfs_extlen_t			len,
-	struct xfs_owner_info		*oinfo)
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	struct xfs_owner_info	*oinfo)
 {
 	xchk_xref_check_owner(sc, bno, len, oinfo, true);
 }
@@ -219,9 +219,9 @@ xchk_xref_is_owned_by(
 void
 xchk_xref_is_not_owned_by(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			bno,
-	xfs_extlen_t			len,
-	struct xfs_owner_info		*oinfo)
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	struct xfs_owner_info	*oinfo)
 {
 	xchk_xref_check_owner(sc, bno, len, oinfo, false);
 }
@@ -230,11 +230,11 @@ xchk_xref_is_not_owned_by(
 void
 xchk_xref_has_no_owner(
 	struct xfs_scrub	*sc,
-	xfs_agblock_t			bno,
-	xfs_extlen_t			len)
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len)
 {
-	bool				has_rmap;
-	int				error;
+	bool			has_rmap;
+	int			error;
 
 	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
 		return;
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 653a809bba34..665d4bbb17cc 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -27,9 +27,9 @@
 int
 xchk_setup_rt(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
-	int				error;
+	int			error;
 
 	error = xchk_setup_fs(sc, ip);
 	if (error)
@@ -47,13 +47,13 @@ xchk_setup_rt(
 /* Scrub a free extent record from the realtime bitmap. */
 STATIC int
 xchk_rtbitmap_rec(
-	struct xfs_trans		*tp,
-	struct xfs_rtalloc_rec		*rec,
-	void				*priv)
+	struct xfs_trans	*tp,
+	struct xfs_rtalloc_rec	*rec,
+	void			*priv)
 {
 	struct xfs_scrub	*sc = priv;
-	xfs_rtblock_t			startblock;
-	xfs_rtblock_t			blockcount;
+	xfs_rtblock_t		startblock;
+	xfs_rtblock_t		blockcount;
 
 	startblock = rec->ar_startext * tp->t_mountp->m_sb.sb_rextsize;
 	blockcount = rec->ar_extcount * tp->t_mountp->m_sb.sb_rextsize;
@@ -70,7 +70,7 @@ int
 xchk_rtbitmap(
 	struct xfs_scrub	*sc)
 {
-	int				error;
+	int			error;
 
 	/* Invoke the fork scrubber. */
 	error = xchk_metadata_inode_forks(sc);
@@ -90,10 +90,10 @@ int
 xchk_rtsummary(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_inode		*rsumip = sc->mp->m_rsumip;
-	struct xfs_inode		*old_ip = sc->ip;
-	uint				old_ilock_flags = sc->ilock_flags;
-	int				error = 0;
+	struct xfs_inode	*rsumip = sc->mp->m_rsumip;
+	struct xfs_inode	*old_ip = sc->ip;
+	uint			old_ilock_flags = sc->ilock_flags;
+	int			error = 0;
 
 	/*
 	 * We ILOCK'd the rt bitmap ip in the setup routine, now lock the
@@ -126,14 +126,14 @@ out:
 void
 xchk_xref_is_used_rt_space(
 	struct xfs_scrub	*sc,
-	xfs_rtblock_t			fsbno,
-	xfs_extlen_t			len)
+	xfs_rtblock_t		fsbno,
+	xfs_extlen_t		len)
 {
-	xfs_rtblock_t			startext;
-	xfs_rtblock_t			endext;
-	xfs_rtblock_t			extcount;
-	bool				is_free;
-	int				error;
+	xfs_rtblock_t		startext;
+	xfs_rtblock_t		endext;
+	xfs_rtblock_t		extcount;
+	bool			is_free;
+	int			error;
 
 	if (xchk_skip_xref(sc->sm))
 		return;
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index b3c6420ccae5..5956b8073e2f 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -153,7 +153,7 @@ static int
 xchk_probe(
 	struct xfs_scrub	*sc)
 {
-	int				error = 0;
+	int			error = 0;
 
 	if (xchk_should_terminate(sc, &error))
 		return error;
@@ -167,8 +167,8 @@ xchk_probe(
 STATIC int
 xchk_teardown(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip_in,
-	int				error)
+	struct xfs_inode	*ip_in,
+	int			error)
 {
 	xchk_ag_free(sc, &sc->sa);
 	if (sc->tp) {
@@ -479,7 +479,7 @@ xfs_scrub_metadata(
 	struct xfs_inode		*ip,
 	struct xfs_scrub_metadata	*sm)
 {
-	struct xfs_scrub	sc;
+	struct xfs_scrub		sc;
 	struct xfs_mount		*mp = ip->i_mount;
 	bool				try_harder = false;
 	bool				already_fixed = false;
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 47c75d2f28da..af323b229c4b 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -36,21 +36,21 @@ struct xchk_meta_ops {
 
 /* Buffer pointers and btree cursors for an entire AG. */
 struct xchk_ag {
-	xfs_agnumber_t			agno;
-	struct xfs_perag		*pag;
+	xfs_agnumber_t		agno;
+	struct xfs_perag	*pag;
 
 	/* AG btree roots */
-	struct xfs_buf			*agf_bp;
-	struct xfs_buf			*agfl_bp;
-	struct xfs_buf			*agi_bp;
+	struct xfs_buf		*agf_bp;
+	struct xfs_buf		*agfl_bp;
+	struct xfs_buf		*agi_bp;
 
 	/* AG btrees */
-	struct xfs_btree_cur		*bno_cur;
-	struct xfs_btree_cur		*cnt_cur;
-	struct xfs_btree_cur		*ino_cur;
-	struct xfs_btree_cur		*fino_cur;
-	struct xfs_btree_cur		*rmap_cur;
-	struct xfs_btree_cur		*refc_cur;
+	struct xfs_btree_cur	*bno_cur;
+	struct xfs_btree_cur	*cnt_cur;
+	struct xfs_btree_cur	*ino_cur;
+	struct xfs_btree_cur	*fino_cur;
+	struct xfs_btree_cur	*rmap_cur;
+	struct xfs_btree_cur	*refc_cur;
 };
 
 struct xfs_scrub {
@@ -66,7 +66,7 @@ struct xfs_scrub {
 	bool				has_quotaofflock;
 
 	/* State tracking for single-AG operations. */
-	struct xchk_ag		sa;
+	struct xchk_ag			sa;
 };
 
 /* Metadata scrubbers */
@@ -115,27 +115,25 @@ xchk_quota(struct xfs_scrub *sc)
 #endif
 
 /* cross-referencing helpers */
-void xchk_xref_is_used_space(struct xfs_scrub *sc,
-		xfs_agblock_t agbno, xfs_extlen_t len);
-void xchk_xref_is_not_inode_chunk(struct xfs_scrub *sc,
-		xfs_agblock_t agbno, xfs_extlen_t len);
-void xchk_xref_is_inode_chunk(struct xfs_scrub *sc,
-		xfs_agblock_t agbno, xfs_extlen_t len);
-void xchk_xref_is_owned_by(struct xfs_scrub *sc,
-		xfs_agblock_t agbno, xfs_extlen_t len,
-		struct xfs_owner_info *oinfo);
-void xchk_xref_is_not_owned_by(struct xfs_scrub *sc,
-		xfs_agblock_t agbno, xfs_extlen_t len,
-		struct xfs_owner_info *oinfo);
-void xchk_xref_has_no_owner(struct xfs_scrub *sc,
-		xfs_agblock_t agbno, xfs_extlen_t len);
-void xchk_xref_is_cow_staging(struct xfs_scrub *sc,
-		xfs_agblock_t bno, xfs_extlen_t len);
-void xchk_xref_is_not_shared(struct xfs_scrub *sc,
-		xfs_agblock_t bno, xfs_extlen_t len);
+void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno,
+		xfs_extlen_t len);
+void xchk_xref_is_not_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
+		xfs_extlen_t len);
+void xchk_xref_is_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
+		xfs_extlen_t len);
+void xchk_xref_is_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
+		xfs_extlen_t len, struct xfs_owner_info *oinfo);
+void xchk_xref_is_not_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
+		xfs_extlen_t len, struct xfs_owner_info *oinfo);
+void xchk_xref_has_no_owner(struct xfs_scrub *sc, xfs_agblock_t agbno,
+		xfs_extlen_t len);
+void xchk_xref_is_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
+		xfs_extlen_t len);
+void xchk_xref_is_not_shared(struct xfs_scrub *sc, xfs_agblock_t bno,
+		xfs_extlen_t len);
 #ifdef CONFIG_XFS_RT
-void xchk_xref_is_used_rt_space(struct xfs_scrub *sc,
-		xfs_rtblock_t rtbno, xfs_extlen_t len);
+void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno,
+		xfs_extlen_t len);
 #else
 # define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
 #endif
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
index 56c6347e9482..f7ebaa946999 100644
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -27,7 +27,7 @@
 int
 xchk_setup_symlink(
 	struct xfs_scrub	*sc,
-	struct xfs_inode		*ip)
+	struct xfs_inode	*ip)
 {
 	/* Allocate the buffer without the inode lock held. */
 	sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, KM_SLEEP);
@@ -43,10 +43,10 @@ int
 xchk_symlink(
 	struct xfs_scrub	*sc)
 {
-	struct xfs_inode		*ip = sc->ip;
-	struct xfs_ifork		*ifp;
-	loff_t				len;
-	int				error = 0;
+	struct xfs_inode	*ip = sc->ip;
+	struct xfs_ifork	*ifp;
+	loff_t			len;
+	int			error = 0;
 
 	if (!S_ISLNK(VFS_I(ip)->i_mode))
 		return -ENOENT;
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 1ef1202a1e45..96feaf8dcdec 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -23,8 +23,8 @@
 /* Figure out which block the btree cursor was pointing to. */
 static inline xfs_fsblock_t
 xchk_btree_cur_fsbno(
-	struct xfs_btree_cur		*cur,
-	int				level)
+	struct xfs_btree_cur	*cur,
+	int			level)
 {
 	if (level < cur->bc_nlevels && cur->bc_bufs[level])
 		return XFS_DADDR_TO_FSB(cur->bc_mp, cur->bc_bufs[level]->b_bn);
-- 
cgit v1.2.3


From 2e9e6481e2a78de3a85083beccfbf6eda2689922 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 19 Jul 2018 12:29:13 -0700
Subject: xfs: detect and fix bad summary counts at mount

Filippo Giunchedi complained that xfs doesn't even perform basic sanity
checks of the fs summary counters at mount time.  Therefore, recalculate
the summary counters from the AGFs after log recovery if the counts were
bad (or we had to recover the fs).  Enhance the recalculation routine to
fail the mount entirely if the new values are also obviously incorrect.

We use a mount state flag to record the "bad summary count" state so
that the (subsequent) online fsck patches can detect subtly incorrect
counts and set the flag; clear it if userspace asks for a repair; or force
a recalculation at the next mount if nobody fixes it by unmount time.

Reported-by: Filippo Giunchedi <fgiunchedi@wikimedia.org>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_sb.c | 21 +++++++++++--
 fs/xfs/xfs_mount.c     | 80 ++++++++++++++++++++++++++++++++++----------------
 fs/xfs/xfs_mount.h     |  1 +
 3 files changed, 73 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 350119eeaecb..b3ad15956366 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -804,6 +804,7 @@ xfs_initialize_perag_data(
 	uint64_t	bfree = 0;
 	uint64_t	bfreelst = 0;
 	uint64_t	btree = 0;
+	uint64_t	fdblocks;
 	int		error;
 
 	for (index = 0; index < agcount; index++) {
@@ -827,17 +828,31 @@ xfs_initialize_perag_data(
 		btree += pag->pagf_btreeblks;
 		xfs_perag_put(pag);
 	}
+	fdblocks = bfree + bfreelst + btree;
+
+	/*
+	 * If the new summary counts are obviously incorrect, fail the
+	 * mount operation because that implies the AGFs are also corrupt.
+	 * Clear BAD_SUMMARY so that we don't unmount with a dirty log, which
+	 * will prevent xfs_repair from fixing anything.
+	 */
+	if (fdblocks > sbp->sb_dblocks || ifree > ialloc) {
+		xfs_alert(mp, "AGF corruption. Please run xfs_repair.");
+		error = -EFSCORRUPTED;
+		goto out;
+	}
 
 	/* Overwrite incore superblock counters with just-read data */
 	spin_lock(&mp->m_sb_lock);
 	sbp->sb_ifree = ifree;
 	sbp->sb_icount = ialloc;
-	sbp->sb_fdblocks = bfree + bfreelst + btree;
+	sbp->sb_fdblocks = fdblocks;
 	spin_unlock(&mp->m_sb_lock);
 
 	xfs_reinit_percpu_counters(mp);
-
-	return 0;
+out:
+	mp->m_flags &= ~XFS_MOUNT_BAD_SUMMARY;
+	return error;
 }
 
 /*
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index a3378252baa1..60462c35ad4b 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -606,6 +606,56 @@ xfs_default_resblks(xfs_mount_t *mp)
 	return resblks;
 }
 
+/* Ensure the summary counts are correct. */
+STATIC int
+xfs_check_summary_counts(
+	struct xfs_mount	*mp)
+{
+	/*
+	 * The AG0 superblock verifier rejects in-progress filesystems,
+	 * so we should never see the flag set this far into mounting.
+	 */
+	if (mp->m_sb.sb_inprogress) {
+		xfs_err(mp, "sb_inprogress set after log recovery??");
+		WARN_ON(1);
+		return -EFSCORRUPTED;
+	}
+
+	/*
+	 * Now the log is mounted, we know if it was an unclean shutdown or
+	 * not. If it was, with the first phase of recovery has completed, we
+	 * have consistent AG blocks on disk. We have not recovered EFIs yet,
+	 * but they are recovered transactionally in the second recovery phase
+	 * later.
+	 *
+	 * If the log was clean when we mounted, we can check the summary
+	 * counters.  If any of them are obviously incorrect, we can recompute
+	 * them from the AGF headers in the next step.
+	 */
+	if (XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
+	    (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
+	     mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
+		mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;
+
+	/*
+	 * We can safely re-initialise incore superblock counters from the
+	 * per-ag data. These may not be correct if the filesystem was not
+	 * cleanly unmounted, so we waited for recovery to finish before doing
+	 * this.
+	 *
+	 * If the filesystem was cleanly unmounted or the previous check did
+	 * not flag anything weird, then we can trust the values in the
+	 * superblock to be correct and we don't need to do anything here.
+	 * Otherwise, recalculate the summary counters.
+	 */
+	if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) ||
+	     XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) &&
+	    !(mp->m_flags & XFS_MOUNT_BAD_SUMMARY))
+		return 0;
+
+	return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
+}
+
 /*
  * This function does the following on an initial mount of a file system:
  *	- reads the superblock from disk and init the mount struct
@@ -831,32 +881,10 @@ xfs_mountfs(
 		goto out_fail_wait;
 	}
 
-	/*
-	 * Now the log is mounted, we know if it was an unclean shutdown or
-	 * not. If it was, with the first phase of recovery has completed, we
-	 * have consistent AG blocks on disk. We have not recovered EFIs yet,
-	 * but they are recovered transactionally in the second recovery phase
-	 * later.
-	 *
-	 * Hence we can safely re-initialise incore superblock counters from
-	 * the per-ag data. These may not be correct if the filesystem was not
-	 * cleanly unmounted, so we need to wait for recovery to finish before
-	 * doing this.
-	 *
-	 * If the filesystem was cleanly unmounted, then we can trust the
-	 * values in the superblock to be correct and we don't need to do
-	 * anything here.
-	 *
-	 * If we are currently making the filesystem, the initialisation will
-	 * fail as the perag data is in an undefined state.
-	 */
-	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
-	    !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
-	     !mp->m_sb.sb_inprogress) {
-		error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
-		if (error)
-			goto out_log_dealloc;
-	}
+	/* Make sure the summary counts are ok. */
+	error = xfs_check_summary_counts(mp);
+	if (error)
+		goto out_log_dealloc;
 
 	/*
 	 * Get and sanity-check the root inode.
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 245349d1e23f..f08907db9c61 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -202,6 +202,7 @@ typedef struct xfs_mount {
 						   must be synchronous except
 						   for space allocations */
 #define XFS_MOUNT_UNMOUNTING	(1ULL << 1)	/* filesystem is unmounting */
+#define XFS_MOUNT_BAD_SUMMARY	(1ULL << 2)	/* summary counters are bad */
 #define XFS_MOUNT_WAS_CLEAN	(1ULL << 3)
 #define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem
 						   operations, typically for
-- 
cgit v1.2.3


From 53235f22151ea7229e1251e46e68098bcf74922d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 20 Jul 2018 09:28:39 -0700
Subject: xfs: refactor unmount record write

Refactor the writing of the unmount record into a separate helper.  No
functionality changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_log_format.h |  13 ++++
 fs/xfs/xfs_log.c               | 131 ++++++++++++++++++++++-------------------
 2 files changed, 82 insertions(+), 62 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 79bb79853c9f..e5f97c69b320 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -77,6 +77,19 @@ static inline uint xlog_get_cycle(char *ptr)
 
 #define XLOG_UNMOUNT_TYPE	0x556e	/* Un for Unmount */
 
+/*
+ * Log item for unmount records.
+ *
+ * The unmount record used to have a string "Unmount filesystem--" in the
+ * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE).
+ * We just write the magic number now; see xfs_log_unmount_write.
+ */
+struct xfs_unmount_log_format {
+	uint16_t	magic;	/* XLOG_UNMOUNT_TYPE */
+	uint16_t	pad1;
+	uint32_t	pad2;	/* may as well make it 64 bits */
+};
+
 /* Region types for iovec's i_type */
 #define XLOG_REG_TYPE_BFORMAT		1
 #define XLOG_REG_TYPE_BCHUNK		2
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 5e56f3b93d4b..bac586cbc54e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -826,6 +826,74 @@ xfs_log_mount_cancel(
  * deallocation must not be done until source-end.
  */
 
+/* Actually write the unmount record to disk. */
+static void
+xfs_log_write_unmount_record(
+	struct xfs_mount	*mp)
+{
+	/* the data section must be 32 bit size aligned */
+	struct xfs_unmount_log_format magic = {
+		.magic = XLOG_UNMOUNT_TYPE,
+	};
+	struct xfs_log_iovec reg = {
+		.i_addr = &magic,
+		.i_len = sizeof(magic),
+		.i_type = XLOG_REG_TYPE_UNMOUNT,
+	};
+	struct xfs_log_vec vec = {
+		.lv_niovecs = 1,
+		.lv_iovecp = &reg,
+	};
+	struct xlog		*log = mp->m_log;
+	struct xlog_in_core	*iclog;
+	struct xlog_ticket	*tic = NULL;
+	xfs_lsn_t		lsn;
+	int			error;
+
+	error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0);
+	if (error)
+		goto out_err;
+
+	/* remove inited flag, and account for space used */
+	tic->t_flags = 0;
+	tic->t_curr_res -= sizeof(magic);
+	error = xlog_write(log, &vec, tic, &lsn, NULL, XLOG_UNMOUNT_TRANS);
+	/*
+	 * At this point, we're umounting anyway, so there's no point in
+	 * transitioning log state to IOERROR. Just continue...
+	 */
+out_err:
+	if (error)
+		xfs_alert(mp, "%s: unmount record failed", __func__);
+
+	spin_lock(&log->l_icloglock);
+	iclog = log->l_iclog;
+	atomic_inc(&iclog->ic_refcnt);
+	xlog_state_want_sync(log, iclog);
+	spin_unlock(&log->l_icloglock);
+	error = xlog_state_release_iclog(log, iclog);
+
+	spin_lock(&log->l_icloglock);
+	switch (iclog->ic_state) {
+	default:
+		if (!XLOG_FORCED_SHUTDOWN(log)) {
+			xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+			break;
+		}
+		/* fall through */
+	case XLOG_STATE_ACTIVE:
+	case XLOG_STATE_DIRTY:
+		spin_unlock(&log->l_icloglock);
+		break;
+	}
+
+	if (tic) {
+		trace_xfs_log_umount_write(log, tic);
+		xlog_ungrant_log_space(log, tic);
+		xfs_log_ticket_put(tic);
+	}
+}
+
 /*
  * Unmount record used to have a string "Unmount filesystem--" in the
  * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE).
@@ -842,8 +910,6 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 #ifdef DEBUG
 	xlog_in_core_t	 *first_iclog;
 #endif
-	xlog_ticket_t	*tic = NULL;
-	xfs_lsn_t	 lsn;
 	int		 error;
 
 	/*
@@ -870,66 +936,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 	} while (iclog != first_iclog);
 #endif
 	if (! (XLOG_FORCED_SHUTDOWN(log))) {
-		error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0);
-		if (!error) {
-			/* the data section must be 32 bit size aligned */
-			struct {
-			    uint16_t magic;
-			    uint16_t pad1;
-			    uint32_t pad2; /* may as well make it 64 bits */
-			} magic = {
-				.magic = XLOG_UNMOUNT_TYPE,
-			};
-			struct xfs_log_iovec reg = {
-				.i_addr = &magic,
-				.i_len = sizeof(magic),
-				.i_type = XLOG_REG_TYPE_UNMOUNT,
-			};
-			struct xfs_log_vec vec = {
-				.lv_niovecs = 1,
-				.lv_iovecp = &reg,
-			};
-
-			/* remove inited flag, and account for space used */
-			tic->t_flags = 0;
-			tic->t_curr_res -= sizeof(magic);
-			error = xlog_write(log, &vec, tic, &lsn,
-					   NULL, XLOG_UNMOUNT_TRANS);
-			/*
-			 * At this point, we're umounting anyway,
-			 * so there's no point in transitioning log state
-			 * to IOERROR. Just continue...
-			 */
-		}
-
-		if (error)
-			xfs_alert(mp, "%s: unmount record failed", __func__);
-
-
-		spin_lock(&log->l_icloglock);
-		iclog = log->l_iclog;
-		atomic_inc(&iclog->ic_refcnt);
-		xlog_state_want_sync(log, iclog);
-		spin_unlock(&log->l_icloglock);
-		error = xlog_state_release_iclog(log, iclog);
-
-		spin_lock(&log->l_icloglock);
-		if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
-		      iclog->ic_state == XLOG_STATE_DIRTY)) {
-			if (!XLOG_FORCED_SHUTDOWN(log)) {
-				xlog_wait(&iclog->ic_force_wait,
-							&log->l_icloglock);
-			} else {
-				spin_unlock(&log->l_icloglock);
-			}
-		} else {
-			spin_unlock(&log->l_icloglock);
-		}
-		if (tic) {
-			trace_xfs_log_umount_write(log, tic);
-			xlog_ungrant_log_space(log, tic);
-			xfs_log_ticket_put(tic);
-		}
+		xfs_log_write_unmount_record(mp);
 	} else {
 		/*
 		 * We're already in forced_shutdown mode, couldn't
-- 
cgit v1.2.3


From f467cad95f5e3814fda408dea76eb962ab19685d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 20 Jul 2018 09:28:40 -0700
Subject: xfs: force summary counter recalc at next mount

Use the "bad summary count" mount flag from the previous patch to skip
writing the unmount record to force log recovery at the next mount,
which will recalculate the summary counters for us.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_errortag.h |  4 +++-
 fs/xfs/xfs_error.c           |  3 +++
 fs/xfs/xfs_log.c             | 16 +++++++++++++++-
 fs/xfs/xfs_mount.c           | 13 +++++++++++++
 fs/xfs/xfs_mount.h           |  1 +
 5 files changed, 35 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
index b9974e7a8e6e..66077a105cbb 100644
--- a/fs/xfs/libxfs/xfs_errortag.h
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -53,7 +53,8 @@
 #define XFS_ERRTAG_LOG_ITEM_PIN				30
 #define XFS_ERRTAG_BUF_LRU_REF				31
 #define XFS_ERRTAG_FORCE_SCRUB_REPAIR			32
-#define XFS_ERRTAG_MAX					33
+#define XFS_ERRTAG_FORCE_SUMMARY_RECALC			33
+#define XFS_ERRTAG_MAX					34
 
 /*
  * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -91,5 +92,6 @@
 #define XFS_RANDOM_LOG_ITEM_PIN				1
 #define XFS_RANDOM_BUF_LRU_REF				2
 #define XFS_RANDOM_FORCE_SCRUB_REPAIR			1
+#define XFS_RANDOM_FORCE_SUMMARY_RECALC			1
 
 #endif /* __XFS_ERRORTAG_H_ */
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 0470114a8d80..9866f542e77b 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -50,6 +50,7 @@ static unsigned int xfs_errortag_random_default[] = {
 	XFS_RANDOM_LOG_ITEM_PIN,
 	XFS_RANDOM_BUF_LRU_REF,
 	XFS_RANDOM_FORCE_SCRUB_REPAIR,
+	XFS_RANDOM_FORCE_SUMMARY_RECALC,
 };
 
 struct xfs_errortag_attr {
@@ -157,6 +158,7 @@ XFS_ERRORTAG_ATTR_RW(log_bad_crc,	XFS_ERRTAG_LOG_BAD_CRC);
 XFS_ERRORTAG_ATTR_RW(log_item_pin,	XFS_ERRTAG_LOG_ITEM_PIN);
 XFS_ERRORTAG_ATTR_RW(buf_lru_ref,	XFS_ERRTAG_BUF_LRU_REF);
 XFS_ERRORTAG_ATTR_RW(force_repair,	XFS_ERRTAG_FORCE_SCRUB_REPAIR);
+XFS_ERRORTAG_ATTR_RW(bad_summary,	XFS_ERRTAG_FORCE_SUMMARY_RECALC);
 
 static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -192,6 +194,7 @@ static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(log_item_pin),
 	XFS_ERRORTAG_ATTR_LIST(buf_lru_ref),
 	XFS_ERRORTAG_ATTR_LIST(force_repair),
+	XFS_ERRORTAG_ATTR_LIST(bad_summary),
 	NULL,
 };
 
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index bac586cbc54e..fd10b1426382 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -848,16 +848,30 @@ xfs_log_write_unmount_record(
 	struct xlog_in_core	*iclog;
 	struct xlog_ticket	*tic = NULL;
 	xfs_lsn_t		lsn;
+	uint			flags = XLOG_UNMOUNT_TRANS;
 	int			error;
 
 	error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0);
 	if (error)
 		goto out_err;
 
+	/*
+	 * If we think the summary counters are bad, clear the unmount header
+	 * flag in the unmount record so that the summary counters will be
+	 * recalculated during log recovery at next mount.  Refer to
+	 * xlog_check_unmount_rec for more details.
+	 */
+	if (XFS_TEST_ERROR((mp->m_flags & XFS_MOUNT_BAD_SUMMARY), mp,
+			XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
+		xfs_alert(mp, "%s: will fix summary counters at next mount",
+				__func__);
+		flags &= ~XLOG_UNMOUNT_TRANS;
+	}
+
 	/* remove inited flag, and account for space used */
 	tic->t_flags = 0;
 	tic->t_curr_res -= sizeof(magic);
-	error = xlog_write(log, &vec, tic, &lsn, NULL, XLOG_UNMOUNT_TRANS);
+	error = xlog_write(log, &vec, tic, &lsn, NULL, flags);
 	/*
 	 * At this point, we're umounting anyway, so there's no point in
 	 * transitioning log state to IOERROR. Just continue...
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 60462c35ad4b..4fb361cde32a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1423,3 +1423,16 @@ xfs_dev_is_read_only(
 	}
 	return 0;
 }
+
+/* Force the summary counters to be recalculated at next mount. */
+void
+xfs_force_summary_recalc(
+	struct xfs_mount	*mp)
+{
+	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
+		return;
+
+	spin_lock(&mp->m_sb_lock);
+	mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;
+	spin_unlock(&mp->m_sb_lock);
+}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f08907db9c61..540353a51478 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -435,5 +435,6 @@ int	xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
 
 struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
 		int error_class, int error);
+void xfs_force_summary_recalc(struct xfs_mount *mp);
 
 #endif	/* __XFS_MOUNT_H__ */
-- 
cgit v1.2.3


From e79e0e1428188b24c3b57309ffa54a33c4ae40c4 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Mon, 18 Jun 2018 13:24:13 -0500
Subject: gfs2: Don't reject a supposedly full bitmap if we have blocks
 reserved

Before this patch, you could get into situations like this:

1. Process 1 searches for X free blocks, finds them, makes a reservation
2. Process 2 searches for free blocks in the same rgrp, but now the
   bitmap is full because process 1's reservation is skipped over.
   So it marks the bitmap as GBF_FULL.
3. Process 1 tries to allocate blocks from its own reservation, but
   since the GBF_FULL bit is set, it skips over the rgrp and searches
   elsewhere, thus not using its own reservation.

This patch adds an additional check to allow processes to use their
own reservations.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/rgrp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 60c86532782e..bce75f25e53a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1687,7 +1687,8 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
 
 	while(1) {
 		bi = rbm_bi(rbm);
-		if (test_bit(GBF_FULL, &bi->bi_flags) &&
+		if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) &&
+		    test_bit(GBF_FULL, &bi->bi_flags) &&
 		    (state == GFS2_BLKST_FREE))
 			goto next_bitmap;
 
-- 
cgit v1.2.3


From 910f3d58d0d40691534b77cc01588ffa22ee7dee Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Fri, 22 Jun 2018 09:51:43 +0800
Subject: gfs2: using posix_acl_xattr_size instead of posix_acl_to_xattr

It seems better to get size by calling posix_acl_xattr_size() instead of
calling posix_acl_to_xattr() with NULL buffer argument.

posix_acl_xattr_size() never returns 0, so remove the unnecessary check.

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/acl.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 776717f1eeea..af5f87a493d9 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -82,14 +82,12 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
 int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
 	int error;
-	int len;
+	size_t len;
 	char *data;
 	const char *name = gfs2_acl_name(type);
 
 	if (acl) {
-		len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0);
-		if (len == 0)
-			return 0;
+		len = posix_acl_xattr_size(acl->a_count);
 		data = kmalloc(len, GFP_NOFS);
 		if (data == NULL)
 			return -ENOMEM;
-- 
cgit v1.2.3


From 109dbb1e6f27fb8f80ee61953485c7c3b1717951 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Mon, 2 Jul 2018 22:16:13 +0530
Subject: fs: gfs2: Adding new return type vm_fault_t

Use new return type vm_fault_t for gfs2_page_mkwrite
handler.

see commit 1c8f422059ae ("mm: change return type to
vm_fault_t") for reference.

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 7137db7b0119..8cb278ee9a0e 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -387,7 +387,7 @@ static int gfs2_allocate_page_backing(struct page *page)
  * blocks allocated on disk to back that page.
  */
 
-static int gfs2_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
 {
 	struct page *page = vmf->page;
 	struct inode *inode = file_inode(vmf->vma->vm_file);
-- 
cgit v1.2.3


From 076ff2f0b877df4ace6604480f9b1278e61719b8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 24 Jul 2018 09:52:31 +0200
Subject: exofs: use bio_clone_fast in _write_mirror

The mirroring code never changes the bio data or biovecs.  This means
we can reuse the biovec allocation easily instead of duplicating it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Boaz Harrosh <ooo@electrozaur.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/exofs/ore.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 1b8b44637e70..5331a15a61f1 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -873,8 +873,8 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
 			struct bio *bio;
 
 			if (per_dev != master_dev) {
-				bio = bio_clone_kmalloc(master_dev->bio,
-							GFP_KERNEL);
+				bio = bio_clone_fast(master_dev->bio,
+						     GFP_KERNEL, NULL);
 				if (unlikely(!bio)) {
 					ORE_DBGMSG(
 					      "Failed to allocate BIO size=%u\n",
-- 
cgit v1.2.3


From 4a7727725dc7d73769c5ab24c566df454093285f Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Thu, 5 Jul 2018 14:40:46 -0500
Subject: GFS2: Fix recovery issues for spectators

This patch fixes a couple problems dealing with spectators who
remain with gfs2 mounts after the last non-spectator node fails.

Before this patch, spectator mounts would try to acquire the dlm's
mounted lock EX as part of its normal recovery sequence.
The mounted lock is only used to determine whether the node is
the first mounter, the first node to mount the file system, for
the purposes of file system recovery and journal replay.

It's not necessary for spectators: they should never do journal
recovery. If they acquire the lock it will prevent another "real"
first-mounter from acquiring the lock in EX mode, which means it
also cannot do journal recovery because it doesn't think it's the
first node to mount the file system.

This patch checks if the mounter is a spectator, and if so, avoids
grabbing the mounted lock. This allows a secondary mounter who is
really the first non-spectator mounter, to do journal recovery:
since the spectator doesn't acquire the lock, it can grab it in
EX mode, and therefore consider itself to be the first mounter
both as a "real" first mount, and as a first-real-after-spectator.

Note that the control lock still needs to be taken in PR mode
in order to fetch the lvb value so it has the current status of
all journals' recovery. This is used as it is today by a first
mounter to replay the journals. For spectators, it's merely
used to fetch the status bits. All recovery is bypassed and the
node waits until recovery is completed by a non-spectator node.

I also improved the cryptic message given by control_mount when
a spectator is waiting for a non-spectator to perform recovery.

It also fixes a problem in gfs2_recover_set whereby spectators
were never queueing recovery work for their own journal.
They cannot do recovery themselves, but they still need to queue
the work so they can check the recovery bits and clear the
DFL_BLOCK_LOCKS bit once the recovery happens on another node.

When the work queue runs on a spectator, it bypasses most of the
work so it won't print a bunch of annoying messages. All it will
print is a bunch of messages that look like this until recovery
completes on the non-spectator node:

GFS2: fsid=mycluster:scratch.s: recover generation 3 jid 0
GFS2: fsid=mycluster:scratch.s: recover jid 0 result busy

These continue every 1.5 seconds until the recovery is done by
the non-spectator, at which time it says:

GFS2: fsid=mycluster:scratch.s: recover generation 4 done

Then it proceeds with its mount.

If the file system is mounted in spectator mode and the last
remaining non-spectator is fenced, any IO to the file system is
blocked by dlm and the spectator waits until recovery is
performed by a non-spectator.

If a spectator tries to mount the file system before any
non-spectators, it blocks and repeatedly gives this kernel
message:

GFS2: fsid=mycluster:scratch: Recovery is required. Waiting for a non-spectator to mount.
GFS2: fsid=mycluster:scratch: Recovery is required. Waiting for a non-spectator to mount.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/lock_dlm.c | 20 +++++++++++++++++---
 fs/gfs2/recovery.c |  7 ++++---
 fs/gfs2/sys.c      | 11 +++++++++--
 3 files changed, 30 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 006c6164f759..ac7caa267ed6 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -821,6 +821,13 @@ restart:
 		goto fail;
 	}
 
+	/**
+	 * If we're a spectator, we don't want to take the lock in EX because
+	 * we cannot do the first-mount responsibility it implies: recovery.
+	 */
+	if (sdp->sd_args.ar_spectator)
+		goto locks_done;
+
 	error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
 	if (!error) {
 		mounted_mode = DLM_LOCK_EX;
@@ -896,9 +903,16 @@ locks_done:
 	if (lvb_gen < mount_gen) {
 		/* wait for mounted nodes to update control_lock lvb to our
 		   generation, which might include new recovery bits set */
-		fs_info(sdp, "control_mount wait1 block %u start %u mount %u "
-			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
-			lvb_gen, ls->ls_recover_flags);
+		if (sdp->sd_args.ar_spectator) {
+			fs_info(sdp, "Recovery is required. Waiting for a "
+				"non-spectator to mount.\n");
+			msleep_interruptible(1000);
+		} else {
+			fs_info(sdp, "control_mount wait1 block %u start %u "
+				"mount %u lvb %u flags %lx\n", block_gen,
+				start_gen, mount_gen, lvb_gen,
+				ls->ls_recover_flags);
+		}
 		spin_unlock(&ls->ls_recover_spin);
 		goto restart;
 	}
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index d8b622c375ab..0f501f938d1c 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -413,12 +413,13 @@ void gfs2_recover_func(struct work_struct *work)
 	ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep;
 	int ro = 0;
 	unsigned int pass;
-	int error;
+	int error = 0;
 	int jlocked = 0;
 
 	t_start = ktime_get();
-	if (sdp->sd_args.ar_spectator ||
-	    (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) {
+	if (sdp->sd_args.ar_spectator)
+		goto fail;
+	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
 		fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
 			jd->jd_jid);
 		jlocked = 1;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index c191fa58a1df..1787d295834e 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -429,11 +429,18 @@ int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
 
 	spin_lock(&sdp->sd_jindex_spin);
 	rv = -EBUSY;
-	if (sdp->sd_jdesc->jd_jid == jid)
+	/**
+	 * If we're a spectator, we use journal0, but it's not really ours.
+	 * So we need to wait for its recovery too. If we skip it we'd never
+	 * queue work to the recovery workqueue, and so its completion would
+	 * never clear the DFL_BLOCK_LOCKS flag, so all our locks would
+	 * permanently stop working.
+	 */
+	if (sdp->sd_jdesc->jd_jid == jid && !sdp->sd_args.ar_spectator)
 		goto out;
 	rv = -ENOENT;
 	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
-		if (jd->jd_jid != jid)
+		if (jd->jd_jid != jid && !sdp->sd_args.ar_spectator)
 			continue;
 		rv = gfs2_recover_journal(jd, false);
 		break;
-- 
cgit v1.2.3


From c25892827c7996eb19ca2a5b1cf596218122e994 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Fri, 6 Jul 2018 23:05:41 +0100
Subject: gfs2: fallocate_chunk: Always initialize struct iomap

In fallocate_chunk, always initialize the iomap before calling
gfs2_iomap_get_alloc: future changes could otherwise cause things like
iomap.flags to leak across calls.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index f3c6d78659b1..6d895d39158a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -877,7 +877,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 	struct gfs2_inode *ip = GFS2_I(inode);
 	loff_t end = offset + len;
 	struct buffer_head *dibh;
-	struct iomap iomap = { };
 	int error;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -893,6 +892,8 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 	}
 
 	while (offset < end) {
+		struct iomap iomap = { };
+
 		error = gfs2_iomap_get_alloc(inode, offset, end - offset,
 					     &iomap);
 		if (error)
-- 
cgit v1.2.3


From 1d45bb7f9d2a5cbae1e5d9a5f72adad84db4d318 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Wed, 27 Jun 2018 01:59:18 +0100
Subject: gfs2: Use iomap for stuffed direct I/O reads

Remove the fallback code from direct to buffered I/O for stuffed reads.

For stuffed writes, we must keep the fallback code: the deferred glock
we are holding under direct I/O doesn't allow us to write to the inode or
change the file size.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/file.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6d895d39158a..08369c6cd127 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -706,14 +706,8 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to)
 	if (ret)
 		goto out_uninit;
 
-	/* fall back to buffered I/O for stuffed files */
-	ret = -ENOTBLK;
-	if (gfs2_is_stuffed(ip))
-		goto out;
-
 	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL);
 
-out:
 	gfs2_glock_dq(&gh);
 out_uninit:
 	gfs2_holder_uninit(&gh);
-- 
cgit v1.2.3


From f95cbb44abf9d6545769147d5abec4770c89872d Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Wed, 6 Jun 2018 20:30:38 +0100
Subject: gfs2: use iomap_readpage for blocksize == PAGE_SIZE

We only use iomap_readpage for pages that don't have buffer heads
attached yet: iomap_readpage would otherwise read pages from disk that
are marked buffer_uptodate() but not PageUptodate().  Those pages may
actually contain data more recent than what's on disk.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/aops.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index cc80fd71f3dd..31e8270d0b26 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -508,9 +508,13 @@ static int __gfs2_readpage(void *file, struct page *page)
 {
 	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
 	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
+
 	int error;
 
-	if (gfs2_is_stuffed(ip)) {
+	if (i_blocksize(page->mapping->host) == PAGE_SIZE &&
+	    !page_has_buffers(page)) {
+		error = iomap_readpage(page, &gfs2_iomap_ops);
+	} else if (gfs2_is_stuffed(ip)) {
 		error = stuffed_readpage(ip, page);
 		unlock_page(page);
 	} else {
-- 
cgit v1.2.3


From d1b0cb933c8e638947ea72f3ab4e3dad4325bb96 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 17 Jul 2018 15:59:28 +0100
Subject: gfs2: remove redundant variable 'moved'

Variable 'moved' is being assigned but is never used, hence it is
redundant and can be removed.  This has been the case ever since commit
c752666c.

Cleans up clang warning:
warning: variable 'moved' set but not used [-Wunused-but-set-variable]

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/dir.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index d97ad89955d1..e37002560c11 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1011,7 +1011,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 	u64 bn, leaf_no;
 	__be64 *lp;
 	u32 index;
-	int x, moved = 0;
+	int x;
 	int error;
 
 	index = name->hash >> (32 - dip->i_depth);
@@ -1113,8 +1113,6 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 
 			if (!prev)
 				prev = dent;
-
-			moved = 1;
 		} else {
 			prev = dent;
 		}
-- 
cgit v1.2.3


From f6753df35c32f17b7abf0de37aa52850ca9733c9 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Wed, 30 May 2018 14:05:15 -0500
Subject: GFS2: rgrp free blocks used incorrectly

Before this patch, several functions in rgrp.c checked the value of
rgd->rd_free_clone. That does not take into account blocks that were
reserved by a multi-block reservation. This causes a problem when
space gets tight in the file system. For example, when function
gfs2_inplace_reserve checks to see if a rgrp has enough blocks to
satisfy the request, it can accept a rgrp that it should reject
because, although there are enough blocks to satisfy the request
_now_, those blocks may be reserved for another running process.

A second problem with this occurs when we've reserved the remaining
blocks in an rgrp: function rg_mblk_search() can reject an rgrp
improperly because it calculates:

   u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;

But rd_reserved includes blocks that the current process just
reserved in its own call to inplace_reserve. For example, it can
reserve the last 128 blocks of an rgrp, then reject that same rgrp
because the above calculates out to free_blocks = 0;

Consequences include, but are not limited to, (1) leaving holes,
and thus increasing file system fragmentation, and (2) reporting
file system is full long before it actually is.

This patch introduces a new function, rgd_free, which returns the
number of clone-free blocks (blocks that are truly free as opposed
to blocks that are still being used because an unlinked file is
still open) minus the number of blocks reserved by processes, but
not counting the blocks we ourselves reserved (because obviously
we need to allocate them).

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/rgrp.c | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 0a484a009ba2..68a81afd3b4a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1489,6 +1489,34 @@ static void rs_insert(struct gfs2_inode *ip)
 	trace_gfs2_rs(rs, TRACE_RS_INSERT);
 }
 
+/**
+ * rgd_free - return the number of free blocks we can allocate.
+ * @rgd: the resource group
+ *
+ * This function returns the number of free blocks for an rgrp.
+ * That's the clone-free blocks (blocks that are free, not including those
+ * still being used for unlinked files that haven't been deleted.)
+ *
+ * It also subtracts any blocks reserved by someone else, but does not
+ * include free blocks that are still part of our current reservation,
+ * because obviously we can (and will) allocate them.
+ */
+static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
+{
+	u32 tot_reserved, tot_free;
+
+	if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free))
+		return 0;
+	tot_reserved = rgd->rd_reserved - rs->rs_free;
+
+	if (rgd->rd_free_clone < tot_reserved)
+		tot_reserved = 0;
+
+	tot_free = rgd->rd_free_clone - tot_reserved;
+
+	return tot_free;
+}
+
 /**
  * rg_mblk_search - find a group of multiple free blocks to form a reservation
  * @rgd: the resource group descriptor
@@ -1504,7 +1532,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
 	u64 goal;
 	struct gfs2_blkreserv *rs = &ip->i_res;
 	u32 extlen;
-	u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
+	u32 free_blocks = rgd_free(rgd, rs);
 	int ret;
 	struct inode *inode = &ip->i_inode;
 
@@ -1985,7 +2013,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 	int error = 0, rg_locked, flags = 0;
 	u64 last_unlinked = NO_BLOCK;
 	int loops = 0;
-	u32 skip = 0;
+	u32 free_blocks, skip = 0;
 
 	if (sdp->sd_args.ar_rgrplvb)
 		flags |= GL_SKIP;
@@ -2056,10 +2084,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 			goto check_rgrp;
 
 		/* If rgrp has enough free space, use it */
-		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
+		free_blocks = rgd_free(rs->rs_rbm.rgd, rs);
+		if (free_blocks >= ap->target ||
 		    (loops == 2 && ap->min_target &&
-		     rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
-			ap->allowed = rs->rs_rbm.rgd->rd_free_clone;
+		     free_blocks >= ap->min_target)) {
+			ap->allowed = free_blocks;
 			return 0;
 		}
 check_rgrp:
-- 
cgit v1.2.3


From 776125785a87ff05d49938bd5b9f336f2a05bff6 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Wed, 25 Jul 2018 18:45:08 +0100
Subject: gfs2: Special-case rindex for gfs2_grow

To speed up the common case of appending to a file,
gfs2_write_alloc_required presumes that writing beyond the end of a file
will always require additional blocks to be allocated.  This assumption
is incorrect for preallocated files, but there are no negative
consequences as long as *some* space is still left on the filesystem.

One special file that always has some space preallocated beyond the end
of the file is the rindex: when growing a filesystem, gfs2_grow adds one
or more new resource groups and appends records describing those
resource groups to the rindex; the preallocated space ensures that this
is always possible.

However, when a filesystem is completely full, gfs2_write_alloc_required
will indicate that an additional allocation is required, and appending
the next record to the rindex will fail even though space for that
record has already been preallocated.  To fix that, skip the incorrect
optimization in gfs2_write_alloc_required, but for the rindex only.
Other writes to preallocated space beyond the end of the file are still
allowed to fail on completely full filesystems.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/bmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 89f1f7d3186d..03128ed1f34e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2316,7 +2316,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
 	end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
 	lblock = offset >> shift;
 	lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
-	if (lblock_stop > end_of_file)
+	if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
 		return 1;
 
 	size = (lblock_stop - lblock) << shift;
-- 
cgit v1.2.3


From 02dff7bf81685b6770a082243060e0b5aac348cf Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:07 -0700
Subject: xfs: pull up dfops from xfs_itruncate_extents()

xfs_itruncate_extents[_flags]() uses a local dfops with a
transaction provided by the caller. It uses hacky ->t_dfops
replacement logic to avoid stomping over an already populated
->t_dfops.

The latter never occurs for current callers and the logic itself is
not really appropriate. Clean this up by updating all callers to
initialize a dfops and to use that down in xfs_itruncate_extents().
This more closely resembles the upcoming logic where dfops will be
embedded within the transaction. We can also replace the
xfs_defer_init() in the xfs_itruncate_extents_flags() loop with an
assert. Both dfops and firstblock should be in a valid state
after xfs_defer_finish() and the inode joined to the dfops is fixed
throughout the loop.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_attr_inactive.c | 3 +++
 fs/xfs/xfs_bmap_util.c     | 2 ++
 fs/xfs/xfs_inode.c         | 8 +++-----
 fs/xfs/xfs_iops.c          | 3 +++
 fs/xfs/xfs_qm_syscalls.c   | 3 +++
 5 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 7ce10055f275..d3055972d3a6 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -26,6 +26,7 @@
 #include "xfs_quota.h"
 #include "xfs_trace.h"
 #include "xfs_dir2.h"
+#include "xfs_defer.h"
 
 /*
  * Look at all the extents for this logical region,
@@ -381,6 +382,7 @@ xfs_attr_inactive(
 {
 	struct xfs_trans	*trans;
 	struct xfs_mount	*mp;
+	struct xfs_defer_ops	dfops;
 	int			lock_mode = XFS_ILOCK_SHARED;
 	int			error = 0;
 
@@ -397,6 +399,7 @@ xfs_attr_inactive(
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrinval, 0, 0, 0, &trans);
 	if (error)
 		goto out_destroy_fork;
+	xfs_defer_init(trans, &dfops);
 
 	lock_mode = XFS_ILOCK_EXCL;
 	xfs_ilock(dp, lock_mode);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 1b78c20de7bd..1a4617c74c6a 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -792,6 +792,7 @@ xfs_free_eofblocks(
 	int			nimaps;
 	struct xfs_bmbt_irec	imap;
 	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_defer_ops	dfops;
 
 	/*
 	 * Figure out if there are any blocks beyond the end
@@ -831,6 +832,7 @@ xfs_free_eofblocks(
 			ASSERT(XFS_FORCED_SHUTDOWN(mp));
 			return error;
 		}
+		xfs_defer_init(tp, &dfops);
 
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip, 0);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7b2694d3901a..7d7d7e95fa17 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1541,8 +1541,6 @@ xfs_itruncate_extents_flags(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp = *tpp;
-	struct xfs_defer_ops	*odfops = tp->t_dfops;
-	struct xfs_defer_ops	dfops;
 	xfs_fileoff_t		first_unmap_block;
 	xfs_fileoff_t		last_block;
 	xfs_filblks_t		unmap_len;
@@ -1579,7 +1577,7 @@ xfs_itruncate_extents_flags(
 	ASSERT(first_unmap_block < last_block);
 	unmap_len = last_block - first_unmap_block + 1;
 	while (!done) {
-		xfs_defer_init(tp, &dfops);
+		ASSERT(tp->t_firstblock == NULLFSBLOCK);
 		error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
 				    XFS_ITRUNC_MAX_EXTENTS, &done);
 		if (error)
@@ -1618,8 +1616,6 @@ xfs_itruncate_extents_flags(
 	trace_xfs_itruncate_extents_end(ip, new_size);
 
 out:
-	/* ->t_dfops points to local stack, don't leak it! */
-	tp->t_dfops = odfops;
 	*tpp = tp;
 	return error;
 out_bmap_cancel:
@@ -1723,6 +1719,7 @@ xfs_inactive_truncate(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
+	struct xfs_defer_ops	dfops;
 	int			error;
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
@@ -1730,6 +1727,7 @@ xfs_inactive_truncate(
 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
 		return error;
 	}
+	xfs_defer_init(tp, &dfops);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 0fa29f39d658..704b57a8b99e 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -26,6 +26,7 @@
 #include "xfs_dir2.h"
 #include "xfs_trans_space.h"
 #include "xfs_iomap.h"
+#include "xfs_defer.h"
 
 #include <linux/capability.h>
 #include <linux/xattr.h>
@@ -812,6 +813,7 @@ xfs_setattr_size(
 	struct inode		*inode = VFS_I(ip);
 	xfs_off_t		oldsize, newsize;
 	struct xfs_trans	*tp;
+	struct xfs_defer_ops	dfops;
 	int			error;
 	uint			lock_flags = 0;
 	bool			did_zeroing = false;
@@ -915,6 +917,7 @@ xfs_setattr_size(
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
 	if (error)
 		return error;
+	xfs_defer_init(tp, &dfops);
 
 	lock_flags |= XFS_ILOCK_EXCL;
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index abc8a21e3a82..df0783303887 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -22,6 +22,7 @@
 #include "xfs_qm.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
+#include "xfs_defer.h"
 
 STATIC int	xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
 STATIC int	xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
@@ -213,6 +214,7 @@ xfs_qm_scall_trunc_qfile(
 {
 	struct xfs_inode	*ip;
 	struct xfs_trans	*tp;
+	struct xfs_defer_ops	dfops;
 	int			error;
 
 	if (ino == NULLFSINO)
@@ -229,6 +231,7 @@ xfs_qm_scall_trunc_qfile(
 		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 		goto out_put;
 	}
+	xfs_defer_init(tp, &dfops);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
-- 
cgit v1.2.3


From a61acc3c78df14bb9b7bfefb2cc771fcda15b8fe Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:08 -0700
Subject: xfs: use ->t_dfops in log recovery intent processing

xlog_finish_defer_ops() processes the deferred operations collected
over the entire intent recovery sequence. We can't xfs_defer_init()
here because the dfops is already populated. Attach it manually and
eliminate the last caller of xfs_defer_finish() that doesn't pass
->t_dfops.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log_recover.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index cbac943896f4..3289811eb076 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4854,6 +4854,8 @@ xlog_finish_defer_ops(
 			0, XFS_TRANS_RESERVE, &tp);
 	if (error)
 		return error;
+	/* dfops is already populated so assign it manually */
+	tp->t_dfops = dfops;
 
 	error = xfs_defer_finish(&tp, dfops);
 	if (error)
-- 
cgit v1.2.3


From dcbd44f79986e55691600b969c14db004d741883 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:08 -0700
Subject: xfs: fix transaction leak on remote attr set/remove failure

The xattr remote value set/remove handlers both clear args.trans in
the error path without having cancelled the transaction. This leaks
the transaction, causes warnings around returning to userspace with
locks held and leads to system lockups or other general problems.

The higher level xfs_attr_[set|remove]() functions already detect
and cancel args.trans when set in the error path. Drop the NULL
assignments from the rmtval handlers and allow the callers to clean
up the transaction correctly.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr_remote.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 7841e6255129..829ab20f0cd7 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -558,7 +558,6 @@ xfs_attr_rmtval_set(
 	return 0;
 out_defer_cancel:
 	xfs_defer_cancel(args->trans->t_dfops);
-	args->trans = NULL;
 	return error;
 }
 
@@ -646,6 +645,5 @@ xfs_attr_rmtval_remove(
 	return 0;
 out_defer_cancel:
 	xfs_defer_cancel(args->trans->t_dfops);
-	args->trans = NULL;
 	return error;
 }
-- 
cgit v1.2.3


From 03f4e4b26cd5f6eed728f82d90039a19d1b51ce3 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:09 -0700
Subject: xfs: make deferred processing safe for embedded dfops

xfs_defer_finish() has a couple of quirks that are not safe with
respect to the upcoming internal dfops functionality. First,
xfs_defer_finish() attaches the passed in dfops structure to
->t_dfops and caches and restores the original value. Second, it
continues to use the initial dfops reference before and after the
transaction roll.

These behaviors assume that dop is an independent memory allocation
from the transaction itself, which may not always be true once
transactions begin to use an embedded dfops structure. In the latter
model, dfops processing creates a new xfs_defer_ops structure with
each transaction and the associated state is migrated across to the
new transaction.

Fix up xfs_defer_finish() to handle the possibility of the current
dfops changing after a transaction roll. Since ->t_dfops is used
unconditionally in this path, it is no longer necessary to
attach/restore ->t_dfops and pass it explicitly down to
xfs_defer_trans_roll(). Update dop in the latter function and the
caller to ensure that it always refers to the current dfops
structure.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_defer.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index c4b0eaeb5190..ee734a8b3fa9 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -225,9 +225,9 @@ xfs_defer_trans_abort(
 /* Roll a transaction so we can do some deferred op processing. */
 STATIC int
 xfs_defer_trans_roll(
-	struct xfs_trans		**tp,
-	struct xfs_defer_ops		*dop)
+	struct xfs_trans		**tp)
 {
+	struct xfs_defer_ops		*dop = (*tp)->t_dfops;
 	int				i;
 	int				error;
 
@@ -243,6 +243,7 @@ xfs_defer_trans_roll(
 
 	/* Roll the transaction. */
 	error = xfs_trans_roll(tp);
+	dop = (*tp)->t_dfops;
 	if (error) {
 		trace_xfs_defer_trans_roll_error((*tp)->t_mountp, dop, error);
 		xfs_defer_trans_abort(*tp, dop, error);
@@ -338,31 +339,25 @@ xfs_defer_finish(
 	void				*state;
 	int				error = 0;
 	void				(*cleanup_fn)(struct xfs_trans *, void *, int);
-	struct xfs_defer_ops		*orig_dop;
 
 	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
+	ASSERT((*tp)->t_dfops == dop);
 
 	trace_xfs_defer_finish((*tp)->t_mountp, dop, _RET_IP_);
 
-	/*
-	 * Attach dfops to the transaction during deferred ops processing. This
-	 * explicitly causes calls into the allocator to defer AGFL block frees.
-	 * Note that this code can go away once all dfops users attach to the
-	 * associated tp.
-	 */
-	ASSERT(!(*tp)->t_dfops || ((*tp)->t_dfops == dop));
-	orig_dop = (*tp)->t_dfops;
-	(*tp)->t_dfops = dop;
-
 	/* Until we run out of pending work to finish... */
 	while (xfs_defer_has_unfinished_work(dop)) {
 		/* Log intents for work items sitting in the intake. */
 		xfs_defer_intake_work(*tp, dop);
 
-		/* Roll the transaction. */
-		error = xfs_defer_trans_roll(tp, dop);
+		/*
+		 * Roll the transaction and update dop in case dfops was
+		 * embedded in the transaction.
+		 */
+		error = xfs_defer_trans_roll(tp);
 		if (error)
 			goto out;
+		dop = (*tp)->t_dfops;
 
 		/* Log an intent-done item for the first pending item. */
 		dfp = list_first_entry(&dop->dop_pending,
@@ -428,10 +423,11 @@ xfs_defer_finish(
 	 * Roll the transaction once more to avoid returning to the caller
 	 * with a dirty transaction.
 	 */
-	if ((*tp)->t_flags & XFS_TRANS_DIRTY)
-		error = xfs_defer_trans_roll(tp, dop);
+	if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
+		error = xfs_defer_trans_roll(tp);
+		dop = (*tp)->t_dfops;
+	}
 out:
-	(*tp)->t_dfops = orig_dop;
 	if (error)
 		trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error);
 	else
-- 
cgit v1.2.3


From 83200bfac6082a46cc962366478d050052e50450 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:09 -0700
Subject: xfs: remove unused deferred ops committed field

dop_committed is set when deferred item processing rolls the
transaction at least once, but is only ever accessed in tracepoints.
The transaction roll/commit events are already available via
independent tracepoints, so remove the otherwise unused field.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_defer.c | 1 -
 fs/xfs/libxfs/xfs_defer.h | 1 -
 fs/xfs/xfs_trace.h        | 8 ++------
 3 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index ee734a8b3fa9..0df09c094e42 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -249,7 +249,6 @@ xfs_defer_trans_roll(
 		xfs_defer_trans_abort(*tp, dop, error);
 		return error;
 	}
-	dop->dop_committed = true;
 
 	/* Rejoin the joined inodes. */
 	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index c17c9deda995..58c979c9f3fa 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -49,7 +49,6 @@ enum xfs_defer_ops_type {
 #define XFS_DEFER_OPS_NR_BUFS	2	/* join up to two buffers */
 
 struct xfs_defer_ops {
-	bool			dop_committed;	/* did any trans commit? */
 	bool			dop_low;	/* alloc in low mode */
 	struct list_head	dop_intake;	/* unlogged pending work */
 	struct list_head	dop_pending;	/* logged pending work */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index b668fc127aa7..cc6995cfce66 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2229,14 +2229,12 @@ DECLARE_EVENT_CLASS(xfs_defer_class,
 	TP_fast_assign(
 		__entry->dev = mp ? mp->m_super->s_dev : 0;
 		__entry->dop = dop;
-		__entry->committed = dop->dop_committed;
 		__entry->low = dop->dop_low;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d ops %p committed %d low %d, caller %pS",
+	TP_printk("dev %d:%d ops %p low %d, caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->dop,
-		  __entry->committed,
 		  __entry->low,
 		  (char *)__entry->caller_ip)
 )
@@ -2259,14 +2257,12 @@ DECLARE_EVENT_CLASS(xfs_defer_error_class,
 	TP_fast_assign(
 		__entry->dev = mp ? mp->m_super->s_dev : 0;
 		__entry->dop = dop;
-		__entry->committed = dop->dop_committed;
 		__entry->low = dop->dop_low;
 		__entry->error = error;
 	),
-	TP_printk("dev %d:%d ops %p committed %d low %d err %d",
+	TP_printk("dev %d:%d ops %p low %d err %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->dop,
-		  __entry->committed,
 		  __entry->low,
 		  __entry->error)
 )
-- 
cgit v1.2.3


From 509308b413c9e4e3140b4bc524522255d126787e Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:10 -0700
Subject: xfs: reset dfops to initial state after finish

xfs_defer_init() is currently used in two particular situations. The
first and most obvious case is raw initialization of an
xfs_defer_ops struct. The other case is partial reinit of
xfs_defer_ops on reuse due to iteration.

Most instances of the first case will be replaced by a single init
of a dfops embedded in the transaction. Init calls are still
technically required for the second case because the dfops may have
low space mode enabled or have joined items that need to be reset
before the dfops should be reused.

Since the current dfops usage expects either a final transaction
commit after xfs_defer_finish() or xfs_defer_init() if dfops is to
be reused, we can shift some of the init logic into
xfs_defer_finish() such that the latter returns with a reinitialized
dfops. This eliminates the second dependency noted above such that a
dfops is immediately ready for reuse after an xfs_defer_finish()
without the need to change any calling code.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_defer.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 0df09c094e42..23f2a52b088e 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -319,6 +319,19 @@ xfs_defer_bjoin(
 	return -EFSCORRUPTED;
 }
 
+/*
+ * Reset an already used dfops after finish.
+ */
+static void
+xfs_defer_reset(
+	struct xfs_defer_ops	*dop)
+{
+	ASSERT(!xfs_defer_has_unfinished_work(dop));
+	dop->dop_low = false;
+	memset(dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+	memset(dop->dop_bufs, 0, sizeof(dop->dop_bufs));
+}
+
 /*
  * Finish all the pending work.  This involves logging intent items for
  * any work items that wandered in since the last transaction roll (if
@@ -427,10 +440,13 @@ xfs_defer_finish(
 		dop = (*tp)->t_dfops;
 	}
 out:
-	if (error)
+	if (error) {
 		trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error);
-	else
+	} else {
 		trace_xfs_defer_finish_done((*tp)->t_mountp, dop, _RET_IP_);
+		xfs_defer_reset(dop);
+	}
+
 	return error;
 }
 
-- 
cgit v1.2.3


From 44fd294681de73990da656294e3dacaa7878f577 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:11 -0700
Subject: xfs: pack holes in xfs_defer_ops and xfs_trans

Both structures have holes due to member alignment. Move dop_low to
the end of xfs_defer_ops to sanitize the cache line alignment and
move t_flags to save 8 bytes in xfs_trans.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_defer.h | 3 ++-
 fs/xfs/xfs_trans.h        | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 58c979c9f3fa..8f58f217fdff 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -49,13 +49,14 @@ enum xfs_defer_ops_type {
 #define XFS_DEFER_OPS_NR_BUFS	2	/* join up to two buffers */
 
 struct xfs_defer_ops {
-	bool			dop_low;	/* alloc in low mode */
 	struct list_head	dop_intake;	/* unlogged pending work */
 	struct list_head	dop_pending;	/* logged pending work */
 
 	/* relog these with each roll */
 	struct xfs_inode	*dop_inodes[XFS_DEFER_OPS_NR_INODES];
 	struct xfs_buf		*dop_bufs[XFS_DEFER_OPS_NR_BUFS];
+
+	bool			dop_low;	/* alloc in low mode */
 };
 
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 37fdacc690c7..6f857af61455 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -102,12 +102,12 @@ typedef struct xfs_trans {
 	unsigned int		t_blk_res_used;	/* # of resvd blocks used */
 	unsigned int		t_rtx_res;	/* # of rt extents resvd */
 	unsigned int		t_rtx_res_used;	/* # of resvd rt extents used */
+	unsigned int		t_flags;	/* misc flags */
 	xfs_fsblock_t		t_firstblock;	/* first block allocated */
 	struct xlog_ticket	*t_ticket;	/* log mgr ticket */
 	struct xfs_mount	*t_mountp;	/* ptr to fs mount struct */
 	struct xfs_dquot_acct   *t_dqinfo;	/* acctg info for dquots */
 	struct xfs_defer_ops	*t_dfops;	/* dfops reference */
-	unsigned int		t_flags;	/* misc flags */
 	int64_t			t_icount_delta;	/* superblock icount change */
 	int64_t			t_ifree_delta;	/* superblock ifree change */
 	int64_t			t_fdblocks_delta; /* superblock fdblocks chg */
-- 
cgit v1.2.3


From e021a2e5fc520d930f949f303e7307038e258645 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:11 -0700
Subject: xfs: support embedded dfops in transaction

The dfops structure used by multi-transaction operations is
typically stored on the stack and carried around by the associated
transaction. The lifecycle of dfops does not quite match that of the
transaction, but they are tightly related in that the former depends
on the latter.

The relationship of these objects is tight enough that we can avoid
the cumbersome boilerplate code required in most cases to manage
them separately by just embedding an xfs_defer_ops in the
transaction itself. This means that a transaction allocation returns
with an initialized dfops, a transaction commit finishes pending
deferred items before the tx commit, a transaction cancel cancels
the dfops before the transaction and a transaction dup operation
transfers the current dfops state to the new transaction.

The dup operation is slightly complicated by the fact that we can no
longer just copy a dfops pointer from the old transaction to the new
transaction. This is solved through a dfops move helper that
transfers the pending items and other dfops state across the
transactions. This also requires that transaction rolling code
always refer to the transaction for the current dfops reference.

Finally, to facilitate incremental conversion to the internal dfops
and continue to support the current external dfops mode of
operation, create the new ->t_dfops_internal field with a layer of
indirection. On allocation, ->t_dfops points to the internal dfops.
This state is overridden by callers who re-init a local dfops on the
transaction. Once ->t_dfops is overridden, the external dfops
reference is maintained as the transaction rolls.

This patch adds the fundamental ability to support an internal
dfops. All codepaths that perform deferred processing continue to
override the internal dfops until they are converted over in
subsequent patches.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_defer.c | 22 ++++++++++++++++++++++
 fs/xfs/libxfs/xfs_defer.h | 16 ++--------------
 fs/xfs/xfs_trans.c        | 30 ++++++++++++++++++++++++++----
 fs/xfs/xfs_trans.h        | 17 ++++++++++++++++-
 4 files changed, 66 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 23f2a52b088e..b63cc9e730da 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -555,3 +555,25 @@ xfs_defer_init(
 	}
 	trace_xfs_defer_init(mp, dop, _RET_IP_);
 }
+
+/*
+ * Move state from one xfs_defer_ops to another and reset the source to initial
+ * state. This is primarily used to carry state forward across transaction rolls
+ * with internal dfops.
+ */
+void
+xfs_defer_move(
+	struct xfs_defer_ops	*dst,
+	struct xfs_defer_ops	*src)
+{
+	ASSERT(dst != src);
+
+	list_splice_init(&src->dop_intake, &dst->dop_intake);
+	list_splice_init(&src->dop_pending, &dst->dop_pending);
+
+	memcpy(dst->dop_inodes, src->dop_inodes, sizeof(dst->dop_inodes));
+	memcpy(dst->dop_bufs, src->dop_bufs, sizeof(dst->dop_bufs));
+	dst->dop_low = src->dop_low;
+
+	xfs_defer_reset(src);
+}
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 8f58f217fdff..35507ca9a148 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -7,6 +7,7 @@
 #define	__XFS_DEFER_H__
 
 struct xfs_defer_op_type;
+struct xfs_defer_ops;
 
 /*
  * Save a log intent item and a list of extents, so that we can replay
@@ -45,20 +46,6 @@ enum xfs_defer_ops_type {
 	XFS_DEFER_OPS_TYPE_MAX,
 };
 
-#define XFS_DEFER_OPS_NR_INODES	2	/* join up to two inodes */
-#define XFS_DEFER_OPS_NR_BUFS	2	/* join up to two buffers */
-
-struct xfs_defer_ops {
-	struct list_head	dop_intake;	/* unlogged pending work */
-	struct list_head	dop_pending;	/* logged pending work */
-
-	/* relog these with each roll */
-	struct xfs_inode	*dop_inodes[XFS_DEFER_OPS_NR_INODES];
-	struct xfs_buf		*dop_bufs[XFS_DEFER_OPS_NR_BUFS];
-
-	bool			dop_low;	/* alloc in low mode */
-};
-
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
 		struct list_head *h);
 int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop);
@@ -67,6 +54,7 @@ void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
 int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
+void xfs_defer_move(struct xfs_defer_ops *dst, struct xfs_defer_ops *src);
 
 /* Description of a deferred type. */
 struct xfs_defer_op_type {
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index de00f79ff698..412c8d236c71 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -119,7 +119,13 @@ xfs_trans_dup(
 	ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
 	tp->t_rtx_res = tp->t_rtx_res_used;
 	ntp->t_pflags = tp->t_pflags;
-	ntp->t_dfops = tp->t_dfops;
+
+	/* copy the dfops pointer if it's external, otherwise move it */
+	xfs_defer_init(ntp, &ntp->t_dfops_internal);
+	if (tp->t_dfops != &tp->t_dfops_internal)
+		ntp->t_dfops = tp->t_dfops;
+	else
+		xfs_defer_move(ntp->t_dfops, tp->t_dfops);
 
 	xfs_trans_dup_dqinfo(tp, ntp);
 
@@ -275,6 +281,13 @@ xfs_trans_alloc(
 	INIT_LIST_HEAD(&tp->t_items);
 	INIT_LIST_HEAD(&tp->t_busy);
 	tp->t_firstblock = NULLFSBLOCK;
+	/*
+	 * We only roll transactions with permanent log reservation. Don't init
+	 * ->t_dfops to skip attempts to finish or cancel an empty dfops with a
+	 * non-permanent res.
+	 */
+	if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES)
+		xfs_defer_init(tp, &tp->t_dfops_internal);
 
 	error = xfs_trans_reserve(tp, resp, blocks, rtextents);
 	if (error) {
@@ -916,11 +929,17 @@ __xfs_trans_commit(
 	int			error = 0;
 	int			sync = tp->t_flags & XFS_TRANS_SYNC;
 
-	ASSERT(!tp->t_dfops ||
-	       !xfs_defer_has_unfinished_work(tp->t_dfops) || regrant);
-
 	trace_xfs_trans_commit(tp, _RET_IP_);
 
+	/* finish deferred items on final commit */
+	if (!regrant && tp->t_dfops) {
+		error = xfs_defer_finish(&tp, tp->t_dfops);
+		if (error) {
+			xfs_defer_cancel(tp->t_dfops);
+			goto out_unreserve;
+		}
+	}
+
 	/*
 	 * If there is nothing to be logged by the transaction,
 	 * then unlock all of the items associated with the
@@ -1010,6 +1029,9 @@ xfs_trans_cancel(
 
 	trace_xfs_trans_cancel(tp, _RET_IP_);
 
+	if (tp->t_dfops)
+		xfs_defer_cancel(tp->t_dfops);
+
 	/*
 	 * See if the caller is relying on us to shut down the
 	 * filesystem.  This happens in paths where we detect
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 6f857af61455..dc79e3c1d3e8 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -24,7 +24,6 @@ struct xfs_rui_log_item;
 struct xfs_btree_cur;
 struct xfs_cui_log_item;
 struct xfs_cud_log_item;
-struct xfs_defer_ops;
 struct xfs_bui_log_item;
 struct xfs_bud_log_item;
 
@@ -90,6 +89,21 @@ void	xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
 #define XFS_ITEM_LOCKED		2
 #define XFS_ITEM_FLUSHING	3
 
+/*
+ * Deferred operations tracking structure.
+ */
+#define XFS_DEFER_OPS_NR_INODES	2	/* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS	2	/* join up to two buffers */
+struct xfs_defer_ops {
+	struct list_head	dop_intake;	/* unlogged pending work */
+	struct list_head	dop_pending;	/* logged pending work */
+
+	/* relog these with each roll */
+	struct xfs_inode	*dop_inodes[XFS_DEFER_OPS_NR_INODES];
+	struct xfs_buf		*dop_bufs[XFS_DEFER_OPS_NR_BUFS];
+
+	bool			dop_low;	/* alloc in low mode */
+};
 
 /*
  * This is the structure maintained for every active transaction.
@@ -130,6 +144,7 @@ typedef struct xfs_trans {
 	struct list_head	t_items;	/* log item descriptors */
 	struct list_head	t_busy;		/* list of busy extents */
 	unsigned long		t_pflags;	/* saved process flags state */
+	struct xfs_defer_ops	t_dfops_internal;
 } xfs_trans_t;
 
 /*
-- 
cgit v1.2.3


From 1e5ae1995a44f2b7a03f08a10504568a96040a8c Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:12 -0700
Subject: xfs: use internal dfops in cow blocks cancel

All callers either explicitly initialize a dfops or pass a
transaction with an internal dfops. Drop the hacky old dfops
replacement logic and use the one associated with the transaction.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_reflink.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 406f79d44153..04c25ee6b6f8 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -483,8 +483,6 @@ xfs_reflink_cancel_cow_blocks(
 	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	struct xfs_bmbt_irec		got, del;
 	struct xfs_iext_cursor		icur;
-	struct xfs_defer_ops		dfops;
-	struct xfs_defer_ops		*odfops = (*tpp)->t_dfops;
 	int				error = 0;
 
 	if (!xfs_is_reflink_inode(ip))
@@ -511,7 +509,8 @@ xfs_reflink_cancel_cow_blocks(
 			if (error)
 				break;
 		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
-			xfs_defer_init(*tpp, &dfops);
+			ASSERT((*tpp)->t_dfops);
+			ASSERT((*tpp)->t_firstblock == NULLFSBLOCK);
 
 			/* Free the CoW orphan record. */
 			error = xfs_refcount_free_cow_extent(ip->i_mount,
@@ -553,7 +552,6 @@ next_extent:
 	/* clear tag if cow fork is emptied */
 	if (!ifp->if_bytes)
 		xfs_inode_clear_cowblocks_tag(ip);
-	(*tpp)->t_dfops = odfops;
 	return error;
 }
 
-- 
cgit v1.2.3


From 9c6bb0cf7ba318767107328f39aac880344ddd2e Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:12 -0700
Subject: xfs: use internal dfops in attr code

Remove the unnecessary on-stack dfops structure and use the internal
transaction dfops instead. The lower level xattr code already
appropriately accesses ->t_dfops throughout.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 927d4c968f9a..66a22c80a0db 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -202,7 +202,6 @@ xfs_attr_set(
 	struct xfs_mount	*mp = dp->i_mount;
 	struct xfs_buf		*leaf_bp = NULL;
 	struct xfs_da_args	args;
-	struct xfs_defer_ops	dfops;
 	struct xfs_trans_res	tres;
 	int			rsvd = (flags & ATTR_ROOT) != 0;
 	int			error, err2, local;
@@ -251,7 +250,6 @@ xfs_attr_set(
 			rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
 	if (error)
 		return error;
-	xfs_defer_init(args.trans, &dfops);
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
@@ -315,18 +313,18 @@ xfs_attr_set(
 		 */
 		error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
 		if (error)
-			goto out_defer_cancel;
+			goto out;
 		/*
 		 * Prevent the leaf buffer from being unlocked so that a
 		 * concurrent AIL push cannot grab the half-baked leaf
 		 * buffer and run into problems with the write verifier.
 		 */
 		xfs_trans_bhold(args.trans, leaf_bp);
-		xfs_defer_bjoin(&dfops, leaf_bp);
-		xfs_defer_ijoin(&dfops, dp);
-		error = xfs_defer_finish(&args.trans, &dfops);
+		xfs_defer_bjoin(args.trans->t_dfops, leaf_bp);
+		xfs_defer_ijoin(args.trans->t_dfops, dp);
+		error = xfs_defer_finish(&args.trans, args.trans->t_dfops);
 		if (error)
-			goto out_defer_cancel;
+			goto out;
 
 		/*
 		 * Commit the leaf transformation.  We'll need another (linked)
@@ -366,8 +364,6 @@ xfs_attr_set(
 
 	return error;
 
-out_defer_cancel:
-	xfs_defer_cancel(&dfops);
 out:
 	if (leaf_bp)
 		xfs_trans_brelse(args.trans, leaf_bp);
@@ -389,7 +385,6 @@ xfs_attr_remove(
 {
 	struct xfs_mount	*mp = dp->i_mount;
 	struct xfs_da_args	args;
-	struct xfs_defer_ops	dfops;
 	int			error;
 
 	XFS_STATS_INC(mp, xs_attr_remove);
@@ -422,7 +417,6 @@ xfs_attr_remove(
 			&args.trans);
 	if (error)
 		return error;
-	xfs_defer_init(args.trans, &dfops);
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 	/*
-- 
cgit v1.2.3


From 91ef75b6572498face47746c253926e733a4da3b Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:13 -0700
Subject: xfs: use internal dfops during [b|c]ui recovery

bmap and refcount intent processing associates a dfops from the
caller with a local transaction to collect all deferred items for
post-processing. Use the internal dfops in both of these functions
and move the deferred items to the parent dfops before the
transaction commits.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_item.c     | 21 +++++++++++----------
 fs/xfs/xfs_log_recover.c   |  6 +++---
 fs/xfs/xfs_refcount_item.c | 30 ++++++++++++++++--------------
 3 files changed, 30 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 478bfc798861..bc5eb2e0ab0c 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -441,7 +441,12 @@ xfs_bui_recover(
 			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
 	if (error)
 		return error;
-	tp->t_dfops = dfops;
+	/*
+	 * Recovery stashes all deferred ops during intent processing and
+	 * finishes them on completion. Transfer current dfops state to this
+	 * transaction and transfer the result back before we return.
+	 */
+	xfs_defer_move(tp->t_dfops, dfops);
 	budp = xfs_trans_get_bud(tp, buip);
 
 	/* Grab the inode. */
@@ -470,7 +475,7 @@ xfs_bui_recover(
 	xfs_trans_ijoin(tp, ip, 0);
 
 	count = bmap->me_len;
-	error = xfs_trans_log_finish_bmap_update(tp, budp, dfops, type,
+	error = xfs_trans_log_finish_bmap_update(tp, budp, tp->t_dfops, type,
 			ip, whichfork, bmap->me_startoff,
 			bmap->me_startblock, &count, state);
 	if (error)
@@ -482,18 +487,14 @@ xfs_bui_recover(
 		irec.br_blockcount = count;
 		irec.br_startoff = bmap->me_startoff;
 		irec.br_state = state;
-		error = xfs_bmap_unmap_extent(tp->t_mountp, dfops, ip, &irec);
+		error = xfs_bmap_unmap_extent(tp->t_mountp, tp->t_dfops, ip,
+					      &irec);
 		if (error)
 			goto err_inode;
 	}
 
 	set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
-	/*
-	 * Recovery finishes all deferred ops once intent processing is
-	 * complete. Reset the trans reference because commit expects a finished
-	 * dfops or none at all.
-	 */
-	tp->t_dfops = NULL;
+	xfs_defer_move(dfops, tp->t_dfops);
 	error = xfs_trans_commit(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	IRELE(ip);
@@ -501,7 +502,7 @@ xfs_bui_recover(
 	return error;
 
 err_inode:
-	tp->t_dfops = NULL;
+	xfs_defer_move(dfops, tp->t_dfops);
 	xfs_trans_cancel(tp);
 	if (ip) {
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 3289811eb076..958e9b96dc6a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4854,10 +4854,10 @@ xlog_finish_defer_ops(
 			0, XFS_TRANS_RESERVE, &tp);
 	if (error)
 		return error;
-	/* dfops is already populated so assign it manually */
-	tp->t_dfops = dfops;
+	/* transfer all collected dfops to this transaction */
+	xfs_defer_move(tp->t_dfops, dfops);
 
-	error = xfs_defer_finish(&tp, dfops);
+	error = xfs_defer_finish(&tp, tp->t_dfops);
 	if (error)
 		goto out_cancel;
 
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 2064c689bc72..d3582a06626f 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -452,7 +452,12 @@ xfs_cui_recover(
 			mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
 	if (error)
 		return error;
-	tp->t_dfops = dfops;
+	/*
+	 * Recovery stashes all deferred ops during intent processing and
+	 * finishes them on completion. Transfer current dfops state to this
+	 * transaction and transfer the result back before we return.
+	 */
+	xfs_defer_move(tp->t_dfops, dfops);
 	cudp = xfs_trans_get_cud(tp, cuip);
 
 	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
@@ -474,8 +479,8 @@ xfs_cui_recover(
 			new_len = refc->pe_len;
 		} else
 			error = xfs_trans_log_finish_refcount_update(tp, cudp,
-				dfops, type, refc->pe_startblock, refc->pe_len,
-				&new_fsb, &new_len, &rcur);
+				tp->t_dfops, type, refc->pe_startblock,
+				refc->pe_len, &new_fsb, &new_len, &rcur);
 		if (error)
 			goto abort_error;
 
@@ -486,21 +491,23 @@ xfs_cui_recover(
 			switch (type) {
 			case XFS_REFCOUNT_INCREASE:
 				error = xfs_refcount_increase_extent(
-						tp->t_mountp, dfops, &irec);
+						tp->t_mountp, tp->t_dfops,
+						&irec);
 				break;
 			case XFS_REFCOUNT_DECREASE:
 				error = xfs_refcount_decrease_extent(
-						tp->t_mountp, dfops, &irec);
+						tp->t_mountp, tp->t_dfops,
+						&irec);
 				break;
 			case XFS_REFCOUNT_ALLOC_COW:
 				error = xfs_refcount_alloc_cow_extent(
-						tp->t_mountp, dfops,
+						tp->t_mountp, tp->t_dfops,
 						irec.br_startblock,
 						irec.br_blockcount);
 				break;
 			case XFS_REFCOUNT_FREE_COW:
 				error = xfs_refcount_free_cow_extent(
-						tp->t_mountp, dfops,
+						tp->t_mountp, tp->t_dfops,
 						irec.br_startblock,
 						irec.br_blockcount);
 				break;
@@ -515,18 +522,13 @@ xfs_cui_recover(
 
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
 	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
-	/*
-	 * Recovery finishes all deferred ops once intent processing is
-	 * complete. Reset the trans reference because commit expects a finished
-	 * dfops or none at all.
-	 */
-	tp->t_dfops = NULL;
+	xfs_defer_move(dfops, tp->t_dfops);
 	error = xfs_trans_commit(tp);
 	return error;
 
 abort_error:
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
-	tp->t_dfops = NULL;
+	xfs_defer_move(dfops, tp->t_dfops);
 	xfs_trans_cancel(tp);
 	return error;
 }
-- 
cgit v1.2.3


From c8eac49ef798a7d00240847f63902caa1388241a Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:13 -0700
Subject: xfs: remove all boilerplate defer init/finish code

At this point, the transaction subsystem completely manages deferred
items internally such that the common and boilerplate
xfs_trans_alloc() -> xfs_defer_init() -> xfs_defer_finish() ->
xfs_trans_commit() sequence can be replaced with a simple
transaction allocation and commit.

Remove all such boilerplate deferred ops code. In doing so, we
change each case over to use the dfops in the transaction and
specifically eliminate:

- The on-stack dfops and associated xfs_defer_init() call, as the
  internal dfops is initialized on transaction allocation.
- xfs_defer_finish() calls that precede a final xfs_trans_commit() of
  a transaction.
- xfs_defer_cancel() calls in error handlers that precede a
  transaction cancel.

The only deferred ops calls that remain are those that are
non-deterministic with respect to the final commit of the associated
transaction or are open-coded due to special handling.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c     | 16 +--------
 fs/xfs/libxfs/xfs_refcount.c | 10 +-----
 fs/xfs/xfs_attr_inactive.c   |  2 --
 fs/xfs/xfs_bmap_util.c       | 43 ++++--------------------
 fs/xfs/xfs_dquot.c           |  4 ---
 fs/xfs/xfs_inode.c           | 79 ++++++++------------------------------------
 fs/xfs/xfs_iomap.c           | 26 ++-------------
 fs/xfs/xfs_iops.c            |  2 --
 fs/xfs/xfs_log_recover.c     |  8 -----
 fs/xfs/xfs_qm_syscalls.c     |  2 --
 fs/xfs/xfs_reflink.c         | 37 +++++++--------------
 fs/xfs/xfs_rtalloc.c         |  9 +----
 fs/xfs/xfs_symlink.c         | 38 +++++----------------
 13 files changed, 44 insertions(+), 232 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 6bc0cdff488e..92cd064a2589 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1018,7 +1018,6 @@ xfs_bmap_add_attrfork(
 	int			size,		/* space new attribute needs */
 	int			rsvd)		/* xact may use reserved blks */
 {
-	struct xfs_defer_ops	dfops;		/* freed extent records */
 	xfs_mount_t		*mp;		/* mount structure */
 	xfs_trans_t		*tp;		/* transaction pointer */
 	int			blks;		/* space reservation */
@@ -1037,7 +1036,6 @@ xfs_bmap_add_attrfork(
 			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
 	if (error)
 		return error;
-	xfs_defer_init(tp, &dfops);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
@@ -1102,7 +1100,7 @@ xfs_bmap_add_attrfork(
 	if (logflags)
 		xfs_trans_log_inode(tp, ip, logflags);
 	if (error)
-		goto bmap_cancel;
+		goto trans_cancel;
 	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
 	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
 		bool log_sb = false;
@@ -1121,15 +1119,10 @@ xfs_bmap_add_attrfork(
 			xfs_log_sb(tp);
 	}
 
-	error = xfs_defer_finish(&tp, &dfops);
-	if (error)
-		goto bmap_cancel;
 	error = xfs_trans_commit(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 
-bmap_cancel:
-	xfs_defer_cancel(&dfops);
 trans_cancel:
 	xfs_trans_cancel(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -5953,14 +5946,12 @@ xfs_bmap_split_extent(
 {
 	struct xfs_mount        *mp = ip->i_mount;
 	struct xfs_trans        *tp;
-	struct xfs_defer_ops    dfops;
 	int                     error;
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
 			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
 	if (error)
 		return error;
-	xfs_defer_init(tp, &dfops);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
@@ -5969,14 +5960,9 @@ xfs_bmap_split_extent(
 	if (error)
 		goto out;
 
-	error = xfs_defer_finish(&tp, &dfops);
-	if (error)
-		goto out;
-
 	return xfs_trans_commit(tp);
 
 out:
-	xfs_defer_cancel(&dfops);
 	xfs_trans_cancel(tp);
 	return error;
 }
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 9ef1f440a6f2..4cbc2efb099e 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1635,7 +1635,6 @@ xfs_refcount_recover_cow_leftovers(
 	struct list_head		debris;
 	union xfs_btree_irec		low;
 	union xfs_btree_irec		high;
-	struct xfs_defer_ops		dfops;
 	xfs_fsblock_t			fsb;
 	xfs_agblock_t			agbno;
 	int				error;
@@ -1691,22 +1690,17 @@ xfs_refcount_recover_cow_leftovers(
 		trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);
 
 		/* Free the orphan record */
-		xfs_defer_init(tp, &dfops);
 		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
 		fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
 		error = xfs_refcount_free_cow_extent(mp, tp->t_dfops, fsb,
 				rr->rr_rrec.rc_blockcount);
 		if (error)
-			goto out_defer;
+			goto out_trans;
 
 		/* Free the block. */
 		xfs_bmap_add_free(mp, tp->t_dfops, fsb,
 				rr->rr_rrec.rc_blockcount, NULL);
 
-		error = xfs_defer_finish(&tp, tp->t_dfops);
-		if (error)
-			goto out_defer;
-
 		error = xfs_trans_commit(tp);
 		if (error)
 			goto out_free;
@@ -1716,8 +1710,6 @@ xfs_refcount_recover_cow_leftovers(
 	}
 
 	return error;
-out_defer:
-	xfs_defer_cancel(tp->t_dfops);
 out_trans:
 	xfs_trans_cancel(tp);
 out_free:
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index d3055972d3a6..228821b2ebe0 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -382,7 +382,6 @@ xfs_attr_inactive(
 {
 	struct xfs_trans	*trans;
 	struct xfs_mount	*mp;
-	struct xfs_defer_ops	dfops;
 	int			lock_mode = XFS_ILOCK_SHARED;
 	int			error = 0;
 
@@ -399,7 +398,6 @@ xfs_attr_inactive(
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrinval, 0, 0, 0, &trans);
 	if (error)
 		goto out_destroy_fork;
-	xfs_defer_init(trans, &dfops);
 
 	lock_mode = XFS_ILOCK_EXCL;
 	xfs_ilock(dp, lock_mode);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 1a4617c74c6a..ddb5f1200d3d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -792,7 +792,6 @@ xfs_free_eofblocks(
 	int			nimaps;
 	struct xfs_bmbt_irec	imap;
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_defer_ops	dfops;
 
 	/*
 	 * Figure out if there are any blocks beyond the end
@@ -832,7 +831,6 @@ xfs_free_eofblocks(
 			ASSERT(XFS_FORCED_SHUTDOWN(mp));
 			return error;
 		}
-		xfs_defer_init(tp, &dfops);
 
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip, 0);
@@ -880,7 +878,6 @@ xfs_alloc_file_space(
 	int			rt;
 	xfs_trans_t		*tp;
 	xfs_bmbt_irec_t		imaps[1], *imapp;
-	struct xfs_defer_ops	dfops;
 	uint			qblocks, resblks, resrtextents;
 	int			error;
 
@@ -973,7 +970,6 @@ xfs_alloc_file_space(
 
 		xfs_trans_ijoin(tp, ip, 0);
 
-		xfs_defer_init(tp, &dfops);
 		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
 					allocatesize_fsb, alloc_type, resblks,
 					imapp, &nimaps);
@@ -983,10 +979,6 @@ xfs_alloc_file_space(
 		/*
 		 * Complete the transaction
 		 */
-		error = xfs_defer_finish(&tp, tp->t_dfops);
-		if (error)
-			goto error0;
-
 		error = xfs_trans_commit(tp);
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
@@ -1005,8 +997,7 @@ xfs_alloc_file_space(
 
 	return error;
 
-error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
-	xfs_defer_cancel(&dfops);
+error0:	/* unlock inode, unreserve quota blocks, cancel trans */
 	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
 
 error1:	/* Just cancel transaction */
@@ -1024,7 +1015,6 @@ xfs_unmap_extent(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
-	struct xfs_defer_ops	dfops;
 	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
 	int			error;
 
@@ -1042,23 +1032,17 @@ xfs_unmap_extent(
 
 	xfs_trans_ijoin(tp, ip, 0);
 
-	xfs_defer_init(tp, &dfops);
 	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, done);
 	if (error)
-		goto out_bmap_cancel;
+		goto out_trans_cancel;
 
 	xfs_defer_ijoin(tp->t_dfops, ip);
-	error = xfs_defer_finish(&tp, tp->t_dfops);
-	if (error)
-		goto out_bmap_cancel;
 
 	error = xfs_trans_commit(tp);
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 
-out_bmap_cancel:
-	xfs_defer_cancel(tp->t_dfops);
 out_trans_cancel:
 	xfs_trans_cancel(tp);
 	goto out_unlock;
@@ -1310,7 +1294,6 @@ xfs_collapse_file_space(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
 	int			error;
-	struct xfs_defer_ops	dfops;
 	xfs_fileoff_t		next_fsb = XFS_B_TO_FSB(mp, offset + len);
 	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
 	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
@@ -1343,22 +1326,16 @@ xfs_collapse_file_space(
 			goto out_trans_cancel;
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
-		xfs_defer_init(tp, &dfops);
 		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
 				&done);
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 
-		error = xfs_defer_finish(&tp, tp->t_dfops);
-		if (error)
-			goto out_bmap_cancel;
 		error = xfs_trans_commit(tp);
 	}
 
 	return error;
 
-out_bmap_cancel:
-	xfs_defer_cancel(tp->t_dfops);
 out_trans_cancel:
 	xfs_trans_cancel(tp);
 	return error;
@@ -1385,7 +1362,6 @@ xfs_insert_file_space(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
 	int			error;
-	struct xfs_defer_ops	dfops;
 	xfs_fileoff_t		stop_fsb = XFS_B_TO_FSB(mp, offset);
 	xfs_fileoff_t		next_fsb = NULLFSBLOCK;
 	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
@@ -1421,22 +1397,17 @@ xfs_insert_file_space(
 
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-		xfs_defer_init(tp, &dfops);
 		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
 				&done, stop_fsb);
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 
-		error = xfs_defer_finish(&tp, tp->t_dfops);
-		if (error)
-			goto out_bmap_cancel;
 		error = xfs_trans_commit(tp);
 	}
 
 	return error;
 
-out_bmap_cancel:
-	xfs_defer_cancel(tp->t_dfops);
+out_trans_cancel:
 	xfs_trans_cancel(tp);
 	return error;
 }
@@ -1607,7 +1578,7 @@ xfs_swap_extent_rmap(
 
 		/* Unmap the old blocks in the source file. */
 		while (tirec.br_blockcount) {
-			xfs_defer_init(tp, tp->t_dfops);
+			ASSERT(tp->t_firstblock == NULLFSBLOCK);
 			trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
 
 			/* Read extent from the source file */
@@ -1831,7 +1802,6 @@ xfs_swap_extents(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
-	struct xfs_defer_ops	dfops;
 	struct xfs_bstat	*sbp = &sxp->sx_stat;
 	int			src_log_flags, target_log_flags;
 	int			error = 0;
@@ -1900,7 +1870,6 @@ xfs_swap_extents(
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
 	if (error)
 		goto out_unlock;
-	xfs_defer_init(tp, &dfops);
 
 	/*
 	 * Lock and join the inodes to the tansaction so that transaction commit
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index c53de34c9ae5..a57d5e8c3118 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -295,8 +295,6 @@ xfs_dquot_disk_alloc(
 
 	trace_xfs_dqalloc(dqp);
 
-	xfs_defer_init(tp, tp->t_dfops);
-
 	xfs_ilock(quotip, XFS_ILOCK_EXCL);
 	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
 		/*
@@ -538,7 +536,6 @@ xfs_qm_dqread_alloc(
 	struct xfs_buf		**bpp)
 {
 	struct xfs_trans	*tp;
-	struct xfs_defer_ops	dfops;
 	struct xfs_buf		*bp;
 	int			error;
 
@@ -546,7 +543,6 @@ xfs_qm_dqread_alloc(
 			XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
 	if (error)
 		goto err;
-	xfs_defer_init(tp, &dfops);
 
 	error = xfs_dquot_disk_alloc(&tp, dqp, &bp);
 	if (error)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7d7d7e95fa17..c47183a2f167 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1142,7 +1142,6 @@ xfs_create(
 	struct xfs_inode	*ip = NULL;
 	struct xfs_trans	*tp = NULL;
 	int			error;
-	struct xfs_defer_ops	dfops;
 	bool                    unlock_dp_on_error = false;
 	prid_t			prid;
 	struct xfs_dquot	*udqp = NULL;
@@ -1194,8 +1193,6 @@ xfs_create(
 	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
 	unlock_dp_on_error = true;
 
-	xfs_defer_init(tp, &dfops);
-
 	/*
 	 * Reserve disk quota and the inode.
 	 */
@@ -1236,11 +1233,11 @@ xfs_create(
 	if (is_dir) {
 		error = xfs_dir_init(tp, ip, dp);
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 
 		error = xfs_bumplink(tp, dp);
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 	}
 
 	/*
@@ -1258,10 +1255,6 @@ xfs_create(
 	 */
 	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
 
-	error = xfs_defer_finish(&tp, &dfops);
-	if (error)
-		goto out_bmap_cancel;
-
 	error = xfs_trans_commit(tp);
 	if (error)
 		goto out_release_inode;
@@ -1273,8 +1266,6 @@ xfs_create(
 	*ipp = ip;
 	return 0;
 
- out_bmap_cancel:
-	xfs_defer_cancel(&dfops);
  out_trans_cancel:
 	xfs_trans_cancel(tp);
  out_release_inode:
@@ -1399,7 +1390,6 @@ xfs_link(
 	xfs_mount_t		*mp = tdp->i_mount;
 	xfs_trans_t		*tp;
 	int			error;
-	struct xfs_defer_ops	dfops;
 	int			resblks;
 
 	trace_xfs_link(tdp, target_name);
@@ -1448,8 +1438,6 @@ xfs_link(
 			goto error_return;
 	}
 
-	xfs_defer_init(tp, &dfops);
-
 	/*
 	 * Handle initial link state of O_TMPFILE inode
 	 */
@@ -1478,12 +1466,6 @@ xfs_link(
 	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
 		xfs_trans_set_sync(tp);
 
-	error = xfs_defer_finish(&tp, &dfops);
-	if (error) {
-		xfs_defer_cancel(&dfops);
-		goto error_return;
-	}
-
 	return xfs_trans_commit(tp);
 
  error_return:
@@ -1719,7 +1701,6 @@ xfs_inactive_truncate(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
-	struct xfs_defer_ops	dfops;
 	int			error;
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
@@ -1727,8 +1708,6 @@ xfs_inactive_truncate(
 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
 		return error;
 	}
-	xfs_defer_init(tp, &dfops);
-
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
 
@@ -1769,7 +1748,6 @@ STATIC int
 xfs_inactive_ifree(
 	struct xfs_inode *ip)
 {
-	struct xfs_defer_ops	dfops;
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
 	int			error;
@@ -1806,7 +1784,6 @@ xfs_inactive_ifree(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
 
-	xfs_defer_init(tp, &dfops);
 	error = xfs_ifree(tp, ip);
 	if (error) {
 		/*
@@ -1833,12 +1810,6 @@ xfs_inactive_ifree(
 	 * Just ignore errors at this point.  There is nothing we can do except
 	 * to try to keep going. Make sure it's not a silent error.
 	 */
-	error = xfs_defer_finish(&tp, &dfops);
-	if (error) {
-		xfs_notice(mp, "%s: xfs_defer_finish returned error %d",
-			__func__, error);
-		xfs_defer_cancel(&dfops);
-	}
 	error = xfs_trans_commit(tp);
 	if (error)
 		xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
@@ -2569,7 +2540,6 @@ xfs_remove(
 	xfs_trans_t             *tp = NULL;
 	int			is_dir = S_ISDIR(VFS_I(ip)->i_mode);
 	int                     error = 0;
-	struct xfs_defer_ops	dfops;
 	uint			resblks;
 
 	trace_xfs_remove(dp, name);
@@ -2649,11 +2619,10 @@ xfs_remove(
 	if (error)
 		goto out_trans_cancel;
 
-	xfs_defer_init(tp, &dfops);
 	error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks);
 	if (error) {
 		ASSERT(error != -ENOENT);
-		goto out_bmap_cancel;
+		goto out_trans_cancel;
 	}
 
 	/*
@@ -2664,10 +2633,6 @@ xfs_remove(
 	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
 		xfs_trans_set_sync(tp);
 
-	error = xfs_defer_finish(&tp, &dfops);
-	if (error)
-		goto out_bmap_cancel;
-
 	error = xfs_trans_commit(tp);
 	if (error)
 		goto std_return;
@@ -2677,8 +2642,6 @@ xfs_remove(
 
 	return 0;
 
- out_bmap_cancel:
-	xfs_defer_cancel(&dfops);
  out_trans_cancel:
 	xfs_trans_cancel(tp);
  std_return:
@@ -2740,9 +2703,6 @@ static int
 xfs_finish_rename(
 	struct xfs_trans	*tp)
 {
-	struct xfs_defer_ops	*dfops = tp->t_dfops;
-	int			error;
-
 	/*
 	 * If this is a synchronous mount, make sure that the rename transaction
 	 * goes to disk before returning to the user.
@@ -2750,13 +2710,6 @@ xfs_finish_rename(
 	if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
 		xfs_trans_set_sync(tp);
 
-	error = xfs_defer_finish(&tp, dfops);
-	if (error) {
-		xfs_defer_cancel(dfops);
-		xfs_trans_cancel(tp);
-		return error;
-	}
-
 	return xfs_trans_commit(tp);
 }
 
@@ -2869,7 +2822,6 @@ xfs_cross_rename(
 	return xfs_finish_rename(tp);
 
 out_trans_abort:
-	xfs_defer_cancel(tp->t_dfops);
 	xfs_trans_cancel(tp);
 	return error;
 }
@@ -2924,7 +2876,6 @@ xfs_rename(
 {
 	struct xfs_mount	*mp = src_dp->i_mount;
 	struct xfs_trans	*tp;
-	struct xfs_defer_ops	dfops;
 	struct xfs_inode	*wip = NULL;		/* whiteout inode */
 	struct xfs_inode	*inodes[__XFS_SORT_INODES];
 	int			num_inodes = __XFS_SORT_INODES;
@@ -3006,8 +2957,6 @@ xfs_rename(
 		goto out_trans_cancel;
 	}
 
-	xfs_defer_init(tp, &dfops);
-
 	/* RENAME_EXCHANGE is unique from here on. */
 	if (flags & RENAME_EXCHANGE)
 		return xfs_cross_rename(tp, src_dp, src_name, src_ip,
@@ -3035,7 +2984,7 @@ xfs_rename(
 		error = xfs_dir_createname(tp, target_dp, target_name,
 					   src_ip->i_ino, spaceres);
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 
 		xfs_trans_ichgtime(tp, target_dp,
 					XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -3043,7 +2992,7 @@ xfs_rename(
 		if (new_parent && src_is_directory) {
 			error = xfs_bumplink(tp, target_dp);
 			if (error)
-				goto out_bmap_cancel;
+				goto out_trans_cancel;
 		}
 	} else { /* target_ip != NULL */
 		/*
@@ -3074,7 +3023,7 @@ xfs_rename(
 		error = xfs_dir_replace(tp, target_dp, target_name,
 					src_ip->i_ino, spaceres);
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 
 		xfs_trans_ichgtime(tp, target_dp,
 					XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -3085,7 +3034,7 @@ xfs_rename(
 		 */
 		error = xfs_droplink(tp, target_ip);
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 
 		if (src_is_directory) {
 			/*
@@ -3093,7 +3042,7 @@ xfs_rename(
 			 */
 			error = xfs_droplink(tp, target_ip);
 			if (error)
-				goto out_bmap_cancel;
+				goto out_trans_cancel;
 		}
 	} /* target_ip != NULL */
 
@@ -3109,7 +3058,7 @@ xfs_rename(
 					target_dp->i_ino, spaceres);
 		ASSERT(error != -EEXIST);
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 	}
 
 	/*
@@ -3135,7 +3084,7 @@ xfs_rename(
 		 */
 		error = xfs_droplink(tp, src_dp);
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 	}
 
 	/*
@@ -3150,7 +3099,7 @@ xfs_rename(
 		error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
 					   spaceres);
 	if (error)
-		goto out_bmap_cancel;
+		goto out_trans_cancel;
 
 	/*
 	 * For whiteouts, we need to bump the link count on the whiteout inode.
@@ -3164,10 +3113,10 @@ xfs_rename(
 		ASSERT(VFS_I(wip)->i_nlink == 0);
 		error = xfs_bumplink(tp, wip);
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 		error = xfs_iunlink_remove(tp, wip);
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 		xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
 
 		/*
@@ -3188,8 +3137,6 @@ xfs_rename(
 		IRELE(wip);
 	return error;
 
-out_bmap_cancel:
-	xfs_defer_cancel(&dfops);
 out_trans_cancel:
 	xfs_trans_cancel(tp);
 out_release_wip:
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 756694219f77..8e8ca9f03f0e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -157,7 +157,6 @@ xfs_iomap_write_direct(
 	int		quota_flag;
 	int		rt;
 	xfs_trans_t	*tp;
-	struct xfs_defer_ops dfops;
 	uint		qblocks, resblks, resrtextents;
 	int		error;
 	int		lockmode;
@@ -253,20 +252,15 @@ xfs_iomap_write_direct(
 	 * From this point onwards we overwrite the imap pointer that the
 	 * caller gave to us.
 	 */
-	xfs_defer_init(tp, &dfops);
 	nimaps = 1;
 	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
 				bmapi_flags, resblks, imap, &nimaps);
 	if (error)
-		goto out_bmap_cancel;
+		goto out_res_cancel;
 
 	/*
 	 * Complete the transaction
 	 */
-	error = xfs_defer_finish(&tp, tp->t_dfops);
-	if (error)
-		goto out_bmap_cancel;
-
 	error = xfs_trans_commit(tp);
 	if (error)
 		goto out_unlock;
@@ -286,8 +280,7 @@ out_unlock:
 	xfs_iunlock(ip, lockmode);
 	return error;
 
-out_bmap_cancel:
-	xfs_defer_cancel(tp->t_dfops);
+out_res_cancel:
 	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
 out_trans_cancel:
 	xfs_trans_cancel(tp);
@@ -663,7 +656,6 @@ xfs_iomap_write_allocate(
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_fileoff_t	offset_fsb, last_block;
 	xfs_fileoff_t	end_fsb, map_start_fsb;
-	struct xfs_defer_ops	dfops;
 	xfs_filblks_t	count_fsb;
 	xfs_trans_t	*tp;
 	int		nimaps;
@@ -713,8 +705,6 @@ xfs_iomap_write_allocate(
 			xfs_ilock(ip, XFS_ILOCK_EXCL);
 			xfs_trans_ijoin(tp, ip, 0);
 
-			xfs_defer_init(tp, &dfops);
-
 			/*
 			 * it is possible that the extents have changed since
 			 * we did the read call as we dropped the ilock for a
@@ -772,10 +762,6 @@ xfs_iomap_write_allocate(
 			if (error)
 				goto trans_cancel;
 
-			error = xfs_defer_finish(&tp, tp->t_dfops);
-			if (error)
-				goto trans_cancel;
-
 			error = xfs_trans_commit(tp);
 			if (error)
 				goto error0;
@@ -806,7 +792,6 @@ xfs_iomap_write_allocate(
 	}
 
 trans_cancel:
-	xfs_defer_cancel(tp->t_dfops);
 	xfs_trans_cancel(tp);
 error0:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -827,7 +812,6 @@ xfs_iomap_write_unwritten(
 	int		nimaps;
 	xfs_trans_t	*tp;
 	xfs_bmbt_irec_t imap;
-	struct xfs_defer_ops dfops;
 	struct inode	*inode = VFS_I(ip);
 	xfs_fsize_t	i_size;
 	uint		resblks;
@@ -872,7 +856,6 @@ xfs_iomap_write_unwritten(
 		/*
 		 * Modify the unwritten extent state of the buffer.
 		 */
-		xfs_defer_init(tp, &dfops);
 		nimaps = 1;
 		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
 					XFS_BMAPI_CONVERT, resblks, &imap,
@@ -896,10 +879,6 @@ xfs_iomap_write_unwritten(
 			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 		}
 
-		error = xfs_defer_finish(&tp, tp->t_dfops);
-		if (error)
-			goto error_on_bmapi_transaction;
-
 		error = xfs_trans_commit(tp);
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
@@ -923,7 +902,6 @@ xfs_iomap_write_unwritten(
 	return 0;
 
 error_on_bmapi_transaction:
-	xfs_defer_cancel(tp->t_dfops);
 	xfs_trans_cancel(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 704b57a8b99e..2eac22bfad6a 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -813,7 +813,6 @@ xfs_setattr_size(
 	struct inode		*inode = VFS_I(ip);
 	xfs_off_t		oldsize, newsize;
 	struct xfs_trans	*tp;
-	struct xfs_defer_ops	dfops;
 	int			error;
 	uint			lock_flags = 0;
 	bool			did_zeroing = false;
@@ -917,7 +916,6 @@ xfs_setattr_size(
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
 	if (error)
 		return error;
-	xfs_defer_init(tp, &dfops);
 
 	lock_flags |= XFS_ILOCK_EXCL;
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 958e9b96dc6a..265e1f561157 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4857,15 +4857,7 @@ xlog_finish_defer_ops(
 	/* transfer all collected dfops to this transaction */
 	xfs_defer_move(tp->t_dfops, dfops);
 
-	error = xfs_defer_finish(&tp, tp->t_dfops);
-	if (error)
-		goto out_cancel;
-
 	return xfs_trans_commit(tp);
-
-out_cancel:
-	xfs_trans_cancel(tp);
-	return error;
 }
 
 /*
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index df0783303887..c07c5a39d516 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -214,7 +214,6 @@ xfs_qm_scall_trunc_qfile(
 {
 	struct xfs_inode	*ip;
 	struct xfs_trans	*tp;
-	struct xfs_defer_ops	dfops;
 	int			error;
 
 	if (ino == NULLFSINO)
@@ -231,7 +230,6 @@ xfs_qm_scall_trunc_qfile(
 		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 		goto out_put;
 	}
-	xfs_defer_init(tp, &dfops);
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, 0);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 04c25ee6b6f8..a653739c9fb2 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -364,7 +364,6 @@ xfs_reflink_allocate_cow(
 	xfs_fileoff_t		offset_fsb = imap->br_startoff;
 	xfs_filblks_t		count_fsb = imap->br_blockcount;
 	struct xfs_bmbt_irec	got;
-	struct xfs_defer_ops	dfops;
 	struct xfs_trans	*tp = NULL;
 	int			nimaps, error = 0;
 	bool			trimmed;
@@ -424,7 +423,6 @@ retry:
 
 	xfs_trans_ijoin(tp, ip, 0);
 
-	xfs_defer_init(tp, &dfops);
 	nimaps = 1;
 
 	/* Allocate the entire reservation as unwritten blocks. */
@@ -432,15 +430,11 @@ retry:
 			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC,
 			resblks, imap, &nimaps);
 	if (error)
-		goto out_bmap_cancel;
+		goto out_trans_cancel;
 
 	xfs_inode_set_cowblocks_tag(ip);
 
 	/* Finish up. */
-	error = xfs_defer_finish(&tp, tp->t_dfops);
-	if (error)
-		goto out_bmap_cancel;
-
 	error = xfs_trans_commit(tp);
 	if (error)
 		return error;
@@ -453,8 +447,7 @@ retry:
 		return -ENOSPC;
 convert:
 	return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb);
-out_bmap_cancel:
-	xfs_defer_cancel(tp->t_dfops);
+out_trans_cancel:
 	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
 			XFS_QMOPT_RES_REGBLKS);
 out:
@@ -624,7 +617,6 @@ xfs_reflink_end_cow(
 	struct xfs_trans		*tp;
 	xfs_fileoff_t			offset_fsb;
 	xfs_fileoff_t			end_fsb;
-	struct xfs_defer_ops		dfops;
 	int				error;
 	unsigned int			resblks;
 	xfs_filblks_t			rlen;
@@ -691,11 +683,11 @@ xfs_reflink_end_cow(
 			goto prev_extent;
 
 		/* Unmap the old blocks in the data fork. */
-		xfs_defer_init(tp, &dfops);
+		ASSERT(tp->t_dfops && tp->t_firstblock == NULLFSBLOCK);
 		rlen = del.br_blockcount;
 		error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1);
 		if (error)
-			goto out_defer;
+			goto out_cancel;
 
 		/* Trim the extent to whatever got unmapped. */
 		if (rlen) {
@@ -708,13 +700,13 @@ xfs_reflink_end_cow(
 		error = xfs_refcount_free_cow_extent(tp->t_mountp, tp->t_dfops,
 				del.br_startblock, del.br_blockcount);
 		if (error)
-			goto out_defer;
+			goto out_cancel;
 
 		/* Map the new blocks into the data fork. */
 		error = xfs_bmap_map_extent(tp->t_mountp, tp->t_dfops, ip,
 					    &del);
 		if (error)
-			goto out_defer;
+			goto out_cancel;
 
 		/* Charge this new data fork mapping to the on-disk quota. */
 		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT,
@@ -726,7 +718,7 @@ xfs_reflink_end_cow(
 		xfs_defer_ijoin(tp->t_dfops, ip);
 		error = xfs_defer_finish(&tp, tp->t_dfops);
 		if (error)
-			goto out_defer;
+			goto out_cancel;
 		if (!xfs_iext_get_extent(ifp, &icur, &got))
 			break;
 		continue;
@@ -741,8 +733,6 @@ prev_extent:
 		goto out;
 	return 0;
 
-out_defer:
-	xfs_defer_cancel(tp->t_dfops);
 out_cancel:
 	xfs_trans_cancel(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -998,7 +988,6 @@ xfs_reflink_remap_extent(
 	bool			real_extent = xfs_bmap_is_real_extent(irec);
 	struct xfs_trans	*tp;
 	unsigned int		resblks;
-	struct xfs_defer_ops	dfops;
 	struct xfs_bmbt_irec	uirec;
 	xfs_filblks_t		rlen;
 	xfs_filblks_t		unmap_len;
@@ -1039,10 +1028,10 @@ xfs_reflink_remap_extent(
 	/* Unmap the old blocks in the data fork. */
 	rlen = unmap_len;
 	while (rlen) {
-		xfs_defer_init(tp, &dfops);
+		ASSERT(tp->t_dfops && tp->t_firstblock == NULLFSBLOCK);
 		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1);
 		if (error)
-			goto out_defer;
+			goto out_cancel;
 
 		/*
 		 * Trim the extent to whatever got unmapped.
@@ -1063,12 +1052,12 @@ xfs_reflink_remap_extent(
 		/* Update the refcount tree */
 		error = xfs_refcount_increase_extent(mp, tp->t_dfops, &uirec);
 		if (error)
-			goto out_defer;
+			goto out_cancel;
 
 		/* Map the new blocks into the data fork. */
 		error = xfs_bmap_map_extent(mp, tp->t_dfops, ip, &uirec);
 		if (error)
-			goto out_defer;
+			goto out_cancel;
 
 		/* Update quota accounting. */
 		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT,
@@ -1090,7 +1079,7 @@ next_extent:
 		xfs_defer_ijoin(tp->t_dfops, ip);
 		error = xfs_defer_finish(&tp, tp->t_dfops);
 		if (error)
-			goto out_defer;
+			goto out_cancel;
 	}
 
 	error = xfs_trans_commit(tp);
@@ -1099,8 +1088,6 @@ next_extent:
 		goto out;
 	return 0;
 
-out_defer:
-	xfs_defer_cancel(tp->t_dfops);
 out_cancel:
 	xfs_trans_cancel(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index bc471d42a968..86d7d2f76226 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -761,7 +761,6 @@ xfs_growfs_rt_alloc(
 	struct xfs_buf		*bp;	/* temporary buffer for zeroing */
 	xfs_daddr_t		d;		/* disk block address */
 	int			error;		/* error return value */
-	struct xfs_defer_ops	dfops;		/* list of freed blocks */
 	xfs_fsblock_t		fsbno;		/* filesystem block for bno */
 	struct xfs_bmbt_irec	map;		/* block map output */
 	int			nmap;		/* number of block maps */
@@ -786,7 +785,6 @@ xfs_growfs_rt_alloc(
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
-		xfs_defer_init(tp, &dfops);
 		/*
 		 * Allocate blocks to the bitmap file.
 		 */
@@ -797,13 +795,10 @@ xfs_growfs_rt_alloc(
 		if (!error && nmap < 1)
 			error = -ENOSPC;
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 		/*
 		 * Free any blocks freed up in the transaction, then commit.
 		 */
-		error = xfs_defer_finish(&tp, tp->t_dfops);
-		if (error)
-			goto out_bmap_cancel;
 		error = xfs_trans_commit(tp);
 		if (error)
 			return error;
@@ -853,8 +848,6 @@ xfs_growfs_rt_alloc(
 
 	return 0;
 
-out_bmap_cancel:
-	xfs_defer_cancel(tp->t_dfops);
 out_trans_cancel:
 	xfs_trans_cancel(tp);
 	return error;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index d1ab0afa2723..ce801aedbcdc 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -163,7 +163,6 @@ xfs_symlink(
 	struct xfs_inode	*ip = NULL;
 	int			error = 0;
 	int			pathlen;
-	struct xfs_defer_ops	dfops;
 	bool                    unlock_dp_on_error = false;
 	xfs_fileoff_t		first_fsb;
 	xfs_filblks_t		fs_blocks;
@@ -241,12 +240,6 @@ xfs_symlink(
 	if (error)
 		goto out_trans_cancel;
 
-	/*
-	 * Initialize the bmap freelist prior to calling either
-	 * bmapi or the directory create code.
-	 */
-	xfs_defer_init(tp, &dfops);
-
 	/*
 	 * Allocate an inode for the symlink.
 	 */
@@ -290,7 +283,7 @@ xfs_symlink(
 		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
 				  XFS_BMAPI_METADATA, resblks, mval, &nmaps);
 		if (error)
-			goto out_bmap_cancel;
+			goto out_trans_cancel;
 
 		if (resblks)
 			resblks -= fs_blocks;
@@ -308,7 +301,7 @@ xfs_symlink(
 					       BTOBB(byte_cnt), 0);
 			if (!bp) {
 				error = -ENOMEM;
-				goto out_bmap_cancel;
+				goto out_trans_cancel;
 			}
 			bp->b_ops = &xfs_symlink_buf_ops;
 
@@ -337,7 +330,7 @@ xfs_symlink(
 	 */
 	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, resblks);
 	if (error)
-		goto out_bmap_cancel;
+		goto out_trans_cancel;
 	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 
@@ -350,10 +343,6 @@ xfs_symlink(
 		xfs_trans_set_sync(tp);
 	}
 
-	error = xfs_defer_finish(&tp, tp->t_dfops);
-	if (error)
-		goto out_bmap_cancel;
-
 	error = xfs_trans_commit(tp);
 	if (error)
 		goto out_release_inode;
@@ -365,8 +354,6 @@ xfs_symlink(
 	*ipp = ip;
 	return 0;
 
-out_bmap_cancel:
-	xfs_defer_cancel(tp->t_dfops);
 out_trans_cancel:
 	xfs_trans_cancel(tp);
 out_release_inode:
@@ -399,7 +386,6 @@ xfs_inactive_symlink_rmt(
 	xfs_buf_t	*bp;
 	int		done;
 	int		error;
-	struct xfs_defer_ops	dfops;
 	int		i;
 	xfs_mount_t	*mp;
 	xfs_bmbt_irec_t	mval[XFS_SYMLINK_MAPS];
@@ -438,7 +424,6 @@ xfs_inactive_symlink_rmt(
 	 * Find the block(s) so we can inval and unmap them.
 	 */
 	done = 0;
-	xfs_defer_init(tp, &dfops);
 	nmaps = ARRAY_SIZE(mval);
 	error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),
 				mval, &nmaps, 0);
@@ -453,7 +438,7 @@ xfs_inactive_symlink_rmt(
 			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
 		if (!bp) {
 			error = -ENOMEM;
-			goto error_bmap_cancel;
+			goto error_trans_cancel;
 		}
 		xfs_trans_binval(tp, bp);
 	}
@@ -462,19 +447,14 @@ xfs_inactive_symlink_rmt(
 	 */
 	error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, &done);
 	if (error)
-		goto error_bmap_cancel;
+		goto error_trans_cancel;
 	ASSERT(done);
-	/*
-	 * Commit the first transaction.  This logs the EFI and the inode.
-	 */
-	xfs_defer_ijoin(tp->t_dfops, ip);
-	error = xfs_defer_finish(&tp, tp->t_dfops);
-	if (error)
-		goto error_bmap_cancel;
 
 	/*
-	 * Commit the transaction containing extent freeing and EFDs.
+	 * Commit the transaction. This first logs the EFI and the inode, then
+	 * rolls and commits the transaction that frees the extents.
 	 */
+	xfs_defer_ijoin(tp->t_dfops, ip);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	error = xfs_trans_commit(tp);
 	if (error) {
@@ -492,8 +472,6 @@ xfs_inactive_symlink_rmt(
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return 0;
 
-error_bmap_cancel:
-	xfs_defer_cancel(tp->t_dfops);
 error_trans_cancel:
 	xfs_trans_cancel(tp);
 error_unlock:
-- 
cgit v1.2.3


From d5cca7eb244d276177a57e42494d479742bbba37 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:14 -0700
Subject: xfs: remove unnecessary dfops init calls in xattr code

Each xfs_defer_init() call in the xattr code uses the internal dfops
reference. In addition, a successful xfs_defer_finish() always
returns with a reset xfs_defer_ops structure.

Given that along with the fact that every xfs_defer_init() call in
the xattr code is followed up by an xfs_defer_finish(), the former
calls are no longer necessary and can be removed.

Note that the xfs_defer_init() call in the remote value copy loop of
xfs_attr_rmtval_set() is not followed by a finish, but the dfops is
unused in this instance.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c        | 8 --------
 fs/xfs/libxfs/xfs_attr_remote.c | 3 ---
 2 files changed, 11 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 66a22c80a0db..3e98f0af389c 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -587,7 +587,6 @@ xfs_attr_leaf_addname(
 		 * Commit that transaction so that the node_addname() call
 		 * can manage its own transactions.
 		 */
-		xfs_defer_init(args->trans, args->trans->t_dfops);
 		error = xfs_attr3_leaf_to_node(args);
 		if (error)
 			goto out_defer_cancel;
@@ -676,7 +675,6 @@ xfs_attr_leaf_addname(
 		 * If the result is small enough, shrink it all into the inode.
 		 */
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-			xfs_defer_init(args->trans, args->trans->t_dfops);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
@@ -741,7 +739,6 @@ xfs_attr_leaf_removename(
 	 * If the result is small enough, shrink it all into the inode.
 	 */
 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-		xfs_defer_init(args->trans, args->trans->t_dfops);
 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 		/* bp is gone due to xfs_da_shrink_inode */
 		if (error)
@@ -870,7 +867,6 @@ restart:
 			 */
 			xfs_da_state_free(state);
 			state = NULL;
-			xfs_defer_init(args->trans, args->trans->t_dfops);
 			error = xfs_attr3_leaf_to_node(args);
 			if (error)
 				goto out_defer_cancel;
@@ -897,7 +893,6 @@ restart:
 		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
 		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
 		 */
-		xfs_defer_init(args->trans, args->trans->t_dfops);
 		error = xfs_da3_split(state);
 		if (error)
 			goto out_defer_cancel;
@@ -995,7 +990,6 @@ restart:
 		 * Check to see if the tree needs to be collapsed.
 		 */
 		if (retval && (state->path.active > 1)) {
-			xfs_defer_init(args->trans, args->trans->t_dfops);
 			error = xfs_da3_join(state);
 			if (error)
 				goto out_defer_cancel;
@@ -1120,7 +1114,6 @@ xfs_attr_node_removename(
 	 * Check to see if the tree needs to be collapsed.
 	 */
 	if (retval && (state->path.active > 1)) {
-		xfs_defer_init(args->trans, args->trans->t_dfops);
 		error = xfs_da3_join(state);
 		if (error)
 			goto out_defer_cancel;
@@ -1152,7 +1145,6 @@ xfs_attr_node_removename(
 			goto out;
 
 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-			xfs_defer_init(args->trans, args->trans->t_dfops);
 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 829ab20f0cd7..0fbfb740949e 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -480,7 +480,6 @@ xfs_attr_rmtval_set(
 		 * extent and then crash then the block may not contain the
 		 * correct metadata after log recovery occurs.
 		 */
-		xfs_defer_init(args->trans, args->trans->t_dfops);
 		nmap = 1;
 		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
 				  blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map,
@@ -522,7 +521,6 @@ xfs_attr_rmtval_set(
 
 		ASSERT(blkcnt > 0);
 
-		xfs_defer_init(args->trans, args->trans->t_dfops);
 		nmap = 1;
 		error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
 				       blkcnt, &map, &nmap,
@@ -625,7 +623,6 @@ xfs_attr_rmtval_remove(
 	blkcnt = args->rmtblkcnt;
 	done = 0;
 	while (!done) {
-		xfs_defer_init(args->trans, args->trans->t_dfops);
 		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
 				    XFS_BMAPI_ATTRFORK, 1, &done);
 		if (error)
-- 
cgit v1.2.3


From 9e28a242be65b8274742425ca5d146f366205a90 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:15 -0700
Subject: xfs: drop unnecessary xfs_defer_finish() dfops parameter

Every caller of xfs_defer_finish() now passes the transaction and
its associated ->t_dfops. The xfs_defer_ops parameter is therefore
no longer necessary and can be removed.

Since most xfs_defer_finish() callers also have to consider
xfs_defer_cancel() on error, update the latter to also receive the
transaction for consistency. The log recovery code contains an
outlier case that cancels a dfops directly without an available
transaction. Retain an internal wrapper to support this outlier case
for the time being.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c        | 27 +++++++++++++--------------
 fs/xfs/libxfs/xfs_attr_remote.c |  8 ++++----
 fs/xfs/libxfs/xfs_defer.c       |  7 +++----
 fs/xfs/libxfs/xfs_defer.h       |  4 ++--
 fs/xfs/xfs_bmap_util.c          |  4 ++--
 fs/xfs/xfs_dquot.c              |  4 ++--
 fs/xfs/xfs_inode.c              |  4 ++--
 fs/xfs/xfs_log_recover.c        |  2 +-
 fs/xfs/xfs_reflink.c            |  8 ++++----
 fs/xfs/xfs_trans.c              | 13 ++++++++++---
 fs/xfs/xfs_trans.h              |  3 +++
 11 files changed, 46 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 3e98f0af389c..3deb5cdadf08 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -322,7 +322,7 @@ xfs_attr_set(
 		xfs_trans_bhold(args.trans, leaf_bp);
 		xfs_defer_bjoin(args.trans->t_dfops, leaf_bp);
 		xfs_defer_ijoin(args.trans->t_dfops, dp);
-		error = xfs_defer_finish(&args.trans, args.trans->t_dfops);
+		error = xfs_defer_finish(&args.trans);
 		if (error)
 			goto out;
 
@@ -591,7 +591,7 @@ xfs_attr_leaf_addname(
 		if (error)
 			goto out_defer_cancel;
 		xfs_defer_ijoin(args->trans->t_dfops, dp);
-		error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
+		error = xfs_defer_finish(&args->trans);
 		if (error)
 			goto out_defer_cancel;
 
@@ -680,7 +680,7 @@ xfs_attr_leaf_addname(
 			if (error)
 				goto out_defer_cancel;
 			xfs_defer_ijoin(args->trans->t_dfops, dp);
-			error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
+			error = xfs_defer_finish(&args->trans);
 			if (error)
 				goto out_defer_cancel;
 		}
@@ -698,7 +698,7 @@ xfs_attr_leaf_addname(
 	}
 	return error;
 out_defer_cancel:
-	xfs_defer_cancel(args->trans->t_dfops);
+	xfs_defer_cancel(args->trans);
 	return error;
 }
 
@@ -744,13 +744,13 @@ xfs_attr_leaf_removename(
 		if (error)
 			goto out_defer_cancel;
 		xfs_defer_ijoin(args->trans->t_dfops, dp);
-		error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
+		error = xfs_defer_finish(&args->trans);
 		if (error)
 			goto out_defer_cancel;
 	}
 	return 0;
 out_defer_cancel:
-	xfs_defer_cancel(args->trans->t_dfops);
+	xfs_defer_cancel(args->trans);
 	return error;
 }
 
@@ -871,8 +871,7 @@ restart:
 			if (error)
 				goto out_defer_cancel;
 			xfs_defer_ijoin(args->trans->t_dfops, dp);
-			error = xfs_defer_finish(&args->trans,
-						 args->trans->t_dfops);
+			error = xfs_defer_finish(&args->trans);
 			if (error)
 				goto out_defer_cancel;
 
@@ -897,7 +896,7 @@ restart:
 		if (error)
 			goto out_defer_cancel;
 		xfs_defer_ijoin(args->trans->t_dfops, dp);
-		error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
+		error = xfs_defer_finish(&args->trans);
 		if (error)
 			goto out_defer_cancel;
 	} else {
@@ -994,7 +993,7 @@ restart:
 			if (error)
 				goto out_defer_cancel;
 			xfs_defer_ijoin(args->trans->t_dfops, dp);
-			error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
+			error = xfs_defer_finish(&args->trans);
 			if (error)
 				goto out_defer_cancel;
 		}
@@ -1023,7 +1022,7 @@ out:
 		return error;
 	return retval;
 out_defer_cancel:
-	xfs_defer_cancel(args->trans->t_dfops);
+	xfs_defer_cancel(args->trans);
 	goto out;
 }
 
@@ -1118,7 +1117,7 @@ xfs_attr_node_removename(
 		if (error)
 			goto out_defer_cancel;
 		xfs_defer_ijoin(args->trans->t_dfops, dp);
-		error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
+		error = xfs_defer_finish(&args->trans);
 		if (error)
 			goto out_defer_cancel;
 		/*
@@ -1150,7 +1149,7 @@ xfs_attr_node_removename(
 			if (error)
 				goto out_defer_cancel;
 			xfs_defer_ijoin(args->trans->t_dfops, dp);
-			error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
+			error = xfs_defer_finish(&args->trans);
 			if (error)
 				goto out_defer_cancel;
 		} else
@@ -1162,7 +1161,7 @@ out:
 	xfs_da_state_free(state);
 	return error;
 out_defer_cancel:
-	xfs_defer_cancel(args->trans->t_dfops);
+	xfs_defer_cancel(args->trans);
 	goto out;
 }
 
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 0fbfb740949e..77ca38586913 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -487,7 +487,7 @@ xfs_attr_rmtval_set(
 		if (error)
 			goto out_defer_cancel;
 		xfs_defer_ijoin(args->trans->t_dfops, dp);
-		error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
+		error = xfs_defer_finish(&args->trans);
 		if (error)
 			goto out_defer_cancel;
 
@@ -555,7 +555,7 @@ xfs_attr_rmtval_set(
 	ASSERT(valuelen == 0);
 	return 0;
 out_defer_cancel:
-	xfs_defer_cancel(args->trans->t_dfops);
+	xfs_defer_cancel(args->trans);
 	return error;
 }
 
@@ -628,7 +628,7 @@ xfs_attr_rmtval_remove(
 		if (error)
 			goto out_defer_cancel;
 		xfs_defer_ijoin(args->trans->t_dfops, args->dp);
-		error = xfs_defer_finish(&args->trans, args->trans->t_dfops);
+		error = xfs_defer_finish(&args->trans);
 		if (error)
 			goto out_defer_cancel;
 
@@ -641,6 +641,6 @@ xfs_attr_rmtval_remove(
 	}
 	return 0;
 out_defer_cancel:
-	xfs_defer_cancel(args->trans->t_dfops);
+	xfs_defer_cancel(args->trans);
 	return error;
 }
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index b63cc9e730da..cbee0a86c978 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -342,9 +342,9 @@ xfs_defer_reset(
  */
 int
 xfs_defer_finish(
-	struct xfs_trans		**tp,
-	struct xfs_defer_ops		*dop)
+	struct xfs_trans		**tp)
 {
+	struct xfs_defer_ops		*dop = (*tp)->t_dfops;
 	struct xfs_defer_pending	*dfp;
 	struct list_head		*li;
 	struct list_head		*n;
@@ -353,7 +353,6 @@ xfs_defer_finish(
 	void				(*cleanup_fn)(struct xfs_trans *, void *, int);
 
 	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
-	ASSERT((*tp)->t_dfops == dop);
 
 	trace_xfs_defer_finish((*tp)->t_mountp, dop, _RET_IP_);
 
@@ -454,7 +453,7 @@ out:
  * Free up any items left in the list.
  */
 void
-xfs_defer_cancel(
+__xfs_defer_cancel(
 	struct xfs_defer_ops		*dop)
 {
 	struct xfs_defer_pending	*dfp;
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 35507ca9a148..56f927803940 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -48,8 +48,8 @@ enum xfs_defer_ops_type {
 
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
 		struct list_head *h);
-int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop);
-void xfs_defer_cancel(struct xfs_defer_ops *dop);
+int xfs_defer_finish(struct xfs_trans **tp);
+void __xfs_defer_cancel(struct xfs_defer_ops *dop);
 void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index ddb5f1200d3d..c32ec17048f5 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1624,7 +1624,7 @@ xfs_swap_extent_rmap(
 				goto out_defer;
 
 			xfs_defer_ijoin(tp->t_dfops, ip);
-			error = xfs_defer_finish(tpp, tp->t_dfops);
+			error = xfs_defer_finish(tpp);
 			tp = *tpp;
 			if (error)
 				goto out_defer;
@@ -1645,7 +1645,7 @@ xfs_swap_extent_rmap(
 	return 0;
 
 out_defer:
-	xfs_defer_cancel(tp->t_dfops);
+	xfs_defer_cancel(tp);
 out:
 	trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
 	tip->i_d.di_flags2 = tip_flags2;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index a57d5e8c3118..da5c55cec966 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -368,7 +368,7 @@ xfs_dquot_disk_alloc(
 		xfs_trans_brelse(tp, bp);
 		goto error1;
 	}
-	error = xfs_defer_finish(tpp, tp->t_dfops);
+	error = xfs_defer_finish(tpp);
 	tp = *tpp;
 	if (error) {
 		xfs_buf_relse(bp);
@@ -378,7 +378,7 @@ xfs_dquot_disk_alloc(
 	return 0;
 
 error1:
-	xfs_defer_cancel(tp->t_dfops);
+	xfs_defer_cancel(tp);
 error0:
 	return error;
 }
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c47183a2f167..0e4bd559a6a7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1570,7 +1570,7 @@ xfs_itruncate_extents_flags(
 		 * reservation and commit the old transaction.
 		 */
 		xfs_defer_ijoin(tp->t_dfops, ip);
-		error = xfs_defer_finish(&tp, tp->t_dfops);
+		error = xfs_defer_finish(&tp);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -1606,7 +1606,7 @@ out_bmap_cancel:
 	 * the transaction can be properly aborted.  We just need to make sure
 	 * we're not holding any resources that we were not when we came in.
 	 */
-	xfs_defer_cancel(tp->t_dfops);
+	xfs_defer_cancel(tp);
 	goto out;
 }
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 265e1f561157..94908a4019e1 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4946,7 +4946,7 @@ out:
 	xfs_trans_ail_cursor_done(&cur);
 	spin_unlock(&ailp->ail_lock);
 	if (error)
-		xfs_defer_cancel(&dfops);
+		__xfs_defer_cancel(&dfops);
 	else
 		error = xlog_finish_defer_ops(log->l_mp, &dfops);
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index a653739c9fb2..68b6921dc3f6 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -518,9 +518,9 @@ xfs_reflink_cancel_cow_blocks(
 
 			/* Roll the transaction */
 			xfs_defer_ijoin((*tpp)->t_dfops, ip);
-			error = xfs_defer_finish(tpp, (*tpp)->t_dfops);
+			error = xfs_defer_finish(tpp);
 			if (error) {
-				xfs_defer_cancel((*tpp)->t_dfops);
+				xfs_defer_cancel(*tpp);
 				break;
 			}
 
@@ -716,7 +716,7 @@ xfs_reflink_end_cow(
 		xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
 
 		xfs_defer_ijoin(tp->t_dfops, ip);
-		error = xfs_defer_finish(&tp, tp->t_dfops);
+		error = xfs_defer_finish(&tp);
 		if (error)
 			goto out_cancel;
 		if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -1077,7 +1077,7 @@ xfs_reflink_remap_extent(
 next_extent:
 		/* Process all the deferred stuff. */
 		xfs_defer_ijoin(tp->t_dfops, ip);
-		error = xfs_defer_finish(&tp, tp->t_dfops);
+		error = xfs_defer_finish(&tp);
 		if (error)
 			goto out_cancel;
 	}
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 412c8d236c71..cd553aa9ecb0 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -933,9 +933,9 @@ __xfs_trans_commit(
 
 	/* finish deferred items on final commit */
 	if (!regrant && tp->t_dfops) {
-		error = xfs_defer_finish(&tp, tp->t_dfops);
+		error = xfs_defer_finish(&tp);
 		if (error) {
-			xfs_defer_cancel(tp->t_dfops);
+			xfs_defer_cancel(tp);
 			goto out_unreserve;
 		}
 	}
@@ -1030,7 +1030,7 @@ xfs_trans_cancel(
 	trace_xfs_trans_cancel(tp, _RET_IP_);
 
 	if (tp->t_dfops)
-		xfs_defer_cancel(tp->t_dfops);
+		xfs_defer_cancel(tp);
 
 	/*
 	 * See if the caller is relying on us to shut down the
@@ -1111,3 +1111,10 @@ xfs_trans_roll(
 	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
 	return xfs_trans_reserve(*tpp, &tres, 0, 0);
 }
+
+void
+xfs_defer_cancel(
+	struct xfs_trans	*tp)
+{
+	__xfs_defer_cancel(tp->t_dfops);
+}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index dc79e3c1d3e8..5170e89bec02 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -214,6 +214,9 @@ xfs_trans_read_buf(
 				      flags, bpp, ops);
 }
 
+/* cancel dfops associated with a transaction */
+void xfs_defer_cancel(struct xfs_trans *);
+
 struct xfs_buf	*xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
 
 void		xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
-- 
cgit v1.2.3


From b277c37f43dd387f7430a2186deda0e58c943087 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 24 Jul 2018 13:43:15 -0700
Subject: xfs: bypass final dfops roll in trans commit path

Once xfs_defer_finish() has completed all deferred operations, it
checks the dirty state of the transaction and rolls it once more to
return a clean transaction for the caller. This is primarily to cover
the case where repeated xfs_defer_finish() calls are made in a loop
and we need to make sure that the caller starts the next iteration
with a clean transaction. Otherwise we risk transaction reservation
overrun.

This final transaction roll is not required in the transaction
commit path, however, because the transaction is immediately
committed and freed after dfops completion. Refactor the final roll
into a separate helper such that we can avoid it in the transaction
commit path.  Lift the dfops reset as well so dfops remains valid
until after the last call to xfs_defer_trans_roll(). The reset is
also unnecessary in the transaction commit path because the
transaction is about to complete.

This eliminates unnecessary regrants of transactions where the
associated transaction roll can be replaced by a transaction commit.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_defer.c | 38 +++++++++++++++++++++++++-------------
 fs/xfs/libxfs/xfs_defer.h |  1 +
 fs/xfs/xfs_trans.c        |  2 +-
 3 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index cbee0a86c978..a5f7dc18a62f 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -341,7 +341,7 @@ xfs_defer_reset(
  * If an inode is provided, relog it to the new transaction.
  */
 int
-xfs_defer_finish(
+xfs_defer_finish_noroll(
 	struct xfs_trans		**tp)
 {
 	struct xfs_defer_ops		*dop = (*tp)->t_dfops;
@@ -430,25 +430,37 @@ xfs_defer_finish(
 			cleanup_fn(*tp, state, error);
 	}
 
-	/*
-	 * Roll the transaction once more to avoid returning to the caller
-	 * with a dirty transaction.
-	 */
-	if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
-		error = xfs_defer_trans_roll(tp);
-		dop = (*tp)->t_dfops;
-	}
 out:
-	if (error) {
+	if (error)
 		trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error);
-	} else {
+	 else
 		trace_xfs_defer_finish_done((*tp)->t_mountp, dop, _RET_IP_);
-		xfs_defer_reset(dop);
-	}
 
 	return error;
 }
 
+int
+xfs_defer_finish(
+	struct xfs_trans	**tp)
+{
+	int			error;
+
+	/*
+	 * Finish and roll the transaction once more to avoid returning to the
+	 * caller with a dirty transaction.
+	 */
+	error = xfs_defer_finish_noroll(tp);
+	if (error)
+		return error;
+	if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
+		error = xfs_defer_trans_roll(tp);
+		if (error)
+			return error;
+	}
+	xfs_defer_reset((*tp)->t_dfops);
+	return 0;
+}
+
 /*
  * Free up any items left in the list.
  */
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 56f927803940..85c41fe4dbae 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -48,6 +48,7 @@ enum xfs_defer_ops_type {
 
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
 		struct list_head *h);
+int xfs_defer_finish_noroll(struct xfs_trans **tp);
 int xfs_defer_finish(struct xfs_trans **tp);
 void __xfs_defer_cancel(struct xfs_defer_ops *dop);
 void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index cd553aa9ecb0..7bf5c1202719 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -933,7 +933,7 @@ __xfs_trans_commit(
 
 	/* finish deferred items on final commit */
 	if (!regrant && tp->t_dfops) {
-		error = xfs_defer_finish(&tp);
+		error = xfs_defer_finish_noroll(&tp);
 		if (error) {
 			xfs_defer_cancel(tp);
 			goto out_unreserve;
-- 
cgit v1.2.3


From 89c3e8cf3c266d61347abcc412b9330d81da794b Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 25 Jul 2018 12:51:48 -0700
Subject: xfs: kill IHOLD

Nobody uses this macro, get rid of it.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/xfs/xfs_inode.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index b1f0e8394f3b..c43abf4ea3fc 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -483,13 +483,6 @@ static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
 	xfs_finish_inode_setup(ip);
 }
 
-#define IHOLD(ip) \
-do { \
-	ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
-	ihold(VFS_I(ip)); \
-	trace_xfs_ihold(ip, _THIS_IP_); \
-} while (0)
-
 #define IRELE(ip) \
 do { \
 	trace_xfs_irele(ip, _THIS_IP_); \
-- 
cgit v1.2.3


From 44a8736bd20a08e1adbf479d11f8198a1243958d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 25 Jul 2018 12:52:32 -0700
Subject: xfs: clean up IRELE/iput callsites

Replace the IRELE macro with a proper function so that we can do proper
typechecking and so that we can stop open-coding iput in scrub, which
means that we'll be able to ftrace inode lifetimes going through scrub
correctly.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/scrub/common.c    |  2 +-
 fs/xfs/scrub/dir.c       |  2 +-
 fs/xfs/scrub/parent.c    |  6 +++---
 fs/xfs/scrub/scrub.c     |  2 +-
 fs/xfs/xfs_bmap_item.c   |  4 ++--
 fs/xfs/xfs_export.c      |  2 +-
 fs/xfs/xfs_filestream.c  |  4 ++--
 fs/xfs/xfs_icache.c      |  4 ++--
 fs/xfs/xfs_inode.c       | 17 +++++++++++++----
 fs/xfs/xfs_inode.h       |  6 +-----
 fs/xfs/xfs_iops.c        |  4 ++--
 fs/xfs/xfs_itable.c      |  2 +-
 fs/xfs/xfs_log_recover.c |  4 ++--
 fs/xfs/xfs_mount.c       |  4 ++--
 fs/xfs/xfs_qm.c          | 22 +++++++++++-----------
 fs/xfs/xfs_qm_syscalls.c |  8 ++++----
 fs/xfs/xfs_quotaops.c    |  2 +-
 fs/xfs/xfs_rtalloc.c     |  6 +++---
 fs/xfs/xfs_symlink.c     |  2 +-
 19 files changed, 54 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index baac08304a5a..346b02abccf7 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -711,7 +711,7 @@ xchk_get_inode(
 		return error;
 	}
 	if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
-		iput(VFS_I(ip));
+		xfs_irele(ip);
 		return -ENOENT;
 	}
 
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index f58709052b03..cd3e4d768a18 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -87,7 +87,7 @@ xchk_dir_check_ftype(
 			xfs_mode_to_ftype(VFS_I(ip)->i_mode));
 	if (ino_dtype != dtype)
 		xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
-	iput(VFS_I(ip));
+	xfs_irele(ip);
 out:
 	return error;
 }
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index aacb0284c48a..1c9d7c7f64f5 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -230,11 +230,11 @@ xchk_parent_validate(
 
 	/* Drat, parent changed.  Try again! */
 	if (dnum != dp->i_ino) {
-		iput(VFS_I(dp));
+		xfs_irele(dp);
 		*try_again = true;
 		return 0;
 	}
-	iput(VFS_I(dp));
+	xfs_irele(dp);
 
 	/*
 	 * '..' didn't change, so check that there was only one entry
@@ -247,7 +247,7 @@ xchk_parent_validate(
 out_unlock:
 	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
 out_rele:
-	iput(VFS_I(dp));
+	xfs_irele(dp);
 out:
 	return error;
 }
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 5956b8073e2f..6efb926f3cf8 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -183,7 +183,7 @@ xchk_teardown(
 			xfs_iunlock(sc->ip, sc->ilock_flags);
 		if (sc->ip != ip_in &&
 		    !xfs_internal_inum(sc->mp, sc->ip->i_ino))
-			iput(VFS_I(sc->ip));
+			xfs_irele(sc->ip);
 		sc->ip = NULL;
 	}
 	if (sc->has_quotaofflock)
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index bc5eb2e0ab0c..e1d6c127b07d 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -497,7 +497,7 @@ xfs_bui_recover(
 	xfs_defer_move(dfops, tp->t_dfops);
 	error = xfs_trans_commit(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	IRELE(ip);
+	xfs_irele(ip);
 
 	return error;
 
@@ -506,7 +506,7 @@ err_inode:
 	xfs_trans_cancel(tp);
 	if (ip) {
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		IRELE(ip);
+		xfs_irele(ip);
 	}
 	return error;
 }
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 3cf4682e2510..f2284ceb129f 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -150,7 +150,7 @@ xfs_nfs_get_inode(
 	}
 
 	if (VFS_I(ip)->i_generation != generation) {
-		IRELE(ip);
+		xfs_irele(ip);
 		return ERR_PTR(-ESTALE);
 	}
 
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 56a3999cefae..212173c62588 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -340,7 +340,7 @@ xfs_filestream_lookup_ag(
 	if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0))
 		ag = NULLAGNUMBER;
 out:
-	IRELE(pip);
+	xfs_irele(pip);
 	return ag;
 }
 
@@ -389,7 +389,7 @@ xfs_filestream_new_ag(
 	if (mru)
 		xfs_fstrm_free_func(mp, mru);
 
-	IRELE(pip);
+	xfs_irele(pip);
 exit:
 	if (*agp == NULLAGNUMBER)
 		*agp = 0;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 47f417d20a30..8de94ecd73ae 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -716,7 +716,7 @@ xfs_icache_inode_is_allocated(
 		return error;
 
 	*inuse = !!(VFS_I(ip)->i_mode);
-	IRELE(ip);
+	xfs_irele(ip);
 	return 0;
 }
 
@@ -856,7 +856,7 @@ restart:
 			    xfs_iflags_test(batch[i], XFS_INEW))
 				xfs_inew_wait(batch[i]);
 			error = execute(batch[i], flags, args);
-			IRELE(batch[i]);
+			xfs_irele(batch[i]);
 			if (error == -EAGAIN) {
 				skipped++;
 				continue;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 0e4bd559a6a7..64c694d2b2a5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1276,7 +1276,7 @@ xfs_create(
 	 */
 	if (ip) {
 		xfs_finish_inode_setup(ip);
-		IRELE(ip);
+		xfs_irele(ip);
 	}
 
 	xfs_qm_dqrele(udqp);
@@ -1371,7 +1371,7 @@ xfs_create_tmpfile(
 	 */
 	if (ip) {
 		xfs_finish_inode_setup(ip);
-		IRELE(ip);
+		xfs_irele(ip);
 	}
 
 	xfs_qm_dqrele(udqp);
@@ -3134,14 +3134,14 @@ xfs_rename(
 
 	error = xfs_finish_rename(tp);
 	if (wip)
-		IRELE(wip);
+		xfs_irele(wip);
 	return error;
 
 out_trans_cancel:
 	xfs_trans_cancel(tp);
 out_release_wip:
 	if (wip)
-		IRELE(wip);
+		xfs_irele(wip);
 	return error;
 }
 
@@ -3597,3 +3597,12 @@ xfs_iflush_int(
 corrupt_out:
 	return -EFSCORRUPTED;
 }
+
+/* Release an inode. */
+void
+xfs_irele(
+	struct xfs_inode	*ip)
+{
+	trace_xfs_irele(ip, _RET_IP_);
+	iput(VFS_I(ip));
+}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index c43abf4ea3fc..8db34d6f2835 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -483,11 +483,7 @@ static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
 	xfs_finish_inode_setup(ip);
 }
 
-#define IRELE(ip) \
-do { \
-	trace_xfs_irele(ip, _THIS_IP_); \
-	iput(VFS_I(ip)); \
-} while (0)
+void xfs_irele(struct xfs_inode *ip);
 
 extern struct kmem_zone	*xfs_inode_zone;
 
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 2eac22bfad6a..0ef5ad7fb851 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -209,7 +209,7 @@ xfs_generic_create(
 	xfs_finish_inode_setup(ip);
 	if (!tmpfile)
 		xfs_cleanup_inode(dir, inode, dentry);
-	iput(inode);
+	xfs_irele(ip);
 	goto out_free_acl;
 }
 
@@ -391,7 +391,7 @@ xfs_vn_symlink(
  out_cleanup_inode:
 	xfs_finish_inode_setup(cip);
 	xfs_cleanup_inode(dir, inode, dentry);
-	iput(inode);
+	xfs_irele(cip);
  out:
 	return error;
 }
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 65810827a8d0..e9508ba01ed1 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -114,7 +114,7 @@ xfs_bulkstat_one_int(
 		break;
 	}
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	IRELE(ip);
+	xfs_irele(ip);
 
 	error = formatter(buffer, ubsize, ubused, buf);
 	if (!error)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 94908a4019e1..7776fde9430c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -5087,11 +5087,11 @@ xlog_recover_process_one_iunlink(
 	 */
 	ip->i_d.di_dmevmask = 0;
 
-	IRELE(ip);
+	xfs_irele(ip);
 	return agino;
 
  fail_iput:
-	IRELE(ip);
+	xfs_irele(ip);
  fail:
 	/*
 	 * We can't read in the inode this bucket points to, or this inode
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4fb361cde32a..8f739e4d0d1c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1039,7 +1039,7 @@ xfs_mountfs(
  out_rtunmount:
 	xfs_rtunmount_inodes(mp);
  out_rele_rip:
-	IRELE(rip);
+	xfs_irele(rip);
 	/* Clean out dquots that might be in memory after quotacheck. */
 	xfs_qm_unmount(mp);
 	/*
@@ -1095,7 +1095,7 @@ xfs_unmountfs(
 	xfs_fs_unreserve_ag_blocks(mp);
 	xfs_qm_unmount_quotas(mp);
 	xfs_rtunmount_inodes(mp);
-	IRELE(mp->m_rootip);
+	xfs_irele(mp->m_rootip);
 
 	/*
 	 * We can potentially deadlock here if we have an inode cluster
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 9ceb85cce33a..52ed7904df10 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -231,15 +231,15 @@ xfs_qm_unmount_quotas(
 	 */
 	if (mp->m_quotainfo) {
 		if (mp->m_quotainfo->qi_uquotaip) {
-			IRELE(mp->m_quotainfo->qi_uquotaip);
+			xfs_irele(mp->m_quotainfo->qi_uquotaip);
 			mp->m_quotainfo->qi_uquotaip = NULL;
 		}
 		if (mp->m_quotainfo->qi_gquotaip) {
-			IRELE(mp->m_quotainfo->qi_gquotaip);
+			xfs_irele(mp->m_quotainfo->qi_gquotaip);
 			mp->m_quotainfo->qi_gquotaip = NULL;
 		}
 		if (mp->m_quotainfo->qi_pquotaip) {
-			IRELE(mp->m_quotainfo->qi_pquotaip);
+			xfs_irele(mp->m_quotainfo->qi_pquotaip);
 			mp->m_quotainfo->qi_pquotaip = NULL;
 		}
 	}
@@ -1200,12 +1200,12 @@ xfs_qm_dqusage_adjust(
 			goto error0;
 	}
 
-	IRELE(ip);
+	xfs_irele(ip);
 	*res = BULKSTAT_RV_DIDONE;
 	return 0;
 
 error0:
-	IRELE(ip);
+	xfs_irele(ip);
 	*res = BULKSTAT_RV_GIVEUP;
 	return error;
 }
@@ -1575,11 +1575,11 @@ xfs_qm_init_quotainos(
 
 error_rele:
 	if (uip)
-		IRELE(uip);
+		xfs_irele(uip);
 	if (gip)
-		IRELE(gip);
+		xfs_irele(gip);
 	if (pip)
-		IRELE(pip);
+		xfs_irele(pip);
 	return error;
 }
 
@@ -1588,15 +1588,15 @@ xfs_qm_destroy_quotainos(
 	xfs_quotainfo_t	*qi)
 {
 	if (qi->qi_uquotaip) {
-		IRELE(qi->qi_uquotaip);
+		xfs_irele(qi->qi_uquotaip);
 		qi->qi_uquotaip = NULL; /* paranoia */
 	}
 	if (qi->qi_gquotaip) {
-		IRELE(qi->qi_gquotaip);
+		xfs_irele(qi->qi_gquotaip);
 		qi->qi_gquotaip = NULL;
 	}
 	if (qi->qi_pquotaip) {
-		IRELE(qi->qi_pquotaip);
+		xfs_irele(qi->qi_pquotaip);
 		qi->qi_pquotaip = NULL;
 	}
 }
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index c07c5a39d516..b3190890f096 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -190,15 +190,15 @@ xfs_qm_scall_quotaoff(
 	 * Release our quotainode references if we don't need them anymore.
 	 */
 	if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
-		IRELE(q->qi_uquotaip);
+		xfs_irele(q->qi_uquotaip);
 		q->qi_uquotaip = NULL;
 	}
 	if ((dqtype & XFS_QMOPT_GQUOTA) && q->qi_gquotaip) {
-		IRELE(q->qi_gquotaip);
+		xfs_irele(q->qi_gquotaip);
 		q->qi_gquotaip = NULL;
 	}
 	if ((dqtype & XFS_QMOPT_PQUOTA) && q->qi_pquotaip) {
-		IRELE(q->qi_pquotaip);
+		xfs_irele(q->qi_pquotaip);
 		q->qi_pquotaip = NULL;
 	}
 
@@ -251,7 +251,7 @@ xfs_qm_scall_trunc_qfile(
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 out_put:
-	IRELE(ip);
+	xfs_irele(ip);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 205fbb2a77e4..a7c0c657dfaf 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -45,7 +45,7 @@ xfs_qm_fill_state(
 	tstate->ino_warnlimit = q->qi_iwarnlimit;
 	tstate->rt_spc_warnlimit = q->qi_rtbwarnlimit;
 	if (tempqip)
-		IRELE(ip);
+		xfs_irele(ip);
 }
 
 /*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 86d7d2f76226..926ed314ffba 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1207,7 +1207,7 @@ xfs_rtmount_inodes(
 	ASSERT(sbp->sb_rsumino != NULLFSINO);
 	error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
 	if (error) {
-		IRELE(mp->m_rbmip);
+		xfs_irele(mp->m_rbmip);
 		return error;
 	}
 	ASSERT(mp->m_rsumip != NULL);
@@ -1219,9 +1219,9 @@ xfs_rtunmount_inodes(
 	struct xfs_mount	*mp)
 {
 	if (mp->m_rbmip)
-		IRELE(mp->m_rbmip);
+		xfs_irele(mp->m_rbmip);
 	if (mp->m_rsumip)
-		IRELE(mp->m_rsumip);
+		xfs_irele(mp->m_rsumip);
 }
 
 /*
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index ce801aedbcdc..2bfe7fbbedb2 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -364,7 +364,7 @@ out_release_inode:
 	 */
 	if (ip) {
 		xfs_finish_inode_setup(ip);
-		IRELE(ip);
+		xfs_irele(ip);
 	}
 
 	xfs_qm_dqrele(udqp);
-- 
cgit v1.2.3


From 1c02d502c20809a2a5f71ec16a930a61ed779b81 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Thu, 26 Jul 2018 09:11:27 -0700
Subject: xfs: remove deprecated barrier/nobarrier mount

The barrier mount options have been no-ops and deprecated since

4cf4573 xfs: deprecate barrier/nobarrier mount option

i.e. kernel 4.10 / December 2016, with a stated deprecation schedule
after v4.15.  Should be fair game to remove them now.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 Documentation/filesystems/xfs.txt |  4 ++--
 fs/xfs/xfs_mount.h                |  1 -
 fs/xfs/xfs_super.c                | 34 ++++------------------------------
 3 files changed, 6 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
index 4d9ff0a7f8e1..a9ae82fb9d13 100644
--- a/Documentation/filesystems/xfs.txt
+++ b/Documentation/filesystems/xfs.txt
@@ -223,8 +223,6 @@ Deprecated Mount Options
 
   Name				Removal Schedule
   ----				----------------
-  barrier			no earlier than v4.15
-  nobarrier			no earlier than v4.15
 
 
 Removed Mount Options
@@ -236,6 +234,8 @@ Removed Mount Options
   ihashsize			v4.0
   irixsgid			v4.0
   osyncisdsync/osyncisosync	v4.0
+  barrier			v4.19
+  nobarrier			v4.19
 
 
 sysctls
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 540353a51478..7964513c3128 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -217,7 +217,6 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_SMALL_INUMS	(1ULL << 14)	/* user wants 32bit inodes */
 #define XFS_MOUNT_32BITINODES	(1ULL << 15)	/* inode32 allocator active */
 #define XFS_MOUNT_NOUUID	(1ULL << 16)	/* ignore uuid during mount */
-#define XFS_MOUNT_BARRIER	(1ULL << 17)
 #define XFS_MOUNT_IKEEP		(1ULL << 18)	/* keep empty inode clusters*/
 #define XFS_MOUNT_SWALLOC	(1ULL << 19)	/* turn on stripe width
 						 * allocation */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f9f8dc490d3d..d2ebacd5975c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -65,11 +65,10 @@ enum {
 	Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize,
 	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
 	Opt_mtpt, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
-	Opt_allocsize, Opt_norecovery, Opt_barrier, Opt_nobarrier,
-	Opt_inode64, Opt_inode32, Opt_ikeep, Opt_noikeep,
-	Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2, Opt_filestreams,
-	Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota,
-	Opt_uquota, Opt_gquota, Opt_pquota,
+	Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
+	Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
+	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
+	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
 	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
 	Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,
 };
@@ -118,14 +117,7 @@ static const match_table_t tokens = {
 	{Opt_qnoenforce, "qnoenforce"},	/* same as uqnoenforce */
 	{Opt_discard,	"discard"},	/* Discard unused blocks */
 	{Opt_nodiscard,	"nodiscard"},	/* Do not discard unused blocks */
-
 	{Opt_dax,	"dax"},		/* Enable direct access to bdev pages */
-
-	/* Deprecated mount options scheduled for removal */
-	{Opt_barrier,	"barrier"},	/* use writer barriers for log write and
-					 * unwritten extent conversion */
-	{Opt_nobarrier,	"nobarrier"},	/* .. disable */
-
 	{Opt_err,	NULL},
 };
 
@@ -209,7 +201,6 @@ xfs_parseargs(
 	 * Set some default flags that could be cleared by the mount option
 	 * parsing.
 	 */
-	mp->m_flags |= XFS_MOUNT_BARRIER;
 	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
 
 	/*
@@ -362,14 +353,6 @@ xfs_parseargs(
 			mp->m_flags |= XFS_MOUNT_DAX;
 			break;
 #endif
-		case Opt_barrier:
-			xfs_warn(mp, "%s option is deprecated, ignoring.", p);
-			mp->m_flags |= XFS_MOUNT_BARRIER;
-			break;
-		case Opt_nobarrier:
-			xfs_warn(mp, "%s option is deprecated, ignoring.", p);
-			mp->m_flags &= ~XFS_MOUNT_BARRIER;
-			break;
 		default:
 			xfs_warn(mp, "unknown mount option [%s].", p);
 			return -EINVAL;
@@ -487,7 +470,6 @@ xfs_showargs(
 	static struct proc_xfs_info xfs_info_unset[] = {
 		/* the few simple ones we can get from the mount struct */
 		{ XFS_MOUNT_COMPAT_IOSIZE,	",largeio" },
-		{ XFS_MOUNT_BARRIER,		",nobarrier" },
 		{ XFS_MOUNT_SMALL_INUMS,	",inode64" },
 		{ 0, NULL }
 	};
@@ -1278,14 +1260,6 @@ xfs_fs_remount(
 
 		token = match_token(p, tokens, args);
 		switch (token) {
-		case Opt_barrier:
-			xfs_warn(mp, "%s option is deprecated, ignoring.", p);
-			mp->m_flags |= XFS_MOUNT_BARRIER;
-			break;
-		case Opt_nobarrier:
-			xfs_warn(mp, "%s option is deprecated, ignoring.", p);
-			mp->m_flags &= ~XFS_MOUNT_BARRIER;
-			break;
 		case Opt_inode64:
 			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
 			mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
-- 
cgit v1.2.3


From 3f30f929bb17877ebc1653c6f3ff41863f1ba524 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Thu, 26 Jul 2018 12:59:13 -0500
Subject: gfs2: cleanup: call gfs2_rgrp_ondisk2lvb from gfs2_rgrp_out

Before this patch gfs2_rgrp_ondisk2lvb was called after every call
to gfs2_rgrp_out. This patch just calls it directly from within
gfs2_rgrp_out, and moves the function to be before it so we don't
need a function prototype.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Reviewed-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/rgrp.c | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 68a81afd3b4a..7c5afeba8888 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1053,6 +1053,18 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
 	/* rd_data0, rd_data and rd_bitbytes already set from rindex */
 }
 
+static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
+{
+	const struct gfs2_rgrp *str = buf;
+
+	rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
+	rgl->rl_flags = str->rg_flags;
+	rgl->rl_free = str->rg_free;
+	rgl->rl_dinodes = str->rg_dinodes;
+	rgl->rl_igeneration = str->rg_igeneration;
+	rgl->__pad = 0UL;
+}
+
 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 {
 	struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd);
@@ -1075,6 +1087,7 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 	str->rg_crc = cpu_to_be32(crc);
 
 	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
+	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf);
 }
 
 static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
@@ -1089,18 +1102,6 @@ static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
 	return 1;
 }
 
-static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
-{
-	const struct gfs2_rgrp *str = buf;
-
-	rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
-	rgl->rl_flags = str->rg_flags;
-	rgl->rl_free = str->rg_free;
-	rgl->rl_dinodes = str->rg_dinodes;
-	rgl->rl_igeneration = str->rg_igeneration;
-	rgl->__pad = 0UL;
-}
-
 static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
 {
 	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
@@ -1426,7 +1427,6 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 				rgd->rd_flags |= GFS2_RGF_TRIMMED;
 				gfs2_trans_add_meta(rgd->rd_gl, bh);
 				gfs2_rgrp_out(rgd, bh->b_data);
-				gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
 				gfs2_trans_end(sdp);
 			}
 		}
@@ -2424,7 +2424,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
 	gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
 	gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
-	gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
 
 	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
 	if (dinode)
@@ -2465,7 +2464,6 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
 	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
-	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
 
 	/* Directories keep their data in the metadata address space */
 	if (meta || ip->i_depth)
@@ -2502,7 +2500,6 @@ void gfs2_unlink_di(struct inode *inode)
 	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
-	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
 	update_rgrp_lvb_unlinked(rgd, 1);
 }
 
@@ -2523,7 +2520,6 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 
 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
-	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
 	update_rgrp_lvb_unlinked(rgd, -1);
 
 	gfs2_statfs_change(sdp, 0, +1, -1);
-- 
cgit v1.2.3


From d0dd962d8a4ef4df9b710c4e3a975e6bfd9f0225 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 28 May 2018 10:32:41 -0400
Subject: media: dvb: get rid of VIDEO_SET_SPU_PALETTE

No upstream drivers use it. It doesn't make any sense to have
compat32 code for something that nobody uses upstream.

Reported-by: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../media/uapi/dvb/video-set-spu-palette.rst       | 82 ----------------------
 .../media/uapi/dvb/video_function_calls.rst        |  1 -
 Documentation/media/uapi/dvb/video_types.rst       | 18 -----
 Documentation/media/video.h.rst.exceptions         |  1 -
 fs/compat_ioctl.c                                  | 30 --------
 include/uapi/linux/dvb/video.h                     |  7 --
 6 files changed, 139 deletions(-)
 delete mode 100644 Documentation/media/uapi/dvb/video-set-spu-palette.rst

(limited to 'fs')

diff --git a/Documentation/media/uapi/dvb/video-set-spu-palette.rst b/Documentation/media/uapi/dvb/video-set-spu-palette.rst
deleted file mode 100644
index 51a1913d21d2..000000000000
--- a/Documentation/media/uapi/dvb/video-set-spu-palette.rst
+++ /dev/null
@@ -1,82 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_SET_SPU_PALETTE:
-
-=====================
-VIDEO_SET_SPU_PALETTE
-=====================
-
-Name
-----
-
-VIDEO_SET_SPU_PALETTE
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, VIDEO_SET_SPU_PALETTE, struct video_spu_palette *palette )
-    :name: VIDEO_SET_SPU_PALETTE
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_SET_SPU_PALETTE for this command.
-
-    -  .. row 3
-
-       -  video_spu_palette_t \*palette
-
-       -  SPU palette according to section ??.
-
-
-Description
------------
-
-This ioctl sets the SPU color palette.
-
-.. c:type:: video_spu_palette
-
-.. code-block::c
-
-	typedef struct video_spu_palette {      /* SPU Palette information */
-		int length;
-		__u8 __user *palette;
-	} video_spu_palette_t;
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  input is not a valid palette or driver doesn’t handle SPU.
diff --git a/Documentation/media/uapi/dvb/video_function_calls.rst b/Documentation/media/uapi/dvb/video_function_calls.rst
index 68588ac7fecb..8d8383ffaeba 100644
--- a/Documentation/media/uapi/dvb/video_function_calls.rst
+++ b/Documentation/media/uapi/dvb/video_function_calls.rst
@@ -38,6 +38,5 @@ Video Function Calls
     video-set-system
     video-set-highlight
     video-set-spu
-    video-set-spu-palette
     video-get-navi
     video-set-attributes
diff --git a/Documentation/media/uapi/dvb/video_types.rst b/Documentation/media/uapi/dvb/video_types.rst
index 640a21de6b8a..4cfa00e5c934 100644
--- a/Documentation/media/uapi/dvb/video_types.rst
+++ b/Documentation/media/uapi/dvb/video_types.rst
@@ -320,24 +320,6 @@ to the following format:
      } video_spu_t;
 
 
-.. c:type:: video_spu_palette
-
-struct video_spu_palette
-========================
-
-The following structure is used to set the SPU palette by calling
-VIDEO_SPU_PALETTE:
-
-
-.. code-block:: c
-
-     typedef
-     struct video_spu_palette {
-	 int length;
-	 uint8_t *palette;
-     } video_spu_palette_t;
-
-
 .. c:type:: video_navi_pack
 
 struct video_navi_pack
diff --git a/Documentation/media/video.h.rst.exceptions b/Documentation/media/video.h.rst.exceptions
index a91aa884ce0e..89d7c3ef2da7 100644
--- a/Documentation/media/video.h.rst.exceptions
+++ b/Documentation/media/video.h.rst.exceptions
@@ -36,5 +36,4 @@ replace typedef video_stream_source_t :c:type:`video_stream_source`
 replace typedef video_play_state_t :c:type:`video_play_state`
 replace typedef video_highlight_t :c:type:`video_highlight`
 replace typedef video_spu_t :c:type:`video_spu`
-replace typedef video_spu_palette_t :c:type:`video_spu_palette`
 replace typedef video_navi_pack_t :c:type:`video_navi_pack`
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 9907475b4226..fdb5ef9b5d06 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -198,34 +198,6 @@ static int do_video_stillpicture(struct file *file,
 	return err;
 }
 
-struct compat_video_spu_palette {
-	int length;
-	compat_uptr_t palette;
-};
-
-static int do_video_set_spu_palette(struct file *file,
-		unsigned int cmd, struct compat_video_spu_palette __user *up)
-{
-	struct video_spu_palette __user *up_native;
-	compat_uptr_t palp;
-	int length, err;
-
-	err  = get_user(palp, &up->palette);
-	err |= get_user(length, &up->length);
-	if (err)
-		return -EFAULT;
-
-	up_native = compat_alloc_user_space(sizeof(struct video_spu_palette));
-	err  = put_user(compat_ptr(palp), &up_native->palette);
-	err |= put_user(length, &up_native->length);
-	if (err)
-		return -EFAULT;
-
-	err = do_ioctl(file, cmd, (unsigned long) up_native);
-
-	return err;
-}
-
 #ifdef CONFIG_BLOCK
 typedef struct sg_io_hdr32 {
 	compat_int_t interface_id;	/* [i] 'S' for SCSI generic (required) */
@@ -1347,8 +1319,6 @@ static long do_ioctl_trans(unsigned int cmd,
 		return do_video_get_event(file, cmd, argp);
 	case VIDEO_STILLPICTURE:
 		return do_video_stillpicture(file, cmd, argp);
-	case VIDEO_SET_SPU_PALETTE:
-		return do_video_set_spu_palette(file, cmd, argp);
 	}
 
 	/*
diff --git a/include/uapi/linux/dvb/video.h b/include/uapi/linux/dvb/video.h
index df3d7028c807..6a0c9757b7ba 100644
--- a/include/uapi/linux/dvb/video.h
+++ b/include/uapi/linux/dvb/video.h
@@ -186,12 +186,6 @@ typedef struct video_spu {
 } video_spu_t;
 
 
-typedef struct video_spu_palette {      /* SPU Palette information */
-	int length;
-	__u8 __user *palette;
-} video_spu_palette_t;
-
-
 typedef struct video_navi_pack {
 	int length;          /* 0 ... 1024 */
 	__u8 data[1024];
@@ -248,7 +242,6 @@ typedef __u16 video_attributes_t;
 #define VIDEO_SET_SYSTEM           _IO('o', 38)
 #define VIDEO_SET_HIGHLIGHT        _IOW('o', 39, video_highlight_t)
 #define VIDEO_SET_SPU              _IOW('o', 50, video_spu_t)
-#define VIDEO_SET_SPU_PALETTE      _IOW('o', 51, video_spu_palette_t)
 #define VIDEO_GET_NAVI             _IOR('o', 52, video_navi_pack_t)
 #define VIDEO_SET_ATTRIBUTES       _IO('o', 53)
 #define VIDEO_GET_SIZE             _IOR('o', 55, video_size_t)
-- 
cgit v1.2.3


From a4d2aadca184ece182418950d45ba4ffc7b652d2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 29 Jul 2018 15:48:00 -0400
Subject: ext4: sysfs: print ext4_super_block fields as little-endian

While working on extended range for last_error/first_error timestamps,
I noticed that the endianness is wrong; we access the little-endian
fields in struct ext4_super_block as native-endian when we print them.

This adds a special case in ext4_attr_show() and ext4_attr_store()
to byteswap the superblock fields if needed.

In older kernels, this code was part of super.c, it got moved to
sysfs.c in linux-4.4.

Cc: stable@vger.kernel.org
Fixes: 52c198c6820f ("ext4: add sysfs entry showing whether the fs contains errors")
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/sysfs.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index f34da0bb8f17..b970a200f20c 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -274,8 +274,12 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
 	case attr_pointer_ui:
 		if (!ptr)
 			return 0;
-		return snprintf(buf, PAGE_SIZE, "%u\n",
-				*((unsigned int *) ptr));
+		if (a->attr_ptr == ptr_ext4_super_block_offset)
+			return snprintf(buf, PAGE_SIZE, "%u\n",
+					le32_to_cpup(ptr));
+		else
+			return snprintf(buf, PAGE_SIZE, "%u\n",
+					*((unsigned int *) ptr));
 	case attr_pointer_atomic:
 		if (!ptr)
 			return 0;
@@ -308,7 +312,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
 		ret = kstrtoul(skip_spaces(buf), 0, &t);
 		if (ret)
 			return ret;
-		*((unsigned int *) ptr) = t;
+		if (a->attr_ptr == ptr_ext4_super_block_offset)
+			*((__le32 *) ptr) = cpu_to_le32(t);
+		else
+			*((unsigned int *) ptr) = t;
 		return len;
 	case attr_inode_readahead:
 		return inode_readahead_blks_store(sbi, buf, len);
-- 
cgit v1.2.3


From af123b3718592a66a24716ed4724dc214220492b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 29 Jul 2018 15:49:00 -0400
Subject: ext4: use 64-bit timestamps for mmp_time

The mmp_time field is 64 bits wide, which is good, but calling
get_seconds() results in a 32-bit value on 32-bit architectures. Using
ktime_get_real_seconds() instead returns 64 bits everywhere.

Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mmp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 638ad4743477..39b07c2d3384 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -147,7 +147,7 @@ static int kmmpd(void *data)
 
 	mmp_block = le64_to_cpu(es->s_mmp_block);
 	mmp = (struct mmp_struct *)(bh->b_data);
-	mmp->mmp_time = cpu_to_le64(get_seconds());
+	mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
 	/*
 	 * Start with the higher mmp_check_interval and reduce it if
 	 * the MMP block is being updated on time.
@@ -165,7 +165,7 @@ static int kmmpd(void *data)
 			seq = 1;
 
 		mmp->mmp_seq = cpu_to_le32(seq);
-		mmp->mmp_time = cpu_to_le64(get_seconds());
+		mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
 		last_update_time = jiffies;
 
 		retval = write_mmp_block(sb, bh);
@@ -241,7 +241,7 @@ static int kmmpd(void *data)
 	 * Unmount seems to be clean.
 	 */
 	mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
-	mmp->mmp_time = cpu_to_le64(get_seconds());
+	mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
 
 	retval = write_mmp_block(sb, bh);
 
-- 
cgit v1.2.3


From 5ffff834322281f550b10c958fd9dd85679b8dbb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 29 Jul 2018 15:50:00 -0400
Subject: ext4: use ktime_get_real_seconds for i_dtime

We only care about the low 32 bits of i_dtime as explained in commit
b5f515735bea ("ext4: avoid Y2038 overflow in recently_deleted()"), so
the use of get_seconds() is correct here, but that function is getting
removed in the process of the y2038 fixes, so let's use the modern
ktime_get_real_seconds() here.

Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4efe77286ecd..ba0de19fb1ad 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -317,7 +317,7 @@ stop_handle:
 	 * (Well, we could do this if we need to, but heck - it works)
 	 */
 	ext4_orphan_del(handle, inode);
-	EXT4_I(inode)->i_dtime	= get_seconds();
+	EXT4_I(inode)->i_dtime	= (__u32)ktime_get_real_seconds();
 
 	/*
 	 * One subtle ordering requirement: if anything has gone wrong
-- 
cgit v1.2.3


From 7b62b293200ffaba5b281668ba7102cb4209774f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 29 Jul 2018 15:51:00 -0400
Subject: ext4: use timespec64 for all inode times

This is the last missing piece for the inode times on 32-bit systems:
now that VFS interfaces use timespec64, we just need to stop truncating
the tv_sec values for y2038 compatibility.

Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h   | 22 +++++++++-------------
 fs/ext4/ialloc.c |  2 +-
 2 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7c7123f265c2..1d7dac2df6e8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -789,17 +789,16 @@ struct move_extent {
  * affected filesystem before 2242.
  */
 
-static inline __le32 ext4_encode_extra_time(struct timespec *time)
+static inline __le32 ext4_encode_extra_time(struct timespec64 *time)
 {
-	u32 extra = sizeof(time->tv_sec) > 4 ?
-		((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0;
+	u32 extra =((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK;
 	return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS));
 }
 
-static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
+static inline void ext4_decode_extra_time(struct timespec64 *time,
+					  __le32 extra)
 {
-	if (unlikely(sizeof(time->tv_sec) > 4 &&
-			(extra & cpu_to_le32(EXT4_EPOCH_MASK)))) {
+	if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK))) {
 
 #if 1
 		/* Handle legacy encoding of pre-1970 dates with epoch
@@ -821,9 +820,8 @@ static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
 do {										\
 	(raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec);		\
 	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))     {\
-		struct timespec ts = timespec64_to_timespec((inode)->xtime);	\
 		(raw_inode)->xtime ## _extra =					\
-				ext4_encode_extra_time(&ts);			\
+				ext4_encode_extra_time(&(inode)->xtime);	\
 		}								\
 } while (0)
 
@@ -840,10 +838,8 @@ do {									       \
 do {										\
 	(inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime);	\
 	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {	\
-		struct timespec ts = timespec64_to_timespec((inode)->xtime);	\
-		ext4_decode_extra_time(&ts,					\
+		ext4_decode_extra_time(&(inode)->xtime,				\
 				       raw_inode->xtime ## _extra);		\
-		(inode)->xtime = timespec_to_timespec64(ts);			\
 		}								\
 	else									\
 		(inode)->xtime.tv_nsec = 0;					\
@@ -993,9 +989,9 @@ struct ext4_inode_info {
 
 	/*
 	 * File creation time. Its function is same as that of
-	 * struct timespec i_{a,c,m}time in the generic inode.
+	 * struct timespec64 i_{a,c,m}time in the generic inode.
 	 */
-	struct timespec i_crtime;
+	struct timespec64 i_crtime;
 
 	/* mballoc */
 	struct list_head i_prealloc_list;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f336cbc6e932..dffd21de2694 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1086,7 +1086,7 @@ got:
 	/* This is the optimal IO size (for stat), not the fs block size */
 	inode->i_blocks = 0;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
-	ei->i_crtime = timespec64_to_timespec(inode->i_mtime);
+	ei->i_crtime = inode->i_mtime;
 
 	memset(ei->i_data, 0, sizeof(ei->i_data));
 	ei->i_dir_start_lookup = 0;
-- 
cgit v1.2.3


From b42d1d6b5b789c41dacbe2bc192c7b359d109d7b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 29 Jul 2018 15:51:47 -0400
Subject: jbd2: replace current_kernel_time64 with ktime equivalent

jbd2 is one of the few callers of current_kernel_time64(), which
is a wrapper around ktime_get_coarse_real_ts64(). This calls the
latter directly for consistency with the rest of the kernel that
is moving to the ktime_get_ family of time accessors.

Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/commit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 8de0e7723316..150cc030b4d7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -121,7 +121,7 @@ static int journal_submit_commit_record(journal_t *journal,
 	struct commit_header *tmp;
 	struct buffer_head *bh;
 	int ret;
-	struct timespec64 now = current_kernel_time64();
+	struct timespec64 now;
 
 	*cbh = NULL;
 
@@ -134,6 +134,7 @@ static int journal_submit_commit_record(journal_t *journal,
 		return 1;
 
 	tmp = (struct commit_header *)bh->b_data;
+	ktime_get_coarse_real_ts64(&now);
 	tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
 	tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
 
-- 
cgit v1.2.3


From 6a0678a79bb3a4e5fc1b680e7afc78727e21aff3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 29 Jul 2018 15:51:48 -0400
Subject: ext4: super: extend timestamps to 40 bits

The inode timestamps use 34 bits in ext4, but the various timestamps in
the superblock are limited to 32 bits. If every user accesses these as
'unsigned', then this is good until year 2106, but it seems better to
extend this a bit further in the process of removing the deprecated
get_seconds() function.

This adds another byte for each timestamp in the superblock, making
them long enough to store timestamps beyond what is in the inodes,
which seems good enough here (in ocfs2, they are already 64-bit wide,
which is appropriate for a new layout).

I did not modify e2fsprogs, which obviously needs the same change to
actually interpret future timestamps correctly.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h  |  9 ++++++++-
 fs/ext4/super.c | 39 ++++++++++++++++++++++++++++++---------
 fs/ext4/sysfs.c | 19 +++++++++++++++++--
 3 files changed, 55 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1d7dac2df6e8..6d7dec48372b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1295,7 +1295,14 @@ struct ext4_super_block {
 	__le32	s_lpf_ino;		/* Location of the lost+found inode */
 	__le32	s_prj_quota_inum;	/* inode for tracking project quota */
 	__le32	s_checksum_seed;	/* crc32c(uuid) if csum_seed set */
-	__le32	s_reserved[98];		/* Padding to the end of the block */
+	__u8	s_wtime_hi;
+	__u8	s_mtime_hi;
+	__u8	s_mkfs_time_hi;
+	__u8	s_lastcheck_hi;
+	__u8	s_first_error_time_hi;
+	__u8	s_last_error_time_hi;
+	__u8	s_pad[2];
+	__le32	s_reserved[96];		/* Padding to the end of the block */
 	__le32	s_checksum;		/* crc32c(superblock) */
 };
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b7f7922061be..67f4310edd0a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -312,6 +312,24 @@ void ext4_itable_unused_set(struct super_block *sb,
 		bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 }
 
+static void __ext4_update_tstamp(__le32 *lo, __u8 *hi)
+{
+	time64_t now = ktime_get_real_seconds();
+
+	now = clamp_val(now, 0, (1ull << 40) - 1);
+
+	*lo = cpu_to_le32(lower_32_bits(now));
+	*hi = upper_32_bits(now);
+}
+
+static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
+{
+	return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
+}
+#define ext4_update_tstamp(es, tstamp) \
+	__ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
+#define ext4_get_tstamp(es, tstamp) \
+	__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
 
 static void __save_error_info(struct super_block *sb, const char *func,
 			    unsigned int line)
@@ -322,11 +340,12 @@ static void __save_error_info(struct super_block *sb, const char *func,
 	if (bdev_read_only(sb->s_bdev))
 		return;
 	es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-	es->s_last_error_time = cpu_to_le32(get_seconds());
+	ext4_update_tstamp(es, s_last_error_time);
 	strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
 	es->s_last_error_line = cpu_to_le32(line);
 	if (!es->s_first_error_time) {
 		es->s_first_error_time = es->s_last_error_time;
+		es->s_first_error_time_hi = es->s_last_error_time_hi;
 		strncpy(es->s_first_error_func, func,
 			sizeof(es->s_first_error_func));
 		es->s_first_error_line = cpu_to_le32(line);
@@ -2174,8 +2193,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 			 "warning: maximal mount count reached, "
 			 "running e2fsck is recommended");
 	else if (le32_to_cpu(es->s_checkinterval) &&
-		(le32_to_cpu(es->s_lastcheck) +
-			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
+		 (ext4_get_tstamp(es, s_lastcheck) +
+		  le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
 		ext4_msg(sb, KERN_WARNING,
 			 "warning: checktime reached, "
 			 "running e2fsck is recommended");
@@ -2184,7 +2203,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
 		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
 	le16_add_cpu(&es->s_mnt_count, 1);
-	es->s_mtime = cpu_to_le32(get_seconds());
+	ext4_update_tstamp(es, s_mtime);
 	ext4_update_dynamic_rev(sb);
 	if (sbi->s_journal)
 		ext4_set_feature_journal_needs_recovery(sb);
@@ -2875,8 +2894,9 @@ static void print_daily_error_info(struct timer_list *t)
 		ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
 			 le32_to_cpu(es->s_error_count));
 	if (es->s_first_error_time) {
-		printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
-		       sb->s_id, le32_to_cpu(es->s_first_error_time),
+		printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
+		       sb->s_id,
+		       ext4_get_tstamp(es, s_first_error_time),
 		       (int) sizeof(es->s_first_error_func),
 		       es->s_first_error_func,
 		       le32_to_cpu(es->s_first_error_line));
@@ -2889,8 +2909,9 @@ static void print_daily_error_info(struct timer_list *t)
 		printk(KERN_CONT "\n");
 	}
 	if (es->s_last_error_time) {
-		printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
-		       sb->s_id, le32_to_cpu(es->s_last_error_time),
+		printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
+		       sb->s_id,
+		       ext4_get_tstamp(es, s_last_error_time),
 		       (int) sizeof(es->s_last_error_func),
 		       es->s_last_error_func,
 		       le32_to_cpu(es->s_last_error_line));
@@ -4813,7 +4834,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 	 * to complain and force a full file system check.
 	 */
 	if (!(sb->s_flags & SB_RDONLY))
-		es->s_wtime = cpu_to_le32(get_seconds());
+		ext4_update_tstamp(es, s_wtime);
 	if (sb->s_bdev->bd_part)
 		es->s_kbytes_written =
 			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index b970a200f20c..e60cc5e89023 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -25,6 +25,8 @@ typedef enum {
 	attr_reserved_clusters,
 	attr_inode_readahead,
 	attr_trigger_test_error,
+	attr_first_error_time,
+	attr_last_error_time,
 	attr_feature,
 	attr_pointer_ui,
 	attr_pointer_atomic,
@@ -182,8 +184,8 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
 EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
-EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time);
-EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time);
+EXT4_ATTR(first_error_time, 0444, first_error_time);
+EXT4_ATTR(last_error_time, 0444, last_error_time);
 
 static unsigned int old_bump_val = 128;
 EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
@@ -249,6 +251,15 @@ static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi)
 	return NULL;
 }
 
+static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi)
+{
+	return snprintf(buf, PAGE_SIZE, "%lld",
+			((time64_t)hi << 32) + le32_to_cpu(lo));
+}
+
+#define print_tstamp(buf, es, tstamp) \
+	__print_tstamp(buf, (es)->tstamp, (es)->tstamp ## _hi)
+
 static ssize_t ext4_attr_show(struct kobject *kobj,
 			      struct attribute *attr, char *buf)
 {
@@ -287,6 +298,10 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
 				atomic_read((atomic_t *) ptr));
 	case attr_feature:
 		return snprintf(buf, PAGE_SIZE, "supported\n");
+	case attr_first_error_time:
+		return print_tstamp(buf, sbi->s_es, s_first_error_time);
+	case attr_last_error_time:
+		return print_tstamp(buf, sbi->s_es, s_last_error_time);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 7f144fd046d967ff2fbba59203b42b888ad5aae7 Mon Sep 17 00:00:00 2001
From: Junichi Uekawa <uekawa@google.com>
Date: Sun, 29 Jul 2018 15:51:52 -0400
Subject: ext4: fix warning message in ext4_enable_quotas()

Output the warning message before the cleanup loop clobbers type;
otherwise the reported type is always -1.
The error message would now be

[    1.519791] EXT4-fs warning (device vdb): ext4_enable_quotas:5402:
Failed to enable quota tracking (type=0, err=-3). Please run e2fsck to fix.

Signed-off-by: Junichi Uekawa <uekawa@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 fs/ext4/super.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 67f4310edd0a..3e64d8d51ac4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5686,13 +5686,13 @@ static int ext4_enable_quotas(struct super_block *sb)
 				DQUOT_USAGE_ENABLED |
 				(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
 			if (err) {
-				for (type--; type >= 0; type--)
-					dquot_quota_off(sb, type);
-
 				ext4_warning(sb,
 					"Failed to enable quota tracking "
 					"(type=%d, err=%d). Please run "
 					"e2fsck to fix.", type, err);
+				for (type--; type >= 0; type--)
+					dquot_quota_off(sb, type);
+
 				return err;
 			}
 		}
-- 
cgit v1.2.3


From 21ac738ede0b49004b53b4a44fe3df7bb4a78280 Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Sun, 29 Jul 2018 15:51:54 -0400
Subject: ext4: check allocation failure when duplicating "data" in
 ext4_remount()

There is no check for allocation failure when duplicating
"data" in ext4_remount(). Check for failure and return
error -ENOMEM in this case.

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 fs/ext4/super.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3e64d8d51ac4..d4a218ba626c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5101,6 +5101,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 #endif
 	char *orig_data = kstrdup(data, GFP_KERNEL);
 
+	if (data && !orig_data)
+		return -ENOMEM;
+
 	/* Store the original options */
 	old_sb_flags = sb->s_flags;
 	old_opts.s_mount_opt = sbi->s_mount_opt;
-- 
cgit v1.2.3


From d1753390274f7760e5b593cb657ea34f0617e559 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 27 Jul 2018 21:33:27 +0000
Subject: sysfs: Fix regression when adding a file to an existing group

Commit 5f81880d5204 ("sysfs, kobject: allow creating kobject belonging
to arbitrary users") incorrectly changed the argument passed as the
parent parameter when calling sysfs_add_file_mode_ns(). This caused some
sysfs attribute files to not be added correctly to certain groups.

Fixes: 5f81880d5204 ("sysfs, kobject: allow creating kobject belonging to arbitrary users")
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Reported-by: Heiner Kallweit <hkallweit1@gmail.com>
Tested-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/sysfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index fa46216523cf..052e5ad9a4d2 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -373,7 +373,7 @@ int sysfs_add_file_to_group(struct kobject *kobj,
 		return -ENOENT;
 
 	kobject_get_ownership(kobj, &uid, &gid);
-	error = sysfs_add_file_mode_ns(kobj->sd, attr, false,
+	error = sysfs_add_file_mode_ns(parent, attr, false,
 				       attr->mode, uid, gid, NULL);
 	kernfs_put(parent);
 
-- 
cgit v1.2.3


From 62bbdd9974678513fee113f09f6b672623521179 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Sun, 29 Jul 2018 16:11:59 -0400
Subject: ext4: use swap macro in mext_page_double_lock

Make use of the swap macro and remove unnecessary variable *tmp*.
This makes the code easier to read and maintain.

This code was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/move_extent.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 8e17efdcbf11..a409ff70d67b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -134,9 +134,7 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
 		mapping[0] = inode1->i_mapping;
 		mapping[1] = inode2->i_mapping;
 	} else {
-		pgoff_t tmp = index1;
-		index1 = index2;
-		index2 = tmp;
+		swap(index1, index2);
 		mapping[0] = inode2->i_mapping;
 		mapping[1] = inode1->i_mapping;
 	}
-- 
cgit v1.2.3


From cdbf8897cb09b7baf2b8a7e78051a35a872b01d5 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Sun, 29 Jul 2018 16:59:16 -0400
Subject: dax: dax_layout_busy_page() warn on !exceptional

Inodes using DAX should only ever have exceptional entries in their page
caches.  Make this clear by warning if the iteration in
dax_layout_busy_page() ever sees a non-exceptional entry, and by adding a
comment for the pagevec_release() call which only deals with struct page
pointers.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/dax.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dax.c b/fs/dax.c
index 641192808bb6..897b51e41d8f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -566,7 +566,8 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 			if (index >= end)
 				break;
 
-			if (!radix_tree_exceptional_entry(pvec_ent))
+			if (WARN_ON_ONCE(
+			     !radix_tree_exceptional_entry(pvec_ent)))
 				continue;
 
 			xa_lock_irq(&mapping->i_pages);
@@ -578,6 +579,13 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 			if (page)
 				break;
 		}
+
+		/*
+		 * We don't expect normal struct page entries to exist in our
+		 * tree, but we keep these pagevec calls so that this code is
+		 * consistent with the common pattern for handling pagevecs
+		 * throughout the kernel.
+		 */
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		index++;
-- 
cgit v1.2.3


From 430657b6be896db57d974375cc499ca212c7f01d Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Sun, 29 Jul 2018 17:00:22 -0400
Subject: ext4: handle layout changes to pinned DAX mappings

Follow the lead of xfs_break_dax_layouts() and add synchronization between
operations in ext4 which remove blocks from an inode (hole punch, truncate
down, etc.) and pages which are pinned due to DAX DMA operations.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Lukas Czerner <lczerner@redhat.com>
---
 fs/ext4/ext4.h     |  1 +
 fs/ext4/extents.c  | 17 +++++++++++++++++
 fs/ext4/inode.c    | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/truncate.h |  4 ++++
 4 files changed, 68 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6d7dec48372b..1fc013f3d944 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2459,6 +2459,7 @@ extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
 extern int ext4_inode_attach_jinode(struct inode *inode);
 extern int ext4_can_truncate(struct inode *inode);
 extern int ext4_truncate(struct inode *);
+extern int ext4_break_layouts(struct inode *);
 extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
 extern void ext4_set_inode_flags(struct inode *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 8ce6fd5b10dd..72a361d5ef74 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4826,6 +4826,13 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		 * released from page cache.
 		 */
 		down_write(&EXT4_I(inode)->i_mmap_sem);
+
+		ret = ext4_break_layouts(inode);
+		if (ret) {
+			up_write(&EXT4_I(inode)->i_mmap_sem);
+			goto out_mutex;
+		}
+
 		ret = ext4_update_disksize_before_punch(inode, offset, len);
 		if (ret) {
 			up_write(&EXT4_I(inode)->i_mmap_sem);
@@ -5499,6 +5506,11 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	 * page cache.
 	 */
 	down_write(&EXT4_I(inode)->i_mmap_sem);
+
+	ret = ext4_break_layouts(inode);
+	if (ret)
+		goto out_mmap;
+
 	/*
 	 * Need to round down offset to be aligned with page size boundary
 	 * for page size > block size.
@@ -5647,6 +5659,11 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 	 * page cache.
 	 */
 	down_write(&EXT4_I(inode)->i_mmap_sem);
+
+	ret = ext4_break_layouts(inode);
+	if (ret)
+		goto out_mmap;
+
 	/*
 	 * Need to round down to align start offset to page size boundary
 	 * for page size > block size.
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ba0de19fb1ad..60432498acfb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4191,6 +4191,39 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
 	return 0;
 }
 
+static void ext4_wait_dax_page(struct ext4_inode_info *ei, bool *did_unlock)
+{
+	*did_unlock = true;
+	up_write(&ei->i_mmap_sem);
+	schedule();
+	down_write(&ei->i_mmap_sem);
+}
+
+int ext4_break_layouts(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct page *page;
+	bool retry;
+	int error;
+
+	if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem)))
+		return -EINVAL;
+
+	do {
+		retry = false;
+		page = dax_layout_busy_page(inode->i_mapping);
+		if (!page)
+			return 0;
+
+		error = ___wait_var_event(&page->_refcount,
+				atomic_read(&page->_refcount) == 1,
+				TASK_INTERRUPTIBLE, 0, 0,
+				ext4_wait_dax_page(ei, &retry));
+	} while (error == 0 && retry);
+
+	return error;
+}
+
 /*
  * ext4_punch_hole: punches a hole in a file by releasing the blocks
  * associated with the given offset and length
@@ -4264,6 +4297,11 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 	 * page cache.
 	 */
 	down_write(&EXT4_I(inode)->i_mmap_sem);
+
+	ret = ext4_break_layouts(inode);
+	if (ret)
+		goto out_dio;
+
 	first_block_offset = round_up(offset, sb->s_blocksize);
 	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 
@@ -5553,6 +5591,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 				ext4_wait_for_tail_page_commit(inode);
 		}
 		down_write(&EXT4_I(inode)->i_mmap_sem);
+
+		rc = ext4_break_layouts(inode);
+		if (rc) {
+			up_write(&EXT4_I(inode)->i_mmap_sem);
+			error = rc;
+			goto err_out;
+		}
+
 		/*
 		 * Truncate pagecache after we've waited for commit
 		 * in data=journal mode to make pages freeable.
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index 0cb13badf473..bcbe3668c1d4 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -11,6 +11,10 @@
  */
 static inline void ext4_truncate_failed_write(struct inode *inode)
 {
+	/*
+	 * We don't need to call ext4_break_layouts() because the blocks we
+	 * are truncating were never visible to userspace.
+	 */
 	down_write(&EXT4_I(inode)->i_mmap_sem);
 	truncate_inode_pages(inode->i_mapping, inode->i_size);
 	ext4_truncate(inode);
-- 
cgit v1.2.3


From f39b3f45dbcb0343822cce31ea7636ad66e60bc2 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Sun, 29 Jul 2018 17:13:42 -0400
Subject: ext4: reset error code in ext4_find_entry in fallback

When ext4_find_entry() falls back to "searching the old fashioned
way" due to a corrupt dx dir, it needs to reset the error code
to NULL so that the nonstandard ERR_BAD_DX_DIR code isn't returned
to userspace.

https://bugzilla.kernel.org/show_bug.cgi?id=199947

Reported-by: Anatoly Trosinenko <anatoly.trosinenko@yandex.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 fs/ext4/namei.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2a4c25c4681d..116ff68c5bd4 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1398,6 +1398,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 			goto cleanup_and_exit;
 		dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
 			       "falling back\n"));
+		ret = NULL;
 	}
 	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
 	if (!nblocks) {
-- 
cgit v1.2.3


From 9af0b3d1257756394ebbd06b14937b557e3a756b Mon Sep 17 00:00:00 2001
From: Wang Shilong <wshilong@ddn.com>
Date: Sun, 29 Jul 2018 17:27:45 -0400
Subject: ext4: fix race when setting the bitmap corrupted flag

Whenever we hit a block or inode bitmap corruption we set the
corresponding bit and then reduce the block group's free inode/cluster
counter to expose the right amount of available space.

However, some calls to ext4_mark_group_bitmap_corrupted() are made
while holding the group spinlock and some are not, so a race could
make us subtract one block group's free counters from the system twice.

Always holding the group spinlock for this would fix it, but that looks
a little heavy; we can use test_and_set_bit() to fix the race
here instead.

Signed-off-by: Wang Shilong <wshilong@ddn.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 fs/ext4/super.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d4a218ba626c..f7750bc5b85a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -795,26 +795,26 @@ void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
 	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+	int ret;
 
-	if ((flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) &&
-	    !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) {
-		percpu_counter_sub(&sbi->s_freeclusters_counter,
-					grp->bb_free);
-		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
-			&grp->bb_state);
+	if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
+		ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
+					    &grp->bb_state);
+		if (!ret)
+			percpu_counter_sub(&sbi->s_freeclusters_counter,
+					   grp->bb_free);
 	}
 
-	if ((flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) &&
-	    !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-		if (gdp) {
+	if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
+		ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
+					    &grp->bb_state);
+		if (!ret && gdp) {
 			int count;
 
 			count = ext4_free_inodes_count(sb, gdp);
 			percpu_counter_sub(&sbi->s_freeinodes_counter,
 					   count);
 		}
-		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
-			&grp->bb_state);
 	}
 }
 
-- 
cgit v1.2.3


From ebcbef3a61a6081ffe20b0b684f18ebbf23f1dfb Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Sun, 29 Jul 2018 22:37:08 -0700
Subject: xfs: pass transaction lock while setting up agresv on cyclic metadata

Pass a transaction pointer through to all helpers that calculate the
per-AG block reservation.  Online repair will use this to reinitialize
per-ag reservations while it still holds all the AG headers locked to
the repair transaction.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_ag_resv.c        | 13 +++++++------
 fs/xfs/libxfs/xfs_ag_resv.h        |  2 +-
 fs/xfs/libxfs/xfs_ialloc_btree.c   | 10 ++++++----
 fs/xfs/libxfs/xfs_ialloc_btree.h   |  4 ++--
 fs/xfs/libxfs/xfs_refcount_btree.c |  5 +++--
 fs/xfs/libxfs/xfs_refcount_btree.h |  3 ++-
 fs/xfs/libxfs/xfs_rmap_btree.c     |  5 +++--
 fs/xfs/libxfs/xfs_rmap_btree.h     |  2 +-
 fs/xfs/xfs_fsops.c                 |  2 +-
 9 files changed, 26 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index fecd187fcf2c..e701ebc36c06 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -248,7 +248,8 @@ __xfs_ag_resv_init(
 /* Create a per-AG block reservation. */
 int
 xfs_ag_resv_init(
-	struct xfs_perag		*pag)
+	struct xfs_perag		*pag,
+	struct xfs_trans		*tp)
 {
 	struct xfs_mount		*mp = pag->pag_mount;
 	xfs_agnumber_t			agno = pag->pag_agno;
@@ -260,11 +261,11 @@ xfs_ag_resv_init(
 	if (pag->pag_meta_resv.ar_asked == 0) {
 		ask = used = 0;
 
-		error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used);
+		error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask, &used);
 		if (error)
 			goto out;
 
-		error = xfs_finobt_calc_reserves(mp, agno, &ask, &used);
+		error = xfs_finobt_calc_reserves(mp, tp, agno, &ask, &used);
 		if (error)
 			goto out;
 
@@ -282,7 +283,7 @@ xfs_ag_resv_init(
 
 			mp->m_inotbt_nores = true;
 
-			error = xfs_refcountbt_calc_reserves(mp, agno, &ask,
+			error = xfs_refcountbt_calc_reserves(mp, tp, agno, &ask,
 					&used);
 			if (error)
 				goto out;
@@ -298,7 +299,7 @@ xfs_ag_resv_init(
 	if (pag->pag_rmapbt_resv.ar_asked == 0) {
 		ask = used = 0;
 
-		error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
+		error = xfs_rmapbt_calc_reserves(mp, tp, agno, &ask, &used);
 		if (error)
 			goto out;
 
@@ -309,7 +310,7 @@ xfs_ag_resv_init(
 
 #ifdef DEBUG
 	/* need to read in the AGF for the ASSERT below to work */
-	error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0);
+	error = xfs_alloc_pagf_init(pag->pag_mount, tp, pag->pag_agno, 0);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
index dc953fc84b2f..c0352edc8e41 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.h
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -7,7 +7,7 @@
 #define	__XFS_AG_RESV_H__
 
 int xfs_ag_resv_free(struct xfs_perag *pag);
-int xfs_ag_resv_init(struct xfs_perag *pag);
+int xfs_ag_resv_init(struct xfs_perag *pag, struct xfs_trans *tp);
 
 bool xfs_ag_resv_critical(struct xfs_perag *pag, enum xfs_ag_resv_type type);
 xfs_extlen_t xfs_ag_resv_needed(struct xfs_perag *pag,
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 735a33252eb2..86c50208a143 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -552,6 +552,7 @@ xfs_inobt_max_size(
 static int
 xfs_inobt_count_blocks(
 	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
 	xfs_agnumber_t		agno,
 	xfs_btnum_t		btnum,
 	xfs_extlen_t		*tree_blocks)
@@ -560,14 +561,14 @@ xfs_inobt_count_blocks(
 	struct xfs_btree_cur	*cur;
 	int			error;
 
-	error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
+	error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
 	if (error)
 		return error;
 
-	cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum);
+	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
 	error = xfs_btree_count_blocks(cur, tree_blocks);
 	xfs_btree_del_cursor(cur, error);
-	xfs_buf_relse(agbp);
+	xfs_trans_brelse(tp, agbp);
 
 	return error;
 }
@@ -578,6 +579,7 @@ xfs_inobt_count_blocks(
 int
 xfs_finobt_calc_reserves(
 	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
 	xfs_agnumber_t		agno,
 	xfs_extlen_t		*ask,
 	xfs_extlen_t		*used)
@@ -588,7 +590,7 @@ xfs_finobt_calc_reserves(
 	if (!xfs_sb_version_hasfinobt(&mp->m_sb))
 		return 0;
 
-	error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len);
+	error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO, &tree_len);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index bf8f0c405e7d..ebdd0c6b8766 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -60,8 +60,8 @@ int xfs_inobt_rec_check_count(struct xfs_mount *,
 #define xfs_inobt_rec_check_count(mp, rec)	0
 #endif	/* DEBUG */
 
-int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno,
-		xfs_extlen_t *ask, xfs_extlen_t *used);
+int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp,
+		xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
 extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp,
 		unsigned long long len);
 
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 26d2300ed865..1aaa01c97517 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -404,6 +404,7 @@ xfs_refcountbt_max_size(
 int
 xfs_refcountbt_calc_reserves(
 	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
 	xfs_agnumber_t		agno,
 	xfs_extlen_t		*ask,
 	xfs_extlen_t		*used)
@@ -418,14 +419,14 @@ xfs_refcountbt_calc_reserves(
 		return 0;
 
 
-	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
 	if (error)
 		return error;
 
 	agf = XFS_BUF_TO_AGF(agbp);
 	agblocks = be32_to_cpu(agf->agf_length);
 	tree_len = be32_to_cpu(agf->agf_refcount_blocks);
-	xfs_buf_relse(agbp);
+	xfs_trans_brelse(tp, agbp);
 
 	*ask += xfs_refcountbt_max_size(mp, agblocks);
 	*used += tree_len;
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h
index 801c2c7732fd..ba416f71c824 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.h
+++ b/fs/xfs/libxfs/xfs_refcount_btree.h
@@ -55,6 +55,7 @@ extern xfs_extlen_t xfs_refcountbt_max_size(struct xfs_mount *mp,
 		xfs_agblock_t agblocks);
 
 extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp,
-		xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
+		struct xfs_trans *tp, xfs_agnumber_t agno, xfs_extlen_t *ask,
+		xfs_extlen_t *used);
 
 #endif	/* __XFS_REFCOUNT_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 221a88ea60bb..f79cf040d745 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -554,6 +554,7 @@ xfs_rmapbt_max_size(
 int
 xfs_rmapbt_calc_reserves(
 	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
 	xfs_agnumber_t		agno,
 	xfs_extlen_t		*ask,
 	xfs_extlen_t		*used)
@@ -567,14 +568,14 @@ xfs_rmapbt_calc_reserves(
 	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
 		return 0;
 
-	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
 	if (error)
 		return error;
 
 	agf = XFS_BUF_TO_AGF(agbp);
 	agblocks = be32_to_cpu(agf->agf_length);
 	tree_len = be32_to_cpu(agf->agf_rmap_blocks);
-	xfs_buf_relse(agbp);
+	xfs_trans_brelse(tp, agbp);
 
 	/* Reserve 1% of the AG or enough for 1 block per record. */
 	*ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index 50198b6c3bb2..820d668b063d 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -51,7 +51,7 @@ extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp,
 extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp,
 		xfs_agblock_t agblocks);
 
-extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp,
+extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp,
 		xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
 
 #endif	/* __XFS_RMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 3f2bd6032cf8..7c00b8bedfe3 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -536,7 +536,7 @@ xfs_fs_reserve_ag_blocks(
 
 	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
 		pag = xfs_perag_get(mp, agno);
-		err2 = xfs_ag_resv_init(pag);
+		err2 = xfs_ag_resv_init(pag, NULL);
 		xfs_perag_put(pag);
 		if (err2 && !error)
 			error = err2;
-- 
cgit v1.2.3


From bc270b53e6aa3b9723e26a548fa1a1688ea61361 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Sun, 29 Jul 2018 22:37:09 -0700
Subject: xfs: move the repair extent list into its own file

Move the xrep_extent_list code into a separate file.  Logically, this
data structure is really just a clumsy bitmap, and in the next patch
we'll make this more obvious.  No functional changes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/Makefile       |   1 +
 fs/xfs/scrub/bitmap.c | 208 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/bitmap.h |  37 +++++++++
 fs/xfs/scrub/repair.c | 194 +---------------------------------------------
 fs/xfs/scrub/repair.h |  27 +------
 5 files changed, 248 insertions(+), 219 deletions(-)
 create mode 100644 fs/xfs/scrub/bitmap.c
 create mode 100644 fs/xfs/scrub/bitmap.h

(limited to 'fs')

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 2f3f75a7f180..7f96bdadc372 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -158,6 +158,7 @@ xfs-$(CONFIG_XFS_QUOTA)		+= scrub/quota.o
 ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
 xfs-y				+= $(addprefix scrub/, \
 				   agheader_repair.o \
+				   bitmap.o \
 				   repair.o \
 				   )
 endif
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
new file mode 100644
index 000000000000..a7c2f4773f98
--- /dev/null
+++ b/fs/xfs/scrub/bitmap.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/bitmap.h"
+
+/* Collect a dead btree extent for later disposal. */
+int
+xrep_collect_btree_extent(
+	struct xfs_scrub	*sc,
+	struct xrep_extent_list	*exlist,
+	xfs_fsblock_t		fsbno,
+	xfs_extlen_t		len)
+{
+	struct xrep_extent	*rex;
+
+	trace_xrep_collect_btree_extent(sc->mp,
+			XFS_FSB_TO_AGNO(sc->mp, fsbno),
+			XFS_FSB_TO_AGBNO(sc->mp, fsbno), len);
+
+	rex = kmem_alloc(sizeof(struct xrep_extent), KM_MAYFAIL);
+	if (!rex)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&rex->list);
+	rex->fsbno = fsbno;
+	rex->len = len;
+	list_add_tail(&rex->list, &exlist->list);
+
+	return 0;
+}
+
+/*
+ * An error happened during the rebuild so the transaction will be cancelled.
+ * The fs will shut down, and the administrator has to unmount and run repair.
+ * Therefore, free all the memory associated with the list so we can die.
+ */
+void
+xrep_cancel_btree_extents(
+	struct xfs_scrub	*sc,
+	struct xrep_extent_list	*exlist)
+{
+	struct xrep_extent	*rex;
+	struct xrep_extent	*n;
+
+	for_each_xrep_extent_safe(rex, n, exlist) {
+		list_del(&rex->list);
+		kmem_free(rex);
+	}
+}
+
+/* Compare two btree extents. */
+static int
+xrep_btree_extent_cmp(
+	void			*priv,
+	struct list_head	*a,
+	struct list_head	*b)
+{
+	struct xrep_extent	*ap;
+	struct xrep_extent	*bp;
+
+	ap = container_of(a, struct xrep_extent, list);
+	bp = container_of(b, struct xrep_extent, list);
+
+	if (ap->fsbno > bp->fsbno)
+		return 1;
+	if (ap->fsbno < bp->fsbno)
+		return -1;
+	return 0;
+}
+
+/*
+ * Remove all the blocks mentioned in @sublist from the extents in @exlist.
+ *
+ * The intent is that callers will iterate the rmapbt for all of its records
+ * for a given owner to generate @exlist; and iterate all the blocks of the
+ * metadata structures that are not being rebuilt and have the same rmapbt
+ * owner to generate @sublist.  This routine subtracts all the extents
+ * mentioned in sublist from all the extents linked in @exlist, which leaves
+ * @exlist as the list of blocks that are not accounted for, which we assume
+ * are the dead blocks of the old metadata structure.  The blocks mentioned in
+ * @exlist can be reaped.
+ */
+#define LEFT_ALIGNED	(1 << 0)
+#define RIGHT_ALIGNED	(1 << 1)
+int
+xrep_subtract_extents(
+	struct xfs_scrub	*sc,
+	struct xrep_extent_list	*exlist,
+	struct xrep_extent_list	*sublist)
+{
+	struct list_head	*lp;
+	struct xrep_extent	*ex;
+	struct xrep_extent	*newex;
+	struct xrep_extent	*subex;
+	xfs_fsblock_t		sub_fsb;
+	xfs_extlen_t		sub_len;
+	int			state;
+	int			error = 0;
+
+	if (list_empty(&exlist->list) || list_empty(&sublist->list))
+		return 0;
+	ASSERT(!list_empty(&sublist->list));
+
+	list_sort(NULL, &exlist->list, xrep_btree_extent_cmp);
+	list_sort(NULL, &sublist->list, xrep_btree_extent_cmp);
+
+	/*
+	 * Now that we've sorted both lists, we iterate exlist once, rolling
+	 * forward through sublist and/or exlist as necessary until we find an
+	 * overlap or reach the end of either list.  We do not reset lp to the
+	 * head of exlist nor do we reset subex to the head of sublist.  The
+	 * list traversal is similar to merge sort, but we're deleting
+	 * instead.  In this manner we avoid O(n^2) operations.
+	 */
+	subex = list_first_entry(&sublist->list, struct xrep_extent,
+			list);
+	lp = exlist->list.next;
+	while (lp != &exlist->list) {
+		ex = list_entry(lp, struct xrep_extent, list);
+
+		/*
+		 * Advance subex and/or ex until we find a pair that
+		 * intersect or we run out of extents.
+		 */
+		while (subex->fsbno + subex->len <= ex->fsbno) {
+			if (list_is_last(&subex->list, &sublist->list))
+				goto out;
+			subex = list_next_entry(subex, list);
+		}
+		if (subex->fsbno >= ex->fsbno + ex->len) {
+			lp = lp->next;
+			continue;
+		}
+
+		/* trim subex to fit the extent we have */
+		sub_fsb = subex->fsbno;
+		sub_len = subex->len;
+		if (subex->fsbno < ex->fsbno) {
+			sub_len -= ex->fsbno - subex->fsbno;
+			sub_fsb = ex->fsbno;
+		}
+		if (sub_len > ex->len)
+			sub_len = ex->len;
+
+		state = 0;
+		if (sub_fsb == ex->fsbno)
+			state |= LEFT_ALIGNED;
+		if (sub_fsb + sub_len == ex->fsbno + ex->len)
+			state |= RIGHT_ALIGNED;
+		switch (state) {
+		case LEFT_ALIGNED:
+			/* Coincides with only the left. */
+			ex->fsbno += sub_len;
+			ex->len -= sub_len;
+			break;
+		case RIGHT_ALIGNED:
+			/* Coincides with only the right. */
+			ex->len -= sub_len;
+			lp = lp->next;
+			break;
+		case LEFT_ALIGNED | RIGHT_ALIGNED:
+			/* Total overlap, just delete ex. */
+			lp = lp->next;
+			list_del(&ex->list);
+			kmem_free(ex);
+			break;
+		case 0:
+			/*
+			 * Deleting from the middle: add the new right extent
+			 * and then shrink the left extent.
+			 */
+			newex = kmem_alloc(sizeof(struct xrep_extent),
+					KM_MAYFAIL);
+			if (!newex) {
+				error = -ENOMEM;
+				goto out;
+			}
+			INIT_LIST_HEAD(&newex->list);
+			newex->fsbno = sub_fsb + sub_len;
+			newex->len = ex->fsbno + ex->len - newex->fsbno;
+			list_add(&newex->list, &ex->list);
+			ex->len = sub_fsb - ex->fsbno;
+			lp = lp->next;
+			break;
+		default:
+			ASSERT(0);
+			break;
+		}
+	}
+
+out:
+	return error;
+}
+#undef LEFT_ALIGNED
+#undef RIGHT_ALIGNED
diff --git a/fs/xfs/scrub/bitmap.h b/fs/xfs/scrub/bitmap.h
new file mode 100644
index 000000000000..1038157695a8
--- /dev/null
+++ b/fs/xfs/scrub/bitmap.h
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_SCRUB_BITMAP_H__
+#define __XFS_SCRUB_BITMAP_H__
+
+struct xrep_extent {
+	struct list_head	list;
+	xfs_fsblock_t		fsbno;
+	xfs_extlen_t		len;
+};
+
+struct xrep_extent_list {
+	struct list_head	list;
+};
+
+static inline void
+xrep_init_extent_list(
+	struct xrep_extent_list		*exlist)
+{
+	INIT_LIST_HEAD(&exlist->list);
+}
+
+#define for_each_xrep_extent_safe(rbe, n, exlist) \
+	list_for_each_entry_safe((rbe), (n), &(exlist)->list, list)
+int xrep_collect_btree_extent(struct xfs_scrub *sc,
+		struct xrep_extent_list *btlist, xfs_fsblock_t fsbno,
+		xfs_extlen_t len);
+void xrep_cancel_btree_extents(struct xfs_scrub *sc,
+		struct xrep_extent_list *btlist);
+int xrep_subtract_extents(struct xfs_scrub *sc,
+		struct xrep_extent_list *exlist,
+		struct xrep_extent_list *sublist);
+
+#endif	/* __XFS_SCRUB_BITMAP_H__ */
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 5de1cac424ec..27a904ef6189 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -34,6 +34,7 @@
 #include "scrub/common.h"
 #include "scrub/trace.h"
 #include "scrub/repair.h"
+#include "scrub/bitmap.h"
 
 /*
  * Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -380,200 +381,7 @@ xrep_init_btblock(
  * sublist.  As with the other btrees we subtract sublist from exlist, and the
  * result (since the rmapbt lives in the free space) are the blocks from the
  * old rmapbt.
- */
-
-/* Collect a dead btree extent for later disposal. */
-int
-xrep_collect_btree_extent(
-	struct xfs_scrub	*sc,
-	struct xrep_extent_list	*exlist,
-	xfs_fsblock_t		fsbno,
-	xfs_extlen_t		len)
-{
-	struct xrep_extent	*rex;
-
-	trace_xrep_collect_btree_extent(sc->mp,
-			XFS_FSB_TO_AGNO(sc->mp, fsbno),
-			XFS_FSB_TO_AGBNO(sc->mp, fsbno), len);
-
-	rex = kmem_alloc(sizeof(struct xrep_extent), KM_MAYFAIL);
-	if (!rex)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&rex->list);
-	rex->fsbno = fsbno;
-	rex->len = len;
-	list_add_tail(&rex->list, &exlist->list);
-
-	return 0;
-}
-
-/*
- * An error happened during the rebuild so the transaction will be cancelled.
- * The fs will shut down, and the administrator has to unmount and run repair.
- * Therefore, free all the memory associated with the list so we can die.
- */
-void
-xrep_cancel_btree_extents(
-	struct xfs_scrub	*sc,
-	struct xrep_extent_list	*exlist)
-{
-	struct xrep_extent	*rex;
-	struct xrep_extent	*n;
-
-	for_each_xrep_extent_safe(rex, n, exlist) {
-		list_del(&rex->list);
-		kmem_free(rex);
-	}
-}
-
-/* Compare two btree extents. */
-static int
-xrep_btree_extent_cmp(
-	void			*priv,
-	struct list_head	*a,
-	struct list_head	*b)
-{
-	struct xrep_extent	*ap;
-	struct xrep_extent	*bp;
-
-	ap = container_of(a, struct xrep_extent, list);
-	bp = container_of(b, struct xrep_extent, list);
-
-	if (ap->fsbno > bp->fsbno)
-		return 1;
-	if (ap->fsbno < bp->fsbno)
-		return -1;
-	return 0;
-}
-
-/*
- * Remove all the blocks mentioned in @sublist from the extents in @exlist.
  *
- * The intent is that callers will iterate the rmapbt for all of its records
- * for a given owner to generate @exlist; and iterate all the blocks of the
- * metadata structures that are not being rebuilt and have the same rmapbt
- * owner to generate @sublist.  This routine subtracts all the extents
- * mentioned in sublist from all the extents linked in @exlist, which leaves
- * @exlist as the list of blocks that are not accounted for, which we assume
- * are the dead blocks of the old metadata structure.  The blocks mentioned in
- * @exlist can be reaped.
- */
-#define LEFT_ALIGNED	(1 << 0)
-#define RIGHT_ALIGNED	(1 << 1)
-int
-xrep_subtract_extents(
-	struct xfs_scrub	*sc,
-	struct xrep_extent_list	*exlist,
-	struct xrep_extent_list	*sublist)
-{
-	struct list_head	*lp;
-	struct xrep_extent	*ex;
-	struct xrep_extent	*newex;
-	struct xrep_extent	*subex;
-	xfs_fsblock_t		sub_fsb;
-	xfs_extlen_t		sub_len;
-	int			state;
-	int			error = 0;
-
-	if (list_empty(&exlist->list) || list_empty(&sublist->list))
-		return 0;
-	ASSERT(!list_empty(&sublist->list));
-
-	list_sort(NULL, &exlist->list, xrep_btree_extent_cmp);
-	list_sort(NULL, &sublist->list, xrep_btree_extent_cmp);
-
-	/*
-	 * Now that we've sorted both lists, we iterate exlist once, rolling
-	 * forward through sublist and/or exlist as necessary until we find an
-	 * overlap or reach the end of either list.  We do not reset lp to the
-	 * head of exlist nor do we reset subex to the head of sublist.  The
-	 * list traversal is similar to merge sort, but we're deleting
-	 * instead.  In this manner we avoid O(n^2) operations.
-	 */
-	subex = list_first_entry(&sublist->list, struct xrep_extent,
-			list);
-	lp = exlist->list.next;
-	while (lp != &exlist->list) {
-		ex = list_entry(lp, struct xrep_extent, list);
-
-		/*
-		 * Advance subex and/or ex until we find a pair that
-		 * intersect or we run out of extents.
-		 */
-		while (subex->fsbno + subex->len <= ex->fsbno) {
-			if (list_is_last(&subex->list, &sublist->list))
-				goto out;
-			subex = list_next_entry(subex, list);
-		}
-		if (subex->fsbno >= ex->fsbno + ex->len) {
-			lp = lp->next;
-			continue;
-		}
-
-		/* trim subex to fit the extent we have */
-		sub_fsb = subex->fsbno;
-		sub_len = subex->len;
-		if (subex->fsbno < ex->fsbno) {
-			sub_len -= ex->fsbno - subex->fsbno;
-			sub_fsb = ex->fsbno;
-		}
-		if (sub_len > ex->len)
-			sub_len = ex->len;
-
-		state = 0;
-		if (sub_fsb == ex->fsbno)
-			state |= LEFT_ALIGNED;
-		if (sub_fsb + sub_len == ex->fsbno + ex->len)
-			state |= RIGHT_ALIGNED;
-		switch (state) {
-		case LEFT_ALIGNED:
-			/* Coincides with only the left. */
-			ex->fsbno += sub_len;
-			ex->len -= sub_len;
-			break;
-		case RIGHT_ALIGNED:
-			/* Coincides with only the right. */
-			ex->len -= sub_len;
-			lp = lp->next;
-			break;
-		case LEFT_ALIGNED | RIGHT_ALIGNED:
-			/* Total overlap, just delete ex. */
-			lp = lp->next;
-			list_del(&ex->list);
-			kmem_free(ex);
-			break;
-		case 0:
-			/*
-			 * Deleting from the middle: add the new right extent
-			 * and then shrink the left extent.
-			 */
-			newex = kmem_alloc(sizeof(struct xrep_extent),
-					KM_MAYFAIL);
-			if (!newex) {
-				error = -ENOMEM;
-				goto out;
-			}
-			INIT_LIST_HEAD(&newex->list);
-			newex->fsbno = sub_fsb + sub_len;
-			newex->len = ex->fsbno + ex->len - newex->fsbno;
-			list_add(&newex->list, &ex->list);
-			ex->len = sub_fsb - ex->fsbno;
-			lp = lp->next;
-			break;
-		default:
-			ASSERT(0);
-			break;
-		}
-	}
-
-out:
-	return error;
-}
-#undef LEFT_ALIGNED
-#undef RIGHT_ALIGNED
-
-/*
  * Disposal of Blocks from Old per-AG Btrees
  *
  * Now that we've constructed a new btree to replace the damaged one, we want
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 91355f6b0087..a3d491a438f4 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -27,33 +27,8 @@ int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb,
 		struct xfs_buf **bpp, xfs_btnum_t btnum,
 		const struct xfs_buf_ops *ops);
 
-struct xrep_extent {
-	struct list_head	list;
-	xfs_fsblock_t		fsbno;
-	xfs_extlen_t		len;
-};
-
-struct xrep_extent_list {
-	struct list_head	list;
-};
-
-static inline void
-xrep_init_extent_list(
-	struct xrep_extent_list	*exlist)
-{
-	INIT_LIST_HEAD(&exlist->list);
-}
+struct xrep_extent_list;
 
-#define for_each_xrep_extent_safe(rbe, n, exlist) \
-	list_for_each_entry_safe((rbe), (n), &(exlist)->list, list)
-int xrep_collect_btree_extent(struct xfs_scrub *sc,
-		struct xrep_extent_list *btlist, xfs_fsblock_t fsbno,
-		xfs_extlen_t len);
-void xrep_cancel_btree_extents(struct xfs_scrub *sc,
-		struct xrep_extent_list *btlist);
-int xrep_subtract_extents(struct xfs_scrub *sc,
-		struct xrep_extent_list *exlist,
-		struct xrep_extent_list *sublist);
 int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink);
 int xrep_invalidate_blocks(struct xfs_scrub *sc,
 		struct xrep_extent_list *btlist);
-- 
cgit v1.2.3


From fcacbc3f511338842dd177e2d53d457f9741543b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 17 Jul 2018 16:51:50 -0700
Subject: xfs: remove if_real_bytes

The field is only used for asserts, and to track if we really need to do
realloc when growing the inode fork data.  But the krealloc function
already performs this check internally, so there is no need to keep track
of the real allocation size.

This will free space in the inode fork for keeping a sequence counter of
changes to the extent list.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_inode_fork.c | 19 ++++---------------
 fs/xfs/libxfs/xfs_inode_fork.h |  1 -
 fs/xfs/xfs_inode.c             |  3 +--
 fs/xfs/xfs_inode_item.c        |  4 ----
 4 files changed, 5 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 183ec0cb8921..dee85b0f8846 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -158,7 +158,6 @@ xfs_init_local_fork(
 	}
 
 	ifp->if_bytes = size;
-	ifp->if_real_bytes = real_size;
 	ifp->if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
 	ifp->if_flags |= XFS_IFINLINE;
 }
@@ -226,7 +225,6 @@ xfs_iformat_extents(
 		return -EFSCORRUPTED;
 	}
 
-	ifp->if_real_bytes = 0;
 	ifp->if_bytes = 0;
 	ifp->if_u1.if_root = NULL;
 	ifp->if_height = 0;
@@ -317,7 +315,6 @@ xfs_iformat_btree(
 	ifp->if_flags &= ~XFS_IFEXTENTS;
 	ifp->if_flags |= XFS_IFBROOT;
 
-	ifp->if_real_bytes = 0;
 	ifp->if_bytes = 0;
 	ifp->if_u1.if_root = NULL;
 	ifp->if_height = 0;
@@ -501,7 +498,6 @@ xfs_idata_realloc(
 		 */
 		real_size = roundup(new_size, 4);
 		if (ifp->if_u1.if_data == NULL) {
-			ASSERT(ifp->if_real_bytes == 0);
 			ifp->if_u1.if_data = kmem_alloc(real_size,
 							KM_SLEEP | KM_NOFS);
 		} else {
@@ -509,15 +505,12 @@ xfs_idata_realloc(
 			 * Only do the realloc if the underlying size
 			 * is really changing.
 			 */
-			if (ifp->if_real_bytes != real_size) {
-				ifp->if_u1.if_data =
-					kmem_realloc(ifp->if_u1.if_data,
-							real_size,
-							KM_SLEEP | KM_NOFS);
-			}
+			ifp->if_u1.if_data =
+				kmem_realloc(ifp->if_u1.if_data,
+						real_size,
+						KM_SLEEP | KM_NOFS);
 		}
 	}
-	ifp->if_real_bytes = real_size;
 	ifp->if_bytes = new_size;
 	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
 }
@@ -543,17 +536,13 @@ xfs_idestroy_fork(
 	 */
 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
 		if (ifp->if_u1.if_data != NULL) {
-			ASSERT(ifp->if_real_bytes != 0);
 			kmem_free(ifp->if_u1.if_data);
 			ifp->if_u1.if_data = NULL;
-			ifp->if_real_bytes = 0;
 		}
 	} else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) {
 		xfs_iext_destroy(ifp);
 	}
 
-	ASSERT(ifp->if_real_bytes == 0);
-
 	if (whichfork == XFS_ATTR_FORK) {
 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
 		ip->i_afp = NULL;
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 781b1603df5e..46242052aad0 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -14,7 +14,6 @@ struct xfs_dinode;
  */
 typedef struct xfs_ifork {
 	int			if_bytes;	/* bytes in if_u1 */
-	int			if_real_bytes;	/* bytes allocated in if_u1 */
 	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
 	short			if_broot_bytes;	/* bytes allocated for root */
 	unsigned char		if_flags;	/* per-fork flags */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 64c694d2b2a5..54fcfdfc748c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -927,7 +927,7 @@ xfs_ialloc(
 	case S_IFLNK:
 		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 		ip->i_df.if_flags = XFS_IFEXTENTS;
-		ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
+		ip->i_df.if_bytes = 0;
 		ip->i_df.if_u1.if_root = NULL;
 		break;
 	default:
@@ -1841,7 +1841,6 @@ xfs_inactive(
 	 * to clean up here.
 	 */
 	if (VFS_I(ip)->i_mode == 0) {
-		ASSERT(ip->i_df.if_real_bytes == 0);
 		ASSERT(ip->i_df.if_broot_bytes == 0);
 		return;
 	}
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 2389c34c172d..fa1c4fe2ffbf 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -194,8 +194,6 @@ xfs_inode_item_format_data_fork(
 			 * to be there by xfs_idata_realloc().
 			 */
 			data_bytes = roundup(ip->i_df.if_bytes, 4);
-			ASSERT(ip->i_df.if_real_bytes == 0 ||
-			       ip->i_df.if_real_bytes >= data_bytes);
 			ASSERT(ip->i_df.if_u1.if_data != NULL);
 			ASSERT(ip->i_d.di_size > 0);
 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
@@ -280,8 +278,6 @@ xfs_inode_item_format_attr_fork(
 			 * to be there by xfs_idata_realloc().
 			 */
 			data_bytes = roundup(ip->i_afp->if_bytes, 4);
-			ASSERT(ip->i_afp->if_real_bytes == 0 ||
-			       ip->i_afp->if_real_bytes >= data_bytes);
 			ASSERT(ip->i_afp->if_u1.if_data != NULL);
 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
 					ip->i_afp->if_u1.if_data,
-- 
cgit v1.2.3


From 1216b58b353fbf5529454b442cebb3c8f14d93da Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 17 Jul 2018 16:51:50 -0700
Subject: xfs: simplify xfs_idata_realloc

Streamline the code and take advantage of the fact that kmem_realloc
through krealloc will behave like a normal allocation if passing in a
NULL old pointer.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_inode_fork.c | 55 +++++++++++++++---------------------------
 1 file changed, 19 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index dee85b0f8846..a0e3fb804605 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -468,51 +468,34 @@ xfs_iroot_realloc(
  */
 void
 xfs_idata_realloc(
-	xfs_inode_t	*ip,
-	int		byte_diff,
-	int		whichfork)
+	struct xfs_inode	*ip,
+	int			byte_diff,
+	int			whichfork)
 {
-	xfs_ifork_t	*ifp;
-	int		new_size;
-	int		real_size;
-
-	if (byte_diff == 0) {
-		return;
-	}
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
+	int			new_size = (int)ifp->if_bytes + byte_diff;
 
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	new_size = (int)ifp->if_bytes + byte_diff;
 	ASSERT(new_size >= 0);
+	ASSERT(new_size <= XFS_IFORK_SIZE(ip, whichfork));
+
+	if (byte_diff == 0)
+		return;
 
 	if (new_size == 0) {
 		kmem_free(ifp->if_u1.if_data);
 		ifp->if_u1.if_data = NULL;
-		real_size = 0;
-	} else {
-		/*
-		 * Stuck with malloc/realloc.
-		 * For inline data, the underlying buffer must be
-		 * a multiple of 4 bytes in size so that it can be
-		 * logged and stay on word boundaries.  We enforce
-		 * that here.
-		 */
-		real_size = roundup(new_size, 4);
-		if (ifp->if_u1.if_data == NULL) {
-			ifp->if_u1.if_data = kmem_alloc(real_size,
-							KM_SLEEP | KM_NOFS);
-		} else {
-			/*
-			 * Only do the realloc if the underlying size
-			 * is really changing.
-			 */
-			ifp->if_u1.if_data =
-				kmem_realloc(ifp->if_u1.if_data,
-						real_size,
-						KM_SLEEP | KM_NOFS);
-		}
+		ifp->if_bytes = 0;
+		return;
 	}
+
+	/*
+	 * For inline data, the underlying buffer must be a multiple of 4 bytes
+	 * in size so that it can be logged and stay on word boundaries.
+	 * We enforce that here.
+	 */
+	ifp->if_u1.if_data = kmem_realloc(ifp->if_u1.if_data,
+			roundup(new_size, 4), KM_SLEEP | KM_NOFS);
 	ifp->if_bytes = new_size;
-	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
 }
 
 void
-- 
cgit v1.2.3


From 3ba738df25239f877f6a98ce1cc925fa7e924cd3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 17 Jul 2018 16:51:50 -0700
Subject: xfs: remove the xfs_ifork_t typedef

We only have a few more callers left, so seize the opportunity and kill
it off.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr_leaf.c  |  6 +++---
 fs/xfs/libxfs/xfs_bmap.c       | 18 +++++++++---------
 fs/xfs/libxfs/xfs_inode_fork.c |  8 ++++----
 fs/xfs/libxfs/xfs_inode_fork.h |  4 ++--
 fs/xfs/xfs_icache.c            |  2 +-
 fs/xfs/xfs_inode.h             |  6 +++---
 fs/xfs/xfs_super.c             |  2 +-
 7 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 088ffcd22fa2..4e7ef79a83c7 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -528,7 +528,7 @@ xfs_attr_shortform_create(xfs_da_args_t *args)
 {
 	xfs_attr_sf_hdr_t *hdr;
 	xfs_inode_t *dp;
-	xfs_ifork_t *ifp;
+	struct xfs_ifork *ifp;
 
 	trace_xfs_attr_sf_create(args);
 
@@ -563,7 +563,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
 	int i, offset, size;
 	xfs_mount_t *mp;
 	xfs_inode_t *dp;
-	xfs_ifork_t *ifp;
+	struct xfs_ifork *ifp;
 
 	trace_xfs_attr_sf_add(args);
 
@@ -704,7 +704,7 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
 	xfs_attr_shortform_t *sf;
 	xfs_attr_sf_entry_t *sfe;
 	int i;
-	xfs_ifork_t *ifp;
+	struct xfs_ifork *ifp;
 
 	trace_xfs_attr_sf_lookup(args);
 
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 92cd064a2589..a85c0445b38f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -326,7 +326,7 @@ xfs_bmap_check_leaf_extents(
 	xfs_buf_t		*bp;	/* buffer for "block" */
 	int			error;	/* error return value */
 	xfs_extnum_t		i=0, j;	/* index into the extents list */
-	xfs_ifork_t		*ifp;	/* fork structure */
+	struct xfs_ifork	*ifp;	/* fork structure */
 	int			level;	/* btree level, for checking */
 	xfs_mount_t		*mp;	/* file system mount structure */
 	__be64			*pp;	/* pointer to block address */
@@ -594,7 +594,7 @@ xfs_bmap_btree_to_extents(
 	xfs_fsblock_t		cbno;	/* child block number */
 	xfs_buf_t		*cbp;	/* child block's buffer */
 	int			error;	/* error return value */
-	xfs_ifork_t		*ifp;	/* inode fork data */
+	struct xfs_ifork	*ifp;	/* inode fork data */
 	xfs_mount_t		*mp;	/* mount point structure */
 	__be64			*pp;	/* ptr to block address */
 	struct xfs_btree_block	*rblock;/* root btree block */
@@ -817,7 +817,7 @@ xfs_bmap_local_to_extents(
 {
 	int		error = 0;
 	int		flags;		/* logging flags returned */
-	xfs_ifork_t	*ifp;		/* inode fork pointer */
+	struct xfs_ifork *ifp;		/* inode fork pointer */
 	xfs_alloc_arg_t	args;		/* allocation arguments */
 	xfs_buf_t	*bp;		/* buffer for extent block */
 	struct xfs_bmbt_irec rec;
@@ -1471,7 +1471,7 @@ xfs_bmap_one_block(
 	xfs_inode_t	*ip,		/* incore inode */
 	int		whichfork)	/* data or attr fork */
 {
-	xfs_ifork_t	*ifp;		/* inode fork pointer */
+	struct xfs_ifork *ifp;		/* inode fork pointer */
 	int		rval;		/* return value */
 	xfs_bmbt_irec_t	s;		/* internal version of extent */
 	struct xfs_iext_cursor icur;
@@ -1509,7 +1509,7 @@ xfs_bmap_add_extent_delay_real(
 	struct xfs_bmbt_irec	*new = &bma->got;
 	int			error;	/* error return value */
 	int			i;	/* temp state */
-	xfs_ifork_t		*ifp;	/* inode fork pointer */
+	struct xfs_ifork	*ifp;	/* inode fork pointer */
 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
 	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
 					/* left is 0, right is 1, prev is 2 */
@@ -2018,7 +2018,7 @@ xfs_bmap_add_extent_unwritten_real(
 	xfs_btree_cur_t		*cur;	/* btree cursor */
 	int			error;	/* error return value */
 	int			i;	/* temp state */
-	xfs_ifork_t		*ifp;	/* inode fork pointer */
+	struct xfs_ifork	*ifp;	/* inode fork pointer */
 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
 	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
 					/* left is 0, right is 1, prev is 2 */
@@ -2486,7 +2486,7 @@ xfs_bmap_add_extent_hole_delay(
 	struct xfs_iext_cursor	*icur,
 	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
 {
-	xfs_ifork_t		*ifp;	/* inode fork pointer */
+	struct xfs_ifork	*ifp;	/* inode fork pointer */
 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
 	xfs_filblks_t		newlen=0;	/* new indirect size */
 	xfs_filblks_t		oldlen=0;	/* old indirect size */
@@ -4844,7 +4844,7 @@ xfs_bmap_del_extent_real(
 	struct xfs_bmbt_irec	got;	/* current extent entry */
 	xfs_fileoff_t		got_endoff;	/* first offset past got */
 	int			i;	/* temp state */
-	xfs_ifork_t		*ifp;	/* inode fork pointer */
+	struct xfs_ifork	*ifp;	/* inode fork pointer */
 	xfs_mount_t		*mp;	/* mount structure */
 	xfs_filblks_t		nblks;	/* quota/sb block count */
 	xfs_bmbt_irec_t		new;	/* new record to be inserted */
@@ -5092,7 +5092,7 @@ __xfs_bunmapi(
 	int			error;		/* error return value */
 	xfs_extnum_t		extno;		/* extent number in list */
 	struct xfs_bmbt_irec	got;		/* current extent record */
-	xfs_ifork_t		*ifp;		/* inode fork pointer */
+	struct xfs_ifork	*ifp;		/* inode fork pointer */
 	int			isrt;		/* freeing in rt area */
 	int			logflags;	/* transaction logging flags */
 	xfs_extlen_t		mod;		/* rt extent offset */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index a0e3fb804605..f9acf1d436f6 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -269,7 +269,7 @@ xfs_iformat_btree(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	xfs_bmdr_block_t	*dfp;
-	xfs_ifork_t		*ifp;
+	struct xfs_ifork	*ifp;
 	/* REFERENCED */
 	int			nrecs;
 	int			size;
@@ -347,7 +347,7 @@ xfs_iroot_realloc(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	int			cur_max;
-	xfs_ifork_t		*ifp;
+	struct xfs_ifork	*ifp;
 	struct xfs_btree_block	*new_broot;
 	int			new_max;
 	size_t			new_size;
@@ -503,7 +503,7 @@ xfs_idestroy_fork(
 	xfs_inode_t	*ip,
 	int		whichfork)
 {
-	xfs_ifork_t	*ifp;
+	struct xfs_ifork	*ifp;
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	if (ifp->if_broot != NULL) {
@@ -592,7 +592,7 @@ xfs_iflush_fork(
 	int			whichfork)
 {
 	char			*cp;
-	xfs_ifork_t		*ifp;
+	struct xfs_ifork	*ifp;
 	xfs_mount_t		*mp;
 	static const short	brootflag[2] =
 		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 46242052aad0..1492143371f3 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -12,7 +12,7 @@ struct xfs_dinode;
 /*
  * File incore extent information, present for each of data & attr forks.
  */
-typedef struct xfs_ifork {
+struct xfs_ifork {
 	int			if_bytes;	/* bytes in if_u1 */
 	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
 	short			if_broot_bytes;	/* bytes allocated for root */
@@ -22,7 +22,7 @@ typedef struct xfs_ifork {
 		void		*if_root;	/* extent tree root */
 		char		*if_data;	/* inline file data */
 	} if_u1;
-} xfs_ifork_t;
+};
 
 /*
  * Per-fork incore inode flags.
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 8de94ecd73ae..e5591f5ebe15 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -66,7 +66,7 @@ xfs_inode_alloc(
 	ip->i_cowfp = NULL;
 	ip->i_cnextents = 0;
 	ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
-	memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
+	memset(&ip->i_df, 0, sizeof(ip->i_df));
 	ip->i_flags = 0;
 	ip->i_delayed_blks = 0;
 	memset(&ip->i_d, 0, sizeof(ip->i_d));
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 8db34d6f2835..a67efdac70a6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -34,9 +34,9 @@ typedef struct xfs_inode {
 	struct xfs_imap		i_imap;		/* location for xfs_imap() */
 
 	/* Extent information. */
-	xfs_ifork_t		*i_afp;		/* attribute fork pointer */
-	xfs_ifork_t		*i_cowfp;	/* copy on write extents */
-	xfs_ifork_t		i_df;		/* data fork */
+	struct xfs_ifork	*i_afp;		/* attribute fork pointer */
+	struct xfs_ifork	*i_cowfp;	/* copy on write extents */
+	struct xfs_ifork	i_df;		/* data fork */
 
 	/* operations vectors */
 	const struct xfs_dir_ops *d_ops;		/* directory ops vector */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d2ebacd5975c..207ee302b1bb 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1860,7 +1860,7 @@ xfs_init_zones(void)
 	if (!xfs_da_state_zone)
 		goto out_destroy_btree_cur_zone;
 
-	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+	xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork");
 	if (!xfs_ifork_zone)
 		goto out_destroy_da_state_zone;
 
-- 
cgit v1.2.3


From 51d626903083f7bd651d38b031775740ed41758c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 17 Jul 2018 16:51:51 -0700
Subject: xfs: introduce a new xfs_inode_has_cow_data helper

We have a few places that already check if an inode has actual data in
the COW fork to avoid work on reflink inodes that do not actually have
outstanding COW blocks.  There are a few more places that can avoid
working if doing the same check, so add a documented helper for this
condition and use it in all places where it makes sense.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c      |  4 ++--
 fs/xfs/xfs_bmap_util.c |  2 +-
 fs/xfs/xfs_icache.c    | 10 ++++------
 fs/xfs/xfs_inode.c     |  3 +--
 fs/xfs/xfs_inode.h     |  9 +++++++++
 fs/xfs/xfs_reflink.c   |  2 +-
 6 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f4d3252236c1..814100d27343 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -338,7 +338,7 @@ xfs_map_blocks(
 	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
 		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
 	if (imap_valid &&
-	    (!xfs_is_reflink_inode(ip) || wpc->io_type == XFS_IO_COW))
+	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
 		return 0;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -363,7 +363,7 @@ xfs_map_blocks(
 	 * Check if this is offset is covered by a COW extents, and if yes use
 	 * it directly instead of looking up anything in the data fork.
 	 */
-	if (xfs_is_reflink_inode(ip) &&
+	if (xfs_inode_has_cow_data(ip) &&
 	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
 	    imap.br_startoff <= offset_fsb) {
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index c32ec17048f5..412dc58ae54d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1263,7 +1263,7 @@ xfs_prepare_shift(
 	 * we've flushed all the dirty data out to disk to avoid having
 	 * CoW extents at the wrong offsets.
 	 */
-	if (xfs_is_reflink_inode(ip)) {
+	if (xfs_inode_has_cow_data(ip)) {
 		error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
 				true);
 		if (error)
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index e5591f5ebe15..245483cc282b 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1697,14 +1697,13 @@ xfs_inode_clear_eofblocks_tag(
  */
 static bool
 xfs_prep_free_cowblocks(
-	struct xfs_inode	*ip,
-	struct xfs_ifork	*ifp)
+	struct xfs_inode	*ip)
 {
 	/*
 	 * Just clear the tag if we have an empty cow fork or none at all. It's
 	 * possible the inode was fully unshared since it was originally tagged.
 	 */
-	if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
+	if (!xfs_inode_has_cow_data(ip)) {
 		trace_xfs_inode_free_cowblocks_invalid(ip);
 		xfs_inode_clear_cowblocks_tag(ip);
 		return false;
@@ -1742,11 +1741,10 @@ xfs_inode_free_cowblocks(
 	void			*args)
 {
 	struct xfs_eofblocks	*eofb = args;
-	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	int			match;
 	int			ret = 0;
 
-	if (!xfs_prep_free_cowblocks(ip, ifp))
+	if (!xfs_prep_free_cowblocks(ip))
 		return 0;
 
 	if (eofb) {
@@ -1771,7 +1769,7 @@ xfs_inode_free_cowblocks(
 	 * Check again, nobody else should be able to dirty blocks or change
 	 * the reflink iflag now that we have the first two locks held.
 	 */
-	if (xfs_prep_free_cowblocks(ip, ifp))
+	if (xfs_prep_free_cowblocks(ip))
 		ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
 
 	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 54fcfdfc748c..5fc1815c2b62 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1832,7 +1832,6 @@ xfs_inactive(
 	xfs_inode_t	*ip)
 {
 	struct xfs_mount	*mp;
-	struct xfs_ifork	*cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	int			error;
 	int			truncate = 0;
 
@@ -1853,7 +1852,7 @@ xfs_inactive(
 		return;
 
 	/* Try to clean out the cow blocks if there are any. */
-	if (xfs_is_reflink_inode(ip) && cow_ifp->if_bytes > 0)
+	if (xfs_inode_has_cow_data(ip))
 		xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
 
 	if (VFS_I(ip)->i_nlink != 0) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index a67efdac70a6..79a3e61a6991 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -198,6 +198,15 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
 	return ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
 }
 
+/*
+ * Check if an inode has any data in the COW fork.  This might be often false
+ * even for inodes with the reflink flag when there is no pending COW operation.
+ */
+static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
+{
+	return ip->i_cowfp && ip->i_cowfp->if_bytes;
+}
+
 /*
  * In-core inode flags.
  */
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 68b6921dc3f6..9a0a56526266 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -478,7 +478,7 @@ xfs_reflink_cancel_cow_blocks(
 	struct xfs_iext_cursor		icur;
 	int				error = 0;
 
-	if (!xfs_is_reflink_inode(ip))
+	if (!xfs_inode_has_cow_data(ip))
 		return 0;
 	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
 		return 0;
-- 
cgit v1.2.3


From b41e44b4cb230747d7ad56e38c9dc65369b1d381 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 30 May 2018 11:07:03 -0400
Subject: media: dvb/video.h: get rid of unused APIs

There are a number of other ioctls that aren't used anywhere
inside the Kernel tree.

Get rid of them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../media/uapi/dvb/video-get-frame-rate.rst        |  61 -----------
 Documentation/media/uapi/dvb/video-get-navi.rst    |  84 ---------------
 .../media/uapi/dvb/video-set-attributes.rst        |  93 -----------------
 .../media/uapi/dvb/video-set-highlight.rst         |  86 ----------------
 Documentation/media/uapi/dvb/video-set-id.rst      |  75 --------------
 Documentation/media/uapi/dvb/video-set-spu.rst     |  85 ----------------
 Documentation/media/uapi/dvb/video-set-system.rst  |  77 --------------
 .../media/uapi/dvb/video_function_calls.rst        |   6 --
 Documentation/media/uapi/dvb/video_types.rst       | 113 ---------------------
 Documentation/media/video.h.rst.exceptions         |   2 -
 fs/compat_ioctl.c                                  |   7 --
 include/uapi/linux/dvb/video.h                     |  51 ----------
 12 files changed, 740 deletions(-)
 delete mode 100644 Documentation/media/uapi/dvb/video-get-frame-rate.rst
 delete mode 100644 Documentation/media/uapi/dvb/video-get-navi.rst
 delete mode 100644 Documentation/media/uapi/dvb/video-set-attributes.rst
 delete mode 100644 Documentation/media/uapi/dvb/video-set-highlight.rst
 delete mode 100644 Documentation/media/uapi/dvb/video-set-id.rst
 delete mode 100644 Documentation/media/uapi/dvb/video-set-spu.rst
 delete mode 100644 Documentation/media/uapi/dvb/video-set-system.rst

(limited to 'fs')

diff --git a/Documentation/media/uapi/dvb/video-get-frame-rate.rst b/Documentation/media/uapi/dvb/video-get-frame-rate.rst
deleted file mode 100644
index 400042a854cf..000000000000
--- a/Documentation/media/uapi/dvb/video-get-frame-rate.rst
+++ /dev/null
@@ -1,61 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_GET_FRAME_RATE:
-
-====================
-VIDEO_GET_FRAME_RATE
-====================
-
-Name
-----
-
-VIDEO_GET_FRAME_RATE
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(int fd, VIDEO_GET_FRAME_RATE, unsigned int *rate)
-    :name: VIDEO_GET_FRAME_RATE
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_GET_FRAME_RATE for this command.
-
-    -  .. row 3
-
-       -  unsigned int \*rate
-
-       -  Returns the framerate in number of frames per 1000 seconds.
-
-
-Description
------------
-
-This ioctl call asks the Video Device to return the current framerate.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/media/uapi/dvb/video-get-navi.rst b/Documentation/media/uapi/dvb/video-get-navi.rst
deleted file mode 100644
index 114a9ac48b9e..000000000000
--- a/Documentation/media/uapi/dvb/video-get-navi.rst
+++ /dev/null
@@ -1,84 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_GET_NAVI:
-
-==============
-VIDEO_GET_NAVI
-==============
-
-Name
-----
-
-VIDEO_GET_NAVI
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, VIDEO_GET_NAVI , struct video_navi_pack *navipack)
-    :name: VIDEO_GET_NAVI
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_GET_NAVI for this command.
-
-    -  .. row 3
-
-       -  video_navi_pack_t \*navipack
-
-       -  PCI or DSI pack (private stream 2) according to section ??.
-
-
-Description
------------
-
-This ioctl returns navigational information from the DVD stream. This is
-especially needed if an encoded stream has to be decoded by the
-hardware.
-
-.. c:type:: video_navi_pack
-
-.. code-block::c
-
-	typedef struct video_navi_pack {
-		int length;          /* 0 ... 1024 */
-		__u8 data[1024];
-	} video_navi_pack_t;
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EFAULT``
-
-       -  driver is not able to return navigational information
diff --git a/Documentation/media/uapi/dvb/video-set-attributes.rst b/Documentation/media/uapi/dvb/video-set-attributes.rst
deleted file mode 100644
index b2f11a6746e9..000000000000
--- a/Documentation/media/uapi/dvb/video-set-attributes.rst
+++ /dev/null
@@ -1,93 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_SET_ATTRIBUTES:
-
-====================
-VIDEO_SET_ATTRIBUTES
-====================
-
-Name
-----
-
-VIDEO_SET_ATTRIBUTES
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, VIDEO_SET_ATTRIBUTE ,video_attributes_t vattr)
-    :name: VIDEO_SET_ATTRIBUTE
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_SET_ATTRIBUTE for this command.
-
-    -  .. row 3
-
-       -  video_attributes_t vattr
-
-       -  video attributes according to section ??.
-
-
-Description
------------
-
-This ioctl is intended for DVD playback and allows you to set certain
-information about the stream. Some hardware may not need this
-information, but the call also tells the hardware to prepare for DVD
-playback.
-
-.. c:type:: video_attributes_t
-
-.. code-block::c
-
-	typedef __u16 video_attributes_t;
-	/*   bits: descr. */
-	/*   15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) */
-	/*   13-12 TV system (0=525/60, 1=625/50) */
-	/*   11-10 Aspect ratio (0=4:3, 3=16:9) */
-	/*    9- 8 permitted display mode on 4:3 monitor (0=both, 1=only pan-sca */
-	/*    7    line 21-1 data present in GOP (1=yes, 0=no) */
-	/*    6    line 21-2 data present in GOP (1=yes, 0=no) */
-	/*    5- 3 source resolution (0=720x480/576, 1=704x480/576, 2=352x480/57 */
-	/*    2    source letterboxed (1=yes, 0=no) */
-	/*    0    film/camera mode (0=camera, 1=film (625/50 only)) */
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  input is not a valid attribute setting.
diff --git a/Documentation/media/uapi/dvb/video-set-highlight.rst b/Documentation/media/uapi/dvb/video-set-highlight.rst
deleted file mode 100644
index 90aeafd923b7..000000000000
--- a/Documentation/media/uapi/dvb/video-set-highlight.rst
+++ /dev/null
@@ -1,86 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_SET_HIGHLIGHT:
-
-===================
-VIDEO_SET_HIGHLIGHT
-===================
-
-Name
-----
-
-VIDEO_SET_HIGHLIGHT
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, VIDEO_SET_HIGHLIGHT, struct video_highlight *vhilite)
-    :name: VIDEO_SET_HIGHLIGHT
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_SET_HIGHLIGHT for this command.
-
-    -  .. row 3
-
-       -  video_highlight_t \*vhilite
-
-       -  SPU Highlight information according to section ??.
-
-
-Description
------------
-
-This ioctl sets the SPU highlight information for the menu access of a
-DVD.
-
-.. c:type:: video_highlight
-
-.. code-block:: c
-
-	typedef
-	struct video_highlight {
-		int     active;      /*    1=show highlight, 0=hide highlight */
-		__u8    contrast1;   /*    7- 4  Pattern pixel contrast */
-				/*    3- 0  Background pixel contrast */
-		__u8    contrast2;   /*    7- 4  Emphasis pixel-2 contrast */
-				/*    3- 0  Emphasis pixel-1 contrast */
-		__u8    color1;      /*    7- 4  Pattern pixel color */
-				/*    3- 0  Background pixel color */
-		__u8    color2;      /*    7- 4  Emphasis pixel-2 color */
-				/*    3- 0  Emphasis pixel-1 color */
-		__u32    ypos;       /*   23-22  auto action mode */
-				/*   21-12  start y */
-				/*    9- 0  end y */
-		__u32    xpos;       /*   23-22  button color number */
-				/*   21-12  start x */
-				/*    9- 0  end x */
-	} video_highlight_t;
-
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/media/uapi/dvb/video-set-id.rst b/Documentation/media/uapi/dvb/video-set-id.rst
deleted file mode 100644
index 18f66875ae3f..000000000000
--- a/Documentation/media/uapi/dvb/video-set-id.rst
+++ /dev/null
@@ -1,75 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_SET_ID:
-
-============
-VIDEO_SET_ID
-============
-
-Name
-----
-
-VIDEO_SET_ID
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(int fd, VIDEO_SET_ID, int id)
-    :name: VIDEO_SET_ID
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_SET_ID for this command.
-
-    -  .. row 3
-
-       -  int id
-
-       -  video sub-stream id
-
-
-Description
------------
-
-This ioctl selects which sub-stream is to be decoded if a program or
-system stream is sent to the video device.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  Invalid sub-stream id.
diff --git a/Documentation/media/uapi/dvb/video-set-spu.rst b/Documentation/media/uapi/dvb/video-set-spu.rst
deleted file mode 100644
index 739e5e7bd133..000000000000
--- a/Documentation/media/uapi/dvb/video-set-spu.rst
+++ /dev/null
@@ -1,85 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_SET_SPU:
-
-=============
-VIDEO_SET_SPU
-=============
-
-Name
-----
-
-VIDEO_SET_SPU
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, VIDEO_SET_SPU , struct video_spu *spu)
-    :name: VIDEO_SET_SPU
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_SET_SPU for this command.
-
-    -  .. row 3
-
-       -  video_spu_t \*spu
-
-       -  SPU decoding (de)activation and subid setting according to section
-	  ??.
-
-
-Description
------------
-
-This ioctl activates or deactivates SPU decoding in a DVD input stream.
-It can only be used, if the driver is able to handle a DVD stream.
-
-.. c:type:: struct video_spu
-
-.. code-block:: c
-
-	typedef struct video_spu {
-		int active;
-		int stream_id;
-	} video_spu_t;
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  input is not a valid spu setting or driver cannot handle SPU.
diff --git a/Documentation/media/uapi/dvb/video-set-system.rst b/Documentation/media/uapi/dvb/video-set-system.rst
deleted file mode 100644
index e39cbe080ef7..000000000000
--- a/Documentation/media/uapi/dvb/video-set-system.rst
+++ /dev/null
@@ -1,77 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_SET_SYSTEM:
-
-================
-VIDEO_SET_SYSTEM
-================
-
-Name
-----
-
-VIDEO_SET_SYSTEM
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, VIDEO_SET_SYSTEM , video_system_t system)
-    :name: VIDEO_SET_SYSTEM
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_SET_FORMAT for this command.
-
-    -  .. row 3
-
-       -  video_system_t system
-
-       -  video system of TV output.
-
-
-Description
------------
-
-This ioctl sets the television output format. The format (see section
-??) may vary from the color format of the displayed MPEG stream. If the
-hardware is not able to display the requested format the call will
-return an error.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  system is not a valid or supported video system.
diff --git a/Documentation/media/uapi/dvb/video_function_calls.rst b/Documentation/media/uapi/dvb/video_function_calls.rst
index 8d8383ffaeba..3f4f6c9ffad7 100644
--- a/Documentation/media/uapi/dvb/video_function_calls.rst
+++ b/Documentation/media/uapi/dvb/video_function_calls.rst
@@ -21,7 +21,6 @@ Video Function Calls
     video-get-status
     video-get-frame-count
     video-get-pts
-    video-get-frame-rate
     video-get-event
     video-command
     video-try-command
@@ -31,12 +30,7 @@ Video Function Calls
     video-fast-forward
     video-slowmotion
     video-get-capabilities
-    video-set-id
     video-clear-buffer
     video-set-streamtype
     video-set-format
-    video-set-system
-    video-set-highlight
-    video-set-spu
-    video-get-navi
     video-set-attributes
diff --git a/Documentation/media/uapi/dvb/video_types.rst b/Documentation/media/uapi/dvb/video_types.rst
index 4cfa00e5c934..a0942171596c 100644
--- a/Documentation/media/uapi/dvb/video_types.rst
+++ b/Documentation/media/uapi/dvb/video_types.rst
@@ -246,116 +246,3 @@ following bits set according to the hardwares capabilities.
      #define VIDEO_CAP_SPU    16
      #define VIDEO_CAP_NAVI   32
      #define VIDEO_CAP_CSS    64
-
-
-.. _video-system:
-
-video_system_t
-==============
-
-A call to VIDEO_SET_SYSTEM sets the desired video system for TV
-output. The following system types can be set:
-
-
-.. code-block:: c
-
-    typedef enum {
-	 VIDEO_SYSTEM_PAL,
-	 VIDEO_SYSTEM_NTSC,
-	 VIDEO_SYSTEM_PALN,
-	 VIDEO_SYSTEM_PALNc,
-	 VIDEO_SYSTEM_PALM,
-	 VIDEO_SYSTEM_NTSC60,
-	 VIDEO_SYSTEM_PAL60,
-	 VIDEO_SYSTEM_PALM60
-    } video_system_t;
-
-
-.. c:type:: video_highlight
-
-struct video_highlight
-======================
-
-Calling the ioctl VIDEO_SET_HIGHLIGHTS posts the SPU highlight
-information. The call expects the following format for that information:
-
-
-.. code-block:: c
-
-     typedef
-     struct video_highlight {
-	 boolean active;      /*    1=show highlight, 0=hide highlight */
-	 uint8_t contrast1;   /*    7- 4  Pattern pixel contrast */
-		      /*    3- 0  Background pixel contrast */
-	 uint8_t contrast2;   /*    7- 4  Emphasis pixel-2 contrast */
-		      /*    3- 0  Emphasis pixel-1 contrast */
-	 uint8_t color1;      /*    7- 4  Pattern pixel color */
-		      /*    3- 0  Background pixel color */
-	 uint8_t color2;      /*    7- 4  Emphasis pixel-2 color */
-		      /*    3- 0  Emphasis pixel-1 color */
-	 uint32_t ypos;       /*   23-22  auto action mode */
-		      /*   21-12  start y */
-		      /*    9- 0  end y */
-	 uint32_t xpos;       /*   23-22  button color number */
-		      /*   21-12  start x */
-		      /*    9- 0  end x */
-     } video_highlight_t;
-
-
-.. c:type:: video_spu
-
-struct video_spu
-================
-
-Calling VIDEO_SET_SPU deactivates or activates SPU decoding, according
-to the following format:
-
-
-.. code-block:: c
-
-     typedef
-     struct video_spu {
-	 boolean active;
-	 int stream_id;
-     } video_spu_t;
-
-
-.. c:type:: video_navi_pack
-
-struct video_navi_pack
-======================
-
-In order to get the navigational data the following structure has to be
-passed to the ioctl VIDEO_GET_NAVI:
-
-
-.. code-block:: c
-
-     typedef
-     struct video_navi_pack {
-	 int length;         /* 0 ... 1024 */
-	 uint8_t data[1024];
-     } video_navi_pack_t;
-
-
-.. _video-attributes-t:
-
-video_attributes_t
-==================
-
-The following attributes can be set by a call to VIDEO_SET_ATTRIBUTES:
-
-
-.. code-block:: c
-
-     typedef uint16_t video_attributes_t;
-     /*   bits: descr. */
-     /*   15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) */
-     /*   13-12 TV system (0=525/60, 1=625/50) */
-     /*   11-10 Aspect ratio (0=4:3, 3=16:9) */
-     /*    9- 8 permitted display mode on 4:3 monitor (0=both, 1=only pan-sca */
-     /*    7    line 21-1 data present in GOP (1=yes, 0=no) */
-     /*    6    line 21-2 data present in GOP (1=yes, 0=no) */
-     /*    5- 3 source resolution (0=720x480/576, 1=704x480/576, 2=352x480/57 */
-     /*    2    source letterboxed (1=yes, 0=no) */
-     /*    0    film/camera mode (0=camera, 1=film (625/50 only)) */
diff --git a/Documentation/media/video.h.rst.exceptions b/Documentation/media/video.h.rst.exceptions
index 89d7c3ef2da7..371cdbd7d062 100644
--- a/Documentation/media/video.h.rst.exceptions
+++ b/Documentation/media/video.h.rst.exceptions
@@ -34,6 +34,4 @@ replace typedef video_displayformat_t :c:type:`video_displayformat`
 replace typedef video_size_t :c:type:`video_size`
 replace typedef video_stream_source_t :c:type:`video_stream_source`
 replace typedef video_play_state_t :c:type:`video_play_state`
-replace typedef video_highlight_t :c:type:`video_highlight`
-replace typedef video_spu_t :c:type:`video_spu`
 replace typedef video_navi_pack_t :c:type:`video_navi_pack`
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index fdb5ef9b5d06..59216b172003 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1205,16 +1205,9 @@ COMPATIBLE_IOCTL(VIDEO_FAST_FORWARD)
 COMPATIBLE_IOCTL(VIDEO_SLOWMOTION)
 COMPATIBLE_IOCTL(VIDEO_GET_CAPABILITIES)
 COMPATIBLE_IOCTL(VIDEO_CLEAR_BUFFER)
-COMPATIBLE_IOCTL(VIDEO_SET_ID)
 COMPATIBLE_IOCTL(VIDEO_SET_STREAMTYPE)
 COMPATIBLE_IOCTL(VIDEO_SET_FORMAT)
-COMPATIBLE_IOCTL(VIDEO_SET_SYSTEM)
-COMPATIBLE_IOCTL(VIDEO_SET_HIGHLIGHT)
-COMPATIBLE_IOCTL(VIDEO_SET_SPU)
-COMPATIBLE_IOCTL(VIDEO_GET_NAVI)
-COMPATIBLE_IOCTL(VIDEO_SET_ATTRIBUTES)
 COMPATIBLE_IOCTL(VIDEO_GET_SIZE)
-COMPATIBLE_IOCTL(VIDEO_GET_FRAME_RATE)
 /* cec */
 COMPATIBLE_IOCTL(CEC_ADAP_G_CAPS)
 COMPATIBLE_IOCTL(CEC_ADAP_G_LOG_ADDRS)
diff --git a/include/uapi/linux/dvb/video.h b/include/uapi/linux/dvb/video.h
index 6a0c9757b7ba..43ba8b0a3d14 100644
--- a/include/uapi/linux/dvb/video.h
+++ b/include/uapi/linux/dvb/video.h
@@ -37,18 +37,6 @@ typedef enum {
 } video_format_t;
 
 
-typedef enum {
-	 VIDEO_SYSTEM_PAL,
-	 VIDEO_SYSTEM_NTSC,
-	 VIDEO_SYSTEM_PALN,
-	 VIDEO_SYSTEM_PALNc,
-	 VIDEO_SYSTEM_PALM,
-	 VIDEO_SYSTEM_NTSC60,
-	 VIDEO_SYSTEM_PAL60,
-	 VIDEO_SYSTEM_PALM60
-} video_system_t;
-
-
 typedef enum {
 	VIDEO_PAN_SCAN,       /* use pan and scan format */
 	VIDEO_LETTER_BOX,     /* use letterbox format */
@@ -160,38 +148,6 @@ struct video_still_picture {
 };
 
 
-typedef
-struct video_highlight {
-	int     active;      /*    1=show highlight, 0=hide highlight */
-	__u8    contrast1;   /*    7- 4  Pattern pixel contrast */
-			     /*    3- 0  Background pixel contrast */
-	__u8    contrast2;   /*    7- 4  Emphasis pixel-2 contrast */
-			     /*    3- 0  Emphasis pixel-1 contrast */
-	__u8    color1;      /*    7- 4  Pattern pixel color */
-			     /*    3- 0  Background pixel color */
-	__u8    color2;      /*    7- 4  Emphasis pixel-2 color */
-			     /*    3- 0  Emphasis pixel-1 color */
-	__u32    ypos;       /*   23-22  auto action mode */
-			     /*   21-12  start y */
-			     /*    9- 0  end y */
-	__u32    xpos;       /*   23-22  button color number */
-			     /*   21-12  start x */
-			     /*    9- 0  end x */
-} video_highlight_t;
-
-
-typedef struct video_spu {
-	int active;
-	int stream_id;
-} video_spu_t;
-
-
-typedef struct video_navi_pack {
-	int length;          /* 0 ... 1024 */
-	__u8 data[1024];
-} video_navi_pack_t;
-
-
 typedef __u16 video_attributes_t;
 /*   bits: descr. */
 /*   15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) */
@@ -236,16 +192,9 @@ typedef __u16 video_attributes_t;
 #define VIDEO_SLOWMOTION           _IO('o', 32)
 #define VIDEO_GET_CAPABILITIES     _IOR('o', 33, unsigned int)
 #define VIDEO_CLEAR_BUFFER         _IO('o',  34)
-#define VIDEO_SET_ID               _IO('o', 35)
 #define VIDEO_SET_STREAMTYPE       _IO('o', 36)
 #define VIDEO_SET_FORMAT           _IO('o', 37)
-#define VIDEO_SET_SYSTEM           _IO('o', 38)
-#define VIDEO_SET_HIGHLIGHT        _IOW('o', 39, video_highlight_t)
-#define VIDEO_SET_SPU              _IOW('o', 50, video_spu_t)
-#define VIDEO_GET_NAVI             _IOR('o', 52, video_navi_pack_t)
-#define VIDEO_SET_ATTRIBUTES       _IO('o', 53)
 #define VIDEO_GET_SIZE             _IOR('o', 55, video_size_t)
-#define VIDEO_GET_FRAME_RATE       _IOR('o', 56, unsigned int)
 
 /**
  * VIDEO_GET_PTS
-- 
cgit v1.2.3


From d21c249b26311dd193b100e65fc9e7ae96233d40 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 30 May 2018 11:07:04 -0400
Subject: media: dvb/audio.h: get rid of unused APIs

There are a number of other ioctls that aren't used anywhere
inside the Kernel tree.

Get rid of them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/audio.h.rst.exceptions         |  3 -
 Documentation/media/uapi/dvb/audio-get-pts.rst     | 65 ---------------------
 .../media/uapi/dvb/audio-set-attributes.rst        | 67 ----------------------
 Documentation/media/uapi/dvb/audio-set-ext-id.rst  | 66 ---------------------
 Documentation/media/uapi/dvb/audio-set-karaoke.rst | 66 ---------------------
 Documentation/media/uapi/dvb/audio_data_types.rst  | 37 ------------
 .../media/uapi/dvb/audio_function_calls.rst        |  4 --
 fs/compat_ioctl.c                                  |  3 -
 include/uapi/linux/dvb/audio.h                     | 37 ------------
 9 files changed, 348 deletions(-)
 delete mode 100644 Documentation/media/uapi/dvb/audio-get-pts.rst
 delete mode 100644 Documentation/media/uapi/dvb/audio-set-attributes.rst
 delete mode 100644 Documentation/media/uapi/dvb/audio-set-ext-id.rst
 delete mode 100644 Documentation/media/uapi/dvb/audio-set-karaoke.rst

(limited to 'fs')

diff --git a/Documentation/media/audio.h.rst.exceptions b/Documentation/media/audio.h.rst.exceptions
index f40f3cbfe4c9..940458774cf6 100644
--- a/Documentation/media/audio.h.rst.exceptions
+++ b/Documentation/media/audio.h.rst.exceptions
@@ -1,9 +1,6 @@
 # Ignore header name
 ignore define _DVBAUDIO_H_
 
-# Typedef pointing to structs
-replace typedef audio_karaoke_t :c:type:`audio_karaoke`
-
 # Undocumented audio caps, as this is a deprecated API anyway
 ignore define AUDIO_CAP_DTS
 ignore define AUDIO_CAP_LPCM
diff --git a/Documentation/media/uapi/dvb/audio-get-pts.rst b/Documentation/media/uapi/dvb/audio-get-pts.rst
deleted file mode 100644
index 2d1396b003de..000000000000
--- a/Documentation/media/uapi/dvb/audio-get-pts.rst
+++ /dev/null
@@ -1,65 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _AUDIO_GET_PTS:
-
-=============
-AUDIO_GET_PTS
-=============
-
-Name
-----
-
-AUDIO_GET_PTS
-
-.. attention:: This ioctl is deprecated
-
-Synopsis
---------
-
-.. c:function:: int ioctl(int fd, AUDIO_GET_PTS, __u64 *pts)
-    :name: AUDIO_GET_PTS
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -
-
-       -  __u64 \*pts
-
-       -  Returns the 33-bit timestamp as defined in ITU T-REC-H.222.0 /
-	  ISO/IEC 13818-1.
-
-	  The PTS should belong to the currently played frame if possible,
-	  but may also be a value close to it like the PTS of the last
-	  decoded frame or the last PTS extracted by the PES parser.
-
-
-Description
------------
-
-This ioctl is obsolete. Do not use in new drivers. If you need this
-functionality, then please contact the linux-media mailing list
-(`https://linuxtv.org/lists.php <https://linuxtv.org/lists.php>`__).
-
-This ioctl call asks the Audio Device to return the current PTS
-timestamp.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/media/uapi/dvb/audio-set-attributes.rst b/Documentation/media/uapi/dvb/audio-set-attributes.rst
deleted file mode 100644
index f0c6153ca80f..000000000000
--- a/Documentation/media/uapi/dvb/audio-set-attributes.rst
+++ /dev/null
@@ -1,67 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _AUDIO_SET_ATTRIBUTES:
-
-====================
-AUDIO_SET_ATTRIBUTES
-====================
-
-Name
-----
-
-AUDIO_SET_ATTRIBUTES
-
-.. attention:: This ioctl is deprecated
-
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, AUDIO_SET_ATTRIBUTES, struct audio_attributes *attr )
-    :name: AUDIO_SET_ATTRIBUTES
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -
-
-       -  audio_attributes_t attr
-
-       -  audio attributes according to section ??
-
-
-Description
------------
-
-This ioctl is intended for DVD playback and allows you to set certain
-information about the audio stream.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  attr is not a valid or supported attribute setting.
diff --git a/Documentation/media/uapi/dvb/audio-set-ext-id.rst b/Documentation/media/uapi/dvb/audio-set-ext-id.rst
deleted file mode 100644
index 8503c47f26bd..000000000000
--- a/Documentation/media/uapi/dvb/audio-set-ext-id.rst
+++ /dev/null
@@ -1,66 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _AUDIO_SET_EXT_ID:
-
-================
-AUDIO_SET_EXT_ID
-================
-
-Name
-----
-
-AUDIO_SET_EXT_ID
-
-.. attention:: This ioctl is deprecated
-
-Synopsis
---------
-
-.. c:function:: int  ioctl(fd, AUDIO_SET_EXT_ID, int id)
-    :name: AUDIO_SET_EXT_ID
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -
-
-       -  int id
-
-       -  audio sub_stream_id
-
-
-Description
------------
-
-This ioctl can be used to set the extension id for MPEG streams in DVD
-playback. Only the first 3 bits are recognized.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  id is not a valid id.
diff --git a/Documentation/media/uapi/dvb/audio-set-karaoke.rst b/Documentation/media/uapi/dvb/audio-set-karaoke.rst
deleted file mode 100644
index c759952d88aa..000000000000
--- a/Documentation/media/uapi/dvb/audio-set-karaoke.rst
+++ /dev/null
@@ -1,66 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _AUDIO_SET_KARAOKE:
-
-=================
-AUDIO_SET_KARAOKE
-=================
-
-Name
-----
-
-AUDIO_SET_KARAOKE
-
-.. attention:: This ioctl is deprecated
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, AUDIO_SET_KARAOKE, struct audio_karaoke *karaoke)
-    :name: AUDIO_SET_KARAOKE
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -
-
-       -  audio_karaoke_t \*karaoke
-
-       -  karaoke settings according to section ??.
-
-
-Description
------------
-
-This ioctl allows one to set the mixer settings for a karaoke DVD.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  karaoke is not a valid or supported karaoke setting.
diff --git a/Documentation/media/uapi/dvb/audio_data_types.rst b/Documentation/media/uapi/dvb/audio_data_types.rst
index 6b93359d64f7..5bffa2c98a24 100644
--- a/Documentation/media/uapi/dvb/audio_data_types.rst
+++ b/Documentation/media/uapi/dvb/audio_data_types.rst
@@ -114,40 +114,3 @@ following bits set according to the hardwares capabilities.
      #define AUDIO_CAP_OGG   64
      #define AUDIO_CAP_SDDS 128
      #define AUDIO_CAP_AC3  256
-
-.. c:type:: audio_karaoke
-
-The ioctl AUDIO_SET_KARAOKE uses the following format:
-
-
-.. code-block:: c
-
-    typedef
-    struct audio_karaoke {
-	int vocal1;
-	int vocal2;
-	int melody;
-    } audio_karaoke_t;
-
-If Vocal1 or Vocal2 are non-zero, they get mixed into left and right t
-at 70% each. If both, Vocal1 and Vocal2 are non-zero, Vocal1 gets mixed
-into the left channel and Vocal2 into the right channel at 100% each. Ff
-Melody is non-zero, the melody channel gets mixed into left and right.
-
-
-.. c:type:: audio_attributes
-
-The following attributes can be set by a call to AUDIO_SET_ATTRIBUTES:
-
-
-.. code-block:: c
-
-     typedef uint16_t audio_attributes_t;
-     /*   bits: descr. */
-     /*   15-13 audio coding mode (0=ac3, 2=mpeg1, 3=mpeg2ext, 4=LPCM, 6=DTS, */
-     /*   12    multichannel extension */
-     /*   11-10 audio type (0=not spec, 1=language included) */
-     /*    9- 8 audio application mode (0=not spec, 1=karaoke, 2=surround) */
-     /*    7- 6 Quantization / DRC (mpeg audio: 1=DRC exists)(lpcm: 0=16bit,  */
-     /*    5- 4 Sample frequency fs (0=48kHz, 1=96kHz) */
-     /*    2- 0 number of audio channels (n+1 channels) */
diff --git a/Documentation/media/uapi/dvb/audio_function_calls.rst b/Documentation/media/uapi/dvb/audio_function_calls.rst
index 0bb56f0cfed4..7dba16285dab 100644
--- a/Documentation/media/uapi/dvb/audio_function_calls.rst
+++ b/Documentation/media/uapi/dvb/audio_function_calls.rst
@@ -22,13 +22,9 @@ Audio Function Calls
     audio-set-bypass-mode
     audio-channel-select
     audio-bilingual-channel-select
-    audio-get-pts
     audio-get-status
     audio-get-capabilities
     audio-clear-buffer
     audio-set-id
     audio-set-mixer
     audio-set-streamtype
-    audio-set-ext-id
-    audio-set-attributes
-    audio-set-karaoke
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 59216b172003..a9b00942e87d 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1178,9 +1178,6 @@ COMPATIBLE_IOCTL(AUDIO_CLEAR_BUFFER)
 COMPATIBLE_IOCTL(AUDIO_SET_ID)
 COMPATIBLE_IOCTL(AUDIO_SET_MIXER)
 COMPATIBLE_IOCTL(AUDIO_SET_STREAMTYPE)
-COMPATIBLE_IOCTL(AUDIO_SET_EXT_ID)
-COMPATIBLE_IOCTL(AUDIO_SET_ATTRIBUTES)
-COMPATIBLE_IOCTL(AUDIO_SET_KARAOKE)
 COMPATIBLE_IOCTL(DMX_START)
 COMPATIBLE_IOCTL(DMX_STOP)
 COMPATIBLE_IOCTL(DMX_SET_FILTER)
diff --git a/include/uapi/linux/dvb/audio.h b/include/uapi/linux/dvb/audio.h
index 69f7a85d81b1..afeae063e640 100644
--- a/include/uapi/linux/dvb/audio.h
+++ b/include/uapi/linux/dvb/audio.h
@@ -67,27 +67,6 @@ typedef struct audio_status {
 } audio_status_t;                              /* separate decoder hardware */
 
 
-typedef
-struct audio_karaoke {  /* if Vocal1 or Vocal2 are non-zero, they get mixed  */
-	int vocal1;    /* into left and right t at 70% each */
-	int vocal2;    /* if both, Vocal1 and Vocal2 are non-zero, Vocal1 gets*/
-	int melody;    /* mixed into the left channel and */
-		       /* Vocal2 into the right channel at 100% each. */
-		       /* if Melody is non-zero, the melody channel gets mixed*/
-} audio_karaoke_t;     /* into left and right  */
-
-
-typedef __u16 audio_attributes_t;
-/*   bits: descr. */
-/*   15-13 audio coding mode (0=ac3, 2=mpeg1, 3=mpeg2ext, 4=LPCM, 6=DTS, */
-/*   12    multichannel extension */
-/*   11-10 audio type (0=not spec, 1=language included) */
-/*    9- 8 audio application mode (0=not spec, 1=karaoke, 2=surround) */
-/*    7- 6 Quantization / DRC (mpeg audio: 1=DRC exists)(lpcm: 0=16bit,  */
-/*    5- 4 Sample frequency fs (0=48kHz, 1=96kHz) */
-/*    2- 0 number of audio channels (n+1 channels) */
-
-
 /* for GET_CAPABILITIES and SET_FORMAT, the latter should only set one bit */
 #define AUDIO_CAP_DTS    1
 #define AUDIO_CAP_LPCM   2
@@ -115,22 +94,6 @@ typedef __u16 audio_attributes_t;
 #define AUDIO_SET_ID               _IO('o', 13)
 #define AUDIO_SET_MIXER            _IOW('o', 14, audio_mixer_t)
 #define AUDIO_SET_STREAMTYPE       _IO('o', 15)
-#define AUDIO_SET_EXT_ID           _IO('o', 16)
-#define AUDIO_SET_ATTRIBUTES       _IOW('o', 17, audio_attributes_t)
-#define AUDIO_SET_KARAOKE          _IOW('o', 18, audio_karaoke_t)
-
-/**
- * AUDIO_GET_PTS
- *
- * Read the 33 bit presentation time stamp as defined
- * in ITU T-REC-H.222.0 / ISO/IEC 13818-1.
- *
- * The PTS should belong to the currently played
- * frame if possible, but may also be a value close to it
- * like the PTS of the last decoded frame or the last PTS
- * extracted by the PES parser.
- */
-#define AUDIO_GET_PTS              _IOR('o', 19, __u64)
 #define AUDIO_BILINGUAL_CHANNEL_SELECT _IO('o', 20)
 
 #endif /* _DVBAUDIO_H_ */
-- 
cgit v1.2.3


From 86d969b425d7ecf774799b70142b957dc267575b Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 30 Jul 2018 11:18:13 -0700
Subject: xfs: refactor the xrep_extent_list into xfs_bitmap

As mentioned previously, the xrep_extent_list basically implements a
bitmap with two operations: set and disunion (i.e. subtracting one
bitmap from another).  Rename all these functions to xfs_bitmap to
shorten the name and make it more obvious what we're doing.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/scrub/bitmap.c | 183 +++++++++++++++++++++++++-------------------------
 fs/xfs/scrub/bitmap.h |  35 +++++-----
 fs/xfs/scrub/repair.c |  85 +++++++++++------------
 fs/xfs/scrub/repair.h |   8 +--
 fs/xfs/scrub/trace.h  |   1 -
 5 files changed, 149 insertions(+), 163 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index a7c2f4773f98..c770e2d0b6aa 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -16,183 +16,186 @@
 #include "scrub/repair.h"
 #include "scrub/bitmap.h"
 
-/* Collect a dead btree extent for later disposal. */
+/*
+ * Set a range of this bitmap.  Caller must ensure the range is not set.
+ *
+ * This is the logical equivalent of bitmap |= mask(start, len).
+ */
 int
-xrep_collect_btree_extent(
-	struct xfs_scrub	*sc,
-	struct xrep_extent_list	*exlist,
-	xfs_fsblock_t		fsbno,
-	xfs_extlen_t		len)
+xfs_bitmap_set(
+	struct xfs_bitmap	*bitmap,
+	uint64_t		start,
+	uint64_t		len)
 {
-	struct xrep_extent	*rex;
+	struct xfs_bitmap_range	*bmr;
 
-	trace_xrep_collect_btree_extent(sc->mp,
-			XFS_FSB_TO_AGNO(sc->mp, fsbno),
-			XFS_FSB_TO_AGBNO(sc->mp, fsbno), len);
-
-	rex = kmem_alloc(sizeof(struct xrep_extent), KM_MAYFAIL);
-	if (!rex)
+	bmr = kmem_alloc(sizeof(struct xfs_bitmap_range), KM_MAYFAIL);
+	if (!bmr)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&rex->list);
-	rex->fsbno = fsbno;
-	rex->len = len;
-	list_add_tail(&rex->list, &exlist->list);
+	INIT_LIST_HEAD(&bmr->list);
+	bmr->start = start;
+	bmr->len = len;
+	list_add_tail(&bmr->list, &bitmap->list);
 
 	return 0;
 }
 
-/*
- * An error happened during the rebuild so the transaction will be cancelled.
- * The fs will shut down, and the administrator has to unmount and run repair.
- * Therefore, free all the memory associated with the list so we can die.
- */
+/* Free everything related to this bitmap. */
 void
-xrep_cancel_btree_extents(
-	struct xfs_scrub	*sc,
-	struct xrep_extent_list	*exlist)
+xfs_bitmap_destroy(
+	struct xfs_bitmap	*bitmap)
 {
-	struct xrep_extent	*rex;
-	struct xrep_extent	*n;
+	struct xfs_bitmap_range	*bmr;
+	struct xfs_bitmap_range	*n;
 
-	for_each_xrep_extent_safe(rex, n, exlist) {
-		list_del(&rex->list);
-		kmem_free(rex);
+	for_each_xfs_bitmap_extent(bmr, n, bitmap) {
+		list_del(&bmr->list);
+		kmem_free(bmr);
 	}
 }
 
+/* Set up a per-AG block bitmap. */
+void
+xfs_bitmap_init(
+	struct xfs_bitmap	*bitmap)
+{
+	INIT_LIST_HEAD(&bitmap->list);
+}
+
 /* Compare two btree extents. */
 static int
-xrep_btree_extent_cmp(
+xfs_bitmap_range_cmp(
 	void			*priv,
 	struct list_head	*a,
 	struct list_head	*b)
 {
-	struct xrep_extent	*ap;
-	struct xrep_extent	*bp;
+	struct xfs_bitmap_range	*ap;
+	struct xfs_bitmap_range	*bp;
 
-	ap = container_of(a, struct xrep_extent, list);
-	bp = container_of(b, struct xrep_extent, list);
+	ap = container_of(a, struct xfs_bitmap_range, list);
+	bp = container_of(b, struct xfs_bitmap_range, list);
 
-	if (ap->fsbno > bp->fsbno)
+	if (ap->start > bp->start)
 		return 1;
-	if (ap->fsbno < bp->fsbno)
+	if (ap->start < bp->start)
 		return -1;
 	return 0;
 }
 
 /*
- * Remove all the blocks mentioned in @sublist from the extents in @exlist.
+ * Remove all the blocks mentioned in @sub from the extents in @bitmap.
  *
  * The intent is that callers will iterate the rmapbt for all of its records
- * for a given owner to generate @exlist; and iterate all the blocks of the
+ * for a given owner to generate @bitmap; and iterate all the blocks of the
  * metadata structures that are not being rebuilt and have the same rmapbt
- * owner to generate @sublist.  This routine subtracts all the extents
- * mentioned in sublist from all the extents linked in @exlist, which leaves
- * @exlist as the list of blocks that are not accounted for, which we assume
+ * owner to generate @sub.  This routine subtracts all the extents
+ * mentioned in sub from all the extents linked in @bitmap, which leaves
+ * @bitmap as the list of blocks that are not accounted for, which we assume
  * are the dead blocks of the old metadata structure.  The blocks mentioned in
- * @exlist can be reaped.
+ * @bitmap can be reaped.
+ *
+ * This is the logical equivalent of bitmap &= ~sub.
  */
 #define LEFT_ALIGNED	(1 << 0)
 #define RIGHT_ALIGNED	(1 << 1)
 int
-xrep_subtract_extents(
-	struct xfs_scrub	*sc,
-	struct xrep_extent_list	*exlist,
-	struct xrep_extent_list	*sublist)
+xfs_bitmap_disunion(
+	struct xfs_bitmap	*bitmap,
+	struct xfs_bitmap	*sub)
 {
 	struct list_head	*lp;
-	struct xrep_extent	*ex;
-	struct xrep_extent	*newex;
-	struct xrep_extent	*subex;
-	xfs_fsblock_t		sub_fsb;
-	xfs_extlen_t		sub_len;
+	struct xfs_bitmap_range	*br;
+	struct xfs_bitmap_range	*new_br;
+	struct xfs_bitmap_range	*sub_br;
+	uint64_t		sub_start;
+	uint64_t		sub_len;
 	int			state;
 	int			error = 0;
 
-	if (list_empty(&exlist->list) || list_empty(&sublist->list))
+	if (list_empty(&bitmap->list) || list_empty(&sub->list))
 		return 0;
-	ASSERT(!list_empty(&sublist->list));
+	ASSERT(!list_empty(&sub->list));
 
-	list_sort(NULL, &exlist->list, xrep_btree_extent_cmp);
-	list_sort(NULL, &sublist->list, xrep_btree_extent_cmp);
+	list_sort(NULL, &bitmap->list, xfs_bitmap_range_cmp);
+	list_sort(NULL, &sub->list, xfs_bitmap_range_cmp);
 
 	/*
-	 * Now that we've sorted both lists, we iterate exlist once, rolling
-	 * forward through sublist and/or exlist as necessary until we find an
+	 * Now that we've sorted both lists, we iterate bitmap once, rolling
+	 * forward through sub and/or bitmap as necessary until we find an
 	 * overlap or reach the end of either list.  We do not reset lp to the
-	 * head of exlist nor do we reset subex to the head of sublist.  The
+	 * head of bitmap nor do we reset sub_br to the head of sub.  The
 	 * list traversal is similar to merge sort, but we're deleting
 	 * instead.  In this manner we avoid O(n^2) operations.
 	 */
-	subex = list_first_entry(&sublist->list, struct xrep_extent,
+	sub_br = list_first_entry(&sub->list, struct xfs_bitmap_range,
 			list);
-	lp = exlist->list.next;
-	while (lp != &exlist->list) {
-		ex = list_entry(lp, struct xrep_extent, list);
+	lp = bitmap->list.next;
+	while (lp != &bitmap->list) {
+		br = list_entry(lp, struct xfs_bitmap_range, list);
 
 		/*
-		 * Advance subex and/or ex until we find a pair that
+		 * Advance sub_br and/or br until we find a pair that
 		 * intersect or we run out of extents.
 		 */
-		while (subex->fsbno + subex->len <= ex->fsbno) {
-			if (list_is_last(&subex->list, &sublist->list))
+		while (sub_br->start + sub_br->len <= br->start) {
+			if (list_is_last(&sub_br->list, &sub->list))
 				goto out;
-			subex = list_next_entry(subex, list);
+			sub_br = list_next_entry(sub_br, list);
 		}
-		if (subex->fsbno >= ex->fsbno + ex->len) {
+		if (sub_br->start >= br->start + br->len) {
 			lp = lp->next;
 			continue;
 		}
 
-		/* trim subex to fit the extent we have */
-		sub_fsb = subex->fsbno;
-		sub_len = subex->len;
-		if (subex->fsbno < ex->fsbno) {
-			sub_len -= ex->fsbno - subex->fsbno;
-			sub_fsb = ex->fsbno;
+		/* trim sub_br to fit the extent we have */
+		sub_start = sub_br->start;
+		sub_len = sub_br->len;
+		if (sub_br->start < br->start) {
+			sub_len -= br->start - sub_br->start;
+			sub_start = br->start;
 		}
-		if (sub_len > ex->len)
-			sub_len = ex->len;
+		if (sub_len > br->len)
+			sub_len = br->len;
 
 		state = 0;
-		if (sub_fsb == ex->fsbno)
+		if (sub_start == br->start)
 			state |= LEFT_ALIGNED;
-		if (sub_fsb + sub_len == ex->fsbno + ex->len)
+		if (sub_start + sub_len == br->start + br->len)
 			state |= RIGHT_ALIGNED;
 		switch (state) {
 		case LEFT_ALIGNED:
 			/* Coincides with only the left. */
-			ex->fsbno += sub_len;
-			ex->len -= sub_len;
+			br->start += sub_len;
+			br->len -= sub_len;
 			break;
 		case RIGHT_ALIGNED:
 			/* Coincides with only the right. */
-			ex->len -= sub_len;
+			br->len -= sub_len;
 			lp = lp->next;
 			break;
 		case LEFT_ALIGNED | RIGHT_ALIGNED:
 			/* Total overlap, just delete ex. */
 			lp = lp->next;
-			list_del(&ex->list);
-			kmem_free(ex);
+			list_del(&br->list);
+			kmem_free(br);
 			break;
 		case 0:
 			/*
 			 * Deleting from the middle: add the new right extent
 			 * and then shrink the left extent.
 			 */
-			newex = kmem_alloc(sizeof(struct xrep_extent),
+			new_br = kmem_alloc(sizeof(struct xfs_bitmap_range),
 					KM_MAYFAIL);
-			if (!newex) {
+			if (!new_br) {
 				error = -ENOMEM;
 				goto out;
 			}
-			INIT_LIST_HEAD(&newex->list);
-			newex->fsbno = sub_fsb + sub_len;
-			newex->len = ex->fsbno + ex->len - newex->fsbno;
-			list_add(&newex->list, &ex->list);
-			ex->len = sub_fsb - ex->fsbno;
+			INIT_LIST_HEAD(&new_br->list);
+			new_br->start = sub_start + sub_len;
+			new_br->len = br->start + br->len - new_br->start;
+			list_add(&new_br->list, &br->list);
+			br->len = sub_start - br->start;
 			lp = lp->next;
 			break;
 		default:
diff --git a/fs/xfs/scrub/bitmap.h b/fs/xfs/scrub/bitmap.h
index 1038157695a8..dad652ee9177 100644
--- a/fs/xfs/scrub/bitmap.h
+++ b/fs/xfs/scrub/bitmap.h
@@ -6,32 +6,27 @@
 #ifndef __XFS_SCRUB_BITMAP_H__
 #define __XFS_SCRUB_BITMAP_H__
 
-struct xrep_extent {
+struct xfs_bitmap_range {
 	struct list_head	list;
-	xfs_fsblock_t		fsbno;
-	xfs_extlen_t		len;
+	uint64_t		start;
+	uint64_t		len;
 };
 
-struct xrep_extent_list {
+struct xfs_bitmap {
 	struct list_head	list;
 };
 
-static inline void
-xrep_init_extent_list(
-	struct xrep_extent_list		*exlist)
-{
-	INIT_LIST_HEAD(&exlist->list);
-}
+void xfs_bitmap_init(struct xfs_bitmap *bitmap);
+void xfs_bitmap_destroy(struct xfs_bitmap *bitmap);
 
-#define for_each_xrep_extent_safe(rbe, n, exlist) \
-	list_for_each_entry_safe((rbe), (n), &(exlist)->list, list)
-int xrep_collect_btree_extent(struct xfs_scrub *sc,
-		struct xrep_extent_list *btlist, xfs_fsblock_t fsbno,
-		xfs_extlen_t len);
-void xrep_cancel_btree_extents(struct xfs_scrub *sc,
-		struct xrep_extent_list *btlist);
-int xrep_subtract_extents(struct xfs_scrub *sc,
-		struct xrep_extent_list *exlist,
-		struct xrep_extent_list *sublist);
+#define for_each_xfs_bitmap_extent(bex, n, bitmap) \
+	list_for_each_entry_safe((bex), (n), &(bitmap)->list, list)
+
+#define for_each_xfs_bitmap_block(b, bex, n, bitmap) \
+	list_for_each_entry_safe((bex), (n), &(bitmap)->list, list) \
+		for ((b) = bex->start; (b) < bex->start + bex->len; (b)++)
+
+int xfs_bitmap_set(struct xfs_bitmap *bitmap, uint64_t start, uint64_t len);
+int xfs_bitmap_disunion(struct xfs_bitmap *bitmap, struct xfs_bitmap *sub);
 
 #endif	/* __XFS_SCRUB_BITMAP_H__ */
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 27a904ef6189..85b048b341a0 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -368,17 +368,17 @@ xrep_init_btblock(
  *
  * However, that leaves the matter of removing all the metadata describing the
  * old broken structure.  For primary metadata we use the rmap data to collect
- * every extent with a matching rmap owner (exlist); we then iterate all other
+ * every extent with a matching rmap owner (bitmap); we then iterate all other
  * metadata structures with the same rmap owner to collect the extents that
- * cannot be removed (sublist).  We then subtract sublist from exlist to
+ * cannot be removed (sublist).  We then subtract sublist from bitmap to
  * derive the blocks that were used by the old btree.  These blocks can be
  * reaped.
  *
  * For rmapbt reconstructions we must use different tactics for extent
  * collection.  First we iterate all primary metadata (this excludes the old
  * rmapbt, obviously) to generate new rmap records.  The gaps in the rmap
- * records are collected as exlist.  The bnobt records are collected as
- * sublist.  As with the other btrees we subtract sublist from exlist, and the
+ * records are collected as bitmap.  The bnobt records are collected as
+ * sublist.  As with the other btrees we subtract sublist from bitmap, and the
  * result (since the rmapbt lives in the free space) are the blocks from the
  * old rmapbt.
  *
@@ -386,11 +386,11 @@ xrep_init_btblock(
  *
  * Now that we've constructed a new btree to replace the damaged one, we want
  * to dispose of the blocks that (we think) the old btree was using.
- * Previously, we used the rmapbt to collect the extents (exlist) with the
+ * Previously, we used the rmapbt to collect the extents (bitmap) with the
  * rmap owner corresponding to the tree we rebuilt, collected extents for any
  * blocks with the same rmap owner that are owned by another data structure
- * (sublist), and subtracted sublist from exlist.  In theory the extents
- * remaining in exlist are the old btree's blocks.
+ * (sublist), and subtracted sublist from bitmap.  In theory the extents
+ * remaining in bitmap are the old btree's blocks.
  *
  * Unfortunately, it's possible that the btree was crosslinked with other
  * blocks on disk.  The rmap data can tell us if there are multiple owners, so
@@ -406,7 +406,7 @@ xrep_init_btblock(
  * If there are no rmap records at all, we also free the block.  If the btree
  * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
  * supposed to be a rmap record and everything is ok.  For other btrees there
- * had to have been an rmap entry for the block to have ended up on @exlist,
+ * had to have been an rmap entry for the block to have ended up on @bitmap,
  * so if it's gone now there's something wrong and the fs will shut down.
  *
  * Note: If there are multiple rmap records with only the same rmap owner as
@@ -419,7 +419,7 @@ xrep_init_btblock(
  * The caller is responsible for locking the AG headers for the entire rebuild
  * operation so that nothing else can sneak in and change the AG state while
  * we're not looking.  We also assume that the caller already invalidated any
- * buffers associated with @exlist.
+ * buffers associated with @bitmap.
  */
 
 /*
@@ -429,13 +429,12 @@ xrep_init_btblock(
 int
 xrep_invalidate_blocks(
 	struct xfs_scrub	*sc,
-	struct xrep_extent_list	*exlist)
+	struct xfs_bitmap	*bitmap)
 {
-	struct xrep_extent	*rex;
-	struct xrep_extent	*n;
+	struct xfs_bitmap_range	*bmr;
+	struct xfs_bitmap_range	*n;
 	struct xfs_buf		*bp;
 	xfs_fsblock_t		fsbno;
-	xfs_agblock_t		i;
 
 	/*
 	 * For each block in each extent, see if there's an incore buffer for
@@ -445,18 +444,16 @@ xrep_invalidate_blocks(
 	 * because we never own those; and if we can't TRYLOCK the buffer we
 	 * assume it's owned by someone else.
 	 */
-	for_each_xrep_extent_safe(rex, n, exlist) {
-		for (fsbno = rex->fsbno, i = rex->len; i > 0; fsbno++, i--) {
-			/* Skip AG headers and post-EOFS blocks */
-			if (!xfs_verify_fsbno(sc->mp, fsbno))
-				continue;
-			bp = xfs_buf_incore(sc->mp->m_ddev_targp,
-					XFS_FSB_TO_DADDR(sc->mp, fsbno),
-					XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK);
-			if (bp) {
-				xfs_trans_bjoin(sc->tp, bp);
-				xfs_trans_binval(sc->tp, bp);
-			}
+	for_each_xfs_bitmap_block(fsbno, bmr, n, bitmap) {
+		/* Skip AG headers and post-EOFS blocks */
+		if (!xfs_verify_fsbno(sc->mp, fsbno))
+			continue;
+		bp = xfs_buf_incore(sc->mp->m_ddev_targp,
+				XFS_FSB_TO_DADDR(sc->mp, fsbno),
+				XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK);
+		if (bp) {
+			xfs_trans_bjoin(sc->tp, bp);
+			xfs_trans_binval(sc->tp, bp);
 		}
 	}
 
@@ -519,9 +516,9 @@ xrep_put_freelist(
 	return 0;
 }
 
-/* Dispose of a single metadata block. */
+/* Dispose of a single block. */
 STATIC int
-xrep_dispose_btree_block(
+xrep_reap_block(
 	struct xfs_scrub	*sc,
 	xfs_fsblock_t		fsbno,
 	struct xfs_owner_info	*oinfo,
@@ -593,41 +590,35 @@ out_free:
 	return error;
 }
 
-/* Dispose of btree blocks from an old per-AG btree. */
+/* Dispose of every block of every extent in the bitmap. */
 int
-xrep_reap_btree_extents(
+xrep_reap_extents(
 	struct xfs_scrub	*sc,
-	struct xrep_extent_list	*exlist,
+	struct xfs_bitmap	*bitmap,
 	struct xfs_owner_info	*oinfo,
 	enum xfs_ag_resv_type	type)
 {
-	struct xrep_extent	*rex;
-	struct xrep_extent	*n;
+	struct xfs_bitmap_range	*bmr;
+	struct xfs_bitmap_range	*n;
+	xfs_fsblock_t		fsbno;
 	int			error = 0;
 
 	ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));
 
-	/* Dispose of every block from the old btree. */
-	for_each_xrep_extent_safe(rex, n, exlist) {
+	for_each_xfs_bitmap_block(fsbno, bmr, n, bitmap) {
 		ASSERT(sc->ip != NULL ||
-		       XFS_FSB_TO_AGNO(sc->mp, rex->fsbno) == sc->sa.agno);
-
+		       XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.agno);
 		trace_xrep_dispose_btree_extent(sc->mp,
-				XFS_FSB_TO_AGNO(sc->mp, rex->fsbno),
-				XFS_FSB_TO_AGBNO(sc->mp, rex->fsbno), rex->len);
+				XFS_FSB_TO_AGNO(sc->mp, fsbno),
+				XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1);
 
-		for (; rex->len > 0; rex->len--, rex->fsbno++) {
-			error = xrep_dispose_btree_block(sc, rex->fsbno,
-					oinfo, type);
-			if (error)
-				goto out;
-		}
-		list_del(&rex->list);
-		kmem_free(rex);
+		error = xrep_reap_block(sc, fsbno, oinfo, type);
+		if (error)
+			goto out;
 	}
 
 out:
-	xrep_cancel_btree_extents(sc, exlist);
+	xfs_bitmap_destroy(bitmap);
 	return error;
 }
 
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index a3d491a438f4..5a4e92221916 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -27,13 +27,11 @@ int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb,
 		struct xfs_buf **bpp, xfs_btnum_t btnum,
 		const struct xfs_buf_ops *ops);
 
-struct xrep_extent_list;
+struct xfs_bitmap;
 
 int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink);
-int xrep_invalidate_blocks(struct xfs_scrub *sc,
-		struct xrep_extent_list *btlist);
-int xrep_reap_btree_extents(struct xfs_scrub *sc,
-		struct xrep_extent_list *exlist,
+int xrep_invalidate_blocks(struct xfs_scrub *sc, struct xfs_bitmap *btlist);
+int xrep_reap_extents(struct xfs_scrub *sc, struct xfs_bitmap *exlist,
 		struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
 
 struct xrep_find_ag_btree {
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 93db22c39b51..4e20f0e48232 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -511,7 +511,6 @@ DEFINE_EVENT(xrep_extent_class, name, \
 		 xfs_agblock_t agbno, xfs_extlen_t len), \
 	TP_ARGS(mp, agno, agbno, len))
 DEFINE_REPAIR_EXTENT_EVENT(xrep_dispose_btree_extent);
-DEFINE_REPAIR_EXTENT_EVENT(xrep_collect_btree_extent);
 DEFINE_REPAIR_EXTENT_EVENT(xrep_agfl_insert);
 
 DECLARE_EVENT_CLASS(xrep_rmap_class,
-- 
cgit v1.2.3


From eca383fcd63b452cf533505154135da2a1f70227 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Sun, 29 Jul 2018 13:10:44 -0700
Subject: xfs: refactor superblock verifiers

Split the superblock verifier into the common checks, the read-time
checks, and the write-time check functions.  No functional changes, but
we're setting up to add more write-only checks.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
---
 fs/xfs/libxfs/xfs_sb.c | 205 ++++++++++++++++++++++++++-----------------------
 1 file changed, 111 insertions(+), 94 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index b3ad15956366..f3835e923893 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -96,80 +96,94 @@ xfs_perag_put(
 	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
 }
 
-/*
- * Check the validity of the SB found.
- */
+/* Check all the superblock fields we care about when reading one in. */
 STATIC int
-xfs_mount_validate_sb(
-	xfs_mount_t	*mp,
-	xfs_sb_t	*sbp,
-	bool		check_inprogress,
-	bool		check_version)
+xfs_validate_sb_read(
+	struct xfs_mount	*mp,
+	struct xfs_sb		*sbp)
 {
-	uint32_t	agcount = 0;
-	uint32_t	rem;
-
-	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
-		xfs_warn(mp, "bad magic number");
-		return -EWRONGFS;
-	}
-
-
-	if (!xfs_sb_good_version(sbp)) {
-		xfs_warn(mp, "bad version");
-		return -EWRONGFS;
-	}
+	if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5)
+		return 0;
 
 	/*
-	 * Version 5 superblock feature mask validation. Reject combinations the
-	 * kernel cannot support up front before checking anything else. For
-	 * write validation, we don't need to check feature masks.
+	 * Version 5 superblock feature mask validation. Reject combinations
+	 * the kernel cannot support up front before checking anything else.
 	 */
-	if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
-		if (xfs_sb_has_compat_feature(sbp,
-					XFS_SB_FEAT_COMPAT_UNKNOWN)) {
-			xfs_warn(mp,
+	if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) {
+		xfs_warn(mp,
 "Superblock has unknown compatible features (0x%x) enabled.",
-				(sbp->sb_features_compat &
-						XFS_SB_FEAT_COMPAT_UNKNOWN));
-			xfs_warn(mp,
+			(sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN));
+		xfs_warn(mp,
 "Using a more recent kernel is recommended.");
-		}
+	}
 
-		if (xfs_sb_has_ro_compat_feature(sbp,
-					XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
-			xfs_alert(mp,
+	if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
+		xfs_alert(mp,
 "Superblock has unknown read-only compatible features (0x%x) enabled.",
-				(sbp->sb_features_ro_compat &
-						XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
-			if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-				xfs_warn(mp,
+			(sbp->sb_features_ro_compat &
+					XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
+		if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+			xfs_warn(mp,
 "Attempted to mount read-only compatible filesystem read-write.");
-				xfs_warn(mp,
+			xfs_warn(mp,
 "Filesystem can only be safely mounted read only.");
 
-				return -EINVAL;
-			}
-		}
-		if (xfs_sb_has_incompat_feature(sbp,
-					XFS_SB_FEAT_INCOMPAT_UNKNOWN)) {
-			xfs_warn(mp,
-"Superblock has unknown incompatible features (0x%x) enabled.",
-				(sbp->sb_features_incompat &
-						XFS_SB_FEAT_INCOMPAT_UNKNOWN));
-			xfs_warn(mp,
-"Filesystem can not be safely mounted by this kernel.");
 			return -EINVAL;
 		}
-	} else if (xfs_sb_version_hascrc(sbp)) {
-		/*
-		 * We can't read verify the sb LSN because the read verifier is
-		 * called before the log is allocated and processed. We know the
-		 * log is set up before write verifier (!check_version) calls,
-		 * so just check it here.
-		 */
-		if (!xfs_log_check_lsn(mp, sbp->sb_lsn))
-			return -EFSCORRUPTED;
+	}
+	if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN)) {
+		xfs_warn(mp,
+"Superblock has unknown incompatible features (0x%x) enabled.",
+			(sbp->sb_features_incompat &
+					XFS_SB_FEAT_INCOMPAT_UNKNOWN));
+		xfs_warn(mp,
+"Filesystem cannot be safely mounted by this kernel.");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Check all the superblock fields we care about when writing one out. */
+STATIC int
+xfs_validate_sb_write(
+	struct xfs_mount	*mp,
+	struct xfs_sb		*sbp)
+{
+	if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5)
+		return 0;
+
+	/* XXX: For write validation, we don't need to check feature masks?? */
+
+	/*
+	 * We can't read verify the sb LSN because the read verifier is called
+	 * before the log is allocated and processed. We know the log is set up
+	 * before write verifier calls, so check it here.
+	 */
+	if (!xfs_log_check_lsn(mp, sbp->sb_lsn))
+		return -EFSCORRUPTED;
+
+	return 0;
+}
+
+/* Check the validity of the SB. */
+STATIC int
+xfs_validate_sb_common(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp,
+	struct xfs_sb		*sbp)
+{
+	uint32_t		agcount = 0;
+	uint32_t		rem;
+
+	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
+		xfs_warn(mp, "bad magic number");
+		return -EWRONGFS;
+	}
+
+	if (!xfs_sb_good_version(sbp)) {
+		xfs_warn(mp, "bad version");
+		return -EWRONGFS;
 	}
 
 	if (xfs_sb_version_has_pquotino(sbp)) {
@@ -321,7 +335,12 @@ xfs_mount_validate_sb(
 		return -EFBIG;
 	}
 
-	if (check_inprogress && sbp->sb_inprogress) {
+	/*
+	 * Don't touch the filesystem if a user tool thinks it owns the primary
+	 * superblock.  mkfs doesn't clear the flag from secondary supers, so
+	 * we don't check them at all.
+	 */
+	if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && sbp->sb_inprogress) {
 		xfs_warn(mp, "Offline file system operation in progress!");
 		return -EFSCORRUPTED;
 	}
@@ -596,29 +615,6 @@ xfs_sb_to_disk(
 	}
 }
 
-static int
-xfs_sb_verify(
-	struct xfs_buf	*bp,
-	bool		check_version)
-{
-	struct xfs_mount *mp = bp->b_target->bt_mount;
-	struct xfs_sb	sb;
-
-	/*
-	 * Use call variant which doesn't convert quota flags from disk 
-	 * format, because xfs_mount_validate_sb checks the on-disk flags.
-	 */
-	__xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false);
-
-	/*
-	 * Only check the in progress field for the primary superblock as
-	 * mkfs.xfs doesn't clear it from secondary superblocks.
-	 */
-	return xfs_mount_validate_sb(mp, &sb,
-				     bp->b_maps[0].bm_bn == XFS_SB_DADDR,
-				     check_version);
-}
-
 /*
  * If the superblock has the CRC feature bit set or the CRC field is non-null,
  * check that the CRC is valid.  We check the CRC field is non-null because a
@@ -633,11 +629,12 @@ xfs_sb_verify(
  */
 static void
 xfs_sb_read_verify(
-	struct xfs_buf	*bp)
+	struct xfs_buf		*bp)
 {
-	struct xfs_mount *mp = bp->b_target->bt_mount;
-	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);
-	int		error;
+	struct xfs_sb		sb;
+	struct xfs_mount	*mp = bp->b_target->bt_mount;
+	struct xfs_dsb		*dsb = XFS_BUF_TO_SBP(bp);
+	int			error;
 
 	/*
 	 * open code the version check to avoid needing to convert the entire
@@ -657,7 +654,16 @@ xfs_sb_read_verify(
 			}
 		}
 	}
-	error = xfs_sb_verify(bp, true);
+
+	/*
+	 * Check all the superblock fields.  Don't byteswap the xquota flags
+	 * because _verify_common checks the on-disk values.
+	 */
+	__xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false);
+	error = xfs_validate_sb_common(mp, bp, &sb);
+	if (error)
+		goto out_error;
+	error = xfs_validate_sb_read(mp, &sb);
 
 out_error:
 	if (error == -EFSCORRUPTED || error == -EFSBADCRC)
@@ -691,15 +697,22 @@ static void
 xfs_sb_write_verify(
 	struct xfs_buf		*bp)
 {
+	struct xfs_sb		sb;
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 	int			error;
 
-	error = xfs_sb_verify(bp, false);
-	if (error) {
-		xfs_verifier_error(bp, error, __this_address);
-		return;
-	}
+	/*
+	 * Check all the superblock fields.  Don't byteswap the xquota flags
+	 * because _verify_common checks the on-disk values.
+	 */
+	__xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false);
+	error = xfs_validate_sb_common(mp, bp, &sb);
+	if (error)
+		goto out_error;
+	error = xfs_validate_sb_write(mp, &sb);
+	if (error)
+		goto out_error;
 
 	if (!xfs_sb_version_hascrc(&mp->m_sb))
 		return;
@@ -708,6 +721,10 @@ xfs_sb_write_verify(
 		XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
 
 	xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
+	return;
+
+out_error:
+	xfs_verifier_error(bp, error, __this_address);
 }
 
 const struct xfs_buf_ops xfs_sb_buf_ops = {
-- 
cgit v1.2.3


From 8756a5af18191a471e670cc577aea60b652fea4c Mon Sep 17 00:00:00 2001
From: Bill O'Donnell <billodo@redhat.com>
Date: Thu, 26 Jul 2018 10:10:34 -0700
Subject: libxfs: add more bounds checking to sb sanity checks

Current sb verifier doesn't check bounds on sb_fdblocks and sb_ifree.
Add sanity checks for these parameters.

Signed-off-by: Bill O'Donnell <billodo@redhat.com>
[darrick: port to refactored sb validation predicates]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
---
 fs/xfs/libxfs/xfs_sb.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index f3835e923893..3d29f4a5242f 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -150,6 +150,18 @@ xfs_validate_sb_write(
 	struct xfs_mount	*mp,
 	struct xfs_sb		*sbp)
 {
+	/*
+	 * Carry out additional sb summary counter sanity checks when we write
+	 * the superblock.  We skip this in the read validator because there
+	 * could be newer superblocks in the log and if the values are garbage
+	 * we'll recalculate them at the end of log mount.
+	 */
+	if (sbp->sb_fdblocks > sbp->sb_dblocks ||
+	    sbp->sb_ifree > sbp->sb_icount) {
+		xfs_warn(mp, "SB summary counter sanity check failed");
+		return -EFSCORRUPTED;
+	}
+
 	if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5)
 		return 0;
 
-- 
cgit v1.2.3


From 69775fd15dc78e0547af45fb3e375d5423cb21b1 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 26 Jul 2018 10:10:42 -0700
Subject: xfs: verify icount in superblock write

Add a helper predicate to check the inode count for sanity, then use it
in the superblock write verifier to inspect sb_icount.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
---
 fs/xfs/libxfs/xfs_sb.c    |  3 ++-
 fs/xfs/libxfs/xfs_types.c | 34 ++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_types.h |  1 +
 3 files changed, 37 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 3d29f4a5242f..05e7ed1b8022 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -154,9 +154,10 @@ xfs_validate_sb_write(
 	 * Carry out additional sb summary counter sanity checks when we write
 	 * the superblock.  We skip this in the read validator because there
 	 * could be newer superblocks in the log and if the values are garbage
-	 * we'll recalculate them at the end of log mount.
+	 * even after replay we'll recalculate them at the end of log mount.
 	 */
 	if (sbp->sb_fdblocks > sbp->sb_dblocks ||
+	    !xfs_verify_icount(mp, sbp->sb_icount) ||
 	    sbp->sb_ifree > sbp->sb_icount) {
 		xfs_warn(mp, "SB summary counter sanity check failed");
 		return -EFSCORRUPTED;
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index 2e2a243cef2e..33a5ca346baf 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -171,3 +171,37 @@ xfs_verify_rtbno(
 {
 	return rtbno < mp->m_sb.sb_rblocks;
 }
+
+/* Calculate the range of valid icount values. */
+static void
+xfs_icount_range(
+	struct xfs_mount	*mp,
+	unsigned long long	*min,
+	unsigned long long	*max)
+{
+	unsigned long long	nr_inos = 0;
+	xfs_agnumber_t		agno;
+
+	/* root, rtbitmap, rtsum all live in the first chunk */
+	*min = XFS_INODES_PER_CHUNK;
+
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		xfs_agino_t	first, last;
+
+		xfs_agino_range(mp, agno, &first, &last);
+		nr_inos += last - first + 1;
+	}
+	*max = nr_inos;
+}
+
+/* Sanity-checking of inode counts. */
+bool
+xfs_verify_icount(
+	struct xfs_mount	*mp,
+	unsigned long long	icount)
+{
+	unsigned long long	min, max;
+
+	xfs_icount_range(mp, &min, &max);
+	return icount >= min && icount <= max;
+}
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 4055d62f690c..b9e6c89284c3 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -165,5 +165,6 @@ bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino);
 bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
 bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino);
 bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
+bool xfs_verify_icount(struct xfs_mount *mp, unsigned long long icount);
 
 #endif	/* __XFS_TYPES_H__ */
-- 
cgit v1.2.3


From 9e037cb7972fab5a9f55bca4ebe6e4dbf7e160bc Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 30 Jul 2018 16:45:35 -0700
Subject: xfs: check for unknown v5 feature bits in superblock write verifier

Make sure we never try to write the superblock with unknown feature bits
set.  We checked those at mount time, so if they're set now then memory
is corrupt.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
---
 fs/xfs/libxfs/xfs_sb.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 05e7ed1b8022..ca1b3a7a9171 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -166,7 +166,40 @@ xfs_validate_sb_write(
 	if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5)
 		return 0;
 
-	/* XXX: For write validation, we don't need to check feature masks?? */
+	/*
+	 * Version 5 superblock feature mask validation. Reject combinations
+	 * the kernel cannot support since we checked for unsupported bits in
+	 * the read verifier, which means that memory is corrupt.
+	 */
+	if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) {
+		xfs_warn(mp,
+"Corruption detected in superblock compatible features (0x%x)!",
+			(sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN));
+		return -EFSCORRUPTED;
+	}
+
+	if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
+		xfs_alert(mp,
+"Corruption detected in superblock read-only compatible features (0x%x)!",
+			(sbp->sb_features_ro_compat &
+					XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
+		return -EFSCORRUPTED;
+	}
+	if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN)) {
+		xfs_warn(mp,
+"Corruption detected in superblock incompatible features (0x%x)!",
+			(sbp->sb_features_incompat &
+					XFS_SB_FEAT_INCOMPAT_UNKNOWN));
+		return -EFSCORRUPTED;
+	}
+	if (xfs_sb_has_incompat_log_feature(sbp,
+			XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) {
+		xfs_warn(mp,
+"Corruption detected in superblock incompatible log features (0x%x)!",
+			(sbp->sb_features_log_incompat &
+					XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
+		return -EFSCORRUPTED;
+	}
 
 	/*
 	 * We can't read verify the sb LSN because the read verifier is called
-- 
cgit v1.2.3


From 745b3f76d1c889d738a1c4537a3c491bc1ecac4d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 17 Jul 2018 16:51:51 -0700
Subject: xfs: maintain a sequence count for inode fork manipulations

Add a simple 32-bit unsigned integer as the sequence count for
modifications to the extent list in the inode fork.  This will be
used to optimize away extent list lookups in the writeback code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_iext_tree.c  | 6 ++++++
 fs/xfs/libxfs/xfs_inode_fork.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index b80c63faace2..8a7aea041ee1 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -624,6 +624,8 @@ xfs_iext_insert(
 	struct xfs_iext_leaf	*new = NULL;
 	int			nr_entries, i;
 
+	ifp->if_seq++;
+
 	if (ifp->if_height == 0)
 		xfs_iext_alloc_root(ifp, cur);
 	else if (ifp->if_height == 1)
@@ -864,6 +866,8 @@ xfs_iext_remove(
 	ASSERT(ifp->if_u1.if_root != NULL);
 	ASSERT(xfs_iext_valid(ifp, cur));
 
+	ifp->if_seq++;
+
 	nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
 	for (i = cur->pos; i < nr_entries; i++)
 		leaf->recs[i] = leaf->recs[i + 1];
@@ -970,6 +974,8 @@ xfs_iext_update_extent(
 {
 	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
 
+	ifp->if_seq++;
+
 	if (cur->pos == 0) {
 		struct xfs_bmbt_irec	old;
 
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 1492143371f3..60361d2d74a1 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -14,6 +14,7 @@ struct xfs_dinode;
  */
 struct xfs_ifork {
 	int			if_bytes;	/* bytes in if_u1 */
+	unsigned int		if_seq;		/* cow fork mod counter */
 	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
 	short			if_broot_bytes;	/* bytes allocated for root */
 	unsigned char		if_flags;	/* per-fork flags */
-- 
cgit v1.2.3


From e666aa37f4330cb93a5004a89b7a938312e74e36 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 17 Jul 2018 16:51:52 -0700
Subject: xfs: avoid COW fork extent lookups in writeback if the fork didn't
 change

Use the per-fork sequence counter to avoid lookups in the writeback code
unless the COW fork actually changed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_aops.c  | 38 +++++++++++++++++++++++++++++++++-----
 fs/xfs/xfs_iomap.c |  5 ++++-
 fs/xfs/xfs_iomap.h |  2 +-
 3 files changed, 38 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 814100d27343..235b4ddcd324 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -29,6 +29,7 @@
 struct xfs_writepage_ctx {
 	struct xfs_bmbt_irec    imap;
 	unsigned int		io_type;
+	unsigned int		cow_seq;
 	struct xfs_ioend	*ioend;
 };
 
@@ -310,6 +311,7 @@ xfs_map_blocks(
 	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			count = i_blocksize(inode);
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
+	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
 	struct xfs_bmbt_irec	imap;
 	int			whichfork = XFS_DATA_FORK;
 	struct xfs_iext_cursor	icur;
@@ -333,12 +335,23 @@ xfs_map_blocks(
 	 * COW fork blocks can overlap data fork blocks even if the blocks
 	 * aren't shared.  COW I/O always takes precedent, so we must always
 	 * check for overlap on reflink inodes unless the mapping is already a
-	 * COW one.
+	 * COW one, or the COW fork hasn't changed from the last time we looked
+	 * at it.
+	 *
+	 * It's safe to check the COW fork if_seq here without the ILOCK because
+	 * we've indirectly protected against concurrent updates: writeback has
+	 * the page locked, which prevents concurrent invalidations by reflink
+	 * and directio and prevents concurrent buffered writes to the same
+	 * page.  Changes to if_seq always happen under i_lock, which protects
+	 * against concurrent updates and provides a memory barrier on the way
+	 * out that ensures that we always see the current value.
 	 */
 	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
 		     offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
 	if (imap_valid &&
-	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
+	    (!xfs_inode_has_cow_data(ip) ||
+	     wpc->io_type == XFS_IO_COW ||
+	     wpc->cow_seq == ip->i_cowfp->if_seq))
 		return 0;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -364,8 +377,10 @@ xfs_map_blocks(
 	 * it directly instead of looking up anything in the data fork.
 	 */
 	if (xfs_inode_has_cow_data(ip) &&
-	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
-	    imap.br_startoff <= offset_fsb) {
+	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
+		cow_fsb = imap.br_startoff;
+	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
+		wpc->cow_seq = ip->i_cowfp->if_seq;
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 		/*
 		 * Truncate can race with writeback since writeback doesn't
@@ -411,6 +426,16 @@ xfs_map_blocks(
 		imap.br_startblock = HOLESTARTBLOCK;
 		wpc->io_type = XFS_IO_HOLE;
 	} else {
+		/*
+		 * Truncate to the next COW extent if there is one.  This is the
+		 * only opportunity to do this because we can skip COW fork
+		 * lookups for the subsequent blocks in the mapping; however,
+		 * the requirement to treat the COW range separately remains.
+		 */
+		if (cow_fsb != NULLFILEOFF &&
+		    cow_fsb < imap.br_startoff + imap.br_blockcount)
+			imap.br_blockcount = cow_fsb - imap.br_startoff;
+
 		if (isnullstartblock(imap.br_startblock)) {
 			/* got a delalloc extent */
 			wpc->io_type = XFS_IO_DELALLOC;
@@ -427,9 +452,12 @@ xfs_map_blocks(
 	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
 	return 0;
 allocate_blocks:
-	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
+	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
+			&wpc->cow_seq);
 	if (error)
 		return error;
+	ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
+	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
 	wpc->imap = imap;
 	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
 	return 0;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 8e8ca9f03f0e..3282575e2df4 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -651,7 +651,8 @@ xfs_iomap_write_allocate(
 	xfs_inode_t	*ip,
 	int		whichfork,
 	xfs_off_t	offset,
-	xfs_bmbt_irec_t *imap)
+	xfs_bmbt_irec_t *imap,
+	unsigned int	*cow_seq)
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_fileoff_t	offset_fsb, last_block;
@@ -766,6 +767,8 @@ xfs_iomap_write_allocate(
 			if (error)
 				goto error0;
 
+			if (whichfork == XFS_COW_FORK)
+				*cow_seq = XFS_IFORK_PTR(ip, whichfork)->if_seq;
 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		}
 
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 83474c9cede9..c6170548831b 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -14,7 +14,7 @@ struct xfs_bmbt_irec;
 int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 			struct xfs_bmbt_irec *, int);
 int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
-			struct xfs_bmbt_irec *);
+			struct xfs_bmbt_irec *, unsigned int *);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
-- 
cgit v1.2.3


From ff23f4af7efd86cbb1bda42fe2171e0790f9cb5a Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 31 Jul 2018 13:18:02 -0700
Subject: xfs: move extent busy tree initialization to xfs_initialize_perag

Move the per-AG busy extent tree initialization to the per-ag structure
initialization since we don't want online repair to leak the old tree.
We only deconstruct the tree at unmount time, so this should be safe.
This also enables us to eliminate the commented out initialization in
the xfsprogs libxfs.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_alloc.c | 3 ---
 fs/xfs/xfs_mount.c        | 3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3c3f2d5119ea..9847c1632712 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2755,9 +2755,6 @@ xfs_alloc_read_agf(
 		pag->pagf_levels[XFS_BTNUM_RMAPi] =
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
 		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
-		spin_lock_init(&pag->pagb_lock);
-		pag->pagb_count = 0;
-		pag->pagb_tree = RB_ROOT;
 		pag->pagf_init = 1;
 		pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
 	}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 8f739e4d0d1c..99db27d6ac8a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -207,6 +207,9 @@ xfs_initialize_perag(
 		if (xfs_buf_hash_init(pag))
 			goto out_free_pag;
 		init_waitqueue_head(&pag->pagb_wait);
+		spin_lock_init(&pag->pagb_lock);
+		pag->pagb_count = 0;
+		pag->pagb_tree = RB_ROOT;
 
 		if (radix_tree_preload(GFP_NOFS))
 			goto out_hash_destroy;
-- 
cgit v1.2.3


From 0c60d3aa0e2d007e7f79c96c118da25f594afe02 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 1 Aug 2018 07:40:48 -0700
Subject: xfs: refactor log recovery check

Add a predicate to decide if the log is actively in recovery and use
that instead of open-coding a pagf_init check in the attr leaf verifier.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/xfs/libxfs/xfs_attr_leaf.c | 3 +--
 fs/xfs/xfs_log.c              | 9 +++++++++
 fs/xfs/xfs_log.h              | 1 +
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 4e7ef79a83c7..6fc5425b1474 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -242,7 +242,6 @@ xfs_attr3_leaf_verify(
 	struct xfs_attr3_icleaf_hdr	ichdr;
 	struct xfs_mount		*mp = bp->b_target->bt_mount;
 	struct xfs_attr_leafblock	*leaf = bp->b_addr;
-	struct xfs_perag		*pag = bp->b_pag;
 	struct xfs_attr_leaf_entry	*entries;
 	uint16_t			end;
 	int				i;
@@ -270,7 +269,7 @@ xfs_attr3_leaf_verify(
 	 * because we may have transitioned an empty shortform attr to a leaf
 	 * if the attr didn't fit in shortform.
 	 */
-	if (pag && pag->pagf_init && ichdr.count == 0)
+	if (!xfs_log_in_recovery(mp) && ichdr.count == 0)
 		return __this_address;
 
 	/*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index fd10b1426382..00df4f39093a 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -4104,3 +4104,12 @@ xfs_log_check_lsn(
 
 	return valid;
 }
+
+bool
+xfs_log_in_recovery(
+	struct xfs_mount	*mp)
+{
+	struct xlog		*log = mp->m_log;
+
+	return log->l_flags & XLOG_ACTIVE_RECOVERY;
+}
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3c1f6a8b4b70..73a64bf32f6f 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -153,5 +153,6 @@ bool	xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
 void	xfs_log_work_queue(struct xfs_mount *mp);
 void	xfs_log_quiesce(struct xfs_mount *mp);
 bool	xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
+bool	xfs_log_in_recovery(struct xfs_mount *);
 
 #endif	/* __XFS_LOG_H__ */
-- 
cgit v1.2.3


From 611995db2ce210d20d51c5270639b750476534e5 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 1 Aug 2018 07:40:48 -0700
Subject: xfs: use a local variable for magic number in xfs_da3_node_lookup_int

Use a local variable for the block magic number checks instead of
abusing blk->magic.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/xfs/libxfs/xfs_da_btree.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 9efbd2038ffb..8ffe17b223da 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -1481,6 +1481,7 @@ xfs_da3_node_lookup_int(
 	int			error;
 	int			retval;
 	unsigned int		expected_level = 0;
+	uint16_t		magic;
 	struct xfs_inode	*dp = state->args->dp;
 
 	args = state->args;
@@ -1505,17 +1506,17 @@ xfs_da3_node_lookup_int(
 			return error;
 		}
 		curr = blk->bp->b_addr;
-		blk->magic = be16_to_cpu(curr->magic);
+		magic = be16_to_cpu(curr->magic);
 
-		if (blk->magic == XFS_ATTR_LEAF_MAGIC ||
-		    blk->magic == XFS_ATTR3_LEAF_MAGIC) {
+		if (magic == XFS_ATTR_LEAF_MAGIC ||
+		    magic == XFS_ATTR3_LEAF_MAGIC) {
 			blk->magic = XFS_ATTR_LEAF_MAGIC;
 			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
 			break;
 		}
 
-		if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
-		    blk->magic == XFS_DIR3_LEAFN_MAGIC) {
+		if (magic == XFS_DIR2_LEAFN_MAGIC ||
+		    magic == XFS_DIR3_LEAFN_MAGIC) {
 			blk->magic = XFS_DIR2_LEAFN_MAGIC;
 			blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
 							      blk->bp, NULL);
-- 
cgit v1.2.3


From 56830d6cc114f76f656d5e65ab355b070d5a695e Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 1 Aug 2018 07:40:48 -0700
Subject: xfs: check da node magic in _node_lookup_int

Before we start processing what we /think/ is a da3 node block, actually
check the magic to make sure that we're looking at a node block.  This
way we won't blow the asserts in _node_hdr_from_disk on corrupted
metadata.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/xfs/libxfs/xfs_da_btree.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 8ffe17b223da..376bee94b5dd 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -1523,8 +1523,10 @@ xfs_da3_node_lookup_int(
 			break;
 		}
 
-		blk->magic = XFS_DA_NODE_MAGIC;
+		if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC)
+			return -EFSCORRUPTED;
 
+		blk->magic = XFS_DA_NODE_MAGIC;
 
 		/*
 		 * Search an intermediate node for a match.
-- 
cgit v1.2.3


From 5ef2a69993676a0dfd49bf60ae1323eb8a288366 Mon Sep 17 00:00:00 2001
From: Wang Shilong <wshilong@ddn.com>
Date: Wed, 1 Aug 2018 12:02:31 -0400
Subject: ext4: use ext4_warning() for sb_getblk failure

Out-of-memory conditions should not be treated as critical errors, so
replace ext4_error() with ext4_warning().

Signed-off-by: Wang Shilong <wshilong@ddn.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 fs/ext4/balloc.c | 6 +++---
 fs/ext4/ialloc.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index aa52d87985aa..e5d6ee61ff48 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -426,9 +426,9 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 	}
 	bh = sb_getblk(sb, bitmap_blk);
 	if (unlikely(!bh)) {
-		ext4_error(sb, "Cannot get buffer for block bitmap - "
-			   "block_group = %u, block_bitmap = %llu",
-			   block_group, bitmap_blk);
+		ext4_warning(sb, "Cannot get buffer for block bitmap - "
+			     "block_group = %u, block_bitmap = %llu",
+			     block_group, bitmap_blk);
 		return ERR_PTR(-ENOMEM);
 	}
 
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index dffd21de2694..2addcb8730e1 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -138,9 +138,9 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 	}
 	bh = sb_getblk(sb, bitmap_blk);
 	if (unlikely(!bh)) {
-		ext4_error(sb, "Cannot read inode bitmap - "
-			    "block_group = %u, inode_bitmap = %llu",
-			    block_group, bitmap_blk);
+		ext4_warning(sb, "Cannot read inode bitmap - "
+			     "block_group = %u, inode_bitmap = %llu",
+			     block_group, bitmap_blk);
 		return ERR_PTR(-ENOMEM);
 	}
 	if (bitmap_uptodate(bh))
-- 
cgit v1.2.3


From 7d95178c77014dbd8dce36ee40bbbc5e6c121ff5 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 1 Aug 2018 12:36:52 -0400
Subject: ext4: check for NUL characters in extended attribute's name

Extended attribute names are defined to be NUL-terminated, so the name
must not contain a NUL character.  This is important because there are
places where, when removing an extended attribute, the code uses strlen
to determine the length of the entry.  That should probably be fixed at
some point, but the code is currently really messy, so the simplest fix
for now is to simply validate that the extended attributes are sane.

https://bugzilla.kernel.org/show_bug.cgi?id=200401

Reported-by: Wen Xu <wen.xu@gatech.edu>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 fs/ext4/xattr.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 723df14f4084..f36fc5d5b257 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -190,6 +190,8 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
 		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
 		if ((void *)next >= end)
 			return -EFSCORRUPTED;
+		if (strnlen(e->e_name, e->e_name_len) != e->e_name_len)
+			return -EFSCORRUPTED;
 		e = next;
 	}
 
-- 
cgit v1.2.3


From 006477f40d2e79cd1ba655af94fbd9b2a585bfef Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 31 Jul 2018 13:39:34 +0200
Subject: kconfig: move the "Executable file formats" menu to fs/Kconfig.binfmt

No need to have this in the top-level Kconfig.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Kconfig           | 2 --
 fs/Kconfig.binfmt | 5 +++++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/Kconfig b/Kconfig
index a5997d6c2029..48a80beab685 100644
--- a/Kconfig
+++ b/Kconfig
@@ -13,9 +13,7 @@ source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
 
-menu "Executable file formats"
 source "fs/Kconfig.binfmt"
-endmenu
 
 source "mm/Kconfig"
 
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 56df483de619..b795f8da81f3 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -1,3 +1,6 @@
+
+menu "Executable file formats"
+
 config BINFMT_ELF
 	bool "Kernel support for ELF binaries"
 	depends on MMU
@@ -187,3 +190,5 @@ config COREDUMP
 	  This option enables support for performing core dumps. You almost
 	  certainly want to say Y here. Not necessary on systems that never
 	  need debugging or only ever run flawless code.
+
+endmenu
-- 
cgit v1.2.3


From c971e6a006175bd0f195c6346c4e8bc4089bec00 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 28 May 2018 18:27:19 -0400
Subject: kill d_instantiate_no_diralias()

The only user is fuse's create_new_entry(), and there it's used to
mitigate the same mkdir/open-by-handle race as in nfs_mkdir().
The same solution applies - unhash the mkdir argument, then
call d_splice_alias() and if that returns a reference to preexisting
alias, dput() and report success.  ->mkdir() argument left unhashed
negative with the preexisting alias moved in the right place is just
fine from the ->mkdir() callers' point of view.

Cc: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 27 ---------------------------
 fs/fuse/dir.c          | 15 +++++++++++----
 include/linux/dcache.h |  1 -
 3 files changed, 11 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 0e8e5de3c48a..a7d9e7a4c283 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1899,33 +1899,6 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
 }
 EXPORT_SYMBOL(d_instantiate_new);
 
-/**
- * d_instantiate_no_diralias - instantiate a non-aliased dentry
- * @entry: dentry to complete
- * @inode: inode to attach to this dentry
- *
- * Fill in inode information in the entry.  If a directory alias is found, then
- * return an error (and drop inode).  Together with d_materialise_unique() this
- * guarantees that a directory inode may never have more than one alias.
- */
-int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
-{
-	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
-
-	security_d_instantiate(entry, inode);
-	spin_lock(&inode->i_lock);
-	if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
-		spin_unlock(&inode->i_lock);
-		iput(inode);
-		return -EBUSY;
-	}
-	__d_instantiate(entry, inode);
-	spin_unlock(&inode->i_lock);
-
-	return 0;
-}
-EXPORT_SYMBOL(d_instantiate_no_diralias);
-
 struct dentry *d_make_root(struct inode *root_inode)
 {
 	struct dentry *res = NULL;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 56231b31f806..4bbae6ac75c3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -539,6 +539,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
 {
 	struct fuse_entry_out outarg;
 	struct inode *inode;
+	struct dentry *d;
 	int err;
 	struct fuse_forget_link *forget;
 
@@ -570,11 +571,17 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
 	}
 	kfree(forget);
 
-	err = d_instantiate_no_diralias(entry, inode);
-	if (err)
-		return err;
+	d_drop(entry);
+	d = d_splice_alias(inode, entry);
+	if (IS_ERR(d))
+		return PTR_ERR(d);
 
-	fuse_change_entry_timeout(entry, &outarg);
+	if (d) {
+		fuse_change_entry_timeout(d, &outarg);
+		dput(d);
+	} else {
+		fuse_change_entry_timeout(entry, &outarg);
+	}
 	fuse_invalidate_attr(dir);
 	return 0;
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 66c6e17e61e5..0b83629a3d8f 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -227,7 +227,6 @@ extern void d_instantiate(struct dentry *, struct inode *);
 extern void d_instantiate_new(struct dentry *, struct inode *);
 extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
 extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *);
-extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
 extern void __d_drop(struct dentry *dentry);
 extern void d_drop(struct dentry *dentry);
 extern void d_delete(struct dentry *);
-- 
cgit v1.2.3


From 1a5d5e5d51e75a5bca67dadbcea8c841934b7b85 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Thu, 2 Aug 2018 00:03:40 -0400
Subject: ext4: fix spectre gadget in ext4_mb_regular_allocator()

'ac->ac_g_ex.fe_len' is a user-controlled value which is used in the
derivation of 'ac->ac_2order'. 'ac->ac_2order', in turn, is used to
index arrays which makes it a potential spectre gadget. Fix this by
sanitizing the value assigned to 'ac->ac_2order'.  This covers the
following accesses found with the help of smatch:

* fs/ext4/mballoc.c:1896 ext4_mb_simple_scan_group() warn: potential
  spectre issue 'grp->bb_counters' [w] (local cap)

* fs/ext4/mballoc.c:445 mb_find_buddy() warn: potential spectre issue
  'EXT4_SB(e4b->bd_sb)->s_mb_offsets' [r] (local cap)

* fs/ext4/mballoc.c:446 mb_find_buddy() warn: potential spectre issue
  'EXT4_SB(e4b->bd_sb)->s_mb_maxs' [r] (local cap)

Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 fs/ext4/mballoc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f7ab34088162..8b24d3d42cb3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -14,6 +14,7 @@
 #include <linux/log2.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/nospec.h>
 #include <linux/backing-dev.h>
 #include <trace/events/ext4.h>
 
@@ -2140,7 +2141,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 		 * This should tell if fe_len is exactly power of 2
 		 */
 		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
-			ac->ac_2order = i - 1;
+			ac->ac_2order = array_index_nospec(i - 1,
+							   sb->s_blocksize_bits + 2);
 	}
 
 	/* if stream allocation is enabled, use global goal */
-- 
cgit v1.2.3


From bc716523462f98ea6bfef3e1a5926daadbf32e9e Mon Sep 17 00:00:00 2001
From: Liu Song <liu.song11@zte.com.cn>
Date: Thu, 2 Aug 2018 00:11:16 -0400
Subject: ext4: improve code readability in ext4_iget()

Merge the duplicated complex conditions to improve code readability.

Signed-off-by: Liu Song <liu.song11@zte.com.cn>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jiang Biao <jiang.biao2@zte.com.cn>
---
 fs/ext4/inode.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 60432498acfb..8f6ad7667974 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4982,17 +4982,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		ret = -EFSCORRUPTED;
 		goto bad_inode;
 	} else if (!ext4_has_inline_data(inode)) {
-		if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-			if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-			    (S_ISLNK(inode->i_mode) &&
-			     !ext4_inode_is_fast_symlink(inode))))
-				/* Validate extent which is part of inode */
+		/* validate the block references in the inode */
+		if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+		   (S_ISLNK(inode->i_mode) &&
+		    !ext4_inode_is_fast_symlink(inode))) {
+			if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 				ret = ext4_ext_check_inode(inode);
-		} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-			   (S_ISLNK(inode->i_mode) &&
-			    !ext4_inode_is_fast_symlink(inode))) {
-			/* Validate block references which are part of inode */
-			ret = ext4_ind_check_inode(inode);
+			else
+				ret = ext4_ind_check_inode(inode);
 		}
 	}
 	if (ret)
-- 
cgit v1.2.3


From 2afc9166f79b8f6da5f347f48515215ceee4ae37 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 2 Aug 2018 10:51:40 -0700
Subject: scsi: sysfs: Introduce sysfs_{un,}break_active_protection()

Introduce these two functions and export them such that the next patch
can add calls to these functions from the SCSI core.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 fs/sysfs/file.c       | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sysfs.h | 14 ++++++++++++++
 2 files changed, 58 insertions(+)

(limited to 'fs')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 5c13f29bfcdb..118fa197a35f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -405,6 +405,50 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
 }
 EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
+/**
+ * sysfs_break_active_protection - break "active" protection
+ * @kobj: The kernel object @attr is associated with.
+ * @attr: The attribute to break the "active" protection for.
+ *
+ * With sysfs, just like kernfs, deletion of an attribute is postponed until
+ * all active .show() and .store() callbacks have finished unless this function
+ * is called. Hence this function is useful in methods that implement self
+ * deletion.
+ */
+struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj,
+						  const struct attribute *attr)
+{
+	struct kernfs_node *kn;
+
+	kobject_get(kobj);
+	kn = kernfs_find_and_get(kobj->sd, attr->name);
+	if (kn)
+		kernfs_break_active_protection(kn);
+	return kn;
+}
+EXPORT_SYMBOL_GPL(sysfs_break_active_protection);
+
+/**
+ * sysfs_unbreak_active_protection - restore "active" protection
+ * @kn: Pointer returned by sysfs_break_active_protection().
+ *
+ * Undo the effects of sysfs_break_active_protection(). Since this function
+ * calls kernfs_put() on the kernfs node that corresponds to the 'attr'
+ * argument passed to sysfs_break_active_protection(), and since that attribute
+ * may have been removed between the sysfs_break_active_protection() and
+ * sysfs_unbreak_active_protection() calls, it is not safe to access @kn after
+ * this function has returned.
+ */
+void sysfs_unbreak_active_protection(struct kernfs_node *kn)
+{
+	struct kobject *kobj = kn->parent->priv;
+
+	kernfs_unbreak_active_protection(kn);
+	kernfs_put(kn);
+	kobject_put(kobj);
+}
+EXPORT_SYMBOL_GPL(sysfs_unbreak_active_protection);
+
 /**
  * sysfs_remove_file_ns - remove an object attribute with a custom ns tag
  * @kobj: object we're acting for
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index b8bfdc173ec0..3c12198c0103 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -237,6 +237,9 @@ int __must_check sysfs_create_files(struct kobject *kobj,
 				   const struct attribute **attr);
 int __must_check sysfs_chmod_file(struct kobject *kobj,
 				  const struct attribute *attr, umode_t mode);
+struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj,
+						  const struct attribute *attr);
+void sysfs_unbreak_active_protection(struct kernfs_node *kn);
 void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
 			  const void *ns);
 bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr);
@@ -350,6 +353,17 @@ static inline int sysfs_chmod_file(struct kobject *kobj,
 	return 0;
 }
 
+static inline struct kernfs_node *
+sysfs_break_active_protection(struct kobject *kobj,
+			      const struct attribute *attr)
+{
+	return NULL;
+}
+
+static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn)
+{
+}
+
 static inline void sysfs_remove_file_ns(struct kobject *kobj,
 					const struct attribute *attr,
 					const void *ns)
-- 
cgit v1.2.3


From 98719051e75ccf9eca18bd2b569de4ea637b4479 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:29 -0700
Subject: xfs: refactor internal dfops initialization

The current transaction allocation code conditionally initializes
the ->t_dfops indirection pointer. Transaction commit/cancel check
the validity of the pointer to determine whether to finish/cancel
the internal dfops.

This disallows the ability to use the internal dfops list as a
temporary container (via xfs_trans_alloc_empty()). Refactor
transaction allocation to always initialize ->t_dfops and check
permanent reservation state on transaction commit/cancel.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_trans.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 7bf5c1202719..12b6ad1558e6 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -281,13 +281,7 @@ xfs_trans_alloc(
 	INIT_LIST_HEAD(&tp->t_items);
 	INIT_LIST_HEAD(&tp->t_busy);
 	tp->t_firstblock = NULLFSBLOCK;
-	/*
-	 * We only roll transactions with permanent log reservation. Don't init
-	 * ->t_dfops to skip attempts to finish or cancel an empty dfops with a
-	 * non-permanent res.
-	 */
-	if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES)
-		xfs_defer_init(tp, &tp->t_dfops_internal);
+	xfs_defer_init(tp, &tp->t_dfops_internal);
 
 	error = xfs_trans_reserve(tp, resp, blocks, rtextents);
 	if (error) {
@@ -931,8 +925,13 @@ __xfs_trans_commit(
 
 	trace_xfs_trans_commit(tp, _RET_IP_);
 
-	/* finish deferred items on final commit */
-	if (!regrant && tp->t_dfops) {
+	/*
+	 * Finish deferred items on final commit. Only permanent transactions
+	 * should ever have deferred ops.
+	 */
+	WARN_ON_ONCE(xfs_defer_has_unfinished_work(tp->t_dfops) &&
+		     !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
+	if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
 		error = xfs_defer_finish_noroll(&tp);
 		if (error) {
 			xfs_defer_cancel(tp);
@@ -1029,7 +1028,7 @@ xfs_trans_cancel(
 
 	trace_xfs_trans_cancel(tp, _RET_IP_);
 
-	if (tp->t_dfops)
+	if (tp->t_flags & XFS_TRANS_PERM_LOG_RES)
 		xfs_defer_cancel(tp);
 
 	/*
-- 
cgit v1.2.3


From fbfa977d25dc8db92dbf5fcafb0e03fae0005be5 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:29 -0700
Subject: xfs: use transaction for intent recovery instead of raw dfops

Log intent recovery is the last user of an external (on-stack)
dfops. The pattern exists because the dfops is used to collect
additional deferred operations queued during the whole recovery
sequence. The dfops is finished with a new transaction after intent
recovery completes.

We already have a mechanism to create an empty, container-like
transaction to support the scrub infrastructure. We can reuse that
mechanism here to drop the final user of external dfops. This
facilitates folding dfops state (i.e., dop_low) into the
transaction, the elimination of now unused external dfops support
and also eliminates the only caller of __xfs_defer_cancel().

Replace the on-stack dfops with an empty transaction and pass it
around to the various helpers that queue and finish deferred
operations during intent recovery.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_item.c     | 12 +++++------
 fs/xfs/xfs_bmap_item.h     |  3 +--
 fs/xfs/xfs_log_recover.c   | 51 ++++++++++++++++++++++++++--------------------
 fs/xfs/xfs_refcount_item.c | 12 +++++------
 fs/xfs/xfs_refcount_item.h |  3 +--
 5 files changed, 43 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index e1d6c127b07d..57429055e608 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -375,9 +375,8 @@ xfs_bud_init(
  */
 int
 xfs_bui_recover(
-	struct xfs_mount		*mp,
-	struct xfs_bui_log_item		*buip,
-	struct xfs_defer_ops		*dfops)
+	struct xfs_trans		*parent_tp,
+	struct xfs_bui_log_item		*buip)
 {
 	int				error = 0;
 	unsigned int			bui_type;
@@ -393,6 +392,7 @@ xfs_bui_recover(
 	struct xfs_trans		*tp;
 	struct xfs_inode		*ip = NULL;
 	struct xfs_bmbt_irec		irec;
+	struct xfs_mount		*mp = parent_tp->t_mountp;
 
 	ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
 
@@ -446,7 +446,7 @@ xfs_bui_recover(
 	 * finishes them on completion. Transfer current dfops state to this
 	 * transaction and transfer the result back before we return.
 	 */
-	xfs_defer_move(tp->t_dfops, dfops);
+	xfs_defer_move(tp->t_dfops, parent_tp->t_dfops);
 	budp = xfs_trans_get_bud(tp, buip);
 
 	/* Grab the inode. */
@@ -494,7 +494,7 @@ xfs_bui_recover(
 	}
 
 	set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
-	xfs_defer_move(dfops, tp->t_dfops);
+	xfs_defer_move(parent_tp->t_dfops, tp->t_dfops);
 	error = xfs_trans_commit(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	xfs_irele(ip);
@@ -502,7 +502,7 @@ xfs_bui_recover(
 	return error;
 
 err_inode:
-	xfs_defer_move(dfops, tp->t_dfops);
+	xfs_defer_move(parent_tp->t_dfops, tp->t_dfops);
 	xfs_trans_cancel(tp);
 	if (ip) {
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
index fd1a1b13df51..89e043a88bb8 100644
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -79,7 +79,6 @@ struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *,
 		struct xfs_bui_log_item *);
 void xfs_bui_item_free(struct xfs_bui_log_item *);
 void xfs_bui_release(struct xfs_bui_log_item *);
-int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip,
-		struct xfs_defer_ops *dfops);
+int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
 
 #endif	/* __XFS_BMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7776fde9430c..fc1ce9a644e3 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4733,10 +4733,9 @@ xlog_recover_cancel_rui(
 /* Recover the CUI if necessary. */
 STATIC int
 xlog_recover_process_cui(
-	struct xfs_mount		*mp,
+	struct xfs_trans		*parent_tp,
 	struct xfs_ail			*ailp,
-	struct xfs_log_item		*lip,
-	struct xfs_defer_ops		*dfops)
+	struct xfs_log_item		*lip)
 {
 	struct xfs_cui_log_item		*cuip;
 	int				error;
@@ -4749,7 +4748,7 @@ xlog_recover_process_cui(
 		return 0;
 
 	spin_unlock(&ailp->ail_lock);
-	error = xfs_cui_recover(mp, cuip, dfops);
+	error = xfs_cui_recover(parent_tp, cuip);
 	spin_lock(&ailp->ail_lock);
 
 	return error;
@@ -4774,10 +4773,9 @@ xlog_recover_cancel_cui(
 /* Recover the BUI if necessary. */
 STATIC int
 xlog_recover_process_bui(
-	struct xfs_mount		*mp,
+	struct xfs_trans		*parent_tp,
 	struct xfs_ail			*ailp,
-	struct xfs_log_item		*lip,
-	struct xfs_defer_ops		*dfops)
+	struct xfs_log_item		*lip)
 {
 	struct xfs_bui_log_item		*buip;
 	int				error;
@@ -4790,7 +4788,7 @@ xlog_recover_process_bui(
 		return 0;
 
 	spin_unlock(&ailp->ail_lock);
-	error = xfs_bui_recover(mp, buip, dfops);
+	error = xfs_bui_recover(parent_tp, buip);
 	spin_lock(&ailp->ail_lock);
 
 	return error;
@@ -4829,9 +4827,9 @@ static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
 /* Take all the collected deferred ops and finish them in order. */
 static int
 xlog_finish_defer_ops(
-	struct xfs_mount	*mp,
-	struct xfs_defer_ops	*dfops)
+	struct xfs_trans	*parent_tp)
 {
+	struct xfs_mount	*mp = parent_tp->t_mountp;
 	struct xfs_trans	*tp;
 	int64_t			freeblks;
 	uint			resblks;
@@ -4855,7 +4853,7 @@ xlog_finish_defer_ops(
 	if (error)
 		return error;
 	/* transfer all collected dfops to this transaction */
-	xfs_defer_move(tp->t_dfops, dfops);
+	xfs_defer_move(tp->t_dfops, parent_tp->t_dfops);
 
 	return xfs_trans_commit(tp);
 }
@@ -4880,22 +4878,34 @@ STATIC int
 xlog_recover_process_intents(
 	struct xlog		*log)
 {
-	struct xfs_defer_ops	dfops;
+	struct xfs_trans	*parent_tp;
 	struct xfs_ail_cursor	cur;
 	struct xfs_log_item	*lip;
 	struct xfs_ail		*ailp;
-	int			error = 0;
+	int			error;
 #if defined(DEBUG) || defined(XFS_WARN)
 	xfs_lsn_t		last_lsn;
 #endif
 
+	/*
+	 * The intent recovery handlers commit transactions to complete recovery
+	 * for individual intents, but any new deferred operations that are
+	 * queued during that process are held off until the very end. The
+	 * purpose of this transaction is to serve as a container for deferred
+	 * operations. Each intent recovery handler must transfer dfops here
+	 * before its local transaction commits, and we'll finish the entire
+	 * list below.
+	 */
+	error = xfs_trans_alloc_empty(log->l_mp, &parent_tp);
+	if (error)
+		return error;
+
 	ailp = log->l_ailp;
 	spin_lock(&ailp->ail_lock);
 	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
 #if defined(DEBUG) || defined(XFS_WARN)
 	last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
 #endif
-	xfs_defer_init(NULL, &dfops);
 	while (lip != NULL) {
 		/*
 		 * We're done when we see something other than an intent.
@@ -4930,12 +4940,10 @@ xlog_recover_process_intents(
 			error = xlog_recover_process_rui(log->l_mp, ailp, lip);
 			break;
 		case XFS_LI_CUI:
-			error = xlog_recover_process_cui(log->l_mp, ailp, lip,
-					&dfops);
+			error = xlog_recover_process_cui(parent_tp, ailp, lip);
 			break;
 		case XFS_LI_BUI:
-			error = xlog_recover_process_bui(log->l_mp, ailp, lip,
-					&dfops);
+			error = xlog_recover_process_bui(parent_tp, ailp, lip);
 			break;
 		}
 		if (error)
@@ -4945,10 +4953,9 @@ xlog_recover_process_intents(
 out:
 	xfs_trans_ail_cursor_done(&cur);
 	spin_unlock(&ailp->ail_lock);
-	if (error)
-		__xfs_defer_cancel(&dfops);
-	else
-		error = xlog_finish_defer_ops(log->l_mp, &dfops);
+	if (!error)
+		error = xlog_finish_defer_ops(parent_tp);
+	xfs_trans_cancel(parent_tp);
 
 	return error;
 }
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index d3582a06626f..011e1d0640fb 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -380,9 +380,8 @@ xfs_cud_init(
  */
 int
 xfs_cui_recover(
-	struct xfs_mount		*mp,
-	struct xfs_cui_log_item		*cuip,
-	struct xfs_defer_ops		*dfops)
+	struct xfs_trans		*parent_tp,
+	struct xfs_cui_log_item		*cuip)
 {
 	int				i;
 	int				error = 0;
@@ -398,6 +397,7 @@ xfs_cui_recover(
 	xfs_extlen_t			new_len;
 	struct xfs_bmbt_irec		irec;
 	bool				requeue_only = false;
+	struct xfs_mount		*mp = parent_tp->t_mountp;
 
 	ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
 
@@ -457,7 +457,7 @@ xfs_cui_recover(
 	 * finishes them on completion. Transfer current dfops state to this
 	 * transaction and transfer the result back before we return.
 	 */
-	xfs_defer_move(tp->t_dfops, dfops);
+	xfs_defer_move(tp->t_dfops, parent_tp->t_dfops);
 	cudp = xfs_trans_get_cud(tp, cuip);
 
 	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
@@ -522,13 +522,13 @@ xfs_cui_recover(
 
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
 	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
-	xfs_defer_move(dfops, tp->t_dfops);
+	xfs_defer_move(parent_tp->t_dfops, tp->t_dfops);
 	error = xfs_trans_commit(tp);
 	return error;
 
 abort_error:
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
-	xfs_defer_move(dfops, tp->t_dfops);
+	xfs_defer_move(parent_tp->t_dfops, tp->t_dfops);
 	xfs_trans_cancel(tp);
 	return error;
 }
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index dd830b69cd1e..3896dcc2368f 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -82,7 +82,6 @@ struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *,
 		struct xfs_cui_log_item *);
 void xfs_cui_item_free(struct xfs_cui_log_item *);
 void xfs_cui_release(struct xfs_cui_log_item *);
-int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip,
-		struct xfs_defer_ops *dfops);
+int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
 
 #endif	/* __XFS_REFCOUNT_ITEM_H__ */
-- 
cgit v1.2.3


From 7279aa13b8fb954f50073a672f912898198efd14 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:30 -0700
Subject: xfs: remove unused __xfs_defer_cancel() internal helper

With no more external dfops users, there is no need for an
xfs_defer_ops cancel wrapper.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_defer.c | 5 +++--
 fs/xfs/libxfs/xfs_defer.h | 2 +-
 fs/xfs/xfs_trans.c        | 7 -------
 fs/xfs/xfs_trans.h        | 3 ---
 4 files changed, 4 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index a5f7dc18a62f..ebead781613f 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -465,9 +465,10 @@ xfs_defer_finish(
  * Free up any items left in the list.
  */
 void
-__xfs_defer_cancel(
-	struct xfs_defer_ops		*dop)
+xfs_defer_cancel(
+	struct xfs_trans		*tp)
 {
+	struct xfs_defer_ops		*dop = tp->t_dfops;
 	struct xfs_defer_pending	*dfp;
 	struct xfs_defer_pending	*pli;
 	struct list_head		*pwi;
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 85c41fe4dbae..da145fc04ae1 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -50,7 +50,7 @@ void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
 		struct list_head *h);
 int xfs_defer_finish_noroll(struct xfs_trans **tp);
 int xfs_defer_finish(struct xfs_trans **tp);
-void __xfs_defer_cancel(struct xfs_defer_ops *dop);
+void xfs_defer_cancel(struct xfs_trans *);
 void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 12b6ad1558e6..0d07cdcc5c7d 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1110,10 +1110,3 @@ xfs_trans_roll(
 	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
 	return xfs_trans_reserve(*tpp, &tres, 0, 0);
 }
-
-void
-xfs_defer_cancel(
-	struct xfs_trans	*tp)
-{
-	__xfs_defer_cancel(tp->t_dfops);
-}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 5170e89bec02..dc79e3c1d3e8 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -214,9 +214,6 @@ xfs_trans_read_buf(
 				      flags, bpp, ops);
 }
 
-/* cancel dfops associated with a transaction */
-void xfs_defer_cancel(struct xfs_trans *);
-
 struct xfs_buf	*xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
 
 void		xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
-- 
cgit v1.2.3


From ce356d64772f920f26cd6c1b02878a737a275638 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:30 -0700
Subject: xfs: pass transaction to dfops reset/move helpers

All callers pass ->t_dfops of the associated transactions. Refactor
the helpers to receive the transactions and facilitate further
cleanups between xfs_defer_ops and xfs_trans.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_defer.c  | 15 ++++++++++-----
 fs/xfs/libxfs/xfs_defer.h  |  2 +-
 fs/xfs/xfs_bmap_item.c     |  6 +++---
 fs/xfs/xfs_log_recover.c   |  2 +-
 fs/xfs/xfs_refcount_item.c |  6 +++---
 fs/xfs/xfs_trans.c         |  2 +-
 6 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index ebead781613f..e3517a53c525 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -324,9 +324,12 @@ xfs_defer_bjoin(
  */
 static void
 xfs_defer_reset(
-	struct xfs_defer_ops	*dop)
+	struct xfs_trans	*tp)
 {
+	struct xfs_defer_ops	*dop = tp->t_dfops;
+
 	ASSERT(!xfs_defer_has_unfinished_work(dop));
+
 	dop->dop_low = false;
 	memset(dop->dop_inodes, 0, sizeof(dop->dop_inodes));
 	memset(dop->dop_bufs, 0, sizeof(dop->dop_bufs));
@@ -457,7 +460,7 @@ xfs_defer_finish(
 		if (error)
 			return error;
 	}
-	xfs_defer_reset((*tp)->t_dfops);
+	xfs_defer_reset(*tp);
 	return 0;
 }
 
@@ -575,9 +578,11 @@ xfs_defer_init(
  */
 void
 xfs_defer_move(
-	struct xfs_defer_ops	*dst,
-	struct xfs_defer_ops	*src)
+	struct xfs_trans	*dtp,
+	struct xfs_trans	*stp)
 {
+	struct xfs_defer_ops	*dst = dtp->t_dfops;
+	struct xfs_defer_ops	*src = stp->t_dfops;
 	ASSERT(dst != src);
 
 	list_splice_init(&src->dop_intake, &dst->dop_intake);
@@ -587,5 +592,5 @@ xfs_defer_move(
 	memcpy(dst->dop_bufs, src->dop_bufs, sizeof(dst->dop_bufs));
 	dst->dop_low = src->dop_low;
 
-	xfs_defer_reset(src);
+	xfs_defer_reset(stp);
 }
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index da145fc04ae1..d60c50498fdf 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -55,7 +55,7 @@ void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
 int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
-void xfs_defer_move(struct xfs_defer_ops *dst, struct xfs_defer_ops *src);
+void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp);
 
 /* Description of a deferred type. */
 struct xfs_defer_op_type {
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 57429055e608..b8a6be036cd7 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -446,7 +446,7 @@ xfs_bui_recover(
 	 * finishes them on completion. Transfer current dfops state to this
 	 * transaction and transfer the result back before we return.
 	 */
-	xfs_defer_move(tp->t_dfops, parent_tp->t_dfops);
+	xfs_defer_move(tp, parent_tp);
 	budp = xfs_trans_get_bud(tp, buip);
 
 	/* Grab the inode. */
@@ -494,7 +494,7 @@ xfs_bui_recover(
 	}
 
 	set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
-	xfs_defer_move(parent_tp->t_dfops, tp->t_dfops);
+	xfs_defer_move(parent_tp, tp);
 	error = xfs_trans_commit(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	xfs_irele(ip);
@@ -502,7 +502,7 @@ xfs_bui_recover(
 	return error;
 
 err_inode:
-	xfs_defer_move(parent_tp->t_dfops, tp->t_dfops);
+	xfs_defer_move(parent_tp, tp);
 	xfs_trans_cancel(tp);
 	if (ip) {
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index fc1ce9a644e3..a21dc61ec09e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4853,7 +4853,7 @@ xlog_finish_defer_ops(
 	if (error)
 		return error;
 	/* transfer all collected dfops to this transaction */
-	xfs_defer_move(tp->t_dfops, parent_tp->t_dfops);
+	xfs_defer_move(tp, parent_tp);
 
 	return xfs_trans_commit(tp);
 }
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 011e1d0640fb..4a417daae781 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -457,7 +457,7 @@ xfs_cui_recover(
 	 * finishes them on completion. Transfer current dfops state to this
 	 * transaction and transfer the result back before we return.
 	 */
-	xfs_defer_move(tp->t_dfops, parent_tp->t_dfops);
+	xfs_defer_move(tp, parent_tp);
 	cudp = xfs_trans_get_cud(tp, cuip);
 
 	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
@@ -522,13 +522,13 @@ xfs_cui_recover(
 
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
 	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
-	xfs_defer_move(parent_tp->t_dfops, tp->t_dfops);
+	xfs_defer_move(parent_tp, tp);
 	error = xfs_trans_commit(tp);
 	return error;
 
 abort_error:
 	xfs_refcount_finish_one_cleanup(tp, rcur, error);
-	xfs_defer_move(parent_tp->t_dfops, tp->t_dfops);
+	xfs_defer_move(parent_tp, tp);
 	xfs_trans_cancel(tp);
 	return error;
 }
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 0d07cdcc5c7d..ae3c875a14e5 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -125,7 +125,7 @@ xfs_trans_dup(
 	if (tp->t_dfops != &tp->t_dfops_internal)
 		ntp->t_dfops = tp->t_dfops;
 	else
-		xfs_defer_move(ntp->t_dfops, tp->t_dfops);
+		xfs_defer_move(ntp, tp);
 
 	xfs_trans_dup_dqinfo(tp, ntp);
 
-- 
cgit v1.2.3


From 1214f1cf663b0939fbb8f1bccdc74c1d1e452d53 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:31 -0700
Subject: xfs: replace dop_low with transaction flag

The dop_low field enables the low free space allocation mode when a
previous allocation has detected difficulty allocating blocks. It
has historically been part of the xfs_defer_ops structure, which
means if enabled, it remains enabled across a set of transactions
until the deferred operations have completed and the dfops is reset.

Now that the dfops is embedded in the transaction, we can save a bit
more space by using a transaction flag rather than a standalone
boolean. Drop the ->dop_low field and replace it with a transaction
flag that is set at the same points, carried across rolling
transactions and cleared on completion of deferred operations. This
essentially emulates the behavior of ->dop_low and so should not
change behavior.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c       |  8 ++++----
 fs/xfs/libxfs/xfs_bmap_btree.c |  4 ++--
 fs/xfs/libxfs/xfs_defer.c      | 16 ++++++++++++++--
 fs/xfs/libxfs/xfs_defer.h      | 11 -----------
 fs/xfs/libxfs/xfs_shared.h     | 12 ++++++++++++
 fs/xfs/xfs_filestream.c        |  3 ++-
 fs/xfs/xfs_trace.h             | 10 ++--------
 fs/xfs/xfs_trans.h             |  2 --
 8 files changed, 36 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a85c0445b38f..8edf7522aaff 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -700,7 +700,7 @@ xfs_bmap_extents_to_btree(
 	if (tp->t_firstblock == NULLFSBLOCK) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
-	} else if (tp->t_dfops->dop_low) {
+	} else if (tp->t_flags & XFS_TRANS_LOWMODE) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
 		args.fsbno = tp->t_firstblock;
 	} else {
@@ -3449,7 +3449,7 @@ xfs_bmap_btalloc(
 			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
 		if (error)
 			return error;
-	} else if (ap->tp->t_dfops->dop_low) {
+	} else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
 		if (xfs_inode_is_filestream(ap->ip))
 			args.type = XFS_ALLOCTYPE_FIRST_AG;
 		else
@@ -3484,7 +3484,7 @@ xfs_bmap_btalloc(
 	 * is >= the stripe unit and the allocation offset is
 	 * at the end of file.
 	 */
-	if (!ap->tp->t_dfops->dop_low && ap->aeof) {
+	if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
 		if (!ap->offset) {
 			args.alignment = stripe_align;
 			atype = args.type;
@@ -3576,7 +3576,7 @@ xfs_bmap_btalloc(
 		args.total = ap->minlen;
 		if ((error = xfs_alloc_vextent(&args)))
 			return error;
-		ap->tp->t_dfops->dop_low = true;
+		ap->tp->t_flags |= XFS_TRANS_LOWMODE;
 	}
 	if (args.fsbno != NULLFSBLOCK) {
 		/*
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 01489714a253..955e29de8cae 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -226,7 +226,7 @@ xfs_bmbt_alloc_block(
 		 * block allocation here and corrupt the filesystem.
 		 */
 		args.minleft = args.tp->t_blk_res;
-	} else if (cur->bc_tp->t_dfops->dop_low) {
+	} else if (cur->bc_tp->t_flags & XFS_TRANS_LOWMODE) {
 		args.type = XFS_ALLOCTYPE_START_BNO;
 	} else {
 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -253,7 +253,7 @@ xfs_bmbt_alloc_block(
 		error = xfs_alloc_vextent(&args);
 		if (error)
 			goto error0;
-		cur->bc_tp->t_dfops->dop_low = true;
+		cur->bc_tp->t_flags |= XFS_TRANS_LOWMODE;
 	}
 	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
 		*stat = 0;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index e3517a53c525..64e1abc60edc 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -330,9 +330,14 @@ xfs_defer_reset(
 
 	ASSERT(!xfs_defer_has_unfinished_work(dop));
 
-	dop->dop_low = false;
 	memset(dop->dop_inodes, 0, sizeof(dop->dop_inodes));
 	memset(dop->dop_bufs, 0, sizeof(dop->dop_bufs));
+
+	/*
+	 * Low mode state transfers across transaction rolls to mirror dfops
+	 * lifetime. Clear it now that dfops is reset.
+	 */
+	tp->t_flags &= ~XFS_TRANS_LOWMODE;
 }
 
 /*
@@ -590,7 +595,14 @@ xfs_defer_move(
 
 	memcpy(dst->dop_inodes, src->dop_inodes, sizeof(dst->dop_inodes));
 	memcpy(dst->dop_bufs, src->dop_bufs, sizeof(dst->dop_bufs));
-	dst->dop_low = src->dop_low;
+
+	/*
+	 * Low free space mode was historically controlled by a dfops field.
+	 * This meant that low mode state potentially carried across multiple
+	 * transaction rolls. Transfer low mode on a dfops move to preserve
+	 * that behavior.
+	 */
+	dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
 
 	xfs_defer_reset(stp);
 }
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index d60c50498fdf..8908a2716774 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -25,17 +25,6 @@ struct xfs_defer_pending {
 
 /*
  * Header for deferred operation list.
- *
- * dop_low is used by the allocator to activate the lowspace algorithm -
- * when free space is running low the extent allocator may choose to
- * allocate an extent from an AG without leaving sufficient space for
- * a btree split when inserting the new extent.  In this case the allocator
- * will enable the lowspace algorithm which is supposed to allow further
- * allocations (such as btree splits and newroots) to allocate from
- * sequential AGs.  In order to avoid locking AGs out of order the lowspace
- * algorithm will start searching for free space from AG 0.  If the correct
- * transaction reservations have been made then this algorithm will eventually
- * find all the space it needs.
  */
 enum xfs_defer_ops_type {
 	XFS_DEFER_OPS_TYPE_BMAP,
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 22089f1c880a..1c5debe748f0 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -64,6 +64,18 @@ void	xfs_log_get_max_trans_res(struct xfs_mount *mp,
 #define XFS_TRANS_RESERVE	0x20    /* OK to use reserved data blocks */
 #define XFS_TRANS_NO_WRITECOUNT 0x40	/* do not elevate SB writecount */
 #define XFS_TRANS_NOFS		0x80	/* pass KM_NOFS to kmem_alloc */
+/*
+ * LOWMODE is used by the allocator to activate the lowspace algorithm - when
+ * free space is running low the extent allocator may choose to allocate an
+ * extent from an AG without leaving sufficient space for a btree split when
+ * inserting the new extent. In this case the allocator will enable the
+ * lowspace algorithm which is supposed to allow further allocations (such as
+ * btree splits and newroots) to allocate from sequential AGs. In order to
+ * avoid locking AGs out of order the lowspace algorithm will start searching
+ * for free space from AG 0. If the correct transaction reservations have been
+ * made then this algorithm will eventually find all the space it needs.
+ */
+#define XFS_TRANS_LOWMODE	0x100	/* allocate in low space mode */
 
 /*
  * Field values for xfs_trans_mod_sb.
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 212173c62588..182501373af2 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -20,6 +20,7 @@
 #include "xfs_trace.h"
 #include "xfs_ag_resv.h"
 #include "xfs_trans.h"
+#include "xfs_shared.h"
 
 struct xfs_fstrm_item {
 	struct xfs_mru_cache_elem	mru;
@@ -378,7 +379,7 @@ xfs_filestream_new_ag(
 
 	if (xfs_alloc_is_userdata(ap->datatype))
 		flags |= XFS_PICK_USERDATA;
-	if (ap->tp->t_dfops->dop_low)
+	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
 		flags |= XFS_PICK_LOWSPACE;
 
 	err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index cc6995cfce66..8807f1bb814a 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2223,19 +2223,16 @@ DECLARE_EVENT_CLASS(xfs_defer_class,
 		__field(dev_t, dev)
 		__field(void *, dop)
 		__field(char, committed)
-		__field(char, low)
 		__field(unsigned long, caller_ip)
 	),
 	TP_fast_assign(
 		__entry->dev = mp ? mp->m_super->s_dev : 0;
 		__entry->dop = dop;
-		__entry->low = dop->dop_low;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d ops %p low %d, caller %pS",
+	TP_printk("dev %d:%d ops %p caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->dop,
-		  __entry->low,
 		  (char *)__entry->caller_ip)
 )
 #define DEFINE_DEFER_EVENT(name) \
@@ -2251,19 +2248,16 @@ DECLARE_EVENT_CLASS(xfs_defer_error_class,
 		__field(dev_t, dev)
 		__field(void *, dop)
 		__field(char, committed)
-		__field(char, low)
 		__field(int, error)
 	),
 	TP_fast_assign(
 		__entry->dev = mp ? mp->m_super->s_dev : 0;
 		__entry->dop = dop;
-		__entry->low = dop->dop_low;
 		__entry->error = error;
 	),
-	TP_printk("dev %d:%d ops %p low %d err %d",
+	TP_printk("dev %d:%d ops %p err %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->dop,
-		  __entry->low,
 		  __entry->error)
 )
 #define DEFINE_DEFER_ERROR_EVENT(name) \
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index dc79e3c1d3e8..7e493221160e 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -101,8 +101,6 @@ struct xfs_defer_ops {
 	/* relog these with each roll */
 	struct xfs_inode	*dop_inodes[XFS_DEFER_OPS_NR_INODES];
 	struct xfs_buf		*dop_bufs[XFS_DEFER_OPS_NR_BUFS];
-
-	bool			dop_low;	/* alloc in low mode */
 };
 
 /*
-- 
cgit v1.2.3


From 488c919a5bec3be4b8613898de6958043edbb8d9 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:31 -0700
Subject: xfs: add missing defer ijoins for held inodes

Log items that require relogging during deferred operations
processing are explicitly joined to the associated dfops via the
xfs_defer_*join() helpers. These calls imply that the associated
object is "held" by the transaction such that when rolled, the item
can be immediately joined to a follow-up transaction. For buffers,
this means the buffer remains locked and held after each roll. For
inodes, this means that the inode remains locked.

Failure to join a held item to the dfops structure means the
associated object pins the tail of the log while dfops processing
completes, because the item never relogs and is not unlocked or
released until deferred processing completes.

Currently, all buffers that are held in transactions (XFS_BLI_HOLD)
with deferred operations are explicitly joined to the dfops. This is
not the case for inodes, however, as various contexts defer
operations to transactions with held inodes without explicit joins
to the associated dfops (and thus not relogging).

While this is not a catastrophic problem, it is not ideal. Given
that we want to eventually relog such items automatically during
dfops processing, start by explicitly adding these missing
xfs_defer_ijoin() calls. A call is added everywhere an inode is
joined to a transaction without transferring lock ownership and
said transaction runs deferred operations.

All xfs_defer_ijoin() calls will eventually be replaced by automatic
dfops inode relogging. This patch essentially implements the
behavior change that would otherwise occur due to automatic inode
dfops relogging.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 1 +
 fs/xfs/xfs_bmap_util.c   | 1 +
 fs/xfs/xfs_inode.c       | 1 +
 fs/xfs/xfs_iomap.c       | 3 +++
 fs/xfs/xfs_reflink.c     | 1 +
 5 files changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 8edf7522aaff..71687d805f79 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1119,6 +1119,7 @@ xfs_bmap_add_attrfork(
 			xfs_log_sb(tp);
 	}
 
+	xfs_defer_ijoin(tp->t_dfops, ip);
 	error = xfs_trans_commit(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 412dc58ae54d..0c58a66b39e5 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -979,6 +979,7 @@ xfs_alloc_file_space(
 		/*
 		 * Complete the transaction
 		 */
+		xfs_defer_ijoin(tp->t_dfops, ip);
 		error = xfs_trans_commit(tp);
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5fc1815c2b62..441c8593cfd7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1810,6 +1810,7 @@ xfs_inactive_ifree(
 	 * Just ignore errors at this point.  There is nothing we can do except
 	 * to try to keep going. Make sure it's not a silent error.
 	 */
+	xfs_defer_ijoin(tp->t_dfops, ip);
 	error = xfs_trans_commit(tp);
 	if (error)
 		xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3282575e2df4..8093a01fcf9e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -261,6 +261,7 @@ xfs_iomap_write_direct(
 	/*
 	 * Complete the transaction
 	 */
+	xfs_defer_ijoin(tp->t_dfops, ip);
 	error = xfs_trans_commit(tp);
 	if (error)
 		goto out_unlock;
@@ -763,6 +764,7 @@ xfs_iomap_write_allocate(
 			if (error)
 				goto trans_cancel;
 
+			xfs_defer_ijoin(tp->t_dfops, ip);
 			error = xfs_trans_commit(tp);
 			if (error)
 				goto error0;
@@ -882,6 +884,7 @@ xfs_iomap_write_unwritten(
 			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 		}
 
+		xfs_defer_ijoin(tp->t_dfops, ip);
 		error = xfs_trans_commit(tp);
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 9a0a56526266..e986fcf928e5 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -435,6 +435,7 @@ retry:
 	xfs_inode_set_cowblocks_tag(ip);
 
 	/* Finish up. */
+	xfs_defer_ijoin(tp->t_dfops, ip);
 	error = xfs_trans_commit(tp);
 	if (error)
 		return error;
-- 
cgit v1.2.3


From 82ff27bc52a88cb5cc400bfa64e210d3ec8dfebd Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:32 -0700
Subject: xfs: automatic dfops buffer relogging

Buffers that are held across deferred operations are explicitly
joined to the dfops structure to ensure appropriate relogging.
While buffers are currently joined explicitly, we can detect the
conditions that require relogging at dfops finish time by inspecting
the transaction item list for held buffers.

Replace the xfs_defer_bjoin() infrastructure with such detection and
automatic relogging of held buffers. This eliminates the need for
the per-dfops buffer list, replaced by an on-stack variant in
xfs_defer_trans_roll().

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c  |  1 -
 fs/xfs/libxfs/xfs_defer.c | 58 +++++++++++++++++++++--------------------------
 fs/xfs/libxfs/xfs_defer.h |  1 -
 fs/xfs/xfs_dquot.c        |  1 -
 fs/xfs/xfs_trans.h        |  1 -
 5 files changed, 26 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 3deb5cdadf08..227887bee00d 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -320,7 +320,6 @@ xfs_attr_set(
 		 * buffer and run into problems with the write verifier.
 		 */
 		xfs_trans_bhold(args.trans, leaf_bp);
-		xfs_defer_bjoin(args.trans->t_dfops, leaf_bp);
 		xfs_defer_ijoin(args.trans->t_dfops, dp);
 		error = xfs_defer_finish(&args.trans);
 		if (error)
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 64e1abc60edc..e9b7671d289a 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -14,6 +14,7 @@
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_trans.h"
+#include "xfs_buf_item.h"
 #include "xfs_trace.h"
 
 /*
@@ -228,6 +229,10 @@ xfs_defer_trans_roll(
 	struct xfs_trans		**tp)
 {
 	struct xfs_defer_ops		*dop = (*tp)->t_dfops;
+	struct xfs_buf_log_item		*bli;
+	struct xfs_log_item		*lip;
+	struct xfs_buf			*bplist[XFS_DEFER_OPS_NR_BUFS];
+	int				bpcount = 0;
 	int				i;
 	int				error;
 
@@ -235,9 +240,24 @@ xfs_defer_trans_roll(
 	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
 		xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
 
-	/* Hold the (previously bjoin'd) buffer locked across the roll. */
-	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
-		xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
+	list_for_each_entry(lip, &(*tp)->t_items, li_trans) {
+		switch (lip->li_type) {
+		case XFS_LI_BUF:
+			bli = container_of(lip, struct xfs_buf_log_item,
+					   bli_item);
+			if (bli->bli_flags & XFS_BLI_HOLD) {
+				if (bpcount >= XFS_DEFER_OPS_NR_BUFS) {
+					ASSERT(0);
+					return -EFSCORRUPTED;
+				}
+				xfs_trans_dirty_buf(*tp, bli->bli_buf);
+				bplist[bpcount++] = bli->bli_buf;
+			}
+			break;
+		default:
+			break;
+		}
+	}
 
 	trace_xfs_defer_trans_roll((*tp)->t_mountp, dop, _RET_IP_);
 
@@ -255,9 +275,9 @@ xfs_defer_trans_roll(
 		xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
 
 	/* Rejoin the buffers and dirty them so the log moves forward. */
-	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
-		xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
-		xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+	for (i = 0; i < bpcount; i++) {
+		xfs_trans_bjoin(*tp, bplist[i]);
+		xfs_trans_bhold(*tp, bplist[i]);
 	}
 
 	return error;
@@ -295,30 +315,6 @@ xfs_defer_ijoin(
 	return -EFSCORRUPTED;
 }
 
-/*
- * Add this buffer to the deferred op.  Each joined buffer is relogged
- * each time we roll the transaction.
- */
-int
-xfs_defer_bjoin(
-	struct xfs_defer_ops		*dop,
-	struct xfs_buf			*bp)
-{
-	int				i;
-
-	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
-		if (dop->dop_bufs[i] == bp)
-			return 0;
-		else if (dop->dop_bufs[i] == NULL) {
-			dop->dop_bufs[i] = bp;
-			return 0;
-		}
-	}
-
-	ASSERT(0);
-	return -EFSCORRUPTED;
-}
-
 /*
  * Reset an already used dfops after finish.
  */
@@ -331,7 +327,6 @@ xfs_defer_reset(
 	ASSERT(!xfs_defer_has_unfinished_work(dop));
 
 	memset(dop->dop_inodes, 0, sizeof(dop->dop_inodes));
-	memset(dop->dop_bufs, 0, sizeof(dop->dop_bufs));
 
 	/*
 	 * Low mode state transfers across transaction rolls to mirror dfops
@@ -594,7 +589,6 @@ xfs_defer_move(
 	list_splice_init(&src->dop_pending, &dst->dop_pending);
 
 	memcpy(dst->dop_inodes, src->dop_inodes, sizeof(dst->dop_inodes));
-	memcpy(dst->dop_bufs, src->dop_bufs, sizeof(dst->dop_bufs));
 
 	/*
 	 * Low free space mode was historically controlled by a dfops field.
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 8908a2716774..4a8bb838adf2 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -43,7 +43,6 @@ void xfs_defer_cancel(struct xfs_trans *);
 void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
-int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
 void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp);
 
 /* Description of a deferred type. */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index da5c55cec966..e1196854dbcd 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -362,7 +362,6 @@ xfs_dquot_disk_alloc(
 	 * manually or by committing the transaction.
 	 */
 	xfs_trans_bhold(tp, bp);
-	error = xfs_defer_bjoin(tp->t_dfops, bp);
 	if (error) {
 		xfs_trans_bhold_release(tp, bp);
 		xfs_trans_brelse(tp, bp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7e493221160e..581456c79197 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -100,7 +100,6 @@ struct xfs_defer_ops {
 
 	/* relog these with each roll */
 	struct xfs_inode	*dop_inodes[XFS_DEFER_OPS_NR_INODES];
-	struct xfs_buf		*dop_bufs[XFS_DEFER_OPS_NR_BUFS];
 };
 
 /*
-- 
cgit v1.2.3


From a8198666fb755e129c2fe92819774256ec26c79c Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:32 -0700
Subject: xfs: automatic dfops inode relogging

Inodes that are held across deferred operations are explicitly
joined to the dfops structure to ensure appropriate relogging.
While inodes are currently joined explicitly, we can detect the
conditions that require relogging at dfops finish time by inspecting
the transaction item list for inodes with ili_lock_flags == 0.

Replace the xfs_defer_ijoin() infrastructure with such detection and
automatic relogging of held inodes. This eliminates the need for the
per-dfops inode list, replaced by an on-stack variant in
xfs_defer_trans_roll().

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c        |  9 -------
 fs/xfs/libxfs/xfs_attr_remote.c |  2 --
 fs/xfs/libxfs/xfs_bmap.c        |  8 ------
 fs/xfs/libxfs/xfs_defer.c       | 59 +++++++++++++++--------------------------
 fs/xfs/libxfs/xfs_defer.h       |  1 -
 fs/xfs/xfs_bmap_util.c          |  4 ---
 fs/xfs/xfs_inode.c              |  2 --
 fs/xfs/xfs_iomap.c              |  3 ---
 fs/xfs/xfs_reflink.c            |  4 ---
 fs/xfs/xfs_symlink.c            |  1 -
 fs/xfs/xfs_trans.h              |  3 ---
 11 files changed, 21 insertions(+), 75 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 227887bee00d..3190dfc21b60 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -320,7 +320,6 @@ xfs_attr_set(
 		 * buffer and run into problems with the write verifier.
 		 */
 		xfs_trans_bhold(args.trans, leaf_bp);
-		xfs_defer_ijoin(args.trans->t_dfops, dp);
 		error = xfs_defer_finish(&args.trans);
 		if (error)
 			goto out;
@@ -589,7 +588,6 @@ xfs_attr_leaf_addname(
 		error = xfs_attr3_leaf_to_node(args);
 		if (error)
 			goto out_defer_cancel;
-		xfs_defer_ijoin(args->trans->t_dfops, dp);
 		error = xfs_defer_finish(&args->trans);
 		if (error)
 			goto out_defer_cancel;
@@ -678,7 +676,6 @@ xfs_attr_leaf_addname(
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
 				goto out_defer_cancel;
-			xfs_defer_ijoin(args->trans->t_dfops, dp);
 			error = xfs_defer_finish(&args->trans);
 			if (error)
 				goto out_defer_cancel;
@@ -742,7 +739,6 @@ xfs_attr_leaf_removename(
 		/* bp is gone due to xfs_da_shrink_inode */
 		if (error)
 			goto out_defer_cancel;
-		xfs_defer_ijoin(args->trans->t_dfops, dp);
 		error = xfs_defer_finish(&args->trans);
 		if (error)
 			goto out_defer_cancel;
@@ -869,7 +865,6 @@ restart:
 			error = xfs_attr3_leaf_to_node(args);
 			if (error)
 				goto out_defer_cancel;
-			xfs_defer_ijoin(args->trans->t_dfops, dp);
 			error = xfs_defer_finish(&args->trans);
 			if (error)
 				goto out_defer_cancel;
@@ -894,7 +889,6 @@ restart:
 		error = xfs_da3_split(state);
 		if (error)
 			goto out_defer_cancel;
-		xfs_defer_ijoin(args->trans->t_dfops, dp);
 		error = xfs_defer_finish(&args->trans);
 		if (error)
 			goto out_defer_cancel;
@@ -991,7 +985,6 @@ restart:
 			error = xfs_da3_join(state);
 			if (error)
 				goto out_defer_cancel;
-			xfs_defer_ijoin(args->trans->t_dfops, dp);
 			error = xfs_defer_finish(&args->trans);
 			if (error)
 				goto out_defer_cancel;
@@ -1115,7 +1108,6 @@ xfs_attr_node_removename(
 		error = xfs_da3_join(state);
 		if (error)
 			goto out_defer_cancel;
-		xfs_defer_ijoin(args->trans->t_dfops, dp);
 		error = xfs_defer_finish(&args->trans);
 		if (error)
 			goto out_defer_cancel;
@@ -1147,7 +1139,6 @@ xfs_attr_node_removename(
 			/* bp is gone due to xfs_da_shrink_inode */
 			if (error)
 				goto out_defer_cancel;
-			xfs_defer_ijoin(args->trans->t_dfops, dp);
 			error = xfs_defer_finish(&args->trans);
 			if (error)
 				goto out_defer_cancel;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 77ca38586913..f52552313773 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -486,7 +486,6 @@ xfs_attr_rmtval_set(
 				  &nmap);
 		if (error)
 			goto out_defer_cancel;
-		xfs_defer_ijoin(args->trans->t_dfops, dp);
 		error = xfs_defer_finish(&args->trans);
 		if (error)
 			goto out_defer_cancel;
@@ -627,7 +626,6 @@ xfs_attr_rmtval_remove(
 				    XFS_BMAPI_ATTRFORK, 1, &done);
 		if (error)
 			goto out_defer_cancel;
-		xfs_defer_ijoin(args->trans->t_dfops, args->dp);
 		error = xfs_defer_finish(&args->trans);
 		if (error)
 			goto out_defer_cancel;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 71687d805f79..5cd490dc891a 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1119,7 +1119,6 @@ xfs_bmap_add_attrfork(
 			xfs_log_sb(tp);
 	}
 
-	xfs_defer_ijoin(tp->t_dfops, ip);
 	error = xfs_trans_commit(tp);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
@@ -5987,7 +5986,6 @@ __xfs_bmap_add(
 	int				whichfork,
 	struct xfs_bmbt_irec		*bmap)
 {
-	int				error;
 	struct xfs_bmap_intent		*bi;
 
 	trace_xfs_bmap_defer(mp,
@@ -6006,12 +6004,6 @@ __xfs_bmap_add(
 	bi->bi_whichfork = whichfork;
 	bi->bi_bmap = *bmap;
 
-	error = xfs_defer_ijoin(dfops, bi->bi_owner);
-	if (error) {
-		kmem_free(bi);
-		return error;
-	}
-
 	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
 	return 0;
 }
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index e9b7671d289a..1e7073252a5e 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -15,6 +15,8 @@
 #include "xfs_defer.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
 #include "xfs_trace.h"
 
 /*
@@ -230,16 +232,14 @@ xfs_defer_trans_roll(
 {
 	struct xfs_defer_ops		*dop = (*tp)->t_dfops;
 	struct xfs_buf_log_item		*bli;
+	struct xfs_inode_log_item	*ili;
 	struct xfs_log_item		*lip;
 	struct xfs_buf			*bplist[XFS_DEFER_OPS_NR_BUFS];
-	int				bpcount = 0;
+	struct xfs_inode		*iplist[XFS_DEFER_OPS_NR_INODES];
+	int				bpcount = 0, ipcount = 0;
 	int				i;
 	int				error;
 
-	/* Log all the joined inodes. */
-	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
-		xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
-
 	list_for_each_entry(lip, &(*tp)->t_items, li_trans) {
 		switch (lip->li_type) {
 		case XFS_LI_BUF:
@@ -254,6 +254,19 @@ xfs_defer_trans_roll(
 				bplist[bpcount++] = bli->bli_buf;
 			}
 			break;
+		case XFS_LI_INODE:
+			ili = container_of(lip, struct xfs_inode_log_item,
+					   ili_item);
+			if (ili->ili_lock_flags == 0) {
+				if (ipcount >= XFS_DEFER_OPS_NR_INODES) {
+					ASSERT(0);
+					return -EFSCORRUPTED;
+				}
+				xfs_trans_log_inode(*tp, ili->ili_inode,
+						    XFS_ILOG_CORE);
+				iplist[ipcount++] = ili->ili_inode;
+			}
+			break;
 		default:
 			break;
 		}
@@ -271,8 +284,8 @@ xfs_defer_trans_roll(
 	}
 
 	/* Rejoin the joined inodes. */
-	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
-		xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
+	for (i = 0; i < ipcount; i++)
+		xfs_trans_ijoin(*tp, iplist[i], 0);
 
 	/* Rejoin the buffers and dirty them so the log moves forward. */
 	for (i = 0; i < bpcount; i++) {
@@ -291,30 +304,6 @@ xfs_defer_has_unfinished_work(
 	return !list_empty(&dop->dop_pending) || !list_empty(&dop->dop_intake);
 }
 
-/*
- * Add this inode to the deferred op.  Each joined inode is relogged
- * each time we roll the transaction.
- */
-int
-xfs_defer_ijoin(
-	struct xfs_defer_ops		*dop,
-	struct xfs_inode		*ip)
-{
-	int				i;
-
-	for (i = 0; i < XFS_DEFER_OPS_NR_INODES; i++) {
-		if (dop->dop_inodes[i] == ip)
-			return 0;
-		else if (dop->dop_inodes[i] == NULL) {
-			dop->dop_inodes[i] = ip;
-			return 0;
-		}
-	}
-
-	ASSERT(0);
-	return -EFSCORRUPTED;
-}
-
 /*
  * Reset an already used dfops after finish.
  */
@@ -322,11 +311,7 @@ static void
 xfs_defer_reset(
 	struct xfs_trans	*tp)
 {
-	struct xfs_defer_ops	*dop = tp->t_dfops;
-
-	ASSERT(!xfs_defer_has_unfinished_work(dop));
-
-	memset(dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+	ASSERT(!xfs_defer_has_unfinished_work(tp->t_dfops));
 
 	/*
 	 * Low mode state transfers across transaction rolls to mirror dfops
@@ -588,8 +573,6 @@ xfs_defer_move(
 	list_splice_init(&src->dop_intake, &dst->dop_intake);
 	list_splice_init(&src->dop_pending, &dst->dop_pending);
 
-	memcpy(dst->dop_inodes, src->dop_inodes, sizeof(dst->dop_inodes));
-
 	/*
 	 * Low free space mode was historically controlled by a dfops field.
 	 * This meant that low mode state potentially carried across multiple
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 4a8bb838adf2..bf1e9f78561e 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -42,7 +42,6 @@ int xfs_defer_finish(struct xfs_trans **tp);
 void xfs_defer_cancel(struct xfs_trans *);
 void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
-int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
 void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp);
 
 /* Description of a deferred type. */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 0c58a66b39e5..30ac1300dc49 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -979,7 +979,6 @@ xfs_alloc_file_space(
 		/*
 		 * Complete the transaction
 		 */
-		xfs_defer_ijoin(tp->t_dfops, ip);
 		error = xfs_trans_commit(tp);
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
@@ -1037,8 +1036,6 @@ xfs_unmap_extent(
 	if (error)
 		goto out_trans_cancel;
 
-	xfs_defer_ijoin(tp->t_dfops, ip);
-
 	error = xfs_trans_commit(tp);
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1624,7 +1621,6 @@ xfs_swap_extent_rmap(
 			if (error)
 				goto out_defer;
 
-			xfs_defer_ijoin(tp->t_dfops, ip);
 			error = xfs_defer_finish(tpp);
 			tp = *tpp;
 			if (error)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 441c8593cfd7..7bb46a0eecfc 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1569,7 +1569,6 @@ xfs_itruncate_extents_flags(
 		 * Duplicate the transaction that has the permanent
 		 * reservation and commit the old transaction.
 		 */
-		xfs_defer_ijoin(tp->t_dfops, ip);
 		error = xfs_defer_finish(&tp);
 		if (error)
 			goto out_bmap_cancel;
@@ -1810,7 +1809,6 @@ xfs_inactive_ifree(
 	 * Just ignore errors at this point.  There is nothing we can do except
 	 * to try to keep going. Make sure it's not a silent error.
 	 */
-	xfs_defer_ijoin(tp->t_dfops, ip);
 	error = xfs_trans_commit(tp);
 	if (error)
 		xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 8093a01fcf9e..3282575e2df4 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -261,7 +261,6 @@ xfs_iomap_write_direct(
 	/*
 	 * Complete the transaction
 	 */
-	xfs_defer_ijoin(tp->t_dfops, ip);
 	error = xfs_trans_commit(tp);
 	if (error)
 		goto out_unlock;
@@ -764,7 +763,6 @@ xfs_iomap_write_allocate(
 			if (error)
 				goto trans_cancel;
 
-			xfs_defer_ijoin(tp->t_dfops, ip);
 			error = xfs_trans_commit(tp);
 			if (error)
 				goto error0;
@@ -884,7 +882,6 @@ xfs_iomap_write_unwritten(
 			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 		}
 
-		xfs_defer_ijoin(tp->t_dfops, ip);
 		error = xfs_trans_commit(tp);
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index e986fcf928e5..dce8ba8ab681 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -435,7 +435,6 @@ retry:
 	xfs_inode_set_cowblocks_tag(ip);
 
 	/* Finish up. */
-	xfs_defer_ijoin(tp->t_dfops, ip);
 	error = xfs_trans_commit(tp);
 	if (error)
 		return error;
@@ -518,7 +517,6 @@ xfs_reflink_cancel_cow_blocks(
 					NULL);
 
 			/* Roll the transaction */
-			xfs_defer_ijoin((*tpp)->t_dfops, ip);
 			error = xfs_defer_finish(tpp);
 			if (error) {
 				xfs_defer_cancel(*tpp);
@@ -716,7 +714,6 @@ xfs_reflink_end_cow(
 		/* Remove the mapping from the CoW fork. */
 		xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
 
-		xfs_defer_ijoin(tp->t_dfops, ip);
 		error = xfs_defer_finish(&tp);
 		if (error)
 			goto out_cancel;
@@ -1077,7 +1074,6 @@ xfs_reflink_remap_extent(
 
 next_extent:
 		/* Process all the deferred stuff. */
-		xfs_defer_ijoin(tp->t_dfops, ip);
 		error = xfs_defer_finish(&tp);
 		if (error)
 			goto out_cancel;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 2bfe7fbbedb2..a3e98c64b6e3 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -454,7 +454,6 @@ xfs_inactive_symlink_rmt(
 	 * Commit the transaction. This first logs the EFI and the inode, then
 	 * rolls and commits the transaction that frees the extents.
 	 */
-	xfs_defer_ijoin(tp->t_dfops, ip);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	error = xfs_trans_commit(tp);
 	if (error) {
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 581456c79197..8665d45b82c6 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -97,9 +97,6 @@ void	xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
 struct xfs_defer_ops {
 	struct list_head	dop_intake;	/* unlogged pending work */
 	struct list_head	dop_pending;	/* logged pending work */
-
-	/* relog these with each roll */
-	struct xfs_inode	*dop_inodes[XFS_DEFER_OPS_NR_INODES];
 };
 
 /*
-- 
cgit v1.2.3


From 7dbddbaccd189e63c39c9e22c728c4548b9893bb Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:32 -0700
Subject: xfs: drop dop param from xfs_defer_op_type ->finish_item() callback

The dfops infrastructure ->finish_item() callback passes the
transaction and dfops as separate parameters. Since dfops is always
part of a transaction, the latter parameter is no longer necessary.
Remove it from the various callbacks.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c     |  2 --
 fs/xfs/libxfs/xfs_bmap.h     |  6 +++---
 fs/xfs/libxfs/xfs_defer.c    |  2 +-
 fs/xfs/libxfs/xfs_defer.h    |  4 ++--
 fs/xfs/libxfs/xfs_refcount.c |  2 +-
 fs/xfs/libxfs/xfs_refcount.h |  7 +++----
 fs/xfs/xfs_bmap_item.c       |  5 ++---
 fs/xfs/xfs_refcount_item.c   |  4 ++--
 fs/xfs/xfs_trans.h           | 10 +++++-----
 fs/xfs/xfs_trans_bmap.c      |  6 ++----
 fs/xfs/xfs_trans_extfree.c   |  2 --
 fs/xfs/xfs_trans_refcount.c  |  6 ++----
 fs/xfs/xfs_trans_rmap.c      |  1 -
 13 files changed, 23 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 5cd490dc891a..d20f541b7061 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -6045,7 +6045,6 @@ xfs_bmap_unmap_extent(
 int
 xfs_bmap_finish_one(
 	struct xfs_trans		*tp,
-	struct xfs_defer_ops		*dfops,
 	struct xfs_inode		*ip,
 	enum xfs_bmap_intent_type	type,
 	int				whichfork,
@@ -6072,7 +6071,6 @@ xfs_bmap_finish_one(
 
 	switch (type) {
 	case XFS_BMAP_MAP:
-		ASSERT(dfops == tp->t_dfops);
 		error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
 				startblock, 0);
 		*blockcount = 0;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 2e8555c1229a..9165a878edcd 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -252,9 +252,9 @@ struct xfs_bmap_intent {
 	struct xfs_bmbt_irec			bi_bmap;
 };
 
-int	xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_defer_ops *dfops,
-		struct xfs_inode *ip, enum xfs_bmap_intent_type type,
-		int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
+int	xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_inode *ip,
+		enum xfs_bmap_intent_type type, int whichfork,
+		xfs_fileoff_t startoff, xfs_fsblock_t startblock,
 		xfs_filblks_t *blockcount, xfs_exntst_t state);
 int	xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 		struct xfs_inode *ip, struct xfs_bmbt_irec *imap);
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 1e7073252a5e..66ef9341813b 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -371,7 +371,7 @@ xfs_defer_finish_noroll(
 		list_for_each_safe(li, n, &dfp->dfp_work) {
 			list_del(li);
 			dfp->dfp_count--;
-			error = dfp->dfp_type->finish_item(*tp, dop, li,
+			error = dfp->dfp_type->finish_item(*tp, li,
 					dfp->dfp_done, &state);
 			if (error == -EAGAIN) {
 				/*
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index bf1e9f78561e..0de7504e5651 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -50,8 +50,8 @@ struct xfs_defer_op_type {
 	unsigned int		max_items;
 	void (*abort_intent)(void *);
 	void *(*create_done)(struct xfs_trans *, void *, unsigned int);
-	int (*finish_item)(struct xfs_trans *, struct xfs_defer_ops *,
-			struct list_head *, void *, void **);
+	int (*finish_item)(struct xfs_trans *, struct list_head *, void *,
+			void **);
 	void (*finish_cleanup)(struct xfs_trans *, void *, int);
 	void (*cancel_item)(struct list_head *);
 	int (*diff_items)(void *, struct list_head *, struct list_head *);
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 4cbc2efb099e..86f297ca90cd 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1082,7 +1082,6 @@ xfs_refcount_finish_one_cleanup(
 int
 xfs_refcount_finish_one(
 	struct xfs_trans		*tp,
-	struct xfs_defer_ops		*dfops,
 	enum xfs_refcount_intent_type	type,
 	xfs_fsblock_t			startblock,
 	xfs_extlen_t			blockcount,
@@ -1091,6 +1090,7 @@ xfs_refcount_finish_one(
 	struct xfs_btree_cur		**pcur)
 {
 	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_defer_ops		*dfops = tp->t_dfops;
 	struct xfs_btree_cur		*rcur;
 	struct xfs_buf			*agbp = NULL;
 	int				error = 0;
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 5fef74412727..3b72c6dbf6ad 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -37,10 +37,9 @@ extern int xfs_refcount_decrease_extent(struct xfs_mount *mp,
 extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp,
 		struct xfs_btree_cur *rcur, int error);
 extern int xfs_refcount_finish_one(struct xfs_trans *tp,
-		struct xfs_defer_ops *dfops, enum xfs_refcount_intent_type type,
-		xfs_fsblock_t startblock, xfs_extlen_t blockcount,
-		xfs_fsblock_t *new_fsb, xfs_extlen_t *new_len,
-		struct xfs_btree_cur **pcur);
+		enum xfs_refcount_intent_type type, xfs_fsblock_t startblock,
+		xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb,
+		xfs_extlen_t *new_len, struct xfs_btree_cur **pcur);
 
 extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur,
 		xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index b8a6be036cd7..e828e0b51814 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -475,9 +475,8 @@ xfs_bui_recover(
 	xfs_trans_ijoin(tp, ip, 0);
 
 	count = bmap->me_len;
-	error = xfs_trans_log_finish_bmap_update(tp, budp, tp->t_dfops, type,
-			ip, whichfork, bmap->me_startoff,
-			bmap->me_startblock, &count, state);
+	error = xfs_trans_log_finish_bmap_update(tp, budp, type, ip, whichfork,
+			bmap->me_startoff, bmap->me_startblock, &count, state);
 	if (error)
 		goto err_inode;
 
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 4a417daae781..43c4ac374cba 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -479,8 +479,8 @@ xfs_cui_recover(
 			new_len = refc->pe_len;
 		} else
 			error = xfs_trans_log_finish_refcount_update(tp, cudp,
-				tp->t_dfops, type, refc->pe_startblock,
-				refc->pe_len, &new_fsb, &new_len, &rcur);
+				type, refc->pe_startblock, refc->pe_len,
+				&new_fsb, &new_len, &rcur);
 		if (error)
 			goto abort_error;
 
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 8665d45b82c6..299656dbf324 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -268,7 +268,7 @@ void xfs_refcount_update_init_defer_op(void);
 struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp,
 		struct xfs_cui_log_item *cuip);
 int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp,
-		struct xfs_cud_log_item *cudp, struct xfs_defer_ops *dfops,
+		struct xfs_cud_log_item *cudp,
 		enum xfs_refcount_intent_type type, xfs_fsblock_t startblock,
 		xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb,
 		xfs_extlen_t *new_len, struct xfs_btree_cur **pcur);
@@ -280,9 +280,9 @@ void xfs_bmap_update_init_defer_op(void);
 struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp,
 		struct xfs_bui_log_item *buip);
 int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp,
-		struct xfs_bud_log_item *rudp, struct xfs_defer_ops *dfops,
-		enum xfs_bmap_intent_type type, struct xfs_inode *ip,
-		int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
-		xfs_filblks_t *blockcount, xfs_exntst_t state);
+		struct xfs_bud_log_item *rudp, enum xfs_bmap_intent_type type,
+		struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff,
+		xfs_fsblock_t startblock, xfs_filblks_t *blockcount,
+		xfs_exntst_t state);
 
 #endif	/* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c
index a15a5cd867f9..741c558b2179 100644
--- a/fs/xfs/xfs_trans_bmap.c
+++ b/fs/xfs/xfs_trans_bmap.c
@@ -43,7 +43,6 @@ int
 xfs_trans_log_finish_bmap_update(
 	struct xfs_trans		*tp,
 	struct xfs_bud_log_item		*budp,
-	struct xfs_defer_ops		*dop,
 	enum xfs_bmap_intent_type	type,
 	struct xfs_inode		*ip,
 	int				whichfork,
@@ -54,7 +53,7 @@ xfs_trans_log_finish_bmap_update(
 {
 	int				error;
 
-	error = xfs_bmap_finish_one(tp, dop, ip, type, whichfork, startoff,
+	error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
 			startblock, blockcount, state);
 
 	/*
@@ -176,7 +175,6 @@ xfs_bmap_update_create_done(
 STATIC int
 xfs_bmap_update_finish_item(
 	struct xfs_trans		*tp,
-	struct xfs_defer_ops		*dop,
 	struct list_head		*item,
 	void				*done_item,
 	void				**state)
@@ -187,7 +185,7 @@ xfs_bmap_update_finish_item(
 
 	bmap = container_of(item, struct xfs_bmap_intent, bi_list);
 	count = bmap->bi_bmap.br_blockcount;
-	error = xfs_trans_log_finish_bmap_update(tp, done_item, dop,
+	error = xfs_trans_log_finish_bmap_update(tp, done_item,
 			bmap->bi_type,
 			bmap->bi_owner, bmap->bi_whichfork,
 			bmap->bi_bmap.br_startoff,
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index bd66c76f55e6..855c0b651fd4 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -171,7 +171,6 @@ xfs_extent_free_create_done(
 STATIC int
 xfs_extent_free_finish_item(
 	struct xfs_trans		*tp,
-	struct xfs_defer_ops		*dop,
 	struct list_head		*item,
 	void				*done_item,
 	void				**state)
@@ -226,7 +225,6 @@ static const struct xfs_defer_op_type xfs_extent_free_defer_type = {
 STATIC int
 xfs_agfl_free_finish_item(
 	struct xfs_trans		*tp,
-	struct xfs_defer_ops		*dop,
 	struct list_head		*item,
 	void				*done_item,
 	void				**state)
diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c
index 46dd4fca8aa7..523c55663954 100644
--- a/fs/xfs/xfs_trans_refcount.c
+++ b/fs/xfs/xfs_trans_refcount.c
@@ -42,7 +42,6 @@ int
 xfs_trans_log_finish_refcount_update(
 	struct xfs_trans		*tp,
 	struct xfs_cud_log_item		*cudp,
-	struct xfs_defer_ops		*dop,
 	enum xfs_refcount_intent_type	type,
 	xfs_fsblock_t			startblock,
 	xfs_extlen_t			blockcount,
@@ -52,7 +51,7 @@ xfs_trans_log_finish_refcount_update(
 {
 	int				error;
 
-	error = xfs_refcount_finish_one(tp, dop, type, startblock,
+	error = xfs_refcount_finish_one(tp, type, startblock,
 			blockcount, new_fsb, new_len, pcur);
 
 	/*
@@ -169,7 +168,6 @@ xfs_refcount_update_create_done(
 STATIC int
 xfs_refcount_update_finish_item(
 	struct xfs_trans		*tp,
-	struct xfs_defer_ops		*dop,
 	struct list_head		*item,
 	void				*done_item,
 	void				**state)
@@ -180,7 +178,7 @@ xfs_refcount_update_finish_item(
 	int				error;
 
 	refc = container_of(item, struct xfs_refcount_intent, ri_list);
-	error = xfs_trans_log_finish_refcount_update(tp, done_item, dop,
+	error = xfs_trans_log_finish_refcount_update(tp, done_item,
 			refc->ri_type,
 			refc->ri_startblock,
 			refc->ri_blockcount,
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
index 726d8e2c0558..05b00e40251f 100644
--- a/fs/xfs/xfs_trans_rmap.c
+++ b/fs/xfs/xfs_trans_rmap.c
@@ -193,7 +193,6 @@ xfs_rmap_update_create_done(
 STATIC int
 xfs_rmap_update_finish_item(
 	struct xfs_trans		*tp,
-	struct xfs_defer_ops		*dop,
 	struct list_head		*item,
 	void				*done_item,
 	void				**state)
-- 
cgit v1.2.3


From 60f31a609ed3d28791acb2bc24188cb7e2259176 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:33 -0700
Subject: xfs: clean out superfluous dfops dop params/vars

The dfops code still passes around the xfs_defer_ops pointer
superfluously in a few places. Clean this up wherever the
transaction will suffice.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_defer.c | 43 +++++++++++++++++++++----------------------
 fs/xfs/libxfs/xfs_defer.h |  2 +-
 fs/xfs/xfs_trans.c        |  2 +-
 3 files changed, 23 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 66ef9341813b..7079f534c735 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -181,9 +181,9 @@ static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX];
  */
 STATIC void
 xfs_defer_intake_work(
-	struct xfs_trans		*tp,
-	struct xfs_defer_ops		*dop)
+	struct xfs_trans		*tp)
 {
+	struct xfs_defer_ops		*dop = tp->t_dfops;
 	struct list_head		*li;
 	struct xfs_defer_pending	*dfp;
 
@@ -204,9 +204,9 @@ xfs_defer_intake_work(
 STATIC void
 xfs_defer_trans_abort(
 	struct xfs_trans		*tp,
-	struct xfs_defer_ops		*dop,
 	int				error)
 {
+	struct xfs_defer_ops		*dop = tp->t_dfops;
 	struct xfs_defer_pending	*dfp;
 
 	trace_xfs_defer_trans_abort(tp->t_mountp, dop, _RET_IP_);
@@ -230,7 +230,6 @@ STATIC int
 xfs_defer_trans_roll(
 	struct xfs_trans		**tp)
 {
-	struct xfs_defer_ops		*dop = (*tp)->t_dfops;
 	struct xfs_buf_log_item		*bli;
 	struct xfs_inode_log_item	*ili;
 	struct xfs_log_item		*lip;
@@ -272,14 +271,14 @@ xfs_defer_trans_roll(
 		}
 	}
 
-	trace_xfs_defer_trans_roll((*tp)->t_mountp, dop, _RET_IP_);
+	trace_xfs_defer_trans_roll((*tp)->t_mountp, (*tp)->t_dfops, _RET_IP_);
 
 	/* Roll the transaction. */
 	error = xfs_trans_roll(tp);
-	dop = (*tp)->t_dfops;
 	if (error) {
-		trace_xfs_defer_trans_roll_error((*tp)->t_mountp, dop, error);
-		xfs_defer_trans_abort(*tp, dop, error);
+		trace_xfs_defer_trans_roll_error((*tp)->t_mountp,
+						 (*tp)->t_dfops, error);
+		xfs_defer_trans_abort(*tp,  error);
 		return error;
 	}
 
@@ -299,9 +298,10 @@ xfs_defer_trans_roll(
 /* Do we have any work items to finish? */
 bool
 xfs_defer_has_unfinished_work(
-	struct xfs_defer_ops		*dop)
+	struct xfs_trans		*tp)
 {
-	return !list_empty(&dop->dop_pending) || !list_empty(&dop->dop_intake);
+	return !list_empty(&tp->t_dfops->dop_pending) ||
+		!list_empty(&tp->t_dfops->dop_intake);
 }
 
 /*
@@ -311,7 +311,7 @@ static void
 xfs_defer_reset(
 	struct xfs_trans	*tp)
 {
-	ASSERT(!xfs_defer_has_unfinished_work(tp->t_dfops));
+	ASSERT(!xfs_defer_has_unfinished_work(tp));
 
 	/*
 	 * Low mode state transfers across transaction rolls to mirror dfops
@@ -332,7 +332,6 @@ int
 xfs_defer_finish_noroll(
 	struct xfs_trans		**tp)
 {
-	struct xfs_defer_ops		*dop = (*tp)->t_dfops;
 	struct xfs_defer_pending	*dfp;
 	struct list_head		*li;
 	struct list_head		*n;
@@ -342,24 +341,22 @@ xfs_defer_finish_noroll(
 
 	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
 
-	trace_xfs_defer_finish((*tp)->t_mountp, dop, _RET_IP_);
+	trace_xfs_defer_finish((*tp)->t_mountp, (*tp)->t_dfops, _RET_IP_);
 
 	/* Until we run out of pending work to finish... */
-	while (xfs_defer_has_unfinished_work(dop)) {
+	while (xfs_defer_has_unfinished_work(*tp)) {
 		/* Log intents for work items sitting in the intake. */
-		xfs_defer_intake_work(*tp, dop);
+		xfs_defer_intake_work(*tp);
 
 		/*
-		 * Roll the transaction and update dop in case dfops was
-		 * embedded in the transaction.
+		 * Roll the transaction.
 		 */
 		error = xfs_defer_trans_roll(tp);
 		if (error)
 			goto out;
-		dop = (*tp)->t_dfops;
 
 		/* Log an intent-done item for the first pending item. */
-		dfp = list_first_entry(&dop->dop_pending,
+		dfp = list_first_entry(&(*tp)->t_dfops->dop_pending,
 				struct xfs_defer_pending, dfp_list);
 		trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
 		dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
@@ -390,7 +387,7 @@ xfs_defer_finish_noroll(
 				 */
 				if (cleanup_fn)
 					cleanup_fn(*tp, state, error);
-				xfs_defer_trans_abort(*tp, dop, error);
+				xfs_defer_trans_abort(*tp, error);
 				goto out;
 			}
 		}
@@ -420,9 +417,11 @@ xfs_defer_finish_noroll(
 
 out:
 	if (error)
-		trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error);
+		trace_xfs_defer_finish_error((*tp)->t_mountp, (*tp)->t_dfops,
+					     error);
 	 else
-		trace_xfs_defer_finish_done((*tp)->t_mountp, dop, _RET_IP_);
+		trace_xfs_defer_finish_done((*tp)->t_mountp, (*tp)->t_dfops,
+					    _RET_IP_);
 
 	return error;
 }
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 0de7504e5651..f051c8056141 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -41,7 +41,7 @@ int xfs_defer_finish_noroll(struct xfs_trans **tp);
 int xfs_defer_finish(struct xfs_trans **tp);
 void xfs_defer_cancel(struct xfs_trans *);
 void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop);
-bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
+bool xfs_defer_has_unfinished_work(struct xfs_trans *tp);
 void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp);
 
 /* Description of a deferred type. */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ae3c875a14e5..b0130b21f4de 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -929,7 +929,7 @@ __xfs_trans_commit(
 	 * Finish deferred items on final commit. Only permanent transactions
 	 * should ever have deferred ops.
 	 */
-	WARN_ON_ONCE(xfs_defer_has_unfinished_work(tp->t_dfops) &&
+	WARN_ON_ONCE(xfs_defer_has_unfinished_work(tp) &&
 		     !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
 	if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
 		error = xfs_defer_finish_noroll(&tp);
-- 
cgit v1.2.3


From 9b1f4e9831df29776031e86e112e68784f1fc079 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:33 -0700
Subject: xfs: cancel dfops on xfs_defer_finish() error

The current semantics of xfs_defer_finish() require the caller to
call xfs_defer_cancel() on error. This is slightly inconsistent with
transaction commit error handling where a failed commit cleans up
the transaction before returning.

More significantly, the only reason ->dop_pending is exposed outside
of xfs_defer_finish() is so that xfs_defer_cancel() can drain it on
error. Since the only recourse for xfs_defer_finish() errors is
cancellation, mirror the transaction logic and cancel remaining dfops
before returning from xfs_defer_finish() with an error.

Besides simplifying xfs_defer_finish() semantics, this ensures that
xfs_defer_finish() always returns with an empty ->dop_pending and
thus facilitates removal of the list from xfs_defer_ops.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr.c        | 16 ++++++++--------
 fs/xfs/libxfs/xfs_attr_remote.c |  4 ++--
 fs/xfs/libxfs/xfs_defer.c       | 11 ++++++-----
 fs/xfs/xfs_bmap_util.c          |  2 +-
 fs/xfs/xfs_dquot.c              |  2 +-
 fs/xfs/xfs_inode.c              |  2 +-
 fs/xfs/xfs_reflink.c            |  4 +---
 fs/xfs/xfs_trans.c              |  4 +---
 8 files changed, 21 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 3190dfc21b60..1e671d4eb6fa 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -590,7 +590,7 @@ xfs_attr_leaf_addname(
 			goto out_defer_cancel;
 		error = xfs_defer_finish(&args->trans);
 		if (error)
-			goto out_defer_cancel;
+			return error;
 
 		/*
 		 * Commit the current trans (including the inode) and start
@@ -678,7 +678,7 @@ xfs_attr_leaf_addname(
 				goto out_defer_cancel;
 			error = xfs_defer_finish(&args->trans);
 			if (error)
-				goto out_defer_cancel;
+				return error;
 		}
 
 		/*
@@ -741,7 +741,7 @@ xfs_attr_leaf_removename(
 			goto out_defer_cancel;
 		error = xfs_defer_finish(&args->trans);
 		if (error)
-			goto out_defer_cancel;
+			return error;
 	}
 	return 0;
 out_defer_cancel:
@@ -867,7 +867,7 @@ restart:
 				goto out_defer_cancel;
 			error = xfs_defer_finish(&args->trans);
 			if (error)
-				goto out_defer_cancel;
+				goto out;
 
 			/*
 			 * Commit the node conversion and start the next
@@ -891,7 +891,7 @@ restart:
 			goto out_defer_cancel;
 		error = xfs_defer_finish(&args->trans);
 		if (error)
-			goto out_defer_cancel;
+			goto out;
 	} else {
 		/*
 		 * Addition succeeded, update Btree hashvals.
@@ -987,7 +987,7 @@ restart:
 				goto out_defer_cancel;
 			error = xfs_defer_finish(&args->trans);
 			if (error)
-				goto out_defer_cancel;
+				goto out;
 		}
 
 		/*
@@ -1110,7 +1110,7 @@ xfs_attr_node_removename(
 			goto out_defer_cancel;
 		error = xfs_defer_finish(&args->trans);
 		if (error)
-			goto out_defer_cancel;
+			goto out;
 		/*
 		 * Commit the Btree join operation and start a new trans.
 		 */
@@ -1141,7 +1141,7 @@ xfs_attr_node_removename(
 				goto out_defer_cancel;
 			error = xfs_defer_finish(&args->trans);
 			if (error)
-				goto out_defer_cancel;
+				goto out;
 		} else
 			xfs_trans_brelse(args->trans, bp);
 	}
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index f52552313773..af094063e402 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -488,7 +488,7 @@ xfs_attr_rmtval_set(
 			goto out_defer_cancel;
 		error = xfs_defer_finish(&args->trans);
 		if (error)
-			goto out_defer_cancel;
+			return error;
 
 		ASSERT(nmap == 1);
 		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -628,7 +628,7 @@ xfs_attr_rmtval_remove(
 			goto out_defer_cancel;
 		error = xfs_defer_finish(&args->trans);
 		if (error)
-			goto out_defer_cancel;
+			return error;
 
 		/*
 		 * Close out trans and start the next one in the chain.
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 7079f534c735..b656a399cd71 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -416,14 +416,15 @@ xfs_defer_finish_noroll(
 	}
 
 out:
-	if (error)
+	if (error) {
 		trace_xfs_defer_finish_error((*tp)->t_mountp, (*tp)->t_dfops,
 					     error);
-	 else
-		trace_xfs_defer_finish_done((*tp)->t_mountp, (*tp)->t_dfops,
-					    _RET_IP_);
+		xfs_defer_cancel(*tp);
+		return error;
+	}
 
-	return error;
+	trace_xfs_defer_finish_done((*tp)->t_mountp, (*tp)->t_dfops, _RET_IP_);
+	return 0;
 }
 
 int
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 30ac1300dc49..d9dad399440a 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1624,7 +1624,7 @@ xfs_swap_extent_rmap(
 			error = xfs_defer_finish(tpp);
 			tp = *tpp;
 			if (error)
-				goto out_defer;
+				goto out;
 
 			tirec.br_startoff += rlen;
 			if (tirec.br_startblock != HOLESTARTBLOCK &&
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index e1196854dbcd..70a76ac41f01 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -371,7 +371,7 @@ xfs_dquot_disk_alloc(
 	tp = *tpp;
 	if (error) {
 		xfs_buf_relse(bp);
-		goto error1;
+		goto error0;
 	}
 	*bpp = bp;
 	return 0;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7bb46a0eecfc..d957a46dc1cb 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1571,7 +1571,7 @@ xfs_itruncate_extents_flags(
 		 */
 		error = xfs_defer_finish(&tp);
 		if (error)
-			goto out_bmap_cancel;
+			goto out;
 
 		error = xfs_trans_roll_inode(&tp, ip);
 		if (error)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index dce8ba8ab681..2ec562d75494 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -518,10 +518,8 @@ xfs_reflink_cancel_cow_blocks(
 
 			/* Roll the transaction */
 			error = xfs_defer_finish(tpp);
-			if (error) {
-				xfs_defer_cancel(*tpp);
+			if (error)
 				break;
-			}
 
 			/* Remove the mapping from the CoW fork. */
 			xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index b0130b21f4de..b050663c2a70 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -933,10 +933,8 @@ __xfs_trans_commit(
 		     !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
 	if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
 		error = xfs_defer_finish_noroll(&tp);
-		if (error) {
-			xfs_defer_cancel(tp);
+		if (error)
 			goto out_unreserve;
-		}
 	}
 
 	/*
-- 
cgit v1.2.3


From 1ae093cbea3d1ef04e1344b9e3996a9e1763a91b Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:34 -0700
Subject: xfs: replace xfs_defer_ops ->dop_pending with on-stack list

The xfs_defer_ops ->dop_pending list is used to track active
deferred operations once intents are logged. These items must be
aborted in the event of an error. The list is populated as intents
are logged and items are removed as they complete (or are aborted).

Now that xfs_defer_finish() cancels on error, there is no need to
ever access ->dop_pending outside of xfs_defer_finish(). The list is
only ever populated after xfs_defer_finish() begins and is either
completed or cancelled before it returns.

Remove ->dop_pending from xfs_defer_ops and replace it with a local
list in the xfs_defer_finish() path. Pass the local list to the
various helpers now that it is not accessible via dfops. Note that
we have to check for NULL in the abort case as the final tx roll
occurs outside of the scope of the new local list (once the dfops
has completed and thus drained the list).

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_defer.c | 146 +++++++++++++++++++++-------------------------
 fs/xfs/libxfs/xfs_defer.h |   1 -
 fs/xfs/xfs_trace.h        |   5 +-
 fs/xfs/xfs_trans.c        |   2 +-
 fs/xfs/xfs_trans.h        |   1 -
 5 files changed, 71 insertions(+), 84 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index b656a399cd71..1cbddcf539da 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -180,7 +180,7 @@ static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX];
  * the pending list.
  */
 STATIC void
-xfs_defer_intake_work(
+xfs_defer_create_intents(
 	struct xfs_trans		*tp)
 {
 	struct xfs_defer_ops		*dop = tp->t_dfops;
@@ -190,21 +190,19 @@ xfs_defer_intake_work(
 	list_for_each_entry(dfp, &dop->dop_intake, dfp_list) {
 		dfp->dfp_intent = dfp->dfp_type->create_intent(tp,
 				dfp->dfp_count);
-		trace_xfs_defer_intake_work(tp->t_mountp, dfp);
+		trace_xfs_defer_create_intent(tp->t_mountp, dfp);
 		list_sort(tp->t_mountp, &dfp->dfp_work,
 				dfp->dfp_type->diff_items);
 		list_for_each(li, &dfp->dfp_work)
 			dfp->dfp_type->log_item(tp, dfp->dfp_intent, li);
 	}
-
-	list_splice_tail_init(&dop->dop_intake, &dop->dop_pending);
 }
 
 /* Abort all the intents that were committed. */
 STATIC void
 xfs_defer_trans_abort(
 	struct xfs_trans		*tp,
-	int				error)
+	struct list_head		*dop_pending)
 {
 	struct xfs_defer_ops		*dop = tp->t_dfops;
 	struct xfs_defer_pending	*dfp;
@@ -212,24 +210,21 @@ xfs_defer_trans_abort(
 	trace_xfs_defer_trans_abort(tp->t_mountp, dop, _RET_IP_);
 
 	/* Abort intent items that don't have a done item. */
-	list_for_each_entry(dfp, &dop->dop_pending, dfp_list) {
+	list_for_each_entry(dfp, dop_pending, dfp_list) {
 		trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
 		if (dfp->dfp_intent && !dfp->dfp_done) {
 			dfp->dfp_type->abort_intent(dfp->dfp_intent);
 			dfp->dfp_intent = NULL;
 		}
 	}
-
-	/* Shut down FS. */
-	xfs_force_shutdown(tp->t_mountp, (error == -EFSCORRUPTED) ?
-			SHUTDOWN_CORRUPT_INCORE : SHUTDOWN_META_IO_ERROR);
 }
 
 /* Roll a transaction so we can do some deferred op processing. */
 STATIC int
 xfs_defer_trans_roll(
-	struct xfs_trans		**tp)
+	struct xfs_trans		**tpp)
 {
+	struct xfs_trans		*tp = *tpp;
 	struct xfs_buf_log_item		*bli;
 	struct xfs_inode_log_item	*ili;
 	struct xfs_log_item		*lip;
@@ -239,7 +234,7 @@ xfs_defer_trans_roll(
 	int				i;
 	int				error;
 
-	list_for_each_entry(lip, &(*tp)->t_items, li_trans) {
+	list_for_each_entry(lip, &tp->t_items, li_trans) {
 		switch (lip->li_type) {
 		case XFS_LI_BUF:
 			bli = container_of(lip, struct xfs_buf_log_item,
@@ -249,7 +244,7 @@ xfs_defer_trans_roll(
 					ASSERT(0);
 					return -EFSCORRUPTED;
 				}
-				xfs_trans_dirty_buf(*tp, bli->bli_buf);
+				xfs_trans_dirty_buf(tp, bli->bli_buf);
 				bplist[bpcount++] = bli->bli_buf;
 			}
 			break;
@@ -261,7 +256,7 @@ xfs_defer_trans_roll(
 					ASSERT(0);
 					return -EFSCORRUPTED;
 				}
-				xfs_trans_log_inode(*tp, ili->ili_inode,
+				xfs_trans_log_inode(tp, ili->ili_inode,
 						    XFS_ILOG_CORE);
 				iplist[ipcount++] = ili->ili_inode;
 			}
@@ -271,39 +266,30 @@ xfs_defer_trans_roll(
 		}
 	}
 
-	trace_xfs_defer_trans_roll((*tp)->t_mountp, (*tp)->t_dfops, _RET_IP_);
+	trace_xfs_defer_trans_roll(tp->t_mountp, tp->t_dfops, _RET_IP_);
 
 	/* Roll the transaction. */
-	error = xfs_trans_roll(tp);
+	error = xfs_trans_roll(tpp);
+	tp = *tpp;
 	if (error) {
-		trace_xfs_defer_trans_roll_error((*tp)->t_mountp,
-						 (*tp)->t_dfops, error);
-		xfs_defer_trans_abort(*tp,  error);
+		trace_xfs_defer_trans_roll_error(tp->t_mountp,
+						 tp->t_dfops, error);
 		return error;
 	}
 
 	/* Rejoin the joined inodes. */
 	for (i = 0; i < ipcount; i++)
-		xfs_trans_ijoin(*tp, iplist[i], 0);
+		xfs_trans_ijoin(tp, iplist[i], 0);
 
 	/* Rejoin the buffers and dirty them so the log moves forward. */
 	for (i = 0; i < bpcount; i++) {
-		xfs_trans_bjoin(*tp, bplist[i]);
-		xfs_trans_bhold(*tp, bplist[i]);
+		xfs_trans_bjoin(tp, bplist[i]);
+		xfs_trans_bhold(tp, bplist[i]);
 	}
 
 	return error;
 }
 
-/* Do we have any work items to finish? */
-bool
-xfs_defer_has_unfinished_work(
-	struct xfs_trans		*tp)
-{
-	return !list_empty(&tp->t_dfops->dop_pending) ||
-		!list_empty(&tp->t_dfops->dop_intake);
-}
-
 /*
  * Reset an already used dfops after finish.
  */
@@ -311,7 +297,7 @@ static void
 xfs_defer_reset(
 	struct xfs_trans	*tp)
 {
-	ASSERT(!xfs_defer_has_unfinished_work(tp));
+	ASSERT(list_empty(&tp->t_dfops->dop_intake));
 
 	/*
 	 * Low mode state transfers across transaction rolls to mirror dfops
@@ -320,6 +306,36 @@ xfs_defer_reset(
 	tp->t_flags &= ~XFS_TRANS_LOWMODE;
 }
 
+/*
+ * Free up any items left in the list.
+ */
+static void
+xfs_defer_cancel_list(
+	struct xfs_mount		*mp,
+	struct list_head		*dop_list)
+{
+	struct xfs_defer_pending	*dfp;
+	struct xfs_defer_pending	*pli;
+	struct list_head		*pwi;
+	struct list_head		*n;
+
+	/*
+	 * Free the pending items.  Caller should already have arranged
+	 * for the intent items to be released.
+	 */
+	list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
+		trace_xfs_defer_cancel_list(mp, dfp);
+		list_del(&dfp->dfp_list);
+		list_for_each_safe(pwi, n, &dfp->dfp_work) {
+			list_del(pwi);
+			dfp->dfp_count--;
+			dfp->dfp_type->cancel_item(pwi);
+		}
+		ASSERT(dfp->dfp_count == 0);
+		kmem_free(dfp);
+	}
+}
+
 /*
  * Finish all the pending work.  This involves logging intent items for
  * any work items that wandered in since the last transaction roll (if
@@ -338,15 +354,19 @@ xfs_defer_finish_noroll(
 	void				*state;
 	int				error = 0;
 	void				(*cleanup_fn)(struct xfs_trans *, void *, int);
+	LIST_HEAD(dop_pending);
 
 	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
 
 	trace_xfs_defer_finish((*tp)->t_mountp, (*tp)->t_dfops, _RET_IP_);
 
 	/* Until we run out of pending work to finish... */
-	while (xfs_defer_has_unfinished_work(*tp)) {
-		/* Log intents for work items sitting in the intake. */
-		xfs_defer_intake_work(*tp);
+	while (!list_empty(&dop_pending) ||
+	       !list_empty(&(*tp)->t_dfops->dop_intake)) {
+		/* log intents and pull in intake items */
+		xfs_defer_create_intents(*tp);
+		list_splice_tail_init(&(*tp)->t_dfops->dop_intake,
+				      &dop_pending);
 
 		/*
 		 * Roll the transaction.
@@ -356,8 +376,8 @@ xfs_defer_finish_noroll(
 			goto out;
 
 		/* Log an intent-done item for the first pending item. */
-		dfp = list_first_entry(&(*tp)->t_dfops->dop_pending,
-				struct xfs_defer_pending, dfp_list);
+		dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
+				       dfp_list);
 		trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
 		dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
 				dfp->dfp_count);
@@ -387,7 +407,6 @@ xfs_defer_finish_noroll(
 				 */
 				if (cleanup_fn)
 					cleanup_fn(*tp, state, error);
-				xfs_defer_trans_abort(*tp, error);
 				goto out;
 			}
 		}
@@ -417,8 +436,11 @@ xfs_defer_finish_noroll(
 
 out:
 	if (error) {
+		xfs_defer_trans_abort(*tp, &dop_pending);
+		xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
 		trace_xfs_defer_finish_error((*tp)->t_mountp, (*tp)->t_dfops,
 					     error);
+		xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
 		xfs_defer_cancel(*tp);
 		return error;
 	}
@@ -442,54 +464,24 @@ xfs_defer_finish(
 		return error;
 	if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
 		error = xfs_defer_trans_roll(tp);
-		if (error)
+		if (error) {
+			xfs_force_shutdown((*tp)->t_mountp,
+					   SHUTDOWN_CORRUPT_INCORE);
 			return error;
+		}
 	}
 	xfs_defer_reset(*tp);
 	return 0;
 }
 
-/*
- * Free up any items left in the list.
- */
 void
 xfs_defer_cancel(
-	struct xfs_trans		*tp)
+	struct xfs_trans	*tp)
 {
-	struct xfs_defer_ops		*dop = tp->t_dfops;
-	struct xfs_defer_pending	*dfp;
-	struct xfs_defer_pending	*pli;
-	struct list_head		*pwi;
-	struct list_head		*n;
+	struct xfs_mount	*mp = tp->t_mountp;
 
-	trace_xfs_defer_cancel(NULL, dop, _RET_IP_);
-
-	/*
-	 * Free the pending items.  Caller should already have arranged
-	 * for the intent items to be released.
-	 */
-	list_for_each_entry_safe(dfp, pli, &dop->dop_intake, dfp_list) {
-		trace_xfs_defer_intake_cancel(NULL, dfp);
-		list_del(&dfp->dfp_list);
-		list_for_each_safe(pwi, n, &dfp->dfp_work) {
-			list_del(pwi);
-			dfp->dfp_count--;
-			dfp->dfp_type->cancel_item(pwi);
-		}
-		ASSERT(dfp->dfp_count == 0);
-		kmem_free(dfp);
-	}
-	list_for_each_entry_safe(dfp, pli, &dop->dop_pending, dfp_list) {
-		trace_xfs_defer_pending_cancel(NULL, dfp);
-		list_del(&dfp->dfp_list);
-		list_for_each_safe(pwi, n, &dfp->dfp_work) {
-			list_del(pwi);
-			dfp->dfp_count--;
-			dfp->dfp_type->cancel_item(pwi);
-		}
-		ASSERT(dfp->dfp_count == 0);
-		kmem_free(dfp);
-	}
+	trace_xfs_defer_cancel(mp, tp->t_dfops, _RET_IP_);
+	xfs_defer_cancel_list(mp, &tp->t_dfops->dop_intake);
 }
 
 /* Add an item for later deferred processing. */
@@ -547,7 +539,6 @@ xfs_defer_init(
 
 	memset(dop, 0, sizeof(struct xfs_defer_ops));
 	INIT_LIST_HEAD(&dop->dop_intake);
-	INIT_LIST_HEAD(&dop->dop_pending);
 	if (tp) {
 		ASSERT(tp->t_firstblock == NULLFSBLOCK);
 		tp->t_dfops = dop;
@@ -571,7 +562,6 @@ xfs_defer_move(
 	ASSERT(dst != src);
 
 	list_splice_init(&src->dop_intake, &dst->dop_intake);
-	list_splice_init(&src->dop_pending, &dst->dop_pending);
 
 	/*
 	 * Low free space mode was historically controlled by a dfops field.
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index f051c8056141..f091bf3abeaf 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -41,7 +41,6 @@ int xfs_defer_finish_noroll(struct xfs_trans **tp);
 int xfs_defer_finish(struct xfs_trans **tp);
 void xfs_defer_cancel(struct xfs_trans *);
 void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop);
-bool xfs_defer_has_unfinished_work(struct xfs_trans *tp);
 void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp);
 
 /* Description of a deferred type. */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 8807f1bb814a..fec9cfe3dfb4 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2392,9 +2392,8 @@ DEFINE_DEFER_EVENT(xfs_defer_finish_done);
 DEFINE_DEFER_ERROR_EVENT(xfs_defer_trans_roll_error);
 DEFINE_DEFER_ERROR_EVENT(xfs_defer_finish_error);
 
-DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_work);
-DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_cancel);
-DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_cancel);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_create_intent);
+DEFINE_DEFER_PENDING_EVENT(xfs_defer_cancel_list);
 DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish);
 DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort);
 
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index b050663c2a70..413e4138357f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -929,7 +929,7 @@ __xfs_trans_commit(
 	 * Finish deferred items on final commit. Only permanent transactions
 	 * should ever have deferred ops.
 	 */
-	WARN_ON_ONCE(xfs_defer_has_unfinished_work(tp) &&
+	WARN_ON_ONCE(!list_empty(&tp->t_dfops->dop_intake) &&
 		     !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
 	if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
 		error = xfs_defer_finish_noroll(&tp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 299656dbf324..1cdc7c0ebeac 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -96,7 +96,6 @@ void	xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
 #define XFS_DEFER_OPS_NR_BUFS	2	/* join up to two buffers */
 struct xfs_defer_ops {
 	struct list_head	dop_intake;	/* unlogged pending work */
-	struct list_head	dop_pending;	/* logged pending work */
 };
 
 /*
-- 
cgit v1.2.3


From 0f37d1780c3d864599fb377dcb47ad1aa0686b4e Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:34 -0700
Subject: xfs: pass transaction to xfs_defer_add()

The majority of remaining references to struct xfs_defer_ops in XFS
are associated with xfs_defer_add(). At this point, there are no
more external xfs_defer_ops users left. All instances of
xfs_defer_ops are embedded in the transaction, which means we can
safely pass the transaction down to the dfops add interface.

Update xfs_defer_add() to receive the transaction as a parameter.
Various subsystems implement wrappers to allocate and construct the
context-specific data structures for the associated deferred
operation type. Update these to also carry the transaction down as
needed and clean up unused dfops parameters along the way.

This removes most of the remaining references to struct
xfs_defer_ops throughout the code and facilitates removal of the
structure.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
[darrick: fix unused variable warnings with ftrace disabled]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c      |  9 ++---
 fs/xfs/libxfs/xfs_bmap.c       | 89 +++++++++++++++++++-----------------------
 fs/xfs/libxfs/xfs_bmap.h       | 19 +++++----
 fs/xfs/libxfs/xfs_bmap_btree.c |  2 +-
 fs/xfs/libxfs/xfs_defer.c      |  5 ++-
 fs/xfs/libxfs/xfs_defer.h      |  2 +-
 fs/xfs/libxfs/xfs_ialloc.c     | 25 ++++++------
 fs/xfs/libxfs/xfs_refcount.c   | 80 ++++++++++++++++---------------------
 fs/xfs/libxfs/xfs_refcount.h   | 18 ++++-----
 fs/xfs/libxfs/xfs_rmap.c       | 57 +++++++++++++--------------
 fs/xfs/libxfs/xfs_rmap.h       | 22 +++++------
 fs/xfs/xfs_bmap_item.c         |  3 +-
 fs/xfs/xfs_bmap_util.c         | 13 ++----
 fs/xfs/xfs_refcount_item.c     | 14 ++-----
 fs/xfs/xfs_reflink.c           | 21 +++++-----
 15 files changed, 169 insertions(+), 210 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 9847c1632712..5580b6e23bb3 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2198,12 +2198,12 @@ xfs_agfl_reset(
  */
 STATIC void
 xfs_defer_agfl_block(
-	struct xfs_mount		*mp,
-	struct xfs_defer_ops		*dfops,
+	struct xfs_trans		*tp,
 	xfs_agnumber_t			agno,
 	xfs_fsblock_t			agbno,
 	struct xfs_owner_info		*oinfo)
 {
+	struct xfs_mount		*mp = tp->t_mountp;
 	struct xfs_extent_free_item	*new;		/* new element */
 
 	ASSERT(xfs_bmap_free_item_zone != NULL);
@@ -2216,7 +2216,7 @@ xfs_defer_agfl_block(
 
 	trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
 
-	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
+	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
 }
 
 /*
@@ -2325,8 +2325,7 @@ xfs_alloc_fix_freelist(
 
 		/* defer agfl frees if dfops is provided */
 		if (tp->t_dfops) {
-			xfs_defer_agfl_block(mp, tp->t_dfops, args->agno,
-					     bno, &targs.oinfo);
+			xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
 		} else {
 			error = xfs_free_agfl_block(tp, args->agno, bno, agbp,
 						    &targs.oinfo);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index d20f541b7061..c9fec0443f38 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -533,8 +533,7 @@ xfs_bmap_validate_ret(
  */
 void
 __xfs_bmap_add_free(
-	struct xfs_mount		*mp,
-	struct xfs_defer_ops		*dfops,
+	struct xfs_trans		*tp,
 	xfs_fsblock_t			bno,
 	xfs_filblks_t			len,
 	struct xfs_owner_info		*oinfo,
@@ -542,8 +541,9 @@ __xfs_bmap_add_free(
 {
 	struct xfs_extent_free_item	*new;		/* new element */
 #ifdef DEBUG
-	xfs_agnumber_t		agno;
-	xfs_agblock_t		agbno;
+	struct xfs_mount		*mp = tp->t_mountp;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			agbno;
 
 	ASSERT(bno != NULLFSBLOCK);
 	ASSERT(len > 0);
@@ -566,9 +566,10 @@ __xfs_bmap_add_free(
 	else
 		xfs_rmap_skip_owner_update(&new->xefi_oinfo);
 	new->xefi_skip_discard = skip_discard;
-	trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0,
-			XFS_FSB_TO_AGBNO(mp, bno), len);
-	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
+	trace_xfs_bmap_free_defer(tp->t_mountp,
+			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
+			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
+	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
 }
 
 /*
@@ -624,7 +625,7 @@ xfs_bmap_btree_to_extents(
 	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 		return error;
 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
-	xfs_bmap_add_free(mp, cur->bc_tp->t_dfops, cbno, 1, &oinfo);
+	xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo);
 	ip->i_d.di_nblocks--;
 	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(tp, cbp);
@@ -1961,8 +1962,7 @@ xfs_bmap_add_extent_delay_real(
 
 	/* add reverse mapping unless caller opted out */
 	if (!(bma->flags & XFS_BMAPI_NORMAP)) {
-		error = xfs_rmap_map_extent(mp, bma->tp->t_dfops, bma->ip,
-				whichfork, new);
+		error = xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
 		if (error)
 			goto done;
 	}
@@ -2026,7 +2026,6 @@ xfs_bmap_add_extent_unwritten_real(
 	int			state = xfs_bmap_fork_to_state(whichfork);
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_bmbt_irec	old;
-	struct xfs_defer_ops	*dfops = tp ? tp->t_dfops : NULL;
 
 	*logflagsp = 0;
 
@@ -2445,7 +2444,7 @@ xfs_bmap_add_extent_unwritten_real(
 	}
 
 	/* update reverse mappings */
-	error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
+	error = xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
 	if (error)
 		goto done;
 
@@ -2806,8 +2805,7 @@ xfs_bmap_add_extent_hole_real(
 
 	/* add reverse mapping unless caller opted out */
 	if (!(flags & XFS_BMAPI_NORMAP)) {
-		error = xfs_rmap_map_extent(mp, tp->t_dfops, ip, whichfork,
-				new);
+		error = xfs_rmap_map_extent(tp, ip, whichfork, new);
 		if (error)
 			goto done;
 	}
@@ -4364,9 +4362,8 @@ xfs_bmapi_write(
 			 * the refcount btree for orphan recovery.
 			 */
 			if (whichfork == XFS_COW_FORK) {
-				error = xfs_refcount_alloc_cow_extent(mp,
-						tp->t_dfops, bma.blkno,
-						bma.length);
+				error = xfs_refcount_alloc_cow_extent(tp,
+						bma.blkno, bma.length);
 				if (error)
 					goto error0;
 			}
@@ -4852,7 +4849,6 @@ xfs_bmap_del_extent_real(
 	uint			qfield;	/* quota field to update */
 	int			state = xfs_bmap_fork_to_state(whichfork);
 	struct xfs_bmbt_irec	old;
-	struct xfs_defer_ops	*dfops = tp ? tp->t_dfops : NULL;
 
 	mp = ip->i_mount;
 	XFS_STATS_INC(mp, xs_del_exlist);
@@ -5036,7 +5032,7 @@ xfs_bmap_del_extent_real(
 	}
 
 	/* remove reverse mapping */
-	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
+	error = xfs_rmap_unmap_extent(tp, ip, whichfork, del);
 	if (error)
 		goto done;
 
@@ -5045,11 +5041,11 @@ xfs_bmap_del_extent_real(
 	 */
 	if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
 		if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
-			error = xfs_refcount_decrease_extent(mp, dfops, del);
+			error = xfs_refcount_decrease_extent(tp, del);
 			if (error)
 				goto done;
 		} else {
-			__xfs_bmap_add_free(mp, dfops, del->br_startblock,
+			__xfs_bmap_add_free(tp, del->br_startblock,
 					del->br_blockcount, NULL,
 					(bflags & XFS_BMAPI_NODISCARD) ||
 					del->br_state == XFS_EXT_UNWRITTEN);
@@ -5489,6 +5485,7 @@ xfs_bmse_can_merge(
  */
 STATIC int
 xfs_bmse_merge(
+	struct xfs_trans		*tp,
 	struct xfs_inode		*ip,
 	int				whichfork,
 	xfs_fileoff_t			shift,		/* shift fsb */
@@ -5496,8 +5493,7 @@ xfs_bmse_merge(
 	struct xfs_bmbt_irec		*got,		/* extent to shift */
 	struct xfs_bmbt_irec		*left,		/* preceding extent */
 	struct xfs_btree_cur		*cur,
-	int				*logflags,	/* output */
-	struct xfs_defer_ops		*dfops)
+	int				*logflags)	/* output */
 {
 	struct xfs_bmbt_irec		new;
 	xfs_filblks_t			blockcount;
@@ -5553,23 +5549,23 @@ done:
 			&new);
 
 	/* update reverse mapping. rmap functions merge the rmaps for us */
-	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
+	error = xfs_rmap_unmap_extent(tp, ip, whichfork, got);
 	if (error)
 		return error;
 	memcpy(&new, got, sizeof(new));
 	new.br_startoff = left->br_startoff + left->br_blockcount;
-	return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
+	return xfs_rmap_map_extent(tp, ip, whichfork, &new);
 }
 
 static int
 xfs_bmap_shift_update_extent(
+	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	int			whichfork,
 	struct xfs_iext_cursor	*icur,
 	struct xfs_bmbt_irec	*got,
 	struct xfs_btree_cur	*cur,
 	int			*logflags,
-	struct xfs_defer_ops	*dfops,
 	xfs_fileoff_t		startoff)
 {
 	struct xfs_mount	*mp = ip->i_mount;
@@ -5597,10 +5593,10 @@ xfs_bmap_shift_update_extent(
 			got);
 
 	/* update reverse mapping */
-	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &prev);
+	error = xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
 	if (error)
 		return error;
-	return xfs_rmap_map_extent(mp, dfops, ip, whichfork, got);
+	return xfs_rmap_map_extent(tp, ip, whichfork, got);
 }
 
 int
@@ -5660,9 +5656,9 @@ xfs_bmap_collapse_extents(
 		}
 
 		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
-			error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
-					&icur, &got, &prev, cur, &logflags,
-					tp->t_dfops);
+			error = xfs_bmse_merge(tp, ip, whichfork,
+					offset_shift_fsb, &icur, &got, &prev,
+					cur, &logflags);
 			if (error)
 				goto del_cursor;
 			goto done;
@@ -5674,8 +5670,8 @@ xfs_bmap_collapse_extents(
 		}
 	}
 
-	error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
-			&logflags, tp->t_dfops, new_startoff);
+	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
+			cur, &logflags, new_startoff);
 	if (error)
 		goto del_cursor;
 
@@ -5801,8 +5797,8 @@ xfs_bmap_insert_extents(
 			WARN_ON_ONCE(1);
 	}
 
-	error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
-			&logflags, tp->t_dfops, new_startoff);
+	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
+			cur, &logflags, new_startoff);
 	if (error)
 		goto del_cursor;
 
@@ -5979,8 +5975,7 @@ xfs_bmap_is_update_needed(
 /* Record a bmap intent. */
 static int
 __xfs_bmap_add(
-	struct xfs_mount		*mp,
-	struct xfs_defer_ops		*dfops,
+	struct xfs_trans		*tp,
 	enum xfs_bmap_intent_type	type,
 	struct xfs_inode		*ip,
 	int				whichfork,
@@ -5988,10 +5983,10 @@ __xfs_bmap_add(
 {
 	struct xfs_bmap_intent		*bi;
 
-	trace_xfs_bmap_defer(mp,
-			XFS_FSB_TO_AGNO(mp, bmap->br_startblock),
+	trace_xfs_bmap_defer(tp->t_mountp,
+			XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
 			type,
-			XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
+			XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
 			ip->i_ino, whichfork,
 			bmap->br_startoff,
 			bmap->br_blockcount,
@@ -6004,38 +5999,34 @@ __xfs_bmap_add(
 	bi->bi_whichfork = whichfork;
 	bi->bi_bmap = *bmap;
 
-	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
+	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
 	return 0;
 }
 
 /* Map an extent into a file. */
 int
 xfs_bmap_map_extent(
-	struct xfs_mount	*mp,
-	struct xfs_defer_ops	*dfops,
+	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	struct xfs_bmbt_irec	*PREV)
 {
 	if (!xfs_bmap_is_update_needed(PREV))
 		return 0;
 
-	return __xfs_bmap_add(mp, dfops, XFS_BMAP_MAP, ip,
-			XFS_DATA_FORK, PREV);
+	return __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
 }
 
 /* Unmap an extent out of a file. */
 int
 xfs_bmap_unmap_extent(
-	struct xfs_mount	*mp,
-	struct xfs_defer_ops	*dfops,
+	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	struct xfs_bmbt_irec	*PREV)
 {
 	if (!xfs_bmap_is_update_needed(PREV))
 		return 0;
 
-	return __xfs_bmap_add(mp, dfops, XFS_BMAP_UNMAP, ip,
-			XFS_DATA_FORK, PREV);
+	return __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 9165a878edcd..b6e9b639e731 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -184,9 +184,9 @@ void	xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
 void	xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
 int	xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
 void	xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
-void	__xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
-			  xfs_fsblock_t bno, xfs_filblks_t len,
-			  struct xfs_owner_info *oinfo, bool skip_discard);
+void	__xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno,
+		xfs_filblks_t len, struct xfs_owner_info *oinfo,
+		bool skip_discard);
 void	xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
 int	xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
@@ -230,13 +230,12 @@ int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
 
 static inline void
 xfs_bmap_add_free(
-	struct xfs_mount		*mp,
-	struct xfs_defer_ops		*dfops,
+	struct xfs_trans		*tp,
 	xfs_fsblock_t			bno,
 	xfs_filblks_t			len,
 	struct xfs_owner_info		*oinfo)
 {
-	__xfs_bmap_add_free(mp, dfops, bno, len, oinfo, false);
+	__xfs_bmap_add_free(tp, bno, len, oinfo, false);
 }
 
 enum xfs_bmap_intent_type {
@@ -256,10 +255,10 @@ int	xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_inode *ip,
 		enum xfs_bmap_intent_type type, int whichfork,
 		xfs_fileoff_t startoff, xfs_fsblock_t startblock,
 		xfs_filblks_t *blockcount, xfs_exntst_t state);
-int	xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
-		struct xfs_inode *ip, struct xfs_bmbt_irec *imap);
-int	xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
-		struct xfs_inode *ip, struct xfs_bmbt_irec *imap);
+int	xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
+		struct xfs_bmbt_irec *imap);
+int	xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip,
+		struct xfs_bmbt_irec *imap);
 
 static inline int xfs_bmap_fork_to_state(int whichfork)
 {
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 955e29de8cae..cdb74d2e2a43 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -289,7 +289,7 @@ xfs_bmbt_free_block(
 	struct xfs_owner_info	oinfo;
 
 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork);
-	xfs_bmap_add_free(mp, cur->bc_tp->t_dfops, fsbno, 1, &oinfo);
+	xfs_bmap_add_free(cur->bc_tp, fsbno, 1, &oinfo);
 	ip->i_d.di_nblocks--;
 
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 1cbddcf539da..ce2286763531 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -487,12 +487,15 @@ xfs_defer_cancel(
 /* Add an item for later deferred processing. */
 void
 xfs_defer_add(
-	struct xfs_defer_ops		*dop,
+	struct xfs_trans		*tp,
 	enum xfs_defer_ops_type		type,
 	struct list_head		*li)
 {
+	struct xfs_defer_ops		*dop = tp->t_dfops;
 	struct xfs_defer_pending	*dfp = NULL;
 
+	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+
 	/*
 	 * Add the item to a pending item at the end of the intake list.
 	 * If the last pending item has the same type, reuse it.  Else,
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index f091bf3abeaf..b2675f1ca909 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -35,7 +35,7 @@ enum xfs_defer_ops_type {
 	XFS_DEFER_OPS_TYPE_MAX,
 };
 
-void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
+void xfs_defer_add(struct xfs_trans *tp, enum xfs_defer_ops_type type,
 		struct list_head *h);
 int xfs_defer_finish_noroll(struct xfs_trans **tp);
 int xfs_defer_finish(struct xfs_trans **tp);
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 295304ad1bc1..a8f6db735d5d 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1838,23 +1838,24 @@ out_error:
  */
 STATIC void
 xfs_difree_inode_chunk(
-	struct xfs_mount		*mp,
+	struct xfs_trans		*tp,
 	xfs_agnumber_t			agno,
-	struct xfs_inobt_rec_incore	*rec,
-	struct xfs_defer_ops		*dfops)
+	struct xfs_inobt_rec_incore	*rec)
 {
-	xfs_agblock_t	sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino);
-	int		startidx, endidx;
-	int		nextbit;
-	xfs_agblock_t	agbno;
-	int		contigblk;
-	struct xfs_owner_info	oinfo;
+	struct xfs_mount		*mp = tp->t_mountp;
+	xfs_agblock_t			sagbno = XFS_AGINO_TO_AGBNO(mp,
+							rec->ir_startino);
+	int				startidx, endidx;
+	int				nextbit;
+	xfs_agblock_t			agbno;
+	int				contigblk;
+	struct xfs_owner_info		oinfo;
 	DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
 	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
 
 	if (!xfs_inobt_issparse(rec->ir_holemask)) {
 		/* not sparse, calculate extent info directly */
-		xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, sagbno),
+		xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno),
 				  mp->m_ialloc_blks, &oinfo);
 		return;
 	}
@@ -1898,7 +1899,7 @@ xfs_difree_inode_chunk(
 
 		ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
 		ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
-		xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, agbno),
+		xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, agbno),
 				  contigblk, &oinfo);
 
 		/* reset range to current bit and carry on... */
@@ -2002,7 +2003,7 @@ xfs_difree_inobt(
 			goto error0;
 		}
 
-		xfs_difree_inode_chunk(mp, agno, &rec, tp->t_dfops);
+		xfs_difree_inode_chunk(tp, agno, &rec);
 	} else {
 		xic->deleted = false;
 
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 86f297ca90cd..542aa1475b5f 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -34,11 +34,9 @@ enum xfs_refc_adjust_op {
 };
 
 STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
-		xfs_agblock_t agbno, xfs_extlen_t aglen,
-		struct xfs_defer_ops *dfops);
+		xfs_agblock_t agbno, xfs_extlen_t aglen);
 STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
-		xfs_agblock_t agbno, xfs_extlen_t aglen,
-		struct xfs_defer_ops *dfops);
+		xfs_agblock_t agbno, xfs_extlen_t aglen);
 
 /*
  * Look up the first record less than or equal to [bno, len] in the btree
@@ -870,7 +868,6 @@ xfs_refcount_adjust_extents(
 	xfs_agblock_t		*agbno,
 	xfs_extlen_t		*aglen,
 	enum xfs_refc_adjust_op	adj,
-	struct xfs_defer_ops	*dfops,
 	struct xfs_owner_info	*oinfo)
 {
 	struct xfs_refcount_irec	ext, tmp;
@@ -925,8 +922,8 @@ xfs_refcount_adjust_extents(
 				fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
 						cur->bc_private.a.agno,
 						tmp.rc_startblock);
-				xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
-						tmp.rc_blockcount, oinfo);
+				xfs_bmap_add_free(cur->bc_tp, fsbno,
+						  tmp.rc_blockcount, oinfo);
 			}
 
 			(*agbno) += tmp.rc_blockcount;
@@ -968,8 +965,8 @@ xfs_refcount_adjust_extents(
 			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
 					cur->bc_private.a.agno,
 					ext.rc_startblock);
-			xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
-					ext.rc_blockcount, oinfo);
+			xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount,
+					  oinfo);
 		}
 
 skip:
@@ -998,7 +995,6 @@ xfs_refcount_adjust(
 	xfs_agblock_t		*new_agbno,
 	xfs_extlen_t		*new_aglen,
 	enum xfs_refc_adjust_op	adj,
-	struct xfs_defer_ops	*dfops,
 	struct xfs_owner_info	*oinfo)
 {
 	bool			shape_changed;
@@ -1043,7 +1039,7 @@ xfs_refcount_adjust(
 
 	/* Now that we've taken care of the ends, adjust the middle extents */
 	error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen,
-			adj, dfops, oinfo);
+			adj, oinfo);
 	if (error)
 		goto out_error;
 
@@ -1090,7 +1086,6 @@ xfs_refcount_finish_one(
 	struct xfs_btree_cur		**pcur)
 {
 	struct xfs_mount		*mp = tp->t_mountp;
-	struct xfs_defer_ops		*dfops = tp->t_dfops;
 	struct xfs_btree_cur		*rcur;
 	struct xfs_buf			*agbp = NULL;
 	int				error = 0;
@@ -1145,23 +1140,23 @@ xfs_refcount_finish_one(
 	switch (type) {
 	case XFS_REFCOUNT_INCREASE:
 		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
-			new_len, XFS_REFCOUNT_ADJUST_INCREASE, dfops, NULL);
+			new_len, XFS_REFCOUNT_ADJUST_INCREASE, NULL);
 		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
 		break;
 	case XFS_REFCOUNT_DECREASE:
 		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
-			new_len, XFS_REFCOUNT_ADJUST_DECREASE, dfops, NULL);
+			new_len, XFS_REFCOUNT_ADJUST_DECREASE, NULL);
 		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
 		break;
 	case XFS_REFCOUNT_ALLOC_COW:
 		*new_fsb = startblock + blockcount;
 		*new_len = 0;
-		error = __xfs_refcount_cow_alloc(rcur, bno, blockcount, dfops);
+		error = __xfs_refcount_cow_alloc(rcur, bno, blockcount);
 		break;
 	case XFS_REFCOUNT_FREE_COW:
 		*new_fsb = startblock + blockcount;
 		*new_len = 0;
-		error = __xfs_refcount_cow_free(rcur, bno, blockcount, dfops);
+		error = __xfs_refcount_cow_free(rcur, bno, blockcount);
 		break;
 	default:
 		ASSERT(0);
@@ -1183,16 +1178,16 @@ out_cur:
  */
 static int
 __xfs_refcount_add(
-	struct xfs_mount		*mp,
-	struct xfs_defer_ops		*dfops,
+	struct xfs_trans		*tp,
 	enum xfs_refcount_intent_type	type,
 	xfs_fsblock_t			startblock,
 	xfs_extlen_t			blockcount)
 {
 	struct xfs_refcount_intent	*ri;
 
-	trace_xfs_refcount_defer(mp, XFS_FSB_TO_AGNO(mp, startblock),
-			type, XFS_FSB_TO_AGBNO(mp, startblock),
+	trace_xfs_refcount_defer(tp->t_mountp,
+			XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
+			type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
 			blockcount);
 
 	ri = kmem_alloc(sizeof(struct xfs_refcount_intent),
@@ -1202,7 +1197,7 @@ __xfs_refcount_add(
 	ri->ri_startblock = startblock;
 	ri->ri_blockcount = blockcount;
 
-	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
+	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
 	return 0;
 }
 
@@ -1211,14 +1206,13 @@ __xfs_refcount_add(
  */
 int
 xfs_refcount_increase_extent(
-	struct xfs_mount		*mp,
-	struct xfs_defer_ops		*dfops,
+	struct xfs_trans		*tp,
 	struct xfs_bmbt_irec		*PREV)
 {
-	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+	if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb))
 		return 0;
 
-	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_INCREASE,
+	return __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE,
 			PREV->br_startblock, PREV->br_blockcount);
 }
 
@@ -1227,14 +1221,13 @@ xfs_refcount_increase_extent(
  */
 int
 xfs_refcount_decrease_extent(
-	struct xfs_mount		*mp,
-	struct xfs_defer_ops		*dfops,
+	struct xfs_trans		*tp,
 	struct xfs_bmbt_irec		*PREV)
 {
-	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+	if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb))
 		return 0;
 
-	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_DECREASE,
+	return __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE,
 			PREV->br_startblock, PREV->br_blockcount);
 }
 
@@ -1522,8 +1515,7 @@ STATIC int
 __xfs_refcount_cow_alloc(
 	struct xfs_btree_cur	*rcur,
 	xfs_agblock_t		agbno,
-	xfs_extlen_t		aglen,
-	struct xfs_defer_ops	*dfops)
+	xfs_extlen_t		aglen)
 {
 	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
 			agbno, aglen);
@@ -1540,8 +1532,7 @@ STATIC int
 __xfs_refcount_cow_free(
 	struct xfs_btree_cur	*rcur,
 	xfs_agblock_t		agbno,
-	xfs_extlen_t		aglen,
-	struct xfs_defer_ops	*dfops)
+	xfs_extlen_t		aglen)
 {
 	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
 			agbno, aglen);
@@ -1554,47 +1545,45 @@ __xfs_refcount_cow_free(
 /* Record a CoW staging extent in the refcount btree. */
 int
 xfs_refcount_alloc_cow_extent(
-	struct xfs_mount		*mp,
-	struct xfs_defer_ops		*dfops,
+	struct xfs_trans		*tp,
 	xfs_fsblock_t			fsb,
 	xfs_extlen_t			len)
 {
+	struct xfs_mount		*mp = tp->t_mountp;
 	int				error;
 
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return 0;
 
-	error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
-			fsb, len);
+	error = __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);
 	if (error)
 		return error;
 
 	/* Add rmap entry */
-	return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+	return xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
 			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
 }
 
 /* Forget a CoW staging event in the refcount btree. */
 int
 xfs_refcount_free_cow_extent(
-	struct xfs_mount		*mp,
-	struct xfs_defer_ops		*dfops,
+	struct xfs_trans		*tp,
 	xfs_fsblock_t			fsb,
 	xfs_extlen_t			len)
 {
+	struct xfs_mount		*mp = tp->t_mountp;
 	int				error;
 
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return 0;
 
 	/* Remove rmap entry */
-	error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+	error = xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
 			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
 	if (error)
 		return error;
 
-	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
-			fsb, len);
+	return __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
 }
 
 struct xfs_refcount_recovery {
@@ -1692,14 +1681,13 @@ xfs_refcount_recover_cow_leftovers(
 		/* Free the orphan record */
 		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
 		fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
-		error = xfs_refcount_free_cow_extent(mp, tp->t_dfops, fsb,
+		error = xfs_refcount_free_cow_extent(tp, fsb,
 				rr->rr_rrec.rc_blockcount);
 		if (error)
 			goto out_trans;
 
 		/* Free the block. */
-		xfs_bmap_add_free(mp, tp->t_dfops, fsb,
-				rr->rr_rrec.rc_blockcount, NULL);
+		xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);
 
 		error = xfs_trans_commit(tp);
 		if (error)
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 3b72c6dbf6ad..1d9c518575e7 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -29,10 +29,10 @@ struct xfs_refcount_intent {
 	xfs_extlen_t				ri_blockcount;
 };
 
-extern int xfs_refcount_increase_extent(struct xfs_mount *mp,
-		struct xfs_defer_ops *dfops, struct xfs_bmbt_irec *irec);
-extern int xfs_refcount_decrease_extent(struct xfs_mount *mp,
-		struct xfs_defer_ops *dfops, struct xfs_bmbt_irec *irec);
+extern int xfs_refcount_increase_extent(struct xfs_trans *tp,
+		struct xfs_bmbt_irec *irec);
+extern int xfs_refcount_decrease_extent(struct xfs_trans *tp,
+		struct xfs_bmbt_irec *irec);
 
 extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp,
 		struct xfs_btree_cur *rcur, int error);
@@ -45,12 +45,10 @@ extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur,
 		xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
 		xfs_extlen_t *flen, bool find_end_of_shared);
 
-extern int xfs_refcount_alloc_cow_extent(struct xfs_mount *mp,
-		struct xfs_defer_ops *dfops, xfs_fsblock_t fsb,
-		xfs_extlen_t len);
-extern int xfs_refcount_free_cow_extent(struct xfs_mount *mp,
-		struct xfs_defer_ops *dfops, xfs_fsblock_t fsb,
-		xfs_extlen_t len);
+extern int xfs_refcount_alloc_cow_extent(struct xfs_trans *tp,
+		xfs_fsblock_t fsb, xfs_extlen_t len);
+extern int xfs_refcount_free_cow_extent(struct xfs_trans *tp,
+		xfs_fsblock_t fsb, xfs_extlen_t len);
 extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
 		xfs_agnumber_t agno);
 
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index fb266fa2cc45..245af452840e 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2277,18 +2277,18 @@ xfs_rmap_update_is_needed(
  */
 static int
 __xfs_rmap_add(
-	struct xfs_mount		*mp,
-	struct xfs_defer_ops		*dfops,
+	struct xfs_trans		*tp,
 	enum xfs_rmap_intent_type	type,
 	uint64_t			owner,
 	int				whichfork,
 	struct xfs_bmbt_irec		*bmap)
 {
-	struct xfs_rmap_intent	*ri;
+	struct xfs_rmap_intent		*ri;
 
-	trace_xfs_rmap_defer(mp, XFS_FSB_TO_AGNO(mp, bmap->br_startblock),
+	trace_xfs_rmap_defer(tp->t_mountp,
+			XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
 			type,
-			XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
+			XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
 			owner, whichfork,
 			bmap->br_startoff,
 			bmap->br_blockcount,
@@ -2301,23 +2301,22 @@ __xfs_rmap_add(
 	ri->ri_whichfork = whichfork;
 	ri->ri_bmap = *bmap;
 
-	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list);
+	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list);
 	return 0;
 }
 
 /* Map an extent into a file. */
 int
 xfs_rmap_map_extent(
-	struct xfs_mount	*mp,
-	struct xfs_defer_ops	*dfops,
+	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	int			whichfork,
 	struct xfs_bmbt_irec	*PREV)
 {
-	if (!xfs_rmap_update_is_needed(mp, whichfork))
+	if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
 		return 0;
 
-	return __xfs_rmap_add(mp, dfops, xfs_is_reflink_inode(ip) ?
+	return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
 			XFS_RMAP_MAP_SHARED : XFS_RMAP_MAP, ip->i_ino,
 			whichfork, PREV);
 }
@@ -2325,25 +2324,29 @@ xfs_rmap_map_extent(
 /* Unmap an extent out of a file. */
 int
 xfs_rmap_unmap_extent(
-	struct xfs_mount	*mp,
-	struct xfs_defer_ops	*dfops,
+	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	int			whichfork,
 	struct xfs_bmbt_irec	*PREV)
 {
-	if (!xfs_rmap_update_is_needed(mp, whichfork))
+	if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
 		return 0;
 
-	return __xfs_rmap_add(mp, dfops, xfs_is_reflink_inode(ip) ?
+	return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
 			XFS_RMAP_UNMAP_SHARED : XFS_RMAP_UNMAP, ip->i_ino,
 			whichfork, PREV);
 }
 
-/* Convert a data fork extent from unwritten to real or vice versa. */
+/*
+ * Convert a data fork extent from unwritten to real or vice versa.
+ *
+ * Note that tp can be NULL here as no transaction is used for COW fork
+ * unwritten conversion.
+ */
 int
 xfs_rmap_convert_extent(
 	struct xfs_mount	*mp,
-	struct xfs_defer_ops	*dfops,
+	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	int			whichfork,
 	struct xfs_bmbt_irec	*PREV)
@@ -2351,7 +2354,7 @@ xfs_rmap_convert_extent(
 	if (!xfs_rmap_update_is_needed(mp, whichfork))
 		return 0;
 
-	return __xfs_rmap_add(mp, dfops, xfs_is_reflink_inode(ip) ?
+	return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
 			XFS_RMAP_CONVERT_SHARED : XFS_RMAP_CONVERT, ip->i_ino,
 			whichfork, PREV);
 }
@@ -2359,8 +2362,7 @@ xfs_rmap_convert_extent(
 /* Schedule the creation of an rmap for non-file data. */
 int
 xfs_rmap_alloc_extent(
-	struct xfs_mount	*mp,
-	struct xfs_defer_ops	*dfops,
+	struct xfs_trans	*tp,
 	xfs_agnumber_t		agno,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
@@ -2368,23 +2370,21 @@ xfs_rmap_alloc_extent(
 {
 	struct xfs_bmbt_irec	bmap;
 
-	if (!xfs_rmap_update_is_needed(mp, XFS_DATA_FORK))
+	if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK))
 		return 0;
 
-	bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
+	bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno);
 	bmap.br_blockcount = len;
 	bmap.br_startoff = 0;
 	bmap.br_state = XFS_EXT_NORM;
 
-	return __xfs_rmap_add(mp, dfops, XFS_RMAP_ALLOC, owner,
-			XFS_DATA_FORK, &bmap);
+	return __xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, XFS_DATA_FORK, &bmap);
 }
 
 /* Schedule the deletion of an rmap for non-file data. */
 int
 xfs_rmap_free_extent(
-	struct xfs_mount	*mp,
-	struct xfs_defer_ops	*dfops,
+	struct xfs_trans	*tp,
 	xfs_agnumber_t		agno,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
@@ -2392,16 +2392,15 @@ xfs_rmap_free_extent(
 {
 	struct xfs_bmbt_irec	bmap;
 
-	if (!xfs_rmap_update_is_needed(mp, XFS_DATA_FORK))
+	if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK))
 		return 0;
 
-	bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
+	bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno);
 	bmap.br_blockcount = len;
 	bmap.br_startoff = 0;
 	bmap.br_state = XFS_EXT_NORM;
 
-	return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner,
-			XFS_DATA_FORK, &bmap);
+	return __xfs_rmap_add(tp, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap);
 }
 
 /* Compare rmap records.  Returns -1 if a < b, 1 if a > b, and 0 if equal. */
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 9f19454768b2..157dc722ad35 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -185,21 +185,17 @@ struct xfs_rmap_intent {
 };
 
 /* functions for updating the rmapbt based on bmbt map/unmap operations */
-int xfs_rmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+int xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
+		int whichfork, struct xfs_bmbt_irec *imap);
+int xfs_rmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip,
+		int whichfork, struct xfs_bmbt_irec *imap);
+int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_trans *tp,
 		struct xfs_inode *ip, int whichfork,
 		struct xfs_bmbt_irec *imap);
-int xfs_rmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
-		struct xfs_inode *ip, int whichfork,
-		struct xfs_bmbt_irec *imap);
-int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
-		struct xfs_inode *ip, int whichfork,
-		struct xfs_bmbt_irec *imap);
-int xfs_rmap_alloc_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
-		xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
-		uint64_t owner);
-int xfs_rmap_free_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
-		xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
-		uint64_t owner);
+int xfs_rmap_alloc_extent(struct xfs_trans *tp, xfs_agnumber_t agno,
+		xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner);
+int xfs_rmap_free_extent(struct xfs_trans *tp, xfs_agnumber_t agno,
+		xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner);
 
 void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
 		struct xfs_btree_cur *rcur, int error);
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index e828e0b51814..ce45f066995e 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -486,8 +486,7 @@ xfs_bui_recover(
 		irec.br_blockcount = count;
 		irec.br_startoff = bmap->me_startoff;
 		irec.br_state = state;
-		error = xfs_bmap_unmap_extent(tp->t_mountp, tp->t_dfops, ip,
-					      &irec);
+		error = xfs_bmap_unmap_extent(tp, ip, &irec);
 		if (error)
 			goto err_inode;
 	}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index d9dad399440a..addbd74ecd8e 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1534,7 +1534,6 @@ xfs_swap_extent_rmap(
 	struct xfs_inode		*tip)
 {
 	struct xfs_trans		*tp = *tpp;
-	struct xfs_mount		*mp = tp->t_mountp;
 	struct xfs_bmbt_irec		irec;
 	struct xfs_bmbt_irec		uirec;
 	struct xfs_bmbt_irec		tirec;
@@ -1598,26 +1597,22 @@ xfs_swap_extent_rmap(
 			trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);
 
 			/* Remove the mapping from the donor file. */
-			error = xfs_bmap_unmap_extent(mp, tp->t_dfops, tip,
-					&uirec);
+			error = xfs_bmap_unmap_extent(tp, tip, &uirec);
 			if (error)
 				goto out_defer;
 
 			/* Remove the mapping from the source file. */
-			error = xfs_bmap_unmap_extent(mp, tp->t_dfops, ip,
-					&irec);
+			error = xfs_bmap_unmap_extent(tp, ip, &irec);
 			if (error)
 				goto out_defer;
 
 			/* Map the donor file's blocks into the source file. */
-			error = xfs_bmap_map_extent(mp, tp->t_dfops, ip,
-					&uirec);
+			error = xfs_bmap_map_extent(tp, ip, &uirec);
 			if (error)
 				goto out_defer;
 
 			/* Map the source file's blocks into the donor file. */
-			error = xfs_bmap_map_extent(mp, tp->t_dfops, tip,
-					&irec);
+			error = xfs_bmap_map_extent(tp, tip, &irec);
 			if (error)
 				goto out_defer;
 
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 43c4ac374cba..fce38b56b962 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -490,24 +490,18 @@ xfs_cui_recover(
 			irec.br_blockcount = new_len;
 			switch (type) {
 			case XFS_REFCOUNT_INCREASE:
-				error = xfs_refcount_increase_extent(
-						tp->t_mountp, tp->t_dfops,
-						&irec);
+				error = xfs_refcount_increase_extent(tp, &irec);
 				break;
 			case XFS_REFCOUNT_DECREASE:
-				error = xfs_refcount_decrease_extent(
-						tp->t_mountp, tp->t_dfops,
-						&irec);
+				error = xfs_refcount_decrease_extent(tp, &irec);
 				break;
 			case XFS_REFCOUNT_ALLOC_COW:
-				error = xfs_refcount_alloc_cow_extent(
-						tp->t_mountp, tp->t_dfops,
+				error = xfs_refcount_alloc_cow_extent(tp,
 						irec.br_startblock,
 						irec.br_blockcount);
 				break;
 			case XFS_REFCOUNT_FREE_COW:
-				error = xfs_refcount_free_cow_extent(
-						tp->t_mountp, tp->t_dfops,
+				error = xfs_refcount_free_cow_extent(tp,
 						irec.br_startblock,
 						irec.br_blockcount);
 				break;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 2ec562d75494..cbceb320a2e7 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -506,15 +506,13 @@ xfs_reflink_cancel_cow_blocks(
 			ASSERT((*tpp)->t_firstblock == NULLFSBLOCK);
 
 			/* Free the CoW orphan record. */
-			error = xfs_refcount_free_cow_extent(ip->i_mount,
-					(*tpp)->t_dfops, del.br_startblock,
-					del.br_blockcount);
+			error = xfs_refcount_free_cow_extent(*tpp,
+					del.br_startblock, del.br_blockcount);
 			if (error)
 				break;
 
-			xfs_bmap_add_free(ip->i_mount, (*tpp)->t_dfops,
-					del.br_startblock, del.br_blockcount,
-					NULL);
+			xfs_bmap_add_free(*tpp, del.br_startblock,
+					  del.br_blockcount, NULL);
 
 			/* Roll the transaction */
 			error = xfs_defer_finish(tpp);
@@ -694,14 +692,13 @@ xfs_reflink_end_cow(
 		trace_xfs_reflink_cow_remap(ip, &del);
 
 		/* Free the CoW orphan record. */
-		error = xfs_refcount_free_cow_extent(tp->t_mountp, tp->t_dfops,
-				del.br_startblock, del.br_blockcount);
+		error = xfs_refcount_free_cow_extent(tp, del.br_startblock,
+				del.br_blockcount);
 		if (error)
 			goto out_cancel;
 
 		/* Map the new blocks into the data fork. */
-		error = xfs_bmap_map_extent(tp->t_mountp, tp->t_dfops, ip,
-					    &del);
+		error = xfs_bmap_map_extent(tp, ip, &del);
 		if (error)
 			goto out_cancel;
 
@@ -1046,12 +1043,12 @@ xfs_reflink_remap_extent(
 				uirec.br_blockcount, uirec.br_startblock);
 
 		/* Update the refcount tree */
-		error = xfs_refcount_increase_extent(mp, tp->t_dfops, &uirec);
+		error = xfs_refcount_increase_extent(tp, &uirec);
 		if (error)
 			goto out_cancel;
 
 		/* Map the new blocks into the data fork. */
-		error = xfs_bmap_map_extent(mp, tp->t_dfops, ip, &uirec);
+		error = xfs_bmap_map_extent(tp, ip, &uirec);
 		if (error)
 			goto out_cancel;
 
-- 
cgit v1.2.3


From c03edc9e49b6a3c1f4b27f505a04093ab333b245 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:35 -0700
Subject: xfs: always defer agfl block frees

The AGFL fixup code conditionally defers block frees from the free
list based on whether the current transaction has an associated
xfs_defer_ops structure. Now that dfops is embedded in the
transaction and the internal dfops is used unconditionally, this
invariant is always true.

Remove the now dead logic to check for ->t_dfops in
xfs_alloc_fix_freelist() and unconditionally defer AGFL block frees.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 5580b6e23bb3..e1c0c0d2f1b0 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2323,15 +2323,8 @@ xfs_alloc_fix_freelist(
 		if (error)
 			goto out_agbp_relse;
 
-		/* defer agfl frees if dfops is provided */
-		if (tp->t_dfops) {
-			xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
-		} else {
-			error = xfs_free_agfl_block(tp, args->agno, bno, agbp,
-						    &targs.oinfo);
-			if (error)
-				goto out_agbp_relse;
-		}
+		/* defer agfl frees */
+		xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
 	}
 
 	targs.tp = tp;
-- 
cgit v1.2.3


From 9d9e6233859706875c392707efd6d516cfb764fb Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 1 Aug 2018 07:20:35 -0700
Subject: xfs: fold dfops into the transaction

struct xfs_defer_ops has now been reduced to a single list_head. The
external dfops mechanism is unused and thus everywhere a (permanent)
transaction is accessible the associated dfops structure is as well.

Remove the xfs_defer_ops structure and fold the list_head into the
transaction. Also remove the last remnant of external dfops in
xfs_trans_dup().

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c     |  1 -
 fs/xfs/libxfs/xfs_btree.h    |  1 -
 fs/xfs/libxfs/xfs_da_btree.h |  1 -
 fs/xfs/libxfs/xfs_defer.c    | 67 +++++++++++++-------------------------------
 fs/xfs/libxfs/xfs_defer.h    |  2 --
 fs/xfs/libxfs/xfs_dir2.c     |  2 --
 fs/xfs/libxfs/xfs_dir2.h     |  1 -
 fs/xfs/xfs_inode.h           |  1 -
 fs/xfs/xfs_reflink.c         |  5 ++--
 fs/xfs/xfs_trace.h           | 40 ++++++++++++--------------
 fs/xfs/xfs_trans.c           | 13 ++++-----
 fs/xfs/xfs_trans.h           |  8 ++----
 12 files changed, 46 insertions(+), 96 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index c9fec0443f38..5648a177e0ac 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4286,7 +4286,6 @@ xfs_bmapi_write(
 	bma.ip = ip;
 	bma.total = total;
 	bma.datatype = 0;
-	ASSERT(!tp || tp->t_dfops);
 
 	while (bno < end && n < *nmap) {
 		bool			need_alloc = false, wasdelay = false;
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 503615f4d729..e3b3e9dce5da 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -7,7 +7,6 @@
 #define	__XFS_BTREE_H__
 
 struct xfs_buf;
-struct xfs_defer_ops;
 struct xfs_inode;
 struct xfs_mount;
 struct xfs_trans;
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 59e290ef334f..84dd865b6c3d 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -7,7 +7,6 @@
 #ifndef __XFS_DA_BTREE_H__
 #define	__XFS_DA_BTREE_H__
 
-struct xfs_defer_ops;
 struct xfs_inode;
 struct xfs_trans;
 struct zone;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index ce2286763531..e792b167150a 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -183,11 +183,10 @@ STATIC void
 xfs_defer_create_intents(
 	struct xfs_trans		*tp)
 {
-	struct xfs_defer_ops		*dop = tp->t_dfops;
 	struct list_head		*li;
 	struct xfs_defer_pending	*dfp;
 
-	list_for_each_entry(dfp, &dop->dop_intake, dfp_list) {
+	list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
 		dfp->dfp_intent = dfp->dfp_type->create_intent(tp,
 				dfp->dfp_count);
 		trace_xfs_defer_create_intent(tp->t_mountp, dfp);
@@ -204,10 +203,9 @@ xfs_defer_trans_abort(
 	struct xfs_trans		*tp,
 	struct list_head		*dop_pending)
 {
-	struct xfs_defer_ops		*dop = tp->t_dfops;
 	struct xfs_defer_pending	*dfp;
 
-	trace_xfs_defer_trans_abort(tp->t_mountp, dop, _RET_IP_);
+	trace_xfs_defer_trans_abort(tp, _RET_IP_);
 
 	/* Abort intent items that don't have a done item. */
 	list_for_each_entry(dfp, dop_pending, dfp_list) {
@@ -266,14 +264,13 @@ xfs_defer_trans_roll(
 		}
 	}
 
-	trace_xfs_defer_trans_roll(tp->t_mountp, tp->t_dfops, _RET_IP_);
+	trace_xfs_defer_trans_roll(tp, _RET_IP_);
 
 	/* Roll the transaction. */
 	error = xfs_trans_roll(tpp);
 	tp = *tpp;
 	if (error) {
-		trace_xfs_defer_trans_roll_error(tp->t_mountp,
-						 tp->t_dfops, error);
+		trace_xfs_defer_trans_roll_error(tp, error);
 		return error;
 	}
 
@@ -297,7 +294,7 @@ static void
 xfs_defer_reset(
 	struct xfs_trans	*tp)
 {
-	ASSERT(list_empty(&tp->t_dfops->dop_intake));
+	ASSERT(list_empty(&tp->t_dfops));
 
 	/*
 	 * Low mode state transfers across transaction rolls to mirror dfops
@@ -358,15 +355,13 @@ xfs_defer_finish_noroll(
 
 	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
 
-	trace_xfs_defer_finish((*tp)->t_mountp, (*tp)->t_dfops, _RET_IP_);
+	trace_xfs_defer_finish(*tp, _RET_IP_);
 
 	/* Until we run out of pending work to finish... */
-	while (!list_empty(&dop_pending) ||
-	       !list_empty(&(*tp)->t_dfops->dop_intake)) {
+	while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
 		/* log intents and pull in intake items */
 		xfs_defer_create_intents(*tp);
-		list_splice_tail_init(&(*tp)->t_dfops->dop_intake,
-				      &dop_pending);
+		list_splice_tail_init(&(*tp)->t_dfops, &dop_pending);
 
 		/*
 		 * Roll the transaction.
@@ -438,14 +433,13 @@ out:
 	if (error) {
 		xfs_defer_trans_abort(*tp, &dop_pending);
 		xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
-		trace_xfs_defer_finish_error((*tp)->t_mountp, (*tp)->t_dfops,
-					     error);
+		trace_xfs_defer_finish_error(*tp, error);
 		xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
 		xfs_defer_cancel(*tp);
 		return error;
 	}
 
-	trace_xfs_defer_finish_done((*tp)->t_mountp, (*tp)->t_dfops, _RET_IP_);
+	trace_xfs_defer_finish_done(*tp, _RET_IP_);
 	return 0;
 }
 
@@ -480,8 +474,8 @@ xfs_defer_cancel(
 {
 	struct xfs_mount	*mp = tp->t_mountp;
 
-	trace_xfs_defer_cancel(mp, tp->t_dfops, _RET_IP_);
-	xfs_defer_cancel_list(mp, &tp->t_dfops->dop_intake);
+	trace_xfs_defer_cancel(tp, _RET_IP_);
+	xfs_defer_cancel_list(mp, &tp->t_dfops);
 }
 
 /* Add an item for later deferred processing. */
@@ -491,7 +485,6 @@ xfs_defer_add(
 	enum xfs_defer_ops_type		type,
 	struct list_head		*li)
 {
-	struct xfs_defer_ops		*dop = tp->t_dfops;
 	struct xfs_defer_pending	*dfp = NULL;
 
 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -501,8 +494,8 @@ xfs_defer_add(
 	 * If the last pending item has the same type, reuse it.  Else,
 	 * create a new pending item at the end of the intake list.
 	 */
-	if (!list_empty(&dop->dop_intake)) {
-		dfp = list_last_entry(&dop->dop_intake,
+	if (!list_empty(&tp->t_dfops)) {
+		dfp = list_last_entry(&tp->t_dfops,
 				struct xfs_defer_pending, dfp_list);
 		if (dfp->dfp_type->type != type ||
 		    (dfp->dfp_type->max_items &&
@@ -517,7 +510,7 @@ xfs_defer_add(
 		dfp->dfp_done = NULL;
 		dfp->dfp_count = 0;
 		INIT_LIST_HEAD(&dfp->dfp_work);
-		list_add_tail(&dfp->dfp_list, &dop->dop_intake);
+		list_add_tail(&dfp->dfp_list, &tp->t_dfops);
 	}
 
 	list_add_tail(li, &dfp->dfp_work);
@@ -532,39 +525,17 @@ xfs_defer_init_op_type(
 	defer_op_types[type->type] = type;
 }
 
-/* Initialize a deferred operation. */
-void
-xfs_defer_init(
-	struct xfs_trans		*tp,
-	struct xfs_defer_ops		*dop)
-{
-	struct xfs_mount		*mp = NULL;
-
-	memset(dop, 0, sizeof(struct xfs_defer_ops));
-	INIT_LIST_HEAD(&dop->dop_intake);
-	if (tp) {
-		ASSERT(tp->t_firstblock == NULLFSBLOCK);
-		tp->t_dfops = dop;
-		mp = tp->t_mountp;
-	}
-	trace_xfs_defer_init(mp, dop, _RET_IP_);
-}
-
 /*
- * Move state from one xfs_defer_ops to another and reset the source to initial
- * state. This is primarily used to carry state forward across transaction rolls
- * with internal dfops.
+ * Move deferred ops from one transaction to another and reset the source to
+ * initial state. This is primarily used to carry state forward across
+ * transaction rolls with pending dfops.
  */
 void
 xfs_defer_move(
 	struct xfs_trans	*dtp,
 	struct xfs_trans	*stp)
 {
-	struct xfs_defer_ops	*dst = dtp->t_dfops;
-	struct xfs_defer_ops	*src = stp->t_dfops;
-	ASSERT(dst != src);
-
-	list_splice_init(&src->dop_intake, &dst->dop_intake);
+	list_splice_init(&stp->t_dfops, &dtp->t_dfops);
 
 	/*
 	 * Low free space mode was historically controlled by a dfops field.
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index b2675f1ca909..2584a5b95b0d 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -7,7 +7,6 @@
 #define	__XFS_DEFER_H__
 
 struct xfs_defer_op_type;
-struct xfs_defer_ops;
 
 /*
  * Save a log intent item and a list of extents, so that we can replay
@@ -40,7 +39,6 @@ void xfs_defer_add(struct xfs_trans *tp, enum xfs_defer_ops_type type,
 int xfs_defer_finish_noroll(struct xfs_trans **tp);
 int xfs_defer_finish(struct xfs_trans **tp);
 void xfs_defer_cancel(struct xfs_trans *);
-void xfs_defer_init(struct xfs_trans *tp, struct xfs_defer_ops *dop);
 void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp);
 
 /* Description of a deferred type. */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 4ea1fddb126f..229152cd1a24 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -424,7 +424,6 @@ xfs_dir_removename(
 	int			v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
-	ASSERT(tp->t_dfops);
 	XFS_STATS_INC(dp->i_mount, xs_dir_remove);
 
 	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
@@ -483,7 +482,6 @@ xfs_dir_replace(
 	int			v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
-	ASSERT(tp->t_dfops);
 
 	rval = xfs_dir_ino_validate(tp->t_mountp, inum);
 	if (rval)
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index ba5acd03de94..c3e3f6b813d8 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -9,7 +9,6 @@
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
 
-struct xfs_defer_ops;
 struct xfs_da_args;
 struct xfs_inode;
 struct xfs_mount;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 79a3e61a6991..be2014520155 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -15,7 +15,6 @@
 struct xfs_dinode;
 struct xfs_inode;
 struct xfs_buf;
-struct xfs_defer_ops;
 struct xfs_bmbt_irec;
 struct xfs_inode_log_item;
 struct xfs_mount;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index cbceb320a2e7..38f405415b88 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -502,7 +502,6 @@ xfs_reflink_cancel_cow_blocks(
 			if (error)
 				break;
 		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
-			ASSERT((*tpp)->t_dfops);
 			ASSERT((*tpp)->t_firstblock == NULLFSBLOCK);
 
 			/* Free the CoW orphan record. */
@@ -678,7 +677,7 @@ xfs_reflink_end_cow(
 			goto prev_extent;
 
 		/* Unmap the old blocks in the data fork. */
-		ASSERT(tp->t_dfops && tp->t_firstblock == NULLFSBLOCK);
+		ASSERT(tp->t_firstblock == NULLFSBLOCK);
 		rlen = del.br_blockcount;
 		error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1);
 		if (error)
@@ -1021,7 +1020,7 @@ xfs_reflink_remap_extent(
 	/* Unmap the old blocks in the data fork. */
 	rlen = unmap_len;
 	while (rlen) {
-		ASSERT(tp->t_dfops && tp->t_firstblock == NULLFSBLOCK);
+		ASSERT(tp->t_firstblock == NULLFSBLOCK);
 		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1);
 		if (error)
 			goto out_cancel;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index fec9cfe3dfb4..ad315e83bc02 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2213,57 +2213,54 @@ DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range);
 
 /* deferred ops */
 struct xfs_defer_pending;
-struct xfs_defer_ops;
 
 DECLARE_EVENT_CLASS(xfs_defer_class,
-	TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop,
-		 unsigned long caller_ip),
-	TP_ARGS(mp, dop, caller_ip),
+	TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip),
+	TP_ARGS(tp, caller_ip),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
-		__field(void *, dop)
+		__field(struct xfs_trans *, tp)
 		__field(char, committed)
 		__field(unsigned long, caller_ip)
 	),
 	TP_fast_assign(
-		__entry->dev = mp ? mp->m_super->s_dev : 0;
-		__entry->dop = dop;
+		__entry->dev = tp->t_mountp->m_super->s_dev;
+		__entry->tp = tp;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d ops %p caller %pS",
+	TP_printk("dev %d:%d tp %p caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->dop,
+		  __entry->tp,
 		  (char *)__entry->caller_ip)
 )
 #define DEFINE_DEFER_EVENT(name) \
 DEFINE_EVENT(xfs_defer_class, name, \
-	TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, \
-		 unsigned long caller_ip), \
-	TP_ARGS(mp, dop, caller_ip))
+	TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip), \
+	TP_ARGS(tp, caller_ip))
 
 DECLARE_EVENT_CLASS(xfs_defer_error_class,
-	TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error),
-	TP_ARGS(mp, dop, error),
+	TP_PROTO(struct xfs_trans *tp, int error),
+	TP_ARGS(tp, error),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
-		__field(void *, dop)
+		__field(struct xfs_trans *, tp)
 		__field(char, committed)
 		__field(int, error)
 	),
 	TP_fast_assign(
-		__entry->dev = mp ? mp->m_super->s_dev : 0;
-		__entry->dop = dop;
+		__entry->dev = tp->t_mountp->m_super->s_dev;
+		__entry->tp = tp;
 		__entry->error = error;
 	),
-	TP_printk("dev %d:%d ops %p err %d",
+	TP_printk("dev %d:%d tp %p err %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->dop,
+		  __entry->tp,
 		  __entry->error)
 )
 #define DEFINE_DEFER_ERROR_EVENT(name) \
 DEFINE_EVENT(xfs_defer_error_class, name, \
-	TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error), \
-	TP_ARGS(mp, dop, error))
+	TP_PROTO(struct xfs_trans *tp, int error), \
+	TP_ARGS(tp, error))
 
 DECLARE_EVENT_CLASS(xfs_defer_pending_class,
 	TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp),
@@ -2382,7 +2379,6 @@ DEFINE_EVENT(xfs_map_extent_deferred_class, name, \
 		 xfs_exntst_t state), \
 	TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state))
 
-DEFINE_DEFER_EVENT(xfs_defer_init);
 DEFINE_DEFER_EVENT(xfs_defer_cancel);
 DEFINE_DEFER_EVENT(xfs_defer_trans_roll);
 DEFINE_DEFER_EVENT(xfs_defer_trans_abort);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 413e4138357f..bedc5a5133a5 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -100,6 +100,7 @@ xfs_trans_dup(
 	ntp->t_mountp = tp->t_mountp;
 	INIT_LIST_HEAD(&ntp->t_items);
 	INIT_LIST_HEAD(&ntp->t_busy);
+	INIT_LIST_HEAD(&ntp->t_dfops);
 	ntp->t_firstblock = NULLFSBLOCK;
 
 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -120,12 +121,8 @@ xfs_trans_dup(
 	tp->t_rtx_res = tp->t_rtx_res_used;
 	ntp->t_pflags = tp->t_pflags;
 
-	/* copy the dfops pointer if it's external, otherwise move it */
-	xfs_defer_init(ntp, &ntp->t_dfops_internal);
-	if (tp->t_dfops != &tp->t_dfops_internal)
-		ntp->t_dfops = tp->t_dfops;
-	else
-		xfs_defer_move(ntp, tp);
+	/* move deferred ops over to the new tp */
+	xfs_defer_move(ntp, tp);
 
 	xfs_trans_dup_dqinfo(tp, ntp);
 
@@ -280,8 +277,8 @@ xfs_trans_alloc(
 	tp->t_mountp = mp;
 	INIT_LIST_HEAD(&tp->t_items);
 	INIT_LIST_HEAD(&tp->t_busy);
+	INIT_LIST_HEAD(&tp->t_dfops);
 	tp->t_firstblock = NULLFSBLOCK;
-	xfs_defer_init(tp, &tp->t_dfops_internal);
 
 	error = xfs_trans_reserve(tp, resp, blocks, rtextents);
 	if (error) {
@@ -929,7 +926,7 @@ __xfs_trans_commit(
 	 * Finish deferred items on final commit. Only permanent transactions
 	 * should ever have deferred ops.
 	 */
-	WARN_ON_ONCE(!list_empty(&tp->t_dfops->dop_intake) &&
+	WARN_ON_ONCE(!list_empty(&tp->t_dfops) &&
 		     !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
 	if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
 		error = xfs_defer_finish_noroll(&tp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 1cdc7c0ebeac..c3d278e96ad1 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -90,13 +90,10 @@ void	xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
 #define XFS_ITEM_FLUSHING	3
 
 /*
- * Deferred operations tracking structure.
+ * Deferred operation item relogging limits.
  */
 #define XFS_DEFER_OPS_NR_INODES	2	/* join up to two inodes */
 #define XFS_DEFER_OPS_NR_BUFS	2	/* join up to two buffers */
-struct xfs_defer_ops {
-	struct list_head	dop_intake;	/* unlogged pending work */
-};
 
 /*
  * This is the structure maintained for every active transaction.
@@ -114,7 +111,6 @@ typedef struct xfs_trans {
 	struct xlog_ticket	*t_ticket;	/* log mgr ticket */
 	struct xfs_mount	*t_mountp;	/* ptr to fs mount struct */
 	struct xfs_dquot_acct   *t_dqinfo;	/* acctg info for dquots */
-	struct xfs_defer_ops	*t_dfops;	/* dfops reference */
 	int64_t			t_icount_delta;	/* superblock icount change */
 	int64_t			t_ifree_delta;	/* superblock ifree change */
 	int64_t			t_fdblocks_delta; /* superblock fdblocks chg */
@@ -136,8 +132,8 @@ typedef struct xfs_trans {
 	int64_t			t_rextslog_delta;/* superblocks rextslog chg */
 	struct list_head	t_items;	/* log item descriptors */
 	struct list_head	t_busy;		/* list of busy extents */
+	struct list_head	t_dfops;	/* deferred operations */
 	unsigned long		t_pflags;	/* saved process flags state */
-	struct xfs_defer_ops	t_dfops_internal;
 } xfs_trans_t;
 
 /*
-- 
cgit v1.2.3


From c2b6e1591b6b15e1dcd9c1596b0371b6abc48fed Mon Sep 17 00:00:00 2001
From: Thomas Bianchi <thomas.bianchi8@gmail.com>
Date: Wed, 1 Aug 2018 12:58:34 -0700
Subject: xfs: substitute spaces with tabs

Inside xfs_attr_shortform_list, remove the spaces at the beginning of the
line and replace them with tabs.
Issue found by checkpatch.

ERROR: code indent should use tabs where possible

Signed-off-by: Thomas Bianchi <thomas.bianchi8@gmail.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_attr_list.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index f9ca80154c9c..a58034049995 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -87,7 +87,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 	 */
 	if (context->bufsize == 0 ||
 	    (XFS_ISRESET_CURSOR(cursor) &&
-             (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
+	     (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
 		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
 			context->put_listent(context,
 					     sfe->flags,
-- 
cgit v1.2.3


From 21e2156f3c4b2ad8b780a6d02342ca0e028a8acd Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Fri, 3 Aug 2018 10:57:52 +0100
Subject: gfs2: Get rid of gfs2_ea_strlen

Function gfs2_ea_strlen is only called from ea_list_i, so inline it
there.  Remove the duplicate switch statement and the creative use of
memcpy to set a null byte.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Andrew Price <anprice@redhat.com>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/xattr.c | 59 +++++++++++++++++++++------------------------------------
 1 file changed, 22 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index f2bce1e0f6fb..38515988aaf7 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -343,60 +343,45 @@ struct ea_list {
 	unsigned int ei_size;
 };
 
-static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
-{
-	switch (ea->ea_type) {
-	case GFS2_EATYPE_USR:
-		return 5 + ea->ea_name_len + 1;
-	case GFS2_EATYPE_SYS:
-		return 7 + ea->ea_name_len + 1;
-	case GFS2_EATYPE_SECURITY:
-		return 9 + ea->ea_name_len + 1;
-	default:
-		return 0;
-	}
-}
-
 static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
 		     struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
 		     void *private)
 {
 	struct ea_list *ei = private;
 	struct gfs2_ea_request *er = ei->ei_er;
-	unsigned int ea_size = gfs2_ea_strlen(ea);
+	unsigned int ea_size;
+	char *prefix;
+	unsigned int l;
 
 	if (ea->ea_type == GFS2_EATYPE_UNUSED)
 		return 0;
 
-	if (er->er_data_len) {
-		char *prefix = NULL;
-		unsigned int l = 0;
-		char c = 0;
+	switch (ea->ea_type) {
+	case GFS2_EATYPE_USR:
+		prefix = "user.";
+		l = 5;
+		break;
+	case GFS2_EATYPE_SYS:
+		prefix = "system.";
+		l = 7;
+		break;
+	case GFS2_EATYPE_SECURITY:
+		prefix = "security.";
+		l = 9;
+		break;
+	default:
+		BUG();
+	}
 
+	ea_size = l + ea->ea_name_len + 1;
+	if (er->er_data_len) {
 		if (ei->ei_size + ea_size > er->er_data_len)
 			return -ERANGE;
 
-		switch (ea->ea_type) {
-		case GFS2_EATYPE_USR:
-			prefix = "user.";
-			l = 5;
-			break;
-		case GFS2_EATYPE_SYS:
-			prefix = "system.";
-			l = 7;
-			break;
-		case GFS2_EATYPE_SECURITY:
-			prefix = "security.";
-			l = 9;
-			break;
-		}
-
-		BUG_ON(l == 0);
-
 		memcpy(er->er_data + ei->ei_size, prefix, l);
 		memcpy(er->er_data + ei->ei_size + l, GFS2_EA2NAME(ea),
 		       ea->ea_name_len);
-		memcpy(er->er_data + ei->ei_size + ea_size - 1, &c, 1);
+		er->er_data[ei->ei_size + ea_size - 1] = 0;
 	}
 
 	ei->ei_size += ea_size;
-- 
cgit v1.2.3


From 1f31c98d650ca342e2f54cb17c4554ad110c5a11 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 1 Aug 2018 15:50:27 -0700
Subject: xfs: only validate summary counts on primary superblock

Skip the summary counter checks for secondary superblocks and inprogress
primary superblocks because mkfs has always written those out with
zeroed summary counters.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/xfs/libxfs/xfs_sb.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index ca1b3a7a9171..081f46e30556 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -148,6 +148,7 @@ xfs_validate_sb_read(
 STATIC int
 xfs_validate_sb_write(
 	struct xfs_mount	*mp,
+	struct xfs_buf		*bp,
 	struct xfs_sb		*sbp)
 {
 	/*
@@ -155,10 +156,15 @@ xfs_validate_sb_write(
 	 * the superblock.  We skip this in the read validator because there
 	 * could be newer superblocks in the log and if the values are garbage
 	 * even after replay we'll recalculate them at the end of log mount.
+	 *
+	 * mkfs has traditionally written zeroed counters to inprogress and
+	 * secondary superblocks, so allow this usage to continue because
+	 * we never read counters from such superblocks.
 	 */
-	if (sbp->sb_fdblocks > sbp->sb_dblocks ||
-	    !xfs_verify_icount(mp, sbp->sb_icount) ||
-	    sbp->sb_ifree > sbp->sb_icount) {
+	if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && !sbp->sb_inprogress &&
+	    (sbp->sb_fdblocks > sbp->sb_dblocks ||
+	     !xfs_verify_icount(mp, sbp->sb_icount) ||
+	     sbp->sb_ifree > sbp->sb_icount)) {
 		xfs_warn(mp, "SB summary counter sanity check failed");
 		return -EFSCORRUPTED;
 	}
@@ -756,7 +762,7 @@ xfs_sb_write_verify(
 	error = xfs_validate_sb_common(mp, bp, &sb);
 	if (error)
 		goto out_error;
-	error = xfs_validate_sb_write(mp, &sb);
+	error = xfs_validate_sb_write(mp, bp, &sb);
 	if (error)
 		goto out_error;
 
-- 
cgit v1.2.3


From a0e336ba3e3d1c7ec0f738a2e2e203434c00b08e Mon Sep 17 00:00:00 2001
From: Huang Chong <huang.chong@zte.com.cn>
Date: Fri, 3 Aug 2018 08:17:54 -0700
Subject: xfs: fix a comment in xfs_log_reserve

Fix the comment in xfs_log_reserve to avoid confusion.

Signed-off-by: Huang Chong <huang.chong@zte.com.cn>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 00df4f39093a..c3b610b687d1 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -410,7 +410,7 @@ out_error:
 }
 
 /*
- * Reserve log space and return a ticket corresponding the reservation.
+ * Reserve log space and return a ticket corresponding to the reservation.
  *
  * Each reservation is going to reserve extra space for a log record header.
  * When writes happen to the on-disk log, we don't subtract the length of the
-- 
cgit v1.2.3


From eb9950eb31f56e57582a61c92073336d04a26542 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 3 Aug 2018 17:06:56 +0100
Subject: rxrpc: Push iov_iter up from rxrpc_kernel_recv_data() to caller

Push iov_iter up from rxrpc_kernel_recv_data() to its caller to allow
non-contiguous iovs to be passed down, thereby permitting file reading to
be simplified in the AFS filesystem in a future patch.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/afs/rxrpc.c         | 28 +++++++++++++++++-----------
 include/net/af_rxrpc.h |  2 +-
 net/rxrpc/recvmsg.c    | 33 +++++++++++----------------------
 3 files changed, 29 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a1b18082991b..19db5f672a9d 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -346,7 +346,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 	struct rxrpc_call *rxcall;
 	struct msghdr msg;
 	struct kvec iov[1];
-	size_t offset;
 	s64 tx_total_len;
 	int ret;
 
@@ -433,10 +432,10 @@ error_do_abort:
 		rxrpc_kernel_abort_call(call->net->socket, rxcall,
 					RX_USER_ABORT, ret, "KSD");
 	} else {
-		offset = 0;
-		rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL,
-				       0, &offset, false, &call->abort_code,
-				       &call->service_id);
+		iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, NULL, 0, 0);
+		rxrpc_kernel_recv_data(call->net->socket, rxcall,
+				       &msg.msg_iter, false,
+				       &call->abort_code, &call->service_id);
 		ac->abort_code = call->abort_code;
 		ac->responded = true;
 	}
@@ -467,13 +466,14 @@ static void afs_deliver_to_call(struct afs_call *call)
 	       state == AFS_CALL_SV_AWAIT_ACK
 	       ) {
 		if (state == AFS_CALL_SV_AWAIT_ACK) {
-			size_t offset = 0;
+			struct iov_iter iter;
+
+			iov_iter_kvec(&iter, READ | ITER_KVEC, NULL, 0, 0);
 			ret = rxrpc_kernel_recv_data(call->net->socket,
-						     call->rxcall,
-						     NULL, 0, &offset, false,
+						     call->rxcall, &iter, false,
 						     &remote_abort,
 						     &call->service_id);
-			trace_afs_recv_data(call, 0, offset, false, ret);
+			trace_afs_recv_data(call, 0, 0, false, ret);
 
 			if (ret == -EINPROGRESS || ret == -EAGAIN)
 				return;
@@ -894,6 +894,8 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 		     bool want_more)
 {
 	struct afs_net *net = call->net;
+	struct iov_iter iter;
+	struct kvec iov;
 	enum afs_call_state state;
 	u32 remote_abort = 0;
 	int ret;
@@ -903,10 +905,14 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 
 	ASSERTCMP(call->offset, <=, count);
 
-	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall,
-				     buf, count, &call->offset,
+	iov.iov_base = buf + call->offset;
+	iov.iov_len = count - call->offset;
+	iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, count - call->offset);
+
+	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter,
 				     want_more, &remote_abort,
 				     &call->service_id);
+	call->offset += (count - call->offset) - iov_iter_count(&iter);
 	trace_afs_recv_data(call, count, call->offset, want_more, ret);
 	if (ret == 0 || ret == -EAGAIN)
 		return ret;
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 8ae8ee004258..f53edb3754bc 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -61,7 +61,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
 			   struct msghdr *, size_t,
 			   rxrpc_notify_end_tx_t);
 int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
-			   void *, size_t, size_t *, bool, u32 *, u16 *);
+			   struct iov_iter *, bool, u32 *, u16 *);
 bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
 			     u32, int, const char *);
 void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index a57ea96c84ea..816b19a78809 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -611,9 +611,7 @@ wait_error:
  * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info
  * @sock: The socket that the call exists on
  * @call: The call to send data through
- * @buf: The buffer to receive into
- * @size: The size of the buffer, including data already read
- * @_offset: The running offset into the buffer.
+ * @iter: The buffer to receive into
  * @want_more: True if more data is expected to be read
  * @_abort: Where the abort code is stored if -ECONNABORTED is returned
  * @_service: Where to store the actual service ID (may be upgraded)
@@ -626,39 +624,30 @@ wait_error:
  * Note that we may return -EAGAIN to drain empty packets at the end of the
  * data, even if we've already copied over the requested data.
  *
- * This function adds the amount it transfers to *_offset, so this should be
- * precleared as appropriate.  Note that the amount remaining in the buffer is
- * taken to be size - *_offset.
- *
  * *_abort should also be initialised to 0.
  */
 int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
-			   void *buf, size_t size, size_t *_offset,
+			   struct iov_iter *iter,
 			   bool want_more, u32 *_abort, u16 *_service)
 {
-	struct iov_iter iter;
-	struct kvec iov;
+	size_t offset = 0;
 	int ret;
 
-	_enter("{%d,%s},%zu/%zu,%d",
+	_enter("{%d,%s},%zu,%d",
 	       call->debug_id, rxrpc_call_states[call->state],
-	       *_offset, size, want_more);
+	       iov_iter_count(iter), want_more);
 
-	ASSERTCMP(*_offset, <=, size);
 	ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING);
 
-	iov.iov_base = buf + *_offset;
-	iov.iov_len = size - *_offset;
-	iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset);
-
 	mutex_lock(&call->user_mutex);
 
 	switch (READ_ONCE(call->state)) {
 	case RXRPC_CALL_CLIENT_RECV_REPLY:
 	case RXRPC_CALL_SERVER_RECV_REQUEST:
 	case RXRPC_CALL_SERVER_ACK_REQUEST:
-		ret = rxrpc_recvmsg_data(sock, call, NULL, &iter, size, 0,
-					 _offset);
+		ret = rxrpc_recvmsg_data(sock, call, NULL, iter,
+					 iov_iter_count(iter), 0,
+					 &offset);
 		if (ret < 0)
 			goto out;
 
@@ -667,7 +656,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
 		 * full buffer or have been given -EAGAIN.
 		 */
 		if (ret == 1) {
-			if (*_offset < size)
+			if (iov_iter_count(iter) > 0)
 				goto short_data;
 			if (!want_more)
 				goto read_phase_complete;
@@ -704,7 +693,7 @@ out:
 	if (_service)
 		*_service = call->service_id;
 	mutex_unlock(&call->user_mutex);
-	_leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
+	_leave(" = %d [%zu,%d]", ret, iov_iter_count(iter), *_abort);
 	return ret;
 
 short_data:
@@ -720,7 +709,7 @@ call_complete:
 	ret = call->error;
 	if (call->completion == RXRPC_CALL_SUCCEEDED) {
 		ret = 1;
-		if (size > 0)
+		if (iov_iter_count(iter) > 0)
 			ret = -ECONNRESET;
 	}
 	goto out;
-- 
cgit v1.2.3


From c2b6d621c4ffe9936adf7a55c8b1c769672c306f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 28 Jun 2018 15:53:17 -0400
Subject: new primitive: discard_new_inode()

	We don't want open-by-handle picking half-set-up in-core
struct inode from e.g. mkdir() having failed halfway through.
In other words, we don't want such inodes returned by iget_locked()
on their way to extinction.  However, we can't just have them
unhashed - otherwise open-by-handle immediately *after* that would've
ended up creating a new in-core inode over the on-disk one that
is in the process of being freed right under us.

	Solution: new flag (I_CREATING) set by insert_inode_locked() and
removed by unlock_new_inode() and a new primitive (discard_new_inode())
to be used by such halfway-through-setup failure exits instead of
unlock_new_inode() / iput() combinations.  That primitive unlocks new
inode, but leaves I_CREATING in place.

	iget_locked() treats finding an I_CREATING inode as failure
(-ESTALE, once we sort out the error propagation).
	insert_inode_locked() treats the same as instant -EBUSY.
	ilookup() treats those as icache miss.

[Fix by Dan Carpenter <dan.carpenter@oracle.com> folded in]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c        |  2 +-
 fs/inode.c         | 45 +++++++++++++++++++++++++++++++++++++++++----
 include/linux/fs.h |  6 +++++-
 3 files changed, 47 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index a7d9e7a4c283..11b753d29409 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1892,7 +1892,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
 	spin_lock(&inode->i_lock);
 	__d_instantiate(entry, inode);
 	WARN_ON(!(inode->i_state & I_NEW));
-	inode->i_state &= ~I_NEW;
+	inode->i_state &= ~I_NEW & ~I_CREATING;
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_NEW);
 	spin_unlock(&inode->i_lock);
diff --git a/fs/inode.c b/fs/inode.c
index 2c300e981796..6cd2e7ba9f4d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -804,6 +804,10 @@ repeat:
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
+		if (unlikely(inode->i_state & I_CREATING)) {
+			spin_unlock(&inode->i_lock);
+			return ERR_PTR(-ESTALE);
+		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
 		return inode;
@@ -831,6 +835,10 @@ repeat:
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
+		if (unlikely(inode->i_state & I_CREATING)) {
+			spin_unlock(&inode->i_lock);
+			return ERR_PTR(-ESTALE);
+		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
 		return inode;
@@ -961,13 +969,26 @@ void unlock_new_inode(struct inode *inode)
 	lockdep_annotate_inode_mutex_key(inode);
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_NEW));
-	inode->i_state &= ~I_NEW;
+	inode->i_state &= ~I_NEW & ~I_CREATING;
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_NEW);
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(unlock_new_inode);
 
+void discard_new_inode(struct inode *inode)
+{
+	lockdep_annotate_inode_mutex_key(inode);
+	spin_lock(&inode->i_lock);
+	WARN_ON(!(inode->i_state & I_NEW));
+	inode->i_state &= ~I_NEW;
+	smp_mb();
+	wake_up_bit(&inode->i_state, __I_NEW);
+	spin_unlock(&inode->i_lock);
+	iput(inode);
+}
+EXPORT_SYMBOL(discard_new_inode);
+
 /**
  * lock_two_nondirectories - take two i_mutexes on non-directory objects
  *
@@ -1039,6 +1060,8 @@ again:
 		 * Use the old inode instead of the preallocated one.
 		 */
 		spin_unlock(&inode_hash_lock);
+		if (IS_ERR(old))
+			return NULL;
 		wait_on_inode(old);
 		if (unlikely(inode_unhashed(old))) {
 			iput(old);
@@ -1128,6 +1151,8 @@ again:
 	inode = find_inode_fast(sb, head, ino);
 	spin_unlock(&inode_hash_lock);
 	if (inode) {
+		if (IS_ERR(inode))
+			return NULL;
 		wait_on_inode(inode);
 		if (unlikely(inode_unhashed(inode))) {
 			iput(inode);
@@ -1165,6 +1190,8 @@ again:
 		 */
 		spin_unlock(&inode_hash_lock);
 		destroy_inode(inode);
+		if (IS_ERR(old))
+			return NULL;
 		inode = old;
 		wait_on_inode(inode);
 		if (unlikely(inode_unhashed(inode))) {
@@ -1282,7 +1309,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
 	inode = find_inode(sb, head, test, data);
 	spin_unlock(&inode_hash_lock);
 
-	return inode;
+	return IS_ERR(inode) ? NULL : inode;
 }
 EXPORT_SYMBOL(ilookup5_nowait);
 
@@ -1338,6 +1365,8 @@ again:
 	spin_unlock(&inode_hash_lock);
 
 	if (inode) {
+		if (IS_ERR(inode))
+			return NULL;
 		wait_on_inode(inode);
 		if (unlikely(inode_unhashed(inode))) {
 			iput(inode);
@@ -1421,12 +1450,17 @@ int insert_inode_locked(struct inode *inode)
 		}
 		if (likely(!old)) {
 			spin_lock(&inode->i_lock);
-			inode->i_state |= I_NEW;
+			inode->i_state |= I_NEW | I_CREATING;
 			hlist_add_head(&inode->i_hash, head);
 			spin_unlock(&inode->i_lock);
 			spin_unlock(&inode_hash_lock);
 			return 0;
 		}
+		if (unlikely(old->i_state & I_CREATING)) {
+			spin_unlock(&old->i_lock);
+			spin_unlock(&inode_hash_lock);
+			return -EBUSY;
+		}
 		__iget(old);
 		spin_unlock(&old->i_lock);
 		spin_unlock(&inode_hash_lock);
@@ -1443,7 +1477,10 @@ EXPORT_SYMBOL(insert_inode_locked);
 int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
-	struct inode *old = inode_insert5(inode, hashval, test, NULL, data);
+	struct inode *old;
+
+	inode->i_state |= I_CREATING;
+	old = inode_insert5(inode, hashval, test, NULL, data);
 
 	if (old != inode) {
 		iput(old);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5c91108846db..a42600565925 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2016,6 +2016,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
  * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
  *			and work dirs among overlayfs mounts.
  *
+ * I_CREATING		New object's inode in the middle of setting up.
+ *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  */
 #define I_DIRTY_SYNC		(1 << 0)
@@ -2036,7 +2038,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 #define __I_DIRTY_TIME_EXPIRED	12
 #define I_DIRTY_TIME_EXPIRED	(1 << __I_DIRTY_TIME_EXPIRED)
 #define I_WB_SWITCH		(1 << 13)
-#define I_OVL_INUSE			(1 << 14)
+#define I_OVL_INUSE		(1 << 14)
+#define I_CREATING		(1 << 15)
 
 #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
 #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
@@ -2919,6 +2922,7 @@ extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
 static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
 #endif
 extern void unlock_new_inode(struct inode *);
+extern void discard_new_inode(struct inode *);
 extern unsigned int get_next_ino(void);
 extern void evict_inodes(struct super_block *sb);
 
-- 
cgit v1.2.3


From 32955c5422a8a5460bbefe2a6fc51eadcafff4c9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 16 May 2018 12:20:05 -0400
Subject: btrfs: switch to discard_new_inode()

Make sure that no partially set up inodes can be returned by
open-by-handle.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/inode.c | 106 ++++++++++++++++++++-----------------------------------
 1 file changed, 39 insertions(+), 67 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e9482f0db9d0..9382e0881900 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6335,8 +6335,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	location->type = BTRFS_INODE_ITEM_KEY;
 
 	ret = btrfs_insert_inode_locked(inode);
-	if (ret < 0)
+	if (ret < 0) {
+		iput(inode);
 		goto fail;
+	}
 
 	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
@@ -6395,12 +6397,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	return inode;
 
 fail_unlock:
-	unlock_new_inode(inode);
+	discard_new_inode(inode);
 fail:
 	if (dir && name)
 		BTRFS_I(dir)->index_cnt--;
 	btrfs_free_path(path);
-	iput(inode);
 	return ERR_PTR(ret);
 }
 
@@ -6505,7 +6506,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct inode *inode = NULL;
 	int err;
-	int drop_inode = 0;
 	u64 objectid;
 	u64 index = 0;
 
@@ -6527,6 +6527,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 			mode, &index);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
+		inode = NULL;
 		goto out_unlock;
 	}
 
@@ -6541,31 +6542,24 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 
 	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
 	if (err)
-		goto out_unlock_inode;
+		goto out_unlock;
 
 	err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
 			0, index);
-	if (err) {
-		goto out_unlock_inode;
-	} else {
-		btrfs_update_inode(trans, root, inode);
-		d_instantiate_new(dentry, inode);
-	}
+	if (err)
+		goto out_unlock;
+
+	btrfs_update_inode(trans, root, inode);
+	d_instantiate_new(dentry, inode);
 
 out_unlock:
 	btrfs_end_transaction(trans);
 	btrfs_btree_balance_dirty(fs_info);
-	if (drop_inode) {
+	if (err && inode) {
 		inode_dec_link_count(inode);
-		iput(inode);
+		discard_new_inode(inode);
 	}
 	return err;
-
-out_unlock_inode:
-	drop_inode = 1;
-	unlock_new_inode(inode);
-	goto out_unlock;
-
 }
 
 static int btrfs_create(struct inode *dir, struct dentry *dentry,
@@ -6575,7 +6569,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct inode *inode = NULL;
-	int drop_inode_on_err = 0;
 	int err;
 	u64 objectid;
 	u64 index = 0;
@@ -6598,9 +6591,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 			mode, &index);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
+		inode = NULL;
 		goto out_unlock;
 	}
-	drop_inode_on_err = 1;
 	/*
 	* If the active LSM wants to access the inode during
 	* d_instantiate it needs these. Smack checks to see
@@ -6613,33 +6606,28 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 
 	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
 	if (err)
-		goto out_unlock_inode;
+		goto out_unlock;
 
 	err = btrfs_update_inode(trans, root, inode);
 	if (err)
-		goto out_unlock_inode;
+		goto out_unlock;
 
 	err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
 			0, index);
 	if (err)
-		goto out_unlock_inode;
+		goto out_unlock;
 
 	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
 	d_instantiate_new(dentry, inode);
 
 out_unlock:
 	btrfs_end_transaction(trans);
-	if (err && drop_inode_on_err) {
+	if (err && inode) {
 		inode_dec_link_count(inode);
-		iput(inode);
+		discard_new_inode(inode);
 	}
 	btrfs_btree_balance_dirty(fs_info);
 	return err;
-
-out_unlock_inode:
-	unlock_new_inode(inode);
-	goto out_unlock;
-
 }
 
 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
@@ -6748,6 +6736,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 			S_IFDIR | mode, &index);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
+		inode = NULL;
 		goto out_fail;
 	}
 
@@ -6758,34 +6747,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
 	if (err)
-		goto out_fail_inode;
+		goto out_fail;
 
 	btrfs_i_size_write(BTRFS_I(inode), 0);
 	err = btrfs_update_inode(trans, root, inode);
 	if (err)
-		goto out_fail_inode;
+		goto out_fail;
 
 	err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
 			dentry->d_name.name,
 			dentry->d_name.len, 0, index);
 	if (err)
-		goto out_fail_inode;
+		goto out_fail;
 
 	d_instantiate_new(dentry, inode);
 	drop_on_err = 0;
 
 out_fail:
 	btrfs_end_transaction(trans);
-	if (drop_on_err) {
+	if (err && inode) {
 		inode_dec_link_count(inode);
-		iput(inode);
+		discard_new_inode(inode);
 	}
 	btrfs_btree_balance_dirty(fs_info);
 	return err;
-
-out_fail_inode:
-	unlock_new_inode(inode);
-	goto out_fail;
 }
 
 static noinline int uncompress_inline(struct btrfs_path *path,
@@ -10112,7 +10097,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 	struct btrfs_key key;
 	struct inode *inode = NULL;
 	int err;
-	int drop_inode = 0;
 	u64 objectid;
 	u64 index = 0;
 	int name_len;
@@ -10145,6 +10129,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 				objectid, S_IFLNK|S_IRWXUGO, &index);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
+		inode = NULL;
 		goto out_unlock;
 	}
 
@@ -10161,12 +10146,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 
 	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
 	if (err)
-		goto out_unlock_inode;
+		goto out_unlock;
 
 	path = btrfs_alloc_path();
 	if (!path) {
 		err = -ENOMEM;
-		goto out_unlock_inode;
+		goto out_unlock;
 	}
 	key.objectid = btrfs_ino(BTRFS_I(inode));
 	key.offset = 0;
@@ -10176,7 +10161,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 				      datasize);
 	if (err) {
 		btrfs_free_path(path);
-		goto out_unlock_inode;
+		goto out_unlock;
 	}
 	leaf = path->nodes[0];
 	ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -10208,26 +10193,19 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 	if (!err)
 		err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
 				BTRFS_I(inode), 0, index);
-	if (err) {
-		drop_inode = 1;
-		goto out_unlock_inode;
-	}
+	if (err)
+		goto out_unlock;
 
 	d_instantiate_new(dentry, inode);
 
 out_unlock:
 	btrfs_end_transaction(trans);
-	if (drop_inode) {
+	if (err && inode) {
 		inode_dec_link_count(inode);
-		iput(inode);
+		discard_new_inode(inode);
 	}
 	btrfs_btree_balance_dirty(fs_info);
 	return err;
-
-out_unlock_inode:
-	drop_inode = 1;
-	unlock_new_inode(inode);
-	goto out_unlock;
 }
 
 static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -10436,14 +10414,14 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 	ret = btrfs_init_inode_security(trans, inode, dir, NULL);
 	if (ret)
-		goto out_inode;
+		goto out;
 
 	ret = btrfs_update_inode(trans, root, inode);
 	if (ret)
-		goto out_inode;
+		goto out;
 	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
 	if (ret)
-		goto out_inode;
+		goto out;
 
 	/*
 	 * We set number of links to 0 in btrfs_new_inode(), and here we set
@@ -10453,21 +10431,15 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 	 *    d_tmpfile() -> inode_dec_link_count() -> drop_nlink()
 	 */
 	set_nlink(inode, 1);
-	unlock_new_inode(inode);
 	d_tmpfile(dentry, inode);
+	unlock_new_inode(inode);
 	mark_inode_dirty(inode);
-
 out:
 	btrfs_end_transaction(trans);
-	if (ret)
-		iput(inode);
+	if (ret && inode)
+		discard_new_inode(inode);
 	btrfs_btree_balance_dirty(fs_info);
 	return ret;
-
-out_inode:
-	unlock_new_inode(inode);
-	goto out;
-
 }
 
 __attribute__((const))
-- 
cgit v1.2.3


From dd54992776ebb44519ba4cd69145c4f19d166ddb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 16 May 2018 12:22:50 -0400
Subject: ufs: switch to discard_new_inode()

we don't want open-by-handle to pick an in-core inode that
has failed setup halfway through.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ufs/ialloc.c | 3 +--
 fs/ufs/namei.c  | 9 +++------
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index e1ef0f0a1353..02c0a4be4212 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -343,8 +343,7 @@ cg_found:
 fail_remove_inode:
 	mutex_unlock(&sbi->s_lock);
 	clear_nlink(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	discard_new_inode(inode);
 	UFSD("EXIT (FAILED): err %d\n", err);
 	return ERR_PTR(err);
 failed:
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index d5f43ba76c59..9ef40f100415 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -43,8 +43,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
 		return 0;
 	}
 	inode_dec_link_count(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	discard_new_inode(inode);
 	return err;
 }
 
@@ -142,8 +141,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
 
 out_fail:
 	inode_dec_link_count(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	discard_new_inode(inode);
 	return err;
 }
 
@@ -198,8 +196,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 out_fail:
 	inode_dec_link_count(inode);
 	inode_dec_link_count(inode);
-	unlock_new_inode(inode);
-	iput (inode);
+	discard_new_inode(inode);
 out_dir:
 	inode_dec_link_count(dir);
 	return err;
-- 
cgit v1.2.3


From 5c1a68a358f94b9ac2e33183327bc04f207feed2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 16 May 2018 12:25:39 -0400
Subject: udf: switch to discard_new_inode()

we don't want open-by-handle to pick an in-core inode that
has failed setup halfway through.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/udf/namei.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index c586026508db..061d049c2620 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -608,8 +608,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
 	fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
 	if (unlikely(!fi)) {
 		inode_dec_link_count(inode);
-		unlock_new_inode(inode);
-		iput(inode);
+		discard_new_inode(inode);
 		return err;
 	}
 	cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -700,8 +699,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err);
 	if (!fi) {
 		inode_dec_link_count(inode);
-		unlock_new_inode(inode);
-		iput(inode);
+		discard_new_inode(inode);
 		goto out;
 	}
 	set_nlink(inode, 2);
@@ -719,8 +717,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	if (!fi) {
 		clear_nlink(inode);
 		mark_inode_dirty(inode);
-		unlock_new_inode(inode);
-		iput(inode);
+		discard_new_inode(inode);
 		goto out;
 	}
 	cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -1047,8 +1044,7 @@ out:
 out_no_entry:
 	up_write(&iinfo->i_data_sem);
 	inode_dec_link_count(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	discard_new_inode(inode);
 	goto out;
 }
 
-- 
cgit v1.2.3


From 2e5afe54e0cd6fce79b51ca547caf08a990ad56d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 16 May 2018 18:29:56 -0400
Subject: ext2: make sure that partially set up inodes won't be returned by
 ext2_iget()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext2/ialloc.c | 3 +--
 fs/ext2/namei.c  | 9 +++------
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 6484199b35d1..5c3d7b7e4975 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -611,8 +611,7 @@ fail_drop:
 	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
 	clear_nlink(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	discard_new_inode(inode);
 	return ERR_PTR(err);
 
 fail:
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 152453a91877..0c26dcc5d850 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -45,8 +45,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
 		return 0;
 	}
 	inode_dec_link_count(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	discard_new_inode(inode);
 	return err;
 }
 
@@ -192,8 +191,7 @@ out:
 
 out_fail:
 	inode_dec_link_count(inode);
-	unlock_new_inode(inode);
-	iput (inode);
+	discard_new_inode(inode);
 	goto out;
 }
 
@@ -261,8 +259,7 @@ out:
 out_fail:
 	inode_dec_link_count(inode);
 	inode_dec_link_count(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	discard_new_inode(inode);
 out_dir:
 	inode_dec_link_count(dir);
 	goto out;
-- 
cgit v1.2.3


From a6cbedfa8783b42b9272c05297865bdb501005cb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 29 Jun 2018 11:59:37 -0400
Subject: jfs: switch to discard_new_inode()

we don't want open-by-handle to pick an in-core inode that
has failed setup halfway through.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/jfs/jfs_inode.c |  8 ++++----
 fs/jfs/namei.c     | 12 ++++--------
 2 files changed, 8 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 5e9b7bb3aabf..96732c24b054 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -61,8 +61,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	inode = new_inode(sb);
 	if (!inode) {
 		jfs_warn("ialloc: new_inode returned NULL!");
-		rc = -ENOMEM;
-		goto fail;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	jfs_inode = JFS_IP(inode);
@@ -141,9 +140,10 @@ fail_drop:
 	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
 	clear_nlink(inode);
-	unlock_new_inode(inode);
+	discard_new_inode(inode);
+	return ERR_PTR(rc);
+
 fail_put:
 	iput(inode);
-fail:
 	return ERR_PTR(rc);
 }
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 56c3fcbfe80e..14528c0ffe63 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -175,8 +175,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
 	if (rc) {
 		free_ea_wmap(ip);
 		clear_nlink(ip);
-		unlock_new_inode(ip);
-		iput(ip);
+		discard_new_inode(ip);
 	} else {
 		d_instantiate_new(dentry, ip);
 	}
@@ -309,8 +308,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
 	if (rc) {
 		free_ea_wmap(ip);
 		clear_nlink(ip);
-		unlock_new_inode(ip);
-		iput(ip);
+		discard_new_inode(ip);
 	} else {
 		d_instantiate_new(dentry, ip);
 	}
@@ -1054,8 +1052,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 	if (rc) {
 		free_ea_wmap(ip);
 		clear_nlink(ip);
-		unlock_new_inode(ip);
-		iput(ip);
+		discard_new_inode(ip);
 	} else {
 		d_instantiate_new(dentry, ip);
 	}
@@ -1441,8 +1438,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 	if (rc) {
 		free_ea_wmap(ip);
 		clear_nlink(ip);
-		unlock_new_inode(ip);
-		iput(ip);
+		discard_new_inode(ip);
 	} else {
 		d_instantiate_new(dentry, ip);
 	}
-- 
cgit v1.2.3


From e950564b97fd0f541b02eb207685d0746f5ecf29 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 24 Jul 2018 15:01:55 +0200
Subject: vfs: don't evict uninitialized inode

iput() ends up calling ->evict() on a new inode, which is not yet initialized
by the owning fs.  So use destroy_inode() instead.

Add to sb->s_inodes list only if inode is not in I_CREATING state (meaning
that it wasn't allocated with new_inode(), which already does the
insertion).

Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 80ea09a002bf ("vfs: factor out inode_insert5()")
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 6cd2e7ba9f4d..e44a97584158 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1050,6 +1050,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
 {
 	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
 	struct inode *old;
+	bool creating = inode->i_state & I_CREATING;
 
 again:
 	spin_lock(&inode_hash_lock);
@@ -1083,6 +1084,8 @@ again:
 	inode->i_state |= I_NEW;
 	hlist_add_head(&inode->i_hash, head);
 	spin_unlock(&inode->i_lock);
+	if (!creating)
+		inode_sb_list_add(inode);
 unlock:
 	spin_unlock(&inode_hash_lock);
 
@@ -1117,12 +1120,13 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
 	struct inode *inode = ilookup5(sb, hashval, test, data);
 
 	if (!inode) {
-		struct inode *new = new_inode(sb);
+		struct inode *new = alloc_inode(sb);
 
 		if (new) {
+			new->i_state = 0;
 			inode = inode_insert5(new, hashval, test, set, data);
 			if (unlikely(inode != new))
-				iput(new);
+				destroy_inode(new);
 		}
 	}
 	return inode;
-- 
cgit v1.2.3


From 5bef915104f32c9d0bb5df6e86a98e31cb524e9a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 29 Jun 2018 19:36:57 -0400
Subject: new helper: inode_fake_hash()

open-coded in quite a few places...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfs/inode.c     |  2 +-
 fs/jfs/jfs_imap.c  |  8 +-------
 fs/jfs/super.c     |  2 +-
 fs/xfs/xfs_iops.c  |  2 +-
 include/linux/fs.h | 11 +++++++++++
 5 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2a16111d312f..a2dfa1b2a89c 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -541,7 +541,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
 	HFS_I(inode)->rsrc_inode = dir;
 	HFS_I(dir)->rsrc_inode = inode;
 	igrab(dir);
-	hlist_add_fake(&inode->i_hash);
+	inode_fake_hash(inode);
 	mark_inode_dirty(inode);
 	dont_mount(dentry);
 out:
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index f36ef68905a7..93e8c590ff5c 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -491,13 +491,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 	/* release the page */
 	release_metapage(mp);
 
-	/*
-	 * __mark_inode_dirty expects inodes to be hashed.  Since we don't
-	 * want special inodes in the fileset inode space, we make them
-	 * appear hashed, but do not put on any lists.  hlist_del()
-	 * will work fine and require no locking.
-	 */
-	hlist_add_fake(&ip->i_hash);
+	inode_fake_hash(ip);
 
 	return (ip);
 }
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1b9264fd54b6..5403ece57dba 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -581,7 +581,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	inode->i_ino = 0;
 	inode->i_size = i_size_read(sb->s_bdev->bd_inode);
 	inode->i_mapping->a_ops = &jfs_metapage_aops;
-	hlist_add_fake(&inode->i_hash);
+	inode_fake_hash(inode);
 	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
 
 	sbi->direct_inode = inode;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 0fa29f39d658..3a75de777843 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1253,7 +1253,7 @@ xfs_setup_inode(
 
 	inode_sb_list_add(inode);
 	/* make the inode look hashed for the writeback code */
-	hlist_add_fake(&inode->i_hash);
+	inode_fake_hash(inode);
 
 	inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
 	inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a42600565925..43941e230e2b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -684,6 +684,17 @@ static inline int inode_unhashed(struct inode *inode)
 	return hlist_unhashed(&inode->i_hash);
 }
 
+/*
+ * __mark_inode_dirty expects inodes to be hashed.  Since we don't
+ * want special inodes in the fileset inode space, we make them
+ * appear hashed, but do not put on any lists.  hlist_del()
+ * will work fine and require no locking.
+ */
+static inline void inode_fake_hash(struct inode *inode)
+{
+	hlist_add_fake(&inode->i_hash);
+}
+
 /*
  * inode->i_mutex nesting subclasses for the lock validator:
  *
-- 
cgit v1.2.3


From d8e78da8682028ad53d040339a2b9d6fb6092d63 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 30 Jun 2018 03:15:49 -0400
Subject: adfs: don't put inodes into icache

We never look them up in there; inode_fake_hash() will make them appear
hashed for mark_inode_dirty() purposes.  And don't leave them around
until memory pressure kicks them out - we never look them up again.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/adfs/inode.c | 2 +-
 fs/adfs/super.c | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index c836c425ca94..e91028d4340a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -287,7 +287,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
 		ADFS_I(inode)->mmu_private = inode->i_size;
 	}
 
-	insert_inode_hash(inode);
+	inode_fake_hash(inode);
 
 out:
 	return inode;
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 71fa525d63a0..7e099a7a4eb1 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -291,6 +291,7 @@ static void destroy_inodecache(void)
 static const struct super_operations adfs_sops = {
 	.alloc_inode	= adfs_alloc_inode,
 	.destroy_inode	= adfs_destroy_inode,
+	.drop_inode	= generic_delete_inode,
 	.write_inode	= adfs_write_inode,
 	.put_super	= adfs_put_super,
 	.statfs		= adfs_statfs,
-- 
cgit v1.2.3


From c7b15a8657da7f8d11269c7cc3d8beef10d26b43 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 30 Jun 2018 14:32:04 -0400
Subject: jfs: don't bother with make_bad_inode() in ialloc()

We hit that when inumber allocation has failed.  In that case
the in-core inode is not hashed and since its ->i_nlink is 1
the only place where jfs checks is_bad_inode() won't be reached.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/jfs/jfs_inode.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 96732c24b054..4572b7cf183d 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -69,8 +69,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	rc = diAlloc(parent, S_ISDIR(mode), inode);
 	if (rc) {
 		jfs_warn("ialloc: diAlloc returned %d!", rc);
-		if (rc == -EIO)
-			make_bad_inode(inode);
 		goto fail_put;
 	}
 
-- 
cgit v1.2.3


From 1021bcf44d0e876b10f8739594ad7e6e9c746026 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Wed, 1 Aug 2018 19:23:37 +0800
Subject: pstore: add zstd compression support

This patch adds support for a 6th pstore compression algorithm: zstd.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/Kconfig    | 17 ++++++++++++++---
 fs/pstore/platform.c | 16 ++++++++++++++++
 2 files changed, 30 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 09c19ef91526..503086f7f7c1 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -50,12 +50,19 @@ config PSTORE_842_COMPRESS
 	help
 	  This option enables 842 compression algorithm support.
 
+config PSTORE_ZSTD_COMPRESS
+	bool "zstd compression"
+	depends on PSTORE
+	select CRYPTO_ZSTD
+	help
+	  This option enables zstd compression algorithm support.
+
 config PSTORE_COMPRESS
 	def_bool y
 	depends on PSTORE
 	depends on PSTORE_DEFLATE_COMPRESS || PSTORE_LZO_COMPRESS ||	\
 		   PSTORE_LZ4_COMPRESS || PSTORE_LZ4HC_COMPRESS ||	\
-		   PSTORE_842_COMPRESS
+		   PSTORE_842_COMPRESS || PSTORE_ZSTD_COMPRESS
 
 choice
 	prompt "Default pstore compression algorithm"
@@ -65,8 +72,8 @@ choice
 	  This change be changed at boot with "pstore.compress=..." on
 	  the kernel command line.
 
-	  Currently, pstore has support for 5 compression algorithms:
-	  deflate, lzo, lz4, lz4hc and 842.
+	  Currently, pstore has support for 6 compression algorithms:
+	  deflate, lzo, lz4, lz4hc, 842 and zstd.
 
 	  The default compression algorithm is deflate.
 
@@ -85,6 +92,9 @@ choice
 	config PSTORE_842_COMPRESS_DEFAULT
 		bool "842" if PSTORE_842_COMPRESS
 
+	config PSTORE_ZSTD_COMPRESS_DEFAULT
+		bool "zstd" if PSTORE_ZSTD_COMPRESS
+
 endchoice
 
 config PSTORE_COMPRESS_DEFAULT
@@ -95,6 +105,7 @@ config PSTORE_COMPRESS_DEFAULT
 	default "lz4" if PSTORE_LZ4_COMPRESS_DEFAULT
 	default "lz4hc" if PSTORE_LZ4HC_COMPRESS_DEFAULT
 	default "842" if PSTORE_842_COMPRESS_DEFAULT
+	default "zstd" if PSTORE_ZSTD_COMPRESS_DEFAULT
 
 config PSTORE_CONSOLE
 	bool "Log kernel console messages"
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index c238ab8ba31d..15e99d5a681d 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -34,6 +34,9 @@
 #if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) || IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
 #include <linux/lz4.h>
 #endif
+#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
+#include <linux/zstd.h>
+#endif
 #include <linux/crypto.h>
 #include <linux/string.h>
 #include <linux/timer.h>
@@ -192,6 +195,13 @@ static int zbufsize_842(size_t size)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
+static int zbufsize_zstd(size_t size)
+{
+	return ZSTD_compressBound(size);
+}
+#endif
+
 static const struct pstore_zbackend *zbackend __ro_after_init;
 
 static const struct pstore_zbackend zbackends[] = {
@@ -224,6 +234,12 @@ static const struct pstore_zbackend zbackends[] = {
 		.zbufsize	= zbufsize_842,
 		.name		= "842",
 	},
+#endif
+#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
+	{
+		.zbufsize	= zbufsize_zstd,
+		.name		= "zstd",
+	},
 #endif
 	{ }
 };
-- 
cgit v1.2.3


From 863c37fcb14f8b66ea831b45fb35a53ac4a8d69e Mon Sep 17 00:00:00 2001
From: zhong jiang <zhongjiang@huawei.com>
Date: Sat, 4 Aug 2018 17:34:07 -0400
Subject: ext4: remove unneeded variable "err" in ext4_mb_release_inode_pa()

The variable err is not used after initialization, so just remove it.

Signed-off-by: zhong jiang <zhongjiang@huawei.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8b24d3d42cb3..e29fce2fbf25 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3801,7 +3801,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 	ext4_group_t group;
 	ext4_grpblk_t bit;
 	unsigned long long grp_blk_start;
-	int err = 0;
 	int free = 0;
 
 	BUG_ON(pa->pa_deleted == 0);
@@ -3842,7 +3841,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 	}
 	atomic_add(free, &sbi->s_mb_discarded);
 
-	return err;
+	return 0;
 }
 
 static noinline_for_stack int
-- 
cgit v1.2.3


From 855371bd01b4cd8cf0e2b8ca172a5c30a481f963 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Jun 2018 20:48:31 -0400
Subject: afs: switch dynroot lookups to d_splice_alias()

->lookup() methods can (and should) use d_splice_alias() instead of
d_add().  Even if they are not going to be hit by open_by_handle(),
code does get copied around...

Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/afs/dynroot.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 174e843f0633..40fea59067b3 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -143,7 +143,6 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr
 {
 	struct afs_vnode *vnode;
 	struct inode *inode;
-	int ret;
 
 	vnode = AFS_FS_I(dir);
 
@@ -161,21 +160,10 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr
 		return afs_lookup_atcell(dentry);
 
 	inode = afs_try_auto_mntpt(dentry, dir);
-	if (IS_ERR(inode)) {
-		ret = PTR_ERR(inode);
-		if (ret == -ENOENT) {
-			d_add(dentry, NULL);
-			_leave(" = NULL [negative]");
-			return NULL;
-		}
-		_leave(" = %d [do]", ret);
-		return ERR_PTR(ret);
-	}
+	if (inode == ERR_PTR(-ENOENT))
+		inode = NULL;
 
-	d_add(dentry, inode);
-	_leave(" = 0 { ino=%lu v=%u }",
-	       d_inode(dentry)->i_ino, d_inode(dentry)->i_generation);
-	return NULL;
+	return d_splice_alias(inode, dentry);
 }
 
 const struct inode_operations afs_dynroot_inode_operations = {
-- 
cgit v1.2.3


From 34b2a88fb4aa4de34e1d5f9fc2761b746980f9b1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Jun 2018 10:43:51 -0400
Subject: afs_lookup(): switch to d_splice_alias()

->lookup() methods can (and should) use d_splice_alias() instead of
d_add().  Even if they are not going to be hit by open_by_handle(),
code does get copied around; besides, d_splice_alias() has better
calling conventions for use in ->lookup(), so the code gets simpler.

Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/afs/dir.c | 47 ++++++++++++-----------------------------------
 1 file changed, 12 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 7d623008157f..52f44255f65d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -822,6 +822,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 {
 	struct afs_vnode *dvnode = AFS_FS_I(dir);
 	struct inode *inode;
+	struct dentry *d;
 	struct key *key;
 	int ret;
 
@@ -862,43 +863,19 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 
 	afs_stat_v(dvnode, n_lookup);
 	inode = afs_do_lookup(dir, dentry, key);
-	if (IS_ERR(inode)) {
-		ret = PTR_ERR(inode);
-		if (ret == -ENOENT) {
-			inode = afs_try_auto_mntpt(dentry, dir);
-			if (!IS_ERR(inode)) {
-				key_put(key);
-				goto success;
-			}
-
-			ret = PTR_ERR(inode);
-		}
-
-		key_put(key);
-		if (ret == -ENOENT) {
-			d_add(dentry, NULL);
-			_leave(" = NULL [negative]");
-			return NULL;
-		}
-		_leave(" = %d [do]", ret);
-		return ERR_PTR(ret);
-	}
-	dentry->d_fsdata = (void *)(unsigned long)dvnode->status.data_version;
-
-	/* instantiate the dentry */
 	key_put(key);
-	if (IS_ERR(inode)) {
-		_leave(" = %ld", PTR_ERR(inode));
-		return ERR_CAST(inode);
+	if (inode == ERR_PTR(-ENOENT)) {
+		inode = afs_try_auto_mntpt(dentry, dir);
+		if (inode == ERR_PTR(-ENOENT))
+			inode = NULL;
+	} else {
+		dentry->d_fsdata =
+			(void *)(unsigned long)dvnode->status.data_version;
 	}
-
-success:
-	d_add(dentry, inode);
-	_leave(" = 0 { ino=%lu v=%u }",
-	       d_inode(dentry)->i_ino,
-	       d_inode(dentry)->i_generation);
-
-	return NULL;
+	d = d_splice_alias(inode, dentry);
+	if (!IS_ERR_OR_NULL(d))
+		d->d_fsdata = dentry->d_fsdata;
+	return d;
 }
 
 /*
-- 
cgit v1.2.3


From 1401a0fc2d47988677dc1dbfd56ff89daa323717 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Jun 2018 10:45:44 -0400
Subject: afs_try_auto_mntpt(): return NULL instead of ERR_PTR(-ENOENT)

simpler logic in callers that way

Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/afs/dir.c     |  2 --
 fs/afs/dynroot.c | 13 ++-----------
 2 files changed, 2 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 52f44255f65d..855bf2b79fed 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -866,8 +866,6 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 	key_put(key);
 	if (inode == ERR_PTR(-ENOENT)) {
 		inode = afs_try_auto_mntpt(dentry, dir);
-		if (inode == ERR_PTR(-ENOENT))
-			inode = NULL;
 	} else {
 		dentry->d_fsdata =
 			(void *)(unsigned long)dvnode->status.data_version;
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 40fea59067b3..1cde710a8013 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -83,7 +83,7 @@ struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir)
 
 out:
 	_leave("= %d", ret);
-	return ERR_PTR(ret);
+	return ret == -ENOENT ? NULL : ERR_PTR(ret);
 }
 
 /*
@@ -141,11 +141,6 @@ out_p:
 static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
 					 unsigned int flags)
 {
-	struct afs_vnode *vnode;
-	struct inode *inode;
-
-	vnode = AFS_FS_I(dir);
-
 	_enter("%pd", dentry);
 
 	ASSERTCMP(d_inode(dentry), ==, NULL);
@@ -159,11 +154,7 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr
 	    memcmp(dentry->d_name.name, "@cell", 5) == 0)
 		return afs_lookup_atcell(dentry);
 
-	inode = afs_try_auto_mntpt(dentry, dir);
-	if (inode == ERR_PTR(-ENOENT))
-		inode = NULL;
-
-	return d_splice_alias(inode, dentry);
+	return d_splice_alias(afs_try_auto_mntpt(dentry, dir), dentry);
 }
 
 const struct inode_operations afs_dynroot_inode_operations = {
-- 
cgit v1.2.3


From 7964410fcf135d7e76deef4e475816ec02482f7b Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Wed, 1 Aug 2018 19:39:05 -0500
Subject: fs: dcache: Use true and false for boolean values

Return statements in functions returning bool should use true or false
instead of an integer value.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 0e8e5de3c48a..6fd5c1aa4620 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -732,16 +732,16 @@ static inline bool fast_dput(struct dentry *dentry)
 		if (dentry->d_lockref.count > 1) {
 			dentry->d_lockref.count--;
 			spin_unlock(&dentry->d_lock);
-			return 1;
+			return true;
 		}
-		return 0;
+		return false;
 	}
 
 	/*
 	 * If we weren't the last ref, we're done.
 	 */
 	if (ret)
-		return 1;
+		return true;
 
 	/*
 	 * Careful, careful. The reference count went down
@@ -770,7 +770,7 @@ static inline bool fast_dput(struct dentry *dentry)
 
 	/* Nothing to do? Dropping the reference was all we needed? */
 	if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
-		return 1;
+		return true;
 
 	/*
 	 * Not the fast normal case? Get the lock. We've already decremented
@@ -787,7 +787,7 @@ static inline bool fast_dput(struct dentry *dentry)
 	 */
 	if (dentry->d_lockref.count) {
 		spin_unlock(&dentry->d_lock);
-		return 1;
+		return true;
 	}
 
 	/*
@@ -796,7 +796,7 @@ static inline bool fast_dput(struct dentry *dentry)
 	 * set it to 1.
 	 */
 	dentry->d_lockref.count = 1;
-	return 0;
+	return false;
 }
 
 
-- 
cgit v1.2.3


From 7dda712818373d4d8ecc5dca2293664fcd3b0158 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 16 Jul 2018 12:25:50 +0200
Subject: timerfd: add support for keyed wakeups

This prepares timerfd for use with aio poll.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Avi Kivity <avi@scylladb.com>
---
 fs/timerfd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/timerfd.c b/fs/timerfd.c
index cdad49da3ff7..f6c54fd56645 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -66,7 +66,7 @@ static void timerfd_triggered(struct timerfd_ctx *ctx)
 	spin_lock_irqsave(&ctx->wqh.lock, flags);
 	ctx->expired = 1;
 	ctx->ticks++;
-	wake_up_locked(&ctx->wqh);
+	wake_up_locked_poll(&ctx->wqh, EPOLLIN);
 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 }
 
@@ -107,7 +107,7 @@ void timerfd_clock_was_set(void)
 		if (ctx->moffs != moffs) {
 			ctx->moffs = KTIME_MAX;
 			ctx->ticks++;
-			wake_up_locked(&ctx->wqh);
+			wake_up_locked_poll(&ctx->wqh, EPOLLIN);
 		}
 		spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 	}
@@ -345,7 +345,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 		spin_lock_irq(&ctx->wqh.lock);
 		if (!timerfd_canceled(ctx)) {
 			ctx->ticks = ticks;
-			wake_up_locked(&ctx->wqh);
+			wake_up_locked_poll(&ctx->wqh, EPOLLIN);
 		} else
 			ret = -ECANCELED;
 		spin_unlock_irq(&ctx->wqh.lock);
-- 
cgit v1.2.3


From 9018ccc453af063d16b3b6b5dfa2ad0635390371 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 24 Jul 2018 11:36:37 +0200
Subject: aio: add an iocb refcount

This is needed to prevent races caused by the way the ->poll API works.
To avoid introducing overhead for other users of the iocbs we initialize
it to zero and only do refcount operations if it is non-zero in the
completion path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Avi Kivity <avi@scylladb.com>
---
 fs/aio.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 27454594e37a..fe2018ada32c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -18,6 +18,7 @@
 #include <linux/export.h>
 #include <linux/syscalls.h>
 #include <linux/backing-dev.h>
+#include <linux/refcount.h>
 #include <linux/uio.h>
 
 #include <linux/sched/signal.h>
@@ -178,6 +179,7 @@ struct aio_kiocb {
 
 	struct list_head	ki_list;	/* the aio core uses this
 						 * for cancellation */
+	refcount_t		ki_refcnt;
 
 	/*
 	 * If the aio_resfd field of the userspace iocb is not zero,
@@ -1015,6 +1017,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 
 	percpu_ref_get(&ctx->reqs);
 	INIT_LIST_HEAD(&req->ki_list);
+	refcount_set(&req->ki_refcnt, 0);
 	req->ki_ctx = ctx;
 	return req;
 out_put:
@@ -1049,6 +1052,15 @@ out:
 	return ret;
 }
 
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+	if (refcount_read(&iocb->ki_refcnt) == 0 ||
+	    refcount_dec_and_test(&iocb->ki_refcnt)) {
+		percpu_ref_put(&iocb->ki_ctx->reqs);
+		kmem_cache_free(kiocb_cachep, iocb);
+	}
+}
+
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  */
@@ -1118,8 +1130,6 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
 		eventfd_ctx_put(iocb->ki_eventfd);
 	}
 
-	kmem_cache_free(kiocb_cachep, iocb);
-
 	/*
 	 * We have to order our ring_info tail store above and test
 	 * of the wait list below outside the wait lock.  This is
@@ -1130,8 +1140,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
 
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);
-
-	percpu_ref_put(&ctx->reqs);
+	iocb_put(iocb);
 }
 
 /* aio_read_events_ring
-- 
cgit v1.2.3


From bfe4037e722ec672c9dafd5730d9132afeeb76e9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 16 Jul 2018 09:08:20 +0200
Subject: aio: implement IOCB_CMD_POLL

Simple one-shot poll through the io_submit() interface.  To poll for
a file descriptor the application should submit an iocb of type
IOCB_CMD_POLL.  It will poll the fd for the events specified in the
first 32 bits of the aio_buf field of the iocb.

Unlike poll or epoll without EPOLLONESHOT this interface always works
in one shot mode, that is once the iocb is completed, it will have to be
resubmitted.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Avi Kivity <avi@scylladb.com>
---
 fs/aio.c                     | 178 +++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/aio_abi.h |   6 +-
 2 files changed, 180 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index fe2018ada32c..2fd19521d8a8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -5,6 +5,7 @@
  *	Implements an efficient asynchronous io interface.
  *
  *	Copyright 2000, 2001, 2002 Red Hat, Inc.  All Rights Reserved.
+ *	Copyright 2018 Christoph Hellwig.
  *
  *	See ../COPYING for licensing terms.
  */
@@ -165,10 +166,21 @@ struct fsync_iocb {
 	bool			datasync;
 };
 
+struct poll_iocb {
+	struct file		*file;
+	struct wait_queue_head	*head;
+	__poll_t		events;
+	bool			woken;
+	bool			cancelled;
+	struct wait_queue_entry	wait;
+	struct work_struct	work;
+};
+
 struct aio_kiocb {
 	union {
 		struct kiocb		rw;
 		struct fsync_iocb	fsync;
+		struct poll_iocb	poll;
 	};
 
 	struct kioctx		*ki_ctx;
@@ -1601,6 +1613,169 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
 	return 0;
 }
 
+static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
+{
+	struct file *file = iocb->poll.file;
+
+	aio_complete(iocb, mangle_poll(mask), 0);
+	fput(file);
+}
+
+static void aio_poll_complete_work(struct work_struct *work)
+{
+	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
+	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+	struct poll_table_struct pt = { ._key = req->events };
+	struct kioctx *ctx = iocb->ki_ctx;
+	__poll_t mask = 0;
+
+	if (!READ_ONCE(req->cancelled))
+		mask = vfs_poll(req->file, &pt) & req->events;
+
+	/*
+	 * Note that ->ki_cancel callers also delete iocb from active_reqs after
+	 * calling ->ki_cancel.  We need the ctx_lock roundtrip here to
+	 * synchronize with them.  In the cancellation case the list_del_init
+	 * itself is not actually needed, but harmless so we keep it in to
+	 * avoid further branches in the fast path.
+	 */
+	spin_lock_irq(&ctx->ctx_lock);
+	if (!mask && !READ_ONCE(req->cancelled)) {
+		add_wait_queue(req->head, &req->wait);
+		spin_unlock_irq(&ctx->ctx_lock);
+		return;
+	}
+	list_del_init(&iocb->ki_list);
+	spin_unlock_irq(&ctx->ctx_lock);
+
+	aio_poll_complete(iocb, mask);
+}
+
+/* assumes we are called with irqs disabled */
+static int aio_poll_cancel(struct kiocb *iocb)
+{
+	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+	struct poll_iocb *req = &aiocb->poll;
+
+	spin_lock(&req->head->lock);
+	WRITE_ONCE(req->cancelled, true);
+	if (!list_empty(&req->wait.entry)) {
+		list_del_init(&req->wait.entry);
+		schedule_work(&aiocb->poll.work);
+	}
+	spin_unlock(&req->head->lock);
+
+	return 0;
+}
+
+static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+		void *key)
+{
+	struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
+	__poll_t mask = key_to_poll(key);
+
+	req->woken = true;
+
+	/* for instances that support it check for an event match first: */
+	if (mask && !(mask & req->events))
+		return 0;
+
+	list_del_init(&req->wait.entry);
+	schedule_work(&req->work);
+	return 1;
+}
+
+struct aio_poll_table {
+	struct poll_table_struct	pt;
+	struct aio_kiocb		*iocb;
+	int				error;
+};
+
+static void
+aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
+		struct poll_table_struct *p)
+{
+	struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
+
+	/* multiple wait queues per file are not supported */
+	if (unlikely(pt->iocb->poll.head)) {
+		pt->error = -EINVAL;
+		return;
+	}
+
+	pt->error = 0;
+	pt->iocb->poll.head = head;
+	add_wait_queue(head, &pt->iocb->poll.wait);
+}
+
+static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+{
+	struct kioctx *ctx = aiocb->ki_ctx;
+	struct poll_iocb *req = &aiocb->poll;
+	struct aio_poll_table apt;
+	__poll_t mask;
+
+	/* reject any unknown events outside the normal event mask. */
+	if ((u16)iocb->aio_buf != iocb->aio_buf)
+		return -EINVAL;
+	/* reject fields that are not defined for poll */
+	if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
+		return -EINVAL;
+
+	INIT_WORK(&req->work, aio_poll_complete_work);
+	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+	req->file = fget(iocb->aio_fildes);
+	if (unlikely(!req->file))
+		return -EBADF;
+
+	apt.pt._qproc = aio_poll_queue_proc;
+	apt.pt._key = req->events;
+	apt.iocb = aiocb;
+	apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
+
+	/* initialized the list so that we can do list_empty checks */
+	INIT_LIST_HEAD(&req->wait.entry);
+	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
+
+	/* one for removal from waitqueue, one for this function */
+	refcount_set(&aiocb->ki_refcnt, 2);
+
+	mask = vfs_poll(req->file, &apt.pt) & req->events;
+	if (unlikely(!req->head)) {
+		/* we did not manage to set up a waitqueue, done */
+		goto out;
+	}
+
+	spin_lock_irq(&ctx->ctx_lock);
+	spin_lock(&req->head->lock);
+	if (req->woken) {
+		/* wake_up context handles the rest */
+		mask = 0;
+		apt.error = 0;
+	} else if (mask || apt.error) {
+		/* if we get an error or a mask we are done */
+		WARN_ON_ONCE(list_empty(&req->wait.entry));
+		list_del_init(&req->wait.entry);
+	} else {
+		/* actually waiting for an event */
+		list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+		aiocb->ki_cancel = aio_poll_cancel;
+	}
+	spin_unlock(&req->head->lock);
+	spin_unlock_irq(&ctx->ctx_lock);
+
+out:
+	if (unlikely(apt.error)) {
+		fput(req->file);
+		return apt.error;
+	}
+
+	if (mask)
+		aio_poll_complete(aiocb, mask);
+	iocb_put(aiocb);
+	return 0;
+}
+
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			 bool compat)
 {
@@ -1674,6 +1849,9 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	case IOCB_CMD_FDSYNC:
 		ret = aio_fsync(&req->fsync, &iocb, true);
 		break;
+	case IOCB_CMD_POLL:
+		ret = aio_poll(req, &iocb);
+		break;
 	default:
 		pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
 		ret = -EINVAL;
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index d4593a6062ef..ce43d340f010 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -38,10 +38,8 @@ enum {
 	IOCB_CMD_PWRITE = 1,
 	IOCB_CMD_FSYNC = 2,
 	IOCB_CMD_FDSYNC = 3,
-	/* These two are experimental.
-	 * IOCB_CMD_PREADX = 4,
-	 * IOCB_CMD_POLL = 5,
-	 */
+	/* 4 was the experimental IOCB_CMD_PREADX */
+	IOCB_CMD_POLL = 5,
 	IOCB_CMD_NOOP = 6,
 	IOCB_CMD_PREADV = 7,
 	IOCB_CMD_PWRITEV = 8,
-- 
cgit v1.2.3


From e8693bcfa0b4a56268946f0756153d942cb66cf7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 16 Jul 2018 12:25:17 +0200
Subject: aio: allow direct aio poll completions for keyed wakeups

If we get a keyed wakeup for an aio poll waitqueue and the wake callback can
acquire the ctx_lock without spinning, we can just complete the iocb straight
from the wakeup callback to avoid a context switch.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Avi Kivity <avi@scylladb.com>
---
 fs/aio.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 2fd19521d8a8..29f2b5b57d32 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1672,13 +1672,26 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		void *key)
 {
 	struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
+	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
 	__poll_t mask = key_to_poll(key);
 
 	req->woken = true;
 
 	/* for instances that support it check for an event match first: */
-	if (mask && !(mask & req->events))
-		return 0;
+	if (mask) {
+		if (!(mask & req->events))
+			return 0;
+
+		/* try to complete the iocb inline if we can: */
+		if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
+			list_del(&iocb->ki_list);
+			spin_unlock(&iocb->ki_ctx->ctx_lock);
+
+			list_del_init(&req->wait.entry);
+			aio_poll_complete(iocb, mask);
+			return 1;
+		}
+	}
 
 	list_del_init(&req->wait.entry);
 	schedule_work(&req->work);
-- 
cgit v1.2.3


From a944442c2b8a420301e7830f976bab8cc86a2b4d Mon Sep 17 00:00:00 2001
From: Allen Pais <allen.lkml@gmail.com>
Date: Tue, 12 Jun 2018 17:18:25 +0530
Subject: btrfs: replace get_seconds with new 64bit time API

The get_seconds() function is deprecated as it truncates the timestamp
to 32 bits. Change it to ktime_get_real_seconds().

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       | 4 ++--
 fs/btrfs/dev-replace.c | 8 ++++----
 fs/btrfs/disk-io.c     | 4 ++--
 fs/btrfs/transaction.c | 2 +-
 fs/btrfs/transaction.h | 2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 118346aceea9..e671a1fcbbec 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -342,8 +342,8 @@ struct btrfs_path {
 					sizeof(struct btrfs_item))
 struct btrfs_dev_replace {
 	u64 replace_state;	/* see #define above */
-	u64 time_started;	/* seconds since 1-Jan-1970 */
-	u64 time_stopped;	/* seconds since 1-Jan-1970 */
+	time64_t time_started;	/* seconds since 1-Jan-1970 */
+	time64_t time_stopped;	/* seconds since 1-Jan-1970 */
 	atomic64_t num_write_errors;
 	atomic64_t num_uncorrectable_read_errors;
 
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index e2ba0419297a..1b30c38d05c9 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -465,7 +465,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
 	 * go to the tgtdev as well (refer to btrfs_map_block()).
 	 */
 	dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
-	dev_replace->time_started = get_seconds();
+	dev_replace->time_started = ktime_get_real_seconds();
 	dev_replace->cursor_left = 0;
 	dev_replace->committed_cursor_left = 0;
 	dev_replace->cursor_left_last_write_of_item = 0;
@@ -618,7 +618,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 			  : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
 	dev_replace->tgtdev = NULL;
 	dev_replace->srcdev = NULL;
-	dev_replace->time_stopped = get_seconds();
+	dev_replace->time_stopped = ktime_get_real_seconds();
 	dev_replace->item_needs_writeback = 1;
 
 	/* replace old device with new one in mapping tree */
@@ -807,7 +807,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
 		break;
 	}
 	dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
-	dev_replace->time_stopped = get_seconds();
+	dev_replace->time_stopped = ktime_get_real_seconds();
 	dev_replace->item_needs_writeback = 1;
 	btrfs_dev_replace_write_unlock(dev_replace);
 	btrfs_scrub_cancel(fs_info);
@@ -848,7 +848,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
 		dev_replace->replace_state =
 			BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
-		dev_replace->time_stopped = get_seconds();
+		dev_replace->time_stopped = ktime_get_real_seconds();
 		dev_replace->item_needs_writeback = 1;
 		btrfs_info(fs_info, "suspending dev_replace for unmount");
 		break;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 205092dc9390..f3224e23d5fa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1803,7 +1803,7 @@ static int transaction_kthread(void *arg)
 	struct btrfs_trans_handle *trans;
 	struct btrfs_transaction *cur;
 	u64 transid;
-	unsigned long now;
+	time64_t now;
 	unsigned long delay;
 	bool cannot_commit;
 
@@ -1819,7 +1819,7 @@ static int transaction_kthread(void *arg)
 			goto sleep;
 		}
 
-		now = get_seconds();
+		now = ktime_get_real_seconds();
 		if (cur->state < TRANS_STATE_BLOCKED &&
 		    !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
 		    (now < cur->start_time ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ff5f6c719976..56c8bab0b816 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -241,7 +241,7 @@ loop:
 	refcount_set(&cur_trans->use_count, 2);
 	atomic_set(&cur_trans->pending_ordered, 0);
 	cur_trans->flags = 0;
-	cur_trans->start_time = get_seconds();
+	cur_trans->start_time = ktime_get_real_seconds();
 
 	memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
 
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 94439482a0ec..4cbb1b55387d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -48,7 +48,7 @@ struct btrfs_transaction {
 	int aborted;
 	struct list_head list;
 	struct extent_io_tree dirty_pages;
-	unsigned long start_time;
+	time64_t start_time;
 	wait_queue_head_t writer_wait;
 	wait_queue_head_t commit_wait;
 	wait_queue_head_t pending_wait;
-- 
cgit v1.2.3


From 9bebe665c3e4f4af30add8602217938d8c17ee23 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 6 Jun 2018 13:13:12 +0800
Subject: btrfs: scrub: Remove unused copy_nocow_pages and its callchain

Since commit ac0b4145d662a3b9e340 ("btrfs: scrub: Don't use inode pages
for device replace") the function is not used and we can remove all
functions down the call chain.

There was an optimization that reused inode pages to speed up device
replace, but broke when there was nodatasum and compressed page. The
potential performance gain is small so we don't lose much by removing
it and using scrub_pages same as the other pages.

Signed-off-by: Qu Wenruo <wqu@suse.com>
[ update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 365 -------------------------------------------------------
 1 file changed, 365 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6702896cdb8f..5bce2330ec64 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -197,23 +197,6 @@ struct scrub_fixup_nodatasum {
 	int			mirror_num;
 };
 
-struct scrub_nocow_inode {
-	u64			inum;
-	u64			offset;
-	u64			root;
-	struct list_head	list;
-};
-
-struct scrub_copy_nocow_ctx {
-	struct scrub_ctx	*sctx;
-	u64			logical;
-	u64			len;
-	int			mirror_num;
-	u64			physical_for_dev_replace;
-	struct list_head	inodes;
-	struct btrfs_work	work;
-};
-
 struct scrub_warning {
 	struct btrfs_path	*path;
 	u64			extent_item_size;
@@ -277,13 +260,6 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
 static void scrub_wr_submit(struct scrub_ctx *sctx);
 static void scrub_wr_bio_end_io(struct bio *bio);
 static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
-static int write_page_nocow(struct scrub_ctx *sctx,
-			    u64 physical_for_dev_replace, struct page *page);
-static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
-				      struct scrub_copy_nocow_ctx *ctx);
-static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
-			    int mirror_num, u64 physical_for_dev_replace);
-static void copy_nocow_pages_worker(struct btrfs_work *work);
 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 static void scrub_put_ctx(struct scrub_ctx *sctx);
@@ -2800,17 +2776,10 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
 			have_csum = scrub_find_csum(sctx, logical, csum);
 			if (have_csum == 0)
 				++sctx->stat.no_csum;
-			if (0 && sctx->is_dev_replace && !have_csum) {
-				ret = copy_nocow_pages(sctx, logical, l,
-						       mirror_num,
-						      physical_for_dev_replace);
-				goto behind_scrub_pages;
-			}
 		}
 		ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
 				  mirror_num, have_csum ? csum : NULL, 0,
 				  physical_for_dev_replace);
-behind_scrub_pages:
 		if (ret)
 			return ret;
 		len -= l;
@@ -4072,10 +4041,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
 		if (!fs_info->scrub_wr_completion_workers)
 			goto fail_scrub_wr_completion_workers;
 
-		fs_info->scrub_nocow_workers =
-			btrfs_alloc_workqueue(fs_info, "scrubnc", flags, 1, 0);
-		if (!fs_info->scrub_nocow_workers)
-			goto fail_scrub_nocow_workers;
 		fs_info->scrub_parity_workers =
 			btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
 					      max_active, 2);
@@ -4086,8 +4051,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
 	return 0;
 
 fail_scrub_parity_workers:
-	btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
-fail_scrub_nocow_workers:
 	btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
 fail_scrub_wr_completion_workers:
 	btrfs_destroy_workqueue(fs_info->scrub_workers);
@@ -4100,7 +4063,6 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
 	if (--fs_info->scrub_workers_refcnt == 0) {
 		btrfs_destroy_workqueue(fs_info->scrub_workers);
 		btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
-		btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
 		btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
 	}
 	WARN_ON(fs_info->scrub_workers_refcnt < 0);
@@ -4359,330 +4321,3 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
 	*extent_dev = bbio->stripes[0].dev;
 	btrfs_put_bbio(bbio);
 }
-
-static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
-			    int mirror_num, u64 physical_for_dev_replace)
-{
-	struct scrub_copy_nocow_ctx *nocow_ctx;
-	struct btrfs_fs_info *fs_info = sctx->fs_info;
-
-	nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
-	if (!nocow_ctx) {
-		spin_lock(&sctx->stat_lock);
-		sctx->stat.malloc_errors++;
-		spin_unlock(&sctx->stat_lock);
-		return -ENOMEM;
-	}
-
-	scrub_pending_trans_workers_inc(sctx);
-
-	nocow_ctx->sctx = sctx;
-	nocow_ctx->logical = logical;
-	nocow_ctx->len = len;
-	nocow_ctx->mirror_num = mirror_num;
-	nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
-	btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
-			copy_nocow_pages_worker, NULL, NULL);
-	INIT_LIST_HEAD(&nocow_ctx->inodes);
-	btrfs_queue_work(fs_info->scrub_nocow_workers,
-			 &nocow_ctx->work);
-
-	return 0;
-}
-
-static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
-{
-	struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
-	struct scrub_nocow_inode *nocow_inode;
-
-	nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
-	if (!nocow_inode)
-		return -ENOMEM;
-	nocow_inode->inum = inum;
-	nocow_inode->offset = offset;
-	nocow_inode->root = root;
-	list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
-	return 0;
-}
-
-#define COPY_COMPLETE 1
-
-static void copy_nocow_pages_worker(struct btrfs_work *work)
-{
-	struct scrub_copy_nocow_ctx *nocow_ctx =
-		container_of(work, struct scrub_copy_nocow_ctx, work);
-	struct scrub_ctx *sctx = nocow_ctx->sctx;
-	struct btrfs_fs_info *fs_info = sctx->fs_info;
-	struct btrfs_root *root = fs_info->extent_root;
-	u64 logical = nocow_ctx->logical;
-	u64 len = nocow_ctx->len;
-	int mirror_num = nocow_ctx->mirror_num;
-	u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
-	int ret;
-	struct btrfs_trans_handle *trans = NULL;
-	struct btrfs_path *path;
-	int not_written = 0;
-
-	path = btrfs_alloc_path();
-	if (!path) {
-		spin_lock(&sctx->stat_lock);
-		sctx->stat.malloc_errors++;
-		spin_unlock(&sctx->stat_lock);
-		not_written = 1;
-		goto out;
-	}
-
-	trans = btrfs_join_transaction(root);
-	if (IS_ERR(trans)) {
-		not_written = 1;
-		goto out;
-	}
-
-	ret = iterate_inodes_from_logical(logical, fs_info, path,
-			record_inode_for_nocow, nocow_ctx, false);
-	if (ret != 0 && ret != -ENOENT) {
-		btrfs_warn(fs_info,
-			   "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
-			   logical, physical_for_dev_replace, len, mirror_num,
-			   ret);
-		not_written = 1;
-		goto out;
-	}
-
-	btrfs_end_transaction(trans);
-	trans = NULL;
-	while (!list_empty(&nocow_ctx->inodes)) {
-		struct scrub_nocow_inode *entry;
-		entry = list_first_entry(&nocow_ctx->inodes,
-					 struct scrub_nocow_inode,
-					 list);
-		list_del_init(&entry->list);
-		ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
-						 entry->root, nocow_ctx);
-		kfree(entry);
-		if (ret == COPY_COMPLETE) {
-			ret = 0;
-			break;
-		} else if (ret) {
-			break;
-		}
-	}
-out:
-	while (!list_empty(&nocow_ctx->inodes)) {
-		struct scrub_nocow_inode *entry;
-		entry = list_first_entry(&nocow_ctx->inodes,
-					 struct scrub_nocow_inode,
-					 list);
-		list_del_init(&entry->list);
-		kfree(entry);
-	}
-	if (trans && !IS_ERR(trans))
-		btrfs_end_transaction(trans);
-	if (not_written)
-		btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
-					    num_uncorrectable_read_errors);
-
-	btrfs_free_path(path);
-	kfree(nocow_ctx);
-
-	scrub_pending_trans_workers_dec(sctx);
-}
-
-static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
-				 u64 logical)
-{
-	struct extent_state *cached_state = NULL;
-	struct btrfs_ordered_extent *ordered;
-	struct extent_io_tree *io_tree;
-	struct extent_map *em;
-	u64 lockstart = start, lockend = start + len - 1;
-	int ret = 0;
-
-	io_tree = &inode->io_tree;
-
-	lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
-	ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
-	if (ordered) {
-		btrfs_put_ordered_extent(ordered);
-		ret = 1;
-		goto out_unlock;
-	}
-
-	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-	if (IS_ERR(em)) {
-		ret = PTR_ERR(em);
-		goto out_unlock;
-	}
-
-	/*
-	 * This extent does not actually cover the logical extent anymore,
-	 * move on to the next inode.
-	 */
-	if (em->block_start > logical ||
-	    em->block_start + em->block_len < logical + len ||
-	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
-		free_extent_map(em);
-		ret = 1;
-		goto out_unlock;
-	}
-	free_extent_map(em);
-
-out_unlock:
-	unlock_extent_cached(io_tree, lockstart, lockend, &cached_state);
-	return ret;
-}
-
-static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
-				      struct scrub_copy_nocow_ctx *nocow_ctx)
-{
-	struct btrfs_fs_info *fs_info = nocow_ctx->sctx->fs_info;
-	struct btrfs_key key;
-	struct inode *inode;
-	struct page *page;
-	struct btrfs_root *local_root;
-	struct extent_io_tree *io_tree;
-	u64 physical_for_dev_replace;
-	u64 nocow_ctx_logical;
-	u64 len = nocow_ctx->len;
-	unsigned long index;
-	int srcu_index;
-	int ret = 0;
-	int err = 0;
-
-	key.objectid = root;
-	key.type = BTRFS_ROOT_ITEM_KEY;
-	key.offset = (u64)-1;
-
-	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
-
-	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
-	if (IS_ERR(local_root)) {
-		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
-		return PTR_ERR(local_root);
-	}
-
-	key.type = BTRFS_INODE_ITEM_KEY;
-	key.objectid = inum;
-	key.offset = 0;
-	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
-	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
-	if (IS_ERR(inode))
-		return PTR_ERR(inode);
-
-	/* Avoid truncate/dio/punch hole.. */
-	inode_lock(inode);
-	inode_dio_wait(inode);
-
-	physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
-	io_tree = &BTRFS_I(inode)->io_tree;
-	nocow_ctx_logical = nocow_ctx->logical;
-
-	ret = check_extent_to_block(BTRFS_I(inode), offset, len,
-			nocow_ctx_logical);
-	if (ret) {
-		ret = ret > 0 ? 0 : ret;
-		goto out;
-	}
-
-	while (len >= PAGE_SIZE) {
-		index = offset >> PAGE_SHIFT;
-again:
-		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
-		if (!page) {
-			btrfs_err(fs_info, "find_or_create_page() failed");
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		if (PageUptodate(page)) {
-			if (PageDirty(page))
-				goto next_page;
-		} else {
-			ClearPageError(page);
-			err = extent_read_full_page(io_tree, page,
-							   btrfs_get_extent,
-							   nocow_ctx->mirror_num);
-			if (err) {
-				ret = err;
-				goto next_page;
-			}
-
-			lock_page(page);
-			/*
-			 * If the page has been remove from the page cache,
-			 * the data on it is meaningless, because it may be
-			 * old one, the new data may be written into the new
-			 * page in the page cache.
-			 */
-			if (page->mapping != inode->i_mapping) {
-				unlock_page(page);
-				put_page(page);
-				goto again;
-			}
-			if (!PageUptodate(page)) {
-				ret = -EIO;
-				goto next_page;
-			}
-		}
-
-		ret = check_extent_to_block(BTRFS_I(inode), offset, len,
-					    nocow_ctx_logical);
-		if (ret) {
-			ret = ret > 0 ? 0 : ret;
-			goto next_page;
-		}
-
-		err = write_page_nocow(nocow_ctx->sctx,
-				       physical_for_dev_replace, page);
-		if (err)
-			ret = err;
-next_page:
-		unlock_page(page);
-		put_page(page);
-
-		if (ret)
-			break;
-
-		offset += PAGE_SIZE;
-		physical_for_dev_replace += PAGE_SIZE;
-		nocow_ctx_logical += PAGE_SIZE;
-		len -= PAGE_SIZE;
-	}
-	ret = COPY_COMPLETE;
-out:
-	inode_unlock(inode);
-	iput(inode);
-	return ret;
-}
-
-static int write_page_nocow(struct scrub_ctx *sctx,
-			    u64 physical_for_dev_replace, struct page *page)
-{
-	struct bio *bio;
-	struct btrfs_device *dev;
-
-	dev = sctx->wr_tgtdev;
-	if (!dev)
-		return -EIO;
-	if (!dev->bdev) {
-		btrfs_warn_rl(dev->fs_info,
-			"scrub write_page_nocow(bdev == NULL) is unexpected");
-		return -EIO;
-	}
-	bio = btrfs_io_bio_alloc(1);
-	bio->bi_iter.bi_size = 0;
-	bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
-	bio_set_dev(bio, dev->bdev);
-	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
-	/* bio_add_page won't fail on a freshly allocated bio */
-	bio_add_page(bio, page, PAGE_SIZE, 0);
-
-	if (btrfsic_submit_bio_wait(bio)) {
-		bio_put(bio);
-		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
-		return -EIO;
-	}
-
-	bio_put(bio);
-	return 0;
-}
-- 
cgit v1.2.3


From bd3c685ed9fd3763615a51a70e19ff08a456e3e1 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Mon, 18 Jun 2018 14:59:25 +0300
Subject: btrfs: Document __btrfs_inc_extent_ref

Here is a doc-only patch which tries to deobfuscate the terra-incognita
that arguments for delayed refs are.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3d9fe58c0080..2c55f3dce27b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2207,6 +2207,40 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
+/*
+ * __btrfs_inc_extent_ref - insert backreference for a given extent
+ *
+ * @trans:	    Handle of transaction
+ *
+ * @node:	    The delayed ref node used to get the bytenr/length for
+ *		    extent whose references are incremented.
+ *
+ * @parent:	    If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
+ *		    BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
+ *		    bytenr of the parent block. Since new extents are always
+ *		    created with indirect references, this will only be the case
+ *		    when relocating a shared extent. In that case, root_objectid
+ *		    will be BTRFS_TREE_RELOC_OBJECTID. Otheriwse, parent must
+ *		    be 0
+ *
+ * @root_objectid:  The id of the root where this modification has originated,
+ *		    this can be either one of the well-known metadata trees or
+ *		    the subvolume id which references this extent.
+ *
+ * @owner:	    For data extents it is the inode number of the owning file.
+ *		    For metadata extents this parameter holds the level in the
+ *		    tree of the extent.
+ *
+ * @offset:	    For metadata extents the offset is ignored and is currently
+ *		    always passed as 0. For data extents it is the fileoffset
+ *		    this extent belongs to.
+ *
+ * @refs_to_add     Number of references to add
+ *
+ * @extent_op       Pointer to a structure, holding information necessary when
+ *                  updating a tree block's flags
+ *
+ */
 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 				  struct btrfs_fs_info *fs_info,
 				  struct btrfs_delayed_ref_node *node,
-- 
cgit v1.2.3


From 16d1c062c7de2999ea7be61d31070fa4ce3d99c4 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Mon, 18 Jun 2018 14:59:26 +0300
Subject: btrfs: Fix comment in lookup_inline_extent_backref

The comment wrongfully states that the owner parameter is the level of
the parent block. In fact owner is the level of the current block and
by adding 1 to it we can eventually get to the parent/root.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2c55f3dce27b..bdd5091433ab 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1635,8 +1635,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 		extra_size = -1;
 
 	/*
-	 * Owner is our parent level, so we can just add one to get the level
-	 * for the block we are interested in.
+	 * Owner is our level, so we can just add one to get the level for the
+	 * block we are interested in.
 	 */
 	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
 		key.type = BTRFS_METADATA_ITEM_KEY;
-- 
cgit v1.2.3


From b5e6c3e170b77025b5f6174258c7ad71eed2d4de Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Wed, 23 May 2018 11:58:33 -0400
Subject: btrfs: always wait on ordered extents at fsync time

There's a priority inversion that exists currently with btrfs fsync.  In
some cases we will collect outstanding ordered extents onto a list and
only wait on them at the very last second.  However this "very last
second" falls inside of a transaction handle, so if we are in a lower
priority cgroup we can end up holding the transaction open for longer
than needed, so if a high priority cgroup is also trying to fsync()
it'll see latency.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c | 56 ++++----------------------------------------------------
 1 file changed, 4 insertions(+), 52 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 51e77d72068a..d5be80fb427c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2068,53 +2068,12 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	atomic_inc(&root->log_batch);
 	full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 			     &BTRFS_I(inode)->runtime_flags);
+
 	/*
-	 * We might have have had more pages made dirty after calling
-	 * start_ordered_ops and before acquiring the inode's i_mutex.
+	 * We have to do this here to avoid the priority inversion of waiting on
+	 * IO of a lower priority task while holding a transaciton open.
 	 */
-	if (full_sync) {
-		/*
-		 * For a full sync, we need to make sure any ordered operations
-		 * start and finish before we start logging the inode, so that
-		 * all extents are persisted and the respective file extent
-		 * items are in the fs/subvol btree.
-		 */
-		ret = btrfs_wait_ordered_range(inode, start, len);
-	} else {
-		/*
-		 * Start any new ordered operations before starting to log the
-		 * inode. We will wait for them to finish in btrfs_sync_log().
-		 *
-		 * Right before acquiring the inode's mutex, we might have new
-		 * writes dirtying pages, which won't immediately start the
-		 * respective ordered operations - that is done through the
-		 * fill_delalloc callbacks invoked from the writepage and
-		 * writepages address space operations. So make sure we start
-		 * all ordered operations before starting to log our inode. Not
-		 * doing this means that while logging the inode, writeback
-		 * could start and invoke writepage/writepages, which would call
-		 * the fill_delalloc callbacks (cow_file_range,
-		 * submit_compressed_extents). These callbacks add first an
-		 * extent map to the modified list of extents and then create
-		 * the respective ordered operation, which means in
-		 * tree-log.c:btrfs_log_inode() we might capture all existing
-		 * ordered operations (with btrfs_get_logged_extents()) before
-		 * the fill_delalloc callback adds its ordered operation, and by
-		 * the time we visit the modified list of extent maps (with
-		 * btrfs_log_changed_extents()), we see and process the extent
-		 * map they created. We then use the extent map to construct a
-		 * file extent item for logging without waiting for the
-		 * respective ordered operation to finish - this file extent
-		 * item points to a disk location that might not have yet been
-		 * written to, containing random data - so after a crash a log
-		 * replay will make our inode have file extent items that point
-		 * to disk locations containing invalid data, as we returned
-		 * success to userspace without waiting for the respective
-		 * ordered operation to finish, because it wasn't captured by
-		 * btrfs_get_logged_extents().
-		 */
-		ret = start_ordered_ops(inode, start, end);
-	}
+	ret = btrfs_wait_ordered_range(inode, start, len);
 	if (ret) {
 		inode_unlock(inode);
 		goto out;
@@ -2239,13 +2198,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 				goto out;
 			}
 		}
-		if (!full_sync) {
-			ret = btrfs_wait_ordered_range(inode, start, len);
-			if (ret) {
-				btrfs_end_transaction(trans);
-				goto out;
-			}
-		}
 		ret = btrfs_commit_transaction(trans);
 	} else {
 		ret = btrfs_end_transaction(trans);
-- 
cgit v1.2.3


From e7175a692765940f3ac3f0c005b9a766a59303d7 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Wed, 23 May 2018 11:58:34 -0400
Subject: btrfs: remove the wait ordered logic in the log_one_extent path

Since we are waiting on all ordered extents at the start of the fsync()
path we don't need to wait on any logged ordered extents, and we don't
need to look up the checksums on the ordered extents as they will
already be on disk prior to getting here.  Rework this so we're only
looking up and copying the on-disk checksums for the extent range we
care about.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 118 +++++-----------------------------------------------
 1 file changed, 10 insertions(+), 108 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f8220ec02036..98cbb31ec5a8 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4078,127 +4078,30 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
 	return 0;
 }
 
-static int wait_ordered_extents(struct btrfs_trans_handle *trans,
-				struct inode *inode,
-				struct btrfs_root *root,
-				const struct extent_map *em,
-				const struct list_head *logged_list,
-				bool *ordered_io_error)
+static int log_extent_csums(struct btrfs_trans_handle *trans,
+			    struct btrfs_inode *inode,
+			    struct btrfs_root *root,
+			    const struct extent_map *em)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_ordered_extent *ordered;
 	struct btrfs_root *log = root->log_root;
-	u64 mod_start = em->mod_start;
-	u64 mod_len = em->mod_len;
-	const bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 	u64 csum_offset;
 	u64 csum_len;
 	LIST_HEAD(ordered_sums);
 	int ret = 0;
 
-	*ordered_io_error = false;
-
-	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
+	if (inode->flags & BTRFS_INODE_NODATASUM ||
+	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
 	    em->block_start == EXTENT_MAP_HOLE)
 		return 0;
 
-	/*
-	 * Wait far any ordered extent that covers our extent map. If it
-	 * finishes without an error, first check and see if our csums are on
-	 * our outstanding ordered extents.
-	 */
-	list_for_each_entry(ordered, logged_list, log_list) {
-		struct btrfs_ordered_sum *sum;
-
-		if (!mod_len)
-			break;
-
-		if (ordered->file_offset + ordered->len <= mod_start ||
-		    mod_start + mod_len <= ordered->file_offset)
-			continue;
-
-		if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
-		    !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) &&
-		    !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
-			const u64 start = ordered->file_offset;
-			const u64 end = ordered->file_offset + ordered->len - 1;
-
-			WARN_ON(ordered->inode != inode);
-			filemap_fdatawrite_range(inode->i_mapping, start, end);
-		}
-
-		wait_event(ordered->wait,
-			   (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) ||
-			    test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)));
-
-		if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) {
-			/*
-			 * Clear the AS_EIO/AS_ENOSPC flags from the inode's
-			 * i_mapping flags, so that the next fsync won't get
-			 * an outdated io error too.
-			 */
-			filemap_check_errors(inode->i_mapping);
-			*ordered_io_error = true;
-			break;
-		}
-		/*
-		 * We are going to copy all the csums on this ordered extent, so
-		 * go ahead and adjust mod_start and mod_len in case this
-		 * ordered extent has already been logged.
-		 */
-		if (ordered->file_offset > mod_start) {
-			if (ordered->file_offset + ordered->len >=
-			    mod_start + mod_len)
-				mod_len = ordered->file_offset - mod_start;
-			/*
-			 * If we have this case
-			 *
-			 * |--------- logged extent ---------|
-			 *       |----- ordered extent ----|
-			 *
-			 * Just don't mess with mod_start and mod_len, we'll
-			 * just end up logging more csums than we need and it
-			 * will be ok.
-			 */
-		} else {
-			if (ordered->file_offset + ordered->len <
-			    mod_start + mod_len) {
-				mod_len = (mod_start + mod_len) -
-					(ordered->file_offset + ordered->len);
-				mod_start = ordered->file_offset +
-					ordered->len;
-			} else {
-				mod_len = 0;
-			}
-		}
-
-		if (skip_csum)
-			continue;
-
-		/*
-		 * To keep us from looping for the above case of an ordered
-		 * extent that falls inside of the logged extent.
-		 */
-		if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
-				     &ordered->flags))
-			continue;
-
-		list_for_each_entry(sum, &ordered->list, list) {
-			ret = btrfs_csum_file_blocks(trans, log, sum);
-			if (ret)
-				break;
-		}
-	}
-
-	if (*ordered_io_error || !mod_len || ret || skip_csum)
-		return ret;
-
+	/* If we're compressed we have to save the entire range of csums. */
 	if (em->compress_type) {
 		csum_offset = 0;
 		csum_len = max(em->block_len, em->orig_block_len);
 	} else {
-		csum_offset = mod_start - em->start;
-		csum_len = mod_len;
+		csum_offset = em->mod_start - em->start;
+		csum_len = em->mod_len;
 	}
 
 	/* block start is already adjusted for the file extent offset. */
@@ -4240,8 +4143,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 	int extent_inserted = 0;
 	bool ordered_io_err = false;
 
-	ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em,
-			logged_list, &ordered_io_err);
+	ret = log_extent_csums(trans, inode, root, em);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From a2120a473a80905e0275df9b0427fa7fa1187b72 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Wed, 23 May 2018 11:58:35 -0400
Subject: btrfs: clean up the left over logged_list usage

We no longer use this list we've passed around so remove it everywhere.
Also remove the extra checks for ordered/filemap errors as this is
handled higher up now that we're waiting on ordered_extents before
getting to the tree log code.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 32 ++------------------------------
 1 file changed, 2 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 98cbb31ec5a8..dce499071c86 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4129,7 +4129,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 			  struct btrfs_inode *inode, struct btrfs_root *root,
 			  const struct extent_map *em,
 			  struct btrfs_path *path,
-			  const struct list_head *logged_list,
 			  struct btrfs_log_ctx *ctx)
 {
 	struct btrfs_root *log = root->log_root;
@@ -4141,17 +4140,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 	u64 block_len;
 	int ret;
 	int extent_inserted = 0;
-	bool ordered_io_err = false;
 
 	ret = log_extent_csums(trans, inode, root, em);
 	if (ret)
 		return ret;
 
-	if (ordered_io_err) {
-		ctx->io_err = -EIO;
-		return ctx->io_err;
-	}
-
 	btrfs_init_map_token(&token);
 
 	ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start,
@@ -4326,7 +4319,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root,
 				     struct btrfs_inode *inode,
 				     struct btrfs_path *path,
-				     struct list_head *logged_list,
 				     struct btrfs_log_ctx *ctx,
 				     const u64 start,
 				     const u64 end)
@@ -4382,20 +4374,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	}
 
 	list_sort(NULL, &extents, extent_cmp);
-	btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
-	/*
-	 * Some ordered extents started by fsync might have completed
-	 * before we could collect them into the list logged_list, which
-	 * means they're gone, not in our logged_list nor in the inode's
-	 * ordered tree. We want the application/user space to know an
-	 * error happened while attempting to persist file data so that
-	 * it can take proper action. If such error happened, we leave
-	 * without writing to the log tree and the fsync must report the
-	 * file data write error and not commit the current transaction.
-	 */
-	ret = filemap_check_errors(inode->vfs_inode.i_mapping);
-	if (ret)
-		ctx->io_err = ret;
 process:
 	while (!list_empty(&extents)) {
 		em = list_entry(extents.next, struct extent_map, list);
@@ -4414,8 +4392,7 @@ process:
 
 		write_unlock(&tree->lock);
 
-		ret = log_one_extent(trans, inode, root, em, path, logged_list,
-				     ctx);
+		ret = log_one_extent(trans, inode, root, em, path, ctx);
 		write_lock(&tree->lock);
 		clear_em_logging(tree, em);
 		free_extent_map(em);
@@ -4800,7 +4777,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	struct btrfs_key min_key;
 	struct btrfs_key max_key;
 	struct btrfs_root *log = root->log_root;
-	LIST_HEAD(logged_list);
 	u64 last_extent = 0;
 	int err = 0;
 	int ret;
@@ -5137,7 +5113,7 @@ log_extents:
 	}
 	if (fast_search) {
 		ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-						&logged_list, ctx, start, end);
+						ctx, start, end);
 		if (ret) {
 			err = ret;
 			goto out_unlock;
@@ -5188,10 +5164,6 @@ log_extents:
 	inode->last_log_commit = inode->last_sub_trans;
 	spin_unlock(&inode->lock);
 out_unlock:
-	if (unlikely(err))
-		btrfs_put_logged_extents(&logged_list);
-	else
-		btrfs_submit_logged_extents(&logged_list, log);
 	mutex_unlock(&inode->log_mutex);
 
 	btrfs_free_path(path);
-- 
cgit v1.2.3


From 5636cf7d6dc86f47fd66757dae088e35014da464 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Wed, 23 May 2018 11:58:36 -0400
Subject: btrfs: remove the logged extents infrastructure

This is no longer used anywhere, remove all of it.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ordered-data.c      | 123 -------------------------------------------
 fs/btrfs/ordered-data.h      |  20 ++-----
 fs/btrfs/tree-log.c          |  16 ------
 include/trace/events/btrfs.h |   1 -
 4 files changed, 3 insertions(+), 157 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2e1a1694a33d..c35d2aed5ff8 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -421,129 +421,6 @@ out:
 	return ret == 0;
 }
 
-/* Needs to either be called under a log transaction or the log_mutex */
-void btrfs_get_logged_extents(struct btrfs_inode *inode,
-			      struct list_head *logged_list,
-			      const loff_t start,
-			      const loff_t end)
-{
-	struct btrfs_ordered_inode_tree *tree;
-	struct btrfs_ordered_extent *ordered;
-	struct rb_node *n;
-	struct rb_node *prev;
-
-	tree = &inode->ordered_tree;
-	spin_lock_irq(&tree->lock);
-	n = __tree_search(&tree->tree, end, &prev);
-	if (!n)
-		n = prev;
-	for (; n; n = rb_prev(n)) {
-		ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
-		if (ordered->file_offset > end)
-			continue;
-		if (entry_end(ordered) <= start)
-			break;
-		if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
-			continue;
-		list_add(&ordered->log_list, logged_list);
-		refcount_inc(&ordered->refs);
-	}
-	spin_unlock_irq(&tree->lock);
-}
-
-void btrfs_put_logged_extents(struct list_head *logged_list)
-{
-	struct btrfs_ordered_extent *ordered;
-
-	while (!list_empty(logged_list)) {
-		ordered = list_first_entry(logged_list,
-					   struct btrfs_ordered_extent,
-					   log_list);
-		list_del_init(&ordered->log_list);
-		btrfs_put_ordered_extent(ordered);
-	}
-}
-
-void btrfs_submit_logged_extents(struct list_head *logged_list,
-				 struct btrfs_root *log)
-{
-	int index = log->log_transid % 2;
-
-	spin_lock_irq(&log->log_extents_lock[index]);
-	list_splice_tail(logged_list, &log->logged_list[index]);
-	spin_unlock_irq(&log->log_extents_lock[index]);
-}
-
-void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *log, u64 transid)
-{
-	struct btrfs_ordered_extent *ordered;
-	int index = transid % 2;
-
-	spin_lock_irq(&log->log_extents_lock[index]);
-	while (!list_empty(&log->logged_list[index])) {
-		struct inode *inode;
-		ordered = list_first_entry(&log->logged_list[index],
-					   struct btrfs_ordered_extent,
-					   log_list);
-		list_del_init(&ordered->log_list);
-		inode = ordered->inode;
-		spin_unlock_irq(&log->log_extents_lock[index]);
-
-		if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
-		    !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
-			u64 start = ordered->file_offset;
-			u64 end = ordered->file_offset + ordered->len - 1;
-
-			WARN_ON(!inode);
-			filemap_fdatawrite_range(inode->i_mapping, start, end);
-		}
-		wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
-						   &ordered->flags));
-
-		/*
-		 * In order to keep us from losing our ordered extent
-		 * information when committing the transaction we have to make
-		 * sure that any logged extents are completed when we go to
-		 * commit the transaction.  To do this we simply increase the
-		 * current transactions pending_ordered counter and decrement it
-		 * when the ordered extent completes.
-		 */
-		if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
-			struct btrfs_ordered_inode_tree *tree;
-
-			tree = &BTRFS_I(inode)->ordered_tree;
-			spin_lock_irq(&tree->lock);
-			if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
-				set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
-				atomic_inc(&trans->transaction->pending_ordered);
-			}
-			spin_unlock_irq(&tree->lock);
-		}
-		btrfs_put_ordered_extent(ordered);
-		spin_lock_irq(&log->log_extents_lock[index]);
-	}
-	spin_unlock_irq(&log->log_extents_lock[index]);
-}
-
-void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
-{
-	struct btrfs_ordered_extent *ordered;
-	int index = transid % 2;
-
-	spin_lock_irq(&log->log_extents_lock[index]);
-	while (!list_empty(&log->logged_list[index])) {
-		ordered = list_first_entry(&log->logged_list[index],
-					   struct btrfs_ordered_extent,
-					   log_list);
-		list_del_init(&ordered->log_list);
-		spin_unlock_irq(&log->log_extents_lock[index]);
-		btrfs_put_ordered_extent(ordered);
-		spin_lock_irq(&log->log_extents_lock[index]);
-	}
-	spin_unlock_irq(&log->log_extents_lock[index]);
-}
-
 /*
  * used to drop a reference on an ordered extent.  This will free
  * the extent if the last reference is dropped
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 3be443fb3001..b2d3f6a091f7 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -54,15 +54,11 @@ struct btrfs_ordered_sum {
 #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
 				       * has done its due diligence in updating
 				       * the isize. */
-#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered
-				       ordered extent */
-#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */
+#define BTRFS_ORDERED_TRUNCATED 8 /* Set when we have to truncate an extent */
 
-#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent
-				 * in the logging code. */
-#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
+#define BTRFS_ORDERED_PENDING 9 /* We are waiting for this ordered extent to
 				  * complete in the current transaction. */
-#define BTRFS_ORDERED_REGULAR 12 /* Regular IO for COW */
+#define BTRFS_ORDERED_REGULAR 10 /* Regular IO for COW */
 
 struct btrfs_ordered_extent {
 	/* logical offset in the file */
@@ -193,16 +189,6 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
 			       const u64 range_start, const u64 range_len);
 u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
 			      const u64 range_start, const u64 range_len);
-void btrfs_get_logged_extents(struct btrfs_inode *inode,
-			      struct list_head *logged_list,
-			      const loff_t start,
-			      const loff_t end);
-void btrfs_put_logged_extents(struct list_head *logged_list);
-void btrfs_submit_logged_extents(struct list_head *logged_list,
-				 struct btrfs_root *log);
-void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *log, u64 transid);
-void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
 int __init ordered_data_init(void);
 void __cold ordered_data_exit(void);
 
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index dce499071c86..daf32dc94dc3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2933,7 +2933,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	/* bail out if we need to do a full commit */
 	if (btrfs_need_log_full_commit(fs_info, trans)) {
 		ret = -EAGAIN;
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&root->log_mutex);
 		goto out;
 	}
@@ -2951,7 +2950,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	if (ret) {
 		blk_finish_plug(&plug);
 		btrfs_abort_transaction(trans, ret);
-		btrfs_free_logged_extents(log, log_transid);
 		btrfs_set_log_full_commit(fs_info, trans);
 		mutex_unlock(&root->log_mutex);
 		goto out;
@@ -3002,7 +3000,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 			goto out;
 		}
 		btrfs_wait_tree_log_extents(log, mark);
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		ret = -EAGAIN;
 		goto out;
@@ -3020,7 +3017,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	if (atomic_read(&log_root_tree->log_commit[index2])) {
 		blk_finish_plug(&plug);
 		ret = btrfs_wait_tree_log_extents(log, mark);
-		btrfs_wait_logged_extents(trans, log, log_transid);
 		wait_log_commit(log_root_tree,
 				root_log_ctx.log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
@@ -3045,7 +3041,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	if (btrfs_need_log_full_commit(fs_info, trans)) {
 		blk_finish_plug(&plug);
 		btrfs_wait_tree_log_extents(log, mark);
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		ret = -EAGAIN;
 		goto out_wake_log_root;
@@ -3058,7 +3053,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	if (ret) {
 		btrfs_set_log_full_commit(fs_info, trans);
 		btrfs_abort_transaction(trans, ret);
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		goto out_wake_log_root;
 	}
@@ -3068,11 +3062,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 						  EXTENT_NEW | EXTENT_DIRTY);
 	if (ret) {
 		btrfs_set_log_full_commit(fs_info, trans);
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		goto out_wake_log_root;
 	}
-	btrfs_wait_logged_extents(trans, log, log_transid);
 
 	btrfs_set_super_log_root(fs_info->super_for_commit,
 				 log_root_tree->node->start);
@@ -3159,14 +3151,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
 				  EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
 	}
 
-	/*
-	 * We may have short-circuited the log tree with the full commit logic
-	 * and left ordered extents on our list, so clear these out to keep us
-	 * from leaking inodes and memory.
-	 */
-	btrfs_free_logged_extents(log, 0);
-	btrfs_free_logged_extents(log, 1);
-
 	free_extent_buffer(log->node);
 	kfree(log);
 }
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 39b94ec965be..7057cc99d267 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -433,7 +433,6 @@ DEFINE_EVENT(
 		{ (1 << BTRFS_ORDERED_DIRECT),	 	"DIRECT" 	}, \
 		{ (1 << BTRFS_ORDERED_IOERR), 		"IOERR" 	}, \
 		{ (1 << BTRFS_ORDERED_UPDATED_ISIZE), 	"UPDATED_ISIZE"	}, \
-		{ (1 << BTRFS_ORDERED_LOGGED_CSUM), 	"LOGGED_CSUM"	}, \
 		{ (1 << BTRFS_ORDERED_TRUNCATED), 	"TRUNCATED"	})
 
 
-- 
cgit v1.2.3


From ca5788aba3e8153da38cf99ca3ce2294f032fb51 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 19 Jul 2018 15:27:46 +0200
Subject: btrfs: remove remaining full_sync logic from btrfs_sync_file

The logic to check if the inode is already in the log can now be
simplified since we always wait for the ordered extents to complete
before deciding whether the inode needs to be logged. The big comment
about it can go away too.

CC: Filipe Manana <fdmanana@suse.com>
Suggested-by: Filipe Manana <fdmanana@suse.com>
[ code and changelog copied from mail discussion ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c         | 37 +------------------------------------
 fs/btrfs/ordered-data.c | 14 --------------
 fs/btrfs/ordered-data.h |  3 ---
 3 files changed, 1 insertion(+), 53 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index d5be80fb427c..975c590c50d8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2042,7 +2042,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	struct btrfs_trans_handle *trans;
 	struct btrfs_log_ctx ctx;
 	int ret = 0, err;
-	bool full_sync = false;
 	u64 len;
 
 	/*
@@ -2066,8 +2065,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
 	inode_lock(inode);
 	atomic_inc(&root->log_batch);
-	full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-			     &BTRFS_I(inode)->runtime_flags);
 
 	/*
 	 * We have to do this here to avoid the priority inversion of waiting on
@@ -2080,41 +2077,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	}
 	atomic_inc(&root->log_batch);
 
-	/*
-	 * If the last transaction that changed this file was before the current
-	 * transaction and we have the full sync flag set in our inode, we can
-	 * bail out now without any syncing.
-	 *
-	 * Note that we can't bail out if the full sync flag isn't set. This is
-	 * because when the full sync flag is set we start all ordered extents
-	 * and wait for them to fully complete - when they complete they update
-	 * the inode's last_trans field through:
-	 *
-	 *     btrfs_finish_ordered_io() ->
-	 *         btrfs_update_inode_fallback() ->
-	 *             btrfs_update_inode() ->
-	 *                 btrfs_set_inode_last_trans()
-	 *
-	 * So we are sure that last_trans is up to date and can do this check to
-	 * bail out safely. For the fast path, when the full sync flag is not
-	 * set in our inode, we can not do it because we start only our ordered
-	 * extents and don't wait for them to complete (that is when
-	 * btrfs_finish_ordered_io runs), so here at this point their last_trans
-	 * value might be less than or equals to fs_info->last_trans_committed,
-	 * and setting a speculative last_trans for an inode when a buffered
-	 * write is made (such as fs_info->generation + 1 for example) would not
-	 * be reliable since after setting the value and before fsync is called
-	 * any number of transactions can start and commit (transaction kthread
-	 * commits the current transaction periodically), and a transaction
-	 * commit does not start nor waits for ordered extents to complete.
-	 */
 	smp_mb();
 	if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
-	    (full_sync && BTRFS_I(inode)->last_trans <=
-	     fs_info->last_trans_committed) ||
-	    (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
-	     BTRFS_I(inode)->last_trans
-	     <= fs_info->last_trans_committed)) {
+	    BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed) {
 		/*
 		 * We've had everything committed since the last time we were
 		 * modified so clear this flag in case it was set for whatever
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index c35d2aed5ff8..e173b252d795 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -790,20 +790,6 @@ out:
 	return entry;
 }
 
-bool btrfs_have_ordered_extents_in_range(struct inode *inode,
-					 u64 file_offset,
-					 u64 len)
-{
-	struct btrfs_ordered_extent *oe;
-
-	oe = btrfs_lookup_ordered_range(BTRFS_I(inode), file_offset, len);
-	if (oe) {
-		btrfs_put_ordered_extent(oe);
-		return true;
-	}
-	return false;
-}
-
 /*
  * lookup and return any extent before 'file_offset'.  NULL is returned
  * if none is found
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index b2d3f6a091f7..02d813aaa261 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -178,9 +178,6 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
 		struct btrfs_inode *inode,
 		u64 file_offset,
 		u64 len);
-bool btrfs_have_ordered_extents_in_range(struct inode *inode,
-					 u64 file_offset,
-					 u64 len);
 int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 				struct btrfs_ordered_extent *ordered);
 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
-- 
cgit v1.2.3


From a9ecb653b0ac9ed95671aa494015bbb705bf3eae Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 17:26:42 +0300
Subject: btrfs: Streamline log_extent_csums a bit

Currently this function takes the root as an argument only to get the
log_root from it. Simplify this by directly passing the log root from
the caller. Also eliminate the fs_info local variable, since it's used
only once, so directly reference it from the transaction handle.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index daf32dc94dc3..7f6aaeb4bd22 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4064,11 +4064,9 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
 
 static int log_extent_csums(struct btrfs_trans_handle *trans,
 			    struct btrfs_inode *inode,
-			    struct btrfs_root *root,
+			    struct btrfs_root *log_root,
 			    const struct extent_map *em)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_root *log = root->log_root;
 	u64 csum_offset;
 	u64 csum_len;
 	LIST_HEAD(ordered_sums);
@@ -4089,7 +4087,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
 	}
 
 	/* block start is already adjusted for the file extent offset. */
-	ret = btrfs_lookup_csums_range(fs_info->csum_root,
+	ret = btrfs_lookup_csums_range(trans->fs_info->csum_root,
 				       em->block_start + csum_offset,
 				       em->block_start + csum_offset +
 				       csum_len - 1, &ordered_sums, 0);
@@ -4101,7 +4099,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
 						   struct btrfs_ordered_sum,
 						   list);
 		if (!ret)
-			ret = btrfs_csum_file_blocks(trans, log, sums);
+			ret = btrfs_csum_file_blocks(trans, log_root, sums);
 		list_del(&sums->list);
 		kfree(sums);
 	}
@@ -4125,7 +4123,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 	int ret;
 	int extent_inserted = 0;
 
-	ret = log_extent_csums(trans, inode, root, em);
+	ret = log_extent_csums(trans, inode, log, em);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From bece2e8239331d382f98b381f90e425a11157653 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Wed, 20 Jun 2018 10:03:31 -0700
Subject: btrfs: Fix misleading indentation reported by smatch

Fix the inconsistent indentation so that building the BTRFS source code
with smatch no longer triggers complaints about it.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 4 ++--
 fs/btrfs/ioctl.c       | 4 ++--
 fs/btrfs/reada.c       | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bdd5091433ab..e0e0f3dad206 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6313,7 +6313,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 		if (list_empty(&cache->dirty_list)) {
 			list_add_tail(&cache->dirty_list,
 				      &trans->transaction->dirty_bgs);
-				trans->transaction->num_dirty_bgs++;
+			trans->transaction->num_dirty_bgs++;
 			btrfs_get_block_group(cache);
 		}
 		spin_unlock(&trans->transaction->dirty_bgs_lock);
@@ -7568,7 +7568,7 @@ search:
 		 * for the proper type.
 		 */
 		if (!block_group_bits(block_group, flags)) {
-		    u64 extra = BTRFS_BLOCK_GROUP_DUP |
+			u64 extra = BTRFS_BLOCK_GROUP_DUP |
 				BTRFS_BLOCK_GROUP_RAID1 |
 				BTRFS_BLOCK_GROUP_RAID5 |
 				BTRFS_BLOCK_GROUP_RAID6 |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b077544b5232..0c4b9f364e84 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2507,8 +2507,8 @@ out:
 static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 					   void __user *argp)
 {
-	 struct btrfs_ioctl_ino_lookup_args *args;
-	 struct inode *inode;
+	struct btrfs_ioctl_ino_lookup_args *args;
+	struct inode *inode;
 	int ret = 0;
 
 	args = memdup_user(argp, sizeof(*args));
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 40f1bcef394d..4be425f70c2d 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -355,7 +355,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 		dev = bbio->stripes[nzones].dev;
 
 		/* cannot read ahead on missing device. */
-		 if (!dev->bdev)
+		if (!dev->bdev)
 			continue;
 
 		zone = reada_find_zone(dev, logical, bbio);
-- 
cgit v1.2.3


From acd43e3cdffcb2e361e8b481c72e057d34fcd780 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Wed, 20 Jun 2018 10:03:32 -0700
Subject: btrfs: Annotate fall-through when parsing mount option

Add the missing fall-through annotation so that the compiler no longer
complains about it when building with W=1.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 81107ad49f3a..3e298f26a383 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -760,6 +760,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 		case Opt_recovery:
 			btrfs_warn(info,
 				   "'recovery' is deprecated, use 'usebackuproot' instead");
+			/* fall through */
 		case Opt_usebackuproot:
 			btrfs_info(info,
 				   "trying to use backup root at mount time");
-- 
cgit v1.2.3


From edf57cbf2b030781885e339f32e35a470d2f8eba Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Wed, 20 Jun 2018 10:03:33 -0700
Subject: btrfs: Fix a C compliance issue

The C programming language does not allow preprocessor directives
inside macro arguments (pr_info() is defined as a macro). Hence rework
the pr_info() statement in btrfs_print_mod_info() such that it becomes
compliant. This patch allows tools like sparse to analyze the BTRFS
source code.

Fixes: 62e855771dac ("btrfs: convert printk(KERN_* to use pr_* calls")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3e298f26a383..3c5f221b5303 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2370,7 +2370,7 @@ static __cold void btrfs_interface_exit(void)
 
 static void __init btrfs_print_mod_info(void)
 {
-	pr_info("Btrfs loaded, crc32c=%s"
+	static const char options[] = ""
 #ifdef CONFIG_BTRFS_DEBUG
 			", debug=on"
 #endif
@@ -2383,8 +2383,8 @@ static void __init btrfs_print_mod_info(void)
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
 			", ref-verify=on"
 #endif
-			"\n",
-			crc32c_impl());
+			;
+	pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
 }
 
 static int __init init_btrfs_fs(void)
-- 
cgit v1.2.3


From 10728404c6f64a570f5be95cd5b237c69d94d818 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:43 +0300
Subject: btrfs: Remove fs_info from insert_tree_block_ref

This function is always called with a valid transaction, so there is no
need to pass fs_info separately; we can reference it directly from the
trans handle. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e0e0f3dad206..082e9f1c1f1a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1523,7 +1523,6 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
 }
 
 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
-					  struct btrfs_fs_info *fs_info,
 					  struct btrfs_path *path,
 					  u64 bytenr, u64 parent,
 					  u64 root_objectid)
@@ -1540,7 +1539,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
 		key.offset = root_objectid;
 	}
 
-	ret = btrfs_insert_empty_item(trans, fs_info->extent_root,
+	ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
 				      path, &key, 0);
 	btrfs_release_path(path);
 	return ret;
@@ -2000,8 +1999,8 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans,
 	int ret;
 	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
 		BUG_ON(refs_to_add != 1);
-		ret = insert_tree_block_ref(trans, fs_info, path, bytenr,
-					    parent, root_objectid);
+		ret = insert_tree_block_ref(trans, path, bytenr, parent,
+					    root_objectid);
 	} else {
 		ret = insert_extent_data_ref(trans, fs_info, path, bytenr,
 					     parent, root_objectid,
-- 
cgit v1.2.3


From 62b895af4022017c11855087801b0575a442ccc6 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:44 +0300
Subject: btrfs: Remove fs_info from insert_extent_data_ref

This function is always called with a valid transaction handle from
which fs_info can be referenced. So remove the redundant argument.
No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 082e9f1c1f1a..bb89522c0de6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1304,13 +1304,12 @@ fail:
 }
 
 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
-					   struct btrfs_fs_info *fs_info,
 					   struct btrfs_path *path,
 					   u64 bytenr, u64 parent,
 					   u64 root_objectid, u64 owner,
 					   u64 offset, int refs_to_add)
 {
-	struct btrfs_root *root = fs_info->extent_root;
+	struct btrfs_root *root = trans->fs_info->extent_root;
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
 	u32 size;
@@ -2002,9 +2001,9 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans,
 		ret = insert_tree_block_ref(trans, path, bytenr, parent,
 					    root_objectid);
 	} else {
-		ret = insert_extent_data_ref(trans, fs_info, path, bytenr,
-					     parent, root_objectid,
-					     owner, offset, refs_to_add);
+		ret = insert_extent_data_ref(trans, path, bytenr, parent,
+					     root_objectid, owner, offset,
+					     refs_to_add);
 	}
 	return ret;
 }
-- 
cgit v1.2.3


From 375934105c97476f88eefe5f836808dd0cfc3eeb Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:45 +0300
Subject: btrfs: Remove fs_info argument from insert_extent_backref

This function is always called with a valid transaction handle from
which fs_info can be referenced. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bb89522c0de6..9d08bd65ed44 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1990,7 +1990,6 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
 }
 
 static int insert_extent_backref(struct btrfs_trans_handle *trans,
-				 struct btrfs_fs_info *fs_info,
 				 struct btrfs_path *path,
 				 u64 bytenr, u64 parent, u64 root_objectid,
 				 u64 owner, u64 offset, int refs_to_add)
@@ -2288,8 +2287,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 	path->reada = READA_FORWARD;
 	path->leave_spinning = 1;
 	/* now insert the actual backref */
-	ret = insert_extent_backref(trans, fs_info, path, bytenr, parent,
-				    root_objectid, owner, offset, refs_to_add);
+	ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
+				    owner, offset, refs_to_add);
 	if (ret)
 		btrfs_abort_transaction(trans, ret);
 out:
-- 
cgit v1.2.3


From e9f6290d593a8b2f6a3cf90e4ffd446ca25faea9 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:46 +0300
Subject: btrfs: Remove fs_info from remove_extent_data_ref

This function is always called with a valid transaction from which the
fs_info can be referenced. No functional change.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9d08bd65ed44..64346986f6f0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1383,7 +1383,6 @@ fail:
 }
 
 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
-					   struct btrfs_fs_info *fs_info,
 					   struct btrfs_path *path,
 					   int refs_to_drop, int *last_ref)
 {
@@ -1420,7 +1419,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 	num_refs -= refs_to_drop;
 
 	if (num_refs == 0) {
-		ret = btrfs_del_item(trans, fs_info->extent_root, path);
+		ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
 		*last_ref = 1;
 	} else {
 		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
@@ -2020,7 +2019,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 		update_inline_extent_backref(fs_info, path, iref,
 					     -refs_to_drop, NULL, last_ref);
 	} else if (is_data) {
-		ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop,
+		ret = remove_extent_data_ref(trans, path, refs_to_drop,
 					     last_ref);
 	} else {
 		*last_ref = 1;
-- 
cgit v1.2.3


From b167fa9152015c91fef9489d059e9dd80d1032fd Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:47 +0300
Subject: btrfs: Remove fs_info from fixup_low_keys

This argument is unused. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 4bc326df472e..18fd80e2f278 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -3128,8 +3128,7 @@ again:
  * higher levels
  *
  */
-static void fixup_low_keys(struct btrfs_fs_info *fs_info,
-			   struct btrfs_path *path,
+static void fixup_low_keys(struct btrfs_path *path,
 			   struct btrfs_disk_key *key, int level)
 {
 	int i;
@@ -3181,7 +3180,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
 	btrfs_set_item_key(eb, &disk_key, slot);
 	btrfs_mark_buffer_dirty(eb);
 	if (slot == 0)
-		fixup_low_keys(fs_info, path, &disk_key, 1);
+		fixup_low_keys(path, &disk_key, 1);
 }
 
 /*
@@ -3945,7 +3944,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
 		clean_tree_block(fs_info, right);
 
 	btrfs_item_key(right, &disk_key, 0);
-	fixup_low_keys(fs_info, path, &disk_key, 1);
+	fixup_low_keys(path, &disk_key, 1);
 
 	/* then fixup the leaf pointer in the path */
 	if (path->slots[0] < push_items) {
@@ -4320,7 +4319,7 @@ again:
 			path->nodes[0] = right;
 			path->slots[0] = 0;
 			if (path->slots[1] == 0)
-				fixup_low_keys(fs_info, path, &disk_key, 1);
+				fixup_low_keys(path, &disk_key, 1);
 		}
 		/*
 		 * We create a new leaf 'right' for the required ins_len and
@@ -4642,7 +4641,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
 		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
 		btrfs_set_item_key(leaf, &disk_key, slot);
 		if (slot == 0)
-			fixup_low_keys(fs_info, path, &disk_key, 1);
+			fixup_low_keys(path, &disk_key, 1);
 	}
 
 	item = btrfs_item_nr(slot);
@@ -4744,7 +4743,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
 
 	if (path->slots[0] == 0) {
 		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
-		fixup_low_keys(fs_info, path, &disk_key, 1);
+		fixup_low_keys(path, &disk_key, 1);
 	}
 	btrfs_unlock_up_safe(path, 1);
 
@@ -4886,7 +4885,6 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
 		    int level, int slot)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct extent_buffer *parent = path->nodes[level];
 	u32 nritems;
 	int ret;
@@ -4919,7 +4917,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
 		struct btrfs_disk_key disk_key;
 
 		btrfs_node_key(parent, &disk_key, 0);
-		fixup_low_keys(fs_info, path, &disk_key, level + 1);
+		fixup_low_keys(path, &disk_key, level + 1);
 	}
 	btrfs_mark_buffer_dirty(parent);
 }
@@ -5022,7 +5020,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			struct btrfs_disk_key disk_key;
 
 			btrfs_item_key(leaf, &disk_key, 0);
-			fixup_low_keys(fs_info, path, &disk_key, 1);
+			fixup_low_keys(path, &disk_key, 1);
 		}
 
 		/* delete the leaf if it is mostly empty */
-- 
cgit v1.2.3


From 867cc1fbebea7029ee5f95ea7e54f927ef7972b6 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:48 +0300
Subject: btrfs: Remove fs_info from lookup_inline_extent_backref

This function is always called with a valid transaction handle, from
which fs_info can be obtained. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 64346986f6f0..99e6a70b8fe5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1596,13 +1596,13 @@ static int find_next_key(struct btrfs_path *path, int level,
  */
 static noinline_for_stack
 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
-				 struct btrfs_fs_info *fs_info,
 				 struct btrfs_path *path,
 				 struct btrfs_extent_inline_ref **ref_ret,
 				 u64 bytenr, u64 num_bytes,
 				 u64 parent, u64 root_objectid,
 				 u64 owner, u64 offset, int insert)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *root = fs_info->extent_root;
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
@@ -1868,9 +1868,9 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
 {
 	int ret;
 
-	ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret,
-					   bytenr, num_bytes, parent,
-					   root_objectid, owner, offset, 0);
+	ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
+					   num_bytes, parent, root_objectid,
+					   owner, offset, 0);
 	if (ret != -ENOENT)
 		return ret;
 
@@ -1972,9 +1972,9 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
 	struct btrfs_extent_inline_ref *iref;
 	int ret;
 
-	ret = lookup_inline_extent_backref(trans, fs_info, path, &iref,
-					   bytenr, num_bytes, parent,
-					   root_objectid, owner, offset, 1);
+	ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
+					   num_bytes, parent, root_objectid,
+					   owner, offset, 1);
 	if (ret == 0) {
 		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
 		update_inline_extent_backref(fs_info, path, iref,
-- 
cgit v1.2.3


From 61a18f1c66636c3226598719935eca06e81976ca Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:49 +0300
Subject: btrfs: Remove fs_info argument from update_inline_extent_backref

This function always uses the leaf's extent_buffer which already
contains a reference to the fs_info. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 99e6a70b8fe5..102911286497 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1892,14 +1892,14 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
  * helper to update/remove inline back ref
  */
 static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
-				  struct btrfs_path *path,
+void update_inline_extent_backref(struct btrfs_path *path,
 				  struct btrfs_extent_inline_ref *iref,
 				  int refs_to_mod,
 				  struct btrfs_delayed_extent_op *extent_op,
 				  int *last_ref)
 {
-	struct extent_buffer *leaf;
+	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_fs_info *fs_info = leaf->fs_info;
 	struct btrfs_extent_item *ei;
 	struct btrfs_extent_data_ref *dref = NULL;
 	struct btrfs_shared_data_ref *sref = NULL;
@@ -1910,7 +1910,6 @@ void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
 	int type;
 	u64 refs;
 
-	leaf = path->nodes[0];
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 	refs = btrfs_extent_refs(leaf, ei);
 	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
@@ -1977,8 +1976,8 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
 					   owner, offset, 1);
 	if (ret == 0) {
 		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
-		update_inline_extent_backref(fs_info, path, iref,
-					     refs_to_add, extent_op, NULL);
+		update_inline_extent_backref(path, iref, refs_to_add,
+					     extent_op, NULL);
 	} else if (ret == -ENOENT) {
 		setup_inline_extent_backref(fs_info, path, iref, parent,
 					    root_objectid, owner, offset,
@@ -2016,8 +2015,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 
 	BUG_ON(!is_data && refs_to_drop != 1);
 	if (iref) {
-		update_inline_extent_backref(fs_info, path, iref,
-					     -refs_to_drop, NULL, last_ref);
+		update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
+					     last_ref);
 	} else if (is_data) {
 		ret = remove_extent_data_ref(trans, path, refs_to_drop,
 					     last_ref);
-- 
cgit v1.2.3


From b8582eeabb727f24ad0f128141558e4eb0928aec Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:50 +0300
Subject: btrfs: Remove fs_info argument from lookup_tree_block_ref

This function is always called with a valid transaction handle, from
which fs_info can be obtained. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 102911286497..85e1b54bc2ea 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1487,12 +1487,11 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
 }
 
 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
-					  struct btrfs_fs_info *fs_info,
 					  struct btrfs_path *path,
 					  u64 bytenr, u64 parent,
 					  u64 root_objectid)
 {
-	struct btrfs_root *root = fs_info->extent_root;
+	struct btrfs_root *root = trans->fs_info->extent_root;
 	struct btrfs_key key;
 	int ret;
 
@@ -1878,8 +1877,8 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
 	*ref_ret = NULL;
 
 	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
-		ret = lookup_tree_block_ref(trans, fs_info, path, bytenr,
-					    parent, root_objectid);
+		ret = lookup_tree_block_ref(trans, path, bytenr, parent,
+					    root_objectid);
 	} else {
 		ret = lookup_extent_data_ref(trans, fs_info, path, bytenr,
 					     parent, root_objectid, owner,
-- 
cgit v1.2.3


From bd1d53ef358d584a0aa9dbd304c807fcc5e3dd5f Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:51 +0300
Subject: btrfs: Remove fs_info argument from lookup_extent_data_ref

This function is always called with a valid transaction handle, from
which fs_info can be obtained. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 85e1b54bc2ea..2fc879a0a2b7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1216,13 +1216,12 @@ static int match_extent_data_ref(struct extent_buffer *leaf,
 }
 
 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
-					   struct btrfs_fs_info *fs_info,
 					   struct btrfs_path *path,
 					   u64 bytenr, u64 parent,
 					   u64 root_objectid,
 					   u64 owner, u64 offset)
 {
-	struct btrfs_root *root = fs_info->extent_root;
+	struct btrfs_root *root = trans->fs_info->extent_root;
 	struct btrfs_key key;
 	struct btrfs_extent_data_ref *ref;
 	struct extent_buffer *leaf;
@@ -1880,9 +1879,8 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
 		ret = lookup_tree_block_ref(trans, path, bytenr, parent,
 					    root_objectid);
 	} else {
-		ret = lookup_extent_data_ref(trans, fs_info, path, bytenr,
-					     parent, root_objectid, owner,
-					     offset);
+		ret = lookup_extent_data_ref(trans, path, bytenr, parent,
+					     root_objectid, owner, offset);
 	}
 	return ret;
 }
-- 
cgit v1.2.3


From fbe4801b26c3ca4192e679f04c0cf0dda32a6cda Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:52 +0300
Subject: btrfs: Remove fs_info from lookup_extent_backref

This argument is unused. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2fc879a0a2b7..19239ad3c6d1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1858,7 +1858,6 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
 }
 
 static int lookup_extent_backref(struct btrfs_trans_handle *trans,
-				 struct btrfs_fs_info *fs_info,
 				 struct btrfs_path *path,
 				 struct btrfs_extent_inline_ref **ref_ret,
 				 u64 bytenr, u64 num_bytes, u64 parent,
@@ -6878,9 +6877,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	if (is_data)
 		skinny_metadata = false;
 
-	ret = lookup_extent_backref(trans, info, path, &iref,
-				    bytenr, num_bytes, parent,
-				    root_objectid, owner_objectid,
+	ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
+				    parent, root_objectid, owner_objectid,
 				    owner_offset);
 	if (ret == 0) {
 		extent_slot = path->slots[0];
-- 
cgit v1.2.3


From 44e1c47d5c3f31a9f5c883834eb9e29d0b165ea8 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:53 +0300
Subject: btrfs: Remove fs_info from btrfs_add_delayed_tree_ref

This function is always called with a valid transaction handle, from
which fs_info can be obtained. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-ref.c | 4 ++--
 fs/btrfs/delayed-ref.h | 3 +--
 fs/btrfs/extent-tree.c | 8 ++++----
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 03dec673d12a..82ac1273c65f 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -709,13 +709,13 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
  * to make sure the delayed ref is eventually processed before this
  * transaction commits.
  */
-int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
-			       struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
 			       u64 bytenr, u64 num_bytes, u64 parent,
 			       u64 ref_root,  int level, int action,
 			       struct btrfs_delayed_extent_op *extent_op,
 			       int *old_ref_mod, int *new_ref_mod)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_delayed_tree_ref *ref;
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index ea1aecb6a50d..31729302c827 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -234,8 +234,7 @@ static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *hea
 		kmem_cache_free(btrfs_delayed_ref_head_cachep, head);
 }
 
-int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
-			       struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
 			       u64 bytenr, u64 num_bytes, u64 parent,
 			       u64 ref_root, int level, int action,
 			       struct btrfs_delayed_extent_op *extent_op,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 19239ad3c6d1..6b74ffc47ae4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2176,7 +2176,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			   owner, offset, BTRFS_ADD_DELAYED_REF);
 
 	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
-		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+		ret = btrfs_add_delayed_tree_ref(trans, bytenr,
 						 num_bytes, parent,
 						 root_objectid, (int)owner,
 						 BTRFS_ADD_DELAYED_REF, NULL,
@@ -7196,7 +7196,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 				   root->root_key.objectid,
 				   btrfs_header_level(buf), 0,
 				   BTRFS_DROP_DELAYED_REF);
-		ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start,
+		ret = btrfs_add_delayed_tree_ref(trans, buf->start,
 						 buf->len, parent,
 						 root->root_key.objectid,
 						 btrfs_header_level(buf),
@@ -7275,7 +7275,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
 		old_ref_mod = new_ref_mod = 0;
 		ret = 0;
 	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
-		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+		ret = btrfs_add_delayed_tree_ref(trans, bytenr,
 						 num_bytes, parent,
 						 root_objectid, (int)owner,
 						 BTRFS_DROP_DELAYED_REF, NULL,
@@ -8491,7 +8491,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 		btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
 				   root_objectid, level, 0,
 				   BTRFS_ADD_DELAYED_EXTENT);
-		ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid,
+		ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
 						 ins.offset, parent,
 						 root_objectid, level,
 						 BTRFS_ADD_DELAYED_EXTENT,
-- 
cgit v1.2.3


From 88a979c615d0d9da19498b3b7692e725fb2f387e Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:54 +0300
Subject: btrfs: Remove fs_info from btrfs_add_delayed_data_ref

This function is always called with a valid transaction handle, from
which fs_info can be obtained. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-ref.c | 4 ++--
 fs/btrfs/delayed-ref.h | 3 +--
 fs/btrfs/extent-tree.c | 7 +++----
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 82ac1273c65f..6eb00eb65d76 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -791,13 +791,13 @@ free_ref:
 /*
  * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
  */
-int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
-			       struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
 			       u64 bytenr, u64 num_bytes,
 			       u64 parent, u64 ref_root,
 			       u64 owner, u64 offset, u64 reserved, int action,
 			       int *old_ref_mod, int *new_ref_mod)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_delayed_data_ref *ref;
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 31729302c827..d9f2a4ebd5db 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -239,8 +239,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
 			       u64 ref_root, int level, int action,
 			       struct btrfs_delayed_extent_op *extent_op,
 			       int *old_ref_mod, int *new_ref_mod);
-int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
-			       struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
 			       u64 bytenr, u64 num_bytes,
 			       u64 parent, u64 ref_root,
 			       u64 owner, u64 offset, u64 reserved, int action,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6b74ffc47ae4..10586687753c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2182,7 +2182,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 						 BTRFS_ADD_DELAYED_REF, NULL,
 						 &old_ref_mod, &new_ref_mod);
 	} else {
-		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+		ret = btrfs_add_delayed_data_ref(trans, bytenr,
 						 num_bytes, parent,
 						 root_objectid, owner, offset,
 						 0, BTRFS_ADD_DELAYED_REF,
@@ -7281,7 +7281,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
 						 BTRFS_DROP_DELAYED_REF, NULL,
 						 &old_ref_mod, &new_ref_mod);
 	} else {
-		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+		ret = btrfs_add_delayed_data_ref(trans, bytenr,
 						 num_bytes, parent,
 						 root_objectid, owner, offset,
 						 0, BTRFS_DROP_DELAYED_REF,
@@ -8255,7 +8255,6 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 				     u64 offset, u64 ram_bytes,
 				     struct btrfs_key *ins)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	int ret;
 
 	BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
@@ -8264,7 +8263,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 			   root->root_key.objectid, owner, offset,
 			   BTRFS_ADD_DELAYED_EXTENT);
 
-	ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid,
+	ret = btrfs_add_delayed_data_ref(trans, ins->objectid,
 					 ins->offset, 0,
 					 root->root_key.objectid, owner,
 					 offset, ram_bytes,
-- 
cgit v1.2.3


From e7e02096d98388a003323c4223630d011ba1b382 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:55 +0300
Subject: btrfs: Remove fs_info from btrfs_make_block_group

This function is always called with a valid transaction handle, from
which fs_info can be obtained. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       | 4 ++--
 fs/btrfs/extent-tree.c | 4 ++--
 fs/btrfs/volumes.c     | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e671a1fcbbec..b77f9ac699c1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2716,8 +2716,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info);
 int btrfs_read_block_groups(struct btrfs_fs_info *info);
 int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
-			   struct btrfs_fs_info *fs_info, u64 bytes_used,
-			   u64 type, u64 chunk_offset, u64 size);
+			   u64 bytes_used, u64 type, u64 chunk_offset,
+			   u64 size);
 void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info);
 struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
 				struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 10586687753c..fa78a0175f80 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10241,10 +10241,10 @@ next:
 	trans->can_flush_pending_bgs = can_flush_pending_bgs;
 }
 
-int btrfs_make_block_group(struct btrfs_trans_handle *trans,
-			   struct btrfs_fs_info *fs_info, u64 bytes_used,
+int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
 			   u64 type, u64 chunk_offset, u64 size)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_group_cache *cache;
 	int ret;
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1da162928d1a..e806669dc43a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4900,7 +4900,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	refcount_inc(&em->refs);
 	write_unlock(&em_tree->lock);
 
-	ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes);
+	ret = btrfs_make_block_group(trans, 0, type, start, num_bytes);
 	if (ret)
 		goto error_del_extent;
 
@@ -5175,7 +5175,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 		/*
 		 * There could be two corrupted data stripes, we need
 		 * to loop retry in order to rebuild the correct data.
-		 * 
+		 *
 		 * Fail a stripe at a time on every retry except the
 		 * stripe under reconstruction.
 		 */
-- 
cgit v1.2.3


From 5a98ec0141805a0ff9adb18fd18834a906637f2f Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:56 +0300
Subject: btrfs: Remove fs_info from btrfs_remove_block_group

This function is always called with a valid transaction handle, from
which fs_info can be obtained. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       | 3 +--
 fs/btrfs/extent-tree.c | 4 ++--
 fs/btrfs/volumes.c     | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b77f9ac699c1..7df6739e8eca 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2723,8 +2723,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
 				struct btrfs_fs_info *fs_info,
 				const u64 chunk_offset);
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
-			     struct btrfs_fs_info *fs_info, u64 group_start,
-			     struct extent_map *em);
+			     u64 group_start, struct extent_map *em);
 void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fa78a0175f80..cb1716caf11b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10334,9 +10334,9 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 }
 
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
-			     struct btrfs_fs_info *fs_info, u64 group_start,
-			     struct extent_map *em)
+			     u64 group_start, struct extent_map *em)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *root = fs_info->extent_root;
 	struct btrfs_path *path;
 	struct btrfs_block_group_cache *block_group;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e806669dc43a..ff7c8cdacd85 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2885,7 +2885,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
 		}
 	}
 
-	ret = btrfs_remove_block_group(trans, fs_info, chunk_offset, em);
+	ret = btrfs_remove_block_group(trans, chunk_offset, em);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto out;
-- 
cgit v1.2.3


From e72cb9235d26e5b7360a38a702e644a24d8975e6 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:57 +0300
Subject: btrfs: Remove fs_info from __btrfs_free_extent

This function is always called with a valid transaction handle so we
can reference the fs_info from there. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cb1716caf11b..67a3aa8bc283 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -52,11 +52,10 @@ enum {
 };
 
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
-			       struct btrfs_fs_info *fs_info,
-				struct btrfs_delayed_ref_node *node, u64 parent,
-				u64 root_objectid, u64 owner_objectid,
-				u64 owner_offset, int refs_to_drop,
-				struct btrfs_delayed_extent_op *extra_op);
+			       struct btrfs_delayed_ref_node *node, u64 parent,
+			       u64 root_objectid, u64 owner_objectid,
+			       u64 owner_offset, int refs_to_drop,
+			       struct btrfs_delayed_extent_op *extra_op);
 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
 				    struct extent_buffer *leaf,
 				    struct btrfs_extent_item *ei);
@@ -2327,7 +2326,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
 					     ref->offset, node->ref_mod,
 					     extent_op);
 	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
-		ret = __btrfs_free_extent(trans, fs_info, node, parent,
+		ret = __btrfs_free_extent(trans, node, parent,
 					  ref_root, ref->objectid,
 					  ref->offset, node->ref_mod,
 					  extent_op);
@@ -2480,8 +2479,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
 					     ref->level, 0, 1,
 					     extent_op);
 	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
-		ret = __btrfs_free_extent(trans, fs_info, node,
-					  parent, ref_root,
+		ret = __btrfs_free_extent(trans, node, parent, ref_root,
 					  ref->level, 0, 1, extent_op);
 	} else {
 		BUG();
@@ -6840,12 +6838,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 }
 
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
-				struct btrfs_fs_info *info,
-				struct btrfs_delayed_ref_node *node, u64 parent,
-				u64 root_objectid, u64 owner_objectid,
-				u64 owner_offset, int refs_to_drop,
-				struct btrfs_delayed_extent_op *extent_op)
+			       struct btrfs_delayed_ref_node *node, u64 parent,
+			       u64 root_objectid, u64 owner_objectid,
+			       u64 owner_offset, int refs_to_drop,
+			       struct btrfs_delayed_extent_op *extent_op)
 {
+	struct btrfs_fs_info *info = trans->fs_info;
 	struct btrfs_key key;
 	struct btrfs_path *path;
 	struct btrfs_root *extent_root = info->extent_root;
-- 
cgit v1.2.3


From ef89b8245bc77a29f31cad2dec5be5f50d3a7c2b Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:58 +0300
Subject: btrfs: Remove fs_info from alloc_reserved_file_extent

fs_info can be referenced from the transaction handle, which is always
valid. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 67a3aa8bc283..f9f5c8f06021 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -60,7 +60,6 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
 				    struct extent_buffer *leaf,
 				    struct btrfs_extent_item *ei);
 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
-				      struct btrfs_fs_info *fs_info,
 				      u64 parent, u64 root_objectid,
 				      u64 flags, u64 owner, u64 offset,
 				      struct btrfs_key *ins, int ref_mod);
@@ -2316,10 +2315,10 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
 	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
 		if (extent_op)
 			flags |= extent_op->flags_to_set;
-		ret = alloc_reserved_file_extent(trans, fs_info,
-						 parent, ref_root, flags,
-						 ref->objectid, ref->offset,
-						 &ins, node->ref_mod);
+		ret = alloc_reserved_file_extent(trans, parent, ref_root,
+						 flags, ref->objectid,
+						 ref->offset, &ins,
+						 node->ref_mod);
 	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
 		ret = __btrfs_inc_extent_ref(trans, fs_info, node, parent,
 					     ref_root, ref->objectid,
@@ -8075,11 +8074,11 @@ int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
 }
 
 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
-				      struct btrfs_fs_info *fs_info,
 				      u64 parent, u64 root_objectid,
 				      u64 flags, u64 owner, u64 offset,
 				      struct btrfs_key *ins, int ref_mod)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int ret;
 	struct btrfs_extent_item *extent_item;
 	struct btrfs_extent_inline_ref *iref;
@@ -8306,8 +8305,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	spin_unlock(&block_group->lock);
 	spin_unlock(&space_info->lock);
 
-	ret = alloc_reserved_file_extent(trans, fs_info, 0, root_objectid,
-					 0, owner, offset, ins, 1);
+	ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
+					 offset, ins, 1);
 	btrfs_put_block_group(block_group);
 	return ret;
 }
-- 
cgit v1.2.3


From 2590d0f155c7c133b0c05048526ccc0a8dd5e9e4 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:48:59 +0300
Subject: btrfs: Remove fs_info argument from __btrfs_inc_extent_ref

This function already takes a transaction which holds a reference to
the fs_info struct. Use that reference and remove the extra arg. No
functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f9f5c8f06021..15fd8223344c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2231,12 +2231,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
  *
  */
 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info,
 				  struct btrfs_delayed_ref_node *node,
 				  u64 parent, u64 root_objectid,
 				  u64 owner, u64 offset, int refs_to_add,
 				  struct btrfs_delayed_extent_op *extent_op)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	struct btrfs_extent_item *item;
@@ -2320,10 +2320,9 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
 						 ref->offset, &ins,
 						 node->ref_mod);
 	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
-		ret = __btrfs_inc_extent_ref(trans, fs_info, node, parent,
-					     ref_root, ref->objectid,
-					     ref->offset, node->ref_mod,
-					     extent_op);
+		ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
+					     ref->objectid, ref->offset,
+					     node->ref_mod, extent_op);
 	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
 		ret = __btrfs_free_extent(trans, node, parent,
 					  ref_root, ref->objectid,
@@ -2473,10 +2472,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
 		BUG_ON(!extent_op || !extent_op->update_flags);
 		ret = alloc_reserved_tree_block(trans, node, extent_op);
 	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
-		ret = __btrfs_inc_extent_ref(trans, fs_info, node,
-					     parent, ref_root,
-					     ref->level, 0, 1,
-					     extent_op);
+		ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
+					     ref->level, 0, 1, extent_op);
 	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
 		ret = __btrfs_free_extent(trans, node, parent, ref_root,
 					  ref->level, 0, 1, extent_op);
-- 
cgit v1.2.3


From 2bf98ef35f7fd4b5222b0d82e3abd3a27eb49a50 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:00 +0300
Subject: btrfs: Remove fs_info from run_delayed_data_ref

This function is always called with a valid transaction from where
fs_info can be referenced. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 15fd8223344c..dbc936ff52a4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2289,7 +2289,6 @@ out:
 }
 
 static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
-				struct btrfs_fs_info *fs_info,
 				struct btrfs_delayed_ref_node *node,
 				struct btrfs_delayed_extent_op *extent_op,
 				int insert_reserved)
@@ -2306,7 +2305,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
 	ins.type = BTRFS_EXTENT_ITEM_KEY;
 
 	ref = btrfs_delayed_node_to_data_ref(node);
-	trace_run_delayed_data_ref(fs_info, node, ref, node->action);
+	trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
 
 	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
 		parent = ref->parent;
@@ -2505,7 +2504,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 					   insert_reserved);
 	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
 		 node->type == BTRFS_SHARED_DATA_REF_KEY)
-		ret = run_delayed_data_ref(trans, fs_info, node, extent_op,
+		ret = run_delayed_data_ref(trans, node, extent_op,
 					   insert_reserved);
 	else
 		BUG();
-- 
cgit v1.2.3


From 20b9a2d670fa9f06d9f682cc414f3d1cb0adf440 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:01 +0300
Subject: btrfs: Remove fs_info from run_delayed_extent_op

This function is always called with a valid transaction handle so
fs_info can be referenced from there. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index dbc936ff52a4..1c6c9ed1f7d2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2352,10 +2352,10 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
 }
 
 static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
-				 struct btrfs_fs_info *fs_info,
 				 struct btrfs_delayed_ref_head *head,
 				 struct btrfs_delayed_extent_op *extent_op)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_key key;
 	struct btrfs_path *path;
 	struct btrfs_extent_item *ei;
@@ -2560,7 +2560,7 @@ static int cleanup_extent_op(struct btrfs_trans_handle *trans,
 		return 0;
 	}
 	spin_unlock(&head->lock);
-	ret = run_delayed_extent_op(trans, fs_info, head, extent_op);
+	ret = run_delayed_extent_op(trans, head, extent_op);
 	btrfs_free_delayed_extent_op(extent_op);
 	return ret ? ret : 1;
 }
-- 
cgit v1.2.3


From c4d56d4a1688e9cb926fd4abb428e5a9c1f963b9 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:02 +0300
Subject: btrfs: Remove unused fs_info from cleanup_extent_op

The argument is no longer used so remove it.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1c6c9ed1f7d2..25a6202be4e3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2546,7 +2546,6 @@ static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_ref
 }
 
 static int cleanup_extent_op(struct btrfs_trans_handle *trans,
-			     struct btrfs_fs_info *fs_info,
 			     struct btrfs_delayed_ref_head *head)
 {
 	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
@@ -2574,7 +2573,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
 
 	delayed_refs = &trans->transaction->delayed_refs;
 
-	ret = cleanup_extent_op(trans, fs_info, head);
+	ret = cleanup_extent_op(trans, head);
 	if (ret < 0) {
 		unselect_delayed_ref_head(delayed_refs, head);
 		btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
-- 
cgit v1.2.3


From f9871eddd9cb1acff770e03344a4cf12c343546f Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:03 +0300
Subject: btrfs: Remove fs_info from cleanup_ref_head

fs_info can be referenced from the transaction handle, since it's always
valid. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 25a6202be4e3..9f630298ea11 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2565,9 +2565,10 @@ static int cleanup_extent_op(struct btrfs_trans_handle *trans,
 }
 
 static int cleanup_ref_head(struct btrfs_trans_handle *trans,
-			    struct btrfs_fs_info *fs_info,
 			    struct btrfs_delayed_ref_head *head)
 {
+
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	int ret;
 
@@ -2722,7 +2723,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 		 * up and move on to the next ref_head.
 		 */
 		if (!ref) {
-			ret = cleanup_ref_head(trans, fs_info, locked_ref);
+			ret = cleanup_ref_head(trans, locked_ref);
 			if (ret > 0 ) {
 				/* We dropped our lock, we need to loop. */
 				ret = 0;
-- 
cgit v1.2.3


From f97806f2eefdd762304e078fdbb2f8b994942ea6 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:04 +0300
Subject: btrfs: Remove fs_info from run_delayed_tree_ref

It can always be referenced from the passed transaction handle since
it's always valid. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9f630298ea11..c7a66dd434fc 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2443,7 +2443,6 @@ out:
 }
 
 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
-				struct btrfs_fs_info *fs_info,
 				struct btrfs_delayed_ref_node *node,
 				struct btrfs_delayed_extent_op *extent_op,
 				int insert_reserved)
@@ -2454,14 +2453,14 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
 	u64 ref_root = 0;
 
 	ref = btrfs_delayed_node_to_tree_ref(node);
-	trace_run_delayed_tree_ref(fs_info, node, ref, node->action);
+	trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
 
 	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
 		parent = ref->parent;
 	ref_root = ref->root;
 
 	if (node->ref_mod != 1) {
-		btrfs_err(fs_info,
+		btrfs_err(trans->fs_info,
 	"btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
 			  node->bytenr, node->ref_mod, node->action, ref_root,
 			  parent);
@@ -2500,7 +2499,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 
 	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
 	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
-		ret = run_delayed_tree_ref(trans, fs_info, node, extent_op,
+		ret = run_delayed_tree_ref(trans, node, extent_op,
 					   insert_reserved);
 	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
 		 node->type == BTRFS_SHARED_DATA_REF_KEY)
-- 
cgit v1.2.3


From 01458828bb420d6a8f9aa09af2376caab71660b3 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:05 +0300
Subject: btrfs: Remove fs_info from do_chunk_alloc

This function is always called with a valid transaction handle from
where fs_info can be referenced. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c7a66dd434fc..556b93c05ee3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -66,8 +66,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 				     struct btrfs_delayed_ref_node *node,
 				     struct btrfs_delayed_extent_op *extent_op);
-static int do_chunk_alloc(struct btrfs_trans_handle *trans,
-			  struct btrfs_fs_info *fs_info, u64 flags,
+static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
 			  int force);
 static int find_next_key(struct btrfs_path *path, int level,
 			 struct btrfs_key *key);
@@ -4306,7 +4305,7 @@ again:
 			if (IS_ERR(trans))
 				return PTR_ERR(trans);
 
-			ret = do_chunk_alloc(trans, fs_info, alloc_target,
+			ret = do_chunk_alloc(trans, alloc_target,
 					     CHUNK_ALLOC_NO_FORCE);
 			btrfs_end_transaction(trans);
 			if (ret < 0) {
@@ -4590,9 +4589,10 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
  *    - return 1 if it successfully allocates a chunk,
  *    - return errors including -ENOSPC otherwise.
  */
-static int do_chunk_alloc(struct btrfs_trans_handle *trans,
-			  struct btrfs_fs_info *fs_info, u64 flags, int force)
+static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+			  int force)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_space_info *space_info;
 	int wait_for_alloc = 0;
 	int ret = 0;
@@ -5001,7 +5001,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 			ret = PTR_ERR(trans);
 			break;
 		}
-		ret = do_chunk_alloc(trans, fs_info,
+		ret = do_chunk_alloc(trans,
 				     btrfs_metadata_alloc_profile(fs_info),
 				     CHUNK_ALLOC_NO_FORCE);
 		btrfs_end_transaction(trans);
@@ -7842,8 +7842,7 @@ loop:
 				goto out;
 			}
 
-			ret = do_chunk_alloc(trans, fs_info, flags,
-					     CHUNK_ALLOC_FORCE);
+			ret = do_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE);
 
 			/*
 			 * If we can't allocate a new chunk we've already looped
@@ -9469,7 +9468,7 @@ again:
 	 */
 	alloc_flags = update_block_group_flags(fs_info, cache->flags);
 	if (alloc_flags != cache->flags) {
-		ret = do_chunk_alloc(trans, fs_info, alloc_flags,
+		ret = do_chunk_alloc(trans, alloc_flags,
 				     CHUNK_ALLOC_FORCE);
 		/*
 		 * ENOSPC is allowed here, we may have enough space
@@ -9486,8 +9485,7 @@ again:
 	if (!ret)
 		goto out;
 	alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
-	ret = do_chunk_alloc(trans, fs_info, alloc_flags,
-			     CHUNK_ALLOC_FORCE);
+	ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
 	if (ret < 0)
 		goto out;
 	ret = inc_block_group_ro(cache, 0);
@@ -9509,7 +9507,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 {
 	u64 alloc_flags = get_alloc_profile(fs_info, type);
 
-	return do_chunk_alloc(trans, fs_info, alloc_flags, CHUNK_ALLOC_FORCE);
+	return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
 }
 
 /*
-- 
cgit v1.2.3


From c216b2039aa06f9be23a9f385cb2d2f6434927d7 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:06 +0300
Subject: btrfs: Remove fs_info from btrfs_alloc_chunk

It can be referenced from trans since the function is always called
within a transaction.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 4 ++--
 fs/btrfs/volumes.c     | 7 +++----
 fs/btrfs/volumes.h     | 3 +--
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 556b93c05ee3..c0928fcb3f32 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4568,7 +4568,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
 		 * the paths we visit in the chunk tree (they were already COWed
 		 * or created in the current transaction for example).
 		 */
-		ret = btrfs_alloc_chunk(trans, fs_info, flags);
+		ret = btrfs_alloc_chunk(trans, flags);
 	}
 
 	if (!ret) {
@@ -4670,7 +4670,7 @@ again:
 	 */
 	check_system_chunk(trans, fs_info, flags);
 
-	ret = btrfs_alloc_chunk(trans, fs_info, flags);
+	ret = btrfs_alloc_chunk(trans, flags);
 	trans->allocating_chunk = false;
 
 	spin_lock(&space_info->lock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ff7c8cdacd85..4a7423294270 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5038,13 +5038,12 @@ out:
  * require modifying the chunk tree. This division is important for the
  * bootstrap process of adding storage to a seed btrfs.
  */
-int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
-		      struct btrfs_fs_info *fs_info, u64 type)
+int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type)
 {
 	u64 chunk_offset;
 
-	lockdep_assert_held(&fs_info->chunk_mutex);
-	chunk_offset = find_next_chunk(fs_info);
+	lockdep_assert_held(&trans->fs_info->chunk_mutex);
+	chunk_offset = find_next_chunk(trans->fs_info);
 	return __btrfs_alloc_chunk(trans, chunk_offset, type);
 }
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5139ec8daf4c..df2d8bdf8c9a 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -396,8 +396,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
 		     u64 physical, u64 **logical, int *naddrs, int *stripe_len);
 int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
 int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
-int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
-		      struct btrfs_fs_info *fs_info, u64 type);
+int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
 void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
 blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
-- 
cgit v1.2.3


From 451a2c130342125ca44dbbf3b62521c3f0041cfb Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:07 +0300
Subject: btrfs: Remove fs_info from check_system_chunk

It can be referenced from trans since the function is always called
within a transaction.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       | 3 +--
 fs/btrfs/extent-tree.c | 8 ++++----
 fs/btrfs/volumes.c     | 2 +-
 3 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7df6739e8eca..db97e34aa113 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2821,8 +2821,7 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 int btrfs_start_write_no_snapshotting(struct btrfs_root *root);
 void btrfs_end_write_no_snapshotting(struct btrfs_root *root);
 void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
-void check_system_chunk(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, const u64 type);
+void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
 u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 		       u64 start, u64 end);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c0928fcb3f32..ca88a86b1c73 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4527,9 +4527,9 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
  * for allocating a chunk, otherwise if it's false, reserve space necessary for
  * removing a chunk.
  */
-void check_system_chunk(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u64 type)
+void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_space_info *info;
 	u64 left;
 	u64 thresh;
@@ -4668,7 +4668,7 @@ again:
 	 * Check if we have enough space in SYSTEM chunk because we may need
 	 * to update devices.
 	 */
-	check_system_chunk(trans, fs_info, flags);
+	check_system_chunk(trans, flags);
 
 	ret = btrfs_alloc_chunk(trans, flags);
 	trans->allocating_chunk = false;
@@ -9493,7 +9493,7 @@ out:
 	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
 		alloc_flags = update_block_group_flags(fs_info, cache->flags);
 		mutex_lock(&fs_info->chunk_mutex);
-		check_system_chunk(trans, fs_info, alloc_flags);
+		check_system_chunk(trans, alloc_flags);
 		mutex_unlock(&fs_info->chunk_mutex);
 	}
 	mutex_unlock(&fs_info->ro_block_group_mutex);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4a7423294270..9795d5079907 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2829,7 +2829,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
 	}
 	map = em->map_lookup;
 	mutex_lock(&fs_info->chunk_mutex);
-	check_system_chunk(trans, fs_info, map->type);
+	check_system_chunk(trans, map->type);
 	mutex_unlock(&fs_info->chunk_mutex);
 
 	/*
-- 
cgit v1.2.3


From 9e715da86001cdbb2d802f1ac8eb5e12c6eb0d08 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:08 +0300
Subject: btrfs: Remove fs_info from free_excluded_extents

It can be referenced from the passed block group.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ca88a86b1c73..86c57b70ed28 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -217,9 +217,9 @@ static int add_excluded_extent(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-static void free_excluded_extents(struct btrfs_fs_info *fs_info,
-				  struct btrfs_block_group_cache *cache)
+static void free_excluded_extents(struct btrfs_block_group_cache *cache)
 {
+	struct btrfs_fs_info *fs_info = cache->fs_info;
 	u64 start, end;
 
 	start = cache->key.objectid;
@@ -555,7 +555,7 @@ static noinline void caching_thread(struct btrfs_work *work)
 	caching_ctl->progress = (u64)-1;
 
 	up_read(&fs_info->commit_root_sem);
-	free_excluded_extents(fs_info, block_group);
+	free_excluded_extents(block_group);
 	mutex_unlock(&caching_ctl->mutex);
 
 	wake_up(&caching_ctl->wait);
@@ -663,7 +663,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 		wake_up(&caching_ctl->wait);
 		if (ret == 1) {
 			put_caching_control(caching_ctl);
-			free_excluded_extents(fs_info, cache);
+			free_excluded_extents(cache);
 			return 0;
 		}
 	} else {
@@ -9860,7 +9860,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 		 */
 		if (block_group->cached == BTRFS_CACHE_NO ||
 		    block_group->cached == BTRFS_CACHE_ERROR)
-			free_excluded_extents(info, block_group);
+			free_excluded_extents(block_group);
 
 		btrfs_remove_free_space_cache(block_group);
 		ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
@@ -10108,7 +10108,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 			 * We may have excluded something, so call this just in
 			 * case.
 			 */
-			free_excluded_extents(info, cache);
+			free_excluded_extents(cache);
 			btrfs_put_block_group(cache);
 			goto error;
 		}
@@ -10123,14 +10123,14 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 		if (found_key.offset == btrfs_block_group_used(&cache->item)) {
 			cache->last_byte_to_unpin = (u64)-1;
 			cache->cached = BTRFS_CACHE_FINISHED;
-			free_excluded_extents(info, cache);
+			free_excluded_extents(cache);
 		} else if (btrfs_block_group_used(&cache->item) == 0) {
 			cache->last_byte_to_unpin = (u64)-1;
 			cache->cached = BTRFS_CACHE_FINISHED;
 			add_new_free_space(cache, found_key.objectid,
 					   found_key.objectid +
 					   found_key.offset);
-			free_excluded_extents(info, cache);
+			free_excluded_extents(cache);
 		}
 
 		ret = btrfs_add_block_group_cache(info, cache);
@@ -10259,14 +10259,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
 		 * We may have excluded something, so call this just in
 		 * case.
 		 */
-		free_excluded_extents(fs_info, cache);
+		free_excluded_extents(cache);
 		btrfs_put_block_group(cache);
 		return ret;
 	}
 
 	add_new_free_space(cache, chunk_offset, chunk_offset + size);
 
-	free_excluded_extents(fs_info, cache);
+	free_excluded_extents(cache);
 
 #ifdef CONFIG_BTRFS_DEBUG
 	if (btrfs_should_fragment_free_space(cache)) {
@@ -10350,7 +10350,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	 * Free the reserved super bytes from this block group before
 	 * remove it.
 	 */
-	free_excluded_extents(fs_info, block_group);
+	free_excluded_extents(block_group);
 	btrfs_free_ref_tree_range(fs_info, block_group->key.objectid,
 				  block_group->key.offset);
 
-- 
cgit v1.2.3


From 3c4da6574e7749353be6d5e9f06d17388c241469 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:09 +0300
Subject: btrfs: Remove fs_info from exclude_super_stripes

It can be referenced from the passed block group.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 86c57b70ed28..18f51478242b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -231,9 +231,9 @@ static void free_excluded_extents(struct btrfs_block_group_cache *cache)
 			  start, end, EXTENT_UPTODATE);
 }
 
-static int exclude_super_stripes(struct btrfs_fs_info *fs_info,
-				 struct btrfs_block_group_cache *cache)
+static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
 {
+	struct btrfs_fs_info *fs_info = cache->fs_info;
 	u64 bytenr;
 	u64 *logical;
 	int stripe_len;
@@ -10102,7 +10102,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 		 * info has super bytes accounted for, otherwise we'll think
 		 * we have more space than we actually do.
 		 */
-		ret = exclude_super_stripes(info, cache);
+		ret = exclude_super_stripes(cache);
 		if (ret) {
 			/*
 			 * We may have excluded something, so call this just in
@@ -10253,7 +10253,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
 	cache->last_byte_to_unpin = (u64)-1;
 	cache->cached = BTRFS_CACHE_FINISHED;
 	cache->needs_free_space = 1;
-	ret = exclude_super_stripes(fs_info, cache);
+	ret = exclude_super_stripes(cache);
 	if (ret) {
 		/*
 		 * We may have excluded something, so call this just in
-- 
cgit v1.2.3


From a639cdeba3486883be9f016c96d03bbb31063f11 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:10 +0300
Subject: btrfs: Remove fs_info from insert_inline_extent_backref

It can be referenced from the passed transaction handle, since it's
always valid.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 18f51478242b..283a3e967184 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1954,7 +1954,6 @@ void update_inline_extent_backref(struct btrfs_path *path,
 
 static noinline_for_stack
 int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
-				 struct btrfs_fs_info *fs_info,
 				 struct btrfs_path *path,
 				 u64 bytenr, u64 num_bytes, u64 parent,
 				 u64 root_objectid, u64 owner,
@@ -1972,7 +1971,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
 		update_inline_extent_backref(path, iref, refs_to_add,
 					     extent_op, NULL);
 	} else if (ret == -ENOENT) {
-		setup_inline_extent_backref(fs_info, path, iref, parent,
+		setup_inline_extent_backref(trans->fs_info, path, iref, parent,
 					    root_objectid, owner, offset,
 					    refs_to_add, extent_op);
 		ret = 0;
@@ -2235,7 +2234,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 				  u64 owner, u64 offset, int refs_to_add,
 				  struct btrfs_delayed_extent_op *extent_op)
 {
-	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	struct btrfs_extent_item *item;
@@ -2252,10 +2250,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 	path->reada = READA_FORWARD;
 	path->leave_spinning = 1;
 	/* this will setup the path even if it fails to insert the back ref */
-	ret = insert_inline_extent_backref(trans, fs_info, path, bytenr,
-					   num_bytes, parent, root_objectid,
-					   owner, offset,
-					   refs_to_add, extent_op);
+	ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
+					   parent, root_objectid, owner,
+					   offset, refs_to_add, extent_op);
 	if ((ret < 0 && ret != -EAGAIN) || !ret)
 		goto out;
 
-- 
cgit v1.2.3


From 5fac7f9ee142a3f498137669931265bc7207135d Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:11 +0300
Subject: btrfs: Remove fs_info from run_one_delayed_ref

It can be referenced from the passed transaction handle, since it's
always valid.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 283a3e967184..781e8529e9a3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2479,7 +2479,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
 
 /* helper function to actually process a single delayed ref entry */
 static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
-			       struct btrfs_fs_info *fs_info,
 			       struct btrfs_delayed_ref_node *node,
 			       struct btrfs_delayed_extent_op *extent_op,
 			       int insert_reserved)
@@ -2488,7 +2487,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 
 	if (trans->aborted) {
 		if (insert_reserved)
-			btrfs_pin_extent(fs_info, node->bytenr,
+			btrfs_pin_extent(trans->fs_info, node->bytenr,
 					 node->num_bytes, 1);
 		return 0;
 	}
@@ -2765,7 +2764,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 		locked_ref->extent_op = NULL;
 		spin_unlock(&locked_ref->lock);
 
-		ret = run_one_delayed_ref(trans, fs_info, ref, extent_op,
+		ret = run_one_delayed_ref(trans, ref, extent_op,
 					  must_insert_reserved);
 
 		btrfs_free_delayed_extent_op(extent_op);
-- 
cgit v1.2.3


From 87cc7a8a2afbe806bb18ef440277c4f2e58224fa Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:12 +0300
Subject: btrfs: Remove fs_info from remove_extent_backref

It can be referenced directly from the transaction handle since it's
always valid.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 781e8529e9a3..004c4e0fce36 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1998,7 +1998,6 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans,
 }
 
 static int remove_extent_backref(struct btrfs_trans_handle *trans,
-				 struct btrfs_fs_info *fs_info,
 				 struct btrfs_path *path,
 				 struct btrfs_extent_inline_ref *iref,
 				 int refs_to_drop, int is_data, int *last_ref)
@@ -2014,7 +2013,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 					     last_ref);
 	} else {
 		*last_ref = 1;
-		ret = btrfs_del_item(trans, fs_info->extent_root, path);
+		ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
 	}
 	return ret;
 }
@@ -6896,7 +6895,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 #endif
 		if (!found_extent) {
 			BUG_ON(iref);
-			ret = remove_extent_backref(trans, info, path, NULL,
+			ret = remove_extent_backref(trans, path, NULL,
 						    refs_to_drop,
 						    is_data, &last_ref);
 			if (ret) {
@@ -7040,9 +7039,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			btrfs_mark_buffer_dirty(leaf);
 		}
 		if (found_extent) {
-			ret = remove_extent_backref(trans, info, path,
-						    iref, refs_to_drop,
-						    is_data, &last_ref);
+			ret = remove_extent_backref(trans, path, iref,
+						    refs_to_drop, is_data,
+						    &last_ref);
 			if (ret) {
 				btrfs_abort_transaction(trans, ret);
 				goto out;
-- 
cgit v1.2.3


From 61da2abfcad9c7e1a9c2f74ae7af8637d9fba36e Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:13 +0300
Subject: btrfs: Remove fs_info from btrfs_alloc_logged_file_extent

It can be referenced from trans since the function is always called
within a valid transaction.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       | 1 -
 fs/btrfs/extent-tree.c | 2 +-
 fs/btrfs/tree-log.c    | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index db97e34aa113..bc4bb275e339 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2676,7 +2676,6 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 				     u64 offset, u64 ram_bytes,
 				     struct btrfs_key *ins);
 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info,
 				   u64 root_objectid, u64 owner, u64 offset,
 				   struct btrfs_key *ins);
 int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 004c4e0fce36..c27215dc8b84 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8263,10 +8263,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
  * space cache bits as well
  */
 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info,
 				   u64 root_objectid, u64 owner, u64 offset,
 				   struct btrfs_key *ins)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int ret;
 	struct btrfs_block_group_cache *block_group;
 	struct btrfs_space_info *space_info;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7f6aaeb4bd22..b3b1d424f2d8 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -715,7 +715,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 				 * allocation tree
 				 */
 				ret = btrfs_alloc_logged_file_extent(trans,
-						fs_info,
 						root->root_key.objectid,
 						key->objectid, offset, &ins);
 				if (ret)
-- 
cgit v1.2.3


From c83488afc5a772e424d8f159236bcf805b3c249c Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:14 +0300
Subject: btrfs: Remove fs_info from btrfs_inc_block_group_ro

It can be referenced from the passed bg cache.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       | 3 +--
 fs/btrfs/extent-tree.c | 4 ++--
 fs/btrfs/relocation.c  | 2 +-
 fs/btrfs/scrub.c       | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index bc4bb275e339..5bb083de8f2c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2801,8 +2801,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
 			     struct btrfs_block_rsv *block_rsv,
 			     u64 num_bytes);
-int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
-			     struct btrfs_block_group_cache *cache);
+int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
 void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
 void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c27215dc8b84..5e15b0c319f5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9426,10 +9426,10 @@ out:
 	return ret;
 }
 
-int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
-			     struct btrfs_block_group_cache *cache)
+int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
 
 {
+	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_trans_handle *trans;
 	u64 alloc_flags;
 	int ret;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 879b76fa881a..ef1b5aad035e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4375,7 +4375,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
 	rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
 	BUG_ON(!rc->block_group);
 
-	ret = btrfs_inc_block_group_ro(fs_info, rc->block_group);
+	ret = btrfs_inc_block_group_ro(rc->block_group);
 	if (ret) {
 		err = ret;
 		goto out;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 5bce2330ec64..67bb003323e7 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3832,7 +3832,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		 * -> btrfs_scrub_pause()
 		 */
 		scrub_pause_on(fs_info);
-		ret = btrfs_inc_block_group_ro(fs_info, cache);
+		ret = btrfs_inc_block_group_ro(cache);
 		if (!ret && is_dev_replace) {
 			/*
 			 * If we are doing a device replace wait for any tasks
-- 
cgit v1.2.3


From 43a7e99db6788110fb2bd97bdad5aa5c0c004aff Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 15:49:15 +0300
Subject: btrfs: Remove fs_info from btrfs_force_chunk_alloc

It can be referenced from the passed transaction handle.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       | 3 +--
 fs/btrfs/extent-tree.c | 5 ++---
 fs/btrfs/relocation.c  | 3 +--
 fs/btrfs/volumes.c     | 2 +-
 4 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5bb083de8f2c..12cb327cd16e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2809,8 +2809,7 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
 				   u64 start, u64 end);
 int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
 			 u64 num_bytes, u64 *actual_bytes);
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
-			    struct btrfs_fs_info *fs_info, u64 type);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
 int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
 
 int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5e15b0c319f5..472872a6cc27 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9497,10 +9497,9 @@ out:
 	return ret;
 }
 
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
-			    struct btrfs_fs_info *fs_info, u64 type)
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
 {
-	u64 alloc_flags = get_alloc_profile(fs_info, type);
+	u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
 
 	return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
 }
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ef1b5aad035e..22214033a4a2 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4169,8 +4169,7 @@ restart:
 		}
 	}
 	if (trans && progress && err == -ENOSPC) {
-		ret = btrfs_force_chunk_alloc(trans, fs_info,
-					      rc->block_group->flags);
+		ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
 		if (ret == 1) {
 			err = 0;
 			progress = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9795d5079907..fe497937933f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3059,7 +3059,7 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
 			if (IS_ERR(trans))
 				return PTR_ERR(trans);
 
-			ret = btrfs_force_chunk_alloc(trans, fs_info,
+			ret = btrfs_force_chunk_alloc(trans,
 						      BTRFS_BLOCK_GROUP_DATA);
 			btrfs_end_transaction(trans);
 			if (ret < 0)
-- 
cgit v1.2.3


From 9912bbf6440ba0555e91d3306520da01872c7c1d Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 20 Jun 2018 15:38:58 +0800
Subject: btrfs: check-integrity: Fix NULL pointer dereference for degraded
 mount

Commit f8f84b2dfda5 ("btrfs: index check-integrity state hash by a dev_t")
changed how btrfsic indexes device state.

Now we need to access device->bdev->bd_dev, but for a degraded mount
it's entirely possible for device->bdev to be NULL, which triggers a
NULL pointer dereference at mount time.

Fix it by checking if the device is degraded before accessing
device->bdev->bd_dev.

There are a lot of other places accessing device->bdev->bd_dev, however
the other call sites have either checked device->bdev, or the
device->bdev is passed from btrfsic_map_block(), so it won't cause harm.

Fixes: f8f84b2dfda5 ("btrfs: index check-integrity state hash by a dev_t")
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/check-integrity.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index a3fdb4fe967d..daf45472bef9 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1539,7 +1539,12 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
 	}
 
 	device = multi->stripes[0].dev;
-	block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev->bd_dev);
+	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) ||
+	    !device->bdev || !device->name)
+		block_ctx_out->dev = NULL;
+	else
+		block_ctx_out->dev = btrfsic_dev_state_lookup(
+							device->bdev->bd_dev);
 	block_ctx_out->dev_bytenr = multi->stripes[0].physical;
 	block_ctx_out->start = bytenr;
 	block_ctx_out->len = len;
-- 
cgit v1.2.3


From bc877d285ca3dba24c52406946a4a69847cc7422 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Mon, 18 Jun 2018 14:13:19 +0300
Subject: btrfs: Deduplicate extent_buffer init code

When a new extent buffer is allocated there are a few mandatory fields
which need to be set in order for the buffer to be sane: level,
generation, bytenr, backref_rev, owner and FSID/UUID. Currently this
is open coded in the callers of btrfs_alloc_tree_block, meaning it's
fairly high in the abstraction hierarchy of operations. This patch
solves this by simply moving this init code into btrfs_init_new_buffer,
since this is the function which initializes a newly allocated
extent buffer. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.c       | 29 +----------------------------
 fs/btrfs/disk-io.c     | 14 --------------
 fs/btrfs/extent-tree.c | 16 ++++++++++++----
 fs/btrfs/ioctl.c       |  8 --------
 4 files changed, 13 insertions(+), 54 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 18fd80e2f278..18f1ca1c5bd9 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -3358,17 +3358,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 
 	root_add_used(root, fs_info->nodesize);
 
-	memzero_extent_buffer(c, 0, sizeof(struct btrfs_header));
 	btrfs_set_header_nritems(c, 1);
-	btrfs_set_header_level(c, level);
-	btrfs_set_header_bytenr(c, c->start);
-	btrfs_set_header_generation(c, trans->transid);
-	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
-	btrfs_set_header_owner(c, root->root_key.objectid);
-
-	write_extent_buffer_fsid(c, fs_info->fsid);
-	write_extent_buffer_chunk_tree_uuid(c, fs_info->chunk_tree_uuid);
-
 	btrfs_set_node_key(c, &lower_key, 0);
 	btrfs_set_node_blockptr(c, 0, lower->start);
 	lower_gen = btrfs_header_generation(lower);
@@ -3497,15 +3487,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 		return PTR_ERR(split);
 
 	root_add_used(root, fs_info->nodesize);
-
-	memzero_extent_buffer(split, 0, sizeof(struct btrfs_header));
-	btrfs_set_header_level(split, btrfs_header_level(c));
-	btrfs_set_header_bytenr(split, split->start);
-	btrfs_set_header_generation(split, trans->transid);
-	btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
-	btrfs_set_header_owner(split, root->root_key.objectid);
-	write_extent_buffer_fsid(split, fs_info->fsid);
-	write_extent_buffer_chunk_tree_uuid(split, fs_info->chunk_tree_uuid);
+	ASSERT(btrfs_header_level(c) == level);
 
 	ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid);
 	if (ret) {
@@ -4291,15 +4273,6 @@ again:
 
 	root_add_used(root, fs_info->nodesize);
 
-	memzero_extent_buffer(right, 0, sizeof(struct btrfs_header));
-	btrfs_set_header_bytenr(right, right->start);
-	btrfs_set_header_generation(right, trans->transid);
-	btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
-	btrfs_set_header_owner(right, root->root_key.objectid);
-	btrfs_set_header_level(right, 0);
-	write_extent_buffer_fsid(right, fs_info->fsid);
-	write_extent_buffer_chunk_tree_uuid(right, fs_info->chunk_tree_uuid);
-
 	if (split == 0) {
 		if (mid <= slot) {
 			btrfs_set_header_nritems(right, 0);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f3224e23d5fa..6318ac2539d3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1292,15 +1292,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
 		goto fail;
 	}
 
-	memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
-	btrfs_set_header_bytenr(leaf, leaf->start);
-	btrfs_set_header_generation(leaf, trans->transid);
-	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
-	btrfs_set_header_owner(leaf, objectid);
 	root->node = leaf;
-
-	write_extent_buffer_fsid(leaf, fs_info->fsid);
-	write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid);
 	btrfs_mark_buffer_dirty(leaf);
 
 	root->commit_root = btrfs_root_node(root);
@@ -1374,14 +1366,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
 		return ERR_CAST(leaf);
 	}
 
-	memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
-	btrfs_set_header_bytenr(leaf, leaf->start);
-	btrfs_set_header_generation(leaf, trans->transid);
-	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
-	btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
 	root->node = leaf;
 
-	write_extent_buffer_fsid(root->node, fs_info->fsid);
 	btrfs_mark_buffer_dirty(root->node);
 	btrfs_tree_unlock(root->node);
 	return root;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 472872a6cc27..0ca3999356c7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8302,7 +8302,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 
 static struct extent_buffer *
 btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		      u64 bytenr, int level)
+		      u64 bytenr, int level, u64 owner)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct extent_buffer *buf;
@@ -8311,7 +8311,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	if (IS_ERR(buf))
 		return buf;
 
-	btrfs_set_header_generation(buf, trans->transid);
 	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
 	btrfs_tree_lock(buf);
 	clean_tree_block(fs_info, buf);
@@ -8320,6 +8319,14 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	btrfs_set_lock_blocking(buf);
 	set_extent_buffer_uptodate(buf);
 
+	memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
+	btrfs_set_header_level(buf, level);
+	btrfs_set_header_bytenr(buf, buf->start);
+	btrfs_set_header_generation(buf, trans->transid);
+	btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
+	btrfs_set_header_owner(buf, owner);
+	write_extent_buffer_fsid(buf, fs_info->fsid);
+	write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
 	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
 		buf->log_index = root->log_transid % 2;
 		/*
@@ -8428,7 +8435,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 	if (btrfs_is_testing(fs_info)) {
 		buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
-					    level);
+					    level, root_objectid);
 		if (!IS_ERR(buf))
 			root->alloc_bytenr += blocksize;
 		return buf;
@@ -8444,7 +8451,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 	if (ret)
 		goto out_unuse;
 
-	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
+	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
+				    root_objectid);
 	if (IS_ERR(buf)) {
 		ret = PTR_ERR(buf);
 		goto out_free_reserved;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0c4b9f364e84..08b8c0b346b3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -616,14 +616,6 @@ static noinline int create_subvol(struct inode *dir,
 		goto fail;
 	}
 
-	memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
-	btrfs_set_header_bytenr(leaf, leaf->start);
-	btrfs_set_header_generation(leaf, trans->transid);
-	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
-	btrfs_set_header_owner(leaf, objectid);
-
-	write_extent_buffer_fsid(leaf, fs_info->fsid);
-	write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid);
 	btrfs_mark_buffer_dirty(leaf);
 
 	inode_item = &root_item->inode;
-- 
cgit v1.2.3


From e41ca5897489b1c18af75ff0cc8f5c80260b3281 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 6 Jun 2018 15:41:49 +0800
Subject: btrfs: Get rid of the confusing btrfs_file_extent_inline_len

We used to call btrfs_file_extent_inline_len() to get the uncompressed
data size of an inlined extent.

However, this function is hiding evil: for a compressed extent, it has
no choice but to read ram_bytes directly from btrfs_file_extent_item,
while for an uncompressed extent it uses the item size to calculate the
real data size and ignores ram_bytes completely.

In fact, when ram_bytes is corrupted, the above behavior means the kernel's
btrfs_print_leaf() can't even print the stored ram_bytes to expose the bug.

Since we have the tree-checker to verify all EXTENT_DATA, such mismatch
can be detected pretty easily, thus we can trust ram_bytes without the
evil btrfs_file_extent_inline_len().

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h             | 26 --------------------------
 fs/btrfs/file-item.c         |  2 +-
 fs/btrfs/file.c              |  3 +--
 fs/btrfs/inode.c             | 12 ++++++------
 fs/btrfs/print-tree.c        |  4 ++--
 fs/btrfs/send.c              | 17 +++++++----------
 fs/btrfs/tree-log.c          | 12 ++++--------
 include/trace/events/btrfs.h |  2 +-
 8 files changed, 22 insertions(+), 56 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 12cb327cd16e..41ba770b9db9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2428,32 +2428,6 @@ static inline u32 btrfs_file_extent_inline_item_len(
 	return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
 }
 
-/* this returns the number of file bytes represented by the inline item.
- * If an item is compressed, this is the uncompressed size
- */
-static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb,
-					int slot,
-					const struct btrfs_file_extent_item *fi)
-{
-	struct btrfs_map_token token;
-
-	btrfs_init_map_token(&token);
-	/*
-	 * return the space used on disk if this item isn't
-	 * compressed or encoded
-	 */
-	if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 &&
-	    btrfs_token_file_extent_encryption(eb, fi, &token) == 0 &&
-	    btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) {
-		return btrfs_file_extent_inline_item_len(eb,
-							 btrfs_item_nr(slot));
-	}
-
-	/* otherwise use the ram bytes field */
-	return btrfs_token_file_extent_ram_bytes(eb, fi, &token);
-}
-
-
 /* btrfs_dev_stats_item */
 static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
 					const struct btrfs_dev_stats_item *ptr,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f9dd6d1836a3..8c3cd7072caf 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -942,7 +942,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
 			btrfs_file_extent_num_bytes(leaf, fi);
 	} else if (type == BTRFS_FILE_EXTENT_INLINE) {
 		size_t size;
-		size = btrfs_file_extent_inline_len(leaf, slot, fi);
+		size = btrfs_file_extent_ram_bytes(leaf, fi);
 		extent_end = ALIGN(extent_start + size,
 				   fs_info->sectorsize);
 	}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 975c590c50d8..4cd8af14f915 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -833,8 +833,7 @@ next_slot:
 				btrfs_file_extent_num_bytes(leaf, fi);
 		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 			extent_end = key.offset +
-				btrfs_file_extent_inline_len(leaf,
-						     path->slots[0], fi);
+				btrfs_file_extent_ram_bytes(leaf, fi);
 		} else {
 			/* can't happen */
 			BUG();
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index eba61bcb9bb3..1ade43c02b81 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1443,8 +1443,7 @@ next_slot:
 			nocow = 1;
 		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 			extent_end = found_key.offset +
-				btrfs_file_extent_inline_len(leaf,
-						     path->slots[0], fi);
+				btrfs_file_extent_ram_bytes(leaf, fi);
 			extent_end = ALIGN(extent_end,
 					   fs_info->sectorsize);
 		} else {
@@ -4643,8 +4642,8 @@ search_again:
 					BTRFS_I(inode), leaf, fi,
 					found_key.offset);
 			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-				item_end += btrfs_file_extent_inline_len(leaf,
-							 path->slots[0], fi);
+				item_end += btrfs_file_extent_ram_bytes(leaf,
+									fi);
 
 				trace_btrfs_truncate_show_fi_inline(
 					BTRFS_I(inode), leaf, fi, path->slots[0],
@@ -6943,7 +6942,8 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
 						       extent_start);
 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
 		size_t size;
-		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
+
+		size = btrfs_file_extent_ram_bytes(leaf, item);
 		extent_end = ALIGN(extent_start + size,
 				   fs_info->sectorsize);
 
@@ -6994,7 +6994,7 @@ next:
 		if (new_inline)
 			goto out;
 
-		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
+		size = btrfs_file_extent_ram_bytes(leaf, item);
 		extent_offset = page_offset(page) + pg_offset - extent_start;
 		copy_size = min_t(u64, PAGE_SIZE - pg_offset,
 				  size - extent_offset);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index a4e11cf04671..59efcf2e0de8 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -267,8 +267,8 @@ void btrfs_print_leaf(struct extent_buffer *l)
 					    struct btrfs_file_extent_item);
 			if (btrfs_file_extent_type(l, fi) ==
 			    BTRFS_FILE_EXTENT_INLINE) {
-				pr_info("\t\tinline extent data size %u\n",
-				       btrfs_file_extent_inline_len(l, i, fi));
+				pr_info("\t\tinline extent data size %llu\n",
+				       btrfs_file_extent_ram_bytes(l, fi));
 				break;
 			}
 			pr_info("\t\textent data disk bytenr %llu nr %llu\n",
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c47f62b19226..6ff7a1315e52 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1500,7 +1500,7 @@ static int read_symlink(struct btrfs_root *root,
 	BUG_ON(compression);
 
 	off = btrfs_file_extent_inline_start(ei);
-	len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
+	len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
 
 	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
 
@@ -5160,7 +5160,7 @@ static int clone_range(struct send_ctx *sctx,
 		ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
 		type = btrfs_file_extent_type(leaf, ei);
 		if (type == BTRFS_FILE_EXTENT_INLINE) {
-			ext_len = btrfs_file_extent_inline_len(leaf, slot, ei);
+			ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
 			ext_len = PAGE_ALIGN(ext_len);
 		} else {
 			ext_len = btrfs_file_extent_num_bytes(leaf, ei);
@@ -5236,8 +5236,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
 			struct btrfs_file_extent_item);
 	type = btrfs_file_extent_type(path->nodes[0], ei);
 	if (type == BTRFS_FILE_EXTENT_INLINE) {
-		len = btrfs_file_extent_inline_len(path->nodes[0],
-						   path->slots[0], ei);
+		len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
 		/*
 		 * it is possible the inline item won't cover the whole page,
 		 * but there may be items after this page.  Make
@@ -5375,7 +5374,7 @@ static int is_extent_unchanged(struct send_ctx *sctx,
 		}
 
 		if (right_type == BTRFS_FILE_EXTENT_INLINE) {
-			right_len = btrfs_file_extent_inline_len(eb, slot, ei);
+			right_len = btrfs_file_extent_ram_bytes(eb, ei);
 			right_len = PAGE_ALIGN(right_len);
 		} else {
 			right_len = btrfs_file_extent_num_bytes(eb, ei);
@@ -5496,8 +5495,7 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset)
 			    struct btrfs_file_extent_item);
 	type = btrfs_file_extent_type(path->nodes[0], fi);
 	if (type == BTRFS_FILE_EXTENT_INLINE) {
-		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
-							path->slots[0], fi);
+		u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
 		extent_end = ALIGN(key.offset + size,
 				   sctx->send_root->fs_info->sectorsize);
 	} else {
@@ -5560,7 +5558,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
 		fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
 		if (btrfs_file_extent_type(leaf, fi) ==
 		    BTRFS_FILE_EXTENT_INLINE) {
-			u64 size = btrfs_file_extent_inline_len(leaf, slot, fi);
+			u64 size = btrfs_file_extent_ram_bytes(leaf, fi);
 
 			extent_end = ALIGN(key.offset + size,
 					   root->fs_info->sectorsize);
@@ -5606,8 +5604,7 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
 			    struct btrfs_file_extent_item);
 	type = btrfs_file_extent_type(path->nodes[0], fi);
 	if (type == BTRFS_FILE_EXTENT_INLINE) {
-		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
-							path->slots[0], fi);
+		u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
 		extent_end = ALIGN(key->offset + size,
 				   sctx->send_root->fs_info->sectorsize);
 	} else {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index b3b1d424f2d8..6bca8f88ade0 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -597,7 +597,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 		if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
 			nbytes = 0;
 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
-		size = btrfs_file_extent_inline_len(eb, slot, item);
+		size = btrfs_file_extent_ram_bytes(eb, item);
 		nbytes = btrfs_file_extent_ram_bytes(eb, item);
 		extent_end = ALIGN(start + size,
 				   fs_info->sectorsize);
@@ -3920,9 +3920,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 					struct btrfs_file_extent_item);
 		if (btrfs_file_extent_type(src, extent) ==
 		    BTRFS_FILE_EXTENT_INLINE) {
-			len = btrfs_file_extent_inline_len(src,
-							   src_path->slots[0],
-							   extent);
+			len = btrfs_file_extent_ram_bytes(src, extent);
 			*last_extent = ALIGN(key.offset + len,
 					     fs_info->sectorsize);
 		} else {
@@ -3987,7 +3985,7 @@ fill_holes:
 		extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
 		if (btrfs_file_extent_type(src, extent) ==
 		    BTRFS_FILE_EXTENT_INLINE) {
-			len = btrfs_file_extent_inline_len(src, i, extent);
+			len = btrfs_file_extent_ram_bytes(src, extent);
 			extent_end = ALIGN(key.offset + len,
 					   fs_info->sectorsize);
 		} else {
@@ -4572,9 +4570,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
 
 		if (btrfs_file_extent_type(leaf, extent) ==
 		    BTRFS_FILE_EXTENT_INLINE) {
-			len = btrfs_file_extent_inline_len(leaf,
-							   path->slots[0],
-							   extent);
+			len = btrfs_file_extent_ram_bytes(leaf, extent);
 			ASSERT(len == i_size ||
 			       (len == fs_info->sectorsize &&
 				btrfs_file_extent_compression(leaf, extent) !=
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 7057cc99d267..b401c4e36394 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -374,7 +374,7 @@ DECLARE_EVENT_CLASS(
 		__entry->extent_type	= btrfs_file_extent_type(l, fi);
 		__entry->compression	= btrfs_file_extent_compression(l, fi);
 		__entry->extent_start	= start;
-		__entry->extent_end	= (start + btrfs_file_extent_inline_len(l, slot, fi));
+		__entry->extent_end	= (start + btrfs_file_extent_ram_bytes(l, fi));
 	),
 
 	TP_printk_btrfs(
-- 
cgit v1.2.3


From afd48513f0019a2048afed0d98904d3fec7e05dd Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 21 Jun 2018 18:04:05 +0200
Subject: btrfs: use monotonic time for transaction handling

The transaction times were changed to ktime_get_real_seconds to avoid
the y2038 overflow, but they still have a minor problem when they go
backwards or jump due to settimeofday() or leap seconds.

This changes the transaction handling to instead use ktime_get_seconds(),
which returns a CLOCK_MONOTONIC timestamp that has neither of those
problems.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c     | 2 +-
 fs/btrfs/transaction.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6318ac2539d3..2de3da7b7bdc 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1805,7 +1805,7 @@ static int transaction_kthread(void *arg)
 			goto sleep;
 		}
 
-		now = ktime_get_real_seconds();
+		now = ktime_get_seconds();
 		if (cur->state < TRANS_STATE_BLOCKED &&
 		    !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
 		    (now < cur->start_time ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 56c8bab0b816..ebe50dfb8947 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -241,7 +241,7 @@ loop:
 	refcount_set(&cur_trans->use_count, 2);
 	atomic_set(&cur_trans->pending_ordered, 0);
 	cur_trans->flags = 0;
-	cur_trans->start_time = ktime_get_real_seconds();
+	cur_trans->start_time = ktime_get_seconds();
 
 	memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
 
-- 
cgit v1.2.3


From d3c6be6fdab48dd26af3d3e01c5972ffe20985b9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 21 Jun 2018 18:04:06 +0200
Subject: btrfs: use timespec64 for i_otime

While the regular inode timestamps all use timespec64 now, the i_otime
field is btrfs specific and still needs to be converted to correctly
represent times beyond 2038.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/btrfs_inode.h | 2 +-
 fs/btrfs/inode.c       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 7e075343daa5..1343ac57b438 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -178,7 +178,7 @@ struct btrfs_inode {
 	struct btrfs_delayed_node *delayed_node;
 
 	/* File creation time. */
-	struct timespec i_otime;
+	struct timespec64 i_otime;
 
 	/* Hook into fs_info->delayed_iputs */
 	struct list_head delayed_iput;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1ade43c02b81..2858bc355920 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5744,7 +5744,7 @@ static struct inode *new_simple_dir(struct super_block *s,
 	inode->i_mtime = current_time(inode);
 	inode->i_atime = inode->i_mtime;
 	inode->i_ctime = inode->i_mtime;
-	BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime);
+	BTRFS_I(inode)->i_otime = inode->i_mtime;
 
 	return inode;
 }
@@ -6348,7 +6348,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	inode->i_mtime = current_time(inode);
 	inode->i_atime = inode->i_mtime;
 	inode->i_ctime = inode->i_mtime;
-	BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime);
+	BTRFS_I(inode)->i_otime = inode->i_mtime;
 
 	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
 				  struct btrfs_inode_item);
-- 
cgit v1.2.3


From bc931c0ef8f9e21ed5732ea148aeea9d0fffd3f8 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 22 Jun 2018 09:18:29 +0200
Subject: btrfs: Refactor count handling in btrfs_unpin_free_ino
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With gcc 4.1.2:

    fs/btrfs/inode-map.c: In function ‘btrfs_unpin_free_ino’:
    fs/btrfs/inode-map.c:241: warning: ‘count’ may be used uninitialized in this function

While this warning is a false-positive, it can easily be killed by
refactoring the code.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode-map.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 12fcd8897c33..a56f88ea9c73 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -244,8 +244,6 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
 		return;
 
 	while (1) {
-		bool add_to_ctl = true;
-
 		spin_lock(rbroot_lock);
 		n = rb_first(rbroot);
 		if (!n) {
@@ -257,15 +255,14 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
 		BUG_ON(info->bitmap); /* Logic error */
 
 		if (info->offset > root->ino_cache_progress)
-			add_to_ctl = false;
-		else if (info->offset + info->bytes > root->ino_cache_progress)
-			count = root->ino_cache_progress - info->offset + 1;
+			count = 0;
 		else
-			count = info->bytes;
+			count = min(root->ino_cache_progress - info->offset + 1,
+				    info->bytes);
 
 		rb_erase(&info->offset_index, rbroot);
 		spin_unlock(rbroot_lock);
-		if (add_to_ctl)
+		if (count)
 			__btrfs_add_free_space(root->fs_info, ctl,
 					       info->offset, count);
 		kmem_cache_free(btrfs_free_space_cachep, info);
-- 
cgit v1.2.3


From 43794446548730ac8461be30bbe47d5d027d1d16 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Fri, 22 Jun 2018 12:35:00 +0800
Subject: btrfs: Don't remove block group that still has pinned down bytes

[BUG]
Under certain KVM load and LTP tests, it is possible to hit the
following calltrace if quota is enabled:

BTRFS critical (device vda2): unable to find logical 8820195328 length 4096
BTRFS critical (device vda2): unable to find logical 8820195328 length 4096

WARNING: CPU: 0 PID: 49 at ../block/blk-core.c:172 blk_status_to_errno+0x1a/0x30
CPU: 0 PID: 49 Comm: kworker/u2:1 Not tainted 4.12.14-15-default #1 SLE15 (unreleased)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
Workqueue: btrfs-endio-write btrfs_endio_write_helper [btrfs]
task: ffff9f827b340bc0 task.stack: ffffb4f8c0304000
RIP: 0010:blk_status_to_errno+0x1a/0x30
Call Trace:
 submit_extent_page+0x191/0x270 [btrfs]
 ? btrfs_create_repair_bio+0x130/0x130 [btrfs]
 __do_readpage+0x2d2/0x810 [btrfs]
 ? btrfs_create_repair_bio+0x130/0x130 [btrfs]
 ? run_one_async_done+0xc0/0xc0 [btrfs]
 __extent_read_full_page+0xe7/0x100 [btrfs]
 ? run_one_async_done+0xc0/0xc0 [btrfs]
 read_extent_buffer_pages+0x1ab/0x2d0 [btrfs]
 ? run_one_async_done+0xc0/0xc0 [btrfs]
 btree_read_extent_buffer_pages+0x94/0xf0 [btrfs]
 read_tree_block+0x31/0x60 [btrfs]
 read_block_for_search.isra.35+0xf0/0x2e0 [btrfs]
 btrfs_search_slot+0x46b/0xa00 [btrfs]
 ? kmem_cache_alloc+0x1a8/0x510
 ? btrfs_get_token_32+0x5b/0x120 [btrfs]
 find_parent_nodes+0x11d/0xeb0 [btrfs]
 ? leaf_space_used+0xb8/0xd0 [btrfs]
 ? btrfs_leaf_free_space+0x49/0x90 [btrfs]
 ? btrfs_find_all_roots_safe+0x93/0x100 [btrfs]
 btrfs_find_all_roots_safe+0x93/0x100 [btrfs]
 btrfs_find_all_roots+0x45/0x60 [btrfs]
 btrfs_qgroup_trace_extent_post+0x20/0x40 [btrfs]
 btrfs_add_delayed_data_ref+0x1a3/0x1d0 [btrfs]
 btrfs_alloc_reserved_file_extent+0x38/0x40 [btrfs]
 insert_reserved_file_extent.constprop.71+0x289/0x2e0 [btrfs]
 btrfs_finish_ordered_io+0x2f4/0x7f0 [btrfs]
 ? pick_next_task_fair+0x2cd/0x530
 ? __switch_to+0x92/0x4b0
 btrfs_worker_helper+0x81/0x300 [btrfs]
 process_one_work+0x1da/0x3f0
 worker_thread+0x2b/0x3f0
 ? process_one_work+0x3f0/0x3f0
 kthread+0x11a/0x130
 ? kthread_create_on_node+0x40/0x40
 ret_from_fork+0x35/0x40

BTRFS critical (device vda2): unable to find logical 8820195328 length 16384
BTRFS: error (device vda2) in btrfs_finish_ordered_io:3023: errno=-5 IO failure
BTRFS info (device vda2): forced readonly
BTRFS error (device vda2): pending csums is 2887680

[CAUSE]
It's caused by race with block group auto removal:

- There is a meta block group X, which has only one tree block
  The tree block belongs to fs tree 257.
- In current transaction, some operation modified fs tree 257
  The tree block gets COWed, so the block group X is empty, and marked
  as unused, queued to be deleted.
- Some workload (like fsync) wakes up cleaner_kthread()
  Which will call btrfs_delete_unused_bgs() to remove unused block
  groups.
  So block group X along with its chunk map gets removed.
- Some delalloc work finished for fs tree 257
  Quota needs to get the original reference of the extent, which will
  read tree blocks of commit root of 257.
  Then since the chunk map gets removed, the above warning gets
  triggered.

[FIX]
Just let btrfs_delete_unused_bgs() skip block group which still has
pinned bytes.

However there is a minor side effect: currently we only queue empty
block groups at update_block_group(), and such an empty block group with
pinned bytes won't go through update_block_group() again, so such a block
group won't be removed until it gets a new extent allocated and removed.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0ca3999356c7..37d8927015d9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10702,7 +10702,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		/* Don't want to race with allocators so take the groups_sem */
 		down_write(&space_info->groups_sem);
 		spin_lock(&block_group->lock);
-		if (block_group->reserved ||
+		if (block_group->reserved || block_group->pinned ||
 		    btrfs_block_group_used(&block_group->item) ||
 		    block_group->ro ||
 		    list_is_singular(&block_group->list)) {
-- 
cgit v1.2.3


From 7b4284de93c51b1d78699bf06bccee892699aa4e Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 20 Jun 2018 18:43:12 +0300
Subject: btrfs: Streamline memory allocation failure handling in
 btrfs_add_delayed_tree_ref

Currently the function uses 2 goto labels to properly handle allocation
failures. This could be simplified by simply re-arranging the code so
that allocations are at the beginning of the function. This allows
using simple return statements. No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Su Yue <suy.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-ref.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 6eb00eb65d76..62ff545ba1f7 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -730,27 +730,33 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
 	if (!ref)
 		return -ENOMEM;
 
+	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
+	if (!head_ref) {
+		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+		return -ENOMEM;
+	}
+
+	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
+	    is_fstree(ref_root)) {
+		record = kmalloc(sizeof(*record), GFP_NOFS);
+		if (!record) {
+			kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+			kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+			return -ENOMEM;
+		}
+	}
+
 	if (parent)
 		ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
 	else
 		ref_type = BTRFS_TREE_BLOCK_REF_KEY;
+
 	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
 				ref_root, action, ref_type);
 	ref->root = ref_root;
 	ref->parent = parent;
 	ref->level = level;
 
-	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
-	if (!head_ref)
-		goto free_ref;
-
-	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
-	    is_fstree(ref_root)) {
-		record = kmalloc(sizeof(*record), GFP_NOFS);
-		if (!record)
-			goto free_head_ref;
-	}
-
 	init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
 			      ref_root, 0, action, false, is_system);
 	head_ref->extent_op = extent_op;
@@ -779,13 +785,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
 		btrfs_qgroup_trace_extent_post(fs_info, record);
 
 	return 0;
-
-free_head_ref:
-	kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
-free_ref:
-	kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
-
-	return -ENOMEM;
 }
 
 /*
-- 
cgit v1.2.3


From 893bf4b115c713738df05bb557f8fba14f07c077 Mon Sep 17 00:00:00 2001
From: Su Yue <suy.fnst@cn.fujitsu.com>
Date: Fri, 22 Jun 2018 09:52:15 +0800
Subject: btrfs: print more details when checking tree block finds a problem

For easier debugging, print eb->start if level is invalid.  Also make
clear if bytenr found is not expected.

Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 2de3da7b7bdc..c77afa9e5d91 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -615,8 +615,8 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 
 	found_start = btrfs_header_bytenr(eb);
 	if (found_start != eb->start) {
-		btrfs_err_rl(fs_info, "bad tree block start %llu %llu",
-			     found_start, eb->start);
+		btrfs_err_rl(fs_info, "bad tree block start, want %llu have %llu",
+			     eb->start, found_start);
 		ret = -EIO;
 		goto err;
 	}
@@ -628,8 +628,8 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 	}
 	found_level = btrfs_header_level(eb);
 	if (found_level >= BTRFS_MAX_LEVEL) {
-		btrfs_err(fs_info, "bad tree block level %d",
-			  (int)btrfs_header_level(eb));
+		btrfs_err(fs_info, "bad tree block level %d on %llu",
+			  (int)btrfs_header_level(eb), eb->start);
 		ret = -EIO;
 		goto err;
 	}
-- 
cgit v1.2.3


From e4af400a9c5081e7d2b703e4c479cb9831cc1117 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Date: Sun, 17 Jun 2018 12:39:47 -0500
Subject: btrfs: Use iocb to derive pos instead of passing a separate parameter

struct kiocb carries the ki_pos, so there is no need to pass it as
a separate function parameter.

generic_file_direct_write() increments ki_pos, so we now assign pos
after the function.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
[ rename to btrfs_buffered_write ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4cd8af14f915..89c9404fee9a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1568,10 +1568,11 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
 	return ret;
 }
 
-static noinline ssize_t __btrfs_buffered_write(struct file *file,
-					       struct iov_iter *i,
-					       loff_t pos)
+static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
+					       struct iov_iter *i)
 {
+	struct file *file = iocb->ki_filp;
+	loff_t pos = iocb->ki_pos;
 	struct inode *inode = file_inode(file);
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -1803,7 +1804,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
-	loff_t pos = iocb->ki_pos;
+	loff_t pos;
 	ssize_t written;
 	ssize_t written_buffered;
 	loff_t endbyte;
@@ -1814,8 +1815,8 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	if (written < 0 || !iov_iter_count(from))
 		return written;
 
-	pos += written;
-	written_buffered = __btrfs_buffered_write(file, from, pos);
+	pos = iocb->ki_pos;
+	written_buffered = btrfs_buffered_write(iocb, from);
 	if (written_buffered < 0) {
 		err = written_buffered;
 		goto out;
@@ -1952,7 +1953,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 	if (iocb->ki_flags & IOCB_DIRECT) {
 		num_written = __btrfs_direct_write(iocb, from);
 	} else {
-		num_written = __btrfs_buffered_write(file, from, pos);
+		num_written = btrfs_buffered_write(iocb, from);
 		if (num_written > 0)
 			iocb->ki_pos = pos + num_written;
 		if (clean_page)
-- 
cgit v1.2.3


From af431dcb249ffff20b16e08adaa4b624405a046b Mon Sep 17 00:00:00 2001
From: Su Yue <suy.fnst@cn.fujitsu.com>
Date: Fri, 22 Jun 2018 16:18:01 +0800
Subject: btrfs: return EUCLEAN if extent_inline_ref type is invalid

If type of extent_inline_ref found is not expected, filesystem may have
been corrupted, should return EUCLEAN instead of EINVAL.

Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/backref.c     | 4 ++--
 fs/btrfs/extent-tree.c | 2 +-
 fs/btrfs/relocation.c  | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 0a8e2e29a66b..60f4afa8ecbc 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -925,7 +925,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
 		type = btrfs_get_extent_inline_ref_type(leaf, iref,
 							BTRFS_REF_TYPE_ANY);
 		if (type == BTRFS_REF_TYPE_INVALID)
-			return -EINVAL;
+			return -EUCLEAN;
 
 		offset = btrfs_extent_inline_ref_offset(leaf, iref);
 
@@ -1793,7 +1793,7 @@ static int get_extent_inline_ref(unsigned long *ptr,
 	*out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref,
 						     BTRFS_REF_TYPE_ANY);
 	if (*out_type == BTRFS_REF_TYPE_INVALID)
-		return -EINVAL;
+		return -EUCLEAN;
 
 	*ptr += btrfs_extent_inline_ref_size(*out_type);
 	WARN_ON(*ptr > end);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 37d8927015d9..b28fa3d20ef7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1719,7 +1719,7 @@ again:
 		iref = (struct btrfs_extent_inline_ref *)ptr;
 		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
 		if (type == BTRFS_REF_TYPE_INVALID) {
-			err = -EINVAL;
+			err = -EUCLEAN;
 			goto out;
 		}
 
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 22214033a4a2..44722e939e34 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -792,7 +792,7 @@ again:
 			type = btrfs_get_extent_inline_ref_type(eb, iref,
 							BTRFS_REF_TYPE_BLOCK);
 			if (type == BTRFS_REF_TYPE_INVALID) {
-				err = -EINVAL;
+				err = -EUCLEAN;
 				goto out;
 			}
 			key.type = type;
@@ -3801,7 +3801,7 @@ int add_data_references(struct reloc_control *rc,
 			ret = find_data_references(rc, extent_key,
 						   eb, dref, blocks);
 		} else {
-			ret = -EINVAL;
+			ret = -EUCLEAN;
 			btrfs_err(rc->extent_root->fs_info,
 		     "extent %llu slot %d has an invalid inline ref type",
 			     eb->start, path->slots[0]);
-- 
cgit v1.2.3


From ab3629ed86ec43e7d95cf087d0b5a9d403fcd822 Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Wed, 27 Jun 2018 12:16:34 +0800
Subject: btrfs: return error instead of crash when detecting unexpected type
 in btrfs_get_acl

The caller of btrfs_get_acl() checks error condition so there is no
impact from this change. In practice there is no chance to get into
default case of switch statement because VFS has already checked the
type.

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/acl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 15e1dfef56a5..60f83a3bd77c 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -30,7 +30,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 		name = XATTR_NAME_POSIX_ACL_DEFAULT;
 		break;
 	default:
-		BUG();
+		return ERR_PTR(-EINVAL);
 	}
 
 	size = btrfs_getxattr(inode, name, "", 0);
-- 
cgit v1.2.3


From 7e35eab958e30d15743b1f9928d15b559a6e432d Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Wed, 27 Jun 2018 12:16:35 +0800
Subject: btrfs: replace empty string with NULL when getting attribute length
 in btrfs_get_acl

In btrfs_get_acl() the first call of btrfs_getxattr() is for getting the
length of attribute, the value buffer is never used in this case. So
it's better to replace empty string with NULL.

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/acl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 60f83a3bd77c..83fdd80c51c6 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -33,7 +33,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 		return ERR_PTR(-EINVAL);
 	}
 
-	size = btrfs_getxattr(inode, name, "", 0);
+	size = btrfs_getxattr(inode, name, NULL, 0);
 	if (size > 0) {
 		value = kzalloc(size, GFP_KERNEL);
 		if (!value)
-- 
cgit v1.2.3


From 5ee552da503961e6df785b8495bcfc98eb30eb2a Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Wed, 27 Jun 2018 12:16:36 +0800
Subject: btrfs: remove unnecessary -ERANGE check in btrfs_get_acl

There is no chance to get into -ERANGE error condition because we first
call btrfs_getxattr to get the length of the attribute, then we do a
subsequent call with the size from the first call.  Between the 2 calls
the size shouldn't change. So remove the unnecessary -ERANGE error
check.

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/acl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 83fdd80c51c6..a1d7211c8884 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -42,7 +42,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	}
 	if (size > 0) {
 		acl = posix_acl_from_xattr(&init_user_ns, value, size);
-	} else if (size == -ERANGE || size == -ENODATA || size == 0) {
+	} else if (size == -ENODATA || size == 0) {
 		acl = NULL;
 	} else {
 		acl = ERR_PTR(-EIO);
-- 
cgit v1.2.3


From dc7789ef8785ea996ecf302a89415439ccf10faa Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Wed, 27 Jun 2018 12:16:37 +0800
Subject: btrfs: avoid error code override in btrfs_get_acl

It's not good to override the error code when failing from
btrfs_getxattr() in btrfs_get_acl() because it hides the real reason of
the failure.

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/acl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index a1d7211c8884..7d673ec9e54a 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -45,7 +45,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	} else if (size == -ENODATA || size == 0) {
 		acl = NULL;
 	} else {
-		acl = ERR_PTR(-EIO);
+		acl = ERR_PTR(size);
 	}
 	kfree(value);
 
-- 
cgit v1.2.3


From 4de426cd394e4de4d3039aff7d20724d7d988dcf Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Wed, 27 Jun 2018 12:16:38 +0800
Subject: btrfs: remove unnecessary curly braces in btrfs_get_acl

It's only a coding style fix, not a functional change.  When if/else has
only one statement then the braces are not needed.

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/acl.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 7d673ec9e54a..3b66c957ea6f 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -40,13 +40,12 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 			return ERR_PTR(-ENOMEM);
 		size = btrfs_getxattr(inode, name, value, size);
 	}
-	if (size > 0) {
+	if (size > 0)
 		acl = posix_acl_from_xattr(&init_user_ns, value, size);
-	} else if (size == -ENODATA || size == 0) {
+	else if (size == -ENODATA || size == 0)
 		acl = NULL;
-	} else {
+	else
 		acl = ERR_PTR(size);
-	}
 	kfree(value);
 
 	return acl;
-- 
cgit v1.2.3


From a79865c680d81220a1355cd13098e75227dc2994 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Thu, 21 Jun 2018 09:45:00 +0300
Subject: btrfs: Remove V0 extent support

The v0 compat code was introduced in commit 5d4f98a28c7d
("Btrfs: Mixed back reference  (FORWARD ROLLING FORMAT CHANGE)") 9
years ago, which was merged in 2.6.31. This means that the code is
there to support filesystems which are _VERY_ old and if you are using
btrfs on such an old kernel, you have much bigger problems. This coupled
with the fact that no one is likely testing/maintaining this code likely
means it has bugs lurking. All things considered I think 43 kernel
releases later it's high time this remnant of the past got removed.

This patch removes all code wrapped in #ifdefs but leaves the BUG_ONs in case
we have a v0 with no support intact as a sort of safety-net.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.c       |   6 +-
 fs/btrfs/ctree.h       |   2 -
 fs/btrfs/extent-tree.c | 209 +------------------------------------------------
 fs/btrfs/print-tree.c  |  30 +------
 fs/btrfs/relocation.c  | 151 +----------------------------------
 5 files changed, 4 insertions(+), 394 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 18f1ca1c5bd9..d436fb4c002e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -888,11 +888,7 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
 	     btrfs_root_last_snapshot(&root->root_item) ||
 	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
 		return 1;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
-	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
-		return 1;
-#endif
+
 	return 0;
 }
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 41ba770b9db9..9fa958ed065e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -55,8 +55,6 @@ struct btrfs_ordered_sum;
 
 #define BTRFS_OLDEST_GENERATION	0ULL
 
-#define BTRFS_COMPAT_EXTENT_TREE_V0
-
 /*
  * the max metadata block size.  This limit is somewhat artificial,
  * but the memmove costs go through the roof for larger blocks.
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b28fa3d20ef7..6a73d2698807 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -867,17 +867,7 @@ search_again:
 			num_refs = btrfs_extent_refs(leaf, ei);
 			extent_flags = btrfs_extent_flags(leaf, ei);
 		} else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-			struct btrfs_extent_item_v0 *ei0;
-			BUG_ON(item_size != sizeof(*ei0));
-			ei0 = btrfs_item_ptr(leaf, path->slots[0],
-					     struct btrfs_extent_item_v0);
-			num_refs = btrfs_extent_refs_v0(leaf, ei0);
-			/* FIXME: this isn't correct for data */
-			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
-#else
 			BUG();
-#endif
 		}
 		BUG_ON(num_refs == 0);
 	} else {
@@ -1036,89 +1026,6 @@ out_free:
  * tree block info structure.
  */
 
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info,
-				  struct btrfs_path *path,
-				  u64 owner, u32 extra_size)
-{
-	struct btrfs_root *root = fs_info->extent_root;
-	struct btrfs_extent_item *item;
-	struct btrfs_extent_item_v0 *ei0;
-	struct btrfs_extent_ref_v0 *ref0;
-	struct btrfs_tree_block_info *bi;
-	struct extent_buffer *leaf;
-	struct btrfs_key key;
-	struct btrfs_key found_key;
-	u32 new_size = sizeof(*item);
-	u64 refs;
-	int ret;
-
-	leaf = path->nodes[0];
-	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
-
-	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-	ei0 = btrfs_item_ptr(leaf, path->slots[0],
-			     struct btrfs_extent_item_v0);
-	refs = btrfs_extent_refs_v0(leaf, ei0);
-
-	if (owner == (u64)-1) {
-		while (1) {
-			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
-				ret = btrfs_next_leaf(root, path);
-				if (ret < 0)
-					return ret;
-				BUG_ON(ret > 0); /* Corruption */
-				leaf = path->nodes[0];
-			}
-			btrfs_item_key_to_cpu(leaf, &found_key,
-					      path->slots[0]);
-			BUG_ON(key.objectid != found_key.objectid);
-			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
-				path->slots[0]++;
-				continue;
-			}
-			ref0 = btrfs_item_ptr(leaf, path->slots[0],
-					      struct btrfs_extent_ref_v0);
-			owner = btrfs_ref_objectid_v0(leaf, ref0);
-			break;
-		}
-	}
-	btrfs_release_path(path);
-
-	if (owner < BTRFS_FIRST_FREE_OBJECTID)
-		new_size += sizeof(*bi);
-
-	new_size -= sizeof(*ei0);
-	ret = btrfs_search_slot(trans, root, &key, path,
-				new_size + extra_size, 1);
-	if (ret < 0)
-		return ret;
-	BUG_ON(ret); /* Corruption */
-
-	btrfs_extend_item(fs_info, path, new_size);
-
-	leaf = path->nodes[0];
-	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
-	btrfs_set_extent_refs(leaf, item, refs);
-	/* FIXME: get real generation */
-	btrfs_set_extent_generation(leaf, item, 0);
-	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
-		btrfs_set_extent_flags(leaf, item,
-				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
-				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
-		bi = (struct btrfs_tree_block_info *)(item + 1);
-		/* FIXME: get first key of the block */
-		memzero_extent_buffer(leaf, (unsigned long)bi, sizeof(*bi));
-		btrfs_set_tree_block_level(leaf, bi, (int)owner);
-	} else {
-		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
-	}
-	btrfs_mark_buffer_dirty(leaf);
-	return 0;
-}
-#endif
-
 /*
  * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
  * is_data == BTRFS_REF_TYPE_DATA, data type is requried,
@@ -1247,17 +1154,6 @@ again:
 	if (parent) {
 		if (!ret)
 			return 0;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-		key.type = BTRFS_EXTENT_REF_V0_KEY;
-		btrfs_release_path(path);
-		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
-		if (ret < 0) {
-			err = ret;
-			goto fail;
-		}
-		if (!ret)
-			return 0;
-#endif
 		goto fail;
 	}
 
@@ -1400,13 +1296,6 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 		ref2 = btrfs_item_ptr(leaf, path->slots[0],
 				      struct btrfs_shared_data_ref);
 		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
-		struct btrfs_extent_ref_v0 *ref0;
-		ref0 = btrfs_item_ptr(leaf, path->slots[0],
-				      struct btrfs_extent_ref_v0);
-		num_refs = btrfs_ref_count_v0(leaf, ref0);
-#endif
 	} else {
 		BUG();
 	}
@@ -1422,14 +1311,6 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
 		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
 			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-		else {
-			struct btrfs_extent_ref_v0 *ref0;
-			ref0 = btrfs_item_ptr(leaf, path->slots[0],
-					struct btrfs_extent_ref_v0);
-			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
-		}
-#endif
 		btrfs_mark_buffer_dirty(leaf);
 	}
 	return ret;
@@ -1469,13 +1350,6 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
 		ref2 = btrfs_item_ptr(leaf, path->slots[0],
 				      struct btrfs_shared_data_ref);
 		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
-		struct btrfs_extent_ref_v0 *ref0;
-		ref0 = btrfs_item_ptr(leaf, path->slots[0],
-				      struct btrfs_extent_ref_v0);
-		num_refs = btrfs_ref_count_v0(leaf, ref0);
-#endif
 	} else {
 		WARN_ON(1);
 	}
@@ -1503,15 +1377,6 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 	if (ret > 0)
 		ret = -ENOENT;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-	if (ret == -ENOENT && parent) {
-		btrfs_release_path(path);
-		key.type = BTRFS_EXTENT_REF_V0_KEY;
-		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
-		if (ret > 0)
-			ret = -ENOENT;
-	}
-#endif
 	return ret;
 }
 
@@ -1676,22 +1541,6 @@ again:
 
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-	if (item_size < sizeof(*ei)) {
-		if (!insert) {
-			err = -ENOENT;
-			goto out;
-		}
-		ret = convert_extent_item_v0(trans, fs_info, path, owner,
-					     extra_size);
-		if (ret < 0) {
-			err = ret;
-			goto out;
-		}
-		leaf = path->nodes[0];
-		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-	}
-#endif
 	BUG_ON(item_size < sizeof(*ei));
 
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -2416,17 +2265,6 @@ again:
 
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-	if (item_size < sizeof(*ei)) {
-		ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0);
-		if (ret < 0) {
-			err = ret;
-			goto out;
-		}
-		leaf = path->nodes[0];
-		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-	}
-#endif
 	BUG_ON(item_size < sizeof(*ei));
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 	__run_delayed_extent_op(extent_op, leaf, ei);
@@ -3238,12 +3076,6 @@ static noinline int check_committed_ref(struct btrfs_root *root,
 
 	ret = 1;
 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-	if (item_size < sizeof(*ei)) {
-		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
-		goto out;
-	}
-#endif
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 
 	if (item_size != sizeof(*ei) +
@@ -6888,11 +6720,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 				break;
 			extent_slot--;
 		}
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
-		if (found_extent && item_size < sizeof(*ei))
-			found_extent = 0;
-#endif
+
 		if (!found_extent) {
 			BUG_ON(iref);
 			ret = remove_extent_backref(trans, path, NULL,
@@ -6968,41 +6796,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size_nr(leaf, extent_slot);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-	if (item_size < sizeof(*ei)) {
-		BUG_ON(found_extent || extent_slot != path->slots[0]);
-		ret = convert_extent_item_v0(trans, info, path, owner_objectid,
-					     0);
-		if (ret < 0) {
-			btrfs_abort_transaction(trans, ret);
-			goto out;
-		}
-
-		btrfs_release_path(path);
-		path->leave_spinning = 1;
-
-		key.objectid = bytenr;
-		key.type = BTRFS_EXTENT_ITEM_KEY;
-		key.offset = num_bytes;
-
-		ret = btrfs_search_slot(trans, extent_root, &key, path,
-					-1, 1);
-		if (ret) {
-			btrfs_err(info,
-				  "umm, got %d back from search, was looking for %llu",
-				ret, bytenr);
-			btrfs_print_leaf(path->nodes[0]);
-		}
-		if (ret < 0) {
-			btrfs_abort_transaction(trans, ret);
-			goto out;
-		}
-
-		extent_slot = path->slots[0];
-		leaf = path->nodes[0];
-		item_size = btrfs_item_size_nr(leaf, extent_slot);
-	}
-#endif
 	BUG_ON(item_size < sizeof(*ei));
 	ei = btrfs_item_ptr(leaf, extent_slot,
 			    struct btrfs_extent_item);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 59efcf2e0de8..c0dac9d7ce33 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -52,18 +52,8 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
 	u64 offset;
 	int ref_index = 0;
 
-	if (item_size < sizeof(*ei)) {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-		struct btrfs_extent_item_v0 *ei0;
-		BUG_ON(item_size != sizeof(*ei0));
-		ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
-		pr_info("\t\textent refs %u\n",
-		       btrfs_extent_refs_v0(eb, ei0));
-		return;
-#else
+	if (item_size < sizeof(*ei))
 		BUG();
-#endif
-	}
 
 	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
 	flags = btrfs_extent_flags(eb, ei);
@@ -133,20 +123,6 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
 	WARN_ON(ptr > end);
 }
 
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
-{
-	struct btrfs_extent_ref_v0 *ref0;
-
-	ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
-	printk("\t\textent back ref root %llu gen %llu owner %llu num_refs %lu\n",
-		btrfs_ref_root_v0(eb, ref0),
-		btrfs_ref_generation_v0(eb, ref0),
-		btrfs_ref_objectid_v0(eb, ref0),
-		(unsigned long)btrfs_ref_count_v0(eb, ref0));
-}
-#endif
-
 static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
 			    u32 item_size)
 {
@@ -280,11 +256,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
 			       btrfs_file_extent_ram_bytes(l, fi));
 			break;
 		case BTRFS_EXTENT_REF_V0_KEY:
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-			print_extent_ref_v0(l, i);
-#else
 			BUG();
-#endif
 			break;
 		case BTRFS_BLOCK_GROUP_ITEM_KEY:
 			bi = btrfs_item_ptr(l, i,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 44722e939e34..6dfa92e5ceed 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -586,29 +586,6 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
 	return btrfs_get_fs_root(fs_info, &key, false);
 }
 
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static noinline_for_stack
-struct btrfs_root *find_tree_root(struct reloc_control *rc,
-				  struct extent_buffer *leaf,
-				  struct btrfs_extent_ref_v0 *ref0)
-{
-	struct btrfs_root *root;
-	u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
-	u64 generation = btrfs_ref_generation_v0(leaf, ref0);
-
-	BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
-
-	root = read_fs_root(rc->extent_root->fs_info, root_objectid);
-	BUG_ON(IS_ERR(root));
-
-	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
-	    generation != btrfs_root_generation(&root->root_item))
-		return NULL;
-
-	return root;
-}
-#endif
-
 static noinline_for_stack
 int find_inline_backref(struct extent_buffer *leaf, int slot,
 			unsigned long *ptr, unsigned long *end)
@@ -621,12 +598,6 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
 	btrfs_item_key_to_cpu(leaf, &key, slot);
 
 	item_size = btrfs_item_size_nr(leaf, slot);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-	if (item_size < sizeof(*ei)) {
-		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
-		return 1;
-	}
-#endif
 	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
 	WARN_ON(!(btrfs_extent_flags(leaf, ei) &
 		  BTRFS_EXTENT_FLAG_TREE_BLOCK));
@@ -811,29 +782,8 @@ again:
 			goto next;
 		}
 
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-		if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
-		    key.type == BTRFS_EXTENT_REF_V0_KEY) {
-			if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
-				struct btrfs_extent_ref_v0 *ref0;
-				ref0 = btrfs_item_ptr(eb, path1->slots[0],
-						struct btrfs_extent_ref_v0);
-				if (key.objectid == key.offset) {
-					root = find_tree_root(rc, eb, ref0);
-					if (root && !should_ignore_root(root))
-						cur->root = root;
-					else
-						list_add(&cur->list, &useless);
-					break;
-				}
-				if (is_cowonly_root(btrfs_ref_root_v0(eb,
-								      ref0)))
-					cur->cowonly = 1;
-			}
-#else
 		ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY);
 		if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
-#endif
 			if (key.objectid == key.offset) {
 				/*
 				 * only root blocks of reloc trees use
@@ -3333,48 +3283,6 @@ int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
 	return 0;
 }
 
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static int get_ref_objectid_v0(struct reloc_control *rc,
-			       struct btrfs_path *path,
-			       struct btrfs_key *extent_key,
-			       u64 *ref_objectid, int *path_change)
-{
-	struct btrfs_key key;
-	struct extent_buffer *leaf;
-	struct btrfs_extent_ref_v0 *ref0;
-	int ret;
-	int slot;
-
-	leaf = path->nodes[0];
-	slot = path->slots[0];
-	while (1) {
-		if (slot >= btrfs_header_nritems(leaf)) {
-			ret = btrfs_next_leaf(rc->extent_root, path);
-			if (ret < 0)
-				return ret;
-			BUG_ON(ret > 0);
-			leaf = path->nodes[0];
-			slot = path->slots[0];
-			if (path_change)
-				*path_change = 1;
-		}
-		btrfs_item_key_to_cpu(leaf, &key, slot);
-		if (key.objectid != extent_key->objectid)
-			return -ENOENT;
-
-		if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
-			slot++;
-			continue;
-		}
-		ref0 = btrfs_item_ptr(leaf, slot,
-				struct btrfs_extent_ref_v0);
-		*ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
-		break;
-	}
-	return 0;
-}
-#endif
-
 /*
  * helper to add a tree block to the list.
  * the major work is getting the generation and level of the block
@@ -3408,22 +3316,7 @@ static int add_tree_block(struct reloc_control *rc,
 		}
 		generation = btrfs_extent_generation(eb, ei);
 	} else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-		u64 ref_owner;
-		int ret;
-
-		BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
-		ret = get_ref_objectid_v0(rc, path, extent_key,
-					  &ref_owner, NULL);
-		if (ret < 0)
-			return ret;
-		BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
-		level = (int)ref_owner;
-		/* FIXME: get real generation */
-		generation = 0;
-#else
 		BUG();
-#endif
 	}
 
 	btrfs_release_path(path);
@@ -3781,12 +3674,7 @@ int add_data_references(struct reloc_control *rc,
 	eb = path->nodes[0];
 	ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
 	end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-	if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
-		ptr = end;
-	else
-#endif
-		ptr += sizeof(struct btrfs_extent_item);
+	ptr += sizeof(struct btrfs_extent_item);
 
 	while (ptr < end) {
 		iref = (struct btrfs_extent_inline_ref *)ptr;
@@ -3832,13 +3720,8 @@ int add_data_references(struct reloc_control *rc,
 		if (key.objectid != extent_key->objectid)
 			break;
 
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-		if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
-		    key.type == BTRFS_EXTENT_REF_V0_KEY) {
-#else
 		BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
 		if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
-#endif
 			ret = __add_tree_block(rc, key.offset, blocksize,
 					       blocks);
 		} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
@@ -4086,39 +3969,7 @@ restart:
 			BUG_ON(ret);
 
 		} else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-			u64 ref_owner;
-			int path_change = 0;
-
-			BUG_ON(item_size !=
-			       sizeof(struct btrfs_extent_item_v0));
-			ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
-						  &path_change);
-			if (ret < 0) {
-				err = ret;
-				break;
-			}
-			if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
-				flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
-			else
-				flags = BTRFS_EXTENT_FLAG_DATA;
-
-			if (path_change) {
-				btrfs_release_path(path);
-
-				path->search_commit_root = 1;
-				path->skip_locking = 1;
-				ret = btrfs_search_slot(NULL, rc->extent_root,
-							&key, path, 0, 0);
-				if (ret < 0) {
-					err = ret;
-					break;
-				}
-				BUG_ON(ret > 0);
-			}
-#else
 			BUG();
-#endif
 		}
 
 		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
-- 
cgit v1.2.3


From ba3c2b196bf59ba8574808fe6f8fd88d0fed7510 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Tue, 26 Jun 2018 16:57:36 +0300
Subject: btrfs: Add graceful handling of V0 extents

Following the removal of the v0 handling code let's be courteous and
print an error message when such extents are handled. In the cases
where we have a transaction just abort it, otherwise just call
btrfs_handle_fs_error. Both cases result in the FS being re-mounted RO.

Where the error handling would be too intrusive, leave the BUG_ON in
place, as in extent_data_ref_count; other proper handling would catch
that case earlier.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       |  7 +++++++
 fs/btrfs/extent-tree.c | 39 +++++++++++++++++++++++++++++++++++----
 fs/btrfs/print-tree.c  |  9 ++++++---
 fs/btrfs/relocation.c  | 27 ++++++++++++++++++++++++---
 4 files changed, 72 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9fa958ed065e..427ca5de8542 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3432,6 +3432,13 @@ static inline void assfail(char *expr, char *file, int line)
 #define ASSERT(expr)	((void)0)
 #endif
 
+__cold
+static inline void btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
+{
+	btrfs_err(fs_info,
+"Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel");
+}
+
 __printf(5, 6)
 __cold
 void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6a73d2698807..71283faa85e2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -867,8 +867,16 @@ search_again:
 			num_refs = btrfs_extent_refs(leaf, ei);
 			extent_flags = btrfs_extent_flags(leaf, ei);
 		} else {
-			BUG();
+			ret = -EINVAL;
+			btrfs_print_v0_err(fs_info);
+			if (trans)
+				btrfs_abort_transaction(trans, ret);
+			else
+				btrfs_handle_fs_error(fs_info, ret, NULL);
+
+			goto out_free;
 		}
+
 		BUG_ON(num_refs == 0);
 	} else {
 		num_refs = 0;
@@ -1296,6 +1304,10 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 		ref2 = btrfs_item_ptr(leaf, path->slots[0],
 				      struct btrfs_shared_data_ref);
 		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
+	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
+		btrfs_print_v0_err(trans->fs_info);
+		btrfs_abort_transaction(trans, -EINVAL);
+		return -EINVAL;
 	} else {
 		BUG();
 	}
@@ -1328,6 +1340,8 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
 
 	leaf = path->nodes[0];
 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+	BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
 	if (iref) {
 		/*
 		 * If type is invalid, we should have bailed out earlier than
@@ -1541,7 +1555,12 @@ again:
 
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-	BUG_ON(item_size < sizeof(*ei));
+	if (item_size < sizeof(*ei)) {
+		err = -EINVAL;
+		btrfs_print_v0_err(fs_info);
+		btrfs_abort_transaction(trans, err);
+		goto out;
+	}
 
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 	flags = btrfs_extent_flags(leaf, ei);
@@ -2265,7 +2284,14 @@ again:
 
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-	BUG_ON(item_size < sizeof(*ei));
+
+	if (item_size < sizeof(*ei)) {
+		err = -EINVAL;
+		btrfs_print_v0_err(fs_info);
+		btrfs_abort_transaction(trans, err);
+		goto out;
+	}
+
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 	__run_delayed_extent_op(extent_op, leaf, ei);
 
@@ -6796,7 +6822,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size_nr(leaf, extent_slot);
-	BUG_ON(item_size < sizeof(*ei));
+	if (item_size < sizeof(*ei)) {
+		ret = -EINVAL;
+		btrfs_print_v0_err(info);
+		btrfs_abort_transaction(trans, ret);
+		goto out;
+	}
 	ei = btrfs_item_ptr(leaf, extent_slot,
 			    struct btrfs_extent_item);
 	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index c0dac9d7ce33..cc945376c244 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -52,8 +52,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
 	u64 offset;
 	int ref_index = 0;
 
-	if (item_size < sizeof(*ei))
-		BUG();
+	if (item_size < sizeof(*ei)) {
+		btrfs_print_v0_err(eb->fs_info);
+		btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
+	}
 
 	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
 	flags = btrfs_extent_flags(eb, ei);
@@ -256,7 +258,8 @@ void btrfs_print_leaf(struct extent_buffer *l)
 			       btrfs_file_extent_ram_bytes(l, fi));
 			break;
 		case BTRFS_EXTENT_REF_V0_KEY:
-			BUG();
+			btrfs_print_v0_err(fs_info);
+			btrfs_handle_fs_error(fs_info, -EINVAL, NULL);
 			break;
 		case BTRFS_BLOCK_GROUP_ITEM_KEY:
 			bi = btrfs_item_ptr(l, i,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 6dfa92e5ceed..eaf59c742dc8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -598,6 +598,11 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
 	btrfs_item_key_to_cpu(leaf, &key, slot);
 
 	item_size = btrfs_item_size_nr(leaf, slot);
+	if (item_size < sizeof(*ei)) {
+		btrfs_print_v0_err(leaf->fs_info);
+		btrfs_handle_fs_error(leaf->fs_info, -EINVAL, NULL);
+		return 1;
+	}
 	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
 	WARN_ON(!(btrfs_extent_flags(leaf, ei) &
 		  BTRFS_EXTENT_FLAG_TREE_BLOCK));
@@ -782,7 +787,6 @@ again:
 			goto next;
 		}
 
-		ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY);
 		if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
 			if (key.objectid == key.offset) {
 				/*
@@ -826,6 +830,12 @@ again:
 			edge->node[UPPER] = upper;
 
 			goto next;
+		} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
+			err = -EINVAL;
+			btrfs_print_v0_err(rc->extent_root->fs_info);
+			btrfs_handle_fs_error(rc->extent_root->fs_info, err,
+					      NULL);
+			goto out;
 		} else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
 			goto next;
 		}
@@ -3315,6 +3325,10 @@ static int add_tree_block(struct reloc_control *rc,
 			level = (int)extent_key->offset;
 		}
 		generation = btrfs_extent_generation(eb, ei);
+	} else if (item_size == sizeof(struct btrfs_extent_item_v0)) {
+		btrfs_print_v0_err(eb->fs_info);
+		btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
+		return -EINVAL;
 	} else {
 		BUG();
 	}
@@ -3720,7 +3734,6 @@ int add_data_references(struct reloc_control *rc,
 		if (key.objectid != extent_key->objectid)
 			break;
 
-		BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
 		if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
 			ret = __add_tree_block(rc, key.offset, blocksize,
 					       blocks);
@@ -3729,6 +3742,10 @@ int add_data_references(struct reloc_control *rc,
 					      struct btrfs_extent_data_ref);
 			ret = find_data_references(rc, extent_key,
 						   eb, dref, blocks);
+		} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
+			btrfs_print_v0_err(eb->fs_info);
+			btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
+			ret = -EINVAL;
 		} else {
 			ret = 0;
 		}
@@ -3967,7 +3984,11 @@ restart:
 			flags = btrfs_extent_flags(path->nodes[0], ei);
 			ret = check_extent_flags(flags);
 			BUG_ON(ret);
-
+		} else if (item_size == sizeof(struct btrfs_extent_item_v0)) {
+			err = -EINVAL;
+			btrfs_print_v0_err(trans->fs_info);
+			btrfs_abort_transaction(trans, err);
+			break;
 		} else {
 			BUG();
 		}
-- 
cgit v1.2.3


From 6d8ff4e4584cd84093b751671351c8103ead551d Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 26 Jun 2018 16:20:59 +0200
Subject: btrfs: annotate unlikely branches after V0 extent type removal

The v0 extent type checks are the right case for the unlikely
annotations as we don't expect to ever see them, so let's give the
compiler some hint.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 8 ++++----
 fs/btrfs/print-tree.c  | 2 +-
 fs/btrfs/relocation.c  | 8 ++++----
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 71283faa85e2..352b7e95f657 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1304,7 +1304,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 		ref2 = btrfs_item_ptr(leaf, path->slots[0],
 				      struct btrfs_shared_data_ref);
 		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
-	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
+	} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
 		btrfs_print_v0_err(trans->fs_info);
 		btrfs_abort_transaction(trans, -EINVAL);
 		return -EINVAL;
@@ -1555,7 +1555,7 @@ again:
 
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-	if (item_size < sizeof(*ei)) {
+	if (unlikely(item_size < sizeof(*ei))) {
 		err = -EINVAL;
 		btrfs_print_v0_err(fs_info);
 		btrfs_abort_transaction(trans, err);
@@ -2285,7 +2285,7 @@ again:
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
 
-	if (item_size < sizeof(*ei)) {
+	if (unlikely(item_size < sizeof(*ei))) {
 		err = -EINVAL;
 		btrfs_print_v0_err(fs_info);
 		btrfs_abort_transaction(trans, err);
@@ -6822,7 +6822,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size_nr(leaf, extent_slot);
-	if (item_size < sizeof(*ei)) {
+	if (unlikely(item_size < sizeof(*ei))) {
 		ret = -EINVAL;
 		btrfs_print_v0_err(info);
 		btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index cc945376c244..df49931ffe92 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -52,7 +52,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
 	u64 offset;
 	int ref_index = 0;
 
-	if (item_size < sizeof(*ei)) {
+	if (unlikely(item_size < sizeof(*ei))) {
 		btrfs_print_v0_err(eb->fs_info);
 		btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
 	}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index eaf59c742dc8..229f721cbde9 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -830,7 +830,7 @@ again:
 			edge->node[UPPER] = upper;
 
 			goto next;
-		} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
+		} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
 			err = -EINVAL;
 			btrfs_print_v0_err(rc->extent_root->fs_info);
 			btrfs_handle_fs_error(rc->extent_root->fs_info, err,
@@ -3325,7 +3325,7 @@ static int add_tree_block(struct reloc_control *rc,
 			level = (int)extent_key->offset;
 		}
 		generation = btrfs_extent_generation(eb, ei);
-	} else if (item_size == sizeof(struct btrfs_extent_item_v0)) {
+	} else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
 		btrfs_print_v0_err(eb->fs_info);
 		btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
 		return -EINVAL;
@@ -3742,7 +3742,7 @@ int add_data_references(struct reloc_control *rc,
 					      struct btrfs_extent_data_ref);
 			ret = find_data_references(rc, extent_key,
 						   eb, dref, blocks);
-		} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
+		} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
 			btrfs_print_v0_err(eb->fs_info);
 			btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
 			ret = -EINVAL;
@@ -3984,7 +3984,7 @@ restart:
 			flags = btrfs_extent_flags(path->nodes[0], ei);
 			ret = check_extent_flags(flags);
 			BUG_ON(ret);
-		} else if (item_size == sizeof(struct btrfs_extent_item_v0)) {
+		} else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
 			err = -EINVAL;
 			btrfs_print_v0_err(trans->fs_info);
 			btrfs_abort_transaction(trans, err);
-- 
cgit v1.2.3


From fce466eab7ac6baa9d2dcd88abcf945be3d4a089 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 3 Jul 2018 17:10:05 +0800
Subject: btrfs: tree-checker: Verify block_group_item

A crafted image with invalid block group items could cause the free space
cache code to panic.

We could detect such invalid block group item by checking:
1) Item size
   Known fixed value.
2) Block group size (key.offset)
   We have an upper limit on block group size (10G)
3) Chunk objectid
   Known fixed value.
4) Type
   Only 4 valid type values, DATA, METADATA, SYSTEM and DATA|METADATA.
   No more than 1 bit set for profile type.
5) Used space
   No more than the block group size.

This should allow btrfs to detect and refuse to mount the crafted image.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=199849
Reported-by: Xu Wen <wen.xu@gatech.edu>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Gu Jinxiang <gujx@cn.fujitsu.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Tested-by: Gu Jinxiang <gujx@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-checker.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.c      |   2 +-
 fs/btrfs/volumes.h      |   2 +
 3 files changed, 103 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 8d40e7dd8c30..3bade2ad39e8 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -19,6 +19,7 @@
 #include "tree-checker.h"
 #include "disk-io.h"
 #include "compression.h"
+#include "volumes.h"
 
 /*
  * Error message should follow the following format:
@@ -353,6 +354,102 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
+__printf(4, 5)
+__cold
+static void block_group_err(const struct btrfs_fs_info *fs_info,
+			    const struct extent_buffer *eb, int slot,
+			    const char *fmt, ...)
+{
+	struct btrfs_key key;
+	struct va_format vaf;
+	va_list args;
+
+	btrfs_item_key_to_cpu(eb, &key, slot);
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	btrfs_crit(fs_info,
+	"corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
+		btrfs_header_level(eb) == 0 ? "leaf" : "node",
+		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
+		key.objectid, key.offset, &vaf);
+	va_end(args);
+}
+
+static int check_block_group_item(struct btrfs_fs_info *fs_info,
+				  struct extent_buffer *leaf,
+				  struct btrfs_key *key, int slot)
+{
+	struct btrfs_block_group_item bgi;
+	u32 item_size = btrfs_item_size_nr(leaf, slot);
+	u64 flags;
+	u64 type;
+
+	/*
+	 * Here we don't really care about alignment since extent allocator can
+	 * handle it.  We care more about the size, as if one block group is
+	 * larger than maximum size, it's must be some obvious corruption.
+	 */
+	if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
+		block_group_err(fs_info, leaf, slot,
+			"invalid block group size, have %llu expect (0, %llu]",
+				key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
+		return -EUCLEAN;
+	}
+
+	if (item_size != sizeof(bgi)) {
+		block_group_err(fs_info, leaf, slot,
+			"invalid item size, have %u expect %zu",
+				item_size, sizeof(bgi));
+		return -EUCLEAN;
+	}
+
+	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
+			   sizeof(bgi));
+	if (btrfs_block_group_chunk_objectid(&bgi) !=
+	    BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
+		block_group_err(fs_info, leaf, slot,
+		"invalid block group chunk objectid, have %llu expect %llu",
+				btrfs_block_group_chunk_objectid(&bgi),
+				BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+		return -EUCLEAN;
+	}
+
+	if (btrfs_block_group_used(&bgi) > key->offset) {
+		block_group_err(fs_info, leaf, slot,
+			"invalid block group used, have %llu expect [0, %llu)",
+				btrfs_block_group_used(&bgi), key->offset);
+		return -EUCLEAN;
+	}
+
+	flags = btrfs_block_group_flags(&bgi);
+	if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
+		block_group_err(fs_info, leaf, slot,
+"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
+			flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
+			hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
+		return -EUCLEAN;
+	}
+
+	type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
+	if (type != BTRFS_BLOCK_GROUP_DATA &&
+	    type != BTRFS_BLOCK_GROUP_METADATA &&
+	    type != BTRFS_BLOCK_GROUP_SYSTEM &&
+	    type != (BTRFS_BLOCK_GROUP_METADATA |
+			   BTRFS_BLOCK_GROUP_DATA)) {
+		block_group_err(fs_info, leaf, slot,
+"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx",
+			type, hweight64(type),
+			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
+			BTRFS_BLOCK_GROUP_SYSTEM,
+			BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
+		return -EUCLEAN;
+	}
+	return 0;
+}
+
 /*
  * Common point to switch the item-specific validation.
  */
@@ -374,6 +471,9 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info,
 	case BTRFS_XATTR_ITEM_KEY:
 		ret = check_dir_item(fs_info, leaf, key, slot);
 		break;
+	case BTRFS_BLOCK_GROUP_ITEM_KEY:
+		ret = check_block_group_item(fs_info, leaf, key, slot);
+		break;
 	}
 	return ret;
 }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fe497937933f..256e57eec8ec 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4692,7 +4692,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 
 	if (type & BTRFS_BLOCK_GROUP_DATA) {
 		max_stripe_size = SZ_1G;
-		max_chunk_size = 10 * max_stripe_size;
+		max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
 		if (!devs_max)
 			devs_max = BTRFS_MAX_DEVS(info);
 	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index df2d8bdf8c9a..275c31c730cf 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -11,6 +11,8 @@
 #include <linux/btrfs.h>
 #include "async-thread.h"
 
+#define BTRFS_MAX_DATA_CHUNK_SIZE	(10ULL * SZ_1G)
+
 extern struct mutex uuid_mutex;
 
 #define BTRFS_STRIPE_LEN	SZ_64K
-- 
cgit v1.2.3


From ba480dd4db9f1798541eb2d1c423fc95feee8d36 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 3 Jul 2018 17:10:06 +0800
Subject: btrfs: tree-checker: Detect invalid and empty essential trees

A crafted image with an empty root tree block will later cause a NULL
pointer dereference.

The following trees should never be empty:
1) Tree root
   Must contain at least root items for extent tree, device tree and fs
   tree

2) Chunk tree
   Or we can't even bootstrap as it contains the mapping.

3) Fs tree
   At least inode item for top level inode (.).

4) Device tree
   Dev extents for chunks

5) Extent tree
   Must have corresponding extent for each chunk.

If any of them is empty, we are sure the fs is corrupted and there is no
need to mount it.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847
Reported-by: Xu Wen <wen.xu@gatech.edu>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Tested-by: Gu Jinxiang <gujx@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-checker.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 3bade2ad39e8..db835635372f 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -496,9 +496,22 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
 	 * skip this check for relocation trees.
 	 */
 	if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
+		u64 owner = btrfs_header_owner(leaf);
 		struct btrfs_root *check_root;
 
-		key.objectid = btrfs_header_owner(leaf);
+		/* These trees must never be empty */
+		if (owner == BTRFS_ROOT_TREE_OBJECTID ||
+		    owner == BTRFS_CHUNK_TREE_OBJECTID ||
+		    owner == BTRFS_EXTENT_TREE_OBJECTID ||
+		    owner == BTRFS_DEV_TREE_OBJECTID ||
+		    owner == BTRFS_FS_TREE_OBJECTID ||
+		    owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
+			generic_err(fs_info, leaf, 0,
+			"invalid root, root %llu must never be empty",
+				    owner);
+			return -EUCLEAN;
+		}
+		key.objectid = owner;
 		key.type = BTRFS_ROOT_ITEM_KEY;
 		key.offset = (u64)-1;
 
-- 
cgit v1.2.3


From 389305b2aa68723c754f88d9dbd268a400e10664 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 3 Jul 2018 17:10:07 +0800
Subject: btrfs: relocation: Only remove reloc rb_trees if reloc control has
 been initialized

Invalid reloc tree can cause kernel NULL pointer dereference when btrfs
does some cleanup of the reloc roots.

It turns out that fs_info::reloc_ctl can be NULL in
btrfs_recover_relocation() as we allocate relocation control after all
reloc roots have been verified.
So when we hit the cleanup path for an invalid reloc root, we haven't
called set_reloc_control() yet, thus fs_info::reloc_ctl is still NULL.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=199833
Reported-by: Xu Wen <wen.xu@gatech.edu>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Tested-by: Gu Jinxiang <gujx@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/relocation.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 229f721cbde9..b98d7a594542 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1281,18 +1281,19 @@ static void __del_reloc_root(struct btrfs_root *root)
 	struct mapping_node *node = NULL;
 	struct reloc_control *rc = fs_info->reloc_ctl;
 
-	spin_lock(&rc->reloc_root_tree.lock);
-	rb_node = tree_search(&rc->reloc_root_tree.rb_root,
-			      root->node->start);
-	if (rb_node) {
-		node = rb_entry(rb_node, struct mapping_node, rb_node);
-		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+	if (rc) {
+		spin_lock(&rc->reloc_root_tree.lock);
+		rb_node = tree_search(&rc->reloc_root_tree.rb_root,
+				      root->node->start);
+		if (rb_node) {
+			node = rb_entry(rb_node, struct mapping_node, rb_node);
+			rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+		}
+		spin_unlock(&rc->reloc_root_tree.lock);
+		if (!node)
+			return;
+		BUG_ON((struct btrfs_root *)node->data != root);
 	}
-	spin_unlock(&rc->reloc_root_tree.lock);
-
-	if (!node)
-		return;
-	BUG_ON((struct btrfs_root *)node->data != root);
 
 	spin_lock(&fs_info->trans_lock);
 	list_del_init(&root->root_list);
-- 
cgit v1.2.3


From 5da54bc13801d378bf2e868beb623fc13ba40a83 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Tue, 3 Jul 2018 13:14:50 +0800
Subject: btrfs: use a temporary variable for fs_devices in
 btrfs_init_new_device

There are many dereferences of the %fs_info->fs_devices pointer; use a
temporary variable instead.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 256e57eec8ec..adce1c2e9b7b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2410,12 +2410,13 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	struct list_head *devices;
 	struct super_block *sb = fs_info->sb;
 	struct rcu_string *name;
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 	u64 tmp;
 	int seeding_dev = 0;
 	int ret = 0;
 	bool unlocked = false;
 
-	if (sb_rdonly(sb) && !fs_info->fs_devices->seeding)
+	if (sb_rdonly(sb) && !fs_devices->seeding)
 		return -EROFS;
 
 	bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
@@ -2423,7 +2424,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	if (IS_ERR(bdev))
 		return PTR_ERR(bdev);
 
-	if (fs_info->fs_devices->seeding) {
+	if (fs_devices->seeding) {
 		seeding_dev = 1;
 		down_write(&sb->s_umount);
 		mutex_lock(&uuid_mutex);
@@ -2431,18 +2432,18 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
 	filemap_write_and_wait(bdev->bd_inode->i_mapping);
 
-	devices = &fs_info->fs_devices->devices;
+	devices = &fs_devices->devices;
 
-	mutex_lock(&fs_info->fs_devices->device_list_mutex);
+	mutex_lock(&fs_devices->device_list_mutex);
 	list_for_each_entry(device, devices, dev_list) {
 		if (device->bdev == bdev) {
 			ret = -EEXIST;
 			mutex_unlock(
-				&fs_info->fs_devices->device_list_mutex);
+				&fs_devices->device_list_mutex);
 			goto error;
 		}
 	}
-	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+	mutex_unlock(&fs_devices->device_list_mutex);
 
 	device = btrfs_alloc_device(fs_info, NULL, NULL);
 	if (IS_ERR(device)) {
@@ -2491,23 +2492,22 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 		}
 	}
 
-	device->fs_devices = fs_info->fs_devices;
+	device->fs_devices = fs_devices;
 
-	mutex_lock(&fs_info->fs_devices->device_list_mutex);
+	mutex_lock(&fs_devices->device_list_mutex);
 	mutex_lock(&fs_info->chunk_mutex);
-	list_add_rcu(&device->dev_list, &fs_info->fs_devices->devices);
-	list_add(&device->dev_alloc_list,
-		 &fs_info->fs_devices->alloc_list);
-	fs_info->fs_devices->num_devices++;
-	fs_info->fs_devices->open_devices++;
-	fs_info->fs_devices->rw_devices++;
-	fs_info->fs_devices->total_devices++;
-	fs_info->fs_devices->total_rw_bytes += device->total_bytes;
+	list_add_rcu(&device->dev_list, &fs_devices->devices);
+	list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
+	fs_devices->num_devices++;
+	fs_devices->open_devices++;
+	fs_devices->rw_devices++;
+	fs_devices->total_devices++;
+	fs_devices->total_rw_bytes += device->total_bytes;
 
 	atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
 
 	if (!blk_queue_nonrot(q))
-		fs_info->fs_devices->rotating = 1;
+		fs_devices->rotating = 1;
 
 	tmp = btrfs_super_total_bytes(fs_info->super_copy);
 	btrfs_set_super_total_bytes(fs_info->super_copy,
@@ -2517,7 +2517,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1);
 
 	/* add sysfs device entry */
-	btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
+	btrfs_sysfs_add_device_link(fs_devices, device);
 
 	/*
 	 * we've got more storage, clear any full flags on the space
@@ -2526,7 +2526,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	btrfs_clear_space_info_full(fs_info);
 
 	mutex_unlock(&fs_info->chunk_mutex);
-	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+	mutex_unlock(&fs_devices->device_list_mutex);
 
 	if (seeding_dev) {
 		mutex_lock(&fs_info->chunk_mutex);
@@ -2558,7 +2558,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 		 */
 		snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
 						fs_info->fsid);
-		if (kobject_rename(&fs_info->fs_devices->fsid_kobj, fsid_buf))
+		if (kobject_rename(&fs_devices->fsid_kobj, fsid_buf))
 			btrfs_warn(fs_info,
 				   "sysfs: failed to create fsid for sprout");
 	}
@@ -2593,7 +2593,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	return ret;
 
 error_sysfs:
-	btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
+	btrfs_sysfs_rm_device_link(fs_devices, device);
 error_trans:
 	if (seeding_dev)
 		sb->s_flags |= SB_RDONLY;
-- 
cgit v1.2.3


From 694c51fb2eb6b6d7c9552af78ba646023a843366 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Tue, 3 Jul 2018 13:14:51 +0800
Subject: btrfs: drop unnecessary variable in btrfs_init_new_device

There is only one usage of the declared devices variable; use its
value directly instead.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index adce1c2e9b7b..1ffc49c9c02f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2407,7 +2407,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	struct btrfs_trans_handle *trans;
 	struct btrfs_device *device;
 	struct block_device *bdev;
-	struct list_head *devices;
 	struct super_block *sb = fs_info->sb;
 	struct rcu_string *name;
 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
@@ -2432,10 +2431,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
 	filemap_write_and_wait(bdev->bd_inode->i_mapping);
 
-	devices = &fs_devices->devices;
-
 	mutex_lock(&fs_devices->device_list_mutex);
-	list_for_each_entry(device, devices, dev_list) {
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
 		if (device->bdev == bdev) {
 			ret = -EEXIST;
 			mutex_unlock(
-- 
cgit v1.2.3


From 8f6c72a9e08c2a74d2159d2fc986fa9b29ceb851 Mon Sep 17 00:00:00 2001
From: Zhihui Zhang <zzhsuny@gmail.com>
Date: Mon, 2 Jul 2018 20:00:54 -0400
Subject: Btrfs: free space cache: make sure there is always room for
 generation number

io_ctl_set_generation() assumes that the generation number shares
the same page with inline CRCs. Let's make sure this is always true.

Signed-off-by: Zhihui Zhang <zzhsuny@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/free-space-cache.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d5f80cb300be..d2c0bdbd79ec 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -300,9 +300,9 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
 	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
 		check_crcs = 1;
 
-	/* Make sure we can fit our crcs into the first page */
+	/* Make sure we can fit our crcs and generation into the first page */
 	if (write && check_crcs &&
-	    (num_pages * sizeof(u32)) >= PAGE_SIZE)
+	    (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
 		return -ENOSPC;
 
 	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
-- 
cgit v1.2.3


From 37508515621551538addaf826ab4b8a9aaf0a382 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 29 Jun 2018 10:56:40 +0200
Subject: btrfs: simplify some assignments of inode numbers

There are several places where the btrfs inode is converted to the
generic inode, back to btrfs and then passed to btrfs_ino. We can remove
the extra back and forth conversions.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2858bc355920..17816c455786 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4242,9 +4242,9 @@ again:
 		prev = node;
 		entry = rb_entry(node, struct btrfs_inode, rb_node);
 
-		if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+		if (objectid < btrfs_ino(entry))
 			node = node->rb_left;
-		else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+		else if (objectid > btrfs_ino(entry))
 			node = node->rb_right;
 		else
 			break;
@@ -4252,7 +4252,7 @@ again:
 	if (!node) {
 		while (prev) {
 			entry = rb_entry(prev, struct btrfs_inode, rb_node);
-			if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) {
+			if (objectid <= btrfs_ino(entry)) {
 				node = prev;
 				break;
 			}
@@ -4261,7 +4261,7 @@ again:
 	}
 	while (node) {
 		entry = rb_entry(node, struct btrfs_inode, rb_node);
-		objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1;
+		objectid = btrfs_ino(entry) + 1;
 		inode = igrab(&entry->vfs_inode);
 		if (inode) {
 			spin_unlock(&root->inode_lock);
@@ -5614,9 +5614,9 @@ static void inode_tree_add(struct inode *inode)
 		parent = *p;
 		entry = rb_entry(parent, struct btrfs_inode, rb_node);
 
-		if (ino < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+		if (ino < btrfs_ino(entry))
 			p = &parent->rb_left;
-		else if (ino > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+		else if (ino > btrfs_ino(entry))
 			p = &parent->rb_right;
 		else {
 			WARN_ON(!(entry->vfs_inode.i_state &
-- 
cgit v1.2.3


From 3ffbd68c48320730ef64ebfb5e639220f1f65483 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 29 Jun 2018 10:56:42 +0200
Subject: btrfs: simplify pointer chasing of local fs_info variables

Functions that get a btrfs inode can simply reach the fs_info by
dereferencing the root, which looks a bit more straightforward
than the btrfs_sb(...) indirection.

If the transaction handle is available and not NULL it's used instead.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-inode.c    |  4 ++--
 fs/btrfs/disk-io.c          |  2 +-
 fs/btrfs/extent-tree.c      |  6 +++---
 fs/btrfs/file-item.c        |  2 +-
 fs/btrfs/file.c             | 14 +++++++-------
 fs/btrfs/free-space-cache.c |  7 ++-----
 fs/btrfs/inode.c            |  6 +++---
 fs/btrfs/tree-log.c         |  6 +++---
 8 files changed, 22 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index fe6caa7e698b..596d2af0c8aa 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1222,7 +1222,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
 
 int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
 	struct btrfs_path *path;
@@ -1837,7 +1837,7 @@ release_node:
 
 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_delayed_node *delayed_node;
 
 	/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c77afa9e5d91..e0baf2f3154d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -212,7 +212,7 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode,
 		struct page *page, size_t pg_offset, u64 start, u64 len,
 		int create)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct extent_map_tree *em_tree = &inode->extent_tree;
 	struct extent_map *em;
 	int ret;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 352b7e95f657..6bba288133b8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5889,7 +5889,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	unsigned nr_extents;
 	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
 	int ret = 0;
@@ -5962,7 +5962,7 @@ out_fail:
 void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
 				     bool qgroup_free)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 
 	num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
 	spin_lock(&inode->lock);
@@ -5991,7 +5991,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
 void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
 				    bool qgroup_free)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	unsigned num_extents;
 
 	spin_lock(&inode->lock);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 8c3cd7072caf..ba74827beb32 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -922,7 +922,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
 				     const bool new_inline,
 				     struct extent_map *em)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_root *root = inode->root;
 	struct extent_buffer *leaf = path->nodes[0];
 	const int slot = path->slots[0];
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 89c9404fee9a..da53e45705ba 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -83,7 +83,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1,
 static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
 				    struct inode_defrag *defrag)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct inode_defrag *entry;
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
@@ -135,8 +135,8 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
 int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 			   struct btrfs_inode *inode)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
 	struct btrfs_root *root = inode->root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct inode_defrag *defrag;
 	u64 transid;
 	int ret;
@@ -185,7 +185,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
 				       struct inode_defrag *defrag)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	int ret;
 
 	if (!__need_auto_defrag(fs_info))
@@ -1132,7 +1132,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 			      struct btrfs_inode *inode, u64 start, u64 end)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *root = inode->root;
 	struct extent_buffer *leaf;
 	struct btrfs_path *path;
@@ -1469,7 +1469,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
 				u64 *lockstart, u64 *lockend,
 				struct extent_state **cached_state)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	u64 start_pos;
 	u64 last_pos;
 	int i;
@@ -1525,7 +1525,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
 static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
 				    size_t *write_bytes)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_root *root = inode->root;
 	struct btrfs_ordered_extent *ordered;
 	u64 lockstart, lockend;
@@ -2227,7 +2227,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
 		struct btrfs_inode *inode,
 		struct btrfs_path *path, u64 offset, u64 end)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *root = inode->root;
 	struct extent_buffer *leaf;
 	struct btrfs_file_extent_item *fi;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d2c0bdbd79ec..354d55f22d99 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -655,7 +655,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 				   struct btrfs_free_space_ctl *ctl,
 				   struct btrfs_path *path, u64 offset)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_free_space_header *header;
 	struct extent_buffer *leaf;
 	struct btrfs_io_ctl io_ctl;
@@ -1123,13 +1123,10 @@ static int __btrfs_wait_cache_io(struct btrfs_root *root,
 {
 	int ret;
 	struct inode *inode = io_ctl->inode;
-	struct btrfs_fs_info *fs_info;
 
 	if (!inode)
 		return 0;
 
-	fs_info = btrfs_sb(inode->i_sb);
-
 	/* Flush the dirty pages in the cache file. */
 	ret = flush_dirty_cache(inode);
 	if (ret)
@@ -1145,7 +1142,7 @@ out:
 		BTRFS_I(inode)->generation = 0;
 		if (block_group) {
 #ifdef DEBUG
-			btrfs_err(fs_info,
+			btrfs_err(root->fs_info,
 				  "failed to write free space cache for block group %llu",
 				  block_group->key.objectid);
 #endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 17816c455786..32613dca13c1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1751,7 +1751,7 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
 void __btrfs_del_delalloc_inode(struct btrfs_root *root,
 				struct btrfs_inode *inode)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = root->fs_info;
 
 	if (!list_empty(&inode->delalloc_inodes)) {
 		list_del_init(&inode->delalloc_inodes);
@@ -6418,7 +6418,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 		   struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
 		   const char *name, int name_len, int add_backref, u64 index)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int ret = 0;
 	struct btrfs_key key;
 	struct btrfs_root *root = parent_inode->root;
@@ -6846,7 +6846,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
 	    size_t pg_offset, u64 start, u64 len,
 		int create)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	int ret;
 	int err = 0;
 	u64 extent_start = 0;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6bca8f88ade0..7b7498f1f641 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3739,7 +3739,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 			       int start_slot, int nr, int inode_only,
 			       u64 logged_isize)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	unsigned long src_offset;
 	unsigned long dst_offset;
 	struct btrfs_root *log = inode->root->log_root;
@@ -5436,7 +5436,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
 				 struct btrfs_inode *inode,
 				 struct btrfs_log_ctx *ctx)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int ret;
 	struct btrfs_path *path;
 	struct btrfs_key key;
@@ -5971,7 +5971,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
 			struct btrfs_inode *inode, struct btrfs_inode *old_dir,
 			struct dentry *parent)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 
 	/*
 	 * this will force the logging code to walk the dentry chain
-- 
cgit v1.2.3


From 69d2480456d1baf027a86e530989d7bedd698d5f Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 29 Jun 2018 10:56:44 +0200
Subject: btrfs: use copy_page for copying pages instead of memcpy

Use the helper that's possibly optimized for full page copies.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/free-space-cache.c |  4 ++--
 fs/btrfs/raid56.c           | 12 +++++-------
 2 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 354d55f22d99..57e7ab7f5e03 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -547,7 +547,7 @@ static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
 		io_ctl_map_page(io_ctl, 0);
 	}
 
-	memcpy(io_ctl->cur, bitmap, PAGE_SIZE);
+	copy_page(io_ctl->cur, bitmap);
 	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
 	if (io_ctl->index < io_ctl->num_pages)
 		io_ctl_map_page(io_ctl, 0);
@@ -607,7 +607,7 @@ static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
 	if (ret)
 		return ret;
 
-	memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE);
+	copy_page(entry->bitmap, io_ctl->cur);
 	io_ctl_unmap_page(io_ctl);
 
 	return 0;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 5e4ad134b9ad..27ed47a23f26 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -260,7 +260,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
 		s = kmap(rbio->bio_pages[i]);
 		d = kmap(rbio->stripe_pages[i]);
 
-		memcpy(d, s, PAGE_SIZE);
+		copy_page(d, s);
 
 		kunmap(rbio->bio_pages[i]);
 		kunmap(rbio->stripe_pages[i]);
@@ -1275,7 +1275,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
 						pointers);
 		} else {
 			/* raid5 */
-			memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
+			copy_page(pointers[nr_data], pointers[0]);
 			run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
 		}
 
@@ -1941,9 +1941,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
 			BUG_ON(failb != -1);
 pstripe:
 			/* Copy parity block into failed block to start with */
-			memcpy(pointers[faila],
-			       pointers[rbio->nr_data],
-			       PAGE_SIZE);
+			copy_page(pointers[faila], pointers[rbio->nr_data]);
 
 			/* rearrange the pointer array */
 			p = pointers[faila];
@@ -2448,7 +2446,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
 						pointers);
 		} else {
 			/* raid5 */
-			memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
+			copy_page(pointers[nr_data], pointers[0]);
 			run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
 		}
 
@@ -2456,7 +2454,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
 		p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
 		parity = kmap(p);
 		if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE))
-			memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE);
+			copy_page(parity, pointers[rbio->scrubp]);
 		else
 			/* Parity is right, needn't writeback */
 			bitmap_clear(rbio->dbitmap, pagenr, 1);
-- 
cgit v1.2.3


From d7f663fa3ff906247a979c1115bc92cbabfb19ba Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 29 Jun 2018 10:56:47 +0200
Subject: btrfs: prune unused includes

Remove includes if none of the interfaces and exports is used in the
given source file.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c  |  4 ----
 fs/btrfs/dev-replace.c  |  5 -----
 fs/btrfs/disk-io.c      |  2 --
 fs/btrfs/file.c         |  3 ---
 fs/btrfs/inode-map.c    |  1 -
 fs/btrfs/inode.c        |  4 ----
 fs/btrfs/ioctl.c        |  5 -----
 fs/btrfs/ordered-data.c |  1 -
 fs/btrfs/raid56.c       | 13 -------------
 fs/btrfs/reada.c        |  1 -
 fs/btrfs/struct-funcs.c |  1 -
 fs/btrfs/super.c        |  3 ---
 fs/btrfs/sysfs.c        |  2 --
 fs/btrfs/volumes.c      |  3 ---
 14 files changed, 48 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d3e447b45bf7..f48794a36068 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -5,7 +5,6 @@
 
 #include <linux/kernel.h>
 #include <linux/bio.h>
-#include <linux/buffer_head.h>
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
@@ -14,10 +13,7 @@
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/backing-dev.h>
-#include <linux/mpage.h>
-#include <linux/swap.h>
 #include <linux/writeback.h>
-#include <linux/bit_spinlock.h>
 #include <linux/slab.h>
 #include <linux/sched/mm.h>
 #include <linux/log2.h>
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 1b30c38d05c9..059ca3d5ddd3 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -6,14 +6,9 @@
 #include <linux/sched.h>
 #include <linux/bio.h>
 #include <linux/slab.h>
-#include <linux/buffer_head.h>
 #include <linux/blkdev.h>
-#include <linux/random.h>
-#include <linux/iocontext.h>
-#include <linux/capability.h>
 #include <linux/kthread.h>
 #include <linux/math64.h>
-#include <asm/div64.h>
 #include "ctree.h"
 #include "extent_map.h"
 #include "disk-io.h"
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e0baf2f3154d..6023eed3e805 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -5,8 +5,6 @@
 
 #include <linux/fs.h>
 #include <linux/blkdev.h>
-#include <linux/scatterlist.h>
-#include <linux/swap.h>
 #include <linux/radix-tree.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index da53e45705ba..2be00e873e92 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -5,14 +5,11 @@
 
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <linux/highmem.h>
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/backing-dev.h>
-#include <linux/mpage.h>
 #include <linux/falloc.h>
-#include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/compat.h>
 #include <linux/slab.h>
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index a56f88ea9c73..ffca2abf13d0 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -3,7 +3,6 @@
  * Copyright (C) 2007 Oracle.  All rights reserved.
  */
 
-#include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/pagemap.h>
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 32613dca13c1..ada8a698af10 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -14,17 +14,13 @@
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/backing-dev.h>
-#include <linux/mpage.h>
-#include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/compat.h>
-#include <linux/bit_spinlock.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
 #include <linux/falloc.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
-#include <linux/mount.h>
 #include <linux/btrfs.h>
 #include <linux/blkdev.h>
 #include <linux/posix_acl_xattr.h>
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 08b8c0b346b3..802918507cc0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5,23 +5,18 @@
 
 #include <linux/kernel.h>
 #include <linux/bio.h>
-#include <linux/buffer_head.h>
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/fsnotify.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/time.h>
-#include <linux/init.h>
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/mount.h>
-#include <linux/mpage.h>
 #include <linux/namei.h>
-#include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/compat.h>
-#include <linux/bit_spinlock.h>
 #include <linux/security.h>
 #include <linux/xattr.h>
 #include <linux/mm.h>
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e173b252d795..0c4ef208b8b9 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -6,7 +6,6 @@
 #include <linux/slab.h>
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
-#include <linux/pagevec.h>
 #include "ctree.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 27ed47a23f26..42631079c492 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -5,32 +5,19 @@
  */
 
 #include <linux/sched.h>
-#include <linux/wait.h>
 #include <linux/bio.h>
 #include <linux/slab.h>
-#include <linux/buffer_head.h>
 #include <linux/blkdev.h>
-#include <linux/random.h>
-#include <linux/iocontext.h>
-#include <linux/capability.h>
-#include <linux/ratelimit.h>
-#include <linux/kthread.h>
 #include <linux/raid/pq.h>
 #include <linux/hash.h>
 #include <linux/list_sort.h>
 #include <linux/raid/xor.h>
 #include <linux/mm.h>
-#include <asm/div64.h>
 #include "ctree.h"
-#include "extent_map.h"
 #include "disk-io.h"
-#include "transaction.h"
-#include "print-tree.h"
 #include "volumes.h"
 #include "raid56.h"
 #include "async-thread.h"
-#include "check-integrity.h"
-#include "rcu-string.h"
 
 /* set when additional merges to this rbio are not allowed */
 #define RBIO_RMW_LOCKED_BIT	1
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 4be425f70c2d..dec14b739b10 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -7,7 +7,6 @@
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
-#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include "ctree.h"
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index b7b4acb12833..4c13b737f568 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -3,7 +3,6 @@
  * Copyright (C) 2007 Oracle.  All rights reserved.
  */
 
-#include <linux/highmem.h>
 #include <asm/unaligned.h>
 
 #include "ctree.h"
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3c5f221b5303..e04bcf0b0ed4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -5,7 +5,6 @@
 
 #include <linux/blkdev.h>
 #include <linux/module.h>
-#include <linux/buffer_head.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
@@ -15,8 +14,6 @@
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/mount.h>
-#include <linux/mpage.h>
-#include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/statfs.h>
 #include <linux/compat.h>
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4a4e960c7c66..3717c864ba23 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -7,10 +7,8 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
-#include <linux/buffer_head.h>
 #include <linux/kobject.h>
 #include <linux/bug.h>
-#include <linux/genhd.h>
 #include <linux/debugfs.h>
 
 #include "ctree.h"
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1ffc49c9c02f..211fb2161487 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -8,15 +8,12 @@
 #include <linux/slab.h>
 #include <linux/buffer_head.h>
 #include <linux/blkdev.h>
-#include <linux/iocontext.h>
-#include <linux/capability.h>
 #include <linux/ratelimit.h>
 #include <linux/kthread.h>
 #include <linux/raid/pq.h>
 #include <linux/semaphore.h>
 #include <linux/uuid.h>
 #include <linux/list_sort.h>
-#include <asm/div64.h>
 #include "ctree.h"
 #include "extent_map.h"
 #include "disk-io.h"
-- 
cgit v1.2.3


From 65ad010488a5cc0f123a9924f7ad26a1b3f6a4f6 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 29 Jun 2018 10:56:49 +0200
Subject: btrfs: pass only eb to num_extent_pages

Almost all callers pass the start and len as 2 arguments but this is not
necessary, all the information is provided by the eb. By reordering the
calls to num_extent_pages, we don't need the local variables with
start/len.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 30 +++++++++++++++---------------
 fs/btrfs/extent_io.h |  6 +++---
 2 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b3e45714d28f..fd34c749209e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2059,7 +2059,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *eb, int mirror_num)
 {
 	u64 start = eb->start;
-	unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
+	unsigned long i, num_pages = num_extent_pages(eb);
 	int ret = 0;
 
 	if (sb_rdonly(fs_info->sb))
@@ -3588,7 +3588,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
 	if (!ret)
 		return ret;
 
-	num_pages = num_extent_pages(eb->start, eb->len);
+	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = eb->pages[i];
 
@@ -3718,7 +3718,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 	int ret = 0;
 
 	clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
-	num_pages = num_extent_pages(eb->start, eb->len);
+	num_pages = num_extent_pages(eb);
 	atomic_set(&eb->io_pages, num_pages);
 
 	/* set btree blocks beyond nritems with 0 to avoid stale content. */
@@ -4653,7 +4653,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
 
 	BUG_ON(extent_buffer_under_io(eb));
 
-	index = num_extent_pages(eb->start, eb->len);
+	index = num_extent_pages(eb);
 	if (index == 0)
 		return;
 
@@ -4746,7 +4746,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
 	unsigned long i;
 	struct page *p;
 	struct extent_buffer *new;
-	unsigned long num_pages = num_extent_pages(src->start, src->len);
+	unsigned long num_pages = num_extent_pages(src);
 
 	new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
 	if (new == NULL)
@@ -4778,12 +4778,11 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
 	unsigned long num_pages;
 	unsigned long i;
 
-	num_pages = num_extent_pages(start, len);
-
 	eb = __alloc_extent_buffer(fs_info, start, len);
 	if (!eb)
 		return NULL;
 
+	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
 		eb->pages[i] = alloc_page(GFP_NOFS);
 		if (!eb->pages[i])
@@ -4847,7 +4846,7 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb,
 
 	check_buffer_tree_ref(eb);
 
-	num_pages = num_extent_pages(eb->start, eb->len);
+	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = eb->pages[i];
 
@@ -4944,7 +4943,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 					  u64 start)
 {
 	unsigned long len = fs_info->nodesize;
-	unsigned long num_pages = num_extent_pages(start, len);
+	unsigned long num_pages;
 	unsigned long i;
 	unsigned long index = start >> PAGE_SHIFT;
 	struct extent_buffer *eb;
@@ -4967,6 +4966,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	if (!eb)
 		return ERR_PTR(-ENOMEM);
 
+	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++, index++) {
 		p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
 		if (!p) {
@@ -5163,7 +5163,7 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
 	unsigned long num_pages;
 	struct page *page;
 
-	num_pages = num_extent_pages(eb->start, eb->len);
+	num_pages = num_extent_pages(eb);
 
 	for (i = 0; i < num_pages; i++) {
 		page = eb->pages[i];
@@ -5197,7 +5197,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
 
 	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
 
-	num_pages = num_extent_pages(eb->start, eb->len);
+	num_pages = num_extent_pages(eb);
 	WARN_ON(atomic_read(&eb->refs) == 0);
 	WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
 
@@ -5213,7 +5213,7 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb)
 	unsigned long num_pages;
 
 	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-	num_pages = num_extent_pages(eb->start, eb->len);
+	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
 		page = eb->pages[i];
 		if (page)
@@ -5228,7 +5228,7 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
 	unsigned long num_pages;
 
 	set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-	num_pages = num_extent_pages(eb->start, eb->len);
+	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
 		page = eb->pages[i];
 		SetPageUptodate(page);
@@ -5252,7 +5252,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
 		return 0;
 
-	num_pages = num_extent_pages(eb->start, eb->len);
+	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
 		page = eb->pages[i];
 		if (wait == WAIT_NONE) {
@@ -5580,7 +5580,7 @@ void copy_extent_buffer_full(struct extent_buffer *dst,
 
 	ASSERT(dst->len == src->len);
 
-	num_pages = num_extent_pages(dst->start, dst->len);
+	num_pages = num_extent_pages(dst);
 	for (i = 0; i < num_pages; i++)
 		copy_page(page_address(dst->pages[i]),
 				page_address(src->pages[i]));
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 0bfd4aeb822d..d8382a4a7f46 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -440,10 +440,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 			     int mirror_num);
 void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
 
-static inline unsigned long num_extent_pages(u64 start, u64 len)
+static inline unsigned long num_extent_pages(const struct extent_buffer *eb)
 {
-	return ((start + len + PAGE_SIZE - 1) >> PAGE_SHIFT) -
-		(start >> PAGE_SHIFT);
+	return ((eb->start + eb->len + PAGE_SIZE - 1) >> PAGE_SHIFT) -
+		(eb->start >> PAGE_SHIFT);
 }
 
 static inline void extent_buffer_get(struct extent_buffer *eb)
-- 
cgit v1.2.3


From 8791d43207966f40d8ffba18c663514aca5a6f3e Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 4 Jul 2018 17:49:31 +0200
Subject: btrfs: use round_up wrapper in num_extent_pages

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index d8382a4a7f46..7a3c15b55f2b 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -442,8 +442,8 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
 
 static inline unsigned long num_extent_pages(const struct extent_buffer *eb)
 {
-	return ((eb->start + eb->len + PAGE_SIZE - 1) >> PAGE_SHIFT) -
-		(eb->start >> PAGE_SHIFT);
+	return (round_up(eb->start + eb->len, PAGE_SIZE) >> PAGE_SHIFT) -
+	       (eb->start >> PAGE_SHIFT);
 }
 
 static inline void extent_buffer_get(struct extent_buffer *eb)
-- 
cgit v1.2.3


From cc5e31a4775d0d6b98139fdee51868a270bda42f Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 1 Mar 2018 18:20:27 +0100
Subject: btrfs: switch types to int when counting eb pages

The loops iterating over eb pages use unsigned long, which is overkill as
we know that there are at most 16 pages (64k / 4k), and 4 by default
(with nodesize 16k).

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 44 ++++++++++++++++++++++----------------------
 fs/btrfs/extent_io.h |  2 +-
 2 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fd34c749209e..4fc37e284a79 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2059,7 +2059,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *eb, int mirror_num)
 {
 	u64 start = eb->start;
-	unsigned long i, num_pages = num_extent_pages(eb);
+	int i, num_pages = num_extent_pages(eb);
 	int ret = 0;
 
 	if (sb_rdonly(fs_info->sb))
@@ -3538,7 +3538,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
 			  struct btrfs_fs_info *fs_info,
 			  struct extent_page_data *epd)
 {
-	unsigned long i, num_pages;
+	int i, num_pages;
 	int flush = 0;
 	int ret = 0;
 
@@ -3712,7 +3712,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 	struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
 	u64 offset = eb->start;
 	u32 nritems;
-	unsigned long i, num_pages;
+	int i, num_pages;
 	unsigned long start, end;
 	unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
 	int ret = 0;
@@ -4647,7 +4647,7 @@ int extent_buffer_under_io(struct extent_buffer *eb)
  */
 static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
 {
-	unsigned long index;
+	int index;
 	struct page *page;
 	int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
 
@@ -4743,10 +4743,10 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
 
 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
 {
-	unsigned long i;
+	int i;
 	struct page *p;
 	struct extent_buffer *new;
-	unsigned long num_pages = num_extent_pages(src);
+	int num_pages = num_extent_pages(src);
 
 	new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
 	if (new == NULL)
@@ -4775,8 +4775,8 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
 						  u64 start, unsigned long len)
 {
 	struct extent_buffer *eb;
-	unsigned long num_pages;
-	unsigned long i;
+	int num_pages;
+	int i;
 
 	eb = __alloc_extent_buffer(fs_info, start, len);
 	if (!eb)
@@ -4842,7 +4842,7 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
 static void mark_extent_buffer_accessed(struct extent_buffer *eb,
 		struct page *accessed)
 {
-	unsigned long num_pages, i;
+	int num_pages, i;
 
 	check_buffer_tree_ref(eb);
 
@@ -4943,8 +4943,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 					  u64 start)
 {
 	unsigned long len = fs_info->nodesize;
-	unsigned long num_pages;
-	unsigned long i;
+	int num_pages;
+	int i;
 	unsigned long index = start >> PAGE_SHIFT;
 	struct extent_buffer *eb;
 	struct extent_buffer *exists = NULL;
@@ -5159,8 +5159,8 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
 
 void clear_extent_buffer_dirty(struct extent_buffer *eb)
 {
-	unsigned long i;
-	unsigned long num_pages;
+	int i;
+	int num_pages;
 	struct page *page;
 
 	num_pages = num_extent_pages(eb);
@@ -5189,8 +5189,8 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
 
 int set_extent_buffer_dirty(struct extent_buffer *eb)
 {
-	unsigned long i;
-	unsigned long num_pages;
+	int i;
+	int num_pages;
 	int was_dirty = 0;
 
 	check_buffer_tree_ref(eb);
@@ -5208,9 +5208,9 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
 
 void clear_extent_buffer_uptodate(struct extent_buffer *eb)
 {
-	unsigned long i;
+	int i;
 	struct page *page;
-	unsigned long num_pages;
+	int num_pages;
 
 	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 	num_pages = num_extent_pages(eb);
@@ -5223,9 +5223,9 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb)
 
 void set_extent_buffer_uptodate(struct extent_buffer *eb)
 {
-	unsigned long i;
+	int i;
 	struct page *page;
-	unsigned long num_pages;
+	int num_pages;
 
 	set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 	num_pages = num_extent_pages(eb);
@@ -5238,13 +5238,13 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
 int read_extent_buffer_pages(struct extent_io_tree *tree,
 			     struct extent_buffer *eb, int wait, int mirror_num)
 {
-	unsigned long i;
+	int i;
 	struct page *page;
 	int err;
 	int ret = 0;
 	int locked_pages = 0;
 	int all_uptodate = 1;
-	unsigned long num_pages;
+	int num_pages;
 	unsigned long num_reads = 0;
 	struct bio *bio = NULL;
 	unsigned long bio_flags = 0;
@@ -5576,7 +5576,7 @@ void copy_extent_buffer_full(struct extent_buffer *dst,
 			     struct extent_buffer *src)
 {
 	int i;
-	unsigned num_pages;
+	int num_pages;
 
 	ASSERT(dst->len == src->len);
 
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7a3c15b55f2b..48f1ee9ad379 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -440,7 +440,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 			     int mirror_num);
 void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
 
-static inline unsigned long num_extent_pages(const struct extent_buffer *eb)
+static inline int num_extent_pages(const struct extent_buffer *eb)
 {
 	return (round_up(eb->start + eb->len, PAGE_SIZE) >> PAGE_SHIFT) -
 	       (eb->start >> PAGE_SHIFT);
-- 
cgit v1.2.3


From ebcc326316f3d798e9715e5ca1451c3e457b95dd Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 29 Jun 2018 10:56:53 +0200
Subject: btrfs: open-code bio_set_op_attrs

The helper is trivial and marked as deprecated.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/check-integrity.c |  2 +-
 fs/btrfs/compression.c     |  4 ++--
 fs/btrfs/extent_io.c       |  2 +-
 fs/btrfs/inode.c           |  2 +-
 fs/btrfs/raid56.c          | 10 +++++-----
 fs/btrfs/scrub.c           |  6 +++---
 6 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index daf45472bef9..833cf3c35b4d 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1629,7 +1629,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
 		bio = btrfs_io_bio_alloc(num_pages - i);
 		bio_set_dev(bio, block_ctx->dev->bdev);
 		bio->bi_iter.bi_sector = dev_bytenr >> 9;
-		bio_set_op_attrs(bio, REQ_OP_READ, 0);
+		bio->bi_opf = REQ_OP_READ;
 
 		for (j = i; j < num_pages; j++) {
 			ret = bio_add_page(bio, block_ctx->pagev[j],
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f48794a36068..70dace47258b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -609,7 +609,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	cb->len = bio->bi_iter.bi_size;
 
 	comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
-	bio_set_op_attrs (comp_bio, REQ_OP_READ, 0);
+	comp_bio->bi_opf = REQ_OP_READ;
 	comp_bio->bi_private = cb;
 	comp_bio->bi_end_io = end_compressed_bio_read;
 	refcount_set(&cb->pending_bios, 1);
@@ -656,7 +656,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			}
 
 			comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
-			bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
+			comp_bio->bi_opf = REQ_OP_READ;
 			comp_bio->bi_private = cb;
 			comp_bio->bi_end_io = end_compressed_bio_read;
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4fc37e284a79..fea015be4ce1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2398,7 +2398,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 				      start - page_offset(page),
 				      (int)phy_offset, failed_bio->bi_end_io,
 				      NULL);
-	bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
+	bio->bi_opf = REQ_OP_READ | read_mode;
 
 	btrfs_debug(btrfs_sb(inode->i_sb),
 		"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ada8a698af10..564ec00c765b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7861,7 +7861,7 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
 	isector >>= inode->i_sb->s_blocksize_bits;
 	bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
 				pgoff, isector, repair_endio, repair_arg);
-	bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
+	bio->bi_opf = REQ_OP_READ | read_mode;
 
 	btrfs_debug(BTRFS_I(inode)->root->fs_info,
 		    "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d",
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 42631079c492..1a1b7d6c44cb 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1330,7 +1330,7 @@ write_data:
 
 		bio->bi_private = rbio;
 		bio->bi_end_io = raid_write_end_io;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+		bio->bi_opf = REQ_OP_WRITE;
 
 		submit_bio(bio);
 	}
@@ -1586,7 +1586,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
 
 		bio->bi_private = rbio;
 		bio->bi_end_io = raid_rmw_end_io;
-		bio_set_op_attrs(bio, REQ_OP_READ, 0);
+		bio->bi_opf = REQ_OP_READ;
 
 		btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
 
@@ -2130,7 +2130,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
 
 		bio->bi_private = rbio;
 		bio->bi_end_io = raid_recover_end_io;
-		bio_set_op_attrs(bio, REQ_OP_READ, 0);
+		bio->bi_opf = REQ_OP_READ;
 
 		btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
 
@@ -2502,7 +2502,7 @@ submit_write:
 
 		bio->bi_private = rbio;
 		bio->bi_end_io = raid_write_end_io;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+		bio->bi_opf = REQ_OP_WRITE;
 
 		submit_bio(bio);
 	}
@@ -2684,7 +2684,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
 
 		bio->bi_private = rbio;
 		bio->bi_end_io = raid56_parity_scrub_end_io;
-		bio_set_op_attrs(bio, REQ_OP_READ, 0);
+		bio->bi_opf = REQ_OP_READ;
 
 		btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
 
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 67bb003323e7..bf4f262f9a52 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1842,7 +1842,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 		bio = btrfs_io_bio_alloc(1);
 		bio_set_dev(bio, page_bad->dev->bdev);
 		bio->bi_iter.bi_sector = page_bad->physical >> 9;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+		bio->bi_opf = REQ_OP_WRITE;
 
 		ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
 		if (PAGE_SIZE != ret) {
@@ -1937,7 +1937,7 @@ again:
 		bio->bi_end_io = scrub_wr_bio_end_io;
 		bio_set_dev(bio, sbio->dev->bdev);
 		bio->bi_iter.bi_sector = sbio->physical >> 9;
-		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+		bio->bi_opf = REQ_OP_WRITE;
 		sbio->status = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical_for_dev_replace ||
@@ -2337,7 +2337,7 @@ again:
 		bio->bi_end_io = scrub_bio_end_io;
 		bio_set_dev(bio, sbio->dev->bdev);
 		bio->bi_iter.bi_sector = sbio->physical >> 9;
-		bio_set_op_attrs(bio, REQ_OP_READ, 0);
+		bio->bi_opf = REQ_OP_READ;
 		sbio->status = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical ||
-- 
cgit v1.2.3


From ac63885907ee25bf277fa39ba66a0d85dec20786 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 29 Jun 2018 10:56:56 +0200
Subject: btrfs: raid56: add new helper for starting async work

Add helper that schedules a given function to run on the rmw workqueue.
This will replace several standalone helpers.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/raid56.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 1a1b7d6c44cb..f30d847baf07 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -174,6 +174,12 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
 					 int need_check);
 static void async_scrub_parity(struct btrfs_raid_bio *rbio);
 
+static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
+{
+	btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL);
+	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
+}
+
 /*
  * the stripe hash table is used for locking, and to collect
  * bios in hopes of making a full stripe
-- 
cgit v1.2.3


From cf6a4a7587dc997b29359e29cf14079e1931715b Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 29 Jun 2018 10:56:58 +0200
Subject: btrfs: raid56: use new helper for async_rmw_stripe

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/raid56.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index f30d847baf07..96a7d3445623 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -162,7 +162,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
 static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
 static void rmw_work(struct btrfs_work *work);
 static void read_rebuild_work(struct btrfs_work *work);
-static void async_rmw_stripe(struct btrfs_raid_bio *rbio);
 static void async_read_rebuild(struct btrfs_raid_bio *rbio);
 static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
 static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
@@ -811,7 +810,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
 				async_read_rebuild(next);
 			} else if (next->operation == BTRFS_RBIO_WRITE) {
 				steal_rbio(rbio, next);
-				async_rmw_stripe(next);
+				start_async_work(next, rmw_work);
 			} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
 				steal_rbio(rbio, next);
 				async_scrub_parity(next);
@@ -1501,12 +1500,6 @@ cleanup:
 	rbio_orig_end_io(rbio, BLK_STS_IOERR);
 }
 
-static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
-{
-	btrfs_init_work(&rbio->work, btrfs_rmw_helper, rmw_work, NULL, NULL);
-	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
 static void async_read_rebuild(struct btrfs_raid_bio *rbio)
 {
 	btrfs_init_work(&rbio->work, btrfs_rmw_helper,
@@ -1645,7 +1638,7 @@ static int partial_stripe_write(struct btrfs_raid_bio *rbio)
 
 	ret = lock_stripe_add(rbio);
 	if (ret == 0)
-		async_rmw_stripe(rbio);
+		start_async_work(rbio, rmw_work);
 	return 0;
 }
 
-- 
cgit v1.2.3


From e66d8d5a41c1e3c560fc7aba4edcb53caf4f26c8 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 29 Jun 2018 10:57:00 +0200
Subject: btrfs: raid56: use new helper for async_read_rebuild

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/raid56.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 96a7d3445623..f9b349171d61 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -162,7 +162,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
 static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
 static void rmw_work(struct btrfs_work *work);
 static void read_rebuild_work(struct btrfs_work *work);
-static void async_read_rebuild(struct btrfs_raid_bio *rbio);
 static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
 static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
 static void __free_raid_bio(struct btrfs_raid_bio *rbio);
@@ -804,10 +803,10 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
 			spin_unlock_irqrestore(&h->lock, flags);
 
 			if (next->operation == BTRFS_RBIO_READ_REBUILD)
-				async_read_rebuild(next);
+				start_async_work(next, read_rebuild_work);
 			else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
 				steal_rbio(rbio, next);
-				async_read_rebuild(next);
+				start_async_work(next, read_rebuild_work);
 			} else if (next->operation == BTRFS_RBIO_WRITE) {
 				steal_rbio(rbio, next);
 				start_async_work(next, rmw_work);
@@ -1500,14 +1499,6 @@ cleanup:
 	rbio_orig_end_io(rbio, BLK_STS_IOERR);
 }
 
-static void async_read_rebuild(struct btrfs_raid_bio *rbio)
-{
-	btrfs_init_work(&rbio->work, btrfs_rmw_helper,
-			read_rebuild_work, NULL, NULL);
-
-	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
 /*
  * the stripe must be locked by the caller.  It will
  * unlock after all the writes are done
@@ -2765,5 +2756,5 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
 void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
 {
 	if (!lock_stripe_add(rbio))
-		async_read_rebuild(rbio);
+		start_async_work(rbio, read_rebuild_work);
 }
-- 
cgit v1.2.3


From a81b747d0f6efc01506fbd5335e91607dc013b8d Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 29 Jun 2018 10:57:03 +0200
Subject: btrfs: raid56: use new helper for async_scrub_parity

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/raid56.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index f9b349171d61..339cce0878d1 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -170,7 +170,7 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
 
 static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
 					 int need_check);
-static void async_scrub_parity(struct btrfs_raid_bio *rbio);
+static void scrub_parity_work(struct btrfs_work *work);
 
 static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
 {
@@ -812,7 +812,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
 				start_async_work(next, rmw_work);
 			} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
 				steal_rbio(rbio, next);
-				async_scrub_parity(next);
+				start_async_work(next, scrub_parity_work);
 			}
 
 			goto done_nolock;
@@ -2703,18 +2703,10 @@ static void scrub_parity_work(struct btrfs_work *work)
 	raid56_parity_scrub_stripe(rbio);
 }
 
-static void async_scrub_parity(struct btrfs_raid_bio *rbio)
-{
-	btrfs_init_work(&rbio->work, btrfs_rmw_helper,
-			scrub_parity_work, NULL, NULL);
-
-	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
 void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
 {
 	if (!lock_stripe_add(rbio))
-		async_scrub_parity(rbio);
+		start_async_work(rbio, scrub_parity_work);
 }
 
 /* The following code is used for dev replace of a missing RAID 5/6 device. */
-- 
cgit v1.2.3


From 176571a1f64f7670c87c3f8d3b50c11cc836110e Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 29 Jun 2018 10:57:05 +0200
Subject: btrfs: raid56: merge rbio_is_full helpers

There's only one call site of the unlocked helper so it can be folded
into the caller.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/raid56.c | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 339cce0878d1..272acd9b1192 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -507,32 +507,21 @@ static void run_xor(void **pages, int src_cnt, ssize_t len)
 }
 
 /*
- * returns true if the bio list inside this rbio
- * covers an entire stripe (no rmw required).
- * Must be called with the bio list lock held, or
- * at a time when you know it is impossible to add
- * new bios into the list
+ * Returns true if the bio list inside this rbio covers an entire stripe (no
+ * rmw required).
  */
-static int __rbio_is_full(struct btrfs_raid_bio *rbio)
+static int rbio_is_full(struct btrfs_raid_bio *rbio)
 {
+	unsigned long flags;
 	unsigned long size = rbio->bio_list_bytes;
 	int ret = 1;
 
+	spin_lock_irqsave(&rbio->bio_list_lock, flags);
 	if (size != rbio->nr_data * rbio->stripe_len)
 		ret = 0;
-
 	BUG_ON(size > rbio->nr_data * rbio->stripe_len);
-	return ret;
-}
-
-static int rbio_is_full(struct btrfs_raid_bio *rbio)
-{
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&rbio->bio_list_lock, flags);
-	ret = __rbio_is_full(rbio);
 	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From c7b562c5480322ffaf591f45a4ff7ee089340ab4 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 29 Jun 2018 10:57:10 +0200
Subject: btrfs: raid56: catch errors from full_stripe_write

Add fall-back code to catch failure of full_stripe_write. Proper error
handling from inside run_plug would need more code restructuring as it's
called at arbitrary points by io scheduler.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/raid56.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 272acd9b1192..df41d7049936 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1686,8 +1686,11 @@ static void run_plug(struct btrfs_plug_cb *plug)
 		list_del_init(&cur->plug_list);
 
 		if (rbio_is_full(cur)) {
+			int ret;
+
 			/* we have a full stripe, send it down */
-			full_stripe_write(cur);
+			ret = full_stripe_write(cur);
+			BUG_ON(ret);
 			continue;
 		}
 		if (last) {
-- 
cgit v1.2.3


From 340f1aa27f367e0c3d2662d44f356b05087fea05 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Thu, 5 Jul 2018 14:50:48 +0300
Subject: btrfs: qgroups: Move transaction management inside
 btrfs_quota_enable/disable

Commit 5d23515be669 ("btrfs: Move qgroup rescan on quota enable to
btrfs_quota_enable") not only resulted in easier-to-follow code but
also introduced a subtle bug. It changed the timing of the initial
qgroup rescan:

- before the commit: it would happen after transaction commit had occurred
- after the commit: it might happen before the transaction was committed

This results in failure to correctly rescan the quota since there could
be data which is still not committed on disk.

This patch aims to fix this by moving the transaction creation/commit
inside btrfs_quota_enable, which allows the quota rescan to be scheduled
after the transaction has been committed.

Fixes: 5d23515be669 ("btrfs: Move qgroup rescan on quota enable to btrfs_quota_enable")
Reported-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Link: https://marc.info/?l=linux-btrfs&m=152999289017582
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c  | 15 ++--------
 fs/btrfs/qgroup.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++---------
 fs/btrfs/qgroup.h |  6 ++--
 3 files changed, 76 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 802918507cc0..9dcd16dec754 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5105,9 +5105,7 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
 	struct inode *inode = file_inode(file);
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_ioctl_quota_ctl_args *sa;
-	struct btrfs_trans_handle *trans = NULL;
 	int ret;
-	int err;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -5123,28 +5121,19 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
 	}
 
 	down_write(&fs_info->subvol_sem);
-	trans = btrfs_start_transaction(fs_info->tree_root, 2);
-	if (IS_ERR(trans)) {
-		ret = PTR_ERR(trans);
-		goto out;
-	}
 
 	switch (sa->cmd) {
 	case BTRFS_QUOTA_CTL_ENABLE:
-		ret = btrfs_quota_enable(trans, fs_info);
+		ret = btrfs_quota_enable(fs_info);
 		break;
 	case BTRFS_QUOTA_CTL_DISABLE:
-		ret = btrfs_quota_disable(trans, fs_info);
+		ret = btrfs_quota_disable(fs_info);
 		break;
 	default:
 		ret = -EINVAL;
 		break;
 	}
 
-	err = btrfs_commit_transaction(trans);
-	if (err && !ret)
-		ret = err;
-out:
 	kfree(sa);
 	up_write(&fs_info->subvol_sem);
 drop_write:
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c25dc47210a3..97a59a444500 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -875,8 +875,7 @@ out:
 	return ret;
 }
 
-int btrfs_quota_enable(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info)
+int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_root *quota_root;
 	struct btrfs_root *tree_root = fs_info->tree_root;
@@ -886,6 +885,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 	struct btrfs_key key;
 	struct btrfs_key found_key;
 	struct btrfs_qgroup *qgroup = NULL;
+	struct btrfs_trans_handle *trans = NULL;
 	int ret = 0;
 	int slot;
 
@@ -893,9 +893,25 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 	if (fs_info->quota_root)
 		goto out;
 
+	/*
+	 * 1 for quota root item
+	 * 1 for BTRFS_QGROUP_STATUS item
+	 *
+	 * Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items
+	 * per subvolume. However those are not currently reserved since it
+	 * would be a lot of overkill.
+	 */
+	trans = btrfs_start_transaction(tree_root, 2);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		trans = NULL;
+		goto out;
+	}
+
 	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
 	if (!fs_info->qgroup_ulist) {
 		ret = -ENOMEM;
+		btrfs_abort_transaction(trans, ret);
 		goto out;
 	}
 
@@ -906,12 +922,14 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 				       BTRFS_QUOTA_TREE_OBJECTID);
 	if (IS_ERR(quota_root)) {
 		ret =  PTR_ERR(quota_root);
+		btrfs_abort_transaction(trans, ret);
 		goto out;
 	}
 
 	path = btrfs_alloc_path();
 	if (!path) {
 		ret = -ENOMEM;
+		btrfs_abort_transaction(trans, ret);
 		goto out_free_root;
 	}
 
@@ -921,8 +939,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 
 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
 				      sizeof(*ptr));
-	if (ret)
+	if (ret) {
+		btrfs_abort_transaction(trans, ret);
 		goto out_free_path;
+	}
 
 	leaf = path->nodes[0];
 	ptr = btrfs_item_ptr(leaf, path->slots[0],
@@ -944,9 +964,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
 	if (ret > 0)
 		goto out_add_root;
-	if (ret < 0)
+	if (ret < 0) {
+		btrfs_abort_transaction(trans, ret);
 		goto out_free_path;
-
+	}
 
 	while (1) {
 		slot = path->slots[0];
@@ -956,18 +977,23 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 		if (found_key.type == BTRFS_ROOT_REF_KEY) {
 			ret = add_qgroup_item(trans, quota_root,
 					      found_key.offset);
-			if (ret)
+			if (ret) {
+				btrfs_abort_transaction(trans, ret);
 				goto out_free_path;
+			}
 
 			qgroup = add_qgroup_rb(fs_info, found_key.offset);
 			if (IS_ERR(qgroup)) {
 				ret = PTR_ERR(qgroup);
+				btrfs_abort_transaction(trans, ret);
 				goto out_free_path;
 			}
 		}
 		ret = btrfs_next_item(tree_root, path);
-		if (ret < 0)
+		if (ret < 0) {
+			btrfs_abort_transaction(trans, ret);
 			goto out_free_path;
+		}
 		if (ret)
 			break;
 	}
@@ -975,18 +1001,28 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 out_add_root:
 	btrfs_release_path(path);
 	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
-	if (ret)
+	if (ret) {
+		btrfs_abort_transaction(trans, ret);
 		goto out_free_path;
+	}
 
 	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
 	if (IS_ERR(qgroup)) {
 		ret = PTR_ERR(qgroup);
+		btrfs_abort_transaction(trans, ret);
 		goto out_free_path;
 	}
 	spin_lock(&fs_info->qgroup_lock);
 	fs_info->quota_root = quota_root;
 	set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
 	spin_unlock(&fs_info->qgroup_lock);
+
+	ret = btrfs_commit_transaction(trans);
+	if (ret) {
+		trans = NULL;
+		goto out_free_path;
+	}
+
 	ret = qgroup_rescan_init(fs_info, 0, 1);
 	if (!ret) {
 	        qgroup_rescan_zero_tracking(fs_info);
@@ -1006,20 +1042,35 @@ out:
 	if (ret) {
 		ulist_free(fs_info->qgroup_ulist);
 		fs_info->qgroup_ulist = NULL;
+		if (trans)
+			btrfs_end_transaction(trans);
 	}
 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
 }
 
-int btrfs_quota_disable(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info)
+int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_root *quota_root;
+	struct btrfs_trans_handle *trans = NULL;
 	int ret = 0;
 
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (!fs_info->quota_root)
 		goto out;
+
+	/*
+	 * 1 For the root item
+	 *
+	 * We should also reserve enough items for the quota tree deletion in
+	 * btrfs_clean_quota_tree but this is not done.
+	 */
+	trans = btrfs_start_transaction(fs_info->tree_root, 1);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		goto out;
+	}
+
 	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
 	btrfs_qgroup_wait_for_completion(fs_info, false);
 	spin_lock(&fs_info->qgroup_lock);
@@ -1031,12 +1082,16 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 	btrfs_free_qgroup_config(fs_info);
 
 	ret = btrfs_clean_quota_tree(trans, quota_root);
-	if (ret)
-		goto out;
+	if (ret) {
+		btrfs_abort_transaction(trans, ret);
+		goto end_trans;
+	}
 
 	ret = btrfs_del_root(trans, fs_info, &quota_root->root_key);
-	if (ret)
-		goto out;
+	if (ret) {
+		btrfs_abort_transaction(trans, ret);
+		goto end_trans;
+	}
 
 	list_del(&quota_root->dirty_list);
 
@@ -1048,6 +1103,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 	free_extent_buffer(quota_root->node);
 	free_extent_buffer(quota_root->commit_root);
 	kfree(quota_root);
+
+end_trans:
+	ret = btrfs_end_transaction(trans);
 out:
 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index d60dd06445ce..bec7c9b17a8e 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -141,10 +141,8 @@ struct btrfs_qgroup {
 #define QGROUP_RELEASE		(1<<1)
 #define QGROUP_FREE		(1<<2)
 
-int btrfs_quota_enable(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info);
-int btrfs_quota_disable(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info);
+int btrfs_quota_enable(struct btrfs_fs_info *fs_info);
+int btrfs_quota_disable(struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
 void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
-- 
cgit v1.2.3


From b4993e64f78a9605b45252fa9ba385c88a1f4ce9 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Tue, 3 Jul 2018 17:07:23 +0800
Subject: btrfs: fix in-memory value of total_devices after seed device
 deletion

When deleting a seed device, %cur_devices (seed) and %fs_devices
(parent) are different. Since the parent fs_devices::total_devices also
counts the total number of devices including the seed device, decrement
its in-memory value after a successful seed device deletion. We are
already updating the corresponding on-disk
btrfs_super_block::number_devices value.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 211fb2161487..f532e855b507 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2026,6 +2026,9 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 
 	cur_devices->num_devices--;
 	cur_devices->total_devices--;
+	/* Update total_devices of the parent fs_devices if it's seed */
+	if (cur_devices != fs_devices)
+		fs_devices->total_devices--;
 
 	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
 		cur_devices->missing_devices--;
-- 
cgit v1.2.3


From 93b9bcdf9fbcb683d4e8c44ee8cec0989053d4de Mon Sep 17 00:00:00 2001
From: Gu Jinxiang <gujx@cn.fujitsu.com>
Date: Mon, 9 Jul 2018 14:39:15 +0800
Subject: btrfs: remove unused parameter from btrfs_parse_subvol_options

The parameter flags is no longer used since commit d7407606564c ("btrfs:
split parse_early_options() in two"), so remove it.

Signed-off-by: Gu Jinxiang <gujx@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e04bcf0b0ed4..39d8e39b2fe1 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -933,8 +933,8 @@ out:
  *
  * The value is later passed to mount_subvol()
  */
-static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
-		char **subvol_name, u64 *subvol_objectid)
+static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
+		u64 *subvol_objectid)
 {
 	substring_t args[MAX_OPT_ARGS];
 	char *opts, *orig, *p;
@@ -1648,8 +1648,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 	if (!(flags & SB_RDONLY))
 		mode |= FMODE_WRITE;
 
-	error = btrfs_parse_subvol_options(data, mode,
-					  &subvol_name, &subvol_objectid);
+	error = btrfs_parse_subvol_options(data, &subvol_name,
+					&subvol_objectid);
 	if (error) {
 		kfree(subvol_name);
 		return ERR_PTR(error);
-- 
cgit v1.2.3


From 621567a28ce081424bdcaaef7763a34b204d5be0 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 9 Jul 2018 14:52:54 +0800
Subject: btrfs: Remove unused function btrfs_account_dev_extents_size

This function is not used since the alloc_start parameter has been
obsoleted in commit 0d0c71b317207082856 ("btrfs: obsolete and remove
mount option alloc_start").

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 85 ------------------------------------------------------
 fs/btrfs/volumes.h |  2 --
 2 files changed, 87 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f532e855b507..f7191fdfb44c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1258,91 +1258,6 @@ error_bdev_put:
 	return ret;
 }
 
-/* helper to account the used device space in the range */
-int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
-				   u64 end, u64 *length)
-{
-	struct btrfs_key key;
-	struct btrfs_root *root = device->fs_info->dev_root;
-	struct btrfs_dev_extent *dev_extent;
-	struct btrfs_path *path;
-	u64 extent_end;
-	int ret;
-	int slot;
-	struct extent_buffer *l;
-
-	*length = 0;
-
-	if (start >= device->total_bytes ||
-		test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
-		return 0;
-
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
-	path->reada = READA_FORWARD;
-
-	key.objectid = device->devid;
-	key.offset = start;
-	key.type = BTRFS_DEV_EXTENT_KEY;
-
-	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-	if (ret < 0)
-		goto out;
-	if (ret > 0) {
-		ret = btrfs_previous_item(root, path, key.objectid, key.type);
-		if (ret < 0)
-			goto out;
-	}
-
-	while (1) {
-		l = path->nodes[0];
-		slot = path->slots[0];
-		if (slot >= btrfs_header_nritems(l)) {
-			ret = btrfs_next_leaf(root, path);
-			if (ret == 0)
-				continue;
-			if (ret < 0)
-				goto out;
-
-			break;
-		}
-		btrfs_item_key_to_cpu(l, &key, slot);
-
-		if (key.objectid < device->devid)
-			goto next;
-
-		if (key.objectid > device->devid)
-			break;
-
-		if (key.type != BTRFS_DEV_EXTENT_KEY)
-			goto next;
-
-		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
-		extent_end = key.offset + btrfs_dev_extent_length(l,
-								  dev_extent);
-		if (key.offset <= start && extent_end > end) {
-			*length = end - start + 1;
-			break;
-		} else if (key.offset <= start && extent_end > start)
-			*length += extent_end - start;
-		else if (key.offset > start && extent_end <= end)
-			*length += extent_end - key.offset;
-		else if (key.offset > start && key.offset <= end) {
-			*length += end - key.offset + 1;
-			break;
-		} else if (key.offset > end)
-			break;
-
-next:
-		path->slots[0]++;
-	}
-	ret = 0;
-out:
-	btrfs_free_path(path);
-	return ret;
-}
-
 static int contains_pending_extent(struct btrfs_transaction *transaction,
 				   struct btrfs_device *device,
 				   u64 *start, u64 len)
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 275c31c730cf..9665b84b1026 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -384,8 +384,6 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio)
 	}
 }
 
-int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
-				   u64 end, u64 *length);
 void btrfs_get_bbio(struct btrfs_bio *bbio);
 void btrfs_put_bbio(struct btrfs_bio *bbio);
 int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
-- 
cgit v1.2.3


From 959b1c04675735aa51b9a7b3e28963c4141bf3e7 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Fri, 29 Jun 2018 08:26:05 +0300
Subject: btrfs: close devices without offloading to a temporary list

Since commit 88c14590cdd6 ("btrfs: use RCU in btrfs_show_devname for
device list traversal") btrfs_show_devname no longer takes
device_list_mutex. As such the deadlock that 0ccd05285e7f ("btrfs: fix a
possible umount deadlock") aimed to fix no longer exists, so we can free
the devices immediately and remove the code that does the pending work.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
[ update changelog ]
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f7191fdfb44c..3c3359d7e91d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1001,7 +1001,7 @@ static void btrfs_close_bdev(struct btrfs_device *device)
 	blkdev_put(device->bdev, device->mode);
 }
 
-static void btrfs_prepare_close_one_device(struct btrfs_device *device)
+static void btrfs_close_one_device(struct btrfs_device *device)
 {
 	struct btrfs_fs_devices *fs_devices = device->fs_devices;
 	struct btrfs_device *new_device;
@@ -1019,6 +1019,8 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device)
 	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
 		fs_devices->missing_devices--;
 
+	btrfs_close_bdev(device);
+
 	new_device = btrfs_alloc_device(NULL, &device->devid,
 					device->uuid);
 	BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
@@ -1032,39 +1034,23 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device)
 
 	list_replace_rcu(&device->dev_list, &new_device->dev_list);
 	new_device->fs_devices = device->fs_devices;
+
+	call_rcu(&device->rcu, free_device_rcu);
 }
 
 static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
 {
 	struct btrfs_device *device, *tmp;
-	struct list_head pending_put;
-
-	INIT_LIST_HEAD(&pending_put);
 
 	if (--fs_devices->opened > 0)
 		return 0;
 
 	mutex_lock(&fs_devices->device_list_mutex);
 	list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
-		btrfs_prepare_close_one_device(device);
-		list_add(&device->dev_list, &pending_put);
+		btrfs_close_one_device(device);
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);
 
-	/*
-	 * btrfs_show_devname() is using the device_list_mutex,
-	 * sometimes call to blkdev_put() leads vfs calling
-	 * into this func. So do put outside of device_list_mutex,
-	 * as of now.
-	 */
-	while (!list_empty(&pending_put)) {
-		device = list_first_entry(&pending_put,
-				struct btrfs_device, dev_list);
-		list_del(&device->dev_list);
-		btrfs_close_bdev(device);
-		call_rcu(&device->rcu, free_device_rcu);
-	}
-
 	WARN_ON(fs_devices->open_devices);
 	WARN_ON(fs_devices->rw_devices);
 	fs_devices->opened = 0;
-- 
cgit v1.2.3


From 4306a97449f9a0f9e5229af7889d4401315355aa Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Tue, 29 May 2018 12:28:37 +0800
Subject: btrfs: do btrfs_free_stale_devices outside of device_list_add

btrfs_free_stale_devices() looks for device path reused for another
filesystem, and deletes the older fs_devices::device entry.

In preparation to handle locking in device_list_add, move
btrfs_free_stale_devices outside as these two functions serve a
different purpose.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3c3359d7e91d..c84a9e22daf1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -747,7 +747,8 @@ error_brelse:
  * error pointer when failed
  */
 static noinline struct btrfs_device *device_list_add(const char *path,
-			   struct btrfs_super_block *disk_super)
+			   struct btrfs_super_block *disk_super,
+			   bool *new_device_added)
 {
 	struct btrfs_device *device;
 	struct btrfs_fs_devices *fs_devices;
@@ -793,7 +794,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 		mutex_unlock(&fs_devices->device_list_mutex);
 
 		device->fs_devices = fs_devices;
-		btrfs_free_stale_devices(path, device);
+		*new_device_added = true;
 
 		if (disk_super->label[0])
 			pr_info("BTRFS: device label %s devid %llu transid %llu %s\n",
@@ -1204,6 +1205,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 			  struct btrfs_fs_devices **fs_devices_ret)
 {
 	struct btrfs_super_block *disk_super;
+	bool new_device_added = false;
 	struct btrfs_device *device;
 	struct block_device *bdev;
 	struct page *page;
@@ -1229,11 +1231,14 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 	}
 
 	mutex_lock(&uuid_mutex);
-	device = device_list_add(path, disk_super);
-	if (IS_ERR(device))
+	device = device_list_add(path, disk_super, &new_device_added);
+	if (IS_ERR(device)) {
 		ret = PTR_ERR(device);
-	else
+	} else {
 		*fs_devices_ret = device->fs_devices;
+		if (new_device_added)
+			btrfs_free_stale_devices(path, device);
+	}
 	mutex_unlock(&uuid_mutex);
 
 	btrfs_release_disk_super(page);
-- 
cgit v1.2.3


From 9c6d173ea6e4c8c939ae6c257c7fc18f7b320316 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Tue, 29 May 2018 14:10:20 +0800
Subject: btrfs: extend locked section when adding a new device in
 device_list_add

Make sure the device_list_mutex is held the whole time:

* when the device is being looked up
* new device is initialized and put to the list
* the list counters are updated (fs_devices::opened, fs_devices::total_devices)

Signed-off-by: Anand Jain <anand.jain@oracle.com>
[ update changelog ]
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c84a9e22daf1..870732ef356c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -762,21 +762,26 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 		if (IS_ERR(fs_devices))
 			return ERR_CAST(fs_devices);
 
+		mutex_lock(&fs_devices->device_list_mutex);
 		list_add(&fs_devices->fs_list, &fs_uuids);
 
 		device = NULL;
 	} else {
+		mutex_lock(&fs_devices->device_list_mutex);
 		device = find_device(fs_devices, devid,
 				disk_super->dev_item.uuid);
 	}
 
 	if (!device) {
-		if (fs_devices->opened)
+		if (fs_devices->opened) {
+			mutex_unlock(&fs_devices->device_list_mutex);
 			return ERR_PTR(-EBUSY);
+		}
 
 		device = btrfs_alloc_device(NULL, &devid,
 					    disk_super->dev_item.uuid);
 		if (IS_ERR(device)) {
+			mutex_unlock(&fs_devices->device_list_mutex);
 			/* we can safely leave the fs_devices entry around */
 			return device;
 		}
@@ -784,14 +789,13 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 		name = rcu_string_strdup(path, GFP_NOFS);
 		if (!name) {
 			btrfs_free_device(device);
+			mutex_unlock(&fs_devices->device_list_mutex);
 			return ERR_PTR(-ENOMEM);
 		}
 		rcu_assign_pointer(device->name, name);
 
-		mutex_lock(&fs_devices->device_list_mutex);
 		list_add_rcu(&device->dev_list, &fs_devices->devices);
 		fs_devices->num_devices++;
-		mutex_unlock(&fs_devices->device_list_mutex);
 
 		device->fs_devices = fs_devices;
 		*new_device_added = true;
@@ -838,12 +842,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 			 * with larger generation number or the last-in if
 			 * generation are equal.
 			 */
+			mutex_unlock(&fs_devices->device_list_mutex);
 			return ERR_PTR(-EEXIST);
 		}
 
 		name = rcu_string_strdup(path, GFP_NOFS);
-		if (!name)
+		if (!name) {
+			mutex_unlock(&fs_devices->device_list_mutex);
 			return ERR_PTR(-ENOMEM);
+		}
 		rcu_string_free(device->name);
 		rcu_assign_pointer(device->name, name);
 		if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
@@ -863,6 +870,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 
 	fs_devices->total_devices = btrfs_super_num_devices(disk_super);
 
+	mutex_unlock(&fs_devices->device_list_mutex);
 	return device;
 }
 
-- 
cgit v1.2.3


From fa6d2ae540a200a17bb7ee769f9df22d411c9404 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Tue, 29 May 2018 15:33:08 +0800
Subject: btrfs: rename local devices for fs_devices in
 btrfs_free_stale_devices()

Over the years we named %fs_devices and %device to represent the
struct btrfs_fs_devices and the struct btrfs_device. So follow the same
scheme here too. No functional changes.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 870732ef356c..f4ce081bbe46 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -631,43 +631,42 @@ static void pending_bios_fn(struct btrfs_work *work)
  *		devices.
  */
 static void btrfs_free_stale_devices(const char *path,
-				     struct btrfs_device *skip_dev)
+				     struct btrfs_device *skip_device)
 {
-	struct btrfs_fs_devices *fs_devs, *tmp_fs_devs;
-	struct btrfs_device *dev, *tmp_dev;
+	struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
+	struct btrfs_device *device, *tmp_device;
 
-	list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, fs_list) {
-
-		if (fs_devs->opened)
+	list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
+		if (fs_devices->opened)
 			continue;
 
-		list_for_each_entry_safe(dev, tmp_dev,
-					 &fs_devs->devices, dev_list) {
+		list_for_each_entry_safe(device, tmp_device,
+					 &fs_devices->devices, dev_list) {
 			int not_found = 0;
 
-			if (skip_dev && skip_dev == dev)
+			if (skip_device && skip_device == device)
 				continue;
-			if (path && !dev->name)
+			if (path && !device->name)
 				continue;
 
 			rcu_read_lock();
 			if (path)
-				not_found = strcmp(rcu_str_deref(dev->name),
+				not_found = strcmp(rcu_str_deref(device->name),
 						   path);
 			rcu_read_unlock();
 			if (not_found)
 				continue;
 
 			/* delete the stale device */
-			if (fs_devs->num_devices == 1) {
-				btrfs_sysfs_remove_fsid(fs_devs);
-				list_del(&fs_devs->fs_list);
-				free_fs_devices(fs_devs);
+			if (fs_devices->num_devices == 1) {
+				btrfs_sysfs_remove_fsid(fs_devices);
+				list_del(&fs_devices->fs_list);
+				free_fs_devices(fs_devices);
 				break;
 			} else {
-				fs_devs->num_devices--;
-				list_del(&dev->dev_list);
-				btrfs_free_device(dev);
+				fs_devices->num_devices--;
+				list_del(&device->dev_list);
+				btrfs_free_device(device);
 			}
 		}
 	}
-- 
cgit v1.2.3


From 7bcb8164ad9435068d9bc3b83b8a002c64d63ff6 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Tue, 29 May 2018 17:23:20 +0800
Subject: btrfs: use device_list_mutex when removing stale devices

btrfs_free_stale_devices() finds a stale (not opened) device matching
path in the fs_uuids list. We are already under uuid_mutex, so when we
check each fs_devices, hold its device_list_mutex too.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f4ce081bbe46..6a8f1e2c116f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -637,8 +637,11 @@ static void btrfs_free_stale_devices(const char *path,
 	struct btrfs_device *device, *tmp_device;
 
 	list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
-		if (fs_devices->opened)
+		mutex_lock(&fs_devices->device_list_mutex);
+		if (fs_devices->opened) {
+			mutex_unlock(&fs_devices->device_list_mutex);
 			continue;
+		}
 
 		list_for_each_entry_safe(device, tmp_device,
 					 &fs_devices->devices, dev_list) {
@@ -658,16 +661,18 @@ static void btrfs_free_stale_devices(const char *path,
 				continue;
 
 			/* delete the stale device */
-			if (fs_devices->num_devices == 1) {
-				btrfs_sysfs_remove_fsid(fs_devices);
-				list_del(&fs_devices->fs_list);
-				free_fs_devices(fs_devices);
+			fs_devices->num_devices--;
+			list_del(&device->dev_list);
+			btrfs_free_device(device);
+
+			if (fs_devices->num_devices == 0)
 				break;
-			} else {
-				fs_devices->num_devices--;
-				list_del(&device->dev_list);
-				btrfs_free_device(device);
-			}
+		}
+		mutex_unlock(&fs_devices->device_list_mutex);
+		if (fs_devices->num_devices == 0) {
+			btrfs_sysfs_remove_fsid(fs_devices);
+			list_del(&fs_devices->fs_list);
+			free_fs_devices(fs_devices);
 		}
 	}
 }
-- 
cgit v1.2.3


From 899f9307c33ce4758c30a076b10ed54d5c91c6e7 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 19 Jun 2018 16:37:36 +0200
Subject: btrfs: lift uuid_mutex to callers of btrfs_scan_one_device

Preparatory work to fix the race between mount and device scan.

The callers will have to manage the critical section, eg. mount wants to
scan and then call btrfs_open_devices without the ioctl scan walking in
and modifying the fs devices in the meantime.

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c   | 12 +++++++++++-
 fs/btrfs/volumes.c |  4 ++--
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 39d8e39b2fe1..0b6567f0c63d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -915,8 +915,10 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
 				error = -ENOMEM;
 				goto out;
 			}
+			mutex_lock(&uuid_mutex);
 			error = btrfs_scan_one_device(device_name,
 					flags, holder, fs_devices);
+			mutex_unlock(&uuid_mutex);
 			kfree(device_name);
 			if (error)
 				goto out;
@@ -1537,7 +1539,9 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 			return ERR_PTR(error);
 	}
 
+	mutex_lock(&uuid_mutex);
 	error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
+	mutex_unlock(&uuid_mutex);
 	if (error)
 		goto error_sec_opts;
 
@@ -2232,15 +2236,21 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 
 	switch (cmd) {
 	case BTRFS_IOC_SCAN_DEV:
+		mutex_lock(&uuid_mutex);
 		ret = btrfs_scan_one_device(vol->name, FMODE_READ,
 					    &btrfs_root_fs_type, &fs_devices);
+		mutex_unlock(&uuid_mutex);
 		break;
 	case BTRFS_IOC_DEVICES_READY:
+		mutex_lock(&uuid_mutex);
 		ret = btrfs_scan_one_device(vol->name, FMODE_READ,
 					    &btrfs_root_fs_type, &fs_devices);
-		if (ret)
+		if (ret) {
+			mutex_unlock(&uuid_mutex);
 			break;
+		}
 		ret = !(fs_devices->num_devices == fs_devices->total_devices);
+		mutex_unlock(&uuid_mutex);
 		break;
 	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
 		ret = btrfs_ioctl_get_supported_features((void __user*)arg);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6a8f1e2c116f..df5dd6f67887 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1224,6 +1224,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 	int ret = 0;
 	u64 bytenr;
 
+	lockdep_assert_held(&uuid_mutex);
+
 	/*
 	 * we would like to check all the supers, but that would make
 	 * a btrfs mount succeed after a mkfs from a different FS.
@@ -1242,7 +1244,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 		goto error_bdev_put;
 	}
 
-	mutex_lock(&uuid_mutex);
 	device = device_list_add(path, disk_super, &new_device_added);
 	if (IS_ERR(device)) {
 		ret = PTR_ERR(device);
@@ -1251,7 +1252,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 		if (new_device_added)
 			btrfs_free_stale_devices(path, device);
 	}
-	mutex_unlock(&uuid_mutex);
 
 	btrfs_release_disk_super(page);
 
-- 
cgit v1.2.3


From f5194e34cabaddd348a90f950e0a8188dd26cdc0 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 19 Jun 2018 17:09:47 +0200
Subject: btrfs: lift uuid_mutex to callers of btrfs_open_devices

Preparatory work to fix the race between mount and device scan.

The callers will have to manage the critical section, eg. mount wants to
scan and then call btrfs_open_devices without the ioctl scan walking in
and modifying the fs devices in the meantime.

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c   | 2 ++
 fs/btrfs/volumes.c | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0b6567f0c63d..d470eb6b9226 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1567,7 +1567,9 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 		goto error_fs_info;
 	}
 
+	mutex_lock(&uuid_mutex);
 	error = btrfs_open_devices(fs_devices, mode, fs_type);
+	mutex_unlock(&uuid_mutex);
 	if (error)
 		goto error_fs_info;
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index df5dd6f67887..459cc2a2a31c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1142,7 +1142,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 {
 	int ret;
 
-	mutex_lock(&uuid_mutex);
+	lockdep_assert_held(&uuid_mutex);
+
 	mutex_lock(&fs_devices->device_list_mutex);
 	if (fs_devices->opened) {
 		fs_devices->opened++;
@@ -1152,7 +1153,6 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		ret = open_fs_devices(fs_devices, flags, holder);
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);
-	mutex_unlock(&uuid_mutex);
 
 	return ret;
 }
-- 
cgit v1.2.3


From 5139cff598d42b1e531f40c84691a7e945f04553 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 19 Jun 2018 17:50:25 +0200
Subject: btrfs: lift uuid_mutex to callers of btrfs_parse_early_options

Preparatory work to fix the race between mount and device scan.

btrfs_parse_early_options calls the device scan from mount and we'll
need to let mount completely manage the critical section.

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d470eb6b9226..de87b7b0367d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -890,6 +890,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
 	char *device_name, *opts, *orig, *p;
 	int error = 0;
 
+	lockdep_assert_held(&uuid_mutex);
+
 	if (!options)
 		return 0;
 
@@ -915,10 +917,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
 				error = -ENOMEM;
 				goto out;
 			}
-			mutex_lock(&uuid_mutex);
 			error = btrfs_scan_one_device(device_name,
 					flags, holder, fs_devices);
-			mutex_unlock(&uuid_mutex);
 			kfree(device_name);
 			if (error)
 				goto out;
@@ -1526,8 +1526,10 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 	if (!(flags & SB_RDONLY))
 		mode |= FMODE_WRITE;
 
+	mutex_lock(&uuid_mutex);
 	error = btrfs_parse_early_options(data, mode, fs_type,
 					  &fs_devices);
+	mutex_unlock(&uuid_mutex);
 	if (error) {
 		return ERR_PTR(error);
 	}
-- 
cgit v1.2.3


From 399f7f4c42e8a58c8456264d5112287aefe44cf4 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 19 Jun 2018 18:01:24 +0200
Subject: btrfs: reorder initialization before the mount locks uuid_mutex

In preparation to take a big lock, move resource initialization before
the critical section. It's not obvious from the diff, but the desired
order is:

- initialize mount security options
- allocate temporary fs_info
- allocate superblock buffers

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index de87b7b0367d..d33fc1fc4285 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1526,14 +1526,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 	if (!(flags & SB_RDONLY))
 		mode |= FMODE_WRITE;
 
-	mutex_lock(&uuid_mutex);
-	error = btrfs_parse_early_options(data, mode, fs_type,
-					  &fs_devices);
-	mutex_unlock(&uuid_mutex);
-	if (error) {
-		return ERR_PTR(error);
-	}
-
 	security_init_mnt_opts(&new_sec_opts);
 	if (data) {
 		error = parse_security_options(data, &new_sec_opts);
@@ -1541,12 +1533,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 			return ERR_PTR(error);
 	}
 
-	mutex_lock(&uuid_mutex);
-	error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
-	mutex_unlock(&uuid_mutex);
-	if (error)
-		goto error_sec_opts;
-
 	/*
 	 * Setup a dummy root and fs_info for test/set super.  This is because
 	 * we don't actually fill this stuff out until open_ctree, but we need
@@ -1559,8 +1545,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 		goto error_sec_opts;
 	}
 
-	fs_info->fs_devices = fs_devices;
-
 	fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
 	fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
 	security_init_mnt_opts(&fs_info->security_opts);
@@ -1569,6 +1553,20 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 		goto error_fs_info;
 	}
 
+	mutex_lock(&uuid_mutex);
+	error = btrfs_parse_early_options(data, mode, fs_type, &fs_devices);
+	mutex_unlock(&uuid_mutex);
+	if (error)
+		goto error_fs_info;
+
+	mutex_lock(&uuid_mutex);
+	error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
+	mutex_unlock(&uuid_mutex);
+	if (error)
+		goto error_fs_info;
+
+	fs_info->fs_devices = fs_devices;
+
 	mutex_lock(&uuid_mutex);
 	error = btrfs_open_devices(fs_devices, mode, fs_type);
 	mutex_unlock(&uuid_mutex);
-- 
cgit v1.2.3


From 81ffd56b5745355b70d54ca4e1bdd0d64a66ff9f Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 19 Jun 2018 18:04:07 +0200
Subject: btrfs: fix mount and ioctl device scan ioctl race

Technically this extends the critical section covered by uuid_mutex to:

- parse early mount options -- here we can call device scan on paths
  that can be passed as 'device=/dev/...'

- scan the device passed to mount

- open the devices related to the fs_devices -- this increases
  fs_devices::opened

The race can happen when mount calls one of the scans and there's
another one called eg. by mkfs or 'btrfs dev scan':

Mount                                  Scan
-----                                  ----
scan_one_device (dev1, fsid1)
                                       scan_one_device (dev2, fsid1)
				           add the device
					   free stale devices
					       fsid1 fs_devices::opened == 0
					           find fsid1:dev1
					           free fsid1:dev1
					           if it's the last one,
					            free fs_devices of fsid1
						    too

open_devices (dev1, fsid1)
   dev1 not found

When fixed, the uuid mutex will make sure that mount will increase
fs_devices::opened and this will not be touched by the racing scan
ioctl.

Reported-and-tested-by: syzbot+909a5177749d7990ffa4@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+ceb2606025ec1cc3479c@syzkaller.appspotmail.com
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d33fc1fc4285..28ab75ebb983 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1555,19 +1555,19 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 
 	mutex_lock(&uuid_mutex);
 	error = btrfs_parse_early_options(data, mode, fs_type, &fs_devices);
-	mutex_unlock(&uuid_mutex);
-	if (error)
+	if (error) {
+		mutex_unlock(&uuid_mutex);
 		goto error_fs_info;
+	}
 
-	mutex_lock(&uuid_mutex);
 	error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
-	mutex_unlock(&uuid_mutex);
-	if (error)
+	if (error) {
+		mutex_unlock(&uuid_mutex);
 		goto error_fs_info;
+	}
 
 	fs_info->fs_devices = fs_devices;
 
-	mutex_lock(&uuid_mutex);
 	error = btrfs_open_devices(fs_devices, mode, fs_type);
 	mutex_unlock(&uuid_mutex);
 	if (error)
-- 
cgit v1.2.3


From d64dcbd183abab251beb0fa0acd81a772a09887e Mon Sep 17 00:00:00 2001
From: Gu Jinxiang <gujx@cn.fujitsu.com>
Date: Thu, 12 Jul 2018 14:23:15 +0800
Subject: btrfs: make fs_devices a local variable in btrfs_parse_early_options

fs_devices is always passed to btrfs_scan_one_device which overrides it.
In the call stack below fs_devices is passed to btrfs_scan_one_device
from btrfs_mount_root.  In btrfs_mount_root the output fs_devices of
this call stack is not used.

btrfs_mount_root
  btrfs_parse_early_options
    btrfs_scan_one_device

So, it is not necessary to pass fs_devices from btrfs_mount_root, using
a local variable in btrfs_parse_early_options is enough.

Signed-off-by: Gu Jinxiang <gujx@cn.fujitsu.com>
Reviewed-by: Anand Jain <Anand.Jain@oracle.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 28ab75ebb983..909ea2564e84 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -884,10 +884,11 @@ out:
  * only when we need to allocate a new super block.
  */
 static int btrfs_parse_early_options(const char *options, fmode_t flags,
-		void *holder, struct btrfs_fs_devices **fs_devices)
+				     void *holder)
 {
 	substring_t args[MAX_OPT_ARGS];
 	char *device_name, *opts, *orig, *p;
+	struct btrfs_fs_devices *fs_devices = NULL;
 	int error = 0;
 
 	lockdep_assert_held(&uuid_mutex);
@@ -918,7 +919,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
 				goto out;
 			}
 			error = btrfs_scan_one_device(device_name,
-					flags, holder, fs_devices);
+					flags, holder, &fs_devices);
 			kfree(device_name);
 			if (error)
 				goto out;
@@ -1554,7 +1555,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 	}
 
 	mutex_lock(&uuid_mutex);
-	error = btrfs_parse_early_options(data, mode, fs_type, &fs_devices);
+	error = btrfs_parse_early_options(data, mode, fs_type);
 	if (error) {
 		mutex_unlock(&uuid_mutex);
 		goto error_fs_info;
-- 
cgit v1.2.3


From 36350e95a2b1feed6382fe38cc80f79ec35a1323 Mon Sep 17 00:00:00 2001
From: Gu Jinxiang <gujx@cn.fujitsu.com>
Date: Thu, 12 Jul 2018 14:23:16 +0800
Subject: btrfs: return device pointer from btrfs_scan_one_device

Return device pointer (with the IS_ERR semantics) from
btrfs_scan_one_device so we don't have to return it through a pointer.

And since btrfs_fs_devices can be obtained from btrfs_device, return that.

Signed-off-by: Gu Jinxiang <gujx@cn.fujitsu.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ fixed conflicts after recent changes to btrfs_scan_one_device ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c   | 34 +++++++++++++++++++++-------------
 fs/btrfs/volumes.c | 18 +++++++-----------
 fs/btrfs/volumes.h |  4 ++--
 3 files changed, 30 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 909ea2564e84..5384afae3364 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -888,7 +888,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
 {
 	substring_t args[MAX_OPT_ARGS];
 	char *device_name, *opts, *orig, *p;
-	struct btrfs_fs_devices *fs_devices = NULL;
+	struct btrfs_device *device = NULL;
 	int error = 0;
 
 	lockdep_assert_held(&uuid_mutex);
@@ -918,11 +918,13 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
 				error = -ENOMEM;
 				goto out;
 			}
-			error = btrfs_scan_one_device(device_name,
-					flags, holder, &fs_devices);
+			device = btrfs_scan_one_device(device_name, flags,
+					holder);
 			kfree(device_name);
-			if (error)
+			if (IS_ERR(device)) {
+				error = PTR_ERR(device);
 				goto out;
+			}
 		}
 	}
 
@@ -1518,6 +1520,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 {
 	struct block_device *bdev = NULL;
 	struct super_block *s;
+	struct btrfs_device *device = NULL;
 	struct btrfs_fs_devices *fs_devices = NULL;
 	struct btrfs_fs_info *fs_info = NULL;
 	struct security_mnt_opts new_sec_opts;
@@ -1561,12 +1564,14 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 		goto error_fs_info;
 	}
 
-	error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
-	if (error) {
+	device = btrfs_scan_one_device(device_name, mode, fs_type);
+	if (IS_ERR(device)) {
 		mutex_unlock(&uuid_mutex);
+		error = PTR_ERR(device);
 		goto error_fs_info;
 	}
 
+	fs_devices = device->fs_devices;
 	fs_info->fs_devices = fs_devices;
 
 	error = btrfs_open_devices(fs_devices, mode, fs_type);
@@ -2227,7 +2232,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 				unsigned long arg)
 {
 	struct btrfs_ioctl_vol_args *vol;
-	struct btrfs_fs_devices *fs_devices;
+	struct btrfs_device *device = NULL;
 	int ret = -ENOTTY;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -2240,19 +2245,22 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 	switch (cmd) {
 	case BTRFS_IOC_SCAN_DEV:
 		mutex_lock(&uuid_mutex);
-		ret = btrfs_scan_one_device(vol->name, FMODE_READ,
-					    &btrfs_root_fs_type, &fs_devices);
+		device = btrfs_scan_one_device(vol->name, FMODE_READ,
+					       &btrfs_root_fs_type);
+		ret = PTR_ERR_OR_ZERO(device);
 		mutex_unlock(&uuid_mutex);
 		break;
 	case BTRFS_IOC_DEVICES_READY:
 		mutex_lock(&uuid_mutex);
-		ret = btrfs_scan_one_device(vol->name, FMODE_READ,
-					    &btrfs_root_fs_type, &fs_devices);
-		if (ret) {
+		device = btrfs_scan_one_device(vol->name, FMODE_READ,
+					       &btrfs_root_fs_type);
+		if (IS_ERR(device)) {
 			mutex_unlock(&uuid_mutex);
+			ret = PTR_ERR(device);
 			break;
 		}
-		ret = !(fs_devices->num_devices == fs_devices->total_devices);
+		ret = !(device->fs_devices->num_devices ==
+			device->fs_devices->total_devices);
 		mutex_unlock(&uuid_mutex);
 		break;
 	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 459cc2a2a31c..fd9d4e056b37 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1213,15 +1213,14 @@ static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
  * and we are not allowed to call set_blocksize during the scan. The superblock
  * is read via pagecache
  */
-int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
-			  struct btrfs_fs_devices **fs_devices_ret)
+struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
+					   void *holder)
 {
 	struct btrfs_super_block *disk_super;
 	bool new_device_added = false;
-	struct btrfs_device *device;
+	struct btrfs_device *device = NULL;
 	struct block_device *bdev;
 	struct page *page;
-	int ret = 0;
 	u64 bytenr;
 
 	lockdep_assert_held(&uuid_mutex);
@@ -1237,18 +1236,15 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 
 	bdev = blkdev_get_by_path(path, flags, holder);
 	if (IS_ERR(bdev))
-		return PTR_ERR(bdev);
+		return ERR_CAST(bdev);
 
 	if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
-		ret = -EINVAL;
+		device = ERR_PTR(-EINVAL);
 		goto error_bdev_put;
 	}
 
 	device = device_list_add(path, disk_super, &new_device_added);
-	if (IS_ERR(device)) {
-		ret = PTR_ERR(device);
-	} else {
-		*fs_devices_ret = device->fs_devices;
+	if (!IS_ERR(device)) {
 		if (new_device_added)
 			btrfs_free_stale_devices(path, device);
 	}
@@ -1258,7 +1254,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 error_bdev_put:
 	blkdev_put(bdev, flags);
 
-	return ret;
+	return device;
 }
 
 static int contains_pending_extent(struct btrfs_transaction *transaction,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 9665b84b1026..06d8bb7dd557 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -403,8 +403,8 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 			   int mirror_num, int async_submit);
 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		       fmode_t flags, void *holder);
-int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
-			  struct btrfs_fs_devices **fs_devices_ret);
+struct btrfs_device *btrfs_scan_one_device(const char *path,
+					   fmode_t flags, void *holder);
 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
 void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
 void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
-- 
cgit v1.2.3


From d814a49198eafa6163698bdd93961302f3a877a4 Mon Sep 17 00:00:00 2001
From: Ethan Lien <ethanlien@synology.com>
Date: Mon, 2 Jul 2018 15:44:58 +0800
Subject: btrfs: use correct compare function of dirty_metadata_bytes

We use a customized, nodesize batch value to update dirty_metadata_bytes.
We should also use the batch version of the compare function, or we will
easily go to the fast path and get a false result from
percpu_counter_compare().

Fixes: e2d845211eda ("Btrfs: use percpu counter for dirty metadata count")
CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Ethan Lien <ethanlien@synology.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6023eed3e805..e3858b2fe014 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -959,8 +959,9 @@ static int btree_writepages(struct address_space *mapping,
 
 		fs_info = BTRFS_I(mapping->host)->root->fs_info;
 		/* this is a bit racy, but that's ok */
-		ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
-					     BTRFS_DIRTY_METADATA_THRESH);
+		ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+					     BTRFS_DIRTY_METADATA_THRESH,
+					     fs_info->dirty_metadata_batch);
 		if (ret < 0)
 			return 0;
 	}
@@ -4134,8 +4135,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info,
 	if (flush_delayed)
 		btrfs_balance_delayed_items(fs_info);
 
-	ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
-				     BTRFS_DIRTY_METADATA_THRESH);
+	ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+				     BTRFS_DIRTY_METADATA_THRESH,
+				     fs_info->dirty_metadata_batch);
 	if (ret > 0) {
 		balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping);
 	}
-- 
cgit v1.2.3


From dec59fa3a760952fc71f2e122e66a7291109670a Mon Sep 17 00:00:00 2001
From: Ethan Lien <ethanlien@synology.com>
Date: Fri, 13 Jul 2018 16:50:42 +0800
Subject: btrfs: use customized batch size for total_bytes_pinned

In commit b150a4f10d878 ("Btrfs: use a percpu to keep track of possibly
pinned bytes") we use total_bytes_pinned to track how many bytes we are
going to free in this transaction. When we are close to ENOSPC, we check it
and know if we can make the allocation by committing the current transaction.
For every data/metadata extent we are going to free, we add
total_bytes_pinned in btrfs_free_extent() and btrfs_free_tree_block(), and
release it in unpin_extent_range() when we finish the transaction. So this
is a variable we frequently update but rarely read - exactly the suitable
use case for percpu_counter. But in the previous commit we update
total_bytes_pinned with the default batch size of 32, making every update
essentially a spin lock protected update. Since every spin lock/unlock
operation involves syncing
a globally used variable and some kind of barrier in a SMP system, this is
more expensive than using total_bytes_pinned as a simple atomic64_t.

So fix this by using a customized batch size. Since we only read
total_bytes_pinned when we are close to ENOSPC and fail to allocate new
chunk, we can use a really large batch size and have nearly no penalty
in most cases.

[Test]
We tested the patch on a 4-cores x86 machine:

1. fallocate a 16GiB size test file
2. take snapshot (so all following writes will be COW)
3. run a 180 sec, 4 jobs, 4K random write fio on test file

We also added a temporary lockdep class on percpu_counter's spin lock
used by total_bytes_pinned to track it by lock_stat.

[Results]
unpatched:
lock_stat version 0.4
-----------------------------------------------------------------------
                              class name    con-bounces    contentions
waittime-min   waittime-max waittime-total   waittime-avg    acq-bounces
acquisitions   holdtime-min   holdtime-max holdtime-total   holdtime-avg

               total_bytes_pinned_percpu:            82             82
        0.21           0.61          29.46           0.36         298340
      635973           0.09          11.01      173476.25           0.27

patched:
lock_stat version 0.4
-----------------------------------------------------------------------
                              class name    con-bounces    contentions
waittime-min   waittime-max waittime-total   waittime-avg    acq-bounces
acquisitions   holdtime-min   holdtime-max holdtime-total   holdtime-avg

               total_bytes_pinned_percpu:             1              1
        0.62           0.62           0.62           0.62          13601
       31542           0.14           9.61       11016.90           0.35

[Analysis]
Since the spin lock only protects a single in-memory variable, the
contentions (number of lock acquisitions that had to wait) in both
unpatched and patched version are low. But when we see acquisitions and
acq-bounces, we get much lower counts in patched version. Here the most
important metric is acq-bounces. It means how many times the lock gets
transferred between different cpus, so the patch can really reduce
cacheline bouncing of spin lock (also the global counter of percpu_counter)
in a SMP system.

Fixes: b150a4f10d878 ("Btrfs: use a percpu to keep track of possibly pinned bytes")
Signed-off-by: Ethan Lien <ethanlien@synology.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       |  8 ++++++++
 fs/btrfs/extent-tree.c | 39 ++++++++++++++++++++++++---------------
 2 files changed, 32 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 427ca5de8542..bf1451bf3ed7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -84,6 +84,14 @@ static const int btrfs_csum_sizes[] = { 4 };
 
 #define BTRFS_DIRTY_METADATA_THRESH	SZ_32M
 
+/*
+ * Use large batch size to reduce overhead of metadata updates.  On the reader
+ * side, we only read it when we are close to ENOSPC and the read overhead is
+ * mostly related to the number of CPUs, so it is OK to use arbitrary large
+ * value here.
+ */
+#define BTRFS_TOTAL_BYTES_PINNED_BATCH	SZ_128M
+
 #define BTRFS_MAX_EXTENT_SIZE SZ_128M
 
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6bba288133b8..2d1f893902f1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -755,7 +755,8 @@ static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
 
 	space_info = __find_space_info(fs_info, flags);
 	ASSERT(space_info);
-	percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
+	percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes,
+		    BTRFS_TOTAL_BYTES_PINNED_BATCH);
 }
 
 /*
@@ -2473,8 +2474,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
 			flags = BTRFS_BLOCK_GROUP_METADATA;
 		space_info = __find_space_info(fs_info, flags);
 		ASSERT(space_info);
-		percpu_counter_add(&space_info->total_bytes_pinned,
-				   -head->num_bytes);
+		percpu_counter_add_batch(&space_info->total_bytes_pinned,
+				   -head->num_bytes,
+				   BTRFS_TOTAL_BYTES_PINNED_BATCH);
 
 		if (head->is_data) {
 			spin_lock(&delayed_refs->lock);
@@ -4178,9 +4180,10 @@ again:
 		 * allocation, and no removed chunk in current transaction,
 		 * don't bother committing the transaction.
 		 */
-		have_pinned_space = percpu_counter_compare(
+		have_pinned_space = __percpu_counter_compare(
 			&data_sinfo->total_bytes_pinned,
-			used + bytes - data_sinfo->total_bytes);
+			used + bytes - data_sinfo->total_bytes,
+			BTRFS_TOTAL_BYTES_PINNED_BATCH);
 		spin_unlock(&data_sinfo->lock);
 
 		/* commit the current transaction and try again */
@@ -4782,8 +4785,9 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
 		return 0;
 
 	/* See if there is enough pinned space to make this reservation */
-	if (percpu_counter_compare(&space_info->total_bytes_pinned,
-				   bytes) >= 0)
+	if (__percpu_counter_compare(&space_info->total_bytes_pinned,
+				   bytes,
+				   BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
 		goto commit;
 
 	/*
@@ -4800,8 +4804,9 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
 		bytes -= delayed_rsv->size;
 	spin_unlock(&delayed_rsv->lock);
 
-	if (percpu_counter_compare(&space_info->total_bytes_pinned,
-				   bytes) < 0) {
+	if (__percpu_counter_compare(&space_info->total_bytes_pinned,
+				   bytes,
+				   BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) {
 		return -ENOSPC;
 	}
 
@@ -6138,8 +6143,9 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 			trace_btrfs_space_reservation(info, "pinned",
 						      cache->space_info->flags,
 						      num_bytes, 1);
-			percpu_counter_add(&cache->space_info->total_bytes_pinned,
-					   num_bytes);
+			percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
+					   num_bytes,
+					   BTRFS_TOTAL_BYTES_PINNED_BATCH);
 			set_extent_dirty(info->pinned_extents,
 					 bytenr, bytenr + num_bytes - 1,
 					 GFP_NOFS | __GFP_NOFAIL);
@@ -6217,7 +6223,8 @@ static int pin_down_extent(struct btrfs_fs_info *fs_info,
 
 	trace_btrfs_space_reservation(fs_info, "pinned",
 				      cache->space_info->flags, num_bytes, 1);
-	percpu_counter_add(&cache->space_info->total_bytes_pinned, num_bytes);
+	percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
+		    num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
 	set_extent_dirty(fs_info->pinned_extents, bytenr,
 			 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
 	return 0;
@@ -6581,7 +6588,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
 		trace_btrfs_space_reservation(fs_info, "pinned",
 					      space_info->flags, len, 0);
 		space_info->max_extent_size = 0;
-		percpu_counter_add(&space_info->total_bytes_pinned, -len);
+		percpu_counter_add_batch(&space_info->total_bytes_pinned,
+			    -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
 		if (cache->ro) {
 			space_info->bytes_readonly += len;
 			readonly = true;
@@ -10603,8 +10611,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 
 		space_info->bytes_pinned -= block_group->pinned;
 		space_info->bytes_readonly += block_group->pinned;
-		percpu_counter_add(&space_info->total_bytes_pinned,
-				   -block_group->pinned);
+		percpu_counter_add_batch(&space_info->total_bytes_pinned,
+				   -block_group->pinned,
+				   BTRFS_TOTAL_BYTES_PINNED_BATCH);
 		block_group->pinned = 0;
 
 		spin_unlock(&block_group->lock);
-- 
cgit v1.2.3


From 2556fbb0bead7929ddf67f8b4184f434cee4e7d7 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 18 Apr 2018 10:27:57 +0300
Subject: btrfs: Rewrite retry logic in do_chunk_alloc

do_chunk_alloc implements logic to detect whether there is currently
pending chunk allocation (by means of space_info->chunk_alloc being
set) and if so it loops around to the 'again' label. Additionally,
based on the state of the space_info (e.g. whether it's full or not)
and the return value of should_alloc_chunk() it decides whether this
is a "hard" error (ENOSPC) or we can just return 0.

This patch refactors all of this:

1. Put order to the scattered ifs handling the various cases in an
easy-to-read if {} else if{} branches. This makes clear the various
cases we are interested in handling.

2. Call should_alloc_chunk only once and use the result in the
if/else if constructs. All of this is done under space_info->lock, so
even before multiple calls of should_alloc_chunk were unnecessary.

3. Rewrite the "do {} while()" loop currently implemented via label
into an explicit loop construct.

4. Move the mutex locking for the case where the caller is the one doing
the allocation. For the case where the caller needs to wait a concurrent
allocation, introduce a pair of mutex_lock/mutex_unlock to act as a
barrier and reword the comment.

5. Switch local vars to bool type where pertinent.

All in all this shouldn't introduce any functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 74 +++++++++++++++++++++++++-------------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2d1f893902f1..bbd756223b23 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4450,7 +4450,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_space_info *space_info;
-	int wait_for_alloc = 0;
+	bool wait_for_alloc = false;
+	bool should_alloc = false;
 	int ret = 0;
 
 	/* Don't re-enter if we're already allocating a chunk */
@@ -4460,45 +4461,44 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
 	space_info = __find_space_info(fs_info, flags);
 	ASSERT(space_info);
 
-again:
-	spin_lock(&space_info->lock);
-	if (force < space_info->force_alloc)
-		force = space_info->force_alloc;
-	if (space_info->full) {
-		if (should_alloc_chunk(fs_info, space_info, force))
-			ret = -ENOSPC;
-		else
-			ret = 0;
-		spin_unlock(&space_info->lock);
-		return ret;
-	}
-
-	if (!should_alloc_chunk(fs_info, space_info, force)) {
-		spin_unlock(&space_info->lock);
-		return 0;
-	} else if (space_info->chunk_alloc) {
-		wait_for_alloc = 1;
-	} else {
-		space_info->chunk_alloc = 1;
-	}
-
-	spin_unlock(&space_info->lock);
-
-	mutex_lock(&fs_info->chunk_mutex);
+	do {
+		spin_lock(&space_info->lock);
+		if (force < space_info->force_alloc)
+			force = space_info->force_alloc;
+		should_alloc = should_alloc_chunk(fs_info, space_info, force);
+		if (space_info->full) {
+			/* No more free physical space */
+			if (should_alloc)
+				ret = -ENOSPC;
+			else
+				ret = 0;
+			spin_unlock(&space_info->lock);
+			return ret;
+		} else if (!should_alloc) {
+			spin_unlock(&space_info->lock);
+			return 0;
+		} else if (space_info->chunk_alloc) {
+			/*
+			 * Someone is already allocating, so we need to block
+			 * until this someone is finished and then loop to
+			 * recheck if we should continue with our allocation
+			 * attempt.
+			 */
+			wait_for_alloc = true;
+			spin_unlock(&space_info->lock);
+			mutex_lock(&fs_info->chunk_mutex);
+			mutex_unlock(&fs_info->chunk_mutex);
+		} else {
+			/* Proceed with allocation */
+			space_info->chunk_alloc = 1;
+			wait_for_alloc = false;
+			spin_unlock(&space_info->lock);
+		}
 
-	/*
-	 * The chunk_mutex is held throughout the entirety of a chunk
-	 * allocation, so once we've acquired the chunk_mutex we know that the
-	 * other guy is done and we need to recheck and see if we should
-	 * allocate.
-	 */
-	if (wait_for_alloc) {
-		mutex_unlock(&fs_info->chunk_mutex);
-		wait_for_alloc = 0;
 		cond_resched();
-		goto again;
-	}
+	} while (wait_for_alloc);
 
+	mutex_lock(&fs_info->chunk_mutex);
 	trans->allocating_chunk = true;
 
 	/*
-- 
cgit v1.2.3


From 031f24da2c8a7b611331c368d7bfabccafda09fa Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 22 May 2018 16:43:47 +0800
Subject: btrfs: Use btrfs_mark_bg_unused to replace open code

Introduce a small helper, btrfs_mark_bg_unused(), to acquire locks and
add a block group to unused_bgs list.

No functional modification, and only 3 callers are involved.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       |  1 +
 fs/btrfs/extent-tree.c | 36 +++++++++++++++++-------------------
 fs/btrfs/scrub.c       |  9 +--------
 3 files changed, 19 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index bf1451bf3ed7..ee1e152cb94b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2801,6 +2801,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
 void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
 u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 		       u64 start, u64 end);
+void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg);
 
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bbd756223b23..ced26ba8d392 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6166,16 +6166,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 		 * dirty list to avoid races between cleaner kthread and space
 		 * cache writeout.
 		 */
-		if (!alloc && old_val == 0) {
-			spin_lock(&info->unused_bgs_lock);
-			if (list_empty(&cache->bg_list)) {
-				btrfs_get_block_group(cache);
-				trace_btrfs_add_unused_block_group(cache);
-				list_add_tail(&cache->bg_list,
-					      &info->unused_bgs);
-			}
-			spin_unlock(&info->unused_bgs_lock);
-		}
+		if (!alloc && old_val == 0)
+			btrfs_mark_bg_unused(cache);
 
 		btrfs_put_block_group(cache);
 		total -= num_bytes;
@@ -9987,15 +9979,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 		if (btrfs_chunk_readonly(info, cache->key.objectid)) {
 			inc_block_group_ro(cache, 1);
 		} else if (btrfs_block_group_used(&cache->item) == 0) {
-			spin_lock(&info->unused_bgs_lock);
-			/* Should always be true but just in case. */
-			if (list_empty(&cache->bg_list)) {
-				btrfs_get_block_group(cache);
-				trace_btrfs_add_unused_block_group(cache);
-				list_add_tail(&cache->bg_list,
-					      &info->unused_bgs);
-			}
-			spin_unlock(&info->unused_bgs_lock);
+			ASSERT(list_empty(&cache->bg_list));
+			btrfs_mark_bg_unused(cache);
 		}
 	}
 
@@ -10914,3 +10899,16 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
 			       !atomic_read(&root->will_be_snapshotted));
 	}
 }
+
+void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg)
+{
+	struct btrfs_fs_info *fs_info = bg->fs_info;
+
+	spin_lock(&fs_info->unused_bgs_lock);
+	if (list_empty(&bg->bg_list)) {
+		btrfs_get_block_group(bg);
+		trace_btrfs_add_unused_block_group(bg);
+		list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
+	}
+	spin_unlock(&fs_info->unused_bgs_lock);
+}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index bf4f262f9a52..1235ad8dd9d7 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3951,14 +3951,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		if (!cache->removed && !cache->ro && cache->reserved == 0 &&
 		    btrfs_block_group_used(&cache->item) == 0) {
 			spin_unlock(&cache->lock);
-			spin_lock(&fs_info->unused_bgs_lock);
-			if (list_empty(&cache->bg_list)) {
-				btrfs_get_block_group(cache);
-				trace_btrfs_add_unused_block_group(cache);
-				list_add_tail(&cache->bg_list,
-					      &fs_info->unused_bgs);
-			}
-			spin_unlock(&fs_info->unused_bgs_lock);
+			btrfs_mark_bg_unused(cache);
 		} else {
 			spin_unlock(&cache->lock);
 		}
-- 
cgit v1.2.3


From c8389d4c0d6eb7c0b7f8b8a98620874f9123cd20 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Tue, 17 Jul 2018 16:58:22 +0800
Subject: btrfs: qgroup: cleanup the unused srcroot from btrfs_qgroup_inherit

Since commit 0b246afa62b0 ("btrfs: root->fs_info cleanup, add fs_info
convenience variables"), the srcroot is no longer used to get
fs_info::nodesize.  In fact, it can be dropped after commit 707e8a071528
("btrfs: use nodesize everywhere, kill leafsize").

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 97a59a444500..e0bd48c5a98b 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2287,22 +2287,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 	if (ret)
 		goto out;
 
-	if (srcid) {
-		struct btrfs_root *srcroot;
-		struct btrfs_key srckey;
-
-		srckey.objectid = srcid;
-		srckey.type = BTRFS_ROOT_ITEM_KEY;
-		srckey.offset = (u64)-1;
-		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
-		if (IS_ERR(srcroot)) {
-			ret = PTR_ERR(srcroot);
-			goto out;
-		}
-
-		level_size = fs_info->nodesize;
-	}
-
 	/*
 	 * add qgroup to all inherited groups
 	 */
@@ -2359,6 +2343,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 		 * our counts don't go crazy, so at this point the only
 		 * difference between the two roots should be the root node.
 		 */
+		level_size = fs_info->nodesize;
 		dstgroup->rfer = srcgroup->rfer;
 		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
 		dstgroup->excl = level_size;
-- 
cgit v1.2.3


From fa59f27c8c35bbe00af8eff23de446a7f4b048b0 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Mon, 16 Jul 2018 22:18:07 +0800
Subject: btrfs: rename btrfs_parse_early_options

Rename btrfs_parse_early_options() to btrfs_parse_device_options().
btrfs_parse_early_options() parses the -o device options and scans the
device provided, so the new name describes what the function actually
does. The new name is also in line with btrfs_parse_subvol_options().
No functional changes.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 5384afae3364..d7a54c648c5f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -465,9 +465,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 		case Opt_subvolrootid:
 		case Opt_device:
 			/*
-			 * These are parsed by btrfs_parse_subvol_options
-			 * and btrfs_parse_early_options
-			 * and can be happily ignored here.
+			 * These are parsed by btrfs_parse_subvol_options or
+			 * btrfs_parse_device_options and can be ignored here.
 			 */
 			break;
 		case Opt_nodatasum:
@@ -883,8 +882,8 @@ out:
  * All other options will be parsed on much later in the mount process and
  * only when we need to allocate a new super block.
  */
-static int btrfs_parse_early_options(const char *options, fmode_t flags,
-				     void *holder)
+static int btrfs_parse_device_options(const char *options, fmode_t flags,
+				      void *holder)
 {
 	substring_t args[MAX_OPT_ARGS];
 	char *device_name, *opts, *orig, *p;
@@ -951,7 +950,7 @@ static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
 
 	/*
 	 * strsep changes the string, duplicate it because
-	 * btrfs_parse_early_options gets called later
+	 * btrfs_parse_device_options gets called later
 	 */
 	opts = kstrdup(options, GFP_KERNEL);
 	if (!opts)
@@ -1558,7 +1557,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 	}
 
 	mutex_lock(&uuid_mutex);
-	error = btrfs_parse_early_options(data, mode, fs_type);
+	error = btrfs_parse_device_options(data, mode, fs_type);
 	if (error) {
 		mutex_unlock(&uuid_mutex);
 		goto error_fs_info;
-- 
cgit v1.2.3


From 711169c40f908df6df91ba2672d05b44243833e4 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:24 +0800
Subject: btrfs: qgroup: Drop quota_root parameter from
 add_qgroup_relation_item

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index e0bd48c5a98b..4f523b7a3e91 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -530,11 +530,11 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
 	fs_info->qgroup_ulist = NULL;
 }
 
-static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
-				    struct btrfs_root *quota_root,
-				    u64 src, u64 dst)
+static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
+				    u64 dst)
 {
 	int ret;
+	struct btrfs_root *quota_root = trans->fs_info->quota_root;
 	struct btrfs_path *path;
 	struct btrfs_key key;
 
@@ -1274,11 +1274,11 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 		}
 	}
 
-	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
+	ret = add_qgroup_relation_item(trans, src, dst);
 	if (ret)
 		goto out;
 
-	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
+	ret = add_qgroup_relation_item(trans, dst, src);
 	if (ret) {
 		del_qgroup_relation_item(trans, quota_root, src, dst);
 		goto out;
@@ -2295,12 +2295,12 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 		for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
 			if (*i_qgroups == 0)
 				continue;
-			ret = add_qgroup_relation_item(trans, quota_root,
-						       objectid, *i_qgroups);
+			ret = add_qgroup_relation_item(trans, objectid,
+						       *i_qgroups);
 			if (ret && ret != -EEXIST)
 				goto out;
-			ret = add_qgroup_relation_item(trans, quota_root,
-						       *i_qgroups, objectid);
+			ret = add_qgroup_relation_item(trans, *i_qgroups,
+						       objectid);
 			if (ret && ret != -EEXIST)
 				goto out;
 		}
-- 
cgit v1.2.3


From 99d7f09ac01cbea28767a84d4d34dd5b1922a245 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:25 +0800
Subject: btrfs: qgroup: Drop quota_root parameter from
 del_qgroup_relation_item

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 4f523b7a3e91..c22f1e655a60 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -554,11 +554,11 @@ static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
 	return ret;
 }
 
-static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
-				    struct btrfs_root *quota_root,
-				    u64 src, u64 dst)
+static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
+				    u64 dst)
 {
 	int ret;
+	struct btrfs_root *quota_root = trans->fs_info->quota_root;
 	struct btrfs_path *path;
 	struct btrfs_key key;
 
@@ -1280,7 +1280,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 
 	ret = add_qgroup_relation_item(trans, dst, src);
 	if (ret) {
-		del_qgroup_relation_item(trans, quota_root, src, dst);
+		del_qgroup_relation_item(trans, src, dst);
 		goto out;
 	}
 
@@ -1334,8 +1334,8 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
 	ret = -ENOENT;
 	goto out;
 exist:
-	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
-	err = del_qgroup_relation_item(trans, quota_root, dst, src);
+	ret = del_qgroup_relation_item(trans, src, dst);
+	err = del_qgroup_relation_item(trans, dst, src);
 	if (err && !ret)
 		ret = err;
 
-- 
cgit v1.2.3


From 69104618f4b9dd445109e8dd755406427bd75b76 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:26 +0800
Subject: btrfs: qgroup: Drop quota_root parameter from del_qgroup_item

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c22f1e655a60..67e0f4f38c31 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -653,10 +653,10 @@ out:
 	return ret;
 }
 
-static int del_qgroup_item(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *quota_root, u64 qgroupid)
+static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid)
 {
 	int ret;
+	struct btrfs_root *quota_root = trans->fs_info->quota_root;
 	struct btrfs_path *path;
 	struct btrfs_key key;
 
@@ -1420,7 +1420,7 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
 			goto out;
 		}
 	}
-	ret = del_qgroup_item(trans, quota_root, qgroupid);
+	ret = del_qgroup_item(trans, qgroupid);
 	if (ret && ret != -ENOENT)
 		goto out;
 
-- 
cgit v1.2.3


From ac8a866af17edc692b50cbdd2aec612de4205c8f Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:27 +0800
Subject: btrfs: qgroup: Drop root parameter from update_qgroup_limit_item

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 67e0f4f38c31..44e1462e0f76 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -700,9 +700,9 @@ out:
 }
 
 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
-				    struct btrfs_root *root,
 				    struct btrfs_qgroup *qgroup)
 {
+	struct btrfs_root *quota_root = trans->fs_info->quota_root;
 	struct btrfs_path *path;
 	struct btrfs_key key;
 	struct extent_buffer *l;
@@ -718,7 +718,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
 	if (ret > 0)
 		ret = -ENOENT;
 
@@ -1509,7 +1509,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 
 	spin_unlock(&fs_info->qgroup_lock);
 
-	ret = update_qgroup_limit_item(trans, quota_root, qgroup);
+	ret = update_qgroup_limit_item(trans, qgroup);
 	if (ret) {
 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 		btrfs_info(fs_info, "unable to update quota limit for %llu",
@@ -2214,7 +2214,7 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 		if (ret)
 			fs_info->qgroup_flags |=
 					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-		ret = update_qgroup_limit_item(trans, quota_root, qgroup);
+		ret = update_qgroup_limit_item(trans, qgroup);
 		if (ret)
 			fs_info->qgroup_flags |=
 					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -2323,7 +2323,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 		dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
 		dstgroup->rsv_excl = inherit->lim.rsv_excl;
 
-		ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
+		ret = update_qgroup_limit_item(trans, dstgroup);
 		if (ret) {
 			fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 			btrfs_info(fs_info,
-- 
cgit v1.2.3


From 3e07e9a09f27868dbc63d07db8677fcd256e3bbc Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:28 +0800
Subject: btrfs: qgroup: Drop root parameter from update_qgroup_info_item

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 44e1462e0f76..f4b25e0a32f8 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -742,9 +742,10 @@ out:
 }
 
 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
-				   struct btrfs_root *root,
 				   struct btrfs_qgroup *qgroup)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_root *quota_root = fs_info->quota_root;
 	struct btrfs_path *path;
 	struct btrfs_key key;
 	struct extent_buffer *l;
@@ -752,7 +753,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
 	int ret;
 	int slot;
 
-	if (btrfs_is_testing(root->fs_info))
+	if (btrfs_is_testing(fs_info))
 		return 0;
 
 	key.objectid = 0;
@@ -763,7 +764,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
 	if (ret > 0)
 		ret = -ENOENT;
 
@@ -2210,7 +2211,7 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 					  struct btrfs_qgroup, dirty);
 		list_del_init(&qgroup->dirty);
 		spin_unlock(&fs_info->qgroup_lock);
-		ret = update_qgroup_info_item(trans, quota_root, qgroup);
+		ret = update_qgroup_info_item(trans, qgroup);
 		if (ret)
 			fs_info->qgroup_flags |=
 					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-- 
cgit v1.2.3


From 2e980acdd829742966c6a7e565ef3382c0717295 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:29 +0800
Subject: btrfs: qgroup: Drop quota_root and fs_info parameters from
 update_qgroup_status_item

They can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index f4b25e0a32f8..a7890183f20f 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -787,10 +787,10 @@ out:
 	return ret;
 }
 
-static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
-				     struct btrfs_fs_info *fs_info,
-				    struct btrfs_root *root)
+static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_root *quota_root = fs_info->quota_root;
 	struct btrfs_path *path;
 	struct btrfs_key key;
 	struct extent_buffer *l;
@@ -806,7 +806,7 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
 	if (ret > 0)
 		ret = -ENOENT;
 
@@ -2227,7 +2227,7 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
 	spin_unlock(&fs_info->qgroup_lock);
 
-	ret = update_qgroup_status_item(trans, fs_info, quota_root);
+	ret = update_qgroup_status_item(trans);
 	if (ret)
 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 
@@ -2795,7 +2795,7 @@ out:
 			  err);
 		goto done;
 	}
-	ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
+	ret = update_qgroup_status_item(trans);
 	if (ret < 0) {
 		err = ret;
 		btrfs_err(fs_info, "fail to update qgroup status: %d", err);
-- 
cgit v1.2.3


From 9f8a6ce6ba8ebeb86de59b1adfdc64087049f76f Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:30 +0800
Subject: btrfs: qgroup: Drop fs_info parameter from btrfs_add_qgroup_relation

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c  | 3 +--
 fs/btrfs/qgroup.c | 5 +++--
 fs/btrfs/qgroup.h | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9dcd16dec754..70b6f943fd70 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5171,8 +5171,7 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
 	}
 
 	if (sa->assign) {
-		ret = btrfs_add_qgroup_relation(trans, fs_info,
-						sa->src, sa->dst);
+		ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst);
 	} else {
 		ret = btrfs_del_qgroup_relation(trans, fs_info,
 						sa->src, sa->dst);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a7890183f20f..8b71aec85d84 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1236,9 +1236,10 @@ out:
 	return ret;
 }
 
-int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+			      u64 dst)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *parent;
 	struct btrfs_qgroup *member;
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index bec7c9b17a8e..cb4d0e58f486 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -147,8 +147,8 @@ int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
 void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
 				     bool interruptible);
-int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
+int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+			      u64 dst);
 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
-- 
cgit v1.2.3


From 6b36f1aa5cda39acac50f6f80c81776a31fccd24 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:31 +0800
Subject: btrfs: qgroup: Drop fs_info parameter from __del_qgroup_relation

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 8b71aec85d84..953b0d54e5d6 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1300,9 +1300,10 @@ out:
 	return ret;
 }
 
-static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+				 u64 dst)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *parent;
 	struct btrfs_qgroup *member;
@@ -1356,7 +1357,7 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
 	int ret = 0;
 
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
-	ret = __del_qgroup_relation(trans, fs_info, src, dst);
+	ret = __del_qgroup_relation(trans, src, dst);
 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 
 	return ret;
@@ -1429,9 +1430,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
 	while (!list_empty(&qgroup->groups)) {
 		list = list_first_entry(&qgroup->groups,
 					struct btrfs_qgroup_list, next_group);
-		ret = __del_qgroup_relation(trans, fs_info,
-					   qgroupid,
-					   list->group->qgroupid);
+		ret = __del_qgroup_relation(trans, qgroupid,
+					    list->group->qgroupid);
 		if (ret)
 			goto out;
 	}
-- 
cgit v1.2.3


From 39616c2735ad04ecf4874519b64556decb73d968 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:32 +0800
Subject: btrfs: qgroup: Drop fs_info parameter from btrfs_del_qgroup_relation

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c  | 3 +--
 fs/btrfs/qgroup.c | 5 +++--
 fs/btrfs/qgroup.h | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 70b6f943fd70..09943e396c77 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5173,8 +5173,7 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
 	if (sa->assign) {
 		ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst);
 	} else {
-		ret = btrfs_del_qgroup_relation(trans, fs_info,
-						sa->src, sa->dst);
+		ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst);
 	}
 
 	/* update qgroup status and info */
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 953b0d54e5d6..8c6f594a7ee8 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1351,9 +1351,10 @@ out:
 	return ret;
 }
 
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+			      u64 dst)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int ret = 0;
 
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index cb4d0e58f486..539d4c449f26 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -149,8 +149,8 @@ int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
 				     bool interruptible);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
 			      u64 dst);
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+			      u64 dst);
 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
 			struct btrfs_fs_info *fs_info, u64 qgroupid);
 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
-- 
cgit v1.2.3


From 49a05ecde3f130ae9d69ab619e3ea125cf1f32c9 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:33 +0800
Subject: btrfs: qgroup: Drop fs_info parameter from btrfs_create_qgroup

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c              | 2 +-
 fs/btrfs/qgroup.c             | 4 ++--
 fs/btrfs/qgroup.h             | 3 +--
 fs/btrfs/tests/qgroup-tests.c | 4 ++--
 4 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 09943e396c77..2fdc34a67139 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5227,7 +5227,7 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
 	}
 
 	if (sa->create) {
-		ret = btrfs_create_qgroup(trans, fs_info, sa->qgroupid);
+		ret = btrfs_create_qgroup(trans, sa->qgroupid);
 	} else {
 		ret = btrfs_remove_qgroup(trans, fs_info, sa->qgroupid);
 	}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 8c6f594a7ee8..9c97be3fbf35 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1364,9 +1364,9 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
 	return ret;
 }
 
-int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u64 qgroupid)
+int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
 	int ret = 0;
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 539d4c449f26..03adfedd9d01 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -151,8 +151,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
 			      u64 dst);
 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
 			      u64 dst);
-int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u64 qgroupid);
+int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 qgroupid);
 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index ace94db09d29..b5e332d45d37 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -216,7 +216,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 	btrfs_init_dummy_trans(&trans, fs_info);
 
 	test_msg("qgroup basic add");
-	ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID);
+	ret = btrfs_create_qgroup(&trans, BTRFS_FS_TREE_OBJECTID);
 	if (ret) {
 		test_err("couldn't create a qgroup %d", ret);
 		return ret;
@@ -322,7 +322,7 @@ static int test_multiple_refs(struct btrfs_root *root,
 	 * We have BTRFS_FS_TREE_OBJECTID created already from the
 	 * previous test.
 	 */
-	ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID);
+	ret = btrfs_create_qgroup(&trans, BTRFS_FIRST_FREE_OBJECTID);
 	if (ret) {
 		test_err("couldn't create a qgroup %d", ret);
 		return ret;
-- 
cgit v1.2.3


From 3efbee1d006a97eaec5f070430c75010ef8746f1 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:34 +0800
Subject: btrfs: qgroup: Drop fs_info parameter from btrfs_remove_qgroup

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c  | 3 +--
 fs/btrfs/qgroup.c | 4 ++--
 fs/btrfs/qgroup.h | 3 +--
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2fdc34a67139..cb2b74db24ea 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5195,7 +5195,6 @@ drop_write:
 static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
 {
 	struct inode *inode = file_inode(file);
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_ioctl_qgroup_create_args *sa;
 	struct btrfs_trans_handle *trans;
@@ -5229,7 +5228,7 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
 	if (sa->create) {
 		ret = btrfs_create_qgroup(trans, sa->qgroupid);
 	} else {
-		ret = btrfs_remove_qgroup(trans, fs_info, sa->qgroupid);
+		ret = btrfs_remove_qgroup(trans, sa->qgroupid);
 	}
 
 	err = btrfs_end_transaction(trans);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9c97be3fbf35..d3d205d0e176 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1398,9 +1398,9 @@ out:
 	return ret;
 }
 
-int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u64 qgroupid)
+int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
 	struct btrfs_qgroup_list *list;
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 03adfedd9d01..41516dddf123 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -152,8 +152,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
 			      u64 dst);
 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
-int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info, u64 qgroupid);
+int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 		       struct btrfs_fs_info *fs_info, u64 qgroupid,
 		       struct btrfs_qgroup_limit *limit);
-- 
cgit v1.2.3


From f0042d5e92b66969a12166d1deb5a979250d6c25 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:35 +0800
Subject: btrfs: qgroup: Drop fs_info parameter from btrfs_limit_qgroup

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c  | 3 +--
 fs/btrfs/qgroup.c | 4 ++--
 fs/btrfs/qgroup.h | 3 +--
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cb2b74db24ea..01aab038120e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5245,7 +5245,6 @@ drop_write:
 static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
 {
 	struct inode *inode = file_inode(file);
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_ioctl_qgroup_limit_args *sa;
 	struct btrfs_trans_handle *trans;
@@ -5278,7 +5277,7 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
 		qgroupid = root->root_key.objectid;
 	}
 
-	ret = btrfs_limit_qgroup(trans, fs_info, qgroupid, &sa->lim);
+	ret = btrfs_limit_qgroup(trans, qgroupid, &sa->lim);
 
 	err = btrfs_end_transaction(trans);
 	if (err && !ret)
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d3d205d0e176..8953d0264887 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1445,10 +1445,10 @@ out:
 	return ret;
 }
 
-int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info, u64 qgroupid,
+int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
 		       struct btrfs_qgroup_limit *limit)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
 	int ret = 0;
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 41516dddf123..385367989ed6 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -153,8 +153,7 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
 			      u64 dst);
 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
-int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info, u64 qgroupid,
+int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
 		       struct btrfs_qgroup_limit *limit);
 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
-- 
cgit v1.2.3


From a95f3aafd6a2d0e8de834c95e91066825e3e7787 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 16:28:03 +0800
Subject: btrfs: qgroup: Drop fs_info parameter from btrfs_qgroup_trace_extent

It can be fetched from the transaction handle. In addition, remove the
WARN_ON(trans == NULL) because it's not possible to hit this condition.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c   | 15 ++++++---------
 fs/btrfs/qgroup.h   |  5 ++---
 fs/btrfs/tree-log.c |  2 +-
 3 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 8953d0264887..d1e8b4851912 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1580,10 +1580,10 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
-		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
-		gfp_t gfp_flag)
+int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+			      u64 num_bytes, gfp_t gfp_flag)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_qgroup_extent_record *record;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	int ret;
@@ -1591,8 +1591,6 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
 	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
 	    || bytenr == 0 || num_bytes == 0)
 		return 0;
-	if (WARN_ON(trans == NULL))
-		return -EINVAL;
 	record = kmalloc(sizeof(*record), gfp_flag);
 	if (!record)
 		return -ENOMEM;
@@ -1645,8 +1643,8 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
 
 		num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
 
-		ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr,
-						num_bytes, GFP_NOFS);
+		ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes,
+						GFP_NOFS);
 		if (ret)
 			return ret;
 	}
@@ -1797,8 +1795,7 @@ walk_down:
 			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
 
-			ret = btrfs_qgroup_trace_extent(trans, fs_info,
-							child_bytenr,
+			ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
 							fs_info->nodesize,
 							GFP_NOFS);
 			if (ret)
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 385367989ed6..0215dc0b1710 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -212,9 +212,8 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
  * Return <0 for error, like memory allocation failure or invalid parameter
  * (NULL trans)
  */
-int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
-		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
-		gfp_t gfp_flag);
+int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+			      u64 num_bytes, gfp_t gfp_flag);
 
 /*
  * Inform qgroup to trace all leaf items of data
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7b7498f1f641..10f6a4223897 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -685,7 +685,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 		 * as the owner of the file extent changed from log tree
 		 * (doesn't affect qgroup) to fs/file tree(affects qgroup)
 		 */
-		ret = btrfs_qgroup_trace_extent(trans, fs_info,
+		ret = btrfs_qgroup_trace_extent(trans,
 				btrfs_file_extent_disk_bytenr(eb, item),
 				btrfs_file_extent_disk_num_bytes(eb, item),
 				GFP_NOFS);
-- 
cgit v1.2.3


From 8d38d7eb7bb60a7a441cec3ba92784d9f1e20d5f Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:37 +0800
Subject: btrfs: qgroup: Drop fs_info parameter from
 btrfs_qgroup_trace_leaf_items

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 2 +-
 fs/btrfs/qgroup.c      | 8 ++++----
 fs/btrfs/qgroup.h      | 1 -
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ced26ba8d392..51da902a0ad7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8760,7 +8760,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 			else
 				ret = btrfs_dec_ref(trans, root, eb, 0);
 			BUG_ON(ret); /* -ENOMEM */
-			ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, eb);
+			ret = btrfs_qgroup_trace_leaf_items(trans, eb);
 			if (ret) {
 				btrfs_err_rl(fs_info,
 					     "error %d accounting leaf items. Quota is out of sync, rescan required.",
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d1e8b4851912..11a23b17a432 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1611,9 +1611,9 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
 }
 
 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info,
 				  struct extent_buffer *eb)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int nr = btrfs_header_nritems(eb);
 	int i, extent_type, ret;
 	struct btrfs_key key;
@@ -1737,7 +1737,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
 	}
 
 	if (root_level == 0) {
-		ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb);
+		ret = btrfs_qgroup_trace_leaf_items(trans, root_eb);
 		goto out;
 	}
 
@@ -1803,8 +1803,8 @@ walk_down:
 		}
 
 		if (level == 0) {
-			ret = btrfs_qgroup_trace_leaf_items(trans,fs_info,
-							   path->nodes[level]);
+			ret = btrfs_qgroup_trace_leaf_items(trans,
+							    path->nodes[level]);
 			if (ret)
 				goto out;
 
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 0215dc0b1710..9d434a01d467 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -222,7 +222,6 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
  * Return <0 for error(ENOMEM)
  */
 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info,
 				  struct extent_buffer *eb);
 /*
  * Inform qgroup to trace a whole subtree, including all its child tree
-- 
cgit v1.2.3


From deb406274339f386836313af7eeb8001cca6c33f Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:38 +0800
Subject: btrfs: qgroup: Drop root parameter from btrfs_qgroup_trace_subtree

The fs_info can be fetched from the transaction handle directly.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 2 +-
 fs/btrfs/qgroup.c      | 3 +--
 fs/btrfs/qgroup.h      | 1 -
 fs/btrfs/relocation.c  | 5 ++---
 4 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 51da902a0ad7..4f19bdca3214 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8660,7 +8660,7 @@ skip:
 		}
 
 		if (need_account) {
-			ret = btrfs_qgroup_trace_subtree(trans, root, next,
+			ret = btrfs_qgroup_trace_subtree(trans, next,
 							 generation, level - 1);
 			if (ret) {
 				btrfs_err_rl(fs_info,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 11a23b17a432..0b43e9e2a71e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1714,11 +1714,10 @@ static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
 }
 
 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root,
 			       struct extent_buffer *root_eb,
 			       u64 root_gen, int root_level)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int ret = 0;
 	int level;
 	struct extent_buffer *eb = root_eb;
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 9d434a01d467..b8f15ce6c83d 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -234,7 +234,6 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
  * Return <0 for error(ENOMEM or tree search error)
  */
 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root,
 			       struct extent_buffer *root_eb,
 			       u64 root_gen, int root_level);
 int
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b98d7a594542..d6bcf558789e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1879,13 +1879,12 @@ again:
 		 *    and tree block numbers, if current trans doesn't free
 		 *    data reloc tree inode.
 		 */
-		ret = btrfs_qgroup_trace_subtree(trans, src, parent,
+		ret = btrfs_qgroup_trace_subtree(trans, parent,
 				btrfs_header_generation(parent),
 				btrfs_header_level(parent));
 		if (ret < 0)
 			break;
-		ret = btrfs_qgroup_trace_subtree(trans, dest,
-				path->nodes[level],
+		ret = btrfs_qgroup_trace_subtree(trans, path->nodes[level],
 				btrfs_header_generation(path->nodes[level]),
 				btrfs_header_level(path->nodes[level]));
 		if (ret < 0)
-- 
cgit v1.2.3


From 8696d76045579c9611fe1cfc064e48ada32bb796 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:39 +0800
Subject: btrfs: qgroup: Drop fs_info parameter from
 btrfs_qgroup_account_extent

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c             | 20 ++++++++++----------
 fs/btrfs/qgroup.h             |  8 +++-----
 fs/btrfs/tests/qgroup-tests.c | 20 ++++++++++----------
 3 files changed, 23 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 0b43e9e2a71e..5ac57fc3fa4b 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2038,12 +2038,11 @@ static int maybe_fs_roots(struct ulist *roots)
 	return is_fstree(unode->val);
 }
 
-int
-btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
-			    struct btrfs_fs_info *fs_info,
-			    u64 bytenr, u64 num_bytes,
-			    struct ulist *old_roots, struct ulist *new_roots)
+int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+				u64 num_bytes, struct ulist *old_roots,
+				struct ulist *new_roots)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct ulist *qgroups = NULL;
 	struct ulist *tmp = NULL;
 	u64 seq;
@@ -2173,9 +2172,10 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
 				ulist_del(record->old_roots, qgroup_to_skip,
 					  0);
 			}
-			ret = btrfs_qgroup_account_extent(trans, fs_info,
-					record->bytenr, record->num_bytes,
-					record->old_roots, new_roots);
+			ret = btrfs_qgroup_account_extent(trans, record->bytenr,
+							  record->num_bytes,
+							  record->old_roots,
+							  new_roots);
 			record->old_roots = NULL;
 			new_roots = NULL;
 		}
@@ -2711,8 +2711,8 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 		if (ret < 0)
 			goto out;
 		/* For rescan, just pass old_roots as NULL */
-		ret = btrfs_qgroup_account_extent(trans, fs_info,
-				found.objectid, num_bytes, NULL, roots);
+		ret = btrfs_qgroup_account_extent(trans, found.objectid,
+						  num_bytes, NULL, roots);
 		if (ret < 0)
 			goto out;
 	}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index b8f15ce6c83d..61b723b1e4d8 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -236,11 +236,9 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
 			       struct extent_buffer *root_eb,
 			       u64 root_gen, int root_level);
-int
-btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
-			    struct btrfs_fs_info *fs_info,
-			    u64 bytenr, u64 num_bytes,
-			    struct ulist *old_roots, struct ulist *new_roots);
+int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+				u64 num_bytes, struct ulist *old_roots,
+				struct ulist *new_roots);
 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 		      struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index b5e332d45d37..412b910b04cc 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -249,8 +249,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 		return ret;
 	}
 
-	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
-					  nodesize, old_roots, new_roots);
+	ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+					  new_roots);
 	if (ret) {
 		test_err("couldn't account space for a qgroup %d", ret);
 		return ret;
@@ -285,8 +285,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
 		return ret;
 	}
 
-	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
-					  nodesize, old_roots, new_roots);
+	ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+					  new_roots);
 	if (ret) {
 		test_err("couldn't account space for a qgroup %d", ret);
 		return -EINVAL;
@@ -350,8 +350,8 @@ static int test_multiple_refs(struct btrfs_root *root,
 		return ret;
 	}
 
-	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
-					  nodesize, old_roots, new_roots);
+	ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+					  new_roots);
 	if (ret) {
 		test_err("couldn't account space for a qgroup %d", ret);
 		return ret;
@@ -385,8 +385,8 @@ static int test_multiple_refs(struct btrfs_root *root,
 		return ret;
 	}
 
-	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
-					  nodesize, old_roots, new_roots);
+	ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+					  new_roots);
 	if (ret) {
 		test_err("couldn't account space for a qgroup %d", ret);
 		return ret;
@@ -426,8 +426,8 @@ static int test_multiple_refs(struct btrfs_root *root,
 		return ret;
 	}
 
-	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
-					  nodesize, old_roots, new_roots);
+	ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+					  new_roots);
 	if (ret) {
 		test_err("couldn't account space for a qgroup %d", ret);
 		return ret;
-- 
cgit v1.2.3


From 280f8bd2cbe0b4b578c217b8fa504294c30abde1 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:40 +0800
Subject: btrfs: qgroup: Drop fs_info parameter from btrfs_run_qgroups

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c       | 2 +-
 fs/btrfs/qgroup.c      | 4 ++--
 fs/btrfs/qgroup.h      | 3 +--
 fs/btrfs/transaction.c | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 01aab038120e..d0096383ac3d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5177,7 +5177,7 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
 	}
 
 	/* update qgroup status and info */
-	err = btrfs_run_qgroups(trans, fs_info);
+	err = btrfs_run_qgroups(trans);
 	if (err < 0)
 		btrfs_handle_fs_error(fs_info, err,
 				      "failed to update qgroup status and info");
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5ac57fc3fa4b..819cdaea51ab 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2193,9 +2193,9 @@ cleanup:
 /*
  * called from commit_transaction. Writes all changed qgroups to disk.
  */
-int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
-		      struct btrfs_fs_info *fs_info)
+int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *quota_root = fs_info->quota_root;
 	int ret = 0;
 
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 61b723b1e4d8..2c8960f9028c 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -240,8 +240,7 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
 				u64 num_bytes, struct ulist *old_roots,
 				struct ulist *new_roots);
 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
-int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
-		      struct btrfs_fs_info *fs_info);
+int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
 			 struct btrfs_qgroup_inherit *inherit);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ebe50dfb8947..0cbd55c498a1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1152,7 +1152,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
 	ret = btrfs_run_dev_replace(trans, fs_info);
 	if (ret)
 		return ret;
-	ret = btrfs_run_qgroups(trans, fs_info);
+	ret = btrfs_run_qgroups(trans);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From a937742250199a37358a4da0a990744b92c8623c Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:41 +0800
Subject: btrfs: qgroup: Drop fs_info parameter from btrfs_qgroup_inherit

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c       | 2 +-
 fs/btrfs/qgroup.c      | 6 +++---
 fs/btrfs/qgroup.h      | 5 ++---
 fs/btrfs/transaction.c | 3 +--
 4 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d0096383ac3d..4482cf35522e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -601,7 +601,7 @@ static noinline int create_subvol(struct inode *dir,
 	trans->block_rsv = &block_rsv;
 	trans->bytes_reserved = block_rsv.size;
 
-	ret = btrfs_qgroup_inherit(trans, fs_info, 0, objectid, inherit);
+	ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
 	if (ret)
 		goto fail;
 
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 819cdaea51ab..96640c56c910 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2238,13 +2238,13 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
  * cause a transaction abort so we take extra care here to only error
  * when a readonly fs is a reasonable outcome.
  */
-int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
-			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
-			 struct btrfs_qgroup_inherit *inherit)
+int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
+			 u64 objectid, struct btrfs_qgroup_inherit *inherit)
 {
 	int ret = 0;
 	int i;
 	u64 *i_qgroups;
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *quota_root = fs_info->quota_root;
 	struct btrfs_qgroup *srcgroup;
 	struct btrfs_qgroup *dstgroup;
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 2c8960f9028c..54b8bb282c0e 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -241,9 +241,8 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
 				struct ulist *new_roots);
 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
 int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
-int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
-			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
-			 struct btrfs_qgroup_inherit *inherit);
+int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
+			 u64 objectid, struct btrfs_qgroup_inherit *inherit);
 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
 			       u64 ref_root, u64 num_bytes,
 			       enum btrfs_qgroup_rsv_type type);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 0cbd55c498a1..aec208cbff00 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1355,8 +1355,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
 		goto out;
 
 	/* Now qgroup are all updated, we can inherit it to new qgroups */
-	ret = btrfs_qgroup_inherit(trans, fs_info,
-				   src->root_key.objectid, dst_objectid,
+	ret = btrfs_qgroup_inherit(trans, src->root_key.objectid, dst_objectid,
 				   inherit);
 	if (ret < 0)
 		goto out;
-- 
cgit v1.2.3


From 62088ca7427d23ec1a5def9143098ca9c41c5302 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 18 Jul 2018 14:45:42 +0800
Subject: btrfs: qgroup: Drop fs_info parameter from qgroup_rescan_leaf

It can be fetched from the transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 96640c56c910..e11ed7b3f32a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2640,10 +2640,10 @@ static bool is_last_leaf(struct btrfs_path *path)
  * returns < 0 on error, 0 when more leafs are to be scanned.
  * returns 1 when done.
  */
-static int
-qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
-		   struct btrfs_trans_handle *trans)
+static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
+			      struct btrfs_path *path)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_key found;
 	struct extent_buffer *scratch_leaf = NULL;
 	struct ulist *roots = NULL;
@@ -2758,7 +2758,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 		if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
 			err = -EINTR;
 		} else {
-			err = qgroup_rescan_leaf(fs_info, path, trans);
+			err = qgroup_rescan_leaf(trans, path);
 		}
 		if (err > 0)
 			btrfs_commit_transaction(trans);
-- 
cgit v1.2.3


From 321a4bf72b251319aa456ea542161729442b6f61 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Mon, 16 Jul 2018 22:58:09 +0800
Subject: btrfs: use the assigned fs_devices instead of the dereference

We have already assigned %fs_info->fs_devices to %fs_devices; as it is
not modified, just use it for the mutex_lock().

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fd9d4e056b37..0b9023f52b2a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2211,7 +2211,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
 	INIT_LIST_HEAD(&seed_devices->alloc_list);
 	mutex_init(&seed_devices->device_list_mutex);
 
-	mutex_lock(&fs_info->fs_devices->device_list_mutex);
+	mutex_lock(&fs_devices->device_list_mutex);
 	list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
 			      synchronize_rcu);
 	list_for_each_entry(device, &seed_devices->devices, dev_list)
@@ -2231,7 +2231,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
 	generate_random_uuid(fs_devices->fsid);
 	memcpy(fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
 	memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
-	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+	mutex_unlock(&fs_devices->device_list_mutex);
 
 	super_flags = btrfs_super_flags(disk_super) &
 		      ~BTRFS_SUPER_FLAG_SEEDING;
-- 
cgit v1.2.3


From 46df06b85e28eaf430aa5c8f8aa26120fa746b72 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 13 Jul 2018 20:46:30 +0200
Subject: btrfs: refactor block group replication factor calculation to a
 helper

There are many places that open-code the duplicity factor of the block
group profiles; create a common helper. This can be easily extended for
more copies.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 36 ++++++++----------------------------
 fs/btrfs/super.c       | 11 +++--------
 fs/btrfs/volumes.c     | 11 +++++++++++
 fs/btrfs/volumes.h     |  2 ++
 4 files changed, 24 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4f19bdca3214..fd109bfd528d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3931,11 +3931,7 @@ static void update_space_info(struct btrfs_fs_info *info, u64 flags,
 	struct btrfs_space_info *found;
 	int factor;
 
-	if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
-		     BTRFS_BLOCK_GROUP_RAID10))
-		factor = 2;
-	else
-		factor = 1;
+	factor = btrfs_bg_type_to_factor(flags);
 
 	found = __find_space_info(info, flags);
 	ASSERT(found);
@@ -4576,6 +4572,7 @@ static int can_overcommit(struct btrfs_fs_info *fs_info,
 	u64 space_size;
 	u64 avail;
 	u64 used;
+	int factor;
 
 	/* Don't overcommit when in mixed mode. */
 	if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
@@ -4610,10 +4607,8 @@ static int can_overcommit(struct btrfs_fs_info *fs_info,
 	 * doesn't include the parity drive, so we don't have to
 	 * change the math
 	 */
-	if (profile & (BTRFS_BLOCK_GROUP_DUP |
-		       BTRFS_BLOCK_GROUP_RAID1 |
-		       BTRFS_BLOCK_GROUP_RAID10))
-		avail >>= 1;
+	factor = btrfs_bg_type_to_factor(profile);
+	avail = div_u64(avail, factor);
 
 	/*
 	 * If we aren't flushing all things, let us overcommit up to
@@ -6094,12 +6089,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 		cache = btrfs_lookup_block_group(info, bytenr);
 		if (!cache)
 			return -ENOENT;
-		if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
-				    BTRFS_BLOCK_GROUP_RAID1 |
-				    BTRFS_BLOCK_GROUP_RAID10))
-			factor = 2;
-		else
-			factor = 1;
+		factor = btrfs_bg_type_to_factor(cache->flags);
+
 		/*
 		 * If this block group has free space cache written out, we
 		 * need to make sure to load it if we are removing space.  This
@@ -9359,13 +9350,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
 			continue;
 		}
 
-		if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
-					  BTRFS_BLOCK_GROUP_RAID10 |
-					  BTRFS_BLOCK_GROUP_DUP))
-			factor = 2;
-		else
-			factor = 1;
-
+		factor = btrfs_bg_type_to_factor(block_group->flags);
 		free_bytes += (block_group->key.offset -
 			       btrfs_block_group_used(&block_group->item)) *
 			       factor;
@@ -10175,12 +10160,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
 	memcpy(&key, &block_group->key, sizeof(key));
 	index = btrfs_bg_flags_to_raid_index(block_group->flags);
-	if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
-				  BTRFS_BLOCK_GROUP_RAID1 |
-				  BTRFS_BLOCK_GROUP_RAID10))
-		factor = 2;
-	else
-		factor = 1;
+	factor = btrfs_bg_type_to_factor(block_group->flags);
 
 	/* make sure this block group isn't part of an allocation cluster */
 	cluster = &fs_info->data_alloc_cluster;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d7a54c648c5f..efe8b03ce380 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2107,14 +2107,9 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 				btrfs_account_ro_block_groups_free_space(found);
 
 			for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
-				if (!list_empty(&found->block_groups[i])) {
-					switch (i) {
-					case BTRFS_RAID_DUP:
-					case BTRFS_RAID_RAID1:
-					case BTRFS_RAID_RAID10:
-						factor = 2;
-					}
-				}
+				if (!list_empty(&found->block_groups[i]))
+					factor = btrfs_bg_type_to_factor(
+						btrfs_raid_array[i].bg_flag);
 			}
 		}
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0b9023f52b2a..eca67529e536 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7292,3 +7292,14 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
 		fs_devices = fs_devices->seed;
 	}
 }
+
+/*
+ * Multiplicity factor for simple profiles: DUP, RAID1-like and RAID10.
+ */
+int btrfs_bg_type_to_factor(u64 flags)
+{
+	if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+		     BTRFS_BLOCK_GROUP_RAID10))
+		return 2;
+	return 1;
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 06d8bb7dd557..6943aab9bdd7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -559,4 +559,6 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
 bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
 					struct btrfs_device *failing_dev);
 
+int btrfs_bg_type_to_factor(u64 flags);
+
 #endif
-- 
cgit v1.2.3


From 8b9b6f255485a4354da5474b16e79c447222e9ed Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 11 Jul 2018 13:41:22 +0800
Subject: btrfs: scrub: cleanup the remaining nodatasum fixup code

Remove the remaining code that misused the page cache pages during
device replace and could cause data corruption for compressed nodatasum
extents. Such files do not normally exist but there's a bug that allows
this combination and the corruption was exposed by device replace fixup
code.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 289 -------------------------------------------------------
 1 file changed, 289 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 1235ad8dd9d7..c4eb9eca13b8 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -188,15 +188,6 @@ struct scrub_ctx {
 	refcount_t              refs;
 };
 
-struct scrub_fixup_nodatasum {
-	struct scrub_ctx	*sctx;
-	struct btrfs_device	*dev;
-	u64			logical;
-	struct btrfs_root	*root;
-	struct btrfs_work	work;
-	int			mirror_num;
-};
-
 struct scrub_warning {
 	struct btrfs_path	*path;
 	u64			extent_item_size;
@@ -215,8 +206,6 @@ struct full_stripe_lock {
 
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
 static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
-static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
-static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
 static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
 static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 				     struct scrub_block *sblocks_for_recheck);
@@ -531,60 +520,6 @@ out:
 	return ret;
 }
 
-/*
- * used for workers that require transaction commits (i.e., for the
- * NOCOW case)
- */
-static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
-{
-	struct btrfs_fs_info *fs_info = sctx->fs_info;
-
-	refcount_inc(&sctx->refs);
-	/*
-	 * increment scrubs_running to prevent cancel requests from
-	 * completing as long as a worker is running. we must also
-	 * increment scrubs_paused to prevent deadlocking on pause
-	 * requests used for transactions commits (as the worker uses a
-	 * transaction context). it is safe to regard the worker
-	 * as paused for all matters practical. effectively, we only
-	 * avoid cancellation requests from completing.
-	 */
-	mutex_lock(&fs_info->scrub_lock);
-	atomic_inc(&fs_info->scrubs_running);
-	atomic_inc(&fs_info->scrubs_paused);
-	mutex_unlock(&fs_info->scrub_lock);
-
-	/*
-	 * check if @scrubs_running=@scrubs_paused condition
-	 * inside wait_event() is not an atomic operation.
-	 * which means we may inc/dec @scrub_running/paused
-	 * at any time. Let's wake up @scrub_pause_wait as
-	 * much as we can to let commit transaction blocked less.
-	 */
-	wake_up(&fs_info->scrub_pause_wait);
-
-	atomic_inc(&sctx->workers_pending);
-}
-
-/* used for workers that require transaction commits */
-static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
-{
-	struct btrfs_fs_info *fs_info = sctx->fs_info;
-
-	/*
-	 * see scrub_pending_trans_workers_inc() why we're pretending
-	 * to be paused in the scrub counters
-	 */
-	mutex_lock(&fs_info->scrub_lock);
-	atomic_dec(&fs_info->scrubs_running);
-	atomic_dec(&fs_info->scrubs_paused);
-	mutex_unlock(&fs_info->scrub_lock);
-	atomic_dec(&sctx->workers_pending);
-	wake_up(&fs_info->scrub_pause_wait);
-	wake_up(&sctx->list_wait);
-	scrub_put_ctx(sctx);
-}
-
 static void scrub_free_csums(struct scrub_ctx *sctx)
 {
 	while (!list_empty(&sctx->csum_list)) {
@@ -858,194 +793,6 @@ out:
 	btrfs_free_path(path);
 }
 
-static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
-{
-	struct page *page = NULL;
-	unsigned long index;
-	struct scrub_fixup_nodatasum *fixup = fixup_ctx;
-	int ret;
-	int corrected = 0;
-	struct btrfs_key key;
-	struct inode *inode = NULL;
-	struct btrfs_fs_info *fs_info;
-	u64 end = offset + PAGE_SIZE - 1;
-	struct btrfs_root *local_root;
-	int srcu_index;
-
-	key.objectid = root;
-	key.type = BTRFS_ROOT_ITEM_KEY;
-	key.offset = (u64)-1;
-
-	fs_info = fixup->root->fs_info;
-	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
-
-	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
-	if (IS_ERR(local_root)) {
-		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
-		return PTR_ERR(local_root);
-	}
-
-	key.type = BTRFS_INODE_ITEM_KEY;
-	key.objectid = inum;
-	key.offset = 0;
-	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
-	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
-	if (IS_ERR(inode))
-		return PTR_ERR(inode);
-
-	index = offset >> PAGE_SHIFT;
-
-	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
-	if (!page) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	if (PageUptodate(page)) {
-		if (PageDirty(page)) {
-			/*
-			 * we need to write the data to the defect sector. the
-			 * data that was in that sector is not in memory,
-			 * because the page was modified. we must not write the
-			 * modified page to that sector.
-			 *
-			 * TODO: what could be done here: wait for the delalloc
-			 *       runner to write out that page (might involve
-			 *       COW) and see whether the sector is still
-			 *       referenced afterwards.
-			 *
-			 * For the meantime, we'll treat this error
-			 * incorrectable, although there is a chance that a
-			 * later scrub will find the bad sector again and that
-			 * there's no dirty page in memory, then.
-			 */
-			ret = -EIO;
-			goto out;
-		}
-		ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE,
-					fixup->logical, page,
-					offset - page_offset(page),
-					fixup->mirror_num);
-		unlock_page(page);
-		corrected = !ret;
-	} else {
-		/*
-		 * we need to get good data first. the general readpage path
-		 * will call repair_io_failure for us, we just have to make
-		 * sure we read the bad mirror.
-		 */
-		ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
-					EXTENT_DAMAGED);
-		if (ret) {
-			/* set_extent_bits should give proper error */
-			WARN_ON(ret > 0);
-			if (ret > 0)
-				ret = -EFAULT;
-			goto out;
-		}
-
-		ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
-						btrfs_get_extent,
-						fixup->mirror_num);
-		wait_on_page_locked(page);
-
-		corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
-						end, EXTENT_DAMAGED, 0, NULL);
-		if (!corrected)
-			clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
-						EXTENT_DAMAGED);
-	}
-
-out:
-	if (page)
-		put_page(page);
-
-	iput(inode);
-
-	if (ret < 0)
-		return ret;
-
-	if (ret == 0 && corrected) {
-		/*
-		 * we only need to call readpage for one of the inodes belonging
-		 * to this extent. so make iterate_extent_inodes stop
-		 */
-		return 1;
-	}
-
-	return -EIO;
-}
-
-static void scrub_fixup_nodatasum(struct btrfs_work *work)
-{
-	struct btrfs_fs_info *fs_info;
-	int ret;
-	struct scrub_fixup_nodatasum *fixup;
-	struct scrub_ctx *sctx;
-	struct btrfs_trans_handle *trans = NULL;
-	struct btrfs_path *path;
-	int uncorrectable = 0;
-
-	fixup = container_of(work, struct scrub_fixup_nodatasum, work);
-	sctx = fixup->sctx;
-	fs_info = fixup->root->fs_info;
-
-	path = btrfs_alloc_path();
-	if (!path) {
-		spin_lock(&sctx->stat_lock);
-		++sctx->stat.malloc_errors;
-		spin_unlock(&sctx->stat_lock);
-		uncorrectable = 1;
-		goto out;
-	}
-
-	trans = btrfs_join_transaction(fixup->root);
-	if (IS_ERR(trans)) {
-		uncorrectable = 1;
-		goto out;
-	}
-
-	/*
-	 * the idea is to trigger a regular read through the standard path. we
-	 * read a page from the (failed) logical address by specifying the
-	 * corresponding copynum of the failed sector. thus, that readpage is
-	 * expected to fail.
-	 * that is the point where on-the-fly error correction will kick in
-	 * (once it's finished) and rewrite the failed sector if a good copy
-	 * can be found.
-	 */
-	ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
-					  scrub_fixup_readpage, fixup, false);
-	if (ret < 0) {
-		uncorrectable = 1;
-		goto out;
-	}
-	WARN_ON(ret != 1);
-
-	spin_lock(&sctx->stat_lock);
-	++sctx->stat.corrected_errors;
-	spin_unlock(&sctx->stat_lock);
-
-out:
-	if (trans && !IS_ERR(trans))
-		btrfs_end_transaction(trans);
-	if (uncorrectable) {
-		spin_lock(&sctx->stat_lock);
-		++sctx->stat.uncorrectable_errors;
-		spin_unlock(&sctx->stat_lock);
-		btrfs_dev_replace_stats_inc(
-			&fs_info->dev_replace.num_uncorrectable_read_errors);
-		btrfs_err_rl_in_rcu(fs_info,
-		    "unable to fixup (nodatasum) error at logical %llu on dev %s",
-			fixup->logical, rcu_str_deref(fixup->dev->name));
-	}
-
-	btrfs_free_path(path);
-	kfree(fixup);
-
-	scrub_pending_trans_workers_dec(sctx);
-}
-
 static inline void scrub_get_recover(struct scrub_recover *recover)
 {
 	refcount_inc(&recover->refs);
@@ -1239,42 +986,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 		goto out;
 	}
 
-	/*
-	 * NOTE: Even for nodatasum case, it's still possible that it's a
-	 * compressed data extent, thus scrub_fixup_nodatasum(), which write
-	 * inode page cache onto disk, could cause serious data corruption.
-	 *
-	 * So here we could only read from disk, and hope our recovery could
-	 * reach disk before the newer write.
-	 */
-	if (0 && !is_metadata && !have_csum) {
-		struct scrub_fixup_nodatasum *fixup_nodatasum;
-
-		WARN_ON(sctx->is_dev_replace);
-
-		/*
-		 * !is_metadata and !have_csum, this means that the data
-		 * might not be COWed, that it might be modified
-		 * concurrently. The general strategy to work on the
-		 * commit root does not help in the case when COW is not
-		 * used.
-		 */
-		fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
-		if (!fixup_nodatasum)
-			goto did_not_correct_error;
-		fixup_nodatasum->sctx = sctx;
-		fixup_nodatasum->dev = dev;
-		fixup_nodatasum->logical = logical;
-		fixup_nodatasum->root = fs_info->extent_root;
-		fixup_nodatasum->mirror_num = failed_mirror_index + 1;
-		scrub_pending_trans_workers_inc(sctx);
-		btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
-				scrub_fixup_nodatasum, NULL, NULL);
-		btrfs_queue_work(fs_info->scrub_workers,
-				 &fixup_nodatasum->work);
-		goto out;
-	}
-
 	/*
 	 * now build and submit the bios for the other mirrors, check
 	 * checksums.
-- 
cgit v1.2.3


From 28187ae569e8a6c34eb7ac8871f479d94be31e16 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 4 Jul 2018 10:24:51 +0300
Subject: btrfs: Simplify page unlocking in alloc_extent_buffer

Current version of the page unlocking code was added in
727011e07cbd ("Btrfs: allow metadata blocks larger than the page size")
but even in that commit the PageChecked flag was never used per se. In
fact, btrfs only uses PageChecked for data pages to identify pages
which have been dirtied but don't have ORDERED bit set. For more
information see 247e743cbe6e ("Btrfs: Use async helpers to deal with
pages that have been improperly dirtied").

However, this doesn't apply to extent buffer pages. The important bit
here is that the pages are unlocked AFTER the extent buffer has been
properly recorded in the radix tree to avoid races with
btree_releasepage. Let's exploit this fact and simplify the page
unlocking sequence by unlocking the pages in-order and removing the
redundant PageChecked flag setting/clearing.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fea015be4ce1..e9c74ce9623f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5047,13 +5047,8 @@ again:
 	 * after the extent buffer is in the radix tree so
 	 * it doesn't get lost
 	 */
-	SetPageChecked(eb->pages[0]);
-	for (i = 1; i < num_pages; i++) {
-		p = eb->pages[i];
-		ClearPageChecked(p);
-		unlock_page(p);
-	}
-	unlock_page(eb->pages[0]);
+	for (i = 0; i < num_pages; i++)
+		unlock_page(eb->pages[i]);
 	return eb;
 
 free_eb:
-- 
cgit v1.2.3


From b16d011e79fb353abff70b2ef35b22ed90dc8db5 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 4 Jul 2018 10:24:52 +0300
Subject: btrfs: Reword dodgy comments in alloc_extent_buffer

Commit eb14ab8ed24a ("Btrfs: fix page->private races") fixed a genuine
race between extent buffer initialisation and btree_releasepage.
Unfortunately as the code has evolved the comments weren't changed which
made them slightly wrong and they weren't very clear in the first place.
Fix this by (hopefully) rewording them in a more approachable manner.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e9c74ce9623f..96039dd56c72 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5009,8 +5009,11 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 			uptodate = 0;
 
 		/*
-		 * see below about how we avoid a nasty race with release page
-		 * and why we unlock later
+		 * We can't unlock the pages just yet since the extent buffer
+		 * hasn't been properly inserted in the radix tree, this
+		 * opens a race with btree_releasepage which can free a page
+		 * while we are still filling in all pages for the buffer and
+		 * we could crash.
 		 */
 	}
 	if (uptodate)
@@ -5039,13 +5042,9 @@ again:
 	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
 
 	/*
-	 * there is a race where release page may have
-	 * tried to find this extent buffer in the radix
-	 * but failed.  It will tell the VM it is safe to
-	 * reclaim the, and it will clear the page private bit.
-	 * We must make sure to set the page private bit properly
-	 * after the extent buffer is in the radix tree so
-	 * it doesn't get lost
+	 * Now it's safe to unlock the pages because any calls to
+	 * btree_releasepage will correctly detect that a page belongs to a
+	 * live buffer and won't free them prematurely.
 	 */
 	for (i = 0; i < num_pages; i++)
 		unlock_page(eb->pages[i]);
-- 
cgit v1.2.3


From d64766fdf9cd172f8416d898cc9d5219ed933b96 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 27 Jun 2018 16:38:22 +0300
Subject: btrfs: Refactor loop in btrfs_release_extent_buffer_page

The purpose of the function is to free all the pages comprising an
extent buffer. This can be achieved with a simple for loop rather than
the slightly more involved 'do {} while' construct. So rewrite the
loop using a 'for' construct. Additionally we can never have an
extent_buffer that has 0 pages so remove the check for index == 0. No
functional changes.

The reversed order used to have a meaning in the past where the first
page served as a blocking point for several callers. See eg
4f2de97acee6532b36dd6e99 ("Btrfs: set page->private to the eb").

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 96039dd56c72..66b5272bbaab 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4647,19 +4647,16 @@ int extent_buffer_under_io(struct extent_buffer *eb)
  */
 static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
 {
-	int index;
-	struct page *page;
+	int i;
+	int num_pages;
 	int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
 
 	BUG_ON(extent_buffer_under_io(eb));
 
-	index = num_extent_pages(eb);
-	if (index == 0)
-		return;
+	num_pages = num_extent_pages(eb);
+	for (i = 0; i < num_pages; i++) {
+		struct page *page = eb->pages[i];
 
-	do {
-		index--;
-		page = eb->pages[index];
 		if (!page)
 			continue;
 		if (mapped)
@@ -4691,7 +4688,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
 
 		/* One for when we allocated the page */
 		put_page(page);
-	} while (index != 0);
+	}
 }
 
 /*
-- 
cgit v1.2.3


From 55ac01396ae8cfc0616e85bb4b49a130144633c2 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 19 Jul 2018 17:24:32 +0200
Subject: btrfs: rename btrfs_release_extent_buffer_page

The function used to release one page (and always the first one), but
not anymore since a50924e3a4d7fccb0ecfbd4 ("btrfs: drop constant param
from btrfs_release_extent_buffer_page").  Update the name and comment.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 66b5272bbaab..6a4013ddcf73 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4643,9 +4643,9 @@ int extent_buffer_under_io(struct extent_buffer *eb)
 }
 
 /*
- * Helper for releasing extent buffer page.
+ * Release all pages attached to the extent buffer.
  */
-static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
+static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
 {
 	int i;
 	int num_pages;
@@ -4696,7 +4696,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
  */
 static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
 {
-	btrfs_release_extent_buffer_page(eb);
+	btrfs_release_extent_buffer_pages(eb);
 	__free_extent_buffer(eb);
 }
 
@@ -5085,7 +5085,7 @@ static int release_extent_buffer(struct extent_buffer *eb)
 		}
 
 		/* Should be safe to release our pages at this point */
-		btrfs_release_extent_buffer_page(eb);
+		btrfs_release_extent_buffer_pages(eb);
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 		if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
 			__free_extent_buffer(eb);
-- 
cgit v1.2.3


From 07e21c4dad42be8d2a6b8cd0b98cf8e6931e12fd Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 27 Jun 2018 16:38:23 +0300
Subject: btrfs: Document locking requirement via lockdep_assert_held

Remove stale comment since there is no longer an eb->eb_lock and
document the locking expectation with a lockdep_assert_held statement.
No functional changes.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 6a4013ddcf73..5ab401f79683 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5066,9 +5066,10 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
 	__free_extent_buffer(eb);
 }
 
-/* Expects to have eb->eb_lock already held */
 static int release_extent_buffer(struct extent_buffer *eb)
 {
+	lockdep_assert_held(&eb->refs_lock);
+
 	WARN_ON(atomic_read(&eb->refs) == 0);
 	if (atomic_dec_and_test(&eb->refs)) {
 		if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
-- 
cgit v1.2.3


From b0132a3be5daf84116833542717ff5692f51640e Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 27 Jun 2018 16:38:24 +0300
Subject: btrfs: Rename EXTENT_BUFFER_DUMMY to EXTENT_BUFFER_UNMAPPED

EXTENT_BUFFER_DUMMY is an awful name for this flag. Buffers which have
this flag set are not in any way dummy. Rather, they are private in the
sense that they are not mapped and linked to the global buffer tree. This
flag has subtle implications for the way free_extent_buffer works, for
example, and it also controls whether page->mapping->private_lock is held
during extent_buffer release. Pages for an unmapped buffer cannot be
under io, nor can they be written by a 3rd party so taking the lock is
unnecessary.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ EXTENT_BUFFER_UNMAPPED, update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c   |  4 ++--
 fs/btrfs/extent_io.c | 10 +++++-----
 fs/btrfs/extent_io.h |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e3858b2fe014..a4b713d03a33 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4089,10 +4089,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 	/*
 	 * This is a fast path so only do this check if we have sanity tests
-	 * enabled.  Normal people shouldn't be marking dummy buffers as dirty
+	 * enabled.  Normal people shouldn't be using umapped buffers as dirty
 	 * outside of the sanity tests.
 	 */
-	if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags)))
+	if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags)))
 		return;
 #endif
 	root = BTRFS_I(buf->pages[0]->mapping->host)->root;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5ab401f79683..3440053f3599 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4649,7 +4649,7 @@ static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
 {
 	int i;
 	int num_pages;
-	int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
+	int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
 
 	BUG_ON(extent_buffer_under_io(eb));
 
@@ -4763,7 +4763,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
 	}
 
 	set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
-	set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
+	set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
 
 	return new;
 }
@@ -4787,7 +4787,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
 	}
 	set_extent_buffer_uptodate(eb);
 	btrfs_set_header_nritems(eb, 0);
-	set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
+	set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
 
 	return eb;
 err:
@@ -5088,7 +5088,7 @@ static int release_extent_buffer(struct extent_buffer *eb)
 		/* Should be safe to release our pages at this point */
 		btrfs_release_extent_buffer_pages(eb);
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-		if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
+		if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
 			__free_extent_buffer(eb);
 			return 1;
 		}
@@ -5119,7 +5119,7 @@ void free_extent_buffer(struct extent_buffer *eb)
 
 	spin_lock(&eb->refs_lock);
 	if (atomic_read(&eb->refs) == 2 &&
-	    test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
+	    test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))
 		atomic_dec(&eb->refs);
 
 	if (atomic_read(&eb->refs) == 2 &&
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 48f1ee9ad379..0ecc13b7d6f7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -46,7 +46,7 @@
 #define EXTENT_BUFFER_STALE 6
 #define EXTENT_BUFFER_WRITEBACK 7
 #define EXTENT_BUFFER_READ_ERR 8        /* read IO error */
-#define EXTENT_BUFFER_DUMMY 9
+#define EXTENT_BUFFER_UNMAPPED 9
 #define EXTENT_BUFFER_IN_TREE 10
 #define EXTENT_BUFFER_WRITE_ERR 11    /* write IO error */
 
-- 
cgit v1.2.3


From 315409b0098fb2651d86553f0436b70502b29bb2 Mon Sep 17 00:00:00 2001
From: Gu Jinxiang <gujx@cn.fujitsu.com>
Date: Wed, 4 Jul 2018 18:16:39 +0800
Subject: btrfs: validate type when reading a chunk

Reported in https://bugzilla.kernel.org/show_bug.cgi?id=199839: an image
that has an invalid chunk type is read without returning an error.

Add chunk type check in btrfs_check_chunk_valid, to detect the wrong
type combinations.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=199839
Reported-by: Xu Wen <wen.xu@gatech.edu>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Gu Jinxiang <gujx@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index eca67529e536..f237ef9e70a7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6313,6 +6313,8 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
 	u16 num_stripes;
 	u16 sub_stripes;
 	u64 type;
+	u64 features;
+	bool mixed = false;
 
 	length = btrfs_chunk_length(leaf, chunk);
 	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
@@ -6351,6 +6353,32 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
 			  btrfs_chunk_type(leaf, chunk));
 		return -EIO;
 	}
+
+	if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
+		btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
+		return -EIO;
+	}
+
+	if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+	    (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
+		btrfs_err(fs_info,
+			"system chunk with data or metadata type: 0x%llx", type);
+		return -EIO;
+	}
+
+	features = btrfs_super_incompat_flags(fs_info->super_copy);
+	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+		mixed = true;
+
+	if (!mixed) {
+		if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
+		    (type & BTRFS_BLOCK_GROUP_DATA)) {
+			btrfs_err(fs_info,
+			"mixed chunk type in non-mixed mode: 0x%llx", type);
+			return -EIO;
+		}
+	}
+
 	if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
 	    (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
 	    (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
-- 
cgit v1.2.3


From d7e8555b1dd493c809e56e359974eecabe7d3fde Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 18 Jul 2018 17:31:19 +0200
Subject: btrfs: remove unused member async_submit_bio::fs_info

Introduced by c6100a4b4e3d1 ("Btrfs: replace tree->mapping with
tree->private_data") to be used in run_one_async_done, where it became
unused after 736cd52e0c720103 ("Btrfs: remove nr_async_submits and
async_submit_draining").

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a4b713d03a33..8033289d2189 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -106,7 +106,6 @@ void __cold btrfs_end_io_wq_exit(void)
  */
 struct async_submit_bio {
 	void *private_data;
-	struct btrfs_fs_info *fs_info;
 	struct bio *bio;
 	extent_submit_bio_start_t *submit_bio_start;
 	extent_submit_bio_done_t *submit_bio_done;
@@ -801,7 +800,6 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 		return BLK_STS_RESOURCE;
 
 	async->private_data = private_data;
-	async->fs_info = fs_info;
 	async->bio = bio;
 	async->mirror_num = mirror_num;
 	async->submit_bio_start = submit_bio_start;
-- 
cgit v1.2.3


From d7cbfafc4bc37ed21351cdedbc6f9d979545dbf3 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 18 Jul 2018 17:31:10 +0200
Subject: btrfs: remove unused member async_submit_bio::bio_flags

After splitting the start and end hooks in a758781d4b76c3 ("btrfs:
separate types for submit_bio_start and submit_bio_done"), some of
the function arguments were dropped but not removed from the structure.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8033289d2189..d82df15af89c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -110,7 +110,6 @@ struct async_submit_bio {
 	extent_submit_bio_start_t *submit_bio_start;
 	extent_submit_bio_done_t *submit_bio_done;
 	int mirror_num;
-	unsigned long bio_flags;
 	/*
 	 * bio_offset is optional, can be used if the pages in the bio
 	 * can't tell us where in the file the bio should go
@@ -808,7 +807,6 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
 			run_one_async_done, run_one_async_free);
 
-	async->bio_flags = bio_flags;
 	async->bio_offset = bio_offset;
 
 	async->status = 0;
-- 
cgit v1.2.3


From e288c080dddd1fdc3b7e0165cebd7af51a52d016 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 18 Jul 2018 17:36:24 +0200
Subject: btrfs: unify end_io callbacks of async_submit_bio

The end_io callbacks passed to btrfs_wq_submit_bio
(btrfs_submit_bio_done and btree_submit_bio_done) are effectively the
same code, so there is no point in keeping the indirection. Export
btrfs_submit_bio_done and call it directly.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c   | 28 +++-------------------------
 fs/btrfs/disk-io.h   |  5 +++--
 fs/btrfs/extent_io.h |  3 ---
 fs/btrfs/inode.c     |  8 +++-----
 4 files changed, 9 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d82df15af89c..03a7ed448f38 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -108,7 +108,6 @@ struct async_submit_bio {
 	void *private_data;
 	struct bio *bio;
 	extent_submit_bio_start_t *submit_bio_start;
-	extent_submit_bio_done_t *submit_bio_done;
 	int mirror_num;
 	/*
 	 * bio_offset is optional, can be used if the pages in the bio
@@ -775,7 +774,7 @@ static void run_one_async_done(struct btrfs_work *work)
 		return;
 	}
 
-	async->submit_bio_done(async->private_data, async->bio, async->mirror_num);
+	btrfs_submit_bio_done(async->private_data, async->bio, async->mirror_num);
 }
 
 static void run_one_async_free(struct btrfs_work *work)
@@ -789,8 +788,7 @@ static void run_one_async_free(struct btrfs_work *work)
 blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags,
 				 u64 bio_offset, void *private_data,
-				 extent_submit_bio_start_t *submit_bio_start,
-				 extent_submit_bio_done_t *submit_bio_done)
+				 extent_submit_bio_start_t *submit_bio_start)
 {
 	struct async_submit_bio *async;
 
@@ -802,7 +800,6 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	async->bio = bio;
 	async->mirror_num = mirror_num;
 	async->submit_bio_start = submit_bio_start;
-	async->submit_bio_done = submit_bio_done;
 
 	btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
 			run_one_async_done, run_one_async_free);
@@ -845,24 +842,6 @@ static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio,
 	return btree_csum_one_bio(bio);
 }
 
-static blk_status_t btree_submit_bio_done(void *private_data, struct bio *bio,
-					    int mirror_num)
-{
-	struct inode *inode = private_data;
-	blk_status_t ret;
-
-	/*
-	 * when we're called for a write, we're already in the async
-	 * submission context.  Just jump into btrfs_map_bio
-	 */
-	ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
-	if (ret) {
-		bio->bi_status = ret;
-		bio_endio(bio);
-	}
-	return ret;
-}
-
 static int check_async_write(struct btrfs_inode *bi)
 {
 	if (atomic_read(&bi->sync_writers))
@@ -905,8 +884,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
 		 */
 		ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
 					  bio_offset, private_data,
-					  btree_submit_bio_start,
-					  btree_submit_bio_done);
+					  btree_submit_bio_start);
 	}
 
 	if (ret)
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 1a3d277b027b..4cccba22640f 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -120,8 +120,9 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 			int mirror_num, unsigned long bio_flags,
 			u64 bio_offset, void *private_data,
-			extent_submit_bio_start_t *submit_bio_start,
-			extent_submit_bio_done_t *submit_bio_done);
+			extent_submit_bio_start_t *submit_bio_start);
+blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
+			  int mirror_num);
 int btrfs_write_tree_block(struct extent_buffer *buf);
 void btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
 int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 0ecc13b7d6f7..4e6d34dd7caf 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -92,9 +92,6 @@ typedef	blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *
 typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
 		struct bio *bio, u64 bio_offset);
 
-typedef blk_status_t (extent_submit_bio_done_t)(void *private_data,
-		struct bio *bio, int mirror_num);
-
 struct extent_io_ops {
 	/*
 	 * The following callbacks must be allways defined, the function
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 564ec00c765b..6cb42e33fdc3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1957,7 +1957,7 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
  * At IO completion time the cums attached on the ordered extent record
  * are inserted into the btree
  */
-static blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
+blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
 			  int mirror_num)
 {
 	struct inode *inode = private_data;
@@ -2030,8 +2030,7 @@ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
 		/* we're doing a write, do the async checksumming */
 		ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
 					  bio_offset, inode,
-					  btrfs_submit_bio_start,
-					  btrfs_submit_bio_done);
+					  btrfs_submit_bio_start);
 		goto out;
 	} else if (!skip_sum) {
 		ret = btrfs_csum_one_bio(inode, bio, 0, 0);
@@ -8295,8 +8294,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
 	if (write && async_submit) {
 		ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
 					  file_offset, inode,
-					  btrfs_submit_bio_start_direct_io,
-					  btrfs_submit_bio_done);
+					  btrfs_submit_bio_start_direct_io);
 		goto err;
 	} else if (write) {
 		/*
-- 
cgit v1.2.3


From 05912a3c04ebca217507b4323b679c60eda4ac11 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 18 Jul 2018 19:23:45 +0200
Subject: btrfs: drop extent_io_ops::tree_fs_info callback

All implementations of the callback are trivial and do the same thing, and
there's only one user. Merge everything together.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c   |  7 -------
 fs/btrfs/extent_io.c | 14 ++++----------
 fs/btrfs/extent_io.h |  1 -
 fs/btrfs/inode.c     |  7 -------
 4 files changed, 4 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 03a7ed448f38..126460214512 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4523,12 +4523,6 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
 	return 0;
 }
 
-static struct btrfs_fs_info *btree_fs_info(void *private_data)
-{
-	struct inode *inode = private_data;
-	return btrfs_sb(inode->i_sb);
-}
-
 static const struct extent_io_ops btree_extent_io_ops = {
 	/* mandatory callbacks */
 	.submit_bio_hook = btree_submit_bio_hook,
@@ -4537,7 +4531,6 @@ static const struct extent_io_ops btree_extent_io_ops = {
 	.merge_bio_hook = btrfs_merge_bio_hook,
 	.readpage_io_failed_hook = btree_io_failed_hook,
 	.set_range_writeback = btrfs_set_range_writeback,
-	.tree_fs_info = btree_fs_info,
 
 	/* optional callbacks */
 };
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3440053f3599..7a19437c6eda 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -140,14 +140,6 @@ static int add_extent_changeset(struct extent_state *state, unsigned bits,
 
 static void flush_write_bio(struct extent_page_data *epd);
 
-static inline struct btrfs_fs_info *
-tree_fs_info(struct extent_io_tree *tree)
-{
-	if (tree->ops)
-		return tree->ops->tree_fs_info(tree->private_data);
-	return NULL;
-}
-
 int __init extent_io_init(void)
 {
 	extent_state_cache = kmem_cache_create("btrfs_extent_state",
@@ -564,8 +556,10 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
 
 static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
 {
-	btrfs_panic(tree_fs_info(tree), err,
-		    "Locking error: Extent tree was modified by another thread while locked.");
+	struct inode *inode = tree->private_data;
+
+	btrfs_panic(btrfs_sb(inode->i_sb), err,
+	"locking error: extent tree was modified by another thread while locked");
 }
 
 /*
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4e6d34dd7caf..004517f3eb35 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -105,7 +105,6 @@ struct extent_io_ops {
 			      size_t size, struct bio *bio,
 			      unsigned long bio_flags);
 	int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
-	struct btrfs_fs_info *(*tree_fs_info)(void *private_data);
 	void (*set_range_writeback)(void *private_data, u64 start, u64 end);
 
 	/*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6cb42e33fdc3..f279e380f36e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10473,12 +10473,6 @@ static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror)
 	return -EAGAIN;
 }
 
-static struct btrfs_fs_info *iotree_fs_info(void *private_data)
-{
-	struct inode *inode = private_data;
-	return btrfs_sb(inode->i_sb);
-}
-
 static void btrfs_check_extent_io_range(void *private_data, const char *caller,
 					u64 start, u64 end)
 {
@@ -10553,7 +10547,6 @@ static const struct extent_io_ops btrfs_extent_io_ops = {
 	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
 	.merge_bio_hook = btrfs_merge_bio_hook,
 	.readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
-	.tree_fs_info = iotree_fs_info,
 	.set_range_writeback = btrfs_set_range_writeback,
 
 	/* optional callbacks */
-- 
cgit v1.2.3


From 00032d38eaa89c76de7d9c1ae6de8c48c14edd74 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 18 Jul 2018 19:28:09 +0200
Subject: btrfs: drop extent_io_ops::merge_bio_hook callback

The data and metadata callback implementations both use the same
function. We can remove the call indirection completely.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 10 +++-------
 fs/btrfs/disk-io.c     |  2 --
 fs/btrfs/extent_io.c   |  4 ++--
 fs/btrfs/extent_io.h   |  3 ---
 fs/btrfs/inode.c       |  5 ++---
 5 files changed, 7 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 70dace47258b..9bfa66592aa7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -299,7 +299,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	struct bio *bio = NULL;
 	struct compressed_bio *cb;
 	unsigned long bytes_left;
-	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	int pg_index = 0;
 	struct page *page;
 	u64 first_byte = disk_start;
@@ -338,9 +337,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		page = compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
 		if (bio->bi_iter.bi_size)
-			submit = io_tree->ops->merge_bio_hook(page, 0,
-							   PAGE_SIZE,
-							   bio, 0);
+			submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE, bio, 0);
 
 		page->mapping = NULL;
 		if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
@@ -622,9 +619,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		page->index = em_start >> PAGE_SHIFT;
 
 		if (comp_bio->bi_iter.bi_size)
-			submit = tree->ops->merge_bio_hook(page, 0,
-							PAGE_SIZE,
-							comp_bio, 0);
+			submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE,
+					comp_bio, 0);
 
 		page->mapping = NULL;
 		if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 126460214512..0c9c029ade72 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4527,8 +4527,6 @@ static const struct extent_io_ops btree_extent_io_ops = {
 	/* mandatory callbacks */
 	.submit_bio_hook = btree_submit_bio_hook,
 	.readpage_end_io_hook = btree_readpage_end_io_hook,
-	/* note we're sharing with inode.c for the merge bio hook */
-	.merge_bio_hook = btrfs_merge_bio_hook,
 	.readpage_io_failed_hook = btree_io_failed_hook,
 	.set_range_writeback = btrfs_set_range_writeback,
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7a19437c6eda..20af0efd7c17 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2784,8 +2784,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
 		else
 			contig = bio_end_sector(bio) == sector;
 
-		if (tree->ops && tree->ops->merge_bio_hook(page, offset,
-					page_size, bio, bio_flags))
+		if (tree->ops && btrfs_merge_bio_hook(page, offset, page_size,
+						      bio, bio_flags))
 			can_merge = false;
 
 		if (prev_bio_flags != bio_flags || !contig || !can_merge ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 004517f3eb35..5c07f87c2ec9 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -101,9 +101,6 @@ struct extent_io_ops {
 	int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
 				    struct page *page, u64 start, u64 end,
 				    int mirror);
-	int (*merge_bio_hook)(struct page *page, unsigned long offset,
-			      size_t size, struct bio *bio,
-			      unsigned long bio_flags);
 	int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
 	void (*set_range_writeback)(void *private_data, u64 start, u64 end);
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f279e380f36e..a72784d5666f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1898,8 +1898,8 @@ static void btrfs_clear_bit_hook(void *private_data,
 }
 
 /*
- * extent_io.c merge_bio_hook, this must check the chunk tree to make sure
- * we don't create bios that span stripes or chunks
+ * Merge bio hook, this must check the chunk tree to make sure we don't create
+ * bios that span stripes or chunks
  *
  * return 1 if page cannot be merged to bio
  * return 0 if page can be merged to bio
@@ -10545,7 +10545,6 @@ static const struct extent_io_ops btrfs_extent_io_ops = {
 	/* mandatory callbacks */
 	.submit_bio_hook = btrfs_submit_bio_hook,
 	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
-	.merge_bio_hook = btrfs_merge_bio_hook,
 	.readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
 	.set_range_writeback = btrfs_set_range_writeback,
 
-- 
cgit v1.2.3


From 5cdc84bfde22dc17b11ee7cb18cebd48f4a09f70 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 18 Jul 2018 20:32:52 +0200
Subject: btrfs: drop extent_io_ops::set_range_writeback callback

The data and metadata callback implementations both use the same
function. We can remove the call indirection and intermediate helper
completely.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h     |  2 +-
 fs/btrfs/disk-io.c   |  1 -
 fs/btrfs/extent_io.c | 10 +---------
 fs/btrfs/extent_io.h |  1 -
 fs/btrfs/inode.c     |  5 ++---
 5 files changed, 4 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ee1e152cb94b..9c638931b75e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3172,7 +3172,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 			 size_t size, struct bio *bio,
 			 unsigned long bio_flags);
-void btrfs_set_range_writeback(void *private_data, u64 start, u64 end);
+void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end);
 vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_evict_inode(struct inode *inode);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0c9c029ade72..9b208ccf24fe 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4528,7 +4528,6 @@ static const struct extent_io_ops btree_extent_io_ops = {
 	.submit_bio_hook = btree_submit_bio_hook,
 	.readpage_end_io_hook = btree_readpage_end_io_hook,
 	.readpage_io_failed_hook = btree_io_failed_hook,
-	.set_range_writeback = btrfs_set_range_writeback,
 
 	/* optional callbacks */
 };
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 20af0efd7c17..628f1aef34b0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1380,14 +1380,6 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
 	}
 }
 
-/*
- * helper function to set both pages and extents in the tree writeback
- */
-static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
-{
-	tree->ops->set_range_writeback(tree->private_data, start, end);
-}
-
 /* find the first state struct with 'bits' set after 'start', and
  * return it.  tree->lock must be held.  NULL will returned if
  * nothing was found after 'start'
@@ -3416,7 +3408,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 			continue;
 		}
 
-		set_range_writeback(tree, cur, cur + iosize - 1);
+		btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
 		if (!PageWriteback(page)) {
 			btrfs_err(BTRFS_I(inode)->root->fs_info,
 				   "page %lu not writeback, cur %llu end %llu",
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5c07f87c2ec9..b4d03e677e1d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -102,7 +102,6 @@ struct extent_io_ops {
 				    struct page *page, u64 start, u64 end,
 				    int mirror);
 	int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
-	void (*set_range_writeback)(void *private_data, u64 start, u64 end);
 
 	/*
 	 * Optional hooks, called if the pointer is not NULL
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a72784d5666f..4955e04da4c8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10487,9 +10487,9 @@ static void btrfs_check_extent_io_range(void *private_data, const char *caller,
 	}
 }
 
-void btrfs_set_range_writeback(void *private_data, u64 start, u64 end)
+void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
 {
-	struct inode *inode = private_data;
+	struct inode *inode = tree->private_data;
 	unsigned long index = start >> PAGE_SHIFT;
 	unsigned long end_index = end >> PAGE_SHIFT;
 	struct page *page;
@@ -10546,7 +10546,6 @@ static const struct extent_io_ops btrfs_extent_io_ops = {
 	.submit_bio_hook = btrfs_submit_bio_hook,
 	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
 	.readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
-	.set_range_writeback = btrfs_set_range_writeback,
 
 	/* optional callbacks */
 	.fill_delalloc = run_delalloc_range,
-- 
cgit v1.2.3


From ca5d2ba1ae2dfd651b798218d56c3277784fa499 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 23 Jul 2018 09:10:09 +0100
Subject: Btrfs: remove unused key assignment when doing a full send

At send.c:full_send_tree() we were setting the 'key' variable in the loop
while never using it later. We were also using two btrfs_key variables
to store the initial key for search and the key found in every iteration
of the loop. So remove this useless key assignment and use the same
btrfs_key variable to store the initial search key and the key found in
each iteration. This was introduced in the initial send commit but was
never used (commit 31db9f7c23fb ("Btrfs: introduce BTRFS_IOC_SEND for
btrfs send/receive")).

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/send.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 6ff7a1315e52..42e04cd3cd95 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6325,7 +6325,6 @@ static int full_send_tree(struct send_ctx *sctx)
 	int ret;
 	struct btrfs_root *send_root = sctx->send_root;
 	struct btrfs_key key;
-	struct btrfs_key found_key;
 	struct btrfs_path *path;
 	struct extent_buffer *eb;
 	int slot;
@@ -6347,17 +6346,13 @@ static int full_send_tree(struct send_ctx *sctx)
 	while (1) {
 		eb = path->nodes[0];
 		slot = path->slots[0];
-		btrfs_item_key_to_cpu(eb, &found_key, slot);
+		btrfs_item_key_to_cpu(eb, &key, slot);
 
-		ret = changed_cb(path, NULL, &found_key,
+		ret = changed_cb(path, NULL, &key,
 				 BTRFS_COMPARE_TREE_NEW, sctx);
 		if (ret < 0)
 			goto out;
 
-		key.objectid = found_key.objectid;
-		key.type = found_key.type;
-		key.offset = found_key.offset + 1;
-
 		ret = btrfs_next_item(send_root, path);
 		if (ret < 0)
 			goto out;
-- 
cgit v1.2.3


From 5e23a6fea6189d1f9d6e6ca5cb3290b4a0fe2d48 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 23 Jul 2018 14:47:29 +0800
Subject: btrfs: extent-tree: Remove dead alignment check

In find_free_extent() under checks: label, we have the following code:

		search_start = ALIGN(offset, fs_info->stripesize);
		/* move on to the next group */
		if (search_start + num_bytes >
		    block_group->key.objectid + block_group->key.offset) {
			btrfs_add_free_space(block_group, offset, num_bytes);
			goto loop;
		}
		if (offset < search_start)
			btrfs_add_free_space(block_group, offset,
					     search_start - offset);
		BUG_ON(offset > search_start);

However ALIGN() is rounding up, thus @search_start >= @offset and that
BUG_ON() will never be triggered.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fd109bfd528d..c71aa118679d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7564,7 +7564,7 @@ unclustered_alloc:
 			goto loop;
 		}
 checks:
-		search_start = ALIGN(offset, fs_info->stripesize);
+		search_start = round_up(offset, fs_info->stripesize);
 
 		/* move on to the next group */
 		if (search_start + num_bytes >
@@ -7576,7 +7576,6 @@ checks:
 		if (offset < search_start)
 			btrfs_add_free_space(block_group, offset,
 					     search_start - offset);
-		BUG_ON(offset > search_start);
 
 		ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
 				num_bytes, delalloc);
-- 
cgit v1.2.3


From 8e87e8562744e23e10afea98d610cc6b7f4ba9bb Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Fri, 20 Jul 2018 19:37:47 +0300
Subject: btrfs: Remove fs_info argument from btrfs_add_dev_item

It can be referenced from the passed transaction handle.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f237ef9e70a7..1c88c4c947bb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1666,10 +1666,8 @@ error:
  * the btrfs_device struct should be fully filled in
  */
 static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
-			    struct btrfs_fs_info *fs_info,
 			    struct btrfs_device *device)
 {
-	struct btrfs_root *root = fs_info->chunk_root;
 	int ret;
 	struct btrfs_path *path;
 	struct btrfs_dev_item *dev_item;
@@ -1685,8 +1683,8 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
 	key.type = BTRFS_DEV_ITEM_KEY;
 	key.offset = device->devid;
 
-	ret = btrfs_insert_empty_item(trans, root, path, &key,
-				      sizeof(*dev_item));
+	ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
+				      &key, sizeof(*dev_item));
 	if (ret)
 		goto out;
 
@@ -1711,7 +1709,7 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
 	ptr = btrfs_device_uuid(dev_item);
 	write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
 	ptr = btrfs_device_fsid(dev_item);
-	write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_FSID_SIZE);
+	write_extent_buffer(leaf, trans->fs_info->fsid, ptr, BTRFS_FSID_SIZE);
 	btrfs_mark_buffer_dirty(leaf);
 
 	ret = 0;
@@ -2449,7 +2447,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 		}
 	}
 
-	ret = btrfs_add_dev_item(trans, fs_info, device);
+	ret = btrfs_add_dev_item(trans, device);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto error_sysfs;
-- 
cgit v1.2.3


From 68a9db5f2395cdef98b387bbb816604f6f298056 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Fri, 20 Jul 2018 19:37:48 +0300
Subject: btrfs: Remove fs_info from btrfs_rm_dev_replace_remove_srcdev

It can be referenced from the passed srcdev argument.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/dev-replace.c | 2 +-
 fs/btrfs/volumes.c     | 5 ++---
 fs/btrfs/volumes.h     | 3 +--
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 059ca3d5ddd3..df375e1a0c9f 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -667,7 +667,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 
 	btrfs_rm_dev_replace_blocked(fs_info);
 
-	btrfs_rm_dev_replace_remove_srcdev(fs_info, src_device);
+	btrfs_rm_dev_replace_remove_srcdev(src_device);
 
 	btrfs_rm_dev_replace_unblocked(fs_info);
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1c88c4c947bb..d82658b01773 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1996,12 +1996,11 @@ error_undo:
 	goto out;
 }
 
-void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
-					struct btrfs_device *srcdev)
+void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
 {
 	struct btrfs_fs_devices *fs_devices;
 
-	lockdep_assert_held(&fs_info->fs_devices->device_list_mutex);
+	lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex);
 
 	/*
 	 * in case of fs with no seed, srcdev->fs_devices will point
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 6943aab9bdd7..0ae45ff1961b 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -452,8 +452,7 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
 int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
 int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
 			struct btrfs_fs_info *fs_info);
-void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
-					struct btrfs_device *srcdev);
+void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
 void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
 				      struct btrfs_device *srcdev);
 void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
-- 
cgit v1.2.3


From 5495f195fc5b246d8b0b8a2e0fbbc9c94b3ebf24 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Fri, 20 Jul 2018 19:37:49 +0300
Subject: btrfs: remove fs_info argument from update_dev_stat_item

It can be referenced from the passed transaction handle.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d82658b01773..fc9d70a16d82 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7043,9 +7043,9 @@ out:
 }
 
 static int update_dev_stat_item(struct btrfs_trans_handle *trans,
-				struct btrfs_fs_info *fs_info,
 				struct btrfs_device *device)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *dev_root = fs_info->dev_root;
 	struct btrfs_path *path;
 	struct btrfs_key key;
@@ -7138,7 +7138,7 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
 		 */
 		smp_rmb();
 
-		ret = update_dev_stat_item(trans, fs_info, device);
+		ret = update_dev_stat_item(trans, device);
 		if (!ret)
 			atomic_sub(stats_cnt, &device->dev_stats_ccnt);
 	}
-- 
cgit v1.2.3


From d6507cf1e2dfa4f6888cd90e4c9a307f7ac9b383 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Fri, 20 Jul 2018 19:37:50 +0300
Subject: btrfs: Remove fs_info from btrfs_assign_next_active_device

It can be referenced from the passed 'device' argument which is always
a well-formed device.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/dev-replace.c | 2 +-
 fs/btrfs/volumes.c     | 9 +++++----
 fs/btrfs/volumes.h     | 4 ++--
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index df375e1a0c9f..dd17a4d7bea2 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -658,7 +658,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	tgt_device->commit_total_bytes = src_device->commit_total_bytes;
 	tgt_device->commit_bytes_used = src_device->bytes_used;
 
-	btrfs_assign_next_active_device(fs_info, src_device, tgt_device);
+	btrfs_assign_next_active_device(src_device, tgt_device);
 
 	list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
 	fs_info->fs_devices->rw_devices++;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fc9d70a16d82..9cd15964ab6f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1833,9 +1833,10 @@ static struct btrfs_device * btrfs_find_next_active_device(
  * where this function called, there should be always be another device (or
  * this_dev) which is active.
  */
-void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
-		struct btrfs_device *device, struct btrfs_device *this_dev)
+void btrfs_assign_next_active_device(struct btrfs_device *device,
+				     struct btrfs_device *this_dev)
 {
+	struct btrfs_fs_info *fs_info = device->fs_info;
 	struct btrfs_device *next_device;
 
 	if (this_dev)
@@ -1945,7 +1946,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
 		cur_devices->missing_devices--;
 
-	btrfs_assign_next_active_device(fs_info, device, NULL);
+	btrfs_assign_next_active_device(device, NULL);
 
 	if (device->bdev) {
 		cur_devices->open_devices--;
@@ -2077,7 +2078,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 
 	fs_devices->num_devices--;
 
-	btrfs_assign_next_active_device(fs_info, tgtdev, NULL);
+	btrfs_assign_next_active_device(tgtdev, NULL);
 
 	list_del_rcu(&tgtdev->dev_list);
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 0ae45ff1961b..1bcf0f34e8dc 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -407,8 +407,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path,
 					   fmode_t flags, void *holder);
 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
 void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
-void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
-		struct btrfs_device *device, struct btrfs_device *this_dev);
+void btrfs_assign_next_active_device(struct btrfs_device *device,
+				     struct btrfs_device *this_dev);
 int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
 					 const char *device_path,
 					 struct btrfs_device **device);
-- 
cgit v1.2.3


From 4f5ad7bd6315528ed50a11d53c66854a5d16425b Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Fri, 20 Jul 2018 19:37:51 +0300
Subject: btrfs: Remove fs_info from btrfs_destroy_dev_replace_tgtdev

This function is always passed a well-formed tgtdevice so the fs_info
can be referenced from there.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/dev-replace.c | 6 +++---
 fs/btrfs/volumes.c     | 5 ++---
 fs/btrfs/volumes.h     | 3 +--
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index dd17a4d7bea2..5a72f9933e58 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -506,7 +506,7 @@ leave:
 	dev_replace->srcdev = NULL;
 	dev_replace->tgtdev = NULL;
 	btrfs_dev_replace_write_unlock(dev_replace);
-	btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+	btrfs_destroy_dev_replace_tgtdev(tgt_device);
 	return ret;
 }
 
@@ -632,7 +632,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 		btrfs_rm_dev_replace_blocked(fs_info);
 		if (tgt_device)
-			btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+			btrfs_destroy_dev_replace_tgtdev(tgt_device);
 		btrfs_rm_dev_replace_unblocked(fs_info);
 		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 
@@ -821,7 +821,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
 		btrfs_dev_name(tgt_device));
 
 	if (tgt_device)
-		btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+		btrfs_destroy_dev_replace_tgtdev(tgt_device);
 
 leave:
 	mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9cd15964ab6f..45d7c37050ce 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2063,10 +2063,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
 	}
 }
 
-void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
-				      struct btrfs_device *tgtdev)
+void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
 {
-	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+	struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;
 
 	WARN_ON(!tgtdev);
 	mutex_lock(&fs_devices->device_list_mutex);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 1bcf0f34e8dc..69a028058c43 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -455,8 +455,7 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
 void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
 void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
 				      struct btrfs_device *srcdev);
-void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
-				      struct btrfs_device *tgtdev);
+void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);
 void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
 int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
 			   u64 logical, u64 len);
-- 
cgit v1.2.3


From f4208794d02e74f16e5744fe2fe294d7f71fa0f9 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Fri, 20 Jul 2018 19:37:52 +0300
Subject: btrfs: Remove fs_info from btrfs_free_chunk

It can be referenced from the passed transaction handle.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 45d7c37050ce..991022f5219f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2605,9 +2605,9 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
 	return btrfs_update_device(trans, device);
 }
 
-static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
-			    struct btrfs_fs_info *fs_info, u64 chunk_offset)
+static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *root = fs_info->chunk_root;
 	int ret;
 	struct btrfs_path *path;
@@ -2777,7 +2777,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);
 
-	ret = btrfs_free_chunk(trans, fs_info, chunk_offset);
+	ret = btrfs_free_chunk(trans, chunk_offset);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto out;
-- 
cgit v1.2.3


From 97aff912a2fa75555641303d42bd4c723a98df5d Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Fri, 20 Jul 2018 19:37:53 +0300
Subject: btrfs: Remove fs_info from btrfs_finish_chunk_alloc

It can be referenced from the passed transaction handle.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c |  6 ++----
 fs/btrfs/volumes.c     | 10 +++++-----
 fs/btrfs/volumes.h     |  6 ++----
 3 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c71aa118679d..16b916a33e56 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10022,8 +10022,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
 					sizeof(item));
 		if (ret)
 			btrfs_abort_transaction(trans, ret);
-		ret = btrfs_finish_chunk_alloc(trans, fs_info, key.objectid,
-					       key.offset);
+		ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset);
 		if (ret)
 			btrfs_abort_transaction(trans, ret);
 		add_block_group_free_space(trans, block_group);
@@ -10594,8 +10593,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		 * Btrfs_remove_chunk will abort the transaction if things go
 		 * horribly wrong.
 		 */
-		ret = btrfs_remove_chunk(trans, fs_info,
-					 block_group->key.objectid);
+		ret = btrfs_remove_chunk(trans, block_group->key.objectid);
 
 		if (ret) {
 			if (trimming)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 991022f5219f..d86346f83a08 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2716,9 +2716,9 @@ static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
 	return em;
 }
 
-int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info, u64 chunk_offset)
+int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct extent_map *em;
 	struct map_lookup *map;
 	u64 dev_extent_len = 0;
@@ -2858,7 +2858,7 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 	 * step two, delete the device extents and the
 	 * chunk tree entries
 	 */
-	ret = btrfs_remove_chunk(trans, fs_info, chunk_offset);
+	ret = btrfs_remove_chunk(trans, chunk_offset);
 	btrfs_end_transaction(trans);
 	return ret;
 }
@@ -4842,9 +4842,9 @@ error:
 }
 
 int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
-				struct btrfs_fs_info *fs_info,
-				u64 chunk_offset, u64 chunk_size)
+			     u64 chunk_offset, u64 chunk_size)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *extent_root = fs_info->extent_root;
 	struct btrfs_root *chunk_root = fs_info->chunk_root;
 	struct btrfs_key key;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 69a028058c43..049619176831 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -462,10 +462,8 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
 unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
 				    u64 logical);
 int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
-				struct btrfs_fs_info *fs_info,
-				u64 chunk_offset, u64 chunk_size);
-int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info, u64 chunk_offset);
+			     u64 chunk_offset, u64 chunk_size);
+int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset);
 
 static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
 				      int index)
-- 
cgit v1.2.3


From 3c4276936f6fbe52884b4ea4e6cc120b890a0f9f Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 20 Jul 2018 11:46:10 -0700
Subject: Btrfs: fix btrfs_write_inode vs delayed iput deadlock

We recently ran into the following deadlock involving
btrfs_write_inode():

[  +0.005066]  __schedule+0x38e/0x8c0
[  +0.007144]  schedule+0x36/0x80
[  +0.006447]  bit_wait+0x11/0x60
[  +0.006446]  __wait_on_bit+0xbe/0x110
[  +0.007487]  ? bit_wait_io+0x60/0x60
[  +0.007319]  __inode_wait_for_writeback+0x96/0xc0
[  +0.009568]  ? autoremove_wake_function+0x40/0x40
[  +0.009565]  inode_wait_for_writeback+0x21/0x30
[  +0.009224]  evict+0xb0/0x190
[  +0.006099]  iput+0x1a8/0x210
[  +0.006103]  btrfs_run_delayed_iputs+0x73/0xc0
[  +0.009047]  btrfs_commit_transaction+0x799/0x8c0
[  +0.009567]  btrfs_write_inode+0x81/0xb0
[  +0.008008]  __writeback_single_inode+0x267/0x320
[  +0.009569]  writeback_sb_inodes+0x25b/0x4e0
[  +0.008702]  wb_writeback+0x102/0x2d0
[  +0.007487]  wb_workfn+0xa4/0x310
[  +0.006794]  ? wb_workfn+0xa4/0x310
[  +0.007143]  process_one_work+0x150/0x410
[  +0.008179]  worker_thread+0x6d/0x520
[  +0.007490]  kthread+0x12c/0x160
[  +0.006620]  ? put_pwq_unlocked+0x80/0x80
[  +0.008185]  ? kthread_park+0xa0/0xa0
[  +0.007484]  ? do_syscall_64+0x53/0x150
[  +0.007837]  ret_from_fork+0x29/0x40

Writeback calls:

btrfs_write_inode
  btrfs_commit_transaction
    btrfs_run_delayed_iputs

If iput() is called on that same inode, evict() will wait for writeback
forever.

btrfs_write_inode() was originally added way back in 4730a4bc5bf3
("btrfs_dirty_inode") to support O_SYNC writes. However, ->write_inode()
hasn't been used for O_SYNC since 148f948ba877 ("vfs: Introduce new
helpers for syncing after writing to O_SYNC file or IS_SYNC inode"), so
btrfs_write_inode() is actually unnecessary (and leads to a bunch of
unnecessary commits). Get rid of it, which also gets rid of the
deadlock.

CC: stable@vger.kernel.org # 3.2+
Signed-off-by: Josef Bacik <jbacik@fb.com>
[Omar: new commit message]
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 26 --------------------------
 fs/btrfs/super.c |  1 -
 2 files changed, 27 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4955e04da4c8..472457795486 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6021,32 +6021,6 @@ err:
 	return ret;
 }
 
-int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct btrfs_trans_handle *trans;
-	int ret = 0;
-	bool nolock = false;
-
-	if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
-		return 0;
-
-	if (btrfs_fs_closing(root->fs_info) &&
-			btrfs_is_free_space_inode(BTRFS_I(inode)))
-		nolock = true;
-
-	if (wbc->sync_mode == WB_SYNC_ALL) {
-		if (nolock)
-			trans = btrfs_join_transaction_nolock(root);
-		else
-			trans = btrfs_join_transaction(root);
-		if (IS_ERR(trans))
-			return PTR_ERR(trans);
-		ret = btrfs_commit_transaction(trans);
-	}
-	return ret;
-}
-
 /*
  * This is somewhat expensive, updating the tree every time the
  * inode changes.  But, it is most likely to find the inode in cache.
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index efe8b03ce380..67de3c0fc85b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2344,7 +2344,6 @@ static const struct super_operations btrfs_super_ops = {
 	.sync_fs	= btrfs_sync_fs,
 	.show_options	= btrfs_show_options,
 	.show_devname	= btrfs_show_devname,
-	.write_inode	= btrfs_write_inode,
 	.alloc_inode	= btrfs_alloc_inode,
 	.destroy_inode	= btrfs_destroy_inode,
 	.statfs		= btrfs_statfs,
-- 
cgit v1.2.3


From 616d374efa23cb699074ea02b301254ad64d224a Mon Sep 17 00:00:00 2001
From: Adam Borowski <kilobyte@angband.pl>
Date: Wed, 18 Jul 2018 00:08:59 +0200
Subject: btrfs: allow defrag on a file opened read-only that has rw
 permissions

Requiring a read-write descriptor conflicts both ways with exec,
returning ETXTBSY whenever you try to defrag a program that's currently
being run, or causing intermittent exec failures on a live system being
defragged.

As defrag doesn't change the file's contents in any way, there's no
reason to consider it a rw operation.  Thus, let's check only whether
the file could have been opened rw.  Such access control is still needed
as currently defrag can use extra disk space, and might trigger bugs.

We return EINVAL when the request is invalid; here the request is valid
and the user merely has insufficient privileges.  Thus, the EPERM return value
reflects the error better -- as discussed in the identical case for
dedupe.

According to codesearch.debian.net, no userspace program distinguishes
these values beyond strerror().

Signed-off-by: Adam Borowski <kilobyte@angband.pl>
Reviewed-by: David Sterba <dsterba@suse.com>
[ fold the EPERM patch from Adam ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4482cf35522e..80fe3c654612 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2928,8 +2928,14 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
 		ret = btrfs_defrag_root(root);
 		break;
 	case S_IFREG:
-		if (!(file->f_mode & FMODE_WRITE)) {
-			ret = -EINVAL;
+		/*
+		 * Note that this does not check the file descriptor for write
+		 * access. Requiring a writable descriptor would hit ETXTBSY on
+		 * running executables and reject files opened read-only.
+		 */
+		if (!capable(CAP_SYS_ADMIN) &&
+		    inode_permission(inode, MAY_WRITE)) {
+			ret = -EPERM;
 			goto out;
 		}
 
-- 
cgit v1.2.3


From e17385ca2960177da402d6f4d80bdc5b53c29bc4 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 20 Jul 2018 16:30:18 +0200
Subject: btrfs: remove unused member btrfs_root::name

Added in 58176a9604c ("Btrfs: Add per-root block accounting and sysfs
entries") in 2007, the roots had names exported in sysfs. The code
was commented out in 4df27c4d5cc1dda54ed ("Btrfs: change how subvolumes
are organized") and cleaned by 182608c8294b5fe9 ("btrfs: remove old
unused commented out code").

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h   | 1 -
 fs/btrfs/disk-io.c | 2 --
 2 files changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9c638931b75e..4ca6c4e141ea 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1219,7 +1219,6 @@ struct btrfs_root {
 	u64 defrag_trans_start;
 	struct btrfs_key defrag_progress;
 	struct btrfs_key defrag_max;
-	char *name;
 
 	/* the dirty list is only used by non-reference counted roots */
 	struct list_head dirty_list;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9b208ccf24fe..3830867e0225 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1154,7 +1154,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
 	root->highest_objectid = 0;
 	root->nr_delalloc_inodes = 0;
 	root->nr_ordered_extents = 0;
-	root->name = NULL;
 	root->inode_tree = RB_ROOT;
 	INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
 	root->block_rsv = NULL;
@@ -3849,7 +3848,6 @@ static void free_fs_root(struct btrfs_root *root)
 	free_extent_buffer(root->commit_root);
 	kfree(root->free_ino_ctl);
 	kfree(root->free_ino_pinned);
-	kfree(root->name);
 	btrfs_put_fs_root(root);
 }
 
-- 
cgit v1.2.3


From e9539cff04728e31e150b41000b828745fc0c2b3 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 20 Jul 2018 16:30:20 +0200
Subject: btrfs: dev-replace: remove unused members of btrfs_dev_replace

Lock owner and nesting level have been unused since day 1, probably
copy&pasted from the extent_buffer locking scheme without much thinking.
The locking of device replace is simpler and does not need any lock
nesting.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h   | 2 --
 fs/btrfs/disk-io.c | 2 --
 2 files changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4ca6c4e141ea..5f6ec80d374f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -365,8 +365,6 @@ struct btrfs_dev_replace {
 	struct btrfs_device *srcdev;
 	struct btrfs_device *tgtdev;
 
-	pid_t lock_owner;
-	atomic_t nesting_level;
 	struct mutex lock_finishing_cancel_unmount;
 	rwlock_t lock;
 	atomic_t read_locks;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3830867e0225..c1d287a766c1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2154,8 +2154,6 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
 
 static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
 {
-	fs_info->dev_replace.lock_owner = 0;
-	atomic_set(&fs_info->dev_replace.nesting_level, 0);
 	mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
 	rwlock_init(&fs_info->dev_replace.lock);
 	atomic_set(&fs_info->dev_replace.read_locks, 0);
-- 
cgit v1.2.3


From 2ffad70ed3b6fbc946bc102f9d11c0e7c1cd380f Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 20 Jul 2018 16:30:23 +0200
Subject: btrfs: constify strings passed to assertion helper

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5f6ec80d374f..2e32584c635f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3425,7 +3425,7 @@ do {								\
 #ifdef CONFIG_BTRFS_ASSERT
 
 __cold
-static inline void assfail(char *expr, char *file, int line)
+static inline void assfail(const char *expr, const char *file, int line)
 {
 	pr_err("assertion failed: %s, file: %s, line: %d\n",
 	       expr, file, line);
-- 
cgit v1.2.3


From 84db5ccf4262894852e0d178d250678945d9b0b8 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 20 Jul 2018 16:30:25 +0200
Subject: btrfs: merge free_fs_root helpers

The exported helper just calls the static one. There's no obvious reason
to have them separate, e.g. for performance reasons where the static one
could be better optimized in the same unit. There's a slight decrease in
code size and stack consumption.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c1d287a766c1..6a5a3cd12886 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -52,7 +52,6 @@
 
 static const struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
-static void free_fs_root(struct btrfs_root *root);
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 				      struct btrfs_fs_info *fs_info);
@@ -1504,7 +1503,7 @@ int btrfs_init_fs_root(struct btrfs_root *root)
 
 	return 0;
 fail:
-	/* the caller is responsible to call free_fs_root */
+	/* The caller is responsible to call btrfs_free_fs_root */
 	return ret;
 }
 
@@ -1609,14 +1608,14 @@ again:
 	ret = btrfs_insert_fs_root(fs_info, root);
 	if (ret) {
 		if (ret == -EEXIST) {
-			free_fs_root(root);
+			btrfs_free_fs_root(root);
 			goto again;
 		}
 		goto fail;
 	}
 	return root;
 fail:
-	free_fs_root(root);
+	btrfs_free_fs_root(root);
 	return ERR_PTR(ret);
 }
 
@@ -3831,10 +3830,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
 		__btrfs_remove_free_space_cache(root->free_ino_pinned);
 	if (root->free_ino_ctl)
 		__btrfs_remove_free_space_cache(root->free_ino_ctl);
-	free_fs_root(root);
+	btrfs_free_fs_root(root);
 }
 
-static void free_fs_root(struct btrfs_root *root)
+void btrfs_free_fs_root(struct btrfs_root *root)
 {
 	iput(root->ino_cache_inode);
 	WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
@@ -3849,11 +3848,6 @@ static void free_fs_root(struct btrfs_root *root)
 	btrfs_put_fs_root(root);
 }
 
-void btrfs_free_fs_root(struct btrfs_root *root)
-{
-	free_fs_root(root);
-}
-
 int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
 {
 	u64 root_objectid = 0;
-- 
cgit v1.2.3


From 4559b0a71749c442d34f7cfb9e72c9e58db83948 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 19 Jul 2018 10:49:51 -0400
Subject: btrfs: don't leak ret from do_chunk_alloc

If we're trying to make a data reservation and we have to allocate a
data chunk we could leak ret == 1, as do_chunk_alloc() will return 1 if
it allocated a chunk.  Since the end of the function is the success path
just return 0.

CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 16b916a33e56..2eb2d7ac0ba4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4226,7 +4226,7 @@ commit_trans:
 				      data_sinfo->flags, bytes, 1);
 	spin_unlock(&data_sinfo->lock);
 
-	return ret;
+	return 0;
 }
 
 int btrfs_check_data_free_space(struct inode *inode,
-- 
cgit v1.2.3


From 0d836392cadd5535f4184d46d901a82eb276ed62 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 20 Jul 2018 10:59:06 +0100
Subject: Btrfs: fix mount failure after fsync due to hard link recreation

If we end up logging an inode reference item which has the same name
but different index from the one we have persisted, we end up failing when
replaying the log with an errno value of -EEXIST. The error comes from
btrfs_add_link(), which is called from add_inode_ref(), when we are
replaying an inode reference item.

Example scenario where this happens:

  $ mkfs.btrfs -f /dev/sdb
  $ mount /dev/sdb /mnt

  $ touch /mnt/foo
  $ ln /mnt/foo /mnt/bar

  $ sync

  # Rename the first hard link (foo) to a new name and rename the second
  # hard link (bar) to the old name of the first hard link (foo).
  $ mv /mnt/foo /mnt/qwerty
  $ mv /mnt/bar /mnt/foo

  # Create a new file, in the same parent directory, with the old name of
  # the second hard link (bar) and fsync this new file.
  # We do this instead of calling fsync on foo/qwerty because if we did
  # that the fsync resulted in a full transaction commit, not triggering
  # the problem.
  $ touch /mnt/bar
  $ xfs_io -c "fsync" /mnt/bar

  <power fail>

  $ mount /dev/sdb /mnt
  mount: mount /dev/sdb on /mnt failed: File exists

So fix this by checking if a conflicting inode reference exists (same
name, same parent but different index), removing it (and the associated
dir index entries from the parent inode) if it exists, before attempting
to add the new reference.

A test case for fstests follows soon.

CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 10f6a4223897..033aeebbe9de 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1290,6 +1290,46 @@ again:
 	return ret;
 }
 
+static int btrfs_inode_ref_exists(struct inode *inode, struct inode *dir,
+				  const u8 ref_type, const char *name,
+				  const int namelen)
+{
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	const u64 parent_id = btrfs_ino(BTRFS_I(dir));
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = btrfs_ino(BTRFS_I(inode));
+	key.type = ref_type;
+	if (key.type == BTRFS_INODE_REF_KEY)
+		key.offset = parent_id;
+	else
+		key.offset = btrfs_extref_hash(parent_id, name, namelen);
+
+	ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		ret = 0;
+		goto out;
+	}
+	if (key.type == BTRFS_INODE_EXTREF_KEY)
+		ret = btrfs_find_name_in_ext_backref(path->nodes[0],
+						     path->slots[0], parent_id,
+						     name, namelen, NULL);
+	else
+		ret = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+						 name, namelen, NULL);
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
 /*
  * replay one inode back reference item found in the log tree.
  * eb, slot and key refer to the buffer and key found in the log tree.
@@ -1399,6 +1439,32 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 				}
 			}
 
+			/*
+			 * If a reference item already exists for this inode
+			 * with the same parent and name, but different index,
+			 * drop it and the corresponding directory index entries
+			 * from the parent before adding the new reference item
+			 * and dir index entries, otherwise we would fail with
+			 * -EEXIST returned from btrfs_add_link() below.
+			 */
+			ret = btrfs_inode_ref_exists(inode, dir, key->type,
+						     name, namelen);
+			if (ret > 0) {
+				ret = btrfs_unlink_inode(trans, root,
+							 BTRFS_I(dir),
+							 BTRFS_I(inode),
+							 name, namelen);
+				/*
+				 * If we dropped the link count to 0, bump it so
+				 * that later the iput() on the inode will not
+				 * free it. We will fixup the link count later.
+				 */
+				if (!ret && inode->i_nlink == 0)
+					inc_nlink(inode);
+			}
+			if (ret < 0)
+				goto out;
+
 			/* insert our name */
 			ret = btrfs_add_link(trans, BTRFS_I(dir),
 					BTRFS_I(inode),
-- 
cgit v1.2.3


From 46b2f4590aab71d31088a265c86026b1e96c9de4 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 24 Jul 2018 11:54:04 +0100
Subject: Btrfs: fix send failure when root has deleted files still open

The more common use case of send involves creating a RO snapshot and then
using it for a send operation. In this case it's not possible to have inodes
in the snapshot that have a link count of zero (inode with an orphan item)
since during snapshot creation we do the orphan cleanup. However, other
less common use cases for send can end up seeing inodes with a link count
of zero and in this case the send operation fails with a ENOENT error
because any attempt to generate a path for the inode, with the purpose
of creating it or updating it at the receiver, fails since there are no
inode reference items. One use case is to use a regular subvolume for
a send operation after turning it to RO mode or turning a RW snapshot
into RO mode and then using it for a send operation. In both cases, if a
file gets all its hard links deleted while there is an open file
descriptor before turning the subvolume/snapshot into RO mode, the send
operation will encounter an inode with a link count of zero and then
fail with errno ENOENT.

Example using a full send with a subvolume:

  $ mkfs.btrfs -f /dev/sdb
  $ mount /dev/sdb /mnt

  $ btrfs subvolume create /mnt/sv1
  $ touch /mnt/sv1/foo
  $ touch /mnt/sv1/bar

  # keep an open file descriptor on file bar
  $ exec 73</mnt/sv1/bar
  $ unlink /mnt/sv1/bar

  # Turn the subvolume to RO mode and use it for a full send, while
  # holding the open file descriptor.
  $ btrfs property set /mnt/sv1 ro true

  $ btrfs send -f /tmp/full.send /mnt/sv1
  At subvol /mnt/sv1
  ERROR: send ioctl failed with -2: No such file or directory

Example using an incremental send with snapshots:

  $ mkfs.btrfs -f /dev/sdb
  $ mount /dev/sdb /mnt

  $ btrfs subvolume create /mnt/sv1
  $ touch /mnt/sv1/foo
  $ touch /mnt/sv1/bar

  $ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap1

  $ echo "hello world" >> /mnt/sv1/bar

  $ btrfs subvolume snapshot -r /mnt/sv1 /mnt/snap2

  # Turn the second snapshot to RW mode and delete file foo while
  # holding an open file descriptor on it.
  $ btrfs property set /mnt/snap2 ro false
  $ exec 73</mnt/snap2/foo
  $ unlink /mnt/snap2/foo

  # Set the second snapshot back to RO mode and do an incremental send.
  $ btrfs property set /mnt/snap2 ro true

  $ btrfs send -f /tmp/inc.send -p /mnt/snap1 /mnt/snap2
  At subvol /mnt/snap2
  ERROR: send ioctl failed with -2: No such file or directory

So fix this by ignoring inodes with a link count of zero if we are either
doing a full send or if they do not exist in the parent snapshot (they
are new in the send snapshot), and unlink all paths found in the parent
snapshot when doing an incremental send (and ignoring all other inode
items, such as xattrs and extents).

A test case for fstests follows soon.

CC: stable@vger.kernel.org # 4.4+
Reported-by: Martin Wilck <martin.wilck@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/send.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 129 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 42e04cd3cd95..551294a6c9e2 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -100,6 +100,7 @@ struct send_ctx {
 	u64 cur_inode_rdev;
 	u64 cur_inode_last_extent;
 	u64 cur_inode_next_write_offset;
+	bool ignore_cur_inode;
 
 	u64 send_progress;
 
@@ -5796,6 +5797,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 	int pending_move = 0;
 	int refs_processed = 0;
 
+	if (sctx->ignore_cur_inode)
+		return 0;
+
 	ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
 					      &refs_processed);
 	if (ret < 0)
@@ -5914,6 +5918,93 @@ out:
 	return ret;
 }
 
+struct parent_paths_ctx {
+	struct list_head *refs;
+	struct send_ctx *sctx;
+};
+
+static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
+			     void *ctx)
+{
+	struct parent_paths_ctx *ppctx = ctx;
+
+	return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx,
+			  ppctx->refs);
+}
+
+/*
+ * Issue unlink operations for all paths of the current inode found in the
+ * parent snapshot.
+ */
+static int btrfs_unlink_all_paths(struct send_ctx *sctx)
+{
+	LIST_HEAD(deleted_refs);
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct parent_paths_ctx ctx;
+	int ret;
+
+	path = alloc_path_for_send();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = sctx->cur_ino;
+	key.type = BTRFS_INODE_REF_KEY;
+	key.offset = 0;
+	ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	ctx.refs = &deleted_refs;
+	ctx.sctx = sctx;
+
+	while (true) {
+		struct extent_buffer *eb = path->nodes[0];
+		int slot = path->slots[0];
+
+		if (slot >= btrfs_header_nritems(eb)) {
+			ret = btrfs_next_leaf(sctx->parent_root, path);
+			if (ret < 0)
+				goto out;
+			else if (ret > 0)
+				break;
+			continue;
+		}
+
+		btrfs_item_key_to_cpu(eb, &key, slot);
+		if (key.objectid != sctx->cur_ino)
+			break;
+		if (key.type != BTRFS_INODE_REF_KEY &&
+		    key.type != BTRFS_INODE_EXTREF_KEY)
+			break;
+
+		ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
+					record_parent_ref, &ctx);
+		if (ret < 0)
+			goto out;
+
+		path->slots[0]++;
+	}
+
+	while (!list_empty(&deleted_refs)) {
+		struct recorded_ref *ref;
+
+		ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
+		ret = send_unlink(sctx, ref->full_path);
+		if (ret < 0)
+			goto out;
+		fs_path_free(ref->full_path);
+		list_del(&ref->list);
+		kfree(ref);
+	}
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	if (ret)
+		__free_recorded_refs(&deleted_refs);
+	return ret;
+}
+
 static int changed_inode(struct send_ctx *sctx,
 			 enum btrfs_compare_tree_result result)
 {
@@ -5928,6 +6019,7 @@ static int changed_inode(struct send_ctx *sctx,
 	sctx->cur_inode_new_gen = 0;
 	sctx->cur_inode_last_extent = (u64)-1;
 	sctx->cur_inode_next_write_offset = 0;
+	sctx->ignore_cur_inode = false;
 
 	/*
 	 * Set send_progress to current inode. This will tell all get_cur_xxx
@@ -5968,6 +6060,33 @@ static int changed_inode(struct send_ctx *sctx,
 			sctx->cur_inode_new_gen = 1;
 	}
 
+	/*
+	 * Normally we do not find inodes with a link count of zero (orphans)
+	 * because the most common case is to create a snapshot and use it
+	 * for a send operation. However other less common use cases involve
+	 * using a subvolume and send it after turning it to RO mode just
+	 * after deleting all hard links of a file while holding an open
+	 * file descriptor against it or turning a RO snapshot into RW mode,
+	 * keep an open file descriptor against a file, delete it and then
+	 * turn the snapshot back to RO mode before using it for a send
+	 * operation. So if we find such cases, ignore the inode and all its
+	 * items completely if it's a new inode, or if it's a changed inode
+	 * make sure all its previous paths (from the parent snapshot) are all
+	 * unlinked and all the other inode items are ignored.
+	 */
+	if (result == BTRFS_COMPARE_TREE_NEW ||
+	    result == BTRFS_COMPARE_TREE_CHANGED) {
+		u32 nlinks;
+
+		nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
+		if (nlinks == 0) {
+			sctx->ignore_cur_inode = true;
+			if (result == BTRFS_COMPARE_TREE_CHANGED)
+				ret = btrfs_unlink_all_paths(sctx);
+			goto out;
+		}
+	}
+
 	if (result == BTRFS_COMPARE_TREE_NEW) {
 		sctx->cur_inode_gen = left_gen;
 		sctx->cur_inode_new = 1;
@@ -6306,15 +6425,17 @@ static int changed_cb(struct btrfs_path *left_path,
 	    key->objectid == BTRFS_FREE_SPACE_OBJECTID)
 		goto out;
 
-	if (key->type == BTRFS_INODE_ITEM_KEY)
+	if (key->type == BTRFS_INODE_ITEM_KEY) {
 		ret = changed_inode(sctx, result);
-	else if (key->type == BTRFS_INODE_REF_KEY ||
-		 key->type == BTRFS_INODE_EXTREF_KEY)
-		ret = changed_ref(sctx, result);
-	else if (key->type == BTRFS_XATTR_ITEM_KEY)
-		ret = changed_xattr(sctx, result);
-	else if (key->type == BTRFS_EXTENT_DATA_KEY)
-		ret = changed_extent(sctx, result);
+	} else if (!sctx->ignore_cur_inode) {
+		if (key->type == BTRFS_INODE_REF_KEY ||
+		    key->type == BTRFS_INODE_EXTREF_KEY)
+			ret = changed_ref(sctx, result);
+		else if (key->type == BTRFS_XATTR_ITEM_KEY)
+			ret = changed_xattr(sctx, result);
+		else if (key->type == BTRFS_EXTENT_DATA_KEY)
+			ret = changed_extent(sctx, result);
+	}
 
 out:
 	return ret;
-- 
cgit v1.2.3


From b5851021f1ea0e099543fa125744368de7e9b865 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 24 Jul 2018 17:19:48 +0200
Subject: btrfs: extent-tree: remove unused member walk_control::for_reloc

Leftover after fix e339a6b097c5 ("Btrfs: __btrfs_mod_ref should always
use no_quota"), that removed it from the function calls but not the
structure.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2eb2d7ac0ba4..fcaf598f4222 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8330,7 +8330,6 @@ struct walk_control {
 	int keep_locks;
 	int reada_slot;
 	int reada_count;
-	int for_reloc;
 };
 
 #define DROP_REFERENCE	1
@@ -8967,7 +8966,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 	wc->stage = DROP_REFERENCE;
 	wc->update_ref = update_ref;
 	wc->keep_locks = 0;
-	wc->for_reloc = for_reloc;
 	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
 
 	while (1) {
@@ -9133,7 +9131,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 	wc->stage = DROP_REFERENCE;
 	wc->update_ref = 0;
 	wc->keep_locks = 1;
-	wc->for_reloc = 1;
 	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
 
 	while (1) {
-- 
cgit v1.2.3


From 4465c8b4229de50f17b6780677ce068b3582a55d Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 1 Aug 2018 11:32:25 +0800
Subject: btrfs: Remove fs_info from btrfs_insert_delayed_dir_index

It can be referenced from the passed transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-inode.c | 4 +---
 fs/btrfs/delayed-inode.h | 1 -
 fs/btrfs/dir-item.c      | 4 ++--
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 596d2af0c8aa..5d103eda1874 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1418,7 +1418,6 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
 
 /* Will return 0 or -ENOMEM */
 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info,
 				   const char *name, int name_len,
 				   struct btrfs_inode *dir,
 				   struct btrfs_disk_key *disk_key, u8 type,
@@ -1458,11 +1457,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
 	 */
 	BUG_ON(ret);
 
-
 	mutex_lock(&delayed_node->mutex);
 	ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
 	if (unlikely(ret)) {
-		btrfs_err(fs_info,
+		btrfs_err(trans->fs_info,
 			  "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
 			  name_len, name, delayed_node->root->objectid,
 			  delayed_node->inode_id, ret);
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index ca7a97f3ab6b..44558ededcf5 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -86,7 +86,6 @@ static inline void btrfs_init_delayed_root(
 }
 
 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info,
 				   const char *name, int name_len,
 				   struct btrfs_inode *dir,
 				   struct btrfs_disk_key *disk_key, u8 type,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 39e9766d1cbd..a678b07fcf01 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -160,8 +160,8 @@ second_insert:
 	}
 	btrfs_release_path(path);
 
-	ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name,
-			name_len, dir, &disk_key, type, index);
+	ret2 = btrfs_insert_delayed_dir_index(trans, name, name_len, dir,
+					      &disk_key, type, index);
 out_free:
 	btrfs_free_path(path);
 	if (ret)
-- 
cgit v1.2.3


From 9add29457ac124e846cde4b875c632edaa722c08 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 1 Aug 2018 11:32:26 +0800
Subject: btrfs: Remove fs_info from btrfs_delete_delayed_dir_index

It can be referenced from the passed transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-inode.c | 6 +++---
 fs/btrfs/delayed-inode.h | 1 -
 fs/btrfs/inode.c         | 4 ++--
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5d103eda1874..f51b509f2d9b 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1493,7 +1493,6 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
 }
 
 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info,
 				   struct btrfs_inode *dir, u64 index)
 {
 	struct btrfs_delayed_node *node;
@@ -1509,7 +1508,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
 	item_key.type = BTRFS_DIR_INDEX_KEY;
 	item_key.offset = index;
 
-	ret = btrfs_delete_delayed_insertion_item(fs_info, node, &item_key);
+	ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
+						  &item_key);
 	if (!ret)
 		goto end;
 
@@ -1531,7 +1531,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
 	mutex_lock(&node->mutex);
 	ret = __btrfs_add_delayed_deletion_item(node, item);
 	if (unlikely(ret)) {
-		btrfs_err(fs_info,
+		btrfs_err(trans->fs_info,
 			  "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
 			  index, node->root->objectid, node->inode_id, ret);
 		BUG();
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 44558ededcf5..33536cd681d4 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -92,7 +92,6 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
 				   u64 index);
 
 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info,
 				   struct btrfs_inode *dir, u64 index);
 
 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 472457795486..59b3ea32b6de 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3978,7 +3978,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 		goto err;
 	}
 skip_backref:
-	ret = btrfs_delete_delayed_dir_index(trans, fs_info, dir, index);
+	ret = btrfs_delete_delayed_dir_index(trans, dir, index);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto err;
@@ -4144,7 +4144,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 	}
 	btrfs_release_path(path);
 
-	ret = btrfs_delete_delayed_dir_index(trans, fs_info, BTRFS_I(dir), index);
+	ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto out;
-- 
cgit v1.2.3


From ab9ce7d42bf66f0750a4ca4a228a2db238376afb Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 1 Aug 2018 11:32:27 +0800
Subject: btrfs: Remove fs_info from btrfs_del_root

It can be referenced from the passed transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h           | 2 +-
 fs/btrfs/extent-tree.c     | 2 +-
 fs/btrfs/free-space-tree.c | 2 +-
 fs/btrfs/qgroup.c          | 2 +-
 fs/btrfs/root-tree.c       | 4 ++--
 5 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2e32584c635f..9ef47a171e2f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2993,7 +2993,7 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
 		       u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
 		       const char *name, int name_len);
 int btrfs_del_root(struct btrfs_trans_handle *trans,
-		   struct btrfs_fs_info *fs_info, const struct btrfs_key *key);
+		   const struct btrfs_key *key);
 int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		      const struct btrfs_key *key,
 		      struct btrfs_root_item *item);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fcaf598f4222..042dd4186fb8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9028,7 +9028,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 	if (err)
 		goto out_end_trans;
 
-	ret = btrfs_del_root(trans, fs_info, &root->root_key);
+	ret = btrfs_del_root(trans, &root->root_key);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		err = ret;
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index b5950aacd697..d6736595ec57 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1236,7 +1236,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
 	if (ret)
 		goto abort;
 
-	ret = btrfs_del_root(trans, fs_info, &free_space_root->root_key);
+	ret = btrfs_del_root(trans, &free_space_root->root_key);
 	if (ret)
 		goto abort;
 
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index e11ed7b3f32a..4353bb69bb86 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1088,7 +1088,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
 		goto end_trans;
 	}
 
-	ret = btrfs_del_root(trans, fs_info, &quota_root->root_key);
+	ret = btrfs_del_root(trans, &quota_root->root_key);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto end_trans;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index c451285976ac..f7c14c454f91 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -320,9 +320,9 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
 
 /* drop the root item for 'key' from the tree root */
 int btrfs_del_root(struct btrfs_trans_handle *trans,
-		   struct btrfs_fs_info *fs_info, const struct btrfs_key *key)
+		   const struct btrfs_key *key)
 {
-	struct btrfs_root *root = fs_info->tree_root;
+	struct btrfs_root *root = trans->fs_info->tree_root;
 	struct btrfs_path *path;
 	int ret;
 
-- 
cgit v1.2.3


From 3ee1c5530e649182e602eb2f81193289c4d2e655 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 1 Aug 2018 11:32:28 +0800
Subject: btrfs: Remove fs_info from btrfs_del_root_ref

It can be referenced from the passed transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h     | 7 +++----
 fs/btrfs/inode.c     | 8 +++-----
 fs/btrfs/root-tree.c | 9 ++++-----
 3 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9ef47a171e2f..3677082ddf4c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2988,10 +2988,9 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
 		       struct btrfs_fs_info *fs_info,
 		       u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
 		       const char *name, int name_len);
-int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info,
-		       u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
-		       const char *name, int name_len);
+int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+		       u64 ref_id, u64 dirid, u64 *sequence, const char *name,
+		       int name_len);
 int btrfs_del_root(struct btrfs_trans_handle *trans,
 		   const struct btrfs_key *key);
 int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 59b3ea32b6de..7bf4a8d07e1e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4085,7 +4085,6 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 			struct inode *dir, u64 objectid,
 			const char *name, int name_len)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	struct btrfs_dir_item *di;
@@ -4118,9 +4117,8 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 	}
 	btrfs_release_path(path);
 
-	ret = btrfs_del_root_ref(trans, fs_info, objectid,
-				 root->root_key.objectid, dir_ino,
-				 &index, name, name_len);
+	ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid,
+				 dir_ino, &index, name, name_len);
 	if (ret < 0) {
 		if (ret != -ENOENT) {
 			btrfs_abort_transaction(trans, ret);
@@ -6439,7 +6437,7 @@ fail_dir_item:
 	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
 		u64 local_index;
 		int err;
-		err = btrfs_del_root_ref(trans, fs_info, key.objectid,
+		err = btrfs_del_root_ref(trans, key.objectid,
 					 root->root_key.objectid, parent_ino,
 					 &local_index, name, name_len);
 
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index f7c14c454f91..52fa133ab53c 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -341,13 +341,12 @@ out:
 	return ret;
 }
 
-int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info,
-		       u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
-		       const char *name, int name_len)
+int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+		       u64 ref_id, u64 dirid, u64 *sequence, const char *name,
+		       int name_len)
 
 {
-	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_root *tree_root = trans->fs_info->tree_root;
 	struct btrfs_path *path;
 	struct btrfs_root_ref *ref;
 	struct extent_buffer *leaf;
-- 
cgit v1.2.3


From 6025c19fb208e93b99eafc304e7f16160e49fc88 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 1 Aug 2018 11:32:29 +0800
Subject: btrfs: Remove fs_info from btrfs_add_root_ref

It can be referenced from the passed transaction handle.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       | 7 +++----
 fs/btrfs/inode.c       | 3 +--
 fs/btrfs/ioctl.c       | 3 +--
 fs/btrfs/root-tree.c   | 9 ++++-----
 fs/btrfs/transaction.c | 2 +-
 5 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3677082ddf4c..c275ea258f9a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2984,10 +2984,9 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
 
 /* root-item.c */
-int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info,
-		       u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
-		       const char *name, int name_len);
+int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+		       u64 ref_id, u64 dirid, u64 sequence, const char *name,
+		       int name_len);
 int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
 		       u64 ref_id, u64 dirid, u64 *sequence, const char *name,
 		       int name_len);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7bf4a8d07e1e..407d068d4208 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6385,7 +6385,6 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 		   struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
 		   const char *name, int name_len, int add_backref, u64 index)
 {
-	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int ret = 0;
 	struct btrfs_key key;
 	struct btrfs_root *root = parent_inode->root;
@@ -6401,7 +6400,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 	}
 
 	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
-		ret = btrfs_add_root_ref(trans, fs_info, key.objectid,
+		ret = btrfs_add_root_ref(trans, key.objectid,
 					 root->root_key.objectid, parent_ino,
 					 index, name, name_len);
 	} else if (add_backref) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 80fe3c654612..6eaadddaca9f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -698,8 +698,7 @@ static noinline int create_subvol(struct inode *dir,
 	ret = btrfs_update_inode(trans, root, dir);
 	BUG_ON(ret);
 
-	ret = btrfs_add_root_ref(trans, fs_info,
-				 objectid, root->root_key.objectid,
+	ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
 				 btrfs_ino(BTRFS_I(dir)), index, name, namelen);
 	BUG_ON(ret);
 
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 52fa133ab53c..65bda0682928 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -412,12 +412,11 @@ out:
  *
  * Will return 0, -ENOMEM, or anything from the CoW path
  */
-int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
-		       struct btrfs_fs_info *fs_info,
-		       u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
-		       const char *name, int name_len)
+int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+		       u64 ref_id, u64 dirid, u64 sequence, const char *name,
+		       int name_len)
 {
-	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_root *tree_root = trans->fs_info->tree_root;
 	struct btrfs_key key;
 	int ret;
 	struct btrfs_path *path;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index aec208cbff00..001ed1bc2aa8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1573,7 +1573,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	/*
 	 * insert root back/forward references
 	 */
-	ret = btrfs_add_root_ref(trans, fs_info, objectid,
+	ret = btrfs_add_root_ref(trans, objectid,
 				 parent_root->root_key.objectid,
 				 btrfs_ino(BTRFS_I(parent_inode)), index,
 				 dentry->d_name.name, dentry->d_name.len);
-- 
cgit v1.2.3


From 401b3b19d592391b718b7a17295ae34eace973dd Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 1 Aug 2018 11:32:30 +0800
Subject: btrfs: Remove root parameter from btrfs_unlink_subvol

All callers pass the root tree of dir, so we can push that down to the
function itself.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 407d068d4208..b8c131b82978 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4081,10 +4081,10 @@ out:
 }
 
 static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root,
-			struct inode *dir, u64 objectid,
-			const char *name, int name_len)
+			       struct inode *dir, u64 objectid,
+			       const char *name, int name_len)
 {
+	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	struct btrfs_dir_item *di;
@@ -4335,10 +4335,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
 
 	btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
 
-	ret = btrfs_unlink_subvol(trans, root, dir,
-				dest->root_key.objectid,
-				dentry->d_name.name,
-				dentry->d_name.len);
+	ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid,
+				  dentry->d_name.name, dentry->d_name.len);
 	if (ret) {
 		err = ret;
 		btrfs_abort_transaction(trans, ret);
@@ -4433,7 +4431,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 		return PTR_ERR(trans);
 
 	if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
-		err = btrfs_unlink_subvol(trans, root, dir,
+		err = btrfs_unlink_subvol(trans, dir,
 					  BTRFS_I(inode)->location.objectid,
 					  dentry->d_name.name,
 					  dentry->d_name.len);
@@ -9505,8 +9503,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	/* src is a subvolume */
 	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
 		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
-		ret = btrfs_unlink_subvol(trans, root, old_dir,
-					  root_objectid,
+		ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
 					  old_dentry->d_name.name,
 					  old_dentry->d_name.len);
 	} else { /* src is an inode */
@@ -9525,8 +9522,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	/* dest is a subvolume */
 	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
 		root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
-		ret = btrfs_unlink_subvol(trans, dest, new_dir,
-					  root_objectid,
+		ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
 					  new_dentry->d_name.name,
 					  new_dentry->d_name.len);
 	} else { /* dest is an inode */
@@ -9786,7 +9782,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
 		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
-		ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
+		ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
 					old_dentry->d_name.name,
 					old_dentry->d_name.len);
 	} else {
@@ -9808,8 +9804,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
 			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
 			root_objectid = BTRFS_I(new_inode)->location.objectid;
-			ret = btrfs_unlink_subvol(trans, dest, new_dir,
-						root_objectid,
+			ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
 						new_dentry->d_name.name,
 						new_dentry->d_name.len);
 			BUG_ON(new_inode->i_nlink == 0);
-- 
cgit v1.2.3


From 5b7d687ad5913a56b6a8788435d7a53990b4176d Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 1 Aug 2018 11:32:31 +0800
Subject: btrfs: Remove redundant btrfs_release_path from btrfs_unlink_subvol

Although it is safe to call this on already released paths with no locks
held or extent buffers, removing the redundant btrfs_release_path is
reasonable.

Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b8c131b82978..7e56a203c109 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4137,7 +4137,6 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 
 		leaf = path->nodes[0];
 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-		btrfs_release_path(path);
 		index = key.offset;
 	}
 	btrfs_release_path(path);
-- 
cgit v1.2.3


From afc6961ffd96ee7e2a86e70e9691a008eadf2926 Mon Sep 17 00:00:00 2001
From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Date: Thu, 26 Jul 2018 10:22:58 +0900
Subject: btrfs: backref: Use ERR_CAST to return error code

Use ERR_CAST() instead of a (void *) cast to make the meaning clear.

Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/backref.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 60f4afa8ecbc..ae750b1574a2 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -2225,7 +2225,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
 
 	fspath = init_data_container(total_bytes);
 	if (IS_ERR(fspath))
-		return (void *)fspath;
+		return ERR_CAST(fspath);
 
 	ifp = kmalloc(sizeof(*ifp), GFP_KERNEL);
 	if (!ifp) {
-- 
cgit v1.2.3


From 85c39548199b966062578fb99d4d4ecdeae2afae Mon Sep 17 00:00:00 2001
From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Date: Thu, 26 Jul 2018 11:40:54 +0900
Subject: btrfs: extent-tree: Remove unused __btrfs_free_block_rsv

There is no user of this function anymore.

Its removal was overlooked in commit a575ceeb1338
("Btrfs: get rid of unused orphan infrastructure").

Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       | 1 -
 fs/btrfs/extent-tree.c | 5 -----
 2 files changed, 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c275ea258f9a..318be7864072 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2761,7 +2761,6 @@ void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
 				   unsigned short type);
 void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
 			  struct btrfs_block_rsv *rsv);
-void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
 int btrfs_block_rsv_add(struct btrfs_root *root,
 			struct btrfs_block_rsv *block_rsv, u64 num_bytes,
 			enum btrfs_reserve_flush_enum flush);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 042dd4186fb8..9e7b237b9547 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5529,11 +5529,6 @@ void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
 	kfree(rsv);
 }
 
-void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
-{
-	kfree(rsv);
-}
-
 int btrfs_block_rsv_add(struct btrfs_root *root,
 			struct btrfs_block_rsv *block_rsv, u64 num_bytes,
 			enum btrfs_reserve_flush_enum flush)
-- 
cgit v1.2.3


From 1e7e1f9e3aba00c9b9c323bfeeddafe69ff21ff6 Mon Sep 17 00:00:00 2001
From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Date: Tue, 31 Jul 2018 16:20:21 +0900
Subject: btrfs: replace: Reset on-disk dev stats value after replace

The on-disk dev stats value is updated in btrfs_run_dev_stats(),
which is called during transaction commit, if device->dev_stats_ccnt
is not zero.

Since the current replace operation does not touch dev_stats_ccnt, the
on-disk dev stats value is not updated. Therefore "btrfs device stats"
may return the old device's values after umount/mount
(Example: See "btrfs ins dump-t -t DEV $DEV" after btrfs/100 finishes).

Fix this by incrementing dev_stats_ccnt in
btrfs_dev_replace_finishing() when the replace has succeeded, so that
the on-disk values get updated.

Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/dev-replace.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5a72f9933e58..dec01970d8c5 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -671,6 +671,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 
 	btrfs_rm_dev_replace_unblocked(fs_info);
 
+	/*
+	 * Increment dev_stats_ccnt so that btrfs_run_dev_stats() will
+	 * update on-disk dev stats value during commit transaction
+	 */
+	atomic_inc(&tgt_device->dev_stats_ccnt);
+
 	/*
 	 * this is again a consistent state where no dev_replace procedure
 	 * is running, the target device is part of the filesystem, the
-- 
cgit v1.2.3


From 2e19f1f9d31b3515356710b8bdfb655f47a98448 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Jul 2018 23:04:45 +0100
Subject: btrfs: btrfs_iget never returns an is_bad_inode inode

Just get rid of pointless checks.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/free-space-cache.c | 4 ----
 fs/btrfs/relocation.c       | 7 ++-----
 fs/btrfs/tree-log.c         | 6 +-----
 3 files changed, 3 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 57e7ab7f5e03..0adf38b00fa0 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -71,10 +71,6 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
 	inode = btrfs_iget(fs_info->sb, &location, root, NULL);
 	if (IS_ERR(inode))
 		return inode;
-	if (is_bad_inode(inode)) {
-		iput(inode);
-		return ERR_PTR(-ENOENT);
-	}
 
 	mapping_set_gfp_mask(inode->i_mapping,
 			mapping_gfp_constraint(inode->i_mapping,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index d6bcf558789e..8783a1776540 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3470,11 +3470,8 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
 	key.offset = 0;
 
 	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
-	if (IS_ERR(inode) || is_bad_inode(inode)) {
-		if (!IS_ERR(inode))
-			iput(inode);
+	if (IS_ERR(inode))
 		return -ENOENT;
-	}
 
 truncate:
 	ret = btrfs_check_trunc_cache_free_space(fs_info,
@@ -4155,7 +4152,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
 	key.type = BTRFS_INODE_ITEM_KEY;
 	key.offset = 0;
 	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
-	BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
+	BUG_ON(IS_ERR(inode));
 	BTRFS_I(inode)->index_cnt = group->key.objectid;
 
 	err = btrfs_orphan_add(trans, BTRFS_I(inode));
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 033aeebbe9de..730b97dee955 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -545,12 +545,8 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
 	key.type = BTRFS_INODE_ITEM_KEY;
 	key.offset = 0;
 	inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
-	if (IS_ERR(inode)) {
+	if (IS_ERR(inode))
 		inode = NULL;
-	} else if (is_bad_inode(inode)) {
-		iput(inode);
-		inode = NULL;
-	}
 	return inode;
 }
 
-- 
cgit v1.2.3


From 8d9e220ca0844bf75b98cb5b8e2c25d203c0d0f6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Jul 2018 23:04:46 +0100
Subject: btrfs: simplify IS_ERR/PTR_ERR checks

IS_ERR(p) && PTR_ERR(p) == n is a weird way to spell p == ERR_PTR(n).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Reviewed-by: David Sterba <dsterba@suse.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
[ update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/transaction.c | 2 +-
 fs/btrfs/tree-log.c    | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 001ed1bc2aa8..3b84f5015029 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -680,7 +680,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
 
 	trans = start_transaction(root, 0, TRANS_ATTACH,
 				  BTRFS_RESERVE_NO_FLUSH, true);
-	if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
+	if (trans == ERR_PTR(-ENOENT))
 		btrfs_wait_for_commit(root->fs_info, 0);
 
 	return trans;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 730b97dee955..1650dc44a5e3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2181,7 +2181,7 @@ again:
 						     dir_key->offset,
 						     name, name_len, 0);
 		}
-		if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) {
+		if (!log_di || log_di == ERR_PTR(-ENOENT)) {
 			btrfs_dir_item_key_to_cpu(eb, di, &location);
 			btrfs_release_path(path);
 			btrfs_release_path(log_path);
@@ -5011,8 +5011,7 @@ again:
 				 * we don't need to do more work nor fallback to
 				 * a transaction commit.
 				 */
-				if (IS_ERR(other_inode) &&
-				    PTR_ERR(other_inode) == -ENOENT) {
+				if (other_inode == ERR_PTR(-ENOENT)) {
 					goto next_key;
 				} else if (IS_ERR(other_inode)) {
 					err = PTR_ERR(other_inode);
-- 
cgit v1.2.3


From 9bc2ceff660580454f971ed3f891a2c82085433a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Jul 2018 23:04:50 +0100
Subject: btrfs: lift make_bad_inode into btrfs_iget

We don't need to check is_bad_inode() after the call of
btrfs_read_locked_inode() - it's exactly the same as checking return
value for being non-zero.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Reviewed-by: David Sterba <dsterba@suse.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7e56a203c109..cd99ce8583a8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3771,7 +3771,6 @@ cache_acl:
 
 make_bad:
 	btrfs_free_path(path);
-	make_bad_inode(inode);
 	return ret;
 }
 
@@ -5697,12 +5696,13 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 		int ret;
 
 		ret = btrfs_read_locked_inode(inode);
-		if (!is_bad_inode(inode)) {
+		if (!ret) {
 			inode_tree_add(inode);
 			unlock_new_inode(inode);
 			if (new)
 				*new = 1;
 		} else {
+			make_bad_inode(inode);
 			unlock_new_inode(inode);
 			iput(inode);
 			ASSERT(ret < 0);
-- 
cgit v1.2.3


From f5b3a4173ff624b766c56936bb315e1517603891 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Jul 2018 23:04:51 +0100
Subject: btrfs: simplify btrfs_iget

Don't open-code iget_failed(), don't bother with btrfs_free_path(NULL),
move handling of positive return values of btrfs_lookup_inode() from
btrfs_read_locked_inode() to btrfs_iget() and kill now obviously
pointless ASSERT() in there.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cd99ce8583a8..3f51ddc18f98 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3604,18 +3604,15 @@ static int btrfs_read_locked_inode(struct inode *inode)
 		filled = true;
 
 	path = btrfs_alloc_path();
-	if (!path) {
-		ret = -ENOMEM;
-		goto make_bad;
-	}
+	if (!path)
+		return -ENOMEM;
 
 	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
 	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
 	if (ret) {
-		if (ret > 0)
-			ret = -ENOENT;
-		goto make_bad;
+		btrfs_free_path(path);
+		return ret;
 	}
 
 	leaf = path->nodes[0];
@@ -3768,10 +3765,6 @@ cache_acl:
 
 	btrfs_sync_inode_flags_to_i_flags(inode);
 	return 0;
-
-make_bad:
-	btrfs_free_path(path);
-	return ret;
 }
 
 /*
@@ -5702,11 +5695,15 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 			if (new)
 				*new = 1;
 		} else {
-			make_bad_inode(inode);
-			unlock_new_inode(inode);
-			iput(inode);
-			ASSERT(ret < 0);
-			inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
+			iget_failed(inode);
+			/*
+			 * ret > 0 can come from btrfs_search_slot called by
+			 * btrfs_read_locked_inode, this means the inode item
+			 * was not found.
+			 */
+			if (ret > 0)
+				ret = -ENOENT;
+			inode = ERR_PTR(ret);
 		}
 	}
 
-- 
cgit v1.2.3


From 672d599041c862dd61a1576c32e946ef0d77aa34 Mon Sep 17 00:00:00 2001
From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Date: Thu, 2 Aug 2018 16:19:07 +0900
Subject: btrfs: Use wrapper macro for rcu string to remove duplicate code

Cleanup patch with no functional changes.

Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c   |  6 ++----
 fs/btrfs/scrub.c   |  8 ++------
 fs/btrfs/super.c   |  9 +++------
 fs/btrfs/volumes.c | 20 +++++---------------
 4 files changed, 12 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6eaadddaca9f..d3a5d2a41e5f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3157,10 +3157,8 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
 	di_args->total_bytes = btrfs_device_get_total_bytes(dev);
 	memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
 	if (dev->name) {
-		struct rcu_string *name;
-
-		name = rcu_dereference(dev->name);
-		strncpy(di_args->path, name->str, sizeof(di_args->path) - 1);
+		strncpy(di_args->path, rcu_str_deref(dev->name),
+				sizeof(di_args->path) - 1);
 		di_args->path[sizeof(di_args->path) - 1] = 0;
 	} else {
 		di_args->path[0] = '\0';
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c4eb9eca13b8..3be1456b5116 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3779,7 +3779,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	struct scrub_ctx *sctx;
 	int ret;
 	struct btrfs_device *dev;
-	struct rcu_string *name;
 
 	if (btrfs_fs_closing(fs_info))
 		return -EINVAL;
@@ -3833,11 +3832,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	if (!is_dev_replace && !readonly &&
 	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-		rcu_read_lock();
-		name = rcu_dereference(dev->name);
-		btrfs_err(fs_info, "scrub: device %s is not writable",
-			  name->str);
-		rcu_read_unlock();
+		btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
+				rcu_str_deref(dev->name));
 		return -EROFS;
 	}
 
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 67de3c0fc85b..6601c9aa5e35 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2303,7 +2303,6 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 	struct btrfs_fs_devices *cur_devices;
 	struct btrfs_device *dev, *first_dev = NULL;
 	struct list_head *head;
-	struct rcu_string *name;
 
 	/*
 	 * Lightweight locking of the devices. We should not need
@@ -2327,12 +2326,10 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 		cur_devices = cur_devices->seed;
 	}
 
-	if (first_dev) {
-		name = rcu_dereference(first_dev->name);
-		seq_escape(m, name->str, " \t\n\\");
-	} else {
+	if (first_dev)
+		seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
+	else
 		WARN_ON(1);
-	}
 	rcu_read_unlock();
 	return 0;
 }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d86346f83a08..74977203fc85 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6094,21 +6094,11 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
 	btrfs_io_bio(bio)->stripe_index = dev_nr;
 	bio->bi_end_io = btrfs_end_bio;
 	bio->bi_iter.bi_sector = physical >> 9;
-#ifdef DEBUG
-	{
-		struct rcu_string *name;
-
-		rcu_read_lock();
-		name = rcu_dereference(dev->name);
-		btrfs_debug(fs_info,
-			"btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
-			bio_op(bio), bio->bi_opf,
-			(u64)bio->bi_iter.bi_sector,
-			(u_long)dev->bdev->bd_dev, name->str, dev->devid,
-			bio->bi_iter.bi_size);
-		rcu_read_unlock();
-	}
-#endif
+	btrfs_debug_in_rcu(fs_info,
+	"btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
+		bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
+		(u_long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid,
+		bio->bi_iter.bi_size);
 	bio_set_dev(bio, dev->bdev);
 
 	btrfs_bio_counter_inc_noblocked(fs_info);
-- 
cgit v1.2.3


From 22d3151c2c4cb517a309154d1e828a28106508c7 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 30 Jul 2018 12:39:58 +0100
Subject: Btrfs: send, fix incorrect file layout after hole punching beyond eof

When doing an incremental send, if we have a file in the parent snapshot
that has prealloc extents beyond EOF and in the send snapshot it got a
hole punch that partially covers the prealloc extents, the send stream,
when replayed by a receiver, can result in a file that has a size bigger
than it should and filled with zeroes past the correct EOF.

For example:

  $ mkfs.btrfs -f /dev/sdb
  $ mount /dev/sdb /mnt

  $ xfs_io -f -c "falloc -k 0 4M" /mnt/foobar
  $ xfs_io -c "pwrite -S 0xea 0 1M" /mnt/foobar

  $ btrfs subvolume snapshot -r /mnt /mnt/snap1
  $ btrfs send -f /tmp/1.send /mnt/snap1

  $ xfs_io -c "fpunch 1M 2M" /mnt/foobar

  $ btrfs subvolume snapshot -r /mnt /mnt/snap2
  $ btrfs send -f /tmp/2.send -p /mnt/snap1 /mnt/snap2

  $ stat --format %s /mnt/snap2/foobar
  1048576
  $ md5sum /mnt/snap2/foobar
  d31659e82e87798acd4669a1e0a19d4f  /mnt/snap2/foobar

  $ umount /mnt
  $ mkfs.btrfs -f /dev/sdc
  $ mount /dev/sdc /mnt

  $ btrfs receive -f /tmp/1.send /mnt
  $ btrfs receive -f /tmp/2.send /mnt

  $ stat --format %s /mnt/snap2/foobar
  3145728
  # --> should be 1Mb and not 3Mb (which was the end offset of hole
  #     punch operation)
  $ md5sum /mnt/snap2/foobar
  117baf295297c2a995f92da725b0b651  /mnt/snap2/foobar
  # --> should be d31659e82e87798acd4669a1e0a19d4f as in the original fs

This issue actually happens only since commit ffa7c4296e93 ("Btrfs: send,
do not issue unnecessary truncate operations"), but before that commit we
were issuing a write operation full of zeroes (to "punch" a hole) which
was extending the file size beyond the correct value, and then immediately
issuing a truncate operation to the correct size, undoing the previous
write operation. Since the send protocol does not support fallocate, for
extent preallocation and hole punching, fix this by not even attempting
to send a "hole" (regular write full of zeroes) if it starts at an offset
greater than or equal to the file's size. This approach, besides being
much simpler than making send issue the truncate operation, adds the
benefit of avoiding the useless pair of write of zeroes and truncate
operations, saving time and IO at the receiver and reducing the size of
the send stream.

A test case for fstests follows soon.

Fixes: ffa7c4296e93 ("Btrfs: send, do not issue unnecessary truncate operations")
CC: stable@vger.kernel.org # 4.17+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/send.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 551294a6c9e2..ba8950bfd9c7 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5007,6 +5007,15 @@ static int send_hole(struct send_ctx *sctx, u64 end)
 	u64 len;
 	int ret = 0;
 
+	/*
+	 * A hole that starts at EOF or beyond it. Since we do not yet support
+	 * fallocate (for extent preallocation and hole punching), sending a
+	 * write of zeroes starting at EOF or beyond would later require issuing
+	 * a truncate operation which would undo the write and achieve nothing.
+	 */
+	if (offset >= sctx->cur_inode_size)
+		return 0;
+
 	if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
 		return send_update_extent(sctx, offset, end - offset);
 
-- 
cgit v1.2.3


From 514c7dca85a0bf40be984dab0b477403a6db901f Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 1 Aug 2018 10:37:16 +0800
Subject: btrfs: Check that each block group has corresponding chunk at mount
 time

A crafted btrfs image with incorrect chunk<->block group mapping will
trigger a lot of unexpected things as the mapping is essential.

Although the problem can be caught by block group item checker
added in "btrfs: tree-checker: Verify block_group_item", it's still not
sufficient.  A sufficiently valid block group item can pass the check
added by the mentioned patch but could fail to match the existing chunk.

This patch will add extra block group -> chunk mapping check, to ensure
we have a completely matching (start, len, flags) chunk for each block
group at mount time.

Here we reuse the original helper find_first_block_group(), which is
already doing the basic bg -> chunk checks, adding further checks of the
start/len and type flags.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=199837
Reported-by: Xu Wen <wen.xu@gatech.edu>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Su Yue <suy.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9e7b237b9547..54a0cfef5c5c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9532,6 +9532,8 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
 	int ret = 0;
 	struct btrfs_key found_key;
 	struct extent_buffer *leaf;
+	struct btrfs_block_group_item bg;
+	u64 flags;
 	int slot;
 
 	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
@@ -9566,8 +9568,32 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
 			"logical %llu len %llu found bg but no related chunk",
 					  found_key.objectid, found_key.offset);
 				ret = -ENOENT;
+			} else if (em->start != found_key.objectid ||
+				   em->len != found_key.offset) {
+				btrfs_err(fs_info,
+		"block group %llu len %llu mismatch with chunk %llu len %llu",
+					  found_key.objectid, found_key.offset,
+					  em->start, em->len);
+				ret = -EUCLEAN;
 			} else {
-				ret = 0;
+				read_extent_buffer(leaf, &bg,
+					btrfs_item_ptr_offset(leaf, slot),
+					sizeof(bg));
+				flags = btrfs_block_group_flags(&bg) &
+					BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+				if (flags != (em->map_lookup->type &
+					      BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+					btrfs_err(fs_info,
+"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
+						found_key.objectid,
+						found_key.offset, flags,
+						(BTRFS_BLOCK_GROUP_TYPE_MASK &
+						 em->map_lookup->type));
+					ret = -EUCLEAN;
+				} else {
+					ret = 0;
+				}
 			}
 			free_extent_map(em);
 			goto out;
-- 
cgit v1.2.3


From 7ef49515fa6727cb4b6f2f5b0ffbc5fc20a9f8c6 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 1 Aug 2018 10:37:17 +0800
Subject: btrfs: Verify that every chunk has corresponding block group at mount
 time

If a crafted image has missing block group items, it could cause
unexpected behavior and break the assumption of a 1:1 chunk<->block group
mapping.

Although we have the block group -> chunk mapping check, we still need
chunk -> block group mapping check.

This patch adds an extra check to ensure each chunk has its
corresponding block group.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847
Reported-by: Xu Wen <wen.xu@gatech.edu>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Gu Jinxiang <gujx@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 54a0cfef5c5c..de6f75f5547b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9844,6 +9844,62 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
 	return cache;
 }
 
+
+/*
+ * Iterate all chunks and verify that each of them has the corresponding block
+ * group
+ */
+static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+	struct extent_map *em;
+	struct btrfs_block_group_cache *bg;
+	u64 start = 0;
+	int ret = 0;
+
+	while (1) {
+		read_lock(&map_tree->map_tree.lock);
+		/*
+		 * lookup_extent_mapping will return the first extent map
+		 * intersecting the range, so setting @len to 1 is enough to
+		 * get the first chunk.
+		 */
+		em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
+		read_unlock(&map_tree->map_tree.lock);
+		if (!em)
+			break;
+
+		bg = btrfs_lookup_block_group(fs_info, em->start);
+		if (!bg) {
+			btrfs_err(fs_info,
+	"chunk start=%llu len=%llu doesn't have corresponding block group",
+				     em->start, em->len);
+			ret = -EUCLEAN;
+			free_extent_map(em);
+			break;
+		}
+		if (bg->key.objectid != em->start ||
+		    bg->key.offset != em->len ||
+		    (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
+		    (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+			btrfs_err(fs_info,
+"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
+				em->start, em->len,
+				em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
+				bg->key.objectid, bg->key.offset,
+				bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+			ret = -EUCLEAN;
+			free_extent_map(em);
+			btrfs_put_block_group(bg);
+			break;
+		}
+		start = em->start + em->len;
+		free_extent_map(em);
+		btrfs_put_block_group(bg);
+	}
+	return ret;
+}
+
 int btrfs_read_block_groups(struct btrfs_fs_info *info)
 {
 	struct btrfs_path *path;
@@ -10010,7 +10066,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 
 	btrfs_add_raid_kobjects(info);
 	init_global_block_rsv(info);
-	ret = 0;
+	ret = check_chunk_block_group_mappings(info);
 error:
 	btrfs_free_path(path);
 	return ret;
-- 
cgit v1.2.3


From cf90d884b347c50a1e8c1effc4093e497dd68b4b Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 1 Aug 2018 10:37:19 +0800
Subject: btrfs: Introduce mount time chunk <-> dev extent mapping check

This patch introduces a chunk <-> dev extent mapping check, to protect
us against invalid dev extents or chunks.

Since chunk mapping is the fundamental infrastructure of btrfs, an extra
check at mount time could prevent a lot of unexpected behavior (BUG_ON).

Reported-by: Xu Wen <wen.xu@gatech.edu>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=200403
Link: https://bugzilla.kernel.org/show_bug.cgi?id=200407
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Su Yue <suy.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c |   7 ++
 fs/btrfs/volumes.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.h |   2 +
 3 files changed, 193 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6a5a3cd12886..5124c15705ce 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3030,6 +3030,13 @@ retry_root_backup:
 	fs_info->generation = generation;
 	fs_info->last_trans_committed = generation;
 
+	ret = btrfs_verify_dev_extents(fs_info);
+	if (ret) {
+		btrfs_err(fs_info,
+			  "failed to verify dev extents against chunks: %d",
+			  ret);
+		goto fail_block_groups;
+	}
 	ret = btrfs_recover_balance(fs_info);
 	if (ret) {
 		btrfs_err(fs_info, "failed to recover balance: %d", ret);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 74977203fc85..96be1e50b027 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6452,6 +6452,7 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
 	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
 	map->type = btrfs_chunk_type(leaf, chunk);
 	map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+	map->verified_stripes = 0;
 	for (i = 0; i < num_stripes; i++) {
 		map->stripes[i].physical =
 			btrfs_stripe_offset_nr(leaf, chunk, i);
@@ -7318,3 +7319,186 @@ int btrfs_bg_type_to_factor(u64 flags)
 		return 2;
 	return 1;
 }
+
+
+static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
+{
+	int index = btrfs_bg_flags_to_raid_index(type);
+	int ncopies = btrfs_raid_array[index].ncopies;
+	int data_stripes;
+
+	switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+	case BTRFS_BLOCK_GROUP_RAID5:
+		data_stripes = num_stripes - 1;
+		break;
+	case BTRFS_BLOCK_GROUP_RAID6:
+		data_stripes = num_stripes - 2;
+		break;
+	default:
+		data_stripes = num_stripes / ncopies;
+		break;
+	}
+	return div_u64(chunk_len, data_stripes);
+}
+
+static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
+				 u64 chunk_offset, u64 devid,
+				 u64 physical_offset, u64 physical_len)
+{
+	struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+	struct extent_map *em;
+	struct map_lookup *map;
+	u64 stripe_len;
+	bool found = false;
+	int ret = 0;
+	int i;
+
+	read_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
+	read_unlock(&em_tree->lock);
+
+	if (!em) {
+		btrfs_err(fs_info,
+"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
+			  physical_offset, devid);
+		ret = -EUCLEAN;
+		goto out;
+	}
+
+	map = em->map_lookup;
+	stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes);
+	if (physical_len != stripe_len) {
+		btrfs_err(fs_info,
+"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
+			  physical_offset, devid, em->start, physical_len,
+			  stripe_len);
+		ret = -EUCLEAN;
+		goto out;
+	}
+
+	for (i = 0; i < map->num_stripes; i++) {
+		if (map->stripes[i].dev->devid == devid &&
+		    map->stripes[i].physical == physical_offset) {
+			found = true;
+			if (map->verified_stripes >= map->num_stripes) {
+				btrfs_err(fs_info,
+				"too many dev extents for chunk %llu found",
+					  em->start);
+				ret = -EUCLEAN;
+				goto out;
+			}
+			map->verified_stripes++;
+			break;
+		}
+	}
+	if (!found) {
+		btrfs_err(fs_info,
+	"dev extent physical offset %llu devid %llu has no corresponding chunk",
+			physical_offset, devid);
+		ret = -EUCLEAN;
+	}
+out:
+	free_extent_map(em);
+	return ret;
+}
+
+static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
+{
+	struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+	struct extent_map *em;
+	struct rb_node *node;
+	int ret = 0;
+
+	read_lock(&em_tree->lock);
+	for (node = rb_first(&em_tree->map); node; node = rb_next(node)) {
+		em = rb_entry(node, struct extent_map, rb_node);
+		if (em->map_lookup->num_stripes !=
+		    em->map_lookup->verified_stripes) {
+			btrfs_err(fs_info,
+			"chunk %llu has missing dev extent, have %d expect %d",
+				  em->start, em->map_lookup->verified_stripes,
+				  em->map_lookup->num_stripes);
+			ret = -EUCLEAN;
+			goto out;
+		}
+	}
+out:
+	read_unlock(&em_tree->lock);
+	return ret;
+}
+
+/*
+ * Ensure that all dev extents are mapped to correct chunk, otherwise
+ * later chunk allocation/free would cause unexpected behavior.
+ *
+ * NOTE: This will iterate through the whole device tree, which should be of
+ * the same size level as the chunk tree.  This slightly increases mount time.
+ */
+int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_path *path;
+	struct btrfs_root *root = fs_info->dev_root;
+	struct btrfs_key key;
+	int ret = 0;
+
+	key.objectid = 1;
+	key.type = BTRFS_DEV_EXTENT_KEY;
+	key.offset = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	path->reada = READA_FORWARD;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+		ret = btrfs_next_item(root, path);
+		if (ret < 0)
+			goto out;
+		/* No dev extents at all? Not good */
+		if (ret > 0) {
+			ret = -EUCLEAN;
+			goto out;
+		}
+	}
+	while (1) {
+		struct extent_buffer *leaf = path->nodes[0];
+		struct btrfs_dev_extent *dext;
+		int slot = path->slots[0];
+		u64 chunk_offset;
+		u64 physical_offset;
+		u64 physical_len;
+		u64 devid;
+
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.type != BTRFS_DEV_EXTENT_KEY)
+			break;
+		devid = key.objectid;
+		physical_offset = key.offset;
+
+		dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
+		chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext);
+		physical_len = btrfs_dev_extent_length(leaf, dext);
+
+		ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
+					    physical_offset, physical_len);
+		if (ret < 0)
+			goto out;
+		ret = btrfs_next_item(root, path);
+		if (ret < 0)
+			goto out;
+		if (ret > 0) {
+			ret = 0;
+			break;
+		}
+	}
+
+	/* Ensure all chunks have corresponding dev extents */
+	ret = verify_chunk_dev_extent_mapping(fs_info);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 049619176831..23e9285d88de 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -345,6 +345,7 @@ struct map_lookup {
 	u64 stripe_len;
 	int num_stripes;
 	int sub_stripes;
+	int verified_stripes; /* For mount time dev extent verification */
 	struct btrfs_bio_stripe stripes[];
 };
 
@@ -556,5 +557,6 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
 					struct btrfs_device *failing_dev);
 
 int btrfs_bg_type_to_factor(u64 flags);
+int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
 
 #endif
-- 
cgit v1.2.3


From 64f64f43c89aca1782aa672e0586f6903c5d8979 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 1 Aug 2018 10:37:20 +0800
Subject: btrfs: Exit gracefully when chunk map cannot be inserted to the tree

It's entirely possible that a crafted btrfs image contains overlapping
chunks.

Although we can't detect such a problem with the tree-checker, it's not
catastrophic: the current extent map code can already detect it and
return -EEXIST.

We only need to exit gracefully and fail the mount.

Reported-by: Xu Wen <wen.xu@gatech.edu>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=200409
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 96be1e50b027..7218a79a0e57 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6489,10 +6489,14 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
 	write_lock(&map_tree->map_tree.lock);
 	ret = add_extent_mapping(&map_tree->map_tree, em, 0);
 	write_unlock(&map_tree->map_tree.lock);
-	BUG_ON(ret); /* Tree corruption */
+	if (ret < 0) {
+		btrfs_err(fs_info,
+			  "failed to add chunk map, start=%llu len=%llu: %d",
+			  em->start, em->len, ret);
+	}
 	free_extent_map(em);
 
-	return 0;
+	return ret;
 }
 
 static void fill_device_from_item(struct extent_buffer *leaf,
-- 
cgit v1.2.3


From 39379faaad79e3cf403a6904a08676b7850043ae Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naota@elisp.net>
Date: Fri, 27 Jul 2018 09:04:55 +0900
Subject: btrfs: revert fs_devices state on error of btrfs_init_new_device

When btrfs hits an error after modifying fs_devices in
btrfs_init_new_device() (such as btrfs_add_dev_item() returning an error),
it leaves everything as is, but frees the allocated btrfs_device. As a
result, fs_devices->devices and fs_devices->alloc_list contain an already
freed btrfs_device, leading to a later use-after-free bug.

The error path also messes up things like ->num_devices. While they go back
to the original value by unscanning btrfs devices, it is safe to revert
them here.

Fixes: 79787eaab461 ("btrfs: replace many BUG_ONs with proper error handling")
Signed-off-by: Naohiro Aota <naota@elisp.net>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7218a79a0e57..da86706123ff 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2321,7 +2321,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	struct super_block *sb = fs_info->sb;
 	struct rcu_string *name;
 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
-	u64 tmp;
+	u64 orig_super_total_bytes;
+	u64 orig_super_num_devices;
 	int seeding_dev = 0;
 	int ret = 0;
 	bool unlocked = false;
@@ -2417,12 +2418,14 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	if (!blk_queue_nonrot(q))
 		fs_devices->rotating = 1;
 
-	tmp = btrfs_super_total_bytes(fs_info->super_copy);
+	orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
 	btrfs_set_super_total_bytes(fs_info->super_copy,
-		round_down(tmp + device->total_bytes, fs_info->sectorsize));
+		round_down(orig_super_total_bytes + device->total_bytes,
+			   fs_info->sectorsize));
 
-	tmp = btrfs_super_num_devices(fs_info->super_copy);
-	btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1);
+	orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy);
+	btrfs_set_super_num_devices(fs_info->super_copy,
+				    orig_super_num_devices + 1);
 
 	/* add sysfs device entry */
 	btrfs_sysfs_add_device_link(fs_devices, device);
@@ -2502,6 +2505,22 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
 error_sysfs:
 	btrfs_sysfs_rm_device_link(fs_devices, device);
+	mutex_lock(&fs_info->fs_devices->device_list_mutex);
+	mutex_lock(&fs_info->chunk_mutex);
+	list_del_rcu(&device->dev_list);
+	list_del(&device->dev_alloc_list);
+	fs_info->fs_devices->num_devices--;
+	fs_info->fs_devices->open_devices--;
+	fs_info->fs_devices->rw_devices--;
+	fs_info->fs_devices->total_devices--;
+	fs_info->fs_devices->total_rw_bytes -= device->total_bytes;
+	atomic64_sub(device->total_bytes, &fs_info->free_chunk_space);
+	btrfs_set_super_total_bytes(fs_info->super_copy,
+				    orig_super_total_bytes);
+	btrfs_set_super_num_devices(fs_info->super_copy,
+				    orig_super_num_devices);
+	mutex_unlock(&fs_info->chunk_mutex);
+	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 error_trans:
 	if (seeding_dev)
 		sb->s_flags |= SB_RDONLY;
-- 
cgit v1.2.3


From c883da313ebf459efd33d262ca963e3a5f0ac024 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 30 Jul 2018 07:54:56 -0400
Subject: locks: add tracepoint in flock codepath

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c                      | 1 +
 include/trace/events/filelock.h | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index e533623e2e99..6138a9bcd924 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -990,6 +990,7 @@ out:
 	if (new_fl)
 		locks_free_lock(new_fl);
 	locks_dispose_list(&dispose);
+	trace_flock_lock_inode(inode, request, error);
 	return error;
 }
 
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index d1faf3597b9d..68b17c116907 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -112,8 +112,11 @@ DEFINE_EVENT(filelock_lock, locks_remove_posix,
 		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
 		TP_ARGS(inode, fl, ret));
 
-DECLARE_EVENT_CLASS(filelock_lease,
+DEFINE_EVENT(filelock_lock, flock_lock_inode,
+		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+		TP_ARGS(inode, fl, ret));
 
+DECLARE_EVENT_CLASS(filelock_lease,
 	TP_PROTO(struct inode *inode, struct file_lock *fl),
 
 	TP_ARGS(inode, fl),
-- 
cgit v1.2.3


From dffe12a82826082d2129ef91b17b257254cb60fc Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Tue, 7 Aug 2018 10:07:00 -0500
Subject: gfs2: Fix gfs2_testbit to use clone bitmaps

Function gfs2_testbit is called in three places. Two of those places,
gfs2_alloc_extent and gfs2_unaligned_extlen, should be using the clone
bitmaps, not the "real" bitmaps. Function gfs2_unaligned_extlen is used
by the block reservations scheme to determine the length of an extent of
free blocks. Before this patch, it wasn't using the clone bitmap, which
means recently-freed blocks were treated as free blocks for the purposes
of an allocation.

This patch adds a new parameter to gfs2_testbit to indicate whether or
not the clone bitmaps should be used (if available).

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Reviewed-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/incore.h | 21 +++++++++++++++++++++
 fs/gfs2/rgrp.c   | 44 +++++++++++++++++++-------------------------
 2 files changed, 40 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b50908211b69..b96d39c28e17 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -65,6 +65,27 @@ struct gfs2_log_operations {
 
 #define GBF_FULL 1
 
+/**
+ * Clone bitmaps (bi_clone):
+ *
+ * - When a block is freed, we remember the previous state of the block in the
+ *   clone bitmap, and only mark the block as free in the real bitmap.
+ *
+ * - When looking for a block to allocate, we check for a free block in the
+ *   clone bitmap, and if no clone bitmap exists, in the real bitmap.
+ *
+ * - For allocating a block, we mark it as allocated in the real bitmap, and if
+ *   a clone bitmap exists, also in the clone bitmap.
+ *
+ * - At the end of a log_flush, we copy the real bitmap into the clone bitmap
+ *   to make the clone bitmap reflect the current allocation state.
+ *   (Alternatively, we could remove the clone bitmap.)
+ *
+ * The clone bitmaps are in-core only, and are never written to disk.
+ *
+ * These steps ensure that blocks which have been freed in a transaction cannot
+ * be reallocated in that same transaction.
+ */
 struct gfs2_bitmap {
 	struct buffer_head *bi_bh;
 	char *bi_clone;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7c5afeba8888..ef50fe9b880a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -123,17 +123,26 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
 /**
  * gfs2_testbit - test a bit in the bitmaps
  * @rbm: The bit to test
+ * @use_clone: If true, test the clone bitmap, not the official bitmap.
+ *
+ * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps,
+ * not the "real" bitmaps, to avoid allocating recently freed blocks.
  *
  * Returns: The two bit block state of the requested bit
  */
 
-static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
+static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
 {
 	struct gfs2_bitmap *bi = rbm_bi(rbm);
-	const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
+	const u8 *buffer;
 	const u8 *byte;
 	unsigned int bit;
 
+	if (use_clone && bi->bi_clone)
+		buffer = bi->bi_clone;
+	else
+		buffer = bi->bi_bh->b_data;
+	buffer += bi->bi_offset;
 	byte = buffer + (rbm->offset / GFS2_NBBY);
 	bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
 
@@ -322,7 +331,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le
 	u8 res;
 
 	for (n = 0; n < n_unaligned; n++) {
-		res = gfs2_testbit(rbm);
+		res = gfs2_testbit(rbm, true);
 		if (res != GFS2_BLKST_FREE)
 			return true;
 		(*len)--;
@@ -2146,26 +2155,6 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 }
 
-/**
- * gfs2_get_block_type - Check a block in a RG is of given type
- * @rgd: the resource group holding the block
- * @block: the block number
- *
- * Returns: The block type (GFS2_BLKST_*)
- */
-
-static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
-{
-	struct gfs2_rbm rbm = { .rgd = rgd, };
-	int ret;
-
-	ret = gfs2_rbm_from_block(&rbm, block);
-	WARN_ON_ONCE(ret != 0);
-
-	return gfs2_testbit(&rbm);
-}
-
-
 /**
  * gfs2_alloc_extent - allocate an extent from a given bitmap
  * @rbm: the resource group information
@@ -2190,7 +2179,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
 	block++;
 	while (*n < elen) {
 		ret = gfs2_rbm_from_block(&pos, block);
-		if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
+		if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE)
 			break;
 		gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
 		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
@@ -2543,6 +2532,7 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
 {
 	struct gfs2_rgrpd *rgd;
 	struct gfs2_holder rgd_gh;
+	struct gfs2_rbm rbm;
 	int error = -EINVAL;
 
 	rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
@@ -2553,7 +2543,11 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
 	if (error)
 		goto fail;
 
-	if (gfs2_get_block_type(rgd, no_addr) != type)
+	rbm.rgd = rgd;
+	error = gfs2_rbm_from_block(&rbm, no_addr);
+	WARN_ON_ONCE(error != 0);
+
+	if (gfs2_testbit(&rbm, false) != type)
 		error = -ESTALE;
 
 	gfs2_glock_dq_uninit(&rgd_gh);
-- 
cgit v1.2.3


From 2ba090d521c5e09f32316c179d25bb6f699d3568 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 7 Aug 2018 10:57:12 -0700
Subject: xfs: use WRITE_ONCE to update if_seq

This adds ordering of the updates and makes sure we always see the if_seq
update before the extent tree is modified.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_iext_tree.c | 20 +++++++++++++++++---
 fs/xfs/xfs_aops.c             |  4 ++--
 fs/xfs/xfs_iomap.c            |  3 ++-
 3 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 8a7aea041ee1..771dd072015d 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -14,6 +14,7 @@
 #include "xfs_inode_fork.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
+#include "xfs_bmap.h"
 #include "xfs_trace.h"
 
 /*
@@ -612,6 +613,19 @@ xfs_iext_realloc_root(
 	cur->leaf = new;
 }
 
+/*
+ * Increment the sequence counter if we are on a COW fork.  This allows
+ * the writeback code to skip looking for a COW extent if the COW fork
+ * hasn't changed.  We use WRITE_ONCE here to ensure the update to the
+ * sequence counter is seen before the modifications to the extent
+ * tree itself take effect.
+ */
+static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp, int state)
+{
+	if (state & BMAP_COWFORK)
+		WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1);
+}
+
 void
 xfs_iext_insert(
 	struct xfs_inode	*ip,
@@ -624,7 +638,7 @@ xfs_iext_insert(
 	struct xfs_iext_leaf	*new = NULL;
 	int			nr_entries, i;
 
-	ifp->if_seq++;
+	xfs_iext_inc_seq(ifp, state);
 
 	if (ifp->if_height == 0)
 		xfs_iext_alloc_root(ifp, cur);
@@ -866,7 +880,7 @@ xfs_iext_remove(
 	ASSERT(ifp->if_u1.if_root != NULL);
 	ASSERT(xfs_iext_valid(ifp, cur));
 
-	ifp->if_seq++;
+	xfs_iext_inc_seq(ifp, state);
 
 	nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
 	for (i = cur->pos; i < nr_entries; i++)
@@ -974,7 +988,7 @@ xfs_iext_update_extent(
 {
 	struct xfs_ifork	*ifp = xfs_iext_state_to_fork(ip, state);
 
-	ifp->if_seq++;
+	xfs_iext_inc_seq(ifp, state);
 
 	if (cur->pos == 0) {
 		struct xfs_bmbt_irec	old;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 235b4ddcd324..49f5f5896a43 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -351,7 +351,7 @@ xfs_map_blocks(
 	if (imap_valid &&
 	    (!xfs_inode_has_cow_data(ip) ||
 	     wpc->io_type == XFS_IO_COW ||
-	     wpc->cow_seq == ip->i_cowfp->if_seq))
+	     wpc->cow_seq == READ_ONCE(ip->i_cowfp->if_seq)))
 		return 0;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -380,7 +380,7 @@ xfs_map_blocks(
 	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
 		cow_fsb = imap.br_startoff;
 	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
-		wpc->cow_seq = ip->i_cowfp->if_seq;
+		wpc->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 		/*
 		 * Truncate can race with writeback since writeback doesn't
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3282575e2df4..6320aca39f39 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -655,6 +655,7 @@ xfs_iomap_write_allocate(
 	unsigned int	*cow_seq)
 {
 	xfs_mount_t	*mp = ip->i_mount;
+	struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
 	xfs_fileoff_t	offset_fsb, last_block;
 	xfs_fileoff_t	end_fsb, map_start_fsb;
 	xfs_filblks_t	count_fsb;
@@ -768,7 +769,7 @@ xfs_iomap_write_allocate(
 				goto error0;
 
 			if (whichfork == XFS_COW_FORK)
-				*cow_seq = XFS_IFORK_PTR(ip, whichfork)->if_seq;
+				*cow_seq = READ_ONCE(ifp->if_seq);
 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		}
 
-- 
cgit v1.2.3


From 73971b172a435079340007bee12b4944cc599a8a Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 7 Aug 2018 10:57:13 -0700
Subject: xfs: remove dead error handling code in xfs_dquot_disk_alloc()

Colin Ian King reports that commit 82ff27bc52 ("xfs: automatic dfops
buffer relogging") leaves around some dead error handling code in
xfs_dquot_disk_alloc(). This was discovered via Coverity scan.

Since the associated commit eliminates the act of joining a buffer
to a dfops, this intermediate error state is no longer possible and
the error handling code can be removed. Since the caller cancels the
transaction on error, which cancels the dfops, eliminate the
unnecessary xfs_defer_cancel() call and error handling labels.

Fixes: 82ff27bc52 ("xfs: automatic dfops buffer relogging")
Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_dquot.c | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 70a76ac41f01..87e6dd5326d5 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -311,7 +311,7 @@ xfs_dquot_disk_alloc(
 			XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
 			XFS_QM_DQALLOC_SPACE_RES(mp), &map, &nmaps);
 	if (error)
-		goto error0;
+		return error;
 	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
 	ASSERT(nmaps == 1);
 	ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -325,10 +325,8 @@ xfs_dquot_disk_alloc(
 	/* now we can just get the buffer (there's nothing to read yet) */
 	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno,
 			mp->m_quotainfo->qi_dqchunklen, 0);
-	if (!bp) {
-		error = -ENOMEM;
-		goto error1;
-	}
+	if (!bp)
+		return -ENOMEM;
 	bp->b_ops = &xfs_dquot_buf_ops;
 
 	/*
@@ -349,10 +347,8 @@ xfs_dquot_disk_alloc(
 	 * the buffer locked across the _defer_finish call.  We can now do
 	 * this correctly with xfs_defer_bjoin.
 	 *
-	 * Above, we allocated a disk block for the dquot information and
-	 * used get_buf to initialize the dquot.  If the _defer_bjoin fails,
-	 * the buffer is still locked to *tpp, so we must _bhold_release and
-	 * then _trans_brelse the buffer.  If the _defer_finish fails, the old
+	 * Above, we allocated a disk block for the dquot information and used
+	 * get_buf to initialize the dquot. If the _defer_finish fails, the old
 	 * transaction is gone but the new buffer is not joined or held to any
 	 * transaction, so we must _buf_relse it.
 	 *
@@ -362,24 +358,14 @@ xfs_dquot_disk_alloc(
 	 * manually or by committing the transaction.
 	 */
 	xfs_trans_bhold(tp, bp);
-	if (error) {
-		xfs_trans_bhold_release(tp, bp);
-		xfs_trans_brelse(tp, bp);
-		goto error1;
-	}
 	error = xfs_defer_finish(tpp);
 	tp = *tpp;
 	if (error) {
 		xfs_buf_relse(bp);
-		goto error0;
+		return error;
 	}
 	*bpp = bp;
 	return 0;
-
-error1:
-	xfs_defer_cancel(tp);
-error0:
-	return error;
 }
 
 /*
-- 
cgit v1.2.3


From ff361fda55fda55031252aca1f8afef735fe057c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 19 Jun 2018 15:25:30 +0300
Subject: cifs: Silence uninitialized variable warning

This is not really a runtime issue but Smatch complains that:

    fs/cifs/smb2ops.c:1740 smb2_query_symlink()
    error: uninitialized symbol 'resp_buftype'.

The warning is right that it can be uninitialized...  Also "err_buf"
would be NULL at this point and we're not supposed to pass NULLs to
free_rsp_buf() or it might trigger some extra output if we turn on
debugging.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.de>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2ops.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index ea92a38b2f08..ae64cbef5e07 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1700,7 +1700,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
 		       &resp_buftype);
 	if (!rc || !err_iov.iov_base) {
 		rc = -ENOENT;
-		goto querty_exit;
+		goto free_path;
 	}
 
 	err_buf = err_iov.iov_base;
@@ -1741,6 +1741,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
 
  querty_exit:
 	free_rsp_buf(resp_buftype, err_buf);
+ free_path:
 	kfree(utf16_path);
 	return rc;
 }
-- 
cgit v1.2.3


From 95390201e7d8dd1eb764a3cbd50ae538a17fcd02 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 19 Jun 2018 17:27:58 +0200
Subject: cifs: use timespec64 internally

In cifs, the timestamps are stored in memory in the cifs_fattr structure,
which uses the deprecated 'timespec' structure. Now that the VFS code
has moved on to 'timespec64', the next step is to change over the fattr
as well.

This also makes 32-bit and 64-bit systems behave the same way, and
no longer overflow the 32-bit time_t in year 2038.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Paulo Alcantara <palcantara@suse.de>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsencrypt.c |  4 ++--
 fs/cifs/cifsglob.h    |  6 +++---
 fs/cifs/cifsproto.h   |  6 +++---
 fs/cifs/cifssmb.c     | 12 ++++++------
 fs/cifs/inode.c       | 34 ++++++++++++++++------------------
 fs/cifs/netmisc.c     | 19 ++++++++++---------
 6 files changed, 40 insertions(+), 41 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index ee2a8ec70056..b4672eafc5bf 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -452,7 +452,7 @@ find_timestamp(struct cifs_ses *ses)
 	unsigned char *blobptr;
 	unsigned char *blobend;
 	struct ntlmssp2_name *attrptr;
-	struct timespec ts;
+	struct timespec64 ts;
 
 	if (!ses->auth_key.len || !ses->auth_key.response)
 		return 0;
@@ -477,7 +477,7 @@ find_timestamp(struct cifs_ses *ses)
 		blobptr += attrsize; /* advance attr value */
 	}
 
-	ktime_get_real_ts(&ts);
+	ktime_get_real_ts64(&ts);
 	return cpu_to_le64(cifs_UnixTimeToNT(ts));
 }
 
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c923c7854027..b57d1e22ecb5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1544,9 +1544,9 @@ struct cifs_fattr {
 	dev_t		cf_rdev;
 	unsigned int	cf_nlink;
 	unsigned int	cf_dtype;
-	struct timespec	cf_atime;
-	struct timespec	cf_mtime;
-	struct timespec	cf_ctime;
+	struct timespec64 cf_atime;
+	struct timespec64 cf_mtime;
+	struct timespec64 cf_ctime;
 };
 
 static inline void free_dfs_info_param(struct dfs_info3_param *param)
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1890f534c88b..7ead1a9ac6fb 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -143,9 +143,9 @@ extern enum securityEnum select_sectype(struct TCP_Server_Info *server,
 				enum securityEnum requested);
 extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
 			  const struct nls_table *nls_cp);
-extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
-extern u64 cifs_UnixTimeToNT(struct timespec);
-extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
+extern struct timespec64 cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
+extern u64 cifs_UnixTimeToNT(struct timespec64);
+extern struct timespec64 cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
 				      int offset);
 extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
 extern int cifs_get_writer(struct cifsInodeInfo *cinode);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 93408eab92e7..dc2f4cf08fe9 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -508,13 +508,13 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr)
 		 * this requirement.
 		 */
 		int val, seconds, remain, result;
-		struct timespec ts;
-		unsigned long utc = ktime_get_real_seconds();
+		struct timespec64 ts;
+		time64_t utc = ktime_get_real_seconds();
 		ts = cnvrtDosUnixTm(rsp->SrvTime.Date,
 				    rsp->SrvTime.Time, 0);
-		cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n",
-			 (int)ts.tv_sec, (int)utc,
-			 (int)(utc - ts.tv_sec));
+		cifs_dbg(FYI, "SrvTime %lld sec since 1970 (utc: %lld) diff: %lld\n",
+			 ts.tv_sec, utc,
+			 utc - ts.tv_sec);
 		val = (int)(utc - ts.tv_sec);
 		seconds = abs(val);
 		result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
@@ -4082,7 +4082,7 @@ QInfRetry:
 	if (rc) {
 		cifs_dbg(FYI, "Send error in QueryInfo = %d\n", rc);
 	} else if (data) {
-		struct timespec ts;
+		struct timespec64 ts;
 		__u32 time = le32_to_cpu(pSMBr->last_write_time);
 
 		/* decode response */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a2cfb33e85c1..f6abf18ca492 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -95,7 +95,6 @@ static void
 cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
 {
 	struct cifsInodeInfo *cifs_i = CIFS_I(inode);
-	struct timespec ts;
 
 	cifs_dbg(FYI, "%s: revalidating inode %llu\n",
 		 __func__, cifs_i->uniqueid);
@@ -114,8 +113,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
 	}
 
 	 /* revalidate if mtime or size have changed */
-	ts = timespec64_to_timespec(inode->i_mtime);
-	if (timespec_equal(&ts, &fattr->cf_mtime) &&
+	if (timespec64_equal(&inode->i_mtime, &fattr->cf_mtime) &&
 	    cifs_i->server_eof == fattr->cf_eof) {
 		cifs_dbg(FYI, "%s: inode %llu is unchanged\n",
 			 __func__, cifs_i->uniqueid);
@@ -164,9 +162,9 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
 	cifs_revalidate_cache(inode, fattr);
 
 	spin_lock(&inode->i_lock);
-	inode->i_atime = timespec_to_timespec64(fattr->cf_atime);
-	inode->i_mtime = timespec_to_timespec64(fattr->cf_mtime);
-	inode->i_ctime = timespec_to_timespec64(fattr->cf_ctime);
+	inode->i_atime = fattr->cf_atime;
+	inode->i_mtime = fattr->cf_mtime;
+	inode->i_ctime = fattr->cf_ctime;
 	inode->i_rdev = fattr->cf_rdev;
 	cifs_nlink_fattr_to_inode(inode, fattr);
 	inode->i_uid = fattr->cf_uid;
@@ -327,8 +325,8 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
 	fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU;
 	fattr->cf_uid = cifs_sb->mnt_uid;
 	fattr->cf_gid = cifs_sb->mnt_gid;
-	ktime_get_real_ts(&fattr->cf_mtime);
-	fattr->cf_mtime = timespec_trunc(fattr->cf_mtime, sb->s_time_gran);
+	ktime_get_real_ts64(&fattr->cf_mtime);
+	fattr->cf_mtime = timespec64_trunc(fattr->cf_mtime, sb->s_time_gran);
 	fattr->cf_atime = fattr->cf_ctime = fattr->cf_mtime;
 	fattr->cf_nlink = 2;
 	fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL;
@@ -604,8 +602,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 	if (info->LastAccessTime)
 		fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
 	else {
-		ktime_get_real_ts(&fattr->cf_atime);
-		fattr->cf_atime = timespec_trunc(fattr->cf_atime, sb->s_time_gran);
+		ktime_get_real_ts64(&fattr->cf_atime);
+		fattr->cf_atime = timespec64_trunc(fattr->cf_atime, sb->s_time_gran);
 	}
 
 	fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
@@ -1125,14 +1123,14 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
 	if (attrs->ia_valid & ATTR_ATIME) {
 		set_time = true;
 		info_buf.LastAccessTime =
-			cpu_to_le64(cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_atime)));
+			cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
 	} else
 		info_buf.LastAccessTime = 0;
 
 	if (attrs->ia_valid & ATTR_MTIME) {
 		set_time = true;
 		info_buf.LastWriteTime =
-		    cpu_to_le64(cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_mtime)));
+		    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
 	} else
 		info_buf.LastWriteTime = 0;
 
@@ -1145,7 +1143,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
 	if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
 		cifs_dbg(FYI, "CIFS - CTIME changed\n");
 		info_buf.ChangeTime =
-		    cpu_to_le64(cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_ctime)));
+		    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
 	} else
 		info_buf.ChangeTime = 0;
 
@@ -2071,8 +2069,8 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
 	/* old CIFS Unix Extensions doesn't return create time */
 	if (CIFS_I(inode)->createtime) {
 		stat->result_mask |= STATX_BTIME;
-		stat->btime = timespec_to_timespec64(
-		      cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime)));
+		stat->btime =
+		      cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime));
 	}
 
 	stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED);
@@ -2278,17 +2276,17 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 		args->gid = INVALID_GID; /* no change */
 
 	if (attrs->ia_valid & ATTR_ATIME)
-		args->atime = cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_atime));
+		args->atime = cifs_UnixTimeToNT(attrs->ia_atime);
 	else
 		args->atime = NO_CHANGE_64;
 
 	if (attrs->ia_valid & ATTR_MTIME)
-		args->mtime = cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_mtime));
+		args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime);
 	else
 		args->mtime = NO_CHANGE_64;
 
 	if (attrs->ia_valid & ATTR_CTIME)
-		args->ctime = cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_ctime));
+		args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime);
 	else
 		args->ctime = NO_CHANGE_64;
 
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index d7ad0dfe4e68..fdd908e4a26b 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -918,10 +918,10 @@ smbCalcSize(void *buf, struct TCP_Server_Info *server)
  * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
  * into Unix UTC (based 1970-01-01, in seconds).
  */
-struct timespec
+struct timespec64
 cifs_NTtimeToUnix(__le64 ntutc)
 {
-	struct timespec ts;
+	struct timespec64 ts;
 	/* BB what about the timezone? BB */
 
 	/* Subtract the NTFS time offset, then convert to 1s intervals. */
@@ -935,12 +935,12 @@ cifs_NTtimeToUnix(__le64 ntutc)
 	 */
 	if (t < 0) {
 		abs_t = -t;
-		ts.tv_nsec = (long)(do_div(abs_t, 10000000) * 100);
+		ts.tv_nsec = (time64_t)(do_div(abs_t, 10000000) * 100);
 		ts.tv_nsec = -ts.tv_nsec;
 		ts.tv_sec = -abs_t;
 	} else {
 		abs_t = t;
-		ts.tv_nsec = (long)do_div(abs_t, 10000000) * 100;
+		ts.tv_nsec = (time64_t)do_div(abs_t, 10000000) * 100;
 		ts.tv_sec = abs_t;
 	}
 
@@ -949,7 +949,7 @@ cifs_NTtimeToUnix(__le64 ntutc)
 
 /* Convert the Unix UTC into NT UTC. */
 u64
-cifs_UnixTimeToNT(struct timespec t)
+cifs_UnixTimeToNT(struct timespec64 t)
 {
 	/* Convert to 100ns intervals and then add the NTFS time offset. */
 	return (u64) t.tv_sec * 10000000 + t.tv_nsec/100 + NTFS_TIME_OFFSET;
@@ -959,10 +959,11 @@ static const int total_days_of_prev_months[] = {
 	0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
 };
 
-struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
+struct timespec64 cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
 {
-	struct timespec ts;
-	int sec, min, days, month, year;
+	struct timespec64 ts;
+	time64_t sec;
+	int min, days, month, year;
 	u16 date = le16_to_cpu(le_date);
 	u16 time = le16_to_cpu(le_time);
 	SMB_TIME *st = (SMB_TIME *)&time;
@@ -973,7 +974,7 @@ struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
 	sec = 2 * st->TwoSeconds;
 	min = st->Minutes;
 	if ((sec > 59) || (min > 59))
-		cifs_dbg(VFS, "illegal time min %d sec %d\n", min, sec);
+		cifs_dbg(VFS, "illegal time min %d sec %lld\n", min, sec);
 	sec += (min * 60);
 	sec += 60 * 60 * st->Hours;
 	if (st->Hours > 24)
-- 
cgit v1.2.3


From cbedeadf9c44a1a135293717d501882f2933a534 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 19 Jun 2018 17:27:59 +0200
Subject: cifs: use 64-bit timestamps for fscache

In the fscache, we just need the timestamps as cookies to check for
changes, so we don't really care about the overflow, but it's better
to stop using the deprecated timespec so we don't have to go through
explicit conversion functions.

To avoid comparing uninitialized padding values that are copied
while assigning the timespec values, this rearranges the members of
cifs_fscache_inode_auxdata to avoid padding, and assigns them
individually.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Paulo Alcantara <palcantara@suse.de>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cache.c   |  6 ++++--
 fs/cifs/fscache.c | 12 ++++++++----
 fs/cifs/fscache.h |  8 +++++---
 3 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index e1553d1e0e50..b7420e605b28 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -128,8 +128,10 @@ fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
 
 	memset(&auxdata, 0, sizeof(auxdata));
 	auxdata.eof = cifsi->server_eof;
-	auxdata.last_write_time = timespec64_to_timespec(cifsi->vfs_inode.i_mtime);
-	auxdata.last_change_time = timespec64_to_timespec(cifsi->vfs_inode.i_ctime);
+	auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
+	auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
+	auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
+	auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
 
 	if (memcmp(data, &auxdata, datalen) != 0)
 		return FSCACHE_CHECKAUX_OBSOLETE;
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 85145a763021..ea6ace9c2417 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -129,8 +129,10 @@ static void cifs_fscache_acquire_inode_cookie(struct cifsInodeInfo *cifsi,
 
 	memset(&auxdata, 0, sizeof(auxdata));
 	auxdata.eof = cifsi->server_eof;
-	auxdata.last_write_time = timespec64_to_timespec(cifsi->vfs_inode.i_mtime);
-	auxdata.last_change_time = timespec64_to_timespec(cifsi->vfs_inode.i_ctime);
+	auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
+	auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
+	auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
+	auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
 
 	cifsi->fscache =
 		fscache_acquire_cookie(tcon->fscache,
@@ -166,8 +168,10 @@ void cifs_fscache_release_inode_cookie(struct inode *inode)
 	if (cifsi->fscache) {
 		memset(&auxdata, 0, sizeof(auxdata));
 		auxdata.eof = cifsi->server_eof;
-		auxdata.last_write_time = timespec64_to_timespec(cifsi->vfs_inode.i_mtime);
-		auxdata.last_change_time = timespec64_to_timespec(cifsi->vfs_inode.i_ctime);
+		auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
+		auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
+		auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
+		auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
 
 		cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
 		fscache_relinquish_cookie(cifsi->fscache, &auxdata, false);
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index c7e3ac251e16..8c0862e41306 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -31,9 +31,11 @@
  * Auxiliary data attached to CIFS inode within the cache
  */
 struct cifs_fscache_inode_auxdata {
-	struct timespec	last_write_time;
-	struct timespec	last_change_time;
-	u64		eof;
+	u64 last_write_time_sec;
+	u64 last_change_time_sec;
+	u32 last_write_time_nsec;
+	u32 last_change_time_nsec;
+	u64 eof;
 };
 
 /*
-- 
cgit v1.2.3


From 8505c8bfd85a260c9dc5c47e15bd8c5357fcbcd2 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Mon, 18 Jun 2018 14:01:59 -0500
Subject: smb3: if server does not support posix do not allow posix mount
 option

If user specifies "posix" on an SMB3.11 mount, then fail the mount
if server does not return the POSIX negotiate context indicating
support for posix.

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
---
 fs/cifs/connect.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5df2c0698cda..9d02563b2147 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3031,11 +3031,15 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 	}
 
 #ifdef CONFIG_CIFS_SMB311
-	if ((volume_info->linux_ext) && (ses->server->posix_ext_supported)) {
-		if (ses->server->vals->protocol_id == SMB311_PROT_ID) {
+	if (volume_info->linux_ext) {
+		if (ses->server->posix_ext_supported) {
 			tcon->posix_extensions = true;
 			printk_once(KERN_WARNING
 				"SMB3.11 POSIX Extensions are experimental\n");
+		} else {
+			cifs_dbg(VFS, "Server does not support mounting with posix SMB3.11 extensions.\n");
+			rc = -EOPNOTSUPP;
+			goto out_fail;
 		}
 	}
 #endif /* 311 */
-- 
cgit v1.2.3


From 7420451f6a109f7f8f1bf283f34d08eba3259fb3 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Tue, 19 Jun 2018 14:34:08 -0500
Subject: cifs: allow disabling insecure dialects in the config

Allow disabling cifs (SMB1, i.e. vers=1.0) and vers=2.0 in the
config for the build of cifs.ko if you want to always prevent
mounting with these less secure dialects.

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Jeremy Allison <jra@samba.org>
---
 fs/cifs/Kconfig   | 17 ++++++++++++++++-
 fs/cifs/connect.c |  9 +++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 5f132d59dfc2..832eafbf803f 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -66,9 +66,24 @@ config CIFS_STATS2
 	  Unless you are a developer or are doing network performance analysis
 	  or tuning, say N.
 
+config CIFS_ALLOW_INSECURE_LEGACY
+	bool "Support legacy servers which use less secure dialects"
+	depends on CIFS
+	default y
+	help
+	  Modern dialects, SMB2.1 and later (including SMB3 and 3.1.1), have
+	  additional security features, including protection against
+	  man-in-the-middle attacks and stronger crypto hashes, so the use
+	  of legacy dialects (SMB1/CIFS and SMB2.0) is discouraged.
+
+	  Disabling this option prevents users from using vers=1.0 or vers=2.0
+	  on mounts with cifs.ko
+
+	  If unsure, say Y.
+
 config CIFS_WEAK_PW_HASH
 	bool "Support legacy servers which use weaker LANMAN security"
-	depends on CIFS
+	depends on CIFS && CIFS_ALLOW_INSECURE_LEGACY
 	help
 	  Modern CIFS servers including Samba and most Windows versions
 	  (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 9d02563b2147..842f45859968 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1174,6 +1174,7 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3)
 	substring_t args[MAX_OPT_ARGS];
 
 	switch (match_token(value, cifs_smb_version_tokens, args)) {
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 	case Smb_1:
 		if (disable_legacy_dialects) {
 			cifs_dbg(VFS, "mount with legacy dialect disabled\n");
@@ -1198,6 +1199,14 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3)
 		vol->ops = &smb20_operations;
 		vol->vals = &smb20_values;
 		break;
+#else
+	case Smb_1:
+		cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n");
+		return 1;
+	case Smb_20:
+		cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n");
+		return 1;
+#endif /* CIFS_ALLOW_INSECURE_LEGACY */
 	case Smb_21:
 		vol->ops = &smb21_operations;
 		vol->vals = &smb21_values;
-- 
cgit v1.2.3


From 256b4c3f03d77d8c0dc69e3a6ceb3afd0d1810bd Mon Sep 17 00:00:00 2001
From: Aurelien Aptel <aaptel@suse.com>
Date: Tue, 19 Jun 2018 15:18:48 -0700
Subject: CIFS: fix memory leak and remove dead code

Also fixes the error code in smb311_posix_mkdir() (where
the error assignment needs to go before the goto),
a typo that Dan Carpenter, Paulo and Gustavo
pointed out.

Signed-off-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.de>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2pdu.c | 101 +++++++++++++++++++++++++++---------------------------
 1 file changed, 50 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 3c92678cb45b..641fe79708d0 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1928,7 +1928,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
 {
 	struct smb_rqst rqst;
 	struct smb2_create_req *req;
-	struct smb2_create_rsp *rsp;
+	struct smb2_create_rsp *rsp = NULL;
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 	struct kvec iov[3]; /* make sure at least one for each open context */
@@ -1943,27 +1943,31 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
 	char *pc_buf = NULL;
 	int flags = 0;
 	unsigned int total_len;
-	__le16 *path = cifs_convert_path_to_utf16(full_path, cifs_sb);
-
-	if (!path)
-		return -ENOMEM;
+	__le16 *utf16_path = NULL;
 
 	cifs_dbg(FYI, "mkdir\n");
 
+	/* resource #1: path allocation */
+	utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
+	if (!utf16_path)
+		return -ENOMEM;
+
 	if (ses && (ses->server))
 		server = ses->server;
-	else
-		return -EIO;
+	else {
+		rc = -EIO;
+		goto err_free_path;
+	}
 
+	/* resource #2: request */
 	rc = smb2_plain_req_init(SMB2_CREATE, tcon, (void **) &req, &total_len);
-
 	if (rc)
-		return rc;
+		goto err_free_path;
+
 
 	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
-
 	req->ImpersonationLevel = IL_IMPERSONATION;
 	req->DesiredAccess = cpu_to_le32(FILE_WRITE_ATTRIBUTES);
 	/* File attributes ignored on open (used in create though) */
@@ -1992,50 +1996,44 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
 		req->sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS;
 		rc = alloc_path_with_tree_prefix(&copy_path, &copy_size,
 						 &name_len,
-						 tcon->treeName, path);
-		if (rc) {
-			cifs_small_buf_release(req);
-			return rc;
-		}
+						 tcon->treeName, utf16_path);
+		if (rc)
+			goto err_free_req;
+
 		req->NameLength = cpu_to_le16(name_len * 2);
 		uni_path_len = copy_size;
-		path = copy_path;
+		/* free before overwriting resource */
+		kfree(utf16_path);
+		utf16_path = copy_path;
 	} else {
-		uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2;
+		uni_path_len = (2 * UniStrnlen((wchar_t *)utf16_path, PATH_MAX)) + 2;
 		/* MUST set path len (NameLength) to 0 opening root of share */
 		req->NameLength = cpu_to_le16(uni_path_len - 2);
 		if (uni_path_len % 8 != 0) {
 			copy_size = roundup(uni_path_len, 8);
 			copy_path = kzalloc(copy_size, GFP_KERNEL);
 			if (!copy_path) {
-				cifs_small_buf_release(req);
-				return -ENOMEM;
+				rc = -ENOMEM;
+				goto err_free_req;
 			}
-			memcpy((char *)copy_path, (const char *)path,
+			memcpy((char *)copy_path, (const char *)utf16_path,
 			       uni_path_len);
 			uni_path_len = copy_size;
-			path = copy_path;
+			/* free before overwriting resource */
+			kfree(utf16_path);
+			utf16_path = copy_path;
 		}
 	}
 
 	iov[1].iov_len = uni_path_len;
-	iov[1].iov_base = path;
+	iov[1].iov_base = utf16_path;
 	req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE;
 
 	if (tcon->posix_extensions) {
-		if (n_iov > 2) {
-			struct create_context *ccontext =
-			    (struct create_context *)iov[n_iov-1].iov_base;
-			ccontext->Next =
-				cpu_to_le32(iov[n_iov-1].iov_len);
-		}
-
+		/* resource #3: posix buf */
 		rc = add_posix_context(iov, &n_iov, mode);
-		if (rc) {
-			cifs_small_buf_release(req);
-			kfree(copy_path);
-			return rc;
-		}
+		if (rc)
+			goto err_free_req;
 		pc_buf = iov[n_iov-1].iov_base;
 	}
 
@@ -2044,32 +2042,33 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
 	rqst.rq_iov = iov;
 	rqst.rq_nvec = n_iov;
 
-	rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
-			    &rsp_iov);
-
-	cifs_small_buf_release(req);
-	rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
-
-	if (rc != 0) {
+	/* resource #4: response buffer */
+	rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
+	if (rc) {
 		cifs_stats_fail_inc(tcon, SMB2_CREATE_HE);
 		trace_smb3_posix_mkdir_err(xid, tcon->tid, ses->Suid,
-				    CREATE_NOT_FILE, FILE_WRITE_ATTRIBUTES, rc);
-		goto smb311_mkdir_exit;
-	} else
-		trace_smb3_posix_mkdir_done(xid, rsp->PersistentFileId, tcon->tid,
-				     ses->Suid, CREATE_NOT_FILE,
-				     FILE_WRITE_ATTRIBUTES);
+					   CREATE_NOT_FILE,
+					   FILE_WRITE_ATTRIBUTES, rc);
+		goto err_free_rsp_buf;
+	}
+
+	rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
+	trace_smb3_posix_mkdir_done(xid, rsp->PersistentFileId, tcon->tid,
+				    ses->Suid, CREATE_NOT_FILE,
+				    FILE_WRITE_ATTRIBUTES);
 
 	SMB2_close(xid, tcon, rsp->PersistentFileId, rsp->VolatileFileId);
 
 	/* Eventually save off posix specific response info and timestaps */
 
-smb311_mkdir_exit:
-	kfree(copy_path);
-	kfree(pc_buf);
+err_free_rsp_buf:
 	free_rsp_buf(resp_buftype, rsp);
+	kfree(pc_buf);
+err_free_req:
+	cifs_small_buf_release(req);
+err_free_path:
+	kfree(utf16_path);
 	return rc;
-
 }
 #endif /* SMB311 */
 
-- 
cgit v1.2.3


From a12d0c590cc7ae01892f06c5ad6d19580ecdd0de Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <paulo@paulo.ac>
Date: Sat, 23 Jun 2018 14:52:25 -0300
Subject: cifs: Make sure all data pages are signed correctly

Check if every data page is signed correctly in the signing helper.

Signed-off-by: Paulo Alcantara <palcantara@suse.de>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsencrypt.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index b4672eafc5bf..85b31cfa2f3c 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -83,7 +83,13 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
 
 		kaddr = (char *) kmap(rqst->rq_pages[i]) + offset;
 
-		crypto_shash_update(shash, kaddr, len);
+		rc = crypto_shash_update(shash, kaddr, len);
+		if (rc) {
+			cifs_dbg(VFS, "%s: Could not update with payload\n",
+				 __func__);
+			kunmap(rqst->rq_pages[i]);
+			return rc;
+		}
 
 		kunmap(rqst->rq_pages[i]);
 	}
-- 
cgit v1.2.3


From 21ba3845b59c733a79ed4fe1c4f3732e7ece9df7 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sun, 24 Jun 2018 23:18:52 -0500
Subject: smb3: fill in statfs fsid and correct namelen

Fill in the correct namelen (typically 255 not 4096) in the
statfs response and also fill in a reasonably unique fsid
(in this case taken from the volume id, and the creation time
of the volume).

In the case of the POSIX statfs all fields are now filled in,
and in the case of non-POSIX mounts, all fields are filled
in which can be.

Signed-off-by: Steve French <stfrench@gmail.com>
CC: Stable <stable@vger.kernel.org>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
---
 fs/cifs/cifsfs.c  | 18 ++++++++++--------
 fs/cifs/smb2ops.c |  2 ++
 fs/cifs/smb2pdu.c |  8 ++++++++
 fs/cifs/smb2pdu.h | 11 +++++++++++
 4 files changed, 31 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d5aa7ae917bf..69ec5427769c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -209,14 +209,16 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	xid = get_xid();
 
-	/*
-	 * PATH_MAX may be too long - it would presumably be total path,
-	 * but note that some servers (includinng Samba 3) have a shorter
-	 * maximum path.
-	 *
-	 * Instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO.
-	 */
-	buf->f_namelen = PATH_MAX;
+	if (le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength) > 0)
+		buf->f_namelen =
+		       le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength);
+	else
+		buf->f_namelen = PATH_MAX;
+
+	buf->f_fsid.val[0] = tcon->vol_serial_number;
+	/* are using part of create time for more randomness, see man statfs */
+	buf->f_fsid.val[1] =  (int)le64_to_cpu(tcon->vol_create_time);
+
 	buf->f_files = 0;	/* undefined */
 	buf->f_ffree = 0;	/* unlimited */
 
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index ae64cbef5e07..09506d918ecb 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -548,6 +548,8 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
 			FS_ATTRIBUTE_INFORMATION);
 	SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
 			FS_DEVICE_INFORMATION);
+	SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
+			FS_VOLUME_INFORMATION);
 	SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
 			FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */
 	if (no_cached_open)
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 641fe79708d0..6852ff5f06be 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -4045,6 +4045,9 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
 	} else if (level == FS_SECTOR_SIZE_INFORMATION) {
 		max_len = sizeof(struct smb3_fs_ss_info);
 		min_len = sizeof(struct smb3_fs_ss_info);
+	} else if (level == FS_VOLUME_INFORMATION) {
+		max_len = sizeof(struct smb3_fs_vol_info) + MAX_VOL_LABEL_LEN;
+		min_len = sizeof(struct smb3_fs_vol_info);
 	} else {
 		cifs_dbg(FYI, "Invalid qfsinfo level %d\n", level);
 		return -EINVAL;
@@ -4089,6 +4092,11 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
 		tcon->ss_flags = le32_to_cpu(ss_info->Flags);
 		tcon->perf_sector_size =
 			le32_to_cpu(ss_info->PhysicalBytesPerSectorForPerf);
+	} else if (level == FS_VOLUME_INFORMATION) {
+		struct smb3_fs_vol_info *vol_info = (struct smb3_fs_vol_info *)
+			(offset + (char *)rsp);
+		tcon->vol_serial_number = vol_info->VolumeSerialNumber;
+		tcon->vol_create_time = vol_info->VolumeCreationTime;
 	}
 
 qfsattr_exit:
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index a671adcc44a6..c2a4526512b5 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -1248,6 +1248,17 @@ struct smb3_fs_ss_info {
 	__le32 ByteOffsetForPartitionAlignment;
 } __packed;
 
+/* volume info struct - see MS-FSCC 2.5.9 */
+#define MAX_VOL_LABEL_LEN	32
+struct smb3_fs_vol_info {
+	__le64	VolumeCreationTime;
+	__u32	VolumeSerialNumber;
+	__le32	VolumeLabelLength; /* includes trailing null */
+	__u8	SupportsObjects; /* True if eg like NTFS, supports objects */
+	__u8	Reserved;
+	__u8	VolumeLabel[0]; /* variable len */
+} __packed;
+
 /* partial list of QUERY INFO levels */
 #define FILE_DIRECTORY_INFORMATION	1
 #define FILE_FULL_DIRECTORY_INFORMATION 2
-- 
cgit v1.2.3


From 2d304217832ea720337e7a6aa012aa828a77f9d4 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sun, 24 Jun 2018 23:28:12 -0500
Subject: smb3: add support for statfs for smb3.1.1 posix extensions

Output now matches expected stat -f output for all fields
except for Namelen and ID, which were addressed in a companion
patch (which retrieves them from existing SMB3 mechanisms
and works whether POSIX is enabled or not).

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
---
 fs/cifs/smb2ops.c   | 35 +++++++++++++++++++++++++-
 fs/cifs/smb2pdu.c   | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2pdu.h   |  1 +
 fs/cifs/smb2proto.h |  3 +++
 4 files changed, 109 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 09506d918ecb..e2a8b9d90ad8 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1533,6 +1533,39 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
 	return rc;
 }
 
+#ifdef CONFIG_CIFS_SMB311
+static int
+smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
+	     struct kstatfs *buf)
+{
+	int rc;
+	__le16 srch_path = 0; /* Null - open root of share */
+	u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+	struct cifs_open_parms oparms;
+	struct cifs_fid fid;
+
+	if (!tcon->posix_extensions)
+		return smb2_queryfs(xid, tcon, buf);
+
+	oparms.tcon = tcon;
+	oparms.desired_access = FILE_READ_ATTRIBUTES;
+	oparms.disposition = FILE_OPEN;
+	oparms.create_options = 0;
+	oparms.fid = &fid;
+	oparms.reconnect = false;
+
+	rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL, NULL);
+	if (rc)
+		return rc;
+
+	rc = SMB311_posix_qfs_info(xid, tcon, fid.persistent_fid,
+				   fid.volatile_fid, buf);
+	buf->f_type = SMB2_MAGIC_NUMBER;
+	SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+	return rc;
+}
+#endif /* SMB311 */
+
 static bool
 smb2_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2)
 {
@@ -3338,7 +3371,7 @@ struct smb_version_operations smb311_operations = {
 	.is_status_pending = smb2_is_status_pending,
 	.is_session_expired = smb2_is_session_expired,
 	.oplock_response = smb2_oplock_response,
-	.queryfs = smb2_queryfs,
+	.queryfs = smb311_queryfs,
 	.mand_lock = smb2_mand_lock,
 	.mand_unlock_range = smb2_unlock_range,
 	.push_mand_locks = smb2_push_mandatory_locks,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 6852ff5f06be..fa9fc3fab60e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -3938,6 +3938,27 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf,
 	return;
 }
 
+#ifdef CONFIG_CIFS_SMB311
+static void
+copy_posix_fs_info_to_kstatfs(FILE_SYSTEM_POSIX_INFO *response_data,
+			struct kstatfs *kst)
+{
+	kst->f_bsize = le32_to_cpu(response_data->BlockSize);
+	kst->f_blocks = le64_to_cpu(response_data->TotalBlocks);
+	kst->f_bfree =  le64_to_cpu(response_data->BlocksAvail);
+	if (response_data->UserBlocksAvail == cpu_to_le64(-1))
+		kst->f_bavail = kst->f_bfree;
+	else
+		kst->f_bavail = le64_to_cpu(response_data->UserBlocksAvail);
+	if (response_data->TotalFileNodes != cpu_to_le64(-1))
+		kst->f_files = le64_to_cpu(response_data->TotalFileNodes);
+	if (response_data->FreeFileNodes != cpu_to_le64(-1))
+		kst->f_ffree = le64_to_cpu(response_data->FreeFileNodes);
+
+	return;
+}
+#endif /* SMB311 */
+
 static int
 build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
 		   int outbuf_len, u64 persistent_fid, u64 volatile_fid)
@@ -3974,6 +3995,56 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
 	return 0;
 }
 
+#ifdef CONFIG_CIFS_SMB311
+int
+SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon,
+	      u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata)
+{
+	struct smb_rqst rqst;
+	struct smb2_query_info_rsp *rsp = NULL;
+	struct kvec iov;
+	struct kvec rsp_iov;
+	int rc = 0;
+	int resp_buftype;
+	struct cifs_ses *ses = tcon->ses;
+	FILE_SYSTEM_POSIX_INFO *info = NULL;
+	int flags = 0;
+
+	rc = build_qfs_info_req(&iov, tcon, FS_POSIX_INFORMATION,
+				sizeof(FILE_SYSTEM_POSIX_INFO),
+				persistent_fid, volatile_fid);
+	if (rc)
+		return rc;
+
+	if (smb3_encryption_required(tcon))
+		flags |= CIFS_TRANSFORM_REQ;
+
+	memset(&rqst, 0, sizeof(struct smb_rqst));
+	rqst.rq_iov = &iov;
+	rqst.rq_nvec = 1;
+
+	rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
+	cifs_small_buf_release(iov.iov_base);
+	if (rc) {
+		cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
+		goto posix_qfsinf_exit;
+	}
+	rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
+
+	info = (FILE_SYSTEM_POSIX_INFO *)(
+		le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
+	rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
+			  le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
+			  sizeof(FILE_SYSTEM_POSIX_INFO));
+	if (!rc)
+		copy_posix_fs_info_to_kstatfs(info, fsdata);
+
+posix_qfsinf_exit:
+	free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+	return rc;
+}
+#endif /* SMB311 */
+
 int
 SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
 	      u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata)
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index c2a4526512b5..ecb0feeac844 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -1223,6 +1223,7 @@ struct smb2_lease_ack {
 #define FS_DRIVER_PATH_INFORMATION	9 /* Local only */
 #define FS_VOLUME_FLAGS_INFORMATION	10 /* Local only */
 #define FS_SECTOR_SIZE_INFORMATION	11 /* SMB3 or later. Query */
+#define FS_POSIX_INFORMATION		100 /* SMB3.1.1 POSIX. Query */
 
 struct smb2_fs_full_size_info {
 	__le64 TotalAllocationUnits;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 6e6a4f2ec890..7019459c5748 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -197,6 +197,9 @@ void smb2_cancelled_close_fid(struct work_struct *work);
 extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
 			 u64 persistent_file_id, u64 volatile_file_id,
 			 struct kstatfs *FSData);
+extern int SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon,
+			 u64 persistent_file_id, u64 volatile_file_id,
+			 struct kstatfs *FSData);
 extern int SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
 			 u64 persistent_file_id, u64 volatile_file_id, int lvl);
 extern int SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon,
-- 
cgit v1.2.3


From 950132afd59385caf6e2b84e5235d069fa10681d Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Thu, 28 Jun 2018 18:46:40 -0500
Subject: cifs: add missing debug entries for kconfig options

/proc/fs/cifs/DebugData displays the features (Kconfig options)
used to build cifs.ko but it was missing some, and needed a comma
separator.  These can be useful in debugging certain problems
so we know which optional features were enabled in the user's build.
Also clarify them, by making them more closely match the
corresponding CONFIG_CIFS_* parm.

Old format:
Features: dfs fscache posix spnego xattr acl

New format:
Features: DFS,FSCACHE,SMB_DIRECT,STATS,DEBUG2,ALLOW_INSECURE_LEGACY,CIFS_POSIX,UPCALL(SPNEGO),XATTR,ACL

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.de>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/cifs_debug.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index bfe999505815..991bfb271908 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -160,25 +160,41 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 	seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
 	seq_printf(m, "Features:");
 #ifdef CONFIG_CIFS_DFS_UPCALL
-	seq_printf(m, " dfs");
+	seq_printf(m, " DFS");
 #endif
 #ifdef CONFIG_CIFS_FSCACHE
-	seq_printf(m, " fscache");
+	seq_printf(m, ",FSCACHE");
+#endif
+#ifdef CONFIG_CIFS_SMB_DIRECT
+	seq_printf(m, ",SMB_DIRECT");
+#endif
+#ifdef CONFIG_CIFS_STATS2
+	seq_printf(m, ",STATS2");
+#elif defined(CONFIG_CIFS_STATS)
+	seq_printf(m, ",STATS");
+#endif
+#ifdef CONFIG_CIFS_DEBUG2
+	seq_printf(m, ",DEBUG2");
+#elif defined(CONFIG_CIFS_DEBUG)
+	seq_printf(m, ",DEBUG");
+#endif
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+	seq_printf(m, ",ALLOW_INSECURE_LEGACY");
 #endif
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
-	seq_printf(m, " lanman");
+	seq_printf(m, ",WEAK_PW_HASH");
 #endif
 #ifdef CONFIG_CIFS_POSIX
-	seq_printf(m, " posix");
+	seq_printf(m, ",CIFS_POSIX");
 #endif
 #ifdef CONFIG_CIFS_UPCALL
-	seq_printf(m, " spnego");
+	seq_printf(m, ",UPCALL(SPNEGO)");
 #endif
 #ifdef CONFIG_CIFS_XATTR
-	seq_printf(m, " xattr");
+	seq_printf(m, ",XATTR");
 #endif
 #ifdef CONFIG_CIFS_ACL
-	seq_printf(m, " acl");
+	seq_printf(m, ",ACL");
 #endif
 	seq_putc(m, '\n');
 	seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
-- 
cgit v1.2.3


From 0fdfef9aa7ee68ddd508aef7c98630cfc054f8d6 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Thu, 28 Jun 2018 19:30:23 -0500
Subject: smb3: simplify code by removing CONFIG_CIFS_SMB311

We really, really want to be encouraging use of secure dialects,
and SMB3.1.1 offers useful security features, and will soon
be the recommended dialect for many use cases. Simplify the code
by removing the CONFIG_CIFS_SMB311 ifdef so users don't disable
it in the build, and create compatibility and/or security issues
with modern servers - many of which have been supporting this
dialect for multiple years.

Also clarify some of the Kconfig text for cifs.ko about
SMB3.1.1 and current supported features in the module.

Signed-off-by: Steve French <stfrench@microsoft.com>
Acked-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/Kconfig         | 33 ++++++++++++++-------------------
 fs/cifs/cifs_debug.c    |  3 +--
 fs/cifs/cifsfs.c        |  8 --------
 fs/cifs/cifsglob.h      |  8 --------
 fs/cifs/connect.c       | 11 +----------
 fs/cifs/inode.c         |  2 --
 fs/cifs/smb2misc.c      | 13 +++----------
 fs/cifs/smb2ops.c       |  6 ------
 fs/cifs/smb2pdu.c       | 31 +++----------------------------
 fs/cifs/smb2proto.h     |  2 --
 fs/cifs/smb2transport.c |  4 ----
 fs/cifs/transport.c     |  4 ----
 12 files changed, 22 insertions(+), 103 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 832eafbf803f..63d0d852998a 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -16,24 +16,28 @@ config CIFS
 	select CRYPTO_DES
 	help
 	  This is the client VFS module for the SMB3 family of NAS protocols,
-	  as well as for earlier dialects such as SMB2.1, SMB2 and the
+	  (including support for the most recent, most secure dialect SMB3.1.1)
+	  as well as for earlier dialects such as SMB2.1, SMB2 and the older
 	  Common Internet File System (CIFS) protocol.  CIFS was the successor
 	  to the original dialect, the Server Message Block (SMB) protocol, the
 	  native file sharing mechanism for most early PC operating systems.
 
-	  The SMB3 protocol is supported by most modern operating systems and
-	  NAS appliances (e.g. Samba, Windows 8, Windows 2012, MacOS).
+	  The SMB3 protocol is supported by most modern operating systems
+	  and NAS appliances (e.g. Samba, Windows 10, Windows Server 2016,
+	  MacOS) and even in the cloud (e.g. Microsoft Azure).
 	  The older CIFS protocol was included in Windows NT4, 2000 and XP (and
 	  later) as well by Samba (which provides excellent CIFS and SMB3
-	  server support for Linux and many other operating systems). Limited
-	  support for OS/2 and Windows ME and similar very old servers is
-	  provided as well.
+	  server support for Linux and many other operating systems). Use of
+	  dialects older than SMB2.1 is often discouraged on public networks.
+	  This module also provides limited support for OS/2 and Windows ME
+	  and similar very old servers.
 
-	  The cifs module provides an advanced network file system client
+	  This module provides an advanced network file system client
 	  for mounting to SMB3 (and CIFS) compliant servers.  It includes
 	  support for DFS (hierarchical name space), secure per-user
-	  session establishment via Kerberos or NTLM or NTLMv2,
-	  safe distributed caching (oplock), optional packet
+	  session establishment via Kerberos or NTLM or NTLMv2, RDMA
+	  (smbdirect), advanced security features, per-share encryption,
+	  directory leases, safe distributed caching (oplock), optional packet
 	  signing, Unicode and other internationalization improvements.
 
 	  In general, the default dialects, SMB3 and later, enable better
@@ -43,7 +47,7 @@ config CIFS
 	  than SMB3 mounts. SMB2/SMB3 mount options are also
 	  slightly simpler (compared to CIFS) due to protocol improvements.
 
-	  If you need to mount to Samba, Macs or Windows from this machine, say Y.
+	  If you need to mount to Samba, Azure, Macs or Windows from this machine, say Y.
 
 config CIFS_STATS
         bool "CIFS statistics"
@@ -201,15 +205,6 @@ config CIFS_NFSD_EXPORT
 	  help
 	   Allows NFS server to export a CIFS mounted share (nfsd over cifs)
 
-config CIFS_SMB311
-	bool "SMB3.1.1 network file system support"
-	depends on CIFS
-	select CRYPTO_SHA512
-
-	help
-	  This enables support for the newest, and most secure dialect, SMB3.11.
-	  If unsure, say Y
-
 config CIFS_SMB_DIRECT
 	bool "SMB Direct support (Experimental)"
 	depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 991bfb271908..cb516c950438 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -275,10 +275,9 @@ skip_rdma:
 			server->credits,  server->dialect);
 		if (server->sign)
 			seq_printf(m, " signed");
-#ifdef CONFIG_CIFS_SMB311
 		if (server->posix_ext_supported)
 			seq_printf(m, " posix");
-#endif /* 3.1.1 */
+
 		i++;
 		list_for_each(tmp2, &server->smb_ses_list) {
 			ses = list_entry(tmp2, struct cifs_ses,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 69ec5427769c..c162a416ddbf 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -483,20 +483,12 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 		seq_puts(s, ",persistenthandles");
 	else if (tcon->use_resilient)
 		seq_puts(s, ",resilienthandles");
-
-#ifdef CONFIG_CIFS_SMB311
 	if (tcon->posix_extensions)
 		seq_puts(s, ",posix");
 	else if (tcon->unix_ext)
 		seq_puts(s, ",unix");
 	else
 		seq_puts(s, ",nounix");
-#else
-	if (tcon->unix_ext)
-		seq_puts(s, ",unix");
-	else
-		seq_puts(s, ",nounix");
-#endif /* SMB311 */
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
 		seq_puts(s, ",posixpaths");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b57d1e22ecb5..3ec7e3063865 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -191,9 +191,7 @@ enum smb_version {
 	Smb_21,
 	Smb_30,
 	Smb_302,
-#ifdef CONFIG_CIFS_SMB311
 	Smb_311,
-#endif /* SMB311 */
 	Smb_3any,
 	Smb_default,
 	Smb_version_err
@@ -687,12 +685,10 @@ struct TCP_Server_Info {
 #endif
 	unsigned int	max_read;
 	unsigned int	max_write;
-#ifdef CONFIG_CIFS_SMB311
 	__le16	cipher_type;
 	 /* save initital negprot hash */
 	__u8	preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
 	bool	posix_ext_supported;
-#endif /* 3.1.1 */
 	struct delayed_work reconnect; /* reconnect workqueue job */
 	struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
 	unsigned long echo_interval;
@@ -886,9 +882,7 @@ struct cifs_ses {
 	__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
 	__u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
 	__u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
-#ifdef CONFIG_CIFS_SMB311
 	__u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
-#endif /* 3.1.1 */
 
 	/*
 	 * Network interfaces available on the server this session is
@@ -997,9 +991,7 @@ struct cifs_tcon {
 	bool seal:1;      /* transport encryption for this mounted share */
 	bool unix_ext:1;  /* if false disable Linux extensions to CIFS protocol
 				for this mount even if server would support */
-#ifdef CONFIG_CIFS_SMB311
 	bool posix_extensions; /* if true SMB3.11 posix extensions enabled */
-#endif /* CIFS_311 */
 	bool local_lease:1; /* check leases (only) on local system not remote */
 	bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
 	bool broken_sparse_sup; /* if server or share does not support sparse */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 842f45859968..67f91a6313a0 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -303,10 +303,8 @@ static const match_table_t cifs_smb_version_tokens = {
 	{ Smb_21, SMB21_VERSION_STRING },
 	{ Smb_30, SMB30_VERSION_STRING },
 	{ Smb_302, SMB302_VERSION_STRING },
-#ifdef CONFIG_CIFS_SMB311
 	{ Smb_311, SMB311_VERSION_STRING },
 	{ Smb_311, ALT_SMB311_VERSION_STRING },
-#endif /* SMB311 */
 	{ Smb_3any, SMB3ANY_VERSION_STRING },
 	{ Smb_default, SMBDEFAULT_VERSION_STRING },
 	{ Smb_version_err, NULL }
@@ -1219,12 +1217,10 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3)
 		vol->ops = &smb30_operations; /* currently identical with 3.0 */
 		vol->vals = &smb302_values;
 		break;
-#ifdef CONFIG_CIFS_SMB311
 	case Smb_311:
 		vol->ops = &smb311_operations;
 		vol->vals = &smb311_values;
 		break;
-#endif /* SMB311 */
 	case Smb_3any:
 		vol->ops = &smb30_operations; /* currently identical with 3.0 */
 		vol->vals = &smb3any_values;
@@ -3039,7 +3035,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 		}
 	}
 
-#ifdef CONFIG_CIFS_SMB311
 	if (volume_info->linux_ext) {
 		if (ses->server->posix_ext_supported) {
 			tcon->posix_extensions = true;
@@ -3051,7 +3046,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 			goto out_fail;
 		}
 	}
-#endif /* 311 */
 
 	/*
 	 * BB Do we need to wrap session_mutex around this TCon call and Unix
@@ -4005,11 +3999,9 @@ try_mount_again:
 		goto remote_path_check;
 	}
 
-#ifdef CONFIG_CIFS_SMB311
 	/* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
 	if (tcon->posix_extensions)
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
-#endif /* SMB3.11 */
 
 	/* tell server which Unix caps we support */
 	if (cap_unix(tcon->ses)) {
@@ -4472,11 +4464,10 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
 		goto out;
 	}
 
-#ifdef CONFIG_CIFS_SMB311
 	/* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
 	if (tcon->posix_extensions)
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
-#endif /* SMB3.11 */
+
 	if (cap_unix(ses))
 		reset_cifs_unix_caps(0, tcon, NULL, vol_info);
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f6abf18ca492..054e880c1dac 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1575,14 +1575,12 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
 
 	server = tcon->ses->server;
 
-#ifdef CONFIG_CIFS_SMB311
 	if ((server->ops->posix_mkdir) && (tcon->posix_extensions)) {
 		rc = server->ops->posix_mkdir(xid, inode, mode, tcon, full_path,
 					      cifs_sb);
 		d_drop(direntry); /* for time being always refresh inode info */
 		goto mkdir_out;
 	}
-#endif /* SMB311 */
 
 	if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 3ff7cec2da81..303d4592ebe7 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -93,7 +93,6 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
 	/* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
 };
 
-#ifdef CONFIG_CIFS_SMB311
 static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
 			      __u32 non_ctxlen)
 {
@@ -127,7 +126,6 @@ static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
 	/* length of negcontexts including pad from end of sec blob to them */
 	return (len - nc_offset) + size_of_pad_before_neg_ctxts;
 }
-#endif /* CIFS_SMB311 */
 
 int
 smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
@@ -222,10 +220,9 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
 
 	clc_len = smb2_calc_size(buf, srvr);
 
-#ifdef CONFIG_CIFS_SMB311
 	if (shdr->Command == SMB2_NEGOTIATE)
 		clc_len += get_neg_ctxt_len(shdr, len, clc_len);
-#endif /* SMB311 */
+
 	if (len != clc_len) {
 		cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n",
 			 clc_len, len, mid);
@@ -451,15 +448,13 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb)
 	/* Windows doesn't allow paths beginning with \ */
 	if (from[0] == '\\')
 		start_of_path = from + 1;
-#ifdef CONFIG_CIFS_SMB311
+
 	/* SMB311 POSIX extensions paths do not include leading slash */
 	else if (cifs_sb_master_tlink(cifs_sb) &&
 		 cifs_sb_master_tcon(cifs_sb)->posix_extensions &&
 		 (from[0] == '/')) {
 		start_of_path = from + 1;
-	}
-#endif /* 311 */
-	else
+	} else
 		start_of_path = from;
 
 	to = cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len,
@@ -759,7 +754,6 @@ smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
 	return 0;
 }
 
-#ifdef CONFIG_CIFS_SMB311
 /**
  * smb311_update_preauth_hash - update @ses hash with the packet data in @iov
  *
@@ -821,4 +815,3 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec)
 
 	return 0;
 }
-#endif
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e2a8b9d90ad8..27b69977809d 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1533,7 +1533,6 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
 	return rc;
 }
 
-#ifdef CONFIG_CIFS_SMB311
 static int
 smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
 	     struct kstatfs *buf)
@@ -1564,7 +1563,6 @@ smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
 	SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
 	return rc;
 }
-#endif /* SMB311 */
 
 static bool
 smb2_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2)
@@ -3303,7 +3301,6 @@ struct smb_version_operations smb30_operations = {
 	.next_header = smb2_next_header,
 };
 
-#ifdef CONFIG_CIFS_SMB311
 struct smb_version_operations smb311_operations = {
 	.compare_fids = smb2_compare_fids,
 	.setup_request = smb2_setup_request,
@@ -3404,7 +3401,6 @@ struct smb_version_operations smb311_operations = {
 #endif /* CIFS_XATTR */
 	.next_header = smb2_next_header,
 };
-#endif /* CIFS_SMB311 */
 
 struct smb_version_values smb20_values = {
 	.version_string = SMB20_VERSION_STRING,
@@ -3532,7 +3528,6 @@ struct smb_version_values smb302_values = {
 	.create_lease_size = sizeof(struct create_lease_v2),
 };
 
-#ifdef CONFIG_CIFS_SMB311
 struct smb_version_values smb311_values = {
 	.version_string = SMB311_VERSION_STRING,
 	.protocol_id = SMB311_PROT_ID,
@@ -3553,4 +3548,3 @@ struct smb_version_values smb311_values = {
 	.signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
 	.create_lease_size = sizeof(struct create_lease_v2),
 };
-#endif /* SMB311 */
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index fa9fc3fab60e..4ea5528fc15c 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -370,7 +370,7 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
 	return rc;
 }
 
-#ifdef CONFIG_CIFS_SMB311
+
 /* offset is sizeof smb2_negotiate_req but rounded up to 8 bytes */
 #define OFFSET_OF_NEG_CONTEXT 0x68  /* sizeof(struct smb2_negotiate_req) */
 
@@ -585,13 +585,6 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode)
 	return 0;
 }
 
-#else
-static void assemble_neg_contexts(struct smb2_negotiate_req *req,
-				  unsigned int *total_len)
-{
-	return;
-}
-#endif /* SMB311 */
 
 /*
  *
@@ -636,10 +629,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 		return rc;
 
 	req->sync_hdr.SessionId = 0;
-#ifdef CONFIG_CIFS_SMB311
+
 	memset(server->preauth_sha_hash, 0, SMB2_PREAUTH_HASH_SIZE);
 	memset(ses->preauth_sha_hash, 0, SMB2_PREAUTH_HASH_SIZE);
-#endif
 
 	if (strcmp(ses->server->vals->version_string,
 		   SMB3ANY_VERSION_STRING) == 0) {
@@ -741,10 +733,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 		cifs_dbg(FYI, "negotiated smb3.0 dialect\n");
 	else if (rsp->DialectRevision == cpu_to_le16(SMB302_PROT_ID))
 		cifs_dbg(FYI, "negotiated smb3.02 dialect\n");
-#ifdef CONFIG_CIFS_SMB311
 	else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID))
 		cifs_dbg(FYI, "negotiated smb3.1.1 dialect\n");
-#endif /* SMB311 */
 	else {
 		cifs_dbg(VFS, "Illegal dialect returned by server 0x%x\n",
 			 le16_to_cpu(rsp->DialectRevision));
@@ -753,9 +743,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 	}
 	server->dialect = le16_to_cpu(rsp->DialectRevision);
 
-	/* BB: add check that dialect was valid given dialect(s) we asked for */
-
-#ifdef CONFIG_CIFS_SMB311
 	/*
 	 * Keep a copy of the hash after negprot. This hash will be
 	 * the starting hash value for all sessions made from this
@@ -763,7 +750,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 	 */
 	memcpy(server->preauth_sha_hash, ses->preauth_sha_hash,
 	       SMB2_PREAUTH_HASH_SIZE);
-#endif
+
 	/* SMB2 only has an extended negflavor */
 	server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
 	/* set it to the maximum buffer size value we can send with 1 credit */
@@ -804,7 +791,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 			rc = -EIO;
 	}
 
-#ifdef CONFIG_CIFS_SMB311
 	if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
 		if (rsp->NegotiateContextCount)
 			rc = smb311_decode_neg_context(rsp, server,
@@ -812,7 +798,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 		else
 			cifs_dbg(VFS, "Missing expected negotiate contexts\n");
 	}
-#endif /* CONFIG_CIFS_SMB311 */
 neg_exit:
 	free_rsp_buf(resp_buftype, rsp);
 	return rc;
@@ -1373,13 +1358,11 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
 	sess_data->nls_cp = (struct nls_table *) nls_cp;
 	sess_data->previous_session = ses->Suid;
 
-#ifdef CONFIG_CIFS_SMB311
 	/*
 	 * Initialize the session hash with the server one.
 	 */
 	memcpy(ses->preauth_sha_hash, ses->server->preauth_sha_hash,
 	       SMB2_PREAUTH_HASH_SIZE);
-#endif
 
 	while (sess_data->func)
 		sess_data->func(sess_data);
@@ -1920,7 +1903,6 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
 	return 0;
 }
 
-#ifdef CONFIG_CIFS_SMB311
 int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
 			       umode_t mode, struct cifs_tcon *tcon,
 			       const char *full_path,
@@ -2070,7 +2052,6 @@ err_free_path:
 	kfree(utf16_path);
 	return rc;
 }
-#endif /* SMB311 */
 
 int
 SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
@@ -2210,7 +2191,6 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 		dhc_buf = iov[n_iov-1].iov_base;
 	}
 
-#ifdef CONFIG_CIFS_SMB311
 	if (tcon->posix_extensions) {
 		if (n_iov > 2) {
 			struct create_context *ccontext =
@@ -2229,7 +2209,6 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 		}
 		pc_buf = iov[n_iov-1].iov_base;
 	}
-#endif /* SMB311 */
 
 	memset(&rqst, 0, sizeof(struct smb_rqst));
 	rqst.rq_iov = iov;
@@ -3938,7 +3917,6 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf,
 	return;
 }
 
-#ifdef CONFIG_CIFS_SMB311
 static void
 copy_posix_fs_info_to_kstatfs(FILE_SYSTEM_POSIX_INFO *response_data,
 			struct kstatfs *kst)
@@ -3957,7 +3935,6 @@ copy_posix_fs_info_to_kstatfs(FILE_SYSTEM_POSIX_INFO *response_data,
 
 	return;
 }
-#endif /* SMB311 */
 
 static int
 build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
@@ -3995,7 +3972,6 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
 	return 0;
 }
 
-#ifdef CONFIG_CIFS_SMB311
 int
 SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon,
 	      u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata)
@@ -4043,7 +4019,6 @@ posix_qfsinf_exit:
 	free_rsp_buf(resp_buftype, rsp_iov.iov_base);
 	return rc;
 }
-#endif /* SMB311 */
 
 int
 SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 7019459c5748..98d9b30c16a6 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -216,9 +216,7 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
 
 extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
 					enum securityEnum);
-#ifdef CONFIG_CIFS_SMB311
 extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server);
 extern int smb311_update_preauth_hash(struct cifs_ses *ses,
 				      struct kvec *iov, int nvec);
-#endif
 #endif			/* _SMB2PROTO_H */
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 719d55e63d88..3f778937c0e2 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -70,7 +70,6 @@ err:
 	return rc;
 }
 
-#ifdef CONFIG_CIFS_SMB311
 int
 smb311_crypto_shash_allocate(struct TCP_Server_Info *server)
 {
@@ -98,7 +97,6 @@ err:
 	cifs_free_hash(&p->hmacsha256, &p->sdeschmacsha256);
 	return rc;
 }
-#endif
 
 static struct cifs_ses *
 smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id)
@@ -395,7 +393,6 @@ generate_smb30signingkey(struct cifs_ses *ses)
 	return generate_smb3signingkey(ses, &triplet);
 }
 
-#ifdef CONFIG_CIFS_SMB311
 int
 generate_smb311signingkey(struct cifs_ses *ses)
 
@@ -423,7 +420,6 @@ generate_smb311signingkey(struct cifs_ses *ses)
 
 	return generate_smb3signingkey(ses, &triplet);
 }
-#endif /* 311 */
 
 int
 smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index a341ec839c83..0f9156af5eb0 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -807,11 +807,9 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
 	if (rc < 0)
 		goto out;
 
-#ifdef CONFIG_CIFS_SMB311
 	if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
 		smb311_update_preauth_hash(ses, rqst->rq_iov,
 					   rqst->rq_nvec);
-#endif
 
 	if (timeout == CIFS_ASYNC_OP)
 		goto out;
@@ -852,7 +850,6 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
 	else
 		*resp_buf_type = CIFS_SMALL_BUFFER;
 
-#ifdef CONFIG_CIFS_SMB311
 	if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
 		struct kvec iov = {
 			.iov_base = resp_iov->iov_base,
@@ -860,7 +857,6 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
 		};
 		smb311_update_preauth_hash(ses, &iov, 1);
 	}
-#endif
 
 	credits = ses->server->ops->get_credits(midQ);
 
-- 
cgit v1.2.3


From c3ed44026cd07fda5976ecb79225759901a160b4 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Thu, 28 Jun 2018 22:53:39 -0500
Subject: smb3: remove noisy warning message on mount

Some servers, like Samba, don't support the fsctl for
query_network_interface_info so don't log a noisy warning
message on mount for this by default unless the error is more serious.
Lower the error to an FYI level so it does not get logged by
default.

Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2ops.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 27b69977809d..4ce72055ca0a 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -444,7 +444,11 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
 			FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */,
 			NULL /* no data input */, 0 /* no data input */,
 			(char **)&out_buf, &ret_data_len);
-	if (rc != 0) {
+	if (rc == -EOPNOTSUPP) {
+		cifs_dbg(FYI,
+			 "server does not support query network interfaces\n");
+		goto out;
+	} else if (rc != 0) {
 		cifs_dbg(VFS, "error %d on ioctl to get interface list\n", rc);
 		goto out;
 	}
-- 
cgit v1.2.3


From 8a69e96e610b3ec8a55f6fd4e44363452838caa7 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 29 Jun 2018 16:06:15 -0500
Subject: smb3: snapshot mounts are read-only and make sure info is displayable
 about the mount

snapshot mounts were not marked as read-only and did not display the snapshot
time (in /proc/mounts) specified on mount

With this patch, the snapshot mount can not be written to (see "ro" in the
/proc/mounts line) and the previously missing snapshot timewarp token time
is also displayed.  Sample line from /proc/mounts with the patch:

//127.0.0.1/scratch /mnt2 smb3 ro,relatime,vers=default,cache=strict,username=testuser,domain=,uid=0,noforceuid,gid=0,noforcegid,addr=127.0.0.1,file_mode=0755,dir_mode=0755,soft,nounix,serverino,mapposix,noperm,rsize=1048576,wsize=1048576,echo_interval=60,snapshot=1234567,actimeo=1 0 0

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.de>
---
 fs/cifs/cifsfs.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c162a416ddbf..de16078e456b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -139,6 +139,9 @@ cifs_read_super(struct super_block *sb)
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL)
 		sb->s_flags |= SB_POSIXACL;
 
+	if (tcon->snapshot_time)
+		sb->s_flags |= SB_RDONLY;
+
 	if (tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files)
 		sb->s_maxbytes = MAX_LFS_FILESIZE;
 	else
@@ -540,6 +543,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 	seq_printf(s, ",wsize=%u", cifs_sb->wsize);
 	seq_printf(s, ",echo_interval=%lu",
 			tcon->ses->server->echo_interval / HZ);
+	if (tcon->snapshot_time)
+		seq_printf(s, ",snapshot=%llu", tcon->snapshot_time);
 	/* convert actimeo and display it in seconds */
 	seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
 
-- 
cgit v1.2.3


From 289131e1f1e6ad8c661ec05e176b8f0915672059 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Mon, 23 Jul 2018 09:15:18 -0500
Subject: SMB3: Number of requests sent should be displayed for SMB3 not just
 CIFS

For SMB2/SMB3 the number of requests sent was not displayed
in /proc/fs/cifs/Stats unless CONFIG_CIFS_STATS2 was
enabled (only number of failed requests displayed). As
with earlier dialects, we should be displaying these
counters if CONFIG_CIFS_STATS is enabled. They
are important for debugging.

e.g. when you cat /proc/fs/cifs/Stats (before the patch)
Resources in use
CIFS Session: 1
Share (unique mount targets): 2
SMB Request/Response Buffer: 1 Pool size: 5
SMB Small Req/Resp Buffer: 1 Pool size: 30
Operations (MIDs): 0

0 session 0 share reconnects
Total vfs operations: 690 maximum at one time: 2

1) \\localhost\test
SMBs: 975
Negotiates: 0 sent 0 failed
SessionSetups: 0 sent 0 failed
Logoffs: 0 sent 0 failed
TreeConnects: 0 sent 0 failed
TreeDisconnects: 0 sent 0 failed
Creates: 0 sent 2 failed
Closes: 0 sent 0 failed
Flushes: 0 sent 0 failed
Reads: 0 sent 0 failed
Writes: 0 sent 0 failed
Locks: 0 sent 0 failed
IOCTLs: 0 sent 1 failed
Cancels: 0 sent 0 failed
Echos: 0 sent 0 failed
QueryDirectories: 0 sent 63 failed

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/smb2pdu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 4ea5528fc15c..0b4d7ebb812d 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -360,7 +360,7 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
 		       total_len);
 
 	if (tcon != NULL) {
-#ifdef CONFIG_CIFS_STATS2
+#ifdef CONFIG_CIFS_STATS
 		uint16_t com_code = le16_to_cpu(smb2_command);
 		cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]);
 #endif
-- 
cgit v1.2.3


From 1995d28f84b3d2a8f586fcd271207f798b80ccd8 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 27 Jul 2018 15:14:04 -0500
Subject: smb3: remove per-session operations from per-tree connection stats

Remove counters from the per-tree connection /proc/fs/cifs/Stats
output that will always be zero (since they are not per-tcon ops)
ie SMB3 Negotiate, SessionSetup, Logoff, Echo, Cancel.

Also clarify "sent" to be "total" per Pavel's suggestion
(since this "total" includes the total for all operations that we try to
send, whether or not successfully sent). Sample output below:

Resources in use
CIFS Session: 1
Share (unique mount targets): 2
SMB Request/Response Buffer: 1 Pool size: 5
SMB Small Req/Resp Buffer: 1 Pool size: 30
Operations (MIDs): 0

1 session 2 share reconnects
Total vfs operations: 23 maximum at one time: 2

1) \\localhost\test
SMBs: 45
TreeConnects: 2 total 0 failed
TreeDisconnects: 0 total 0 failed
Creates: 13 total 2 failed
Closes: 9 total 0 failed
Flushes: 0 total 0 failed
Reads: 0 total 0 failed
Writes: 1 total 0 failed
Locks: 0 total 0 failed
IOCTLs: 3 total 1 failed
QueryDirectories: 4 total 2 failed
ChangeNotifies: 0 total 0 failed
QueryInfos: 10 total 0 failed
SetInfos: 3 total 0 failed
OplockBreaks: 0 sent 0 failed

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/smb2ops.c | 46 ++++++++++++++++++----------------------------
 1 file changed, 18 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 4ce72055ca0a..dad6dc763200 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -927,58 +927,48 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
 #ifdef CONFIG_CIFS_STATS
 	atomic_t *sent = tcon->stats.smb2_stats.smb2_com_sent;
 	atomic_t *failed = tcon->stats.smb2_stats.smb2_com_failed;
-	seq_printf(m, "\nNegotiates: %d sent %d failed",
-		   atomic_read(&sent[SMB2_NEGOTIATE_HE]),
-		   atomic_read(&failed[SMB2_NEGOTIATE_HE]));
-	seq_printf(m, "\nSessionSetups: %d sent %d failed",
-		   atomic_read(&sent[SMB2_SESSION_SETUP_HE]),
-		   atomic_read(&failed[SMB2_SESSION_SETUP_HE]));
-	seq_printf(m, "\nLogoffs: %d sent %d failed",
-		   atomic_read(&sent[SMB2_LOGOFF_HE]),
-		   atomic_read(&failed[SMB2_LOGOFF_HE]));
-	seq_printf(m, "\nTreeConnects: %d sent %d failed",
+
+	/*
+	 *  Can't display SMB2_NEGOTIATE, SESSION_SETUP, LOGOFF, CANCEL and ECHO
+	 *  totals (requests sent) since those SMBs are per-session not per tcon
+	 */
+	seq_printf(m, "\nTreeConnects: %d total %d failed",
 		   atomic_read(&sent[SMB2_TREE_CONNECT_HE]),
 		   atomic_read(&failed[SMB2_TREE_CONNECT_HE]));
-	seq_printf(m, "\nTreeDisconnects: %d sent %d failed",
+	seq_printf(m, "\nTreeDisconnects: %d total %d failed",
 		   atomic_read(&sent[SMB2_TREE_DISCONNECT_HE]),
 		   atomic_read(&failed[SMB2_TREE_DISCONNECT_HE]));
-	seq_printf(m, "\nCreates: %d sent %d failed",
+	seq_printf(m, "\nCreates: %d total %d failed",
 		   atomic_read(&sent[SMB2_CREATE_HE]),
 		   atomic_read(&failed[SMB2_CREATE_HE]));
-	seq_printf(m, "\nCloses: %d sent %d failed",
+	seq_printf(m, "\nCloses: %d total %d failed",
 		   atomic_read(&sent[SMB2_CLOSE_HE]),
 		   atomic_read(&failed[SMB2_CLOSE_HE]));
-	seq_printf(m, "\nFlushes: %d sent %d failed",
+	seq_printf(m, "\nFlushes: %d total %d failed",
 		   atomic_read(&sent[SMB2_FLUSH_HE]),
 		   atomic_read(&failed[SMB2_FLUSH_HE]));
-	seq_printf(m, "\nReads: %d sent %d failed",
+	seq_printf(m, "\nReads: %d total %d failed",
 		   atomic_read(&sent[SMB2_READ_HE]),
 		   atomic_read(&failed[SMB2_READ_HE]));
-	seq_printf(m, "\nWrites: %d sent %d failed",
+	seq_printf(m, "\nWrites: %d total %d failed",
 		   atomic_read(&sent[SMB2_WRITE_HE]),
 		   atomic_read(&failed[SMB2_WRITE_HE]));
-	seq_printf(m, "\nLocks: %d sent %d failed",
+	seq_printf(m, "\nLocks: %d total %d failed",
 		   atomic_read(&sent[SMB2_LOCK_HE]),
 		   atomic_read(&failed[SMB2_LOCK_HE]));
-	seq_printf(m, "\nIOCTLs: %d sent %d failed",
+	seq_printf(m, "\nIOCTLs: %d total %d failed",
 		   atomic_read(&sent[SMB2_IOCTL_HE]),
 		   atomic_read(&failed[SMB2_IOCTL_HE]));
-	seq_printf(m, "\nCancels: %d sent %d failed",
-		   atomic_read(&sent[SMB2_CANCEL_HE]),
-		   atomic_read(&failed[SMB2_CANCEL_HE]));
-	seq_printf(m, "\nEchos: %d sent %d failed",
-		   atomic_read(&sent[SMB2_ECHO_HE]),
-		   atomic_read(&failed[SMB2_ECHO_HE]));
-	seq_printf(m, "\nQueryDirectories: %d sent %d failed",
+	seq_printf(m, "\nQueryDirectories: %d total %d failed",
 		   atomic_read(&sent[SMB2_QUERY_DIRECTORY_HE]),
 		   atomic_read(&failed[SMB2_QUERY_DIRECTORY_HE]));
-	seq_printf(m, "\nChangeNotifies: %d sent %d failed",
+	seq_printf(m, "\nChangeNotifies: %d total %d failed",
 		   atomic_read(&sent[SMB2_CHANGE_NOTIFY_HE]),
 		   atomic_read(&failed[SMB2_CHANGE_NOTIFY_HE]));
-	seq_printf(m, "\nQueryInfos: %d sent %d failed",
+	seq_printf(m, "\nQueryInfos: %d total %d failed",
 		   atomic_read(&sent[SMB2_QUERY_INFO_HE]),
 		   atomic_read(&failed[SMB2_QUERY_INFO_HE]));
-	seq_printf(m, "\nSetInfos: %d sent %d failed",
+	seq_printf(m, "\nSetInfos: %d total %d failed",
 		   atomic_read(&sent[SMB2_SET_INFO_HE]),
 		   atomic_read(&failed[SMB2_SET_INFO_HE]));
 	seq_printf(m, "\nOplockBreaks: %d sent %d failed",
-- 
cgit v1.2.3


From 22783155f4bf956c346a81624ec9258930a6fe06 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 27 Jul 2018 22:01:49 -0500
Subject: smb3: don't request leases in symlink creation and query

Fixes problem pointed out by Pavel in discussions about commit
729c0c9dd55204f0c9a823ac8a7bfa83d36c7e78

Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
CC: Stable <stable@vger.kernel.org> # 3.18.x+
---
 fs/cifs/link.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index de41f96aba49..2148b0f60e5e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -396,7 +396,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 	struct cifs_io_parms io_parms;
 	int buf_type = CIFS_NO_BUFFER;
 	__le16 *utf16_path;
-	__u8 oplock = SMB2_OPLOCK_LEVEL_II;
+	__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
 	struct smb2_file_all_info *pfile_info = NULL;
 
 	oparms.tcon = tcon;
@@ -459,7 +459,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 	struct cifs_io_parms io_parms;
 	int create_options = CREATE_NOT_DIR;
 	__le16 *utf16_path;
-	__u8 oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+	__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
 	struct kvec iov[2];
 
 	if (backup_cred(cifs_sb))
-- 
cgit v1.2.3


From 06188fcf9c068a2a8e82ad1e2510a008373150e2 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sun, 29 Jul 2018 17:13:39 -0500
Subject: cifs: remove unused stats

These timers were a good idea but weren't used in current code,
and the idea was cifs specific.  A future patch will add similar timers
for SMB2/SMB3, but there is no sense using memory for cifs timers that
aren't used in current code.

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/cifsglob.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3ec7e3063865..8826bc0a9b15 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -961,20 +961,6 @@ struct cifs_tcon {
 			atomic_t smb2_com_failed[NUMBER_OF_SMB2_COMMANDS];
 		} smb2_stats;
 	} stats;
-#ifdef CONFIG_CIFS_STATS2
-	unsigned long long time_writes;
-	unsigned long long time_reads;
-	unsigned long long time_opens;
-	unsigned long long time_deletes;
-	unsigned long long time_closes;
-	unsigned long long time_mkdirs;
-	unsigned long long time_rmdirs;
-	unsigned long long time_renames;
-	unsigned long long time_t2renames;
-	unsigned long long time_ffirst;
-	unsigned long long time_fnext;
-	unsigned long long time_fclose;
-#endif /* CONFIG_CIFS_STATS2 */
 	__u64    bytes_read;
 	__u64    bytes_written;
 	spinlock_t stat_lock;  /* protects the two fields above */
-- 
cgit v1.2.3


From e68a932b0b8e38eaa2111fc3aab31ff66988f1c4 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Mon, 30 Jul 2018 14:23:58 -0500
Subject: smb3: add tracepoint for session expired or deleted

When debugging reconnection problems, we want to be able to more easily
trace cases in which the server has marked the SMB3 session
expired or deleted (to distinguish them from timeout cases).

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/smb2ops.c | 4 ++++
 fs/cifs/trace.h   | 1 +
 2 files changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index dad6dc763200..314556c083f3 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1483,7 +1483,11 @@ smb2_is_session_expired(char *buf)
 	    shdr->Status != STATUS_USER_SESSION_DELETED)
 		return false;
 
+	trace_smb3_ses_expired(shdr->TreeId, shdr->SessionId,
+			       le16_to_cpu(shdr->Command),
+			       le64_to_cpu(shdr->MessageId));
 	cifs_dbg(FYI, "Session expired or deleted\n");
+
 	return true;
 }
 
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index 67e413f6ee4d..0fdf2f5c07ba 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -281,6 +281,7 @@ DEFINE_EVENT(smb3_cmd_done_class, smb3_##name,    \
 	TP_ARGS(tid, sesid, cmd, mid))
 
 DEFINE_SMB3_CMD_DONE_EVENT(cmd_done);
+DEFINE_SMB3_CMD_DONE_EVENT(ses_expired);
 
 DECLARE_EVENT_CLASS(smb3_exit_err_class,
 	TP_PROTO(unsigned int xid,
-- 
cgit v1.2.3


From bf1fdeb7899a86adfbe0b521bee5cf78bb870a14 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Mon, 30 Jul 2018 19:23:09 -0500
Subject: smb3: add reconnect tracepoints

Add tracepoints for reconnecting an smb3 session

Example output (from trace-cmd) with the patch
(showing the session marked for reconnect, the stat failing, and then
the subsequent SMB3 commands after the server comes back up).
The "smb3_reconnect" event is the new one.

           cifsd-25993 [000] .... 29635.368265: smb3_reconnect: server=localhost current_mid=0x1e
            stat-26200 [001] .... 29638.516403: smb3_enter: 	cifs_revalidate_dentry_attr: xid=22
            stat-26200 [001] .... 29648.723296: smb3_exit_err: 	cifs_revalidate_dentry_attr: xid=22 rc=-112
     kworker/0:1-22830 [000] .... 29653.850947: smb3_cmd_done: 	sid=0x0 tid=0x0 cmd=0 mid=0
     kworker/0:1-22830 [000] .... 29653.851191: smb3_cmd_err: 	sid=0x8ae4683c tid=0x0 cmd=1 mid=1 status=0xc0000016 rc=-5
     kworker/0:1-22830 [000] .... 29653.855254: smb3_cmd_done: 	sid=0x8ae4683c tid=0x0 cmd=1 mid=2
     kworker/0:1-22830 [000] .... 29653.855482: smb3_cmd_done: 	sid=0x8ae4683c tid=0x8084f30d cmd=3 mid=3

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
---
 fs/cifs/connect.c   |  1 +
 fs/cifs/trace.h     | 26 ++++++++++++++++++++++++++
 fs/cifs/transport.c |  2 ++
 3 files changed, 29 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 67f91a6313a0..d9bd10d295a9 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -348,6 +348,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 	server->max_read = 0;
 
 	cifs_dbg(FYI, "Reconnecting tcp session\n");
+	trace_smb3_reconnect(server->CurrentMid, server->hostname);
 
 	/* before reconnecting the tcp session, mark the smb session (uid)
 		and the tid bad so they are not used until reconnected */
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index 0fdf2f5c07ba..6b50b57e2416 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -423,6 +423,32 @@ DEFINE_EVENT(smb3_open_done_class, smb3_##name,  \
 DEFINE_SMB3_OPEN_DONE_EVENT(open_done);
 DEFINE_SMB3_OPEN_DONE_EVENT(posix_mkdir_done);
 
+DECLARE_EVENT_CLASS(smb3_reconnect_class,
+	TP_PROTO(__u64	currmid,
+		char *hostname),
+	TP_ARGS(currmid, hostname),
+	TP_STRUCT__entry(
+		__field(__u64, currmid)
+		__field(char *, hostname)
+	),
+	TP_fast_assign(
+		__entry->currmid = currmid;
+		__entry->hostname = hostname;
+	),
+	TP_printk("server=%s current_mid=0x%llx",
+		__entry->hostname,
+		__entry->currmid)
+)
+
+#define DEFINE_SMB3_RECONNECT_EVENT(name)        \
+DEFINE_EVENT(smb3_reconnect_class, smb3_##name,  \
+	TP_PROTO(__u64	currmid,		\
+		char *hostname),		\
+	TP_ARGS(currmid, hostname))
+
+DEFINE_SMB3_RECONNECT_EVENT(reconnect);
+DEFINE_SMB3_RECONNECT_EVENT(partial_send_reconnect);
+
 #endif /* _CIFS_TRACE_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 0f9156af5eb0..357d25351ffa 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -361,6 +361,8 @@ uncork:
 		 * socket so the server throws away the partial SMB
 		 */
 		server->tcpStatus = CifsNeedReconnect;
+		trace_smb3_partial_send_reconnect(server->CurrentMid,
+						  server->hostname);
 	}
 smbd_done:
 	if (rc < 0 && rc != -EINTR)
-- 
cgit v1.2.3


From 9da6ec7775d2cd76df53fbf4f1f35f6d490204f5 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Tue, 31 Jul 2018 08:48:22 +1000
Subject: cifs: use a refcount to protect open/closing the cached file handle

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Cc: <stable@vger.kernel.org>
---
 fs/cifs/cifsglob.h  |  1 +
 fs/cifs/smb2inode.c |  4 +++-
 fs/cifs/smb2ops.c   | 31 ++++++++++++++++++++++++++-----
 fs/cifs/smb2proto.h |  1 +
 4 files changed, 31 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8826bc0a9b15..0e6fd5fa4eb6 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -907,6 +907,7 @@ cap_unix(struct cifs_ses *ses)
 
 struct cached_fid {
 	bool is_valid:1;	/* Do we have a useable root fid */
+	struct kref refcount;
 	struct cifs_fid *fid;
 	struct mutex fid_mutex;
 	struct cifs_tcon *tcon;
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index d01ad706d7fc..f22cbc0d1869 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -120,7 +120,9 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
 		break;
 	}
 
-	if (use_cached_root_handle == false)
+	if (use_cached_root_handle)
+		close_shroot(&tcon->crfid);
+	else
 		rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
 	if (tmprc)
 		rc = tmprc;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 314556c083f3..8929426ddaa6 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -470,21 +470,36 @@ out:
 	return rc;
 }
 
-void
-smb2_cached_lease_break(struct work_struct *work)
+static void
+smb2_close_cached_fid(struct kref *ref)
 {
-	struct cached_fid *cfid = container_of(work,
-				struct cached_fid, lease_break);
-	mutex_lock(&cfid->fid_mutex);
+	struct cached_fid *cfid = container_of(ref, struct cached_fid,
+					       refcount);
+
 	if (cfid->is_valid) {
 		cifs_dbg(FYI, "clear cached root file handle\n");
 		SMB2_close(0, cfid->tcon, cfid->fid->persistent_fid,
 			   cfid->fid->volatile_fid);
 		cfid->is_valid = false;
 	}
+}
+
+void close_shroot(struct cached_fid *cfid)
+{
+	mutex_lock(&cfid->fid_mutex);
+	kref_put(&cfid->refcount, smb2_close_cached_fid);
 	mutex_unlock(&cfid->fid_mutex);
 }
 
+void
+smb2_cached_lease_break(struct work_struct *work)
+{
+	struct cached_fid *cfid = container_of(work,
+				struct cached_fid, lease_break);
+
+	close_shroot(cfid);
+}
+
 /*
  * Open the directory at the root of a share
  */
@@ -499,6 +514,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
 	if (tcon->crfid.is_valid) {
 		cifs_dbg(FYI, "found a cached root file handle\n");
 		memcpy(pfid, tcon->crfid.fid, sizeof(struct cifs_fid));
+		kref_get(&tcon->crfid.refcount);
 		mutex_unlock(&tcon->crfid.fid_mutex);
 		return 0;
 	}
@@ -515,6 +531,8 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
 		memcpy(tcon->crfid.fid, pfid, sizeof(struct cifs_fid));
 		tcon->crfid.tcon = tcon;
 		tcon->crfid.is_valid = true;
+		kref_init(&tcon->crfid.refcount);
+		kref_get(&tcon->crfid.refcount);
 	}
 	mutex_unlock(&tcon->crfid.fid_mutex);
 	return rc;
@@ -558,6 +576,9 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
 			FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */
 	if (no_cached_open)
 		SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+	else
+		close_shroot(&tcon->crfid);
+
 	return;
 }
 
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 98d9b30c16a6..19aa483395c7 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -68,6 +68,7 @@ extern int smb3_handle_read_data(struct TCP_Server_Info *server,
 
 extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
 			struct cifs_fid *pfid);
+extern void close_shroot(struct cached_fid *cfid);
 extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
 				   struct smb2_file_all_info *src);
 extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
-- 
cgit v1.2.3


From fcabb89299d79010eb923afdd26de04afcc0527f Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Tue, 31 Jul 2018 01:21:37 -0500
Subject: cifs: simple stats should always be enabled

CONFIG_CIFS_STATS should always be enabled as Pavel recently
noted.  Simple statistics are not a significant performance hit,
and removing the ifdef simplifies the code slightly.

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/Kconfig      |  9 +--------
 fs/cifs/cifs_debug.c |  6 ------
 fs/cifs/cifsglob.h   | 10 ----------
 fs/cifs/misc.c       |  2 --
 fs/cifs/smb1ops.c    |  4 ----
 fs/cifs/smb2ops.c    |  4 ----
 fs/cifs/smb2pdu.c    |  2 --
 7 files changed, 1 insertion(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 63d0d852998a..35c83fe7dba0 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -49,16 +49,9 @@ config CIFS
 
 	  If you need to mount to Samba, Azure, Macs or Windows from this machine, say Y.
 
-config CIFS_STATS
-        bool "CIFS statistics"
-        depends on CIFS
-        help
-          Enabling this option will cause statistics for each server share
-	  mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
-
 config CIFS_STATS2
 	bool "Extended statistics"
-	depends on CIFS_STATS
+	depends on CIFS
 	help
 	  Enabling this option will allow more detailed statistics on SMB
 	  request timing to be displayed in /proc/fs/cifs/DebugData and also
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index cb516c950438..3270d9b74603 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -365,7 +365,6 @@ skip_rdma:
 	return 0;
 }
 
-#ifdef CONFIG_CIFS_STATS
 static ssize_t cifs_stats_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *ppos)
 {
@@ -481,7 +480,6 @@ static const struct file_operations cifs_stats_proc_fops = {
 	.release	= single_release,
 	.write		= cifs_stats_proc_write,
 };
-#endif /* STATS */
 
 #ifdef CONFIG_CIFS_SMB_DIRECT
 #define PROC_FILE_DEFINE(name) \
@@ -539,9 +537,7 @@ cifs_proc_init(void)
 	proc_create_single("DebugData", 0, proc_fs_cifs,
 			cifs_debug_data_proc_show);
 
-#ifdef CONFIG_CIFS_STATS
 	proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_fops);
-#endif /* STATS */
 	proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_fops);
 	proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_fops);
 	proc_create("LinuxExtensionsEnabled", 0644, proc_fs_cifs,
@@ -579,9 +575,7 @@ cifs_proc_clean(void)
 	remove_proc_entry("DebugData", proc_fs_cifs);
 	remove_proc_entry("cifsFYI", proc_fs_cifs);
 	remove_proc_entry("traceSMB", proc_fs_cifs);
-#ifdef CONFIG_CIFS_STATS
 	remove_proc_entry("Stats", proc_fs_cifs);
-#endif
 	remove_proc_entry("SecurityFlags", proc_fs_cifs);
 	remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
 	remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 0e6fd5fa4eb6..4a3a737134ea 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -931,7 +931,6 @@ struct cifs_tcon {
 	__u32 tid;		/* The 4 byte tree id */
 	__u16 Flags;		/* optional support bits */
 	enum statusEnum tidStatus;
-#ifdef CONFIG_CIFS_STATS
 	atomic_t num_smbs_sent;
 	union {
 		struct {
@@ -965,7 +964,6 @@ struct cifs_tcon {
 	__u64    bytes_read;
 	__u64    bytes_written;
 	spinlock_t stat_lock;  /* protects the two fields above */
-#endif /* CONFIG_CIFS_STATS */
 	FILE_SYSTEM_DEVICE_INFO fsDevInfo;
 	FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
 	FILE_SYSTEM_UNIX_INFO fsUnixInfo;
@@ -1331,7 +1329,6 @@ convert_delimiter(char *path, char delim)
 		*pos = delim;
 }
 
-#ifdef CONFIG_CIFS_STATS
 #define cifs_stats_inc atomic_inc
 
 static inline void cifs_stats_bytes_written(struct cifs_tcon *tcon,
@@ -1351,13 +1348,6 @@ static inline void cifs_stats_bytes_read(struct cifs_tcon *tcon,
 	tcon->bytes_read += bytes;
 	spin_unlock(&tcon->stat_lock);
 }
-#else
-
-#define  cifs_stats_inc(field) do {} while (0)
-#define  cifs_stats_bytes_written(tcon, bytes) do {} while (0)
-#define  cifs_stats_bytes_read(tcon, bytes) do {} while (0)
-
-#endif
 
 
 /*
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 53e8362cbc4a..dacb2c05674c 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -122,9 +122,7 @@ tconInfoAlloc(void)
 		mutex_init(&ret_buf->crfid.fid_mutex);
 		ret_buf->crfid.fid = kzalloc(sizeof(struct cifs_fid),
 					     GFP_KERNEL);
-#ifdef CONFIG_CIFS_STATS
 		spin_lock_init(&ret_buf->stat_lock);
-#endif
 	}
 	return ret_buf;
 }
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 646dcd149de1..378151e09e91 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -624,7 +624,6 @@ cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
 static void
 cifs_clear_stats(struct cifs_tcon *tcon)
 {
-#ifdef CONFIG_CIFS_STATS
 	atomic_set(&tcon->stats.cifs_stats.num_writes, 0);
 	atomic_set(&tcon->stats.cifs_stats.num_reads, 0);
 	atomic_set(&tcon->stats.cifs_stats.num_flushes, 0);
@@ -646,13 +645,11 @@ cifs_clear_stats(struct cifs_tcon *tcon)
 	atomic_set(&tcon->stats.cifs_stats.num_locks, 0);
 	atomic_set(&tcon->stats.cifs_stats.num_acl_get, 0);
 	atomic_set(&tcon->stats.cifs_stats.num_acl_set, 0);
-#endif
 }
 
 static void
 cifs_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
 {
-#ifdef CONFIG_CIFS_STATS
 	seq_printf(m, " Oplocks breaks: %d",
 		   atomic_read(&tcon->stats.cifs_stats.num_oplock_brks));
 	seq_printf(m, "\nReads:  %d Bytes: %llu",
@@ -684,7 +681,6 @@ cifs_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
 		   atomic_read(&tcon->stats.cifs_stats.num_ffirst),
 		   atomic_read(&tcon->stats.cifs_stats.num_fnext),
 		   atomic_read(&tcon->stats.cifs_stats.num_fclose));
-#endif
 }
 
 static void
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 8929426ddaa6..831249001384 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -900,13 +900,11 @@ smb2_can_echo(struct TCP_Server_Info *server)
 static void
 smb2_clear_stats(struct cifs_tcon *tcon)
 {
-#ifdef CONFIG_CIFS_STATS
 	int i;
 	for (i = 0; i < NUMBER_OF_SMB2_COMMANDS; i++) {
 		atomic_set(&tcon->stats.smb2_stats.smb2_com_sent[i], 0);
 		atomic_set(&tcon->stats.smb2_stats.smb2_com_failed[i], 0);
 	}
-#endif
 }
 
 static void
@@ -945,7 +943,6 @@ smb2_dump_share_caps(struct seq_file *m, struct cifs_tcon *tcon)
 static void
 smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
 {
-#ifdef CONFIG_CIFS_STATS
 	atomic_t *sent = tcon->stats.smb2_stats.smb2_com_sent;
 	atomic_t *failed = tcon->stats.smb2_stats.smb2_com_failed;
 
@@ -995,7 +992,6 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
 	seq_printf(m, "\nOplockBreaks: %d sent %d failed",
 		   atomic_read(&sent[SMB2_OPLOCK_BREAK_HE]),
 		   atomic_read(&failed[SMB2_OPLOCK_BREAK_HE]));
-#endif
 }
 
 static void
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 0b4d7ebb812d..7c0b30321d9a 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -360,10 +360,8 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
 		       total_len);
 
 	if (tcon != NULL) {
-#ifdef CONFIG_CIFS_STATS
 		uint16_t com_code = le16_to_cpu(smb2_command);
 		cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]);
-#endif
 		cifs_stats_inc(&tcon->num_smbs_sent);
 	}
 
-- 
cgit v1.2.3


From 52ce1ac4298309d76859307967c58835c0b2ce3e Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Tue, 31 Jul 2018 01:46:47 -0500
Subject: smb3: display bytes_read and bytes_written in smb3 stats

We were only displaying bytes_read and bytes_written in cifs
stats; fix smb3 stats to also display them.  Sample output
with this patch:

    cat /proc/fs/cifs/Stats:

CIFS Session: 1
Share (unique mount targets): 2
SMB Request/Response Buffer: 1 Pool size: 5
SMB Small Req/Resp Buffer: 1 Pool size: 30
Operations (MIDs): 0

0 session 0 share reconnects
Total vfs operations: 94 maximum at one time: 2

1) \\localhost\test
SMBs: 214
Bytes read: 502092  Bytes written: 31457286
TreeConnects: 1 total 0 failed
TreeDisconnects: 0 total 0 failed
Creates: 52 total 3 failed
Closes: 48 total 0 failed
Flushes: 0 total 0 failed
Reads: 17 total 0 failed
Writes: 31 total 0 failed
...

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/smb2ops.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 831249001384..85e848007f91 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -950,6 +950,9 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
 	 *  Can't display SMB2_NEGOTIATE, SESSION_SETUP, LOGOFF, CANCEL and ECHO
 	 *  totals (requests sent) since those SMBs are per-session not per tcon
 	 */
+	seq_printf(m, "\nBytes read: %llu  Bytes written: %llu",
+		   (long long)(tcon->bytes_read),
+		   (long long)(tcon->bytes_written));
 	seq_printf(m, "\nTreeConnects: %d total %d failed",
 		   atomic_read(&sent[SMB2_TREE_CONNECT_HE]),
 		   atomic_read(&failed[SMB2_TREE_CONNECT_HE]));
-- 
cgit v1.2.3


From c281bc0c7412308c7ec0888904f7c99353da4796 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Wed, 1 Aug 2018 00:56:12 -0500
Subject: smb3: fix reset of bytes read and written stats

echo 0 > /proc/fs/cifs/Stats is supposed to reset the stats
but there were four (see example below) that were not reset
(bytes read and written, total vfs ops and max ops
at one time).

...
0 session 0 share reconnects
Total vfs operations: 100 maximum at one time: 2

1) \\localhost\test
SMBs: 0
Bytes read: 502092  Bytes written: 31457286
TreeConnects: 0 total 0 failed
TreeDisconnects: 0 total 0 failed
...

This patch fixes cifs_stats_proc_write to properly reset
those four.

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
---
 fs/cifs/cifs_debug.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 3270d9b74603..e074820bd4ed 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -381,6 +381,10 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 		atomic_set(&totBufAllocCount, 0);
 		atomic_set(&totSmBufAllocCount, 0);
 #endif /* CONFIG_CIFS_STATS2 */
+		spin_lock(&GlobalMid_Lock);
+		GlobalMaxActiveXid = 0;
+		GlobalCurrentXid = 0;
+		spin_unlock(&GlobalMid_Lock);
 		spin_lock(&cifs_tcp_ses_lock);
 		list_for_each(tmp1, &cifs_tcp_ses_list) {
 			server = list_entry(tmp1, struct TCP_Server_Info,
@@ -393,6 +397,10 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 							  struct cifs_tcon,
 							  tcon_list);
 					atomic_set(&tcon->num_smbs_sent, 0);
+					spin_lock(&tcon->stat_lock);
+					tcon->bytes_read = 0;
+					tcon->bytes_written = 0;
+					spin_unlock(&tcon->stat_lock);
 					if (server->ops->clear_stats)
 						server->ops->clear_stats(tcon);
 				}
-- 
cgit v1.2.3


From b2c96de7fe3cd306df039c89727cb137b89d82ef Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 1 Aug 2018 09:26:11 +1000
Subject: cifs: update init_sg, crypt_message to take an array of rqst

These are used for SMB3 encryption and compounded requests.
Update these functions and the other functions related to SMB3 encryption to
take an array of requests.

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/cifsglob.h  |   7 +-
 fs/cifs/smb2ops.c   | 206 +++++++++++++++++++++++++++-------------------------
 fs/cifs/transport.c |  27 +++++--
 3 files changed, 131 insertions(+), 109 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 4a3a737134ea..0553929e8339 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -454,10 +454,8 @@ struct smb_version_operations {
 	long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t,
 			  loff_t);
 	/* init transform request - used for encryption for now */
-	int (*init_transform_rq)(struct TCP_Server_Info *, struct smb_rqst *,
-				 struct smb_rqst *);
-	/* free transform request */
-	void (*free_transform_rq)(struct smb_rqst *);
+	int (*init_transform_rq)(struct TCP_Server_Info *, int num_rqst,
+				 struct smb_rqst *, struct smb_rqst *);
 	int (*is_transform_hdr)(void *buf);
 	int (*receive_transform)(struct TCP_Server_Info *,
 				 struct mid_q_entry **);
@@ -1023,6 +1021,7 @@ struct tcon_link {
 };
 
 extern struct tcon_link *cifs_sb_tlink(struct cifs_sb_info *cifs_sb);
+extern void smb3_free_compound_rqst(int num_rqst, struct smb_rqst *rqst);
 
 static inline struct cifs_tcon *
 tlink_tcon(struct tcon_link *tlink)
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 85e848007f91..ebc13ebebddf 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2378,35 +2378,51 @@ static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf,
 	sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
 }
 
-/* Assumes:
- * rqst->rq_iov[0]  is transform header
- * rqst->rq_iov[1+] data to be encrypted/decrypted
+/* Assumes the first rqst has a transform header as the first iov.
+ * I.e.
+ * rqst[0].rq_iov[0]  is transform header
+ * rqst[0].rq_iov[1+] data to be encrypted/decrypted
+ * rqst[1+].rq_iov[0+] data to be encrypted/decrypted
  */
 static struct scatterlist *
-init_sg(struct smb_rqst *rqst, u8 *sign)
+init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign)
 {
-	unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages + 1;
-	unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
+	unsigned int sg_len;
 	struct scatterlist *sg;
 	unsigned int i;
 	unsigned int j;
+	unsigned int idx = 0;
+	int skip;
+
+	sg_len = 1;
+	for (i = 0; i < num_rqst; i++)
+		sg_len += rqst[i].rq_nvec + rqst[i].rq_npages;
 
 	sg = kmalloc_array(sg_len, sizeof(struct scatterlist), GFP_KERNEL);
 	if (!sg)
 		return NULL;
 
 	sg_init_table(sg, sg_len);
-	smb2_sg_set_buf(&sg[0], rqst->rq_iov[0].iov_base + 20, assoc_data_len);
-	for (i = 1; i < rqst->rq_nvec; i++)
-		smb2_sg_set_buf(&sg[i], rqst->rq_iov[i].iov_base,
-						rqst->rq_iov[i].iov_len);
-	for (j = 0; i < sg_len - 1; i++, j++) {
-		unsigned int len, offset;
+	for (i = 0; i < num_rqst; i++) {
+		for (j = 0; j < rqst[i].rq_nvec; j++) {
+			/*
+			 * The first rqst has a transform header where the
+			 * first 20 bytes are not part of the encrypted blob
+			 */
+			skip = (i == 0) && (j == 0) ? 20 : 0;
+			smb2_sg_set_buf(&sg[idx++],
+					rqst[i].rq_iov[j].iov_base + skip,
+					rqst[i].rq_iov[j].iov_len - skip);
+		}
+
+		for (j = 0; j < rqst[i].rq_npages; j++) {
+			unsigned int len, offset;
 
-		rqst_page_get_length(rqst, j, &len, &offset);
-		sg_set_page(&sg[i], rqst->rq_pages[j], len, offset);
+			rqst_page_get_length(&rqst[i], j, &len, &offset);
+			sg_set_page(&sg[idx++], rqst[i].rq_pages[j], len, offset);
+		}
 	}
-	smb2_sg_set_buf(&sg[sg_len - 1], sign, SMB2_SIGNATURE_SIZE);
+	smb2_sg_set_buf(&sg[idx], sign, SMB2_SIGNATURE_SIZE);
 	return sg;
 }
 
@@ -2438,10 +2454,11 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
  * untouched.
  */
 static int
-crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
+crypt_message(struct TCP_Server_Info *server, int num_rqst,
+	      struct smb_rqst *rqst, int enc)
 {
 	struct smb2_transform_hdr *tr_hdr =
-			(struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base;
+		(struct smb2_transform_hdr *)rqst[0].rq_iov[0].iov_base;
 	unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
 	int rc = 0;
 	struct scatterlist *sg;
@@ -2492,7 +2509,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
 		crypt_len += SMB2_SIGNATURE_SIZE;
 	}
 
-	sg = init_sg(rqst, sign);
+	sg = init_sg(num_rqst, rqst, sign);
 	if (!sg) {
 		cifs_dbg(VFS, "%s: Failed to init sg", __func__);
 		rc = -ENOMEM;
@@ -2529,103 +2546,98 @@ free_req:
 	return rc;
 }
 
+void
+smb3_free_compound_rqst(int num_rqst, struct smb_rqst *rqst)
+{
+	int i, j;
+
+	for (i = 0; i < num_rqst; i++) {
+		if (rqst[i].rq_pages) {
+			for (j = rqst[i].rq_npages - 1; j >= 0; j--)
+				put_page(rqst[i].rq_pages[j]);
+			kfree(rqst[i].rq_pages);
+		}
+	}
+}
+
+/*
+ * This function will initialize new_rq and encrypt the content.
+ * The first entry, new_rq[0], only contains a single iov which contains
+ * a smb2_transform_hdr and is pre-allocated by the caller.
+ * This function then populates new_rq[1+] with the content from olq_rq[0+].
+ *
+ * The end result is an array of smb_rqst structures where the first structure
+ * only contains a single iov for the transform header which we then can pass
+ * to crypt_message().
+ *
+ * new_rq[0].rq_iov[0] :  smb2_transform_hdr pre-allocated by the caller
+ * new_rq[1+].rq_iov[*] == old_rq[0+].rq_iov[*] : SMB2/3 requests
+ */
 static int
-smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
-		       struct smb_rqst *old_rq)
+smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst,
+		       struct smb_rqst *new_rq, struct smb_rqst *old_rq)
 {
-	struct kvec *iov;
 	struct page **pages;
-	struct smb2_transform_hdr *tr_hdr;
-	unsigned int npages = old_rq->rq_npages;
-	unsigned int orig_len;
-	int i;
+	struct smb2_transform_hdr *tr_hdr = new_rq[0].rq_iov[0].iov_base;
+	unsigned int npages;
+	unsigned int orig_len = 0;
+	int i, j;
 	int rc = -ENOMEM;
 
-	pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
-	if (!pages)
-		return rc;
-
-	new_rq->rq_pages = pages;
-	new_rq->rq_offset = old_rq->rq_offset;
-	new_rq->rq_npages = old_rq->rq_npages;
-	new_rq->rq_pagesz = old_rq->rq_pagesz;
-	new_rq->rq_tailsz = old_rq->rq_tailsz;
-
-	for (i = 0; i < npages; i++) {
-		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
-		if (!pages[i])
-			goto err_free_pages;
-	}
-
-	iov = kmalloc_array(old_rq->rq_nvec + 1, sizeof(struct kvec),
-			    GFP_KERNEL);
-	if (!iov)
-		goto err_free_pages;
+	for (i = 1; i < num_rqst; i++) {
+		npages = old_rq[i - 1].rq_npages;
+		pages = kmalloc_array(npages, sizeof(struct page *),
+				      GFP_KERNEL);
+		if (!pages)
+			goto err_free;
+
+		new_rq[i].rq_pages = pages;
+		new_rq[i].rq_npages = npages;
+		new_rq[i].rq_offset = old_rq[i - 1].rq_offset;
+		new_rq[i].rq_pagesz = old_rq[i - 1].rq_pagesz;
+		new_rq[i].rq_tailsz = old_rq[i - 1].rq_tailsz;
+		new_rq[i].rq_iov = old_rq[i - 1].rq_iov;
+		new_rq[i].rq_nvec = old_rq[i - 1].rq_nvec;
+
+		orig_len += smb_rqst_len(server, &old_rq[i - 1]);
+
+		for (j = 0; j < npages; j++) {
+			pages[j] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
+			if (!pages[j])
+				goto err_free;
+		}
 
-	/* copy all iovs from the old */
-	memcpy(&iov[1], &old_rq->rq_iov[0],
-				sizeof(struct kvec) * old_rq->rq_nvec);
+		/* copy pages form the old */
+		for (j = 0; j < npages; j++) {
+			char *dst, *src;
+			unsigned int offset, len;
 
-	new_rq->rq_iov = iov;
-	new_rq->rq_nvec = old_rq->rq_nvec + 1;
+			rqst_page_get_length(&new_rq[i], j, &len, &offset);
 
-	tr_hdr = kmalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL);
-	if (!tr_hdr)
-		goto err_free_iov;
+			dst = (char *) kmap(new_rq[i].rq_pages[j]) + offset;
+			src = (char *) kmap(old_rq[i - 1].rq_pages[j]) + offset;
 
-	orig_len = smb_rqst_len(server, old_rq);
+			memcpy(dst, src, len);
+			kunmap(new_rq[i].rq_pages[j]);
+			kunmap(old_rq[i - 1].rq_pages[j]);
+		}
+	}
 
-	/* fill the 2nd iov with a transform header */
+	/* fill the 1st iov with a transform header */
 	fill_transform_hdr(tr_hdr, orig_len, old_rq);
-	new_rq->rq_iov[0].iov_base = tr_hdr;
-	new_rq->rq_iov[0].iov_len = sizeof(struct smb2_transform_hdr);
-
-	/* copy pages form the old */
-	for (i = 0; i < npages; i++) {
-		char *dst, *src;
-		unsigned int offset, len;
-
-		rqst_page_get_length(new_rq, i, &len, &offset);
 
-		dst = (char *) kmap(new_rq->rq_pages[i]) + offset;
-		src = (char *) kmap(old_rq->rq_pages[i]) + offset;
-
-		memcpy(dst, src, len);
-		kunmap(new_rq->rq_pages[i]);
-		kunmap(old_rq->rq_pages[i]);
-	}
-
-	rc = crypt_message(server, new_rq, 1);
+	rc = crypt_message(server, num_rqst, new_rq, 1);
 	cifs_dbg(FYI, "encrypt message returned %d", rc);
 	if (rc)
-		goto err_free_tr_hdr;
+		goto err_free;
 
 	return rc;
 
-err_free_tr_hdr:
-	kfree(tr_hdr);
-err_free_iov:
-	kfree(iov);
-err_free_pages:
-	for (i = i - 1; i >= 0; i--)
-		put_page(pages[i]);
-	kfree(pages);
+err_free:
+	smb3_free_compound_rqst(num_rqst - 1, &new_rq[1]);
 	return rc;
 }
 
-static void
-smb3_free_transform_rq(struct smb_rqst *rqst)
-{
-	int i = rqst->rq_npages - 1;
-
-	for (; i >= 0; i--)
-		put_page(rqst->rq_pages[i]);
-	kfree(rqst->rq_pages);
-	/* free transform header */
-	kfree(rqst->rq_iov[0].iov_base);
-	kfree(rqst->rq_iov);
-}
-
 static int
 smb3_is_transform_hdr(void *buf)
 {
@@ -2655,7 +2667,7 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
 	rqst.rq_pagesz = PAGE_SIZE;
 	rqst.rq_tailsz = (page_data_size % PAGE_SIZE) ? : PAGE_SIZE;
 
-	rc = crypt_message(server, &rqst, 0);
+	rc = crypt_message(server, 1, &rqst, 0);
 	cifs_dbg(FYI, "decrypt message returned %d\n", rc);
 
 	if (rc)
@@ -3302,7 +3314,6 @@ struct smb_version_operations smb30_operations = {
 	.fallocate = smb3_fallocate,
 	.enum_snapshots = smb3_enum_snapshots,
 	.init_transform_rq = smb3_init_transform_rq,
-	.free_transform_rq = smb3_free_transform_rq,
 	.is_transform_hdr = smb3_is_transform_hdr,
 	.receive_transform = smb3_receive_transform,
 	.get_dfs_refer = smb2_get_dfs_refer,
@@ -3408,7 +3419,6 @@ struct smb_version_operations smb311_operations = {
 	.fallocate = smb3_fallocate,
 	.enum_snapshots = smb3_enum_snapshots,
 	.init_transform_rq = smb3_init_transform_rq,
-	.free_transform_rq = smb3_free_transform_rq,
 	.is_transform_hdr = smb3_is_transform_hdr,
 	.receive_transform = smb3_receive_transform,
 	.get_dfs_refer = smb2_get_dfs_refer,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 357d25351ffa..8039c93ba57a 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -374,27 +374,40 @@ smbd_done:
 	return rc;
 }
 
+#define MAX_COMPOUND 2
+
 static int
 smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst, int flags)
 {
-	struct smb_rqst cur_rqst;
+	struct kvec iov;
+	struct smb2_transform_hdr tr_hdr;
+	struct smb_rqst cur_rqst[MAX_COMPOUND];
 	int rc;
 
 	if (!(flags & CIFS_TRANSFORM_REQ))
 		return __smb_send_rqst(server, 1, rqst);
 
-	if (!server->ops->init_transform_rq ||
-	    !server->ops->free_transform_rq) {
-		cifs_dbg(VFS, "Encryption requested but transform callbacks are missed\n");
+	memset(&cur_rqst[0], 0, sizeof(cur_rqst));
+	memset(&iov, 0, sizeof(iov));
+	memset(&tr_hdr, 0, sizeof(tr_hdr));
+
+	iov.iov_base = &tr_hdr;
+	iov.iov_len = sizeof(tr_hdr);
+	cur_rqst[0].rq_iov = &iov;
+	cur_rqst[0].rq_nvec = 1;
+
+	if (!server->ops->init_transform_rq) {
+		cifs_dbg(VFS, "Encryption requested but transform callback "
+			 "is missing\n");
 		return -EIO;
 	}
 
-	rc = server->ops->init_transform_rq(server, &cur_rqst, rqst);
+	rc = server->ops->init_transform_rq(server, 2, &cur_rqst[0], rqst);
 	if (rc)
 		return rc;
 
-	rc = __smb_send_rqst(server, 1, &cur_rqst);
-	server->ops->free_transform_rq(&cur_rqst);
+	rc = __smb_send_rqst(server, 2, &cur_rqst[0]);
+	smb3_free_compound_rqst(1, &cur_rqst[1]);
 	return rc;
 }
 
-- 
cgit v1.2.3


From 1f3a8f5f7ac3de4abae9d2c118ffaeed1676fe1c Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 1 Aug 2018 09:26:12 +1000
Subject: cifs: make smb_send_rqst take an array of requests

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/transport.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 8039c93ba57a..169e767ff57f 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -374,10 +374,11 @@ smbd_done:
 	return rc;
 }
 
-#define MAX_COMPOUND 2
+#define MAX_COMPOUND 5
 
 static int
-smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst, int flags)
+smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
+	      struct smb_rqst *rqst, int flags)
 {
 	struct kvec iov;
 	struct smb2_transform_hdr tr_hdr;
@@ -385,7 +386,10 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst, int flags)
 	int rc;
 
 	if (!(flags & CIFS_TRANSFORM_REQ))
-		return __smb_send_rqst(server, 1, rqst);
+		return __smb_send_rqst(server, num_rqst, rqst);
+
+	if (num_rqst > MAX_COMPOUND - 1)
+		return -ENOMEM;
 
 	memset(&cur_rqst[0], 0, sizeof(cur_rqst));
 	memset(&iov, 0, sizeof(iov));
@@ -402,12 +406,13 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst, int flags)
 		return -EIO;
 	}
 
-	rc = server->ops->init_transform_rq(server, 2, &cur_rqst[0], rqst);
+	rc = server->ops->init_transform_rq(server, num_rqst + 1,
+					    &cur_rqst[0], rqst);
 	if (rc)
 		return rc;
 
-	rc = __smb_send_rqst(server, 2, &cur_rqst[0]);
-	smb3_free_compound_rqst(1, &cur_rqst[1]);
+	rc = __smb_send_rqst(server, num_rqst + 1, &cur_rqst[0]);
+	smb3_free_compound_rqst(num_rqst, &cur_rqst[1]);
 	return rc;
 }
 
@@ -621,7 +626,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
 	 */
 	cifs_save_when_sent(mid);
 	cifs_in_send_inc(server);
-	rc = smb_send_rqst(server, rqst, flags);
+	rc = smb_send_rqst(server, 1, rqst, flags);
 	cifs_in_send_dec(server);
 
 	if (rc < 0) {
@@ -811,7 +816,7 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
 
 	midQ->mid_state = MID_REQUEST_SUBMITTED;
 	cifs_in_send_inc(ses->server);
-	rc = smb_send_rqst(ses->server, rqst, flags);
+	rc = smb_send_rqst(ses->server, 1, rqst, flags);
 	cifs_in_send_dec(ses->server);
 	cifs_save_when_sent(midQ);
 
-- 
cgit v1.2.3


From e0bba0b8548179b696e86c158ea8f45f2ef6ad14 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 1 Aug 2018 09:26:13 +1000
Subject: cifs: add compound_send_recv()

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/cifsproto.h |   4 ++
 fs/cifs/transport.c | 156 ++++++++++++++++++++++++++++++----------------------
 2 files changed, 94 insertions(+), 66 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 7ead1a9ac6fb..20adda4de83b 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -94,6 +94,10 @@ extern int cifs_call_async(struct TCP_Server_Info *server,
 extern int cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
 			  struct smb_rqst *rqst, int *resp_buf_type,
 			  const int flags, struct kvec *resp_iov);
+extern int compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
+			      const int flags, const int num_rqst,
+			      struct smb_rqst *rqst, int *resp_buf_type,
+			      struct kvec *resp_iov);
 extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *,
 			struct smb_hdr * /* input */ ,
 			struct smb_hdr * /* out */ ,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 169e767ff57f..0b9d0e859f86 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -766,20 +766,21 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
 }
 
 int
-cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
-	       struct smb_rqst *rqst, int *resp_buf_type, const int flags,
-	       struct kvec *resp_iov)
+compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
+		   const int flags, const int num_rqst, struct smb_rqst *rqst,
+		   int *resp_buf_type, struct kvec *resp_iov)
 {
-	int rc = 0;
+	int i, j, rc = 0;
 	int timeout, optype;
-	struct mid_q_entry *midQ;
+	struct mid_q_entry *midQ[MAX_COMPOUND];
 	unsigned int credits = 1;
 	char *buf;
 
 	timeout = flags & CIFS_TIMEOUT_MASK;
 	optype = flags & CIFS_OP_MASK;
 
-	*resp_buf_type = CIFS_NO_BUFFER;  /* no response buf yet */
+	for (i = 0; i < num_rqst; i++)
+		resp_buf_type[i] = CIFS_NO_BUFFER;  /* no response buf yet */
 
 	if ((ses == NULL) || (ses->server == NULL)) {
 		cifs_dbg(VFS, "Null session\n");
@@ -806,93 +807,116 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
 
 	mutex_lock(&ses->server->srv_mutex);
 
-	midQ = ses->server->ops->setup_request(ses, rqst);
-	if (IS_ERR(midQ)) {
-		mutex_unlock(&ses->server->srv_mutex);
-		/* Update # of requests on wire to server */
-		add_credits(ses->server, 1, optype);
-		return PTR_ERR(midQ);
+	for (i = 0; i < num_rqst; i++) {
+		midQ[i] = ses->server->ops->setup_request(ses, &rqst[i]);
+		if (IS_ERR(midQ[i])) {
+			for (j = 0; j < i; j++)
+				cifs_delete_mid(midQ[j]);
+			mutex_unlock(&ses->server->srv_mutex);
+			/* Update # of requests on wire to server */
+			add_credits(ses->server, 1, optype);
+			return PTR_ERR(midQ[i]);
+		}
+
+		midQ[i]->mid_state = MID_REQUEST_SUBMITTED;
 	}
 
-	midQ->mid_state = MID_REQUEST_SUBMITTED;
 	cifs_in_send_inc(ses->server);
-	rc = smb_send_rqst(ses->server, 1, rqst, flags);
+	rc = smb_send_rqst(ses->server, num_rqst, rqst, flags);
 	cifs_in_send_dec(ses->server);
-	cifs_save_when_sent(midQ);
+
+	for (i = 0; i < num_rqst; i++)
+		cifs_save_when_sent(midQ[i]);
 
 	if (rc < 0)
 		ses->server->sequence_number -= 2;
+
 	mutex_unlock(&ses->server->srv_mutex);
 
-	if (rc < 0)
-		goto out;
+	for (i = 0; i < num_rqst; i++) {
+		if (rc < 0)
+			goto out;
 
-	if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
-		smb311_update_preauth_hash(ses, rqst->rq_iov,
-					   rqst->rq_nvec);
+		if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
+			smb311_update_preauth_hash(ses, rqst[i].rq_iov,
+						   rqst[i].rq_nvec);
 
-	if (timeout == CIFS_ASYNC_OP)
-		goto out;
+		if (timeout == CIFS_ASYNC_OP)
+			goto out;
 
-	rc = wait_for_response(ses->server, midQ);
-	if (rc != 0) {
-		cifs_dbg(FYI, "Cancelling wait for mid %llu\n",	midQ->mid);
-		send_cancel(ses->server, rqst, midQ);
-		spin_lock(&GlobalMid_Lock);
-		if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
-			midQ->mid_flags |= MID_WAIT_CANCELLED;
-			midQ->callback = DeleteMidQEntry;
+		rc = wait_for_response(ses->server, midQ[i]);
+		if (rc != 0) {
+			cifs_dbg(FYI, "Cancelling wait for mid %llu\n",
+				 midQ[i]->mid);
+			send_cancel(ses->server, &rqst[i], midQ[i]);
+			spin_lock(&GlobalMid_Lock);
+			if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) {
+				midQ[i]->mid_flags |= MID_WAIT_CANCELLED;
+				midQ[i]->callback = DeleteMidQEntry;
+				spin_unlock(&GlobalMid_Lock);
+				add_credits(ses->server, 1, optype);
+				return rc;
+			}
 			spin_unlock(&GlobalMid_Lock);
+		}
+
+		rc = cifs_sync_mid_result(midQ[i], ses->server);
+		if (rc != 0) {
 			add_credits(ses->server, 1, optype);
 			return rc;
 		}
-		spin_unlock(&GlobalMid_Lock);
-	}
-
-	rc = cifs_sync_mid_result(midQ, ses->server);
-	if (rc != 0) {
-		add_credits(ses->server, 1, optype);
-		return rc;
-	}
 
-	if (!midQ->resp_buf || midQ->mid_state != MID_RESPONSE_RECEIVED) {
-		rc = -EIO;
-		cifs_dbg(FYI, "Bad MID state?\n");
-		goto out;
-	}
-
-	buf = (char *)midQ->resp_buf;
-	resp_iov->iov_base = buf;
-	resp_iov->iov_len = midQ->resp_buf_size +
-		ses->server->vals->header_preamble_size;
-	if (midQ->large_buf)
-		*resp_buf_type = CIFS_LARGE_BUFFER;
-	else
-		*resp_buf_type = CIFS_SMALL_BUFFER;
+		if (!midQ[i]->resp_buf ||
+		    midQ[i]->mid_state != MID_RESPONSE_RECEIVED) {
+			rc = -EIO;
+			cifs_dbg(FYI, "Bad MID state?\n");
+			goto out;
+		}
 
-	if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
-		struct kvec iov = {
-			.iov_base = resp_iov->iov_base,
-			.iov_len = resp_iov->iov_len
-		};
-		smb311_update_preauth_hash(ses, &iov, 1);
-	}
+		buf = (char *)midQ[i]->resp_buf;
+		resp_iov[i].iov_base = buf;
+		resp_iov[i].iov_len = midQ[i]->resp_buf_size +
+			ses->server->vals->header_preamble_size;
+
+		if (midQ[i]->large_buf)
+			resp_buf_type[i] = CIFS_LARGE_BUFFER;
+		else
+			resp_buf_type[i] = CIFS_SMALL_BUFFER;
+
+		if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
+			struct kvec iov = {
+				.iov_base = resp_iov[i].iov_base,
+				.iov_len = resp_iov[i].iov_len
+			};
+			smb311_update_preauth_hash(ses, &iov, 1);
+		}
 
-	credits = ses->server->ops->get_credits(midQ);
+		credits = ses->server->ops->get_credits(midQ[i]);
 
-	rc = ses->server->ops->check_receive(midQ, ses->server,
-					     flags & CIFS_LOG_ERROR);
+		rc = ses->server->ops->check_receive(midQ[i], ses->server,
+						     flags & CIFS_LOG_ERROR);
 
-	/* mark it so buf will not be freed by cifs_delete_mid */
-	if ((flags & CIFS_NO_RESP) == 0)
-		midQ->resp_buf = NULL;
+		/* mark it so buf will not be freed by cifs_delete_mid */
+		if ((flags & CIFS_NO_RESP) == 0)
+			midQ[i]->resp_buf = NULL;
+	}
 out:
-	cifs_delete_mid(midQ);
+	for (i = 0; i < num_rqst; i++)
+		cifs_delete_mid(midQ[i]);
 	add_credits(ses->server, credits, optype);
 
 	return rc;
 }
 
+int
+cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
+	       struct smb_rqst *rqst, int *resp_buf_type, const int flags,
+	       struct kvec *resp_iov)
+{
+	return compound_send_recv(xid, ses, flags, 1, rqst, resp_buf_type,
+				  resp_iov);
+}
+
 int
 SendReceive2(const unsigned int xid, struct cifs_ses *ses,
 	     struct kvec *iov, int n_vec, int *resp_buf_type /* ret */,
-- 
cgit v1.2.3


From 020eec5f712ffb4254233490076cc21600b810db Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Wed, 1 Aug 2018 16:38:07 -0500
Subject: smb3: add tracepoint for slow responses

If responses take longer than one second from the server,
we can optionally log them to dmesg in current cifs.ko code
(CONFIG_CIFS_STATS2 must be configured and a
/proc/fs/cifs/cifsFYI flag must be set), but it can be more useful
to log these via ftrace (the tracepoint is smb3_slow_rsp), which
is easier and more granular (this still requires CONFIG_CIFS_STATS2
to be configured in the build, though).

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/trace.h     | 37 +++++++++++++++++++++++++++++++++++++
 fs/cifs/transport.c |  8 ++++++--
 2 files changed, 43 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index 6b50b57e2416..d4aed5217a56 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -283,6 +283,43 @@ DEFINE_EVENT(smb3_cmd_done_class, smb3_##name,    \
 DEFINE_SMB3_CMD_DONE_EVENT(cmd_done);
 DEFINE_SMB3_CMD_DONE_EVENT(ses_expired);
 
+DECLARE_EVENT_CLASS(smb3_mid_class,
+	TP_PROTO(__u16	cmd,
+		__u64	mid,
+		__u32	pid,
+		unsigned long when_sent,
+		unsigned long when_received),
+	TP_ARGS(cmd, mid, pid, when_sent, when_received),
+	TP_STRUCT__entry(
+		__field(__u16, cmd)
+		__field(__u64, mid)
+		__field(__u32, pid)
+		__field(unsigned long, when_sent)
+		__field(unsigned long, when_received)
+	),
+	TP_fast_assign(
+		__entry->cmd = cmd;
+		__entry->mid = mid;
+		__entry->pid = pid;
+		__entry->when_sent = when_sent;
+		__entry->when_received = when_received;
+	),
+	TP_printk("\tcmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu",
+		__entry->cmd, __entry->mid, __entry->pid, __entry->when_sent,
+		__entry->when_received)
+)
+
+#define DEFINE_SMB3_MID_EVENT(name)          \
+DEFINE_EVENT(smb3_mid_class, smb3_##name,    \
+	TP_PROTO(__u16	cmd,			\
+		__u64	mid,			\
+		__u32	pid,			\
+		unsigned long when_sent,	\
+		unsigned long when_received),	\
+	TP_ARGS(cmd, mid, pid, when_sent, when_received))
+
+DEFINE_SMB3_MID_EVENT(slow_rsp);
+
 DECLARE_EVENT_CLASS(smb3_exit_err_class,
 	TP_PROTO(unsigned int xid,
 		const char *func_name,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 0b9d0e859f86..92de5c528161 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -115,8 +115,12 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
 	now = jiffies;
 	/* commands taking longer than one second are indications that
 	   something is wrong, unless it is quite a slow link or server */
-	if (time_after(now, midEntry->when_alloc + HZ)) {
-		if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) {
+	if (time_after(now, midEntry->when_alloc + HZ) &&
+	    (midEntry->command != command)) {
+		trace_smb3_slow_rsp(le16_to_cpu(midEntry->command),
+			       midEntry->mid, midEntry->pid,
+			       midEntry->when_sent, midEntry->when_received);
+		if (cifsFYI & CIFS_TIMER) {
 			pr_debug(" CIFS slow rsp: cmd %d mid %llu",
 			       midEntry->command, midEntry->mid);
 			pr_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
-- 
cgit v1.2.3


From d258650004617fbd5dfe97d77d26fb37187d06e9 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Wed, 1 Aug 2018 22:34:04 -0500
Subject: smb3: fix minor debug output for CONFIG_CIFS_STATS

CONFIG_CIFS_STATS is now always enabled (to simplify the
code and since the STATS are important for some common
customer use cases and also debugging), but one minor
change was needed so that STATS shows as enabled in the debug
output in /proc/fs/cifs/DebugData. Otherwise it could
get confusing, with STATS no longer showing up in the
"Features" list in /proc/fs/cifs/DebugData when basic
stats were in fact available.

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/cifs_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index e074820bd4ed..748cabd6d20b 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -170,7 +170,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 #endif
 #ifdef CONFIG_CIFS_STATS2
 	seq_printf(m, ",STATS2");
-#elif defined(CONFIG_CIFS_STATS)
+#else
 	seq_printf(m, ",STATS");
 #endif
 #ifdef CONFIG_CIFS_DEBUG2
-- 
cgit v1.2.3


From fd09b7d3b352105f08b8e02f7afecf7e816380ef Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Thu, 2 Aug 2018 20:28:18 -0500
Subject: smb3: Do not send SMB3 SET_INFO if nothing changed

An earlier commit had a typo which prevented the
optimization from working:

commit 18dd8e1a65dd ("Do not send SMB3 SET_INFO request if nothing is changing")

Thank you to Metze for noticing this.  Also clear a
reserved field in the FILE_BASIC_INFO struct we send
that should be zero (all the other fields in that
struct were set or cleared explicitly already in
cifs_set_file_info).

Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
CC: Stable <stable@vger.kernel.org> # 4.9.x+
Reported-by: Stefan Metzmacher <metze@samba.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/inode.c     | 2 ++
 fs/cifs/smb2inode.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 054e880c1dac..d32eaa4b2437 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1120,6 +1120,8 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
 	if (!server->ops->set_file_info)
 		return -ENOSYS;
 
+	info_buf.Pad = 0;
+
 	if (attrs->ia_valid & ATTR_ATIME) {
 		set_time = true;
 		info_buf.LastAccessTime =
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index f22cbc0d1869..1eef1791d0c4 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -283,7 +283,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
 	int rc;
 
 	if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) &&
-	    (buf->LastWriteTime == 0) && (buf->ChangeTime) &&
+	    (buf->LastWriteTime == 0) && (buf->ChangeTime == 0) &&
 	    (buf->Attributes == 0))
 		return 0; /* would be a no op, no sense sending this */
 
-- 
cgit v1.2.3


From a5c62f4833c2c8e6e0f35367b99b717b78f5c029 Mon Sep 17 00:00:00 2001
From: Aurelien Aptel <aaptel@suse.com>
Date: Thu, 2 Aug 2018 16:39:52 +0200
Subject: CIFS: fix uninitialized ptr deref in smb2 signing

server->secmech.sdeschmacsha256 is not properly initialized before
smb2_crypto_shash_allocate() is called, so set shash after that call.

Also fix a typo in the error message.

Fixes: 8de8c4608fe9 ("cifs: Fix validation of signed data in smb2")

Signed-off-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.com>
Reported-by: Xiaoli Feng <xifeng@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/smb2transport.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 3f778937c0e2..7b351c65ee46 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -171,7 +171,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 	struct kvec *iov = rqst->rq_iov;
 	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base;
 	struct cifs_ses *ses;
-	struct shash_desc *shash = &server->secmech.sdeschmacsha256->shash;
+	struct shash_desc *shash;
 	struct smb_rqst drqst;
 
 	ses = smb2_find_smb_ses(server, shdr->SessionId);
@@ -185,7 +185,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 
 	rc = smb2_crypto_shash_allocate(server);
 	if (rc) {
-		cifs_dbg(VFS, "%s: shah256 alloc failed\n", __func__);
+		cifs_dbg(VFS, "%s: sha256 alloc failed\n", __func__);
 		return rc;
 	}
 
@@ -196,6 +196,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 		return rc;
 	}
 
+	shash = &server->secmech.sdeschmacsha256->shash;
 	rc = crypto_shash_init(shash);
 	if (rc) {
 		cifs_dbg(VFS, "%s: Could not init sha256", __func__);
-- 
cgit v1.2.3


From 468d677954c0d94fec59275d91222257fe8b4416 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 4 Aug 2018 05:24:34 -0500
Subject: smb3: display stats counters for number of slow commands

When CONFIG_CIFS_STATS2 is enabled, keep counters for slow
commands (i.e. the server took longer than 1 second to respond)
by SMB2/SMB3 command code.  This can help in diagnosing
whether performance problems are on the server (instead of the
client) and which commands are causing the problem.

Sample output (the new lines contain words "slow responses ...")

$ cat /proc/fs/cifs/Stats
Resources in use
CIFS Session: 1
Share (unique mount targets): 2
SMB Request/Response Buffer: 1 Pool size: 5
SMB Small Req/Resp Buffer: 1 Pool size: 30
Total Large 10 Small 490 Allocations
Operations (MIDs): 0

0 session 0 share reconnects
Total vfs operations: 67 maximum at one time: 2
4 slow responses from localhost for command 5
1 slow responses from localhost for command 6
1 slow responses from localhost for command 14
1 slow responses from localhost for command 16

1) \\localhost\test
SMBs: 243
Bytes read: 1024000  Bytes written: 104857600
TreeConnects: 1 total 0 failed
TreeDisconnects: 0 total 0 failed
Creates: 40 total 0 failed
Closes: 39 total 0 failed
...

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
---
 fs/cifs/cifs_debug.c | 19 +++++++++++++++++--
 fs/cifs/cifsglob.h   |  3 ++-
 fs/cifs/transport.c  |  5 +++++
 3 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 748cabd6d20b..f1fbea947fef 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -378,6 +378,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 	rc = kstrtobool_from_user(buffer, count, &bv);
 	if (rc == 0) {
 #ifdef CONFIG_CIFS_STATS2
+		int i;
+
 		atomic_set(&totBufAllocCount, 0);
 		atomic_set(&totSmBufAllocCount, 0);
 #endif /* CONFIG_CIFS_STATS2 */
@@ -389,6 +391,10 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 		list_for_each(tmp1, &cifs_tcp_ses_list) {
 			server = list_entry(tmp1, struct TCP_Server_Info,
 					    tcp_ses_list);
+#ifdef CONFIG_CIFS_STATS2
+			for (i = 0; i < NUMBER_OF_SMB2_COMMANDS; i++)
+				atomic_set(&server->smb2slowcmd[i], 0);
+#endif /* CONFIG_CIFS_STATS2 */
 			list_for_each(tmp2, &server->smb_ses_list) {
 				ses = list_entry(tmp2, struct cifs_ses,
 						 smb_ses_list);
@@ -417,13 +423,15 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 static int cifs_stats_proc_show(struct seq_file *m, void *v)
 {
 	int i;
+#ifdef CONFIG_CIFS_STATS2
+	int j;
+#endif /* STATS2 */
 	struct list_head *tmp1, *tmp2, *tmp3;
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon;
 
-	seq_printf(m,
-			"Resources in use\nCIFS Session: %d\n",
+	seq_printf(m, "Resources in use\nCIFS Session: %d\n",
 			sesInfoAllocCount.counter);
 	seq_printf(m, "Share (unique mount targets): %d\n",
 			tconInfoAllocCount.counter);
@@ -452,6 +460,13 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
 	list_for_each(tmp1, &cifs_tcp_ses_list) {
 		server = list_entry(tmp1, struct TCP_Server_Info,
 				    tcp_ses_list);
+#ifdef CONFIG_CIFS_STATS2
+		for (j = 0; j < NUMBER_OF_SMB2_COMMANDS; j++)
+			if (atomic_read(&server->smb2slowcmd[j]))
+				seq_printf(m, "%d slow responses from %s for command %d\n",
+					atomic_read(&server->smb2slowcmd[j]),
+					server->hostname, j);
+#endif /* STATS2 */
 		list_for_each(tmp2, &server->smb_ses_list) {
 			ses = list_entry(tmp2, struct cifs_ses,
 					 smb_ses_list);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 0553929e8339..41803d374da0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -680,7 +680,8 @@ struct TCP_Server_Info {
 #ifdef CONFIG_CIFS_STATS2
 	atomic_t in_send; /* requests trying to send */
 	atomic_t num_waiters;   /* blocked waiting to get in sendrecv */
-#endif
+	atomic_t smb2slowcmd[NUMBER_OF_SMB2_COMMANDS]; /* count resps > 1 sec */
+#endif /* STATS2 */
 	unsigned int	max_read;
 	unsigned int	max_write;
 	__le16	cipher_type;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 92de5c528161..c53c0908d4c6 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -117,6 +117,11 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
 	   something is wrong, unless it is quite a slow link or server */
 	if (time_after(now, midEntry->when_alloc + HZ) &&
 	    (midEntry->command != command)) {
+		/* smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command */
+		if ((le16_to_cpu(midEntry->command) < NUMBER_OF_SMB2_COMMANDS) &&
+		    (le16_to_cpu(midEntry->command) >= 0))
+			cifs_stats_inc(&midEntry->server->smb2slowcmd[le16_to_cpu(midEntry->command)]);
+
 		trace_smb3_slow_rsp(le16_to_cpu(midEntry->command),
 			       midEntry->mid, midEntry->pid,
 			       midEntry->when_sent, midEntry->when_received);
-- 
cgit v1.2.3


From f5580d0f8bf60993a5fbc73ee04678070ffbba57 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Wed, 8 Aug 2018 09:53:30 -0500
Subject: gfs2: eliminate update_rgrp_lvb_unlinked

Function update_rgrp_lvb_unlinked used to do the same thing as
be32_add_cpu. This patch removes it in favor of using be32_add_cpu
directly.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Reviewed-by: Andrew Price <anprice@redhat.com>
---
 fs/gfs2/rgrp.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ef50fe9b880a..1ad3256b9cbc 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1111,13 +1111,6 @@ static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
 	return 1;
 }
 
-static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
-{
-	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
-	u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
-	rgl->rl_unlinked = cpu_to_be32(unlinked);
-}
-
 static u32 count_unlinked(struct gfs2_rgrpd *rgd)
 {
 	struct gfs2_bitmap *bi;
@@ -2489,7 +2482,7 @@ void gfs2_unlink_di(struct inode *inode)
 	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
-	update_rgrp_lvb_unlinked(rgd, 1);
+	be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
 }
 
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
@@ -2509,7 +2502,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 
 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
-	update_rgrp_lvb_unlinked(rgd, -1);
+	be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
 
 	gfs2_statfs_change(sdp, 0, +1, -1);
 	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
-- 
cgit v1.2.3


From da33a871ba178dbe81da7d755818d3c2088cae32 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 8 Aug 2018 12:54:09 -0400
Subject: locks: remove misleading obsolete comment

The spinlock handling in this file has changed significantly since this
comment was written, and the file_lock_lock is no more. In addition,
this overall comment no longer applies. Deleting an entry now requires
both locks.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index 6138a9bcd924..11a4d698aba8 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -202,10 +202,6 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
  * we often hold the flc_lock as well. In certain cases, when reading the fields
  * protected by this lock, we can skip acquiring it iff we already hold the
  * flc_lock.
- *
- * In particular, adding an entry to the fl_block list requires that you hold
- * both the flc_lock and the blocked_lock_lock (acquired in that order).
- * Deleting an entry from the list however only requires the file_lock_lock.
  */
 static DEFINE_SPINLOCK(blocked_lock_lock);
 
-- 
cgit v1.2.3


From 8eb4ecfab03d21146e144b0693ce96839d58202d Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 1 Aug 2018 09:26:16 +1000
Subject: cifs: add SMB2_close_init()/SMB2_close_free()

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.com>
---
 fs/cifs/smb2pdu.c   | 48 ++++++++++++++++++++++++++++++++++--------------
 fs/cifs/smb2proto.h |  3 +++
 2 files changed, 37 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 7c0b30321d9a..78c7190f2295 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2444,44 +2444,63 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 	return rc;
 }
 
+int
+SMB2_close_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+		u64 persistent_fid, u64 volatile_fid)
+{
+	struct smb2_close_req *req;
+	struct kvec *iov = rqst->rq_iov;
+	unsigned int total_len;
+	int rc;
+
+	rc = smb2_plain_req_init(SMB2_CLOSE, tcon, (void **) &req, &total_len);
+	if (rc)
+		return rc;
+
+	req->PersistentFileId = persistent_fid;
+	req->VolatileFileId = volatile_fid;
+	iov[0].iov_base = (char *)req;
+	iov[0].iov_len = total_len;
+
+	return 0;
+}
+
+void
+SMB2_close_free(struct smb_rqst *rqst)
+{
+	cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
+}
+
 int
 SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
 		 u64 persistent_fid, u64 volatile_fid, int flags)
 {
 	struct smb_rqst rqst;
-	struct smb2_close_req *req;
-	struct smb2_close_rsp *rsp;
+	struct smb2_close_rsp *rsp = NULL;
 	struct cifs_ses *ses = tcon->ses;
 	struct kvec iov[1];
 	struct kvec rsp_iov;
 	int resp_buftype;
 	int rc = 0;
-	unsigned int total_len;
 
 	cifs_dbg(FYI, "Close\n");
 
 	if (!ses || !(ses->server))
 		return -EIO;
 
-	rc = smb2_plain_req_init(SMB2_CLOSE, tcon, (void **) &req, &total_len);
-	if (rc)
-		return rc;
-
 	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
-	req->PersistentFileId = persistent_fid;
-	req->VolatileFileId = volatile_fid;
-
-	iov[0].iov_base = (char *)req;
-	iov[0].iov_len = total_len;
-
 	memset(&rqst, 0, sizeof(struct smb_rqst));
+	memset(&iov, 0, sizeof(iov));
 	rqst.rq_iov = iov;
 	rqst.rq_nvec = 1;
 
+	rc = SMB2_close_init(tcon, &rqst, persistent_fid, volatile_fid);
+	if (rc)
+		goto close_exit;
+
 	rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
-	cifs_small_buf_release(req);
 	rsp = (struct smb2_close_rsp *)rsp_iov.iov_base;
 
 	if (rc != 0) {
@@ -2494,6 +2513,7 @@ SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
 	/* BB FIXME - decode close response, update inode for caching */
 
 close_exit:
+	SMB2_close_free(&rqst);
 	free_rsp_buf(resp_buftype, rsp);
 	return rc;
 }
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 19aa483395c7..fdd8c78648c6 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -141,6 +141,9 @@ extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 		      u64 persistent_file_id, u64 volatile_file_id);
 extern int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
 			    u64 persistent_fid, u64 volatile_fid, int flags);
+extern int SMB2_close_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+		      u64 persistent_file_id, u64 volatile_file_id);
+extern void SMB2_close_free(struct smb_rqst *rqst);
 extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
 		      u64 persistent_file_id, u64 volatile_file_id);
 extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
-- 
cgit v1.2.3


From 296ecbae7fdf209b1e0fb08b8bd82e5e9b637439 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 1 Aug 2018 09:26:17 +1000
Subject: cifs: add SMB2_query_info_[init|free]()

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.com>
---
 fs/cifs/smb2pdu.c   | 68 ++++++++++++++++++++++++++++++++++++-----------------
 fs/cifs/smb2proto.h |  5 ++++
 2 files changed, 51 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 78c7190f2295..821f21d4ada7 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2580,36 +2580,22 @@ validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
 	return 0;
 }
 
-static int
-query_info(const unsigned int xid, struct cifs_tcon *tcon,
-	   u64 persistent_fid, u64 volatile_fid, u8 info_class, u8 info_type,
-	   u32 additional_info, size_t output_len, size_t min_len, void **data,
-		u32 *dlen)
+int
+SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+		     u64 persistent_fid, u64 volatile_fid,
+		     u8 info_class, u8 info_type, u32 additional_info,
+		     size_t output_len)
 {
-	struct smb_rqst rqst;
 	struct smb2_query_info_req *req;
-	struct smb2_query_info_rsp *rsp = NULL;
-	struct kvec iov[2];
-	struct kvec rsp_iov;
-	int rc = 0;
-	int resp_buftype;
-	struct cifs_ses *ses = tcon->ses;
-	int flags = 0;
+	struct kvec *iov = rqst->rq_iov;
 	unsigned int total_len;
-
-	cifs_dbg(FYI, "Query Info\n");
-
-	if (!ses || !(ses->server))
-		return -EIO;
+	int rc;
 
 	rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, (void **) &req,
 			     &total_len);
 	if (rc)
 		return rc;
 
-	if (smb3_encryption_required(tcon))
-		flags |= CIFS_TRANSFORM_REQ;
-
 	req->InfoType = info_type;
 	req->FileInfoClass = info_class;
 	req->PersistentFileId = persistent_fid;
@@ -2626,13 +2612,50 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
 	iov[0].iov_base = (char *)req;
 	/* 1 for Buffer */
 	iov[0].iov_len = total_len - 1;
+	return 0;
+}
+
+void
+SMB2_query_info_free(struct smb_rqst *rqst)
+{
+	cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
+}
+
+static int
+query_info(const unsigned int xid, struct cifs_tcon *tcon,
+	   u64 persistent_fid, u64 volatile_fid, u8 info_class, u8 info_type,
+	   u32 additional_info, size_t output_len, size_t min_len, void **data,
+		u32 *dlen)
+{
+	struct smb_rqst rqst;
+	struct smb2_query_info_rsp *rsp = NULL;
+	struct kvec iov[1];
+	struct kvec rsp_iov;
+	int rc = 0;
+	int resp_buftype;
+	struct cifs_ses *ses = tcon->ses;
+	int flags = 0;
+
+	cifs_dbg(FYI, "Query Info\n");
+
+	if (!ses || !(ses->server))
+		return -EIO;
+
+	if (smb3_encryption_required(tcon))
+		flags |= CIFS_TRANSFORM_REQ;
 
 	memset(&rqst, 0, sizeof(struct smb_rqst));
+	memset(&iov, 0, sizeof(iov));
 	rqst.rq_iov = iov;
 	rqst.rq_nvec = 1;
 
+	rc = SMB2_query_info_init(tcon, &rqst, persistent_fid, volatile_fid,
+				  info_class, info_type, additional_info,
+				  output_len);
+	if (rc)
+		goto qinf_exit;
+
 	rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
-	cifs_small_buf_release(req);
 	rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
 
 	if (rc) {
@@ -2661,6 +2684,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
 				   &rsp_iov, min_len, *data);
 
 qinf_exit:
+	SMB2_query_info_free(&rqst);
 	free_rsp_buf(resp_buftype, rsp);
 	return rc;
 }
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index fdd8c78648c6..1255cde5133b 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -153,6 +153,11 @@ extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
 extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
 			   u64 persistent_file_id, u64 volatile_file_id,
 			   struct smb2_file_all_info *data);
+extern int SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+				u64 persistent_fid, u64 volatile_fid,
+				u8 info_class, u8 info_type,
+				u32 additional_info, size_t output_len);
+extern void SMB2_query_info_free(struct smb_rqst *rqst);
 extern int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon,
 			   u64 persistent_file_id, u64 volatile_file_id,
 			   void **data, unsigned int *plen);
-- 
cgit v1.2.3


From 1eb9fb52040fc6e5656c277b562229f09467c9f8 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 8 Aug 2018 15:07:46 +1000
Subject: cifs: create SMB2_open_init()/SMB2_open_free() helpers.

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/smb2pdu.c   | 145 +++++++++++++++++++++++++++-------------------------
 fs/cifs/smb2proto.h |   4 ++
 2 files changed, 78 insertions(+), 71 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 821f21d4ada7..a5009a1c9bce 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2052,43 +2052,27 @@ err_free_path:
 }
 
 int
-SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
-	  __u8 *oplock, struct smb2_file_all_info *buf,
-	  struct kvec *err_iov, int *buftype)
+SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock,
+	       struct cifs_open_parms *oparms, __le16 *path)
 {
-	struct smb_rqst rqst;
+	struct TCP_Server_Info *server = tcon->ses->server;
 	struct smb2_create_req *req;
-	struct smb2_create_rsp *rsp;
-	struct TCP_Server_Info *server;
-	struct cifs_tcon *tcon = oparms->tcon;
-	struct cifs_ses *ses = tcon->ses;
-	struct kvec iov[5]; /* make sure at least one for each open context */
-	struct kvec rsp_iov = {NULL, 0};
-	int resp_buftype;
-	int uni_path_len;
-	__le16 *copy_path = NULL;
-	int copy_size;
-	int rc = 0;
 	unsigned int n_iov = 2;
 	__u32 file_attributes = 0;
-	char *dhc_buf = NULL, *lc_buf = NULL, *pc_buf = NULL;
-	int flags = 0;
+	int copy_size;
+	int uni_path_len;
 	unsigned int total_len;
-
-	cifs_dbg(FYI, "create/open\n");
-
-	if (ses && (ses->server))
-		server = ses->server;
-	else
-		return -EIO;
+	struct kvec *iov = rqst->rq_iov;
+	__le16 *copy_path;
+	int rc;
 
 	rc = smb2_plain_req_init(SMB2_CREATE, tcon, (void **) &req, &total_len);
-
 	if (rc)
 		return rc;
 
-	if (smb3_encryption_required(tcon))
-		flags |= CIFS_TRANSFORM_REQ;
+	iov[0].iov_base = (char *)req;
+	/* -1 since last byte is buf[0] which is sent below (path) */
+	iov[0].iov_len = total_len - 1;
 
 	if (oparms->create_options & CREATE_OPTION_READONLY)
 		file_attributes |= ATTR_READONLY;
@@ -2102,11 +2086,6 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 	req->ShareAccess = FILE_SHARE_ALL_LE;
 	req->CreateDisposition = cpu_to_le32(oparms->disposition);
 	req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK);
-
-	iov[0].iov_base = (char *)req;
-	/* -1 since last byte is buf[0] which is sent below (path) */
-	iov[0].iov_len = total_len - 1;
-
 	req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req));
 
 	/* [MS-SMB2] 2.2.13 NameOffset:
@@ -2124,10 +2103,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 		rc = alloc_path_with_tree_prefix(&copy_path, &copy_size,
 						 &name_len,
 						 tcon->treeName, path);
-		if (rc) {
-			cifs_small_buf_release(req);
+		if (rc)
 			return rc;
-		}
 		req->NameLength = cpu_to_le16(name_len * 2);
 		uni_path_len = copy_size;
 		path = copy_path;
@@ -2135,18 +2112,16 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 		uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2;
 		/* MUST set path len (NameLength) to 0 opening root of share */
 		req->NameLength = cpu_to_le16(uni_path_len - 2);
-		if (uni_path_len % 8 != 0) {
-			copy_size = roundup(uni_path_len, 8);
-			copy_path = kzalloc(copy_size, GFP_KERNEL);
-			if (!copy_path) {
-				cifs_small_buf_release(req);
-				return -ENOMEM;
-			}
-			memcpy((char *)copy_path, (const char *)path,
-			       uni_path_len);
-			uni_path_len = copy_size;
-			path = copy_path;
-		}
+		copy_size = uni_path_len;
+		if (copy_size % 8 != 0)
+			copy_size = roundup(copy_size, 8);
+		copy_path = kzalloc(copy_size, GFP_KERNEL);
+		if (!copy_path)
+			return -ENOMEM;
+		memcpy((char *)copy_path, (const char *)path,
+		       uni_path_len);
+		uni_path_len = copy_size;
+		path = copy_path;
 	}
 
 	iov[1].iov_len = uni_path_len;
@@ -2161,12 +2136,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 	else {
 		rc = add_lease_context(server, iov, &n_iov,
 				       oparms->fid->lease_key, oplock);
-		if (rc) {
-			cifs_small_buf_release(req);
-			kfree(copy_path);
+		if (rc)
 			return rc;
-		}
-		lc_buf = iov[n_iov-1].iov_base;
 	}
 
 	if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) {
@@ -2180,13 +2151,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 
 		rc = add_durable_context(iov, &n_iov, oparms,
 					tcon->use_persistent);
-		if (rc) {
-			cifs_small_buf_release(req);
-			kfree(copy_path);
-			kfree(lc_buf);
+		if (rc)
 			return rc;
-		}
-		dhc_buf = iov[n_iov-1].iov_base;
 	}
 
 	if (tcon->posix_extensions) {
@@ -2198,23 +2164,63 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 		}
 
 		rc = add_posix_context(iov, &n_iov, oparms->mode);
-		if (rc) {
-			cifs_small_buf_release(req);
-			kfree(copy_path);
-			kfree(lc_buf);
-			kfree(dhc_buf);
+		if (rc)
 			return rc;
-		}
-		pc_buf = iov[n_iov-1].iov_base;
 	}
 
+	rqst->rq_nvec = n_iov;
+	return 0;
+}
+
+/* rq_iov[0] is the request and is released by cifs_small_buf_release().
+ * All other vectors are freed by kfree().
+ */
+void
+SMB2_open_free(struct smb_rqst *rqst)
+{
+	int i;
+
+	cifs_small_buf_release(rqst->rq_iov[0].iov_base);
+	for (i = 1; i < rqst->rq_nvec; i++)
+		kfree(rqst->rq_iov[i].iov_base);
+}
+
+int
+SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
+	  __u8 *oplock, struct smb2_file_all_info *buf,
+	  struct kvec *err_iov, int *buftype)
+{
+	struct smb_rqst rqst;
+	struct smb2_create_rsp *rsp = NULL;
+	struct TCP_Server_Info *server;
+	struct cifs_tcon *tcon = oparms->tcon;
+	struct cifs_ses *ses = tcon->ses;
+	struct kvec iov[5]; /* make sure at least one for each open context */
+	struct kvec rsp_iov = {NULL, 0};
+	int resp_buftype;
+	int rc = 0;
+	int flags = 0;
+
+	cifs_dbg(FYI, "create/open\n");
+	if (ses && (ses->server))
+		server = ses->server;
+	else
+		return -EIO;
+
+	if (smb3_encryption_required(tcon))
+		flags |= CIFS_TRANSFORM_REQ;
+
 	memset(&rqst, 0, sizeof(struct smb_rqst));
+	memset(&iov, 0, sizeof(iov));
 	rqst.rq_iov = iov;
-	rqst.rq_nvec = n_iov;
+	rqst.rq_nvec = 5;
+
+	rc = SMB2_open_init(tcon, &rqst, oplock, oparms, path);
+	if (rc)
+		goto creat_exit;
 
 	rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
 			    &rsp_iov);
-	cifs_small_buf_release(req);
 	rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
 
 	if (rc != 0) {
@@ -2251,10 +2257,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 	else
 		*oplock = rsp->OplockLevel;
 creat_exit:
-	kfree(copy_path);
-	kfree(lc_buf);
-	kfree(dhc_buf);
-	kfree(pc_buf);
+	SMB2_open_free(&rqst);
 	free_rsp_buf(resp_buftype, rsp);
 	return rc;
 }
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 1255cde5133b..57bdd2711974 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -133,6 +133,10 @@ extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms,
 		     __le16 *path, __u8 *oplock,
 		     struct smb2_file_all_info *buf,
 		     struct kvec *err_iov, int *resp_buftype);
+extern int SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+			  __u8 *oplock, struct cifs_open_parms *oparms,
+			  __le16 *path);
+extern void SMB2_open_free(struct smb_rqst *rqst);
 extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
 		     u64 persistent_fid, u64 volatile_fid, u32 opcode,
 		     bool is_fsctl, char *in_data, u32 indatalen,
-- 
cgit v1.2.3


From b24df3e30cbf48255db866720fb71f14bf9d2f39 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 8 Aug 2018 15:07:45 +1000
Subject: cifs: update receive_encrypted_standard to handle compounded
 responses

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/cifsglob.h  |  5 +++-
 fs/cifs/connect.c   | 82 ++++++++++++++++++++++++++++++++---------------------
 fs/cifs/smb2ops.c   | 61 +++++++++++++++++++++++++++++++++------
 fs/cifs/transport.c |  2 --
 4 files changed, 107 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 41803d374da0..0c9ab62c3df4 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -76,6 +76,9 @@
 #define SMB_ECHO_INTERVAL_MAX 600
 #define SMB_ECHO_INTERVAL_DEFAULT 60
 
+/* maximum number of PDUs in one compound */
+#define MAX_COMPOUND 5
+
 /*
  * Default number of credits to keep available for SMB3.
  * This value is chosen somewhat arbitrarily. The Windows client
@@ -458,7 +461,7 @@ struct smb_version_operations {
 				 struct smb_rqst *, struct smb_rqst *);
 	int (*is_transform_hdr)(void *buf);
 	int (*receive_transform)(struct TCP_Server_Info *,
-				 struct mid_q_entry **);
+				 struct mid_q_entry **, char **, int *);
 	enum securityEnum (*select_sectype)(struct TCP_Server_Info *,
 			    enum securityEnum);
 	int (*next_header)(char *);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d9bd10d295a9..c832a8a1970a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -850,13 +850,14 @@ cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 static int
 cifs_demultiplex_thread(void *p)
 {
-	int length;
+	int i, num_mids, length;
 	struct TCP_Server_Info *server = p;
 	unsigned int pdu_length;
 	unsigned int next_offset;
 	char *buf = NULL;
 	struct task_struct *task_to_wake = NULL;
-	struct mid_q_entry *mid_entry;
+	struct mid_q_entry *mids[MAX_COMPOUND];
+	char *bufs[MAX_COMPOUND];
 
 	current->flags |= PF_MEMALLOC;
 	cifs_dbg(FYI, "Demultiplex PID: %d\n", task_pid_nr(current));
@@ -923,58 +924,75 @@ next_pdu:
 				server->pdu_size = next_offset;
 		}
 
-		mid_entry = NULL;
+		memset(mids, 0, sizeof(mids));
+		memset(bufs, 0, sizeof(bufs));
+		num_mids = 0;
+
 		if (server->ops->is_transform_hdr &&
 		    server->ops->receive_transform &&
 		    server->ops->is_transform_hdr(buf)) {
 			length = server->ops->receive_transform(server,
-								&mid_entry);
+								mids,
+								bufs,
+								&num_mids);
 		} else {
-			mid_entry = server->ops->find_mid(server, buf);
+			mids[0] = server->ops->find_mid(server, buf);
+			bufs[0] = buf;
+			if (mids[0])
+				num_mids = 1;
 
-			if (!mid_entry || !mid_entry->receive)
-				length = standard_receive3(server, mid_entry);
+			if (!mids[0] || !mids[0]->receive)
+				length = standard_receive3(server, mids[0]);
 			else
-				length = mid_entry->receive(server, mid_entry);
+				length = mids[0]->receive(server, mids[0]);
 		}
 
 		if (length < 0) {
-			if (mid_entry)
-				cifs_mid_q_entry_release(mid_entry);
+			for (i = 0; i < num_mids; i++)
+				if (mids[i])
+					cifs_mid_q_entry_release(mids[i]);
 			continue;
 		}
 
 		if (server->large_buf)
 			buf = server->bigbuf;
 
+
 		server->lstrp = jiffies;
-		if (mid_entry != NULL) {
-			mid_entry->resp_buf_size = server->pdu_size;
-			if ((mid_entry->mid_flags & MID_WAIT_CANCELLED) &&
-			     mid_entry->mid_state == MID_RESPONSE_RECEIVED &&
-					server->ops->handle_cancelled_mid)
-				server->ops->handle_cancelled_mid(
-							mid_entry->resp_buf,
-							server);
 
-			if (!mid_entry->multiRsp || mid_entry->multiEnd)
-				mid_entry->callback(mid_entry);
+		for (i = 0; i < num_mids; i++) {
+			if (mids[i] != NULL) {
+				mids[i]->resp_buf_size = server->pdu_size;
+				if ((mids[i]->mid_flags & MID_WAIT_CANCELLED) &&
+				    mids[i]->mid_state == MID_RESPONSE_RECEIVED &&
+				    server->ops->handle_cancelled_mid)
+					server->ops->handle_cancelled_mid(
+							mids[i]->resp_buf,
+							server);
 
-			cifs_mid_q_entry_release(mid_entry);
-		} else if (server->ops->is_oplock_break &&
-			   server->ops->is_oplock_break(buf, server)) {
-			cifs_dbg(FYI, "Received oplock break\n");
-		} else {
-			cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n",
-				 atomic_read(&midCount));
-			cifs_dump_mem("Received Data is: ", buf,
-				      HEADER_SIZE(server));
+				if (!mids[i]->multiRsp || mids[i]->multiEnd)
+					mids[i]->callback(mids[i]);
+
+				cifs_mid_q_entry_release(mids[i]);
+			} else if (server->ops->is_oplock_break &&
+				   server->ops->is_oplock_break(bufs[i],
+								server)) {
+				cifs_dbg(FYI, "Received oplock break\n");
+			} else {
+				cifs_dbg(VFS, "No task to wake, unknown frame "
+					 "received! NumMids %d\n",
+					 atomic_read(&midCount));
+				cifs_dump_mem("Received Data is: ", bufs[i],
+					      HEADER_SIZE(server));
 #ifdef CONFIG_CIFS_DEBUG2
-			if (server->ops->dump_detail)
-				server->ops->dump_detail(buf, server);
-			cifs_dump_mids(server);
+				if (server->ops->dump_detail)
+					server->ops->dump_detail(bufs[i],
+								 server);
+				cifs_dump_mids(server);
 #endif /* CIFS_DEBUG2 */
+			}
 		}
+
 		if (pdu_length > server->pdu_size) {
 			if (!allocate_buffers(server))
 				continue;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index ebc13ebebddf..d23715062c8e 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2942,13 +2942,20 @@ discard_data:
 
 static int
 receive_encrypted_standard(struct TCP_Server_Info *server,
-			   struct mid_q_entry **mid)
+			   struct mid_q_entry **mids, char **bufs,
+			   int *num_mids)
 {
-	int length;
+	int ret, length;
 	char *buf = server->smallbuf;
+	char *tmpbuf;
+	struct smb2_sync_hdr *shdr;
 	unsigned int pdu_length = server->pdu_size;
 	unsigned int buf_size;
 	struct mid_q_entry *mid_entry;
+	int next_is_large;
+	char *next_buffer = NULL;
+
+	*num_mids = 0;
 
 	/* switch to large buffer if too big for a small one */
 	if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE) {
@@ -2969,24 +2976,61 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
 	if (length)
 		return length;
 
+	next_is_large = server->large_buf;
+ one_more:
+	shdr = (struct smb2_sync_hdr *)buf;
+	if (shdr->NextCommand) {
+		if (next_is_large) {
+			tmpbuf = server->bigbuf;
+			next_buffer = (char *)cifs_buf_get();
+		} else {
+			tmpbuf = server->smallbuf;
+			next_buffer = (char *)cifs_small_buf_get();
+		}
+		memcpy(next_buffer,
+		       tmpbuf + le32_to_cpu(shdr->NextCommand),
+		       pdu_length - le32_to_cpu(shdr->NextCommand));
+	}
+
 	mid_entry = smb2_find_mid(server, buf);
 	if (mid_entry == NULL)
 		cifs_dbg(FYI, "mid not found\n");
 	else {
 		cifs_dbg(FYI, "mid found\n");
 		mid_entry->decrypted = true;
+		mid_entry->resp_buf_size = server->pdu_size;
 	}
 
-	*mid = mid_entry;
+	if (*num_mids >= MAX_COMPOUND) {
+		cifs_dbg(VFS, "too many PDUs in compound\n");
+		return -1;
+	}
+	bufs[*num_mids] = buf;
+	mids[(*num_mids)++] = mid_entry;
 
 	if (mid_entry && mid_entry->handle)
-		return mid_entry->handle(server, mid_entry);
+		ret = mid_entry->handle(server, mid_entry);
+	else
+		ret = cifs_handle_standard(server, mid_entry);
+
+	if (ret == 0 && shdr->NextCommand) {
+		pdu_length -= le32_to_cpu(shdr->NextCommand);
+		server->large_buf = next_is_large;
+		if (next_is_large)
+			server->bigbuf = next_buffer;
+		else
+			server->smallbuf = next_buffer;
+
+		buf += le32_to_cpu(shdr->NextCommand);
+		goto one_more;
+	}
 
-	return cifs_handle_standard(server, mid_entry);
+	return ret;
 }
 
 static int
-smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid)
+smb3_receive_transform(struct TCP_Server_Info *server,
+		       struct mid_q_entry **mids, char **bufs, int *num_mids)
 {
 	char *buf = server->smallbuf;
 	unsigned int pdu_length = server->pdu_size;
@@ -3009,10 +3053,11 @@ smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 		return -ECONNABORTED;
 	}
 
+	/* TODO: add support for compounds containing READ. */
 	if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
-		return receive_encrypted_read(server, mid);
+		return receive_encrypted_read(server, &mids[0]);
 
-	return receive_encrypted_standard(server, mid);
+	return receive_encrypted_standard(server, mids, bufs, num_mids);
 }
 
 int
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c53c0908d4c6..78f96fa3d7d9 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -383,8 +383,6 @@ smbd_done:
 	return rc;
 }
 
-#define MAX_COMPOUND 5
-
 static int
 smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 	      struct smb_rqst *rqst, int flags)
-- 
cgit v1.2.3


From 730928c8f4be88e9d6a027a16b1e8fa9c59fc077 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 8 Aug 2018 15:07:49 +1000
Subject: cifs: update smb2_queryfs() to use compounding

Change smb2_queryfs() to use a Create/QueryInfo/Close compound request.

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/smb2ops.c   | 106 +++++++++++++++++++++++++++++++++++++++++++++++++---
 fs/cifs/smb2pdu.c   |  41 ++++++++++----------
 fs/cifs/smb2pdu.h   |   4 ++
 fs/cifs/smb2proto.h |   6 +++
 4 files changed, 131 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d23715062c8e..15c7cbde2f39 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1524,15 +1524,66 @@ smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
 				 CIFS_CACHE_READ(cinode) ? 1 : 0);
 }
 
+static void
+smb2_set_related(struct smb_rqst *rqst)
+{
+	struct smb2_sync_hdr *shdr;
+
+	shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base);
+	shdr->Flags |= SMB2_FLAGS_RELATED_OPERATIONS;
+}
+
+char smb2_padding[7] = {0, 0, 0, 0, 0, 0, 0};
+
+static void
+smb2_set_next_command(struct TCP_Server_Info *server, struct smb_rqst *rqst)
+{
+	struct smb2_sync_hdr *shdr;
+	unsigned long len = smb_rqst_len(server, rqst);
+
+	/* SMB headers in a compound are 8 byte aligned. */
+	if (len & 7) {
+		rqst->rq_iov[rqst->rq_nvec].iov_base = smb2_padding;
+		rqst->rq_iov[rqst->rq_nvec].iov_len = 8 - (len & 7);
+		rqst->rq_nvec++;
+		len = smb_rqst_len(server, rqst);
+	}
+
+	shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base);
+	shdr->NextCommand = cpu_to_le32(len);
+}
+
 static int
 smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
 	     struct kstatfs *buf)
 {
-	int rc;
+	struct smb2_query_info_rsp *rsp;
+	struct smb2_fs_full_size_info *info = NULL;
+	struct smb_rqst rqst[3];
+	int resp_buftype[3];
+	struct kvec rsp_iov[3];
+	struct kvec open_iov[5]; /* 4 + potential padding. */
+	struct kvec qi_iov[1];
+	struct kvec close_iov[1];
+	struct cifs_ses *ses = tcon->ses;
+	struct TCP_Server_Info *server = ses->server;
 	__le16 srch_path = 0; /* Null - open root of share */
 	u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
 	struct cifs_open_parms oparms;
 	struct cifs_fid fid;
+	int flags = 0;
+	int rc;
+
+	if (smb3_encryption_required(tcon))
+		flags |= CIFS_TRANSFORM_REQ;
+
+	memset(rqst, 0, sizeof(rqst));
+	memset(resp_buftype, 0, sizeof(resp_buftype));
+	memset(rsp_iov, 0, sizeof(rsp_iov));
+
+	memset(&open_iov, 0, sizeof(open_iov));
+	rqst[0].rq_iov = open_iov;
+	rqst[0].rq_nvec = 4;
 
 	oparms.tcon = tcon;
 	oparms.desired_access = FILE_READ_ATTRIBUTES;
@@ -1541,13 +1592,56 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
 	oparms.fid = &fid;
 	oparms.reconnect = false;
 
-	rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL, NULL);
+	rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, &srch_path);
 	if (rc)
-		return rc;
+		goto qfs_exit;
+	smb2_set_next_command(server, &rqst[0]);
+
+	memset(&qi_iov, 0, sizeof(qi_iov));
+	rqst[1].rq_iov = qi_iov;
+	rqst[1].rq_nvec = 1;
+
+	rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID,
+				  FS_FULL_SIZE_INFORMATION,
+				  SMB2_O_INFO_FILESYSTEM, 0,
+				  sizeof(struct smb2_fs_full_size_info));
+	if (rc)
+		goto qfs_exit;
+	smb2_set_next_command(server, &rqst[1]);
+	smb2_set_related(&rqst[1]);
+
+	memset(&close_iov, 0, sizeof(close_iov));
+	rqst[2].rq_iov = close_iov;
+	rqst[2].rq_nvec = 1;
+
+	rc = SMB2_close_init(tcon, &rqst[2], COMPOUND_FID, COMPOUND_FID);
+	if (rc)
+		goto qfs_exit;
+	smb2_set_related(&rqst[2]);
+
+	rc = compound_send_recv(xid, ses, flags, 3, rqst,
+				resp_buftype, rsp_iov);
+	if (rc)
+		goto qfs_exit;
+
+	rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
 	buf->f_type = SMB2_MAGIC_NUMBER;
-	rc = SMB2_QFS_info(xid, tcon, fid.persistent_fid, fid.volatile_fid,
-			   buf);
-	SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+	info = (struct smb2_fs_full_size_info *)(
+		le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
+	rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
+			       le32_to_cpu(rsp->OutputBufferLength),
+			       &rsp_iov[1],
+			       sizeof(struct smb2_fs_full_size_info));
+	if (!rc)
+		smb2_copy_fs_info_to_kstatfs(info, buf);
+
+qfs_exit:
+	SMB2_open_free(&rqst[0]);
+	SMB2_query_info_free(&rqst[1]);
+	SMB2_close_free(&rqst[2]);
+	free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+	free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+	free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
 	return rc;
 }
 
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index a5009a1c9bce..faf2b779e5cb 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -80,7 +80,7 @@ static const int smb2_req_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
 	/* SMB2_OPLOCK_BREAK */ 24 /* BB this is 36 for LEASE_BREAK variant */
 };
 
-static int smb3_encryption_required(const struct cifs_tcon *tcon)
+int smb3_encryption_required(const struct cifs_tcon *tcon)
 {
 	if (!tcon)
 		return 0;
@@ -2182,7 +2182,8 @@ SMB2_open_free(struct smb_rqst *rqst)
 
 	cifs_small_buf_release(rqst->rq_iov[0].iov_base);
 	for (i = 1; i < rqst->rq_nvec; i++)
-		kfree(rqst->rq_iov[i].iov_base);
+		if (rqst->rq_iov[i].iov_base != smb2_padding)
+			kfree(rqst->rq_iov[i].iov_base);
 }
 
 int
@@ -2528,9 +2529,9 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 	return SMB2_close_flags(xid, tcon, persistent_fid, volatile_fid, 0);
 }
 
-static int
-validate_iov(unsigned int offset, unsigned int buffer_length,
-	     struct kvec *iov, unsigned int min_buf_size)
+int
+smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
+		  struct kvec *iov, unsigned int min_buf_size)
 {
 	unsigned int smb_len = iov->iov_len;
 	char *end_of_smb = smb_len + (char *)iov->iov_base;
@@ -2574,7 +2575,7 @@ validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
 	if (!data)
 		return -EINVAL;
 
-	rc = validate_iov(offset, buffer_length, iov, minbufsize);
+	rc = smb2_validate_iov(offset, buffer_length, iov, minbufsize);
 	if (rc)
 		return rc;
 
@@ -3646,9 +3647,9 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
 		goto qdir_exit;
 	}
 
-	rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
-			  le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
-			  info_buf_size);
+	rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
+			       le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
+			       info_buf_size);
 	if (rc)
 		goto qdir_exit;
 
@@ -3950,9 +3951,9 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
 	return rc;
 }
 
-static void
-copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf,
-			struct kstatfs *kst)
+void
+smb2_copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf,
+			     struct kstatfs *kst)
 {
 	kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) *
 			  le32_to_cpu(pfs_inf->SectorsPerAllocationUnit);
@@ -4054,9 +4055,9 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon,
 
 	info = (FILE_SYSTEM_POSIX_INFO *)(
 		le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
-	rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
-			  le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
-			  sizeof(FILE_SYSTEM_POSIX_INFO));
+	rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
+			       le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
+			       sizeof(FILE_SYSTEM_POSIX_INFO));
 	if (!rc)
 		copy_posix_fs_info_to_kstatfs(info, fsdata);
 
@@ -4102,11 +4103,11 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
 
 	info = (struct smb2_fs_full_size_info *)(
 		le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
-	rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
-			  le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
-			  sizeof(struct smb2_fs_full_size_info));
+	rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
+			       le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
+			       sizeof(struct smb2_fs_full_size_info));
 	if (!rc)
-		copy_fs_info_to_kstatfs(info, fsdata);
+		smb2_copy_fs_info_to_kstatfs(info, fsdata);
 
 qfsinf_exit:
 	free_rsp_buf(resp_buftype, rsp_iov.iov_base);
@@ -4166,7 +4167,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
 
 	rsp_len = le32_to_cpu(rsp->OutputBufferLength);
 	offset = le16_to_cpu(rsp->OutputBufferOffset);
-	rc = validate_iov(offset, rsp_len, &rsp_iov, min_len);
+	rc = smb2_validate_iov(offset, rsp_len, &rsp_iov, min_len);
 	if (rc)
 		goto qfsattr_exit;
 
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index ecb0feeac844..cf37c2f3f3b8 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -153,6 +153,8 @@ struct smb2_transform_hdr {
  *
  */
 
+#define COMPOUND_FID 0xFFFFFFFFFFFFFFFFULL
+
 #define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
 
 struct smb2_err_rsp {
@@ -1373,4 +1375,6 @@ struct smb2_file_eof_info { /* encoding of request for level 10 */
 	__le64 EndOfFile; /* new end of file value */
 } __packed; /* level 20 Set */
 
+extern char smb2_padding[7];
+
 #endif				/* _SMB2PDU_H */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 57bdd2711974..b4076577eeb7 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -229,6 +229,12 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
 
 extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
 					enum securityEnum);
+extern int smb3_encryption_required(const struct cifs_tcon *tcon);
+extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
+			     struct kvec *iov, unsigned int min_buf_size);
+extern void smb2_copy_fs_info_to_kstatfs(
+	 struct smb2_fs_full_size_info *pfs_inf,
+	 struct kstatfs *kst);
 extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server);
 extern int smb311_update_preauth_hash(struct cifs_ses *ses,
 				      struct kvec *iov, int nvec);
-- 
cgit v1.2.3


From e02789a53d71334b067ad72eee5d4e88a0158083 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Thu, 9 Aug 2018 14:33:12 -0500
Subject: smb3: enumerating snapshots was leaving part of the data off end

When enumerating snapshots, the last few bytes of the final
snapshot could be left off since we were miscalculating the
length returned (leaving off the sizeof struct SRV_SNAPSHOT_ARRAY).
See MS-SMB2 section 2.2.32.2. In addition, fix up the length used
to allow a smaller buffer to be passed in, in order to allow
returning the size of the whole snapshot array more easily.

Sample userspace output with a kernel patched with this
(mounted to a Windows volume with two snapshots).
Before this patch, the second snapshot would be missing a
few bytes at the end.

~/cifs-2.6# ~/enum-snapshots /mnt/file
press enter to issue the ioctl to retrieve snapshot information ...

size of snapshot array = 102
Num snapshots: 2 Num returned: 2 Array Size: 102

Snapshot 0:@GMT-2018.06.30-19.34.17
Snapshot 1:@GMT-2018.06.30-19.33.37

CC: Stable <stable@vger.kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/smb2ops.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 15c7cbde2f39..7869ea4f6fab 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1369,6 +1369,13 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
 
 }
 
+/* GMT Token is @GMT-YYYY.MM.DD-HH.MM.SS Unicode which is 48 bytes + null */
+#define GMT_TOKEN_SIZE 50
+
+/*
+ * Input buffer contains (empty) struct smb_snapshot array with size filled in
+ * For output see struct SRV_SNAPSHOT_ARRAY in MS-SMB2 section 2.2.32.2
+ */
 static int
 smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
 		   struct cifsFileInfo *cfile, void __user *ioc_buf)
@@ -1398,14 +1405,27 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
 			kfree(retbuf);
 			return rc;
 		}
-		if (snapshot_in.snapshot_array_size < sizeof(struct smb_snapshot_array)) {
-			rc = -ERANGE;
-			kfree(retbuf);
-			return rc;
-		}
 
-		if (ret_data_len > snapshot_in.snapshot_array_size)
-			ret_data_len = snapshot_in.snapshot_array_size;
+		/*
+		 * Check for min size, ie not large enough to fit even one GMT
+		 * token (snapshot).  On the first ioctl some users may pass in
+		 * smaller size (or zero) to simply get the size of the array
+		 * so the user space caller can allocate sufficient memory
+		 * and retry the ioctl again with larger array size sufficient
+		 * to hold all of the snapshot GMT tokens on the second try.
+		 */
+		if (snapshot_in.snapshot_array_size < GMT_TOKEN_SIZE)
+			ret_data_len = sizeof(struct smb_snapshot_array);
+
+		/*
+		 * We return struct SRV_SNAPSHOT_ARRAY, followed by
+		 * the snapshot array (of 50 byte GMT tokens) each
+		 * representing an available previous version of the data
+		 */
+		if (ret_data_len > (snapshot_in.snapshot_array_size +
+					sizeof(struct smb_snapshot_array)))
+			ret_data_len = snapshot_in.snapshot_array_size +
+					sizeof(struct smb_snapshot_array);
 
 		if (copy_to_user(ioc_buf, retbuf, ret_data_len))
 			rc = -EFAULT;
-- 
cgit v1.2.3


From c1777df1a5d541cda918ff0450c8adcc8b69c2fd Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Fri, 10 Aug 2018 11:03:55 +1000
Subject: cifs: add missing support for ACLs in SMB 3.11

We were missing the methods for get_acl and friends for the 3.11
dialect.

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
CC: Stable <stable@vger.kernel.org>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/smb2ops.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 7869ea4f6fab..541258447c4c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3586,6 +3586,11 @@ struct smb_version_operations smb311_operations = {
 	.query_all_EAs = smb2_query_eas,
 	.set_EA = smb2_set_ea,
 #endif /* CIFS_XATTR */
+#ifdef CONFIG_CIFS_ACL
+	.get_acl = get_smb2_acl,
+	.get_acl_by_fid = get_smb2_acl_by_fid,
+	.set_acl = set_smb2_acl,
+#endif /* CIFS_ACL */
 	.next_header = smb2_next_header,
 };
 
-- 
cgit v1.2.3


From e55954a5f7ce0e321cea9f91f4ebeb2a4e0165f4 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Fri, 10 Aug 2018 11:31:10 +1000
Subject: cifs: don't show domain= in mount output when domain is empty

Reported-by: Xiaoli Feng <xifeng@redhat.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/cifsfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index de16078e456b..7065426b3280 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -432,7 +432,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 	else if (tcon->ses->user_name)
 		seq_show_option(s, "username", tcon->ses->user_name);
 
-	if (tcon->ses->domainName)
+	if (tcon->ses->domainName && tcon->ses->domainName[0] != 0)
 		seq_show_option(s, "domain", tcon->ses->domainName);
 
 	if (srcaddr->sa_family != AF_UNSPEC) {
-- 
cgit v1.2.3


From cdeaf9d04a5a0f51220d2f025385c553220bfb5c Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 10 Aug 2018 02:25:06 -0500
Subject: smb3: allow previous versions to be mounted with snapshot= mount parm

Mounting with the "snapshot=" mount parm allows a read-only
view of a previous version of a file system (see MS-SMB2
and "timewarp" tokens, section 2.2.13.2.7) based on the timestamp
passed in on the snapshot mount parm.

Add processing to optionally send this create context.

Example output:

/mnt1 is mounted with "snapshot=..." and will see an earlier
version of the directory, with three fewer files than /mnt2,
the current version of the directory.

root@Ubuntu-17-Virtual-Machine:~/cifs-2.6# cat /proc/mounts | grep cifs
//172.22.149.186/public /mnt1 cifs
ro,relatime,vers=default,cache=strict,username=smfrench,uid=0,noforceuid,gid=0,noforcegid,addr=172.22.149.186,file_mode=0755,dir_mode=0755,soft,nounix,mapposix,rsize=1048576,wsize=1048576,echo_interval=60,snapshot=131748608570000000,actimeo=1

//172.22.149.186/public /mnt2 cifs
rw,relatime,vers=default,cache=strict,username=smfrench,uid=0,noforceuid,gid=0,noforcegid,addr=172.22.149.186,file_mode=0755,dir_mode=0755,soft,nounix,mapposix,rsize=1048576,wsize=1048576,echo_interval=60,actimeo=1

root@Ubuntu-17-Virtual-Machine:~/cifs-2.6# ls /mnt1
EmptyDir  newerdir
root@Ubuntu-17-Virtual-Machine:~/cifs-2.6# ls /mnt1/newerdir

root@Ubuntu-17-Virtual-Machine:~/cifs-2.6# ls /mnt2
EmptyDir  file  newerdir  newestdir  timestamp-trace.cap
root@Ubuntu-17-Virtual-Machine:~/cifs-2.6# ls /mnt2/newerdir
new-file-not-in-snapshot

Snapshots are extremely useful for comparing previous versions of files or directories,
and recovering from data corruptions or mistakes.

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/smb2pdu.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2pdu.h |  8 ++++++++
 2 files changed, 68 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index faf2b779e5cb..2f1938011395 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1856,6 +1856,51 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec,
 	return 0;
 }
 
+/* See MS-SMB2 2.2.13.2.7 */
+static struct crt_twarp_ctxt *
+create_twarp_buf(__u64 timewarp)
+{
+	struct crt_twarp_ctxt *buf;
+
+	buf = kzalloc(sizeof(struct crt_twarp_ctxt), GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	buf->ccontext.DataOffset = cpu_to_le16(offsetof
+					(struct crt_twarp_ctxt, Timestamp));
+	buf->ccontext.DataLength = cpu_to_le32(8);
+	buf->ccontext.NameOffset = cpu_to_le16(offsetof
+				(struct crt_twarp_ctxt, Name));
+	buf->ccontext.NameLength = cpu_to_le16(4);
+	/* SMB2_CREATE_TIMEWARP_TOKEN is "TWrp" */
+	buf->Name[0] = 'T';
+	buf->Name[1] = 'W';
+	buf->Name[2] = 'r';
+	buf->Name[3] = 'p';
+	buf->Timestamp = cpu_to_le64(timewarp);
+	return buf;
+}
+
+/* See MS-SMB2 2.2.13.2.7 */
+static int
+add_twarp_context(struct kvec *iov, unsigned int *num_iovec, __u64 timewarp)
+{
+	struct smb2_create_req *req = iov[0].iov_base;
+	unsigned int num = *num_iovec;
+
+	iov[num].iov_base = create_twarp_buf(timewarp);
+	if (iov[num].iov_base == NULL)
+		return -ENOMEM;
+	iov[num].iov_len = sizeof(struct crt_twarp_ctxt);
+	if (!req->CreateContextsOffset)
+		req->CreateContextsOffset = cpu_to_le32(
+				sizeof(struct smb2_create_req) +
+				iov[num - 1].iov_len);
+	le32_add_cpu(&req->CreateContextsLength, sizeof(struct crt_twarp_ctxt));
+	*num_iovec = num + 1;
+	return 0;
+}
+
 static int
 alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
 			    const char *treename, const __le16 *path)
@@ -2168,6 +2213,21 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock,
 			return rc;
 	}
 
+	if (tcon->snapshot_time) {
+		cifs_dbg(FYI, "adding snapshot context\n");
+		if (n_iov > 2) {
+			struct create_context *ccontext =
+			    (struct create_context *)iov[n_iov-1].iov_base;
+			ccontext->Next =
+				cpu_to_le32(iov[n_iov-1].iov_len);
+		}
+
+		rc = add_twarp_context(iov, &n_iov, tcon->snapshot_time);
+		if (rc)
+			return rc;
+	}
+
+
 	rqst->rq_nvec = n_iov;
 	return 0;
 }
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index cf37c2f3f3b8..a2eeae9e0432 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -767,6 +767,14 @@ struct create_durable_handle_reconnect_v2 {
 	struct durable_reconnect_context_v2 dcontext;
 } __packed;
 
+/* See MS-SMB2 2.2.13.2.5 */
+struct crt_twarp_ctxt {
+	struct create_context ccontext;
+	__u8	Name[8];
+	__le64	Timestamp;
+
+} __packed;
+
 #define COPY_CHUNK_RES_KEY_SIZE	24
 struct resume_key_req {
 	char ResumeKey[COPY_CHUNK_RES_KEY_SIZE];
-- 
cgit v1.2.3


From f9ed6debca45dd9bcc02d77c98822d50aba342f4 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 9 Aug 2018 22:42:53 -0700
Subject: xfs: repair the AGF

Regenerate the AGF from the rmap data.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/scrub/agheader_repair.c | 379 +++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/repair.c          |  27 ++-
 fs/xfs/scrub/repair.h          |   2 +
 fs/xfs/scrub/scrub.c           |   2 +-
 4 files changed, 400 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 1e96621ece3a..aa180492a4a5 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -17,12 +17,19 @@
 #include "xfs_sb.h"
 #include "xfs_inode.h"
 #include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
 #include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
 #include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/bitmap.h"
 
 /* Superblock */
 
@@ -54,3 +61,375 @@ xrep_superblock(
 	xfs_trans_log_buf(sc->tp, bp, 0, BBTOB(bp->b_length) - 1);
 	return error;
 }
+
+/* AGF */
+
+struct xrep_agf_allocbt {
+	struct xfs_scrub	*sc;
+	xfs_agblock_t		freeblks;
+	xfs_agblock_t		longest;
+};
+
+/* Record free space shape information. */
+STATIC int
+xrep_agf_walk_allocbt(
+	struct xfs_btree_cur		*cur,
+	struct xfs_alloc_rec_incore	*rec,
+	void				*priv)
+{
+	struct xrep_agf_allocbt		*raa = priv;
+	int				error = 0;
+
+	if (xchk_should_terminate(raa->sc, &error))
+		return error;
+
+	raa->freeblks += rec->ar_blockcount;
+	if (rec->ar_blockcount > raa->longest)
+		raa->longest = rec->ar_blockcount;
+	return error;
+}
+
+/* Does this AGFL block look sane? */
+STATIC int
+xrep_agf_check_agfl_block(
+	struct xfs_mount	*mp,
+	xfs_agblock_t		agbno,
+	void			*priv)
+{
+	struct xfs_scrub	*sc = priv;
+
+	if (!xfs_verify_agbno(mp, sc->sa.agno, agbno))
+		return -EFSCORRUPTED;
+	return 0;
+}
+
+/*
+ * Offset within the xrep_find_ag_btree array for each btree type.  Avoid the
+ * XFS_BTNUM_ names here to avoid creating a sparse array.
+ */
+enum {
+	XREP_AGF_BNOBT = 0,
+	XREP_AGF_CNTBT,
+	XREP_AGF_RMAPBT,
+	XREP_AGF_REFCOUNTBT,
+	XREP_AGF_END,
+	XREP_AGF_MAX
+};
+
+/* Check a btree root candidate. */
+static inline bool
+xrep_check_btree_root(
+	struct xfs_scrub		*sc,
+	struct xrep_find_ag_btree	*fab)
+{
+	struct xfs_mount		*mp = sc->mp;
+	xfs_agnumber_t			agno = sc->sm->sm_agno;
+
+	return xfs_verify_agbno(mp, agno, fab->root) &&
+	       fab->height <= XFS_BTREE_MAXLEVELS;
+}
+
+/*
+ * Given the btree roots described by *fab, find the roots, check them for
+ * sanity, and pass the root data back out via *fab.
+ *
+ * This is /also/ a chicken and egg problem because we have to use the rmapbt
+ * (rooted in the AGF) to find the btrees rooted in the AGF.  We also have no
+ * idea if the btrees make any sense.  If we hit obvious corruptions in those
+ * btrees we'll bail out.
+ */
+STATIC int
+xrep_agf_find_btrees(
+	struct xfs_scrub		*sc,
+	struct xfs_buf			*agf_bp,
+	struct xrep_find_ag_btree	*fab,
+	struct xfs_buf			*agfl_bp)
+{
+	struct xfs_agf			*old_agf = XFS_BUF_TO_AGF(agf_bp);
+	int				error;
+
+	/* Go find the root data. */
+	error = xrep_find_ag_btree_roots(sc, agf_bp, fab, agfl_bp);
+	if (error)
+		return error;
+
+	/* We must find the bnobt, cntbt, and rmapbt roots. */
+	if (!xrep_check_btree_root(sc, &fab[XREP_AGF_BNOBT]) ||
+	    !xrep_check_btree_root(sc, &fab[XREP_AGF_CNTBT]) ||
+	    !xrep_check_btree_root(sc, &fab[XREP_AGF_RMAPBT]))
+		return -EFSCORRUPTED;
+
+	/*
+	 * We relied on the rmapbt to reconstruct the AGF.  If we get a
+	 * different root then something's seriously wrong.
+	 */
+	if (fab[XREP_AGF_RMAPBT].root !=
+	    be32_to_cpu(old_agf->agf_roots[XFS_BTNUM_RMAPi]))
+		return -EFSCORRUPTED;
+
+	/* We must find the refcountbt root if that feature is enabled. */
+	if (xfs_sb_version_hasreflink(&sc->mp->m_sb) &&
+	    !xrep_check_btree_root(sc, &fab[XREP_AGF_REFCOUNTBT]))
+		return -EFSCORRUPTED;
+
+	return 0;
+}
+
+/*
+ * Reinitialize the AGF header, making an in-core copy of the old contents so
+ * that we know which in-core state needs to be reinitialized.
+ */
+STATIC void
+xrep_agf_init_header(
+	struct xfs_scrub	*sc,
+	struct xfs_buf		*agf_bp,
+	struct xfs_agf		*old_agf)
+{
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agf_bp);
+
+	memcpy(old_agf, agf, sizeof(*old_agf));
+	memset(agf, 0, BBTOB(agf_bp->b_length));
+	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
+	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
+	agf->agf_seqno = cpu_to_be32(sc->sa.agno);
+	agf->agf_length = cpu_to_be32(xfs_ag_block_count(mp, sc->sa.agno));
+	agf->agf_flfirst = old_agf->agf_flfirst;
+	agf->agf_fllast = old_agf->agf_fllast;
+	agf->agf_flcount = old_agf->agf_flcount;
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
+
+	/* Mark the incore AGF data stale until we're done fixing things. */
+	ASSERT(sc->sa.pag->pagf_init);
+	sc->sa.pag->pagf_init = 0;
+}
+
+/* Set btree root information in an AGF. */
+STATIC void
+xrep_agf_set_roots(
+	struct xfs_scrub		*sc,
+	struct xfs_agf			*agf,
+	struct xrep_find_ag_btree	*fab)
+{
+	agf->agf_roots[XFS_BTNUM_BNOi] =
+			cpu_to_be32(fab[XREP_AGF_BNOBT].root);
+	agf->agf_levels[XFS_BTNUM_BNOi] =
+			cpu_to_be32(fab[XREP_AGF_BNOBT].height);
+
+	agf->agf_roots[XFS_BTNUM_CNTi] =
+			cpu_to_be32(fab[XREP_AGF_CNTBT].root);
+	agf->agf_levels[XFS_BTNUM_CNTi] =
+			cpu_to_be32(fab[XREP_AGF_CNTBT].height);
+
+	agf->agf_roots[XFS_BTNUM_RMAPi] =
+			cpu_to_be32(fab[XREP_AGF_RMAPBT].root);
+	agf->agf_levels[XFS_BTNUM_RMAPi] =
+			cpu_to_be32(fab[XREP_AGF_RMAPBT].height);
+
+	if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
+		agf->agf_refcount_root =
+				cpu_to_be32(fab[XREP_AGF_REFCOUNTBT].root);
+		agf->agf_refcount_level =
+				cpu_to_be32(fab[XREP_AGF_REFCOUNTBT].height);
+	}
+}
+
+/* Update all AGF fields which derive from btree contents. */
+STATIC int
+xrep_agf_calc_from_btrees(
+	struct xfs_scrub	*sc,
+	struct xfs_buf		*agf_bp)
+{
+	struct xrep_agf_allocbt	raa = { .sc = sc };
+	struct xfs_btree_cur	*cur = NULL;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agf_bp);
+	struct xfs_mount	*mp = sc->mp;
+	xfs_agblock_t		btreeblks;
+	xfs_agblock_t		blocks;
+	int			error;
+
+	/* Update the AGF counters from the bnobt. */
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
+			XFS_BTNUM_BNO);
+	error = xfs_alloc_query_all(cur, xrep_agf_walk_allocbt, &raa);
+	if (error)
+		goto err;
+	error = xfs_btree_count_blocks(cur, &blocks);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, error);
+	btreeblks = blocks - 1;
+	agf->agf_freeblks = cpu_to_be32(raa.freeblks);
+	agf->agf_longest = cpu_to_be32(raa.longest);
+
+	/* Update the AGF counters from the cntbt. */
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
+			XFS_BTNUM_CNT);
+	error = xfs_btree_count_blocks(cur, &blocks);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, error);
+	btreeblks += blocks - 1;
+
+	/* Update the AGF counters from the rmapbt. */
+	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
+	error = xfs_btree_count_blocks(cur, &blocks);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, error);
+	agf->agf_rmap_blocks = cpu_to_be32(blocks);
+	btreeblks += blocks - 1;
+
+	agf->agf_btreeblks = cpu_to_be32(btreeblks);
+
+	/* Update the AGF counters from the refcountbt. */
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		cur = xfs_refcountbt_init_cursor(mp, sc->tp, agf_bp,
+				sc->sa.agno);
+		error = xfs_btree_count_blocks(cur, &blocks);
+		if (error)
+			goto err;
+		xfs_btree_del_cursor(cur, error);
+		agf->agf_refcount_blocks = cpu_to_be32(blocks);
+	}
+
+	return 0;
+err:
+	xfs_btree_del_cursor(cur, error);
+	return error;
+}
+
+/* Commit the new AGF and reinitialize the incore state. */
+STATIC int
+xrep_agf_commit_new(
+	struct xfs_scrub	*sc,
+	struct xfs_buf		*agf_bp)
+{
+	struct xfs_perag	*pag;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agf_bp);
+
+	/* Trigger fdblocks recalculation */
+	xfs_force_summary_recalc(sc->mp);
+
+	/* Write this to disk. */
+	xfs_trans_buf_set_type(sc->tp, agf_bp, XFS_BLFT_AGF_BUF);
+	xfs_trans_log_buf(sc->tp, agf_bp, 0, BBTOB(agf_bp->b_length) - 1);
+
+	/* Now reinitialize the in-core counters we changed. */
+	pag = sc->sa.pag;
+	pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
+	pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
+	pag->pagf_longest = be32_to_cpu(agf->agf_longest);
+	pag->pagf_levels[XFS_BTNUM_BNOi] =
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
+	pag->pagf_levels[XFS_BTNUM_CNTi] =
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
+	pag->pagf_levels[XFS_BTNUM_RMAPi] =
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
+	pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
+	pag->pagf_init = 1;
+
+	return 0;
+}
+
+/* Repair the AGF. v5 filesystems only. */
+int
+xrep_agf(
+	struct xfs_scrub		*sc)
+{
+	struct xrep_find_ag_btree	fab[XREP_AGF_MAX] = {
+		[XREP_AGF_BNOBT] = {
+			.rmap_owner = XFS_RMAP_OWN_AG,
+			.buf_ops = &xfs_allocbt_buf_ops,
+			.magic = XFS_ABTB_CRC_MAGIC,
+		},
+		[XREP_AGF_CNTBT] = {
+			.rmap_owner = XFS_RMAP_OWN_AG,
+			.buf_ops = &xfs_allocbt_buf_ops,
+			.magic = XFS_ABTC_CRC_MAGIC,
+		},
+		[XREP_AGF_RMAPBT] = {
+			.rmap_owner = XFS_RMAP_OWN_AG,
+			.buf_ops = &xfs_rmapbt_buf_ops,
+			.magic = XFS_RMAP_CRC_MAGIC,
+		},
+		[XREP_AGF_REFCOUNTBT] = {
+			.rmap_owner = XFS_RMAP_OWN_REFC,
+			.buf_ops = &xfs_refcountbt_buf_ops,
+			.magic = XFS_REFC_CRC_MAGIC,
+		},
+		[XREP_AGF_END] = {
+			.buf_ops = NULL,
+		},
+	};
+	struct xfs_agf			old_agf;
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_buf			*agf_bp;
+	struct xfs_buf			*agfl_bp;
+	struct xfs_agf			*agf;
+	int				error;
+
+	/* We require the rmapbt to rebuild anything. */
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return -EOPNOTSUPP;
+
+	xchk_perag_get(sc->mp, &sc->sa);
+	/*
+	 * Make sure we have the AGF buffer, as scrub might have decided it
+	 * was corrupt after xfs_alloc_read_agf failed with -EFSCORRUPTED.
+	 */
+	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGF_DADDR(mp)),
+			XFS_FSS_TO_BB(mp, 1), 0, &agf_bp, NULL);
+	if (error)
+		return error;
+	agf_bp->b_ops = &xfs_agf_buf_ops;
+	agf = XFS_BUF_TO_AGF(agf_bp);
+
+	/*
+	 * Load the AGFL so that we can screen out OWN_AG blocks that are on
+	 * the AGFL now; these blocks might have once been part of the
+	 * bno/cnt/rmap btrees but are not now.  This is a chicken and egg
+	 * problem: the AGF is corrupt, so we have to trust the AGFL contents
+	 * because we can't do any serious cross-referencing with any of the
+	 * btrees rooted in the AGF.  If the AGFL contents are obviously bad
+	 * then we'll bail out.
+	 */
+	error = xfs_alloc_read_agfl(mp, sc->tp, sc->sa.agno, &agfl_bp);
+	if (error)
+		return error;
+
+	/*
+	 * Spot-check the AGFL blocks; if they're obviously corrupt then
+	 * there's nothing we can do but bail out.
+	 */
+	error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(agf_bp), agfl_bp,
+			xrep_agf_check_agfl_block, sc);
+	if (error)
+		return error;
+
+	/*
+	 * Find the AGF btree roots.  This is also a chicken-and-egg situation;
+	 * see the function for more details.
+	 */
+	error = xrep_agf_find_btrees(sc, agf_bp, fab, agfl_bp);
+	if (error)
+		return error;
+
+	/* Start rewriting the header and implant the btrees we found. */
+	xrep_agf_init_header(sc, agf_bp, &old_agf);
+	xrep_agf_set_roots(sc, agf, fab);
+	error = xrep_agf_calc_from_btrees(sc, agf_bp);
+	if (error)
+		goto out_revert;
+
+	/* Commit the changes and reinitialize incore state. */
+	return xrep_agf_commit_new(sc, agf_bp);
+
+out_revert:
+	/* Mark the incore AGF state stale and revert the AGF. */
+	sc->sa.pag->pagf_init = 0;
+	memcpy(agf, &old_agf, sizeof(old_agf));
+	return error;
+}
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 85b048b341a0..17cf48564390 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -128,9 +128,12 @@ xrep_roll_ag_trans(
 	int			error;
 
 	/* Keep the AG header buffers locked so we can keep going. */
-	xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
-	xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
-	xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
+	if (sc->sa.agi_bp)
+		xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
+	if (sc->sa.agf_bp)
+		xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
+	if (sc->sa.agfl_bp)
+		xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
 
 	/* Roll the transaction. */
 	error = xfs_trans_roll(&sc->tp);
@@ -138,9 +141,12 @@ xrep_roll_ag_trans(
 		goto out_release;
 
 	/* Join AG headers to the new transaction. */
-	xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
-	xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
-	xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
+	if (sc->sa.agi_bp)
+		xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
+	if (sc->sa.agf_bp)
+		xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
+	if (sc->sa.agfl_bp)
+		xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
 
 	return 0;
 
@@ -150,9 +156,12 @@ out_release:
 	 * buffers will be released during teardown on our way out
 	 * of the kernel.
 	 */
-	xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
-	xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
-	xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
+	if (sc->sa.agi_bp)
+		xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
+	if (sc->sa.agf_bp)
+		xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
+	if (sc->sa.agfl_bp)
+		xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
 
 	return error;
 }
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 5a4e92221916..6f0903c51a47 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -58,6 +58,7 @@ int xrep_ino_dqattach(struct xfs_scrub *sc);
 
 int xrep_probe(struct xfs_scrub *sc);
 int xrep_superblock(struct xfs_scrub *sc);
+int xrep_agf(struct xfs_scrub *sc);
 
 #else
 
@@ -81,6 +82,7 @@ xrep_calc_ag_resblks(
 
 #define xrep_probe			xrep_notsupported
 #define xrep_superblock			xrep_notsupported
+#define xrep_agf			xrep_notsupported
 
 #endif /* CONFIG_XFS_ONLINE_REPAIR */
 
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 6efb926f3cf8..1e8a17c8e2b9 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -214,7 +214,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_fs,
 		.scrub	= xchk_agf,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_agf,
 	},
 	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
 		.type	= ST_PERAG,
-- 
cgit v1.2.3


From 0e93d3f43ec7d3308bff25ce1be81d46330168c9 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 9 Aug 2018 22:43:02 -0700
Subject: xfs: repair the AGFL

Repair the AGFL from the rmap data.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/scrub/agheader_repair.c | 281 +++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/bitmap.c          |  92 ++++++++++++++
 fs/xfs/scrub/bitmap.h          |   4 +
 fs/xfs/scrub/repair.h          |   2 +
 fs/xfs/scrub/scrub.c           |   2 +-
 5 files changed, 380 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index aa180492a4a5..9ce302360bbb 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -433,3 +433,284 @@ out_revert:
 	memcpy(agf, &old_agf, sizeof(old_agf));
 	return error;
 }
+
+/* AGFL */
+
+struct xrep_agfl {
+	/* Bitmap of other OWN_AG metadata blocks. */
+	struct xfs_bitmap	agmetablocks;
+
+	/* Bitmap of free space. */
+	struct xfs_bitmap	*freesp;
+
+	struct xfs_scrub	*sc;
+};
+
+/* Record all OWN_AG (free space btree) information from the rmap data. */
+STATIC int
+xrep_agfl_walk_rmap(
+	struct xfs_btree_cur	*cur,
+	struct xfs_rmap_irec	*rec,
+	void			*priv)
+{
+	struct xrep_agfl	*ra = priv;
+	xfs_fsblock_t		fsb;
+	int			error = 0;
+
+	if (xchk_should_terminate(ra->sc, &error))
+		return error;
+
+	/* Record all the OWN_AG blocks. */
+	if (rec->rm_owner == XFS_RMAP_OWN_AG) {
+		fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno,
+				rec->rm_startblock);
+		error = xfs_bitmap_set(ra->freesp, fsb, rec->rm_blockcount);
+		if (error)
+			return error;
+	}
+
+	return xfs_bitmap_set_btcur_path(&ra->agmetablocks, cur);
+}
+
+/*
+ * Map out all the non-AGFL OWN_AG space in this AG so that we can deduce
+ * which blocks belong to the AGFL.
+ *
+ * Compute the set of old AGFL blocks by subtracting from the list of OWN_AG
+ * blocks the list of blocks owned by all other OWN_AG metadata (bnobt, cntbt,
+ * rmapbt).  These are the old AGFL blocks, so return that list and the number
+ * of blocks we're actually going to put back on the AGFL.
+ */
+STATIC int
+xrep_agfl_collect_blocks(
+	struct xfs_scrub	*sc,
+	struct xfs_buf		*agf_bp,
+	struct xfs_bitmap	*agfl_extents,
+	xfs_agblock_t		*flcount)
+{
+	struct xrep_agfl	ra;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_btree_cur	*cur;
+	struct xfs_bitmap_range	*br;
+	struct xfs_bitmap_range	*n;
+	int			error;
+
+	ra.sc = sc;
+	ra.freesp = agfl_extents;
+	xfs_bitmap_init(&ra.agmetablocks);
+
+	/* Find all space used by the free space btrees & rmapbt. */
+	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
+	error = xfs_rmap_query_all(cur, xrep_agfl_walk_rmap, &ra);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, error);
+
+	/* Find all blocks currently being used by the bnobt. */
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
+			XFS_BTNUM_BNO);
+	error = xfs_bitmap_set_btblocks(&ra.agmetablocks, cur);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, error);
+
+	/* Find all blocks currently being used by the cntbt. */
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
+			XFS_BTNUM_CNT);
+	error = xfs_bitmap_set_btblocks(&ra.agmetablocks, cur);
+	if (error)
+		goto err;
+
+	xfs_btree_del_cursor(cur, error);
+
+	/*
+	 * Drop the freesp meta blocks that are in use by btrees.
+	 * The remaining blocks /should/ be AGFL blocks.
+	 */
+	error = xfs_bitmap_disunion(agfl_extents, &ra.agmetablocks);
+	xfs_bitmap_destroy(&ra.agmetablocks);
+	if (error)
+		return error;
+
+	/*
+	 * Calculate the new AGFL size.  If we found more blocks than fit in
+	 * the AGFL we'll free them later.
+	 */
+	*flcount = 0;
+	for_each_xfs_bitmap_extent(br, n, agfl_extents) {
+		*flcount += br->len;
+		if (*flcount > xfs_agfl_size(mp))
+			break;
+	}
+	if (*flcount > xfs_agfl_size(mp))
+		*flcount = xfs_agfl_size(mp);
+	return 0;
+
+err:
+	xfs_bitmap_destroy(&ra.agmetablocks);
+	xfs_btree_del_cursor(cur, error);
+	return error;
+}
+
+/* Update the AGF and reset the in-core state. */
+STATIC void
+xrep_agfl_update_agf(
+	struct xfs_scrub	*sc,
+	struct xfs_buf		*agf_bp,
+	xfs_agblock_t		flcount)
+{
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agf_bp);
+
+	ASSERT(flcount <= xfs_agfl_size(sc->mp));
+
+	/* Trigger fdblocks recalculation */
+	xfs_force_summary_recalc(sc->mp);
+
+	/* Update the AGF counters. */
+	if (sc->sa.pag->pagf_init)
+		sc->sa.pag->pagf_flcount = flcount;
+	agf->agf_flfirst = cpu_to_be32(0);
+	agf->agf_flcount = cpu_to_be32(flcount);
+	agf->agf_fllast = cpu_to_be32(flcount - 1);
+
+	xfs_alloc_log_agf(sc->tp, agf_bp,
+			XFS_AGF_FLFIRST | XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
+}
+
+/* Write out a totally new AGFL. */
+STATIC void
+xrep_agfl_init_header(
+	struct xfs_scrub	*sc,
+	struct xfs_buf		*agfl_bp,
+	struct xfs_bitmap	*agfl_extents,
+	xfs_agblock_t		flcount)
+{
+	struct xfs_mount	*mp = sc->mp;
+	__be32			*agfl_bno;
+	struct xfs_bitmap_range	*br;
+	struct xfs_bitmap_range	*n;
+	struct xfs_agfl		*agfl;
+	xfs_agblock_t		agbno;
+	unsigned int		fl_off;
+
+	ASSERT(flcount <= xfs_agfl_size(mp));
+
+	/*
+	 * Start rewriting the header by setting the bno[] array to
+	 * NULLAGBLOCK, then setting AGFL header fields.
+	 */
+	agfl = XFS_BUF_TO_AGFL(agfl_bp);
+	memset(agfl, 0xFF, BBTOB(agfl_bp->b_length));
+	agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
+	agfl->agfl_seqno = cpu_to_be32(sc->sa.agno);
+	uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
+
+	/*
+	 * Fill the AGFL with the remaining blocks.  If agfl_extents has more
+	 * blocks than fit in the AGFL, they will be freed in a subsequent
+	 * step.
+	 */
+	fl_off = 0;
+	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agfl_bp);
+	for_each_xfs_bitmap_extent(br, n, agfl_extents) {
+		agbno = XFS_FSB_TO_AGBNO(mp, br->start);
+
+		trace_xrep_agfl_insert(mp, sc->sa.agno, agbno, br->len);
+
+		while (br->len > 0 && fl_off < flcount) {
+			agfl_bno[fl_off] = cpu_to_be32(agbno);
+			fl_off++;
+			agbno++;
+
+			/*
+			 * We've now used br->start by putting it in the AGFL,
+			 * so bump br so that we don't reap the block later.
+			 */
+			br->start++;
+			br->len--;
+		}
+
+		if (br->len)
+			break;
+		list_del(&br->list);
+		kmem_free(br);
+	}
+
+	/* Write new AGFL to disk. */
+	xfs_trans_buf_set_type(sc->tp, agfl_bp, XFS_BLFT_AGFL_BUF);
+	xfs_trans_log_buf(sc->tp, agfl_bp, 0, BBTOB(agfl_bp->b_length) - 1);
+}
+
+/* Repair the AGFL. */
+int
+xrep_agfl(
+	struct xfs_scrub	*sc)
+{
+	struct xfs_owner_info	oinfo;
+	struct xfs_bitmap	agfl_extents;
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_buf		*agf_bp;
+	struct xfs_buf		*agfl_bp;
+	xfs_agblock_t		flcount;
+	int			error;
+
+	/* We require the rmapbt to rebuild anything. */
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return -EOPNOTSUPP;
+
+	xchk_perag_get(sc->mp, &sc->sa);
+	xfs_bitmap_init(&agfl_extents);
+
+	/*
+	 * Read the AGF so that we can query the rmapbt.  We hope that there's
+	 * nothing wrong with the AGF, but all the AG header repair functions
+	 * have this chicken-and-egg problem.
+	 */
+	error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp);
+	if (error)
+		return error;
+	if (!agf_bp)
+		return -ENOMEM;
+
+	/*
+	 * Make sure we have the AGFL buffer, as scrub might have decided it
+	 * was corrupt after xfs_alloc_read_agfl failed with -EFSCORRUPTED.
+	 */
+	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGFL_DADDR(mp)),
+			XFS_FSS_TO_BB(mp, 1), 0, &agfl_bp, NULL);
+	if (error)
+		return error;
+	agfl_bp->b_ops = &xfs_agfl_buf_ops;
+
+	/* Gather all the extents we're going to put on the new AGFL. */
+	error = xrep_agfl_collect_blocks(sc, agf_bp, &agfl_extents, &flcount);
+	if (error)
+		goto err;
+
+	/*
+	 * Update AGF and AGFL.  Adjusting the AGF flcount also forces the
+	 * global free block counter to be recalculated, so we avoid touching
+	 * any buffers until the (fallible) block collection has succeeded.
+	 */
+	xrep_agfl_update_agf(sc, agf_bp, flcount);
+	xrep_agfl_init_header(sc, agfl_bp, &agfl_extents, flcount);
+
+	/*
+	 * Ok, the AGFL should be ready to go now.  Roll the transaction to
+	 * make the new AGFL permanent before we start using it to return
+	 * freespace overflow to the freespace btrees.
+	 */
+	sc->sa.agf_bp = agf_bp;
+	sc->sa.agfl_bp = agfl_bp;
+	error = xrep_roll_ag_trans(sc);
+	if (error)
+		goto err;
+
+	/* Dump any AGFL overflow. */
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+	return xrep_reap_extents(sc, &agfl_extents, &oinfo, XFS_AG_RESV_AGFL);
+err:
+	xfs_bitmap_destroy(&agfl_extents);
+	return error;
+}
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index c770e2d0b6aa..fdadc9e1dc49 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -9,6 +9,7 @@
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
+#include "xfs_btree.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
@@ -209,3 +210,94 @@ out:
 }
 #undef LEFT_ALIGNED
 #undef RIGHT_ALIGNED
+
+/*
+ * Record all btree blocks seen while iterating all records of a btree.
+ *
+ * We know that the btree query_all function starts at the left edge and walks
+ * towards the right edge of the tree.  Therefore, we know that we can walk up
+ * the btree cursor towards the root; if the pointer for a given level points
+ * to the first record/key in that block, we haven't seen this block before;
+ * and therefore we need to remember that we saw this block in the btree.
+ *
+ * So if our btree is:
+ *
+ *    4
+ *  / | \
+ * 1  2  3
+ *
+ * Pretend for this example that each leaf block has 100 btree records.  For
+ * the first btree record, we'll observe that bc_ptrs[0] == 1, so we record
+ * that we saw block 1.  Then we observe that bc_ptrs[1] == 1, so we record
+ * block 4.  The list is [1, 4].
+ *
+ * For the second btree record, we see that bc_ptrs[0] == 2, so we exit the
+ * loop.  The list remains [1, 4].
+ *
+ * For the 101st btree record, we've moved onto leaf block 2.  Now
+ * bc_ptrs[0] == 1 again, so we record that we saw block 2.  We see that
+ * bc_ptrs[1] == 2, so we exit the loop.  The list is now [1, 4, 2].
+ *
+ * For the 102nd record, bc_ptrs[0] == 2, so we continue.
+ *
+ * For the 201st record, we've moved on to leaf block 3.  bc_ptrs[0] == 1, so
+ * we add 3 to the list.  Now it is [1, 4, 2, 3].
+ *
+ * For the 300th record we just exit, with the list being [1, 4, 2, 3].
+ */
+
+/*
+ * Record all the buffers pointed to by the btree cursor.  Callers already
+ * engaged in a btree walk should call this function to capture the list of
+ * blocks going from the leaf towards the root.
+ */
+int
+xfs_bitmap_set_btcur_path(
+	struct xfs_bitmap	*bitmap,
+	struct xfs_btree_cur	*cur)
+{
+	struct xfs_buf		*bp;
+	xfs_fsblock_t		fsb;
+	int			i;
+	int			error;
+
+	for (i = 0; i < cur->bc_nlevels && cur->bc_ptrs[i] == 1; i++) {
+		xfs_btree_get_block(cur, i, &bp);
+		if (!bp)
+			continue;
+		fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
+		error = xfs_bitmap_set(bitmap, fsb, 1);
+		if (error)
+			return error;
+	}
+
+	return 0;
+}
+
+/* Collect a btree's block in the bitmap. */
+STATIC int
+xfs_bitmap_collect_btblock(
+	struct xfs_btree_cur	*cur,
+	int			level,
+	void			*priv)
+{
+	struct xfs_bitmap	*bitmap = priv;
+	struct xfs_buf		*bp;
+	xfs_fsblock_t		fsbno;
+
+	xfs_btree_get_block(cur, level, &bp);
+	if (!bp)
+		return 0;
+
+	fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
+	return xfs_bitmap_set(bitmap, fsbno, 1);
+}
+
+/* Walk the btree and mark the bitmap wherever a btree block is found. */
+int
+xfs_bitmap_set_btblocks(
+	struct xfs_bitmap	*bitmap,
+	struct xfs_btree_cur	*cur)
+{
+	return xfs_btree_visit_blocks(cur, xfs_bitmap_collect_btblock, bitmap);
+}
diff --git a/fs/xfs/scrub/bitmap.h b/fs/xfs/scrub/bitmap.h
index dad652ee9177..ae8ecbce6fa6 100644
--- a/fs/xfs/scrub/bitmap.h
+++ b/fs/xfs/scrub/bitmap.h
@@ -28,5 +28,9 @@ void xfs_bitmap_destroy(struct xfs_bitmap *bitmap);
 
 int xfs_bitmap_set(struct xfs_bitmap *bitmap, uint64_t start, uint64_t len);
 int xfs_bitmap_disunion(struct xfs_bitmap *bitmap, struct xfs_bitmap *sub);
+int xfs_bitmap_set_btcur_path(struct xfs_bitmap *bitmap,
+		struct xfs_btree_cur *cur);
+int xfs_bitmap_set_btblocks(struct xfs_bitmap *bitmap,
+		struct xfs_btree_cur *cur);
 
 #endif	/* __XFS_SCRUB_BITMAP_H__ */
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 6f0903c51a47..1d283360b5ab 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -59,6 +59,7 @@ int xrep_ino_dqattach(struct xfs_scrub *sc);
 int xrep_probe(struct xfs_scrub *sc);
 int xrep_superblock(struct xfs_scrub *sc);
 int xrep_agf(struct xfs_scrub *sc);
+int xrep_agfl(struct xfs_scrub *sc);
 
 #else
 
@@ -83,6 +84,7 @@ xrep_calc_ag_resblks(
 #define xrep_probe			xrep_notsupported
 #define xrep_superblock			xrep_notsupported
 #define xrep_agf			xrep_notsupported
+#define xrep_agfl			xrep_notsupported
 
 #endif /* CONFIG_XFS_ONLINE_REPAIR */
 
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 1e8a17c8e2b9..2670f4cf62f4 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -220,7 +220,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_fs,
 		.scrub	= xchk_agfl,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_agfl,
 	},
 	[XFS_SCRUB_TYPE_AGI] = {	/* agi */
 		.type	= ST_PERAG,
-- 
cgit v1.2.3


From 13942aa94a8b5df662d93c42c307b2f50cbe88b0 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 9 Aug 2018 22:43:04 -0700
Subject: xfs: repair the AGI

Rebuild the AGI header items with some help from the rmapbt.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/scrub/agheader_repair.c | 217 +++++++++++++++++++++++++++++++++++++++++
 fs/xfs/scrub/repair.h          |   2 +
 fs/xfs/scrub/scrub.c           |   2 +-
 3 files changed, 220 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 9ce302360bbb..f7568a4b5fe5 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -714,3 +714,220 @@ err:
 	xfs_bitmap_destroy(&agfl_extents);
 	return error;
 }
+
+/* AGI */
+
+/*
+ * Offset within the xrep_find_ag_btree array for each btree type.  Avoid the
+ * XFS_BTNUM_ names here to avoid creating a sparse array.
+ */
+enum {
+	XREP_AGI_INOBT = 0,
+	XREP_AGI_FINOBT,
+	XREP_AGI_END,
+	XREP_AGI_MAX
+};
+
+/*
+ * Given the inode btree roots described by *fab, find the roots, check them
+ * for sanity, and pass the root data back out via *fab.
+ */
+STATIC int
+xrep_agi_find_btrees(
+	struct xfs_scrub		*sc,
+	struct xrep_find_ag_btree	*fab)
+{
+	struct xfs_buf			*agf_bp;
+	struct xfs_mount		*mp = sc->mp;
+	int				error;
+
+	/* Read the AGF. */
+	error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp);
+	if (error)
+		return error;
+	if (!agf_bp)
+		return -ENOMEM;
+
+	/* Find the btree roots. */
+	error = xrep_find_ag_btree_roots(sc, agf_bp, fab, NULL);
+	if (error)
+		return error;
+
+	/* We must find the inobt root. */
+	if (!xrep_check_btree_root(sc, &fab[XREP_AGI_INOBT]))
+		return -EFSCORRUPTED;
+
+	/* We must find the finobt root if that feature is enabled. */
+	if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
+	    !xrep_check_btree_root(sc, &fab[XREP_AGI_FINOBT]))
+		return -EFSCORRUPTED;
+
+	return 0;
+}
+
+/*
+ * Reinitialize the AGI header, making an in-core copy of the old contents so
+ * that we know which in-core state needs to be reinitialized.
+ */
+STATIC void
+xrep_agi_init_header(
+	struct xfs_scrub	*sc,
+	struct xfs_buf		*agi_bp,
+	struct xfs_agi		*old_agi)
+{
+	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agi_bp);
+	struct xfs_mount	*mp = sc->mp;
+
+	memcpy(old_agi, agi, sizeof(*old_agi));
+	memset(agi, 0, BBTOB(agi_bp->b_length));
+	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
+	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
+	agi->agi_seqno = cpu_to_be32(sc->sa.agno);
+	agi->agi_length = cpu_to_be32(xfs_ag_block_count(mp, sc->sa.agno));
+	agi->agi_newino = cpu_to_be32(NULLAGINO);
+	agi->agi_dirino = cpu_to_be32(NULLAGINO);
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
+
+	/* We don't know how to fix the unlinked list yet. */
+	memcpy(&agi->agi_unlinked, &old_agi->agi_unlinked,
+			sizeof(agi->agi_unlinked));
+
+	/* Mark the incore AGI data stale until we're done fixing things. */
+	ASSERT(sc->sa.pag->pagi_init);
+	sc->sa.pag->pagi_init = 0;
+}
+
+/* Set btree root information in an AGI. */
+STATIC void
+xrep_agi_set_roots(
+	struct xfs_scrub		*sc,
+	struct xfs_agi			*agi,
+	struct xrep_find_ag_btree	*fab)
+{
+	agi->agi_root = cpu_to_be32(fab[XREP_AGI_INOBT].root);
+	agi->agi_level = cpu_to_be32(fab[XREP_AGI_INOBT].height);
+
+	if (xfs_sb_version_hasfinobt(&sc->mp->m_sb)) {
+		agi->agi_free_root = cpu_to_be32(fab[XREP_AGI_FINOBT].root);
+		agi->agi_free_level = cpu_to_be32(fab[XREP_AGI_FINOBT].height);
+	}
+}
+
+/* Update the AGI counters. */
+STATIC int
+xrep_agi_calc_from_btrees(
+	struct xfs_scrub	*sc,
+	struct xfs_buf		*agi_bp)
+{
+	struct xfs_btree_cur	*cur;
+	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agi_bp);
+	struct xfs_mount	*mp = sc->mp;
+	xfs_agino_t		count;
+	xfs_agino_t		freecount;
+	int			error;
+
+	cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, sc->sa.agno,
+			XFS_BTNUM_INO);
+	error = xfs_ialloc_count_inodes(cur, &count, &freecount);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, error);
+
+	agi->agi_count = cpu_to_be32(count);
+	agi->agi_freecount = cpu_to_be32(freecount);
+	return 0;
+err:
+	xfs_btree_del_cursor(cur, error);
+	return error;
+}
+
+/* Log the new AGI and reinitialize the in-core state. */
+STATIC int
+xrep_agi_commit_new(
+	struct xfs_scrub	*sc,
+	struct xfs_buf		*agi_bp)
+{
+	struct xfs_perag	*pag;
+	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agi_bp);
+
+	/* Trigger inode count recalculation */
+	xfs_force_summary_recalc(sc->mp);
+
+	/* Write this to disk. */
+	xfs_trans_buf_set_type(sc->tp, agi_bp, XFS_BLFT_AGI_BUF);
+	xfs_trans_log_buf(sc->tp, agi_bp, 0, BBTOB(agi_bp->b_length) - 1);
+
+	/* Now reinitialize the in-core counters we changed. */
+	pag = sc->sa.pag;
+	pag->pagi_count = be32_to_cpu(agi->agi_count);
+	pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
+	pag->pagi_init = 1;
+
+	return 0;
+}
+
+/* Repair the AGI. */
+int
+xrep_agi(
+	struct xfs_scrub		*sc)
+{
+	struct xrep_find_ag_btree	fab[XREP_AGI_MAX] = {
+		[XREP_AGI_INOBT] = {
+			.rmap_owner = XFS_RMAP_OWN_INOBT,
+			.buf_ops = &xfs_inobt_buf_ops,
+			.magic = XFS_IBT_CRC_MAGIC,
+		},
+		[XREP_AGI_FINOBT] = {
+			.rmap_owner = XFS_RMAP_OWN_INOBT,
+			.buf_ops = &xfs_inobt_buf_ops,
+			.magic = XFS_FIBT_CRC_MAGIC,
+		},
+		[XREP_AGI_END] = {
+			.buf_ops = NULL
+		},
+	};
+	struct xfs_agi			old_agi;
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_buf			*agi_bp;
+	struct xfs_agi			*agi;
+	int				error;
+
+	/* We require the rmapbt to rebuild anything. */
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return -EOPNOTSUPP;
+
+	xchk_perag_get(sc->mp, &sc->sa);
+	/*
+	 * Make sure we have the AGI buffer, as scrub might have decided it
+	 * was corrupt after xfs_ialloc_read_agi failed with -EFSCORRUPTED.
+	 */
+	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGI_DADDR(mp)),
+			XFS_FSS_TO_BB(mp, 1), 0, &agi_bp, NULL);
+	if (error)
+		return error;
+	agi_bp->b_ops = &xfs_agi_buf_ops;
+	agi = XFS_BUF_TO_AGI(agi_bp);
+
+	/* Find the AGI btree roots. */
+	error = xrep_agi_find_btrees(sc, fab);
+	if (error)
+		return error;
+
+	/* Start rewriting the header and implant the btrees we found. */
+	xrep_agi_init_header(sc, agi_bp, &old_agi);
+	xrep_agi_set_roots(sc, agi, fab);
+	error = xrep_agi_calc_from_btrees(sc, agi_bp);
+	if (error)
+		goto out_revert;
+
+	/* Reinitialize in-core state. */
+	return xrep_agi_commit_new(sc, agi_bp);
+
+out_revert:
+	/* Mark the incore AGI state stale and revert the AGI. */
+	sc->sa.pag->pagi_init = 0;
+	memcpy(agi, &old_agi, sizeof(old_agi));
+	return error;
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 1d283360b5ab..9de321eee4ab 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -60,6 +60,7 @@ int xrep_probe(struct xfs_scrub *sc);
 int xrep_superblock(struct xfs_scrub *sc);
 int xrep_agf(struct xfs_scrub *sc);
 int xrep_agfl(struct xfs_scrub *sc);
+int xrep_agi(struct xfs_scrub *sc);
 
 #else
 
@@ -85,6 +86,7 @@ xrep_calc_ag_resblks(
 #define xrep_superblock			xrep_notsupported
 #define xrep_agf			xrep_notsupported
 #define xrep_agfl			xrep_notsupported
+#define xrep_agi			xrep_notsupported
 
 #endif /* CONFIG_XFS_ONLINE_REPAIR */
 
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 2670f4cf62f4..4bfae1e61d30 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -226,7 +226,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
 		.type	= ST_PERAG,
 		.setup	= xchk_setup_fs,
 		.scrub	= xchk_agi,
-		.repair	= xrep_notsupported,
+		.repair	= xrep_agi,
 	},
 	[XFS_SCRUB_TYPE_BNOBT] = {	/* bnobt */
 		.type	= ST_PERAG,
-- 
cgit v1.2.3


From c4f7173ac3b7e22934e51f0121833642a581d723 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 10 Aug 2018 18:46:58 -0500
Subject: smb3: create smb3 equivalent alias for cifs pseudo-xattrs

We really, really don't want to be encouraging people to use
cifs (the dialect) since it is insecure, so to avoid confusion
we want to move them to names which include 'smb3' instead of
'cifs' - so this simply creates an alias for the pseudo-xattrs.

e.g. you can now do:
getfattr -n user.smb3.creationtime /mnt1/file
and
getfattr -n user.smb3.dosattrib /mnt1/file
and
getfattr -n system.smb3_acl /mnt1/file

instead of forcing you to use the string 'cifs' in
these (e.g. getfattr -n system.cifs_acl /mnt1/file)

Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/xattr.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 316af84674f1..50ddb795aaeb 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -35,6 +35,14 @@
 #define CIFS_XATTR_CIFS_ACL "system.cifs_acl"
 #define CIFS_XATTR_ATTRIB "cifs.dosattrib"  /* full name: user.cifs.dosattrib */
 #define CIFS_XATTR_CREATETIME "cifs.creationtime"  /* user.cifs.creationtime */
+/*
+ * These three are just aliases for the above, but we need to stop confusing
+ * users with the 20+ year old term 'cifs': that dialect is no longer secure
+ * and was replaced by SMB2 (then the even more secure SMB3) many years ago.
+ */
+#define SMB3_XATTR_CIFS_ACL "system.smb3_acl"
+#define SMB3_XATTR_ATTRIB "smb3.dosattrib"  /* full name: user.smb3.dosattrib */
+#define SMB3_XATTR_CREATETIME "smb3.creationtime"  /* user.smb3.creationtime */
 /* BB need to add server (Samba e.g) support for security and trusted prefix */
 
 enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT };
@@ -220,10 +228,12 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
 	switch (handler->flags) {
 	case XATTR_USER:
 		cifs_dbg(FYI, "%s:querying user xattr %s\n", __func__, name);
-		if (strcmp(name, CIFS_XATTR_ATTRIB) == 0) {
+		if ((strcmp(name, CIFS_XATTR_ATTRIB) == 0) ||
+		    (strcmp(name, SMB3_XATTR_ATTRIB) == 0)) {
 			rc = cifs_attrib_get(dentry, inode, value, size);
 			break;
-		} else if (strcmp(name, CIFS_XATTR_CREATETIME) == 0) {
+		} else if ((strcmp(name, CIFS_XATTR_CREATETIME) == 0) ||
+		    (strcmp(name, SMB3_XATTR_CREATETIME) == 0)) {
 			rc = cifs_creation_time_get(dentry, inode, value, size);
 			break;
 		}
@@ -363,6 +373,19 @@ static const struct xattr_handler cifs_cifs_acl_xattr_handler = {
 	.set = cifs_xattr_set,
 };
 
+/*
+ * Although this is just an alias for the above, we need to move away from
+ * confusing users with the 20 year old term 'cifs': that dialect is no
+ * longer secure and was replaced by SMB2/SMB3 a long time ago, whereas
+ * SMB3 and later are highly secure.
+ */
+static const struct xattr_handler smb3_acl_xattr_handler = {
+	.name = SMB3_XATTR_CIFS_ACL,
+	.flags = XATTR_CIFS_ACL,
+	.get = cifs_xattr_get,
+	.set = cifs_xattr_set,
+};
+
 static const struct xattr_handler cifs_posix_acl_access_xattr_handler = {
 	.name = XATTR_NAME_POSIX_ACL_ACCESS,
 	.flags = XATTR_ACL_ACCESS,
@@ -381,6 +404,7 @@ const struct xattr_handler *cifs_xattr_handlers[] = {
 	&cifs_user_xattr_handler,
 	&cifs_os2_xattr_handler,
 	&cifs_cifs_acl_xattr_handler,
+	&smb3_acl_xattr_handler, /* alias for above since avoiding "cifs" */
 	&cifs_posix_acl_access_xattr_handler,
 	&cifs_posix_acl_default_xattr_handler,
 	NULL
-- 
cgit v1.2.3


From e25ff835af89a80aa6a4de58f413e494b2b96bd1 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 10 Aug 2018 08:48:18 -0700
Subject: xfs: Close race between direct IO and xfs_break_layouts()

This patch is the XFS counterpart of Ross's fix for ext4.

If the refcount of a page is lowered between the time that it is returned
by dax_layout_busy_page() and when the refcount is again checked in
xfs_break_layouts() => ___wait_var_event(), the waiting function
xfs_wait_dax_page() will never be called.  This means that
xfs_break_layouts() will still have 'retry' set to false, so we'll stop
looping and never check the refcount of other pages in this inode.

Instead, always continue looping as long as dax_layout_busy_page() gives us
a page which it found with an elevated refcount.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_file.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 6b31f41eafa2..181e9084519b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -721,12 +721,10 @@ xfs_file_write_iter(
 
 static void
 xfs_wait_dax_page(
-	struct inode		*inode,
-	bool			*did_unlock)
+	struct inode		*inode)
 {
 	struct xfs_inode        *ip = XFS_I(inode);
 
-	*did_unlock = true;
 	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
 	schedule();
 	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
@@ -735,7 +733,7 @@ xfs_wait_dax_page(
 static int
 xfs_break_dax_layouts(
 	struct inode		*inode,
-	bool			*did_unlock)
+	bool			*retry)
 {
 	struct page		*page;
 
@@ -745,9 +743,10 @@ xfs_break_dax_layouts(
 	if (!page)
 		return 0;
 
+	*retry = true;
 	return ___wait_var_event(&page->_refcount,
 			atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
-			0, 0, xfs_wait_dax_page(inode, did_unlock));
+			0, 0, xfs_wait_dax_page(inode));
 }
 
 int
-- 
cgit v1.2.3


From 10259de1d8690a8c6dd98d564ff2690b5511c8f7 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Fri, 10 Aug 2018 11:46:14 -0700
Subject: iomap: Switch to offset_in_page for clarity

Instead of open-coding pos & (PAGE_SIZE - 1) and pos & ~PAGE_MASK, use
the offset_in_page macro.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/iomap.c b/fs/iomap.c
index 07501a647d13..8bd54c08deee 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -145,11 +145,11 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
 {
 	unsigned block_bits = inode->i_blkbits;
 	unsigned block_size = (1 << block_bits);
-	unsigned poff = *pos & (PAGE_SIZE - 1);
+	unsigned poff = offset_in_page(*pos);
 	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
 	unsigned first = poff >> block_bits;
 	unsigned last = (poff + plen - 1) >> block_bits;
-	unsigned end = (i_size_read(inode) & (PAGE_SIZE - 1)) >> block_bits;
+	unsigned end = offset_in_page(i_size_read(inode)) >> block_bits;
 
 	/*
 	 * If the block size is smaller than the page size we need to check the
@@ -427,7 +427,7 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
 	loff_t done, ret;
 
 	for (done = 0; done < length; done += ret) {
-		if (ctx->cur_page && ((pos + done) & (PAGE_SIZE - 1)) == 0) {
+		if (ctx->cur_page && offset_in_page(pos + done) == 0) {
 			if (!ctx->cur_page_in_bio)
 				unlock_page(ctx->cur_page);
 			put_page(ctx->cur_page);
@@ -609,7 +609,7 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
 	loff_t block_size = i_blocksize(inode);
 	loff_t block_start = pos & ~(block_size - 1);
 	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
-	unsigned from = pos & (PAGE_SIZE - 1), to = from + len, poff, plen;
+	unsigned from = offset_in_page(pos), to = from + len, poff, plen;
 	int status = 0;
 
 	if (PageUptodate(page))
@@ -714,7 +714,7 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 	if (unlikely(copied < len && !PageUptodate(page))) {
 		copied = 0;
 	} else {
-		iomap_set_range_uptodate(page, pos & (PAGE_SIZE - 1), len);
+		iomap_set_range_uptodate(page, offset_in_page(pos), len);
 		iomap_set_page_dirty(page);
 	}
 	return __generic_write_end(inode, pos, copied, page);
@@ -776,7 +776,7 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		unsigned long bytes;	/* Bytes to write to page */
 		size_t copied;		/* Bytes copied from user */
 
-		offset = (pos & (PAGE_SIZE - 1));
+		offset = offset_in_page(pos);
 		bytes = min_t(unsigned long, PAGE_SIZE - offset,
 						iov_iter_count(i));
 again:
@@ -890,7 +890,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		unsigned long offset;	/* Offset into pagecache page */
 		unsigned long bytes;	/* Bytes to write to page */
 
-		offset = (pos & (PAGE_SIZE - 1));
+		offset = offset_in_page(pos);
 		bytes = min_t(loff_t, PAGE_SIZE - offset, length);
 
 		rpage = __iomap_read_page(inode, pos);
@@ -982,7 +982,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
 	do {
 		unsigned offset, bytes;
 
-		offset = pos & (PAGE_SIZE - 1); /* Within page */
+		offset = offset_in_page(pos);
 		bytes = min_t(loff_t, PAGE_SIZE - offset, count);
 
 		if (IS_DAX(inode))
@@ -1075,7 +1075,7 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
 
 	/* page is wholly or partially inside EOF */
 	if (((page->index + 1) << PAGE_SHIFT) > size)
-		length = size & ~PAGE_MASK;
+		length = offset_in_page(size);
 	else
 		length = PAGE_SIZE;
 
@@ -1238,7 +1238,7 @@ page_seek_hole_data(struct inode *inode, struct page *page, loff_t *lastoff,
 		goto out_unlock_not_found;
 
 	for (off = 0; off < PAGE_SIZE; off += bsize) {
-		if ((*lastoff & ~PAGE_MASK) >= off + bsize)
+		if (offset_in_page(*lastoff) >= off + bsize)
 			continue;
 		if (ops->is_partially_uptodate(page, off, bsize) == seek_data) {
 			unlock_page(page);
-- 
cgit v1.2.3


From fa6c668d807b1e9ac041101dfcb59bd8e279cfe5 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Fri, 10 Aug 2018 13:56:25 -0700
Subject: xfs: remove b_last_holder & associated macros

The old lock tracking infrastructure in xfs using the b_last_holder
field seems to only be useful if you can get into the system with a
debugger; it seems that the existing tracepoints would be the way to
go these days, and this old infrastructure can be removed.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs.h     |  1 -
 fs/xfs/xfs_buf.c | 20 ++------------------
 fs/xfs/xfs_buf.h |  4 ----
 3 files changed, 2 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 583a9f539bf1..f6ffb4f248f7 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -8,7 +8,6 @@
 
 #ifdef CONFIG_XFS_DEBUG
 #define DEBUG 1
-#define XFS_BUF_LOCK_TRACKING 1
 #endif
 
 #ifdef CONFIG_XFS_ASSERT_FATAL
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index c641c7fa1a03..e839907e8492 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -34,16 +34,6 @@
 
 static kmem_zone_t *xfs_buf_zone;
 
-#ifdef XFS_BUF_LOCK_TRACKING
-# define XB_SET_OWNER(bp)	((bp)->b_last_holder = current->pid)
-# define XB_CLEAR_OWNER(bp)	((bp)->b_last_holder = -1)
-# define XB_GET_OWNER(bp)	((bp)->b_last_holder)
-#else
-# define XB_SET_OWNER(bp)	do { } while (0)
-# define XB_CLEAR_OWNER(bp)	do { } while (0)
-# define XB_GET_OWNER(bp)	do { } while (0)
-#endif
-
 #define xb_to_gfp(flags) \
 	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
 
@@ -226,7 +216,6 @@ _xfs_buf_alloc(
 	INIT_LIST_HEAD(&bp->b_li_list);
 	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	spin_lock_init(&bp->b_lock);
-	XB_SET_OWNER(bp);
 	bp->b_target = target;
 	bp->b_flags = flags;
 
@@ -1091,12 +1080,10 @@ xfs_buf_trylock(
 	int			locked;
 
 	locked = down_trylock(&bp->b_sema) == 0;
-	if (locked) {
-		XB_SET_OWNER(bp);
+	if (locked)
 		trace_xfs_buf_trylock(bp, _RET_IP_);
-	} else {
+	else
 		trace_xfs_buf_trylock_fail(bp, _RET_IP_);
-	}
 	return locked;
 }
 
@@ -1118,7 +1105,6 @@ xfs_buf_lock(
 	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
 		xfs_log_force(bp->b_target->bt_mount, 0);
 	down(&bp->b_sema);
-	XB_SET_OWNER(bp);
 
 	trace_xfs_buf_lock_done(bp, _RET_IP_);
 }
@@ -1129,9 +1115,7 @@ xfs_buf_unlock(
 {
 	ASSERT(xfs_buf_islocked(bp));
 
-	XB_CLEAR_OWNER(bp);
 	up(&bp->b_sema);
-
 	trace_xfs_buf_unlock(bp, _RET_IP_);
 }
 
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index f04613181ca1..4e3171acd0f8 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -198,10 +198,6 @@ typedef struct xfs_buf {
 	int			b_last_error;
 
 	const struct xfs_buf_ops	*b_ops;
-
-#ifdef XFS_BUF_LOCK_TRACKING
-	int			b_last_holder;
-#endif
 } xfs_buf_t;
 
 /* Finding and Reading Buffers */
-- 
cgit v1.2.3


From 01239d77b9dd978863d1a75f0d095ab942a1fe66 Mon Sep 17 00:00:00 2001
From: Shan Hai <shan.hai@oracle.com>
Date: Fri, 10 Aug 2018 17:55:55 -0700
Subject: xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree

A fuzzing tool reported a write through a null pointer in
xfs_bmap_extents_to_btree(); fix it by bailing out on encountering
a null pointer.

Signed-off-by: Shan Hai <shan.hai@oracle.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 5648a177e0ac..2760314fdf7f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -712,19 +712,14 @@ xfs_bmap_extents_to_btree(
 	args.wasdel = wasdel;
 	*logflagsp = 0;
 	if ((error = xfs_alloc_vextent(&args))) {
-		xfs_iroot_realloc(ip, -1, whichfork);
 		ASSERT(ifp->if_broot == NULL);
-		XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
-		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-		return error;
+		goto err1;
 	}
 
 	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
-		xfs_iroot_realloc(ip, -1, whichfork);
 		ASSERT(ifp->if_broot == NULL);
-		XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
-		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-		return -ENOSPC;
+		error = -ENOSPC;
+		goto err1;
 	}
 	/*
 	 * Allocation can't fail, the space was reserved.
@@ -736,6 +731,10 @@ xfs_bmap_extents_to_btree(
 	ip->i_d.di_nblocks++;
 	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
+	if (!abp) {
+		error = -ENOSPC;
+		goto err2;
+	}
 	/*
 	 * Fill in the child block.
 	 */
@@ -775,6 +774,15 @@ xfs_bmap_extents_to_btree(
 	*curp = cur;
 	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
 	return 0;
+
+err2:
+	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+err1:
+	xfs_iroot_realloc(ip, -1, whichfork);
+	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+
+	return error;
 }
 
 /*
-- 
cgit v1.2.3


From 8bf782f64737be055e6c1fad7a4c7cae7aae2d99 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Fri, 29 Jun 2018 00:12:40 +0530
Subject: orangefs: Adding new return type vm_fault_t

Use new return type vm_fault_t for fault handler. For now,
this is just documenting that the function returns a VM_FAULT
value rather than an errno. Once all instances are converted,
vm_fault_t will become a distinct type.

See the following
commit 1c8f422059ae ("mm: change return type to vm_fault_t")

Fixed checkpatch.pl warning.

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
---
 fs/orangefs/file.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index db0b52187cbc..a5a2fe76568f 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -528,18 +528,19 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar
 	return ret;
 }
 
-static int orangefs_fault(struct vm_fault *vmf)
+static vm_fault_t orangefs_fault(struct vm_fault *vmf)
 {
 	struct file *file = vmf->vma->vm_file;
-	int rc;
-	rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1,
+	int ret;
+
+	ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1,
 	    STATX_SIZE);
-	if (rc == -ESTALE)
-		rc = -EIO;
-	if (rc) {
-		gossip_err("%s: orangefs_inode_getattr failed, "
-		    "rc:%d:.\n", __func__, rc);
-		return rc;
+	if (ret == -ESTALE)
+		ret = -EIO;
+	if (ret) {
+		gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n",
+				__func__, ret);
+		return VM_FAULT_SIGBUS;
 	}
 	return filemap_fault(vmf);
 }
-- 
cgit v1.2.3


From e1b437691a624a7e767bbb140c730d2779797f53 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 17 Jul 2018 15:42:53 +0100
Subject: orangefs: remove redundant pointer orangefs_inode

Pointer orangefs_inode is assigned but never used; hence it is
redundant and can be removed.

Cleans up clang warning:
warning: variable 'orangefs_inode' set but not used [-Wunused-but-set-variable]

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
---
 fs/orangefs/inode.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 6e4d2af8f5bc..31932879b716 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -251,7 +251,6 @@ int orangefs_getattr(const struct path *path, struct kstat *stat,
 {
 	int ret = -ENOENT;
 	struct inode *inode = path->dentry->d_inode;
-	struct orangefs_inode_s *orangefs_inode = NULL;
 
 	gossip_debug(GOSSIP_INODE_DEBUG,
 		     "orangefs_getattr: called on %pd\n",
@@ -262,8 +261,6 @@ int orangefs_getattr(const struct path *path, struct kstat *stat,
 		generic_fillattr(inode, stat);
 
 		/* override block size reported to stat */
-		orangefs_inode = ORANGEFS_I(inode);
-
 		if (request_mask & STATX_SIZE)
 			stat->result_mask = STATX_BASIC_STATS;
 		else
-- 
cgit v1.2.3