From 2ab28f322f9896782da904f5942f3873432addc8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 12 Oct 2012 15:27:49 -0400 Subject: Btrfs: wait on ordered extents at the last possible moment Since we don't actually copy the extent information from the source tree in the fast case we don't need to wait for ordered io to be completed in order to fsync, we just need to wait for the io to be completed. So when we're logging our file just attach all of the ordered extents to the log, and then when the log syncs just wait for IO_DONE on the ordered extents and then write the super. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/file.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b06d289f998f..083abca56055 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1655,16 +1655,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; struct btrfs_trans_handle *trans; + bool full_sync = 0; trace_btrfs_sync_file(file, datasync); /* * We write the dirty pages in the range and wait until they complete * out of the ->i_mutex. If so, we can flush the dirty pages by - * multi-task, and make the performance up. + * multi-task, and make the performance up. See + * btrfs_wait_ordered_range for an explanation of the ASYNC check. */ atomic_inc(&BTRFS_I(inode)->sync_writers); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + ret = filemap_fdatawrite_range(inode->i_mapping, start, end); + if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + ret = filemap_fdatawrite_range(inode->i_mapping, start, end); atomic_dec(&BTRFS_I(inode)->sync_writers); if (ret) return ret; @@ -1676,7 +1681,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * range being left. */ atomic_inc(&root->log_batch); - btrfs_wait_ordered_range(inode, start, end - start + 1); + full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags); + if (full_sync) + btrfs_wait_ordered_range(inode, start, end - start + 1); atomic_inc(&root->log_batch); /* @@ -1743,13 +1751,25 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (ret != BTRFS_NO_LOG_SYNC) { if (ret > 0) { + /* + * If we didn't already wait for ordered extents we need + * to do that now. + */ + if (!full_sync) + btrfs_wait_ordered_range(inode, start, + end - start + 1); ret = btrfs_commit_transaction(trans, root); } else { ret = btrfs_sync_log(trans, root); - if (ret == 0) + if (ret == 0) { ret = btrfs_end_transaction(trans, root); - else + } else { + if (!full_sync) + btrfs_wait_ordered_range(inode, start, + end - + start + 1); ret = btrfs_commit_transaction(trans, root); + } } } else { ret = btrfs_end_transaction(trans, root); -- cgit v1.2.3 From 55e301fd57a6239ec14b91a1cf2e70b3dd135194 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Tue, 29 Jan 2013 06:04:50 +0000 Subject: Btrfs: move fs/btrfs/ioctl.h to include/uapi/linux/btrfs.h The header file will then be installed under /usr/include/linux so that userspace applications can refer to Btrfs ioctls by name and use the same structs used internally in the kernel. Signed-off-by: Filipe Brandenburger Signed-off-by: Josef Bacik --- fs/btrfs/backref.h | 2 +- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 2 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/ioctl.h | 502 -------------------------------------------- fs/btrfs/qgroup.c | 2 +- fs/btrfs/super.c | 2 +- fs/btrfs/volumes.h | 2 +- include/linux/btrfs.h | 6 + include/uapi/linux/Kbuild | 1 + include/uapi/linux/btrfs.h | 503 +++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 518 insertions(+), 510 deletions(-) delete mode 100644 fs/btrfs/ioctl.h create mode 100644 include/linux/btrfs.h create mode 100644 include/uapi/linux/btrfs.h (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index d61feca79455..310a7f6d09b1 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -19,7 +19,7 @@ #ifndef __BTRFS_BACKREF__ #define __BTRFS_BACKREF__ -#include "ioctl.h" +#include #include "ulist.h" #include "extent_io.h" diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 541ce9a9949e..69321013683c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -31,10 +31,10 @@ #include #include #include +#include #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" -#include "ioctl.h" struct btrfs_trans_handle; struct btrfs_transaction; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 083abca56055..13c78ea3ebce 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -30,11 +30,11 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "tree-log.h" #include "locking.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 60ec7589900c..fc8aa8bf80a1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -39,12 +39,12 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "ordered-data.h" #include "xattr.h" diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1b554b47e814..96ecefc1724f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -42,12 +42,12 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "volumes.h" #include "locking.h" diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h deleted file mode 100644 index dabca9cc8c2e..000000000000 --- a/fs/btrfs/ioctl.h +++ /dev/null @@ -1,502 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#ifndef __IOCTL_ -#define __IOCTL_ -#include - -#define BTRFS_IOCTL_MAGIC 0x94 -#define BTRFS_VOL_NAME_MAX 255 - -/* this should be 4k */ -#define BTRFS_PATH_NAME_MAX 4087 -struct btrfs_ioctl_vol_args { - __s64 fd; - char name[BTRFS_PATH_NAME_MAX + 1]; -}; - -#define BTRFS_DEVICE_PATH_NAME_MAX 1024 - -#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) -#define BTRFS_SUBVOL_RDONLY (1ULL << 1) -#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) -#define BTRFS_FSID_SIZE 16 -#define BTRFS_UUID_SIZE 16 - -#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) - -struct btrfs_qgroup_limit { - __u64 flags; - __u64 max_rfer; - __u64 max_excl; - __u64 rsv_rfer; - __u64 rsv_excl; -}; - -struct btrfs_qgroup_inherit { - __u64 flags; - __u64 num_qgroups; - __u64 num_ref_copies; - __u64 num_excl_copies; - struct btrfs_qgroup_limit lim; - __u64 qgroups[0]; -}; - -struct btrfs_ioctl_qgroup_limit_args { - __u64 qgroupid; - struct btrfs_qgroup_limit lim; -}; - -#define BTRFS_SUBVOL_NAME_MAX 4039 -struct btrfs_ioctl_vol_args_v2 { - __s64 fd; - __u64 transid; - __u64 flags; - union { - struct { - __u64 size; - struct btrfs_qgroup_inherit __user *qgroup_inherit; - }; - __u64 unused[4]; - }; - char name[BTRFS_SUBVOL_NAME_MAX + 1]; -}; - -/* - * structure to report errors and progress to userspace, either as a - * result of a finished scrub, a canceled scrub or a progress inquiry - */ -struct btrfs_scrub_progress { - __u64 data_extents_scrubbed; /* # of data extents scrubbed */ - __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */ - __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */ - __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ - __u64 read_errors; /* # of read errors encountered (EIO) */ - __u64 csum_errors; /* # of failed csum checks */ - __u64 verify_errors; /* # of occurences, where the metadata - * of a tree block did not match the - * expected values, like generation or - * logical */ - __u64 no_csum; /* # of 4k data block for which no csum - * is present, probably the result of - * data written with nodatasum */ - __u64 csum_discards; /* # of csum for which no data was found - * in the extent tree. */ - __u64 super_errors; /* # of bad super blocks encountered */ - __u64 malloc_errors; /* # of internal kmalloc errors. These - * will likely cause an incomplete - * scrub */ - __u64 uncorrectable_errors; /* # of errors where either no intact - * copy was found or the writeback - * failed */ - __u64 corrected_errors; /* # of errors corrected */ - __u64 last_physical; /* last physical address scrubbed. In - * case a scrub was aborted, this can - * be used to restart the scrub */ - __u64 unverified_errors; /* # of occurences where a read for a - * full (64k) bio failed, but the re- - * check succeeded for each 4k piece. - * Intermittent error. */ -}; - -#define BTRFS_SCRUB_READONLY 1 -struct btrfs_ioctl_scrub_args { - __u64 devid; /* in */ - __u64 start; /* in */ - __u64 end; /* in */ - __u64 flags; /* in */ - struct btrfs_scrub_progress progress; /* out */ - /* pad to 1k */ - __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; -}; - -#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 -#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 -struct btrfs_ioctl_dev_replace_start_params { - __u64 srcdevid; /* in, if 0, use srcdev_name instead */ - __u64 cont_reading_from_srcdev_mode; /* in, see #define - * above */ - __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ - __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ -}; - -#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 -struct btrfs_ioctl_dev_replace_status_params { - __u64 replace_state; /* out, see #define above */ - __u64 progress_1000; /* out, 0 <= x <= 1000 */ - __u64 time_started; /* out, seconds since 1-Jan-1970 */ - __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ - __u64 num_write_errors; /* out */ - __u64 num_uncorrectable_read_errors; /* out */ -}; - -#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 -#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 -#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 -#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 -#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 -#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 -struct btrfs_ioctl_dev_replace_args { - __u64 cmd; /* in */ - __u64 result; /* out */ - - union { - struct btrfs_ioctl_dev_replace_start_params start; - struct btrfs_ioctl_dev_replace_status_params status; - }; /* in/out */ - - __u64 spare[64]; -}; - -struct btrfs_ioctl_dev_info_args { - __u64 devid; /* in/out */ - __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ - __u64 bytes_used; /* out */ - __u64 total_bytes; /* out */ - __u64 unused[379]; /* pad to 4k */ - __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ -}; - -struct btrfs_ioctl_fs_info_args { - __u64 max_id; /* out */ - __u64 num_devices; /* out */ - __u8 fsid[BTRFS_FSID_SIZE]; /* out */ - __u64 reserved[124]; /* pad to 1k */ -}; - -/* balance control ioctl modes */ -#define BTRFS_BALANCE_CTL_PAUSE 1 -#define BTRFS_BALANCE_CTL_CANCEL 2 - -/* - * this is packed, because it should be exactly the same as its disk - * byte order counterpart (struct btrfs_disk_balance_args) - */ -struct btrfs_balance_args { - __u64 profiles; - __u64 usage; - __u64 devid; - __u64 pstart; - __u64 pend; - __u64 vstart; - __u64 vend; - - __u64 target; - - __u64 flags; - - __u64 unused[8]; -} __attribute__ ((__packed__)); - -/* report balance progress to userspace */ -struct btrfs_balance_progress { - __u64 expected; /* estimated # of chunks that will be - * relocated to fulfill the request */ - __u64 considered; /* # of chunks we have considered so far */ - __u64 completed; /* # of chunks relocated so far */ -}; - -#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0) -#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1) -#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2) - -struct btrfs_ioctl_balance_args { - __u64 flags; /* in/out */ - __u64 state; /* out */ - - struct btrfs_balance_args data; /* in/out */ - struct btrfs_balance_args meta; /* in/out */ - struct btrfs_balance_args sys; /* in/out */ - - struct btrfs_balance_progress stat; /* out */ - - __u64 unused[72]; /* pad to 1k */ -}; - -#define BTRFS_INO_LOOKUP_PATH_MAX 4080 -struct btrfs_ioctl_ino_lookup_args { - __u64 treeid; - __u64 objectid; - char name[BTRFS_INO_LOOKUP_PATH_MAX]; -}; - -struct btrfs_ioctl_search_key { - /* which root are we searching. 0 is the tree of tree roots */ - __u64 tree_id; - - /* keys returned will be >= min and <= max */ - __u64 min_objectid; - __u64 max_objectid; - - /* keys returned will be >= min and <= max */ - __u64 min_offset; - __u64 max_offset; - - /* max and min transids to search for */ - __u64 min_transid; - __u64 max_transid; - - /* keys returned will be >= min and <= max */ - __u32 min_type; - __u32 max_type; - - /* - * how many items did userland ask for, and how many are we - * returning - */ - __u32 nr_items; - - /* align to 64 bits */ - __u32 unused; - - /* some extra for later */ - __u64 unused1; - __u64 unused2; - __u64 unused3; - __u64 unused4; -}; - -struct btrfs_ioctl_search_header { - __u64 transid; - __u64 objectid; - __u64 offset; - __u32 type; - __u32 len; -}; - -#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key)) -/* - * the buf is an array of search headers where - * each header is followed by the actual item - * the type field is expanded to 32 bits for alignment - */ -struct btrfs_ioctl_search_args { - struct btrfs_ioctl_search_key key; - char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; -}; - -struct btrfs_ioctl_clone_range_args { - __s64 src_fd; - __u64 src_offset, src_length; - __u64 dest_offset; -}; - -/* flags for the defrag range ioctl */ -#define BTRFS_DEFRAG_RANGE_COMPRESS 1 -#define BTRFS_DEFRAG_RANGE_START_IO 2 - -struct btrfs_ioctl_space_info { - __u64 flags; - __u64 total_bytes; - __u64 used_bytes; -}; - -struct btrfs_ioctl_space_args { - __u64 space_slots; - __u64 total_spaces; - struct btrfs_ioctl_space_info spaces[0]; -}; - -struct btrfs_data_container { - __u32 bytes_left; /* out -- bytes not needed to deliver output */ - __u32 bytes_missing; /* out -- additional bytes needed for result */ - __u32 elem_cnt; /* out */ - __u32 elem_missed; /* out */ - __u64 val[0]; /* out */ -}; - -struct btrfs_ioctl_ino_path_args { - __u64 inum; /* in */ - __u64 size; /* in */ - __u64 reserved[4]; - /* struct btrfs_data_container *fspath; out */ - __u64 fspath; /* out */ -}; - -struct btrfs_ioctl_logical_ino_args { - __u64 logical; /* in */ - __u64 size; /* in */ - __u64 reserved[4]; - /* struct btrfs_data_container *inodes; out */ - __u64 inodes; -}; - -enum btrfs_dev_stat_values { - /* disk I/O failure stats */ - BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ - BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ - BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ - - /* stats for indirect indications for I/O failures */ - BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or - * contents is illegal: this is an - * indication that the block was damaged - * during read or write, or written to - * wrong location or read from wrong - * location */ - BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not - * been written */ - - BTRFS_DEV_STAT_VALUES_MAX -}; - -/* Reset statistics after reading; needs SYS_ADMIN capability */ -#define BTRFS_DEV_STATS_RESET (1ULL << 0) - -struct btrfs_ioctl_get_dev_stats { - __u64 devid; /* in */ - __u64 nr_items; /* in/out */ - __u64 flags; /* in/out */ - - /* out values: */ - __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; - - __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ -}; - -#define BTRFS_QUOTA_CTL_ENABLE 1 -#define BTRFS_QUOTA_CTL_DISABLE 2 -#define BTRFS_QUOTA_CTL_RESCAN 3 -struct btrfs_ioctl_quota_ctl_args { - __u64 cmd; - __u64 status; -}; - -struct btrfs_ioctl_qgroup_assign_args { - __u64 assign; - __u64 src; - __u64 dst; -}; - -struct btrfs_ioctl_qgroup_create_args { - __u64 create; - __u64 qgroupid; -}; -struct btrfs_ioctl_timespec { - __u64 sec; - __u32 nsec; -}; - -struct btrfs_ioctl_received_subvol_args { - char uuid[BTRFS_UUID_SIZE]; /* in */ - __u64 stransid; /* in */ - __u64 rtransid; /* out */ - struct btrfs_ioctl_timespec stime; /* in */ - struct btrfs_ioctl_timespec rtime; /* out */ - __u64 flags; /* in */ - __u64 reserved[16]; /* in */ -}; - -struct btrfs_ioctl_send_args { - __s64 send_fd; /* in */ - __u64 clone_sources_count; /* in */ - __u64 __user *clone_sources; /* in */ - __u64 parent_root; /* in */ - __u64 flags; /* in */ - __u64 reserved[4]; /* in */ -}; - -#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ - struct btrfs_ioctl_vol_args) -/* trans start and trans end are dangerous, and only for - * use by applications that know how to avoid the - * resulting deadlocks - */ -#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) -#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) -#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) - -#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) -#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ - struct btrfs_ioctl_vol_args) - -#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ - struct btrfs_ioctl_clone_range_args) - -#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \ - struct btrfs_ioctl_defrag_range_args) -#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ - struct btrfs_ioctl_search_args) -#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ - struct btrfs_ioctl_ino_lookup_args) -#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) -#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ - struct btrfs_ioctl_space_args) -#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) -#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) -#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ - struct btrfs_ioctl_vol_args_v2) -#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ - struct btrfs_ioctl_vol_args_v2) -#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) -#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) -#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ - struct btrfs_ioctl_scrub_args) -#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) -#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \ - struct btrfs_ioctl_scrub_args) -#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \ - struct btrfs_ioctl_dev_info_args) -#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ - struct btrfs_ioctl_fs_info_args) -#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ - struct btrfs_ioctl_balance_args) -#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int) -#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \ - struct btrfs_ioctl_balance_args) -#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ - struct btrfs_ioctl_ino_path_args) -#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ - struct btrfs_ioctl_ino_path_args) -#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ - struct btrfs_ioctl_received_subvol_args) -#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) -#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ - struct btrfs_ioctl_quota_ctl_args) -#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ - struct btrfs_ioctl_qgroup_assign_args) -#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ - struct btrfs_ioctl_qgroup_create_args) -#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ - struct btrfs_ioctl_qgroup_limit_args) -#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ - struct btrfs_ioctl_get_dev_stats) -#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ - struct btrfs_ioctl_dev_replace_args) - -#endif diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a5c856234323..a0d6368249fa 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -23,13 +23,13 @@ #include #include #include +#include #include "ctree.h" #include "transaction.h" #include "disk-io.h" #include "locking.h" #include "ulist.h" -#include "ioctl.h" #include "backref.h" /* TODO XXX FIXME diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 67b373bf3ff9..6846ededfe95 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -41,13 +41,13 @@ #include #include #include +#include #include "compat.h" #include "delayed-inode.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "xattr.h" #include "volumes.h" diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d3c3939ac751..12bb84166a5f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -21,8 +21,8 @@ #include #include +#include #include "async-thread.h" -#include "ioctl.h" #define BTRFS_STRIPE_LEN (64 * 1024) diff --git a/include/linux/btrfs.h b/include/linux/btrfs.h new file mode 100644 index 000000000000..22d799147db2 --- /dev/null +++ b/include/linux/btrfs.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_BTRFS_H +#define _LINUX_BTRFS_H + +#include + +#endif /* _LINUX_BTRFS_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 19e765fbfef7..896ee1247294 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -68,6 +68,7 @@ header-y += blkpg.h header-y += blktrace_api.h header-y += bpqether.h header-y += bsg.h +header-y += btrfs.h header-y += can.h header-y += capability.h header-y += capi.h diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h new file mode 100644 index 000000000000..cffbb582dd90 --- /dev/null +++ b/include/uapi/linux/btrfs.h @@ -0,0 +1,503 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef _UAPI_LINUX_BTRFS_H +#define _UAPI_LINUX_BTRFS_H +#include +#include + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_VOL_NAME_MAX 255 + +/* this should be 4k */ +#define BTRFS_PATH_NAME_MAX 4087 +struct btrfs_ioctl_vol_args { + __s64 fd; + char name[BTRFS_PATH_NAME_MAX + 1]; +}; + +#define BTRFS_DEVICE_PATH_NAME_MAX 1024 + +#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +#define BTRFS_SUBVOL_RDONLY (1ULL << 1) +#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) +#define BTRFS_FSID_SIZE 16 +#define BTRFS_UUID_SIZE 16 + +#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) + +struct btrfs_qgroup_limit { + __u64 flags; + __u64 max_rfer; + __u64 max_excl; + __u64 rsv_rfer; + __u64 rsv_excl; +}; + +struct btrfs_qgroup_inherit { + __u64 flags; + __u64 num_qgroups; + __u64 num_ref_copies; + __u64 num_excl_copies; + struct btrfs_qgroup_limit lim; + __u64 qgroups[0]; +}; + +struct btrfs_ioctl_qgroup_limit_args { + __u64 qgroupid; + struct btrfs_qgroup_limit lim; +}; + +#define BTRFS_SUBVOL_NAME_MAX 4039 +struct btrfs_ioctl_vol_args_v2 { + __s64 fd; + __u64 transid; + __u64 flags; + union { + struct { + __u64 size; + struct btrfs_qgroup_inherit __user *qgroup_inherit; + }; + __u64 unused[4]; + }; + char name[BTRFS_SUBVOL_NAME_MAX + 1]; +}; + +/* + * structure to report errors and progress to userspace, either as a + * result of a finished scrub, a canceled scrub or a progress inquiry + */ +struct btrfs_scrub_progress { + __u64 data_extents_scrubbed; /* # of data extents scrubbed */ + __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */ + __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */ + __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ + __u64 read_errors; /* # of read errors encountered (EIO) */ + __u64 csum_errors; /* # of failed csum checks */ + __u64 verify_errors; /* # of occurences, where the metadata + * of a tree block did not match the + * expected values, like generation or + * logical */ + __u64 no_csum; /* # of 4k data block for which no csum + * is present, probably the result of + * data written with nodatasum */ + __u64 csum_discards; /* # of csum for which no data was found + * in the extent tree. */ + __u64 super_errors; /* # of bad super blocks encountered */ + __u64 malloc_errors; /* # of internal kmalloc errors. These + * will likely cause an incomplete + * scrub */ + __u64 uncorrectable_errors; /* # of errors where either no intact + * copy was found or the writeback + * failed */ + __u64 corrected_errors; /* # of errors corrected */ + __u64 last_physical; /* last physical address scrubbed. In + * case a scrub was aborted, this can + * be used to restart the scrub */ + __u64 unverified_errors; /* # of occurences where a read for a + * full (64k) bio failed, but the re- + * check succeeded for each 4k piece. + * Intermittent error. */ +}; + +#define BTRFS_SCRUB_READONLY 1 +struct btrfs_ioctl_scrub_args { + __u64 devid; /* in */ + __u64 start; /* in */ + __u64 end; /* in */ + __u64 flags; /* in */ + struct btrfs_scrub_progress progress; /* out */ + /* pad to 1k */ + __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 +struct btrfs_ioctl_dev_replace_start_params { + __u64 srcdevid; /* in, if 0, use srcdev_name instead */ + __u64 cont_reading_from_srcdev_mode; /* in, see #define + * above */ + __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ + __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 +struct btrfs_ioctl_dev_replace_status_params { + __u64 replace_state; /* out, see #define above */ + __u64 progress_1000; /* out, 0 <= x <= 1000 */ + __u64 time_started; /* out, seconds since 1-Jan-1970 */ + __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ + __u64 num_write_errors; /* out */ + __u64 num_uncorrectable_read_errors; /* out */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 +struct btrfs_ioctl_dev_replace_args { + __u64 cmd; /* in */ + __u64 result; /* out */ + + union { + struct btrfs_ioctl_dev_replace_start_params start; + struct btrfs_ioctl_dev_replace_status_params status; + }; /* in/out */ + + __u64 spare[64]; +}; + +struct btrfs_ioctl_dev_info_args { + __u64 devid; /* in/out */ + __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ + __u64 bytes_used; /* out */ + __u64 total_bytes; /* out */ + __u64 unused[379]; /* pad to 4k */ + __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ +}; + +struct btrfs_ioctl_fs_info_args { + __u64 max_id; /* out */ + __u64 num_devices; /* out */ + __u8 fsid[BTRFS_FSID_SIZE]; /* out */ + __u64 reserved[124]; /* pad to 1k */ +}; + +/* balance control ioctl modes */ +#define BTRFS_BALANCE_CTL_PAUSE 1 +#define BTRFS_BALANCE_CTL_CANCEL 2 + +/* + * this is packed, because it should be exactly the same as its disk + * byte order counterpart (struct btrfs_disk_balance_args) + */ +struct btrfs_balance_args { + __u64 profiles; + __u64 usage; + __u64 devid; + __u64 pstart; + __u64 pend; + __u64 vstart; + __u64 vend; + + __u64 target; + + __u64 flags; + + __u64 unused[8]; +} __attribute__ ((__packed__)); + +/* report balance progress to userspace */ +struct btrfs_balance_progress { + __u64 expected; /* estimated # of chunks that will be + * relocated to fulfill the request */ + __u64 considered; /* # of chunks we have considered so far */ + __u64 completed; /* # of chunks relocated so far */ +}; + +#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0) +#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1) +#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2) + +struct btrfs_ioctl_balance_args { + __u64 flags; /* in/out */ + __u64 state; /* out */ + + struct btrfs_balance_args data; /* in/out */ + struct btrfs_balance_args meta; /* in/out */ + struct btrfs_balance_args sys; /* in/out */ + + struct btrfs_balance_progress stat; /* out */ + + __u64 unused[72]; /* pad to 1k */ +}; + +#define BTRFS_INO_LOOKUP_PATH_MAX 4080 +struct btrfs_ioctl_ino_lookup_args { + __u64 treeid; + __u64 objectid; + char name[BTRFS_INO_LOOKUP_PATH_MAX]; +}; + +struct btrfs_ioctl_search_key { + /* which root are we searching. 0 is the tree of tree roots */ + __u64 tree_id; + + /* keys returned will be >= min and <= max */ + __u64 min_objectid; + __u64 max_objectid; + + /* keys returned will be >= min and <= max */ + __u64 min_offset; + __u64 max_offset; + + /* max and min transids to search for */ + __u64 min_transid; + __u64 max_transid; + + /* keys returned will be >= min and <= max */ + __u32 min_type; + __u32 max_type; + + /* + * how many items did userland ask for, and how many are we + * returning + */ + __u32 nr_items; + + /* align to 64 bits */ + __u32 unused; + + /* some extra for later */ + __u64 unused1; + __u64 unused2; + __u64 unused3; + __u64 unused4; +}; + +struct btrfs_ioctl_search_header { + __u64 transid; + __u64 objectid; + __u64 offset; + __u32 type; + __u32 len; +}; + +#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key)) +/* + * the buf is an array of search headers where + * each header is followed by the actual item + * the type field is expanded to 32 bits for alignment + */ +struct btrfs_ioctl_search_args { + struct btrfs_ioctl_search_key key; + char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; +}; + +struct btrfs_ioctl_clone_range_args { + __s64 src_fd; + __u64 src_offset, src_length; + __u64 dest_offset; +}; + +/* flags for the defrag range ioctl */ +#define BTRFS_DEFRAG_RANGE_COMPRESS 1 +#define BTRFS_DEFRAG_RANGE_START_IO 2 + +struct btrfs_ioctl_space_info { + __u64 flags; + __u64 total_bytes; + __u64 used_bytes; +}; + +struct btrfs_ioctl_space_args { + __u64 space_slots; + __u64 total_spaces; + struct btrfs_ioctl_space_info spaces[0]; +}; + +struct btrfs_data_container { + __u32 bytes_left; /* out -- bytes not needed to deliver output */ + __u32 bytes_missing; /* out -- additional bytes needed for result */ + __u32 elem_cnt; /* out */ + __u32 elem_missed; /* out */ + __u64 val[0]; /* out */ +}; + +struct btrfs_ioctl_ino_path_args { + __u64 inum; /* in */ + __u64 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *fspath; out */ + __u64 fspath; /* out */ +}; + +struct btrfs_ioctl_logical_ino_args { + __u64 logical; /* in */ + __u64 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *inodes; out */ + __u64 inodes; +}; + +enum btrfs_dev_stat_values { + /* disk I/O failure stats */ + BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ + + /* stats for indirect indications for I/O failures */ + BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or + * contents is illegal: this is an + * indication that the block was damaged + * during read or write, or written to + * wrong location or read from wrong + * location */ + BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not + * been written */ + + BTRFS_DEV_STAT_VALUES_MAX +}; + +/* Reset statistics after reading; needs SYS_ADMIN capability */ +#define BTRFS_DEV_STATS_RESET (1ULL << 0) + +struct btrfs_ioctl_get_dev_stats { + __u64 devid; /* in */ + __u64 nr_items; /* in/out */ + __u64 flags; /* in/out */ + + /* out values: */ + __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; + + __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ +}; + +#define BTRFS_QUOTA_CTL_ENABLE 1 +#define BTRFS_QUOTA_CTL_DISABLE 2 +#define BTRFS_QUOTA_CTL_RESCAN 3 +struct btrfs_ioctl_quota_ctl_args { + __u64 cmd; + __u64 status; +}; + +struct btrfs_ioctl_qgroup_assign_args { + __u64 assign; + __u64 src; + __u64 dst; +}; + +struct btrfs_ioctl_qgroup_create_args { + __u64 create; + __u64 qgroupid; +}; +struct btrfs_ioctl_timespec { + __u64 sec; + __u32 nsec; +}; + +struct btrfs_ioctl_received_subvol_args { + char uuid[BTRFS_UUID_SIZE]; /* in */ + __u64 stransid; /* in */ + __u64 rtransid; /* out */ + struct btrfs_ioctl_timespec stime; /* in */ + struct btrfs_ioctl_timespec rtime; /* out */ + __u64 flags; /* in */ + __u64 reserved[16]; /* in */ +}; + +struct btrfs_ioctl_send_args { + __s64 send_fd; /* in */ + __u64 clone_sources_count; /* in */ + __u64 __user *clone_sources; /* in */ + __u64 parent_root; /* in */ + __u64 flags; /* in */ + __u64 reserved[4]; /* in */ +}; + +#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ + struct btrfs_ioctl_vol_args) +/* trans start and trans end are dangerous, and only for + * use by applications that know how to avoid the + * resulting deadlocks + */ +#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) +#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) +#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) + +#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) +#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ + struct btrfs_ioctl_vol_args) + +#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ + struct btrfs_ioctl_clone_range_args) + +#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \ + struct btrfs_ioctl_defrag_range_args) +#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ + struct btrfs_ioctl_search_args) +#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ + struct btrfs_ioctl_ino_lookup_args) +#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) +#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ + struct btrfs_ioctl_space_args) +#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) +#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) +#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) +#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) +#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) +#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \ + struct btrfs_ioctl_dev_info_args) +#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ + struct btrfs_ioctl_fs_info_args) +#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int) +#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ + struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ + struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ + struct btrfs_ioctl_received_subvol_args) +#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) +#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ + struct btrfs_ioctl_quota_ctl_args) +#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ + struct btrfs_ioctl_qgroup_assign_args) +#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ + struct btrfs_ioctl_qgroup_create_args) +#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ + struct btrfs_ioctl_qgroup_limit_args) +#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ + struct btrfs_ioctl_get_dev_stats) +#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ + struct btrfs_ioctl_dev_replace_args) + +#endif /* _UAPI_LINUX_BTRFS_H */ -- cgit v1.2.3 From 87533c475187c1420794a2e164bc67a7974f1327 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 29 Jan 2013 10:14:48 +0000 Subject: Btrfs: use bit operation for ->fs_state There is no lock to protect fs_info->fs_state, it will introduce some problems, such as the value may be covered by the other task when several tasks modify it. For example: Task0 - CPU0 Task1 - CPU1 mov %fs_state rax or $0x1 rax mov %fs_state rax or $0x2 rax mov rax %fs_state mov rax %fs_state The expected value is 3, but in fact, it is 2. Though this problem doesn't happen now (because there is only one flag currently), the code is error prone, if we add other flags, the above problem will happen to a certainty. Now we use bit operation for it to fix the above problem. In this way, we can make the code more robust and be easy to add new flags. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 4 +++- fs/btrfs/disk-io.c | 5 +++-- fs/btrfs/file.c | 2 +- fs/btrfs/scrub.c | 2 +- fs/btrfs/super.c | 4 ++-- fs/btrfs/transaction.c | 9 ++++----- 6 files changed, 14 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5f5c30aaef36..78fba44ad3d9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -338,7 +338,9 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) /* * File system states */ +#define BTRFS_FS_STATE_ERROR 0 +/* Super block flags */ /* Errors detected */ #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) @@ -1549,7 +1551,7 @@ struct btrfs_fs_info { u64 qgroup_seq; /* filesystem state */ - u64 fs_state; + unsigned long fs_state; struct btrfs_delayed_root *delayed_root; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 00b6742fdde7..8e58a1f90546 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2201,7 +2201,8 @@ int open_ctree(struct super_block *sb, goto fail_alloc; /* check FS state, whether FS is broken. */ - fs_info->fs_state |= btrfs_super_flags(disk_super); + if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) + set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); if (ret) { @@ -3359,7 +3360,7 @@ int close_ctree(struct btrfs_root *root) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) btrfs_error_commit_super(root); btrfs_put_block_group_cache(fs_info); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 13c78ea3ebce..75d0fe134be3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1545,7 +1545,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, * although we have opened a file as writable, we have * to stop this write operation to ensure FS consistency. */ - if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) { mutex_unlock(&inode->i_mutex); err = -EROFS; goto out; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 67783e03d121..c78b2a3fc335 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2708,7 +2708,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, int ret; struct btrfs_root *root = sctx->dev_root; - if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) + if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) return -EIO; gen = root->fs_info->last_trans_committed; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index eda330df45a4..4152f9ea34f5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -98,7 +98,7 @@ static void __save_error_info(struct btrfs_fs_info *fs_info) * today we only save the error info into ram. Long term we'll * also send it down to the disk */ - fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; + set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); } static void save_error_info(struct btrfs_fs_info *fs_info) @@ -114,7 +114,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) if (sb->s_flags & MS_RDONLY) return; - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { sb->s_flags |= MS_RDONLY; printk(KERN_INFO "btrfs is forced readonly\n"); /* diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 34610dc6d140..baf6d74fd0f2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -61,7 +61,7 @@ static noinline int join_transaction(struct btrfs_root *root, int type) spin_lock(&fs_info->trans_lock); loop: /* The file system has been taken offline. No new transactions. */ - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { spin_unlock(&fs_info->trans_lock); return -EROFS; } @@ -113,7 +113,7 @@ loop: kmem_cache_free(btrfs_transaction_cachep, cur_trans); cur_trans = fs_info->running_transaction; goto loop; - } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + } else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { spin_unlock(&fs_info->trans_lock); kmem_cache_free(btrfs_transaction_cachep, cur_trans); return -EROFS; @@ -301,7 +301,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type, int ret; u64 qgroup_reserved = 0; - if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) + if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) return ERR_PTR(-EROFS); if (current->journal_info) { @@ -645,9 +645,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_run_delayed_iputs(root); if (trans->aborted || - root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) err = -EIO; - } assert_qgroups_uptodate(trans); kmem_cache_free(btrfs_trans_handle_cachep, trans); -- cgit v1.2.3 From 569e0f358c0c37f6733702d4a5d2c412860f7169 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Feb 2013 11:09:14 -0500 Subject: Btrfs: place ordered operations on a per transaction list Miao made the ordered operations stuff run async, which introduced a deadlock where we could get somebody (sync) racing in and committing the transaction while a commit was already happening. The new committer would try and flush ordered operations which would hang waiting for the commit to finish because it is done asynchronously and no longer inherits the callers trans handle. To fix this we need to make the ordered operations list a per transaction list. We can get new inodes added to the ordered operation list by truncating them and then having another process writing to them, so this makes it so that anybody trying to add an ordered operation _must_ start a transaction in order to add itself to the list, which will keep new inodes from getting added to the ordered operations list after we start committing. This should fix the deadlock and also keeps us from doing a lot more work than we need to during commit. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 7 ------- fs/btrfs/disk-io.c | 11 ++++++----- fs/btrfs/file.c | 15 ++++++++++++++- fs/btrfs/ordered-data.c | 13 ++++++++----- fs/btrfs/ordered-data.h | 3 ++- fs/btrfs/transaction.c | 5 +++-- fs/btrfs/transaction.h | 1 + 7 files changed, 34 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 14f01dc70ea6..961ff2986341 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1407,13 +1407,6 @@ struct btrfs_fs_info { */ struct list_head delalloc_inodes; - /* - * special rename and truncate targets that must be on disk before - * we're allowed to commit. This is basically the ext3 style - * data=ordered list. - */ - struct list_head ordered_operations; - /* * there is a pool of worker threads for checksumming during writes * and a pool for checksumming after reads. This is because readers diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9fcce54ecde4..e511d9f78c19 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -56,7 +56,8 @@ static void end_workqueue_fn(struct btrfs_work *work); static void free_fs_root(struct btrfs_root *root); static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int read_only); -static void btrfs_destroy_ordered_operations(struct btrfs_root *root); +static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, + struct btrfs_root *root); static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_root *root); @@ -2029,7 +2030,6 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->delayed_iputs); INIT_LIST_HEAD(&fs_info->delalloc_inodes); - INIT_LIST_HEAD(&fs_info->ordered_operations); INIT_LIST_HEAD(&fs_info->caching_block_groups); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->trans_lock); @@ -3538,7 +3538,8 @@ void btrfs_error_commit_super(struct btrfs_root *root) btrfs_cleanup_transaction(root); } -static void btrfs_destroy_ordered_operations(struct btrfs_root *root) +static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, + struct btrfs_root *root) { struct btrfs_inode *btrfs_inode; struct list_head splice; @@ -3548,7 +3549,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root) mutex_lock(&root->fs_info->ordered_operations_mutex); spin_lock(&root->fs_info->ordered_extent_lock); - list_splice_init(&root->fs_info->ordered_operations, &splice); + list_splice_init(&t->ordered_operations, &splice); while (!list_empty(&splice)) { btrfs_inode = list_entry(splice.next, struct btrfs_inode, ordered_operations); @@ -3829,7 +3830,7 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) while (!list_empty(&list)) { t = list_entry(list.next, struct btrfs_transaction, list); - btrfs_destroy_ordered_operations(root); + btrfs_destroy_ordered_operations(t, root); btrfs_destroy_ordered_extents(root); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 75d0fe134be3..b12ba52c4505 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1628,7 +1628,20 @@ int btrfs_release_file(struct inode *inode, struct file *filp) */ if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, &BTRFS_I(inode)->runtime_flags)) { - btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + + /* + * We need to block on a committing transaction to keep us from + * throwing a ordered operation on to the list and causing + * something like sync to deadlock trying to flush out this + * inode. + */ + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); + btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode); + btrfs_end_transaction(trans, root); if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) filemap_flush(inode->i_mapping); } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 9489fa96e3ed..dc08d77b717e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -612,10 +612,12 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) * extra check to make sure the ordered operation list really is empty * before we return */ -int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) +int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int wait) { struct btrfs_inode *btrfs_inode; struct inode *inode; + struct btrfs_transaction *cur_trans = trans->transaction; struct list_head splice; struct list_head works; struct btrfs_delalloc_work *work, *next; @@ -626,7 +628,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) mutex_lock(&root->fs_info->ordered_operations_mutex); spin_lock(&root->fs_info->ordered_extent_lock); - list_splice_init(&root->fs_info->ordered_operations, &splice); + list_splice_init(&cur_trans->ordered_operations, &splice); while (!list_empty(&splice)) { btrfs_inode = list_entry(splice.next, struct btrfs_inode, ordered_operations); @@ -643,7 +645,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) if (!wait) list_add_tail(&BTRFS_I(inode)->ordered_operations, - &root->fs_info->ordered_operations); + &cur_trans->ordered_operations); spin_unlock(&root->fs_info->ordered_extent_lock); work = btrfs_alloc_delalloc_work(inode, wait, 1); @@ -653,7 +655,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) list_add_tail(&btrfs_inode->ordered_operations, &splice); list_splice_tail(&splice, - &root->fs_info->ordered_operations); + &cur_trans->ordered_operations); spin_unlock(&root->fs_info->ordered_extent_lock); ret = -ENOMEM; goto out; @@ -1033,6 +1035,7 @@ out: void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { + struct btrfs_transaction *cur_trans = trans->transaction; u64 last_mod; last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); @@ -1047,7 +1050,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, spin_lock(&root->fs_info->ordered_extent_lock); if (list_empty(&BTRFS_I(inode)->ordered_operations)) { list_add_tail(&BTRFS_I(inode)->ordered_operations, - &root->fs_info->ordered_operations); + &cur_trans->ordered_operations); } spin_unlock(&root->fs_info->ordered_extent_lock); } diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index d523dbd2314d..267ac99095f6 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -197,7 +197,8 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); -int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); +int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int wait); void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index d574d830a1c4..0c87d18d1881 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -157,6 +157,7 @@ loop: spin_lock_init(&cur_trans->delayed_refs.lock); INIT_LIST_HEAD(&cur_trans->pending_snapshots); + INIT_LIST_HEAD(&cur_trans->ordered_operations); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, fs_info->btree_inode->i_mapping); @@ -1456,7 +1457,7 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, * it here and no for sure that nothing new will be added * to the list */ - ret = btrfs_run_ordered_operations(root, 1); + ret = btrfs_run_ordered_operations(trans, root, 1); return ret; } @@ -1479,7 +1480,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, int should_grow = 0; unsigned long now = get_seconds(); - ret = btrfs_run_ordered_operations(root, 0); + ret = btrfs_run_ordered_operations(trans, root, 0); if (ret) { btrfs_abort_transaction(trans, root, ret); btrfs_end_transaction(trans, root); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 46628210e5d8..3f772fd0191a 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -43,6 +43,7 @@ struct btrfs_transaction { wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; struct list_head pending_snapshots; + struct list_head ordered_operations; struct btrfs_delayed_ref_root delayed_refs; int aborted; }; -- cgit v1.2.3