diff options
Diffstat (limited to 'drivers/block/loop.c')
| -rw-r--r-- | drivers/block/loop.c | 298 | 
1 files changed, 160 insertions, 138 deletions
| diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4720c7ade0ae..1e888c9e85b3 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -76,6 +76,8 @@  #include <linux/splice.h>  #include <linux/sysfs.h>  #include <linux/miscdevice.h> +#include <linux/falloc.h> +  #include <asm/uaccess.h>  static DEFINE_IDR(loop_index_idr); @@ -159,17 +161,19 @@ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {  	&xor_funcs  }; -static loff_t get_loop_size(struct loop_device *lo, struct file *file) +static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file)  { -	loff_t size, offset, loopsize; +	loff_t size, loopsize;  	/* Compute loopsize in bytes */  	size = i_size_read(file->f_mapping->host); -	offset = lo->lo_offset;  	loopsize = size - offset; -	if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) -		loopsize = lo->lo_sizelimit; +	/* offset is beyond i_size, weird but possible */ +	if (loopsize < 0) +		return 0; +	if (sizelimit > 0 && sizelimit < loopsize) +		loopsize = sizelimit;  	/*  	 * Unfortunately, if we want to do I/O on the device,  	 * the number of 512-byte sectors has to fit into a sector_t. 
@@ -177,17 +181,25 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file)  	return loopsize >> 9;  } +static loff_t get_loop_size(struct loop_device *lo, struct file *file) +{ +	return get_size(lo->lo_offset, lo->lo_sizelimit, file); +} +  static int -figure_loop_size(struct loop_device *lo) +figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)  { -	loff_t size = get_loop_size(lo, lo->lo_backing_file); +	loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);  	sector_t x = (sector_t)size;  	if (unlikely((loff_t)x != size))  		return -EFBIG; - +	if (lo->lo_offset != offset) +		lo->lo_offset = offset; +	if (lo->lo_sizelimit != sizelimit) +		lo->lo_sizelimit = sizelimit;  	set_capacity(lo->lo_disk, x); -	return 0;					 +	return 0;  }  static inline int @@ -203,74 +215,6 @@ lo_do_transfer(struct loop_device *lo, int cmd,  }  /** - * do_lo_send_aops - helper for writing data to a loop device - * - * This is the fast version for backing filesystems which implement the address - * space operations write_begin and write_end. 
- */ -static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, -		loff_t pos, struct page *unused) -{ -	struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ -	struct address_space *mapping = file->f_mapping; -	pgoff_t index; -	unsigned offset, bv_offs; -	int len, ret; - -	mutex_lock(&mapping->host->i_mutex); -	index = pos >> PAGE_CACHE_SHIFT; -	offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1); -	bv_offs = bvec->bv_offset; -	len = bvec->bv_len; -	while (len > 0) { -		sector_t IV; -		unsigned size, copied; -		int transfer_result; -		struct page *page; -		void *fsdata; - -		IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); -		size = PAGE_CACHE_SIZE - offset; -		if (size > len) -			size = len; - -		ret = pagecache_write_begin(file, mapping, pos, size, 0, -							&page, &fsdata); -		if (ret) -			goto fail; - -		file_update_time(file); - -		transfer_result = lo_do_transfer(lo, WRITE, page, offset, -				bvec->bv_page, bv_offs, size, IV); -		copied = size; -		if (unlikely(transfer_result)) -			copied = 0; - -		ret = pagecache_write_end(file, mapping, pos, size, copied, -							page, fsdata); -		if (ret < 0 || ret != copied) -			goto fail; - -		if (unlikely(transfer_result)) -			goto fail; - -		bv_offs += copied; -		len -= copied; -		offset = 0; -		index++; -		pos += copied; -	} -	ret = 0; -out: -	mutex_unlock(&mapping->host->i_mutex); -	return ret; -fail: -	ret = -1; -	goto out; -} - -/**   * __do_lo_send_write - helper for writing data to a loop device   *   * This helper just factors out common code between do_lo_send_direct_write() @@ -297,10 +241,8 @@ static int __do_lo_send_write(struct file *file,  /**   * do_lo_send_direct_write - helper for writing data to a loop device   * - * This is the fast, non-transforming version for backing filesystems which do - * not implement the address space operations write_begin and write_end. - * It uses the write file operation which should be present on all writeable - * filesystems. 
+ * This is the fast, non-transforming version that does not need double + * buffering.   */  static int do_lo_send_direct_write(struct loop_device *lo,  		struct bio_vec *bvec, loff_t pos, struct page *page) @@ -316,15 +258,9 @@ static int do_lo_send_direct_write(struct loop_device *lo,  /**   * do_lo_send_write - helper for writing data to a loop device   * - * This is the slow, transforming version for filesystems which do not - * implement the address space operations write_begin and write_end.  It - * uses the write file operation which should be present on all writeable - * filesystems. - * - * Using fops->write is slower than using aops->{prepare,commit}_write in the - * transforming case because we need to double buffer the data as we cannot do - * the transformations in place as we do not have direct access to the - * destination pages of the backing file. + * This is the slow, transforming version that needs to double buffer the + * data as it cannot do the transformations in place without having direct + * access to the destination pages of the backing file.   
*/  static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,  		loff_t pos, struct page *page) @@ -350,17 +286,16 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)  	struct page *page = NULL;  	int i, ret = 0; -	do_lo_send = do_lo_send_aops; -	if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) { +	if (lo->transfer != transfer_none) { +		page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); +		if (unlikely(!page)) +			goto fail; +		kmap(page); +		do_lo_send = do_lo_send_write; +	} else {  		do_lo_send = do_lo_send_direct_write; -		if (lo->transfer != transfer_none) { -			page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); -			if (unlikely(!page)) -				goto fail; -			kmap(page); -			do_lo_send = do_lo_send_write; -		}  	} +  	bio_for_each_segment(bvec, bio, i) {  		ret = do_lo_send(lo, bvec, pos, page);  		if (ret < 0) @@ -447,7 +382,8 @@ do_lo_receive(struct loop_device *lo,  	if (retval < 0)  		return retval; - +	if (retval != bvec->bv_len) +		return -EIO;  	return 0;  } @@ -484,6 +420,29 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)  			}  		} +		/* +		 * We use punch hole to reclaim the free space used by the +		 * image a.k.a. discard. However we do not support discard if +		 * encryption is enabled, because it may give an attacker +		 * useful information. 
+		 */ +		if (bio->bi_rw & REQ_DISCARD) { +			struct file *file = lo->lo_backing_file; +			int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; + +			if ((!file->f_op->fallocate) || +			    lo->lo_encrypt_key_size) { +				ret = -EOPNOTSUPP; +				goto out; +			} +			ret = file->f_op->fallocate(file, mode, pos, +						    bio->bi_size); +			if (unlikely(ret && ret != -EINVAL && +				     ret != -EOPNOTSUPP)) +				ret = -EIO; +			goto out; +		} +  		ret = lo_send(lo, bio, pos);  		if ((bio->bi_rw & REQ_FUA) && !ret) { @@ -514,7 +473,7 @@ static struct bio *loop_get_bio(struct loop_device *lo)  	return bio_list_pop(&lo->lo_bio_list);  } -static int loop_make_request(struct request_queue *q, struct bio *old_bio) +static void loop_make_request(struct request_queue *q, struct bio *old_bio)  {  	struct loop_device *lo = q->queuedata;  	int rw = bio_rw(old_bio); @@ -532,12 +491,11 @@ static int loop_make_request(struct request_queue *q, struct bio *old_bio)  	loop_add_bio(lo, old_bio);  	wake_up(&lo->lo_event);  	spin_unlock_irq(&lo->lo_lock); -	return 0; +	return;  out:  	spin_unlock_irq(&lo->lo_lock);  	bio_io_error(old_bio); -	return 0;  }  struct switch_request { @@ -700,7 +658,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,  		goto out_putf;  	fput(old_file); -	if (max_part > 0) +	if (lo->lo_flags & LO_FLAGS_PARTSCAN)  		ioctl_by_bdev(bdev, BLKRRPART, 0);  	return 0; @@ -777,16 +735,25 @@ static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)  	return sprintf(buf, "%s\n", autoclear ? "1" : "0");  } +static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) +{ +	int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN); + +	return sprintf(buf, "%s\n", partscan ? 
"1" : "0"); +} +  LOOP_ATTR_RO(backing_file);  LOOP_ATTR_RO(offset);  LOOP_ATTR_RO(sizelimit);  LOOP_ATTR_RO(autoclear); +LOOP_ATTR_RO(partscan);  static struct attribute *loop_attrs[] = {  	&loop_attr_backing_file.attr,  	&loop_attr_offset.attr,  	&loop_attr_sizelimit.attr,  	&loop_attr_autoclear.attr, +	&loop_attr_partscan.attr,  	NULL,  }; @@ -807,6 +774,35 @@ static void loop_sysfs_exit(struct loop_device *lo)  			   &loop_attribute_group);  } +static void loop_config_discard(struct loop_device *lo) +{ +	struct file *file = lo->lo_backing_file; +	struct inode *inode = file->f_mapping->host; +	struct request_queue *q = lo->lo_queue; + +	/* +	 * We use punch hole to reclaim the free space used by the +	 * image a.k.a. discard. However we do not support discard if +	 * encryption is enabled, because it may give an attacker +	 * useful information. +	 */ +	if ((!file->f_op->fallocate) || +	    lo->lo_encrypt_key_size) { +		q->limits.discard_granularity = 0; +		q->limits.discard_alignment = 0; +		q->limits.max_discard_sectors = 0; +		q->limits.discard_zeroes_data = 0; +		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); +		return; +	} + +	q->limits.discard_granularity = inode->i_sb->s_blocksize; +	q->limits.discard_alignment = 0; +	q->limits.max_discard_sectors = UINT_MAX >> 9; +	q->limits.discard_zeroes_data = 1; +	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); +} +  static int loop_set_fd(struct loop_device *lo, fmode_t mode,  		       struct block_device *bdev, unsigned int arg)  { @@ -849,35 +845,23 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,  	mapping = file->f_mapping;  	inode = mapping->host; -	if (!(file->f_mode & FMODE_WRITE)) -		lo_flags |= LO_FLAGS_READ_ONLY; -  	error = -EINVAL; -	if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) { -		const struct address_space_operations *aops = mapping->a_ops; - -		if (aops->write_begin) -			lo_flags |= LO_FLAGS_USE_AOPS; -		if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) -			lo_flags |= 
LO_FLAGS_READ_ONLY; +	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) +		goto out_putf; -		lo_blocksize = S_ISBLK(inode->i_mode) ? -			inode->i_bdev->bd_block_size : PAGE_SIZE; +	if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) || +	    !file->f_op->write) +		lo_flags |= LO_FLAGS_READ_ONLY; -		error = 0; -	} else { -		goto out_putf; -	} +	lo_blocksize = S_ISBLK(inode->i_mode) ? +		inode->i_bdev->bd_block_size : PAGE_SIZE; +	error = -EFBIG;  	size = get_loop_size(lo, file); - -	if ((loff_t)(sector_t)size != size) { -		error = -EFBIG; +	if ((loff_t)(sector_t)size != size)  		goto out_putf; -	} -	if (!(mode & FMODE_WRITE)) -		lo_flags |= LO_FLAGS_READ_ONLY; +	error = 0;  	set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); @@ -919,7 +903,9 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,  	}  	lo->lo_state = Lo_bound;  	wake_up_process(lo->lo_thread); -	if (max_part > 0) +	if (part_shift) +		lo->lo_flags |= LO_FLAGS_PARTSCAN; +	if (lo->lo_flags & LO_FLAGS_PARTSCAN)  		ioctl_by_bdev(bdev, BLKRRPART, 0);  	return 0; @@ -980,10 +966,11 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,  	return err;  } -static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) +static int loop_clr_fd(struct loop_device *lo)  {  	struct file *filp = lo->lo_backing_file;  	gfp_t gfp = lo->old_gfp_mask; +	struct block_device *bdev = lo->lo_device;  	if (lo->lo_state != Lo_bound)  		return -ENXIO; @@ -1012,7 +999,6 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)  	lo->lo_offset = 0;  	lo->lo_sizelimit = 0;  	lo->lo_encrypt_key_size = 0; -	lo->lo_flags = 0;  	lo->lo_thread = NULL;  	memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);  	memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); @@ -1030,8 +1016,11 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)  	lo->lo_state = Lo_unbound;  	/* This is safe: open() is still holding a reference. 
*/  	module_put(THIS_MODULE); -	if (max_part > 0 && bdev) +	if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)  		ioctl_by_bdev(bdev, BLKRRPART, 0); +	lo->lo_flags = 0; +	if (!part_shift) +		lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;  	mutex_unlock(&lo->lo_ctl_mutex);  	/*  	 * Need not hold lo_ctl_mutex to fput backing file. @@ -1080,11 +1069,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)  	if (lo->lo_offset != info->lo_offset ||  	    lo->lo_sizelimit != info->lo_sizelimit) { -		lo->lo_offset = info->lo_offset; -		lo->lo_sizelimit = info->lo_sizelimit; -		if (figure_loop_size(lo)) +		if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit))  			return -EFBIG;  	} +	loop_config_discard(lo);  	memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);  	memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); @@ -1100,6 +1088,13 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)  	     (info->lo_flags & LO_FLAGS_AUTOCLEAR))  		lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; +	if ((info->lo_flags & LO_FLAGS_PARTSCAN) && +	     !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { +		lo->lo_flags |= LO_FLAGS_PARTSCAN; +		lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; +		ioctl_by_bdev(lo->lo_device, BLKRRPART, 0); +	} +  	lo->lo_encrypt_key_size = info->lo_encrypt_key_size;  	lo->lo_init[0] = info->lo_init[0];  	lo->lo_init[1] = info->lo_init[1]; @@ -1260,7 +1255,7 @@ static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)  	err = -ENXIO;  	if (unlikely(lo->lo_state != Lo_bound))  		goto out; -	err = figure_loop_size(lo); +	err = figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);  	if (unlikely(err))  		goto out;  	sec = get_capacity(lo->lo_disk); @@ -1293,18 +1288,24 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,  		break;  	case LOOP_CLR_FD:  		/* loop_clr_fd would have unlocked lo_ctl_mutex on success */ -		err = loop_clr_fd(lo, bdev); +		err = loop_clr_fd(lo);  		if (!err)  			goto 
out_unlocked;  		break;  	case LOOP_SET_STATUS: -		err = loop_set_status_old(lo, (struct loop_info __user *) arg); +		err = -EPERM; +		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) +			err = loop_set_status_old(lo, +					(struct loop_info __user *)arg);  		break;  	case LOOP_GET_STATUS:  		err = loop_get_status_old(lo, (struct loop_info __user *) arg);  		break;  	case LOOP_SET_STATUS64: -		err = loop_set_status64(lo, (struct loop_info64 __user *) arg); +		err = -EPERM; +		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) +			err = loop_set_status64(lo, +					(struct loop_info64 __user *) arg);  		break;  	case LOOP_GET_STATUS64:  		err = loop_get_status64(lo, (struct loop_info64 __user *) arg); @@ -1513,7 +1514,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode)  		 * In autoclear mode, stop the loop thread  		 * and remove configuration after last close.  		 */ -		err = loop_clr_fd(lo, NULL); +		err = loop_clr_fd(lo);  		if (!err)  			goto out_unlocked;  	} else { @@ -1635,6 +1636,27 @@ static int loop_add(struct loop_device **l, int i)  	if (!disk)  		goto out_free_queue; +	/* +	 * Disable partition scanning by default. The in-kernel partition +	 * scanning can be requested individually per-device during its +	 * setup. Userspace can always add and remove partitions from all +	 * devices. The needed partition minors are allocated from the +	 * extended minor space, the main loop device numbers will continue +	 * to match the loop minors, regardless of the number of partitions +	 * used. +	 * +	 * If max_part is given, partition scanning is globally enabled for +	 * all loop devices. The minors for the main loop devices will be +	 * multiples of max_part. +	 * +	 * Note: Global-for-all-devices, set-only-at-init, read-only module +	 * parameters like 'max_loop' and 'max_part' make things needlessly +	 * complicated, are too static, inflexible and may surprise +	 * userspace tools. Parameters like this in general should be avoided. 
+	 */ +	if (!part_shift) +		disk->flags |= GENHD_FL_NO_PART_SCAN; +	disk->flags |= GENHD_FL_EXT_DEVT;  	mutex_init(&lo->lo_ctl_mutex);  	lo->lo_number		= i;  	lo->lo_thread		= NULL; |