From b3cb0d6adc4bbc70b5e37e49a6068e973545ead7 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Thu, 1 Sep 2011 18:39:10 +0800 Subject: xen-blkback: Implement discard requests ('feature-discard') ..aka ATA TRIM/SCSI UNMAP command to be passed through the frontend and used as appropiately by the backend. We also advertise certain granulity parameters to the frontend so it can plug them in. If the backend is a realy device - we just end up using 'blkdev_issue_discard' while for loopback devices - we just punch a hole in the image file. Signed-off-by: Li Dongyang [v1: Fixed up pr_debug and commit description] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 86 ++++++++++++++++++++++++++++------ drivers/block/xen-blkback/common.h | 93 ++++++++++++++++++++++++++++++------- drivers/block/xen-blkback/xenbus.c | 58 +++++++++++++++++++++++ 3 files changed, 206 insertions(+), 31 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 2330a9ad5e95..9713d5a490e4 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -39,6 +39,9 @@ #include #include #include +#include +#include +#include #include #include @@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { - pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", + pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d" + " | ds %4d\n", current->comm, blkif->st_oo_req, - blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); + blkif->st_rd_req, blkif->st_wr_req, + blkif->st_f_req, blkif->st_ds_req); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); blkif->st_rd_req = 0; blkif->st_wr_req = 0; blkif->st_oo_req = 0; + blkif->st_ds_req = 0; } int xen_blkif_schedule(void *arg) @@ -410,6 +416,42 @@ static int xen_blkbk_map(struct blkif_request *req, return ret; } +static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) +{ + int err = 0; + int status = BLKIF_RSP_OKAY; + struct block_device *bdev = blkif->vbd.bdev; + + if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) + /* just forward the discard request */ + err = blkdev_issue_discard(bdev, + req->u.discard.sector_number, + req->u.discard.nr_sectors, + GFP_KERNEL, 0); + else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) { + /* punch a hole in the backing file */ + struct loop_device *lo = bdev->bd_disk->private_data; + struct file *file = lo->lo_backing_file; + + if (file->f_op->fallocate) + err = file->f_op->fallocate(file, + FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + req->u.discard.sector_number << 9, + req->u.discard.nr_sectors << 9); + else + err = -EOPNOTSUPP; + } else + err = -EOPNOTSUPP; + + if (err == -EOPNOTSUPP) { + pr_debug(DRV_PFX "discard op failed, not supported\n"); + status = BLKIF_RSP_EOPNOTSUPP; + } else if (err) + status = BLKIF_RSP_ERROR; + + make_response(blkif, req->id, req->operation, status); +} + /* * Completion callback on the bio's. Called as bh->b_end_io() */ @@ -563,6 +605,10 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, blkif->st_f_req++; operation = WRITE_FLUSH; break; + case BLKIF_OP_DISCARD: + blkif->st_ds_req++; + operation = REQ_DISCARD; + break; case BLKIF_OP_WRITE_BARRIER: default: operation = 0; /* make gcc happy */ @@ -572,7 +618,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, /* Check that the number of segments is sane. */ nseg = req->nr_segments; - if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || + if (unlikely(nseg == 0 && operation != WRITE_FLUSH && + operation != REQ_DISCARD) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", nseg); @@ -627,10 +674,14 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (xen_blkbk_map(req, pending_req, seg)) + if (operation != BLKIF_OP_DISCARD && + xen_blkbk_map(req, pending_req, seg)) goto fail_flush; - /* This corresponding xen_blkif_put is done in __end_block_io_op */ + /* + * This corresponding xen_blkif_put is done in __end_block_io_op, or + * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. + */ xen_blkif_get(blkif); for (i = 0; i < nseg; i++) { @@ -654,18 +705,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, preq.sector_number += seg[i].nsec; } - /* This will be hit if the operation was a flush. */ + /* This will be hit if the operation was a flush or discard. */ if (!bio) { - BUG_ON(operation != WRITE_FLUSH); + BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD); - bio = bio_alloc(GFP_KERNEL, 0); - if (unlikely(bio == NULL)) - goto fail_put_bio; + if (operation == WRITE_FLUSH) { + bio = bio_alloc(GFP_KERNEL, 0); + if (unlikely(bio == NULL)) + goto fail_put_bio; - biolist[nbio++] = bio; - bio->bi_bdev = preq.bdev; - bio->bi_private = pending_req; - bio->bi_end_io = end_block_io_op; + biolist[nbio++] = bio; + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + } else if (operation == REQ_DISCARD) { + xen_blk_discard(blkif, req); + xen_blkif_put(blkif); + free_req(pending_req); + return 0; + } } /* diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 9e40b283a468..bfb532ea5b1b 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -63,13 +63,26 @@ struct blkif_common_response { /* i386 protocol version */ #pragma pack(push, 4) + +struct blkif_x86_32_request_rw { + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; + +struct blkif_x86_32_request_discard { + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + uint64_t nr_sectors; +}; + struct blkif_x86_32_request { uint8_t operation; /* BLKIF_OP_??? */ uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ uint64_t id; /* private guest value, echoed in resp */ - blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + union { + struct blkif_x86_32_request_rw rw; + struct blkif_x86_32_request_discard discard; + } u; }; struct blkif_x86_32_response { uint64_t id; /* copied from request */ @@ -79,13 +92,26 @@ struct blkif_x86_32_response { #pragma pack(pop) /* x86_64 protocol version */ + +struct blkif_x86_64_request_rw { + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; + +struct blkif_x86_64_request_discard { + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + uint64_t nr_sectors; +}; + struct blkif_x86_64_request { uint8_t operation; /* BLKIF_OP_??? */ uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ uint64_t __attribute__((__aligned__(8))) id; - blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + union { + struct blkif_x86_64_request_rw rw; + struct blkif_x86_64_request_discard discard; + } u; }; struct blkif_x86_64_response { uint64_t __attribute__((__aligned__(8))) id; @@ -113,6 +139,11 @@ enum blkif_protocol { BLKIF_PROTOCOL_X86_64 = 3, }; +enum blkif_backend_type { + BLKIF_BACKEND_PHY = 1, + BLKIF_BACKEND_FILE = 2, +}; + struct xen_vbd { /* What the domain refers to this vbd as. */ blkif_vdev_t handle; @@ -138,6 +169,7 @@ struct xen_blkif { unsigned int irq; /* Comms information. */ enum blkif_protocol blk_protocol; + enum blkif_backend_type blk_backend_type; union blkif_back_rings blk_rings; struct vm_struct *blk_ring_area; /* The VBD attached to this interface. */ @@ -159,6 +191,7 @@ struct xen_blkif { int st_wr_req; int st_oo_req; int st_f_req; + int st_ds_req; int st_rd_sect; int st_wr_sect; @@ -182,7 +215,7 @@ struct xen_blkif { struct phys_req { unsigned short dev; - unsigned short nr_sects; + blkif_sector_t nr_sects; struct block_device *bdev; blkif_sector_t sector_number; }; @@ -206,12 +239,25 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; - dst->u.rw.sector_number = src->sector_number; - barrier(); - if (n > dst->nr_segments) - n = dst->nr_segments; - for (i = 0; i < n; i++) - dst->u.rw.seg[i] = src->seg[i]; + switch (src->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + case BLKIF_OP_WRITE_BARRIER: + case BLKIF_OP_FLUSH_DISKCACHE: + dst->u.rw.sector_number = src->u.rw.sector_number; + barrier(); + if (n > dst->nr_segments) + n = dst->nr_segments; + for (i = 0; i < n; i++) + dst->u.rw.seg[i] = src->u.rw.seg[i]; + break; + case BLKIF_OP_DISCARD: + dst->u.discard.sector_number = src->u.discard.sector_number; + dst->u.discard.nr_sectors = src->u.discard.nr_sectors; + break; + default: + break; + } } static inline void blkif_get_x86_64_req(struct blkif_request *dst, @@ -222,12 +268,25 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; - dst->u.rw.sector_number = src->sector_number; - barrier(); - if (n > dst->nr_segments) - n = dst->nr_segments; - for (i = 0; i < n; i++) - dst->u.rw.seg[i] = src->seg[i]; + switch (src->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + case BLKIF_OP_WRITE_BARRIER: + case BLKIF_OP_FLUSH_DISKCACHE: + dst->u.rw.sector_number = src->u.rw.sector_number; + barrier(); + if (n > dst->nr_segments) + n = dst->nr_segments; + for (i = 0; i < n; i++) + dst->u.rw.seg[i] = src->u.rw.seg[i]; + break; + case BLKIF_OP_DISCARD: + dst->u.discard.sector_number = src->u.discard.sector_number; + dst->u.discard.nr_sectors = src->u.discard.nr_sectors; + break; + default: + break; + } } #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 3f129b45451a..2b3aef0332f3 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -272,6 +272,7 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); +VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req); VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); @@ -280,6 +281,7 @@ static struct attribute *xen_vbdstat_attrs[] = { &dev_attr_rd_req.attr, &dev_attr_wr_req.attr, &dev_attr_f_req.attr, + &dev_attr_ds_req.attr, &dev_attr_rd_sect.attr, &dev_attr_wr_sect.attr, NULL @@ -419,6 +421,60 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, return err; } +int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be) +{ + struct xenbus_device *dev = be->dev; + struct xen_blkif *blkif = be->blkif; + char *type; + int err; + int state = 0; + + type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL); + if (!IS_ERR(type)) { + if (strncmp(type, "file", 4) == 0) { + state = 1; + blkif->blk_backend_type = BLKIF_BACKEND_FILE; + } + if (strncmp(type, "phy", 3) == 0) { + struct block_device *bdev = be->blkif->vbd.bdev; + struct request_queue *q = bdev_get_queue(bdev); + if (blk_queue_discard(q)) { + err = xenbus_printf(xbt, dev->nodename, + "discard-granularity", "%u", + q->limits.discard_granularity); + if (err) { + xenbus_dev_fatal(dev, err, + "writing discard-granularity"); + goto kfree; + } + err = xenbus_printf(xbt, dev->nodename, + "discard-alignment", "%u", + q->limits.discard_alignment); + if (err) { + xenbus_dev_fatal(dev, err, + "writing discard-alignment"); + goto kfree; + } + state = 1; + blkif->blk_backend_type = BLKIF_BACKEND_PHY; + } + } + } else { + err = PTR_ERR(type); + xenbus_dev_fatal(dev, err, "reading type"); + goto out; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-discard", + "%d", state); + if (err) + xenbus_dev_fatal(dev, err, "writing feature-discard"); +kfree: + kfree(type); +out: + return err; +} + /* * Entry point to this code when a new device is created. Allocate the basic * structures, and watch the store waiting for the hotplug scripts to tell us @@ -650,6 +706,8 @@ again: if (err) goto abort; + err = xen_blkbk_discard(xbt, be); + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(&be->blkif->vbd)); if (err) { -- cgit v1.2.3 From ed30bf317c5ceb25166cdbce3e0b35e33c82b509 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Thu, 1 Sep 2011 18:39:09 +0800 Subject: xen-blkfront: Handle discard requests. If the backend advertises 'feature-discard', then interrogate the backend for alignment and granularity. Setup the request queue with the appropiate values and send the discard operation as required. Signed-off-by: Li Dongyang [v1: Amended commit description] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 111 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 88 insertions(+), 23 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index b536a9cef917..52076b0d0326 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -98,6 +98,9 @@ struct blkfront_info unsigned long shadow_free; unsigned int feature_flush; unsigned int flush_op; + unsigned int feature_discard; + unsigned int discard_granularity; + unsigned int discard_alignment; int is_ready; }; @@ -302,29 +305,36 @@ static int blkif_queue_request(struct request *req) ring_req->operation = info->flush_op; } - ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); - BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); + if (unlikely(req->cmd_flags & REQ_DISCARD)) { + /* id, sector_number and handle are set above. */ + ring_req->operation = BLKIF_OP_DISCARD; + ring_req->nr_segments = 0; + ring_req->u.discard.nr_sectors = blk_rq_sectors(req); + } else { + ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); + BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); - for_each_sg(info->sg, sg, ring_req->nr_segments, i) { - buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); - fsect = sg->offset >> 9; - lsect = fsect + (sg->length >> 9) - 1; - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head); - BUG_ON(ref == -ENOSPC); + for_each_sg(info->sg, sg, ring_req->nr_segments, i) { + buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); + fsect = sg->offset >> 9; + lsect = fsect + (sg->length >> 9) - 1; + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&gref_head); + BUG_ON(ref == -ENOSPC); - gnttab_grant_foreign_access_ref( - ref, - info->xbdev->otherend_id, - buffer_mfn, - rq_data_dir(req) ); - - info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); - ring_req->u.rw.seg[i] = - (struct blkif_request_segment) { - .gref = ref, - .first_sect = fsect, - .last_sect = lsect }; + gnttab_grant_foreign_access_ref( + ref, + info->xbdev->otherend_id, + buffer_mfn, + rq_data_dir(req)); + + info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); + ring_req->u.rw.seg[i] = + (struct blkif_request_segment) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; + } } info->ring.req_prod_pvt++; @@ -399,6 +409,7 @@ wait: static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) { struct request_queue *rq; + struct blkfront_info *info = gd->private_data; rq = blk_init_queue(do_blkif_request, &blkif_io_lock); if (rq == NULL) @@ -406,6 +417,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); + if (info->feature_discard) { + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); + blk_queue_max_discard_sectors(rq, get_capacity(gd)); + rq->limits.discard_granularity = info->discard_granularity; + rq->limits.discard_alignment = info->discard_alignment; + } + /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, sector_size); blk_queue_max_hw_sectors(rq, 512); @@ -722,6 +740,19 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; switch (bret->operation) { + case BLKIF_OP_DISCARD: + if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { + struct request_queue *rq = info->rq; + printk(KERN_WARNING "blkfront: %s: discard op failed\n", + info->gd->disk_name); + error = -EOPNOTSUPP; + info->feature_discard = 0; + spin_lock(rq->queue_lock); + queue_flag_clear(QUEUE_FLAG_DISCARD, rq); + spin_unlock(rq->queue_lock); + } + __blk_end_request_all(req, error); + break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { @@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info) bdput(bdev); } +static void blkfront_setup_discard(struct blkfront_info *info) +{ + int err; + char *type; + unsigned int discard_granularity; + unsigned int discard_alignment; + + type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL); + if (IS_ERR(type)) + return; + + if (strncmp(type, "phy", 3) == 0) { + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "discard-granularity", "%u", &discard_granularity, + "discard-alignment", "%u", &discard_alignment, + NULL); + if (!err) { + info->feature_discard = 1; + info->discard_granularity = discard_granularity; + info->discard_alignment = discard_alignment; + } + } else if (strncmp(type, "file", 4) == 0) + info->feature_discard = 1; + + kfree(type); +} + /* * Invoked when the backend is finally 'ready' (and has told produced * the details about the physical device - #sectors, size, etc). @@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info *info) unsigned long sector_size; unsigned int binfo; int err; - int barrier, flush; + int barrier, flush, discard; switch (info->connected) { case BLKIF_STATE_CONNECTED: @@ -1178,7 +1236,14 @@ static void blkfront_connect(struct blkfront_info *info) info->feature_flush = REQ_FLUSH; info->flush_op = BLKIF_OP_FLUSH_DISKCACHE; } - + + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-discard", "%d", &discard, + NULL); + + if (!err && discard) + blkfront_setup_discard(info); + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", -- cgit v1.2.3 From 69ef68cef961a934c35308843a04b26d91cad4a8 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Wed, 14 Sep 2011 14:02:40 +0800 Subject: xen-blkfront: fix a deadlock while handling discard response When we get -EOPNOTSUPP response for a discard request, we will clear the discard flag on the request queue so we won't attempt to send discard requests to backend again, and this should be protected under rq->queue_lock. However, when we setup the request queue, we pass blkif_io_lock to blk_init_queue so rq->queue_lock is blkif_io_lock indeed, and this lock is already taken when we are in blkif_interrpt, so remove the spin_lock/spin_unlock when we clear the discard flag or we will end up with deadlock here Signed-off-by: Li Dongyang [v1: Updated description a bit and removed comment from source] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 52076b0d0326..7d1487760283 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -747,9 +747,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) info->gd->disk_name); error = -EOPNOTSUPP; info->feature_discard = 0; - spin_lock(rq->queue_lock); queue_flag_clear(QUEUE_FLAG_DISCARD, rq); - spin_unlock(rq->queue_lock); } __blk_end_request_all(req, error); break; -- cgit v1.2.3 From c555aab97de139ac8762c922248bb68f43a8c488 Mon Sep 17 00:00:00 2001 From: Joe Jin Date: Mon, 15 Aug 2011 12:57:07 +0800 Subject: xen-blkback: fixed indentation and comments This patch fixes belows: 1. Fix code style issue. 2. Fix incorrect functions name in comments. Signed-off-by: Joe Jin Cc: Jens Axboe Cc: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 2 +- drivers/block/xen-blkback/xenbus.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index bfb532ea5b1b..1b1bc4458685 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -46,7 +46,7 @@ #define DRV_PFX "xen-blkback:" #define DPRINTK(fmt, args...) \ - pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ + pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ __func__, __LINE__, ##args) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 2b3aef0332f3..1c44b3254134 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -646,7 +646,7 @@ static void frontend_changed(struct xenbus_device *dev, /* * Enforce precondition before potential leak point. - * blkif_disconnect() is idempotent. + * xen_blkif_disconnect() is idempotent. */ xen_blkif_disconnect(be->blkif); @@ -667,7 +667,7 @@ static void frontend_changed(struct xenbus_device *dev, break; /* fall through if not online */ case XenbusStateUnknown: - /* implies blkif_disconnect() via blkback_remove() */ + /* implies xen_blkif_disconnect() via xen_blkbk_remove() */ device_unregister(&dev->dev); break; -- cgit v1.2.3 From 8e6dc6fe51957116d363204a725c1262b4b78e19 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 16 Sep 2011 08:38:09 +0100 Subject: xen-blkback: use kzalloc() in favor of kmalloc()+memset() This fixes the problem of three of those four memset()-s having improper size arguments passed: Sizeof a pointer-typed expression returns the size of the pointer, not that of the pointed to data. It also reverts using kmalloc() instead of kzalloc() for the allocation of the pending grant handles array, as that array gets fully initialized in a subsequent loop. Reported-by: Julia Lawall Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 9713d5a490e4..e0dab614049c 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -823,9 +823,9 @@ static int __init xen_blkif_init(void) mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; - blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * + blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) * xen_blkif_reqs, GFP_KERNEL); - blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * + blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * mmap_pages, GFP_KERNEL); blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages, GFP_KERNEL); @@ -848,8 +848,6 @@ static int __init xen_blkif_init(void) if (rc) goto failed_init; - memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs)); - INIT_LIST_HEAD(&blkbk->pending_free); spin_lock_init(&blkbk->pending_free_lock); init_waitqueue_head(&blkbk->pending_free_wq); -- cgit v1.2.3 From d11e6158307bed3f178399a4e6216eec67d16200 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 16 Sep 2011 15:15:14 -0400 Subject: xen-blkfront: If no barrier or flush is supported, use invalid operation. Guard against issuing BLKIF_OP_WRITE_BARRIER or BLKIF_OP_FLUSH_CACHE by checking whether we successfully negotiated with the backend. The negotiation with the backend also sets the q->flush_flags which fortunately for us is also used when submitting an bio to us. If we don't support barriers or flushes it would be set to zero so we should never end up having to deal with REQ_FLUSH | REQ_FUA. However, other third party implementations of __make_request that might be stacked on top of us might not be so smart, so lets fix this up. Acked-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 7d1487760283..8bf0cb7b2565 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -380,7 +380,9 @@ static void do_blkif_request(struct request_queue *rq) blk_start_request(req); - if (req->cmd_type != REQ_TYPE_FS) { + if ((req->cmd_type != REQ_TYPE_FS) || + ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) && + !info->flush_op)) { __blk_end_request_all(req, -EIO); continue; } -- cgit v1.2.3 From 469738e675524b6aa029ecd46bdda3f878b12eff Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Fri, 7 Oct 2011 21:34:38 +0200 Subject: xen-blkfront: plug device number leak in xlblk_init() error path ... though after a failed xenbus_register_frontend() all may be lost. Acked-by: Ian Campbell Signed-off-by: Laszlo Ersek Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8bf0cb7b2565..773da7d6491e 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1450,6 +1450,8 @@ static struct xenbus_driver blkfront = { static int __init xlblk_init(void) { + int ret; + if (!xen_domain()) return -ENODEV; @@ -1459,7 +1461,13 @@ static int __init xlblk_init(void) return -ENODEV; } - return xenbus_register_frontend(&blkfront); + ret = xenbus_register_frontend(&blkfront); + if (ret) { + unregister_blkdev(XENVBD_MAJOR, DEV_NAME); + return ret; + } + + return 0; } module_init(xlblk_init); -- cgit v1.2.3 From 29bde093787f3bdf7b9b4270ada6be7c8076e36b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 10 Oct 2011 00:42:22 -0400 Subject: xen/blkback: Support 'feature-barrier' aka old-style BARRIER requests. We emulate the barrier requests by draining the outstanding bio's and then sending the WRITE_FLUSH command. To drain the I/Os we use the refcnt that is used during disconnect to wait for all the I/Os before disconnecting from the frontend. We latch on its value and if it reaches either the threshold for disconnect or when there are no more outstanding I/Os, then we have drained all I/Os. Suggested-by: Christopher Hellwig Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 37 +++++++++++++++++++++++++++++++++++-- drivers/block/xen-blkback/common.h | 5 +++++ drivers/block/xen-blkback/xenbus.c | 18 ++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index e0dab614049c..184b1335c8e9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -452,6 +452,23 @@ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) make_response(blkif, req->id, req->operation, status); } +static void xen_blk_drain_io(struct xen_blkif *blkif) +{ + atomic_set(&blkif->drain, 1); + do { + wait_for_completion_interruptible_timeout( + &blkif->drain_complete, HZ); + + if (!atomic_read(&blkif->drain)) + break; + /* The initial value is one, and one refcnt taken at the + * start of the xen_blkif_schedule thread. */ + if (atomic_read(&blkif->refcnt) <= 2) + break; + } while (!kthread_should_stop()); + atomic_set(&blkif->drain, 0); +} + /* * Completion callback on the bio's. Called as bh->b_end_io() */ @@ -464,6 +481,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) pr_debug(DRV_PFX "flush diskcache op failed, not supported\n"); xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; + } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && + (error == -EOPNOTSUPP)) { + pr_debug(DRV_PFX "write barrier op failed, not supported\n"); + xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); + pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { pr_debug(DRV_PFX "Buffer not up-to-date at end of operation," " error=%d\n", error); @@ -481,6 +503,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) pending_req->operation, pending_req->status); xen_blkif_put(pending_req->blkif); free_req(pending_req); + if (atomic_read(&pending_req->blkif->refcnt) <= 2) { + if (atomic_read(&pending_req->blkif->drain)) + complete(&pending_req->blkif->drain_complete); + } } } @@ -574,7 +600,6 @@ do_block_io_op(struct xen_blkif *blkif) return more_to_do; } - /* * Transmutation of the 'struct blkif_request' to a proper 'struct bio' * and call the 'submit_bio' to pass it to the underlying storage. @@ -591,6 +616,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, int i, nbio = 0; int operation; struct blk_plug plug; + bool drain = false; switch (req->operation) { case BLKIF_OP_READ: @@ -601,6 +627,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, blkif->st_wr_req++; operation = WRITE_ODIRECT; break; + case BLKIF_OP_WRITE_BARRIER: + drain = true; case BLKIF_OP_FLUSH_DISKCACHE: blkif->st_f_req++; operation = WRITE_FLUSH; @@ -609,7 +637,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, blkif->st_ds_req++; operation = REQ_DISCARD; break; - case BLKIF_OP_WRITE_BARRIER: default: operation = 0; /* make gcc happy */ goto fail_response; @@ -668,6 +695,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } } + /* Wait on all outstanding I/O's and once that has been completed + * issue the WRITE_FLUSH. + */ + if (drain) + xen_blk_drain_io(pending_req->blkif); + /* * If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 1b1bc4458685..e638457d9de4 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -181,6 +181,9 @@ struct xen_blkif { atomic_t refcnt; wait_queue_head_t wq; + /* for barrier (drain) requests */ + struct completion drain_complete; + atomic_t drain; /* One thread per one blkif. */ struct task_struct *xenblkd; unsigned int waiting_reqs; @@ -229,6 +232,8 @@ int xen_blkif_schedule(void *arg); int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, struct backend_info *be, int state); +int xen_blkbk_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state); struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); static inline void blkif_get_x86_32_req(struct blkif_request *dst, diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 1c44b3254134..a6d43030b107 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -114,6 +114,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) spin_lock_init(&blkif->blk_ring_lock); atomic_set(&blkif->refcnt, 1); init_waitqueue_head(&blkif->wq); + init_completion(&blkif->drain_complete); + atomic_set(&blkif->drain, 0); blkif->st_print = jiffies; init_waitqueue_head(&blkif->waiting_to_free); @@ -474,6 +476,19 @@ kfree: out: return err; } +int xen_blkbk_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state) +{ + struct xenbus_device *dev = be->dev; + int err; + + err = xenbus_printf(xbt, dev->nodename, "feature-barrier", + "%d", state); + if (err) + xenbus_dev_fatal(dev, err, "writing feature-barrier"); + + return err; +} /* * Entry point to this code when a new device is created. Allocate the basic @@ -708,6 +723,9 @@ again: err = xen_blkbk_discard(xbt, be); + /* If we can't advertise it is OK. */ + err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support); + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(&be->blkif->vbd)); if (err) { -- cgit v1.2.3 From 5c62cb48602dba95159c81ffeca179d3852e25be Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 10 Oct 2011 12:33:21 -0400 Subject: xen/blkback: Report VBD_WSECT (wr_sect) properly. We did not increment the amount of sectors written to disk b/c we tested for the == WRITE which is incorrect - as the operations are more of WRITE_FLUSH, WRITE_ODIRECT. This patch fixes it by doing a & WRITE check. CC: stable@kernel.org Reported-by: Andy Burns Suggested-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 184b1335c8e9..c15c559e8662 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -776,7 +776,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, if (operation == READ) blkif->st_rd_sect += preq.nr_sects; - else if (operation == WRITE || operation == WRITE_FLUSH) + else if (operation & WRITE) blkif->st_wr_sect += preq.nr_sects; return 0; -- cgit v1.2.3 From 64391b2536ca92f9c589b2bfeaca3954896fe057 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 10 Oct 2011 00:47:49 -0400 Subject: xen/blkback: Fix the inhibition to map pages when discarding sector ranges. The 'operation' parameters are the ones provided to the bio layer while the req->operation are the ones passed in between the backend and frontend. We used the wrong 'operation' value to squash the call to map pages when processing the discard operation resulting in an hypercall that did nothing. Lets guard against going in the mapping function by checking for the proper operation type. CC: Li Dongyang Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index c15c559e8662..53c81de6f886 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -707,8 +707,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (operation != BLKIF_OP_DISCARD && - xen_blkbk_map(req, pending_req, seg)) + if (operation == REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* -- cgit v1.2.3 From dda1852802a6cc6fdecb9021e491b2de680c76b9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 14 Oct 2011 12:13:05 -0400 Subject: xen/blkback: Check for proper operation. The patch titled: "xen/blkback: Fix the inhibition to map pages when discarding sector ranges." had the right idea except that it used the wrong comparison operator. It had == instead of !=. This fixes the bug where all (except discard) operations would have been ignored. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 53c81de6f886..a1ee2659d2bc 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -707,7 +707,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (operation == REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) + if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* -- cgit v1.2.3 From 6927d92091df2848fc0e6a693a017d4b2df549c2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 17 Oct 2011 14:27:48 -0400 Subject: xen/blkback: Fix two races in the handling of barrier requests. There are two windows of opportunity to cause a race when processing a barrier request. This patch fixes this. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index a1ee2659d2bc..79efec24569b 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -456,15 +456,15 @@ static void xen_blk_drain_io(struct xen_blkif *blkif) { atomic_set(&blkif->drain, 1); do { + /* The initial value is one, and one refcnt taken at the + * start of the xen_blkif_schedule thread. */ + if (atomic_read(&blkif->refcnt) <= 2) + break; wait_for_completion_interruptible_timeout( &blkif->drain_complete, HZ); if (!atomic_read(&blkif->drain)) break; - /* The initial value is one, and one refcnt taken at the - * start of the xen_blkif_schedule thread. */ - if (atomic_read(&blkif->refcnt) <= 2) - break; } while (!kthread_should_stop()); atomic_set(&blkif->drain, 0); } @@ -502,11 +502,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); xen_blkif_put(pending_req->blkif); - free_req(pending_req); if (atomic_read(&pending_req->blkif->refcnt) <= 2) { if (atomic_read(&pending_req->blkif->drain)) complete(&pending_req->blkif->drain_complete); } + free_req(pending_req); } } -- cgit v1.2.3