diff options
Diffstat (limited to 'drivers/block/rbd.c')
-rw-r--r-- | drivers/block/rbd.c | 115 |
1 files changed, 79 insertions, 36 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 191cd177fef2..cb1db2979d3d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -931,12 +931,14 @@ static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u64 snap_id) { u32 which; + const char *snap_name; which = rbd_dev_snap_index(rbd_dev, snap_id); if (which == BAD_SNAP_INDEX) - return NULL; + return ERR_PTR(-ENOENT); - return _rbd_dev_v1_snap_name(rbd_dev, which); + snap_name = _rbd_dev_v1_snap_name(rbd_dev, which); + return snap_name ? snap_name : ERR_PTR(-ENOMEM); } static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) @@ -1561,11 +1563,12 @@ rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) obj_request, obj_request->img_request, obj_request->result, xferred, length); /* - * ENOENT means a hole in the image. We zero-fill the - * entire length of the request. A short read also implies - * zero-fill to the end of the request. Either way we - * update the xferred count to indicate the whole request - * was satisfied. + * ENOENT means a hole in the image. We zero-fill the entire + * length of the request. A short read also implies zero-fill + * to the end of the request. An error requires the whole + * length of the request to be reported finished with an error + * to the block layer. In each case we update the xferred + * count to indicate the whole request was satisfied. */ rbd_assert(obj_request->type != OBJ_REQUEST_NODATA); if (obj_request->result == -ENOENT) { @@ -1574,14 +1577,13 @@ rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) else zero_pages(obj_request->pages, 0, length); obj_request->result = 0; - obj_request->xferred = length; } else if (xferred < length && !obj_request->result) { if (obj_request->type == OBJ_REQUEST_BIO) zero_bio_chain(obj_request->bio_list, xferred); else zero_pages(obj_request->pages, xferred, length); - obj_request->xferred = length; } + obj_request->xferred = length; obj_request_done_set(obj_request); } @@ -2167,9 +2169,9 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, struct rbd_obj_request *obj_request = NULL; struct rbd_obj_request *next_obj_request; bool write_request = img_request_write_test(img_request); - struct bio *bio_list = 0; + struct bio *bio_list = NULL; unsigned int bio_offset = 0; - struct page **pages = 0; + struct page **pages = NULL; u64 img_offset; u64 resid; u16 opcode; @@ -2207,6 +2209,11 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, rbd_segment_name_free(object_name); if (!obj_request) goto out_unwind; + /* + * set obj_request->img_request before creating the + * osd_request so that it gets the right snapc + */ + rbd_img_obj_request_add(img_request, obj_request); if (type == OBJ_REQUEST_BIO) { unsigned int clone_size; @@ -2248,11 +2255,6 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, obj_request->pages, length, offset & ~PAGE_MASK, false, false); - /* - * set obj_request->img_request before formatting - * the osd_request so that it gets the right snapc - */ - rbd_img_obj_request_add(img_request, obj_request); if (write_request) rbd_osd_req_format_write(obj_request); else @@ -2812,7 +2814,7 @@ out_err: obj_request_done_set(obj_request); } -static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id) +static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id) { struct rbd_obj_request *obj_request; struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; @@ -2827,16 +2829,17 @@ static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id) obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); if (!obj_request->osd_req) goto out; - obj_request->callback = rbd_obj_request_put; osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK, notify_id, 0, 0); rbd_osd_req_format_read(obj_request); ret = rbd_obj_request_submit(osdc, obj_request); -out: if (ret) - rbd_obj_request_put(obj_request); + goto out; + ret = rbd_obj_request_wait(obj_request); +out: + rbd_obj_request_put(obj_request); return ret; } @@ -2856,7 +2859,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) if (ret) rbd_warn(rbd_dev, "header refresh error (%d)\n", ret); - rbd_obj_notify_ack(rbd_dev, notify_id); + rbd_obj_notify_ack_sync(rbd_dev, notify_id); } /* @@ -3328,6 +3331,31 @@ static void rbd_exists_validate(struct rbd_device *rbd_dev) clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); } +static void rbd_dev_update_size(struct rbd_device *rbd_dev) +{ + sector_t size; + bool removing; + + /* + * Don't hold the lock while doing disk operations, + * or lock ordering will conflict with the bdev mutex via: + * rbd_add() -> blkdev_get() -> rbd_open() + */ + spin_lock_irq(&rbd_dev->lock); + removing = test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); + spin_unlock_irq(&rbd_dev->lock); + /* + * If the device is being removed, rbd_dev->disk has + * been destroyed, so don't try to update its size + */ + if (!removing) { + size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; + dout("setting size to %llu sectors", (unsigned long long)size); + set_capacity(rbd_dev->disk, size); + revalidate_disk(rbd_dev->disk); + } +} + static int rbd_dev_refresh(struct rbd_device *rbd_dev) { u64 mapping_size; @@ -3347,12 +3375,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) up_write(&rbd_dev->header_rwsem); if (mapping_size != rbd_dev->mapping.size) { - sector_t size; - - size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; - dout("setting size to %llu sectors", (unsigned long long)size); - set_capacity(rbd_dev->disk, size); - revalidate_disk(rbd_dev->disk); + rbd_dev_update_size(rbd_dev); } return ret; @@ -3706,12 +3729,14 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, if (ret < sizeof (size_buf)) return -ERANGE; - if (order) + if (order) { *order = size_buf.order; + dout(" order %u", (unsigned int)*order); + } *snap_size = le64_to_cpu(size_buf.size); - dout(" snap_id 0x%016llx order = %u, snap_size = %llu\n", - (unsigned long long)snap_id, (unsigned int)*order, + dout(" snap_id 0x%016llx snap_size = %llu\n", + (unsigned long long)snap_id, (unsigned long long)*snap_size); return 0; @@ -4059,8 +4084,13 @@ static u64 rbd_v2_snap_id_by_name(struct rbd_device *rbd_dev, const char *name) snap_id = snapc->snaps[which]; snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id); - if (IS_ERR(snap_name)) - break; + if (IS_ERR(snap_name)) { + /* ignore no-longer existing snapshots */ + if (PTR_ERR(snap_name) == -ENOENT) + continue; + else + break; + } found = !strcmp(name, snap_name); kfree(snap_name); } @@ -4139,8 +4169,8 @@ static int rbd_dev_spec_update(struct rbd_device *rbd_dev) /* Look up the snapshot name, and make a copy */ snap_name = rbd_snap_name(rbd_dev, spec->snap_id); - if (!snap_name) { - ret = -ENOMEM; + if (IS_ERR(snap_name)) { + ret = PTR_ERR(snap_name); goto out_err; } @@ -5130,7 +5160,7 @@ static ssize_t rbd_remove(struct bus_type *bus, bool already = false; int ret; - ret = strict_strtoul(buf, 10, &ul); + ret = kstrtoul(buf, 10, &ul); if (ret) return ret; @@ -5161,10 +5191,23 @@ static ssize_t rbd_remove(struct bus_type *bus, if (ret < 0 || already) return ret; - rbd_bus_del_dev(rbd_dev); ret = rbd_dev_header_watch_sync(rbd_dev, false); if (ret) rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); + + /* + * flush remaining watch callbacks - these must be complete + * before the osd_client is shutdown + */ + dout("%s: flushing notifies", __func__); + ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc); + /* + * Don't free anything from rbd_dev->disk until after all + * notifies are completely processed. Otherwise + * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting + * in a potential use after free of rbd_dev->disk or rbd_dev. + */ + rbd_bus_del_dev(rbd_dev); rbd_dev_image_release(rbd_dev); module_put(THIS_MODULE); |