summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- block/blk-iocost.c 2
-rw-r--r-- block/blk-mq-sched.c 22
-rw-r--r-- block/genhd.c 36
-rw-r--r-- drivers/s390/block/dasd.c 27
-rw-r--r-- drivers/s390/block/dasd_eckd.c 163
-rw-r--r-- drivers/s390/block/dasd_int.h 15
-rw-r--r-- include/linux/genhd.h 13
7 files changed, 253 insertions, 25 deletions
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 27ca68621137..9a599cc28c29 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1318,7 +1318,7 @@ static bool iocg_is_idle(struct ioc_gq *iocg)
return false;
/* is something in flight? */
- if (atomic64_read(&iocg->done_vtime) < atomic64_read(&iocg->vtime))
+ if (atomic64_read(&iocg->done_vtime) != atomic64_read(&iocg->vtime))
return false;
return true;
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 856356b1619e..74cedea56034 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -398,6 +398,28 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
WARN_ON(e && (rq->tag != -1));
if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
+ /*
+ * Firstly normal IO request is inserted to scheduler queue or
+ * sw queue, meantime we add flush request to dispatch queue(
+ * hctx->dispatch) directly and there is at most one in-flight
+ * flush request for each hw queue, so it doesn't matter to add
+ * flush request to tail or front of the dispatch queue.
+ *
+ * Secondly in case of NCQ, flush request belongs to non-NCQ
+ * command, and queueing it will fail when there is any
+ * in-flight normal IO request(NCQ command). When adding flush
+ * rq to the front of hctx->dispatch, it is easier to introduce
+ * extra time to flush rq's latency because of S_SCHED_RESTART
+ * compared with adding to the tail of dispatch queue, then
+ * chance of flush merge is increased, and less flush requests
+ * will be issued to controller. It is observed that ~10% time
+ * is saved in blktests block/004 on disk attached to AHCI/NCQ
+ * drive when adding flush rq to the front of hctx->dispatch.
+ *
+ * Simply queue flush rq to the front of hctx->dispatch so that
+ * intensive flush workloads can benefit in case of NCQ HW.
+ */
+ at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
blk_mq_request_bypass_insert(rq, at_head, false);
goto run;
}
diff --git a/block/genhd.c b/block/genhd.c
index ff6268970ddc..9c2e13ce0d19 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -301,6 +301,42 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
}
EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
+/**
+ * disk_has_partitions - test whether a disk has any partition entry
+ * @disk: gendisk of interest
+ *
+ * Looks at the disk's partition table under rcu_read_lock() and stops at
+ * the first populated slot found past index 0.
+ *
+ * CONTEXT:
+ * Don't care.
+ *
+ * RETURNS:
+ * True if any slot at index 1 or above holds a non-NULL entry,
+ * otherwise false.
+ */
+bool disk_has_partitions(struct gendisk *disk)
+{
+	struct disk_part_tbl *tbl;
+	bool found = false;
+	int idx = 1;	/* slot 0 is the whole device, not a partition */
+
+	rcu_read_lock();
+	tbl = rcu_dereference(disk->part_tbl);
+
+	while (!found && idx < tbl->len) {
+		if (rcu_dereference(tbl->part[idx]))
+			found = true;
+		idx++;
+	}
+
+	rcu_read_unlock();
+
+	return found;
+}
+EXPORT_SYMBOL_GPL(disk_has_partitions);
+
/*
* Can be deleted altogether. Later.
*
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 6cca72782af6..cf87eb27879f 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -178,6 +178,8 @@ struct dasd_block *dasd_alloc_block(void)
(unsigned long) block);
INIT_LIST_HEAD(&block->ccw_queue);
spin_lock_init(&block->queue_lock);
+ INIT_LIST_HEAD(&block->format_list);
+ spin_lock_init(&block->format_lock);
timer_setup(&block->timer, dasd_block_timeout, 0);
spin_lock_init(&block->profile.lock);
@@ -1779,20 +1781,26 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
if (dasd_ese_needs_format(cqr->block, irb)) {
if (rq_data_dir((struct request *)cqr->callback_data) == READ) {
- device->discipline->ese_read(cqr);
+ device->discipline->ese_read(cqr, irb);
cqr->status = DASD_CQR_SUCCESS;
cqr->stopclk = now;
dasd_device_clear_timer(device);
dasd_schedule_device_bh(device);
return;
}
- fcqr = device->discipline->ese_format(device, cqr);
+ fcqr = device->discipline->ese_format(device, cqr, irb);
if (IS_ERR(fcqr)) {
+ if (PTR_ERR(fcqr) == -EINVAL) {
+ cqr->status = DASD_CQR_ERROR;
+ return;
+ }
/*
* If we can't format now, let the request go
* one extra round. Maybe we can format later.
*/
cqr->status = DASD_CQR_QUEUED;
+ dasd_schedule_device_bh(device);
+ return;
} else {
fcqr->status = DASD_CQR_QUEUED;
cqr->status = DASD_CQR_QUEUED;
@@ -2748,11 +2756,13 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
{
struct request *req;
blk_status_t error = BLK_STS_OK;
+ unsigned int proc_bytes;
int status;
req = (struct request *) cqr->callback_data;
dasd_profile_end(cqr->block, cqr, req);
+ proc_bytes = cqr->proc_bytes;
status = cqr->block->base->discipline->free_cp(cqr, req);
if (status < 0)
error = errno_to_blk_status(status);
@@ -2783,7 +2793,18 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
blk_mq_end_request(req, error);
blk_mq_run_hw_queues(req->q, true);
} else {
- blk_mq_complete_request(req);
+ /*
+ * Partial completed requests can happen with ESE devices.
+ * During read we might have gotten a NRF error and have to
+ * complete a request partially.
+ */
+ if (proc_bytes) {
+ blk_update_request(req, BLK_STS_OK,
+ blk_rq_bytes(req) - proc_bytes);
+ blk_mq_requeue_request(req, true);
+ } else {
+ blk_mq_complete_request(req);
+ }
}
}
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index a28b9ff82378..ad44d22e8859 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -207,6 +207,45 @@ static void set_ch_t(struct ch_t *geo, __u32 cyl, __u8 head)
geo->head |= head;
}
+/*
+ * Work out which track failed from the sense data attached to @irb.
+ * The cylinder number is spread over the sense bytes differently
+ * depending on whether it is an EAV device (enhanced addressing) or not.
+ *
+ * On success the track is stored in @track and 0 is returned; -EINVAL
+ * is returned when there is no sense data or no valid track data.
+ */
+static int dasd_eckd_track_from_irb(struct irb *irb, struct dasd_device *device,
+				    sector_t *track)
+{
+	struct dasd_eckd_private *private = device->private;
+	u8 *sense_data = dasd_get_sense(irb);
+	u32 cylinder;
+	u8 hd;
+
+	if (!sense_data) {
+		DBF_DEV_EVENT(DBF_WARNING, device, "%s",
+			      "ESE error no sense data\n");
+		return -EINVAL;
+	}
+	if ((sense_data[27] & DASD_SENSE_BIT_2) == 0) {
+		DBF_DEV_EVENT(DBF_WARNING, device, "%s",
+			      "ESE error no valid track data\n");
+		return -EINVAL;
+	}
+
+	/* the head always lives in the low nibble of byte 31 */
+	hd = sense_data[31] & 0x0F;
+
+	if (sense_data[27] & DASD_SENSE_BIT_3) {
+		/* enhanced addressing */
+		cylinder = (sense_data[30] << 20) |
+			   ((sense_data[31] & 0xF0) << 12) |
+			   (sense_data[28] << 8) |
+			   sense_data[29];
+	} else {
+		cylinder = (sense_data[29] << 8) | sense_data[30];
+	}
+
+	*track = cylinder * private->rdc_data.trk_per_cyl + hd;
+	return 0;
+}
+
static int set_timestamp(struct ccw1 *ccw, struct DE_eckd_data *data,
struct dasd_device *device)
{
@@ -2986,6 +3025,37 @@ static int dasd_eckd_format_device(struct dasd_device *base,
0, NULL);
}
+/*
+ * Put @to_format on the format list of @block unless an entry for the
+ * same track is already queued there. The list is walked and modified
+ * with block->format_lock held.
+ *
+ * Returns true if the track was already on the list (nothing is added),
+ * false if @to_format has been appended.
+ */
+static bool test_and_set_format_track(struct dasd_format_entry *to_format,
+				      struct dasd_block *block)
+{
+	struct dasd_format_entry *cur;
+	unsigned long irq_flags;
+	bool already_listed = false;
+
+	spin_lock_irqsave(&block->format_lock, irq_flags);
+
+	list_for_each_entry(cur, &block->format_list, list) {
+		if (cur->track == to_format->track) {
+			already_listed = true;
+			break;
+		}
+	}
+	if (!already_listed)
+		list_add_tail(&to_format->list, &block->format_list);
+
+	spin_unlock_irqrestore(&block->format_lock, irq_flags);
+
+	return already_listed;
+}
+
+/*
+ * Take @format off the format list of @block again. The list is
+ * modified with block->format_lock held.
+ */
+static void clear_format_track(struct dasd_format_entry *format,
+			       struct dasd_block *block)
+{
+	spinlock_t *lock = &block->format_lock;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(lock, irq_flags);
+	list_del_init(&format->list);
+	spin_unlock_irqrestore(lock, irq_flags);
+}
+
/*
* Callback function to free ESE format requests.
*/
@@ -2993,15 +3063,19 @@ static void dasd_eckd_ese_format_cb(struct dasd_ccw_req *cqr, void *data)
{
struct dasd_device *device = cqr->startdev;
struct dasd_eckd_private *private = device->private;
+ struct dasd_format_entry *format = data;
+ clear_format_track(format, cqr->basedev->block);
private->count--;
dasd_ffree_request(cqr, device);
}
static struct dasd_ccw_req *
-dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
+dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr,
+ struct irb *irb)
{
struct dasd_eckd_private *private;
+ struct dasd_format_entry *format;
struct format_data_t fdata;
unsigned int recs_per_trk;
struct dasd_ccw_req *fcqr;
@@ -3011,23 +3085,39 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
struct request *req;
sector_t first_trk;
sector_t last_trk;
+ sector_t curr_trk;
int rc;
req = cqr->callback_data;
- base = cqr->block->base;
+ block = cqr->block;
+ base = block->base;
private = base->private;
- block = base->block;
blksize = block->bp_block;
recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
+ format = &startdev->format_entry;
first_trk = blk_rq_pos(req) >> block->s2b_shift;
sector_div(first_trk, recs_per_trk);
last_trk =
(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
sector_div(last_trk, recs_per_trk);
+ rc = dasd_eckd_track_from_irb(irb, base, &curr_trk);
+ if (rc)
+ return ERR_PTR(rc);
- fdata.start_unit = first_trk;
- fdata.stop_unit = last_trk;
+ if (curr_trk < first_trk || curr_trk > last_trk) {
+ DBF_DEV_EVENT(DBF_WARNING, startdev,
+ "ESE error track %llu not within range %llu - %llu\n",
+ curr_trk, first_trk, last_trk);
+ return ERR_PTR(-EINVAL);
+ }
+ format->track = curr_trk;
+ /* test if track is already in formatting by another thread */
+ if (test_and_set_format_track(format, block))
+ return ERR_PTR(-EEXIST);
+
+ fdata.start_unit = curr_trk;
+ fdata.stop_unit = curr_trk;
fdata.blksize = blksize;
fdata.intensity = private->uses_cdl ? DASD_FMT_INT_COMPAT : 0;
@@ -3044,6 +3134,7 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
return fcqr;
fcqr->callback = dasd_eckd_ese_format_cb;
+ fcqr->callback_data = (void *) format;
return fcqr;
}
@@ -3051,29 +3142,87 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
/*
 * When data is read from an unformatted area of an ESE volume, this function
 * returns zeroed data and thereby mimics a read of zero data.
+ *
+ * The first unformatted track is the one that got the NRF error, the address is
+ * encoded in the sense data.
+ *
+ * All tracks before have returned valid data and should not be touched.
+ * All tracks after the unformatted track might be formatted or not. This is
+ * currently not known, remember the processed data and return the remainder of
+ * the request to the blocklayer in __dasd_cleanup_cqr().
+ *
+ * Returns 0 on success. If the failing track cannot be derived from the
+ * sense data the error of dasd_eckd_track_from_irb() is passed on, and
+ * -EINVAL is returned if that track lies outside of the request.
+ * cqr->proc_bytes is only set when the request reaches past the failing
+ * track.
 */
-static void dasd_eckd_ese_read(struct dasd_ccw_req *cqr)
+static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb)
{
+	struct dasd_eckd_private *private;
+	sector_t first_trk, last_trk;
+	sector_t first_blk, last_blk;
	unsigned int blksize, off;
+	unsigned int recs_per_trk;
	struct dasd_device *base;
	struct req_iterator iter;
+	struct dasd_block *block;
+	unsigned int skip_block;
+	unsigned int blk_count;
	struct request *req;
	struct bio_vec bv;
+	sector_t curr_trk;
+	sector_t end_blk;
	char *dst;
+	int rc;
	req = (struct request *) cqr->callback_data;
	base = cqr->block->base;
	blksize = base->block->bp_block;
+	block = cqr->block;
+	private = base->private;
+	skip_block = 0;
+	blk_count = 0;
+
+	recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
+	first_trk = first_blk = blk_rq_pos(req) >> block->s2b_shift;
+	sector_div(first_trk, recs_per_trk);
+	last_trk = last_blk =
+		(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
+	sector_div(last_trk, recs_per_trk);
+	rc = dasd_eckd_track_from_irb(irb, base, &curr_trk);
+	if (rc)
+		return rc;
+
+	/* sanity check if the current track from sense data is valid */
+	if (curr_trk < first_trk || curr_trk > last_trk) {
+		DBF_DEV_EVENT(DBF_WARNING, base,
+			      "ESE error track %llu not within range %llu - %llu\n",
+			      curr_trk, first_trk, last_trk);
+		return -EINVAL;
+	}
+
+	/*
+	 * if not the first track got the NRF error we have to skip over valid
+	 * blocks
+	 */
+	if (curr_trk != first_trk)
+		skip_block = curr_trk * recs_per_trk - first_blk;
+
+	/* we have no information beyond the current track */
+	end_blk = (curr_trk + 1) * recs_per_trk;
	rq_for_each_segment(bv, req, iter) {
		dst = page_address(bv.bv_page) + bv.bv_offset;
		for (off = 0; off < bv.bv_len; off += blksize) {
-			if (dst && rq_data_dir(req) == READ) {
-				dst += off;
-				memset(dst, 0, blksize);
+			if (first_blk + blk_count >= end_blk) {
+				cqr->proc_bytes = blk_count * blksize;
+				return 0;
+			}
+			/*
+			 * dst stays at the start of the segment and is indexed
+			 * by off, so every block is addressed exactly once.
+			 * Adding off to dst itself would accumulate and zero
+			 * the wrong memory from the third block onward.
+			 * skip_block is only decremented while it is non-zero
+			 * so the unsigned counter can never wrap.
+			 */
+			if (skip_block) {
+				skip_block--;
+			} else if (dst) {
+				memset(dst + off, 0, blksize);
			}
+			blk_count++;
		}
	}
+	return 0;
}
/*
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 91c9f9586e0f..fa552f9f1666 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -187,6 +187,7 @@ struct dasd_ccw_req {
void (*callback)(struct dasd_ccw_req *, void *data);
void *callback_data;
+ unsigned int proc_bytes; /* bytes for partial completion */
};
/*
@@ -387,8 +388,9 @@ struct dasd_discipline {
int (*ext_pool_warn_thrshld)(struct dasd_device *);
int (*ext_pool_oos)(struct dasd_device *);
int (*ext_pool_exhaust)(struct dasd_device *, struct dasd_ccw_req *);
- struct dasd_ccw_req *(*ese_format)(struct dasd_device *, struct dasd_ccw_req *);
- void (*ese_read)(struct dasd_ccw_req *);
+ struct dasd_ccw_req *(*ese_format)(struct dasd_device *,
+ struct dasd_ccw_req *, struct irb *);
+ int (*ese_read)(struct dasd_ccw_req *, struct irb *);
};
extern struct dasd_discipline *dasd_diag_discipline_pointer;
@@ -474,6 +476,11 @@ struct dasd_profile {
spinlock_t lock;
};
+/*
+ * Describes one track that is being formatted. Entries are queued on
+ * dasd_block.format_list; test_and_set_format_track() compares ->track
+ * to detect a track that is already on that list.
+ */
+struct dasd_format_entry {
+ struct list_head list; /* link in dasd_block.format_list */
+ sector_t track; /* number of the track this entry stands for */
+};
+
struct dasd_device {
/* Block device stuff. */
struct dasd_block *block;
@@ -539,6 +546,7 @@ struct dasd_device {
struct dentry *debugfs_dentry;
struct dentry *hosts_dentry;
struct dasd_profile profile;
+ struct dasd_format_entry format_entry;
};
struct dasd_block {
@@ -564,6 +572,9 @@ struct dasd_block {
struct dentry *debugfs_dentry;
struct dasd_profile profile;
+
+ struct list_head format_list;
+ spinlock_t format_lock;
};
struct dasd_attention_data {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6fbe58538ad6..07dc91835b98 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -245,18 +245,6 @@ static inline bool disk_part_scan_enabled(struct gendisk *disk)
!(disk->flags & GENHD_FL_NO_PART_SCAN);
}
-static inline bool disk_has_partitions(struct gendisk *disk)
-{
- bool ret = false;
-
- rcu_read_lock();
- if (rcu_dereference(disk->part_tbl)->len > 1)
- ret = true;
- rcu_read_unlock();
-
- return ret;
-}
-
static inline dev_t disk_devt(struct gendisk *disk)
{
return MKDEV(disk->major, disk->first_minor);
@@ -298,6 +286,7 @@ extern void disk_part_iter_exit(struct disk_part_iter *piter);
extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
sector_t sector);
+bool disk_has_partitions(struct gendisk *disk);
/*
* Macros to operate on percpu disk statistics: