summaryrefslogtreecommitdiffstats
path: root/block/blk-ia-ranges.c
blob: c246c425d0d70b33c2dedaeceb9cb85f9f46952c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
// SPDX-License-Identifier: GPL-2.0
/*
 *  Block device concurrent positioning ranges.
 *
 *  Copyright (C) 2021 Western Digital Corporation or its Affiliates.
 */
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/init.h>

#include "blk.h"

static ssize_t
blk_ia_range_sector_show(struct blk_independent_access_range *iar,
			 char *buf)
{
	return sprintf(buf, "%llu\n", iar->sector);
}

static ssize_t
blk_ia_range_nr_sectors_show(struct blk_independent_access_range *iar,
			     char *buf)
{
	return sprintf(buf, "%llu\n", iar->nr_sectors);
}

struct blk_ia_range_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct blk_independent_access_range *iar, char *buf);
};

static struct blk_ia_range_sysfs_entry blk_ia_range_sector_entry = {
	.attr = { .name = "sector", .mode = 0444 },
	.show = blk_ia_range_sector_show,
};

static struct blk_ia_range_sysfs_entry blk_ia_range_nr_sectors_entry = {
	.attr = { .name = "nr_sectors", .mode = 0444 },
	.show = blk_ia_range_nr_sectors_show,
};

static struct attribute *blk_ia_range_attrs[] = {
	&blk_ia_range_sector_entry.attr,
	&blk_ia_range_nr_sectors_entry.attr,
	NULL,
};
ATTRIBUTE_GROUPS(blk_ia_range);

static ssize_t blk_ia_range_sysfs_show(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	struct blk_ia_range_sysfs_entry *entry =
		container_of(attr, struct blk_ia_range_sysfs_entry, attr);
	struct blk_independent_access_range *iar =
		container_of(kobj, struct blk_independent_access_range, kobj);
	ssize_t ret;

	mutex_lock(&iar->queue->sysfs_lock);
	ret = entry->show(iar, buf);
	mutex_unlock(&iar->queue->sysfs_lock);

	return ret;
}

static const struct sysfs_ops blk_ia_range_sysfs_ops = {
	.show	= blk_ia_range_sysfs_show,
};

/*
 * Independent access range entries are not freed individually, but alltogether
 * with struct blk_independent_access_ranges and its array of ranges. Since
 * kobject_add() takes a reference on the parent kobject contained in
 * struct blk_independent_access_ranges, the array of independent access range
 * entries cannot be freed until kobject_del() is called for all entries.
 * So we do not need to do anything here, but still need this no-op release
 * operation to avoid complaints from the kobject code.
 */
static void blk_ia_range_sysfs_nop_release(struct kobject *kobj)
{
}

static struct kobj_type blk_ia_range_ktype = {
	.sysfs_ops	= &blk_ia_range_sysfs_ops,
	.default_groups	= blk_ia_range_groups,
	.release	= blk_ia_range_sysfs_nop_release,
};

/*
 * This will be executed only after all independent access range entries are
 * removed with kobject_del(), at which point, it is safe to free everything,
 * including the array of ranges.
 */
static void blk_ia_ranges_sysfs_release(struct kobject *kobj)
{
	struct blk_independent_access_ranges *iars =
		container_of(kobj, struct blk_independent_access_ranges, kobj);

	kfree(iars);
}

static struct kobj_type blk_ia_ranges_ktype = {
	.release	= blk_ia_ranges_sysfs_release,
};

/**
 * disk_register_ia_ranges - register with sysfs a set of independent
 *			    access ranges
 * @disk:	Target disk
 * @new_iars:	New set of independent access ranges
 *
 * Register with sysfs a set of independent access ranges for @disk.
 * If @new_iars is not NULL, this set of ranges is registered and the old set
 * specified by q->ia_ranges is unregistered. Otherwise, q->ia_ranges is
 * registered if it is not already.
 */
int disk_register_independent_access_ranges(struct gendisk *disk,
				struct blk_independent_access_ranges *new_iars)
{
	struct request_queue *q = disk->queue;
	struct blk_independent_access_ranges *iars;
	int i, ret;

	lockdep_assert_held(&q->sysfs_dir_lock);
	lockdep_assert_held(&q->sysfs_lock);

	/* If a new range set is specified, unregister the old one */
	if (new_iars) {
		if (q->ia_ranges)
			disk_unregister_independent_access_ranges(disk);
		q->ia_ranges = new_iars;
	}

	iars = q->ia_ranges;
	if (!iars)
		return 0;

	/*
	 * At this point, iars is the new set of sector access ranges that needs
	 * to be registered with sysfs.
	 */
	WARN_ON(iars->sysfs_registered);
	ret = kobject_init_and_add(&iars->kobj, &blk_ia_ranges_ktype,
				   &q->kobj, "%s", "independent_access_ranges");
	if (ret) {
		q->ia_ranges = NULL;
		kfree(iars);
		return ret;
	}

	for (i = 0; i < iars->nr_ia_ranges; i++) {
		iars->ia_range[i].queue = q;
		ret = kobject_init_and_add(&iars->ia_range[i].kobj,
					   &blk_ia_range_ktype, &iars->kobj,
					   "%d", i);
		if (ret) {
			while (--i >= 0)
				kobject_del(&iars->ia_range[i].kobj);
			kobject_del(&iars->kobj);
			kobject_put(&iars->kobj);
			return ret;
		}
	}

	iars->sysfs_registered = true;

	return 0;
}

void disk_unregister_independent_access_ranges(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;
	struct blk_independent_access_ranges *iars = q->ia_ranges;
	int i;

	lockdep_assert_held(&q->sysfs_dir_lock);
	lockdep_assert_held(&q->sysfs_lock);

	if (!iars)
		return;

	if (iars->sysfs_registered) {
		for (i = 0; i < iars->nr_ia_ranges; i++)
			kobject_del(&iars->ia_range[i].kobj);
		kobject_del(&iars->kobj);
		kobject_put(&iars->kobj);
	} else {
		kfree(iars);
	}

	q->ia_ranges = NULL;
}

static struct blk_independent_access_range *
disk_find_ia_range(struct blk_independent_access_ranges *iars,
		  sector_t sector)
{
	struct blk_independent_access_range *iar;
	int i;

	for (i = 0; i < iars->nr_ia_ranges; i++) {
		iar = &iars->ia_range[i];
		if (sector >= iar->sector &&
		    sector < iar->sector + iar->nr_sectors)
			return iar;
	}

	return NULL;
}

static bool disk_check_ia_ranges(struct gendisk *disk,
				struct blk_independent_access_ranges *iars)
{
	struct blk_independent_access_range *iar, *tmp;
	sector_t capacity = get_capacity(disk);
	sector_t sector = 0;
	int i;

	/*
	 * While sorting the ranges in increasing LBA order, check that the
	 * ranges do not overlap, that there are no sector holes and that all
	 * sectors belong to one range.
	 */
	for (i = 0; i < iars->nr_ia_ranges; i++) {
		tmp = disk_find_ia_range(iars, sector);
		if (!tmp || tmp->sector != sector) {
			pr_warn("Invalid non-contiguous independent access ranges\n");
			return false;
		}

		iar = &iars->ia_range[i];
		if (tmp != iar) {
			swap(iar->sector, tmp->sector);
			swap(iar->nr_sectors, tmp->nr_sectors);
		}

		sector += iar->nr_sectors;
	}

	if (sector != capacity) {
		pr_warn("Independent access ranges do not match disk capacity\n");
		return false;
	}

	return true;
}

static bool disk_ia_ranges_changed(struct gendisk *disk,
				   struct blk_independent_access_ranges *new)
{
	struct blk_independent_access_ranges *old = disk->queue->ia_ranges;
	int i;

	if (!old)
		return true;

	if (old->nr_ia_ranges != new->nr_ia_ranges)
		return true;

	for (i = 0; i < old->nr_ia_ranges; i++) {
		if (new->ia_range[i].sector != old->ia_range[i].sector ||
		    new->ia_range[i].nr_sectors != old->ia_range[i].nr_sectors)
			return true;
	}

	return false;
}

/**
 * disk_alloc_independent_access_ranges - Allocate an independent access ranges
 *                                        data structure
 * @disk:		target disk
 * @nr_ia_ranges:	Number of independent access ranges
 *
 * Allocate a struct blk_independent_access_ranges structure with @nr_ia_ranges
 * access range descriptors.
 */
struct blk_independent_access_ranges *
disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges)
{
	struct blk_independent_access_ranges *iars;

	iars = kzalloc_node(struct_size(iars, ia_range, nr_ia_ranges),
			    GFP_KERNEL, disk->queue->node);
	if (iars)
		iars->nr_ia_ranges = nr_ia_ranges;
	return iars;
}
EXPORT_SYMBOL_GPL(disk_alloc_independent_access_ranges);

/**
 * disk_set_independent_access_ranges - Set a disk independent access ranges
 * @disk:	target disk
 * @iars:	independent access ranges structure
 *
 * Set the independent access ranges information of the request queue
 * of @disk to @iars. If @iars is NULL and the independent access ranges
 * structure already set is cleared. If there are no differences between
 * @iars and the independent access ranges structure already set, @iars
 * is freed.
 */
void disk_set_independent_access_ranges(struct gendisk *disk,
				struct blk_independent_access_ranges *iars)
{
	struct request_queue *q = disk->queue;

	if (WARN_ON_ONCE(iars && !iars->nr_ia_ranges)) {
		kfree(iars);
		iars = NULL;
	}

	mutex_lock(&q->sysfs_dir_lock);
	mutex_lock(&q->sysfs_lock);

	if (iars) {
		if (!disk_check_ia_ranges(disk, iars)) {
			kfree(iars);
			iars = NULL;
			goto reg;
		}

		if (!disk_ia_ranges_changed(disk, iars)) {
			kfree(iars);
			goto unlock;
		}
	}

	/*
	 * This may be called for a registered queue. E.g. during a device
	 * revalidation. If that is the case, we need to unregister the old
	 * set of independent access ranges and register the new set. If the
	 * queue is not registered, registration of the device request queue
	 * will register the independent access ranges, so only swap in the
	 * new set and free the old one.
	 */
reg:
	if (blk_queue_registered(q)) {
		disk_register_independent_access_ranges(disk, iars);
	} else {
		swap(q->ia_ranges, iars);
		kfree(iars);
	}

unlock:
	mutex_unlock(&q->sysfs_lock);
	mutex_unlock(&q->sysfs_dir_lock);
}
EXPORT_SYMBOL_GPL(disk_set_independent_access_ranges);