path: root/drivers/md/dm-writecache.c
author		Linus Torvalds <torvalds@linux-foundation.org>	2020-06-05 15:45:03 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2020-06-05 15:45:03 -0700
commit		b25c6644bfd3affd7d0127ce95c5c96c155a7515 (patch)
tree		9ef9c0fe74a08b7baf3a3c3753368b0f481b581f /drivers/md/dm-writecache.c
parent		818dbde78e0f4f11c9f804c36913a7ccfc2e87ad (diff)
parent		64611a15ca9da91ff532982429c44686f4593b5f (diff)
Merge tag 'for-5.8/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:

 - The largest change for this cycle is the DM zoned target's metadata
   version 2 feature that adds support for pairing regular block devices
   with a zoned device to ease the performance impact associated with the
   finite random zones of a zoned device. The changes came in three
   batches: the first prepared for and then added the ability to pair a
   single regular block device, the second was a batch of fixes to
   improve zoned's reclaim heuristic, and the third removed the
   limitation of only adding a single additional regular block device to
   allow many devices. Testing has shown linear scaling as more devices
   are added.

 - Add a new emulated block size (ebs) target that emulates a smaller
   logical_block_size than a block device supports. The primary use case
   is to emulate "512e" devices that have a 512 byte logical_block_size
   and a 4KB physical_block_size. This is useful to some legacy
   applications that otherwise couldn't be used on 4K devices because
   they depend on issuing IO at 512-byte granularity.

 - Add discard interfaces to DM bufio. The first consumer of the
   interface is the dm-ebs target, which makes heavy use of dm-bufio.

 - Fix DM crypt's block queue_limits stacking to not truncate
   logical_block_size.

 - Add Documentation for DM integrity's status line.

 - Switch DMDEBUG from a compile time config option to instead use
   dynamic debug via pr_debug.

 - Fix DM multipath target's heuristic for how it manages
   "queue_if_no_path" state internally. DM multipath now avoids disabling
   "queue_if_no_path" unless it is actually needed (e.g. in response to a
   configured timeout or an explicit "fail_if_no_path" message). This
   fixes reports of spurious -EIO being reported back to userspace
   applications during fault tolerance testing with an NVMe backend.
   Various dynamic DMDEBUG messages were added to assist with debugging
   queue_if_no_path in the future.

 - Add a new DM multipath "Historical Service Time" Path Selector.

 - Fix DM multipath's dm_blk_ioctl() to switch paths on IO error.

 - Improve DM writecache target performance by using explicit cache
   flushing for the target's single-threaded use case, plus a small
   cleanup to remove an unnecessary test in persistent_memory_claim.

 - Other small cleanups in DM core, dm-persistent-data, and DM integrity.

* tag 'for-5.8/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (62 commits)
  dm crypt: avoid truncating the logical block size
  dm mpath: add DM device name to Failing/Reinstating path log messages
  dm mpath: enhance queue_if_no_path debugging
  dm mpath: restrict queue_if_no_path state machine
  dm mpath: simplify __must_push_back
  dm zoned: check superblock location
  dm zoned: prefer full zones for reclaim
  dm zoned: select reclaim zone based on device index
  dm zoned: allocate zone by device index
  dm zoned: support arbitrary number of devices
  dm zoned: move random and sequential zones into struct dmz_dev
  dm zoned: per-device reclaim
  dm zoned: add metadata pointer to struct dmz_dev
  dm zoned: add device pointer to struct dm_zone
  dm zoned: allocate temporary superblock for tertiary devices
  dm zoned: convert to xarray
  dm zoned: add a 'reserved' zone flag
  dm zoned: improve logging messages for reclaim
  dm zoned: avoid unnecessary device recalulation for secondary superblock
  dm zoned: add debugging message for reading superblocks
  ...
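For reference, the "512e" emulation the ebs target provides boils down to mapping each emulated 512-byte sector onto a byte range inside a larger underlying block and doing read-modify-write through dm-bufio. A minimal sketch of that mapping arithmetic follows; it is illustrative only, and the constants and helper name are hypothetical, not dm-ebs internals:

/* Illustrative sketch only: hypothetical helper, not dm-ebs code. */
#include <stdint.h>

#define EMULATED_BS   512u	/* logical_block_size presented to callers */
#define UNDERLYING_BS 4096u	/* physical_block_size of the real device  */

/*
 * Map an emulated 512-byte sector to the underlying 4 KiB block that
 * contains it and the byte offset within that block.
 */
static void ebs_map_sector(uint64_t sector, uint64_t *block, uint32_t *offset)
{
	uint64_t byte = sector * EMULATED_BS;

	*block  = byte / UNDERLYING_BS;
	*offset = (uint32_t)(byte % UNDERLYING_BS);
}

A 512-byte write thus becomes: read the containing 4 KiB block, patch 512 bytes at the computed offset, and write the whole block back; that buffering work is what dm-bufio is used for.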
Diffstat (limited to 'drivers/md/dm-writecache.c')
-rw-r--r--  drivers/md/dm-writecache.c | 42
1 file changed, 37 insertions, 5 deletions
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 613c171b1b6d..74f3c506f084 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -234,10 +234,6 @@ static int persistent_memory_claim(struct dm_writecache *wc)
 
 	wc->memory_vmapped = false;
 
-	if (!wc->ssd_dev->dax_dev) {
-		r = -EOPNOTSUPP;
-		goto err1;
-	}
 	s = wc->memory_map_size;
 	p = s >> PAGE_SHIFT;
 	if (!p) {
@@ -1143,6 +1139,42 @@ static int writecache_message(struct dm_target *ti, unsigned argc, char **argv,
 	return r;
 }
 
+static void memcpy_flushcache_optimized(void *dest, void *source, size_t size)
+{
+	/*
+	 * clflushopt performs better with block size 1024, 2048, 4096
+	 * non-temporal stores perform better with block size 512
+	 *
+	 * block size	512		1024		2048		4096
+	 * movnti	496 MB/s	642 MB/s	725 MB/s	744 MB/s
+	 * clflushopt	373 MB/s	688 MB/s	1.1 GB/s	1.2 GB/s
+	 *
+	 * We see that movnti performs better for 512-byte blocks, and
+	 * clflushopt performs better for 1024-byte and larger blocks. So, we
+	 * prefer clflushopt for sizes >= 768.
+	 *
+	 * NOTE: this happens to be the case now (with dm-writecache's single
+	 * threaded model) but re-evaluate this once memcpy_flushcache() is
+	 * enabled to use movdir64b which might invalidate this performance
+	 * advantage seen with cache-allocating-writes plus flushing.
+	 */
+#ifdef CONFIG_X86
+	if (static_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
+	    likely(boot_cpu_data.x86_clflush_size == 64) &&
+	    likely(size >= 768)) {
+		do {
+			memcpy((void *)dest, (void *)source, 64);
+			clflushopt((void *)dest);
+			dest += 64;
+			source += 64;
+			size -= 64;
+		} while (size >= 64);
+		return;
+	}
+#endif
+	memcpy_flushcache(dest, source, size);
+}
+
 static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data)
 {
 	void *buf;
@@ -1168,7 +1200,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data
 			}
 		} else {
 			flush_dcache_page(bio_page(bio));
-			memcpy_flushcache(data, buf, size);
+			memcpy_flushcache_optimized(data, buf, size);
 		}
 
 		bvec_kunmap_irq(buf, &flags);
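
The size-threshold dispatch described in the comment above can be reproduced in userspace with compiler intrinsics. Below is a rough sketch, assuming an x86 CPU with CLFLUSHOPT and a 64-byte cache line (built with -mclflushopt); copy_to_pmem and its small-copy fallback are hypothetical stand-ins for the kernel's clflushopt() and memcpy_flushcache() helpers, not the actual dm-writecache code:

/* Userspace sketch of the size-threshold dispatch; not kernel code. */
#include <immintrin.h>
#include <stddef.h>
#include <string.h>

static void copy_to_pmem(void *dest, const void *src, size_t size)
{
	char *d = dest;
	const char *s = src;

	if (size >= 768) {
		/* Large copies: cached stores, flushing each 64-byte line. */
		while (size >= 64) {
			memcpy(d, s, 64);
			_mm_clflushopt(d);
			d += 64;
			s += 64;
			size -= 64;
		}
	}
	if (size) {
		/* Small copies (and any tail): plain copy plus flushes.
		 * The kernel instead uses non-temporal stores here via
		 * memcpy_flushcache(). */
		memcpy(d, s, size);
		for (size_t off = 0; off < size; off += 64)
			_mm_clflushopt(d + off);
	}
	_mm_sfence();	/* order the flushes before subsequent stores */
}

As the benchmark numbers in the comment show, the clflushopt path only wins for copies of roughly 1 KiB and up, hence the >= 768 cutoff; smaller copies stay on the non-temporal-store path in the kernel implementation.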