diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 16:59:14 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 16:59:14 -0700 |
commit | f60c55a94e1d127186566f06294f2dadd966e9b4 (patch) | |
tree | 2d3dbd572c0096d24f87f581194563ff76e07a6e /fs | |
parent | 734d1ed83e1f9b7bafb650033fb87c657858cf5b (diff) | |
parent | 95ae251fe82838b85c6d37e5a1775006e2a42ae0 (diff) | |
download | linux-f60c55a94e1d127186566f06294f2dadd966e9b4.tar.bz2 |
Merge tag 'fsverity-for-linus' of git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt
Pull fs-verity support from Eric Biggers:
"fs-verity is a filesystem feature that provides Merkle tree based
hashing (similar to dm-verity) for individual readonly files, mainly
for the purpose of efficient authenticity verification.
This pull request includes:
(a) The fs/verity/ support layer and documentation.
(b) fs-verity support for ext4 and f2fs.
Compared to the original fs-verity patchset from last year, the UAPI
to enable fs-verity on a file has been greatly simplified. Lots of
other things were cleaned up too.
fs-verity is planned to be used by two different projects on Android;
most of the userspace code is in place already. Another userspace tool
("fsverity-utils"), and xfstests, are also available. e2fsprogs and
f2fs-tools already have fs-verity support. Other people have shown
interest in using fs-verity too.
I've tested this on ext4 and f2fs with xfstests, both the existing
tests and the new fs-verity tests. This has also been in linux-next
since July 30 with no reported issues except a couple of minor ones I
found myself and folded in fixes for.
Ted and I will be co-maintaining fs-verity"
* tag 'fsverity-for-linus' of git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt:
f2fs: add fs-verity support
ext4: update on-disk format documentation for fs-verity
ext4: add fs-verity read support
ext4: add basic fs-verity support
fs-verity: support builtin file signatures
fs-verity: add SHA-512 support
fs-verity: implement FS_IOC_MEASURE_VERITY ioctl
fs-verity: implement FS_IOC_ENABLE_VERITY ioctl
fs-verity: add data verification hooks for ->readpages()
fs-verity: add the hook for file ->setattr()
fs-verity: add the hook for file ->open()
fs-verity: add inode and superblock fields
fs-verity: add Kconfig and the helper functions for hashing
fs: uapi: define verity bit for FS_IOC_GETFLAGS
fs-verity: add UAPI header
fs-verity: add MAINTAINERS file entry
fs-verity: add a documentation file
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Kconfig | 2 | ||||
-rw-r--r-- | fs/Makefile | 1 | ||||
-rw-r--r-- | fs/ext4/Makefile | 1 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 23 | ||||
-rw-r--r-- | fs/ext4/file.c | 4 | ||||
-rw-r--r-- | fs/ext4/inode.c | 55 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 13 | ||||
-rw-r--r-- | fs/ext4/readpage.c | 211 | ||||
-rw-r--r-- | fs/ext4/super.c | 18 | ||||
-rw-r--r-- | fs/ext4/sysfs.c | 6 | ||||
-rw-r--r-- | fs/ext4/verity.c | 367 | ||||
-rw-r--r-- | fs/f2fs/Makefile | 1 | ||||
-rw-r--r-- | fs/f2fs/data.c | 75 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 20 | ||||
-rw-r--r-- | fs/f2fs/file.c | 43 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 5 | ||||
-rw-r--r-- | fs/f2fs/super.c | 3 | ||||
-rw-r--r-- | fs/f2fs/sysfs.c | 11 | ||||
-rw-r--r-- | fs/f2fs/verity.c | 247 | ||||
-rw-r--r-- | fs/f2fs/xattr.h | 2 | ||||
-rw-r--r-- | fs/verity/Kconfig | 55 | ||||
-rw-r--r-- | fs/verity/Makefile | 10 | ||||
-rw-r--r-- | fs/verity/enable.c | 377 | ||||
-rw-r--r-- | fs/verity/fsverity_private.h | 185 | ||||
-rw-r--r-- | fs/verity/hash_algs.c | 280 | ||||
-rw-r--r-- | fs/verity/init.c | 61 | ||||
-rw-r--r-- | fs/verity/measure.c | 57 | ||||
-rw-r--r-- | fs/verity/open.c | 356 | ||||
-rw-r--r-- | fs/verity/signature.c | 157 | ||||
-rw-r--r-- | fs/verity/verify.c | 281 |
30 files changed, 2859 insertions, 68 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 669d46550e6d..2501e6f1f965 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -112,6 +112,8 @@ config MANDATORY_FILE_LOCKING source "fs/crypto/Kconfig" +source "fs/verity/Kconfig" + source "fs/notify/Kconfig" source "fs/quota/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index b2e4973a0bea..14231b4cf383 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_AIO) += aio.o obj-$(CONFIG_IO_URING) += io_uring.o obj-$(CONFIG_FS_DAX) += dax.o obj-$(CONFIG_FS_ENCRYPTION) += crypto/ +obj-$(CONFIG_FS_VERITY) += verity/ obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 8fdfcd3c3e04..b17ddc229ac5 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -13,3 +13,4 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \ ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o +ext4-$(CONFIG_FS_VERITY) += verity.o diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bf660aa7a9e0..9c7f4036021b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -41,6 +41,7 @@ #endif #include <linux/fscrypt.h> +#include <linux/fsverity.h> #include <linux/compiler.h> @@ -395,6 +396,7 @@ struct flex_groups { #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. 
*/ @@ -402,7 +404,7 @@ struct flex_groups { #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define EXT4_FL_USER_VISIBLE 0x704BDFFF /* User visible flags */ +#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */ #define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */ /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ @@ -467,6 +469,7 @@ enum { EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/ EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */ EXT4_INODE_EXTENTS = 19, /* Inode uses extents */ + EXT4_INODE_VERITY = 20, /* Verity protected inode */ EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */ @@ -512,6 +515,7 @@ static inline void ext4_check_flag_values(void) CHECK_FLAG_VALUE(TOPDIR); CHECK_FLAG_VALUE(HUGE_FILE); CHECK_FLAG_VALUE(EXTENTS); + CHECK_FLAG_VALUE(VERITY); CHECK_FLAG_VALUE(EA_INODE); CHECK_FLAG_VALUE(EOFBLOCKS); CHECK_FLAG_VALUE(INLINE_DATA); @@ -1560,6 +1564,7 @@ enum { EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */ + EXT4_STATE_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ }; #define EXT4_INODE_BIT_FNS(name, field, offset) \ @@ -1610,6 +1615,12 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_SB(sb) (sb) #endif +static inline bool ext4_verity_in_progress(struct inode *inode) +{ + return IS_ENABLED(CONFIG_FS_VERITY) && + ext4_test_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); +} + #define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime /* @@ -1662,6 +1673,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 #define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000 #define 
EXT4_FEATURE_RO_COMPAT_PROJECT 0x2000 +#define EXT4_FEATURE_RO_COMPAT_VERITY 0x8000 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 @@ -1756,6 +1768,7 @@ EXT4_FEATURE_RO_COMPAT_FUNCS(bigalloc, BIGALLOC) EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum, METADATA_CSUM) EXT4_FEATURE_RO_COMPAT_FUNCS(readonly, READONLY) EXT4_FEATURE_RO_COMPAT_FUNCS(project, PROJECT) +EXT4_FEATURE_RO_COMPAT_FUNCS(verity, VERITY) EXT4_FEATURE_INCOMPAT_FUNCS(compression, COMPRESSION) EXT4_FEATURE_INCOMPAT_FUNCS(filetype, FILETYPE) @@ -1813,7 +1826,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(casefold, CASEFOLD) EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ EXT4_FEATURE_RO_COMPAT_QUOTA |\ - EXT4_FEATURE_RO_COMPAT_PROJECT) + EXT4_FEATURE_RO_COMPAT_PROJECT |\ + EXT4_FEATURE_RO_COMPAT_VERITY) #define EXTN_FEATURE_FUNCS(ver) \ static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \ @@ -3177,6 +3191,8 @@ static inline void ext4_set_de_type(struct super_block *sb, extern int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, unsigned nr_pages, bool is_readahead); +extern int __init ext4_init_post_read_processing(void); +extern void ext4_exit_post_read_processing(void); /* symlink.c */ extern const struct inode_operations ext4_encrypted_symlink_inode_operations; @@ -3283,6 +3299,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, /* mmp.c */ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); +/* verity.c */ +extern const struct fsverity_operations ext4_verityops; + /* * Add new method to test whether block and inode bitmaps are properly * initialized. 
With uninit_bg reading the block from disk is not enough diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 70b0438dbc94..b8a20bb9a145 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -457,6 +457,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp) if (ret) return ret; + ret = fsverity_file_open(inode, filp); + if (ret) + return ret; + /* * Set up the jbd2_inode if we are opening the inode for * writing and the journal is present diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 006b7a2070bf..d0dc0e3463db 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1340,6 +1340,9 @@ retry_journal: } if (ret) { + bool extended = (pos + len > inode->i_size) && + !ext4_verity_in_progress(inode); + unlock_page(page); /* * __block_write_begin may have instantiated a few blocks @@ -1349,11 +1352,11 @@ retry_journal: * Add inode to orphan list in case we crash before * truncate finishes */ - if (pos + len > inode->i_size && ext4_can_truncate(inode)) + if (extended && ext4_can_truncate(inode)) ext4_orphan_add(handle, inode); ext4_journal_stop(handle); - if (pos + len > inode->i_size) { + if (extended) { ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might @@ -1406,6 +1409,7 @@ static int ext4_write_end(struct file *file, int ret = 0, ret2; int i_size_changed = 0; int inline_data = ext4_has_inline_data(inode); + bool verity = ext4_verity_in_progress(inode); trace_ext4_write_end(inode, pos, len, copied); if (inline_data) { @@ -1423,12 +1427,16 @@ static int ext4_write_end(struct file *file, /* * it's important to update i_size while still holding page lock: * page writeout could otherwise come in and zero beyond i_size. + * + * If FS_IOC_ENABLE_VERITY is running on this inode, then Merkle tree + * blocks are being written past EOF, so skip the i_size update. 
*/ - i_size_changed = ext4_update_inode_size(inode, pos + copied); + if (!verity) + i_size_changed = ext4_update_inode_size(inode, pos + copied); unlock_page(page); put_page(page); - if (old_size < pos) + if (old_size < pos && !verity) pagecache_isize_extended(inode, old_size, pos); /* * Don't mark the inode dirty under page lock. First, it unnecessarily @@ -1439,7 +1447,7 @@ static int ext4_write_end(struct file *file, if (i_size_changed || inline_data) ext4_mark_inode_dirty(handle, inode); - if (pos + len > inode->i_size && ext4_can_truncate(inode)) + if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode)) /* if we have allocated more blocks and copied * less. We will have blocks allocated outside * inode->i_size. So truncate them @@ -1450,7 +1458,7 @@ errout: if (!ret) ret = ret2; - if (pos + len > inode->i_size) { + if (pos + len > inode->i_size && !verity) { ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be @@ -1511,6 +1519,7 @@ static int ext4_journalled_write_end(struct file *file, unsigned from, to; int size_changed = 0; int inline_data = ext4_has_inline_data(inode); + bool verity = ext4_verity_in_progress(inode); trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_SIZE - 1); @@ -1540,13 +1549,14 @@ static int ext4_journalled_write_end(struct file *file, if (!partial) SetPageUptodate(page); } - size_changed = ext4_update_inode_size(inode, pos + copied); + if (!verity) + size_changed = ext4_update_inode_size(inode, pos + copied); ext4_set_inode_state(inode, EXT4_STATE_JDATA); EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; unlock_page(page); put_page(page); - if (old_size < pos) + if (old_size < pos && !verity) pagecache_isize_extended(inode, old_size, pos); if (size_changed || inline_data) { @@ -1555,7 +1565,7 @@ static int ext4_journalled_write_end(struct file *file, ret = ret2; } - if (pos + len > inode->i_size && ext4_can_truncate(inode)) + if (pos + len > 
inode->i_size && !verity && ext4_can_truncate(inode)) /* if we have allocated more blocks and copied * less. We will have blocks allocated outside * inode->i_size. So truncate them @@ -1566,7 +1576,7 @@ errout: ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; - if (pos + len > inode->i_size) { + if (pos + len > inode->i_size && !verity) { ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be @@ -2162,7 +2172,8 @@ static int ext4_writepage(struct page *page, trace_ext4_writepage(page); size = i_size_read(inode); - if (page->index == size >> PAGE_SHIFT) + if (page->index == size >> PAGE_SHIFT && + !ext4_verity_in_progress(inode)) len = size & ~PAGE_MASK; else len = PAGE_SIZE; @@ -2246,7 +2257,8 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) * after page tables are updated. */ size = i_size_read(mpd->inode); - if (page->index == size >> PAGE_SHIFT) + if (page->index == size >> PAGE_SHIFT && + !ext4_verity_in_progress(mpd->inode)) len = size & ~PAGE_MASK; else len = PAGE_SIZE; @@ -2345,6 +2357,9 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd, ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1) >> inode->i_blkbits; + if (ext4_verity_in_progress(inode)) + blocks = EXT_MAX_BLOCKS; + do { BUG_ON(buffer_locked(bh)); @@ -3061,8 +3076,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, index = pos >> PAGE_SHIFT; - if (ext4_nonda_switch(inode->i_sb) || - S_ISLNK(inode->i_mode)) { + if (ext4_nonda_switch(inode->i_sb) || S_ISLNK(inode->i_mode) || + ext4_verity_in_progress(inode)) { *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; return ext4_write_begin(file, mapping, pos, len, flags, pagep, fsdata); @@ -3897,6 +3912,8 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) return 0; #endif + if (fsverity_active(inode)) + return 0; /* * If we are doing data journalling we 
don't support O_DIRECT @@ -4736,6 +4753,8 @@ static bool ext4_should_use_dax(struct inode *inode) return false; if (ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT)) return false; + if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY)) + return false; return true; } @@ -4760,9 +4779,11 @@ void ext4_set_inode_flags(struct inode *inode) new_fl |= S_ENCRYPTED; if (flags & EXT4_CASEFOLD_FL) new_fl |= S_CASEFOLD; + if (flags & EXT4_VERITY_FL) + new_fl |= S_VERITY; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX| - S_ENCRYPTED|S_CASEFOLD); + S_ENCRYPTED|S_CASEFOLD|S_VERITY); } static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, @@ -5552,6 +5573,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; + error = fsverity_prepare_setattr(dentry, attr); + if (error) + return error; + if (is_quota_modification(inode, attr)) { error = dquot_initialize(inode); if (error) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 5703d607f5af..5444d49cbf09 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1198,6 +1198,17 @@ out: } case EXT4_IOC_SHUTDOWN: return ext4_shutdown(sb, arg); + + case FS_IOC_ENABLE_VERITY: + if (!ext4_has_feature_verity(sb)) + return -EOPNOTSUPP; + return fsverity_ioctl_enable(filp, (const void __user *)arg); + + case FS_IOC_MEASURE_VERITY: + if (!ext4_has_feature_verity(sb)) + return -EOPNOTSUPP; + return fsverity_ioctl_measure(filp, (void __user *)arg); + default: return -ENOTTY; } @@ -1265,6 +1276,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_GET_ENCRYPTION_KEY_STATUS: case EXT4_IOC_SHUTDOWN: case FS_IOC_GETFSMAP: + case FS_IOC_ENABLE_VERITY: + case FS_IOC_MEASURE_VERITY: break; default: return -ENOIOCTLCMD; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index c916017db334..a30b203fa461 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -47,13 +47,103 @@ #include "ext4.h" -static inline bool 
ext4_bio_encrypted(struct bio *bio) +#define NUM_PREALLOC_POST_READ_CTXS 128 + +static struct kmem_cache *bio_post_read_ctx_cache; +static mempool_t *bio_post_read_ctx_pool; + +/* postprocessing steps for read bios */ +enum bio_post_read_step { + STEP_INITIAL = 0, + STEP_DECRYPT, + STEP_VERITY, +}; + +struct bio_post_read_ctx { + struct bio *bio; + struct work_struct work; + unsigned int cur_step; + unsigned int enabled_steps; +}; + +static void __read_end_io(struct bio *bio) { -#ifdef CONFIG_FS_ENCRYPTION - return unlikely(bio->bi_private != NULL); -#else - return false; -#endif + struct page *page; + struct bio_vec *bv; + struct bvec_iter_all iter_all; + + bio_for_each_segment_all(bv, bio, iter_all) { + page = bv->bv_page; + + /* PG_error was set if any post_read step failed */ + if (bio->bi_status || PageError(page)) { + ClearPageUptodate(page); + /* will re-read again later */ + ClearPageError(page); + } else { + SetPageUptodate(page); + } + unlock_page(page); + } + if (bio->bi_private) + mempool_free(bio->bi_private, bio_post_read_ctx_pool); + bio_put(bio); +} + +static void bio_post_read_processing(struct bio_post_read_ctx *ctx); + +static void decrypt_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + + fscrypt_decrypt_bio(ctx->bio); + + bio_post_read_processing(ctx); +} + +static void verity_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + + fsverity_verify_bio(ctx->bio); + + bio_post_read_processing(ctx); +} + +static void bio_post_read_processing(struct bio_post_read_ctx *ctx) +{ + /* + * We use different work queues for decryption and for verity because + * verity may require reading metadata pages that need decryption, and + * we shouldn't recurse to the same workqueue. 
+ */ + switch (++ctx->cur_step) { + case STEP_DECRYPT: + if (ctx->enabled_steps & (1 << STEP_DECRYPT)) { + INIT_WORK(&ctx->work, decrypt_work); + fscrypt_enqueue_decrypt_work(&ctx->work); + return; + } + ctx->cur_step++; + /* fall-through */ + case STEP_VERITY: + if (ctx->enabled_steps & (1 << STEP_VERITY)) { + INIT_WORK(&ctx->work, verity_work); + fsverity_enqueue_verify_work(&ctx->work); + return; + } + ctx->cur_step++; + /* fall-through */ + default: + __read_end_io(ctx->bio); + } +} + +static bool bio_post_read_required(struct bio *bio) +{ + return bio->bi_private && !bio->bi_status; } /* @@ -70,30 +160,53 @@ static inline bool ext4_bio_encrypted(struct bio *bio) */ static void mpage_end_io(struct bio *bio) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + if (bio_post_read_required(bio)) { + struct bio_post_read_ctx *ctx = bio->bi_private; - if (ext4_bio_encrypted(bio)) { - if (bio->bi_status) { - fscrypt_release_ctx(bio->bi_private); - } else { - fscrypt_enqueue_decrypt_bio(bio->bi_private, bio); - return; - } + ctx->cur_step = STEP_INITIAL; + bio_post_read_processing(ctx); + return; } - bio_for_each_segment_all(bv, bio, iter_all) { - struct page *page = bv->bv_page; + __read_end_io(bio); +} - if (!bio->bi_status) { - SetPageUptodate(page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - unlock_page(page); +static inline bool ext4_need_verity(const struct inode *inode, pgoff_t idx) +{ + return fsverity_active(inode) && + idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); +} + +static struct bio_post_read_ctx *get_bio_post_read_ctx(struct inode *inode, + struct bio *bio, + pgoff_t first_idx) +{ + unsigned int post_read_steps = 0; + struct bio_post_read_ctx *ctx = NULL; + + if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) + post_read_steps |= 1 << STEP_DECRYPT; + + if (ext4_need_verity(inode, first_idx)) + post_read_steps |= 1 << STEP_VERITY; + + if (post_read_steps) { + ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); + if (!ctx) 
+ return ERR_PTR(-ENOMEM); + ctx->bio = bio; + ctx->enabled_steps = post_read_steps; + bio->bi_private = ctx; } + return ctx; +} - bio_put(bio); +static inline loff_t ext4_readpage_limit(struct inode *inode) +{ + if (IS_ENABLED(CONFIG_FS_VERITY) && + (IS_VERITY(inode) || ext4_verity_in_progress(inode))) + return inode->i_sb->s_maxbytes; + + return i_size_read(inode); } int ext4_mpage_readpages(struct address_space *mapping, @@ -141,7 +254,8 @@ int ext4_mpage_readpages(struct address_space *mapping, block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); last_block = block_in_file + nr_pages * blocks_per_page; - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; + last_block_in_file = (ext4_readpage_limit(inode) + + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) last_block = last_block_in_file; page_block = 0; @@ -218,6 +332,9 @@ int ext4_mpage_readpages(struct address_space *mapping, zero_user_segment(page, first_hole << blkbits, PAGE_SIZE); if (first_hole == 0) { + if (ext4_need_verity(inode, page->index) && + !fsverity_verify_page(page)) + goto set_error_page; SetPageUptodate(page); unlock_page(page); goto next_page; @@ -241,18 +358,16 @@ int ext4_mpage_readpages(struct address_space *mapping, bio = NULL; } if (bio == NULL) { - struct fscrypt_ctx *ctx = NULL; + struct bio_post_read_ctx *ctx; - if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) { - ctx = fscrypt_get_ctx(GFP_NOFS); - if (IS_ERR(ctx)) - goto set_error_page; - } bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); - if (!bio) { - if (ctx) - fscrypt_release_ctx(ctx); + if (!bio) + goto set_error_page; + ctx = get_bio_post_read_ctx(inode, bio, page->index); + if (IS_ERR(ctx)) { + bio_put(bio); + bio = NULL; goto set_error_page; } bio_set_dev(bio, bdev); @@ -293,3 +408,29 @@ int ext4_mpage_readpages(struct address_space *mapping, submit_bio(bio); return 0; } + +int __init ext4_init_post_read_processing(void) +{ + bio_post_read_ctx_cache = + 
kmem_cache_create("ext4_bio_post_read_ctx", + sizeof(struct bio_post_read_ctx), 0, 0, NULL); + if (!bio_post_read_ctx_cache) + goto fail; + bio_post_read_ctx_pool = + mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS, + bio_post_read_ctx_cache); + if (!bio_post_read_ctx_pool) + goto fail_free_cache; + return 0; + +fail_free_cache: + kmem_cache_destroy(bio_post_read_ctx_cache); +fail: + return -ENOMEM; +} + +void ext4_exit_post_read_processing(void) +{ + mempool_destroy(bio_post_read_ctx_pool); + kmem_cache_destroy(bio_post_read_ctx_cache); +} diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 757819139b8f..27cd622676e7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1182,6 +1182,7 @@ void ext4_clear_inode(struct inode *inode) EXT4_I(inode)->jinode = NULL; } fscrypt_put_encryption_info(inode); + fsverity_cleanup_inode(inode); } static struct inode *ext4_nfs_get_inode(struct super_block *sb, @@ -4275,6 +4276,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_FS_ENCRYPTION sb->s_cop = &ext4_cryptops; #endif +#ifdef CONFIG_FS_VERITY + sb->s_vop = &ext4_verityops; +#endif #ifdef CONFIG_QUOTA sb->dq_op = &ext4_quota_operations; if (ext4_has_feature_quota(sb)) @@ -4422,6 +4426,11 @@ no_journal: goto failed_mount_wq; } + if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) { + ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity"); + goto failed_mount_wq; + } + if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && !ext4_has_feature_encrypt(sb)) { ext4_set_feature_encrypt(sb); @@ -6098,6 +6107,10 @@ static int __init ext4_init_fs(void) err = ext4_init_pending(); if (err) + goto out7; + + err = ext4_init_post_read_processing(); + if (err) goto out6; err = ext4_init_pageio(); @@ -6138,8 +6151,10 @@ out3: out4: ext4_exit_pageio(); out5: - ext4_exit_pending(); + ext4_exit_post_read_processing(); out6: + ext4_exit_pending(); +out7: ext4_exit_es(); return err; @@ -6156,6 +6171,7 @@ static void __exit 
ext4_exit_fs(void) ext4_exit_sysfs(); ext4_exit_system_zone(); ext4_exit_pageio(); + ext4_exit_post_read_processing(); ext4_exit_es(); ext4_exit_pending(); } diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index b3cd7655a6ff..eb1efad0e20a 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -242,6 +242,9 @@ EXT4_ATTR_FEATURE(encryption); #ifdef CONFIG_UNICODE EXT4_ATTR_FEATURE(casefold); #endif +#ifdef CONFIG_FS_VERITY +EXT4_ATTR_FEATURE(verity); +#endif EXT4_ATTR_FEATURE(metadata_csum_seed); static struct attribute *ext4_feat_attrs[] = { @@ -254,6 +257,9 @@ static struct attribute *ext4_feat_attrs[] = { #ifdef CONFIG_UNICODE ATTR_LIST(casefold), #endif +#ifdef CONFIG_FS_VERITY + ATTR_LIST(verity), +#endif ATTR_LIST(metadata_csum_seed), NULL, }; diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c new file mode 100644 index 000000000000..d0d8a9795dd6 --- /dev/null +++ b/fs/ext4/verity.c @@ -0,0 +1,367 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/ext4/verity.c: fs-verity support for ext4 + * + * Copyright 2019 Google LLC + */ + +/* + * Implementation of fsverity_operations for ext4. + * + * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past + * the end of the file, starting at the first 64K boundary beyond i_size. This + * approach works because (a) verity files are readonly, and (b) pages fully + * beyond i_size aren't visible to userspace but can be read/written internally + * by ext4 with only some relatively small changes to ext4. This approach + * avoids having to depend on the EA_INODE feature and on rearchitecturing + * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and + * to support encrypting xattrs. Note that the verity metadata *must* be + * encrypted when the file is, since it contains hashes of the plaintext data. 
+ * + * Using a 64K boundary rather than a 4K one keeps things ready for + * architectures with 64K pages, and it doesn't necessarily waste space on-disk + * since there can be a hole between i_size and the start of the Merkle tree. + */ + +#include <linux/quotaops.h> + +#include "ext4.h" +#include "ext4_extents.h" +#include "ext4_jbd2.h" + +static inline loff_t ext4_verity_metadata_pos(const struct inode *inode) +{ + return round_up(inode->i_size, 65536); +} + +/* + * Read some verity metadata from the inode. __vfs_read() can't be used because + * we need to read beyond i_size. + */ +static int pagecache_read(struct inode *inode, void *buf, size_t count, + loff_t pos) +{ + while (count) { + size_t n = min_t(size_t, count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; + void *addr; + + page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT, + NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + + addr = kmap_atomic(page); + memcpy(buf, addr + offset_in_page(pos), n); + kunmap_atomic(addr); + + put_page(page); + + buf += n; + pos += n; + count -= n; + } + return 0; +} + +/* + * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY. + * kernel_write() can't be used because the file descriptor is readonly. 
+ */ +static int pagecache_write(struct inode *inode, const void *buf, size_t count, + loff_t pos) +{ + if (pos + count > inode->i_sb->s_maxbytes) + return -EFBIG; + + while (count) { + size_t n = min_t(size_t, count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; + void *fsdata; + void *addr; + int res; + + res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0, + &page, &fsdata); + if (res) + return res; + + addr = kmap_atomic(page); + memcpy(addr + offset_in_page(pos), buf, n); + kunmap_atomic(addr); + + res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n, + page, fsdata); + if (res < 0) + return res; + if (res != n) + return -EIO; + + buf += n; + pos += n; + count -= n; + } + return 0; +} + +static int ext4_begin_enable_verity(struct file *filp) +{ + struct inode *inode = file_inode(filp); + const int credits = 2; /* superblock and inode for ext4_orphan_add() */ + handle_t *handle; + int err; + + if (ext4_verity_in_progress(inode)) + return -EBUSY; + + /* + * Since the file was opened readonly, we have to initialize the jbd + * inode and quotas here and not rely on ->open() doing it. This must + * be done before evicting the inline data. + */ + + err = ext4_inode_attach_jinode(inode); + if (err) + return err; + + err = dquot_initialize(inode); + if (err) + return err; + + err = ext4_convert_inline_data(inode); + if (err) + return err; + + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { + ext4_warning_inode(inode, + "verity is only allowed on extent-based files"); + return -EOPNOTSUPP; + } + + /* + * ext4 uses the last allocated block to find the verity descriptor, so + * we must remove any other blocks past EOF which might confuse things. 
+ */ + err = ext4_truncate(inode); + if (err) + return err; + + handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + err = ext4_orphan_add(handle, inode); + if (err == 0) + ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + + ext4_journal_stop(handle); + return err; +} + +/* + * ext4 stores the verity descriptor beginning on the next filesystem block + * boundary after the Merkle tree. Then, the descriptor size is stored in the + * last 4 bytes of the last allocated filesystem block --- which is either the + * block in which the descriptor ends, or the next block after that if there + * weren't at least 4 bytes remaining. + * + * We can't simply store the descriptor in an xattr because it *must* be + * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt + * xattrs. Also, if the descriptor includes a large signature blob it may be + * too large to store in an xattr without the EA_INODE feature. + */ +static int ext4_write_verity_descriptor(struct inode *inode, const void *desc, + size_t desc_size, u64 merkle_tree_size) +{ + const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) + + merkle_tree_size, i_blocksize(inode)); + const u64 desc_end = desc_pos + desc_size; + const __le32 desc_size_disk = cpu_to_le32(desc_size); + const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk), + i_blocksize(inode)) - + sizeof(desc_size_disk); + int err; + + err = pagecache_write(inode, desc, desc_size, desc_pos); + if (err) + return err; + + return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk), + desc_size_pos); +} + +static int ext4_end_enable_verity(struct file *filp, const void *desc, + size_t desc_size, u64 merkle_tree_size) +{ + struct inode *inode = file_inode(filp); + const int credits = 2; /* superblock and inode for ext4_orphan_del() */ + handle_t *handle; + int err = 0; + int err2; + + if (desc != NULL) { + /* Succeeded; write the verity 
descriptor. */ + err = ext4_write_verity_descriptor(inode, desc, desc_size, + merkle_tree_size); + + /* Write all pages before clearing VERITY_IN_PROGRESS. */ + if (!err) + err = filemap_write_and_wait(inode->i_mapping); + } + + /* If we failed, truncate anything we wrote past i_size. */ + if (desc == NULL || err) + ext4_truncate(inode); + + /* + * We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and + * deleting the inode from the orphan list, even if something failed. + * If everything succeeded, we'll also set the verity bit in the same + * transaction. + */ + + ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + + handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); + if (IS_ERR(handle)) { + ext4_orphan_del(NULL, inode); + return PTR_ERR(handle); + } + + err2 = ext4_orphan_del(handle, inode); + if (err2) + goto out_stop; + + if (desc != NULL && !err) { + struct ext4_iloc iloc; + + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + goto out_stop; + ext4_set_inode_flag(inode, EXT4_INODE_VERITY); + ext4_set_inode_flags(inode); + err = ext4_mark_iloc_dirty(handle, inode, &iloc); + } +out_stop: + ext4_journal_stop(handle); + return err ?: err2; +} + +static int ext4_get_verity_descriptor_location(struct inode *inode, + size_t *desc_size_ret, + u64 *desc_pos_ret) +{ + struct ext4_ext_path *path; + struct ext4_extent *last_extent; + u32 end_lblk; + u64 desc_size_pos; + __le32 desc_size_disk; + u32 desc_size; + u64 desc_pos; + int err; + + /* + * Descriptor size is in last 4 bytes of last allocated block. + * See ext4_write_verity_descriptor(). 
+ */ + + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { + EXT4_ERROR_INODE(inode, "verity file doesn't use extents"); + return -EFSCORRUPTED; + } + + path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); + if (IS_ERR(path)) + return PTR_ERR(path); + + last_extent = path[path->p_depth].p_ext; + if (!last_extent) { + EXT4_ERROR_INODE(inode, "verity file has no extents"); + ext4_ext_drop_refs(path); + kfree(path); + return -EFSCORRUPTED; + } + + end_lblk = le32_to_cpu(last_extent->ee_block) + + ext4_ext_get_actual_len(last_extent); + desc_size_pos = (u64)end_lblk << inode->i_blkbits; + ext4_ext_drop_refs(path); + kfree(path); + + if (desc_size_pos < sizeof(desc_size_disk)) + goto bad; + desc_size_pos -= sizeof(desc_size_disk); + + err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk), + desc_size_pos); + if (err) + return err; + desc_size = le32_to_cpu(desc_size_disk); + + /* + * The descriptor is stored just before the desc_size_disk, but starting + * on a filesystem block boundary. 
+ */ + + if (desc_size > INT_MAX || desc_size > desc_size_pos) + goto bad; + + desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode)); + if (desc_pos < ext4_verity_metadata_pos(inode)) + goto bad; + + *desc_size_ret = desc_size; + *desc_pos_ret = desc_pos; + return 0; + +bad: + EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor"); + return -EFSCORRUPTED; +} + +static int ext4_get_verity_descriptor(struct inode *inode, void *buf, + size_t buf_size) +{ + size_t desc_size = 0; + u64 desc_pos = 0; + int err; + + err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos); + if (err) + return err; + + if (buf_size) { + if (desc_size > buf_size) + return -ERANGE; + err = pagecache_read(inode, buf, desc_size, desc_pos); + if (err) + return err; + } + return desc_size; +} + +static struct page *ext4_read_merkle_tree_page(struct inode *inode, + pgoff_t index) +{ + index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; + + return read_mapping_page(inode->i_mapping, index, NULL); +} + +static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf, + u64 index, int log_blocksize) +{ + loff_t pos = ext4_verity_metadata_pos(inode) + (index << log_blocksize); + + return pagecache_write(inode, buf, 1 << log_blocksize, pos); +} + +const struct fsverity_operations ext4_verityops = { + .begin_enable_verity = ext4_begin_enable_verity, + .end_enable_verity = ext4_end_enable_verity, + .get_verity_descriptor = ext4_get_verity_descriptor, + .read_merkle_tree_page = ext4_read_merkle_tree_page, + .write_merkle_tree_block = ext4_write_merkle_tree_block, +}; diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 776c4b936504..2aaecc63834f 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -8,3 +8,4 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o +f2fs-$(CONFIG_FS_VERITY) += verity.o diff --git a/fs/f2fs/data.c 
b/fs/f2fs/data.c index abbf14e9bd72..54cad80acb7d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -74,6 +74,7 @@ static enum count_type __read_io_type(struct page *page) enum bio_post_read_step { STEP_INITIAL = 0, STEP_DECRYPT, + STEP_VERITY, }; struct bio_post_read_ctx { @@ -120,8 +121,23 @@ static void decrypt_work(struct work_struct *work) bio_post_read_processing(ctx); } +static void verity_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + + fsverity_verify_bio(ctx->bio); + + bio_post_read_processing(ctx); +} + static void bio_post_read_processing(struct bio_post_read_ctx *ctx) { + /* + * We use different work queues for decryption and for verity because + * verity may require reading metadata pages that need decryption, and + * we shouldn't recurse to the same workqueue. + */ switch (++ctx->cur_step) { case STEP_DECRYPT: if (ctx->enabled_steps & (1 << STEP_DECRYPT)) { @@ -131,6 +147,14 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx) } ctx->cur_step++; /* fall-through */ + case STEP_VERITY: + if (ctx->enabled_steps & (1 << STEP_VERITY)) { + INIT_WORK(&ctx->work, verity_work); + fsverity_enqueue_verify_work(&ctx->work); + return; + } + ctx->cur_step++; + /* fall-through */ default: __read_end_io(ctx->bio); } @@ -608,8 +632,15 @@ out: up_write(&io->io_rwsem); } +static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) +{ + return fsverity_active(inode) && + idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); +} + static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, - unsigned nr_pages, unsigned op_flag) + unsigned nr_pages, unsigned op_flag, + pgoff_t first_idx) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; @@ -625,6 +656,10 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, if (f2fs_encrypted_file(inode)) post_read_steps |= 1 << STEP_DECRYPT; + + if (f2fs_need_verity(inode, 
first_idx)) + post_read_steps |= 1 << STEP_VERITY; + if (post_read_steps) { ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); if (!ctx) { @@ -646,7 +681,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; - bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0); + bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -1569,6 +1604,15 @@ out: return ret; } +static inline loff_t f2fs_readpage_limit(struct inode *inode) +{ + if (IS_ENABLED(CONFIG_FS_VERITY) && + (IS_VERITY(inode) || f2fs_verity_in_progress(inode))) + return inode->i_sb->s_maxbytes; + + return i_size_read(inode); +} + static int f2fs_read_single_page(struct inode *inode, struct page *page, unsigned nr_pages, struct f2fs_map_blocks *map, @@ -1587,7 +1631,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, block_in_file = (sector_t)page_index(page); last_block = block_in_file + nr_pages; - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> + last_block_in_file = (f2fs_readpage_limit(inode) + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) last_block = last_block_in_file; @@ -1632,6 +1676,11 @@ got_it: } else { zero_out: zero_user_segment(page, 0, PAGE_SIZE); + if (f2fs_need_verity(inode, page->index) && + !fsverity_verify_page(page)) { + ret = -EIO; + goto out; + } if (!PageUptodate(page)) SetPageUptodate(page); unlock_page(page); @@ -1650,7 +1699,7 @@ submit_and_realloc: } if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0); + is_readahead ? 
REQ_RAHEAD : 0, page->index); if (IS_ERR(bio)) { ret = PTR_ERR(bio); bio = NULL; @@ -2052,7 +2101,7 @@ static int __write_data_page(struct page *page, bool *submitted, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (page->index < end_index) + if (page->index < end_index || f2fs_verity_in_progress(inode)) goto write; /* @@ -2427,7 +2476,8 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; loff_t i_size = i_size_read(inode); - if (to > i_size) { + /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */ + if (to > i_size && !f2fs_verity_in_progress(inode)) { down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); @@ -2458,7 +2508,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, * the block addresses when there is no need to fill the page. */ if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE && - !is_inode_flag_set(inode, FI_NO_PREALLOC)) + !is_inode_flag_set(inode, FI_NO_PREALLOC) && + !f2fs_verity_in_progress(inode)) return 0; /* f2fs_lock_op avoids race between write CP and convert_inline_page */ @@ -2597,7 +2648,8 @@ repeat: if (len == PAGE_SIZE || PageUptodate(page)) return 0; - if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) { + if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) && + !f2fs_verity_in_progress(inode)) { zero_user_segment(page, len, PAGE_SIZE); return 0; } @@ -2660,7 +2712,8 @@ static int f2fs_write_end(struct file *file, set_page_dirty(page); - if (pos + copied > i_size_read(inode)) + if (pos + copied > i_size_read(inode) && + !f2fs_verity_in_progress(inode)) f2fs_i_size_write(inode, pos + copied); unlock_out: f2fs_put_page(page, 1); @@ -3104,7 +3157,9 @@ void f2fs_clear_page_cache_dirty_tag(struct page *page) int __init f2fs_init_post_read_processing(void) { - bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0); + bio_post_read_ctx_cache = + 
kmem_cache_create("f2fs_bio_post_read_ctx", + sizeof(struct bio_post_read_ctx), 0, 0, NULL); if (!bio_post_read_ctx_cache) goto fail; bio_post_read_ctx_pool = diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 17382da7f0bd..7c5f121edac5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -25,6 +25,7 @@ #include <crypto/hash.h> #include <linux/fscrypt.h> +#include <linux/fsverity.h> #ifdef CONFIG_F2FS_CHECK_FS #define f2fs_bug_on(sbi, condition) BUG_ON(condition) @@ -151,7 +152,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_QUOTA_INO 0x0080 #define F2FS_FEATURE_INODE_CRTIME 0x0100 #define F2FS_FEATURE_LOST_FOUND 0x0200 -#define F2FS_FEATURE_VERITY 0x0400 /* reserved */ +#define F2FS_FEATURE_VERITY 0x0400 #define F2FS_FEATURE_SB_CHKSUM 0x0800 #define __F2FS_HAS_FEATURE(raw_super, mask) \ @@ -630,7 +631,7 @@ enum { #define FADVISE_ENC_NAME_BIT 0x08 #define FADVISE_KEEP_SIZE_BIT 0x10 #define FADVISE_HOT_BIT 0x20 -#define FADVISE_VERITY_BIT 0x40 /* reserved */ +#define FADVISE_VERITY_BIT 0x40 #define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT) @@ -650,6 +651,8 @@ enum { #define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT) #define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT) #define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT) +#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT) +#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT) #define DEF_DIR_LEVEL 0 @@ -2412,6 +2415,7 @@ enum { FI_PROJ_INHERIT, /* indicate file inherits projectid */ FI_PIN_FILE, /* indicate file should not be gced */ FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ + FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, @@ -2451,6 +2455,12 @@ static inline void clear_inode_flag(struct inode *inode, int flag) __mark_inode_dirty_flag(inode, flag, false); } +static inline bool f2fs_verity_in_progress(struct inode *inode) +{ + return 
IS_ENABLED(CONFIG_FS_VERITY) && + is_inode_flag_set(inode, FI_VERITY_IN_PROGRESS); +} + static inline void set_acl_inode(struct inode *inode, umode_t mode) { F2FS_I(inode)->i_acl_mode = mode; @@ -3521,6 +3531,9 @@ void f2fs_exit_sysfs(void); int f2fs_register_sysfs(struct f2fs_sb_info *sbi); void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi); +/* verity.c */ +extern const struct fsverity_operations f2fs_verityops; + /* * crypto support */ @@ -3543,7 +3556,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) */ static inline bool f2fs_post_read_required(struct inode *inode) { - return f2fs_encrypted_file(inode); + return f2fs_encrypted_file(inode) || fsverity_active(inode); } #define F2FS_FEATURE_FUNCS(name, flagname) \ @@ -3561,6 +3574,7 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); +F2FS_FEATURE_FUNCS(verity, VERITY); F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); #ifdef CONFIG_BLK_DEV_ZONED diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6a7349f9ac15..39fffc19e00c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -496,6 +496,10 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; + err = fsverity_file_open(inode, filp); + if (err) + return err; + filp->f_mode |= FMODE_NOWAIT; return dquot_file_open(inode, filp); @@ -778,6 +782,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; + err = fsverity_prepare_setattr(dentry, attr); + if (err) + return err; + if (is_quota_modification(inode, attr)) { err = dquot_initialize(inode); if (err) @@ -1705,7 +1713,8 @@ static const struct { FS_PROJINHERIT_FL | \ FS_ENCRYPT_FL | \ FS_INLINE_DATA_FL | \ - FS_NOCOW_FL) + FS_NOCOW_FL | \ + FS_VERITY_FL) #define F2FS_SETTABLE_FS_FL ( \ FS_SYNC_FL | \ @@ -1750,6 +1759,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) if 
(IS_ENCRYPTED(inode)) fsflags |= FS_ENCRYPT_FL; + if (IS_VERITY(inode)) + fsflags |= FS_VERITY_FL; if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) fsflags |= FS_INLINE_DATA_FL; if (is_inode_flag_set(inode, FI_PIN_FILE)) @@ -3103,6 +3114,30 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) return ret; } +static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + + if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) { + f2fs_warn(F2FS_I_SB(inode), + "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem.", + inode->i_ino); + return -EOPNOTSUPP; + } + + return fsverity_ioctl_enable(filp, (const void __user *)arg); +} + +static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) +{ + if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) + return -EOPNOTSUPP; + + return fsverity_ioctl_measure(filp, (void __user *)arg); +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3171,6 +3206,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_precache_extents(filp, arg); case F2FS_IOC_RESIZE_FS: return f2fs_ioc_resize_fs(filp, arg); + case FS_IOC_ENABLE_VERITY: + return f2fs_ioc_enable_verity(filp, arg); + case FS_IOC_MEASURE_VERITY: + return f2fs_ioc_measure_verity(filp, arg); default: return -ENOTTY; } @@ -3290,6 +3329,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_SET_PIN_FILE: case F2FS_IOC_PRECACHE_EXTENTS: case F2FS_IOC_RESIZE_FS: + case FS_IOC_ENABLE_VERITY: + case FS_IOC_MEASURE_VERITY: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index a33d7a849b2d..06da75d418e0 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -46,9 +46,11 @@ void f2fs_set_inode_flags(struct 
inode *inode) new_fl |= S_DIRSYNC; if (file_is_encrypt(inode)) new_fl |= S_ENCRYPTED; + if (file_is_verity(inode)) + new_fl |= S_VERITY; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC| - S_ENCRYPTED); + S_ENCRYPTED|S_VERITY); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -733,6 +735,7 @@ no_delete: } out_clear: fscrypt_put_encryption_info(inode); + fsverity_cleanup_inode(inode); clear_inode(inode); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e15bd29bd453..f43befda0e1a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3146,6 +3146,9 @@ try_onemore: #ifdef CONFIG_FS_ENCRYPTION sb->s_cop = &f2fs_cryptops; #endif +#ifdef CONFIG_FS_VERITY + sb->s_vop = &f2fs_verityops; +#endif sb->s_xattr = f2fs_xattr_handlers; sb->s_export_op = &f2fs_export_ops; sb->s_magic = F2FS_SUPER_MAGIC; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 3aeacd0aacfd..0cd64f994068 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -131,6 +131,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_lost_found(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "lost_found"); + if (f2fs_sb_has_verity(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "verity"); if (f2fs_sb_has_sb_chksum(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? 
", " : "", "sb_checksum"); @@ -364,6 +367,7 @@ enum feat_id { FEAT_QUOTA_INO, FEAT_INODE_CRTIME, FEAT_LOST_FOUND, + FEAT_VERITY, FEAT_SB_CHECKSUM, }; @@ -381,6 +385,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_QUOTA_INO: case FEAT_INODE_CRTIME: case FEAT_LOST_FOUND: + case FEAT_VERITY: case FEAT_SB_CHECKSUM: return snprintf(buf, PAGE_SIZE, "supported\n"); } @@ -470,6 +475,9 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); +#ifdef CONFIG_FS_VERITY +F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY); +#endif F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) @@ -534,6 +542,9 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(quota_ino), ATTR_LIST(inode_crtime), ATTR_LIST(lost_found), +#ifdef CONFIG_FS_VERITY + ATTR_LIST(verity), +#endif ATTR_LIST(sb_checksum), NULL, }; diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c new file mode 100644 index 000000000000..a401ef72bc82 --- /dev/null +++ b/fs/f2fs/verity.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/f2fs/verity.c: fs-verity support for f2fs + * + * Copyright 2019 Google LLC + */ + +/* + * Implementation of fsverity_operations for f2fs. + * + * Like ext4, f2fs stores the verity metadata (Merkle tree and + * fsverity_descriptor) past the end of the file, starting at the first 64K + * boundary beyond i_size. This approach works because (a) verity files are + * readonly, and (b) pages fully beyond i_size aren't visible to userspace but + * can be read/written internally by f2fs with only some relatively small + * changes to f2fs. 
Extended attributes cannot be used because (a) f2fs limits + * the total size of an inode's xattr entries to 4096 bytes, which wouldn't be + * enough for even a single Merkle tree block, and (b) f2fs encryption doesn't + * encrypt xattrs, yet the verity metadata *must* be encrypted when the file is + * because it contains hashes of the plaintext data. + * + * Using a 64K boundary rather than a 4K one keeps things ready for + * architectures with 64K pages, and it doesn't necessarily waste space on-disk + * since there can be a hole between i_size and the start of the Merkle tree. + */ + +#include <linux/f2fs_fs.h> + +#include "f2fs.h" +#include "xattr.h" + +static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode) +{ + return round_up(inode->i_size, 65536); +} + +/* + * Read some verity metadata from the inode. __vfs_read() can't be used because + * we need to read beyond i_size. + */ +static int pagecache_read(struct inode *inode, void *buf, size_t count, + loff_t pos) +{ + while (count) { + size_t n = min_t(size_t, count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; + void *addr; + + page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT, + NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + + addr = kmap_atomic(page); + memcpy(buf, addr + offset_in_page(pos), n); + kunmap_atomic(addr); + + put_page(page); + + buf += n; + pos += n; + count -= n; + } + return 0; +} + +/* + * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY. + * kernel_write() can't be used because the file descriptor is readonly. 
+ */ +static int pagecache_write(struct inode *inode, const void *buf, size_t count, + loff_t pos) +{ + if (pos + count > inode->i_sb->s_maxbytes) + return -EFBIG; + + while (count) { + size_t n = min_t(size_t, count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; + void *fsdata; + void *addr; + int res; + + res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0, + &page, &fsdata); + if (res) + return res; + + addr = kmap_atomic(page); + memcpy(addr + offset_in_page(pos), buf, n); + kunmap_atomic(addr); + + res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n, + page, fsdata); + if (res < 0) + return res; + if (res != n) + return -EIO; + + buf += n; + pos += n; + count -= n; + } + return 0; +} + +/* + * Format of f2fs verity xattr. This points to the location of the verity + * descriptor within the file data rather than containing it directly because + * the verity descriptor *must* be encrypted when f2fs encryption is used. But, + * f2fs encryption does not encrypt xattrs. + */ +struct fsverity_descriptor_location { + __le32 version; + __le32 size; + __le64 pos; +}; + +static int f2fs_begin_enable_verity(struct file *filp) +{ + struct inode *inode = file_inode(filp); + int err; + + if (f2fs_verity_in_progress(inode)) + return -EBUSY; + + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + return -EOPNOTSUPP; + + /* + * Since the file was opened readonly, we have to initialize the quotas + * here and not rely on ->open() doing it. This must be done before + * evicting the inline data. 
+ */ + err = dquot_initialize(inode); + if (err) + return err; + + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + + set_inode_flag(inode, FI_VERITY_IN_PROGRESS); + return 0; +} + +/* + * Finish (or abort) enabling verity on the file. A NULL @desc means that an + * earlier step failed and only cleanup is wanted. On success the descriptor + * is appended after the Merkle tree, its location is recorded in the verity + * xattr and the verity inode flag is set. On any failure, everything written + * past i_size is truncated again. Returns 0 on success or a negative errno + * (the first error seen). + */ +static int f2fs_end_enable_verity(struct file *filp, const void *desc, + size_t desc_size, u64 merkle_tree_size) +{ + struct inode *inode = file_inode(filp); + u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size; + struct fsverity_descriptor_location dloc = { + .version = cpu_to_le32(1), + .size = cpu_to_le32(desc_size), + .pos = cpu_to_le64(desc_pos), + }; + int err = 0; + int err2; + + if (desc == NULL) + goto cleanup; + + /* Append the verity descriptor. */ + err = pagecache_write(inode, desc, desc_size, desc_pos); + if (err) + goto cleanup; + + /* + * Write all pages before clearing FI_VERITY_IN_PROGRESS, and before + * the xattr and inode flag that refer to them are set. + */ + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto cleanup; + + /* Record where the descriptor lives. */ + err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY, + F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), + NULL, XATTR_CREATE); + if (err) + goto cleanup; + + /* Everything is in place; mark the inode as a verity file. */ + file_set_verity(inode); + f2fs_set_inode_flags(inode); + f2fs_mark_inode_dirty_sync(inode, true); + + clear_inode_flag(inode, FI_VERITY_IN_PROGRESS); + return 0; + +cleanup: + /* + * Enabling verity failed: truncate anything we wrote past i_size, from + * the page cache as well as from disk, then clear + * FI_VERITY_IN_PROGRESS. i_gc_rwsem[WRITE] is held across the + * truncate, as f2fs_write_failed() does for its truncate past i_size. + */ + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + truncate_inode_pages(inode->i_mapping, inode->i_size); + err2 = f2fs_truncate(inode); + if (err2) + f2fs_warn(F2FS_I_SB(inode), + "Truncating verity metadata failed (errno=%d)", err2); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + + clear_inode_flag(inode, FI_VERITY_IN_PROGRESS); + return err ?: err2; +} + +static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, + size_t buf_size) +{ + struct fsverity_descriptor_location dloc; + int res; + u32 size; + u64 pos; + + /* Get the descriptor location */ + res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_VERITY, + F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), NULL); + if (res < 0 && res != -ERANGE) + return res; + if (res != sizeof(dloc) || dloc.version != cpu_to_le32(1)) { + f2fs_warn(F2FS_I_SB(inode), "unknown verity xattr format"); + return -EINVAL; + } + size = le32_to_cpu(dloc.size); + pos = le64_to_cpu(dloc.pos); + + /* Get the descriptor */ + if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes || + pos < 
f2fs_verity_metadata_pos(inode) || size > INT_MAX) { + f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr"); + return -EFSCORRUPTED; + } + if (buf_size) { + if (size > buf_size) + return -ERANGE; + res = pagecache_read(inode, buf, size, pos); + if (res) + return res; + } + return size; +} + +static struct page *f2fs_read_merkle_tree_page(struct inode *inode, + pgoff_t index) +{ + index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; + + return read_mapping_page(inode->i_mapping, index, NULL); +} + +static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf, + u64 index, int log_blocksize) +{ + loff_t pos = f2fs_verity_metadata_pos(inode) + (index << log_blocksize); + + return pagecache_write(inode, buf, 1 << log_blocksize, pos); +} + +const struct fsverity_operations f2fs_verityops = { + .begin_enable_verity = f2fs_begin_enable_verity, + .end_enable_verity = f2fs_end_enable_verity, + .get_verity_descriptor = f2fs_get_verity_descriptor, + .read_merkle_tree_page = f2fs_read_merkle_tree_page, + .write_merkle_tree_block = f2fs_write_merkle_tree_block, +}; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index a90920e2f949..de0c600b9cab 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -34,8 +34,10 @@ #define F2FS_XATTR_INDEX_ADVISE 7 /* Should be same as EXT4_XATTR_INDEX_ENCRYPTION */ #define F2FS_XATTR_INDEX_ENCRYPTION 9 +#define F2FS_XATTR_INDEX_VERITY 11 #define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c" +#define F2FS_XATTR_NAME_VERITY "v" struct f2fs_xattr_header { __le32 h_magic; /* magic number for identification */ diff --git a/fs/verity/Kconfig b/fs/verity/Kconfig new file mode 100644 index 000000000000..88fb25119899 --- /dev/null +++ b/fs/verity/Kconfig @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: GPL-2.0 + +config FS_VERITY + bool "FS Verity (read-only file-based authenticity protection)" + select CRYPTO + # SHA-256 is selected as it's intended to be the default hash algorithm. 
+ # To avoid bloat, other wanted algorithms must be selected explicitly. + select CRYPTO_SHA256 + help + This option enables fs-verity. fs-verity is the dm-verity + mechanism implemented at the file level. On supported + filesystems (currently EXT4 and F2FS), userspace can use an + ioctl to enable verity for a file, which causes the filesystem + to build a Merkle tree for the file. The filesystem will then + transparently verify any data read from the file against the + Merkle tree. The file is also made read-only. + + This serves as an integrity check, but the availability of the + Merkle tree root hash also allows efficiently supporting + various use cases where normally the whole file would need to + be hashed at once, such as: (a) auditing (logging the file's + hash), or (b) authenticity verification (comparing the hash + against a known good value, e.g. from a digital signature). + + fs-verity is especially useful on large files where not all + the contents may actually be needed. Also, fs-verity verifies + data each time it is paged back in, which provides better + protection against malicious disks vs. an ahead-of-time hash. + + If unsure, say N. + +config FS_VERITY_DEBUG + bool "FS Verity debugging" + depends on FS_VERITY + help + Enable debugging messages related to fs-verity by default. + + Say N unless you are an fs-verity developer. + +config FS_VERITY_BUILTIN_SIGNATURES + bool "FS Verity builtin signature support" + depends on FS_VERITY + select SYSTEM_DATA_VERIFICATION + help + Support verifying signatures of verity files against the X.509 + certificates that have been loaded into the ".fs-verity" + kernel keyring. + + This is meant as a relatively simple mechanism that can be + used to provide an authenticity guarantee for verity files, as + an alternative to IMA appraisal. Userspace programs still + need to check that the verity bit is set in order to get an + authenticity guarantee. + + If unsure, say N. 
diff --git a/fs/verity/Makefile b/fs/verity/Makefile new file mode 100644 index 000000000000..570e9136334d --- /dev/null +++ b/fs/verity/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_FS_VERITY) += enable.o \ + hash_algs.o \ + init.o \ + measure.o \ + open.o \ + verify.o + +obj-$(CONFIG_FS_VERITY_BUILTIN_SIGNATURES) += signature.o diff --git a/fs/verity/enable.c b/fs/verity/enable.c new file mode 100644 index 000000000000..eabc6ac19906 --- /dev/null +++ b/fs/verity/enable.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/enable.c: ioctl to enable verity on a file + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include <crypto/hash.h> +#include <linux/mount.h> +#include <linux/pagemap.h> +#include <linux/sched/signal.h> +#include <linux/uaccess.h> + +static int build_merkle_tree_level(struct inode *inode, unsigned int level, + u64 num_blocks_to_hash, + const struct merkle_tree_params *params, + u8 *pending_hashes, + struct ahash_request *req) +{ + const struct fsverity_operations *vops = inode->i_sb->s_vop; + unsigned int pending_size = 0; + u64 dst_block_num; + u64 i; + int err; + + if (WARN_ON(params->block_size != PAGE_SIZE)) /* checked earlier too */ + return -EINVAL; + + if (level < params->num_levels) { + dst_block_num = params->level_start[level]; + } else { + if (WARN_ON(num_blocks_to_hash != 1)) + return -EINVAL; + dst_block_num = 0; /* unused */ + } + + for (i = 0; i < num_blocks_to_hash; i++) { + struct page *src_page; + + if ((pgoff_t)i % 10000 == 0 || i + 1 == num_blocks_to_hash) + pr_debug("Hashing block %llu of %llu for level %u\n", + i + 1, num_blocks_to_hash, level); + + if (level == 0) { + /* Leaf: hashing a data block */ + src_page = read_mapping_page(inode->i_mapping, i, NULL); + if (IS_ERR(src_page)) { + err = PTR_ERR(src_page); + fsverity_err(inode, + "Error %d reading data page %llu", + err, i); + return err; + } + } else { + /* Non-leaf: hashing hash block 
from level below */ + src_page = vops->read_merkle_tree_page(inode, + params->level_start[level - 1] + i); + if (IS_ERR(src_page)) { + err = PTR_ERR(src_page); + fsverity_err(inode, + "Error %d reading Merkle tree page %llu", + err, params->level_start[level - 1] + i); + return err; + } + } + + err = fsverity_hash_page(params, inode, req, src_page, + &pending_hashes[pending_size]); + put_page(src_page); + if (err) + return err; + pending_size += params->digest_size; + + if (level == params->num_levels) /* Root hash? */ + return 0; + + if (pending_size + params->digest_size > params->block_size || + i + 1 == num_blocks_to_hash) { + /* Flush the pending hash block */ + memset(&pending_hashes[pending_size], 0, + params->block_size - pending_size); + err = vops->write_merkle_tree_block(inode, + pending_hashes, + dst_block_num, + params->log_blocksize); + if (err) { + fsverity_err(inode, + "Error %d writing Merkle tree block %llu", + err, dst_block_num); + return err; + } + dst_block_num++; + pending_size = 0; + } + + if (fatal_signal_pending(current)) + return -EINTR; + cond_resched(); + } + return 0; +} + +/* + * Build the Merkle tree for the given inode using the given parameters, and + * return the root hash in @root_hash. + * + * The tree is written to a filesystem-specific location as determined by the + * ->write_merkle_tree_block() method. However, the blocks that comprise the + * tree are the same for all filesystems. 
+ */
+static int build_merkle_tree(struct inode *inode,
+			     const struct merkle_tree_params *params,
+			     u8 *root_hash)
+{
+	u8 *pending_hashes;
+	struct ahash_request *req;
+	u64 blocks;
+	unsigned int level;
+	int err = -ENOMEM;
+
+	if (inode->i_size == 0) {
+		/* Empty file is a special case; root hash is all 0's */
+		memset(root_hash, 0, params->digest_size);
+		return 0;
+	}
+
+	pending_hashes = kmalloc(params->block_size, GFP_KERNEL);
+	req = ahash_request_alloc(params->hash_alg->tfm, GFP_KERNEL);
+	if (!pending_hashes || !req)
+		goto out;
+
+	/*
+	 * Build each level of the Merkle tree, starting at the leaf level
+	 * (level 0) and ascending to the root node (level 'num_levels - 1').
+	 * Then at the end (level 'num_levels'), calculate the root hash.
+	 *
+	 * Do the round-up in u64: i_size is signed, so for a size within
+	 * block_size of S64_MAX the addition would overflow and yield a
+	 * bogus block count.
+	 */
+	blocks = ((u64)inode->i_size + params->block_size - 1) >>
+		 params->log_blocksize;
+	for (level = 0; level <= params->num_levels; level++) {
+		err = build_merkle_tree_level(inode, level, blocks, params,
+					      pending_hashes, req);
+		if (err)
+			goto out;
+		blocks = (blocks + params->hashes_per_block - 1) >>
+			 params->log_arity;
+	}
+	memcpy(root_hash, pending_hashes, params->digest_size);
+	err = 0;
+out:
+	kfree(pending_hashes);
+	ahash_request_free(req);
+	return err;
+}
+
+/*
+ * Enable verity on the file referred to by @filp using the already-validated
+ * arguments in @arg: fill in an fsverity_descriptor (copying the optional salt
+ * and signature from userspace), build the Merkle tree, create the
+ * fsverity_info, and have the filesystem finish enabling verity.
+ *
+ * If a step fails after ->begin_enable_verity() succeeded, the operation is
+ * rolled back by calling ->end_enable_verity() with a NULL descriptor.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int enable_verity(struct file *filp,
+			 const struct fsverity_enable_arg *arg)
+{
+	struct inode *inode = file_inode(filp);
+	const struct fsverity_operations *vops = inode->i_sb->s_vop;
+	struct merkle_tree_params params = { };
+	struct fsverity_descriptor *desc;
+	size_t desc_size = sizeof(*desc) + arg->sig_size;
+	struct fsverity_info *vi;
+	int err;
+
+	/* Start initializing the fsverity_descriptor */
+	desc = kzalloc(desc_size, GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+	desc->version = 1;
+	desc->hash_algorithm = arg->hash_algorithm;
+	desc->log_blocksize = ilog2(arg->block_size);
+
+	/* Get the salt if the user provided one */
+	if (arg->salt_size &&
+	    copy_from_user(desc->salt,
+			   (const u8 __user *)(uintptr_t)arg->salt_ptr,
+			   arg->salt_size)) {
+		err = -EFAULT;
+		goto out;
+	}
+	desc->salt_size = arg->salt_size;
+
+	/* Get the signature if the user provided one */
+	if (arg->sig_size &&
+	    copy_from_user(desc->signature,
+			   (const u8 __user *)(uintptr_t)arg->sig_ptr,
+			   arg->sig_size)) {
+		err = -EFAULT;
+		goto out;
+	}
+	desc->sig_size = cpu_to_le32(arg->sig_size);
+
+	desc->data_size = cpu_to_le64(inode->i_size);
+
+	/* Prepare the Merkle tree parameters */
+	err = fsverity_init_merkle_tree_params(&params, inode,
+					       arg->hash_algorithm,
+					       desc->log_blocksize,
+					       desc->salt, desc->salt_size);
+	if (err)
+		goto out;
+
+	/*
+	 * Start enabling verity on this file, serialized by the inode lock.
+	 * Fail if verity is already enabled or is already being enabled.
+	 */
+	inode_lock(inode);
+	if (IS_VERITY(inode))
+		err = -EEXIST;
+	else
+		err = vops->begin_enable_verity(filp);
+	inode_unlock(inode);
+	if (err)
+		goto out;
+
+	/*
+	 * Build the Merkle tree. Don't hold the inode lock during this, since
+	 * on huge files this may take a very long time and we don't want to
+	 * force unrelated syscalls like chown() to block forever. We don't
+	 * need the inode lock here because deny_write_access() already prevents
+	 * the file from being written to or truncated, and we still serialize
+	 * ->begin_enable_verity() and ->end_enable_verity() using the inode
+	 * lock and only allow one process to be here at a time on a given file.
+	 */
+	pr_debug("Building Merkle tree...\n");
+	BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE);
+	err = build_merkle_tree(inode, &params, desc->root_hash);
+	if (err) {
+		fsverity_err(inode, "Error %d building Merkle tree", err);
+		goto rollback;
+	}
+	pr_debug("Done building Merkle tree. Root hash is %s:%*phN\n",
+		 params.hash_alg->name, params.digest_size, desc->root_hash);
+
+	/*
+	 * Create the fsverity_info. Don't bother trying to save work by
+	 * reusing the merkle_tree_params from above. Instead, just create the
+	 * fsverity_info from the fsverity_descriptor as if it were just loaded
+	 * from disk. This is simpler, and it serves as an extra check that the
+	 * metadata we're writing is valid before actually enabling verity.
+	 */
+	vi = fsverity_create_info(inode, desc, desc_size);
+	if (IS_ERR(vi)) {
+		err = PTR_ERR(vi);
+		goto rollback;
+	}
+
+	if (arg->sig_size)
+		pr_debug("Storing a %u-byte PKCS#7 signature alongside the file\n",
+			 arg->sig_size);
+
+	/*
+	 * Tell the filesystem to finish enabling verity on the file.
+	 * Serialized with ->begin_enable_verity() by the inode lock.
+	 */
+	inode_lock(inode);
+	err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size);
+	inode_unlock(inode);
+	if (err) {
+		fsverity_err(inode, "%ps() failed with err %d",
+			     vops->end_enable_verity, err);
+		fsverity_free_info(vi);
+	} else if (WARN_ON(!IS_VERITY(inode))) {
+		err = -EINVAL;
+		fsverity_free_info(vi);
+	} else {
+		/* Successfully enabled verity */
+
+		/*
+		 * Readers can start using ->i_verity_info immediately, so it
+		 * can't be rolled back once set. So don't set it until just
+		 * after the filesystem has successfully enabled verity.
+		 */
+		fsverity_set_info(inode, vi);
+	}
+out:
+	kfree(params.hashstate);
+	kfree(desc);
+	return err;
+
+rollback:
+	inode_lock(inode);
+	(void)vops->end_enable_verity(filp, NULL, 0, params.tree_size);
+	inode_unlock(inode);
+	goto out;
+}
+
+/**
+ * fsverity_ioctl_enable() - enable verity on a file
+ *
+ * Enable fs-verity on a file. See the "FS_IOC_ENABLE_VERITY" section of
+ * Documentation/filesystems/fsverity.rst for the documentation.
+ * + * Return: 0 on success, -errno on failure + */ +int fsverity_ioctl_enable(struct file *filp, const void __user *uarg) +{ + struct inode *inode = file_inode(filp); + struct fsverity_enable_arg arg; + int err; + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + if (arg.version != 1) + return -EINVAL; + + if (arg.__reserved1 || + memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2))) + return -EINVAL; + + if (arg.block_size != PAGE_SIZE) + return -EINVAL; + + if (arg.salt_size > FIELD_SIZEOF(struct fsverity_descriptor, salt)) + return -EMSGSIZE; + + if (arg.sig_size > FS_VERITY_MAX_SIGNATURE_SIZE) + return -EMSGSIZE; + + /* + * Require a regular file with write access. But the actual fd must + * still be readonly so that we can lock out all writers. This is + * needed to guarantee that no writable fds exist to the file once it + * has verity enabled, and to stabilize the data being hashed. + */ + + err = inode_permission(inode, MAY_WRITE); + if (err) + return err; + + if (IS_APPEND(inode)) + return -EPERM; + + if (S_ISDIR(inode->i_mode)) + return -EISDIR; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + err = mnt_want_write_file(filp); + if (err) /* -EROFS */ + return err; + + err = deny_write_access(filp); + if (err) /* -ETXTBSY */ + goto out_drop_write; + + err = enable_verity(filp, &arg); + if (err) + goto out_allow_write_access; + + /* + * Some pages of the file may have been evicted from pagecache after + * being used in the Merkle tree construction, then read into pagecache + * again by another process reading from the file concurrently. Since + * these pages didn't undergo verification against the file measurement + * which fs-verity now claims to be enforcing, we have to wipe the + * pagecache to ensure that all future reads are verified. + */ + filemap_write_and_wait(inode->i_mapping); + invalidate_inode_pages2(inode->i_mapping); + + /* + * allow_write_access() is needed to pair with deny_write_access(). 
+ * Regardless, the filesystem won't allow writing to verity files. + */ +out_allow_write_access: + allow_write_access(filp); +out_drop_write: + mnt_drop_write_file(filp); + return err; +} +EXPORT_SYMBOL_GPL(fsverity_ioctl_enable); diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h new file mode 100644 index 000000000000..e74c79b64d88 --- /dev/null +++ b/fs/verity/fsverity_private.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs-verity: read-only file-based authenticity protection + * + * Copyright 2019 Google LLC + */ + +#ifndef _FSVERITY_PRIVATE_H +#define _FSVERITY_PRIVATE_H + +#ifdef CONFIG_FS_VERITY_DEBUG +#define DEBUG +#endif + +#define pr_fmt(fmt) "fs-verity: " fmt + +#include <crypto/sha.h> +#include <linux/fsverity.h> + +struct ahash_request; + +/* + * Implementation limit: maximum depth of the Merkle tree. For now 8 is plenty; + * it's enough for over U64_MAX bytes of data using SHA-256 and 4K blocks. + */ +#define FS_VERITY_MAX_LEVELS 8 + +/* + * Largest digest size among all hash algorithms supported by fs-verity. + * Currently assumed to be <= size of fsverity_descriptor::root_hash. + */ +#define FS_VERITY_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE + +/* A hash algorithm supported by fs-verity */ +struct fsverity_hash_alg { + struct crypto_ahash *tfm; /* hash tfm, allocated on demand */ + const char *name; /* crypto API name, e.g. sha256 */ + unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */ + unsigned int block_size; /* block size in bytes, e.g. 
64 for SHA-256 */ +}; + +/* Merkle tree parameters: hash algorithm, initial hash state, and topology */ +struct merkle_tree_params { + const struct fsverity_hash_alg *hash_alg; /* the hash algorithm */ + const u8 *hashstate; /* initial hash state or NULL */ + unsigned int digest_size; /* same as hash_alg->digest_size */ + unsigned int block_size; /* size of data and tree blocks */ + unsigned int hashes_per_block; /* number of hashes per tree block */ + unsigned int log_blocksize; /* log2(block_size) */ + unsigned int log_arity; /* log2(hashes_per_block) */ + unsigned int num_levels; /* number of levels in Merkle tree */ + u64 tree_size; /* Merkle tree size in bytes */ + + /* + * Starting block index for each tree level, ordered from leaf level (0) + * to root level ('num_levels - 1') + */ + u64 level_start[FS_VERITY_MAX_LEVELS]; +}; + +/** + * fsverity_info - cached verity metadata for an inode + * + * When a verity file is first opened, an instance of this struct is allocated + * and stored in ->i_verity_info; it remains until the inode is evicted. It + * caches information about the Merkle tree that's needed to efficiently verify + * data read from the file. It also caches the file measurement. The Merkle + * tree pages themselves are not cached here, but the filesystem may cache them. + */ +struct fsverity_info { + struct merkle_tree_params tree_params; + u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE]; + u8 measurement[FS_VERITY_MAX_DIGEST_SIZE]; + const struct inode *inode; +}; + +/* + * Merkle tree properties. The file measurement is the hash of this structure + * excluding the signature and with the sig_size field set to 0. 
+ */ +struct fsverity_descriptor { + __u8 version; /* must be 1 */ + __u8 hash_algorithm; /* Merkle tree hash algorithm */ + __u8 log_blocksize; /* log2 of size of data and tree blocks */ + __u8 salt_size; /* size of salt in bytes; 0 if none */ + __le32 sig_size; /* size of signature in bytes; 0 if none */ + __le64 data_size; /* size of file the Merkle tree is built over */ + __u8 root_hash[64]; /* Merkle tree root hash */ + __u8 salt[32]; /* salt prepended to each hashed block */ + __u8 __reserved[144]; /* must be 0's */ + __u8 signature[]; /* optional PKCS#7 signature */ +}; + +/* Arbitrary limit to bound the kmalloc() size. Can be changed. */ +#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384 + +#define FS_VERITY_MAX_SIGNATURE_SIZE (FS_VERITY_MAX_DESCRIPTOR_SIZE - \ + sizeof(struct fsverity_descriptor)) + +/* + * Format in which verity file measurements are signed. This is the same as + * 'struct fsverity_digest', except here some magic bytes are prepended to + * provide some context about what is being signed in case the same key is used + * for non-fsverity purposes, and here the fields have fixed endianness. 
+ */ +struct fsverity_signed_digest { + char magic[8]; /* must be "FSVerity" */ + __le16 digest_algorithm; + __le16 digest_size; + __u8 digest[]; +}; + +/* hash_algs.c */ + +extern struct fsverity_hash_alg fsverity_hash_algs[]; + +const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num); +const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, + const u8 *salt, size_t salt_size); +int fsverity_hash_page(const struct merkle_tree_params *params, + const struct inode *inode, + struct ahash_request *req, struct page *page, u8 *out); +int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, + const void *data, size_t size, u8 *out); +void __init fsverity_check_hash_algs(void); + +/* init.c */ + +extern void __printf(3, 4) __cold +fsverity_msg(const struct inode *inode, const char *level, + const char *fmt, ...); + +#define fsverity_warn(inode, fmt, ...) \ + fsverity_msg((inode), KERN_WARNING, fmt, ##__VA_ARGS__) +#define fsverity_err(inode, fmt, ...) 
\ + fsverity_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__) + +/* open.c */ + +int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, + const struct inode *inode, + unsigned int hash_algorithm, + unsigned int log_blocksize, + const u8 *salt, size_t salt_size); + +struct fsverity_info *fsverity_create_info(const struct inode *inode, + void *desc, size_t desc_size); + +void fsverity_set_info(struct inode *inode, struct fsverity_info *vi); + +void fsverity_free_info(struct fsverity_info *vi); + +int __init fsverity_init_info_cache(void); +void __init fsverity_exit_info_cache(void); + +/* signature.c */ + +#ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES +int fsverity_verify_signature(const struct fsverity_info *vi, + const struct fsverity_descriptor *desc, + size_t desc_size); + +int __init fsverity_init_signature(void); +#else /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */ +static inline int +fsverity_verify_signature(const struct fsverity_info *vi, + const struct fsverity_descriptor *desc, + size_t desc_size) +{ + return 0; +} + +static inline int fsverity_init_signature(void) +{ + return 0; +} +#endif /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */ + +/* verify.c */ + +int __init fsverity_init_workqueue(void); +void __init fsverity_exit_workqueue(void); + +#endif /* _FSVERITY_PRIVATE_H */ diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c new file mode 100644 index 000000000000..31e6d7d2389a --- /dev/null +++ b/fs/verity/hash_algs.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/hash_algs.c: fs-verity hash algorithms + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include <crypto/hash.h> +#include <linux/scatterlist.h> + +/* The hash algorithms supported by fs-verity */ +struct fsverity_hash_alg fsverity_hash_algs[] = { + [FS_VERITY_HASH_ALG_SHA256] = { + .name = "sha256", + .digest_size = SHA256_DIGEST_SIZE, + .block_size = SHA256_BLOCK_SIZE, + }, + [FS_VERITY_HASH_ALG_SHA512] = { + .name = "sha512", + 
.digest_size = SHA512_DIGEST_SIZE, + .block_size = SHA512_BLOCK_SIZE, + }, +}; + +/** + * fsverity_get_hash_alg() - validate and prepare a hash algorithm + * @inode: optional inode for logging purposes + * @num: the hash algorithm number + * + * Get the struct fsverity_hash_alg for the given hash algorithm number, and + * ensure it has a hash transform ready to go. The hash transforms are + * allocated on-demand so that we don't waste resources unnecessarily, and + * because the crypto modules may be initialized later than fs/verity/. + * + * Return: pointer to the hash alg on success, else an ERR_PTR() + */ +const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num) +{ + struct fsverity_hash_alg *alg; + struct crypto_ahash *tfm; + int err; + + if (num >= ARRAY_SIZE(fsverity_hash_algs) || + !fsverity_hash_algs[num].name) { + fsverity_warn(inode, "Unknown hash algorithm number: %u", num); + return ERR_PTR(-EINVAL); + } + alg = &fsverity_hash_algs[num]; + + /* pairs with cmpxchg() below */ + tfm = READ_ONCE(alg->tfm); + if (likely(tfm != NULL)) + return alg; + /* + * Using the shash API would make things a bit simpler, but the ahash + * API is preferable as it allows the use of crypto accelerators. 
+ */ + tfm = crypto_alloc_ahash(alg->name, 0, 0); + if (IS_ERR(tfm)) { + if (PTR_ERR(tfm) == -ENOENT) { + fsverity_warn(inode, + "Missing crypto API support for hash algorithm \"%s\"", + alg->name); + return ERR_PTR(-ENOPKG); + } + fsverity_err(inode, + "Error allocating hash algorithm \"%s\": %ld", + alg->name, PTR_ERR(tfm)); + return ERR_CAST(tfm); + } + + err = -EINVAL; + if (WARN_ON(alg->digest_size != crypto_ahash_digestsize(tfm))) + goto err_free_tfm; + if (WARN_ON(alg->block_size != crypto_ahash_blocksize(tfm))) + goto err_free_tfm; + + pr_info("%s using implementation \"%s\"\n", + alg->name, crypto_ahash_driver_name(tfm)); + + /* pairs with READ_ONCE() above */ + if (cmpxchg(&alg->tfm, NULL, tfm) != NULL) + crypto_free_ahash(tfm); + + return alg; + +err_free_tfm: + crypto_free_ahash(tfm); + return ERR_PTR(err); +} + +/** + * fsverity_prepare_hash_state() - precompute the initial hash state + * @alg: hash algorithm + * @salt: a salt which is to be prepended to all data to be hashed + * @salt_size: salt size in bytes, possibly 0 + * + * Return: NULL if the salt is empty, otherwise the kmalloc()'ed precomputed + * initial hash state on success or an ERR_PTR() on failure. + */ +const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, + const u8 *salt, size_t salt_size) +{ + u8 *hashstate = NULL; + struct ahash_request *req = NULL; + u8 *padded_salt = NULL; + size_t padded_salt_size; + struct scatterlist sg; + DECLARE_CRYPTO_WAIT(wait); + int err; + + if (salt_size == 0) + return NULL; + + hashstate = kmalloc(crypto_ahash_statesize(alg->tfm), GFP_KERNEL); + if (!hashstate) + return ERR_PTR(-ENOMEM); + + req = ahash_request_alloc(alg->tfm, GFP_KERNEL); + if (!req) { + err = -ENOMEM; + goto err_free; + } + + /* + * Zero-pad the salt to the next multiple of the input size of the hash + * algorithm's compression function, e.g. 64 bytes for SHA-256 or 128 + * bytes for SHA-512. 
This ensures that the hash algorithm won't have + * any bytes buffered internally after processing the salt, thus making + * salted hashing just as fast as unsalted hashing. + */ + padded_salt_size = round_up(salt_size, alg->block_size); + padded_salt = kzalloc(padded_salt_size, GFP_KERNEL); + if (!padded_salt) { + err = -ENOMEM; + goto err_free; + } + memcpy(padded_salt, salt, salt_size); + + sg_init_one(&sg, padded_salt, padded_salt_size); + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + ahash_request_set_crypt(req, &sg, NULL, padded_salt_size); + + err = crypto_wait_req(crypto_ahash_init(req), &wait); + if (err) + goto err_free; + + err = crypto_wait_req(crypto_ahash_update(req), &wait); + if (err) + goto err_free; + + err = crypto_ahash_export(req, hashstate); + if (err) + goto err_free; +out: + ahash_request_free(req); + kfree(padded_salt); + return hashstate; + +err_free: + kfree(hashstate); + hashstate = ERR_PTR(err); + goto out; +} + +/** + * fsverity_hash_page() - hash a single data or hash page + * @params: the Merkle tree's parameters + * @inode: inode for which the hashing is being done + * @req: preallocated hash request + * @page: the page to hash + * @out: output digest, size 'params->digest_size' bytes + * + * Hash a single data or hash block, assuming block_size == PAGE_SIZE. + * The hash is salted if a salt is specified in the Merkle tree parameters. 
+ * + * Return: 0 on success, -errno on failure + */ +int fsverity_hash_page(const struct merkle_tree_params *params, + const struct inode *inode, + struct ahash_request *req, struct page *page, u8 *out) +{ + struct scatterlist sg; + DECLARE_CRYPTO_WAIT(wait); + int err; + + if (WARN_ON(params->block_size != PAGE_SIZE)) + return -EINVAL; + + sg_init_table(&sg, 1); + sg_set_page(&sg, page, PAGE_SIZE, 0); + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + ahash_request_set_crypt(req, &sg, out, PAGE_SIZE); + + if (params->hashstate) { + err = crypto_ahash_import(req, params->hashstate); + if (err) { + fsverity_err(inode, + "Error %d importing hash state", err); + return err; + } + err = crypto_ahash_finup(req); + } else { + err = crypto_ahash_digest(req); + } + + err = crypto_wait_req(err, &wait); + if (err) + fsverity_err(inode, "Error %d computing page hash", err); + return err; +} + +/** + * fsverity_hash_buffer() - hash some data + * @alg: the hash algorithm to use + * @data: the data to hash + * @size: size of data to hash, in bytes + * @out: output digest, size 'alg->digest_size' bytes + * + * Hash some data which is located in physically contiguous memory (i.e. memory + * allocated by kmalloc(), not by vmalloc()). No salt is used. 
+ * + * Return: 0 on success, -errno on failure + */ +int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, + const void *data, size_t size, u8 *out) +{ + struct ahash_request *req; + struct scatterlist sg; + DECLARE_CRYPTO_WAIT(wait); + int err; + + req = ahash_request_alloc(alg->tfm, GFP_KERNEL); + if (!req) + return -ENOMEM; + + sg_init_one(&sg, data, size); + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + ahash_request_set_crypt(req, &sg, out, size); + + err = crypto_wait_req(crypto_ahash_digest(req), &wait); + + ahash_request_free(req); + return err; +} + +void __init fsverity_check_hash_algs(void) +{ + size_t i; + + /* + * Sanity check the hash algorithms (could be a build-time check, but + * they're in an array) + */ + for (i = 0; i < ARRAY_SIZE(fsverity_hash_algs); i++) { + const struct fsverity_hash_alg *alg = &fsverity_hash_algs[i]; + + if (!alg->name) + continue; + + BUG_ON(alg->digest_size > FS_VERITY_MAX_DIGEST_SIZE); + + /* + * For efficiency, the implementation currently assumes the + * digest and block sizes are powers of 2. This limitation can + * be lifted if the code is updated to handle other values. + */ + BUG_ON(!is_power_of_2(alg->digest_size)); + BUG_ON(!is_power_of_2(alg->block_size)); + } +} diff --git a/fs/verity/init.c b/fs/verity/init.c new file mode 100644 index 000000000000..94c104e00861 --- /dev/null +++ b/fs/verity/init.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/init.c: fs-verity module initialization and logging + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include <linux/ratelimit.h> + +void fsverity_msg(const struct inode *inode, const char *level, + const char *fmt, ...) 
+{ + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + struct va_format vaf; + va_list args; + + if (!__ratelimit(&rs)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (inode) + printk("%sfs-verity (%s, inode %lu): %pV\n", + level, inode->i_sb->s_id, inode->i_ino, &vaf); + else + printk("%sfs-verity: %pV\n", level, &vaf); + va_end(args); +} + +static int __init fsverity_init(void) +{ + int err; + + fsverity_check_hash_algs(); + + err = fsverity_init_info_cache(); + if (err) + return err; + + err = fsverity_init_workqueue(); + if (err) + goto err_exit_info_cache; + + err = fsverity_init_signature(); + if (err) + goto err_exit_workqueue; + + pr_debug("Initialized fs-verity\n"); + return 0; + +err_exit_workqueue: + fsverity_exit_workqueue(); +err_exit_info_cache: + fsverity_exit_info_cache(); + return err; +} +late_initcall(fsverity_init) diff --git a/fs/verity/measure.c b/fs/verity/measure.c new file mode 100644 index 000000000000..05049b68c745 --- /dev/null +++ b/fs/verity/measure.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/measure.c: ioctl to get a verity file's measurement + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include <linux/uaccess.h> + +/** + * fsverity_ioctl_measure() - get a verity file's measurement + * + * Retrieve the file measurement that the kernel is enforcing for reads from a + * verity file. See the "FS_IOC_MEASURE_VERITY" section of + * Documentation/filesystems/fsverity.rst for the documentation. 
+ * + * Return: 0 on success, -errno on failure + */ +int fsverity_ioctl_measure(struct file *filp, void __user *_uarg) +{ + const struct inode *inode = file_inode(filp); + struct fsverity_digest __user *uarg = _uarg; + const struct fsverity_info *vi; + const struct fsverity_hash_alg *hash_alg; + struct fsverity_digest arg; + + vi = fsverity_get_info(inode); + if (!vi) + return -ENODATA; /* not a verity file */ + hash_alg = vi->tree_params.hash_alg; + + /* + * The user specifies the digest_size their buffer has space for; we can + * return the digest if it fits in the available space. We write back + * the actual size, which may be shorter than the user-specified size. + */ + + if (get_user(arg.digest_size, &uarg->digest_size)) + return -EFAULT; + if (arg.digest_size < hash_alg->digest_size) + return -EOVERFLOW; + + memset(&arg, 0, sizeof(arg)); + arg.digest_algorithm = hash_alg - fsverity_hash_algs; + arg.digest_size = hash_alg->digest_size; + + if (copy_to_user(uarg, &arg, sizeof(arg))) + return -EFAULT; + + if (copy_to_user(uarg->digest, vi->measurement, hash_alg->digest_size)) + return -EFAULT; + + return 0; +} +EXPORT_SYMBOL_GPL(fsverity_ioctl_measure); diff --git a/fs/verity/open.c b/fs/verity/open.c new file mode 100644 index 000000000000..63d1004b688c --- /dev/null +++ b/fs/verity/open.c @@ -0,0 +1,356 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/open.c: opening fs-verity files + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include <linux/slab.h> + +static struct kmem_cache *fsverity_info_cachep; + +/** + * fsverity_init_merkle_tree_params() - initialize Merkle tree parameters + * @params: the parameters struct to initialize + * @inode: the inode for which the Merkle tree is being built + * @hash_algorithm: number of hash algorithm to use + * @log_blocksize: log base 2 of block size to use + * @salt: pointer to salt (optional) + * @salt_size: size of salt, possibly 0 + * + * Validate the hash algorithm and block 
size, then compute the tree topology
+ * (num levels, num blocks in each level, etc.) and initialize @params.
+ *
+ * On failure @params is zeroed again and any allocated hash state is freed.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
+				     const struct inode *inode,
+				     unsigned int hash_algorithm,
+				     unsigned int log_blocksize,
+				     const u8 *salt, size_t salt_size)
+{
+	const struct fsverity_hash_alg *hash_alg;
+	int err;
+	u64 blocks;
+	u64 offset;
+	int level;
+
+	memset(params, 0, sizeof(*params));
+
+	hash_alg = fsverity_get_hash_alg(inode, hash_algorithm);
+	if (IS_ERR(hash_alg))
+		return PTR_ERR(hash_alg);
+	params->hash_alg = hash_alg;
+	params->digest_size = hash_alg->digest_size;
+
+	params->hashstate = fsverity_prepare_hash_state(hash_alg, salt,
+							salt_size);
+	if (IS_ERR(params->hashstate)) {
+		err = PTR_ERR(params->hashstate);
+		params->hashstate = NULL;
+		fsverity_err(inode, "Error %d preparing hash state", err);
+		goto out_err;
+	}
+
+	if (log_blocksize != PAGE_SHIFT) {
+		fsverity_warn(inode, "Unsupported log_blocksize: %u",
+			      log_blocksize);
+		err = -EINVAL;
+		goto out_err;
+	}
+	params->log_blocksize = log_blocksize;
+	params->block_size = 1 << log_blocksize;
+
+	if (WARN_ON(!is_power_of_2(params->digest_size))) {
+		err = -EINVAL;
+		goto out_err;
+	}
+	if (params->block_size < 2 * params->digest_size) {
+		fsverity_warn(inode,
+			      "Merkle tree block size (%u) too small for hash algorithm \"%s\"",
+			      params->block_size, hash_alg->name);
+		err = -EINVAL;
+		goto out_err;
+	}
+	params->log_arity = params->log_blocksize - ilog2(params->digest_size);
+	params->hashes_per_block = 1 << params->log_arity;
+
+	pr_debug("Merkle tree uses %s with %u-byte blocks (%u hashes/block), salt=%*phN\n",
+		 hash_alg->name, params->block_size, params->hashes_per_block,
+		 (int)salt_size, salt);
+
+	/*
+	 * Compute the number of levels in the Merkle tree and create a map from
+	 * level to the starting block of that level. Level 'num_levels - 1' is
+	 * the root and is stored first. Level 0 is the level directly "above"
+	 * the data blocks and is stored last.
+	 */
+
+	/*
+	 * Compute number of levels and the number of blocks in each level.
+	 * The round-up is done in u64: i_size is signed, so for a size within
+	 * block_size of S64_MAX the addition would otherwise overflow and the
+	 * resulting block count would trip the FS_VERITY_MAX_LEVELS check.
+	 */
+	blocks = ((u64)inode->i_size + params->block_size - 1) >> log_blocksize;
+	pr_debug("Data is %lld bytes (%llu blocks)\n", inode->i_size, blocks);
+	while (blocks > 1) {
+		if (params->num_levels >= FS_VERITY_MAX_LEVELS) {
+			fsverity_err(inode, "Too many levels in Merkle tree");
+			err = -EINVAL;
+			goto out_err;
+		}
+		blocks = (blocks + params->hashes_per_block - 1) >>
+			 params->log_arity;
+		/* temporarily using level_start[] to store blocks in level */
+		params->level_start[params->num_levels++] = blocks;
+	}
+
+	/* Compute the starting block of each level */
+	offset = 0;
+	for (level = (int)params->num_levels - 1; level >= 0; level--) {
+		blocks = params->level_start[level];
+		params->level_start[level] = offset;
+		pr_debug("Level %d is %llu blocks starting at index %llu\n",
+			 level, blocks, offset);
+		offset += blocks;
+	}
+
+	params->tree_size = offset << log_blocksize;
+	return 0;
+
+out_err:
+	kfree(params->hashstate);
+	memset(params, 0, sizeof(*params));
+	return err;
+}
+
+/*
+ * Compute the file measurement by hashing the fsverity_descriptor excluding the
+ * signature and with the sig_size field set to 0.
+ *
+ * desc->sig_size is zeroed only for the duration of the hash and restored
+ * before returning, so the caller's descriptor is unchanged on return.
+ */
+static int compute_file_measurement(const struct fsverity_hash_alg *hash_alg,
+				    struct fsverity_descriptor *desc,
+				    u8 *measurement)
+{
+	__le32 sig_size = desc->sig_size;
+	int err;
+
+	desc->sig_size = 0;
+	err = fsverity_hash_buffer(hash_alg, desc, sizeof(*desc), measurement);
+	desc->sig_size = sig_size;
+
+	return err;
+}
+
+/*
+ * Validate the given fsverity_descriptor and create a new fsverity_info from
+ * it. The signature (if present) is also checked.
+ */ +struct fsverity_info *fsverity_create_info(const struct inode *inode, + void *_desc, size_t desc_size) +{ + struct fsverity_descriptor *desc = _desc; + struct fsverity_info *vi; + int err; + + if (desc_size < sizeof(*desc)) { + fsverity_err(inode, "Unrecognized descriptor size: %zu bytes", + desc_size); + return ERR_PTR(-EINVAL); + } + + if (desc->version != 1) { + fsverity_err(inode, "Unrecognized descriptor version: %u", + desc->version); + return ERR_PTR(-EINVAL); + } + + if (memchr_inv(desc->__reserved, 0, sizeof(desc->__reserved))) { + fsverity_err(inode, "Reserved bits set in descriptor"); + return ERR_PTR(-EINVAL); + } + + if (desc->salt_size > sizeof(desc->salt)) { + fsverity_err(inode, "Invalid salt_size: %u", desc->salt_size); + return ERR_PTR(-EINVAL); + } + + if (le64_to_cpu(desc->data_size) != inode->i_size) { + fsverity_err(inode, + "Wrong data_size: %llu (desc) != %lld (inode)", + le64_to_cpu(desc->data_size), inode->i_size); + return ERR_PTR(-EINVAL); + } + + vi = kmem_cache_zalloc(fsverity_info_cachep, GFP_KERNEL); + if (!vi) + return ERR_PTR(-ENOMEM); + vi->inode = inode; + + err = fsverity_init_merkle_tree_params(&vi->tree_params, inode, + desc->hash_algorithm, + desc->log_blocksize, + desc->salt, desc->salt_size); + if (err) { + fsverity_err(inode, + "Error %d initializing Merkle tree parameters", + err); + goto out; + } + + memcpy(vi->root_hash, desc->root_hash, vi->tree_params.digest_size); + + err = compute_file_measurement(vi->tree_params.hash_alg, desc, + vi->measurement); + if (err) { + fsverity_err(inode, "Error %d computing file measurement", err); + goto out; + } + pr_debug("Computed file measurement: %s:%*phN\n", + vi->tree_params.hash_alg->name, + vi->tree_params.digest_size, vi->measurement); + + err = fsverity_verify_signature(vi, desc, desc_size); +out: + if (err) { + fsverity_free_info(vi); + vi = ERR_PTR(err); + } + return vi; +} + +void fsverity_set_info(struct inode *inode, struct fsverity_info *vi) +{ + /* + * Multiple 
processes may race to set ->i_verity_info, so use cmpxchg. + * This pairs with the READ_ONCE() in fsverity_get_info(). + */ + if (cmpxchg(&inode->i_verity_info, NULL, vi) != NULL) + fsverity_free_info(vi); +} + +void fsverity_free_info(struct fsverity_info *vi) +{ + if (!vi) + return; + kfree(vi->tree_params.hashstate); + kmem_cache_free(fsverity_info_cachep, vi); +} + +/* Ensure the inode has an ->i_verity_info */ +static int ensure_verity_info(struct inode *inode) +{ + struct fsverity_info *vi = fsverity_get_info(inode); + struct fsverity_descriptor *desc; + int res; + + if (vi) + return 0; + + res = inode->i_sb->s_vop->get_verity_descriptor(inode, NULL, 0); + if (res < 0) { + fsverity_err(inode, + "Error %d getting verity descriptor size", res); + return res; + } + if (res > FS_VERITY_MAX_DESCRIPTOR_SIZE) { + fsverity_err(inode, "Verity descriptor is too large (%d bytes)", + res); + return -EMSGSIZE; + } + desc = kmalloc(res, GFP_KERNEL); + if (!desc) + return -ENOMEM; + res = inode->i_sb->s_vop->get_verity_descriptor(inode, desc, res); + if (res < 0) { + fsverity_err(inode, "Error %d reading verity descriptor", res); + goto out_free_desc; + } + + vi = fsverity_create_info(inode, desc, res); + if (IS_ERR(vi)) { + res = PTR_ERR(vi); + goto out_free_desc; + } + + fsverity_set_info(inode, vi); + res = 0; +out_free_desc: + kfree(desc); + return res; +} + +/** + * fsverity_file_open() - prepare to open a verity file + * @inode: the inode being opened + * @filp: the struct file being set up + * + * When opening a verity file, deny the open if it is for writing. Otherwise, + * set up the inode's ->i_verity_info if not already done. + * + * When combined with fscrypt, this must be called after fscrypt_file_open(). + * Otherwise, we won't have the key set up to decrypt the verity metadata. 
+ * + * Return: 0 on success, -errno on failure + */ +int fsverity_file_open(struct inode *inode, struct file *filp) +{ + if (!IS_VERITY(inode)) + return 0; + + if (filp->f_mode & FMODE_WRITE) { + pr_debug("Denying opening verity file (ino %lu) for write\n", + inode->i_ino); + return -EPERM; + } + + return ensure_verity_info(inode); +} +EXPORT_SYMBOL_GPL(fsverity_file_open); + +/** + * fsverity_prepare_setattr() - prepare to change a verity inode's attributes + * @dentry: dentry through which the inode is being changed + * @attr: attributes to change + * + * Verity files are immutable, so deny truncates. This isn't covered by the + * open-time check because sys_truncate() takes a path, not a file descriptor. + * + * Return: 0 on success, -errno on failure + */ +int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr) +{ + if (IS_VERITY(d_inode(dentry)) && (attr->ia_valid & ATTR_SIZE)) { + pr_debug("Denying truncate of verity file (ino %lu)\n", + d_inode(dentry)->i_ino); + return -EPERM; + } + return 0; +} +EXPORT_SYMBOL_GPL(fsverity_prepare_setattr); + +/** + * fsverity_cleanup_inode() - free the inode's verity info, if present + * + * Filesystems must call this on inode eviction to free ->i_verity_info. 
+ */ +void fsverity_cleanup_inode(struct inode *inode) +{ + fsverity_free_info(inode->i_verity_info); + inode->i_verity_info = NULL; +} +EXPORT_SYMBOL_GPL(fsverity_cleanup_inode); + +int __init fsverity_init_info_cache(void) +{ + fsverity_info_cachep = KMEM_CACHE_USERCOPY(fsverity_info, + SLAB_RECLAIM_ACCOUNT, + measurement); + if (!fsverity_info_cachep) + return -ENOMEM; + return 0; +} + +void __init fsverity_exit_info_cache(void) +{ + kmem_cache_destroy(fsverity_info_cachep); + fsverity_info_cachep = NULL; +} diff --git a/fs/verity/signature.c b/fs/verity/signature.c new file mode 100644 index 000000000000..c8b255232de5 --- /dev/null +++ b/fs/verity/signature.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/signature.c: verification of builtin signatures + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include <linux/cred.h> +#include <linux/key.h> +#include <linux/slab.h> +#include <linux/verification.h> + +/* + * /proc/sys/fs/verity/require_signatures + * If 1, all verity files must have a valid builtin signature. + */ +static int fsverity_require_signatures; + +/* + * Keyring that contains the trusted X.509 certificates. + * + * Only root (kuid=0) can modify this. Also, root may use + * keyctl_restrict_keyring() to prevent any more additions. + */ +static struct key *fsverity_keyring; + +/** + * fsverity_verify_signature() - check a verity file's signature + * + * If the file's fs-verity descriptor includes a signature of the file + * measurement, verify it against the certificates in the fs-verity keyring. 
+ * + * Return: 0 on success (signature valid or not required); -errno on failure + */ +int fsverity_verify_signature(const struct fsverity_info *vi, + const struct fsverity_descriptor *desc, + size_t desc_size) +{ + const struct inode *inode = vi->inode; + const struct fsverity_hash_alg *hash_alg = vi->tree_params.hash_alg; + const u32 sig_size = le32_to_cpu(desc->sig_size); + struct fsverity_signed_digest *d; + int err; + + if (sig_size == 0) { + if (fsverity_require_signatures) { + fsverity_err(inode, + "require_signatures=1, rejecting unsigned file!"); + return -EPERM; + } + return 0; + } + + if (sig_size > desc_size - sizeof(*desc)) { + fsverity_err(inode, "Signature overflows verity descriptor"); + return -EBADMSG; + } + + d = kzalloc(sizeof(*d) + hash_alg->digest_size, GFP_KERNEL); + if (!d) + return -ENOMEM; + memcpy(d->magic, "FSVerity", 8); + d->digest_algorithm = cpu_to_le16(hash_alg - fsverity_hash_algs); + d->digest_size = cpu_to_le16(hash_alg->digest_size); + memcpy(d->digest, vi->measurement, hash_alg->digest_size); + + err = verify_pkcs7_signature(d, sizeof(*d) + hash_alg->digest_size, + desc->signature, sig_size, + fsverity_keyring, + VERIFYING_UNSPECIFIED_SIGNATURE, + NULL, NULL); + kfree(d); + + if (err) { + if (err == -ENOKEY) + fsverity_err(inode, + "File's signing cert isn't in the fs-verity keyring"); + else if (err == -EKEYREJECTED) + fsverity_err(inode, "Incorrect file signature"); + else if (err == -EBADMSG) + fsverity_err(inode, "Malformed file signature"); + else + fsverity_err(inode, "Error %d verifying file signature", + err); + return err; + } + + pr_debug("Valid signature for file measurement %s:%*phN\n", + hash_alg->name, hash_alg->digest_size, vi->measurement); + return 0; +} + +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *fsverity_sysctl_header; + +static const struct ctl_path fsverity_sysctl_path[] = { + { .procname = "fs", }, + { .procname = "verity", }, + { } +}; + +static struct ctl_table fsverity_sysctl_table[] = { + 
{ + .procname = "require_signatures", + .data = &fsverity_require_signatures, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static int __init fsverity_sysctl_init(void) +{ + fsverity_sysctl_header = register_sysctl_paths(fsverity_sysctl_path, + fsverity_sysctl_table); + if (!fsverity_sysctl_header) { + pr_err("sysctl registration failed!\n"); + return -ENOMEM; + } + return 0; +} +#else /* !CONFIG_SYSCTL */ +static inline int __init fsverity_sysctl_init(void) +{ + return 0; +} +#endif /* !CONFIG_SYSCTL */ + +int __init fsverity_init_signature(void) +{ + struct key *ring; + int err; + + ring = keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0), + current_cred(), KEY_POS_SEARCH | + KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE | + KEY_USR_SEARCH | KEY_USR_SETATTR, + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); + if (IS_ERR(ring)) + return PTR_ERR(ring); + + err = fsverity_sysctl_init(); + if (err) + goto err_put_ring; + + fsverity_keyring = ring; + return 0; + +err_put_ring: + key_put(ring); + return err; +} diff --git a/fs/verity/verify.c b/fs/verity/verify.c new file mode 100644 index 000000000000..3e8f2de44667 --- /dev/null +++ b/fs/verity/verify.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/verity/verify.c: data verification functions, i.e. 
hooks for ->readpages() + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include <crypto/hash.h> +#include <linux/bio.h> +#include <linux/ratelimit.h> + +static struct workqueue_struct *fsverity_read_workqueue; + +/** + * hash_at_level() - compute the location of the block's hash at the given level + * + * @params: (in) the Merkle tree parameters + * @dindex: (in) the index of the data block being verified + * @level: (in) the level of hash we want (0 is leaf level) + * @hindex: (out) the index of the hash block containing the wanted hash + * @hoffset: (out) the byte offset to the wanted hash within the hash block + */ +static void hash_at_level(const struct merkle_tree_params *params, + pgoff_t dindex, unsigned int level, pgoff_t *hindex, + unsigned int *hoffset) +{ + pgoff_t position; + + /* Offset of the hash within the level's region, in hashes */ + position = dindex >> (level * params->log_arity); + + /* Index of the hash block in the tree overall */ + *hindex = params->level_start[level] + (position >> params->log_arity); + + /* Offset of the wanted hash (in bytes) within the hash block */ + *hoffset = (position & ((1 << params->log_arity) - 1)) << + (params->log_blocksize - params->log_arity); +} + +/* Extract a hash from a hash page */ +static void extract_hash(struct page *hpage, unsigned int hoffset, + unsigned int hsize, u8 *out) +{ + void *virt = kmap_atomic(hpage); + + memcpy(out, virt + hoffset, hsize); + kunmap_atomic(virt); +} + +static inline int cmp_hashes(const struct fsverity_info *vi, + const u8 *want_hash, const u8 *real_hash, + pgoff_t index, int level) +{ + const unsigned int hsize = vi->tree_params.digest_size; + + if (memcmp(want_hash, real_hash, hsize) == 0) + return 0; + + fsverity_err(vi->inode, + "FILE CORRUPTED! 
index=%lu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN", + index, level, + vi->tree_params.hash_alg->name, hsize, want_hash, + vi->tree_params.hash_alg->name, hsize, real_hash); + return -EBADMSG; +} + +/* + * Verify a single data page against the file's Merkle tree. + * + * In principle, we need to verify the entire path to the root node. However, + * for efficiency the filesystem may cache the hash pages. Therefore we need + * only ascend the tree until an already-verified page is seen, as indicated by + * the PageChecked bit being set; then verify the path to that page. + * + * This code currently only supports the case where the verity block size is + * equal to PAGE_SIZE. Doing otherwise would be possible but tricky, since we + * wouldn't be able to use the PageChecked bit. + * + * Note that multiple processes may race to verify a hash page and mark it + * Checked, but it doesn't matter; the result will be the same either way. + * + * Return: true if the page is valid, else false. + */ +static bool verify_page(struct inode *inode, const struct fsverity_info *vi, + struct ahash_request *req, struct page *data_page) +{ + const struct merkle_tree_params *params = &vi->tree_params; + const unsigned int hsize = params->digest_size; + const pgoff_t index = data_page->index; + int level; + u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE]; + const u8 *want_hash; + u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE]; + struct page *hpages[FS_VERITY_MAX_LEVELS]; + unsigned int hoffsets[FS_VERITY_MAX_LEVELS]; + int err; + + if (WARN_ON_ONCE(!PageLocked(data_page) || PageUptodate(data_page))) + return false; + + pr_debug_ratelimited("Verifying data page %lu...\n", index); + + /* + * Starting at the leaf level, ascend the tree saving hash pages along + * the way until we find a verified hash page, indicated by PageChecked; + * or until we reach the root. 
+ */ + for (level = 0; level < params->num_levels; level++) { + pgoff_t hindex; + unsigned int hoffset; + struct page *hpage; + + hash_at_level(params, index, level, &hindex, &hoffset); + + pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n", + level, hindex, hoffset); + + hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, + hindex); + if (IS_ERR(hpage)) { + err = PTR_ERR(hpage); + fsverity_err(inode, + "Error %d reading Merkle tree page %lu", + err, hindex); + goto out; + } + + if (PageChecked(hpage)) { + extract_hash(hpage, hoffset, hsize, _want_hash); + want_hash = _want_hash; + put_page(hpage); + pr_debug_ratelimited("Hash page already checked, want %s:%*phN\n", + params->hash_alg->name, + hsize, want_hash); + goto descend; + } + pr_debug_ratelimited("Hash page not yet checked\n"); + hpages[level] = hpage; + hoffsets[level] = hoffset; + } + + want_hash = vi->root_hash; + pr_debug("Want root hash: %s:%*phN\n", + params->hash_alg->name, hsize, want_hash); +descend: + /* Descend the tree verifying hash pages */ + for (; level > 0; level--) { + struct page *hpage = hpages[level - 1]; + unsigned int hoffset = hoffsets[level - 1]; + + err = fsverity_hash_page(params, inode, req, hpage, real_hash); + if (err) + goto out; + err = cmp_hashes(vi, want_hash, real_hash, index, level - 1); + if (err) + goto out; + SetPageChecked(hpage); + extract_hash(hpage, hoffset, hsize, _want_hash); + want_hash = _want_hash; + put_page(hpage); + pr_debug("Verified hash page at level %d, now want %s:%*phN\n", + level - 1, params->hash_alg->name, hsize, want_hash); + } + + /* Finally, verify the data page */ + err = fsverity_hash_page(params, inode, req, data_page, real_hash); + if (err) + goto out; + err = cmp_hashes(vi, want_hash, real_hash, index, -1); +out: + for (; level > 0; level--) + put_page(hpages[level - 1]); + + return err == 0; +} + +/** + * fsverity_verify_page() - verify a data page + * + * Verify a page that has just been read from a verity file. 
The page must be a + * pagecache page that is still locked and not yet uptodate. + * + * Return: true if the page is valid, else false. + */ +bool fsverity_verify_page(struct page *page) +{ + struct inode *inode = page->mapping->host; + const struct fsverity_info *vi = inode->i_verity_info; + struct ahash_request *req; + bool valid; + + req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS); + if (unlikely(!req)) + return false; + + valid = verify_page(inode, vi, req, page); + + ahash_request_free(req); + + return valid; +} +EXPORT_SYMBOL_GPL(fsverity_verify_page); + +#ifdef CONFIG_BLOCK +/** + * fsverity_verify_bio() - verify a 'read' bio that has just completed + * + * Verify a set of pages that have just been read from a verity file. The pages + * must be pagecache pages that are still locked and not yet uptodate. Pages + * that fail verification are set to the Error state. Verification is skipped + * for pages already in the Error state, e.g. due to fscrypt decryption failure. + * + * This is a helper function for use by the ->readpages() method of filesystems + * that issue bios to read data directly into the page cache. Filesystems that + * populate the page cache without issuing bios (e.g. non block-based + * filesystems) must instead call fsverity_verify_page() directly on each page. + * All filesystems must also call fsverity_verify_page() on holes. 
+ */ +void fsverity_verify_bio(struct bio *bio) +{ + struct inode *inode = bio_first_page_all(bio)->mapping->host; + const struct fsverity_info *vi = inode->i_verity_info; + struct ahash_request *req; + struct bio_vec *bv; + struct bvec_iter_all iter_all; + + req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS); + if (unlikely(!req)) { + bio_for_each_segment_all(bv, bio, iter_all) + SetPageError(bv->bv_page); + return; + } + + bio_for_each_segment_all(bv, bio, iter_all) { + struct page *page = bv->bv_page; + + if (!PageError(page) && !verify_page(inode, vi, req, page)) + SetPageError(page); + } + + ahash_request_free(req); +} +EXPORT_SYMBOL_GPL(fsverity_verify_bio); +#endif /* CONFIG_BLOCK */ + +/** + * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue + * + * Enqueue verification work for asynchronous processing. + */ +void fsverity_enqueue_verify_work(struct work_struct *work) +{ + queue_work(fsverity_read_workqueue, work); +} +EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work); + +int __init fsverity_init_workqueue(void) +{ + /* + * Use an unbound workqueue to allow bios to be verified in parallel + * even when they happen to complete on the same CPU. This sacrifices + * locality, but it's worthwhile since hashing is CPU-intensive. + * + * Also use a high-priority workqueue to prioritize verification work, + * which blocks reads from completing, over regular application tasks. + */ + fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue", + WQ_UNBOUND | WQ_HIGHPRI, + num_online_cpus()); + if (!fsverity_read_workqueue) + return -ENOMEM; + return 0; +} + +void __init fsverity_exit_workqueue(void) +{ + destroy_workqueue(fsverity_read_workqueue); + fsverity_read_workqueue = NULL; +} |