author      Linus Torvalds <torvalds@linux-foundation.org>  2019-09-18 16:59:14 -0700
committer   Linus Torvalds <torvalds@linux-foundation.org>  2019-09-18 16:59:14 -0700
commit      f60c55a94e1d127186566f06294f2dadd966e9b4 (patch)
tree        2d3dbd572c0096d24f87f581194563ff76e07a6e /fs
parent      734d1ed83e1f9b7bafb650033fb87c657858cf5b (diff)
parent      95ae251fe82838b85c6d37e5a1775006e2a42ae0 (diff)
download    linux-f60c55a94e1d127186566f06294f2dadd966e9b4.tar.bz2
Merge tag 'fsverity-for-linus' of git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt
Pull fs-verity support from Eric Biggers:
 "fs-verity is a filesystem feature that provides Merkle tree based
  hashing (similar to dm-verity) for individual readonly files, mainly
  for the purpose of efficient authenticity verification.

  This pull request includes:

   (a) The fs/verity/ support layer and documentation.

   (b) fs-verity support for ext4 and f2fs.

  Compared to the original fs-verity patchset from last year, the UAPI
  to enable fs-verity on a file has been greatly simplified. Lots of
  other things were cleaned up too.

  fs-verity is planned to be used by two different projects on Android;
  most of the userspace code is in place already. Another userspace
  tool ("fsverity-utils"), and xfstests, are also available. e2fsprogs
  and f2fs-tools already have fs-verity support. Other people have
  shown interest in using fs-verity too.

  I've tested this on ext4 and f2fs with xfstests, both the existing
  tests and the new fs-verity tests. This has also been in linux-next
  since July 30 with no reported issues except a couple minor ones I
  found myself and folded in fixes for.

  Ted and I will be co-maintaining fs-verity"

* tag 'fsverity-for-linus' of git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt:
  f2fs: add fs-verity support
  ext4: update on-disk format documentation for fs-verity
  ext4: add fs-verity read support
  ext4: add basic fs-verity support
  fs-verity: support builtin file signatures
  fs-verity: add SHA-512 support
  fs-verity: implement FS_IOC_MEASURE_VERITY ioctl
  fs-verity: implement FS_IOC_ENABLE_VERITY ioctl
  fs-verity: add data verification hooks for ->readpages()
  fs-verity: add the hook for file ->setattr()
  fs-verity: add the hook for file ->open()
  fs-verity: add inode and superblock fields
  fs-verity: add Kconfig and the helper functions for hashing
  fs: uapi: define verity bit for FS_IOC_GETFLAGS
  fs-verity: add UAPI header
  fs-verity: add MAINTAINERS file entry
  fs-verity: add a documentation file
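For orientation (this note is not part of the pull itself): the simplified UAPI
mentioned above amounts to two ioctls, FS_IOC_ENABLE_VERITY and
FS_IOC_MEASURE_VERITY, declared in the new UAPI header added by this series.
A minimal userspace sketch follows, with most error handling elided; the 4096
Merkle tree block size and the 64-byte digest buffer are illustrative choices,
not requirements.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/fsverity.h>

int main(int argc, char **argv)
{
	/* Enabling verity requires a read-only fd and no open writers. */
	int fd = open(argv[1], O_RDONLY);
	struct fsverity_enable_arg arg = {
		.version = 1,
		.hash_algorithm = FS_VERITY_HASH_ALG_SHA256,
		.block_size = 4096,	/* Merkle tree block size */
	};

	if (fd < 0)
		return 1;
	if (ioctl(fd, FS_IOC_ENABLE_VERITY, &arg) != 0)
		perror("FS_IOC_ENABLE_VERITY");

	/* Read back the file's measurement (root of the Merkle tree). */
	struct fsverity_digest *d = malloc(sizeof(*d) + 64); /* 64 covers SHA-512 */

	d->digest_size = 64;
	if (ioctl(fd, FS_IOC_MEASURE_VERITY, d) != 0)
		perror("FS_IOC_MEASURE_VERITY");
	else
		printf("digest: algorithm %u, %u bytes\n",
		       d->digest_algorithm, d->digest_size);
	return 0;
}

Once enabling succeeds, the verity bit is set (visible via FS_IOC_GETFLAGS),
the file becomes permanently read-only, and data read from it is transparently
verified against the Merkle tree.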
Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig                    |   2
-rw-r--r--  fs/Makefile                   |   1
-rw-r--r--  fs/ext4/Makefile              |   1
-rw-r--r--  fs/ext4/ext4.h                |  23
-rw-r--r--  fs/ext4/file.c                |   4
-rw-r--r--  fs/ext4/inode.c               |  55
-rw-r--r--  fs/ext4/ioctl.c               |  13
-rw-r--r--  fs/ext4/readpage.c            | 211
-rw-r--r--  fs/ext4/super.c               |  18
-rw-r--r--  fs/ext4/sysfs.c               |   6
-rw-r--r--  fs/ext4/verity.c              | 367
-rw-r--r--  fs/f2fs/Makefile              |   1
-rw-r--r--  fs/f2fs/data.c                |  75
-rw-r--r--  fs/f2fs/f2fs.h                |  20
-rw-r--r--  fs/f2fs/file.c                |  43
-rw-r--r--  fs/f2fs/inode.c               |   5
-rw-r--r--  fs/f2fs/super.c               |   3
-rw-r--r--  fs/f2fs/sysfs.c               |  11
-rw-r--r--  fs/f2fs/verity.c              | 247
-rw-r--r--  fs/f2fs/xattr.h               |   2
-rw-r--r--  fs/verity/Kconfig             |  55
-rw-r--r--  fs/verity/Makefile            |  10
-rw-r--r--  fs/verity/enable.c            | 377
-rw-r--r--  fs/verity/fsverity_private.h  | 185
-rw-r--r--  fs/verity/hash_algs.c         | 280
-rw-r--r--  fs/verity/init.c              |  61
-rw-r--r--  fs/verity/measure.c           |  57
-rw-r--r--  fs/verity/open.c              | 356
-rw-r--r--  fs/verity/signature.c         | 157
-rw-r--r--  fs/verity/verify.c            | 281
30 files changed, 2859 insertions(+), 68 deletions(-)
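One layout detail worth calling out before the patch body: in the ext4
implementation below (fs/ext4/verity.c), the Merkle tree starts at the first
64K boundary past i_size, the verity descriptor starts at the next
filesystem-block boundary after the tree, and the descriptor's size is stored
as a __le32 in the last 4 bytes of the last allocated block. A small sketch of
that arithmetic, written as ordinary userspace C rather than kernel code
(names here are illustrative, not from the patch):

#include <stdint.h>

static uint64_t round_up64(uint64_t x, uint64_t align)
{
	return (x + align - 1) / align * align;
}

struct verity_layout {
	uint64_t tree_pos;	/* Merkle tree start: first 64K boundary >= i_size */
	uint64_t desc_pos;	/* descriptor start: next fs block after the tree */
	uint64_t size_pos;	/* __le32 descriptor size: last 4 bytes of last block */
};

static struct verity_layout compute_layout(uint64_t i_size, uint64_t tree_size,
					   uint64_t desc_size, uint64_t blocksize)
{
	struct verity_layout l;

	l.tree_pos = round_up64(i_size, 65536);
	l.desc_pos = round_up64(l.tree_pos + tree_size, blocksize);
	l.size_pos = round_up64(l.desc_pos + desc_size + 4, blocksize) - 4;
	return l;
}

f2fs (fs/f2fs/verity.c) uses the same 64K rule for the Merkle tree but records
the descriptor's size and position in a small xattr instead of in the last
allocated block.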
diff --git a/fs/Kconfig b/fs/Kconfig
index 669d46550e6d..2501e6f1f965 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -112,6 +112,8 @@ config MANDATORY_FILE_LOCKING
source "fs/crypto/Kconfig"
+source "fs/verity/Kconfig"
+
source "fs/notify/Kconfig"
source "fs/quota/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index b2e4973a0bea..14231b4cf383 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_AIO) += aio.o
obj-$(CONFIG_IO_URING) += io_uring.o
obj-$(CONFIG_FS_DAX) += dax.o
obj-$(CONFIG_FS_ENCRYPTION) += crypto/
+obj-$(CONFIG_FS_VERITY) += verity/
obj-$(CONFIG_FILE_LOCKING) += locks.o
obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8fdfcd3c3e04..b17ddc229ac5 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -13,3 +13,4 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
+ext4-$(CONFIG_FS_VERITY) += verity.o
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf660aa7a9e0..9c7f4036021b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -41,6 +41,7 @@
#endif
#include <linux/fscrypt.h>
+#include <linux/fsverity.h>
#include <linux/compiler.h>
@@ -395,6 +396,7 @@ struct flex_groups {
#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
+#define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */
#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
@@ -402,7 +404,7 @@ struct flex_groups {
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-#define EXT4_FL_USER_VISIBLE 0x704BDFFF /* User visible flags */
+#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */
/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
@@ -467,6 +469,7 @@ enum {
EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/
EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */
EXT4_INODE_EXTENTS = 19, /* Inode uses extents */
+ EXT4_INODE_VERITY = 20, /* Verity protected inode */
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */
EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
@@ -512,6 +515,7 @@ static inline void ext4_check_flag_values(void)
CHECK_FLAG_VALUE(TOPDIR);
CHECK_FLAG_VALUE(HUGE_FILE);
CHECK_FLAG_VALUE(EXTENTS);
+ CHECK_FLAG_VALUE(VERITY);
CHECK_FLAG_VALUE(EA_INODE);
CHECK_FLAG_VALUE(EOFBLOCKS);
CHECK_FLAG_VALUE(INLINE_DATA);
@@ -1560,6 +1564,7 @@ enum {
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
+ EXT4_STATE_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
};
#define EXT4_INODE_BIT_FNS(name, field, offset) \
@@ -1610,6 +1615,12 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_SB(sb) (sb)
#endif
+static inline bool ext4_verity_in_progress(struct inode *inode)
+{
+ return IS_ENABLED(CONFIG_FS_VERITY) &&
+ ext4_test_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+}
+
#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
/*
@@ -1662,6 +1673,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
#define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000
#define EXT4_FEATURE_RO_COMPAT_PROJECT 0x2000
+#define EXT4_FEATURE_RO_COMPAT_VERITY 0x8000
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -1756,6 +1768,7 @@ EXT4_FEATURE_RO_COMPAT_FUNCS(bigalloc, BIGALLOC)
EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum, METADATA_CSUM)
EXT4_FEATURE_RO_COMPAT_FUNCS(readonly, READONLY)
EXT4_FEATURE_RO_COMPAT_FUNCS(project, PROJECT)
+EXT4_FEATURE_RO_COMPAT_FUNCS(verity, VERITY)
EXT4_FEATURE_INCOMPAT_FUNCS(compression, COMPRESSION)
EXT4_FEATURE_INCOMPAT_FUNCS(filetype, FILETYPE)
@@ -1813,7 +1826,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(casefold, CASEFOLD)
EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
EXT4_FEATURE_RO_COMPAT_QUOTA |\
- EXT4_FEATURE_RO_COMPAT_PROJECT)
+ EXT4_FEATURE_RO_COMPAT_PROJECT |\
+ EXT4_FEATURE_RO_COMPAT_VERITY)
#define EXTN_FEATURE_FUNCS(ver) \
static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -3177,6 +3191,8 @@ static inline void ext4_set_de_type(struct super_block *sb,
extern int ext4_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
unsigned nr_pages, bool is_readahead);
+extern int __init ext4_init_post_read_processing(void);
+extern void ext4_exit_post_read_processing(void);
/* symlink.c */
extern const struct inode_operations ext4_encrypted_symlink_inode_operations;
@@ -3283,6 +3299,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
/* mmp.c */
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
+/* verity.c */
+extern const struct fsverity_operations ext4_verityops;
+
/*
* Add new method to test whether block and inode bitmaps are properly
* initialized. With uninit_bg reading the block from disk is not enough
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 70b0438dbc94..b8a20bb9a145 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -457,6 +457,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (ret)
return ret;
+ ret = fsverity_file_open(inode, filp);
+ if (ret)
+ return ret;
+
/*
* Set up the jbd2_inode if we are opening the inode for
* writing and the journal is present
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 006b7a2070bf..d0dc0e3463db 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1340,6 +1340,9 @@ retry_journal:
}
if (ret) {
+ bool extended = (pos + len > inode->i_size) &&
+ !ext4_verity_in_progress(inode);
+
unlock_page(page);
/*
* __block_write_begin may have instantiated a few blocks
@@ -1349,11 +1352,11 @@ retry_journal:
* Add inode to orphan list in case we crash before
* truncate finishes
*/
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
+ if (extended && ext4_can_truncate(inode))
ext4_orphan_add(handle, inode);
ext4_journal_stop(handle);
- if (pos + len > inode->i_size) {
+ if (extended) {
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might
@@ -1406,6 +1409,7 @@ static int ext4_write_end(struct file *file,
int ret = 0, ret2;
int i_size_changed = 0;
int inline_data = ext4_has_inline_data(inode);
+ bool verity = ext4_verity_in_progress(inode);
trace_ext4_write_end(inode, pos, len, copied);
if (inline_data) {
@@ -1423,12 +1427,16 @@ static int ext4_write_end(struct file *file,
/*
* it's important to update i_size while still holding page lock:
* page writeout could otherwise come in and zero beyond i_size.
+ *
+ * If FS_IOC_ENABLE_VERITY is running on this inode, then Merkle tree
+ * blocks are being written past EOF, so skip the i_size update.
*/
- i_size_changed = ext4_update_inode_size(inode, pos + copied);
+ if (!verity)
+ i_size_changed = ext4_update_inode_size(inode, pos + copied);
unlock_page(page);
put_page(page);
- if (old_size < pos)
+ if (old_size < pos && !verity)
pagecache_isize_extended(inode, old_size, pos);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
@@ -1439,7 +1447,7 @@ static int ext4_write_end(struct file *file,
if (i_size_changed || inline_data)
ext4_mark_inode_dirty(handle, inode);
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
+ if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
* inode->i_size. So truncate them
@@ -1450,7 +1458,7 @@ errout:
if (!ret)
ret = ret2;
- if (pos + len > inode->i_size) {
+ if (pos + len > inode->i_size && !verity) {
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
@@ -1511,6 +1519,7 @@ static int ext4_journalled_write_end(struct file *file,
unsigned from, to;
int size_changed = 0;
int inline_data = ext4_has_inline_data(inode);
+ bool verity = ext4_verity_in_progress(inode);
trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_SIZE - 1);
@@ -1540,13 +1549,14 @@ static int ext4_journalled_write_end(struct file *file,
if (!partial)
SetPageUptodate(page);
}
- size_changed = ext4_update_inode_size(inode, pos + copied);
+ if (!verity)
+ size_changed = ext4_update_inode_size(inode, pos + copied);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
unlock_page(page);
put_page(page);
- if (old_size < pos)
+ if (old_size < pos && !verity)
pagecache_isize_extended(inode, old_size, pos);
if (size_changed || inline_data) {
@@ -1555,7 +1565,7 @@ static int ext4_journalled_write_end(struct file *file,
ret = ret2;
}
- if (pos + len > inode->i_size && ext4_can_truncate(inode))
+ if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
* inode->i_size. So truncate them
@@ -1566,7 +1576,7 @@ errout:
ret2 = ext4_journal_stop(handle);
if (!ret)
ret = ret2;
- if (pos + len > inode->i_size) {
+ if (pos + len > inode->i_size && !verity) {
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
@@ -2162,7 +2172,8 @@ static int ext4_writepage(struct page *page,
trace_ext4_writepage(page);
size = i_size_read(inode);
- if (page->index == size >> PAGE_SHIFT)
+ if (page->index == size >> PAGE_SHIFT &&
+ !ext4_verity_in_progress(inode))
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
@@ -2246,7 +2257,8 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
* after page tables are updated.
*/
size = i_size_read(mpd->inode);
- if (page->index == size >> PAGE_SHIFT)
+ if (page->index == size >> PAGE_SHIFT &&
+ !ext4_verity_in_progress(mpd->inode))
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
@@ -2345,6 +2357,9 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
>> inode->i_blkbits;
+ if (ext4_verity_in_progress(inode))
+ blocks = EXT_MAX_BLOCKS;
+
do {
BUG_ON(buffer_locked(bh));
@@ -3061,8 +3076,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
index = pos >> PAGE_SHIFT;
- if (ext4_nonda_switch(inode->i_sb) ||
- S_ISLNK(inode->i_mode)) {
+ if (ext4_nonda_switch(inode->i_sb) || S_ISLNK(inode->i_mode) ||
+ ext4_verity_in_progress(inode)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
return ext4_write_begin(file, mapping, pos,
len, flags, pagep, fsdata);
@@ -3897,6 +3912,8 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
return 0;
#endif
+ if (fsverity_active(inode))
+ return 0;
/*
* If we are doing data journalling we don't support O_DIRECT
@@ -4736,6 +4753,8 @@ static bool ext4_should_use_dax(struct inode *inode)
return false;
if (ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT))
return false;
+ if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
+ return false;
return true;
}
@@ -4760,9 +4779,11 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_ENCRYPTED;
if (flags & EXT4_CASEFOLD_FL)
new_fl |= S_CASEFOLD;
+ if (flags & EXT4_VERITY_FL)
+ new_fl |= S_VERITY;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
- S_ENCRYPTED|S_CASEFOLD);
+ S_ENCRYPTED|S_CASEFOLD|S_VERITY);
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -5552,6 +5573,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (error)
return error;
+ error = fsverity_prepare_setattr(dentry, attr);
+ if (error)
+ return error;
+
if (is_quota_modification(inode, attr)) {
error = dquot_initialize(inode);
if (error)
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5703d607f5af..5444d49cbf09 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1198,6 +1198,17 @@ out:
}
case EXT4_IOC_SHUTDOWN:
return ext4_shutdown(sb, arg);
+
+ case FS_IOC_ENABLE_VERITY:
+ if (!ext4_has_feature_verity(sb))
+ return -EOPNOTSUPP;
+ return fsverity_ioctl_enable(filp, (const void __user *)arg);
+
+ case FS_IOC_MEASURE_VERITY:
+ if (!ext4_has_feature_verity(sb))
+ return -EOPNOTSUPP;
+ return fsverity_ioctl_measure(filp, (void __user *)arg);
+
default:
return -ENOTTY;
}
@@ -1265,6 +1276,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
case EXT4_IOC_SHUTDOWN:
case FS_IOC_GETFSMAP:
+ case FS_IOC_ENABLE_VERITY:
+ case FS_IOC_MEASURE_VERITY:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index c916017db334..a30b203fa461 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -47,13 +47,103 @@
#include "ext4.h"
-static inline bool ext4_bio_encrypted(struct bio *bio)
+#define NUM_PREALLOC_POST_READ_CTXS 128
+
+static struct kmem_cache *bio_post_read_ctx_cache;
+static mempool_t *bio_post_read_ctx_pool;
+
+/* postprocessing steps for read bios */
+enum bio_post_read_step {
+ STEP_INITIAL = 0,
+ STEP_DECRYPT,
+ STEP_VERITY,
+};
+
+struct bio_post_read_ctx {
+ struct bio *bio;
+ struct work_struct work;
+ unsigned int cur_step;
+ unsigned int enabled_steps;
+};
+
+static void __read_end_io(struct bio *bio)
{
-#ifdef CONFIG_FS_ENCRYPTION
- return unlikely(bio->bi_private != NULL);
-#else
- return false;
-#endif
+ struct page *page;
+ struct bio_vec *bv;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bv, bio, iter_all) {
+ page = bv->bv_page;
+
+ /* PG_error was set if any post_read step failed */
+ if (bio->bi_status || PageError(page)) {
+ ClearPageUptodate(page);
+ /* will re-read again later */
+ ClearPageError(page);
+ } else {
+ SetPageUptodate(page);
+ }
+ unlock_page(page);
+ }
+ if (bio->bi_private)
+ mempool_free(bio->bi_private, bio_post_read_ctx_pool);
+ bio_put(bio);
+}
+
+static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
+
+static void decrypt_work(struct work_struct *work)
+{
+ struct bio_post_read_ctx *ctx =
+ container_of(work, struct bio_post_read_ctx, work);
+
+ fscrypt_decrypt_bio(ctx->bio);
+
+ bio_post_read_processing(ctx);
+}
+
+static void verity_work(struct work_struct *work)
+{
+ struct bio_post_read_ctx *ctx =
+ container_of(work, struct bio_post_read_ctx, work);
+
+ fsverity_verify_bio(ctx->bio);
+
+ bio_post_read_processing(ctx);
+}
+
+static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
+{
+ /*
+ * We use different work queues for decryption and for verity because
+ * verity may require reading metadata pages that need decryption, and
+ * we shouldn't recurse to the same workqueue.
+ */
+ switch (++ctx->cur_step) {
+ case STEP_DECRYPT:
+ if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
+ INIT_WORK(&ctx->work, decrypt_work);
+ fscrypt_enqueue_decrypt_work(&ctx->work);
+ return;
+ }
+ ctx->cur_step++;
+ /* fall-through */
+ case STEP_VERITY:
+ if (ctx->enabled_steps & (1 << STEP_VERITY)) {
+ INIT_WORK(&ctx->work, verity_work);
+ fsverity_enqueue_verify_work(&ctx->work);
+ return;
+ }
+ ctx->cur_step++;
+ /* fall-through */
+ default:
+ __read_end_io(ctx->bio);
+ }
+}
+
+static bool bio_post_read_required(struct bio *bio)
+{
+ return bio->bi_private && !bio->bi_status;
}
/*
@@ -70,30 +160,53 @@ static inline bool ext4_bio_encrypted(struct bio *bio)
*/
static void mpage_end_io(struct bio *bio)
{
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
+ if (bio_post_read_required(bio)) {
+ struct bio_post_read_ctx *ctx = bio->bi_private;
- if (ext4_bio_encrypted(bio)) {
- if (bio->bi_status) {
- fscrypt_release_ctx(bio->bi_private);
- } else {
- fscrypt_enqueue_decrypt_bio(bio->bi_private, bio);
- return;
- }
+ ctx->cur_step = STEP_INITIAL;
+ bio_post_read_processing(ctx);
+ return;
}
- bio_for_each_segment_all(bv, bio, iter_all) {
- struct page *page = bv->bv_page;
+ __read_end_io(bio);
+}
- if (!bio->bi_status) {
- SetPageUptodate(page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- unlock_page(page);
+static inline bool ext4_need_verity(const struct inode *inode, pgoff_t idx)
+{
+ return fsverity_active(inode) &&
+ idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+}
+
+static struct bio_post_read_ctx *get_bio_post_read_ctx(struct inode *inode,
+ struct bio *bio,
+ pgoff_t first_idx)
+{
+ unsigned int post_read_steps = 0;
+ struct bio_post_read_ctx *ctx = NULL;
+
+ if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
+ post_read_steps |= 1 << STEP_DECRYPT;
+
+ if (ext4_need_verity(inode, first_idx))
+ post_read_steps |= 1 << STEP_VERITY;
+
+ if (post_read_steps) {
+ ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+ ctx->bio = bio;
+ ctx->enabled_steps = post_read_steps;
+ bio->bi_private = ctx;
}
+ return ctx;
+}
- bio_put(bio);
+static inline loff_t ext4_readpage_limit(struct inode *inode)
+{
+ if (IS_ENABLED(CONFIG_FS_VERITY) &&
+ (IS_VERITY(inode) || ext4_verity_in_progress(inode)))
+ return inode->i_sb->s_maxbytes;
+
+ return i_size_read(inode);
}
int ext4_mpage_readpages(struct address_space *mapping,
@@ -141,7 +254,8 @@ int ext4_mpage_readpages(struct address_space *mapping,
block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
last_block = block_in_file + nr_pages * blocks_per_page;
- last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
+ last_block_in_file = (ext4_readpage_limit(inode) +
+ blocksize - 1) >> blkbits;
if (last_block > last_block_in_file)
last_block = last_block_in_file;
page_block = 0;
@@ -218,6 +332,9 @@ int ext4_mpage_readpages(struct address_space *mapping,
zero_user_segment(page, first_hole << blkbits,
PAGE_SIZE);
if (first_hole == 0) {
+ if (ext4_need_verity(inode, page->index) &&
+ !fsverity_verify_page(page))
+ goto set_error_page;
SetPageUptodate(page);
unlock_page(page);
goto next_page;
@@ -241,18 +358,16 @@ int ext4_mpage_readpages(struct address_space *mapping,
bio = NULL;
}
if (bio == NULL) {
- struct fscrypt_ctx *ctx = NULL;
+ struct bio_post_read_ctx *ctx;
- if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) {
- ctx = fscrypt_get_ctx(GFP_NOFS);
- if (IS_ERR(ctx))
- goto set_error_page;
- }
bio = bio_alloc(GFP_KERNEL,
min_t(int, nr_pages, BIO_MAX_PAGES));
- if (!bio) {
- if (ctx)
- fscrypt_release_ctx(ctx);
+ if (!bio)
+ goto set_error_page;
+ ctx = get_bio_post_read_ctx(inode, bio, page->index);
+ if (IS_ERR(ctx)) {
+ bio_put(bio);
+ bio = NULL;
goto set_error_page;
}
bio_set_dev(bio, bdev);
@@ -293,3 +408,29 @@ int ext4_mpage_readpages(struct address_space *mapping,
submit_bio(bio);
return 0;
}
+
+int __init ext4_init_post_read_processing(void)
+{
+ bio_post_read_ctx_cache =
+ kmem_cache_create("ext4_bio_post_read_ctx",
+ sizeof(struct bio_post_read_ctx), 0, 0, NULL);
+ if (!bio_post_read_ctx_cache)
+ goto fail;
+ bio_post_read_ctx_pool =
+ mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
+ bio_post_read_ctx_cache);
+ if (!bio_post_read_ctx_pool)
+ goto fail_free_cache;
+ return 0;
+
+fail_free_cache:
+ kmem_cache_destroy(bio_post_read_ctx_cache);
+fail:
+ return -ENOMEM;
+}
+
+void ext4_exit_post_read_processing(void)
+{
+ mempool_destroy(bio_post_read_ctx_pool);
+ kmem_cache_destroy(bio_post_read_ctx_cache);
+}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 757819139b8f..27cd622676e7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1182,6 +1182,7 @@ void ext4_clear_inode(struct inode *inode)
EXT4_I(inode)->jinode = NULL;
}
fscrypt_put_encryption_info(inode);
+ fsverity_cleanup_inode(inode);
}
static struct inode *ext4_nfs_get_inode(struct super_block *sb,
@@ -4275,6 +4276,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#ifdef CONFIG_FS_ENCRYPTION
sb->s_cop = &ext4_cryptops;
#endif
+#ifdef CONFIG_FS_VERITY
+ sb->s_vop = &ext4_verityops;
+#endif
#ifdef CONFIG_QUOTA
sb->dq_op = &ext4_quota_operations;
if (ext4_has_feature_quota(sb))
@@ -4422,6 +4426,11 @@ no_journal:
goto failed_mount_wq;
}
+ if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
+ ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
+ goto failed_mount_wq;
+ }
+
if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
!ext4_has_feature_encrypt(sb)) {
ext4_set_feature_encrypt(sb);
@@ -6098,6 +6107,10 @@ static int __init ext4_init_fs(void)
err = ext4_init_pending();
if (err)
+ goto out7;
+
+ err = ext4_init_post_read_processing();
+ if (err)
goto out6;
err = ext4_init_pageio();
@@ -6138,8 +6151,10 @@ out3:
out4:
ext4_exit_pageio();
out5:
- ext4_exit_pending();
+ ext4_exit_post_read_processing();
out6:
+ ext4_exit_pending();
+out7:
ext4_exit_es();
return err;
@@ -6156,6 +6171,7 @@ static void __exit ext4_exit_fs(void)
ext4_exit_sysfs();
ext4_exit_system_zone();
ext4_exit_pageio();
+ ext4_exit_post_read_processing();
ext4_exit_es();
ext4_exit_pending();
}
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index b3cd7655a6ff..eb1efad0e20a 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -242,6 +242,9 @@ EXT4_ATTR_FEATURE(encryption);
#ifdef CONFIG_UNICODE
EXT4_ATTR_FEATURE(casefold);
#endif
+#ifdef CONFIG_FS_VERITY
+EXT4_ATTR_FEATURE(verity);
+#endif
EXT4_ATTR_FEATURE(metadata_csum_seed);
static struct attribute *ext4_feat_attrs[] = {
@@ -254,6 +257,9 @@ static struct attribute *ext4_feat_attrs[] = {
#ifdef CONFIG_UNICODE
ATTR_LIST(casefold),
#endif
+#ifdef CONFIG_FS_VERITY
+ ATTR_LIST(verity),
+#endif
ATTR_LIST(metadata_csum_seed),
NULL,
};
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
new file mode 100644
index 000000000000..d0d8a9795dd6
--- /dev/null
+++ b/fs/ext4/verity.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/ext4/verity.c: fs-verity support for ext4
+ *
+ * Copyright 2019 Google LLC
+ */
+
+/*
+ * Implementation of fsverity_operations for ext4.
+ *
+ * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past
+ * the end of the file, starting at the first 64K boundary beyond i_size. This
+ * approach works because (a) verity files are readonly, and (b) pages fully
+ * beyond i_size aren't visible to userspace but can be read/written internally
+ * by ext4 with only some relatively small changes to ext4. This approach
+ * avoids having to depend on the EA_INODE feature and on rearchitecturing
+ * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and
+ * to support encrypting xattrs. Note that the verity metadata *must* be
+ * encrypted when the file is, since it contains hashes of the plaintext data.
+ *
+ * Using a 64K boundary rather than a 4K one keeps things ready for
+ * architectures with 64K pages, and it doesn't necessarily waste space on-disk
+ * since there can be a hole between i_size and the start of the Merkle tree.
+ */
+
+#include <linux/quotaops.h>
+
+#include "ext4.h"
+#include "ext4_extents.h"
+#include "ext4_jbd2.h"
+
+static inline loff_t ext4_verity_metadata_pos(const struct inode *inode)
+{
+ return round_up(inode->i_size, 65536);
+}
+
+/*
+ * Read some verity metadata from the inode. __vfs_read() can't be used because
+ * we need to read beyond i_size.
+ */
+static int pagecache_read(struct inode *inode, void *buf, size_t count,
+ loff_t pos)
+{
+ while (count) {
+ size_t n = min_t(size_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+ void *addr;
+
+ page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT,
+ NULL);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ addr = kmap_atomic(page);
+ memcpy(buf, addr + offset_in_page(pos), n);
+ kunmap_atomic(addr);
+
+ put_page(page);
+
+ buf += n;
+ pos += n;
+ count -= n;
+ }
+ return 0;
+}
+
+/*
+ * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY.
+ * kernel_write() can't be used because the file descriptor is readonly.
+ */
+static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+ loff_t pos)
+{
+ if (pos + count > inode->i_sb->s_maxbytes)
+ return -EFBIG;
+
+ while (count) {
+ size_t n = min_t(size_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+ void *fsdata;
+ void *addr;
+ int res;
+
+ res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
+ &page, &fsdata);
+ if (res)
+ return res;
+
+ addr = kmap_atomic(page);
+ memcpy(addr + offset_in_page(pos), buf, n);
+ kunmap_atomic(addr);
+
+ res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
+ page, fsdata);
+ if (res < 0)
+ return res;
+ if (res != n)
+ return -EIO;
+
+ buf += n;
+ pos += n;
+ count -= n;
+ }
+ return 0;
+}
+
+static int ext4_begin_enable_verity(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+ const int credits = 2; /* superblock and inode for ext4_orphan_add() */
+ handle_t *handle;
+ int err;
+
+ if (ext4_verity_in_progress(inode))
+ return -EBUSY;
+
+ /*
+ * Since the file was opened readonly, we have to initialize the jbd
+ * inode and quotas here and not rely on ->open() doing it. This must
+ * be done before evicting the inline data.
+ */
+
+ err = ext4_inode_attach_jinode(inode);
+ if (err)
+ return err;
+
+ err = dquot_initialize(inode);
+ if (err)
+ return err;
+
+ err = ext4_convert_inline_data(inode);
+ if (err)
+ return err;
+
+ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+ ext4_warning_inode(inode,
+ "verity is only allowed on extent-based files");
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * ext4 uses the last allocated block to find the verity descriptor, so
+ * we must remove any other blocks past EOF which might confuse things.
+ */
+ err = ext4_truncate(inode);
+ if (err)
+ return err;
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ err = ext4_orphan_add(handle, inode);
+ if (err == 0)
+ ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+
+ ext4_journal_stop(handle);
+ return err;
+}
+
+/*
+ * ext4 stores the verity descriptor beginning on the next filesystem block
+ * boundary after the Merkle tree. Then, the descriptor size is stored in the
+ * last 4 bytes of the last allocated filesystem block --- which is either the
+ * block in which the descriptor ends, or the next block after that if there
+ * weren't at least 4 bytes remaining.
+ *
+ * We can't simply store the descriptor in an xattr because it *must* be
+ * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt
+ * xattrs. Also, if the descriptor includes a large signature blob it may be
+ * too large to store in an xattr without the EA_INODE feature.
+ */
+static int ext4_write_verity_descriptor(struct inode *inode, const void *desc,
+ size_t desc_size, u64 merkle_tree_size)
+{
+ const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) +
+ merkle_tree_size, i_blocksize(inode));
+ const u64 desc_end = desc_pos + desc_size;
+ const __le32 desc_size_disk = cpu_to_le32(desc_size);
+ const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk),
+ i_blocksize(inode)) -
+ sizeof(desc_size_disk);
+ int err;
+
+ err = pagecache_write(inode, desc, desc_size, desc_pos);
+ if (err)
+ return err;
+
+ return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk),
+ desc_size_pos);
+}
+
+static int ext4_end_enable_verity(struct file *filp, const void *desc,
+ size_t desc_size, u64 merkle_tree_size)
+{
+ struct inode *inode = file_inode(filp);
+ const int credits = 2; /* superblock and inode for ext4_orphan_del() */
+ handle_t *handle;
+ int err = 0;
+ int err2;
+
+ if (desc != NULL) {
+ /* Succeeded; write the verity descriptor. */
+ err = ext4_write_verity_descriptor(inode, desc, desc_size,
+ merkle_tree_size);
+
+ /* Write all pages before clearing VERITY_IN_PROGRESS. */
+ if (!err)
+ err = filemap_write_and_wait(inode->i_mapping);
+ }
+
+ /* If we failed, truncate anything we wrote past i_size. */
+ if (desc == NULL || err)
+ ext4_truncate(inode);
+
+ /*
+ * We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and
+ * deleting the inode from the orphan list, even if something failed.
+ * If everything succeeded, we'll also set the verity bit in the same
+ * transaction.
+ */
+
+ ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
+ if (IS_ERR(handle)) {
+ ext4_orphan_del(NULL, inode);
+ return PTR_ERR(handle);
+ }
+
+ err2 = ext4_orphan_del(handle, inode);
+ if (err2)
+ goto out_stop;
+
+ if (desc != NULL && !err) {
+ struct ext4_iloc iloc;
+
+ err = ext4_reserve_inode_write(handle, inode, &iloc);
+ if (err)
+ goto out_stop;
+ ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
+ ext4_set_inode_flags(inode);
+ err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+ }
+out_stop:
+ ext4_journal_stop(handle);
+ return err ?: err2;
+}
+
+static int ext4_get_verity_descriptor_location(struct inode *inode,
+ size_t *desc_size_ret,
+ u64 *desc_pos_ret)
+{
+ struct ext4_ext_path *path;
+ struct ext4_extent *last_extent;
+ u32 end_lblk;
+ u64 desc_size_pos;
+ __le32 desc_size_disk;
+ u32 desc_size;
+ u64 desc_pos;
+ int err;
+
+ /*
+ * Descriptor size is in last 4 bytes of last allocated block.
+ * See ext4_write_verity_descriptor().
+ */
+
+ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+ EXT4_ERROR_INODE(inode, "verity file doesn't use extents");
+ return -EFSCORRUPTED;
+ }
+
+ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ last_extent = path[path->p_depth].p_ext;
+ if (!last_extent) {
+ EXT4_ERROR_INODE(inode, "verity file has no extents");
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ return -EFSCORRUPTED;
+ }
+
+ end_lblk = le32_to_cpu(last_extent->ee_block) +
+ ext4_ext_get_actual_len(last_extent);
+ desc_size_pos = (u64)end_lblk << inode->i_blkbits;
+ ext4_ext_drop_refs(path);
+ kfree(path);
+
+ if (desc_size_pos < sizeof(desc_size_disk))
+ goto bad;
+ desc_size_pos -= sizeof(desc_size_disk);
+
+ err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk),
+ desc_size_pos);
+ if (err)
+ return err;
+ desc_size = le32_to_cpu(desc_size_disk);
+
+ /*
+ * The descriptor is stored just before the desc_size_disk, but starting
+ * on a filesystem block boundary.
+ */
+
+ if (desc_size > INT_MAX || desc_size > desc_size_pos)
+ goto bad;
+
+ desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode));
+ if (desc_pos < ext4_verity_metadata_pos(inode))
+ goto bad;
+
+ *desc_size_ret = desc_size;
+ *desc_pos_ret = desc_pos;
+ return 0;
+
+bad:
+ EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor");
+ return -EFSCORRUPTED;
+}
+
+static int ext4_get_verity_descriptor(struct inode *inode, void *buf,
+ size_t buf_size)
+{
+ size_t desc_size = 0;
+ u64 desc_pos = 0;
+ int err;
+
+ err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos);
+ if (err)
+ return err;
+
+ if (buf_size) {
+ if (desc_size > buf_size)
+ return -ERANGE;
+ err = pagecache_read(inode, buf, desc_size, desc_pos);
+ if (err)
+ return err;
+ }
+ return desc_size;
+}
+
+static struct page *ext4_read_merkle_tree_page(struct inode *inode,
+ pgoff_t index)
+{
+ index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
+
+ return read_mapping_page(inode->i_mapping, index, NULL);
+}
+
+static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,
+ u64 index, int log_blocksize)
+{
+ loff_t pos = ext4_verity_metadata_pos(inode) + (index << log_blocksize);
+
+ return pagecache_write(inode, buf, 1 << log_blocksize, pos);
+}
+
+const struct fsverity_operations ext4_verityops = {
+ .begin_enable_verity = ext4_begin_enable_verity,
+ .end_enable_verity = ext4_end_enable_verity,
+ .get_verity_descriptor = ext4_get_verity_descriptor,
+ .read_merkle_tree_page = ext4_read_merkle_tree_page,
+ .write_merkle_tree_block = ext4_write_merkle_tree_block,
+};
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 776c4b936504..2aaecc63834f 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -8,3 +8,4 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o
+f2fs-$(CONFIG_FS_VERITY) += verity.o
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index abbf14e9bd72..54cad80acb7d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -74,6 +74,7 @@ static enum count_type __read_io_type(struct page *page)
enum bio_post_read_step {
STEP_INITIAL = 0,
STEP_DECRYPT,
+ STEP_VERITY,
};
struct bio_post_read_ctx {
@@ -120,8 +121,23 @@ static void decrypt_work(struct work_struct *work)
bio_post_read_processing(ctx);
}
+static void verity_work(struct work_struct *work)
+{
+ struct bio_post_read_ctx *ctx =
+ container_of(work, struct bio_post_read_ctx, work);
+
+ fsverity_verify_bio(ctx->bio);
+
+ bio_post_read_processing(ctx);
+}
+
static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
{
+ /*
+ * We use different work queues for decryption and for verity because
+ * verity may require reading metadata pages that need decryption, and
+ * we shouldn't recurse to the same workqueue.
+ */
switch (++ctx->cur_step) {
case STEP_DECRYPT:
if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
@@ -131,6 +147,14 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
}
ctx->cur_step++;
/* fall-through */
+ case STEP_VERITY:
+ if (ctx->enabled_steps & (1 << STEP_VERITY)) {
+ INIT_WORK(&ctx->work, verity_work);
+ fsverity_enqueue_verify_work(&ctx->work);
+ return;
+ }
+ ctx->cur_step++;
+ /* fall-through */
default:
__read_end_io(ctx->bio);
}
@@ -608,8 +632,15 @@ out:
up_write(&io->io_rwsem);
}
+static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
+{
+ return fsverity_active(inode) &&
+ idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+}
+
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
- unsigned nr_pages, unsigned op_flag)
+ unsigned nr_pages, unsigned op_flag,
+ pgoff_t first_idx)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
@@ -625,6 +656,10 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
if (f2fs_encrypted_file(inode))
post_read_steps |= 1 << STEP_DECRYPT;
+
+ if (f2fs_need_verity(inode, first_idx))
+ post_read_steps |= 1 << STEP_VERITY;
+
if (post_read_steps) {
ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
if (!ctx) {
@@ -646,7 +681,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
- bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
+ bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -1569,6 +1604,15 @@ out:
return ret;
}
+static inline loff_t f2fs_readpage_limit(struct inode *inode)
+{
+ if (IS_ENABLED(CONFIG_FS_VERITY) &&
+ (IS_VERITY(inode) || f2fs_verity_in_progress(inode)))
+ return inode->i_sb->s_maxbytes;
+
+ return i_size_read(inode);
+}
+
static int f2fs_read_single_page(struct inode *inode, struct page *page,
unsigned nr_pages,
struct f2fs_map_blocks *map,
@@ -1587,7 +1631,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
block_in_file = (sector_t)page_index(page);
last_block = block_in_file + nr_pages;
- last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
+ last_block_in_file = (f2fs_readpage_limit(inode) + blocksize - 1) >>
blkbits;
if (last_block > last_block_in_file)
last_block = last_block_in_file;
@@ -1632,6 +1676,11 @@ got_it:
} else {
zero_out:
zero_user_segment(page, 0, PAGE_SIZE);
+ if (f2fs_need_verity(inode, page->index) &&
+ !fsverity_verify_page(page)) {
+ ret = -EIO;
+ goto out;
+ }
if (!PageUptodate(page))
SetPageUptodate(page);
unlock_page(page);
@@ -1650,7 +1699,7 @@ submit_and_realloc:
}
if (bio == NULL) {
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
- is_readahead ? REQ_RAHEAD : 0);
+ is_readahead ? REQ_RAHEAD : 0, page->index);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
bio = NULL;
@@ -2052,7 +2101,7 @@ static int __write_data_page(struct page *page, bool *submitted,
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
- if (page->index < end_index)
+ if (page->index < end_index || f2fs_verity_in_progress(inode))
goto write;
/*
@@ -2427,7 +2476,8 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
struct inode *inode = mapping->host;
loff_t i_size = i_size_read(inode);
- if (to > i_size) {
+ /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
+ if (to > i_size && !f2fs_verity_in_progress(inode)) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
@@ -2458,7 +2508,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
* the block addresses when there is no need to fill the page.
*/
if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
- !is_inode_flag_set(inode, FI_NO_PREALLOC))
+ !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
+ !f2fs_verity_in_progress(inode))
return 0;
/* f2fs_lock_op avoids race between write CP and convert_inline_page */
@@ -2597,7 +2648,8 @@ repeat:
if (len == PAGE_SIZE || PageUptodate(page))
return 0;
- if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
+ if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
+ !f2fs_verity_in_progress(inode)) {
zero_user_segment(page, len, PAGE_SIZE);
return 0;
}
@@ -2660,7 +2712,8 @@ static int f2fs_write_end(struct file *file,
set_page_dirty(page);
- if (pos + copied > i_size_read(inode))
+ if (pos + copied > i_size_read(inode) &&
+ !f2fs_verity_in_progress(inode))
f2fs_i_size_write(inode, pos + copied);
unlock_out:
f2fs_put_page(page, 1);
@@ -3104,7 +3157,9 @@ void f2fs_clear_page_cache_dirty_tag(struct page *page)
int __init f2fs_init_post_read_processing(void)
{
- bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
+ bio_post_read_ctx_cache =
+ kmem_cache_create("f2fs_bio_post_read_ctx",
+ sizeof(struct bio_post_read_ctx), 0, 0, NULL);
if (!bio_post_read_ctx_cache)
goto fail;
bio_post_read_ctx_pool =
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 17382da7f0bd..7c5f121edac5 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -25,6 +25,7 @@
#include <crypto/hash.h>
#include <linux/fscrypt.h>
+#include <linux/fsverity.h>
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
@@ -151,7 +152,7 @@ struct f2fs_mount_info {
#define F2FS_FEATURE_QUOTA_INO 0x0080
#define F2FS_FEATURE_INODE_CRTIME 0x0100
#define F2FS_FEATURE_LOST_FOUND 0x0200
-#define F2FS_FEATURE_VERITY 0x0400 /* reserved */
+#define F2FS_FEATURE_VERITY 0x0400
#define F2FS_FEATURE_SB_CHKSUM 0x0800
#define __F2FS_HAS_FEATURE(raw_super, mask) \
@@ -630,7 +631,7 @@ enum {
#define FADVISE_ENC_NAME_BIT 0x08
#define FADVISE_KEEP_SIZE_BIT 0x10
#define FADVISE_HOT_BIT 0x20
-#define FADVISE_VERITY_BIT 0x40 /* reserved */
+#define FADVISE_VERITY_BIT 0x40
#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
@@ -650,6 +651,8 @@ enum {
#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT)
#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT)
#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT)
+#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
+#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
#define DEF_DIR_LEVEL 0
@@ -2412,6 +2415,7 @@ enum {
FI_PROJ_INHERIT, /* indicate file inherits projectid */
FI_PIN_FILE, /* indicate file should not be gced */
FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
+ FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
};
static inline void __mark_inode_dirty_flag(struct inode *inode,
@@ -2451,6 +2455,12 @@ static inline void clear_inode_flag(struct inode *inode, int flag)
__mark_inode_dirty_flag(inode, flag, false);
}
+static inline bool f2fs_verity_in_progress(struct inode *inode)
+{
+ return IS_ENABLED(CONFIG_FS_VERITY) &&
+ is_inode_flag_set(inode, FI_VERITY_IN_PROGRESS);
+}
+
static inline void set_acl_inode(struct inode *inode, umode_t mode)
{
F2FS_I(inode)->i_acl_mode = mode;
@@ -3521,6 +3531,9 @@ void f2fs_exit_sysfs(void);
int f2fs_register_sysfs(struct f2fs_sb_info *sbi);
void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi);
+/* verity.c */
+extern const struct fsverity_operations f2fs_verityops;
+
/*
* crypto support
*/
@@ -3543,7 +3556,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode)
*/
static inline bool f2fs_post_read_required(struct inode *inode)
{
- return f2fs_encrypted_file(inode);
+ return f2fs_encrypted_file(inode) || fsverity_active(inode);
}
#define F2FS_FEATURE_FUNCS(name, flagname) \
@@ -3561,6 +3574,7 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR);
F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO);
F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME);
F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
+F2FS_FEATURE_FUNCS(verity, VERITY);
F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
#ifdef CONFIG_BLK_DEV_ZONED
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6a7349f9ac15..39fffc19e00c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -496,6 +496,10 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
if (err)
return err;
+ err = fsverity_file_open(inode, filp);
+ if (err)
+ return err;
+
filp->f_mode |= FMODE_NOWAIT;
return dquot_file_open(inode, filp);
@@ -778,6 +782,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
if (err)
return err;
+ err = fsverity_prepare_setattr(dentry, attr);
+ if (err)
+ return err;
+
if (is_quota_modification(inode, attr)) {
err = dquot_initialize(inode);
if (err)
@@ -1705,7 +1713,8 @@ static const struct {
FS_PROJINHERIT_FL | \
FS_ENCRYPT_FL | \
FS_INLINE_DATA_FL | \
- FS_NOCOW_FL)
+ FS_NOCOW_FL | \
+ FS_VERITY_FL)
#define F2FS_SETTABLE_FS_FL ( \
FS_SYNC_FL | \
@@ -1750,6 +1759,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
if (IS_ENCRYPTED(inode))
fsflags |= FS_ENCRYPT_FL;
+ if (IS_VERITY(inode))
+ fsflags |= FS_VERITY_FL;
if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
fsflags |= FS_INLINE_DATA_FL;
if (is_inode_flag_set(inode, FI_PIN_FILE))
@@ -3103,6 +3114,30 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
return ret;
}
+static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+
+ if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) {
+ f2fs_warn(F2FS_I_SB(inode),
+ "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem.\n",
+ inode->i_ino);
+ return -EOPNOTSUPP;
+ }
+
+ return fsverity_ioctl_enable(filp, (const void __user *)arg);
+}
+
+static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
+{
+ if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
+ return -EOPNOTSUPP;
+
+ return fsverity_ioctl_measure(filp, (void __user *)arg);
+}
+
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -3171,6 +3206,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_precache_extents(filp, arg);
case F2FS_IOC_RESIZE_FS:
return f2fs_ioc_resize_fs(filp, arg);
+ case FS_IOC_ENABLE_VERITY:
+ return f2fs_ioc_enable_verity(filp, arg);
+ case FS_IOC_MEASURE_VERITY:
+ return f2fs_ioc_measure_verity(filp, arg);
default:
return -ENOTTY;
}
@@ -3290,6 +3329,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_SET_PIN_FILE:
case F2FS_IOC_PRECACHE_EXTENTS:
case F2FS_IOC_RESIZE_FS:
+ case FS_IOC_ENABLE_VERITY:
+ case FS_IOC_MEASURE_VERITY:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index a33d7a849b2d..06da75d418e0 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -46,9 +46,11 @@ void f2fs_set_inode_flags(struct inode *inode)
new_fl |= S_DIRSYNC;
if (file_is_encrypt(inode))
new_fl |= S_ENCRYPTED;
+ if (file_is_verity(inode))
+ new_fl |= S_VERITY;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|
- S_ENCRYPTED);
+ S_ENCRYPTED|S_VERITY);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -733,6 +735,7 @@ no_delete:
}
out_clear:
fscrypt_put_encryption_info(inode);
+ fsverity_cleanup_inode(inode);
clear_inode(inode);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index e15bd29bd453..f43befda0e1a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3146,6 +3146,9 @@ try_onemore:
#ifdef CONFIG_FS_ENCRYPTION
sb->s_cop = &f2fs_cryptops;
#endif
+#ifdef CONFIG_FS_VERITY
+ sb->s_vop = &f2fs_verityops;
+#endif
sb->s_xattr = f2fs_xattr_handlers;
sb->s_export_op = &f2fs_export_ops;
sb->s_magic = F2FS_SUPER_MAGIC;
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 3aeacd0aacfd..0cd64f994068 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -131,6 +131,9 @@ static ssize_t features_show(struct f2fs_attr *a,
if (f2fs_sb_has_lost_found(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "lost_found");
+ if (f2fs_sb_has_verity(sbi))
+ len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "verity");
if (f2fs_sb_has_sb_chksum(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "sb_checksum");
@@ -364,6 +367,7 @@ enum feat_id {
FEAT_QUOTA_INO,
FEAT_INODE_CRTIME,
FEAT_LOST_FOUND,
+ FEAT_VERITY,
FEAT_SB_CHECKSUM,
};
@@ -381,6 +385,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
case FEAT_QUOTA_INO:
case FEAT_INODE_CRTIME:
case FEAT_LOST_FOUND:
+ case FEAT_VERITY:
case FEAT_SB_CHECKSUM:
return snprintf(buf, PAGE_SIZE, "supported\n");
}
@@ -470,6 +475,9 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR);
F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME);
F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND);
+#ifdef CONFIG_FS_VERITY
+F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY);
+#endif
F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
@@ -534,6 +542,9 @@ static struct attribute *f2fs_feat_attrs[] = {
ATTR_LIST(quota_ino),
ATTR_LIST(inode_crtime),
ATTR_LIST(lost_found),
+#ifdef CONFIG_FS_VERITY
+ ATTR_LIST(verity),
+#endif
ATTR_LIST(sb_checksum),
NULL,
};
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
new file mode 100644
index 000000000000..a401ef72bc82
--- /dev/null
+++ b/fs/f2fs/verity.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/f2fs/verity.c: fs-verity support for f2fs
+ *
+ * Copyright 2019 Google LLC
+ */
+
+/*
+ * Implementation of fsverity_operations for f2fs.
+ *
+ * Like ext4, f2fs stores the verity metadata (Merkle tree and
+ * fsverity_descriptor) past the end of the file, starting at the first 64K
+ * boundary beyond i_size. This approach works because (a) verity files are
+ * readonly, and (b) pages fully beyond i_size aren't visible to userspace but
+ * can be read/written internally by f2fs with only some relatively small
+ * changes to f2fs. Extended attributes cannot be used because (a) f2fs limits
+ * the total size of an inode's xattr entries to 4096 bytes, which wouldn't be
+ * enough for even a single Merkle tree block, and (b) f2fs encryption doesn't
+ * encrypt xattrs, yet the verity metadata *must* be encrypted when the file is
+ * because it contains hashes of the plaintext data.
+ *
+ * Using a 64K boundary rather than a 4K one keeps things ready for
+ * architectures with 64K pages, and it doesn't necessarily waste space on-disk
+ * since there can be a hole between i_size and the start of the Merkle tree.
+ */
+
+#include <linux/f2fs_fs.h>
+
+#include "f2fs.h"
+#include "xattr.h"
+
+static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode)
+{
+ return round_up(inode->i_size, 65536);
+}
+
+/*
+ * Read some verity metadata from the inode. __vfs_read() can't be used because
+ * we need to read beyond i_size.
+ */
+static int pagecache_read(struct inode *inode, void *buf, size_t count,
+ loff_t pos)
+{
+ while (count) {
+ size_t n = min_t(size_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+ void *addr;
+
+ page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT,
+ NULL);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ addr = kmap_atomic(page);
+ memcpy(buf, addr + offset_in_page(pos), n);
+ kunmap_atomic(addr);
+
+ put_page(page);
+
+ buf += n;
+ pos += n;
+ count -= n;
+ }
+ return 0;
+}
+
+/*
+ * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY.
+ * kernel_write() can't be used because the file descriptor is readonly.
+ */
+static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+ loff_t pos)
+{
+ if (pos + count > inode->i_sb->s_maxbytes)
+ return -EFBIG;
+
+ while (count) {
+ size_t n = min_t(size_t, count,
+ PAGE_SIZE - offset_in_page(pos));
+ struct page *page;
+ void *fsdata;
+ void *addr;
+ int res;
+
+ res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
+ &page, &fsdata);
+ if (res)
+ return res;
+
+ addr = kmap_atomic(page);
+ memcpy(addr + offset_in_page(pos), buf, n);
+ kunmap_atomic(addr);
+
+ res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
+ page, fsdata);
+ if (res < 0)
+ return res;
+ if (res != n)
+ return -EIO;
+
+ buf += n;
+ pos += n;
+ count -= n;
+ }
+ return 0;
+}
+
+/*
+ * Format of f2fs verity xattr. This points to the location of the verity
+ * descriptor within the file data rather than containing it directly because
+ * the verity descriptor *must* be encrypted when f2fs encryption is used. But,
+ * f2fs encryption does not encrypt xattrs.
+ */
+struct fsverity_descriptor_location {
+ __le32 version;
+ __le32 size;
+ __le64 pos;
+};
+
+static int f2fs_begin_enable_verity(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+ int err;
+
+ if (f2fs_verity_in_progress(inode))
+ return -EBUSY;
+
+ if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
+ return -EOPNOTSUPP;
+
+ /*
+ * Since the file was opened readonly, we have to initialize the quotas
+ * here and not rely on ->open() doing it. This must be done before
+ * evicting the inline data.
+ */
+ err = dquot_initialize(inode);
+ if (err)
+ return err;
+
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+
+ set_inode_flag(inode, FI_VERITY_IN_PROGRESS);
+ return 0;
+}
+
+static int f2fs_end_enable_verity(struct file *filp, const void *desc,
+ size_t desc_size, u64 merkle_tree_size)
+{
+ struct inode *inode = file_inode(filp);
+ u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size;
+ struct fsverity_descriptor_location dloc = {
+ .version = cpu_to_le32(1),
+ .size = cpu_to_le32(desc_size),
+ .pos = cpu_to_le64(desc_pos),
+ };
+ int err = 0;
+
+ if (desc != NULL) {
+ /* Succeeded; write the verity descriptor. */
+ err = pagecache_write(inode, desc, desc_size, desc_pos);
+
+ /* Write all pages before clearing FI_VERITY_IN_PROGRESS. */
+ if (!err)
+ err = filemap_write_and_wait(inode->i_mapping);
+ }
+
+ /* If we failed, truncate anything we wrote past i_size. */
+ if (desc == NULL || err)
+ f2fs_truncate(inode);
+
+ clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
+
+ if (desc != NULL && !err) {
+ err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
+ F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
+ NULL, XATTR_CREATE);
+ if (!err) {
+ file_set_verity(inode);
+ f2fs_set_inode_flags(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
+ }
+ }
+ return err;
+}
+
+static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
+ size_t buf_size)
+{
+ struct fsverity_descriptor_location dloc;
+ int res;
+ u32 size;
+ u64 pos;
+
+ /* Get the descriptor location */
+ res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_VERITY,
+ F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), NULL);
+ if (res < 0 && res != -ERANGE)
+ return res;
+ if (res != sizeof(dloc) || dloc.version != cpu_to_le32(1)) {
+ f2fs_warn(F2FS_I_SB(inode), "unknown verity xattr format");
+ return -EINVAL;
+ }
+ size = le32_to_cpu(dloc.size);
+ pos = le64_to_cpu(dloc.pos);
+
+ /* Get the descriptor */
+ if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes ||
+ pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) {
+ f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr");
+ return -EFSCORRUPTED;
+ }
+ if (buf_size) {
+ if (size > buf_size)
+ return -ERANGE;
+ res = pagecache_read(inode, buf, size, pos);
+ if (res)
+ return res;
+ }
+ return size;
+}
+
+static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
+ pgoff_t index)
+{
+ index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
+
+ return read_mapping_page(inode->i_mapping, index, NULL);
+}
+
+static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,
+ u64 index, int log_blocksize)
+{
+ loff_t pos = f2fs_verity_metadata_pos(inode) + (index << log_blocksize);
+
+ return pagecache_write(inode, buf, 1 << log_blocksize, pos);
+}
+
+const struct fsverity_operations f2fs_verityops = {
+ .begin_enable_verity = f2fs_begin_enable_verity,
+ .end_enable_verity = f2fs_end_enable_verity,
+ .get_verity_descriptor = f2fs_get_verity_descriptor,
+ .read_merkle_tree_page = f2fs_read_merkle_tree_page,
+ .write_merkle_tree_block = f2fs_write_merkle_tree_block,
+};
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index a90920e2f949..de0c600b9cab 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -34,8 +34,10 @@
#define F2FS_XATTR_INDEX_ADVISE 7
/* Should be same as EXT4_XATTR_INDEX_ENCRYPTION */
#define F2FS_XATTR_INDEX_ENCRYPTION 9
+#define F2FS_XATTR_INDEX_VERITY 11

#define F2FS_XATTR_NAME_ENCRYPTION_CONTEXT "c"
+#define F2FS_XATTR_NAME_VERITY "v"

struct f2fs_xattr_header {
__le32 h_magic; /* magic number for identification */
diff --git a/fs/verity/Kconfig b/fs/verity/Kconfig
new file mode 100644
index 000000000000..88fb25119899
--- /dev/null
+++ b/fs/verity/Kconfig
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config FS_VERITY
+ bool "FS Verity (read-only file-based authenticity protection)"
+ select CRYPTO
+ # SHA-256 is selected as it's intended to be the default hash algorithm.
+ # To avoid bloat, other wanted algorithms must be selected explicitly.
+ select CRYPTO_SHA256
+ help
+ This option enables fs-verity. fs-verity is the dm-verity
+ mechanism implemented at the file level. On supported
+ filesystems (currently EXT4 and F2FS), userspace can use an
+ ioctl to enable verity for a file, which causes the filesystem
+ to build a Merkle tree for the file. The filesystem will then
+ transparently verify any data read from the file against the
+ Merkle tree. The file is also made read-only.
+
+ This serves as an integrity check, but the availability of the
+ Merkle tree root hash also allows efficiently supporting
+ various use cases where normally the whole file would need to
+ be hashed at once, such as: (a) auditing (logging the file's
+ hash), or (b) authenticity verification (comparing the hash
+ against a known good value, e.g. from a digital signature).
+
+ fs-verity is especially useful on large files where not all
+ the contents may actually be needed. Also, fs-verity verifies
+ data each time it is paged back in, which provides better
+ protection against malicious disks vs. an ahead-of-time hash.
+
+ If unsure, say N.
+
+config FS_VERITY_DEBUG
+ bool "FS Verity debugging"
+ depends on FS_VERITY
+ help
+ Enable debugging messages related to fs-verity by default.
+
+ Say N unless you are an fs-verity developer.
+
+config FS_VERITY_BUILTIN_SIGNATURES
+ bool "FS Verity builtin signature support"
+ depends on FS_VERITY
+ select SYSTEM_DATA_VERIFICATION
+ help
+ Support verifying signatures of verity files against the X.509
+ certificates that have been loaded into the ".fs-verity"
+ kernel keyring.
+
+ This is meant as a relatively simple mechanism that can be
+ used to provide an authenticity guarantee for verity files, as
+ an alternative to IMA appraisal. Userspace programs still
+ need to check that the verity bit is set in order to get an
+ authenticity guarantee.
+
+ If unsure, say N.
diff --git a/fs/verity/Makefile b/fs/verity/Makefile
new file mode 100644
index 000000000000..570e9136334d
--- /dev/null
+++ b/fs/verity/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_FS_VERITY) += enable.o \
+ hash_algs.o \
+ init.o \
+ measure.o \
+ open.o \
+ verify.o
+
+obj-$(CONFIG_FS_VERITY_BUILTIN_SIGNATURES) += signature.o
diff --git a/fs/verity/enable.c b/fs/verity/enable.c
new file mode 100644
index 000000000000..eabc6ac19906
--- /dev/null
+++ b/fs/verity/enable.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/verity/enable.c: ioctl to enable verity on a file
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <crypto/hash.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+
+static int build_merkle_tree_level(struct inode *inode, unsigned int level,
+ u64 num_blocks_to_hash,
+ const struct merkle_tree_params *params,
+ u8 *pending_hashes,
+ struct ahash_request *req)
+{
+ const struct fsverity_operations *vops = inode->i_sb->s_vop;
+ unsigned int pending_size = 0;
+ u64 dst_block_num;
+ u64 i;
+ int err;
+
+ if (WARN_ON(params->block_size != PAGE_SIZE)) /* checked earlier too */
+ return -EINVAL;
+
+ if (level < params->num_levels) {
+ dst_block_num = params->level_start[level];
+ } else {
+ if (WARN_ON(num_blocks_to_hash != 1))
+ return -EINVAL;
+ dst_block_num = 0; /* unused */
+ }
+
+ for (i = 0; i < num_blocks_to_hash; i++) {
+ struct page *src_page;
+
+ if ((pgoff_t)i % 10000 == 0 || i + 1 == num_blocks_to_hash)
+ pr_debug("Hashing block %llu of %llu for level %u\n",
+ i + 1, num_blocks_to_hash, level);
+
+ if (level == 0) {
+ /* Leaf: hashing a data block */
+ src_page = read_mapping_page(inode->i_mapping, i, NULL);
+ if (IS_ERR(src_page)) {
+ err = PTR_ERR(src_page);
+ fsverity_err(inode,
+ "Error %d reading data page %llu",
+ err, i);
+ return err;
+ }
+ } else {
+ /* Non-leaf: hashing hash block from level below */
+ src_page = vops->read_merkle_tree_page(inode,
+ params->level_start[level - 1] + i);
+ if (IS_ERR(src_page)) {
+ err = PTR_ERR(src_page);
+ fsverity_err(inode,
+ "Error %d reading Merkle tree page %llu",
+ err, params->level_start[level - 1] + i);
+ return err;
+ }
+ }
+
+ err = fsverity_hash_page(params, inode, req, src_page,
+ &pending_hashes[pending_size]);
+ put_page(src_page);
+ if (err)
+ return err;
+ pending_size += params->digest_size;
+
+ if (level == params->num_levels) /* Root hash? */
+ return 0;
+
+ if (pending_size + params->digest_size > params->block_size ||
+ i + 1 == num_blocks_to_hash) {
+ /* Flush the pending hash block */
+ memset(&pending_hashes[pending_size], 0,
+ params->block_size - pending_size);
+ err = vops->write_merkle_tree_block(inode,
+ pending_hashes,
+ dst_block_num,
+ params->log_blocksize);
+ if (err) {
+ fsverity_err(inode,
+ "Error %d writing Merkle tree block %llu",
+ err, dst_block_num);
+ return err;
+ }
+ dst_block_num++;
+ pending_size = 0;
+ }
+
+ if (fatal_signal_pending(current))
+ return -EINTR;
+ cond_resched();
+ }
+ return 0;
+}
+
+/*
+ * Build the Merkle tree for the given inode using the given parameters, and
+ * return the root hash in @root_hash.
+ *
+ * The tree is written to a filesystem-specific location as determined by the
+ * ->write_merkle_tree_block() method. However, the blocks that comprise the
+ * tree are the same for all filesystems.
+ */
+static int build_merkle_tree(struct inode *inode,
+ const struct merkle_tree_params *params,
+ u8 *root_hash)
+{
+ u8 *pending_hashes;
+ struct ahash_request *req;
+ u64 blocks;
+ unsigned int level;
+ int err = -ENOMEM;
+
+ if (inode->i_size == 0) {
+ /* Empty file is a special case; root hash is all 0's */
+ memset(root_hash, 0, params->digest_size);
+ return 0;
+ }
+
+ pending_hashes = kmalloc(params->block_size, GFP_KERNEL);
+ req = ahash_request_alloc(params->hash_alg->tfm, GFP_KERNEL);
+ if (!pending_hashes || !req)
+ goto out;
+
+ /*
+ * Build each level of the Merkle tree, starting at the leaf level
+ * (level 0) and ascending to the root node (level 'num_levels - 1').
+ * Then at the end (level 'num_levels'), calculate the root hash.
+ */
+ blocks = (inode->i_size + params->block_size - 1) >>
+ params->log_blocksize;
+ for (level = 0; level <= params->num_levels; level++) {
+ err = build_merkle_tree_level(inode, level, blocks, params,
+ pending_hashes, req);
+ if (err)
+ goto out;
+ blocks = (blocks + params->hashes_per_block - 1) >>
+ params->log_arity;
+ }
+ memcpy(root_hash, pending_hashes, params->digest_size);
+ err = 0;
+out:
+ kfree(pending_hashes);
+ ahash_request_free(req);
+ return err;
+}
+
+static int enable_verity(struct file *filp,
+ const struct fsverity_enable_arg *arg)
+{
+ struct inode *inode = file_inode(filp);
+ const struct fsverity_operations *vops = inode->i_sb->s_vop;
+ struct merkle_tree_params params = { };
+ struct fsverity_descriptor *desc;
+ size_t desc_size = sizeof(*desc) + arg->sig_size;
+ struct fsverity_info *vi;
+ int err;
+
+ /* Start initializing the fsverity_descriptor */
+ desc = kzalloc(desc_size, GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+ desc->version = 1;
+ desc->hash_algorithm = arg->hash_algorithm;
+ desc->log_blocksize = ilog2(arg->block_size);
+
+ /* Get the salt if the user provided one */
+ if (arg->salt_size &&
+ copy_from_user(desc->salt,
+ (const u8 __user *)(uintptr_t)arg->salt_ptr,
+ arg->salt_size)) {
+ err = -EFAULT;
+ goto out;
+ }
+ desc->salt_size = arg->salt_size;
+
+ /* Get the signature if the user provided one */
+ if (arg->sig_size &&
+ copy_from_user(desc->signature,
+ (const u8 __user *)(uintptr_t)arg->sig_ptr,
+ arg->sig_size)) {
+ err = -EFAULT;
+ goto out;
+ }
+ desc->sig_size = cpu_to_le32(arg->sig_size);
+
+ desc->data_size = cpu_to_le64(inode->i_size);
+
+ /* Prepare the Merkle tree parameters */
+ err = fsverity_init_merkle_tree_params(&params, inode,
+ arg->hash_algorithm,
+ desc->log_blocksize,
+ desc->salt, desc->salt_size);
+ if (err)
+ goto out;
+
+ /*
+ * Start enabling verity on this file, serialized by the inode lock.
+ * Fail if verity is already enabled or is already being enabled.
+ */
+ inode_lock(inode);
+ if (IS_VERITY(inode))
+ err = -EEXIST;
+ else
+ err = vops->begin_enable_verity(filp);
+ inode_unlock(inode);
+ if (err)
+ goto out;
+
+ /*
+ * Build the Merkle tree. Don't hold the inode lock during this, since
+ * on huge files this may take a very long time and we don't want to
+ * force unrelated syscalls like chown() to block forever. We don't
+ * need the inode lock here because deny_write_access() already prevents
+ * the file from being written to or truncated, and we still serialize
+ * ->begin_enable_verity() and ->end_enable_verity() using the inode
+ * lock and only allow one process to be here at a time on a given file.
+ */
+ pr_debug("Building Merkle tree...\n");
+ BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE);
+ err = build_merkle_tree(inode, &params, desc->root_hash);
+ if (err) {
+ fsverity_err(inode, "Error %d building Merkle tree", err);
+ goto rollback;
+ }
+ pr_debug("Done building Merkle tree. Root hash is %s:%*phN\n",
+ params.hash_alg->name, params.digest_size, desc->root_hash);
+
+ /*
+ * Create the fsverity_info. Don't bother trying to save work by
+ * reusing the merkle_tree_params from above. Instead, just create the
+ * fsverity_info from the fsverity_descriptor as if it were just loaded
+ * from disk. This is simpler, and it serves as an extra check that the
+ * metadata we're writing is valid before actually enabling verity.
+ */
+ vi = fsverity_create_info(inode, desc, desc_size);
+ if (IS_ERR(vi)) {
+ err = PTR_ERR(vi);
+ goto rollback;
+ }
+
+ if (arg->sig_size)
+ pr_debug("Storing a %u-byte PKCS#7 signature alongside the file\n",
+ arg->sig_size);
+
+ /*
+ * Tell the filesystem to finish enabling verity on the file.
+ * Serialized with ->begin_enable_verity() by the inode lock.
+ */
+ inode_lock(inode);
+ err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size);
+ inode_unlock(inode);
+ if (err) {
+ fsverity_err(inode, "%ps() failed with err %d",
+ vops->end_enable_verity, err);
+ fsverity_free_info(vi);
+ } else if (WARN_ON(!IS_VERITY(inode))) {
+ err = -EINVAL;
+ fsverity_free_info(vi);
+ } else {
+ /* Successfully enabled verity */
+
+ /*
+ * Readers can start using ->i_verity_info immediately, so it
+ * can't be rolled back once set. So don't set it until just
+ * after the filesystem has successfully enabled verity.
+ */
+ fsverity_set_info(inode, vi);
+ }
+out:
+ kfree(params.hashstate);
+ kfree(desc);
+ return err;
+
+rollback:
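+	/* Passing desc == NULL tells the filesystem that enabling verity failed. */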
+ inode_lock(inode);
+ (void)vops->end_enable_verity(filp, NULL, 0, params.tree_size);
+ inode_unlock(inode);
+ goto out;
+}
+
+/**
+ * fsverity_ioctl_enable() - enable verity on a file
+ *
+ * Enable fs-verity on a file. See the "FS_IOC_ENABLE_VERITY" section of
+ * Documentation/filesystems/fsverity.rst for the documentation.
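+ *
+ * A minimal userspace invocation (an illustrative sketch only; see the UAPI
+ * header for the full definition of struct fsverity_enable_arg) looks like:
+ *
+ *	struct fsverity_enable_arg arg = { .version = 1,
+ *		.hash_algorithm = FS_VERITY_HASH_ALG_SHA256,
+ *		.block_size = 4096 };
+ *	ioctl(fd, FS_IOC_ENABLE_VERITY, &arg);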
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
+{
+ struct inode *inode = file_inode(filp);
+ struct fsverity_enable_arg arg;
+ int err;
+
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+
+ if (arg.version != 1)
+ return -EINVAL;
+
+ if (arg.__reserved1 ||
+ memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2)))
+ return -EINVAL;
+
+ if (arg.block_size != PAGE_SIZE)
+ return -EINVAL;
+
+ if (arg.salt_size > FIELD_SIZEOF(struct fsverity_descriptor, salt))
+ return -EMSGSIZE;
+
+ if (arg.sig_size > FS_VERITY_MAX_SIGNATURE_SIZE)
+ return -EMSGSIZE;
+
+ /*
+ * Require a regular file with write access. But the actual fd must
+ * still be readonly so that we can lock out all writers. This is
+ * needed to guarantee that no writable fds exist to the file once it
+ * has verity enabled, and to stabilize the data being hashed.
+ */
+
+ err = inode_permission(inode, MAY_WRITE);
+ if (err)
+ return err;
+
+ if (IS_APPEND(inode))
+ return -EPERM;
+
+ if (S_ISDIR(inode->i_mode))
+ return -EISDIR;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ err = mnt_want_write_file(filp);
+ if (err) /* -EROFS */
+ return err;
+
+ err = deny_write_access(filp);
+ if (err) /* -ETXTBSY */
+ goto out_drop_write;
+
+ err = enable_verity(filp, &arg);
+ if (err)
+ goto out_allow_write_access;
+
+ /*
+ * Some pages of the file may have been evicted from pagecache after
+ * being used in the Merkle tree construction, then read into pagecache
+ * again by another process reading from the file concurrently. Since
+ * these pages didn't undergo verification against the file measurement
+ * which fs-verity now claims to be enforcing, we have to wipe the
+ * pagecache to ensure that all future reads are verified.
+ */
+ filemap_write_and_wait(inode->i_mapping);
+ invalidate_inode_pages2(inode->i_mapping);
+
+ /*
+ * allow_write_access() is needed to pair with deny_write_access().
+ * Regardless, the filesystem won't allow writing to verity files.
+ */
+out_allow_write_access:
+ allow_write_access(filp);
+out_drop_write:
+ mnt_drop_write_file(filp);
+ return err;
+}
+EXPORT_SYMBOL_GPL(fsverity_ioctl_enable);
diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
new file mode 100644
index 000000000000..e74c79b64d88
--- /dev/null
+++ b/fs/verity/fsverity_private.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs-verity: read-only file-based authenticity protection
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#ifndef _FSVERITY_PRIVATE_H
+#define _FSVERITY_PRIVATE_H
+
+#ifdef CONFIG_FS_VERITY_DEBUG
+#define DEBUG
+#endif
+
+#define pr_fmt(fmt) "fs-verity: " fmt
+
+#include <crypto/sha.h>
+#include <linux/fsverity.h>
+
+struct ahash_request;
+
+/*
+ * Implementation limit: maximum depth of the Merkle tree. For now 8 is plenty;
+ * it's enough for over U64_MAX bytes of data using SHA-256 and 4K blocks.
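+ * (With 4K blocks and 32-byte hashes there are 128 hashes per block, so
+ * 8 levels cover 128^8 = 2^56 data blocks, i.e. 2^68 bytes of data.)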
+ */
+#define FS_VERITY_MAX_LEVELS 8
+
+/*
+ * Largest digest size among all hash algorithms supported by fs-verity.
+ * Currently assumed to be <= size of fsverity_descriptor::root_hash.
+ */
+#define FS_VERITY_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
+
+/* A hash algorithm supported by fs-verity */
+struct fsverity_hash_alg {
+ struct crypto_ahash *tfm; /* hash tfm, allocated on demand */
+ const char *name; /* crypto API name, e.g. sha256 */
+ unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */
+ unsigned int block_size; /* block size in bytes, e.g. 64 for SHA-256 */
+};
+
+/* Merkle tree parameters: hash algorithm, initial hash state, and topology */
+struct merkle_tree_params {
+ const struct fsverity_hash_alg *hash_alg; /* the hash algorithm */
+ const u8 *hashstate; /* initial hash state or NULL */
+ unsigned int digest_size; /* same as hash_alg->digest_size */
+ unsigned int block_size; /* size of data and tree blocks */
+ unsigned int hashes_per_block; /* number of hashes per tree block */
+ unsigned int log_blocksize; /* log2(block_size) */
+ unsigned int log_arity; /* log2(hashes_per_block) */
+ unsigned int num_levels; /* number of levels in Merkle tree */
+ u64 tree_size; /* Merkle tree size in bytes */
+
+ /*
+ * Starting block index for each tree level, ordered from leaf level (0)
+ * to root level ('num_levels - 1')
+ */
+ u64 level_start[FS_VERITY_MAX_LEVELS];
+};
+
+/**
+ * fsverity_info - cached verity metadata for an inode
+ *
+ * When a verity file is first opened, an instance of this struct is allocated
+ * and stored in ->i_verity_info; it remains until the inode is evicted. It
+ * caches information about the Merkle tree that's needed to efficiently verify
+ * data read from the file. It also caches the file measurement. The Merkle
+ * tree pages themselves are not cached here, but the filesystem may cache them.
+ */
+struct fsverity_info {
+ struct merkle_tree_params tree_params;
+ u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE];
+ u8 measurement[FS_VERITY_MAX_DIGEST_SIZE];
+ const struct inode *inode;
+};
+
+/*
+ * Merkle tree properties. The file measurement is the hash of this structure
+ * excluding the signature and with the sig_size field set to 0.
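+ * The fixed-size portion below is 256 bytes; the optional PKCS#7 signature
+ * is appended after it.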
+ */
+struct fsverity_descriptor {
+ __u8 version; /* must be 1 */
+ __u8 hash_algorithm; /* Merkle tree hash algorithm */
+ __u8 log_blocksize; /* log2 of size of data and tree blocks */
+ __u8 salt_size; /* size of salt in bytes; 0 if none */
+ __le32 sig_size; /* size of signature in bytes; 0 if none */
+ __le64 data_size; /* size of file the Merkle tree is built over */
+ __u8 root_hash[64]; /* Merkle tree root hash */
+ __u8 salt[32]; /* salt prepended to each hashed block */
+ __u8 __reserved[144]; /* must be 0's */
+ __u8 signature[]; /* optional PKCS#7 signature */
+};
+
+/* Arbitrary limit to bound the kmalloc() size. Can be changed. */
+#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384
+
+#define FS_VERITY_MAX_SIGNATURE_SIZE (FS_VERITY_MAX_DESCRIPTOR_SIZE - \
+ sizeof(struct fsverity_descriptor))
+
+/*
+ * Format in which verity file measurements are signed. This is the same as
+ * 'struct fsverity_digest', except here some magic bytes are prepended to
+ * provide some context about what is being signed in case the same key is used
+ * for non-fsverity purposes, and here the fields have fixed endianness.
+ */
+struct fsverity_signed_digest {
+ char magic[8]; /* must be "FSVerity" */
+ __le16 digest_algorithm;
+ __le16 digest_size;
+ __u8 digest[];
+};
+
+/* hash_algs.c */
+
+extern struct fsverity_hash_alg fsverity_hash_algs[];
+
+const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
+ unsigned int num);
+const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg,
+ const u8 *salt, size_t salt_size);
+int fsverity_hash_page(const struct merkle_tree_params *params,
+ const struct inode *inode,
+ struct ahash_request *req, struct page *page, u8 *out);
+int fsverity_hash_buffer(const struct fsverity_hash_alg *alg,
+ const void *data, size_t size, u8 *out);
+void __init fsverity_check_hash_algs(void);
+
+/* init.c */
+
+extern void __printf(3, 4) __cold
+fsverity_msg(const struct inode *inode, const char *level,
+ const char *fmt, ...);
+
+#define fsverity_warn(inode, fmt, ...) \
+ fsverity_msg((inode), KERN_WARNING, fmt, ##__VA_ARGS__)
+#define fsverity_err(inode, fmt, ...) \
+ fsverity_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__)
+
+/* open.c */
+
+int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
+ const struct inode *inode,
+ unsigned int hash_algorithm,
+ unsigned int log_blocksize,
+ const u8 *salt, size_t salt_size);
+
+struct fsverity_info *fsverity_create_info(const struct inode *inode,
+ void *desc, size_t desc_size);
+
+void fsverity_set_info(struct inode *inode, struct fsverity_info *vi);
+
+void fsverity_free_info(struct fsverity_info *vi);
+
+int __init fsverity_init_info_cache(void);
+void __init fsverity_exit_info_cache(void);
+
+/* signature.c */
+
+#ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES
+int fsverity_verify_signature(const struct fsverity_info *vi,
+ const struct fsverity_descriptor *desc,
+ size_t desc_size);
+
+int __init fsverity_init_signature(void);
+#else /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
+static inline int
+fsverity_verify_signature(const struct fsverity_info *vi,
+ const struct fsverity_descriptor *desc,
+ size_t desc_size)
+{
+ return 0;
+}
+
+static inline int fsverity_init_signature(void)
+{
+ return 0;
+}
+#endif /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
+
+/* verify.c */
+
+int __init fsverity_init_workqueue(void);
+void __init fsverity_exit_workqueue(void);
+
+#endif /* _FSVERITY_PRIVATE_H */
diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c
new file mode 100644
index 000000000000..31e6d7d2389a
--- /dev/null
+++ b/fs/verity/hash_algs.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/verity/hash_algs.c: fs-verity hash algorithms
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <crypto/hash.h>
+#include <linux/scatterlist.h>
+
+/* The hash algorithms supported by fs-verity */
+struct fsverity_hash_alg fsverity_hash_algs[] = {
+ [FS_VERITY_HASH_ALG_SHA256] = {
+ .name = "sha256",
+ .digest_size = SHA256_DIGEST_SIZE,
+ .block_size = SHA256_BLOCK_SIZE,
+ },
+ [FS_VERITY_HASH_ALG_SHA512] = {
+ .name = "sha512",
+ .digest_size = SHA512_DIGEST_SIZE,
+ .block_size = SHA512_BLOCK_SIZE,
+ },
+};
+
+/**
+ * fsverity_get_hash_alg() - validate and prepare a hash algorithm
+ * @inode: optional inode for logging purposes
+ * @num: the hash algorithm number
+ *
+ * Get the struct fsverity_hash_alg for the given hash algorithm number, and
+ * ensure it has a hash transform ready to go. The hash transforms are
+ * allocated on-demand so that we don't waste resources unnecessarily, and
+ * because the crypto modules may be initialized later than fs/verity/.
+ *
+ * Return: pointer to the hash alg on success, else an ERR_PTR()
+ */
+const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
+ unsigned int num)
+{
+ struct fsverity_hash_alg *alg;
+ struct crypto_ahash *tfm;
+ int err;
+
+ if (num >= ARRAY_SIZE(fsverity_hash_algs) ||
+ !fsverity_hash_algs[num].name) {
+ fsverity_warn(inode, "Unknown hash algorithm number: %u", num);
+ return ERR_PTR(-EINVAL);
+ }
+ alg = &fsverity_hash_algs[num];
+
+ /* pairs with cmpxchg() below */
+ tfm = READ_ONCE(alg->tfm);
+ if (likely(tfm != NULL))
+ return alg;
+ /*
+ * Using the shash API would make things a bit simpler, but the ahash
+ * API is preferable as it allows the use of crypto accelerators.
+ */
+ tfm = crypto_alloc_ahash(alg->name, 0, 0);
+ if (IS_ERR(tfm)) {
+ if (PTR_ERR(tfm) == -ENOENT) {
+ fsverity_warn(inode,
+ "Missing crypto API support for hash algorithm \"%s\"",
+ alg->name);
+ return ERR_PTR(-ENOPKG);
+ }
+ fsverity_err(inode,
+ "Error allocating hash algorithm \"%s\": %ld",
+ alg->name, PTR_ERR(tfm));
+ return ERR_CAST(tfm);
+ }
+
+ err = -EINVAL;
+ if (WARN_ON(alg->digest_size != crypto_ahash_digestsize(tfm)))
+ goto err_free_tfm;
+ if (WARN_ON(alg->block_size != crypto_ahash_blocksize(tfm)))
+ goto err_free_tfm;
+
+ pr_info("%s using implementation \"%s\"\n",
+ alg->name, crypto_ahash_driver_name(tfm));
+
+ /* pairs with READ_ONCE() above */
+ if (cmpxchg(&alg->tfm, NULL, tfm) != NULL)
+ crypto_free_ahash(tfm);
+
+ return alg;
+
+err_free_tfm:
+ crypto_free_ahash(tfm);
+ return ERR_PTR(err);
+}
+
+/**
+ * fsverity_prepare_hash_state() - precompute the initial hash state
+ * @alg: hash algorithm
+ * @salt: a salt which is to be prepended to all data to be hashed
+ * @salt_size: salt size in bytes, possibly 0
+ *
+ * Return: NULL if the salt is empty, otherwise the kmalloc()'ed precomputed
+ * initial hash state on success or an ERR_PTR() on failure.
+ */
+const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg,
+ const u8 *salt, size_t salt_size)
+{
+ u8 *hashstate = NULL;
+ struct ahash_request *req = NULL;
+ u8 *padded_salt = NULL;
+ size_t padded_salt_size;
+ struct scatterlist sg;
+ DECLARE_CRYPTO_WAIT(wait);
+ int err;
+
+ if (salt_size == 0)
+ return NULL;
+
+ hashstate = kmalloc(crypto_ahash_statesize(alg->tfm), GFP_KERNEL);
+ if (!hashstate)
+ return ERR_PTR(-ENOMEM);
+
+ req = ahash_request_alloc(alg->tfm, GFP_KERNEL);
+ if (!req) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ /*
+ * Zero-pad the salt to the next multiple of the input size of the hash
+ * algorithm's compression function, e.g. 64 bytes for SHA-256 or 128
+ * bytes for SHA-512. This ensures that the hash algorithm won't have
+ * any bytes buffered internally after processing the salt, thus making
+ * salted hashing just as fast as unsalted hashing.
+ */
+ padded_salt_size = round_up(salt_size, alg->block_size);
+ padded_salt = kzalloc(padded_salt_size, GFP_KERNEL);
+ if (!padded_salt) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+ memcpy(padded_salt, salt, salt_size);
+
+ sg_init_one(&sg, padded_salt, padded_salt_size);
+ ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
+ CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &wait);
+ ahash_request_set_crypt(req, &sg, NULL, padded_salt_size);
+
+ err = crypto_wait_req(crypto_ahash_init(req), &wait);
+ if (err)
+ goto err_free;
+
+ err = crypto_wait_req(crypto_ahash_update(req), &wait);
+ if (err)
+ goto err_free;
+
+ err = crypto_ahash_export(req, hashstate);
+ if (err)
+ goto err_free;
+out:
+ ahash_request_free(req);
+ kfree(padded_salt);
+ return hashstate;
+
+err_free:
+ kfree(hashstate);
+ hashstate = ERR_PTR(err);
+ goto out;
+}
+
+/**
+ * fsverity_hash_page() - hash a single data or hash page
+ * @params: the Merkle tree's parameters
+ * @inode: inode for which the hashing is being done
+ * @req: preallocated hash request
+ * @page: the page to hash
+ * @out: output digest, size 'params->digest_size' bytes
+ *
+ * Hash a single data or hash block, assuming block_size == PAGE_SIZE.
+ * The hash is salted if a salt is specified in the Merkle tree parameters.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_hash_page(const struct merkle_tree_params *params,
+ const struct inode *inode,
+ struct ahash_request *req, struct page *page, u8 *out)
+{
+ struct scatterlist sg;
+ DECLARE_CRYPTO_WAIT(wait);
+ int err;
+
+ if (WARN_ON(params->block_size != PAGE_SIZE))
+ return -EINVAL;
+
+ sg_init_table(&sg, 1);
+ sg_set_page(&sg, page, PAGE_SIZE, 0);
+ ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
+ CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &wait);
+ ahash_request_set_crypt(req, &sg, out, PAGE_SIZE);
+
+ if (params->hashstate) {
+ err = crypto_ahash_import(req, params->hashstate);
+ if (err) {
+ fsverity_err(inode,
+ "Error %d importing hash state", err);
+ return err;
+ }
+ err = crypto_ahash_finup(req);
+ } else {
+ err = crypto_ahash_digest(req);
+ }
+
+ err = crypto_wait_req(err, &wait);
+ if (err)
+ fsverity_err(inode, "Error %d computing page hash", err);
+ return err;
+}
+
+/**
+ * fsverity_hash_buffer() - hash some data
+ * @alg: the hash algorithm to use
+ * @data: the data to hash
+ * @size: size of data to hash, in bytes
+ * @out: output digest, size 'alg->digest_size' bytes
+ *
+ * Hash some data which is located in physically contiguous memory (i.e. memory
+ * allocated by kmalloc(), not by vmalloc()). No salt is used.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_hash_buffer(const struct fsverity_hash_alg *alg,
+ const void *data, size_t size, u8 *out)
+{
+ struct ahash_request *req;
+ struct scatterlist sg;
+ DECLARE_CRYPTO_WAIT(wait);
+ int err;
+
+ req = ahash_request_alloc(alg->tfm, GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ sg_init_one(&sg, data, size);
+ ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
+ CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &wait);
+ ahash_request_set_crypt(req, &sg, out, size);
+
+ err = crypto_wait_req(crypto_ahash_digest(req), &wait);
+
+ ahash_request_free(req);
+ return err;
+}
+
+void __init fsverity_check_hash_algs(void)
+{
+ size_t i;
+
+ /*
+ * Sanity check the hash algorithms (could be a build-time check, but
+ * they're in an array)
+ */
+ for (i = 0; i < ARRAY_SIZE(fsverity_hash_algs); i++) {
+ const struct fsverity_hash_alg *alg = &fsverity_hash_algs[i];
+
+ if (!alg->name)
+ continue;
+
+ BUG_ON(alg->digest_size > FS_VERITY_MAX_DIGEST_SIZE);
+
+ /*
+ * For efficiency, the implementation currently assumes the
+ * digest and block sizes are powers of 2. This limitation can
+ * be lifted if the code is updated to handle other values.
+ */
+ BUG_ON(!is_power_of_2(alg->digest_size));
+ BUG_ON(!is_power_of_2(alg->block_size));
+ }
+}
diff --git a/fs/verity/init.c b/fs/verity/init.c
new file mode 100644
index 000000000000..94c104e00861
--- /dev/null
+++ b/fs/verity/init.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/verity/init.c: fs-verity module initialization and logging
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <linux/ratelimit.h>
+
+void fsverity_msg(const struct inode *inode, const char *level,
+ const char *fmt, ...)
+{
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ struct va_format vaf;
+ va_list args;
+
+ if (!__ratelimit(&rs))
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (inode)
+ printk("%sfs-verity (%s, inode %lu): %pV\n",
+ level, inode->i_sb->s_id, inode->i_ino, &vaf);
+ else
+ printk("%sfs-verity: %pV\n", level, &vaf);
+ va_end(args);
+}
+
+static int __init fsverity_init(void)
+{
+ int err;
+
+ fsverity_check_hash_algs();
+
+ err = fsverity_init_info_cache();
+ if (err)
+ return err;
+
+ err = fsverity_init_workqueue();
+ if (err)
+ goto err_exit_info_cache;
+
+ err = fsverity_init_signature();
+ if (err)
+ goto err_exit_workqueue;
+
+ pr_debug("Initialized fs-verity\n");
+ return 0;
+
+err_exit_workqueue:
+ fsverity_exit_workqueue();
+err_exit_info_cache:
+ fsverity_exit_info_cache();
+ return err;
+}
+late_initcall(fsverity_init)
diff --git a/fs/verity/measure.c b/fs/verity/measure.c
new file mode 100644
index 000000000000..05049b68c745
--- /dev/null
+++ b/fs/verity/measure.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/verity/measure.c: ioctl to get a verity file's measurement
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <linux/uaccess.h>
+
+/**
+ * fsverity_ioctl_measure() - get a verity file's measurement
+ *
+ * Retrieve the file measurement that the kernel is enforcing for reads from a
+ * verity file. See the "FS_IOC_MEASURE_VERITY" section of
+ * Documentation/filesystems/fsverity.rst for the documentation.
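+ *
+ * A typical caller (illustrative sketch) allocates a struct fsverity_digest
+ * with room for the digest bytes after it, sets ->digest_size to that room,
+ * and calls ioctl(fd, FS_IOC_MEASURE_VERITY, arg); on success the kernel
+ * fills in ->digest_algorithm, the actual ->digest_size, and ->digest.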
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_ioctl_measure(struct file *filp, void __user *_uarg)
+{
+ const struct inode *inode = file_inode(filp);
+ struct fsverity_digest __user *uarg = _uarg;
+ const struct fsverity_info *vi;
+ const struct fsverity_hash_alg *hash_alg;
+ struct fsverity_digest arg;
+
+ vi = fsverity_get_info(inode);
+ if (!vi)
+ return -ENODATA; /* not a verity file */
+ hash_alg = vi->tree_params.hash_alg;
+
+ /*
+ * The user specifies the digest_size their buffer has space for; we can
+ * return the digest if it fits in the available space. We write back
+ * the actual size, which may be shorter than the user-specified size.
+ */
+
+ if (get_user(arg.digest_size, &uarg->digest_size))
+ return -EFAULT;
+ if (arg.digest_size < hash_alg->digest_size)
+ return -EOVERFLOW;
+
+ memset(&arg, 0, sizeof(arg));
+ arg.digest_algorithm = hash_alg - fsverity_hash_algs;
+ arg.digest_size = hash_alg->digest_size;
+
+ if (copy_to_user(uarg, &arg, sizeof(arg)))
+ return -EFAULT;
+
+ if (copy_to_user(uarg->digest, vi->measurement, hash_alg->digest_size))
+ return -EFAULT;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fsverity_ioctl_measure);
diff --git a/fs/verity/open.c b/fs/verity/open.c
new file mode 100644
index 000000000000..63d1004b688c
--- /dev/null
+++ b/fs/verity/open.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/verity/open.c: opening fs-verity files
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <linux/slab.h>
+
+static struct kmem_cache *fsverity_info_cachep;
+
+/**
+ * fsverity_init_merkle_tree_params() - initialize Merkle tree parameters
+ * @params: the parameters struct to initialize
+ * @inode: the inode for which the Merkle tree is being built
+ * @hash_algorithm: number of hash algorithm to use
+ * @log_blocksize: log base 2 of block size to use
+ * @salt: pointer to salt (optional)
+ * @salt_size: size of salt, possibly 0
+ *
+ * Validate the hash algorithm and block size, then compute the tree topology
+ * (num levels, num blocks in each level, etc.) and initialize @params.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
+ const struct inode *inode,
+ unsigned int hash_algorithm,
+ unsigned int log_blocksize,
+ const u8 *salt, size_t salt_size)
+{
+ const struct fsverity_hash_alg *hash_alg;
+ int err;
+ u64 blocks;
+ u64 offset;
+ int level;
+
+ memset(params, 0, sizeof(*params));
+
+ hash_alg = fsverity_get_hash_alg(inode, hash_algorithm);
+ if (IS_ERR(hash_alg))
+ return PTR_ERR(hash_alg);
+ params->hash_alg = hash_alg;
+ params->digest_size = hash_alg->digest_size;
+
+ params->hashstate = fsverity_prepare_hash_state(hash_alg, salt,
+ salt_size);
+ if (IS_ERR(params->hashstate)) {
+ err = PTR_ERR(params->hashstate);
+ params->hashstate = NULL;
+ fsverity_err(inode, "Error %d preparing hash state", err);
+ goto out_err;
+ }
+
+ if (log_blocksize != PAGE_SHIFT) {
+ fsverity_warn(inode, "Unsupported log_blocksize: %u",
+ log_blocksize);
+ err = -EINVAL;
+ goto out_err;
+ }
+ params->log_blocksize = log_blocksize;
+ params->block_size = 1 << log_blocksize;
+
+ if (WARN_ON(!is_power_of_2(params->digest_size))) {
+ err = -EINVAL;
+ goto out_err;
+ }
+ if (params->block_size < 2 * params->digest_size) {
+ fsverity_warn(inode,
+ "Merkle tree block size (%u) too small for hash algorithm \"%s\"",
+ params->block_size, hash_alg->name);
+ err = -EINVAL;
+ goto out_err;
+ }
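+	/*
+	 * E.g., SHA-256 (32-byte digests) with 4096-byte blocks gives
+	 * log_arity = 12 - 5 = 7, i.e. 128 hashes per tree block.
+	 */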
+ params->log_arity = params->log_blocksize - ilog2(params->digest_size);
+ params->hashes_per_block = 1 << params->log_arity;
+
+ pr_debug("Merkle tree uses %s with %u-byte blocks (%u hashes/block), salt=%*phN\n",
+ hash_alg->name, params->block_size, params->hashes_per_block,
+ (int)salt_size, salt);
+
+ /*
+ * Compute the number of levels in the Merkle tree and create a map from
+ * level to the starting block of that level. Level 'num_levels - 1' is
+ * the root and is stored first. Level 0 is the level directly "above"
+ * the data blocks and is stored last.
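+	 *
+	 * For example, a 1 GiB file with 4096-byte blocks and SHA-256 (128
+	 * hashes per block) has 262144 data blocks, so level 0 needs 2048 hash
+	 * blocks, level 1 needs 16, and level 2 (the root level) needs 1;
+	 * num_levels = 3 and the tree occupies (1 + 16 + 2048) * 4096 bytes.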
+ */
+
+ /* Compute number of levels and the number of blocks in each level */
+ blocks = (inode->i_size + params->block_size - 1) >> log_blocksize;
+ pr_debug("Data is %lld bytes (%llu blocks)\n", inode->i_size, blocks);
+ while (blocks > 1) {
+ if (params->num_levels >= FS_VERITY_MAX_LEVELS) {
+ fsverity_err(inode, "Too many levels in Merkle tree");
+ err = -EINVAL;
+ goto out_err;
+ }
+ blocks = (blocks + params->hashes_per_block - 1) >>
+ params->log_arity;
+ /* temporarily using level_start[] to store blocks in level */
+ params->level_start[params->num_levels++] = blocks;
+ }
+
+ /* Compute the starting block of each level */
+ offset = 0;
+ for (level = (int)params->num_levels - 1; level >= 0; level--) {
+ blocks = params->level_start[level];
+ params->level_start[level] = offset;
+ pr_debug("Level %d is %llu blocks starting at index %llu\n",
+ level, blocks, offset);
+ offset += blocks;
+ }
+
+ params->tree_size = offset << log_blocksize;
+ return 0;
+
+out_err:
+ kfree(params->hashstate);
+ memset(params, 0, sizeof(*params));
+ return err;
+}
+
+/*
+ * Compute the file measurement by hashing the fsverity_descriptor excluding the
+ * signature and with the sig_size field set to 0.
+ */
+static int compute_file_measurement(const struct fsverity_hash_alg *hash_alg,
+ struct fsverity_descriptor *desc,
+ u8 *measurement)
+{
+ __le32 sig_size = desc->sig_size;
+ int err;
+
+ desc->sig_size = 0;
+ err = fsverity_hash_buffer(hash_alg, desc, sizeof(*desc), measurement);
+ desc->sig_size = sig_size;
+
+ return err;
+}
+
+/*
+ * Validate the given fsverity_descriptor and create a new fsverity_info from
+ * it. The signature (if present) is also checked.
+ */
+struct fsverity_info *fsverity_create_info(const struct inode *inode,
+ void *_desc, size_t desc_size)
+{
+ struct fsverity_descriptor *desc = _desc;
+ struct fsverity_info *vi;
+ int err;
+
+ if (desc_size < sizeof(*desc)) {
+ fsverity_err(inode, "Unrecognized descriptor size: %zu bytes",
+ desc_size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (desc->version != 1) {
+ fsverity_err(inode, "Unrecognized descriptor version: %u",
+ desc->version);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (memchr_inv(desc->__reserved, 0, sizeof(desc->__reserved))) {
+ fsverity_err(inode, "Reserved bits set in descriptor");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (desc->salt_size > sizeof(desc->salt)) {
+ fsverity_err(inode, "Invalid salt_size: %u", desc->salt_size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (le64_to_cpu(desc->data_size) != inode->i_size) {
+ fsverity_err(inode,
+ "Wrong data_size: %llu (desc) != %lld (inode)",
+ le64_to_cpu(desc->data_size), inode->i_size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ vi = kmem_cache_zalloc(fsverity_info_cachep, GFP_KERNEL);
+ if (!vi)
+ return ERR_PTR(-ENOMEM);
+ vi->inode = inode;
+
+ err = fsverity_init_merkle_tree_params(&vi->tree_params, inode,
+ desc->hash_algorithm,
+ desc->log_blocksize,
+ desc->salt, desc->salt_size);
+ if (err) {
+ fsverity_err(inode,
+ "Error %d initializing Merkle tree parameters",
+ err);
+ goto out;
+ }
+
+ memcpy(vi->root_hash, desc->root_hash, vi->tree_params.digest_size);
+
+ err = compute_file_measurement(vi->tree_params.hash_alg, desc,
+ vi->measurement);
+ if (err) {
+ fsverity_err(inode, "Error %d computing file measurement", err);
+ goto out;
+ }
+ pr_debug("Computed file measurement: %s:%*phN\n",
+ vi->tree_params.hash_alg->name,
+ vi->tree_params.digest_size, vi->measurement);
+
+ err = fsverity_verify_signature(vi, desc, desc_size);
+out:
+ if (err) {
+ fsverity_free_info(vi);
+ vi = ERR_PTR(err);
+ }
+ return vi;
+}
+
+void fsverity_set_info(struct inode *inode, struct fsverity_info *vi)
+{
+ /*
+ * Multiple processes may race to set ->i_verity_info, so use cmpxchg.
+ * This pairs with the READ_ONCE() in fsverity_get_info().
+ */
+ if (cmpxchg(&inode->i_verity_info, NULL, vi) != NULL)
+ fsverity_free_info(vi);
+}
+
+void fsverity_free_info(struct fsverity_info *vi)
+{
+ if (!vi)
+ return;
+ kfree(vi->tree_params.hashstate);
+ kmem_cache_free(fsverity_info_cachep, vi);
+}
+
+/* Ensure the inode has an ->i_verity_info */
+static int ensure_verity_info(struct inode *inode)
+{
+ struct fsverity_info *vi = fsverity_get_info(inode);
+ struct fsverity_descriptor *desc;
+ int res;
+
+ if (vi)
+ return 0;
+
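+	/* First query the descriptor size (a NULL buffer returns just the size). */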
+ res = inode->i_sb->s_vop->get_verity_descriptor(inode, NULL, 0);
+ if (res < 0) {
+ fsverity_err(inode,
+ "Error %d getting verity descriptor size", res);
+ return res;
+ }
+ if (res > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
+ fsverity_err(inode, "Verity descriptor is too large (%d bytes)",
+ res);
+ return -EMSGSIZE;
+ }
+ desc = kmalloc(res, GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+ res = inode->i_sb->s_vop->get_verity_descriptor(inode, desc, res);
+ if (res < 0) {
+ fsverity_err(inode, "Error %d reading verity descriptor", res);
+ goto out_free_desc;
+ }
+
+ vi = fsverity_create_info(inode, desc, res);
+ if (IS_ERR(vi)) {
+ res = PTR_ERR(vi);
+ goto out_free_desc;
+ }
+
+ fsverity_set_info(inode, vi);
+ res = 0;
+out_free_desc:
+ kfree(desc);
+ return res;
+}
+
+/**
+ * fsverity_file_open() - prepare to open a verity file
+ * @inode: the inode being opened
+ * @filp: the struct file being set up
+ *
+ * When opening a verity file, deny the open if it is for writing. Otherwise,
+ * set up the inode's ->i_verity_info if not already done.
+ *
+ * When combined with fscrypt, this must be called after fscrypt_file_open().
+ * Otherwise, we won't have the key set up to decrypt the verity metadata.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_file_open(struct inode *inode, struct file *filp)
+{
+ if (!IS_VERITY(inode))
+ return 0;
+
+ if (filp->f_mode & FMODE_WRITE) {
+ pr_debug("Denying opening verity file (ino %lu) for write\n",
+ inode->i_ino);
+ return -EPERM;
+ }
+
+ return ensure_verity_info(inode);
+}
+EXPORT_SYMBOL_GPL(fsverity_file_open);
+
+/**
+ * fsverity_prepare_setattr() - prepare to change a verity inode's attributes
+ * @dentry: dentry through which the inode is being changed
+ * @attr: attributes to change
+ *
+ * Verity files are immutable, so deny truncates. This isn't covered by the
+ * open-time check because sys_truncate() takes a path, not a file descriptor.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ if (IS_VERITY(d_inode(dentry)) && (attr->ia_valid & ATTR_SIZE)) {
+ pr_debug("Denying truncate of verity file (ino %lu)\n",
+ d_inode(dentry)->i_ino);
+ return -EPERM;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fsverity_prepare_setattr);
+
+/**
+ * fsverity_cleanup_inode() - free the inode's verity info, if present
+ *
+ * Filesystems must call this on inode eviction to free ->i_verity_info.
+ */
+void fsverity_cleanup_inode(struct inode *inode)
+{
+ fsverity_free_info(inode->i_verity_info);
+ inode->i_verity_info = NULL;
+}
+EXPORT_SYMBOL_GPL(fsverity_cleanup_inode);
+
+int __init fsverity_init_info_cache(void)
+{
+ fsverity_info_cachep = KMEM_CACHE_USERCOPY(fsverity_info,
+ SLAB_RECLAIM_ACCOUNT,
+ measurement);
+ if (!fsverity_info_cachep)
+ return -ENOMEM;
+ return 0;
+}
+
+void __init fsverity_exit_info_cache(void)
+{
+ kmem_cache_destroy(fsverity_info_cachep);
+ fsverity_info_cachep = NULL;
+}
diff --git a/fs/verity/signature.c b/fs/verity/signature.c
new file mode 100644
index 000000000000..c8b255232de5
--- /dev/null
+++ b/fs/verity/signature.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/verity/signature.c: verification of builtin signatures
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <linux/cred.h>
+#include <linux/key.h>
+#include <linux/slab.h>
+#include <linux/verification.h>
+
+/*
+ * /proc/sys/fs/verity/require_signatures
+ * If 1, all verity files must have a valid builtin signature.
+ */
+static int fsverity_require_signatures;
+
+/*
+ * Keyring that contains the trusted X.509 certificates.
+ *
+ * Only root (kuid=0) can modify this. Also, root may use
+ * keyctl_restrict_keyring() to prevent any more additions.
+ */
+static struct key *fsverity_keyring;
+
+/**
+ * fsverity_verify_signature() - check a verity file's signature
+ *
+ * If the file's fs-verity descriptor includes a signature of the file
+ * measurement, verify it against the certificates in the fs-verity keyring.
+ *
+ * Return: 0 on success (signature valid or not required); -errno on failure
+ */
+int fsverity_verify_signature(const struct fsverity_info *vi,
+ const struct fsverity_descriptor *desc,
+ size_t desc_size)
+{
+ const struct inode *inode = vi->inode;
+ const struct fsverity_hash_alg *hash_alg = vi->tree_params.hash_alg;
+ const u32 sig_size = le32_to_cpu(desc->sig_size);
+ struct fsverity_signed_digest *d;
+ int err;
+
+ if (sig_size == 0) {
+ if (fsverity_require_signatures) {
+ fsverity_err(inode,
+ "require_signatures=1, rejecting unsigned file!");
+ return -EPERM;
+ }
+ return 0;
+ }
+
+ if (sig_size > desc_size - sizeof(*desc)) {
+ fsverity_err(inode, "Signature overflows verity descriptor");
+ return -EBADMSG;
+ }
+
+ d = kzalloc(sizeof(*d) + hash_alg->digest_size, GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+ memcpy(d->magic, "FSVerity", 8);
+ d->digest_algorithm = cpu_to_le16(hash_alg - fsverity_hash_algs);
+ d->digest_size = cpu_to_le16(hash_alg->digest_size);
+ memcpy(d->digest, vi->measurement, hash_alg->digest_size);
+
+ err = verify_pkcs7_signature(d, sizeof(*d) + hash_alg->digest_size,
+ desc->signature, sig_size,
+ fsverity_keyring,
+ VERIFYING_UNSPECIFIED_SIGNATURE,
+ NULL, NULL);
+ kfree(d);
+
+ if (err) {
+ if (err == -ENOKEY)
+ fsverity_err(inode,
+ "File's signing cert isn't in the fs-verity keyring");
+ else if (err == -EKEYREJECTED)
+ fsverity_err(inode, "Incorrect file signature");
+ else if (err == -EBADMSG)
+ fsverity_err(inode, "Malformed file signature");
+ else
+ fsverity_err(inode, "Error %d verifying file signature",
+ err);
+ return err;
+ }
+
+ pr_debug("Valid signature for file measurement %s:%*phN\n",
+ hash_alg->name, hash_alg->digest_size, vi->measurement);
+ return 0;
+}
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *fsverity_sysctl_header;
+
+static const struct ctl_path fsverity_sysctl_path[] = {
+ { .procname = "fs", },
+ { .procname = "verity", },
+ { }
+};
+
+static struct ctl_table fsverity_sysctl_table[] = {
+ {
+ .procname = "require_signatures",
+ .data = &fsverity_require_signatures,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ { }
+};
+
+static int __init fsverity_sysctl_init(void)
+{
+ fsverity_sysctl_header = register_sysctl_paths(fsverity_sysctl_path,
+ fsverity_sysctl_table);
+ if (!fsverity_sysctl_header) {
+ pr_err("sysctl registration failed!\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+#else /* !CONFIG_SYSCTL */
+static inline int __init fsverity_sysctl_init(void)
+{
+ return 0;
+}
+#endif /* !CONFIG_SYSCTL */
+
+int __init fsverity_init_signature(void)
+{
+ struct key *ring;
+ int err;
+
+ ring = keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0),
+ current_cred(), KEY_POS_SEARCH |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE |
+ KEY_USR_SEARCH | KEY_USR_SETATTR,
+ KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+ if (IS_ERR(ring))
+ return PTR_ERR(ring);
+
+ err = fsverity_sysctl_init();
+ if (err)
+ goto err_put_ring;
+
+ fsverity_keyring = ring;
+ return 0;
+
+err_put_ring:
+ key_put(ring);
+ return err;
+}
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
new file mode 100644
index 000000000000..3e8f2de44667
--- /dev/null
+++ b/fs/verity/verify.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/verity/verify.c: data verification functions, i.e. hooks for ->readpages()
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <crypto/hash.h>
+#include <linux/bio.h>
+#include <linux/ratelimit.h>
+
+static struct workqueue_struct *fsverity_read_workqueue;
+
+/**
+ * hash_at_level() - compute the location of the block's hash at the given level
+ *
+ * @params: (in) the Merkle tree parameters
+ * @dindex: (in) the index of the data block being verified
+ * @level: (in) the level of hash we want (0 is leaf level)
+ * @hindex: (out) the index of the hash block containing the wanted hash
+ * @hoffset: (out) the byte offset to the wanted hash within the hash block
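+ *
+ * For example, with 4096-byte blocks and SHA-256 hashes (log_arity = 7),
+ * the level-0 hash for data block 1000 is hash number 1000 in the leaf
+ * region: it lives in hash block level_start[0] + (1000 >> 7) =
+ * level_start[0] + 7, at byte offset (1000 & 127) << 5 = 3328.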
+ */
+static void hash_at_level(const struct merkle_tree_params *params,
+ pgoff_t dindex, unsigned int level, pgoff_t *hindex,
+ unsigned int *hoffset)
+{
+ pgoff_t position;
+
+ /* Offset of the hash within the level's region, in hashes */
+ position = dindex >> (level * params->log_arity);
+
+ /* Index of the hash block in the tree overall */
+ *hindex = params->level_start[level] + (position >> params->log_arity);
+
+ /* Offset of the wanted hash (in bytes) within the hash block */
+ *hoffset = (position & ((1 << params->log_arity) - 1)) <<
+ (params->log_blocksize - params->log_arity);
+}
+
+/* Extract a hash from a hash page */
+static void extract_hash(struct page *hpage, unsigned int hoffset,
+ unsigned int hsize, u8 *out)
+{
+ void *virt = kmap_atomic(hpage);
+
+ memcpy(out, virt + hoffset, hsize);
+ kunmap_atomic(virt);
+}
+
+static inline int cmp_hashes(const struct fsverity_info *vi,
+ const u8 *want_hash, const u8 *real_hash,
+ pgoff_t index, int level)
+{
+ const unsigned int hsize = vi->tree_params.digest_size;
+
+ if (memcmp(want_hash, real_hash, hsize) == 0)
+ return 0;
+
+ fsverity_err(vi->inode,
+ "FILE CORRUPTED! index=%lu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
+ index, level,
+ vi->tree_params.hash_alg->name, hsize, want_hash,
+ vi->tree_params.hash_alg->name, hsize, real_hash);
+ return -EBADMSG;
+}
+
+/*
+ * Verify a single data page against the file's Merkle tree.
+ *
+ * In principle, we need to verify the entire path to the root node. However,
+ * for efficiency the filesystem may cache the hash pages. Therefore we need
+ * only ascend the tree until an already-verified page is seen, as indicated by
+ * the PageChecked bit being set; then verify the path to that page.
+ *
+ * This code currently only supports the case where the verity block size is
+ * equal to PAGE_SIZE. Doing otherwise would be possible but tricky, since we
+ * wouldn't be able to use the PageChecked bit.
+ *
+ * Note that multiple processes may race to verify a hash page and mark it
+ * Checked, but it doesn't matter; the result will be the same either way.
+ *
+ * Return: true if the page is valid, else false.
+ */
+static bool verify_page(struct inode *inode, const struct fsverity_info *vi,
+ struct ahash_request *req, struct page *data_page)
+{
+ const struct merkle_tree_params *params = &vi->tree_params;
+ const unsigned int hsize = params->digest_size;
+ const pgoff_t index = data_page->index;
+ int level;
+ u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
+ const u8 *want_hash;
+ u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
+ struct page *hpages[FS_VERITY_MAX_LEVELS];
+ unsigned int hoffsets[FS_VERITY_MAX_LEVELS];
+ int err;
+
+ if (WARN_ON_ONCE(!PageLocked(data_page) || PageUptodate(data_page)))
+ return false;
+
+ pr_debug_ratelimited("Verifying data page %lu...\n", index);
+
+ /*
+ * Starting at the leaf level, ascend the tree saving hash pages along
+ * the way until we find a verified hash page, indicated by PageChecked;
+ * or until we reach the root.
+ */
+ for (level = 0; level < params->num_levels; level++) {
+ pgoff_t hindex;
+ unsigned int hoffset;
+ struct page *hpage;
+
+ hash_at_level(params, index, level, &hindex, &hoffset);
+
+ pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n",
+ level, hindex, hoffset);
+
+ hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
+ hindex);
+ if (IS_ERR(hpage)) {
+ err = PTR_ERR(hpage);
+ fsverity_err(inode,
+ "Error %d reading Merkle tree page %lu",
+ err, hindex);
+ goto out;
+ }
+
+ if (PageChecked(hpage)) {
+ extract_hash(hpage, hoffset, hsize, _want_hash);
+ want_hash = _want_hash;
+ put_page(hpage);
+ pr_debug_ratelimited("Hash page already checked, want %s:%*phN\n",
+ params->hash_alg->name,
+ hsize, want_hash);
+ goto descend;
+ }
+ pr_debug_ratelimited("Hash page not yet checked\n");
+ hpages[level] = hpage;
+ hoffsets[level] = hoffset;
+ }
+
+ want_hash = vi->root_hash;
+ pr_debug("Want root hash: %s:%*phN\n",
+ params->hash_alg->name, hsize, want_hash);
+descend:
+ /* Descend the tree verifying hash pages */
+ for (; level > 0; level--) {
+ struct page *hpage = hpages[level - 1];
+ unsigned int hoffset = hoffsets[level - 1];
+
+ err = fsverity_hash_page(params, inode, req, hpage, real_hash);
+ if (err)
+ goto out;
+ err = cmp_hashes(vi, want_hash, real_hash, index, level - 1);
+ if (err)
+ goto out;
+ SetPageChecked(hpage);
+ extract_hash(hpage, hoffset, hsize, _want_hash);
+ want_hash = _want_hash;
+ put_page(hpage);
+ pr_debug("Verified hash page at level %d, now want %s:%*phN\n",
+ level - 1, params->hash_alg->name, hsize, want_hash);
+ }
+
+ /* Finally, verify the data page */
+ err = fsverity_hash_page(params, inode, req, data_page, real_hash);
+ if (err)
+ goto out;
+ err = cmp_hashes(vi, want_hash, real_hash, index, -1);
+out:
+ for (; level > 0; level--)
+ put_page(hpages[level - 1]);
+
+ return err == 0;
+}
+
+/**
+ * fsverity_verify_page() - verify a data page
+ *
+ * Verify a page that has just been read from a verity file. The page must be a
+ * pagecache page that is still locked and not yet uptodate.
+ *
+ * Return: true if the page is valid, else false.
+ */
+bool fsverity_verify_page(struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ const struct fsverity_info *vi = inode->i_verity_info;
+ struct ahash_request *req;
+ bool valid;
+
+ req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
+ if (unlikely(!req))
+ return false;
+
+ valid = verify_page(inode, vi, req, page);
+
+ ahash_request_free(req);
+
+ return valid;
+}
+EXPORT_SYMBOL_GPL(fsverity_verify_page);
+
+#ifdef CONFIG_BLOCK
+/**
+ * fsverity_verify_bio() - verify a 'read' bio that has just completed
+ *
+ * Verify a set of pages that have just been read from a verity file. The pages
+ * must be pagecache pages that are still locked and not yet uptodate. Pages
+ * that fail verification are set to the Error state. Verification is skipped
+ * for pages already in the Error state, e.g. due to fscrypt decryption failure.
+ *
+ * This is a helper function for use by the ->readpages() method of filesystems
+ * that issue bios to read data directly into the page cache. Filesystems that
+ * populate the page cache without issuing bios (e.g. non block-based
+ * filesystems) must instead call fsverity_verify_page() directly on each page.
+ * All filesystems must also call fsverity_verify_page() on holes.
+ */
+void fsverity_verify_bio(struct bio *bio)
+{
+ struct inode *inode = bio_first_page_all(bio)->mapping->host;
+ const struct fsverity_info *vi = inode->i_verity_info;
+ struct ahash_request *req;
+ struct bio_vec *bv;
+ struct bvec_iter_all iter_all;
+
+ req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
+ if (unlikely(!req)) {
+ bio_for_each_segment_all(bv, bio, iter_all)
+ SetPageError(bv->bv_page);
+ return;
+ }
+
+ bio_for_each_segment_all(bv, bio, iter_all) {
+ struct page *page = bv->bv_page;
+
+ if (!PageError(page) && !verify_page(inode, vi, req, page))
+ SetPageError(page);
+ }
+
+ ahash_request_free(req);
+}
+EXPORT_SYMBOL_GPL(fsverity_verify_bio);
+#endif /* CONFIG_BLOCK */
+
+/**
+ * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
+ *
+ * Enqueue verification work for asynchronous processing.
+ */
+void fsverity_enqueue_verify_work(struct work_struct *work)
+{
+ queue_work(fsverity_read_workqueue, work);
+}
+EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
+
+int __init fsverity_init_workqueue(void)
+{
+ /*
+ * Use an unbound workqueue to allow bios to be verified in parallel
+ * even when they happen to complete on the same CPU. This sacrifices
+ * locality, but it's worthwhile since hashing is CPU-intensive.
+ *
+ * Also use a high-priority workqueue to prioritize verification work,
+ * which blocks reads from completing, over regular application tasks.
+ */
+ fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
+ WQ_UNBOUND | WQ_HIGHPRI,
+ num_online_cpus());
+ if (!fsverity_read_workqueue)
+ return -ENOMEM;
+ return 0;
+}
+
+void __init fsverity_exit_workqueue(void)
+{
+ destroy_workqueue(fsverity_read_workqueue);
+ fsverity_read_workqueue = NULL;
+}