diff options
Diffstat (limited to 'fs')
41 files changed, 539 insertions, 377 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c index cefa222f7881..8daeed31e1af 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -880,7 +880,7 @@ affs_truncate(struct inode *inode) if (inode->i_size > AFFS_I(inode)->mmu_private) { struct address_space *mapping = inode->i_mapping; struct page *page; - void *fsdata; + void *fsdata = NULL; loff_t isize = inode->i_size; int res; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8aeaada1fcae..3aa04224315e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -367,7 +367,14 @@ error: btrfs_print_tree(eb, 0); btrfs_err(fs_info, "block=%llu write time tree block corruption detected", eb->start); - WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + /* + * Be noisy if this is an extent buffer from a log tree. We don't abort + * a transaction in case there's a bad log tree extent buffer, we just + * fallback to a transaction commit. Still we want to know when there is + * a bad log tree extent buffer, as that may signal a bug somewhere. + */ + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) || + btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 834bbcb91102..af046d22300e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3541,6 +3541,7 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence) struct extent_buffer *leaf = path->nodes[0]; struct btrfs_file_extent_item *extent; u64 extent_end; + u8 type; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); @@ -3596,10 +3597,16 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence) extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); + type = btrfs_file_extent_type(leaf, extent); - if (btrfs_file_extent_disk_bytenr(leaf, extent) == 0 || - btrfs_file_extent_type(leaf, extent) == - BTRFS_FILE_EXTENT_PREALLOC) { + /* + * Can't access the extent's disk_bytenr field if this is an + * inline extent, since at that offset, it's where the extent + * data starts. + */ + if (type == BTRFS_FILE_EXTENT_PREALLOC || + (type == BTRFS_FILE_EXTENT_REG && + btrfs_file_extent_disk_bytenr(leaf, extent) == 0)) { /* * Explicit hole or prealloc extent, search for delalloc. * A prealloc extent is treated like a hole. diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index a749367e5ae2..37b86acfcbcf 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -119,6 +119,12 @@ enum { /* Indicate that we want to commit the transaction. */ BTRFS_FS_NEED_TRANS_COMMIT, + /* + * Indicate metadata over-commit is disabled. This is set when active + * zone tracking is needed. + */ + BTRFS_FS_NO_OVERCOMMIT, + #if BITS_PER_LONG == 32 /* Indicate if we have error/warn message printed on 32bit systems */ BTRFS_FS_32BIT_ERROR, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index d275bf24b250..af97413abcf4 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2765,9 +2765,19 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) /* * Old roots should be searched when inserting qgroup - * extent record + * extent record. + * + * But for INCONSISTENT (NO_ACCOUNTING) -> rescan case, + * we may have some record inserted during + * NO_ACCOUNTING (thus no old_roots populated), but + * later we start rescan, which clears NO_ACCOUNTING, + * leaving some inserted records without old_roots + * populated. + * + * Those cases are rare and should not cause too much + * time spent during commit_transaction(). */ - if (WARN_ON(!record->old_roots)) { + if (!record->old_roots) { /* Search commit root to find old_roots */ ret = btrfs_find_all_roots(&ctx, false); if (ret < 0) @@ -3357,6 +3367,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) int err = -ENOMEM; int ret = 0; bool stopped = false; + bool did_leaf_rescans = false; path = btrfs_alloc_path(); if (!path) @@ -3377,6 +3388,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) } err = qgroup_rescan_leaf(trans, path); + did_leaf_rescans = true; if (err > 0) btrfs_commit_transaction(trans); @@ -3397,16 +3409,23 @@ out: mutex_unlock(&fs_info->qgroup_rescan_lock); /* - * only update status, since the previous part has already updated the - * qgroup info. + * Only update status, since the previous part has already updated the + * qgroup info, and only if we did any actual work. This also prevents + * race with a concurrent quota disable, which has already set + * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at + * btrfs_quota_disable(). */ - trans = btrfs_start_transaction(fs_info->quota_root, 1); - if (IS_ERR(trans)) { - err = PTR_ERR(trans); + if (did_leaf_rescans) { + trans = btrfs_start_transaction(fs_info->quota_root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + trans = NULL; + btrfs_err(fs_info, + "fail to start transaction for status update: %d", + err); + } + } else { trans = NULL; - btrfs_err(fs_info, - "fail to start transaction for status update: %d", - err); } mutex_lock(&fs_info->qgroup_rescan_lock); diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index d28ee4e36f3d..69c09508afb5 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -407,7 +407,8 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, return 0; used = btrfs_space_info_used(space_info, true); - if (btrfs_is_zoned(fs_info) && (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) + if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) && + (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) avail = 0; else avail = calc_available_free_space(fs_info, space_info, flush); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index fb52aa060093..d43261545264 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2980,7 +2980,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = 0; if (ret) { blk_finish_plug(&plug); - btrfs_abort_transaction(trans, ret); btrfs_set_log_full_commit(trans); mutex_unlock(&root->log_mutex); goto out; @@ -3045,15 +3044,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, blk_finish_plug(&plug); btrfs_set_log_full_commit(trans); - - if (ret != -ENOSPC) { - btrfs_abort_transaction(trans, ret); - mutex_unlock(&log_root_tree->log_mutex); - goto out; - } + if (ret != -ENOSPC) + btrfs_err(fs_info, + "failed to update log for root %llu ret %d", + root->root_key.objectid, ret); btrfs_wait_tree_log_extents(log, mark); mutex_unlock(&log_root_tree->log_mutex); - ret = BTRFS_LOG_FORCE_COMMIT; goto out; } @@ -3112,7 +3108,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, goto out_wake_log_root; } else if (ret) { btrfs_set_log_full_commit(trans); - btrfs_abort_transaction(trans, ret); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; } @@ -3826,7 +3821,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, path->slots[0]); if (tmp.type == BTRFS_DIR_INDEX_KEY) last_old_dentry_offset = tmp.offset; + } else if (ret < 0) { + err = ret; } + goto done; } @@ -3846,19 +3844,34 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, */ if (tmp.type == BTRFS_DIR_INDEX_KEY) last_old_dentry_offset = tmp.offset; + } else if (ret < 0) { + err = ret; + goto done; } + btrfs_release_path(path); /* - * Find the first key from this transaction again. See the note for - * log_new_dir_dentries, if we're logging a directory recursively we - * won't be holding its i_mutex, which means we can modify the directory - * while we're logging it. If we remove an entry between our first - * search and this search we'll not find the key again and can just - * bail. + * Find the first key from this transaction again or the one we were at + * in the loop below in case we had to reschedule. We may be logging the + * directory without holding its VFS lock, which happen when logging new + * dentries (through log_new_dir_dentries()) or in some cases when we + * need to log the parent directory of an inode. This means a dir index + * key might be deleted from the inode's root, and therefore we may not + * find it anymore. If we can't find it, just move to the next key. We + * can not bail out and ignore, because if we do that we will simply + * not log dir index keys that come after the one that was just deleted + * and we can end up logging a dir index range that ends at (u64)-1 + * (@last_offset is initialized to that), resulting in removing dir + * entries we should not remove at log replay time. */ search: ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); + if (ret > 0) + ret = btrfs_next_item(root, path); + if (ret < 0) + err = ret; + /* If ret is 1, there are no more keys in the inode's root. */ if (ret != 0) goto done; @@ -5580,8 +5593,10 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, * LOG_INODE_EXISTS mode) and slow down other fsyncs or transaction * commits. */ - if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) + if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) { + btrfs_set_log_full_commit(trans); return BTRFS_LOG_FORCE_COMMIT; + } inode = btrfs_iget(root->fs_info->sb, ino, root); /* diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index aa25fa335d3e..bcfef75b97da 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -768,8 +768,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, BTRFS_SUPER_FLAG_CHANGING_FSID_V2); error = lookup_bdev(path, &path_devt); - if (error) + if (error) { + btrfs_err(NULL, "failed to lookup block device for path %s: %d", + path, error); return ERR_PTR(error); + } if (fsid_change_in_progress) { if (!has_metadata_uuid) @@ -836,6 +839,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, unsigned int nofs_flag; if (fs_devices->opened) { + btrfs_err(NULL, + "device %s belongs to fsid %pU, and the fs is already mounted", + path, fs_devices->fsid); mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-EBUSY); } @@ -905,6 +911,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, * generation are equal. */ mutex_unlock(&fs_devices->device_list_mutex); + btrfs_err(NULL, +"device %s already registered with a higher generation, found %llu expect %llu", + path, found_transid, device->generation); return ERR_PTR(-EEXIST); } @@ -2005,42 +2014,42 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info) return num_devices; } +static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info, + struct block_device *bdev, int copy_num) +{ + struct btrfs_super_block *disk_super; + const size_t len = sizeof(disk_super->magic); + const u64 bytenr = btrfs_sb_offset(copy_num); + int ret; + + disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr); + if (IS_ERR(disk_super)) + return; + + memset(&disk_super->magic, 0, len); + folio_mark_dirty(virt_to_folio(disk_super)); + btrfs_release_disk_super(disk_super); + + ret = sync_blockdev_range(bdev, bytenr, bytenr + len - 1); + if (ret) + btrfs_warn(fs_info, "error clearing superblock number %d (%d)", + copy_num, ret); +} + void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct block_device *bdev, const char *device_path) { - struct btrfs_super_block *disk_super; int copy_num; if (!bdev) return; for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) { - struct page *page; - int ret; - - disk_super = btrfs_read_dev_one_super(bdev, copy_num, false); - if (IS_ERR(disk_super)) - continue; - - if (bdev_is_zoned(bdev)) { + if (bdev_is_zoned(bdev)) btrfs_reset_sb_log_zones(bdev, copy_num); - continue; - } - - memset(&disk_super->magic, 0, sizeof(disk_super->magic)); - - page = virt_to_page(disk_super); - set_page_dirty(page); - lock_page(page); - /* write_on_page() unlocks the page */ - ret = write_one_page(page); - if (ret) - btrfs_warn(fs_info, - "error clearing superblock number %d (%d)", - copy_num, ret); - btrfs_release_disk_super(disk_super); - + else + btrfs_scratch_superblock(fs_info, bdev, copy_num); } /* Notify udev that device has changed */ diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index a759668477bb..1f503e8e42d4 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -539,6 +539,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) } atomic_set(&zone_info->active_zones_left, max_active_zones - nactive); + /* Overcommit does not work well with active zone tacking. */ + set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags); } /* Validate superblock log */ diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index e20f8880363f..ac86bd0ebd63 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -269,7 +269,7 @@ static int dfscache_proc_show(struct seq_file *m, void *v) list_for_each_entry(t, &ce->tlist, list) { seq_printf(m, " %s%s\n", t->name, - ce->tgthint == t ? " (target hint)" : ""); + READ_ONCE(ce->tgthint) == t ? " (target hint)" : ""); } } } @@ -321,7 +321,7 @@ static inline void dump_tgts(const struct cache_entry *ce) cifs_dbg(FYI, "target list:\n"); list_for_each_entry(t, &ce->tlist, list) { cifs_dbg(FYI, " %s%s\n", t->name, - ce->tgthint == t ? " (target hint)" : ""); + READ_ONCE(ce->tgthint) == t ? " (target hint)" : ""); } } @@ -427,7 +427,7 @@ static int cache_entry_hash(const void *data, int size, unsigned int *hash) /* Return target hint of a DFS cache entry */ static inline char *get_tgt_name(const struct cache_entry *ce) { - struct cache_dfs_tgt *t = ce->tgthint; + struct cache_dfs_tgt *t = READ_ONCE(ce->tgthint); return t ? t->name : ERR_PTR(-ENOENT); } @@ -470,6 +470,7 @@ static struct cache_dfs_tgt *alloc_target(const char *name, int path_consumed) static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs, struct cache_entry *ce, const char *tgthint) { + struct cache_dfs_tgt *target; int i; ce->ttl = max_t(int, refs[0].ttl, CACHE_MIN_TTL); @@ -496,8 +497,9 @@ static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs, ce->numtgts++; } - ce->tgthint = list_first_entry_or_null(&ce->tlist, - struct cache_dfs_tgt, list); + target = list_first_entry_or_null(&ce->tlist, struct cache_dfs_tgt, + list); + WRITE_ONCE(ce->tgthint, target); return 0; } @@ -558,7 +560,8 @@ static void remove_oldest_entry_locked(void) } /* Add a new DFS cache entry */ -static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs) +static struct cache_entry *add_cache_entry_locked(struct dfs_info3_param *refs, + int numrefs) { int rc; struct cache_entry *ce; @@ -573,11 +576,11 @@ static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs) rc = cache_entry_hash(refs[0].path_name, strlen(refs[0].path_name), &hash); if (rc) - return rc; + return ERR_PTR(rc); ce = alloc_cache_entry(refs, numrefs); if (IS_ERR(ce)) - return PTR_ERR(ce); + return ce; spin_lock(&cache_ttl_lock); if (!cache_ttl) { @@ -594,7 +597,7 @@ static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs) atomic_inc(&cache_count); - return 0; + return ce; } /* Check if two DFS paths are equal. @s1 and @s2 are expected to be in @cache_cp's charset */ @@ -641,7 +644,9 @@ static struct cache_entry *__lookup_cache_entry(const char *path, unsigned int h * * Use whole path components in the match. Must be called with htable_rw_lock held. * + * Return cached entry if successful. * Return ERR_PTR(-ENOENT) if the entry is not found. + * Return error ptr otherwise. */ static struct cache_entry *lookup_cache_entry(const char *path) { @@ -711,14 +716,15 @@ void dfs_cache_destroy(void) static int update_cache_entry_locked(struct cache_entry *ce, const struct dfs_info3_param *refs, int numrefs) { + struct cache_dfs_tgt *target; + char *th = NULL; int rc; - char *s, *th = NULL; WARN_ON(!rwsem_is_locked(&htable_rw_lock)); - if (ce->tgthint) { - s = ce->tgthint->name; - th = kstrdup(s, GFP_ATOMIC); + target = READ_ONCE(ce->tgthint); + if (target) { + th = kstrdup(target->name, GFP_ATOMIC); if (!th) return -ENOMEM; } @@ -767,51 +773,75 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const * * For interlinks, cifs_mount() and expand_dfs_referral() are supposed to * handle them properly. + * + * On success, return entry with acquired lock for reading, otherwise error ptr. */ -static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, const char *path) +static struct cache_entry *cache_refresh_path(const unsigned int xid, + struct cifs_ses *ses, + const char *path, + bool force_refresh) { - int rc; - struct cache_entry *ce; struct dfs_info3_param *refs = NULL; + struct cache_entry *ce; int numrefs = 0; - bool newent = false; + int rc; cifs_dbg(FYI, "%s: search path: %s\n", __func__, path); - down_write(&htable_rw_lock); + down_read(&htable_rw_lock); ce = lookup_cache_entry(path); if (!IS_ERR(ce)) { - if (!cache_entry_expired(ce)) { - dump_ce(ce); - up_write(&htable_rw_lock); - return 0; - } - } else { - newent = true; + if (!force_refresh && !cache_entry_expired(ce)) + return ce; + } else if (PTR_ERR(ce) != -ENOENT) { + up_read(&htable_rw_lock); + return ce; } /* - * Either the entry was not found, or it is expired. + * Unlock shared access as we don't want to hold any locks while getting + * a new referral. The @ses used for performing the I/O could be + * reconnecting and it acquires @htable_rw_lock to look up the dfs cache + * in order to failover -- if necessary. + */ + up_read(&htable_rw_lock); + + /* + * Either the entry was not found, or it is expired, or it is a forced + * refresh. * Request a new DFS referral in order to create or update a cache entry. */ rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); - if (rc) - goto out_unlock; + if (rc) { + ce = ERR_PTR(rc); + goto out; + } dump_refs(refs, numrefs); - if (!newent) { - rc = update_cache_entry_locked(ce, refs, numrefs); - goto out_unlock; + down_write(&htable_rw_lock); + /* Re-check as another task might have it added or refreshed already */ + ce = lookup_cache_entry(path); + if (!IS_ERR(ce)) { + if (force_refresh || cache_entry_expired(ce)) { + rc = update_cache_entry_locked(ce, refs, numrefs); + if (rc) + ce = ERR_PTR(rc); + } + } else if (PTR_ERR(ce) == -ENOENT) { + ce = add_cache_entry_locked(refs, numrefs); } - rc = add_cache_entry_locked(refs, numrefs); + if (IS_ERR(ce)) { + up_write(&htable_rw_lock); + goto out; + } -out_unlock: - up_write(&htable_rw_lock); + downgrade_write(&htable_rw_lock); +out: free_dfs_info_array(refs, numrefs); - return rc; + return ce; } /* @@ -878,7 +908,7 @@ static int get_targets(struct cache_entry *ce, struct dfs_cache_tgt_list *tl) } it->it_path_consumed = t->path_consumed; - if (ce->tgthint == t) + if (READ_ONCE(ce->tgthint) == t) list_add(&it->it_list, head); else list_add_tail(&it->it_list, head); @@ -931,15 +961,8 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nl if (IS_ERR(npath)) return PTR_ERR(npath); - rc = cache_refresh_path(xid, ses, npath); - if (rc) - goto out_free_path; - - down_read(&htable_rw_lock); - - ce = lookup_cache_entry(npath); + ce = cache_refresh_path(xid, ses, npath, false); if (IS_ERR(ce)) { - up_read(&htable_rw_lock); rc = PTR_ERR(ce); goto out_free_path; } @@ -1003,72 +1026,6 @@ out_unlock: } /** - * dfs_cache_update_tgthint - update target hint of a DFS cache entry - * - * If it doesn't find the cache entry, then it will get a DFS referral for @path - * and create a new entry. - * - * In case the cache entry exists but expired, it will get a DFS referral - * for @path and then update the respective cache entry. - * - * @xid: syscall id - * @ses: smb session - * @cp: codepage - * @remap: type of character remapping for paths - * @path: path to lookup in DFS referral cache - * @it: DFS target iterator - * - * Return zero if the target hint was updated successfully, otherwise non-zero. - */ -int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses, - const struct nls_table *cp, int remap, const char *path, - const struct dfs_cache_tgt_iterator *it) -{ - int rc; - const char *npath; - struct cache_entry *ce; - struct cache_dfs_tgt *t; - - npath = dfs_cache_canonical_path(path, cp, remap); - if (IS_ERR(npath)) - return PTR_ERR(npath); - - cifs_dbg(FYI, "%s: update target hint - path: %s\n", __func__, npath); - - rc = cache_refresh_path(xid, ses, npath); - if (rc) - goto out_free_path; - - down_write(&htable_rw_lock); - - ce = lookup_cache_entry(npath); - if (IS_ERR(ce)) { - rc = PTR_ERR(ce); - goto out_unlock; - } - - t = ce->tgthint; - - if (likely(!strcasecmp(it->it_name, t->name))) - goto out_unlock; - - list_for_each_entry(t, &ce->tlist, list) { - if (!strcasecmp(t->name, it->it_name)) { - ce->tgthint = t; - cifs_dbg(FYI, "%s: new target hint: %s\n", __func__, - it->it_name); - break; - } - } - -out_unlock: - up_write(&htable_rw_lock); -out_free_path: - kfree(npath); - return rc; -} - -/** * dfs_cache_noreq_update_tgthint - update target hint of a DFS cache entry * without sending any requests to the currently connected server. * @@ -1092,21 +1049,20 @@ void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt cifs_dbg(FYI, "%s: path: %s\n", __func__, path); - if (!down_write_trylock(&htable_rw_lock)) - return; + down_read(&htable_rw_lock); ce = lookup_cache_entry(path); if (IS_ERR(ce)) goto out_unlock; - t = ce->tgthint; + t = READ_ONCE(ce->tgthint); if (unlikely(!strcasecmp(it->it_name, t->name))) goto out_unlock; list_for_each_entry(t, &ce->tlist, list) { if (!strcasecmp(t->name, it->it_name)) { - ce->tgthint = t; + WRITE_ONCE(ce->tgthint, t); cifs_dbg(FYI, "%s: new target hint: %s\n", __func__, it->it_name); break; @@ -1114,7 +1070,7 @@ void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt } out_unlock: - up_write(&htable_rw_lock); + up_read(&htable_rw_lock); } /** @@ -1320,35 +1276,37 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c * Mark dfs tcon for reconnecting when the currently connected tcon does not match any of the new * target shares in @refs. */ -static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cache_tgt_list *tl, - const struct dfs_info3_param *refs, int numrefs) +static void mark_for_reconnect_if_needed(struct TCP_Server_Info *server, + struct dfs_cache_tgt_list *old_tl, + struct dfs_cache_tgt_list *new_tl) { - struct dfs_cache_tgt_iterator *it; - int i; - - for (it = dfs_cache_get_tgt_iterator(tl); it; it = dfs_cache_get_next_tgt(tl, it)) { - for (i = 0; i < numrefs; i++) { - if (target_share_equal(tcon->ses->server, dfs_cache_get_tgt_name(it), - refs[i].node_name)) + struct dfs_cache_tgt_iterator *oit, *nit; + + for (oit = dfs_cache_get_tgt_iterator(old_tl); oit; + oit = dfs_cache_get_next_tgt(old_tl, oit)) { + for (nit = dfs_cache_get_tgt_iterator(new_tl); nit; + nit = dfs_cache_get_next_tgt(new_tl, nit)) { + if (target_share_equal(server, + dfs_cache_get_tgt_name(oit), + dfs_cache_get_tgt_name(nit))) return; } } cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__); - cifs_signal_cifsd_for_reconnect(tcon->ses->server, true); + cifs_signal_cifsd_for_reconnect(server, true); } /* Refresh dfs referral of tcon and mark it for reconnect if needed */ static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_refresh) { - struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); + struct dfs_cache_tgt_list old_tl = DFS_CACHE_TGT_LIST_INIT(old_tl); + struct dfs_cache_tgt_list new_tl = DFS_CACHE_TGT_LIST_INIT(new_tl); struct cifs_ses *ses = CIFS_DFS_ROOT_SES(tcon->ses); struct cifs_tcon *ipc = ses->tcon_ipc; - struct dfs_info3_param *refs = NULL; bool needs_refresh = false; struct cache_entry *ce; unsigned int xid; - int numrefs = 0; int rc = 0; xid = get_xid(); @@ -1357,9 +1315,8 @@ static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_r ce = lookup_cache_entry(path); needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce); if (!IS_ERR(ce)) { - rc = get_targets(ce, &tl); - if (rc) - cifs_dbg(FYI, "%s: could not get dfs targets: %d\n", __func__, rc); + rc = get_targets(ce, &old_tl); + cifs_dbg(FYI, "%s: get_targets: %d\n", __func__, rc); } up_read(&htable_rw_lock); @@ -1376,26 +1333,18 @@ static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_r } spin_unlock(&ipc->tc_lock); - rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); - if (!rc) { - /* Create or update a cache entry with the new referral */ - dump_refs(refs, numrefs); - - down_write(&htable_rw_lock); - ce = lookup_cache_entry(path); - if (IS_ERR(ce)) - add_cache_entry_locked(refs, numrefs); - else if (force_refresh || cache_entry_expired(ce)) - update_cache_entry_locked(ce, refs, numrefs); - up_write(&htable_rw_lock); - - mark_for_reconnect_if_needed(tcon, &tl, refs, numrefs); + ce = cache_refresh_path(xid, ses, path, true); + if (!IS_ERR(ce)) { + rc = get_targets(ce, &new_tl); + up_read(&htable_rw_lock); + cifs_dbg(FYI, "%s: get_targets: %d\n", __func__, rc); + mark_for_reconnect_if_needed(tcon->ses->server, &old_tl, &new_tl); } out: free_xid(xid); - dfs_cache_free_tgts(&tl); - free_dfs_info_array(refs, numrefs); + dfs_cache_free_tgts(&old_tl); + dfs_cache_free_tgts(&new_tl); return rc; } diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h index f7cff0be9327..be3b5a44cf82 100644 --- a/fs/cifs/dfs_cache.h +++ b/fs/cifs/dfs_cache.h @@ -35,9 +35,6 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nl struct dfs_cache_tgt_list *tgt_list); int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref, struct dfs_cache_tgt_list *tgt_list); -int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses, - const struct nls_table *cp, int remap, const char *path, - const struct dfs_cache_tgt_iterator *it); void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it); int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iterator *it, struct dfs_info3_param *ref); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 4b71f4a92f76..2c9ffa921e6f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -4163,12 +4163,15 @@ smb2_readv_callback(struct mid_q_entry *mid) (struct smb2_hdr *)rdata->iov[0].iov_base; struct cifs_credits credits = { .value = 0, .instance = 0 }; struct smb_rqst rqst = { .rq_iov = &rdata->iov[1], - .rq_nvec = 1, - .rq_pages = rdata->pages, - .rq_offset = rdata->page_offset, - .rq_npages = rdata->nr_pages, - .rq_pagesz = rdata->pagesz, - .rq_tailsz = rdata->tailsz }; + .rq_nvec = 1, }; + + if (rdata->got_bytes) { + rqst.rq_pages = rdata->pages; + rqst.rq_offset = rdata->page_offset; + rqst.rq_npages = rdata->nr_pages; + rqst.rq_pagesz = rdata->pagesz; + rqst.rq_tailsz = rdata->tailsz; + } WARN_ONCE(rdata->server != mid->server, "rdata server %p != mid server %p", diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 90789aaa6567..8c816b25ce7c 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1405,6 +1405,7 @@ void smbd_destroy(struct TCP_Server_Info *server) destroy_workqueue(info->workqueue); log_rdma_event(INFO, "rdma session destroyed\n"); kfree(info); + server->smbd_conn = NULL; } /* diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 481788c24a68..626a615dafc2 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -577,26 +577,25 @@ static int erofs_fc_parse_param(struct fs_context *fc, } ++ctx->devs->extra_devices; break; - case Opt_fsid: #ifdef CONFIG_EROFS_FS_ONDEMAND + case Opt_fsid: kfree(ctx->fsid); ctx->fsid = kstrdup(param->string, GFP_KERNEL); if (!ctx->fsid) return -ENOMEM; -#else - errorfc(fc, "fsid option not supported"); -#endif break; case Opt_domain_id: -#ifdef CONFIG_EROFS_FS_ONDEMAND kfree(ctx->domain_id); ctx->domain_id = kstrdup(param->string, GFP_KERNEL); if (!ctx->domain_id) return -ENOMEM; + break; #else - errorfc(fc, "domain_id option not supported"); -#endif + case Opt_fsid: + case Opt_domain_id: + errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); break; +#endif default: return -ENOPARAM; } diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index ccf7c55d477f..5200bb86e264 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1032,12 +1032,12 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, if (!be->decompressed_pages) be->decompressed_pages = - kvcalloc(be->nr_pages, sizeof(struct page *), - GFP_KERNEL | __GFP_NOFAIL); + kcalloc(be->nr_pages, sizeof(struct page *), + GFP_KERNEL | __GFP_NOFAIL); if (!be->compressed_pages) be->compressed_pages = - kvcalloc(pclusterpages, sizeof(struct page *), - GFP_KERNEL | __GFP_NOFAIL); + kcalloc(pclusterpages, sizeof(struct page *), + GFP_KERNEL | __GFP_NOFAIL); z_erofs_parse_out_bvecs(be); err2 = z_erofs_parse_in_bvecs(be, &overlapped); @@ -1085,7 +1085,7 @@ out: } if (be->compressed_pages < be->onstack_pages || be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES) - kvfree(be->compressed_pages); + kfree(be->compressed_pages); z_erofs_fill_other_copies(be, err); for (i = 0; i < be->nr_pages; ++i) { @@ -1104,7 +1104,7 @@ out: } if (be->decompressed_pages != be->onstack_pages) - kvfree(be->decompressed_pages); + kfree(be->decompressed_pages); pcl->length = 0; pcl->partial = true; diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 0150570c33aa..98fb90b9af71 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -793,12 +793,16 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; /* - * No strict rule how to describe extents for post EOF, yet - * we need do like below. Otherwise, iomap itself will get + * No strict rule on how to describe extents for post EOF, yet + * we need to do like below. Otherwise, iomap itself will get * into an endless loop on post EOF. + * + * Calculate the effective offset by subtracting extent start + * (map.m_la) from the requested offset, and add it to length. + * (NB: offset >= map.m_la always) */ if (iomap->offset >= inode->i_size) - iomap->length = length + map.m_la - offset; + iomap->length = length + offset - map.m_la; } iomap->flags = 0; return 0; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 7decaaf27e82..a2f04a3808db 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -81,6 +81,8 @@ ext4_xattr_block_cache_find(struct inode *, struct ext4_xattr_header *, struct mb_cache_entry **); static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value, size_t value_count); +static __le32 ext4_xattr_hash_entry_signed(char *name, size_t name_len, __le32 *value, + size_t value_count); static void ext4_xattr_rehash(struct ext4_xattr_header *); static const struct xattr_handler * const ext4_xattr_handler_map[] = { @@ -470,8 +472,22 @@ ext4_xattr_inode_verify_hashes(struct inode *ea_inode, tmp_data = cpu_to_le32(hash); e_hash = ext4_xattr_hash_entry(entry->e_name, entry->e_name_len, &tmp_data, 1); + /* All good? */ + if (e_hash == entry->e_hash) + return 0; + + /* + * Not good. Maybe the entry hash was calculated + * using the buggy signed char version? + */ + e_hash = ext4_xattr_hash_entry_signed(entry->e_name, entry->e_name_len, + &tmp_data, 1); + /* Still no match - bad */ if (e_hash != entry->e_hash) return -EFSCORRUPTED; + + /* Let people know about old hash */ + pr_warn_once("ext4: filesystem with signed xattr name hash"); } return 0; } @@ -3081,7 +3097,29 @@ static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value, while (name_len--) { hash = (hash << NAME_HASH_SHIFT) ^ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ - *name++; + (unsigned char)*name++; + } + while (value_count--) { + hash = (hash << VALUE_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ + le32_to_cpu(*value++); + } + return cpu_to_le32(hash); +} + +/* + * ext4_xattr_hash_entry_signed() + * + * Compute the hash of an extended attribute incorrectly. + */ +static __le32 ext4_xattr_hash_entry_signed(char *name, size_t name_len, __le32 *value, size_t value_count) +{ + __u32 hash = 0; + + while (name_len--) { + hash = (hash << NAME_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ + (signed char)*name++; } while (value_count--) { hash = (hash << VALUE_HASH_SHIFT) ^ diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index a4850aee2639..ad670369955f 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -11,9 +11,10 @@ #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> -struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu) +static struct posix_acl *__fuse_get_acl(struct fuse_conn *fc, + struct user_namespace *mnt_userns, + struct inode *inode, int type, bool rcu) { - struct fuse_conn *fc = get_fuse_conn(inode); int size; const char *name; void *value = NULL; @@ -25,7 +26,7 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu) if (fuse_is_bad(inode)) return ERR_PTR(-EIO); - if (!fc->posix_acl || fc->no_getxattr) + if (fc->no_getxattr) return NULL; if (type == ACL_TYPE_ACCESS) @@ -53,6 +54,46 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu) return acl; } +static inline bool fuse_no_acl(const struct fuse_conn *fc, + const struct inode *inode) +{ + /* + * Refuse interacting with POSIX ACLs for daemons that + * don't support FUSE_POSIX_ACL and are not mounted on + * the host to retain backwards compatibility. + */ + return !fc->posix_acl && (i_user_ns(inode) != &init_user_ns); +} + +struct posix_acl *fuse_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, int type) +{ + struct inode *inode = d_inode(dentry); + struct fuse_conn *fc = get_fuse_conn(inode); + + if (fuse_no_acl(fc, inode)) + return ERR_PTR(-EOPNOTSUPP); + + return __fuse_get_acl(fc, mnt_userns, inode, type, false); +} + +struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + + /* + * FUSE daemons before FUSE_POSIX_ACL was introduced could get and set + * POSIX ACLs without them being used for permission checking by the + * vfs. Retain that behavior for backwards compatibility as there are + * filesystems that do all permission checking for acls in the daemon + * and not in the kernel. + */ + if (!fc->posix_acl) + return NULL; + + return __fuse_get_acl(fc, &init_user_ns, inode, type, rcu); +} + int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type) { @@ -64,7 +105,7 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, if (fuse_is_bad(inode)) return -EIO; - if (!fc->posix_acl || fc->no_setxattr) + if (fc->no_setxattr || fuse_no_acl(fc, inode)) return -EOPNOTSUPP; if (type == ACL_TYPE_ACCESS) @@ -99,7 +140,13 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, return ret; } - if (!vfsgid_in_group_p(i_gid_into_vfsgid(&init_user_ns, inode)) && + /* + * Fuse daemons without FUSE_POSIX_ACL never changed the passed + * through POSIX ACLs. Such daemons don't expect setgid bits to + * be stripped. + */ + if (fc->posix_acl && + !vfsgid_in_group_p(i_gid_into_vfsgid(&init_user_ns, inode)) && !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID; @@ -108,8 +155,15 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, } else { ret = fuse_removexattr(inode, name); } - forget_all_cached_acls(inode); - fuse_invalidate_attr(inode); + + if (fc->posix_acl) { + /* + * Fuse daemons without FUSE_POSIX_ACL never cached POSIX ACLs + * and didn't invalidate attributes. Retain that behavior. + */ + forget_all_cached_acls(inode); + fuse_invalidate_attr(inode); + } return ret; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index cd1a071b625a..2725fb54328e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1942,7 +1942,8 @@ static const struct inode_operations fuse_dir_inode_operations = { .permission = fuse_permission, .getattr = fuse_getattr, .listxattr = fuse_listxattr, - .get_inode_acl = fuse_get_acl, + .get_inode_acl = fuse_get_inode_acl, + .get_acl = fuse_get_acl, .set_acl = fuse_set_acl, .fileattr_get = fuse_fileattr_get, .fileattr_set = fuse_fileattr_set, @@ -1964,7 +1965,8 @@ static const struct inode_operations fuse_common_inode_operations = { .permission = fuse_permission, .getattr = fuse_getattr, .listxattr = fuse_listxattr, - .get_inode_acl = fuse_get_acl, + .get_inode_acl = fuse_get_inode_acl, + .get_acl = fuse_get_acl, .set_acl = fuse_set_acl, .fileattr_get = fuse_fileattr_get, .fileattr_set = fuse_fileattr_set, diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index c673faefdcb9..46797a171a84 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1264,11 +1264,11 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size); int fuse_removexattr(struct inode *inode, const char *name); extern const struct xattr_handler *fuse_xattr_handlers[]; -extern const struct xattr_handler *fuse_acl_xattr_handlers[]; -extern const struct xattr_handler *fuse_no_acl_xattr_handlers[]; struct posix_acl; -struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu); +struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu); +struct posix_acl *fuse_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, int type); int fuse_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, struct posix_acl *acl, int type); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6b3beda16c1b..de9b9ec5ce81 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -311,7 +311,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, fuse_dax_dontcache(inode, attr->flags); } -static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) +static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr, + struct fuse_conn *fc) { inode->i_mode = attr->mode & S_IFMT; inode->i_size = attr->size; @@ -333,6 +334,12 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) new_decode_dev(attr->rdev)); } else BUG(); + /* + * Ensure that we don't cache acls for daemons without FUSE_POSIX_ACL + * so they see the exact same behavior as before. + */ + if (!fc->posix_acl) + inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; } static int fuse_inode_eq(struct inode *inode, void *_nodeidp) @@ -372,7 +379,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, if (!inode) return NULL; - fuse_init_inode(inode, attr); + fuse_init_inode(inode, attr, fc); get_fuse_inode(inode)->nodeid = nodeid; inode->i_flags |= S_AUTOMOUNT; goto done; @@ -388,7 +395,7 @@ retry: if (!fc->writeback_cache || !S_ISREG(attr->mode)) inode->i_flags |= S_NOCMTIME; inode->i_generation = generation; - fuse_init_inode(inode, attr); + fuse_init_inode(inode, attr, fc); unlock_new_inode(inode); } else if (fuse_stale_inode(inode, generation, attr)) { /* nodeid was reused, any I/O on the old inode should fail */ @@ -1174,7 +1181,6 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, if ((flags & FUSE_POSIX_ACL)) { fc->default_permissions = 1; fc->posix_acl = 1; - fm->sb->s_xattr = fuse_acl_xattr_handlers; } if (flags & FUSE_CACHE_SYMLINKS) fc->cache_symlinks = 1; @@ -1420,13 +1426,6 @@ static void fuse_sb_defaults(struct super_block *sb) if (sb->s_user_ns != &init_user_ns) sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); - - /* - * If we are not in the initial user namespace posix - * acls must be translated. - */ - if (sb->s_user_ns != &init_user_ns) - sb->s_xattr = fuse_no_acl_xattr_handlers; } static int fuse_fill_super_submount(struct super_block *sb, diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 0d3e7177fce0..9fe571ab569e 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -203,27 +203,6 @@ static int fuse_xattr_set(const struct xattr_handler *handler, return fuse_setxattr(inode, name, value, size, flags, 0); } -static bool no_xattr_list(struct dentry *dentry) -{ - return false; -} - -static int no_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *value, size_t size) -{ - return -EOPNOTSUPP; -} - -static int no_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, - struct dentry *dentry, struct inode *nodee, - const char *name, const void *value, - size_t size, int flags) -{ - return -EOPNOTSUPP; -} - static const struct xattr_handler fuse_xattr_handler = { .prefix = "", .get = fuse_xattr_get, @@ -234,33 +213,3 @@ const struct xattr_handler *fuse_xattr_handlers[] = { &fuse_xattr_handler, NULL }; - -const struct xattr_handler *fuse_acl_xattr_handlers[] = { - &posix_acl_access_xattr_handler, - &posix_acl_default_xattr_handler, - &fuse_xattr_handler, - NULL -}; - -static const struct xattr_handler fuse_no_acl_access_xattr_handler = { - .name = XATTR_NAME_POSIX_ACL_ACCESS, - .flags = ACL_TYPE_ACCESS, - .list = no_xattr_list, - .get = no_xattr_get, - .set = no_xattr_set, -}; - -static const struct xattr_handler fuse_no_acl_default_xattr_handler = { - .name = XATTR_NAME_POSIX_ACL_DEFAULT, - .flags = ACL_TYPE_ACCESS, - .list = no_xattr_list, - .get = no_xattr_get, - .set = no_xattr_set, -}; - -const struct xattr_handler *fuse_no_acl_xattr_handlers[] = { - &fuse_no_acl_access_xattr_handler, - &fuse_no_acl_default_xattr_handler, - &fuse_xattr_handler, - NULL -}; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 723639376ae2..61323deb80bc 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -80,6 +80,15 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd) brelse(bd->bd_bh); } +static int __gfs2_writepage(struct page *page, struct writeback_control *wbc, + void *data) +{ + struct address_space *mapping = data; + int ret = mapping->a_ops->writepage(page, wbc); + mapping_set_error(mapping, ret); + return ret; +} + /** * gfs2_ail1_start_one - Start I/O on a transaction * @sdp: The superblock @@ -131,7 +140,7 @@ __acquires(&sdp->sd_ail_lock) if (!mapping) continue; spin_unlock(&sdp->sd_ail_lock); - ret = filemap_fdatawrite_wbc(mapping, wbc); + ret = write_cache_pages(mapping, wbc, __gfs2_writepage, mapping); if (need_resched()) { blk_finish_plug(plug); cond_resched(); diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index fd0a288af299..56be077e5d8a 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -280,7 +280,7 @@ int ksmbd_conn_handler_loop(void *p) { struct ksmbd_conn *conn = (struct ksmbd_conn *)p; struct ksmbd_transport *t = conn->transport; - unsigned int pdu_size; + unsigned int pdu_size, max_allowed_pdu_size; char hdr_buf[4] = {0,}; int size; @@ -305,13 +305,26 @@ int ksmbd_conn_handler_loop(void *p) pdu_size = get_rfc1002_len(hdr_buf); ksmbd_debug(CONN, "RFC1002 header %u bytes\n", pdu_size); + if (conn->status == KSMBD_SESS_GOOD) + max_allowed_pdu_size = + SMB3_MAX_MSGSIZE + conn->vals->max_write_size; + else + max_allowed_pdu_size = SMB3_MAX_MSGSIZE; + + if (pdu_size > max_allowed_pdu_size) { + pr_err_ratelimited("PDU length(%u) excceed maximum allowed pdu size(%u) on connection(%d)\n", + pdu_size, max_allowed_pdu_size, + conn->status); + break; + } + /* * Check if pdu size is valid (min : smb header size, * max : 0x00FFFFFF). */ if (pdu_size < __SMB2_HEADER_STRUCTURE_SIZE || pdu_size > MAX_STREAM_PROT_LEN) { - continue; + break; } /* 4 for rfc1002 length field */ diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h index b6bd8311e6b4..fb8b2d566efb 100644 --- a/fs/ksmbd/ksmbd_netlink.h +++ b/fs/ksmbd/ksmbd_netlink.h @@ -106,7 +106,8 @@ struct ksmbd_startup_request { __u32 sub_auth[3]; /* Subauth value for Security ID */ __u32 smb2_max_credits; /* MAX credits */ __u32 smbd_max_io_size; /* smbd read write size */ - __u32 reserved[127]; /* Reserved room */ + __u32 max_connections; /* Number of maximum simultaneous connections */ + __u32 reserved[126]; /* Reserved room */ __u32 ifc_list_sz; /* interfaces list size */ __s8 ____payload[]; }; diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c index 0ae8d08d85a8..4d9e0b54e3db 100644 --- a/fs/ksmbd/ndr.c +++ b/fs/ksmbd/ndr.c @@ -242,7 +242,7 @@ int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da) return ret; if (da->version != 3 && da->version != 4) { - pr_err("v%d version is not supported\n", da->version); + ksmbd_debug(VFS, "v%d version is not supported\n", da->version); return -EINVAL; } @@ -251,7 +251,7 @@ int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da) return ret; if (da->version != version2) { - pr_err("ndr version mismatched(version: %d, version2: %d)\n", + ksmbd_debug(VFS, "ndr version mismatched(version: %d, version2: %d)\n", da->version, version2); return -EINVAL; } @@ -457,7 +457,7 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl) if (ret) return ret; if (acl->version != 4) { - pr_err("v%d version is not supported\n", acl->version); + ksmbd_debug(VFS, "v%d version is not supported\n", acl->version); return -EINVAL; } @@ -465,7 +465,7 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl) if (ret) return ret; if (acl->version != version2) { - pr_err("ndr version mismatched(version: %d, version2: %d)\n", + ksmbd_debug(VFS, "ndr version mismatched(version: %d, version2: %d)\n", acl->version, version2); return -EINVAL; } diff --git a/fs/ksmbd/server.h b/fs/ksmbd/server.h index ac9d932f8c8a..db7278181760 100644 --- a/fs/ksmbd/server.h +++ b/fs/ksmbd/server.h @@ -41,6 +41,7 @@ struct ksmbd_server_config { unsigned int share_fake_fscaps; struct smb_sid domain_sid; unsigned int auth_mechs; + unsigned int max_connections; char *conf[SERVER_CONF_WORK_GROUP + 1]; }; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 38fbda52e06f..d681f91947d9 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -8663,6 +8663,7 @@ int smb3_decrypt_req(struct ksmbd_work *work) bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; + struct ksmbd_session *sess = work->sess; struct smb2_hdr *rsp = smb2_get_msg(work->response_buf); if (conn->dialect < SMB30_PROT_ID) @@ -8672,6 +8673,7 @@ bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work) rsp = ksmbd_resp_buf_next(work); if (le16_to_cpu(rsp->Command) == SMB2_SESSION_SETUP_HE && + sess->user && !user_guest(sess->user) && rsp->Status == STATUS_SUCCESS) return true; return false; diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h index aa5dbe54f5a1..0c8a770fe318 100644 --- a/fs/ksmbd/smb2pdu.h +++ b/fs/ksmbd/smb2pdu.h @@ -24,8 +24,9 @@ #define SMB21_DEFAULT_IOSIZE (1024 * 1024) #define SMB3_DEFAULT_TRANS_SIZE (1024 * 1024) -#define SMB3_MIN_IOSIZE (64 * 1024) -#define SMB3_MAX_IOSIZE (8 * 1024 * 1024) +#define SMB3_MIN_IOSIZE (64 * 1024) +#define SMB3_MAX_IOSIZE (8 * 1024 * 1024) +#define SMB3_MAX_MSGSIZE (4 * 4096) /* * Definitions for SMB2 Protocol Data Units (network frames) diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c index c9aca21637d5..40c721f9227e 100644 --- a/fs/ksmbd/transport_ipc.c +++ b/fs/ksmbd/transport_ipc.c @@ -308,6 +308,9 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req) if (req->smbd_max_io_size) init_smbd_max_io_size(req->smbd_max_io_size); + if (req->max_connections) + server_conf.max_connections = req->max_connections; + ret = ksmbd_set_netbios_name(req->netbios_name); ret |= ksmbd_set_server_string(req->server_string); ret |= ksmbd_set_work_group(req->work_group); diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 4c6bd0b69979..603893fd87f5 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -15,6 +15,8 @@ #define IFACE_STATE_DOWN BIT(0) #define IFACE_STATE_CONFIGURED BIT(1) +static atomic_t active_num_conn; + struct interface { struct task_struct *ksmbd_kthread; struct socket *ksmbd_socket; @@ -185,8 +187,10 @@ static int ksmbd_tcp_new_connection(struct socket *client_sk) struct tcp_transport *t; t = alloc_transport(client_sk); - if (!t) + if (!t) { + sock_release(client_sk); return -ENOMEM; + } csin = KSMBD_TCP_PEER_SOCKADDR(KSMBD_TRANS(t)->conn); if (kernel_getpeername(client_sk, csin) < 0) { @@ -239,6 +243,15 @@ static int ksmbd_kthread_fn(void *p) continue; } + if (server_conf.max_connections && + atomic_inc_return(&active_num_conn) >= server_conf.max_connections) { + pr_info_ratelimited("Limit the maximum number of connections(%u)\n", + atomic_read(&active_num_conn)); + atomic_dec(&active_num_conn); + sock_release(client_sk); + continue; + } + ksmbd_debug(CONN, "connect success: accepted new connection\n"); client_sk->sk->sk_rcvtimeo = KSMBD_TCP_RECV_TIMEOUT; client_sk->sk->sk_sndtimeo = KSMBD_TCP_SEND_TIMEOUT; @@ -368,6 +381,8 @@ static int ksmbd_tcp_writev(struct ksmbd_transport *t, struct kvec *iov, static void ksmbd_tcp_disconnect(struct ksmbd_transport *t) { free_transport(TCP_TRANS(t)); + if (server_conf.max_connections) + atomic_dec(&active_num_conn); } static void tcp_destroy_socket(struct socket *ksmbd_socket) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 0ef070349014..c0950edb26b0 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -662,6 +662,39 @@ static struct shrinker nfsd_file_shrinker = { }; /** + * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file + * @nf: nfsd_file to attempt to queue + * @dispose: private list to queue successfully-put objects + * + * Unhash an nfsd_file, try to get a reference to it, and then put that + * reference. If it's the last reference, queue it to the dispose list. + */ +static void +nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) + __must_hold(RCU) +{ + int decrement = 1; + + /* If we raced with someone else unhashing, ignore it */ + if (!nfsd_file_unhash(nf)) + return; + + /* If we can't get a reference, ignore it */ + if (!nfsd_file_get(nf)) + return; + + /* Extra decrement if we remove from the LRU */ + if (nfsd_file_lru_remove(nf)) + ++decrement; + + /* If refcount goes to 0, then put on the dispose list */ + if (refcount_sub_and_test(decrement, &nf->nf_ref)) { + list_add(&nf->nf_lru, dispose); + trace_nfsd_file_closing(nf); + } +} + +/** * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode * @inode: inode on which to close out nfsd_files * @dispose: list on which to gather nfsd_files to close out @@ -688,30 +721,11 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) rcu_read_lock(); do { - int decrement = 1; - nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, nfsd_file_rhash_params); if (!nf) break; - - /* If we raced with someone else unhashing, ignore it */ - if (!nfsd_file_unhash(nf)) - continue; - - /* If we can't get a reference, ignore it */ - if (!nfsd_file_get(nf)) - continue; - - /* Extra decrement if we remove from the LRU */ - if (nfsd_file_lru_remove(nf)) - ++decrement; - - /* If refcount goes to 0, then put on the dispose list */ - if (refcount_sub_and_test(decrement, &nf->nf_ref)) { - list_add(&nf->nf_lru, dispose); - trace_nfsd_file_closing(nf); - } + nfsd_file_cond_queue(nf, dispose); } while (1); rcu_read_unlock(); } @@ -928,11 +942,8 @@ __nfsd_file_cache_purge(struct net *net) nf = rhashtable_walk_next(&iter); while (!IS_ERR_OR_NULL(nf)) { - if (!net || nf->nf_net == net) { - nfsd_file_unhash(nf); - nfsd_file_lru_remove(nf); - list_add(&nf->nf_lru, &dispose); - } + if (!net || nf->nf_net == net) + nfsd_file_cond_queue(nf, &dispose); nf = rhashtable_walk_next(&iter); } diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 8c854ba3285b..51a4b7885cae 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -195,7 +195,7 @@ struct nfsd_net { atomic_t nfsd_courtesy_clients; struct shrinker nfsd_client_shrinker; - struct delayed_work nfsd_shrinker_work; + struct work_struct nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9b81d012666e..f189ba7995f5 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1318,6 +1318,7 @@ try_again: /* allow 20secs for mount/unmount for now - revisit */ if (signal_pending(current) || (schedule_timeout(20*HZ) == 0)) { + finish_wait(&nn->nfsd_ssc_waitq, &wait); kfree(work); return nfserr_eagain; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4809ae0f0138..4ef529379065 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4411,7 +4411,7 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) if (!count) count = atomic_long_read(&num_delegations); if (count) - mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); + queue_work(laundry_wq, &nn->nfsd_shrinker_work); return (unsigned long)count; } @@ -4421,7 +4421,7 @@ nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) return SHRINK_STOP; } -int +void nfsd4_init_leases_net(struct nfsd_net *nn) { struct sysinfo si; @@ -4443,16 +4443,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn) nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); atomic_set(&nn->nfsd_courtesy_clients, 0); - nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; - nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; - nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); -} - -void -nfsd4_leases_net_shutdown(struct nfsd_net *nn) -{ - unregister_shrinker(&nn->nfsd_client_shrinker); } static void init_nfs4_replay(struct nfs4_replay *rp) @@ -6235,8 +6225,7 @@ deleg_reaper(struct nfsd_net *nn) static void nfsd4_state_shrinker_worker(struct work_struct *work) { - struct delayed_work *dwork = to_delayed_work(work); - struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + struct nfsd_net *nn = container_of(work, struct nfsd_net, nfsd_shrinker_work); courtesy_client_reaper(nn); @@ -8066,11 +8055,20 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); + INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); + nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; + + if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client")) + goto err_shrinker; return 0; +err_shrinker: + put_net(net); + kfree(nn->sessionid_hashtbl); err_sessionid: kfree(nn->unconf_id_hashtbl); err_unconf_id: @@ -8163,6 +8161,8 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unregister_shrinker(&nn->nfsd_client_shrinker); + cancel_work(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index d1e581a60480..c2577ee7ffb2 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1457,9 +1457,7 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; - retval = nfsd4_init_leases_net(nn); - if (retval) - goto out_drc_error; + nfsd4_init_leases_net(nn); retval = nfsd_reply_cache_init(nn); if (retval) goto out_cache_error; @@ -1469,8 +1467,6 @@ static __net_init int nfsd_init_net(struct net *net) return 0; out_cache_error: - nfsd4_leases_net_shutdown(nn); -out_drc_error: nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); @@ -1486,7 +1482,6 @@ static __net_exit void nfsd_exit_net(struct net *net) nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); - nfsd4_leases_net_shutdown(nn); } static struct pernet_operations nfsd_net_ops = { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 93b42ef9ed91..fa0144a74267 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -504,8 +504,7 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif -extern int nfsd4_init_leases_net(struct nfsd_net *nn); -extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); +extern void nfsd4_init_leases_net(struct nfsd_net *nn); #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) @@ -513,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry) return 0; } -static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; -static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; +static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { }; #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index b9d15c3df3cc..40ce92a332fe 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -480,9 +480,18 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh, &submit_ptr); if (ret) { - if (ret != -EEXIST) - return ret; - goto out_check; + if (likely(ret == -EEXIST)) + goto out_check; + if (ret == -ENOENT) { + /* + * Block address translation failed due to invalid + * value of 'ptr'. In this case, return internal code + * -EINVAL (broken bmap) to notify bmap layer of fatal + * metadata corruption. + */ + ret = -EINVAL; + } + return ret; } if (ra) { diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 6e4e65ee050d..c14e90764e35 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -792,7 +792,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) if (!c->metacopy && c->stat.size) { err = ovl_copy_up_file(ofs, c->dentry, tmpfile, c->stat.size); if (err) - return err; + goto out_fput; } err = ovl_copy_up_metadata(c, temp); @@ -1011,6 +1011,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, if (err) return err; + if (!kuid_has_mapping(current_user_ns(), ctx.stat.uid) || + !kgid_has_mapping(current_user_ns(), ctx.stat.gid)) + return -EOVERFLOW; + ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags); if (parent) { diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 98ac37e34e3d..cc694846617a 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -108,6 +108,21 @@ static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx) return ctx->features & UFFD_FEATURE_INITIALIZED; } +static void userfaultfd_set_vm_flags(struct vm_area_struct *vma, + vm_flags_t flags) +{ + const bool uffd_wp_changed = (vma->vm_flags ^ flags) & VM_UFFD_WP; + + vma->vm_flags = flags; + /* + * For shared mappings, we want to enable writenotify while + * userfaultfd-wp is enabled (see vma_wants_writenotify()). We'll simply + * recalculate vma->vm_page_prot whenever userfaultfd-wp changes. + */ + if ((vma->vm_flags & VM_SHARED) && uffd_wp_changed) + vma_set_page_prot(vma); +} + static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, int wake_flags, void *key) { @@ -618,7 +633,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, for_each_vma(vmi, vma) { if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; - vma->vm_flags &= ~__VM_UFFD_FLAGS; + userfaultfd_set_vm_flags(vma, + vma->vm_flags & ~__VM_UFFD_FLAGS); } } mmap_write_unlock(mm); @@ -652,7 +668,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) octx = vma->vm_userfaultfd_ctx.ctx; if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; - vma->vm_flags &= ~__VM_UFFD_FLAGS; + userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS); return 0; } @@ -733,7 +749,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma, } else { /* Drop uffd context if remap feature not enabled */ vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; - vma->vm_flags &= ~__VM_UFFD_FLAGS; + userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS); } } @@ -895,7 +911,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) prev = vma; } - vma->vm_flags = new_flags; + userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; } mmap_write_unlock(mm); @@ -1463,7 +1479,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, * the next vma was merged into the current one and * the current one has not been updated yet. */ - vma->vm_flags = new_flags; + userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx.ctx = ctx; if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma)) @@ -1651,7 +1667,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, * the next vma was merged into the current one and * the current one has not been updated yet. */ - vma->vm_flags = new_flags; + userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; skip: diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 2c53fbb8d918..a9c5c3f720ad 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -442,6 +442,10 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, data_size = zonefs_check_zone_condition(inode, zone, false, false); } + } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && + data_size > isize) { + /* Do not expose garbage data */ + data_size = isize; } /* @@ -805,6 +809,24 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) ret = submit_bio_wait(bio); + /* + * If the file zone was written underneath the file system, the zone + * write pointer may not be where we expect it to be, but the zone + * append write can still succeed. So check manually that we wrote where + * we intended to, that is, at zi->i_wpoffset. + */ + if (!ret) { + sector_t wpsector = + zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT); + + if (bio->bi_iter.bi_sector != wpsector) { + zonefs_warn(inode->i_sb, + "Corrupted write pointer %llu for zone at %llu\n", + wpsector, zi->i_zsector); + ret = -EIO; + } + } + zonefs_file_write_dio_end_io(iocb, size, ret, 0); trace_zonefs_file_dio_append(inode, size, ret); |