diff options
Diffstat (limited to 'fs')
214 files changed, 4170 insertions, 9052 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig index 6489e1fc1afd..11045d8e356a 100644 --- a/fs/9p/Kconfig +++ b/fs/9p/Kconfig @@ -25,9 +25,6 @@ config 9P_FS_POSIX_ACL POSIX Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N endif diff --git a/fs/Kconfig b/fs/Kconfig index 7aee6d699fd6..bc821a86d965 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -58,6 +58,13 @@ config FS_DAX_PMD depends on ZONE_DEVICE depends on TRANSPARENT_HUGEPAGE +# Selected by DAX drivers that do not expect filesystem DAX to support +# get_user_pages() of DAX mappings. I.e. "limited" indicates no support +# for fork() of processes with MAP_SHARED mappings or support for +# direct-I/O to a DAX mapping. +config FS_DAX_LIMITED + bool + endif # BLOCK # Posix ACL utility routines @@ -167,17 +174,13 @@ config TMPFS_POSIX_ACL files for sound to work properly. In short, if you're not sure, say Y. - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website <http://acl.bestbits.at/>. - config TMPFS_XATTR bool "Tmpfs extended attributes" depends on TMPFS default n help Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - <http://acl.bestbits.at/> for details). + the kernel or by users (see the attr(5) manual page for details). Currently this enables support for the trusted.* and security.* namespaces. @@ -298,7 +301,6 @@ config NFS_COMMON source "net/sunrpc/Kconfig" source "fs/ceph/Kconfig" source "fs/cifs/Kconfig" -source "fs/ncpfs/Kconfig" source "fs/coda/Kconfig" source "fs/afs/Kconfig" source "fs/9p/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index ef772f1eaff8..add789ea270a 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -92,7 +92,6 @@ obj-$(CONFIG_LOCKD) += lockd/ obj-$(CONFIG_NLS) += nls/ obj-$(CONFIG_SYSV_FS) += sysv/ obj-$(CONFIG_CIFS) += cifs/ -obj-$(CONFIG_NCP_FS) += ncpfs/ obj-$(CONFIG_HPFS_FS) += hpfs/ obj-$(CONFIG_NTFS_FS) += ntfs/ obj-$(CONFIG_UFS_FS) += ufs/ diff --git a/fs/affs/dir.c b/fs/affs/dir.c index d180b46453cf..b2bf7016e1b3 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -81,7 +81,7 @@ affs_readdir(struct file *file, struct dir_context *ctx) * we can jump directly to where we left off. */ ino = (u32)(long)file->private_data; - if (ino && inode_cmp_iversion(inode, file->f_version) == 0) { + if (ino && inode_eq_iversion(inode, file->f_version)) { pr_debug("readdir() left off=%d\n", ino); goto inside; } diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index a537368ba0db..fd9f28b8a933 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -332,11 +332,18 @@ bool afs_iterate_addresses(struct afs_addr_cursor *ac) */ int afs_end_cursor(struct afs_addr_cursor *ac) { - if (ac->responded && ac->index != ac->start) - WRITE_ONCE(ac->alist->index, ac->index); + struct afs_addr_list *alist; + + alist = ac->alist; + if (alist) { + if (ac->responded && ac->index != ac->start) + WRITE_ONCE(alist->index, ac->index); + afs_put_addrlist(alist); + } - afs_put_addrlist(ac->alist); + ac->addr = NULL; ac->alist = NULL; + ac->begun = false; return ac->error; } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 23c7f395d718..ba2b458b36d1 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -17,10 +17,13 @@ #include <linux/pagemap.h> #include <linux/ctype.h> #include <linux/sched.h> +#include <linux/dns_resolver.h> #include "internal.h" static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); +static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, struct dir_context *ctx); static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); @@ -64,6 +67,17 @@ const struct inode_operations afs_dir_inode_operations = { .listxattr = afs_listxattr, }; +const struct file_operations afs_dynroot_file_operations = { + .open = dcache_dir_open, + .release = dcache_dir_close, + .iterate_shared = dcache_readdir, + .llseek = dcache_dir_lseek, +}; + +const struct inode_operations afs_dynroot_inode_operations = { + .lookup = afs_dynroot_lookup, +}; + const struct dentry_operations afs_fs_dentry_operations = { .d_revalidate = afs_d_revalidate, .d_delete = afs_d_delete, @@ -468,25 +482,58 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry, } /* + * Probe to see if a cell may exist. This prevents positive dentries from + * being created unnecessarily. + */ +static int afs_probe_cell_name(struct dentry *dentry) +{ + struct afs_cell *cell; + const char *name = dentry->d_name.name; + size_t len = dentry->d_name.len; + int ret; + + /* Names prefixed with a dot are R/W mounts. */ + if (name[0] == '.') { + if (len == 1) + return -EINVAL; + name++; + len--; + } + + cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len); + if (!IS_ERR(cell)) { + afs_put_cell(afs_d2net(dentry), cell); + return 0; + } + + ret = dns_query("afsdb", name, len, "ipv4", NULL, NULL); + if (ret == -ENODATA) + ret = -EDESTADDRREQ; + return ret; +} + +/* * Try to auto mount the mountpoint with pseudo directory, if the autocell * operation is setted. */ -static struct inode *afs_try_auto_mntpt( - int ret, struct dentry *dentry, struct inode *dir, struct key *key, - struct afs_fid *fid) +static struct inode *afs_try_auto_mntpt(struct dentry *dentry, + struct inode *dir, struct afs_fid *fid) { - const char *devname = dentry->d_name.name; struct afs_vnode *vnode = AFS_FS_I(dir); struct inode *inode; + int ret = -ENOENT; - _enter("%d, %p{%pd}, {%x:%u}, %p", - ret, dentry, dentry, vnode->fid.vid, vnode->fid.vnode, key); + _enter("%p{%pd}, {%x:%u}", + dentry, dentry, vnode->fid.vid, vnode->fid.vnode); + + if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) + goto out; - if (ret != -ENOENT || - !test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) + ret = afs_probe_cell_name(dentry); + if (ret < 0) goto out; - inode = afs_iget_autocell(dir, devname, strlen(devname), key); + inode = afs_iget_pseudo_dir(dir->i_sb, false); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto out; @@ -545,13 +592,16 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, ret = afs_do_lookup(dir, dentry, &fid, key); if (ret < 0) { - inode = afs_try_auto_mntpt(ret, dentry, dir, key, &fid); - if (!IS_ERR(inode)) { - key_put(key); - goto success; + if (ret == -ENOENT) { + inode = afs_try_auto_mntpt(dentry, dir, &fid); + if (!IS_ERR(inode)) { + key_put(key); + goto success; + } + + ret = PTR_ERR(inode); } - ret = PTR_ERR(inode); key_put(key); if (ret == -ENOENT) { d_add(dentry, NULL); @@ -583,12 +633,53 @@ success: } /* + * Look up an entry in a dynroot directory. + */ +static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct afs_vnode *vnode; + struct afs_fid fid; + struct inode *inode; + int ret; + + vnode = AFS_FS_I(dir); + + _enter("%pd", dentry); + + ASSERTCMP(d_inode(dentry), ==, NULL); + + if (dentry->d_name.len >= AFSNAMEMAX) { + _leave(" = -ENAMETOOLONG"); + return ERR_PTR(-ENAMETOOLONG); + } + + inode = afs_try_auto_mntpt(dentry, dir, &fid); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + if (ret == -ENOENT) { + d_add(dentry, NULL); + _leave(" = NULL [negative]"); + return NULL; + } + _leave(" = %d [do]", ret); + return ERR_PTR(ret); + } + + d_add(dentry, inode); + _leave(" = 0 { ino=%lu v=%u }", + d_inode(dentry)->i_ino, d_inode(dentry)->i_generation); + return NULL; +} + +/* * check that a dentry lookup hit has found a valid entry * - NOTE! the hit can be a negative hit too, so we can't assume we have an * inode */ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) { + struct afs_super_info *as = dentry->d_sb->s_fs_info; struct afs_vnode *vnode, *dir; struct afs_fid uninitialized_var(fid); struct dentry *parent; @@ -600,6 +691,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (flags & LOOKUP_RCU) return -ECHILD; + if (as->dyn_root) + return 1; + if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); _enter("{v={%x:%u} n=%pd fl=%lx},", diff --git a/fs/afs/inode.c b/fs/afs/inode.c index c7f17c44c7ce..6b39d0255b72 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -147,7 +147,7 @@ int afs_iget5_test(struct inode *inode, void *opaque) * * These pseudo inodes don't match anything. */ -static int afs_iget5_autocell_test(struct inode *inode, void *opaque) +static int afs_iget5_pseudo_dir_test(struct inode *inode, void *opaque) { return 0; } @@ -169,31 +169,34 @@ static int afs_iget5_set(struct inode *inode, void *opaque) } /* - * inode retrieval for autocell + * Create an inode for a dynamic root directory or an autocell dynamic + * automount dir. */ -struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, - int namesz, struct key *key) +struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) { struct afs_iget_data data; struct afs_super_info *as; struct afs_vnode *vnode; - struct super_block *sb; struct inode *inode; static atomic_t afs_autocell_ino; - _enter("{%x:%u},%*.*s,", - AFS_FS_I(dir)->fid.vid, AFS_FS_I(dir)->fid.vnode, - namesz, namesz, dev_name ?: ""); + _enter(""); - sb = dir->i_sb; as = sb->s_fs_info; - data.volume = as->volume; - data.fid.vid = as->volume->vid; - data.fid.unique = 0; - data.fid.vnode = 0; + if (as->volume) { + data.volume = as->volume; + data.fid.vid = as->volume->vid; + } + if (root) { + data.fid.vnode = 1; + data.fid.unique = 1; + } else { + data.fid.vnode = atomic_inc_return(&afs_autocell_ino); + data.fid.unique = 0; + } - inode = iget5_locked(sb, atomic_inc_return(&afs_autocell_ino), - afs_iget5_autocell_test, afs_iget5_set, + inode = iget5_locked(sb, data.fid.vnode, + afs_iget5_pseudo_dir_test, afs_iget5_set, &data); if (!inode) { _leave(" = -ENOMEM"); @@ -211,7 +214,12 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, inode->i_size = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; - inode->i_op = &afs_autocell_inode_operations; + if (root) { + inode->i_op = &afs_dynroot_inode_operations; + inode->i_fop = &afs_dynroot_file_operations; + } else { + inode->i_op = &afs_autocell_inode_operations; + } set_nlink(inode, 2); inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; @@ -223,8 +231,12 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, inode->i_generation = 0; set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); - set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); - inode->i_flags |= S_AUTOMOUNT | S_NOATIME; + if (!root) { + set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); + inode->i_flags |= S_AUTOMOUNT; + } + + inode->i_flags |= S_NOATIME; unlock_new_inode(inode); _leave(" = %p", inode); return inode; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 804d1f905622..f38d6a561a84 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -36,6 +36,7 @@ struct afs_mount_params { bool rwpath; /* T if the parent should be considered R/W */ bool force; /* T to force cell type */ bool autocell; /* T if set auto mount operation */ + bool dyn_root; /* T if dynamic root */ afs_voltype_t type; /* type of volume requested */ int volnamesz; /* size of volume name */ const char *volname; /* name of volume to mount */ @@ -186,6 +187,7 @@ struct afs_super_info { struct afs_net *net; /* Network namespace */ struct afs_cell *cell; /* The cell in which the volume resides */ struct afs_volume *volume; /* volume record */ + bool dyn_root; /* True if dynamic root */ }; static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) @@ -634,10 +636,13 @@ extern bool afs_cm_incoming_call(struct afs_call *); /* * dir.c */ -extern bool afs_dir_check_page(struct inode *, struct page *); +extern const struct file_operations afs_dir_file_operations; extern const struct inode_operations afs_dir_inode_operations; +extern const struct file_operations afs_dynroot_file_operations; +extern const struct inode_operations afs_dynroot_inode_operations; extern const struct dentry_operations afs_fs_dentry_operations; -extern const struct file_operations afs_dir_file_operations; + +extern bool afs_dir_check_page(struct inode *, struct page *); /* * file.c @@ -695,8 +700,7 @@ extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *, */ extern int afs_fetch_status(struct afs_vnode *, struct key *); extern int afs_iget5_test(struct inode *, void *); -extern struct inode *afs_iget_autocell(struct inode *, const char *, int, - struct key *); +extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool); extern struct inode *afs_iget(struct super_block *, struct key *, struct afs_fid *, struct afs_file_status *, struct afs_callback *, diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 690fea9d84c3..99fd13500a97 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -72,7 +72,7 @@ static int afs_mntpt_open(struct inode *inode, struct file *file) */ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) { - struct afs_super_info *super; + struct afs_super_info *as; struct vfsmount *mnt; struct afs_vnode *vnode; struct page *page; @@ -104,13 +104,13 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) goto error_no_page; if (mntpt->d_name.name[0] == '.') { - devname[0] = '#'; - memcpy(devname + 1, mntpt->d_name.name, size - 1); + devname[0] = '%'; + memcpy(devname + 1, mntpt->d_name.name + 1, size - 1); memcpy(devname + size, afs_root_cell, sizeof(afs_root_cell)); rwpath = true; } else { - devname[0] = '%'; + devname[0] = '#'; memcpy(devname + 1, mntpt->d_name.name, size); memcpy(devname + size + 1, afs_root_cell, sizeof(afs_root_cell)); @@ -142,11 +142,13 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) } /* work out what options we want */ - super = AFS_FS_S(mntpt->d_sb); - memcpy(options, "cell=", 5); - strcpy(options + 5, super->volume->cell->name); - if (super->volume->type == AFSVL_RWVOL || rwpath) - strcat(options, ",rwpath"); + as = AFS_FS_S(mntpt->d_sb); + if (as->cell) { + memcpy(options, "cell=", 5); + strcpy(options + 5, as->cell->name); + if ((as->volume && as->volume->type == AFSVL_RWVOL) || rwpath) + strcat(options, ",rwpath"); + } /* try and do the mount */ _debug("--- attempting mount %s -o %s ---", devname, options); diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index d04511fb3879..ad1328d85526 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -330,26 +330,6 @@ start: if (!afs_start_fs_iteration(fc, vnode)) goto failed; - goto use_server; - -next_server: - _debug("next"); - afs_put_cb_interest(afs_v2net(vnode), fc->cbi); - fc->cbi = NULL; - fc->index++; - if (fc->index >= fc->server_list->nr_servers) - fc->index = 0; - if (fc->index != fc->start) - goto use_server; - - /* That's all the servers poked to no good effect. Try again if some - * of them were busy. - */ - if (fc->flags & AFS_FS_CURSOR_VBUSY) - goto restart_from_beginning; - - fc->ac.error = -EDESTADDRREQ; - goto failed; use_server: _debug("use"); @@ -383,6 +363,7 @@ use_server: afs_get_addrlist(alist); read_unlock(&server->fs_lock); + memset(&fc->ac, 0, sizeof(fc->ac)); /* Probe the current fileserver if we haven't done so yet. */ if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) { @@ -397,12 +378,8 @@ use_server: else afs_put_addrlist(alist); - fc->ac.addr = NULL; fc->ac.start = READ_ONCE(alist->index); fc->ac.index = fc->ac.start; - fc->ac.error = 0; - fc->ac.begun = false; - goto iterate_address; iterate_address: ASSERT(fc->ac.alist); @@ -410,16 +387,35 @@ iterate_address: /* Iterate over the current server's address list to try and find an * address on which it will respond to us. */ - if (afs_iterate_addresses(&fc->ac)) { - _leave(" = t"); - return true; - } + if (!afs_iterate_addresses(&fc->ac)) + goto next_server; + + _leave(" = t"); + return true; +next_server: + _debug("next"); afs_end_cursor(&fc->ac); - goto next_server; + afs_put_cb_interest(afs_v2net(vnode), fc->cbi); + fc->cbi = NULL; + fc->index++; + if (fc->index >= fc->server_list->nr_servers) + fc->index = 0; + if (fc->index != fc->start) + goto use_server; + + /* That's all the servers poked to no good effect. Try again if some + * of them were busy. + */ + if (fc->flags & AFS_FS_CURSOR_VBUSY) + goto restart_from_beginning; + + fc->ac.error = -EDESTADDRREQ; + goto failed; failed: fc->flags |= AFS_FS_CURSOR_STOP; + afs_end_cursor(&fc->ac); _leave(" = f [failed %d]", fc->ac.error); return false; } @@ -458,12 +454,10 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc) return false; } + memset(&fc->ac, 0, sizeof(fc->ac)); fc->ac.alist = alist; - fc->ac.addr = NULL; fc->ac.start = READ_ONCE(alist->index); fc->ac.index = fc->ac.start; - fc->ac.error = 0; - fc->ac.begun = false; goto iterate_address; case 0: @@ -520,238 +514,3 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc) return fc->ac.error; } - -#if 0 -/* - * Set a filesystem server cursor for using a specific FS server. - */ -int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) -{ - afs_init_fs_cursor(fc, vnode); - - read_seqlock_excl(&vnode->cb_lock); - if (vnode->cb_interest) { - if (vnode->cb_interest->server->fs_state == 0) - fc->server = afs_get_server(vnode->cb_interest->server); - else - fc->ac.error = vnode->cb_interest->server->fs_state; - } else { - fc->ac.error = -ESTALE; - } - read_sequnlock_excl(&vnode->cb_lock); - - return fc->ac.error; -} - -/* - * pick a server to use to try accessing this volume - * - returns with an elevated usage count on the server chosen - */ -bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode) -{ - struct afs_volume *volume = vnode->volume; - struct afs_server *server; - int ret, state, loop; - - _enter("%s", volume->vlocation->vldb.name); - - /* stick with the server we're already using if we can */ - if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) { - fc->server = afs_get_server(vnode->cb_interest->server); - goto set_server; - } - - down_read(&volume->server_sem); - - /* handle the no-server case */ - if (volume->nservers == 0) { - fc->ac.error = volume->rjservers ? -ENOMEDIUM : -ESTALE; - up_read(&volume->server_sem); - _leave(" = f [no servers %d]", fc->ac.error); - return false; - } - - /* basically, just search the list for the first live server and use - * that */ - ret = 0; - for (loop = 0; loop < volume->nservers; loop++) { - server = volume->servers[loop]; - state = server->fs_state; - - _debug("consider %d [%d]", loop, state); - - switch (state) { - case 0: - goto picked_server; - - case -ENETUNREACH: - if (ret == 0) - ret = state; - break; - - case -EHOSTUNREACH: - if (ret == 0 || - ret == -ENETUNREACH) - ret = state; - break; - - case -ECONNREFUSED: - if (ret == 0 || - ret == -ENETUNREACH || - ret == -EHOSTUNREACH) - ret = state; - break; - - default: - case -EREMOTEIO: - if (ret == 0 || - ret == -ENETUNREACH || - ret == -EHOSTUNREACH || - ret == -ECONNREFUSED) - ret = state; - break; - } - } - -error: - fc->ac.error = ret; - - /* no available servers - * - TODO: handle the no active servers case better - */ - up_read(&volume->server_sem); - _leave(" = f [%d]", fc->ac.error); - return false; - -picked_server: - /* Found an apparently healthy server. We need to register an interest - * in receiving callbacks before we talk to it. - */ - ret = afs_register_server_cb_interest(vnode, - &volume->cb_interests[loop], server); - if (ret < 0) - goto error; - - fc->server = afs_get_server(server); - up_read(&volume->server_sem); -set_server: - fc->ac.alist = afs_get_addrlist(fc->server->addrs); - fc->ac.addr = &fc->ac.alist->addrs[0]; - _debug("USING SERVER: %pIS\n", &fc->ac.addr->transport); - _leave(" = t (picked %pIS)", &fc->ac.addr->transport); - return true; -} - -/* - * release a server after use - * - releases the ref on the server struct that was acquired by picking - * - records result of using a particular server to access a volume - * - return true to try again, false if okay or to issue error - * - the caller must release the server struct if result was false - */ -bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc, - struct afs_vnode *vnode) -{ - struct afs_volume *volume = vnode->volume; - struct afs_server *server = fc->server; - unsigned loop; - - _enter("%s,%pIS,%d", - volume->vlocation->vldb.name, &fc->ac.addr->transport, - fc->ac.error); - - switch (fc->ac.error) { - /* success */ - case 0: - server->fs_state = 0; - _leave(" = f"); - return false; - - /* the fileserver denied all knowledge of the volume */ - case -ENOMEDIUM: - down_write(&volume->server_sem); - - /* firstly, find where the server is in the active list (if it - * is) */ - for (loop = 0; loop < volume->nservers; loop++) - if (volume->servers[loop] == server) - goto present; - - /* no longer there - may have been discarded by another op */ - goto try_next_server_upw; - - present: - volume->nservers--; - memmove(&volume->servers[loop], - &volume->servers[loop + 1], - sizeof(volume->servers[loop]) * - (volume->nservers - loop)); - volume->servers[volume->nservers] = NULL; - afs_put_server(afs_v2net(vnode), server); - volume->rjservers++; - - if (volume->nservers > 0) - /* another server might acknowledge its existence */ - goto try_next_server_upw; - - /* handle the case where all the fileservers have rejected the - * volume - * - TODO: try asking the fileservers for volume information - * - TODO: contact the VL server again to see if the volume is - * no longer registered - */ - up_write(&volume->server_sem); - afs_put_server(afs_v2net(vnode), server); - fc->server = NULL; - _leave(" = f [completely rejected]"); - return false; - - /* problem reaching the server */ - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - case -ETIME: - case -ETIMEDOUT: - case -EREMOTEIO: - /* mark the server as dead - * TODO: vary dead timeout depending on error - */ - spin_lock(&server->fs_lock); - if (!server->fs_state) { - server->fs_state = fc->ac.error; - printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error); - } - spin_unlock(&server->fs_lock); - goto try_next_server; - - /* miscellaneous error */ - default: - case -ENOMEM: - case -ENONET: - /* tell the caller to accept the result */ - afs_put_server(afs_v2net(vnode), server); - fc->server = NULL; - _leave(" = f [local failure]"); - return false; - } - - /* tell the caller to loop around and try the next server */ -try_next_server_upw: - up_write(&volume->server_sem); -try_next_server: - afs_put_server(afs_v2net(vnode), server); - _leave(" = t [try next server]"); - return true; -} - -/* - * Clean up a fileserver cursor. - */ -int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net) -{ - afs_end_cursor(&fc->ac); - afs_put_server(net, fc->server); - return fc->ac.error; -} - -#endif diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 0ab3f8457839..0f8dc4c8f07c 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -58,7 +58,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, server = afs_lookup_server(cell, key, &vldb->fs_server[i]); if (IS_ERR(server)) { ret = PTR_ERR(server); - if (ret == -ENOENT) + if (ret == -ENOENT || + ret == -ENOMEDIUM) continue; goto error_2; } diff --git a/fs/afs/super.c b/fs/afs/super.c index 1037dd41a622..3623c952b6ff 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -64,6 +64,7 @@ static atomic_t afs_count_active_inodes; enum { afs_no_opt, afs_opt_cell, + afs_opt_dyn, afs_opt_rwpath, afs_opt_vol, afs_opt_autocell, @@ -71,6 +72,7 @@ enum { static const match_table_t afs_options_list = { { afs_opt_cell, "cell=%s" }, + { afs_opt_dyn, "dyn" }, { afs_opt_rwpath, "rwpath" }, { afs_opt_vol, "vol=%s" }, { afs_opt_autocell, "autocell" }, @@ -148,6 +150,11 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root) const char *suf = ""; char pref = '%'; + if (as->dyn_root) { + seq_puts(m, "none"); + return 0; + } + switch (volume->type) { case AFSVL_RWVOL: break; @@ -171,8 +178,12 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root) */ static int afs_show_options(struct seq_file *m, struct dentry *root) { + struct afs_super_info *as = AFS_FS_S(root->d_sb); + + if (as->dyn_root) + seq_puts(m, ",dyn"); if (test_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(d_inode(root))->flags)) - seq_puts(m, "autocell"); + seq_puts(m, ",autocell"); return 0; } @@ -212,7 +223,7 @@ static int afs_parse_options(struct afs_mount_params *params, break; case afs_opt_rwpath: - params->rwpath = 1; + params->rwpath = true; break; case afs_opt_vol: @@ -220,7 +231,11 @@ static int afs_parse_options(struct afs_mount_params *params, break; case afs_opt_autocell: - params->autocell = 1; + params->autocell = true; + break; + + case afs_opt_dyn: + params->dyn_root = true; break; default: @@ -254,7 +269,7 @@ static int afs_parse_device_name(struct afs_mount_params *params, int cellnamesz; _enter(",%s", name); - + if (!name) { printk(KERN_ERR "kAFS: no volume name specified\n"); return -EINVAL; @@ -336,7 +351,14 @@ static int afs_test_super(struct super_block *sb, void *data) struct afs_super_info *as1 = data; struct afs_super_info *as = AFS_FS_S(sb); - return as->net == as1->net && as->volume->vid == as1->volume->vid; + return (as->net == as1->net && + as->volume && + as->volume->vid == as1->volume->vid); +} + +static int afs_dynroot_test_super(struct super_block *sb, void *data) +{ + return false; } static int afs_set_super(struct super_block *sb, void *data) @@ -365,24 +387,30 @@ static int afs_fill_super(struct super_block *sb, sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; - sb->s_xattr = afs_xattr_handlers; + if (!as->dyn_root) + sb->s_xattr = afs_xattr_handlers; ret = super_setup_bdi(sb); if (ret) return ret; sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; - sprintf(sb->s_id, "%u", as->volume->vid); - - afs_activate_volume(as->volume); /* allocate the root inode and dentry */ - fid.vid = as->volume->vid; - fid.vnode = 1; - fid.unique = 1; - inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); + if (as->dyn_root) { + inode = afs_iget_pseudo_dir(sb, true); + sb->s_flags |= SB_RDONLY; + } else { + sprintf(sb->s_id, "%u", as->volume->vid); + afs_activate_volume(as->volume); + fid.vid = as->volume->vid; + fid.vnode = 1; + fid.unique = 1; + inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); + } + if (IS_ERR(inode)) return PTR_ERR(inode); - if (params->autocell) + if (params->autocell || params->dyn_root) set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); ret = -ENOMEM; @@ -407,7 +435,10 @@ static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params) as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); if (as) { as->net = afs_get_net(params->net); - as->cell = afs_get_cell(params->cell); + if (params->dyn_root) + as->dyn_root = true; + else + as->cell = afs_get_cell(params->cell); } return as; } @@ -451,18 +482,20 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, goto error; } - ret = afs_parse_device_name(¶ms, dev_name); - if (ret < 0) - goto error; + if (!params.dyn_root) { + ret = afs_parse_device_name(¶ms, dev_name); + if (ret < 0) + goto error; - /* try and do the mount securely */ - key = afs_request_key(params.cell); - if (IS_ERR(key)) { - _leave(" = %ld [key]", PTR_ERR(key)); - ret = PTR_ERR(key); - goto error; + /* try and do the mount securely */ + key = afs_request_key(params.cell); + if (IS_ERR(key)) { + _leave(" = %ld [key]", PTR_ERR(key)); + ret = PTR_ERR(key); + goto error; + } + params.key = key; } - params.key = key; /* allocate a superblock info record */ ret = -ENOMEM; @@ -470,20 +503,25 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, if (!as) goto error_key; - /* Assume we're going to need a volume record; at the very least we can - * use it to update the volume record if we have one already. This - * checks that the volume exists within the cell. - */ - candidate = afs_create_volume(¶ms); - if (IS_ERR(candidate)) { - ret = PTR_ERR(candidate); - goto error_as; - } + if (!params.dyn_root) { + /* Assume we're going to need a volume record; at the very + * least we can use it to update the volume record if we have + * one already. This checks that the volume exists within the + * cell. + */ + candidate = afs_create_volume(¶ms); + if (IS_ERR(candidate)) { + ret = PTR_ERR(candidate); + goto error_as; + } - as->volume = candidate; + as->volume = candidate; + } /* allocate a deviceless superblock */ - sb = sget(fs_type, afs_test_super, afs_set_super, flags, as); + sb = sget(fs_type, + as->dyn_root ? afs_dynroot_test_super : afs_test_super, + afs_set_super, flags, as); if (IS_ERR(sb)) { ret = PTR_ERR(sb); goto error_as; @@ -529,9 +567,11 @@ static void afs_kill_super(struct super_block *sb) /* Clear the callback interests (which will do ilookup5) before * deactivating the superblock. */ - afs_clear_callback_interests(as->net, as->volume->servers); + if (as->volume) + afs_clear_callback_interests(as->net, as->volume->servers); kill_anon_super(sb); - afs_deactivate_volume(as->volume); + if (as->volume) + afs_deactivate_volume(as->volume); afs_destroy_sbi(as); } @@ -619,12 +659,24 @@ static void afs_destroy_inode(struct inode *inode) */ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) { + struct afs_super_info *as = AFS_FS_S(dentry->d_sb); struct afs_fs_cursor fc; struct afs_volume_status vs; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; int ret; + buf->f_type = dentry->d_sb->s_magic; + buf->f_bsize = AFS_BLOCK_SIZE; + buf->f_namelen = AFSNAMEMAX - 1; + + if (as->dyn_root) { + buf->f_blocks = 1; + buf->f_bavail = 0; + buf->f_bfree = 0; + return 0; + } + key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) return PTR_ERR(key); @@ -645,10 +697,6 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) key_put(key); if (ret == 0) { - buf->f_type = dentry->d_sb->s_magic; - buf->f_bsize = AFS_BLOCK_SIZE; - buf->f_namelen = AFSNAMEMAX - 1; - if (vs.max_quota == 0) buf->f_blocks = vs.part_max_blocks; else diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index e372f89fd36a..5d8562f1ad4a 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -23,7 +23,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) struct afs_uvldbentry__xdr *uvldb; struct afs_vldb_entry *entry; bool new_only = false; - u32 tmp; + u32 tmp, nr_servers; int i, ret; _enter(""); @@ -36,6 +36,10 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) uvldb = call->buffer; entry = call->reply[0]; + nr_servers = ntohl(uvldb->nServers); + if (nr_servers > AFS_NMAXNSERVERS) + nr_servers = AFS_NMAXNSERVERS; + for (i = 0; i < ARRAY_SIZE(uvldb->name) - 1; i++) entry->name[i] = (u8)ntohl(uvldb->name[i]); entry->name[i] = 0; @@ -44,14 +48,14 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) /* If there is a new replication site that we can use, ignore all the * sites that aren't marked as new. */ - for (i = 0; i < AFS_NMAXNSERVERS; i++) { + for (i = 0; i < nr_servers; i++) { tmp = ntohl(uvldb->serverFlags[i]); if (!(tmp & AFS_VLSF_DONTUSE) && (tmp & AFS_VLSF_NEWREPSITE)) new_only = true; } - for (i = 0; i < AFS_NMAXNSERVERS; i++) { + for (i = 0; i < nr_servers; i++) { struct afs_uuid__xdr *xdr; struct afs_uuid *uuid; int j; diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 684c48293353..b517a588781f 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -26,9 +26,8 @@ static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params, unsigned long type_mask) { struct afs_server_list *slist; - struct afs_server *server; struct afs_volume *volume; - int ret = -ENOMEM, nr_servers = 0, i, j; + int ret = -ENOMEM, nr_servers = 0, i; for (i = 0; i < vldb->nr_servers; i++) if (vldb->fs_mask[i] & type_mask) @@ -58,50 +57,10 @@ static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params, refcount_set(&slist->usage, 1); volume->servers = slist; - - /* Make sure a records exists for each server this volume occupies. */ - for (i = 0; i < nr_servers; i++) { - if (!(vldb->fs_mask[i] & type_mask)) - continue; - - server = afs_lookup_server(params->cell, params->key, - &vldb->fs_server[i]); - if (IS_ERR(server)) { - ret = PTR_ERR(server); - if (ret == -ENOENT) - continue; - goto error_2; - } - - /* Insertion-sort by server pointer */ - for (j = 0; j < slist->nr_servers; j++) - if (slist->servers[j].server >= server) - break; - if (j < slist->nr_servers) { - if (slist->servers[j].server == server) { - afs_put_server(params->net, server); - continue; - } - - memmove(slist->servers + j + 1, - slist->servers + j, - (slist->nr_servers - j) * sizeof(struct afs_server_entry)); - } - - slist->servers[j].server = server; - slist->nr_servers++; - } - - if (slist->nr_servers == 0) { - ret = -EDESTADDRREQ; - goto error_2; - } - return volume; -error_2: - afs_put_serverlist(params->net, slist); error_1: + afs_put_cell(params->net, volume->cell); kfree(volume); error_0: return ERR_PTR(ret); @@ -327,7 +286,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) /* See if the volume's server list got updated. */ new = afs_alloc_server_list(volume->cell, key, - vldb, (1 << volume->type)); + vldb, (1 << volume->type)); if (IS_ERR(new)) { ret = PTR_ERR(new); goto error_vldb; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index ee236231cafa..af2832aaeec5 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -444,11 +444,15 @@ unacquire_none: static int __init befs_init_inodecache(void) { - befs_inode_cachep = kmem_cache_create("befs_inode_cache", - sizeof (struct befs_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), - init_once); + befs_inode_cachep = kmem_cache_create_usercopy("befs_inode_cache", + sizeof(struct befs_inode_info), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| + SLAB_ACCOUNT), + offsetof(struct befs_inode_info, + i_data.symlink), + sizeof_field(struct befs_inode_info, + i_data.symlink), + init_once); if (befs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 83732fef510d..bdb201230bae 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1599,6 +1599,8 @@ static int fill_files_note(struct memelfnote *note) /* *Estimated* file count and total data size needed */ count = current->mm->map_count; + if (count > UINT_MAX / 64) + return -EINVAL; size = count * 64; names_ofs = (2 + 3 * count) * sizeof(data[0]); diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 2e558227931a..273351ee4c46 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -38,9 +38,6 @@ config BTRFS_FS_POSIX_ACL POSIX Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N config BTRFS_FS_CHECK_INTEGRITY diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index 264e9bf83ff3..52095f473464 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -34,7 +34,4 @@ config CEPH_FS_POSIX_ACL POSIX Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index dbf07051aacd..b4336b42ce3b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -299,7 +299,8 @@ unlock: * start an async read(ahead) operation. return nr_pages we submitted * a read for on success, or negative error code. */ -static int start_read(struct inode *inode, struct list_head *page_list, int max) +static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx, + struct list_head *page_list, int max) { struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->client->osdc; @@ -316,7 +317,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) int got = 0; int ret = 0; - if (!current->journal_info) { + if (!rw_ctx) { /* caller of readpages does not hold buffer and read caps * (fadvise, madvise and readahead cases) */ int want = CEPH_CAP_FILE_CACHE; @@ -437,6 +438,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, { struct inode *inode = file_inode(file); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_file_info *ci = file->private_data; + struct ceph_rw_context *rw_ctx; int rc = 0; int max = 0; @@ -449,11 +452,12 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, if (rc == 0) goto out; + rw_ctx = ceph_find_rw_context(ci); max = fsc->mount_options->rsize >> PAGE_SHIFT; - dout("readpages %p file %p nr_pages %d max %d\n", - inode, file, nr_pages, max); + dout("readpages %p file %p ctx %p nr_pages %d max %d\n", + inode, file, rw_ctx, nr_pages, max); while (!list_empty(page_list)) { - rc = start_read(inode, page_list, max); + rc = start_read(inode, rw_ctx, page_list, max); if (rc < 0) goto out; } @@ -574,7 +578,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct ceph_fs_client *fsc; struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); - long writeback_stat; int err, len = PAGE_SIZE; struct ceph_writeback_ctl ceph_wbc; @@ -615,8 +618,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n", inode, page, page->index, page_off, len, snapc, snapc->seq); - writeback_stat = atomic_long_inc_return(&fsc->writeback_count); - if (writeback_stat > + if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); @@ -651,6 +653,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) end_page_writeback(page); ceph_put_wrbuffer_cap_refs(ci, 1, snapc); ceph_put_snap_context(snapc); /* page's reference */ + + if (atomic_long_dec_return(&fsc->writeback_count) < + CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb)) + clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); + return err; } @@ -1450,9 +1457,10 @@ static int ceph_filemap_fault(struct vm_fault *vmf) if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || ci->i_inline_version == CEPH_INLINE_NONE) { - current->journal_info = vma->vm_file; + CEPH_DEFINE_RW_CONTEXT(rw_ctx, got); + ceph_add_rw_context(fi, &rw_ctx); ret = filemap_fault(vmf); - current->journal_info = NULL; + ceph_del_rw_context(fi, &rw_ctx); } else ret = -EAGAIN; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a14b2c974c9e..6582c4507e6c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -154,13 +154,19 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) spin_unlock(&mdsc->caps_list_lock); } -void ceph_reserve_caps(struct ceph_mds_client *mdsc, +/* + * Called under mdsc->mutex. + */ +int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx, int need) { - int i; + int i, j; struct ceph_cap *cap; int have; int alloc = 0; + int max_caps; + bool trimmed = false; + struct ceph_mds_session *s; LIST_HEAD(newcaps); dout("reserve caps ctx=%p need=%d\n", ctx, need); @@ -179,16 +185,37 @@ void ceph_reserve_caps(struct ceph_mds_client *mdsc, spin_unlock(&mdsc->caps_list_lock); for (i = have; i < need; i++) { +retry: cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); - if (!cap) - break; + if (!cap) { + if (!trimmed) { + for (j = 0; j < mdsc->max_sessions; j++) { + s = __ceph_lookup_mds_session(mdsc, j); + if (!s) + continue; + mutex_unlock(&mdsc->mutex); + + mutex_lock(&s->s_mutex); + max_caps = s->s_nr_caps - (need - i); + ceph_trim_caps(mdsc, s, max_caps); + mutex_unlock(&s->s_mutex); + + ceph_put_mds_session(s); + mutex_lock(&mdsc->mutex); + } + trimmed = true; + goto retry; + } else { + pr_warn("reserve caps ctx=%p ENOMEM " + "need=%d got=%d\n", + ctx, need, have + alloc); + goto out_nomem; + } + } list_add(&cap->caps_item, &newcaps); alloc++; } - /* we didn't manage to reserve as much as we needed */ - if (have + alloc != need) - pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", - ctx, need, have + alloc); + BUG_ON(have + alloc != need); spin_lock(&mdsc->caps_list_lock); mdsc->caps_total_count += alloc; @@ -204,6 +231,24 @@ void ceph_reserve_caps(struct ceph_mds_client *mdsc, dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", ctx, mdsc->caps_total_count, mdsc->caps_use_count, mdsc->caps_reserve_count, mdsc->caps_avail_count); + return 0; + +out_nomem: + while (!list_empty(&newcaps)) { + cap = list_first_entry(&newcaps, + struct ceph_cap, caps_item); + list_del(&cap->caps_item); + kmem_cache_free(ceph_cap_cachep, cap); + } + + spin_lock(&mdsc->caps_list_lock); + mdsc->caps_avail_count += have; + mdsc->caps_reserve_count -= have; + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + + mdsc->caps_avail_count); + spin_unlock(&mdsc->caps_list_lock); + return -ENOMEM; } int ceph_unreserve_caps(struct ceph_mds_client *mdsc, @@ -498,7 +543,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, */ if ((issued & CEPH_CAP_FILE_SHARED) != (had & CEPH_CAP_FILE_SHARED)) { if (issued & CEPH_CAP_FILE_SHARED) - ci->i_shared_gen++; + atomic_inc(&ci->i_shared_gen); if (S_ISDIR(ci->vfs_inode.i_mode)) { dout(" marking %p NOT complete\n", &ci->vfs_inode); __ceph_dir_clear_complete(ci); @@ -577,18 +622,30 @@ void ceph_add_cap(struct inode *inode, } } - if (!ci->i_snap_realm) { + if (!ci->i_snap_realm || + ((flags & CEPH_CAP_FLAG_AUTH) && + realmino != (u64)-1 && ci->i_snap_realm->ino != realmino)) { /* * add this inode to the appropriate snap realm */ struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc, realmino); if (realm) { + struct ceph_snap_realm *oldrealm = ci->i_snap_realm; + if (oldrealm) { + spin_lock(&oldrealm->inodes_with_caps_lock); + list_del_init(&ci->i_snap_realm_item); + spin_unlock(&oldrealm->inodes_with_caps_lock); + } + spin_lock(&realm->inodes_with_caps_lock); ci->i_snap_realm = realm; list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps); spin_unlock(&realm->inodes_with_caps_lock); + + if (oldrealm) + ceph_put_snap_realm(mdsc, oldrealm); } else { pr_err("ceph_add_cap: couldn't find snap realm %llx\n", realmino); @@ -890,6 +947,11 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check) /* * called under i_ceph_lock */ +static int __ceph_is_single_caps(struct ceph_inode_info *ci) +{ + return rb_first(&ci->i_caps) == rb_last(&ci->i_caps); +} + static int __ceph_is_any_caps(struct ceph_inode_info *ci) { return !RB_EMPTY_ROOT(&ci->i_caps); @@ -1703,21 +1765,24 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, int mds = -1; /* keep track of how far we've gone through i_caps list to avoid an infinite loop on retry */ struct rb_node *p; - int delayed = 0, sent = 0, num; - bool is_delayed = flags & CHECK_CAPS_NODELAY; + int delayed = 0, sent = 0; + bool no_delay = flags & CHECK_CAPS_NODELAY; bool queue_invalidate = false; - bool force_requeue = false; bool tried_invalidate = false; /* if we are unmounting, flush any unused caps immediately. */ if (mdsc->stopping) - is_delayed = true; + no_delay = true; spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_FLUSH) flags |= CHECK_CAPS_FLUSH; + if (!(flags & CHECK_CAPS_AUTHONLY) || + (ci->i_auth_cap && __ceph_is_single_caps(ci))) + __cap_delay_cancel(mdsc, ci); + goto retry_locked; retry: spin_lock(&ci->i_ceph_lock); @@ -1772,7 +1837,7 @@ retry_locked: * have cached pages, but don't want them, then try to invalidate. * If we fail, it's because pages are locked.... try again later. */ - if ((!is_delayed || mdsc->stopping) && + if ((!no_delay || mdsc->stopping) && !S_ISDIR(inode->i_mode) && /* ignore readdir cache */ !(ci->i_wb_ref || ci->i_wrbuffer_ref) && /* no dirty pages... */ inode->i_data.nrpages && /* have cached pages */ @@ -1781,27 +1846,16 @@ retry_locked: !tried_invalidate) { dout("check_caps trying to invalidate on %p\n", inode); if (try_nonblocking_invalidate(inode) < 0) { - if (revoking & (CEPH_CAP_FILE_CACHE| - CEPH_CAP_FILE_LAZYIO)) { - dout("check_caps queuing invalidate\n"); - queue_invalidate = true; - ci->i_rdcache_revoking = ci->i_rdcache_gen; - } else { - dout("check_caps failed to invalidate pages\n"); - /* we failed to invalidate pages. check these - caps again later. */ - force_requeue = true; - __cap_set_timeouts(mdsc, ci); - } + dout("check_caps queuing invalidate\n"); + queue_invalidate = true; + ci->i_rdcache_revoking = ci->i_rdcache_gen; } tried_invalidate = true; goto retry_locked; } - num = 0; for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { cap = rb_entry(p, struct ceph_cap, ci_node); - num++; /* avoid looping forever */ if (mds >= cap->mds || @@ -1864,7 +1918,7 @@ retry_locked: cap->mds_wanted == want) continue; /* nope, all good */ - if (is_delayed) + if (no_delay) goto ack; /* delay? */ @@ -1955,15 +2009,8 @@ ack: goto retry; /* retake i_ceph_lock and restart our cap scan. */ } - /* - * Reschedule delayed caps release if we delayed anything, - * otherwise cancel. - */ - if (delayed && is_delayed) - force_requeue = true; /* __send_cap delayed release; requeue */ - if (!delayed && !is_delayed) - __cap_delay_cancel(mdsc, ci); - else if (!is_delayed || force_requeue) + /* Reschedule delayed caps release if we delayed anything */ + if (delayed) __cap_delay_requeue(mdsc, ci); spin_unlock(&ci->i_ceph_lock); @@ -2160,7 +2207,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) u64 flush_tid; int err = 0; int dirty; - int wait = wbc->sync_mode == WB_SYNC_ALL; + int wait = (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync); dout("write_inode %p wait=%d\n", inode, wait); if (wait) { @@ -3426,7 +3473,14 @@ retry: */ issued = cap->issued; - WARN_ON(issued != cap->implemented); + if (issued != cap->implemented) + pr_err_ratelimited("handle_cap_export: issued != implemented: " + "ino (%llx.%llx) mds%d seq %d mseq %d " + "issued %s implemented %s\n", + ceph_vinop(inode), mds, cap->seq, cap->mseq, + ceph_cap_string(issued), + ceph_cap_string(cap->implemented)); + tcap = __get_cap_for_mds(ci, target); if (tcap) { @@ -3572,12 +3626,13 @@ retry: if ((ph->flags & CEPH_CAP_FLAG_AUTH) && (ocap->seq != le32_to_cpu(ph->seq) || ocap->mseq != le32_to_cpu(ph->mseq))) { - pr_err("handle_cap_import: mismatched seq/mseq: " - "ino (%llx.%llx) mds%d seq %d mseq %d " - "importer mds%d has peer seq %d mseq %d\n", - ceph_vinop(inode), peer, ocap->seq, - ocap->mseq, mds, le32_to_cpu(ph->seq), - le32_to_cpu(ph->mseq)); + pr_err_ratelimited("handle_cap_import: " + "mismatched seq/mseq: ino (%llx.%llx) " + "mds%d seq %d mseq %d importer mds%d " + "has peer seq %d mseq %d\n", + ceph_vinop(inode), peer, ocap->seq, + ocap->mseq, mds, le32_to_cpu(ph->seq), + le32_to_cpu(ph->mseq)); } __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); } @@ -3939,11 +3994,20 @@ int ceph_encode_inode_release(void **p, struct inode *inode, cap = __get_cap_for_mds(ci, mds); if (cap && __cap_is_valid(cap)) { - if (force || - ((cap->issued & drop) && - (cap->issued & unless) == 0)) { - if ((cap->issued & drop) && - (cap->issued & unless) == 0) { + unless &= cap->issued; + if (unless) { + if (unless & CEPH_CAP_AUTH_EXCL) + drop &= ~CEPH_CAP_AUTH_SHARED; + if (unless & CEPH_CAP_LINK_EXCL) + drop &= ~CEPH_CAP_LINK_SHARED; + if (unless & CEPH_CAP_XATTR_EXCL) + drop &= ~CEPH_CAP_XATTR_SHARED; + if (unless & CEPH_CAP_FILE_EXCL) + drop &= ~CEPH_CAP_FILE_SHARED; + } + + if (force || (cap->issued & drop)) { + if (cap->issued & drop) { int wanted = __ceph_caps_wanted(ci); if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0) wanted |= cap->mds_wanted; @@ -3975,7 +4039,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode, *p += sizeof(*rel); ret = 1; } else { - dout("encode_inode_release %p cap %p %s\n", + dout("encode_inode_release %p cap %p %s (noop)\n", inode, cap, ceph_cap_string(cap->issued)); } } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 8a5266699b67..0c4346806e17 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -173,7 +173,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, * the MDS if/when the directory is modified). */ static int __dcache_readdir(struct file *file, struct dir_context *ctx, - u32 shared_gen) + int shared_gen) { struct ceph_file_info *fi = file->private_data; struct dentry *parent = file->f_path.dentry; @@ -184,7 +184,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, u64 idx = 0; int err = 0; - dout("__dcache_readdir %p v%u at %llx\n", dir, shared_gen, ctx->pos); + dout("__dcache_readdir %p v%u at %llx\n", dir, (unsigned)shared_gen, ctx->pos); /* search start position */ if (ctx->pos > 2) { @@ -231,11 +231,17 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, goto out; } - di = ceph_dentry(dentry); spin_lock(&dentry->d_lock); - if (di->lease_shared_gen == shared_gen && - d_really_is_positive(dentry) && - fpos_cmp(ctx->pos, di->offset) <= 0) { + di = ceph_dentry(dentry); + if (d_unhashed(dentry) || + d_really_is_negative(dentry) || + di->lease_shared_gen != shared_gen) { + spin_unlock(&dentry->d_lock); + dput(dentry); + err = -EAGAIN; + goto out; + } + if (fpos_cmp(ctx->pos, di->offset) <= 0) { emit_dentry = true; } spin_unlock(&dentry->d_lock); @@ -333,7 +339,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ceph_snap(inode) != CEPH_SNAPDIR && __ceph_dir_is_complete_ordered(ci) && __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { - u32 shared_gen = ci->i_shared_gen; + int shared_gen = atomic_read(&ci->i_shared_gen); spin_unlock(&ci->i_ceph_lock); err = __dcache_readdir(file, ctx, shared_gen); if (err != -EAGAIN) @@ -381,6 +387,7 @@ more: if (op == CEPH_MDS_OP_READDIR) { req->r_direct_hash = ceph_frag_value(frag); __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); + req->r_inode_drop = CEPH_CAP_FILE_EXCL; } if (fi->last_name) { req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL); @@ -750,7 +757,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, spin_unlock(&ci->i_ceph_lock); dout(" dir %p complete, -ENOENT\n", dir); d_add(dentry, NULL); - di->lease_shared_gen = ci->i_shared_gen; + di->lease_shared_gen = atomic_read(&ci->i_shared_gen); return NULL; } spin_unlock(&ci->i_ceph_lock); @@ -835,7 +842,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_args.mknod.mode = cpu_to_le32(mode); req->r_args.mknod.rdev = cpu_to_le32(rdev); - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (acls.pagelist) { req->r_pagelist = acls.pagelist; @@ -887,7 +894,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_dentry = dget(dentry); req->r_num_caps = 2; - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && !req->r_reply_info.head->is_dentry) @@ -936,7 +943,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) req->r_parent = dir; set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_args.mkdir.mode = cpu_to_le32(mode); - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (acls.pagelist) { req->r_pagelist = acls.pagelist; @@ -983,7 +990,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; /* release LINK_SHARED on source inode (mds will lock it) */ - req->r_old_inode_drop = CEPH_CAP_LINK_SHARED; + req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; err = ceph_mdsc_do_request(mdsc, dir, req); if (err) { d_drop(dentry); @@ -1096,7 +1103,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; /* release LINK_RDCACHE on source inode (mds will lock it) */ - req->r_old_inode_drop = CEPH_CAP_LINK_SHARED; + req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; if (d_really_is_positive(new_dentry)) req->r_inode_drop = drop_caps_for_unlink(d_inode(new_dentry)); err = ceph_mdsc_do_request(mdsc, old_dir, req); @@ -1106,16 +1113,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, * do_request, above). If there is no trace, we need * to do it here. */ - - /* d_move screws up sibling dentries' offsets */ - ceph_dir_clear_complete(old_dir); - ceph_dir_clear_complete(new_dir); - d_move(old_dentry, new_dentry); - - /* ensure target dentry is invalidated, despite - rehashing bug in vfs_rename_dir */ - ceph_invalidate_dentry_lease(new_dentry); } ceph_mdsc_put_request(req); return err; @@ -1199,12 +1197,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) int valid = 0; spin_lock(&ci->i_ceph_lock); - if (ci->i_shared_gen == di->lease_shared_gen) + if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen) valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); spin_unlock(&ci->i_ceph_lock); dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", - dir, (unsigned)ci->i_shared_gen, dentry, - (unsigned)di->lease_shared_gen, valid); + dir, (unsigned)atomic_read(&ci->i_shared_gen), + dentry, (unsigned)di->lease_shared_gen, valid); return valid; } @@ -1332,24 +1330,37 @@ static void ceph_d_release(struct dentry *dentry) */ static void ceph_d_prune(struct dentry *dentry) { - dout("ceph_d_prune %p\n", dentry); + struct ceph_inode_info *dir_ci; + struct ceph_dentry_info *di; + + dout("ceph_d_prune %pd %p\n", dentry, dentry); /* do we have a valid parent? */ if (IS_ROOT(dentry)) return; - /* if we are not hashed, we don't affect dir's completeness */ - if (d_unhashed(dentry)) + /* we hold d_lock, so d_parent is stable */ + dir_ci = ceph_inode(d_inode(dentry->d_parent)); + if (dir_ci->i_vino.snap == CEPH_SNAPDIR) return; - if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_SNAPDIR) + /* who calls d_delete() should also disable dcache readdir */ + if (d_really_is_negative(dentry)) return; - /* - * we hold d_lock, so d_parent is stable, and d_fsdata is never - * cleared until d_release - */ - ceph_dir_clear_complete(d_inode(dentry->d_parent)); + /* d_fsdata does not get cleared until d_release */ + if (!d_unhashed(dentry)) { + __ceph_dir_clear_complete(dir_ci); + return; + } + + /* Disable dcache readdir just in case that someone called d_drop() + * or d_invalidate(), but MDS didn't revoke CEPH_CAP_FILE_SHARED + * properly (dcache readdir is still enabled) */ + di = ceph_dentry(dentry); + if (di->offset > 0 && + di->lease_shared_gen == atomic_read(&dir_ci->i_shared_gen)) + __ceph_dir_clear_ordered(dir_ci); } /* diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5c17125f45c7..6639926eed4e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -181,6 +181,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) return -ENOMEM; } cf->fmode = fmode; + + spin_lock_init(&cf->rw_contexts_lock); + INIT_LIST_HEAD(&cf->rw_contexts); + cf->next_offset = 2; cf->readdir_cache_idx = -1; file->private_data = cf; @@ -396,7 +400,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, req->r_dentry = dget(dentry); req->r_num_caps = 2; if (flags & O_CREAT) { - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (acls.pagelist) { req->r_pagelist = acls.pagelist; @@ -464,6 +468,7 @@ int ceph_release(struct inode *inode, struct file *file) ceph_mdsc_put_request(cf->last_readdir); kfree(cf->last_name); kfree(cf->dir_info); + WARN_ON(!list_empty(&cf->rw_contexts)); kmem_cache_free(ceph_file_cachep, cf); /* wake up anyone waiting for caps on this inode */ @@ -1199,12 +1204,13 @@ again: retry_op = READ_INLINE; } } else { + CEPH_DEFINE_RW_CONTEXT(rw_ctx, got); dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, ceph_cap_string(got)); - current->journal_info = filp; + ceph_add_rw_context(fi, &rw_ctx); ret = generic_file_read_iter(iocb, to); - current->journal_info = NULL; + ceph_del_rw_context(fi, &rw_ctx); } dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ab81652198c4..c6ec5aa46100 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -494,7 +494,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_wrbuffer_ref = 0; ci->i_wrbuffer_ref_head = 0; atomic_set(&ci->i_filelock_ref, 0); - ci->i_shared_gen = 0; + atomic_set(&ci->i_shared_gen, 0); ci->i_rdcache_gen = 0; ci->i_rdcache_revoking = 0; @@ -1041,7 +1041,7 @@ static void update_dentry_lease(struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) goto out_unlock; - di->lease_shared_gen = ceph_inode(dir)->i_shared_gen; + di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen); if (duration == 0) goto out_unlock; @@ -1080,6 +1080,27 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) BUG_ON(d_inode(dn)); + if (S_ISDIR(in->i_mode)) { + /* If inode is directory, d_splice_alias() below will remove + * 'realdn' from its origin parent. We need to ensure that + * origin parent's readdir cache will not reference 'realdn' + */ + realdn = d_find_any_alias(in); + if (realdn) { + struct ceph_dentry_info *di = ceph_dentry(realdn); + spin_lock(&realdn->d_lock); + + realdn->d_op->d_prune(realdn); + + di->time = jiffies; + di->lease_shared_gen = 0; + di->offset = 0; + + spin_unlock(&realdn->d_lock); + dput(realdn); + } + } + /* dn must be unhashed */ if (!d_unhashed(dn)) d_drop(dn); @@ -1295,8 +1316,8 @@ retry_lookup: if (!rinfo->head->is_target) { dout("fill_trace null dentry\n"); if (d_really_is_positive(dn)) { - ceph_dir_clear_ordered(dir); dout("d_delete %p\n", dn); + ceph_dir_clear_ordered(dir); d_delete(dn); } else if (have_lease) { if (d_unhashed(dn)) @@ -1323,7 +1344,6 @@ retry_lookup: dout(" %p links to %p %llx.%llx, not %llx.%llx\n", dn, d_inode(dn), ceph_vinop(d_inode(dn)), ceph_vinop(in)); - ceph_dir_clear_ordered(dir); d_invalidate(dn); have_lease = false; } @@ -1573,9 +1593,19 @@ retry_lookup: } else if (d_really_is_positive(dn) && (ceph_ino(d_inode(dn)) != tvino.ino || ceph_snap(d_inode(dn)) != tvino.snap)) { + struct ceph_dentry_info *di = ceph_dentry(dn); dout(" dn %p points to wrong inode %p\n", dn, d_inode(dn)); - __ceph_dir_clear_ordered(ci); + + spin_lock(&dn->d_lock); + if (di->offset > 0 && + di->lease_shared_gen == + atomic_read(&ci->i_shared_gen)) { + __ceph_dir_clear_ordered(ci); + di->offset = 0; + } + spin_unlock(&dn->d_lock); + d_delete(dn); dput(dn); goto retry_lookup; @@ -1600,9 +1630,7 @@ retry_lookup: &req->r_caps_reservation); if (ret < 0) { pr_err("fill_inode badness on %p\n", in); - if (d_really_is_positive(dn)) - __ceph_dir_clear_ordered(ci); - else + if (d_really_is_negative(dn)) iput(in); d_drop(dn); err = ret; @@ -2000,8 +2028,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) ceph_encode_timespec(&req->r_args.setattr.atime, &attr->ia_atime); mask |= CEPH_SETATTR_ATIME; - release |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD | - CEPH_CAP_FILE_WR; + release |= CEPH_CAP_FILE_SHARED | + CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; } } if (ia_valid & ATTR_MTIME) { @@ -2022,8 +2050,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) ceph_encode_timespec(&req->r_args.setattr.mtime, &attr->ia_mtime); mask |= CEPH_SETATTR_MTIME; - release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | - CEPH_CAP_FILE_WR; + release |= CEPH_CAP_FILE_SHARED | + CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; } } if (ia_valid & ATTR_SIZE) { @@ -2041,8 +2069,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) req->r_args.setattr.old_size = cpu_to_le64(inode->i_size); mask |= CEPH_SETATTR_SIZE; - release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | - CEPH_CAP_FILE_WR; + release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL | + CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; } } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1b468250e947..2e8f90f96540 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -604,10 +604,20 @@ static void __register_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req, struct inode *dir) { + int ret = 0; + req->r_tid = ++mdsc->last_tid; - if (req->r_num_caps) - ceph_reserve_caps(mdsc, &req->r_caps_reservation, - req->r_num_caps); + if (req->r_num_caps) { + ret = ceph_reserve_caps(mdsc, &req->r_caps_reservation, + req->r_num_caps); + if (ret < 0) { + pr_err("__register_request %p " + "failed to reserve caps: %d\n", req, ret); + /* set req->r_err to fail early from __do_request */ + req->r_err = ret; + return; + } + } dout("__register_request %p tid %lld\n", req, req->r_tid); ceph_mdsc_get_request(req); insert_request(&mdsc->request_tree, req); @@ -1545,9 +1555,9 @@ out: /* * Trim session cap count down to some max number. */ -static int trim_caps(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session, - int max_caps) +int ceph_trim_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + int max_caps) { int trim_caps = session->s_nr_caps - max_caps; @@ -2438,11 +2448,14 @@ out: */ void ceph_invalidate_dir_request(struct ceph_mds_request *req) { - struct inode *inode = req->r_parent; + struct inode *dir = req->r_parent; + struct inode *old_dir = req->r_old_dentry_dir; - dout("invalidate_dir_request %p (complete, lease(s))\n", inode); + dout("invalidate_dir_request %p %p (complete, lease(s))\n", dir, old_dir); - ceph_dir_clear_complete(inode); + ceph_dir_clear_complete(dir); + if (old_dir) + ceph_dir_clear_complete(old_dir); if (req->r_dentry) ceph_invalidate_dentry_lease(req->r_dentry); if (req->r_old_dentry) @@ -2773,7 +2786,7 @@ static void handle_session(struct ceph_mds_session *session, break; case CEPH_SESSION_RECALL_STATE: - trim_caps(mdsc, session, le32_to_cpu(h->max_caps)); + ceph_trim_caps(mdsc, session, le32_to_cpu(h->max_caps)); break; case CEPH_SESSION_FLUSHMSG: diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 837ac4b087a0..71e3b783ee6f 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -444,4 +444,7 @@ ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target); extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); +extern int ceph_trim_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + int max_caps); #endif diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 8a2ca41e4b97..07cf95e6413d 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -922,13 +922,17 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, /* * Move the inode to the new realm */ - spin_lock(&realm->inodes_with_caps_lock); + oldrealm = ci->i_snap_realm; + spin_lock(&oldrealm->inodes_with_caps_lock); list_del_init(&ci->i_snap_realm_item); + spin_unlock(&oldrealm->inodes_with_caps_lock); + + spin_lock(&realm->inodes_with_caps_lock); list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps); - oldrealm = ci->i_snap_realm; ci->i_snap_realm = realm; spin_unlock(&realm->inodes_with_caps_lock); + spin_unlock(&ci->i_ceph_lock); ceph_get_snap_realm(mdsc, realm); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2beeec07fa76..21b2e5b004eb 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -256,7 +256,8 @@ struct ceph_inode_xattr { */ struct ceph_dentry_info { struct ceph_mds_session *lease_session; - u32 lease_gen, lease_shared_gen; + int lease_shared_gen; + u32 lease_gen; u32 lease_seq; unsigned long lease_renew_after, lease_renew_from; struct list_head lru; @@ -353,7 +354,7 @@ struct ceph_inode_info { int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref; int i_wrbuffer_ref, i_wrbuffer_ref_head; atomic_t i_filelock_ref; - u32 i_shared_gen; /* increment each time we get FILE_SHARED */ + atomic_t i_shared_gen; /* increment each time we get FILE_SHARED */ u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ @@ -648,7 +649,7 @@ extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check); extern void ceph_caps_init(struct ceph_mds_client *mdsc); extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); -extern void ceph_reserve_caps(struct ceph_mds_client *mdsc, +extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx, int need); extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx); @@ -668,6 +669,9 @@ struct ceph_file_info { short fmode; /* initialized on open */ short flags; /* CEPH_F_* */ + spinlock_t rw_contexts_lock; + struct list_head rw_contexts; + /* readdir: position within the dir */ u32 frag; struct ceph_mds_request *last_readdir; @@ -684,6 +688,49 @@ struct ceph_file_info { int dir_info_len; }; +struct ceph_rw_context { + struct list_head list; + struct task_struct *thread; + int caps; +}; + +#define CEPH_DEFINE_RW_CONTEXT(_name, _caps) \ + struct ceph_rw_context _name = { \ + .thread = current, \ + .caps = _caps, \ + } + +static inline void ceph_add_rw_context(struct ceph_file_info *cf, + struct ceph_rw_context *ctx) +{ + spin_lock(&cf->rw_contexts_lock); + list_add(&ctx->list, &cf->rw_contexts); + spin_unlock(&cf->rw_contexts_lock); +} + +static inline void ceph_del_rw_context(struct ceph_file_info *cf, + struct ceph_rw_context *ctx) +{ + spin_lock(&cf->rw_contexts_lock); + list_del(&ctx->list); + spin_unlock(&cf->rw_contexts_lock); +} + +static inline struct ceph_rw_context* +ceph_find_rw_context(struct ceph_file_info *cf) +{ + struct ceph_rw_context *ctx, *found = NULL; + spin_lock(&cf->rw_contexts_lock); + list_for_each_entry(ctx, &cf->rw_contexts, list) { + if (ctx->thread == current) { + found = ctx; + break; + } + } + spin_unlock(&cf->rw_contexts_lock); + return found; +} + struct ceph_readdir_cache_control { struct page *page; struct dentry **dentries; diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index c71971c01c63..687da62daf4e 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -108,14 +108,13 @@ config CIFS_XATTR depends on CIFS help Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - <http://acl.bestbits.at/> for details). CIFS maps the name of - extended attributes beginning with the user namespace prefix - to SMB/CIFS EAs. EAs are stored on Windows servers without the - user namespace prefix, but their names are seen by Linux cifs clients - prefaced by the user namespace prefix. The system namespace - (used by some filesystems to store ACLs) is not supported at - this time. + the kernel or by users (see the attr(5) manual page for details). + CIFS maps the name of extended attributes beginning with the user + namespace prefix to SMB/CIFS EAs. EAs are stored on Windows + servers without the user namespace prefix, but their names are + seen by Linux cifs clients prefaced by the user namespace prefix. + The system namespace (used by some filesystems to store ACLs) is + not supported at this time. If unsure, say Y. diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index c7a863219fa3..e35e711db68e 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -128,6 +128,10 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon) seq_puts(m, " type: CDROM "); else seq_printf(m, " type: %d ", dev_type); + if (tcon->seal) + seq_printf(m, " Encrypted"); + if (tcon->unix_ext) + seq_printf(m, " POSIX Extensions"); if (tcon->ses->server->ops->dump_share_caps) tcon->ses->server->ops->dump_share_caps(m, tcon); @@ -246,7 +250,10 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) atomic_read(&server->smbd_conn->mr_used_count)); skip_rdma: #endif - seq_printf(m, "\nNumber of credits: %d", server->credits); + seq_printf(m, "\nNumber of credits: %d Dialect 0x%x", + server->credits, server->dialect); + if (server->sign) + seq_printf(m, " signed"); i++; list_for_each(tmp2, &server->smb_ses_list) { ses = list_entry(tmp2, struct cifs_ses, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index a7be591d8e18..32cdea67bbfd 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1239,9 +1239,11 @@ cifs_init_request_bufs(void) cifs_dbg(VFS, "CIFSMaxBufSize %d 0x%x\n", CIFSMaxBufSize, CIFSMaxBufSize); */ - cifs_req_cachep = kmem_cache_create("cifs_request", + cifs_req_cachep = kmem_cache_create_usercopy("cifs_request", CIFSMaxBufSize + max_hdr_size, 0, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_HWCACHE_ALIGN, 0, + CIFSMaxBufSize + max_hdr_size, + NULL); if (cifs_req_cachep == NULL) return -ENOMEM; @@ -1267,9 +1269,9 @@ cifs_init_request_bufs(void) more SMBs to use small buffer alloc and is still much more efficient to alloc 1 per page off the slab compared to 17K (5page) alloc of large cifs buffers even when page debugging is on */ - cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq", + cifs_sm_req_cachep = kmem_cache_create_usercopy("cifs_small_rq", MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN, - NULL); + 0, MAX_CIFS_SMALL_BUFFER_SIZE, NULL); if (cifs_sm_req_cachep == NULL) { mempool_destroy(cifs_req_poolp); kmem_cache_destroy(cifs_req_cachep); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4e0922d24eb2..9ceebf30eb22 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -6343,9 +6343,7 @@ SetEARetry: pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_EA); - parm_data = - (struct fealist *) (((char *) &pSMB->hdr.Protocol) + - offset); + parm_data = (void *)pSMB + offsetof(struct smb_hdr, Protocol) + offset; pSMB->ParameterOffset = cpu_to_le16(param_offset); pSMB->DataOffset = cpu_to_le16(offset); pSMB->SetupCount = 1; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 6eb9f9691ed4..2a2b34ccaf49 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -192,6 +192,35 @@ struct smb2_symlink_err_rsp { __u8 PathBuffer[0]; } __packed; +/* SMB 3.1.1 and later dialects. See MS-SMB2 section 2.2.2.1 */ +struct smb2_error_context_rsp { + __le32 ErrorDataLength; + __le32 ErrorId; + __u8 ErrorContextData; /* ErrorDataLength long array */ +} __packed; + +/* Defines for Type field below (see MS-SMB2 2.2.2.2.2.1) */ +#define MOVE_DST_IPADDR_V4 cpu_to_le32(0x00000001) +#define MOVE_DST_IPADDR_V6 cpu_to_le32(0x00000002) + +struct move_dst_ipaddr { + __le32 Type; + __u32 Reserved; + __u8 address[16]; /* IPv4 followed by 12 bytes rsvd or IPv6 address */ +} __packed; + +struct share_redirect_error_context_rsp { + __le32 StructureSize; + __le32 NotificationType; + __le32 ResourceNameOffset; + __le32 ResourceNameLength; + __le16 Flags; + __le16 TargetType; + __le32 IPAddrCount; + struct move_dst_ipaddr IpAddrMoveList[0]; + /* __u8 ResourceName[] */ /* Name of share as counted Unicode string */ +} __packed; + #define SMB2_CLIENT_GUID_SIZE 16 struct smb2_negotiate_req { @@ -320,7 +349,9 @@ struct smb2_logoff_rsp { } __packed; /* Flags/Reserved for SMB3.1.1 */ -#define SMB2_SHAREFLAG_CLUSTER_RECONNECT 0x0001 +#define SMB2_TREE_CONNECT_FLAG_CLUSTER_RECONNECT cpu_to_le16(0x0001) +#define SMB2_TREE_CONNECT_FLAG_REDIRECT_TO_OWNER cpu_to_le16(0x0002) +#define SMB2_TREE_CONNECT_FLAG_EXTENSION_PRESENT cpu_to_le16(0x0004) struct smb2_tree_connect_req { struct smb2_sync_hdr sync_hdr; @@ -331,6 +362,82 @@ struct smb2_tree_connect_req { __u8 Buffer[1]; /* variable length */ } __packed; +/* See MS-SMB2 section 2.2.9.2 */ +/* Context Types */ +#define SMB2_RESERVED_TREE_CONNECT_CONTEXT_ID 0x0000 +#define SMB2_REMOTED_IDENTITY_TREE_CONNECT_CONTEXT_ID cpu_to_le16(0x0001) + +struct tree_connect_contexts { + __le16 ContextType; + __le16 DataLength; + __le32 Reserved; + __u8 Data[0]; +} __packed; + +/* Remoted identity tree connect context structures - see MS-SMB2 2.2.9.2.1 */ +struct smb3_blob_data { + __le16 BlobSize; + __u8 BlobData[0]; +} __packed; + +/* Valid values for Attr */ +#define SE_GROUP_MANDATORY 0x00000001 +#define SE_GROUP_ENABLED_BY_DEFAULT 0x00000002 +#define SE_GROUP_ENABLED 0x00000004 +#define SE_GROUP_OWNER 0x00000008 +#define SE_GROUP_USE_FOR_DENY_ONLY 0x00000010 +#define SE_GROUP_INTEGRITY 0x00000020 +#define SE_GROUP_INTEGRITY_ENABLED 0x00000040 +#define SE_GROUP_RESOURCE 0x20000000 +#define SE_GROUP_LOGON_ID 0xC0000000 + +/* struct sid_attr_data is SidData array in BlobData format then le32 Attr */ + +struct sid_array_data { + __le16 SidAttrCount; + /* SidAttrList - array of sid_attr_data structs */ +} __packed; + +struct luid_attr_data { + +} __packed; + +/* + * struct privilege_data is the same as BLOB_DATA - see MS-SMB2 2.2.9.2.1.5 + * but with size of LUID_ATTR_DATA struct and BlobData set to LUID_ATTR DATA + */ + +struct privilege_array_data { + __le16 PrivilegeCount; + /* array of privilege_data structs */ +} __packed; + +struct remoted_identity_tcon_context { + __le16 TicketType; /* must be 0x0001 */ + __le16 TicketSize; /* total size of this struct */ + __le16 User; /* offset to SID_ATTR_DATA struct with user info */ + __le16 UserName; /* offset to null terminated Unicode username string */ + __le16 Domain; /* offset to null terminated Unicode domain name */ + __le16 Groups; /* offset to SID_ARRAY_DATA struct with group info */ + __le16 RestrictedGroups; /* similar to above */ + __le16 Privileges; /* offset to PRIVILEGE_ARRAY_DATA struct */ + __le16 PrimaryGroup; /* offset to SID_ARRAY_DATA struct */ + __le16 Owner; /* offset to BLOB_DATA struct */ + __le16 DefaultDacl; /* offset to BLOB_DATA struct */ + __le16 DeviceGroups; /* offset to SID_ARRAY_DATA struct */ + __le16 UserClaims; /* offset to BLOB_DATA struct */ + __le16 DeviceClaims; /* offset to BLOB_DATA struct */ + __u8 TicketInfo[0]; /* variable length buf - remoted identity data */ +} __packed; + +struct smb2_tree_connect_req_extension { + __le32 TreeConnectContextOffset; + __le16 TreeConnectContextCount; + __u8 Reserved[10]; + __u8 PathName[0]; /* variable sized array */ + /* followed by array of TreeConnectContexts */ +} __packed; + struct smb2_tree_connect_rsp { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 16 */ @@ -365,7 +472,8 @@ struct smb2_tree_connect_rsp { #define SHI1005_FLAGS_ENABLE_HASH_V1 0x00002000 #define SHI1005_FLAGS_ENABLE_HASH_V2 0x00004000 #define SHI1005_FLAGS_ENCRYPT_DATA 0x00008000 -#define SHI1005_FLAGS_ALL 0x0000FF33 +#define SMB2_SHAREFLAG_IDENTITY_REMOTING 0x00040000 /* 3.1.1 */ +#define SHI1005_FLAGS_ALL 0x0004FF33 /* Possible share capabilities */ #define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) /* all dialects */ @@ -373,6 +481,7 @@ struct smb2_tree_connect_rsp { #define SMB2_SHARE_CAP_SCALEOUT cpu_to_le32(0x00000020) /* 3.0 */ #define SMB2_SHARE_CAP_CLUSTER cpu_to_le32(0x00000040) /* 3.0 */ #define SMB2_SHARE_CAP_ASYMMETRIC cpu_to_le32(0x00000080) /* 3.02 */ +#define SMB2_SHARE_CAP_REDIRECT_TO_OWNER cpu_to_le32(0x00000100) /* 3.1.1 */ struct smb2_tree_disconnect_req { struct smb2_sync_hdr sync_hdr; @@ -556,6 +665,7 @@ struct create_context { #define SMB2_LEASE_WRITE_CACHING cpu_to_le32(0x04) #define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02) +#define SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET cpu_to_le32(0x00000004) #define SMB2_LEASE_KEY_SIZE 16 diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 5130492847eb..91710eb571fb 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -217,9 +217,10 @@ static void smbd_destroy_rdma_work(struct work_struct *work) spin_unlock_irqrestore( &info->reassembly_queue_lock, flags); put_receive_buffer(info, response); - } + } else + spin_unlock_irqrestore(&info->reassembly_queue_lock, flags); } while (response); - spin_unlock_irqrestore(&info->reassembly_queue_lock, flags); + info->reassembly_data_length = 0; log_rdma_event(INFO, "free receive buffers\n"); @@ -1934,15 +1935,16 @@ again: * No need to lock if we are not at the * end of the queue */ - if (!queue_length) + if (queue_length) + list_del(&response->list); + else { spin_lock_irq( &info->reassembly_queue_lock); - list_del(&response->list); - queue_removed++; - if (!queue_length) + list_del(&response->list); spin_unlock_irq( &info->reassembly_queue_lock); - + } + queue_removed++; info->count_reassembly_queue--; info->count_dequeue_reassembly_queue++; put_receive_buffer(info, response); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 5fc5dc660600..ef80085ed564 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1218,23 +1218,11 @@ COMPATIBLE_IOCTL(DMX_SET_PES_FILTER) COMPATIBLE_IOCTL(DMX_SET_BUFFER_SIZE) COMPATIBLE_IOCTL(DMX_GET_PES_PIDS) COMPATIBLE_IOCTL(DMX_GET_STC) -COMPATIBLE_IOCTL(FE_GET_INFO) -COMPATIBLE_IOCTL(FE_DISEQC_RESET_OVERLOAD) -COMPATIBLE_IOCTL(FE_DISEQC_SEND_MASTER_CMD) -COMPATIBLE_IOCTL(FE_DISEQC_RECV_SLAVE_REPLY) -COMPATIBLE_IOCTL(FE_DISEQC_SEND_BURST) -COMPATIBLE_IOCTL(FE_SET_TONE) -COMPATIBLE_IOCTL(FE_SET_VOLTAGE) -COMPATIBLE_IOCTL(FE_ENABLE_HIGH_LNB_VOLTAGE) -COMPATIBLE_IOCTL(FE_READ_STATUS) -COMPATIBLE_IOCTL(FE_READ_BER) -COMPATIBLE_IOCTL(FE_READ_SIGNAL_STRENGTH) -COMPATIBLE_IOCTL(FE_READ_SNR) -COMPATIBLE_IOCTL(FE_READ_UNCORRECTED_BLOCKS) -COMPATIBLE_IOCTL(FE_SET_FRONTEND) -COMPATIBLE_IOCTL(FE_GET_FRONTEND) -COMPATIBLE_IOCTL(FE_GET_EVENT) -COMPATIBLE_IOCTL(FE_DISHNETWORK_SEND_LEGACY_CMD) +COMPATIBLE_IOCTL(DMX_REQBUFS) +COMPATIBLE_IOCTL(DMX_QUERYBUF) +COMPATIBLE_IOCTL(DMX_EXPBUF) +COMPATIBLE_IOCTL(DMX_QBUF) +COMPATIBLE_IOCTL(DMX_DQBUF) COMPATIBLE_IOCTL(VIDEO_STOP) COMPATIBLE_IOCTL(VIDEO_PLAY) COMPATIBLE_IOCTL(VIDEO_FREEZE) diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig index 58e2fe40b2a0..5933f995309a 100644 --- a/fs/cramfs/Kconfig +++ b/fs/cramfs/Kconfig @@ -33,8 +33,7 @@ config CRAMFS_BLOCKDEV config CRAMFS_MTD bool "Support CramFs image directly mapped in physical memory" - depends on CRAMFS && MTD - depends on CRAMFS=m || MTD=y + depends on CRAMFS && CRAMFS <= MTD default y if !CRAMFS_BLOCKDEV help This option allows the CramFs driver to load data directly from diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 732a786cce9d..ce654526c0fb 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -27,6 +27,7 @@ #include <linux/dcache.h> #include <linux/namei.h> #include <crypto/aes.h> +#include <crypto/skcipher.h> #include "fscrypt_private.h" static unsigned int num_prealloc_crypto_pages = 32; diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 305541bcd108..e33f3d3c5ade 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -13,42 +13,46 @@ #include <linux/scatterlist.h> #include <linux/ratelimit.h> +#include <crypto/skcipher.h> #include "fscrypt_private.h" +static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) +{ + if (str->len == 1 && str->name[0] == '.') + return true; + + if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + return true; + + return false; +} + /** * fname_encrypt() - encrypt a filename * - * The caller must have allocated sufficient memory for the @oname string. + * The output buffer must be at least as large as the input buffer. + * Any extra space is filled with NUL padding before encryption. * * Return: 0 on success, -errno on failure */ -static int fname_encrypt(struct inode *inode, - const struct qstr *iname, struct fscrypt_str *oname) +int fname_encrypt(struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); - struct fscrypt_info *ci = inode->i_crypt_info; - struct crypto_skcipher *tfm = ci->ci_ctfm; + struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm; int res = 0; char iv[FS_CRYPTO_BLOCK_SIZE]; struct scatterlist sg; - int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - unsigned int lim; - unsigned int cryptlen; - - lim = inode->i_sb->s_cop->max_namelen(inode); - if (iname->len <= 0 || iname->len > lim) - return -EIO; /* * Copy the filename to the output buffer for encrypting in-place and * pad it with the needed number of NUL bytes. */ - cryptlen = max_t(unsigned int, iname->len, FS_CRYPTO_BLOCK_SIZE); - cryptlen = round_up(cryptlen, padding); - cryptlen = min(cryptlen, lim); - memcpy(oname->name, iname->name, iname->len); - memset(oname->name + iname->len, 0, cryptlen - iname->len); + if (WARN_ON(olen < iname->len)) + return -ENOBUFS; + memcpy(out, iname->name, iname->len); + memset(out + iname->len, 0, olen - iname->len); /* Initialize the IV */ memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); @@ -63,8 +67,8 @@ static int fname_encrypt(struct inode *inode, skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait); - sg_init_one(&sg, oname->name, cryptlen); - skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv); + sg_init_one(&sg, out, olen); + skcipher_request_set_crypt(req, &sg, &sg, olen, iv); /* Do the encryption */ res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); @@ -75,7 +79,6 @@ static int fname_encrypt(struct inode *inode, return res; } - oname->len = cryptlen; return 0; } @@ -188,50 +191,52 @@ static int digest_decode(const char *src, int len, char *dst) return cp - dst; } -u32 fscrypt_fname_encrypted_size(const struct inode *inode, u32 ilen) +bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, + u32 max_len, u32 *encrypted_len_ret) { - int padding = 32; - struct fscrypt_info *ci = inode->i_crypt_info; - - if (ci) - padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - ilen = max(ilen, (u32)FS_CRYPTO_BLOCK_SIZE); - return round_up(ilen, padding); + int padding = 4 << (inode->i_crypt_info->ci_flags & + FS_POLICY_FLAGS_PAD_MASK); + u32 encrypted_len; + + if (orig_len > max_len) + return false; + encrypted_len = max(orig_len, (u32)FS_CRYPTO_BLOCK_SIZE); + encrypted_len = round_up(encrypted_len, padding); + *encrypted_len_ret = min(encrypted_len, max_len); + return true; } -EXPORT_SYMBOL(fscrypt_fname_encrypted_size); /** - * fscrypt_fname_crypto_alloc_obuff() - + * fscrypt_fname_alloc_buffer - allocate a buffer for presented filenames + * + * Allocate a buffer that is large enough to hold any decrypted or encoded + * filename (null-terminated), for the given maximum encrypted filename length. * - * Allocates an output buffer that is sufficient for the crypto operation - * specified by the context and the direction. + * Return: 0 on success, -errno on failure */ int fscrypt_fname_alloc_buffer(const struct inode *inode, - u32 ilen, struct fscrypt_str *crypto_str) + u32 max_encrypted_len, + struct fscrypt_str *crypto_str) { - u32 olen = fscrypt_fname_encrypted_size(inode, ilen); const u32 max_encoded_len = max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE), 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))); + u32 max_presented_len; - crypto_str->len = olen; - olen = max(olen, max_encoded_len); + max_presented_len = max(max_encoded_len, max_encrypted_len); - /* - * Allocated buffer can hold one more character to null-terminate the - * string - */ - crypto_str->name = kmalloc(olen + 1, GFP_NOFS); - if (!(crypto_str->name)) + crypto_str->name = kmalloc(max_presented_len + 1, GFP_NOFS); + if (!crypto_str->name) return -ENOMEM; + crypto_str->len = max_presented_len; return 0; } EXPORT_SYMBOL(fscrypt_fname_alloc_buffer); /** - * fscrypt_fname_crypto_free_buffer() - + * fscrypt_fname_free_buffer - free the buffer for presented filenames * - * Frees the buffer allocated for crypto operation. + * Free the buffer allocated by fscrypt_fname_alloc_buffer(). */ void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) { @@ -298,35 +303,6 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); /** - * fscrypt_fname_usr_to_disk() - converts a filename from user space to disk - * space - * - * The caller must have allocated sufficient memory for the @oname string. - * - * Return: 0 on success, -errno on failure - */ -int fscrypt_fname_usr_to_disk(struct inode *inode, - const struct qstr *iname, - struct fscrypt_str *oname) -{ - if (fscrypt_is_dot_dotdot(iname)) { - oname->name[0] = '.'; - oname->name[iname->len - 1] = '.'; - oname->len = iname->len; - return 0; - } - if (inode->i_crypt_info) - return fname_encrypt(inode, iname, oname); - /* - * Without a proper key, a user is not allowed to modify the filenames - * in a directory. Consequently, a user space name cannot be mapped to - * a disk-space name - */ - return -ENOKEY; -} -EXPORT_SYMBOL(fscrypt_fname_usr_to_disk); - -/** * fscrypt_setup_filename() - prepare to search a possibly encrypted directory * @dir: the directory that will be searched * @iname: the user-provided filename being searched for @@ -369,11 +345,17 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, return ret; if (dir->i_crypt_info) { - ret = fscrypt_fname_alloc_buffer(dir, iname->len, - &fname->crypto_buf); - if (ret) - return ret; - ret = fname_encrypt(dir, iname, &fname->crypto_buf); + if (!fscrypt_fname_encrypted_size(dir, iname->len, + dir->i_sb->s_cop->max_namelen(dir), + &fname->crypto_buf.len)) + return -ENAMETOOLONG; + fname->crypto_buf.name = kmalloc(fname->crypto_buf.len, + GFP_NOFS); + if (!fname->crypto_buf.name) + return -ENOMEM; + + ret = fname_encrypt(dir, iname, fname->crypto_buf.name, + fname->crypto_buf.len); if (ret) goto errout; fname->disk_name.name = fname->crypto_buf.name; @@ -425,7 +407,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, return 0; errout: - fscrypt_fname_free_buffer(&fname->crypto_buf); + kfree(fname->crypto_buf.name); return ret; } EXPORT_SYMBOL(fscrypt_setup_filename); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index c0b4f5597e1a..ad6722bae8b7 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -50,6 +50,15 @@ struct fscrypt_context { #define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 +/** + * For encrypted symlinks, the ciphertext length is stored at the beginning + * of the string in little-endian format. + */ +struct fscrypt_symlink_data { + __le16 len; + char encrypted_path[1]; +} __packed; + /* * A pointer to this structure is stored in the file system's in-core * representation of an inode. @@ -71,7 +80,22 @@ typedef enum { #define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 #define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002 +static inline bool fscrypt_valid_enc_modes(u32 contents_mode, + u32 filenames_mode) +{ + if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && + filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) + return true; + + if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && + filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) + return true; + + return false; +} + /* crypto.c */ +extern struct kmem_cache *fscrypt_info_cachep; extern int fscrypt_initialize(unsigned int cop_flags); extern struct workqueue_struct *fscrypt_read_workqueue; extern int fscrypt_do_page_crypto(const struct inode *inode, @@ -83,6 +107,13 @@ extern int fscrypt_do_page_crypto(const struct inode *inode, extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags); +/* fname.c */ +extern int fname_encrypt(struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen); +extern bool fscrypt_fname_encrypted_size(const struct inode *inode, + u32 orig_len, u32 max_len, + u32 *encrypted_len_ret); + /* keyinfo.c */ extern void __exit fscrypt_essiv_cleanup(void); diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 9f5fb2eb9cf7..bec06490fb13 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -110,3 +110,161 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry) return 0; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); + +int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + int err; + + /* + * To calculate the size of the encrypted symlink target we need to know + * the amount of NUL padding, which is determined by the flags set in + * the encryption policy which will be inherited from the directory. + * The easiest way to get access to this is to just load the directory's + * fscrypt_info, since we'll need it to create the dir_entry anyway. + * + * Note: in test_dummy_encryption mode, @dir may be unencrypted. + */ + err = fscrypt_get_encryption_info(dir); + if (err) + return err; + if (!fscrypt_has_encryption_key(dir)) + return -ENOKEY; + + /* + * Calculate the size of the encrypted symlink and verify it won't + * exceed max_len. Note that for historical reasons, encrypted symlink + * targets are prefixed with the ciphertext length, despite this + * actually being redundant with i_size. This decreases by 2 bytes the + * longest symlink target we can accept. + * + * We could recover 1 byte by not counting a null terminator, but + * counting it (even though it is meaningless for ciphertext) is simpler + * for now since filesystems will assume it is there and subtract it. + */ + if (!fscrypt_fname_encrypted_size(dir, len, + max_len - sizeof(struct fscrypt_symlink_data), + &disk_link->len)) + return -ENAMETOOLONG; + disk_link->len += sizeof(struct fscrypt_symlink_data); + + disk_link->name = NULL; + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_symlink); + +int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, + unsigned int len, struct fscrypt_str *disk_link) +{ + int err; + struct qstr iname = QSTR_INIT(target, len); + struct fscrypt_symlink_data *sd; + unsigned int ciphertext_len; + + err = fscrypt_require_key(inode); + if (err) + return err; + + if (disk_link->name) { + /* filesystem-provided buffer */ + sd = (struct fscrypt_symlink_data *)disk_link->name; + } else { + sd = kmalloc(disk_link->len, GFP_NOFS); + if (!sd) + return -ENOMEM; + } + ciphertext_len = disk_link->len - sizeof(*sd); + sd->len = cpu_to_le16(ciphertext_len); + + err = fname_encrypt(inode, &iname, sd->encrypted_path, ciphertext_len); + if (err) { + if (!disk_link->name) + kfree(sd); + return err; + } + /* + * Null-terminating the ciphertext doesn't make sense, but we still + * count the null terminator in the length, so we might as well + * initialize it just in case the filesystem writes it out. + */ + sd->encrypted_path[ciphertext_len] = '\0'; + + if (!disk_link->name) + disk_link->name = (unsigned char *)sd; + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_encrypt_symlink); + +/** + * fscrypt_get_symlink - get the target of an encrypted symlink + * @inode: the symlink inode + * @caddr: the on-disk contents of the symlink + * @max_size: size of @caddr buffer + * @done: if successful, will be set up to free the returned target + * + * If the symlink's encryption key is available, we decrypt its target. + * Otherwise, we encode its target for presentation. + * + * This may sleep, so the filesystem must have dropped out of RCU mode already. + * + * Return: the presentable symlink target or an ERR_PTR() + */ +const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, + unsigned int max_size, + struct delayed_call *done) +{ + const struct fscrypt_symlink_data *sd; + struct fscrypt_str cstr, pstr; + int err; + + /* This is for encrypted symlinks only */ + if (WARN_ON(!IS_ENCRYPTED(inode))) + return ERR_PTR(-EINVAL); + + /* + * Try to set up the symlink's encryption key, but we can continue + * regardless of whether the key is available or not. + */ + err = fscrypt_get_encryption_info(inode); + if (err) + return ERR_PTR(err); + + /* + * For historical reasons, encrypted symlink targets are prefixed with + * the ciphertext length, even though this is redundant with i_size. + */ + + if (max_size < sizeof(*sd)) + return ERR_PTR(-EUCLEAN); + sd = caddr; + cstr.name = (unsigned char *)sd->encrypted_path; + cstr.len = le16_to_cpu(sd->len); + + if (cstr.len == 0) + return ERR_PTR(-EUCLEAN); + + if (cstr.len + sizeof(*sd) - 1 > max_size) + return ERR_PTR(-EUCLEAN); + + err = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); + if (err) + return ERR_PTR(err); + + err = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); + if (err) + goto err_kfree; + + err = -EUCLEAN; + if (pstr.name[0] == '\0') + goto err_kfree; + + pstr.name[pstr.len] = '\0'; + set_delayed_call(done, kfree_link, pstr.name); + return pstr.name; + +err_kfree: + kfree(pstr.name); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(fscrypt_get_symlink); diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 5e6e846f5a24..05f5ee1f0705 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -14,6 +14,7 @@ #include <linux/ratelimit.h> #include <crypto/aes.h> #include <crypto/sha.h> +#include <crypto/skcipher.h> #include "fscrypt_private.h" static struct crypto_shash *essiv_hash_tfm; @@ -354,19 +355,9 @@ out: } EXPORT_SYMBOL(fscrypt_get_encryption_info); -void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci) +void fscrypt_put_encryption_info(struct inode *inode) { - struct fscrypt_info *prev; - - if (ci == NULL) - ci = READ_ONCE(inode->i_crypt_info); - if (ci == NULL) - return; - - prev = cmpxchg(&inode->i_crypt_info, ci, NULL); - if (prev != ci) - return; - - put_crypt_info(ci); + put_crypt_info(inode->i_crypt_info); + inode->i_crypt_info = NULL; } EXPORT_SYMBOL(fscrypt_put_encryption_info); @@ -44,6 +44,7 @@ /* The 'colour' (ie low bits) within a PMD of a page offset. */ #define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1) +#define PG_PMD_NR (PMD_SIZE >> PAGE_SHIFT) static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES]; @@ -375,8 +376,8 @@ restart: * unmapped. */ if (pmd_downgrade && dax_is_zero_entry(entry)) - unmap_mapping_range(mapping, - (index << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0); + unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR, + PG_PMD_NR, false); err = radix_tree_preload( mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM); @@ -538,12 +539,10 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) { /* we are replacing a zero page with block mapping */ if (dax_is_pmd_entry(entry)) - unmap_mapping_range(mapping, - (vmf->pgoff << PAGE_SHIFT) & PMD_MASK, - PMD_SIZE, 0); + unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR, + PG_PMD_NR, false); else /* pte entry */ - unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT, - PAGE_SIZE, 0); + unmap_mapping_pages(mapping, vmf->pgoff, 1, false); } spin_lock_irq(&mapping->tree_lock); @@ -636,8 +635,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, pmd = pmd_mkclean(pmd); set_pmd_at(vma->vm_mm, address, pmdp, pmd); unlock_pmd: - spin_unlock(ptl); #endif + spin_unlock(ptl); } else { if (pfn != pte_pfn(*ptep)) goto unlock_pte; @@ -1096,7 +1095,7 @@ static bool dax_fault_is_synchronous(unsigned long flags, } static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, - const struct iomap_ops *ops) + int *iomap_errp, const struct iomap_ops *ops) { struct vm_area_struct *vma = vmf->vma; struct address_space *mapping = vma->vm_file->f_mapping; @@ -1149,6 +1148,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, * that we never have to deal with more than a single extent here. */ error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap); + if (iomap_errp) + *iomap_errp = error; if (error) { vmf_ret = dax_fault_return(error); goto unlock_entry; @@ -1269,12 +1270,6 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, } #ifdef CONFIG_FS_DAX_PMD -/* - * The 'colour' (ie low bits) within a PMD of a page offset. This comes up - * more often than one might expect in the below functions. - */ -#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1) - static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, void *entry) { @@ -1488,6 +1483,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, * @vmf: The description of the fault * @pe_size: Size of the page to fault in * @pfnp: PFN to insert for synchronous faults if fsync is required + * @iomap_errp: Storage for detailed error code in case of error * @ops: Iomap ops passed from the file system * * When a page fault occurs, filesystems may call this helper in @@ -1496,11 +1492,11 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, * successfully. */ int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, - pfn_t *pfnp, const struct iomap_ops *ops) + pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) { switch (pe_size) { case PE_SIZE_PTE: - return dax_iomap_pte_fault(vmf, pfnp, ops); + return dax_iomap_pte_fault(vmf, pfnp, iomap_errp, ops); case PE_SIZE_PMD: return dax_iomap_pmd_fault(vmf, pfnp, ops); default: diff --git a/fs/dcache.c b/fs/dcache.c index c6d996ee2d61..7c38f39958bc 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -37,8 +37,6 @@ #include <linux/prefetch.h> #include <linux/ratelimit.h> #include <linux/list_lru.h> -#include <linux/kasan.h> - #include "internal.h" #include "mount.h" @@ -193,7 +191,7 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char unsigned long a,b,mask; for (;;) { - a = *(unsigned long *)cs; + a = read_word_at_a_time(cs); b = load_unaligned_zeropad(ct); if (tcount < sizeof(unsigned long)) break; @@ -466,9 +464,11 @@ static void dentry_lru_add(struct dentry *dentry) * d_drop() is used mainly for stuff that wants to invalidate a dentry for some * reason (NFS timeouts or autofs deletes). * - * __d_drop requires dentry->d_lock. + * __d_drop requires dentry->d_lock + * ___d_drop doesn't mark dentry as "unhashed" + * (dentry->d_hash.pprev will be LIST_POISON2, not NULL). */ -void __d_drop(struct dentry *dentry) +static void ___d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { struct hlist_bl_head *b; @@ -484,12 +484,17 @@ void __d_drop(struct dentry *dentry) hlist_bl_lock(b); __hlist_bl_del(&dentry->d_hash); - dentry->d_hash.pprev = NULL; hlist_bl_unlock(b); /* After this call, in-progress rcu-walk path lookup will fail. */ write_seqcount_invalidate(&dentry->d_seq); } } + +void __d_drop(struct dentry *dentry) +{ + ___d_drop(dentry); + dentry->d_hash.pprev = NULL; +} EXPORT_SYMBOL(__d_drop); void d_drop(struct dentry *dentry) @@ -1621,9 +1626,6 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) } atomic_set(&p->u.count, 1); dname = p->name; - if (IS_ENABLED(CONFIG_DCACHE_WORD_ACCESS)) - kasan_unpoison_shadow(dname, - round_up(name->len + 1, sizeof(unsigned long))); } else { dname = dentry->d_iname; } @@ -1696,9 +1698,15 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_anon(struct super_block *sb) +{ + return __d_alloc(sb, NULL); +} +EXPORT_SYMBOL(d_alloc_anon); + struct dentry *d_alloc_cursor(struct dentry * parent) { - struct dentry *dentry = __d_alloc(parent->d_sb, NULL); + struct dentry *dentry = d_alloc_anon(parent->d_sb); if (dentry) { dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR; dentry->d_parent = dget(parent); @@ -1884,7 +1892,7 @@ struct dentry *d_make_root(struct inode *root_inode) struct dentry *res = NULL; if (root_inode) { - res = __d_alloc(root_inode->i_sb, NULL); + res = d_alloc_anon(root_inode->i_sb); if (res) d_instantiate(res, root_inode); else @@ -1923,33 +1931,19 @@ struct dentry *d_find_any_alias(struct inode *inode) } EXPORT_SYMBOL(d_find_any_alias); -static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) +static struct dentry *__d_instantiate_anon(struct dentry *dentry, + struct inode *inode, + bool disconnected) { - struct dentry *tmp; struct dentry *res; unsigned add_flags; - if (!inode) - return ERR_PTR(-ESTALE); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - res = d_find_any_alias(inode); - if (res) - goto out_iput; - - tmp = __d_alloc(inode->i_sb, NULL); - if (!tmp) { - res = ERR_PTR(-ENOMEM); - goto out_iput; - } - - security_d_instantiate(tmp, inode); + security_d_instantiate(dentry, inode); spin_lock(&inode->i_lock); res = __d_find_any_alias(inode); if (res) { spin_unlock(&inode->i_lock); - dput(tmp); + dput(dentry); goto out_iput; } @@ -1959,24 +1953,57 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) if (disconnected) add_flags |= DCACHE_DISCONNECTED; - spin_lock(&tmp->d_lock); - __d_set_inode_and_type(tmp, inode, add_flags); - hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); + spin_lock(&dentry->d_lock); + __d_set_inode_and_type(dentry, inode, add_flags); + hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); if (!disconnected) { - hlist_bl_lock(&tmp->d_sb->s_roots); - hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_roots); - hlist_bl_unlock(&tmp->d_sb->s_roots); + hlist_bl_lock(&dentry->d_sb->s_roots); + hlist_bl_add_head(&dentry->d_hash, &dentry->d_sb->s_roots); + hlist_bl_unlock(&dentry->d_sb->s_roots); } - spin_unlock(&tmp->d_lock); + spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); - return tmp; + return dentry; out_iput: iput(inode); return res; } +struct dentry *d_instantiate_anon(struct dentry *dentry, struct inode *inode) +{ + return __d_instantiate_anon(dentry, inode, true); +} +EXPORT_SYMBOL(d_instantiate_anon); + +static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected) +{ + struct dentry *tmp; + struct dentry *res; + + if (!inode) + return ERR_PTR(-ESTALE); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + res = d_find_any_alias(inode); + if (res) + goto out_iput; + + tmp = d_alloc_anon(inode->i_sb); + if (!tmp) { + res = ERR_PTR(-ENOMEM); + goto out_iput; + } + + return __d_instantiate_anon(tmp, inode, disconnected); + +out_iput: + iput(inode); + return res; +} + /** * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode * @inode: inode to allocate the dentry for @@ -1997,7 +2024,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) */ struct dentry *d_obtain_alias(struct inode *inode) { - return __d_obtain_alias(inode, 1); + return __d_obtain_alias(inode, true); } EXPORT_SYMBOL(d_obtain_alias); @@ -2018,7 +2045,7 @@ EXPORT_SYMBOL(d_obtain_alias); */ struct dentry *d_obtain_root(struct inode *inode) { - return __d_obtain_alias(inode, 0); + return __d_obtain_alias(inode, false); } EXPORT_SYMBOL(d_obtain_root); @@ -2380,7 +2407,7 @@ EXPORT_SYMBOL(d_delete); static void __d_rehash(struct dentry *entry) { struct hlist_bl_head *b = d_hash(entry->d_name.hash); - BUG_ON(!d_unhashed(entry)); + hlist_bl_lock(b); hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); @@ -2815,9 +2842,9 @@ static void __d_move(struct dentry *dentry, struct dentry *target, write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); /* unhash both */ - /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ - __d_drop(dentry); - __d_drop(target); + /* ___d_drop does write_seqcount_barrier, but they're OK to nest. */ + ___d_drop(dentry); + ___d_drop(target); /* Switch the names.. */ if (exchange) @@ -2829,6 +2856,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target, __d_rehash(dentry); if (exchange) __d_rehash(target); + else + target->d_hash.pprev = NULL; /* ... and switch them in the tree */ if (IS_ROOT(dentry)) { @@ -3523,6 +3552,7 @@ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) return result; } +EXPORT_SYMBOL(is_subdir); static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) { @@ -3598,8 +3628,9 @@ static void __init dcache_init(void) * but it is probably not worth it because of the cache nature * of the dcache. */ - dentry_cache = KMEM_CACHE(dentry, - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT); + dentry_cache = KMEM_CACHE_USERCOPY(dentry, + SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT, + d_iname); /* Hash may have been set up in dcache_init_early */ if (!hashdist) @@ -3637,8 +3668,8 @@ void __init vfs_caches_init_early(void) void __init vfs_caches_init(void) { - names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL); dcache_init(); inode_init(); diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index c5a53fcc43ea..f0138674c1ed 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -242,7 +242,7 @@ exofs_readdir(struct file *file, struct dir_context *ctx) unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(exofs_chunk_size(inode)-1); - bool need_revalidate = inode_cmp_iversion(inode, file->f_version); + bool need_revalidate = !inode_eq_iversion(inode, file->f_version); if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1)) return 0; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 7e244093c0e5..179cd5c2f52a 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -193,10 +193,13 @@ static void exofs_init_once(void *foo) */ static int init_inodecache(void) { - exofs_inode_cachep = kmem_cache_create("exofs_inode_cache", + exofs_inode_cachep = kmem_cache_create_usercopy("exofs_inode_cache", sizeof(struct exofs_i_info), 0, SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | - SLAB_ACCOUNT, exofs_init_once); + SLAB_ACCOUNT, + offsetof(struct exofs_i_info, i_data), + sizeof_field(struct exofs_i_info, i_data), + exofs_init_once); if (exofs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig index c634874e12d9..894e4c53d1d2 100644 --- a/fs/ext2/Kconfig +++ b/fs/ext2/Kconfig @@ -13,8 +13,7 @@ config EXT2_FS_XATTR depends on EXT2_FS help Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - <http://acl.bestbits.at/> for details). + the kernel or by users (see the attr(5) manual page for details). If unsure, say N. @@ -26,9 +25,6 @@ config EXT2_FS_POSIX_ACL Posix Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - To learn more about Access Control Lists, visit the Posix ACLs for - Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N config EXT2_FS_SECURITY diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 4111085a129f..3b8114def693 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -294,7 +294,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); unsigned char *types = NULL; - bool need_revalidate = inode_cmp_iversion(inode, file->f_version); + bool need_revalidate = !inode_eq_iversion(inode, file->f_version); if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) return 0; diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 2da67699dc33..09640220fda8 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -100,7 +100,7 @@ static int ext2_dax_fault(struct vm_fault *vmf) } down_read(&ei->dax_sem); - ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, &ext2_iomap_ops); + ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops); up_read(&ei->dax_sem); if (vmf->flags & FAULT_FLAG_WRITE) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 554c98b8a93a..7666c065b96f 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -221,11 +221,13 @@ static void init_once(void *foo) static int __init init_inodecache(void) { - ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", - sizeof(struct ext2_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), - init_once); + ext2_inode_cachep = kmem_cache_create_usercopy("ext2_inode_cache", + sizeof(struct ext2_inode_info), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| + SLAB_ACCOUNT), + offsetof(struct ext2_inode_info, i_data), + sizeof_field(struct ext2_inode_info, i_data), + init_once); if (ext2_inode_cachep == NULL) return -ENOMEM; return 0; @@ -960,8 +962,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { err = bdev_dax_supported(sb, blocksize); - if (err) - goto failed_mount; + if (err) { + ext2_msg(sb, KERN_ERR, + "DAX unsupported by block device. Turning off DAX."); + sbi->s_mount_opt &= ~EXT2_MOUNT_DAX; + } } /* If the blocksize doesn't match, re-read the thing.. */ @@ -1226,7 +1231,7 @@ static void ext2_clear_super_error(struct super_block *sb) * write and hope for the best. */ ext2_msg(sb, KERN_ERR, - "previous I/O error to superblock detected\n"); + "previous I/O error to superblock detected"); clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 73b850f5659c..a453cc87082b 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -82,9 +82,6 @@ config EXT4_FS_POSIX_ACL POSIX Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N config EXT4_FS_SECURITY diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index a48fc5ae2701..9b63f5416a2f 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* File: fs/ext4/acl.h diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index a943e568292e..f9b3e0a83526 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -355,10 +355,10 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, blk = ext4_inode_table(sb, desc); offset = blk - group_first_block; next_zero_bit = ext4_find_next_zero_bit(bh->b_data, - EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group), + EXT4_B2C(sbi, offset + sbi->s_itb_per_group), EXT4_B2C(sbi, offset)); if (next_zero_bit < - EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group)) + EXT4_B2C(sbi, offset + sbi->s_itb_per_group)) /* bad bitmap for inode tables */ return blk; return 0; diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index bee888e0e2db..913061c0de1b 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -147,11 +147,11 @@ int ext4_setup_system_zone(struct super_block *sb) int ret; if (!test_opt(sb, BLOCK_VALIDITY)) { - if (EXT4_SB(sb)->system_blks.rb_node) + if (sbi->system_blks.rb_node) ext4_release_system_zone(sb); return 0; } - if (EXT4_SB(sb)->system_blks.rb_node) + if (sbi->system_blks.rb_node) return 0; for (i=0; i < ngroups; i++) { @@ -173,7 +173,7 @@ int ext4_setup_system_zone(struct super_block *sb) } if (test_opt(sb, DEBUG)) - debug_print_tree(EXT4_SB(sb)); + debug_print_tree(sbi); return 0; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index afda0a0499ce..da87cf757f7d 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -209,7 +209,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ - if (inode_cmp_iversion(inode, file->f_version)) { + if (!inode_eq_iversion(inode, file->f_version)) { for (i = 0; i < sb->s_blocksize && i < offset; ) { de = (struct ext4_dir_entry_2 *) (bh->b_data + i); @@ -569,7 +569,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) * cached entries. */ if ((!info->curr_node) || - inode_cmp_iversion(inode, file->f_version)) { + !inode_eq_iversion(inode, file->f_version)) { info->curr_node = NULL; free_rb_tree_fname(&info->root); file->f_version = inode_query_iversion(inode); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4e091eae38b1..3241475a1733 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * ext4.h * @@ -611,10 +611,10 @@ enum { /* * Flags used by ext4_free_blocks */ -#define EXT4_FREE_BLOCKS_METADATA 0x0001 -#define EXT4_FREE_BLOCKS_FORGET 0x0002 -#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 -#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 +#define EXT4_FREE_BLOCKS_METADATA 0x0001 +#define EXT4_FREE_BLOCKS_FORGET 0x0002 +#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 +#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 @@ -1986,10 +1986,10 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) /* Legal values for the dx_root hash_version field: */ -#define DX_HASH_LEGACY 0 -#define DX_HASH_HALF_MD4 1 -#define DX_HASH_TEA 2 -#define DX_HASH_LEGACY_UNSIGNED 3 +#define DX_HASH_LEGACY 0 +#define DX_HASH_HALF_MD4 1 +#define DX_HASH_TEA 2 +#define DX_HASH_LEGACY_UNSIGNED 3 #define DX_HASH_HALF_MD4_UNSIGNED 4 #define DX_HASH_TEA_UNSIGNED 5 @@ -2000,7 +2000,6 @@ static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, struct shash_desc shash; char ctx[4]; } desc; - int err; BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=sizeof(desc.ctx)); @@ -2008,8 +2007,7 @@ static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, desc.shash.flags = 0; *(u32 *)desc.ctx = crc; - err = crypto_shash_update(&desc.shash, address, length); - BUG_ON(err); + BUG_ON(crypto_shash_update(&desc.shash, address, length)); return *(u32 *)desc.ctx; } diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 8ecf84b8f5a1..98fb0c119c68 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -1,19 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com * Written by Alex Tomas <alex@clusterfs.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- */ #ifndef _EXT4_EXTENTS diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 48143e32411c..15b6dd733780 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * ext4_jbd2.h * @@ -5,10 +6,6 @@ * * Copyright 1998--1999 Red Hat corp --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Ext4-specific journaling extensions. */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c941251ac0c0..054416e9d827 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com * Written by Alex Tomas <alex@clusterfs.com> @@ -5,19 +6,6 @@ * Architecture independence: * Copyright (c) 2005, Bull S.A. * Written by Pierre Peiffer <pierre.peiffer@bull.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- */ /* diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index ca90fc96f47e..8efdeb903d6b 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * fs/ext4/extents_status.h * diff --git a/fs/ext4/file.c b/fs/ext4/file.c index a0ae27b1bc66..fb6f023622fe 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -280,7 +280,8 @@ out: static int ext4_dax_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size) { - int result; + int result, error = 0; + int retries = 0; handle_t *handle = NULL; struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; @@ -304,6 +305,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, sb_start_pagefault(sb); file_update_time(vmf->vma->vm_file); down_read(&EXT4_I(inode)->i_mmap_sem); +retry: handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, EXT4_DATA_TRANS_BLOCKS(sb)); if (IS_ERR(handle)) { @@ -314,9 +316,13 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, } else { down_read(&EXT4_I(inode)->i_mmap_sem); } - result = dax_iomap_fault(vmf, pe_size, &pfn, &ext4_iomap_ops); + result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops); if (write) { ext4_journal_stop(handle); + + if ((result & VM_FAULT_ERROR) && error == -ENOSPC && + ext4_should_retry_alloc(sb, &retries)) + goto retry; /* Handling synchronous page fault? */ if (result & VM_FAULT_NEEDDSYNC) result = dax_finish_sync_fault(vmf, pe_size, pfn); diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 7ec340898598..e871c4bf18e9 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2017 Oracle. All Rights Reserved. * * Author: Darrick J. Wong <darrick.wong@oracle.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "ext4.h" #include <linux/fsmap.h> diff --git a/fs/ext4/fsmap.h b/fs/ext4/fsmap.h index 9a2cd367cc66..68c8001fee85 100644 --- a/fs/ext4/fsmap.h +++ b/fs/ext4/fsmap.h @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2017 Oracle. All Rights Reserved. * * Author: Darrick J. Wong <darrick.wong@oracle.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef __EXT4_FSMAP_H__ #define __EXT4_FSMAP_H__ diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 00c6dd29e621..e22dcfab308b 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/hash.c * * Copyright (C) 2002 by Theodore Ts'o - * - * This file is released under the GPL v2. - * - * This file may be redistributed under the terms of the GNU Public - * License. */ #include <linux/fs.h> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b32cf263750d..7830d28df331 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -303,7 +303,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) /* Do this BEFORE marking the inode not in use or returning an error */ ext4_clear_inode(inode); - es = EXT4_SB(sb)->s_es; + es = sbi->s_es; if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { ext4_error(sb, "reserved or nonexistent inode %lu", ino); goto error_return; @@ -1157,7 +1157,7 @@ got: ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ ext4_set_inode_state(inode, EXT4_STATE_NEW); - ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; + ei->i_extra_isize = sbi->s_want_extra_isize; ei->i_inline_off = 0; if (ext4_has_feature_inline_data(sb)) ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index a8b987b71173..70cf4c7b268a 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1,15 +1,7 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (c) 2012 Taobao. * Written by Tao Ma <boyu.mt@taobao.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <linux/iomap.h> @@ -1495,7 +1487,7 @@ int ext4_read_inline_dir(struct file *file, * dirent right now. Scan from the start of the inline * dir to make sure. */ - if (inode_cmp_iversion(inode, file->f_version)) { + if (!inode_eq_iversion(inode, file->f_version)) { for (i = 0; i < extra_size && i < offset;) { /* * "." is with offset 0 and diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0eff5b761c6e..c94780075b04 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3768,10 +3768,18 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) /* Credits for sb + inode write */ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) { - /* This is really bad luck. We've written the data - * but cannot extend i_size. Bail out and pretend - * the write failed... */ - ret = PTR_ERR(handle); + /* + * We wrote the data but cannot extend + * i_size. Bail out. In async io case, we do + * not return error here because we have + * already submmitted the corresponding + * bio. Returning error here makes the caller + * think that this IO is done and failed + * resulting in race with bio's completion + * handler. + */ + if (!ret) + ret = PTR_ERR(handle); if (inode->i_nlink) ext4_orphan_del(NULL, inode); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d9f8b90a93ed..769a62708b1c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1,19 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com * Written by Alex Tomas <alex@clusterfs.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- */ @@ -769,10 +757,10 @@ void ext4_mb_generate_buddy(struct super_block *sb, clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); period = get_cycles() - period; - spin_lock(&EXT4_SB(sb)->s_bal_lock); - EXT4_SB(sb)->s_mb_buddies_generated++; - EXT4_SB(sb)->s_mb_generation_time += period; - spin_unlock(&EXT4_SB(sb)->s_bal_lock); + spin_lock(&sbi->s_bal_lock); + sbi->s_mb_buddies_generated++; + sbi->s_mb_generation_time += period; + spin_unlock(&sbi->s_bal_lock); } static void mb_regenerate_buddy(struct ext4_buddy *e4b) @@ -1459,7 +1447,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, ext4_fsblk_t blocknr; blocknr = ext4_group_first_block_no(sb, e4b->bd_group); - blocknr += EXT4_C2B(EXT4_SB(sb), block); + blocknr += EXT4_C2B(sbi, block); ext4_grp_locked_error(sb, e4b->bd_group, inode ? inode->i_ino : 0, blocknr, @@ -4850,9 +4838,9 @@ do_more: if (in_range(ext4_block_bitmap(sb, gdp), block, count) || in_range(ext4_inode_bitmap(sb, gdp), block, count) || in_range(block, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group) || + sbi->s_itb_per_group) || in_range(block + count - 1, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) { + sbi->s_itb_per_group)) { ext4_error(sb, "Freeing blocks in system zone - " "Block = %llu, count = %lu", block, count); diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index dcf52540f379..88c98f17e3d9 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * fs/ext4/mballoc.h * diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index cf5181b62df1..61a9d1927817 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -1,15 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright IBM Corporation, 2007 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * */ #include <linux/slab.h> diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 9bb36909ec92..b96e4bd3b3ec 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd. * Written by Takashi Sato <t-sato@yk.jp.nec.com> * Akira Fujita <a-fujita@rs.jp.nec.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <linux/fs.h> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6660686e505a..b1f21e3a0763 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3057,39 +3057,19 @@ static int ext4_symlink(struct inode *dir, struct inode *inode; int err, len = strlen(symname); int credits; - bool encryption_required; struct fscrypt_str disk_link; - struct fscrypt_symlink_data *sd = NULL; if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) return -EIO; - disk_link.len = len + 1; - disk_link.name = (char *) symname; - - encryption_required = (ext4_encrypted_inode(dir) || - DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))); - if (encryption_required) { - err = fscrypt_get_encryption_info(dir); - if (err) - return err; - if (!fscrypt_has_encryption_key(dir)) - return -ENOKEY; - disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + - sizeof(struct fscrypt_symlink_data)); - sd = kzalloc(disk_link.len, GFP_KERNEL); - if (!sd) - return -ENOMEM; - } - - if (disk_link.len > dir->i_sb->s_blocksize) { - err = -ENAMETOOLONG; - goto err_free_sd; - } + err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, + &disk_link); + if (err) + return err; err = dquot_initialize(dir); if (err) - goto err_free_sd; + return err; if ((disk_link.len > EXT4_N_BLOCKS * 4)) { /* @@ -3118,27 +3098,18 @@ static int ext4_symlink(struct inode *dir, if (IS_ERR(inode)) { if (handle) ext4_journal_stop(handle); - err = PTR_ERR(inode); - goto err_free_sd; + return PTR_ERR(inode); } - if (encryption_required) { - struct qstr istr; - struct fscrypt_str ostr = - FSTR_INIT(sd->encrypted_path, disk_link.len); - - istr.name = (const unsigned char *) symname; - istr.len = len; - err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); + if (IS_ENCRYPTED(inode)) { + err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link); if (err) goto err_drop_inode; - sd->len = cpu_to_le16(ostr.len); - disk_link.name = (char *) sd; inode->i_op = &ext4_encrypted_symlink_inode_operations; } if ((disk_link.len > EXT4_N_BLOCKS * 4)) { - if (!encryption_required) + if (!IS_ENCRYPTED(inode)) inode->i_op = &ext4_symlink_inode_operations; inode_nohighmem(inode); ext4_set_aops(inode); @@ -3180,7 +3151,7 @@ static int ext4_symlink(struct inode *dir, } else { /* clear the extent format for fast symlink */ ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); - if (!encryption_required) { + if (!IS_ENCRYPTED(inode)) { inode->i_op = &ext4_fast_symlink_inode_operations; inode->i_link = (char *)&EXT4_I(inode)->i_data; } @@ -3195,16 +3166,17 @@ static int ext4_symlink(struct inode *dir, if (handle) ext4_journal_stop(handle); - kfree(sd); - return err; + goto out_free_encrypted_link; + err_drop_inode: if (handle) ext4_journal_stop(handle); clear_nlink(inode); unlock_new_inode(inode); iput(inode); -err_free_sd: - kfree(sd); +out_free_encrypted_link: + if (disk_link.name != (unsigned char *)symname) + kfree(disk_link.name); return err; } @@ -3222,9 +3194,9 @@ static int ext4_link(struct dentry *old_dentry, if (err) return err; - if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) && - (!projid_eq(EXT4_I(dir)->i_projid, - EXT4_I(old_dentry->d_inode)->i_projid))) + if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) && + (!projid_eq(EXT4_I(dir)->i_projid, + EXT4_I(old_dentry->d_inode)->i_projid))) return -EXDEV; err = dquot_initialize(dir); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 50443bda8e98..b6bec270a8e4 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1477,7 +1477,7 @@ static int ext4_flex_group_add(struct super_block *sb, goto exit_journal; group = flex_gd->groups[0].group; - BUG_ON(group != EXT4_SB(sb)->s_groups_count); + BUG_ON(group != sbi->s_groups_count); err = ext4_add_new_descs(handle, sb, group, resize_inode, flex_gd->count); if (err) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5de959fb0244..39bf464c35f1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/super.c * @@ -743,6 +744,7 @@ __acquires(bitlock) } ext4_unlock_group(sb, grp); + ext4_commit_super(sb, 1); ext4_handle_error(sb); /* * We only get here in the ERRORS_RO case; relocking the group @@ -871,7 +873,6 @@ static void ext4_put_super(struct super_block *sb) ext4_unregister_li_request(sb); ext4_quota_off_umount(sb); - flush_workqueue(sbi->rsv_conversion_wq); destroy_workqueue(sbi->rsv_conversion_wq); if (sbi->s_journal) { @@ -1037,11 +1038,13 @@ static void init_once(void *foo) static int __init init_inodecache(void) { - ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", - sizeof(struct ext4_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), - init_once); + ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache", + sizeof(struct ext4_inode_info), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| + SLAB_ACCOUNT), + offsetof(struct ext4_inode_info, i_data), + sizeof_field(struct ext4_inode_info, i_data), + init_once); if (ext4_inode_cachep == NULL) return -ENOMEM; return 0; @@ -1070,9 +1073,7 @@ void ext4_clear_inode(struct inode *inode) jbd2_free_inode(EXT4_I(inode)->jinode); EXT4_I(inode)->jinode = NULL; } -#ifdef CONFIG_EXT4_FS_ENCRYPTION - fscrypt_put_encryption_info(inode, NULL); -#endif + fscrypt_put_encryption_info(inode); } static struct inode *ext4_nfs_get_inode(struct super_block *sb, @@ -2677,7 +2678,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, * compensate. */ if (sb->s_blocksize == 1024 && nr == 0 && - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0) + le32_to_cpu(sbi->s_es->s_first_data_block) == 0) has_super++; return (has_super + ext4_group_first_block_no(sb, bg)); @@ -3122,7 +3123,7 @@ int ext4_register_li_request(struct super_block *sb, { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_li_request *elr = NULL; - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t ngroups = sbi->s_groups_count; int ret = 0; mutex_lock(&ext4_li_mtx); @@ -3711,11 +3712,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (ext4_has_feature_inline_data(sb)) { ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem" " that may contain inline data"); - goto failed_mount; + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX; } err = bdev_dax_supported(sb, blocksize); - if (err) - goto failed_mount; + if (err) { + ext4_msg(sb, KERN_ERR, + "DAX unsupported by block device. Turning off DAX."); + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX; + } } if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) { @@ -4837,7 +4841,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) bool needs_barrier = false; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + if (unlikely(ext4_forced_shutdown(sbi))) return 0; trace_ext4_sync_fs(sb, wait); diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index a2006c9af1d9..dd05af983092 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -28,59 +28,28 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, struct delayed_call *done) { struct page *cpage = NULL; - char *caddr, *paddr = NULL; - struct fscrypt_str cstr, pstr; - struct fscrypt_symlink_data *sd; - int res; - u32 max_size = inode->i_sb->s_blocksize; + const void *caddr; + unsigned int max_size; + const char *paddr; if (!dentry) return ERR_PTR(-ECHILD); - res = fscrypt_get_encryption_info(inode); - if (res) - return ERR_PTR(res); - if (ext4_inode_is_fast_symlink(inode)) { - caddr = (char *) EXT4_I(inode)->i_data; + caddr = EXT4_I(inode)->i_data; max_size = sizeof(EXT4_I(inode)->i_data); } else { cpage = read_mapping_page(inode->i_mapping, 0, NULL); if (IS_ERR(cpage)) return ERR_CAST(cpage); caddr = page_address(cpage); + max_size = inode->i_sb->s_blocksize; } - /* Symlink is encrypted */ - sd = (struct fscrypt_symlink_data *)caddr; - cstr.name = sd->encrypted_path; - cstr.len = le16_to_cpu(sd->len); - if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) { - /* Symlink data on the disk is corrupted */ - res = -EFSCORRUPTED; - goto errout; - } - - res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); - if (res) - goto errout; - paddr = pstr.name; - - res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); - if (res) - goto errout; - - /* Null-terminate the name */ - paddr[pstr.len] = '\0'; + paddr = fscrypt_get_symlink(inode, caddr, max_size, done); if (cpage) put_page(cpage); - set_delayed_call(done, kfree_link, paddr); return paddr; -errout: - if (cpage) - put_page(cpage); - kfree(paddr); - return ERR_PTR(res); } const struct inode_operations ext4_encrypted_symlink_inode_operations = { diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index e21afd52e7d7..1205261f130c 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -11,6 +11,7 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include <linux/proc_fs.h> #include "ext4.h" @@ -329,6 +330,13 @@ static void ext4_sb_release(struct kobject *kobj) complete(&sbi->s_kobj_unregister); } +static void ext4_kset_release(struct kobject *kobj) +{ + struct kset *kset = container_of(kobj, struct kset, kobj); + + kfree(kset); +} + static const struct sysfs_ops ext4_attr_ops = { .show = ext4_attr_show, .store = ext4_attr_store, @@ -342,20 +350,18 @@ static struct kobj_type ext4_sb_ktype = { static struct kobj_type ext4_ktype = { .sysfs_ops = &ext4_attr_ops, + .release = ext4_kset_release, }; -static struct kset ext4_kset = { - .kobj = {.ktype = &ext4_ktype}, -}; +static struct kset *ext4_kset; static struct kobj_type ext4_feat_ktype = { .default_attrs = ext4_feat_attrs, .sysfs_ops = &ext4_attr_ops, + .release = (void (*)(struct kobject *))kfree, }; -static struct kobject ext4_feat = { - .kset = &ext4_kset, -}; +static struct kobject *ext4_feat; #define PROC_FILE_SHOW_DEFN(name) \ static int name##_open(struct inode *inode, struct file *file) \ @@ -392,12 +398,15 @@ int ext4_register_sysfs(struct super_block *sb) const struct ext4_proc_files *p; int err; - sbi->s_kobj.kset = &ext4_kset; + sbi->s_kobj.kset = ext4_kset; init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, NULL, "%s", sb->s_id); - if (err) + if (err) { + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); return err; + } if (ext4_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); @@ -427,25 +436,45 @@ int __init ext4_init_sysfs(void) { int ret; - kobject_set_name(&ext4_kset.kobj, "ext4"); - ext4_kset.kobj.parent = fs_kobj; - ret = kset_register(&ext4_kset); + ext4_kset = kzalloc(sizeof(*ext4_kset), GFP_KERNEL); + if (!ext4_kset) + return -ENOMEM; + + kobject_set_name(&ext4_kset->kobj, "ext4"); + ext4_kset->kobj.parent = fs_kobj; + ext4_kset->kobj.ktype = &ext4_ktype; + ret = kset_register(ext4_kset); if (ret) - return ret; + goto kset_err; + + ext4_feat = kzalloc(sizeof(*ext4_feat), GFP_KERNEL); + if (!ext4_feat) { + ret = -ENOMEM; + goto kset_err; + } - ret = kobject_init_and_add(&ext4_feat, &ext4_feat_ktype, + ext4_feat->kset = ext4_kset; + ret = kobject_init_and_add(ext4_feat, &ext4_feat_ktype, NULL, "features"); if (ret) - kset_unregister(&ext4_kset); - else - ext4_proc_root = proc_mkdir(proc_dirname, NULL); + goto feat_err; + + ext4_proc_root = proc_mkdir(proc_dirname, NULL); + return ret; + +feat_err: + kobject_put(ext4_feat); +kset_err: + kset_unregister(ext4_kset); + ext4_kset = NULL; return ret; } void ext4_exit_sysfs(void) { - kobject_put(&ext4_feat); - kset_unregister(&ext4_kset); + kobject_put(ext4_feat); + kset_unregister(ext4_kset); + ext4_kset = NULL; remove_proc_entry(proc_dirname, NULL); ext4_proc_root = NULL; } diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h index b64a9fa0ff41..0cb13badf473 100644 --- a/fs/ext4/truncate.h +++ b/fs/ext4/truncate.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/truncate.h * diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index f8cc07588ac9..dd54c4f995c8 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* File: fs/ext4/xattr.h diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 378c221d68a9..9a20ef42fadd 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -35,8 +35,7 @@ config F2FS_FS_XATTR default y help Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - <http://acl.bestbits.at/> for details). + the kernel or by users (see the attr(5) manual page for details). If unsure, say N. @@ -49,9 +48,6 @@ config F2FS_FS_POSIX_ACL Posix Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N config F2FS_FS_SECURITY diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 89c838bfb067..205add3d0f3a 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -585,7 +585,7 @@ no_delete: !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); } out_clear: - fscrypt_put_encryption_info(inode, NULL); + fscrypt_put_encryption_info(inode); clear_inode(inode); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c4c94c7e9f4f..b68e7b03959f 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -486,27 +486,16 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; size_t len = strlen(symname); - struct fscrypt_str disk_link = FSTR_INIT((char *)symname, len + 1); - struct fscrypt_symlink_data *sd = NULL; + struct fscrypt_str disk_link; int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; - if (f2fs_encrypted_inode(dir)) { - err = fscrypt_get_encryption_info(dir); - if (err) - return err; - - if (!fscrypt_has_encryption_key(dir)) - return -ENOKEY; - - disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + - sizeof(struct fscrypt_symlink_data)); - } - - if (disk_link.len > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; + err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, + &disk_link); + if (err) + return err; err = dquot_initialize(dir); if (err) @@ -516,7 +505,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) return PTR_ERR(inode); - if (f2fs_encrypted_inode(inode)) + if (IS_ENCRYPTED(inode)) inode->i_op = &f2fs_encrypted_symlink_inode_operations; else inode->i_op = &f2fs_symlink_inode_operations; @@ -526,38 +515,13 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) - goto out; + goto out_handle_failed_inode; f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); - if (f2fs_encrypted_inode(inode)) { - struct qstr istr = QSTR_INIT(symname, len); - struct fscrypt_str ostr; - - sd = f2fs_kzalloc(sbi, disk_link.len, GFP_NOFS); - if (!sd) { - err = -ENOMEM; - goto err_out; - } - - err = fscrypt_get_encryption_info(inode); - if (err) - goto err_out; - - if (!fscrypt_has_encryption_key(inode)) { - err = -ENOKEY; - goto err_out; - } - - ostr.name = sd->encrypted_path; - ostr.len = disk_link.len; - err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); - if (err) - goto err_out; - - sd->len = cpu_to_le16(ostr.len); - disk_link.name = (char *)sd; - } + err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link); + if (err) + goto err_out; err = page_symlink(inode, disk_link.name, disk_link.len); @@ -584,12 +548,14 @@ err_out: f2fs_unlink(dir, dentry); } - kfree(sd); - f2fs_balance_fs(sbi, true); - return err; -out: + goto out_free_encrypted_link; + +out_handle_failed_inode: handle_failed_inode(inode); +out_free_encrypted_link: + if (disk_link.name != (unsigned char *)symname) + kfree(disk_link.name); return err; } @@ -1148,68 +1114,20 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct page *cpage = NULL; - char *caddr, *paddr = NULL; - struct fscrypt_str cstr = FSTR_INIT(NULL, 0); - struct fscrypt_str pstr = FSTR_INIT(NULL, 0); - struct fscrypt_symlink_data *sd; - u32 max_size = inode->i_sb->s_blocksize; - int res; + struct page *page; + const char *target; if (!dentry) return ERR_PTR(-ECHILD); - res = fscrypt_get_encryption_info(inode); - if (res) - return ERR_PTR(res); - - cpage = read_mapping_page(inode->i_mapping, 0, NULL); - if (IS_ERR(cpage)) - return ERR_CAST(cpage); - caddr = page_address(cpage); - - /* Symlink is encrypted */ - sd = (struct fscrypt_symlink_data *)caddr; - cstr.name = sd->encrypted_path; - cstr.len = le16_to_cpu(sd->len); - - /* this is broken symlink case */ - if (unlikely(cstr.len == 0)) { - res = -ENOENT; - goto errout; - } - - if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) { - /* Symlink data on the disk is corrupted */ - res = -EIO; - goto errout; - } - res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); - if (res) - goto errout; - - res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); - if (res) - goto errout; - - /* this is broken symlink case */ - if (unlikely(pstr.name[0] == 0)) { - res = -ENOENT; - goto errout; - } - - paddr = pstr.name; - - /* Null-terminate the name */ - paddr[pstr.len] = '\0'; + page = read_mapping_page(inode->i_mapping, 0, NULL); + if (IS_ERR(page)) + return ERR_CAST(page); - put_page(cpage); - set_delayed_call(done, kfree_link, paddr); - return paddr; -errout: - fscrypt_fname_free_buffer(&pstr); - put_page(cpage); - return ERR_PTR(res); + target = fscrypt_get_symlink(inode, page_address(page), + inode->i_sb->s_blocksize, done); + put_page(page); + return target; } const struct inode_operations f2fs_encrypted_symlink_inode_operations = { diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index cefea792cde8..2649759c478a 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -46,7 +46,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry) { int ret = 1; spin_lock(&dentry->d_lock); - if (inode_cmp_iversion(d_inode(dentry->d_parent), vfat_d_version(dentry))) + if (!inode_eq_iversion(d_inode(dentry->d_parent), vfat_d_version(dentry))) ret = 0; spin_unlock(&dentry->d_lock); return ret; diff --git a/fs/fcntl.c b/fs/fcntl.c index ea3629c15c44..4fc731876d6b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -418,7 +418,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, break; case F_ADD_SEALS: case F_GET_SEALS: - err = shmem_fcntl(filp, cmd, arg); + err = memfd_fcntl(filp, cmd, arg); break; case F_GET_RW_HINT: case F_SET_RW_HINT: diff --git a/fs/fhandle.c b/fs/fhandle.c index 0ace128f5d23..0ee727485615 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -69,8 +69,7 @@ static long do_sys_name_to_handle(struct path *path, } else retval = 0; /* copy the mount id */ - if (copy_to_user(mnt_id, &real_mount(path->mnt)->mnt_id, - sizeof(*mnt_id)) || + if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) || copy_to_user(ufh, handle, sizeof(struct file_handle) + handle_bytes)) retval = -EFAULT; diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index f989efa051a0..48b24bb50d02 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -332,9 +332,13 @@ vxfs_init(void) { int rv; - vxfs_inode_cachep = kmem_cache_create("vxfs_inode", + vxfs_inode_cachep = kmem_cache_create_usercopy("vxfs_inode", sizeof(struct vxfs_inode_info), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + offsetof(struct vxfs_inode_info, vii_immed.vi_immed), + sizeof_field(struct vxfs_inode_info, + vii_immed.vi_immed), + NULL); if (!vxfs_inode_cachep) return -ENOMEM; rv = register_filesystem(&vxfs_fs_type); diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index c0225d4b5435..3ed2b088dcfd 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -3,8 +3,7 @@ config GFS2_FS depends on (64BIT || LBDAF) select FS_POSIX_ACL select CRC32 - select CRYPTO - select CRYPTO_CRC32C + select LIBCRC32C select QUOTACTL select FS_IOMAP help diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 90af87ff29ba..82fb5583445c 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1921,19 +1921,29 @@ void gfs2_glock_exit(void) destroy_workqueue(gfs2_delete_workqueue); } -static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi) +static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n) { - while ((gi->gl = rhashtable_walk_next(&gi->hti))) { - if (IS_ERR(gi->gl)) { - if (PTR_ERR(gi->gl) == -EAGAIN) - continue; - gi->gl = NULL; - return; + if (n == 0) + gi->gl = rhashtable_walk_peek(&gi->hti); + else { + gi->gl = rhashtable_walk_next(&gi->hti); + n--; + } + for (;;) { + if (IS_ERR_OR_NULL(gi->gl)) { + if (!gi->gl) + return; + if (PTR_ERR(gi->gl) != -EAGAIN) { + gi->gl = NULL; + return; + } + n = 0; + } else if (gi->sdp == gi->gl->gl_name.ln_sbd && + !__lockref_is_dead(&gi->gl->gl_lockref)) { + if (!n--) + break; } - /* Skip entries for other sb and dead entries */ - if (gi->sdp == gi->gl->gl_name.ln_sbd && - !__lockref_is_dead(&gi->gl->gl_lockref)) - return; + gi->gl = rhashtable_walk_next(&gi->hti); } } @@ -1941,18 +1951,24 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct gfs2_glock_iter *gi = seq->private; - loff_t n = *pos; + loff_t n; - rhashtable_walk_enter(&gl_hash_table, &gi->hti); - if (rhashtable_walk_start_check(&gi->hti) != 0) - return NULL; + /* + * We can either stay where we are, skip to the next hash table + * entry, or start from the beginning. + */ + if (*pos < gi->last_pos) { + rhashtable_walk_exit(&gi->hti); + rhashtable_walk_enter(&gl_hash_table, &gi->hti); + n = *pos + 1; + } else { + n = *pos - gi->last_pos; + } - do { - gfs2_glock_iter_next(gi); - } while (gi->gl && n--); + rhashtable_walk_start(&gi->hti); + gfs2_glock_iter_next(gi, n); gi->last_pos = *pos; - return gi->gl; } @@ -1963,8 +1979,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, (*pos)++; gi->last_pos = *pos; - gfs2_glock_iter_next(gi); - + gfs2_glock_iter_next(gi, 1); return gi->gl; } @@ -1975,7 +1990,6 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) gi->gl = NULL; rhashtable_walk_stop(&gi->hti); - rhashtable_walk_exit(&gi->hti); } static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) @@ -2041,7 +2055,13 @@ static int __gfs2_glocks_open(struct inode *inode, struct file *file, seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); if (seq->buf) seq->size = GFS2_SEQ_GOODSIZE; + /* + * Initially, we are "before" the first hash table entry; the + * first call to rhashtable_walk_next gets us the first entry. + */ + gi->last_pos = -1; gi->gl = NULL; + rhashtable_walk_enter(&gl_hash_table, &gi->hti); } return ret; } @@ -2057,6 +2077,7 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file) struct gfs2_glock_iter *gi = seq->private; gi->gl = NULL; + rhashtable_walk_exit(&gi->hti); return seq_release_private(inode, file); } diff --git a/fs/hfsplus/Kconfig b/fs/hfsplus/Kconfig index 24bc20fd42f7..7cc8b4acf66a 100644 --- a/fs/hfsplus/Kconfig +++ b/fs/hfsplus/Kconfig @@ -20,9 +20,6 @@ config HFSPLUS_FS_POSIX_ACL POSIX Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website <http://acl.bestbits.at/>. - It needs to understand that POSIX ACLs are treated only under Linux. POSIX ACLs doesn't mean something under Mac OS X. Mac OS X beginning with version 10.4 ("Tiger") support NFSv4 ACLs, diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index e8120a282435..15e06fb552da 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -444,7 +444,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, int res = -ENOMEM; mutex_lock(&sbi->vh_mutex); - inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO); + inode = hfsplus_new_inode(dir->i_sb, dir, S_IFLNK | S_IRWXUGO); if (!inode) goto out; @@ -486,7 +486,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, int res = -ENOMEM; mutex_lock(&sbi->vh_mutex); - inode = hfsplus_new_inode(dir->i_sb, mode); + inode = hfsplus_new_inode(dir->i_sb, dir, mode); if (!inode) goto out; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index a015044daa05..d9255abafb81 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -478,7 +478,8 @@ extern const struct address_space_operations hfsplus_aops; extern const struct address_space_operations hfsplus_btree_aops; extern const struct dentry_operations hfsplus_dentry_operations; -struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode); +struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir, + umode_t mode); void hfsplus_delete_inode(struct inode *inode); void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 190c60efbc99..c0c8d433864f 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -354,7 +354,8 @@ static const struct file_operations hfsplus_file_operations = { .unlocked_ioctl = hfsplus_ioctl, }; -struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) +struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir, + umode_t mode) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct inode *inode = new_inode(sb); @@ -364,9 +365,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) return NULL; inode->i_ino = sbi->next_cnid++; - inode->i_mode = mode; - inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); + inode_init_owner(inode, dir, mode); set_nlink(inode, 1); inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 1d458b716957..513c357c734b 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -549,7 +549,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!sbi->hidden_dir) { mutex_lock(&sbi->vh_mutex); - sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); + sbi->hidden_dir = hfsplus_new_inode(sb, root, S_IFDIR); if (!sbi->hidden_dir) { mutex_unlock(&sbi->vh_mutex); err = -ENOMEM; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8a85f3f53446..8fe1b0aa2896 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -55,16 +55,6 @@ struct hugetlbfs_config { umode_t mode; }; -struct hugetlbfs_inode_info { - struct shared_policy policy; - struct inode vfs_inode; -}; - -static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) -{ - return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); -} - int sysctl_hugetlb_shm_group; enum { @@ -520,8 +510,16 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) if (hole_end > hole_start) { struct address_space *mapping = inode->i_mapping; + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); inode_lock(inode); + + /* protected by i_mutex */ + if (info->seals & F_SEAL_WRITE) { + inode_unlock(inode); + return -EPERM; + } + i_mmap_lock_write(mapping); if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) hugetlb_vmdelete_list(&mapping->i_mmap, @@ -539,6 +537,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); struct address_space *mapping = inode->i_mapping; struct hstate *h = hstate_inode(inode); struct vm_area_struct pseudo_vma; @@ -570,6 +569,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, if (error) goto out; + if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) { + error = -EPERM; + goto out; + } + /* * Initialize a pseudo vma as this is required by the huge page * allocation routines. If NUMA is configured, use page index @@ -660,6 +664,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) struct hstate *h = hstate_inode(inode); int error; unsigned int ia_valid = attr->ia_valid; + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); BUG_ON(!inode); @@ -668,9 +673,16 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) return error; if (ia_valid & ATTR_SIZE) { - if (attr->ia_size & ~huge_page_mask(h)) + loff_t oldsize = inode->i_size; + loff_t newsize = attr->ia_size; + + if (newsize & ~huge_page_mask(h)) return -EINVAL; - error = hugetlb_vmtruncate(inode, attr->ia_size); + /* protected by i_mutex */ + if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) || + (newsize > oldsize && (info->seals & F_SEAL_GROW))) + return -EPERM; + error = hugetlb_vmtruncate(inode, newsize); if (error) return error; } @@ -722,6 +734,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, inode = new_inode(sb); if (inode) { + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); + inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, @@ -729,6 +743,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); inode->i_mapping->private_data = resv_map; + info->seals = F_SEAL_SEAL; switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev); diff --git a/fs/inode.c b/fs/inode.c index e2ca0f4b5151..ef362364d396 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -498,7 +498,6 @@ EXPORT_SYMBOL(__remove_inode_hash); void clear_inode(struct inode *inode) { - might_sleep(); /* * We have to cycle tree_lock here because reclaim can be still in the * process of removing the last page (in __delete_from_page_cache()) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 4055f51617ef..c125d662777c 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/checkpoint.c * @@ -5,10 +6,6 @@ * * Copyright 1999 Red Hat Software --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Checkpoint routines for the generic filesystem journaling code. * Part of the ext2fs journaling system. * diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 3c1c31321d9b..8de0e7723316 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/commit.c * @@ -5,10 +6,6 @@ * * Copyright 1998 Red Hat corp --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Journal commit routines for the generic filesystem journaling code; * part of the ext2fs journaling system. */ diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 67546c7ad473..3fbf48ec2188 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/journal.c * @@ -5,10 +6,6 @@ * * Copyright 1998 Red Hat corp --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Generic filesystem journal-writing code; part of the ext2fs * journaling system. * diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 02dd3360cb20..f99910b69c78 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/recovery.c * @@ -5,10 +6,6 @@ * * Copyright 1999-2000 Red Hat Software --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Journal recovery routines for the generic filesystem journaling code; * part of the ext2fs journaling system. */ diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index f9aefcda5854..696ef15ec942 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/revoke.c * @@ -5,10 +6,6 @@ * * Copyright 2000 Red Hat corp --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Journal revoke routines for the generic filesystem journaling code; * part of the ext2fs journaling system. * diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8b08044b3120..ac311037d7a5 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * linux/fs/jbd2/transaction.c * @@ -5,10 +6,6 @@ * * Copyright 1998 Red Hat corp --- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * Generic filesystem transaction handling code; part of the ext2fs * journaling system. * @@ -495,8 +492,10 @@ void jbd2_journal_free_reserved(handle_t *handle) EXPORT_SYMBOL(jbd2_journal_free_reserved); /** - * int jbd2_journal_start_reserved(handle_t *handle) - start reserved handle + * int jbd2_journal_start_reserved() - start reserved handle * @handle: handle to start + * @type: for handle statistics + * @line_no: for handle statistics * * Start handle that has been previously reserved with jbd2_journal_reserve(). * This attaches @handle to the running transaction (or creates one if there's @@ -626,6 +625,7 @@ error_out: * int jbd2_journal_restart() - restart a handle . * @handle: handle to restart * @nblocks: nr credits requested + * @gfp_mask: memory allocation flags (for start_this_handle) * * Restart a handle for a multi-transaction filesystem * operation. diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig index d8bb6c411e96..ad850c5bf2ca 100644 --- a/fs/jffs2/Kconfig +++ b/fs/jffs2/Kconfig @@ -68,8 +68,7 @@ config JFFS2_FS_XATTR default n help Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - <http://acl.bestbits.at/> for details). + the kernel or by users (see the attr(5) manual page for details). If unsure, say N. @@ -82,9 +81,6 @@ config JFFS2_FS_POSIX_ACL Posix Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - To learn more about Access Control Lists, visit the Posix ACLs for - Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N config JFFS2_FS_SECURITY diff --git a/fs/jfs/Kconfig b/fs/jfs/Kconfig index 57cef19951db..851de78fdabb 100644 --- a/fs/jfs/Kconfig +++ b/fs/jfs/Kconfig @@ -16,9 +16,6 @@ config JFS_POSIX_ACL Posix Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - To learn more about Access Control Lists, visit the Posix ACLs for - Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N config JFS_SECURITY diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 90373aebfdca..1b9264fd54b6 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -965,9 +965,11 @@ static int __init init_jfs_fs(void) int rc; jfs_inode_cachep = - kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, - init_once); + kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info), + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, + offsetof(struct jfs_inode_info, i_inline), + sizeof_field(struct jfs_inode_info, i_inline), + init_once); if (jfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index c53d9cc5ae7a..a03ce3422578 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -275,7 +275,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; - size_t len; + ssize_t len; char *buf; if (of->atomic_write_len) { diff --git a/fs/locks.c b/fs/locks.c index 21b4dfa289ee..d6ff4beb70ce 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1554,9 +1554,9 @@ out: EXPORT_SYMBOL(__break_lease); /** - * lease_get_mtime - get the last modified time of an inode + * lease_get_mtime - update modified time of an inode with exclusive lease * @inode: the inode - * @time: pointer to a timespec which will contain the last modified time + * @time: pointer to a timespec which contains the last modified time * * This is to force NFS clients to flush their caches for files with * exclusive leases. The justification is that if someone has an @@ -1580,8 +1580,6 @@ void lease_get_mtime(struct inode *inode, struct timespec *time) if (has_lease) *time = current_time(inode); - else - *time = inode->i_mtime; } EXPORT_SYMBOL(lease_get_mtime); diff --git a/fs/mbcache.c b/fs/mbcache.c index b8b8b9ced9f8..bf41e2e72c18 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -94,6 +94,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, entry->e_key = key; entry->e_value = value; entry->e_reusable = reusable; + entry->e_referenced = 0; head = mb_cache_entry_head(cache, key); hlist_bl_lock(head); hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { @@ -238,7 +239,9 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value) spin_lock(&cache->c_list_lock); if (!list_empty(&entry->e_list)) { list_del_init(&entry->e_list); - cache->c_entry_count--; + if (!WARN_ONCE(cache->c_entry_count == 0, + "mbcache: attempt to decrement c_entry_count past zero")) + cache->c_entry_count--; atomic_dec(&entry->e_refcnt); } spin_unlock(&cache->c_list_lock); @@ -269,9 +272,6 @@ static unsigned long mb_cache_count(struct shrinker *shrink, struct mb_cache *cache = container_of(shrink, struct mb_cache, c_shrink); - /* Unlikely, but not impossible */ - if (unlikely(cache->c_entry_count < 0)) - return 0; return cache->c_entry_count; } diff --git a/fs/ncpfs/Kconfig b/fs/ncpfs/Kconfig deleted file mode 100644 index c931cf22a1f6..000000000000 --- a/fs/ncpfs/Kconfig +++ /dev/null @@ -1,108 +0,0 @@ -# -# NCP Filesystem configuration -# -config NCP_FS - tristate "NCP file system support (to mount NetWare volumes)" - depends on IPX!=n || INET - help - NCP (NetWare Core Protocol) is a protocol that runs over IPX and is - used by Novell NetWare clients to talk to file servers. It is to - IPX what NFS is to TCP/IP, if that helps. Saying Y here allows you - to mount NetWare file server volumes and to access them just like - any other Unix directory. For details, please read the file - <file:Documentation/filesystems/ncpfs.txt> in the kernel source and - the IPX-HOWTO from <http://www.tldp.org/docs.html#howto>. - - You do not have to say Y here if you want your Linux box to act as a - file *server* for Novell NetWare clients. - - General information about how to connect Linux, Windows machines and - Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>. - - To compile this as a module, choose M here: the module will be called - ncpfs. Say N unless you are connected to a Novell network. - -config NCPFS_PACKET_SIGNING - bool "Packet signatures" - depends on NCP_FS - help - NCP allows packets to be signed for stronger security. If you want - security, say Y. Normal users can leave it off. To be able to use - packet signing you must use ncpfs > 2.0.12. - -config NCPFS_IOCTL_LOCKING - bool "Proprietary file locking" - depends on NCP_FS - help - Allows locking of records on remote volumes. Say N unless you have - special applications which are able to utilize this locking scheme. - -config NCPFS_STRONG - bool "Clear remove/delete inhibit when needed" - depends on NCP_FS - help - Allows manipulation of files flagged as Delete or Rename Inhibit. - To use this feature you must mount volumes with the ncpmount - parameter "-s" (ncpfs-2.0.12 and newer). Say Y unless you are not - mounting volumes with -f 444. - -config NCPFS_NFS_NS - bool "Use NFS namespace if available" - depends on NCP_FS - help - Allows you to utilize NFS namespace on NetWare servers. It brings - you case sensitive filenames. Say Y. You can disable it at - mount-time with the `-N nfs' parameter of ncpmount. - -config NCPFS_OS2_NS - bool "Use LONG (OS/2) namespace if available" - depends on NCP_FS - help - Allows you to utilize OS2/LONG namespace on NetWare servers. - Filenames in this namespace are limited to 255 characters, they are - case insensitive, and case in names is preserved. Say Y. You can - disable it at mount time with the -N os2 parameter of ncpmount. - -config NCPFS_SMALLDOS - bool "Lowercase DOS filenames" - depends on NCP_FS - ---help--- - If you say Y here, every filename on a NetWare server volume using - the OS2/LONG namespace and created under DOS or on a volume using - DOS namespace will be converted to lowercase characters. - Saying N here will give you these filenames in uppercase. - - This is only a cosmetic option since the OS2/LONG namespace is case - insensitive. The only major reason for this option is backward - compatibility when moving from DOS to OS2/LONG namespace support. - Long filenames (created by Win95) will not be affected. - - This option does not solve the problem that filenames appear - differently under Linux and under Windows, since Windows does an - additional conversions on the client side. You can achieve similar - effects by saying Y to "Allow using of Native Language Support" - below. - -config NCPFS_NLS - bool "Use Native Language Support" - depends on NCP_FS - select NLS - help - Allows you to use codepages and I/O charsets for file name - translation between the server file system and input/output. This - may be useful, if you want to access the server with other operating - systems, e.g. Windows 95. See also NLS for more Information. - - To select codepages and I/O charsets use ncpfs-2.2.0.13 or newer. - -config NCPFS_EXTRAS - bool "Enable symbolic links and execute flags" - depends on NCP_FS - help - This enables the use of symbolic links and an execute permission - bit on NCPFS. The file server need not have long name space or NFS - name space loaded for these to work. - - To use the new attributes, it is recommended to use the flags - '-f 600 -d 755' on the ncpmount command line. - diff --git a/fs/ncpfs/Makefile b/fs/ncpfs/Makefile deleted file mode 100644 index 66fe5f878817..000000000000 --- a/fs/ncpfs/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the linux ncp filesystem routines. -# - -obj-$(CONFIG_NCP_FS) += ncpfs.o - -ncpfs-y := dir.o file.o inode.o ioctl.o mmap.o ncplib_kernel.o sock.o \ - ncpsign_kernel.o getopt.o - -ncpfs-$(CONFIG_NCPFS_EXTRAS) += symlink.o -ncpfs-$(CONFIG_NCPFS_NFS_NS) += symlink.o - -# If you want debugging output, please uncomment the following line -# ccflags-y := -DDEBUG_NCP=1 - -CFLAGS_ncplib_kernel.o := -finline-functions diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c deleted file mode 100644 index 0c57c5c5d40a..000000000000 --- a/fs/ncpfs/dir.c +++ /dev/null @@ -1,1232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * dir.c - * - * Copyright (C) 1995, 1996 by Volker Lendecke - * Modified for big endian by J.F. Chadima and David S. Miller - * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache - * Modified 1998, 1999 Wolfram Pienkoss for NLS - * Modified 1999 Wolfram Pienkoss for directory caching - * Modified 2000 Ben Harris, University of Cambridge for NFS NS meta-info - * - */ - - -#include <linux/time.h> -#include <linux/errno.h> -#include <linux/stat.h> -#include <linux/kernel.h> -#include <linux/vmalloc.h> -#include <linux/mm.h> -#include <linux/namei.h> -#include <linux/uaccess.h> -#include <asm/byteorder.h> - -#include "ncp_fs.h" - -static void ncp_read_volume_list(struct file *, struct dir_context *, - struct ncp_cache_control *); -static void ncp_do_readdir(struct file *, struct dir_context *, - struct ncp_cache_control *); - -static int ncp_readdir(struct file *, struct dir_context *); - -static int ncp_create(struct inode *, struct dentry *, umode_t, bool); -static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int); -static int ncp_unlink(struct inode *, struct dentry *); -static int ncp_mkdir(struct inode *, struct dentry *, umode_t); -static int ncp_rmdir(struct inode *, struct dentry *); -static int ncp_rename(struct inode *, struct dentry *, - struct inode *, struct dentry *, unsigned int); -static int ncp_mknod(struct inode * dir, struct dentry *dentry, - umode_t mode, dev_t rdev); -#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS) -extern int ncp_symlink(struct inode *, struct dentry *, const char *); -#else -#define ncp_symlink NULL -#endif - -const struct file_operations ncp_dir_operations = -{ - .llseek = generic_file_llseek, - .read = generic_read_dir, - .iterate = ncp_readdir, - .unlocked_ioctl = ncp_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ncp_compat_ioctl, -#endif -}; - -const struct inode_operations ncp_dir_inode_operations = -{ - .create = ncp_create, - .lookup = ncp_lookup, - .unlink = ncp_unlink, - .symlink = ncp_symlink, - .mkdir = ncp_mkdir, - .rmdir = ncp_rmdir, - .mknod = ncp_mknod, - .rename = ncp_rename, - .setattr = ncp_notify_change, -}; - -/* - * Dentry operations routines - */ -static int ncp_lookup_validate(struct dentry *, unsigned int); -static int ncp_hash_dentry(const struct dentry *, struct qstr *); -static int ncp_compare_dentry(const struct dentry *, - unsigned int, const char *, const struct qstr *); -static int ncp_delete_dentry(const struct dentry *); -static void ncp_d_prune(struct dentry *dentry); - -const struct dentry_operations ncp_dentry_operations = -{ - .d_revalidate = ncp_lookup_validate, - .d_hash = ncp_hash_dentry, - .d_compare = ncp_compare_dentry, - .d_delete = ncp_delete_dentry, - .d_prune = ncp_d_prune, -}; - -#define ncp_namespace(i) (NCP_SERVER(i)->name_space[NCP_FINFO(i)->volNumber]) - -static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator) -{ -#ifdef CONFIG_NCPFS_SMALLDOS - int ns = ncp_namespace(i); - - if ((ns == NW_NS_DOS) -#ifdef CONFIG_NCPFS_OS2_NS - || ((ns == NW_NS_OS2) && (nscreator == NW_NS_DOS)) -#endif /* CONFIG_NCPFS_OS2_NS */ - ) - return 0; -#endif /* CONFIG_NCPFS_SMALLDOS */ - return 1; -} - -#define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS) - -static inline int ncp_case_sensitive(const struct inode *i) -{ -#ifdef CONFIG_NCPFS_NFS_NS - return ncp_namespace(i) == NW_NS_NFS; -#else - return 0; -#endif /* CONFIG_NCPFS_NFS_NS */ -} - -/* - * Note: leave the hash unchanged if the directory - * is case-sensitive. - */ -static int -ncp_hash_dentry(const struct dentry *dentry, struct qstr *this) -{ - struct inode *inode = d_inode_rcu(dentry); - - if (!inode) - return 0; - - if (!ncp_case_sensitive(inode)) { - struct nls_table *t; - unsigned long hash; - int i; - - t = NCP_IO_TABLE(dentry->d_sb); - hash = init_name_hash(dentry); - for (i=0; i<this->len ; i++) - hash = partial_name_hash(ncp_tolower(t, this->name[i]), - hash); - this->hash = end_name_hash(hash); - } - return 0; -} - -static int -ncp_compare_dentry(const struct dentry *dentry, - unsigned int len, const char *str, const struct qstr *name) -{ - struct inode *pinode; - - if (len != name->len) - return 1; - - pinode = d_inode_rcu(dentry->d_parent); - if (!pinode) - return 1; - - if (ncp_case_sensitive(pinode)) - return strncmp(str, name->name, len); - - return ncp_strnicmp(NCP_IO_TABLE(pinode->i_sb), str, name->name, len); -} - -/* - * This is the callback from dput() when d_count is going to 0. - * We use this to unhash dentries with bad inodes. - * Closing files can be safely postponed until iput() - it's done there anyway. - */ -static int -ncp_delete_dentry(const struct dentry * dentry) -{ - struct inode *inode = d_inode(dentry); - - if (inode) { - if (is_bad_inode(inode)) - return 1; - } else - { - /* N.B. Unhash negative dentries? */ - } - return 0; -} - -static inline int -ncp_single_volume(struct ncp_server *server) -{ - return (server->m.mounted_vol[0] != '\0'); -} - -static inline int ncp_is_server_root(struct inode *inode) -{ - return !ncp_single_volume(NCP_SERVER(inode)) && - is_root_inode(inode); -} - - -/* - * This is the callback when the dcache has a lookup hit. - */ - - -#ifdef CONFIG_NCPFS_STRONG -/* try to delete a readonly file (NW R bit set) */ - -static int -ncp_force_unlink(struct inode *dir, struct dentry* dentry) -{ - int res=0x9c,res2; - struct nw_modify_dos_info info; - __le32 old_nwattr; - struct inode *inode; - - memset(&info, 0, sizeof(info)); - - /* remove the Read-Only flag on the NW server */ - inode = d_inode(dentry); - - old_nwattr = NCP_FINFO(inode)->nwattr; - info.attributes = old_nwattr & ~(aRONLY|aDELETEINHIBIT|aRENAMEINHIBIT); - res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(inode), inode, NULL, DM_ATTRIBUTES, &info); - if (res2) - goto leave_me; - - /* now try again the delete operation */ - res = ncp_del_file_or_subdir2(NCP_SERVER(dir), dentry); - - if (res) /* delete failed, set R bit again */ - { - info.attributes = old_nwattr; - res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(inode), inode, NULL, DM_ATTRIBUTES, &info); - if (res2) - goto leave_me; - } -leave_me: - return(res); -} -#endif /* CONFIG_NCPFS_STRONG */ - -#ifdef CONFIG_NCPFS_STRONG -static int -ncp_force_rename(struct inode *old_dir, struct dentry* old_dentry, char *_old_name, - struct inode *new_dir, struct dentry* new_dentry, char *_new_name) -{ - struct nw_modify_dos_info info; - int res=0x90,res2; - struct inode *old_inode = d_inode(old_dentry); - __le32 old_nwattr = NCP_FINFO(old_inode)->nwattr; - __le32 new_nwattr = 0; /* shut compiler warning */ - int old_nwattr_changed = 0; - int new_nwattr_changed = 0; - - memset(&info, 0, sizeof(info)); - - /* remove the Read-Only flag on the NW server */ - - info.attributes = old_nwattr & ~(aRONLY|aRENAMEINHIBIT|aDELETEINHIBIT); - res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(old_inode), old_inode, NULL, DM_ATTRIBUTES, &info); - if (!res2) - old_nwattr_changed = 1; - if (new_dentry && d_really_is_positive(new_dentry)) { - new_nwattr = NCP_FINFO(d_inode(new_dentry))->nwattr; - info.attributes = new_nwattr & ~(aRONLY|aRENAMEINHIBIT|aDELETEINHIBIT); - res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(new_dir), new_dir, _new_name, DM_ATTRIBUTES, &info); - if (!res2) - new_nwattr_changed = 1; - } - /* now try again the rename operation */ - /* but only if something really happened */ - if (new_nwattr_changed || old_nwattr_changed) { - res = ncp_ren_or_mov_file_or_subdir(NCP_SERVER(old_dir), - old_dir, _old_name, - new_dir, _new_name); - } - if (res) - goto leave_me; - /* file was successfully renamed, so: - do not set attributes on old file - it no longer exists - copy attributes from old file to new */ - new_nwattr_changed = old_nwattr_changed; - new_nwattr = old_nwattr; - old_nwattr_changed = 0; - -leave_me:; - if (old_nwattr_changed) { - info.attributes = old_nwattr; - res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(old_inode), old_inode, NULL, DM_ATTRIBUTES, &info); - /* ignore errors */ - } - if (new_nwattr_changed) { - info.attributes = new_nwattr; - res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(new_dir), new_dir, _new_name, DM_ATTRIBUTES, &info); - /* ignore errors */ - } - return(res); -} -#endif /* CONFIG_NCPFS_STRONG */ - - -static int -ncp_lookup_validate(struct dentry *dentry, unsigned int flags) -{ - struct ncp_server *server; - struct dentry *parent; - struct inode *dir; - struct ncp_entry_info finfo; - int res, val = 0, len; - __u8 __name[NCP_MAXPATHLEN + 1]; - - if (dentry == dentry->d_sb->s_root) - return 1; - - if (flags & LOOKUP_RCU) - return -ECHILD; - - parent = dget_parent(dentry); - dir = d_inode(parent); - - if (d_really_is_negative(dentry)) - goto finished; - - server = NCP_SERVER(dir); - - /* - * Inspired by smbfs: - * The default validation is based on dentry age: - * We set the max age at mount time. (But each - * successful server lookup renews the timestamp.) - */ - val = NCP_TEST_AGE(server, dentry); - if (val) - goto finished; - - ncp_dbg(2, "%pd2 not valid, age=%ld, server lookup\n", - dentry, NCP_GET_AGE(dentry)); - - len = sizeof(__name); - if (ncp_is_server_root(dir)) { - res = ncp_io2vol(server, __name, &len, dentry->d_name.name, - dentry->d_name.len, 1); - if (!res) { - res = ncp_lookup_volume(server, __name, &(finfo.i)); - if (!res) - ncp_update_known_namespace(server, finfo.i.volNumber, NULL); - } - } else { - res = ncp_io2vol(server, __name, &len, dentry->d_name.name, - dentry->d_name.len, !ncp_preserve_case(dir)); - if (!res) - res = ncp_obtain_info(server, dir, __name, &(finfo.i)); - } - finfo.volume = finfo.i.volNumber; - ncp_dbg(2, "looked for %pd/%s, res=%d\n", - dentry->d_parent, __name, res); - /* - * If we didn't find it, or if it has a different dirEntNum to - * what we remember, it's not valid any more. - */ - if (!res) { - struct inode *inode = d_inode(dentry); - - inode_lock(inode); - if (finfo.i.dirEntNum == NCP_FINFO(inode)->dirEntNum) { - ncp_new_dentry(dentry); - val=1; - } else - ncp_dbg(2, "found, but dirEntNum changed\n"); - - ncp_update_inode2(inode, &finfo); - inode_unlock(inode); - } - -finished: - ncp_dbg(2, "result=%d\n", val); - dput(parent); - return val; -} - -static time_t ncp_obtain_mtime(struct dentry *dentry) -{ - struct inode *inode = d_inode(dentry); - struct ncp_server *server = NCP_SERVER(inode); - struct nw_info_struct i; - - if (!ncp_conn_valid(server) || ncp_is_server_root(inode)) - return 0; - - if (ncp_obtain_info(server, inode, NULL, &i)) - return 0; - - return ncp_date_dos2unix(i.modifyTime, i.modifyDate); -} - -static inline void -ncp_invalidate_dircache_entries(struct dentry *parent) -{ - struct ncp_server *server = NCP_SERVER(d_inode(parent)); - struct dentry *dentry; - - spin_lock(&parent->d_lock); - list_for_each_entry(dentry, &parent->d_subdirs, d_child) { - dentry->d_fsdata = NULL; - ncp_age_dentry(server, dentry); - } - spin_unlock(&parent->d_lock); -} - -static int ncp_readdir(struct file *file, struct dir_context *ctx) -{ - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = d_inode(dentry); - struct page *page = NULL; - struct ncp_server *server = NCP_SERVER(inode); - union ncp_dir_cache *cache = NULL; - struct ncp_cache_control ctl; - int result, mtime_valid = 0; - time_t mtime = 0; - - ctl.page = NULL; - ctl.cache = NULL; - - ncp_dbg(2, "reading %pD2, pos=%d\n", file, (int)ctx->pos); - - result = -EIO; - /* Do not generate '.' and '..' when server is dead. */ - if (!ncp_conn_valid(server)) - goto out; - - result = 0; - if (!dir_emit_dots(file, ctx)) - goto out; - - page = grab_cache_page(&inode->i_data, 0); - if (!page) - goto read_really; - - ctl.cache = cache = kmap(page); - ctl.head = cache->head; - - if (!PageUptodate(page) || !ctl.head.eof) - goto init_cache; - - if (ctx->pos == 2) { - if (jiffies - ctl.head.time >= NCP_MAX_AGE(server)) - goto init_cache; - - mtime = ncp_obtain_mtime(dentry); - mtime_valid = 1; - if ((!mtime) || (mtime != ctl.head.mtime)) - goto init_cache; - } - - if (ctx->pos > ctl.head.end) - goto finished; - - ctl.fpos = ctx->pos + (NCP_DIRCACHE_START - 2); - ctl.ofs = ctl.fpos / NCP_DIRCACHE_SIZE; - ctl.idx = ctl.fpos % NCP_DIRCACHE_SIZE; - - for (;;) { - if (ctl.ofs != 0) { - ctl.page = find_lock_page(&inode->i_data, ctl.ofs); - if (!ctl.page) - goto invalid_cache; - ctl.cache = kmap(ctl.page); - if (!PageUptodate(ctl.page)) - goto invalid_cache; - } - while (ctl.idx < NCP_DIRCACHE_SIZE) { - struct dentry *dent; - bool over; - - spin_lock(&dentry->d_lock); - if (!(NCP_FINFO(inode)->flags & NCPI_DIR_CACHE)) { - spin_unlock(&dentry->d_lock); - goto invalid_cache; - } - dent = ctl.cache->dentry[ctl.idx]; - if (unlikely(!lockref_get_not_dead(&dent->d_lockref))) { - spin_unlock(&dentry->d_lock); - goto invalid_cache; - } - spin_unlock(&dentry->d_lock); - if (d_really_is_negative(dent)) { - dput(dent); - goto invalid_cache; - } - over = !dir_emit(ctx, dent->d_name.name, - dent->d_name.len, - d_inode(dent)->i_ino, DT_UNKNOWN); - dput(dent); - if (over) - goto finished; - ctx->pos += 1; - ctl.idx += 1; - if (ctx->pos > ctl.head.end) - goto finished; - } - if (ctl.page) { - kunmap(ctl.page); - SetPageUptodate(ctl.page); - unlock_page(ctl.page); - put_page(ctl.page); - ctl.page = NULL; - } - ctl.idx = 0; - ctl.ofs += 1; - } -invalid_cache: - if (ctl.page) { - kunmap(ctl.page); - unlock_page(ctl.page); - put_page(ctl.page); - ctl.page = NULL; - } - ctl.cache = cache; -init_cache: - ncp_invalidate_dircache_entries(dentry); - if (!mtime_valid) { - mtime = ncp_obtain_mtime(dentry); - mtime_valid = 1; - } - ctl.head.mtime = mtime; - ctl.head.time = jiffies; - ctl.head.eof = 0; - ctl.fpos = 2; - ctl.ofs = 0; - ctl.idx = NCP_DIRCACHE_START; - ctl.filled = 0; - ctl.valid = 1; -read_really: - spin_lock(&dentry->d_lock); - NCP_FINFO(inode)->flags |= NCPI_DIR_CACHE; - spin_unlock(&dentry->d_lock); - if (ncp_is_server_root(inode)) { - ncp_read_volume_list(file, ctx, &ctl); - } else { - ncp_do_readdir(file, ctx, &ctl); - } - ctl.head.end = ctl.fpos - 1; - ctl.head.eof = ctl.valid; -finished: - if (ctl.page) { - kunmap(ctl.page); - SetPageUptodate(ctl.page); - unlock_page(ctl.page); - put_page(ctl.page); - } - if (page) { - cache->head = ctl.head; - kunmap(page); - SetPageUptodate(page); - unlock_page(page); - put_page(page); - } -out: - return result; -} - -static void ncp_d_prune(struct dentry *dentry) -{ - if (!dentry->d_fsdata) /* not referenced from page cache */ - return; - NCP_FINFO(d_inode(dentry->d_parent))->flags &= ~NCPI_DIR_CACHE; -} - -static int -ncp_fill_cache(struct file *file, struct dir_context *ctx, - struct ncp_cache_control *ctrl, struct ncp_entry_info *entry, - int inval_childs) -{ - struct dentry *newdent, *dentry = file->f_path.dentry; - struct inode *dir = d_inode(dentry); - struct ncp_cache_control ctl = *ctrl; - struct qstr qname; - int valid = 0; - int hashed = 0; - ino_t ino = 0; - __u8 __name[NCP_MAXPATHLEN + 1]; - - qname.len = sizeof(__name); - if (ncp_vol2io(NCP_SERVER(dir), __name, &qname.len, - entry->i.entryName, entry->i.nameLen, - !ncp_preserve_entry_case(dir, entry->i.NSCreator))) - return 1; /* I'm not sure */ - - qname.name = __name; - - newdent = d_hash_and_lookup(dentry, &qname); - if (IS_ERR(newdent)) - goto end_advance; - if (!newdent) { - newdent = d_alloc(dentry, &qname); - if (!newdent) - goto end_advance; - } else { - hashed = 1; - - /* If case sensitivity changed for this volume, all entries below this one - should be thrown away. This entry itself is not affected, as its case - sensitivity is controlled by its own parent. */ - if (inval_childs) - shrink_dcache_parent(newdent); - - /* - * NetWare's OS2 namespace is case preserving yet case - * insensitive. So we update dentry's name as received from - * server. Parent dir's i_mutex is locked because we're in - * readdir. - */ - dentry_update_name_case(newdent, &qname); - } - - if (d_really_is_negative(newdent)) { - struct inode *inode; - - entry->opened = 0; - entry->ino = iunique(dir->i_sb, 2); - inode = ncp_iget(dir->i_sb, entry); - if (inode) { - d_instantiate(newdent, inode); - if (!hashed) - d_rehash(newdent); - } else { - spin_lock(&dentry->d_lock); - NCP_FINFO(dir)->flags &= ~NCPI_DIR_CACHE; - spin_unlock(&dentry->d_lock); - } - } else { - struct inode *inode = d_inode(newdent); - - inode_lock_nested(inode, I_MUTEX_CHILD); - ncp_update_inode2(inode, entry); - inode_unlock(inode); - } - - if (ctl.idx >= NCP_DIRCACHE_SIZE) { - if (ctl.page) { - kunmap(ctl.page); - SetPageUptodate(ctl.page); - unlock_page(ctl.page); - put_page(ctl.page); - } - ctl.cache = NULL; - ctl.idx -= NCP_DIRCACHE_SIZE; - ctl.ofs += 1; - ctl.page = grab_cache_page(&dir->i_data, ctl.ofs); - if (ctl.page) - ctl.cache = kmap(ctl.page); - } - if (ctl.cache) { - if (d_really_is_positive(newdent)) { - newdent->d_fsdata = newdent; - ctl.cache->dentry[ctl.idx] = newdent; - ino = d_inode(newdent)->i_ino; - ncp_new_dentry(newdent); - } - valid = 1; - } - dput(newdent); -end_advance: - if (!valid) - ctl.valid = 0; - if (!ctl.filled && (ctl.fpos == ctx->pos)) { - if (!ino) - ino = iunique(dir->i_sb, 2); - ctl.filled = !dir_emit(ctx, qname.name, qname.len, - ino, DT_UNKNOWN); - if (!ctl.filled) - ctx->pos += 1; - } - ctl.fpos += 1; - ctl.idx += 1; - *ctrl = ctl; - return (ctl.valid || !ctl.filled); -} - -static void -ncp_read_volume_list(struct file *file, struct dir_context *ctx, - struct ncp_cache_control *ctl) -{ - struct inode *inode = file_inode(file); - struct ncp_server *server = NCP_SERVER(inode); - struct ncp_volume_info info; - struct ncp_entry_info entry; - int i; - - ncp_dbg(1, "pos=%ld\n", (unsigned long)ctx->pos); - - for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) { - int inval_dentry; - - if (ncp_get_volume_info_with_number(server, i, &info) != 0) - return; - if (!strlen(info.volume_name)) - continue; - - ncp_dbg(1, "found vol: %s\n", info.volume_name); - - if (ncp_lookup_volume(server, info.volume_name, - &entry.i)) { - ncp_dbg(1, "could not lookup vol %s\n", - info.volume_name); - continue; - } - inval_dentry = ncp_update_known_namespace(server, entry.i.volNumber, NULL); - entry.volume = entry.i.volNumber; - if (!ncp_fill_cache(file, ctx, ctl, &entry, inval_dentry)) - return; - } -} - -static void -ncp_do_readdir(struct file *file, struct dir_context *ctx, - struct ncp_cache_control *ctl) -{ - struct inode *dir = file_inode(file); - struct ncp_server *server = NCP_SERVER(dir); - struct nw_search_sequence seq; - struct ncp_entry_info entry; - int err; - void* buf; - int more; - size_t bufsize; - - ncp_dbg(1, "%pD2, fpos=%ld\n", file, (unsigned long)ctx->pos); - ncp_vdbg("init %pD, volnum=%d, dirent=%u\n", - file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum); - - err = ncp_initialize_search(server, dir, &seq); - if (err) { - ncp_dbg(1, "init failed, err=%d\n", err); - return; - } - /* We MUST NOT use server->buffer_size handshaked with server if we are - using UDP, as for UDP server uses max. buffer size determined by - MTU, and for TCP server uses hardwired value 65KB (== 66560 bytes). - So we use 128KB, just to be sure, as there is no way how to know - this value in advance. */ - bufsize = 131072; - buf = vmalloc(bufsize); - if (!buf) - return; - do { - int cnt; - char* rpl; - size_t rpls; - - err = ncp_search_for_fileset(server, &seq, &more, &cnt, buf, bufsize, &rpl, &rpls); - if (err) /* Error */ - break; - if (!cnt) /* prevent endless loop */ - break; - while (cnt--) { - size_t onerpl; - - if (rpls < offsetof(struct nw_info_struct, entryName)) - break; /* short packet */ - ncp_extract_file_info(rpl, &entry.i); - onerpl = offsetof(struct nw_info_struct, entryName) + entry.i.nameLen; - if (rpls < onerpl) - break; /* short packet */ - (void)ncp_obtain_nfs_info(server, &entry.i); - rpl += onerpl; - rpls -= onerpl; - entry.volume = entry.i.volNumber; - if (!ncp_fill_cache(file, ctx, ctl, &entry, 0)) - break; - } - } while (more); - vfree(buf); - return; -} - -int ncp_conn_logged_in(struct super_block *sb) -{ - struct ncp_server* server = NCP_SBP(sb); - int result; - - if (ncp_single_volume(server)) { - int len; - struct dentry* dent; - __u32 volNumber; - __le32 dirEntNum; - __le32 DosDirNum; - __u8 __name[NCP_MAXPATHLEN + 1]; - - len = sizeof(__name); - result = ncp_io2vol(server, __name, &len, server->m.mounted_vol, - strlen(server->m.mounted_vol), 1); - if (result) - goto out; - result = -ENOENT; - if (ncp_get_volume_root(server, __name, &volNumber, &dirEntNum, &DosDirNum)) { - ncp_vdbg("%s not found\n", server->m.mounted_vol); - goto out; - } - dent = sb->s_root; - if (dent) { - struct inode* ino = d_inode(dent); - if (ino) { - ncp_update_known_namespace(server, volNumber, NULL); - NCP_FINFO(ino)->volNumber = volNumber; - NCP_FINFO(ino)->dirEntNum = dirEntNum; - NCP_FINFO(ino)->DosDirNum = DosDirNum; - result = 0; - } else { - ncp_dbg(1, "d_inode(sb->s_root) == NULL!\n"); - } - } else { - ncp_dbg(1, "sb->s_root == NULL!\n"); - } - } else - result = 0; - -out: - return result; -} - -static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) -{ - struct ncp_server *server = NCP_SERVER(dir); - struct inode *inode = NULL; - struct ncp_entry_info finfo; - int error, res, len; - __u8 __name[NCP_MAXPATHLEN + 1]; - - error = -EIO; - if (!ncp_conn_valid(server)) - goto finished; - - ncp_vdbg("server lookup for %pd2\n", dentry); - - len = sizeof(__name); - if (ncp_is_server_root(dir)) { - res = ncp_io2vol(server, __name, &len, dentry->d_name.name, - dentry->d_name.len, 1); - if (!res) - res = ncp_lookup_volume(server, __name, &(finfo.i)); - if (!res) - ncp_update_known_namespace(server, finfo.i.volNumber, NULL); - } else { - res = ncp_io2vol(server, __name, &len, dentry->d_name.name, - dentry->d_name.len, !ncp_preserve_case(dir)); - if (!res) - res = ncp_obtain_info(server, dir, __name, &(finfo.i)); - } - ncp_vdbg("looked for %pd2, res=%d\n", dentry, res); - /* - * If we didn't find an entry, make a negative dentry. - */ - if (res) - goto add_entry; - - /* - * Create an inode for the entry. - */ - finfo.opened = 0; - finfo.ino = iunique(dir->i_sb, 2); - finfo.volume = finfo.i.volNumber; - error = -EACCES; - inode = ncp_iget(dir->i_sb, &finfo); - - if (inode) { - ncp_new_dentry(dentry); -add_entry: - d_add(dentry, inode); - error = 0; - } - -finished: - ncp_vdbg("result=%d\n", error); - return ERR_PTR(error); -} - -/* - * This code is common to create, mkdir, and mknod. - */ -static int ncp_instantiate(struct inode *dir, struct dentry *dentry, - struct ncp_entry_info *finfo) -{ - struct inode *inode; - int error = -EINVAL; - - finfo->ino = iunique(dir->i_sb, 2); - inode = ncp_iget(dir->i_sb, finfo); - if (!inode) - goto out_close; - d_instantiate(dentry,inode); - error = 0; -out: - return error; - -out_close: - ncp_vdbg("%pd2 failed, closing file\n", dentry); - ncp_close_file(NCP_SERVER(dir), finfo->file_handle); - goto out; -} - -int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode, - dev_t rdev, __le32 attributes) -{ - struct ncp_server *server = NCP_SERVER(dir); - struct ncp_entry_info finfo; - int error, result, len; - int opmode; - __u8 __name[NCP_MAXPATHLEN + 1]; - - ncp_vdbg("creating %pd2, mode=%hx\n", dentry, mode); - - ncp_age_dentry(server, dentry); - len = sizeof(__name); - error = ncp_io2vol(server, __name, &len, dentry->d_name.name, - dentry->d_name.len, !ncp_preserve_case(dir)); - if (error) - goto out; - - error = -EACCES; - - if (S_ISREG(mode) && - (server->m.flags & NCP_MOUNT_EXTRAS) && - (mode & S_IXUGO)) - attributes |= aSYSTEM | aSHARED; - - result = ncp_open_create_file_or_subdir(server, dir, __name, - OC_MODE_CREATE | OC_MODE_OPEN | OC_MODE_REPLACE, - attributes, AR_READ | AR_WRITE, &finfo); - opmode = O_RDWR; - if (result) { - result = ncp_open_create_file_or_subdir(server, dir, __name, - OC_MODE_CREATE | OC_MODE_OPEN | OC_MODE_REPLACE, - attributes, AR_WRITE, &finfo); - if (result) { - if (result == 0x87) - error = -ENAMETOOLONG; - else if (result < 0) - error = result; - ncp_dbg(1, "%pd2 failed\n", dentry); - goto out; - } - opmode = O_WRONLY; - } - finfo.access = opmode; - if (ncp_is_nfs_extras(server, finfo.volume)) { - finfo.i.nfs.mode = mode; - finfo.i.nfs.rdev = new_encode_dev(rdev); - if (ncp_modify_nfs_info(server, finfo.volume, - finfo.i.dirEntNum, - mode, new_encode_dev(rdev)) != 0) - goto out; - } - - error = ncp_instantiate(dir, dentry, &finfo); -out: - return error; -} - -static int ncp_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool excl) -{ - return ncp_create_new(dir, dentry, mode, 0, 0); -} - -static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) -{ - struct ncp_entry_info finfo; - struct ncp_server *server = NCP_SERVER(dir); - int error, len; - __u8 __name[NCP_MAXPATHLEN + 1]; - - ncp_dbg(1, "making %pd2\n", dentry); - - ncp_age_dentry(server, dentry); - len = sizeof(__name); - error = ncp_io2vol(server, __name, &len, dentry->d_name.name, - dentry->d_name.len, !ncp_preserve_case(dir)); - if (error) - goto out; - - error = ncp_open_create_file_or_subdir(server, dir, __name, - OC_MODE_CREATE, aDIR, - cpu_to_le16(0xffff), - &finfo); - if (error == 0) { - if (ncp_is_nfs_extras(server, finfo.volume)) { - mode |= S_IFDIR; - finfo.i.nfs.mode = mode; - if (ncp_modify_nfs_info(server, - finfo.volume, - finfo.i.dirEntNum, - mode, 0) != 0) - goto out; - } - error = ncp_instantiate(dir, dentry, &finfo); - } else if (error > 0) { - error = -EACCES; - } -out: - return error; -} - -static int ncp_rmdir(struct inode *dir, struct dentry *dentry) -{ - struct ncp_server *server = NCP_SERVER(dir); - int error, result, len; - __u8 __name[NCP_MAXPATHLEN + 1]; - - ncp_dbg(1, "removing %pd2\n", dentry); - - len = sizeof(__name); - error = ncp_io2vol(server, __name, &len, dentry->d_name.name, - dentry->d_name.len, !ncp_preserve_case(dir)); - if (error) - goto out; - - result = ncp_del_file_or_subdir(server, dir, __name); - switch (result) { - case 0x00: - error = 0; - break; - case 0x85: /* unauthorized to delete file */ - case 0x8A: /* unauthorized to delete file */ - error = -EACCES; - break; - case 0x8F: - case 0x90: /* read only */ - error = -EPERM; - break; - case 0x9F: /* in use by another client */ - error = -EBUSY; - break; - case 0xA0: /* directory not empty */ - error = -ENOTEMPTY; - break; - case 0xFF: /* someone deleted file */ - error = -ENOENT; - break; - default: - error = result < 0 ? result : -EACCES; - break; - } -out: - return error; -} - -static int ncp_unlink(struct inode *dir, struct dentry *dentry) -{ - struct inode *inode = d_inode(dentry); - struct ncp_server *server; - int error; - - server = NCP_SERVER(dir); - ncp_dbg(1, "unlinking %pd2\n", dentry); - - /* - * Check whether to close the file ... - */ - if (inode) { - ncp_vdbg("closing file\n"); - ncp_make_closed(inode); - } - - error = ncp_del_file_or_subdir2(server, dentry); -#ifdef CONFIG_NCPFS_STRONG - /* 9C is Invalid path.. It should be 8F, 90 - read only, but - it is not :-( */ - if ((error == 0x9C || error == 0x90) && server->m.flags & NCP_MOUNT_STRONG) { /* R/O */ - error = ncp_force_unlink(dir, dentry); - } -#endif - switch (error) { - case 0x00: - ncp_dbg(1, "removed %pd2\n", dentry); - break; - case 0x85: - case 0x8A: - error = -EACCES; - break; - case 0x8D: /* some files in use */ - case 0x8E: /* all files in use */ - error = -EBUSY; - break; - case 0x8F: /* some read only */ - case 0x90: /* all read only */ - case 0x9C: /* !!! returned when in-use or read-only by NW4 */ - error = -EPERM; - break; - case 0xFF: - error = -ENOENT; - break; - default: - error = error < 0 ? error : -EACCES; - break; - } - return error; -} - -static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) -{ - struct ncp_server *server = NCP_SERVER(old_dir); - int error; - int old_len, new_len; - __u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1]; - - if (flags) - return -EINVAL; - - ncp_dbg(1, "%pd2 to %pd2\n", old_dentry, new_dentry); - - ncp_age_dentry(server, old_dentry); - ncp_age_dentry(server, new_dentry); - - old_len = sizeof(__old_name); - error = ncp_io2vol(server, __old_name, &old_len, - old_dentry->d_name.name, old_dentry->d_name.len, - !ncp_preserve_case(old_dir)); - if (error) - goto out; - - new_len = sizeof(__new_name); - error = ncp_io2vol(server, __new_name, &new_len, - new_dentry->d_name.name, new_dentry->d_name.len, - !ncp_preserve_case(new_dir)); - if (error) - goto out; - - error = ncp_ren_or_mov_file_or_subdir(server, old_dir, __old_name, - new_dir, __new_name); -#ifdef CONFIG_NCPFS_STRONG - if ((error == 0x90 || error == 0x8B || error == -EACCES) && - server->m.flags & NCP_MOUNT_STRONG) { /* RO */ - error = ncp_force_rename(old_dir, old_dentry, __old_name, - new_dir, new_dentry, __new_name); - } -#endif - switch (error) { - case 0x00: - ncp_dbg(1, "renamed %pd -> %pd\n", - old_dentry, new_dentry); - ncp_d_prune(old_dentry); - ncp_d_prune(new_dentry); - break; - case 0x9E: - error = -ENAMETOOLONG; - break; - case 0xFF: - error = -ENOENT; - break; - default: - error = error < 0 ? error : -EACCES; - break; - } -out: - return error; -} - -static int ncp_mknod(struct inode * dir, struct dentry *dentry, - umode_t mode, dev_t rdev) -{ - if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) { - ncp_dbg(1, "mode = 0%ho\n", mode); - return ncp_create_new(dir, dentry, mode, rdev, 0); - } - return -EPERM; /* Strange, but true */ -} - -/* The following routines are taken directly from msdos-fs */ - -/* Linear day numbers of the respective 1sts in non-leap years. */ - -static int day_n[] = -{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0}; -/* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */ - -static int utc2local(int time) -{ - return time - sys_tz.tz_minuteswest * 60; -} - -static int local2utc(int time) -{ - return time + sys_tz.tz_minuteswest * 60; -} - -/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ -int -ncp_date_dos2unix(__le16 t, __le16 d) -{ - unsigned short time = le16_to_cpu(t), date = le16_to_cpu(d); - int month, year, secs; - - /* first subtract and mask after that... Otherwise, if - date == 0, bad things happen */ - month = ((date >> 5) - 1) & 15; - year = date >> 9; - secs = (time & 31) * 2 + 60 * ((time >> 5) & 63) + (time >> 11) * 3600 + - 86400 * ((date & 31) - 1 + day_n[month] + (year / 4) + - year * 365 - ((year & 3) == 0 && month < 2 ? 1 : 0) + 3653); - /* days since 1.1.70 plus 80's leap day */ - return local2utc(secs); -} - - -/* Convert linear UNIX date to a MS-DOS time/date pair. */ -void -ncp_date_unix2dos(int unix_date, __le16 *time, __le16 *date) -{ - int day, year, nl_day, month; - - unix_date = utc2local(unix_date); - *time = cpu_to_le16( - (unix_date % 60) / 2 + (((unix_date / 60) % 60) << 5) + - (((unix_date / 3600) % 24) << 11)); - day = unix_date / 86400 - 3652; - year = day / 365; - if ((year + 3) / 4 + 365 * year > day) - year--; - day -= (year + 3) / 4 + 365 * year; - if (day == 59 && !(year & 3)) { - nl_day = day; - month = 2; - } else { - nl_day = (year & 3) || day <= 59 ? day : day - 1; - for (month = 1; month < 12; month++) - if (day_n[month] > nl_day) - break; - } - *date = cpu_to_le16(nl_day - day_n[month - 1] + 1 + (month << 5) + (year << 9)); -} diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c deleted file mode 100644 index 8f8cc0334ddd..000000000000 --- a/fs/ncpfs/file.c +++ /dev/null @@ -1,263 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * file.c - * - * Copyright (C) 1995, 1996 by Volker Lendecke - * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/uaccess.h> - -#include <linux/time.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/mm.h> -#include <linux/vmalloc.h> -#include <linux/sched.h> - -#include "ncp_fs.h" - -static int ncp_fsync(struct file *file, loff_t start, loff_t end, int datasync) -{ - return file_write_and_wait_range(file, start, end); -} - -/* - * Open a file with the specified read/write mode. - */ -int ncp_make_open(struct inode *inode, int right) -{ - int error; - int access; - - error = -EINVAL; - if (!inode) { - pr_err("%s: got NULL inode\n", __func__); - goto out; - } - - ncp_dbg(1, "opened=%d, volume # %u, dir entry # %u\n", - atomic_read(&NCP_FINFO(inode)->opened), - NCP_FINFO(inode)->volNumber, - NCP_FINFO(inode)->dirEntNum); - error = -EACCES; - mutex_lock(&NCP_FINFO(inode)->open_mutex); - if (!atomic_read(&NCP_FINFO(inode)->opened)) { - struct ncp_entry_info finfo; - int result; - - /* tries max. rights */ - finfo.access = O_RDWR; - result = ncp_open_create_file_or_subdir(NCP_SERVER(inode), - inode, NULL, OC_MODE_OPEN, - 0, AR_READ | AR_WRITE, &finfo); - if (!result) - goto update; - /* RDWR did not succeeded, try readonly or writeonly as requested */ - switch (right) { - case O_RDONLY: - finfo.access = O_RDONLY; - result = ncp_open_create_file_or_subdir(NCP_SERVER(inode), - inode, NULL, OC_MODE_OPEN, - 0, AR_READ, &finfo); - break; - case O_WRONLY: - finfo.access = O_WRONLY; - result = ncp_open_create_file_or_subdir(NCP_SERVER(inode), - inode, NULL, OC_MODE_OPEN, - 0, AR_WRITE, &finfo); - break; - } - if (result) { - ncp_vdbg("failed, result=%d\n", result); - goto out_unlock; - } - /* - * Update the inode information. - */ - update: - ncp_update_inode(inode, &finfo); - atomic_set(&NCP_FINFO(inode)->opened, 1); - } - - access = NCP_FINFO(inode)->access; - ncp_vdbg("file open, access=%x\n", access); - if (access == right || access == O_RDWR) { - atomic_inc(&NCP_FINFO(inode)->opened); - error = 0; - } - -out_unlock: - mutex_unlock(&NCP_FINFO(inode)->open_mutex); -out: - return error; -} - -static ssize_t -ncp_file_read_iter(struct kiocb *iocb, struct iov_iter *to) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file_inode(file); - size_t already_read = 0; - off_t pos = iocb->ki_pos; - size_t bufsize; - int error; - void *freepage; - size_t freelen; - - ncp_dbg(1, "enter %pD2\n", file); - - if (!iov_iter_count(to)) - return 0; - if (pos > inode->i_sb->s_maxbytes) - return 0; - iov_iter_truncate(to, inode->i_sb->s_maxbytes - pos); - - error = ncp_make_open(inode, O_RDONLY); - if (error) { - ncp_dbg(1, "open failed, error=%d\n", error); - return error; - } - - bufsize = NCP_SERVER(inode)->buffer_size; - - error = -EIO; - freelen = ncp_read_bounce_size(bufsize); - freepage = vmalloc(freelen); - if (!freepage) - goto outrel; - error = 0; - /* First read in as much as possible for each bufsize. */ - while (iov_iter_count(to)) { - int read_this_time; - size_t to_read = min_t(size_t, - bufsize - (pos % bufsize), - iov_iter_count(to)); - - error = ncp_read_bounce(NCP_SERVER(inode), - NCP_FINFO(inode)->file_handle, - pos, to_read, to, &read_this_time, - freepage, freelen); - if (error) { - error = -EIO; /* NW errno -> Linux errno */ - break; - } - pos += read_this_time; - already_read += read_this_time; - - if (read_this_time != to_read) - break; - } - vfree(freepage); - - iocb->ki_pos = pos; - - file_accessed(file); - - ncp_dbg(1, "exit %pD2\n", file); -outrel: - ncp_inode_close(inode); - return already_read ? already_read : error; -} - -static ssize_t -ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file_inode(file); - size_t already_written = 0; - size_t bufsize; - int errno; - void *bouncebuffer; - off_t pos; - - ncp_dbg(1, "enter %pD2\n", file); - errno = generic_write_checks(iocb, from); - if (errno <= 0) - return errno; - - errno = ncp_make_open(inode, O_WRONLY); - if (errno) { - ncp_dbg(1, "open failed, error=%d\n", errno); - return errno; - } - bufsize = NCP_SERVER(inode)->buffer_size; - - errno = file_update_time(file); - if (errno) - goto outrel; - - bouncebuffer = vmalloc(bufsize); - if (!bouncebuffer) { - errno = -EIO; /* -ENOMEM */ - goto outrel; - } - pos = iocb->ki_pos; - while (iov_iter_count(from)) { - int written_this_time; - size_t to_write = min_t(size_t, - bufsize - (pos % bufsize), - iov_iter_count(from)); - - if (!copy_from_iter_full(bouncebuffer, to_write, from)) { - errno = -EFAULT; - break; - } - if (ncp_write_kernel(NCP_SERVER(inode), - NCP_FINFO(inode)->file_handle, - pos, to_write, bouncebuffer, &written_this_time) != 0) { - errno = -EIO; - break; - } - pos += written_this_time; - already_written += written_this_time; - - if (written_this_time != to_write) - break; - } - vfree(bouncebuffer); - - iocb->ki_pos = pos; - - if (pos > i_size_read(inode)) { - inode_lock(inode); - if (pos > i_size_read(inode)) - i_size_write(inode, pos); - inode_unlock(inode); - } - ncp_dbg(1, "exit %pD2\n", file); -outrel: - ncp_inode_close(inode); - return already_written ? already_written : errno; -} - -static int ncp_release(struct inode *inode, struct file *file) { - if (ncp_make_closed(inode)) { - ncp_dbg(1, "failed to close\n"); - } - return 0; -} - -const struct file_operations ncp_file_operations = -{ - .llseek = generic_file_llseek, - .read_iter = ncp_file_read_iter, - .write_iter = ncp_file_write_iter, - .unlocked_ioctl = ncp_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ncp_compat_ioctl, -#endif - .mmap = ncp_mmap, - .release = ncp_release, - .fsync = ncp_fsync, -}; - -const struct inode_operations ncp_file_inode_operations = -{ - .setattr = ncp_notify_change, -}; diff --git a/fs/ncpfs/getopt.c b/fs/ncpfs/getopt.c deleted file mode 100644 index 5c941bef14c4..000000000000 --- a/fs/ncpfs/getopt.c +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * getopt.c - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/string.h> - -#include <asm/errno.h> - -#include "getopt.h" - -/** - * ncp_getopt - option parser - * @caller: name of the caller, for error messages - * @options: the options string - * @opts: an array of &struct option entries controlling parser operations - * @optopt: output; will contain the current option - * @optarg: output; will contain the value (if one exists) - * @value: output; may be NULL; will be overwritten with the integer value - * of the current argument. - * - * Helper to parse options on the format used by mount ("a=b,c=d,e,f"). - * Returns opts->val if a matching entry in the 'opts' array is found, - * 0 when no more tokens are found, -1 if an error is encountered. - */ -int ncp_getopt(const char *caller, char **options, const struct ncp_option *opts, - char **optopt, char **optarg, unsigned long *value) -{ - char *token; - char *val; - - do { - if ((token = strsep(options, ",")) == NULL) - return 0; - } while (*token == '\0'); - if (optopt) - *optopt = token; - - if ((val = strchr (token, '=')) != NULL) { - *val++ = 0; - } - *optarg = val; - for (; opts->name; opts++) { - if (!strcmp(opts->name, token)) { - if (!val) { - if (opts->has_arg & OPT_NOPARAM) { - return opts->val; - } - pr_info("%s: the %s option requires an argument\n", - caller, token); - return -EINVAL; - } - if (opts->has_arg & OPT_INT) { - int rc = kstrtoul(val, 0, value); - - if (rc) { - pr_info("%s: invalid numeric value in %s=%s\n", - caller, token, val); - return rc; - } - return opts->val; - } - if (opts->has_arg & OPT_STRING) { - return opts->val; - } - pr_info("%s: unexpected argument %s to the %s option\n", - caller, val, token); - return -EINVAL; - } - } - pr_info("%s: Unrecognized mount option %s\n", caller, token); - return -EOPNOTSUPP; -} diff --git a/fs/ncpfs/getopt.h b/fs/ncpfs/getopt.h deleted file mode 100644 index 30f0da317670..000000000000 --- a/fs/ncpfs/getopt.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_GETOPT_H -#define _LINUX_GETOPT_H - -#define OPT_NOPARAM 1 -#define OPT_INT 2 -#define OPT_STRING 4 -struct ncp_option { - const char *name; - unsigned int has_arg; - int val; -}; - -extern int ncp_getopt(const char *caller, char **options, const struct ncp_option *opts, - char **optopt, char **optarg, unsigned long *value); - -#endif /* _LINUX_GETOPT_H */ diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c deleted file mode 100644 index 41de88cdc053..000000000000 --- a/fs/ncpfs/inode.c +++ /dev/null @@ -1,1066 +0,0 @@ -/* - * inode.c - * - * Copyright (C) 1995, 1996 by Volker Lendecke - * Modified for big endian by J.F. Chadima and David S. Miller - * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache - * Modified 1998 Wolfram Pienkoss for NLS - * Modified 2000 Ben Harris, University of Cambridge for NFS NS meta-info - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> - -#include <linux/uaccess.h> -#include <asm/byteorder.h> - -#include <linux/time.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/file.h> -#include <linux/fcntl.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/init.h> -#include <linux/vfs.h> -#include <linux/mount.h> -#include <linux/seq_file.h> -#include <linux/sched/signal.h> -#include <linux/namei.h> - -#include <net/sock.h> - -#include "ncp_fs.h" -#include "getopt.h" - -#define NCP_DEFAULT_FILE_MODE 0600 -#define NCP_DEFAULT_DIR_MODE 0700 -#define NCP_DEFAULT_TIME_OUT 10 -#define NCP_DEFAULT_RETRY_COUNT 20 - -static void ncp_evict_inode(struct inode *); -static void ncp_put_super(struct super_block *); -static int ncp_statfs(struct dentry *, struct kstatfs *); -static int ncp_show_options(struct seq_file *, struct dentry *); - -static struct kmem_cache * ncp_inode_cachep; - -static struct inode *ncp_alloc_inode(struct super_block *sb) -{ - struct ncp_inode_info *ei; - ei = (struct ncp_inode_info *)kmem_cache_alloc(ncp_inode_cachep, GFP_KERNEL); - if (!ei) - return NULL; - return &ei->vfs_inode; -} - -static void ncp_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); -} - -static void ncp_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, ncp_i_callback); -} - -static void init_once(void *foo) -{ - struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; - - mutex_init(&ei->open_mutex); - inode_init_once(&ei->vfs_inode); -} - -static int init_inodecache(void) -{ - ncp_inode_cachep = kmem_cache_create("ncp_inode_cache", - sizeof(struct ncp_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), - init_once); - if (ncp_inode_cachep == NULL) - return -ENOMEM; - return 0; -} - -static void destroy_inodecache(void) -{ - /* - * Make sure all delayed rcu free inodes are flushed before we - * destroy cache. - */ - rcu_barrier(); - kmem_cache_destroy(ncp_inode_cachep); -} - -static int ncp_remount(struct super_block *sb, int *flags, char* data) -{ - sync_filesystem(sb); - *flags |= SB_NODIRATIME; - return 0; -} - -static const struct super_operations ncp_sops = -{ - .alloc_inode = ncp_alloc_inode, - .destroy_inode = ncp_destroy_inode, - .drop_inode = generic_delete_inode, - .evict_inode = ncp_evict_inode, - .put_super = ncp_put_super, - .statfs = ncp_statfs, - .remount_fs = ncp_remount, - .show_options = ncp_show_options, -}; - -/* - * Fill in the ncpfs-specific information in the inode. - */ -static void ncp_update_dirent(struct inode *inode, struct ncp_entry_info *nwinfo) -{ - NCP_FINFO(inode)->DosDirNum = nwinfo->i.DosDirNum; - NCP_FINFO(inode)->dirEntNum = nwinfo->i.dirEntNum; - NCP_FINFO(inode)->volNumber = nwinfo->volume; -} - -void ncp_update_inode(struct inode *inode, struct ncp_entry_info *nwinfo) -{ - ncp_update_dirent(inode, nwinfo); - NCP_FINFO(inode)->nwattr = nwinfo->i.attributes; - NCP_FINFO(inode)->access = nwinfo->access; - memcpy(NCP_FINFO(inode)->file_handle, nwinfo->file_handle, - sizeof(nwinfo->file_handle)); - ncp_dbg(1, "updated %s, volnum=%d, dirent=%u\n", - nwinfo->i.entryName, NCP_FINFO(inode)->volNumber, - NCP_FINFO(inode)->dirEntNum); -} - -static void ncp_update_dates(struct inode *inode, struct nw_info_struct *nwi) -{ - /* NFS namespace mode overrides others if it's set. */ - ncp_dbg(1, "(%s) nfs.mode=0%o\n", nwi->entryName, nwi->nfs.mode); - if (nwi->nfs.mode) { - /* XXX Security? */ - inode->i_mode = nwi->nfs.mode; - } - - inode->i_blocks = (i_size_read(inode) + NCP_BLOCK_SIZE - 1) >> NCP_BLOCK_SHIFT; - - inode->i_mtime.tv_sec = ncp_date_dos2unix(nwi->modifyTime, nwi->modifyDate); - inode->i_ctime.tv_sec = ncp_date_dos2unix(nwi->creationTime, nwi->creationDate); - inode->i_atime.tv_sec = ncp_date_dos2unix(0, nwi->lastAccessDate); - inode->i_atime.tv_nsec = 0; - inode->i_mtime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; -} - -static void ncp_update_attrs(struct inode *inode, struct ncp_entry_info *nwinfo) -{ - struct nw_info_struct *nwi = &nwinfo->i; - struct ncp_server *server = NCP_SERVER(inode); - - if (nwi->attributes & aDIR) { - inode->i_mode = server->m.dir_mode; - /* for directories dataStreamSize seems to be some - Object ID ??? */ - i_size_write(inode, NCP_BLOCK_SIZE); - } else { - u32 size; - - inode->i_mode = server->m.file_mode; - size = le32_to_cpu(nwi->dataStreamSize); - i_size_write(inode, size); -#ifdef CONFIG_NCPFS_EXTRAS - if ((server->m.flags & (NCP_MOUNT_EXTRAS|NCP_MOUNT_SYMLINKS)) - && (nwi->attributes & aSHARED)) { - switch (nwi->attributes & (aHIDDEN|aSYSTEM)) { - case aHIDDEN: - if (server->m.flags & NCP_MOUNT_SYMLINKS) { - if (/* (size >= NCP_MIN_SYMLINK_SIZE) - && */ (size <= NCP_MAX_SYMLINK_SIZE)) { - inode->i_mode = (inode->i_mode & ~S_IFMT) | S_IFLNK; - NCP_FINFO(inode)->flags |= NCPI_KLUDGE_SYMLINK; - break; - } - } - /* FALLTHROUGH */ - case 0: - if (server->m.flags & NCP_MOUNT_EXTRAS) - inode->i_mode |= S_IRUGO; - break; - case aSYSTEM: - if (server->m.flags & NCP_MOUNT_EXTRAS) - inode->i_mode |= (inode->i_mode >> 2) & S_IXUGO; - break; - /* case aSYSTEM|aHIDDEN: */ - default: - /* reserved combination */ - break; - } - } -#endif - } - if (nwi->attributes & aRONLY) inode->i_mode &= ~S_IWUGO; -} - -void ncp_update_inode2(struct inode* inode, struct ncp_entry_info *nwinfo) -{ - NCP_FINFO(inode)->flags = 0; - if (!atomic_read(&NCP_FINFO(inode)->opened)) { - NCP_FINFO(inode)->nwattr = nwinfo->i.attributes; - ncp_update_attrs(inode, nwinfo); - } - - ncp_update_dates(inode, &nwinfo->i); - ncp_update_dirent(inode, nwinfo); -} - -/* - * Fill in the inode based on the ncp_entry_info structure. Used only for brand new inodes. - */ -static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo) -{ - struct ncp_server *server = NCP_SERVER(inode); - - NCP_FINFO(inode)->flags = 0; - - ncp_update_attrs(inode, nwinfo); - - ncp_dbg(2, "inode->i_mode = %u\n", inode->i_mode); - - set_nlink(inode, 1); - inode->i_uid = server->m.uid; - inode->i_gid = server->m.gid; - - ncp_update_dates(inode, &nwinfo->i); - ncp_update_inode(inode, nwinfo); -} - -#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS) -static const struct inode_operations ncp_symlink_inode_operations = { - .get_link = page_get_link, - .setattr = ncp_notify_change, -}; -#endif - -/* - * Get a new inode. - */ -struct inode * -ncp_iget(struct super_block *sb, struct ncp_entry_info *info) -{ - struct inode *inode; - - if (info == NULL) { - pr_err("%s: info is NULL\n", __func__); - return NULL; - } - - inode = new_inode(sb); - if (inode) { - atomic_set(&NCP_FINFO(inode)->opened, info->opened); - - inode->i_ino = info->ino; - ncp_set_attr(inode, info); - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ncp_file_inode_operations; - inode->i_fop = &ncp_file_operations; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &ncp_dir_inode_operations; - inode->i_fop = &ncp_dir_operations; -#ifdef CONFIG_NCPFS_NFS_NS - } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { - init_special_inode(inode, inode->i_mode, - new_decode_dev(info->i.nfs.rdev)); -#endif -#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS) - } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &ncp_symlink_inode_operations; - inode_nohighmem(inode); - inode->i_data.a_ops = &ncp_symlink_aops; -#endif - } else { - make_bad_inode(inode); - } - insert_inode_hash(inode); - } else - pr_err("%s: iget failed!\n", __func__); - return inode; -} - -static void -ncp_evict_inode(struct inode *inode) -{ - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); - - if (S_ISDIR(inode->i_mode)) { - ncp_dbg(2, "put directory %ld\n", inode->i_ino); - } - - if (ncp_make_closed(inode) != 0) { - /* We can't do anything but complain. */ - pr_err("%s: could not close\n", __func__); - } -} - -static void ncp_stop_tasks(struct ncp_server *server) { - struct sock* sk = server->ncp_sock->sk; - - lock_sock(sk); - sk->sk_error_report = server->error_report; - sk->sk_data_ready = server->data_ready; - sk->sk_write_space = server->write_space; - release_sock(sk); - del_timer_sync(&server->timeout_tm); - - flush_work(&server->rcv.tq); - if (sk->sk_socket->type == SOCK_STREAM) - flush_work(&server->tx.tq); - else - flush_work(&server->timeout_tq); -} - -static int ncp_show_options(struct seq_file *seq, struct dentry *root) -{ - struct ncp_server *server = NCP_SBP(root->d_sb); - unsigned int tmp; - - if (!uid_eq(server->m.uid, GLOBAL_ROOT_UID)) - seq_printf(seq, ",uid=%u", - from_kuid_munged(&init_user_ns, server->m.uid)); - if (!gid_eq(server->m.gid, GLOBAL_ROOT_GID)) - seq_printf(seq, ",gid=%u", - from_kgid_munged(&init_user_ns, server->m.gid)); - if (!uid_eq(server->m.mounted_uid, GLOBAL_ROOT_UID)) - seq_printf(seq, ",owner=%u", - from_kuid_munged(&init_user_ns, server->m.mounted_uid)); - tmp = server->m.file_mode & S_IALLUGO; - if (tmp != NCP_DEFAULT_FILE_MODE) - seq_printf(seq, ",mode=0%o", tmp); - tmp = server->m.dir_mode & S_IALLUGO; - if (tmp != NCP_DEFAULT_DIR_MODE) - seq_printf(seq, ",dirmode=0%o", tmp); - if (server->m.time_out != NCP_DEFAULT_TIME_OUT * HZ / 100) { - tmp = server->m.time_out * 100 / HZ; - seq_printf(seq, ",timeout=%u", tmp); - } - if (server->m.retry_count != NCP_DEFAULT_RETRY_COUNT) - seq_printf(seq, ",retry=%u", server->m.retry_count); - if (server->m.flags != 0) - seq_printf(seq, ",flags=%lu", server->m.flags); - if (server->m.wdog_pid != NULL) - seq_printf(seq, ",wdogpid=%u", pid_vnr(server->m.wdog_pid)); - - return 0; -} - -static const struct ncp_option ncp_opts[] = { - { "uid", OPT_INT, 'u' }, - { "gid", OPT_INT, 'g' }, - { "owner", OPT_INT, 'o' }, - { "mode", OPT_INT, 'm' }, - { "dirmode", OPT_INT, 'd' }, - { "timeout", OPT_INT, 't' }, - { "retry", OPT_INT, 'r' }, - { "flags", OPT_INT, 'f' }, - { "wdogpid", OPT_INT, 'w' }, - { "ncpfd", OPT_INT, 'n' }, - { "infofd", OPT_INT, 'i' }, /* v5 */ - { "version", OPT_INT, 'v' }, - { NULL, 0, 0 } }; - -static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options) { - int optval; - char *optarg; - unsigned long optint; - int version = 0; - int ret; - - data->flags = 0; - data->int_flags = 0; - data->mounted_uid = GLOBAL_ROOT_UID; - data->wdog_pid = NULL; - data->ncp_fd = ~0; - data->time_out = NCP_DEFAULT_TIME_OUT; - data->retry_count = NCP_DEFAULT_RETRY_COUNT; - data->uid = GLOBAL_ROOT_UID; - data->gid = GLOBAL_ROOT_GID; - data->file_mode = NCP_DEFAULT_FILE_MODE; - data->dir_mode = NCP_DEFAULT_DIR_MODE; - data->info_fd = -1; - data->mounted_vol[0] = 0; - - while ((optval = ncp_getopt("ncpfs", &options, ncp_opts, NULL, &optarg, &optint)) != 0) { - ret = optval; - if (ret < 0) - goto err; - switch (optval) { - case 'u': - data->uid = make_kuid(current_user_ns(), optint); - if (!uid_valid(data->uid)) { - ret = -EINVAL; - goto err; - } - break; - case 'g': - data->gid = make_kgid(current_user_ns(), optint); - if (!gid_valid(data->gid)) { - ret = -EINVAL; - goto err; - } - break; - case 'o': - data->mounted_uid = make_kuid(current_user_ns(), optint); - if (!uid_valid(data->mounted_uid)) { - ret = -EINVAL; - goto err; - } - break; - case 'm': - data->file_mode = optint; - break; - case 'd': - data->dir_mode = optint; - break; - case 't': - data->time_out = optint; - break; - case 'r': - data->retry_count = optint; - break; - case 'f': - data->flags = optint; - break; - case 'w': - data->wdog_pid = find_get_pid(optint); - break; - case 'n': - data->ncp_fd = optint; - break; - case 'i': - data->info_fd = optint; - break; - case 'v': - ret = -ECHRNG; - if (optint < NCP_MOUNT_VERSION_V4) - goto err; - if (optint > NCP_MOUNT_VERSION_V5) - goto err; - version = optint; - break; - - } - } - return 0; -err: - put_pid(data->wdog_pid); - data->wdog_pid = NULL; - return ret; -} - -static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) -{ - struct ncp_mount_data_kernel data; - struct ncp_server *server; - struct inode *root_inode; - struct socket *sock; - int error; - int default_bufsize; -#ifdef CONFIG_NCPFS_PACKET_SIGNING - int options; -#endif - struct ncp_entry_info finfo; - - memset(&data, 0, sizeof(data)); - server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL); - if (!server) - return -ENOMEM; - sb->s_fs_info = server; - - error = -EFAULT; - if (raw_data == NULL) - goto out; - switch (*(int*)raw_data) { - case NCP_MOUNT_VERSION: - { - struct ncp_mount_data* md = (struct ncp_mount_data*)raw_data; - - data.flags = md->flags; - data.int_flags = NCP_IMOUNT_LOGGEDIN_POSSIBLE; - data.mounted_uid = make_kuid(current_user_ns(), md->mounted_uid); - data.wdog_pid = find_get_pid(md->wdog_pid); - data.ncp_fd = md->ncp_fd; - data.time_out = md->time_out; - data.retry_count = md->retry_count; - data.uid = make_kuid(current_user_ns(), md->uid); - data.gid = make_kgid(current_user_ns(), md->gid); - data.file_mode = md->file_mode; - data.dir_mode = md->dir_mode; - data.info_fd = -1; - memcpy(data.mounted_vol, md->mounted_vol, - NCP_VOLNAME_LEN+1); - } - break; - case NCP_MOUNT_VERSION_V4: - { - struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data; - - data.flags = md->flags; - data.mounted_uid = make_kuid(current_user_ns(), md->mounted_uid); - data.wdog_pid = find_get_pid(md->wdog_pid); - data.ncp_fd = md->ncp_fd; - data.time_out = md->time_out; - data.retry_count = md->retry_count; - data.uid = make_kuid(current_user_ns(), md->uid); - data.gid = make_kgid(current_user_ns(), md->gid); - data.file_mode = md->file_mode; - data.dir_mode = md->dir_mode; - data.info_fd = -1; - } - break; - default: - error = -ECHRNG; - if (memcmp(raw_data, "vers", 4) == 0) { - error = ncp_parse_options(&data, raw_data); - } - if (error) - goto out; - break; - } - error = -EINVAL; - if (!uid_valid(data.mounted_uid) || !uid_valid(data.uid) || - !gid_valid(data.gid)) - goto out; - sock = sockfd_lookup(data.ncp_fd, &error); - if (!sock) - goto out; - - if (sock->type == SOCK_STREAM) - default_bufsize = 0xF000; - else - default_bufsize = 1024; - - sb->s_flags |= SB_NODIRATIME; /* probably even noatime */ - sb->s_maxbytes = 0xFFFFFFFFU; - sb->s_blocksize = 1024; /* Eh... Is this correct? */ - sb->s_blocksize_bits = 10; - sb->s_magic = NCP_SUPER_MAGIC; - sb->s_op = &ncp_sops; - sb->s_d_op = &ncp_dentry_operations; - - server = NCP_SBP(sb); - memset(server, 0, sizeof(*server)); - - error = super_setup_bdi(sb); - if (error) - goto out_fput; - - server->ncp_sock = sock; - - if (data.info_fd != -1) { - struct socket *info_sock = sockfd_lookup(data.info_fd, &error); - if (!info_sock) - goto out_fput; - server->info_sock = info_sock; - error = -EBADFD; - if (info_sock->type != SOCK_STREAM) - goto out_fput2; - } - -/* server->lock = 0; */ - mutex_init(&server->mutex); - server->packet = NULL; -/* server->buffer_size = 0; */ -/* server->conn_status = 0; */ -/* server->root_dentry = NULL; */ -/* server->root_setuped = 0; */ - mutex_init(&server->root_setup_lock); -#ifdef CONFIG_NCPFS_PACKET_SIGNING -/* server->sign_wanted = 0; */ -/* server->sign_active = 0; */ -#endif - init_rwsem(&server->auth_rwsem); - server->auth.auth_type = NCP_AUTH_NONE; -/* server->auth.object_name_len = 0; */ -/* server->auth.object_name = NULL; */ -/* server->auth.object_type = 0; */ -/* server->priv.len = 0; */ -/* server->priv.data = NULL; */ - - server->m = data; - /* Although anything producing this is buggy, it happens - now because of PATH_MAX changes.. */ - if (server->m.time_out < 1) { - server->m.time_out = 10; - pr_info("You need to recompile your ncpfs utils..\n"); - } - server->m.time_out = server->m.time_out * HZ / 100; - server->m.file_mode = (server->m.file_mode & S_IRWXUGO) | S_IFREG; - server->m.dir_mode = (server->m.dir_mode & S_IRWXUGO) | S_IFDIR; - -#ifdef CONFIG_NCPFS_NLS - /* load the default NLS charsets */ - server->nls_vol = load_nls_default(); - server->nls_io = load_nls_default(); -#endif /* CONFIG_NCPFS_NLS */ - - atomic_set(&server->dentry_ttl, 0); /* no caching */ - - INIT_LIST_HEAD(&server->tx.requests); - mutex_init(&server->rcv.creq_mutex); - server->tx.creq = NULL; - server->rcv.creq = NULL; - - timer_setup(&server->timeout_tm, ncpdgram_timeout_call, 0); -#undef NCP_PACKET_SIZE -#define NCP_PACKET_SIZE 131072 - error = -ENOMEM; - server->packet_size = NCP_PACKET_SIZE; - server->packet = vmalloc(NCP_PACKET_SIZE); - if (server->packet == NULL) - goto out_nls; - server->txbuf = vmalloc(NCP_PACKET_SIZE); - if (server->txbuf == NULL) - goto out_packet; - server->rxbuf = vmalloc(NCP_PACKET_SIZE); - if (server->rxbuf == NULL) - goto out_txbuf; - - lock_sock(sock->sk); - server->data_ready = sock->sk->sk_data_ready; - server->write_space = sock->sk->sk_write_space; - server->error_report = sock->sk->sk_error_report; - sock->sk->sk_user_data = server; - sock->sk->sk_data_ready = ncp_tcp_data_ready; - sock->sk->sk_error_report = ncp_tcp_error_report; - if (sock->type == SOCK_STREAM) { - server->rcv.ptr = (unsigned char*)&server->rcv.buf; - server->rcv.len = 10; - server->rcv.state = 0; - INIT_WORK(&server->rcv.tq, ncp_tcp_rcv_proc); - INIT_WORK(&server->tx.tq, ncp_tcp_tx_proc); - sock->sk->sk_write_space = ncp_tcp_write_space; - } else { - INIT_WORK(&server->rcv.tq, ncpdgram_rcv_proc); - INIT_WORK(&server->timeout_tq, ncpdgram_timeout_proc); - } - release_sock(sock->sk); - - ncp_lock_server(server); - error = ncp_connect(server); - ncp_unlock_server(server); - if (error < 0) - goto out_rxbuf; - ncp_dbg(1, "NCP_SBP(sb) = %p\n", NCP_SBP(sb)); - - error = -EMSGSIZE; /* -EREMOTESIDEINCOMPATIBLE */ -#ifdef CONFIG_NCPFS_PACKET_SIGNING - if (ncp_negotiate_size_and_options(server, default_bufsize, - NCP_DEFAULT_OPTIONS, &(server->buffer_size), &options) == 0) - { - if (options != NCP_DEFAULT_OPTIONS) - { - if (ncp_negotiate_size_and_options(server, - default_bufsize, - options & 2, - &(server->buffer_size), &options) != 0) - - { - goto out_disconnect; - } - } - ncp_lock_server(server); - if (options & 2) - server->sign_wanted = 1; - ncp_unlock_server(server); - } - else -#endif /* CONFIG_NCPFS_PACKET_SIGNING */ - if (ncp_negotiate_buffersize(server, default_bufsize, - &(server->buffer_size)) != 0) - goto out_disconnect; - ncp_dbg(1, "bufsize = %d\n", server->buffer_size); - - memset(&finfo, 0, sizeof(finfo)); - finfo.i.attributes = aDIR; - finfo.i.dataStreamSize = 0; /* ignored */ - finfo.i.dirEntNum = 0; - finfo.i.DosDirNum = 0; -#ifdef CONFIG_NCPFS_SMALLDOS - finfo.i.NSCreator = NW_NS_DOS; -#endif - finfo.volume = NCP_NUMBER_OF_VOLUMES; - /* set dates of mountpoint to Jan 1, 1986; 00:00 */ - finfo.i.creationTime = finfo.i.modifyTime - = cpu_to_le16(0x0000); - finfo.i.creationDate = finfo.i.modifyDate - = finfo.i.lastAccessDate - = cpu_to_le16(0x0C21); - finfo.i.nameLen = 0; - finfo.i.entryName[0] = '\0'; - - finfo.opened = 0; - finfo.ino = 2; /* tradition */ - - server->name_space[finfo.volume] = NW_NS_DOS; - - error = -ENOMEM; - root_inode = ncp_iget(sb, &finfo); - if (!root_inode) - goto out_disconnect; - ncp_dbg(1, "root vol=%d\n", NCP_FINFO(root_inode)->volNumber); - sb->s_root = d_make_root(root_inode); - if (!sb->s_root) - goto out_disconnect; - return 0; - -out_disconnect: - ncp_lock_server(server); - ncp_disconnect(server); - ncp_unlock_server(server); -out_rxbuf: - ncp_stop_tasks(server); - vfree(server->rxbuf); -out_txbuf: - vfree(server->txbuf); -out_packet: - vfree(server->packet); -out_nls: -#ifdef CONFIG_NCPFS_NLS - unload_nls(server->nls_io); - unload_nls(server->nls_vol); -#endif - mutex_destroy(&server->rcv.creq_mutex); - mutex_destroy(&server->root_setup_lock); - mutex_destroy(&server->mutex); -out_fput2: - if (server->info_sock) - sockfd_put(server->info_sock); -out_fput: - sockfd_put(sock); -out: - put_pid(data.wdog_pid); - sb->s_fs_info = NULL; - kfree(server); - return error; -} - -static void delayed_free(struct rcu_head *p) -{ - struct ncp_server *server = container_of(p, struct ncp_server, rcu); -#ifdef CONFIG_NCPFS_NLS - /* unload the NLS charsets */ - unload_nls(server->nls_vol); - unload_nls(server->nls_io); -#endif /* CONFIG_NCPFS_NLS */ - kfree(server); -} - -static void ncp_put_super(struct super_block *sb) -{ - struct ncp_server *server = NCP_SBP(sb); - - ncp_lock_server(server); - ncp_disconnect(server); - ncp_unlock_server(server); - - ncp_stop_tasks(server); - - mutex_destroy(&server->rcv.creq_mutex); - mutex_destroy(&server->root_setup_lock); - mutex_destroy(&server->mutex); - - if (server->info_sock) - sockfd_put(server->info_sock); - sockfd_put(server->ncp_sock); - kill_pid(server->m.wdog_pid, SIGTERM, 1); - put_pid(server->m.wdog_pid); - - kfree(server->priv.data); - kfree(server->auth.object_name); - vfree(server->rxbuf); - vfree(server->txbuf); - vfree(server->packet); - call_rcu(&server->rcu, delayed_free); -} - -static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) -{ - struct dentry* d; - struct inode* i; - struct ncp_inode_info* ni; - struct ncp_server* s; - struct ncp_volume_info vi; - struct super_block *sb = dentry->d_sb; - int err; - __u8 dh; - - d = sb->s_root; - if (!d) { - goto dflt; - } - i = d_inode(d); - if (!i) { - goto dflt; - } - ni = NCP_FINFO(i); - if (!ni) { - goto dflt; - } - s = NCP_SBP(sb); - if (!s) { - goto dflt; - } - if (!s->m.mounted_vol[0]) { - goto dflt; - } - - err = ncp_dirhandle_alloc(s, ni->volNumber, ni->DosDirNum, &dh); - if (err) { - goto dflt; - } - err = ncp_get_directory_info(s, dh, &vi); - ncp_dirhandle_free(s, dh); - if (err) { - goto dflt; - } - buf->f_type = NCP_SUPER_MAGIC; - buf->f_bsize = vi.sectors_per_block * 512; - buf->f_blocks = vi.total_blocks; - buf->f_bfree = vi.free_blocks; - buf->f_bavail = vi.free_blocks; - buf->f_files = vi.total_dir_entries; - buf->f_ffree = vi.available_dir_entries; - buf->f_namelen = 12; - return 0; - - /* We cannot say how much disk space is left on a mounted - NetWare Server, because free space is distributed over - volumes, and the current user might have disk quotas. So - free space is not that simple to determine. Our decision - here is to err conservatively. */ - -dflt:; - buf->f_type = NCP_SUPER_MAGIC; - buf->f_bsize = NCP_BLOCK_SIZE; - buf->f_blocks = 0; - buf->f_bfree = 0; - buf->f_bavail = 0; - buf->f_namelen = 12; - return 0; -} - -int ncp_notify_change(struct dentry *dentry, struct iattr *attr) -{ - struct inode *inode = d_inode(dentry); - int result = 0; - __le32 info_mask; - struct nw_modify_dos_info info; - struct ncp_server *server; - - result = -EIO; - - server = NCP_SERVER(inode); - if (!server) /* How this could happen? */ - goto out; - - result = -EPERM; - if (IS_DEADDIR(d_inode(dentry))) - goto out; - - /* ageing the dentry to force validation */ - ncp_age_dentry(server, dentry); - - result = setattr_prepare(dentry, attr); - if (result < 0) - goto out; - - result = -EPERM; - if ((attr->ia_valid & ATTR_UID) && !uid_eq(attr->ia_uid, server->m.uid)) - goto out; - - if ((attr->ia_valid & ATTR_GID) && !gid_eq(attr->ia_gid, server->m.gid)) - goto out; - - if (((attr->ia_valid & ATTR_MODE) && - (attr->ia_mode & - ~(S_IFREG | S_IFDIR | S_IRWXUGO)))) - goto out; - - info_mask = 0; - memset(&info, 0, sizeof(info)); - -#if 1 - if ((attr->ia_valid & ATTR_MODE) != 0) - { - umode_t newmode = attr->ia_mode; - - info_mask |= DM_ATTRIBUTES; - - if (S_ISDIR(inode->i_mode)) { - newmode &= server->m.dir_mode; - } else { -#ifdef CONFIG_NCPFS_EXTRAS - if (server->m.flags & NCP_MOUNT_EXTRAS) { - /* any non-default execute bit set */ - if (newmode & ~server->m.file_mode & S_IXUGO) - info.attributes |= aSHARED | aSYSTEM; - /* read for group/world and not in default file_mode */ - else if (newmode & ~server->m.file_mode & S_IRUGO) - info.attributes |= aSHARED; - } else -#endif - newmode &= server->m.file_mode; - } - if (newmode & S_IWUGO) - info.attributes &= ~(aRONLY|aRENAMEINHIBIT|aDELETEINHIBIT); - else - info.attributes |= (aRONLY|aRENAMEINHIBIT|aDELETEINHIBIT); - -#ifdef CONFIG_NCPFS_NFS_NS - if (ncp_is_nfs_extras(server, NCP_FINFO(inode)->volNumber)) { - result = ncp_modify_nfs_info(server, - NCP_FINFO(inode)->volNumber, - NCP_FINFO(inode)->dirEntNum, - attr->ia_mode, 0); - if (result != 0) - goto out; - info.attributes &= ~(aSHARED | aSYSTEM); - { - /* mark partial success */ - struct iattr tmpattr; - - tmpattr.ia_valid = ATTR_MODE; - tmpattr.ia_mode = attr->ia_mode; - - setattr_copy(inode, &tmpattr); - mark_inode_dirty(inode); - } - } -#endif - } -#endif - - /* Do SIZE before attributes, otherwise mtime together with size does not work... - */ - if ((attr->ia_valid & ATTR_SIZE) != 0) { - int written; - - ncp_dbg(1, "trying to change size to %llu\n", attr->ia_size); - - if ((result = ncp_make_open(inode, O_WRONLY)) < 0) { - result = -EACCES; - goto out; - } - ncp_write_kernel(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, - attr->ia_size, 0, "", &written); - - /* According to ndir, the changes only take effect after - closing the file */ - ncp_inode_close(inode); - result = ncp_make_closed(inode); - if (result) - goto out; - - if (attr->ia_size != i_size_read(inode)) { - truncate_setsize(inode, attr->ia_size); - mark_inode_dirty(inode); - } - } - if ((attr->ia_valid & ATTR_CTIME) != 0) { - info_mask |= (DM_CREATE_TIME | DM_CREATE_DATE); - ncp_date_unix2dos(attr->ia_ctime.tv_sec, - &info.creationTime, &info.creationDate); - } - if ((attr->ia_valid & ATTR_MTIME) != 0) { - info_mask |= (DM_MODIFY_TIME | DM_MODIFY_DATE); - ncp_date_unix2dos(attr->ia_mtime.tv_sec, - &info.modifyTime, &info.modifyDate); - } - if ((attr->ia_valid & ATTR_ATIME) != 0) { - __le16 dummy; - info_mask |= (DM_LAST_ACCESS_DATE); - ncp_date_unix2dos(attr->ia_atime.tv_sec, - &dummy, &info.lastAccessDate); - } - if (info_mask != 0) { - result = ncp_modify_file_or_subdir_dos_info(NCP_SERVER(inode), - inode, info_mask, &info); - if (result != 0) { - if (info_mask == (DM_CREATE_TIME | DM_CREATE_DATE)) { - /* NetWare seems not to allow this. I - do not know why. So, just tell the - user everything went fine. This is - a terrible hack, but I do not know - how to do this correctly. */ - result = 0; - } else - goto out; - } -#ifdef CONFIG_NCPFS_STRONG - if ((!result) && (info_mask & DM_ATTRIBUTES)) - NCP_FINFO(inode)->nwattr = info.attributes; -#endif - } - if (result) - goto out; - - setattr_copy(inode, attr); - mark_inode_dirty(inode); - -out: - if (result > 0) - result = -EACCES; - return result; -} - -static struct dentry *ncp_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return mount_nodev(fs_type, flags, data, ncp_fill_super); -} - -static struct file_system_type ncp_fs_type = { - .owner = THIS_MODULE, - .name = "ncpfs", - .mount = ncp_mount, - .kill_sb = kill_anon_super, - .fs_flags = FS_BINARY_MOUNTDATA, -}; -MODULE_ALIAS_FS("ncpfs"); - -static int __init init_ncp_fs(void) -{ - int err; - ncp_dbg(1, "called\n"); - - err = init_inodecache(); - if (err) - goto out1; - err = register_filesystem(&ncp_fs_type); - if (err) - goto out; - return 0; -out: - destroy_inodecache(); -out1: - return err; -} - -static void __exit exit_ncp_fs(void) -{ - ncp_dbg(1, "called\n"); - unregister_filesystem(&ncp_fs_type); - destroy_inodecache(); -} - -module_init(init_ncp_fs) -module_exit(exit_ncp_fs) -MODULE_LICENSE("GPL"); diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c deleted file mode 100644 index d378b98cd7b6..000000000000 --- a/fs/ncpfs/ioctl.c +++ /dev/null @@ -1,923 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ioctl.c - * - * Copyright (C) 1995, 1996 by Volker Lendecke - * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache - * Modified 1998, 1999 Wolfram Pienkoss for NLS - * - */ - -#include <linux/capability.h> -#include <linux/compat.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/ioctl.h> -#include <linux/time.h> -#include <linux/mm.h> -#include <linux/mount.h> -#include <linux/slab.h> -#include <linux/highuid.h> -#include <linux/vmalloc.h> -#include <linux/sched.h> -#include <linux/cred.h> - -#include <linux/uaccess.h> - -#include "ncp_fs.h" - -/* maximum limit for ncp_objectname_ioctl */ -#define NCP_OBJECT_NAME_MAX_LEN 4096 -/* maximum limit for ncp_privatedata_ioctl */ -#define NCP_PRIVATE_DATA_MAX_LEN 8192 -/* maximum negotiable packet size */ -#define NCP_PACKET_SIZE_INTERNAL 65536 - -static int -ncp_get_fs_info(struct ncp_server * server, struct inode *inode, - struct ncp_fs_info __user *arg) -{ - struct ncp_fs_info info; - - if (copy_from_user(&info, arg, sizeof(info))) - return -EFAULT; - - if (info.version != NCP_GET_FS_INFO_VERSION) { - ncp_dbg(1, "info.version invalid: %d\n", info.version); - return -EINVAL; - } - /* TODO: info.addr = server->m.serv_addr; */ - SET_UID(info.mounted_uid, from_kuid_munged(current_user_ns(), server->m.mounted_uid)); - info.connection = server->connection; - info.buffer_size = server->buffer_size; - info.volume_number = NCP_FINFO(inode)->volNumber; - info.directory_id = NCP_FINFO(inode)->DosDirNum; - - if (copy_to_user(arg, &info, sizeof(info))) - return -EFAULT; - return 0; -} - -static int -ncp_get_fs_info_v2(struct ncp_server * server, struct inode *inode, - struct ncp_fs_info_v2 __user * arg) -{ - struct ncp_fs_info_v2 info2; - - if (copy_from_user(&info2, arg, sizeof(info2))) - return -EFAULT; - - if (info2.version != NCP_GET_FS_INFO_VERSION_V2) { - ncp_dbg(1, "info.version invalid: %d\n", info2.version); - return -EINVAL; - } - info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid); - info2.connection = server->connection; - info2.buffer_size = server->buffer_size; - info2.volume_number = NCP_FINFO(inode)->volNumber; - info2.directory_id = NCP_FINFO(inode)->DosDirNum; - info2.dummy1 = info2.dummy2 = info2.dummy3 = 0; - - if (copy_to_user(arg, &info2, sizeof(info2))) - return -EFAULT; - return 0; -} - -#ifdef CONFIG_COMPAT -struct compat_ncp_objectname_ioctl -{ - s32 auth_type; - u32 object_name_len; - compat_caddr_t object_name; /* a userspace data, in most cases user name */ -}; - -struct compat_ncp_fs_info_v2 { - s32 version; - u32 mounted_uid; - u32 connection; - u32 buffer_size; - - u32 volume_number; - u32 directory_id; - - u32 dummy1; - u32 dummy2; - u32 dummy3; -}; - -struct compat_ncp_ioctl_request { - u32 function; - u32 size; - compat_caddr_t data; -}; - -struct compat_ncp_privatedata_ioctl -{ - u32 len; - compat_caddr_t data; /* ~1000 for NDS */ -}; - -#define NCP_IOC_GET_FS_INFO_V2_32 _IOWR('n', 4, struct compat_ncp_fs_info_v2) -#define NCP_IOC_NCPREQUEST_32 _IOR('n', 1, struct compat_ncp_ioctl_request) -#define NCP_IOC_GETOBJECTNAME_32 _IOWR('n', 9, struct compat_ncp_objectname_ioctl) -#define NCP_IOC_SETOBJECTNAME_32 _IOR('n', 9, struct compat_ncp_objectname_ioctl) -#define NCP_IOC_GETPRIVATEDATA_32 _IOWR('n', 10, struct compat_ncp_privatedata_ioctl) -#define NCP_IOC_SETPRIVATEDATA_32 _IOR('n', 10, struct compat_ncp_privatedata_ioctl) - -static int -ncp_get_compat_fs_info_v2(struct ncp_server * server, struct inode *inode, - struct compat_ncp_fs_info_v2 __user * arg) -{ - struct compat_ncp_fs_info_v2 info2; - - if (copy_from_user(&info2, arg, sizeof(info2))) - return -EFAULT; - - if (info2.version != NCP_GET_FS_INFO_VERSION_V2) { - ncp_dbg(1, "info.version invalid: %d\n", info2.version); - return -EINVAL; - } - info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid); - info2.connection = server->connection; - info2.buffer_size = server->buffer_size; - info2.volume_number = NCP_FINFO(inode)->volNumber; - info2.directory_id = NCP_FINFO(inode)->DosDirNum; - info2.dummy1 = info2.dummy2 = info2.dummy3 = 0; - - if (copy_to_user(arg, &info2, sizeof(info2))) - return -EFAULT; - return 0; -} -#endif - -#define NCP_IOC_GETMOUNTUID16 _IOW('n', 2, u16) -#define NCP_IOC_GETMOUNTUID32 _IOW('n', 2, u32) -#define NCP_IOC_GETMOUNTUID64 _IOW('n', 2, u64) - -#ifdef CONFIG_NCPFS_NLS -/* Here we are select the iocharset and the codepage for NLS. - * Thanks Petr Vandrovec for idea and many hints. - */ -static int -ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) -{ - struct ncp_nls_ioctl user; - struct nls_table *codepage; - struct nls_table *iocharset; - struct nls_table *oldset_io; - struct nls_table *oldset_cp; - int utf8; - int err; - - if (copy_from_user(&user, arg, sizeof(user))) - return -EFAULT; - - codepage = NULL; - user.codepage[NCP_IOCSNAME_LEN] = 0; - if (!user.codepage[0] || !strcmp(user.codepage, "default")) - codepage = load_nls_default(); - else { - codepage = load_nls(user.codepage); - if (!codepage) { - return -EBADRQC; - } - } - - iocharset = NULL; - user.iocharset[NCP_IOCSNAME_LEN] = 0; - if (!user.iocharset[0] || !strcmp(user.iocharset, "default")) { - iocharset = load_nls_default(); - utf8 = 0; - } else if (!strcmp(user.iocharset, "utf8")) { - iocharset = load_nls_default(); - utf8 = 1; - } else { - iocharset = load_nls(user.iocharset); - if (!iocharset) { - unload_nls(codepage); - return -EBADRQC; - } - utf8 = 0; - } - - mutex_lock(&server->root_setup_lock); - if (server->root_setuped) { - oldset_cp = codepage; - oldset_io = iocharset; - err = -EBUSY; - } else { - if (utf8) - NCP_SET_FLAG(server, NCP_FLAG_UTF8); - else - NCP_CLR_FLAG(server, NCP_FLAG_UTF8); - oldset_cp = server->nls_vol; - server->nls_vol = codepage; - oldset_io = server->nls_io; - server->nls_io = iocharset; - err = 0; - } - mutex_unlock(&server->root_setup_lock); - unload_nls(oldset_cp); - unload_nls(oldset_io); - - return err; -} - -static int -ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) -{ - struct ncp_nls_ioctl user; - int len; - - memset(&user, 0, sizeof(user)); - mutex_lock(&server->root_setup_lock); - if (server->nls_vol && server->nls_vol->charset) { - len = strlen(server->nls_vol->charset); - if (len > NCP_IOCSNAME_LEN) - len = NCP_IOCSNAME_LEN; - strncpy(user.codepage, server->nls_vol->charset, len); - user.codepage[len] = 0; - } - - if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) - strcpy(user.iocharset, "utf8"); - else if (server->nls_io && server->nls_io->charset) { - len = strlen(server->nls_io->charset); - if (len > NCP_IOCSNAME_LEN) - len = NCP_IOCSNAME_LEN; - strncpy(user.iocharset, server->nls_io->charset, len); - user.iocharset[len] = 0; - } - mutex_unlock(&server->root_setup_lock); - - if (copy_to_user(arg, &user, sizeof(user))) - return -EFAULT; - return 0; -} -#endif /* CONFIG_NCPFS_NLS */ - -static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg) -{ - struct ncp_server *server = NCP_SERVER(inode); - int result; - struct ncp_ioctl_request request; - char* bouncebuffer; - void __user *argp = (void __user *)arg; - - switch (cmd) { -#ifdef CONFIG_COMPAT - case NCP_IOC_NCPREQUEST_32: -#endif - case NCP_IOC_NCPREQUEST: -#ifdef CONFIG_COMPAT - if (cmd == NCP_IOC_NCPREQUEST_32) { - struct compat_ncp_ioctl_request request32; - if (copy_from_user(&request32, argp, sizeof(request32))) - return -EFAULT; - request.function = request32.function; - request.size = request32.size; - request.data = compat_ptr(request32.data); - } else -#endif - if (copy_from_user(&request, argp, sizeof(request))) - return -EFAULT; - - if ((request.function > 255) - || (request.size > - NCP_PACKET_SIZE - sizeof(struct ncp_request_header))) { - return -EINVAL; - } - bouncebuffer = vmalloc(NCP_PACKET_SIZE_INTERNAL); - if (!bouncebuffer) - return -ENOMEM; - if (copy_from_user(bouncebuffer, request.data, request.size)) { - vfree(bouncebuffer); - return -EFAULT; - } - ncp_lock_server(server); - - /* FIXME: We hack around in the server's structures - here to be able to use ncp_request */ - - server->has_subfunction = 0; - server->current_size = request.size; - memcpy(server->packet, bouncebuffer, request.size); - - result = ncp_request2(server, request.function, - bouncebuffer, NCP_PACKET_SIZE_INTERNAL); - if (result < 0) - result = -EIO; - else - result = server->reply_size; - ncp_unlock_server(server); - ncp_dbg(1, "copy %d bytes\n", result); - if (result >= 0) - if (copy_to_user(request.data, bouncebuffer, result)) - result = -EFAULT; - vfree(bouncebuffer); - return result; - - case NCP_IOC_CONN_LOGGED_IN: - - if (!(server->m.int_flags & NCP_IMOUNT_LOGGEDIN_POSSIBLE)) - return -EINVAL; - mutex_lock(&server->root_setup_lock); - if (server->root_setuped) - result = -EBUSY; - else { - result = ncp_conn_logged_in(inode->i_sb); - if (result == 0) - server->root_setuped = 1; - } - mutex_unlock(&server->root_setup_lock); - return result; - - case NCP_IOC_GET_FS_INFO: - return ncp_get_fs_info(server, inode, argp); - - case NCP_IOC_GET_FS_INFO_V2: - return ncp_get_fs_info_v2(server, inode, argp); - -#ifdef CONFIG_COMPAT - case NCP_IOC_GET_FS_INFO_V2_32: - return ncp_get_compat_fs_info_v2(server, inode, argp); -#endif - /* we have too many combinations of CONFIG_COMPAT, - * CONFIG_64BIT and CONFIG_UID16, so just handle - * any of the possible ioctls */ - case NCP_IOC_GETMOUNTUID16: - { - u16 uid; - - SET_UID(uid, from_kuid_munged(current_user_ns(), server->m.mounted_uid)); - if (put_user(uid, (u16 __user *)argp)) - return -EFAULT; - return 0; - } - case NCP_IOC_GETMOUNTUID32: - { - uid_t uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid); - if (put_user(uid, (u32 __user *)argp)) - return -EFAULT; - return 0; - } - case NCP_IOC_GETMOUNTUID64: - { - uid_t uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid); - if (put_user(uid, (u64 __user *)argp)) - return -EFAULT; - return 0; - } - case NCP_IOC_GETROOT: - { - struct ncp_setroot_ioctl sr; - - result = -EACCES; - mutex_lock(&server->root_setup_lock); - if (server->m.mounted_vol[0]) { - struct dentry* dentry = inode->i_sb->s_root; - - if (dentry) { - struct inode* s_inode = d_inode(dentry); - - if (s_inode) { - sr.volNumber = NCP_FINFO(s_inode)->volNumber; - sr.dirEntNum = NCP_FINFO(s_inode)->dirEntNum; - sr.namespace = server->name_space[sr.volNumber]; - result = 0; - } else - ncp_dbg(1, "d_inode(s_root)==NULL\n"); - } else - ncp_dbg(1, "s_root==NULL\n"); - } else { - sr.volNumber = -1; - sr.namespace = 0; - sr.dirEntNum = 0; - result = 0; - } - mutex_unlock(&server->root_setup_lock); - if (!result && copy_to_user(argp, &sr, sizeof(sr))) - result = -EFAULT; - return result; - } - - case NCP_IOC_SETROOT: - { - struct ncp_setroot_ioctl sr; - __u32 vnum; - __le32 de; - __le32 dosde; - struct dentry* dentry; - - if (copy_from_user(&sr, argp, sizeof(sr))) - return -EFAULT; - mutex_lock(&server->root_setup_lock); - if (server->root_setuped) - result = -EBUSY; - else { - if (sr.volNumber < 0) { - server->m.mounted_vol[0] = 0; - vnum = NCP_NUMBER_OF_VOLUMES; - de = 0; - dosde = 0; - result = 0; - } else if (sr.volNumber >= NCP_NUMBER_OF_VOLUMES) { - result = -EINVAL; - } else if (ncp_mount_subdir(server, sr.volNumber, - sr.namespace, sr.dirEntNum, - &vnum, &de, &dosde)) { - result = -ENOENT; - } else - result = 0; - - if (result == 0) { - dentry = inode->i_sb->s_root; - if (dentry) { - struct inode* s_inode = d_inode(dentry); - - if (s_inode) { - NCP_FINFO(s_inode)->volNumber = vnum; - NCP_FINFO(s_inode)->dirEntNum = de; - NCP_FINFO(s_inode)->DosDirNum = dosde; - server->root_setuped = 1; - } else { - ncp_dbg(1, "d_inode(s_root)==NULL\n"); - result = -EIO; - } - } else { - ncp_dbg(1, "s_root==NULL\n"); - result = -EIO; - } - } - } - mutex_unlock(&server->root_setup_lock); - - return result; - } - -#ifdef CONFIG_NCPFS_PACKET_SIGNING - case NCP_IOC_SIGN_INIT: - { - struct ncp_sign_init sign; - - if (argp) - if (copy_from_user(&sign, argp, sizeof(sign))) - return -EFAULT; - ncp_lock_server(server); - mutex_lock(&server->rcv.creq_mutex); - if (argp) { - if (server->sign_wanted) { - memcpy(server->sign_root,sign.sign_root,8); - memcpy(server->sign_last,sign.sign_last,16); - server->sign_active = 1; - } - /* ignore when signatures not wanted */ - } else { - server->sign_active = 0; - } - mutex_unlock(&server->rcv.creq_mutex); - ncp_unlock_server(server); - return 0; - } - - case NCP_IOC_SIGN_WANTED: - { - int state; - - ncp_lock_server(server); - state = server->sign_wanted; - ncp_unlock_server(server); - if (put_user(state, (int __user *)argp)) - return -EFAULT; - return 0; - } - - case NCP_IOC_SET_SIGN_WANTED: - { - int newstate; - - /* get only low 8 bits... */ - if (get_user(newstate, (unsigned char __user *)argp)) - return -EFAULT; - result = 0; - ncp_lock_server(server); - if (server->sign_active) { - /* cannot turn signatures OFF when active */ - if (!newstate) - result = -EINVAL; - } else { - server->sign_wanted = newstate != 0; - } - ncp_unlock_server(server); - return result; - } - -#endif /* CONFIG_NCPFS_PACKET_SIGNING */ - -#ifdef CONFIG_NCPFS_IOCTL_LOCKING - case NCP_IOC_LOCKUNLOCK: - { - struct ncp_lock_ioctl rqdata; - - if (copy_from_user(&rqdata, argp, sizeof(rqdata))) - return -EFAULT; - if (rqdata.origin != 0) - return -EINVAL; - /* check for cmd */ - switch (rqdata.cmd) { - case NCP_LOCK_EX: - case NCP_LOCK_SH: - if (rqdata.timeout < 0) - return -EINVAL; - if (rqdata.timeout == 0) - rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT; - else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT) - rqdata.timeout = NCP_LOCK_MAX_TIMEOUT; - break; - case NCP_LOCK_LOG: - rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT; /* has no effect */ - case NCP_LOCK_CLEAR: - break; - default: - return -EINVAL; - } - /* locking needs both read and write access */ - if ((result = ncp_make_open(inode, O_RDWR)) != 0) - { - return result; - } - result = -EISDIR; - if (!S_ISREG(inode->i_mode)) - goto outrel; - if (rqdata.cmd == NCP_LOCK_CLEAR) - { - result = ncp_ClearPhysicalRecord(NCP_SERVER(inode), - NCP_FINFO(inode)->file_handle, - rqdata.offset, - rqdata.length); - if (result > 0) result = 0; /* no such lock */ - } - else - { - int lockcmd; - - switch (rqdata.cmd) - { - case NCP_LOCK_EX: lockcmd=1; break; - case NCP_LOCK_SH: lockcmd=3; break; - default: lockcmd=0; break; - } - result = ncp_LogPhysicalRecord(NCP_SERVER(inode), - NCP_FINFO(inode)->file_handle, - lockcmd, - rqdata.offset, - rqdata.length, - rqdata.timeout); - if (result > 0) result = -EAGAIN; - } -outrel: - ncp_inode_close(inode); - return result; - } -#endif /* CONFIG_NCPFS_IOCTL_LOCKING */ - -#ifdef CONFIG_COMPAT - case NCP_IOC_GETOBJECTNAME_32: - { - struct compat_ncp_objectname_ioctl user; - size_t outl; - - if (copy_from_user(&user, argp, sizeof(user))) - return -EFAULT; - down_read(&server->auth_rwsem); - user.auth_type = server->auth.auth_type; - outl = user.object_name_len; - user.object_name_len = server->auth.object_name_len; - if (outl > user.object_name_len) - outl = user.object_name_len; - result = 0; - if (outl) { - if (copy_to_user(compat_ptr(user.object_name), - server->auth.object_name, - outl)) - result = -EFAULT; - } - up_read(&server->auth_rwsem); - if (!result && copy_to_user(argp, &user, sizeof(user))) - result = -EFAULT; - return result; - } -#endif - - case NCP_IOC_GETOBJECTNAME: - { - struct ncp_objectname_ioctl user; - size_t outl; - - if (copy_from_user(&user, argp, sizeof(user))) - return -EFAULT; - down_read(&server->auth_rwsem); - user.auth_type = server->auth.auth_type; - outl = user.object_name_len; - user.object_name_len = server->auth.object_name_len; - if (outl > user.object_name_len) - outl = user.object_name_len; - result = 0; - if (outl) { - if (copy_to_user(user.object_name, - server->auth.object_name, - outl)) - result = -EFAULT; - } - up_read(&server->auth_rwsem); - if (!result && copy_to_user(argp, &user, sizeof(user))) - result = -EFAULT; - return result; - } - -#ifdef CONFIG_COMPAT - case NCP_IOC_SETOBJECTNAME_32: -#endif - case NCP_IOC_SETOBJECTNAME: - { - struct ncp_objectname_ioctl user; - void* newname; - void* oldname; - size_t oldnamelen; - void* oldprivate; - size_t oldprivatelen; - -#ifdef CONFIG_COMPAT - if (cmd == NCP_IOC_SETOBJECTNAME_32) { - struct compat_ncp_objectname_ioctl user32; - if (copy_from_user(&user32, argp, sizeof(user32))) - return -EFAULT; - user.auth_type = user32.auth_type; - user.object_name_len = user32.object_name_len; - user.object_name = compat_ptr(user32.object_name); - } else -#endif - if (copy_from_user(&user, argp, sizeof(user))) - return -EFAULT; - - if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) - return -ENOMEM; - if (user.object_name_len) { - newname = memdup_user(user.object_name, - user.object_name_len); - if (IS_ERR(newname)) - return PTR_ERR(newname); - } else { - newname = NULL; - } - down_write(&server->auth_rwsem); - oldname = server->auth.object_name; - oldnamelen = server->auth.object_name_len; - oldprivate = server->priv.data; - oldprivatelen = server->priv.len; - server->auth.auth_type = user.auth_type; - server->auth.object_name_len = user.object_name_len; - server->auth.object_name = newname; - server->priv.len = 0; - server->priv.data = NULL; - up_write(&server->auth_rwsem); - kfree(oldprivate); - kfree(oldname); - return 0; - } - -#ifdef CONFIG_COMPAT - case NCP_IOC_GETPRIVATEDATA_32: -#endif - case NCP_IOC_GETPRIVATEDATA: - { - struct ncp_privatedata_ioctl user; - size_t outl; - -#ifdef CONFIG_COMPAT - if (cmd == NCP_IOC_GETPRIVATEDATA_32) { - struct compat_ncp_privatedata_ioctl user32; - if (copy_from_user(&user32, argp, sizeof(user32))) - return -EFAULT; - user.len = user32.len; - user.data = compat_ptr(user32.data); - } else -#endif - if (copy_from_user(&user, argp, sizeof(user))) - return -EFAULT; - - down_read(&server->auth_rwsem); - outl = user.len; - user.len = server->priv.len; - if (outl > user.len) outl = user.len; - result = 0; - if (outl) { - if (copy_to_user(user.data, - server->priv.data, - outl)) - result = -EFAULT; - } - up_read(&server->auth_rwsem); - if (result) - return result; -#ifdef CONFIG_COMPAT - if (cmd == NCP_IOC_GETPRIVATEDATA_32) { - struct compat_ncp_privatedata_ioctl user32; - user32.len = user.len; - user32.data = (unsigned long) user.data; - if (copy_to_user(argp, &user32, sizeof(user32))) - return -EFAULT; - } else -#endif - if (copy_to_user(argp, &user, sizeof(user))) - return -EFAULT; - - return 0; - } - -#ifdef CONFIG_COMPAT - case NCP_IOC_SETPRIVATEDATA_32: -#endif - case NCP_IOC_SETPRIVATEDATA: - { - struct ncp_privatedata_ioctl user; - void* new; - void* old; - size_t oldlen; - -#ifdef CONFIG_COMPAT - if (cmd == NCP_IOC_SETPRIVATEDATA_32) { - struct compat_ncp_privatedata_ioctl user32; - if (copy_from_user(&user32, argp, sizeof(user32))) - return -EFAULT; - user.len = user32.len; - user.data = compat_ptr(user32.data); - } else -#endif - if (copy_from_user(&user, argp, sizeof(user))) - return -EFAULT; - - if (user.len > NCP_PRIVATE_DATA_MAX_LEN) - return -ENOMEM; - if (user.len) { - new = memdup_user(user.data, user.len); - if (IS_ERR(new)) - return PTR_ERR(new); - } else { - new = NULL; - } - down_write(&server->auth_rwsem); - old = server->priv.data; - oldlen = server->priv.len; - server->priv.len = user.len; - server->priv.data = new; - up_write(&server->auth_rwsem); - kfree(old); - return 0; - } - -#ifdef CONFIG_NCPFS_NLS - case NCP_IOC_SETCHARSETS: - return ncp_set_charsets(server, argp); - - case NCP_IOC_GETCHARSETS: - return ncp_get_charsets(server, argp); - -#endif /* CONFIG_NCPFS_NLS */ - - case NCP_IOC_SETDENTRYTTL: - { - u_int32_t user; - - if (copy_from_user(&user, argp, sizeof(user))) - return -EFAULT; - /* 20 secs at most... */ - if (user > 20000) - return -EINVAL; - user = (user * HZ) / 1000; - atomic_set(&server->dentry_ttl, user); - return 0; - } - - case NCP_IOC_GETDENTRYTTL: - { - u_int32_t user = (atomic_read(&server->dentry_ttl) * 1000) / HZ; - if (copy_to_user(argp, &user, sizeof(user))) - return -EFAULT; - return 0; - } - - } - return -EINVAL; -} - -long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file_inode(filp); - struct ncp_server *server = NCP_SERVER(inode); - kuid_t uid = current_uid(); - int need_drop_write = 0; - long ret; - - switch (cmd) { - case NCP_IOC_SETCHARSETS: - case NCP_IOC_CONN_LOGGED_IN: - case NCP_IOC_SETROOT: - if (!capable(CAP_SYS_ADMIN)) { - ret = -EPERM; - goto out; - } - break; - } - if (!uid_eq(server->m.mounted_uid, uid)) { - switch (cmd) { - /* - * Only mount owner can issue these ioctls. Information - * necessary to authenticate to other NDS servers are - * stored here. - */ - case NCP_IOC_GETOBJECTNAME: - case NCP_IOC_SETOBJECTNAME: - case NCP_IOC_GETPRIVATEDATA: - case NCP_IOC_SETPRIVATEDATA: -#ifdef CONFIG_COMPAT - case NCP_IOC_GETOBJECTNAME_32: - case NCP_IOC_SETOBJECTNAME_32: - case NCP_IOC_GETPRIVATEDATA_32: - case NCP_IOC_SETPRIVATEDATA_32: -#endif - ret = -EACCES; - goto out; - /* - * These require write access on the inode if user id - * does not match. Note that they do not write to the - * file... But old code did mnt_want_write, so I keep - * it as is. Of course not for mountpoint owner, as - * that breaks read-only mounts altogether as ncpmount - * needs working NCP_IOC_NCPREQUEST and - * NCP_IOC_GET_FS_INFO. Some of these codes (setdentryttl, - * signinit, setsignwanted) should be probably restricted - * to owner only, or even more to CAP_SYS_ADMIN). - */ - case NCP_IOC_GET_FS_INFO: - case NCP_IOC_GET_FS_INFO_V2: - case NCP_IOC_NCPREQUEST: - case NCP_IOC_SETDENTRYTTL: - case NCP_IOC_SIGN_INIT: - case NCP_IOC_LOCKUNLOCK: - case NCP_IOC_SET_SIGN_WANTED: -#ifdef CONFIG_COMPAT - case NCP_IOC_GET_FS_INFO_V2_32: - case NCP_IOC_NCPREQUEST_32: -#endif - ret = mnt_want_write_file(filp); - if (ret) - goto out; - need_drop_write = 1; - ret = inode_permission(inode, MAY_WRITE); - if (ret) - goto outDropWrite; - break; - /* - * Read access required. - */ - case NCP_IOC_GETMOUNTUID16: - case NCP_IOC_GETMOUNTUID32: - case NCP_IOC_GETMOUNTUID64: - case NCP_IOC_GETROOT: - case NCP_IOC_SIGN_WANTED: - ret = inode_permission(inode, MAY_READ); - if (ret) - goto out; - break; - /* - * Anybody can read these. - */ - case NCP_IOC_GETCHARSETS: - case NCP_IOC_GETDENTRYTTL: - default: - /* Three codes below are protected by CAP_SYS_ADMIN above. */ - case NCP_IOC_SETCHARSETS: - case NCP_IOC_CONN_LOGGED_IN: - case NCP_IOC_SETROOT: - break; - } - } - ret = __ncp_ioctl(inode, cmd, arg); -outDropWrite: - if (need_drop_write) - mnt_drop_write_file(filp); -out: - return ret; -} - -#ifdef CONFIG_COMPAT -long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - long ret; - - arg = (unsigned long) compat_ptr(arg); - ret = ncp_ioctl(file, cmd, arg); - return ret; -} -#endif diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c deleted file mode 100644 index a5c5cf2ff007..000000000000 --- a/fs/ncpfs/mmap.c +++ /dev/null @@ -1,125 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * mmap.c - * - * Copyright (C) 1995, 1996 by Volker Lendecke - * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache - * - */ - -#include <linux/stat.h> -#include <linux/time.h> -#include <linux/kernel.h> -#include <linux/gfp.h> -#include <linux/mm.h> -#include <linux/shm.h> -#include <linux/errno.h> -#include <linux/mman.h> -#include <linux/string.h> -#include <linux/fcntl.h> -#include <linux/memcontrol.h> - -#include <linux/uaccess.h> - -#include "ncp_fs.h" - -/* - * Fill in the supplied page for mmap - * XXX: how are we excluding truncate/invalidate here? Maybe need to lock - * page? - */ -static int ncp_file_mmap_fault(struct vm_fault *vmf) -{ - struct inode *inode = file_inode(vmf->vma->vm_file); - char *pg_addr; - unsigned int already_read; - unsigned int count; - int bufsize; - int pos; /* XXX: loff_t ? */ - - /* - * ncpfs has nothing against high pages as long - * as recvmsg and memset works on it - */ - vmf->page = alloc_page(GFP_HIGHUSER); - if (!vmf->page) - return VM_FAULT_OOM; - pg_addr = kmap(vmf->page); - pos = vmf->pgoff << PAGE_SHIFT; - - count = PAGE_SIZE; - /* what we can read in one go */ - bufsize = NCP_SERVER(inode)->buffer_size; - - already_read = 0; - if (ncp_make_open(inode, O_RDONLY) >= 0) { - while (already_read < count) { - int read_this_time; - int to_read; - - to_read = bufsize - (pos % bufsize); - - to_read = min_t(unsigned int, to_read, count - already_read); - - if (ncp_read_kernel(NCP_SERVER(inode), - NCP_FINFO(inode)->file_handle, - pos, to_read, - pg_addr + already_read, - &read_this_time) != 0) { - read_this_time = 0; - } - pos += read_this_time; - already_read += read_this_time; - - if (read_this_time < to_read) { - break; - } - } - ncp_inode_close(inode); - - } - - if (already_read < PAGE_SIZE) - memset(pg_addr + already_read, 0, PAGE_SIZE - already_read); - flush_dcache_page(vmf->page); - kunmap(vmf->page); - - /* - * If I understand ncp_read_kernel() properly, the above always - * fetches from the network, here the analogue of disk. - * -- nyc - */ - count_vm_event(PGMAJFAULT); - count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT); - return VM_FAULT_MAJOR; -} - -static const struct vm_operations_struct ncp_file_mmap = -{ - .fault = ncp_file_mmap_fault, -}; - - -/* This is used for a general mmap of a ncp file */ -int ncp_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct inode *inode = file_inode(file); - - ncp_dbg(1, "called\n"); - - if (!ncp_conn_valid(NCP_SERVER(inode))) - return -EIO; - - /* only PAGE_COW or read-only supported now */ - if (vma->vm_flags & VM_SHARED) - return -EINVAL; - /* we do not support files bigger than 4GB... We eventually - supports just 4GB... */ - if (vma_pages(vma) + vma->vm_pgoff - > (1U << (32 - PAGE_SHIFT))) - return -EFBIG; - - vma->vm_ops = &ncp_file_mmap; - file_accessed(file); - return 0; -} diff --git a/fs/ncpfs/ncp_fs.h b/fs/ncpfs/ncp_fs.h deleted file mode 100644 index bdd262b6c198..000000000000 --- a/fs/ncpfs/ncp_fs.h +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/ncp_fs.h> -#include "ncp_fs_i.h" -#include "ncp_fs_sb.h" - -#undef NCPFS_PARANOIA -#ifdef NCPFS_PARANOIA -#define ncp_vdbg(fmt, ...) \ - pr_debug(fmt, ##__VA_ARGS__) -#else -#define ncp_vdbg(fmt, ...) \ -do { \ - if (0) \ - pr_debug(fmt, ##__VA_ARGS__); \ -} while (0) -#endif - -#ifndef DEBUG_NCP -#define DEBUG_NCP 0 -#endif - -#if DEBUG_NCP > 0 && !defined(DEBUG) -#define DEBUG -#endif - -#define ncp_dbg(level, fmt, ...) \ -do { \ - if (level <= DEBUG_NCP) \ - pr_debug(fmt, ##__VA_ARGS__); \ -} while (0) - -#define NCP_MAX_RPC_TIMEOUT (6*HZ) - - -struct ncp_entry_info { - struct nw_info_struct i; - ino_t ino; - int opened; - int access; - unsigned int volume; - __u8 file_handle[6]; -}; - -static inline struct ncp_server *NCP_SBP(const struct super_block *sb) -{ - return sb->s_fs_info; -} - -#define NCP_SERVER(inode) NCP_SBP((inode)->i_sb) -static inline struct ncp_inode_info *NCP_FINFO(const struct inode *inode) -{ - return container_of(inode, struct ncp_inode_info, vfs_inode); -} - -/* linux/fs/ncpfs/inode.c */ -int ncp_notify_change(struct dentry *, struct iattr *); -struct inode *ncp_iget(struct super_block *, struct ncp_entry_info *); -void ncp_update_inode(struct inode *, struct ncp_entry_info *); -void ncp_update_inode2(struct inode *, struct ncp_entry_info *); - -/* linux/fs/ncpfs/dir.c */ -extern const struct inode_operations ncp_dir_inode_operations; -extern const struct file_operations ncp_dir_operations; -extern const struct dentry_operations ncp_dentry_operations; -int ncp_conn_logged_in(struct super_block *); -int ncp_date_dos2unix(__le16 time, __le16 date); -void ncp_date_unix2dos(int unix_date, __le16 * time, __le16 * date); - -/* linux/fs/ncpfs/ioctl.c */ -long ncp_ioctl(struct file *, unsigned int, unsigned long); -long ncp_compat_ioctl(struct file *, unsigned int, unsigned long); - -/* linux/fs/ncpfs/sock.c */ -int ncp_request2(struct ncp_server *server, int function, - void* reply, int max_reply_size); -static inline int ncp_request(struct ncp_server *server, int function) { - return ncp_request2(server, function, server->packet, server->packet_size); -} -int ncp_connect(struct ncp_server *server); -int ncp_disconnect(struct ncp_server *server); -void ncp_lock_server(struct ncp_server *server); -void ncp_unlock_server(struct ncp_server *server); - -/* linux/fs/ncpfs/symlink.c */ -#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS) -extern const struct address_space_operations ncp_symlink_aops; -int ncp_symlink(struct inode*, struct dentry*, const char*); -#endif - -/* linux/fs/ncpfs/file.c */ -extern const struct inode_operations ncp_file_inode_operations; -extern const struct file_operations ncp_file_operations; -int ncp_make_open(struct inode *, int); - -/* linux/fs/ncpfs/mmap.c */ -int ncp_mmap(struct file *, struct vm_area_struct *); - -/* linux/fs/ncpfs/ncplib_kernel.c */ -int ncp_make_closed(struct inode *); - -#include "ncplib_kernel.h" diff --git a/fs/ncpfs/ncp_fs_i.h b/fs/ncpfs/ncp_fs_i.h deleted file mode 100644 index 3432bafb53a5..000000000000 --- a/fs/ncpfs/ncp_fs_i.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ncp_fs_i.h - * - * Copyright (C) 1995 Volker Lendecke - * - */ - -#ifndef _LINUX_NCP_FS_I -#define _LINUX_NCP_FS_I - -/* - * This is the ncpfs part of the inode structure. This must contain - * all the information we need to work with an inode after creation. - */ -struct ncp_inode_info { - __le32 dirEntNum; - __le32 DosDirNum; - __u8 volNumber; - __le32 nwattr; - struct mutex open_mutex; - atomic_t opened; - int access; - int flags; -#define NCPI_KLUDGE_SYMLINK 0x0001 -#define NCPI_DIR_CACHE 0x0002 - __u8 file_handle[6]; - struct inode vfs_inode; -}; - -#endif /* _LINUX_NCP_FS_I */ diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h deleted file mode 100644 index f06cde4adf71..000000000000 --- a/fs/ncpfs/ncp_fs_sb.h +++ /dev/null @@ -1,174 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ncp_fs_sb.h - * - * Copyright (C) 1995, 1996 by Volker Lendecke - * - */ - -#ifndef _NCP_FS_SB -#define _NCP_FS_SB - -#include <linux/types.h> -#include <linux/ncp_mount.h> -#include <linux/net.h> -#include <linux/mutex.h> -#include <linux/backing-dev.h> -#include <linux/workqueue.h> - -#define NCP_DEFAULT_OPTIONS 0 /* 2 for packet signatures */ - -struct sock; - -struct ncp_mount_data_kernel { - unsigned long flags; /* NCP_MOUNT_* flags */ - unsigned int int_flags; /* internal flags */ -#define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001 - kuid_t mounted_uid; /* Who may umount() this filesystem? */ - struct pid *wdog_pid; /* Who cares for our watchdog packets? */ - unsigned int ncp_fd; /* The socket to the ncp port */ - unsigned int time_out; /* How long should I wait after - sending a NCP request? */ - unsigned int retry_count; /* And how often should I retry? */ - unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; - kuid_t uid; - kgid_t gid; - umode_t file_mode; - umode_t dir_mode; - int info_fd; -}; - -struct ncp_server { - struct rcu_head rcu; - struct ncp_mount_data_kernel m; /* Nearly all of the mount data is of - interest for us later, so we store - it completely. */ - - __u8 name_space[NCP_NUMBER_OF_VOLUMES + 2]; - - struct socket *ncp_sock;/* ncp socket */ - struct socket *info_sock; - - u8 sequence; - u8 task; - u16 connection; /* Remote connection number */ - - u8 completion; /* Status message from server */ - u8 conn_status; /* Bit 4 = 1 ==> Server going down, no - requests allowed anymore. - Bit 0 = 1 ==> Server is down. */ - - int buffer_size; /* Negotiated bufsize */ - - int reply_size; /* Size of last reply */ - - int packet_size; - unsigned char *packet; /* Here we prepare requests and - receive replies */ - unsigned char *txbuf; /* Storage for current request */ - unsigned char *rxbuf; /* Storage for reply to current request */ - - int lock; /* To prevent mismatch in protocols. */ - struct mutex mutex; - - int current_size; /* for packet preparation */ - int has_subfunction; - int ncp_reply_size; - - int root_setuped; - struct mutex root_setup_lock; - - /* info for packet signing */ - int sign_wanted; /* 1=Server needs signed packets */ - int sign_active; /* 0=don't do signing, 1=do */ - char sign_root[8]; /* generated from password and encr. key */ - char sign_last[16]; - - /* Authentication info: NDS or BINDERY, username */ - struct { - int auth_type; - size_t object_name_len; - void* object_name; - int object_type; - } auth; - /* Password info */ - struct { - size_t len; - void* data; - } priv; - struct rw_semaphore auth_rwsem; - - /* nls info: codepage for volume and charset for I/O */ - struct nls_table *nls_vol; - struct nls_table *nls_io; - - /* maximum age in jiffies */ - atomic_t dentry_ttl; - - /* miscellaneous */ - unsigned int flags; - - spinlock_t requests_lock; /* Lock accesses to tx.requests, tx.creq and rcv.creq when STREAM mode */ - - void (*data_ready)(struct sock* sk); - void (*error_report)(struct sock* sk); - void (*write_space)(struct sock* sk); /* STREAM mode only */ - struct { - struct work_struct tq; /* STREAM/DGRAM: data/error ready */ - struct ncp_request_reply* creq; /* STREAM/DGRAM: awaiting reply from this request */ - struct mutex creq_mutex; /* DGRAM only: lock accesses to rcv.creq */ - - unsigned int state; /* STREAM only: receiver state */ - struct { - __u32 magic __packed; - __u32 len __packed; - __u16 type __packed; - __u16 p1 __packed; - __u16 p2 __packed; - __u16 p3 __packed; - __u16 type2 __packed; - } buf; /* STREAM only: temporary buffer */ - unsigned char* ptr; /* STREAM only: pointer to data */ - size_t len; /* STREAM only: length of data to receive */ - } rcv; - struct { - struct list_head requests; /* STREAM only: queued requests */ - struct work_struct tq; /* STREAM only: transmitter ready */ - struct ncp_request_reply* creq; /* STREAM only: currently transmitted entry */ - } tx; - struct timer_list timeout_tm; /* DGRAM only: timeout timer */ - struct work_struct timeout_tq; /* DGRAM only: associated queue, we run timers from process context */ - int timeout_last; /* DGRAM only: current timeout length */ - int timeout_retries; /* DGRAM only: retries left */ - struct { - size_t len; - __u8 data[128]; - } unexpected_packet; -}; - -extern void ncp_tcp_rcv_proc(struct work_struct *work); -extern void ncp_tcp_tx_proc(struct work_struct *work); -extern void ncpdgram_rcv_proc(struct work_struct *work); -extern void ncpdgram_timeout_proc(struct work_struct *work); -extern void ncpdgram_timeout_call(struct timer_list *t); -extern void ncp_tcp_data_ready(struct sock* sk); -extern void ncp_tcp_write_space(struct sock* sk); -extern void ncp_tcp_error_report(struct sock* sk); - -#define NCP_FLAG_UTF8 1 - -#define NCP_CLR_FLAG(server, flag) ((server)->flags &= ~(flag)) -#define NCP_SET_FLAG(server, flag) ((server)->flags |= (flag)) -#define NCP_IS_FLAG(server, flag) ((server)->flags & (flag)) - -static inline int ncp_conn_valid(struct ncp_server *server) -{ - return ((server->conn_status & 0x11) == 0); -} - -static inline void ncp_invalidate_conn(struct ncp_server *server) -{ - server->conn_status |= 0x01; -} - -#endif diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c deleted file mode 100644 index 804adfebba2f..000000000000 --- a/fs/ncpfs/ncplib_kernel.c +++ /dev/null @@ -1,1322 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ncplib_kernel.c - * - * Copyright (C) 1995, 1996 by Volker Lendecke - * Modified for big endian by J.F. Chadima and David S. Miller - * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache - * Modified 1999 Wolfram Pienkoss for NLS - * Modified 2000 Ben Harris, University of Cambridge for NFS NS meta-info - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include "ncp_fs.h" - -static inline void assert_server_locked(struct ncp_server *server) -{ - if (server->lock == 0) { - ncp_dbg(1, "server not locked!\n"); - } -} - -static void ncp_add_byte(struct ncp_server *server, __u8 x) -{ - assert_server_locked(server); - *(__u8 *) (&(server->packet[server->current_size])) = x; - server->current_size += 1; - return; -} - -static void ncp_add_word(struct ncp_server *server, __le16 x) -{ - assert_server_locked(server); - put_unaligned(x, (__le16 *) (&(server->packet[server->current_size]))); - server->current_size += 2; - return; -} - -static void ncp_add_be16(struct ncp_server *server, __u16 x) -{ - assert_server_locked(server); - put_unaligned(cpu_to_be16(x), (__be16 *) (&(server->packet[server->current_size]))); - server->current_size += 2; -} - -static void ncp_add_dword(struct ncp_server *server, __le32 x) -{ - assert_server_locked(server); - put_unaligned(x, (__le32 *) (&(server->packet[server->current_size]))); - server->current_size += 4; - return; -} - -static void ncp_add_be32(struct ncp_server *server, __u32 x) -{ - assert_server_locked(server); - put_unaligned(cpu_to_be32(x), (__be32 *)(&(server->packet[server->current_size]))); - server->current_size += 4; -} - -static inline void ncp_add_dword_lh(struct ncp_server *server, __u32 x) { - ncp_add_dword(server, cpu_to_le32(x)); -} - -static void ncp_add_mem(struct ncp_server *server, const void *source, int size) -{ - assert_server_locked(server); - memcpy(&(server->packet[server->current_size]), source, size); - server->current_size += size; - return; -} - -static void ncp_add_pstring(struct ncp_server *server, const char *s) -{ - int len = strlen(s); - assert_server_locked(server); - if (len > 255) { - ncp_dbg(1, "string too long: %s\n", s); - len = 255; - } - ncp_add_byte(server, len); - ncp_add_mem(server, s, len); - return; -} - -static inline void ncp_init_request(struct ncp_server *server) -{ - ncp_lock_server(server); - - server->current_size = sizeof(struct ncp_request_header); - server->has_subfunction = 0; -} - -static inline void ncp_init_request_s(struct ncp_server *server, int subfunction) -{ - ncp_lock_server(server); - - server->current_size = sizeof(struct ncp_request_header) + 2; - ncp_add_byte(server, subfunction); - - server->has_subfunction = 1; -} - -static inline char * -ncp_reply_data(struct ncp_server *server, int offset) -{ - return &(server->packet[sizeof(struct ncp_reply_header) + offset]); -} - -static inline u8 BVAL(const void *data) -{ - return *(const u8 *)data; -} - -static u8 ncp_reply_byte(struct ncp_server *server, int offset) -{ - return *(const u8 *)ncp_reply_data(server, offset); -} - -static inline u16 WVAL_LH(const void *data) -{ - return get_unaligned_le16(data); -} - -static u16 -ncp_reply_le16(struct ncp_server *server, int offset) -{ - return get_unaligned_le16(ncp_reply_data(server, offset)); -} - -static u16 -ncp_reply_be16(struct ncp_server *server, int offset) -{ - return get_unaligned_be16(ncp_reply_data(server, offset)); -} - -static inline u32 DVAL_LH(const void *data) -{ - return get_unaligned_le32(data); -} - -static __le32 -ncp_reply_dword(struct ncp_server *server, int offset) -{ - return get_unaligned((__le32 *)ncp_reply_data(server, offset)); -} - -static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) { - return le32_to_cpu(ncp_reply_dword(server, offset)); -} - -int -ncp_negotiate_buffersize(struct ncp_server *server, int size, int *target) -{ - int result; - - ncp_init_request(server); - ncp_add_be16(server, size); - - if ((result = ncp_request(server, 33)) != 0) { - ncp_unlock_server(server); - return result; - } - *target = min_t(unsigned int, ncp_reply_be16(server, 0), size); - - ncp_unlock_server(server); - return 0; -} - - -/* options: - * bit 0 ipx checksum - * bit 1 packet signing - */ -int -ncp_negotiate_size_and_options(struct ncp_server *server, - int size, int options, int *ret_size, int *ret_options) { - int result; - - /* there is minimum */ - if (size < NCP_BLOCK_SIZE) size = NCP_BLOCK_SIZE; - - ncp_init_request(server); - ncp_add_be16(server, size); - ncp_add_byte(server, options); - - if ((result = ncp_request(server, 0x61)) != 0) - { - ncp_unlock_server(server); - return result; - } - - /* NCP over UDP returns 0 (!!!) */ - result = ncp_reply_be16(server, 0); - if (result >= NCP_BLOCK_SIZE) - size = min(result, size); - *ret_size = size; - *ret_options = ncp_reply_byte(server, 4); - - ncp_unlock_server(server); - return 0; -} - -int ncp_get_volume_info_with_number(struct ncp_server* server, - int n, struct ncp_volume_info* target) { - int result; - int len; - - ncp_init_request_s(server, 44); - ncp_add_byte(server, n); - - if ((result = ncp_request(server, 22)) != 0) { - goto out; - } - target->total_blocks = ncp_reply_dword_lh(server, 0); - target->free_blocks = ncp_reply_dword_lh(server, 4); - target->purgeable_blocks = ncp_reply_dword_lh(server, 8); - target->not_yet_purgeable_blocks = ncp_reply_dword_lh(server, 12); - target->total_dir_entries = ncp_reply_dword_lh(server, 16); - target->available_dir_entries = ncp_reply_dword_lh(server, 20); - target->sectors_per_block = ncp_reply_byte(server, 28); - - memset(&(target->volume_name), 0, sizeof(target->volume_name)); - - result = -EIO; - len = ncp_reply_byte(server, 29); - if (len > NCP_VOLNAME_LEN) { - ncp_dbg(1, "volume name too long: %d\n", len); - goto out; - } - memcpy(&(target->volume_name), ncp_reply_data(server, 30), len); - result = 0; -out: - ncp_unlock_server(server); - return result; -} - -int ncp_get_directory_info(struct ncp_server* server, __u8 n, - struct ncp_volume_info* target) { - int result; - int len; - - ncp_init_request_s(server, 45); - ncp_add_byte(server, n); - - if ((result = ncp_request(server, 22)) != 0) { - goto out; - } - target->total_blocks = ncp_reply_dword_lh(server, 0); - target->free_blocks = ncp_reply_dword_lh(server, 4); - target->purgeable_blocks = 0; - target->not_yet_purgeable_blocks = 0; - target->total_dir_entries = ncp_reply_dword_lh(server, 8); - target->available_dir_entries = ncp_reply_dword_lh(server, 12); - target->sectors_per_block = ncp_reply_byte(server, 20); - - memset(&(target->volume_name), 0, sizeof(target->volume_name)); - - result = -EIO; - len = ncp_reply_byte(server, 21); - if (len > NCP_VOLNAME_LEN) { - ncp_dbg(1, "volume name too long: %d\n", len); - goto out; - } - memcpy(&(target->volume_name), ncp_reply_data(server, 22), len); - result = 0; -out: - ncp_unlock_server(server); - return result; -} - -int -ncp_close_file(struct ncp_server *server, const char *file_id) -{ - int result; - - ncp_init_request(server); - ncp_add_byte(server, 0); - ncp_add_mem(server, file_id, 6); - - result = ncp_request(server, 66); - ncp_unlock_server(server); - return result; -} - -int -ncp_make_closed(struct inode *inode) -{ - int err; - - err = 0; - mutex_lock(&NCP_FINFO(inode)->open_mutex); - if (atomic_read(&NCP_FINFO(inode)->opened) == 1) { - atomic_set(&NCP_FINFO(inode)->opened, 0); - err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle); - - if (!err) - ncp_vdbg("volnum=%d, dirent=%u, error=%d\n", - NCP_FINFO(inode)->volNumber, - NCP_FINFO(inode)->dirEntNum, err); - } - mutex_unlock(&NCP_FINFO(inode)->open_mutex); - return err; -} - -static void ncp_add_handle_path(struct ncp_server *server, __u8 vol_num, - __le32 dir_base, int have_dir_base, - const char *path) -{ - ncp_add_byte(server, vol_num); - ncp_add_dword(server, dir_base); - if (have_dir_base != 0) { - ncp_add_byte(server, 1); /* dir_base */ - } else { - ncp_add_byte(server, 0xff); /* no handle */ - } - if (path != NULL) { - ncp_add_byte(server, 1); /* 1 component */ - ncp_add_pstring(server, path); - } else { - ncp_add_byte(server, 0); - } -} - -int ncp_dirhandle_alloc(struct ncp_server* server, __u8 volnum, __le32 dirent, - __u8* dirhandle) { - int result; - - ncp_init_request(server); - ncp_add_byte(server, 12); /* subfunction */ - ncp_add_byte(server, NW_NS_DOS); - ncp_add_byte(server, 0); - ncp_add_word(server, 0); - ncp_add_handle_path(server, volnum, dirent, 1, NULL); - if ((result = ncp_request(server, 87)) == 0) { - *dirhandle = ncp_reply_byte(server, 0); - } - ncp_unlock_server(server); - return result; -} - -int ncp_dirhandle_free(struct ncp_server* server, __u8 dirhandle) { - int result; - - ncp_init_request_s(server, 20); - ncp_add_byte(server, dirhandle); - result = ncp_request(server, 22); - ncp_unlock_server(server); - return result; -} - -void ncp_extract_file_info(const void *structure, struct nw_info_struct *target) -{ - const __u8 *name_len; - const int info_struct_size = offsetof(struct nw_info_struct, nameLen); - - memcpy(target, structure, info_struct_size); - name_len = structure + info_struct_size; - target->nameLen = *name_len; - memcpy(target->entryName, name_len + 1, *name_len); - target->entryName[*name_len] = '\0'; - target->volNumber = le32_to_cpu(target->volNumber); - return; -} - -#ifdef CONFIG_NCPFS_NFS_NS -static inline void ncp_extract_nfs_info(const unsigned char *structure, - struct nw_nfs_info *target) -{ - target->mode = DVAL_LH(structure); - target->rdev = DVAL_LH(structure + 8); -} -#endif - -int ncp_obtain_nfs_info(struct ncp_server *server, - struct nw_info_struct *target) - -{ - int result = 0; -#ifdef CONFIG_NCPFS_NFS_NS - __u32 volnum = target->volNumber; - - if (ncp_is_nfs_extras(server, volnum)) { - ncp_init_request(server); - ncp_add_byte(server, 19); /* subfunction */ - ncp_add_byte(server, server->name_space[volnum]); - ncp_add_byte(server, NW_NS_NFS); - ncp_add_byte(server, 0); - ncp_add_byte(server, volnum); - ncp_add_dword(server, target->dirEntNum); - /* We must retrieve both nlinks and rdev, otherwise some server versions - report zeroes instead of valid data */ - ncp_add_dword_lh(server, NSIBM_NFS_MODE | NSIBM_NFS_NLINKS | NSIBM_NFS_RDEV); - - if ((result = ncp_request(server, 87)) == 0) { - ncp_extract_nfs_info(ncp_reply_data(server, 0), &target->nfs); - ncp_dbg(1, "(%s) mode=0%o, rdev=0x%x\n", - target->entryName, target->nfs.mode, - target->nfs.rdev); - } else { - target->nfs.mode = 0; - target->nfs.rdev = 0; - } - ncp_unlock_server(server); - - } else -#endif - { - target->nfs.mode = 0; - target->nfs.rdev = 0; - } - return result; -} - -/* - * Returns information for a (one-component) name relative to - * the specified directory. - */ -int ncp_obtain_info(struct ncp_server *server, struct inode *dir, const char *path, - struct nw_info_struct *target) -{ - __u8 volnum = NCP_FINFO(dir)->volNumber; - __le32 dirent = NCP_FINFO(dir)->dirEntNum; - int result; - - if (target == NULL) { - pr_err("%s: invalid call\n", __func__); - return -EINVAL; - } - ncp_init_request(server); - ncp_add_byte(server, 6); /* subfunction */ - ncp_add_byte(server, server->name_space[volnum]); - ncp_add_byte(server, server->name_space[volnum]); /* N.B. twice ?? */ - ncp_add_word(server, cpu_to_le16(0x8006)); /* get all */ - ncp_add_dword(server, RIM_ALL); - ncp_add_handle_path(server, volnum, dirent, 1, path); - - if ((result = ncp_request(server, 87)) != 0) - goto out; - ncp_extract_file_info(ncp_reply_data(server, 0), target); - ncp_unlock_server(server); - - result = ncp_obtain_nfs_info(server, target); - return result; - -out: - ncp_unlock_server(server); - return result; -} - -#ifdef CONFIG_NCPFS_NFS_NS -static int -ncp_obtain_DOS_dir_base(struct ncp_server *server, - __u8 ns, __u8 volnum, __le32 dirent, - const char *path, /* At most 1 component */ - __le32 *DOS_dir_base) -{ - int result; - - ncp_init_request(server); - ncp_add_byte(server, 6); /* subfunction */ - ncp_add_byte(server, ns); - ncp_add_byte(server, ns); - ncp_add_word(server, cpu_to_le16(0x8006)); /* get all */ - ncp_add_dword(server, RIM_DIRECTORY); - ncp_add_handle_path(server, volnum, dirent, 1, path); - - if ((result = ncp_request(server, 87)) == 0) - { - if (DOS_dir_base) *DOS_dir_base=ncp_reply_dword(server, 0x34); - } - ncp_unlock_server(server); - return result; -} -#endif /* CONFIG_NCPFS_NFS_NS */ - -static inline int -ncp_get_known_namespace(struct ncp_server *server, __u8 volume) -{ -#if defined(CONFIG_NCPFS_OS2_NS) || defined(CONFIG_NCPFS_NFS_NS) - int result; - __u8 *namespace; - __u16 no_namespaces; - - ncp_init_request(server); - ncp_add_byte(server, 24); /* Subfunction: Get Name Spaces Loaded */ - ncp_add_word(server, 0); - ncp_add_byte(server, volume); - - if ((result = ncp_request(server, 87)) != 0) { - ncp_unlock_server(server); - return NW_NS_DOS; /* not result ?? */ - } - - result = NW_NS_DOS; - no_namespaces = ncp_reply_le16(server, 0); - namespace = ncp_reply_data(server, 2); - - while (no_namespaces > 0) { - ncp_dbg(1, "found %d on %d\n", *namespace, volume); - -#ifdef CONFIG_NCPFS_NFS_NS - if ((*namespace == NW_NS_NFS) && !(server->m.flags&NCP_MOUNT_NO_NFS)) - { - result = NW_NS_NFS; - break; - } -#endif /* CONFIG_NCPFS_NFS_NS */ -#ifdef CONFIG_NCPFS_OS2_NS - if ((*namespace == NW_NS_OS2) && !(server->m.flags&NCP_MOUNT_NO_OS2)) - { - result = NW_NS_OS2; - } -#endif /* CONFIG_NCPFS_OS2_NS */ - namespace += 1; - no_namespaces -= 1; - } - ncp_unlock_server(server); - return result; -#else /* neither OS2 nor NFS - only DOS */ - return NW_NS_DOS; -#endif /* defined(CONFIG_NCPFS_OS2_NS) || defined(CONFIG_NCPFS_NFS_NS) */ -} - -int -ncp_update_known_namespace(struct ncp_server *server, __u8 volume, int *ret_ns) -{ - int ns = ncp_get_known_namespace(server, volume); - - if (ret_ns) - *ret_ns = ns; - - ncp_dbg(1, "namespace[%d] = %d\n", volume, server->name_space[volume]); - - if (server->name_space[volume] == ns) - return 0; - server->name_space[volume] = ns; - return 1; -} - -static int -ncp_ObtainSpecificDirBase(struct ncp_server *server, - __u8 nsSrc, __u8 nsDst, __u8 vol_num, __le32 dir_base, - const char *path, /* At most 1 component */ - __le32 *dirEntNum, __le32 *DosDirNum) -{ - int result; - - ncp_init_request(server); - ncp_add_byte(server, 6); /* subfunction */ - ncp_add_byte(server, nsSrc); - ncp_add_byte(server, nsDst); - ncp_add_word(server, cpu_to_le16(0x8006)); /* get all */ - ncp_add_dword(server, RIM_ALL); - ncp_add_handle_path(server, vol_num, dir_base, 1, path); - - if ((result = ncp_request(server, 87)) != 0) - { - ncp_unlock_server(server); - return result; - } - - if (dirEntNum) - *dirEntNum = ncp_reply_dword(server, 0x30); - if (DosDirNum) - *DosDirNum = ncp_reply_dword(server, 0x34); - ncp_unlock_server(server); - return 0; -} - -int -ncp_mount_subdir(struct ncp_server *server, - __u8 volNumber, __u8 srcNS, __le32 dirEntNum, - __u32* volume, __le32* newDirEnt, __le32* newDosEnt) -{ - int dstNS; - int result; - - ncp_update_known_namespace(server, volNumber, &dstNS); - if ((result = ncp_ObtainSpecificDirBase(server, srcNS, dstNS, volNumber, - dirEntNum, NULL, newDirEnt, newDosEnt)) != 0) - { - return result; - } - *volume = volNumber; - server->m.mounted_vol[1] = 0; - server->m.mounted_vol[0] = 'X'; - return 0; -} - -int -ncp_get_volume_root(struct ncp_server *server, - const char *volname, __u32* volume, __le32* dirent, __le32* dosdirent) -{ - int result; - - ncp_dbg(1, "looking up vol %s\n", volname); - - ncp_init_request(server); - ncp_add_byte(server, 22); /* Subfunction: Generate dir handle */ - ncp_add_byte(server, 0); /* DOS namespace */ - ncp_add_byte(server, 0); /* reserved */ - ncp_add_byte(server, 0); /* reserved */ - ncp_add_byte(server, 0); /* reserved */ - - ncp_add_byte(server, 0); /* faked volume number */ - ncp_add_dword(server, 0); /* faked dir_base */ - ncp_add_byte(server, 0xff); /* Don't have a dir_base */ - ncp_add_byte(server, 1); /* 1 path component */ - ncp_add_pstring(server, volname); - - if ((result = ncp_request(server, 87)) != 0) { - ncp_unlock_server(server); - return result; - } - *dirent = *dosdirent = ncp_reply_dword(server, 4); - *volume = ncp_reply_byte(server, 8); - ncp_unlock_server(server); - return 0; -} - -int -ncp_lookup_volume(struct ncp_server *server, - const char *volname, struct nw_info_struct *target) -{ - int result; - - memset(target, 0, sizeof(*target)); - result = ncp_get_volume_root(server, volname, - &target->volNumber, &target->dirEntNum, &target->DosDirNum); - if (result) { - return result; - } - ncp_update_known_namespace(server, target->volNumber, NULL); - target->nameLen = strlen(volname); - memcpy(target->entryName, volname, target->nameLen+1); - target->attributes = aDIR; - /* set dates to Jan 1, 1986 00:00 */ - target->creationTime = target->modifyTime = cpu_to_le16(0x0000); - target->creationDate = target->modifyDate = target->lastAccessDate = cpu_to_le16(0x0C21); - target->nfs.mode = 0; - return 0; -} - -int ncp_modify_file_or_subdir_dos_info_path(struct ncp_server *server, - struct inode *dir, - const char *path, - __le32 info_mask, - const struct nw_modify_dos_info *info) -{ - __u8 volnum = NCP_FINFO(dir)->volNumber; - __le32 dirent = NCP_FINFO(dir)->dirEntNum; - int result; - - ncp_init_request(server); - ncp_add_byte(server, 7); /* subfunction */ - ncp_add_byte(server, server->name_space[volnum]); - ncp_add_byte(server, 0); /* reserved */ - ncp_add_word(server, cpu_to_le16(0x8006)); /* search attribs: all */ - - ncp_add_dword(server, info_mask); - ncp_add_mem(server, info, sizeof(*info)); - ncp_add_handle_path(server, volnum, dirent, 1, path); - - result = ncp_request(server, 87); - ncp_unlock_server(server); - return result; -} - -int ncp_modify_file_or_subdir_dos_info(struct ncp_server *server, - struct inode *dir, - __le32 info_mask, - const struct nw_modify_dos_info *info) -{ - return ncp_modify_file_or_subdir_dos_info_path(server, dir, NULL, - info_mask, info); -} - -#ifdef CONFIG_NCPFS_NFS_NS -int ncp_modify_nfs_info(struct ncp_server *server, __u8 volnum, __le32 dirent, - __u32 mode, __u32 rdev) - -{ - int result = 0; - - ncp_init_request(server); - if (server->name_space[volnum] == NW_NS_NFS) { - ncp_add_byte(server, 25); /* subfunction */ - ncp_add_byte(server, server->name_space[volnum]); - ncp_add_byte(server, NW_NS_NFS); - ncp_add_byte(server, volnum); - ncp_add_dword(server, dirent); - /* we must always operate on both nlinks and rdev, otherwise - rdev is not set */ - ncp_add_dword_lh(server, NSIBM_NFS_MODE | NSIBM_NFS_NLINKS | NSIBM_NFS_RDEV); - ncp_add_dword_lh(server, mode); - ncp_add_dword_lh(server, 1); /* nlinks */ - ncp_add_dword_lh(server, rdev); - result = ncp_request(server, 87); - } - ncp_unlock_server(server); - return result; -} -#endif - - -static int -ncp_DeleteNSEntry(struct ncp_server *server, - __u8 have_dir_base, __u8 volnum, __le32 dirent, - const char* name, __u8 ns, __le16 attr) -{ - int result; - - ncp_init_request(server); - ncp_add_byte(server, 8); /* subfunction */ - ncp_add_byte(server, ns); - ncp_add_byte(server, 0); /* reserved */ - ncp_add_word(server, attr); /* search attribs: all */ - ncp_add_handle_path(server, volnum, dirent, have_dir_base, name); - - result = ncp_request(server, 87); - ncp_unlock_server(server); - return result; -} - -int -ncp_del_file_or_subdir2(struct ncp_server *server, - struct dentry *dentry) -{ - struct inode *inode = d_inode(dentry); - __u8 volnum; - __le32 dirent; - - if (!inode) { - return 0xFF; /* Any error */ - } - volnum = NCP_FINFO(inode)->volNumber; - dirent = NCP_FINFO(inode)->DosDirNum; - return ncp_DeleteNSEntry(server, 1, volnum, dirent, NULL, NW_NS_DOS, cpu_to_le16(0x8006)); -} - -int -ncp_del_file_or_subdir(struct ncp_server *server, - struct inode *dir, const char *name) -{ - __u8 volnum = NCP_FINFO(dir)->volNumber; - __le32 dirent = NCP_FINFO(dir)->dirEntNum; - int name_space; - - name_space = server->name_space[volnum]; -#ifdef CONFIG_NCPFS_NFS_NS - if (name_space == NW_NS_NFS) - { - int result; - - result=ncp_obtain_DOS_dir_base(server, name_space, volnum, dirent, name, &dirent); - if (result) return result; - name = NULL; - name_space = NW_NS_DOS; - } -#endif /* CONFIG_NCPFS_NFS_NS */ - return ncp_DeleteNSEntry(server, 1, volnum, dirent, name, name_space, cpu_to_le16(0x8006)); -} - -static inline void ConvertToNWfromDWORD(__u16 v0, __u16 v1, __u8 ret[6]) -{ - __le16 *dest = (__le16 *) ret; - dest[1] = cpu_to_le16(v0); - dest[2] = cpu_to_le16(v1); - dest[0] = cpu_to_le16(v0 + 1); - return; -} - -/* If both dir and name are NULL, then in target there's already a - looked-up entry that wants to be opened. */ -int ncp_open_create_file_or_subdir(struct ncp_server *server, - struct inode *dir, const char *name, - int open_create_mode, - __le32 create_attributes, - __le16 desired_acc_rights, - struct ncp_entry_info *target) -{ - __le16 search_attribs = cpu_to_le16(0x0006); - __u8 volnum; - __le32 dirent; - int result; - - volnum = NCP_FINFO(dir)->volNumber; - dirent = NCP_FINFO(dir)->dirEntNum; - - if ((create_attributes & aDIR) != 0) { - search_attribs |= cpu_to_le16(0x8000); - } - ncp_init_request(server); - ncp_add_byte(server, 1); /* subfunction */ - ncp_add_byte(server, server->name_space[volnum]); - ncp_add_byte(server, open_create_mode); - ncp_add_word(server, search_attribs); - ncp_add_dword(server, RIM_ALL); - ncp_add_dword(server, create_attributes); - /* The desired acc rights seem to be the inherited rights mask - for directories */ - ncp_add_word(server, desired_acc_rights); - ncp_add_handle_path(server, volnum, dirent, 1, name); - - if ((result = ncp_request(server, 87)) != 0) - goto out; - if (!(create_attributes & aDIR)) - target->opened = 1; - - /* in target there's a new finfo to fill */ - ncp_extract_file_info(ncp_reply_data(server, 6), &(target->i)); - target->volume = target->i.volNumber; - ConvertToNWfromDWORD(ncp_reply_le16(server, 0), - ncp_reply_le16(server, 2), - target->file_handle); - - ncp_unlock_server(server); - - (void)ncp_obtain_nfs_info(server, &(target->i)); - return 0; - -out: - ncp_unlock_server(server); - return result; -} - -int -ncp_initialize_search(struct ncp_server *server, struct inode *dir, - struct nw_search_sequence *target) -{ - __u8 volnum = NCP_FINFO(dir)->volNumber; - __le32 dirent = NCP_FINFO(dir)->dirEntNum; - int result; - - ncp_init_request(server); - ncp_add_byte(server, 2); /* subfunction */ - ncp_add_byte(server, server->name_space[volnum]); - ncp_add_byte(server, 0); /* reserved */ - ncp_add_handle_path(server, volnum, dirent, 1, NULL); - - result = ncp_request(server, 87); - if (result) - goto out; - memcpy(target, ncp_reply_data(server, 0), sizeof(*target)); - -out: - ncp_unlock_server(server); - return result; -} - -int ncp_search_for_fileset(struct ncp_server *server, - struct nw_search_sequence *seq, - int* more, - int* cnt, - char* buffer, - size_t bufsize, - char** rbuf, - size_t* rsize) -{ - int result; - - ncp_init_request(server); - ncp_add_byte(server, 20); - ncp_add_byte(server, server->name_space[seq->volNumber]); - ncp_add_byte(server, 0); /* datastream */ - ncp_add_word(server, cpu_to_le16(0x8006)); - ncp_add_dword(server, RIM_ALL); - ncp_add_word(server, cpu_to_le16(32767)); /* max returned items */ - ncp_add_mem(server, seq, 9); -#ifdef CONFIG_NCPFS_NFS_NS - if (server->name_space[seq->volNumber] == NW_NS_NFS) { - ncp_add_byte(server, 0); /* 0 byte pattern */ - } else -#endif - { - ncp_add_byte(server, 2); /* 2 byte pattern */ - ncp_add_byte(server, 0xff); /* following is a wildcard */ - ncp_add_byte(server, '*'); - } - result = ncp_request2(server, 87, buffer, bufsize); - if (result) { - ncp_unlock_server(server); - return result; - } - if (server->ncp_reply_size < 12) { - ncp_unlock_server(server); - return 0xFF; - } - *rsize = server->ncp_reply_size - 12; - ncp_unlock_server(server); - buffer = buffer + sizeof(struct ncp_reply_header); - *rbuf = buffer + 12; - *cnt = WVAL_LH(buffer + 10); - *more = BVAL(buffer + 9); - memcpy(seq, buffer, 9); - return 0; -} - -static int -ncp_RenameNSEntry(struct ncp_server *server, - struct inode *old_dir, const char *old_name, __le16 old_type, - struct inode *new_dir, const char *new_name) -{ - int result = -EINVAL; - - if ((old_dir == NULL) || (old_name == NULL) || - (new_dir == NULL) || (new_name == NULL)) - goto out; - - ncp_init_request(server); - ncp_add_byte(server, 4); /* subfunction */ - ncp_add_byte(server, server->name_space[NCP_FINFO(old_dir)->volNumber]); - ncp_add_byte(server, 1); /* rename flag */ - ncp_add_word(server, old_type); /* search attributes */ - - /* source Handle Path */ - ncp_add_byte(server, NCP_FINFO(old_dir)->volNumber); - ncp_add_dword(server, NCP_FINFO(old_dir)->dirEntNum); - ncp_add_byte(server, 1); - ncp_add_byte(server, 1); /* 1 source component */ - - /* dest Handle Path */ - ncp_add_byte(server, NCP_FINFO(new_dir)->volNumber); - ncp_add_dword(server, NCP_FINFO(new_dir)->dirEntNum); - ncp_add_byte(server, 1); - ncp_add_byte(server, 1); /* 1 destination component */ - - /* source path string */ - ncp_add_pstring(server, old_name); - /* dest path string */ - ncp_add_pstring(server, new_name); - - result = ncp_request(server, 87); - ncp_unlock_server(server); -out: - return result; -} - -int ncp_ren_or_mov_file_or_subdir(struct ncp_server *server, - struct inode *old_dir, const char *old_name, - struct inode *new_dir, const char *new_name) -{ - int result; - __le16 old_type = cpu_to_le16(0x06); - -/* If somebody can do it atomic, call me... vandrove@vc.cvut.cz */ - result = ncp_RenameNSEntry(server, old_dir, old_name, old_type, - new_dir, new_name); - if (result == 0xFF) /* File Not Found, try directory */ - { - old_type = cpu_to_le16(0x16); - result = ncp_RenameNSEntry(server, old_dir, old_name, old_type, - new_dir, new_name); - } - if (result != 0x92) return result; /* All except NO_FILES_RENAMED */ - result = ncp_del_file_or_subdir(server, new_dir, new_name); - if (result != 0) return -EACCES; - result = ncp_RenameNSEntry(server, old_dir, old_name, old_type, - new_dir, new_name); - return result; -} - - -/* We have to transfer to/from user space */ -int -ncp_read_kernel(struct ncp_server *server, const char *file_id, - __u32 offset, __u16 to_read, char *target, int *bytes_read) -{ - const char *source; - int result; - - ncp_init_request(server); - ncp_add_byte(server, 0); - ncp_add_mem(server, file_id, 6); - ncp_add_be32(server, offset); - ncp_add_be16(server, to_read); - - if ((result = ncp_request(server, 72)) != 0) { - goto out; - } - *bytes_read = ncp_reply_be16(server, 0); - source = ncp_reply_data(server, 2 + (offset & 1)); - - memcpy(target, source, *bytes_read); -out: - ncp_unlock_server(server); - return result; -} - -/* There is a problem... egrep and some other silly tools do: - x = mmap(NULL, MAP_PRIVATE, PROT_READ|PROT_WRITE, <ncpfs fd>, 32768); - read(<ncpfs fd>, x, 32768); - Now copying read result by copy_to_user causes pagefault. This pagefault - could not be handled because of server was locked due to read. So we have - to use temporary buffer. So ncp_unlock_server must be done before - copy_to_user (and for write, copy_from_user must be done before - ncp_init_request... same applies for send raw packet ioctl). Because of - file is normally read in bigger chunks, caller provides kmalloced - (vmalloced) chunk of memory with size >= to_read... - */ -int -ncp_read_bounce(struct ncp_server *server, const char *file_id, - __u32 offset, __u16 to_read, struct iov_iter *to, - int *bytes_read, void *bounce, __u32 bufsize) -{ - int result; - - ncp_init_request(server); - ncp_add_byte(server, 0); - ncp_add_mem(server, file_id, 6); - ncp_add_be32(server, offset); - ncp_add_be16(server, to_read); - result = ncp_request2(server, 72, bounce, bufsize); - ncp_unlock_server(server); - if (!result) { - int len = get_unaligned_be16((char *)bounce + - sizeof(struct ncp_reply_header)); - result = -EIO; - if (len <= to_read) { - char* source; - - source = (char*)bounce + - sizeof(struct ncp_reply_header) + 2 + - (offset & 1); - *bytes_read = len; - result = 0; - if (copy_to_iter(source, len, to) != len) - result = -EFAULT; - } - } - return result; -} - -int -ncp_write_kernel(struct ncp_server *server, const char *file_id, - __u32 offset, __u16 to_write, - const char *source, int *bytes_written) -{ - int result; - - ncp_init_request(server); - ncp_add_byte(server, 0); - ncp_add_mem(server, file_id, 6); - ncp_add_be32(server, offset); - ncp_add_be16(server, to_write); - ncp_add_mem(server, source, to_write); - - if ((result = ncp_request(server, 73)) == 0) - *bytes_written = to_write; - ncp_unlock_server(server); - return result; -} - -#ifdef CONFIG_NCPFS_IOCTL_LOCKING -int -ncp_LogPhysicalRecord(struct ncp_server *server, const char *file_id, - __u8 locktype, __u32 offset, __u32 length, __u16 timeout) -{ - int result; - - ncp_init_request(server); - ncp_add_byte(server, locktype); - ncp_add_mem(server, file_id, 6); - ncp_add_be32(server, offset); - ncp_add_be32(server, length); - ncp_add_be16(server, timeout); - - if ((result = ncp_request(server, 0x1A)) != 0) - { - ncp_unlock_server(server); - return result; - } - ncp_unlock_server(server); - return 0; -} - -int -ncp_ClearPhysicalRecord(struct ncp_server *server, const char *file_id, - __u32 offset, __u32 length) -{ - int result; - - ncp_init_request(server); - ncp_add_byte(server, 0); /* who knows... lanalyzer says that */ - ncp_add_mem(server, file_id, 6); - ncp_add_be32(server, offset); - ncp_add_be32(server, length); - - if ((result = ncp_request(server, 0x1E)) != 0) - { - ncp_unlock_server(server); - return result; - } - ncp_unlock_server(server); - return 0; -} -#endif /* CONFIG_NCPFS_IOCTL_LOCKING */ - -#ifdef CONFIG_NCPFS_NLS -/* This are the NLS conversion routines with inspirations and code parts - * from the vfat file system and hints from Petr Vandrovec. - */ - -int -ncp__io2vol(struct ncp_server *server, unsigned char *vname, unsigned int *vlen, - const unsigned char *iname, unsigned int ilen, int cc) -{ - struct nls_table *in = server->nls_io; - struct nls_table *out = server->nls_vol; - unsigned char *vname_start; - unsigned char *vname_end; - const unsigned char *iname_end; - - iname_end = iname + ilen; - vname_start = vname; - vname_end = vname + *vlen - 1; - - while (iname < iname_end) { - int chl; - wchar_t ec; - - if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) { - int k; - unicode_t u; - - k = utf8_to_utf32(iname, iname_end - iname, &u); - if (k < 0 || u > MAX_WCHAR_T) - return -EINVAL; - iname += k; - ec = u; - } else { - if (*iname == NCP_ESC) { - int k; - - if (iname_end - iname < 5) - goto nospec; - - ec = 0; - for (k = 1; k < 5; k++) { - unsigned char nc; - - nc = iname[k] - '0'; - if (nc >= 10) { - nc -= 'A' - '0' - 10; - if ((nc < 10) || (nc > 15)) { - goto nospec; - } - } - ec = (ec << 4) | nc; - } - iname += 5; - } else { -nospec:; - if ( (chl = in->char2uni(iname, iname_end - iname, &ec)) < 0) - return chl; - iname += chl; - } - } - - /* unitoupper should be here! */ - - chl = out->uni2char(ec, vname, vname_end - vname); - if (chl < 0) - return chl; - - /* this is wrong... */ - if (cc) { - int chi; - - for (chi = 0; chi < chl; chi++){ - vname[chi] = ncp_toupper(out, vname[chi]); - } - } - vname += chl; - } - - *vname = 0; - *vlen = vname - vname_start; - return 0; -} - -int -ncp__vol2io(struct ncp_server *server, unsigned char *iname, unsigned int *ilen, - const unsigned char *vname, unsigned int vlen, int cc) -{ - struct nls_table *in = server->nls_vol; - struct nls_table *out = server->nls_io; - const unsigned char *vname_end; - unsigned char *iname_start; - unsigned char *iname_end; - unsigned char *vname_cc; - int err; - - vname_cc = NULL; - - if (cc) { - int i; - - /* this is wrong! */ - vname_cc = kmalloc(vlen, GFP_KERNEL); - if (!vname_cc) - return -ENOMEM; - for (i = 0; i < vlen; i++) - vname_cc[i] = ncp_tolower(in, vname[i]); - vname = vname_cc; - } - - iname_start = iname; - iname_end = iname + *ilen - 1; - vname_end = vname + vlen; - - while (vname < vname_end) { - wchar_t ec; - int chl; - - if ( (chl = in->char2uni(vname, vname_end - vname, &ec)) < 0) { - err = chl; - goto quit; - } - vname += chl; - - /* unitolower should be here! */ - - if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) { - int k; - - k = utf32_to_utf8(ec, iname, iname_end - iname); - if (k < 0) { - err = -ENAMETOOLONG; - goto quit; - } - iname += k; - } else { - if ( (chl = out->uni2char(ec, iname, iname_end - iname)) >= 0) { - iname += chl; - } else { - int k; - - if (iname_end - iname < 5) { - err = -ENAMETOOLONG; - goto quit; - } - *iname = NCP_ESC; - for (k = 4; k > 0; k--) { - unsigned char v; - - v = (ec & 0xF) + '0'; - if (v > '9') { - v += 'A' - '9' - 1; - } - iname[k] = v; - ec >>= 4; - } - iname += 5; - } - } - } - - *iname = 0; - *ilen = iname - iname_start; - err = 0; -quit:; - if (cc) - kfree(vname_cc); - return err; -} - -#else - -int -ncp__io2vol(unsigned char *vname, unsigned int *vlen, - const unsigned char *iname, unsigned int ilen, int cc) -{ - int i; - - if (*vlen <= ilen) - return -ENAMETOOLONG; - - if (cc) - for (i = 0; i < ilen; i++) { - *vname = toupper(*iname); - vname++; - iname++; - } - else { - memmove(vname, iname, ilen); - vname += ilen; - } - - *vlen = ilen; - *vname = 0; - return 0; -} - -int -ncp__vol2io(unsigned char *iname, unsigned int *ilen, - const unsigned char *vname, unsigned int vlen, int cc) -{ - int i; - - if (*ilen <= vlen) - return -ENAMETOOLONG; - - if (cc) - for (i = 0; i < vlen; i++) { - *iname = tolower(*vname); - iname++; - vname++; - } - else { - memmove(iname, vname, vlen); - iname += vlen; - } - - *ilen = vlen; - *iname = 0; - return 0; -} - -#endif diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h deleted file mode 100644 index aaae8aa9bf7d..000000000000 --- a/fs/ncpfs/ncplib_kernel.h +++ /dev/null @@ -1,215 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ncplib_kernel.h - * - * Copyright (C) 1995, 1996 by Volker Lendecke - * Modified for big endian by J.F. Chadima and David S. Miller - * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache - * Modified 1998, 1999 Wolfram Pienkoss for NLS - * Modified 1999 Wolfram Pienkoss for directory caching - * - */ - -#ifndef _NCPLIB_H -#define _NCPLIB_H - - -#include <linux/fs.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/pagemap.h> - -#include <linux/uaccess.h> -#include <asm/byteorder.h> -#include <asm/unaligned.h> -#include <asm/string.h> - -#ifdef CONFIG_NCPFS_NLS -#include <linux/nls.h> -#else -#include <linux/ctype.h> -#endif /* CONFIG_NCPFS_NLS */ - -#define NCP_MIN_SYMLINK_SIZE 8 -#define NCP_MAX_SYMLINK_SIZE 512 - -#define NCP_BLOCK_SHIFT 9 -#define NCP_BLOCK_SIZE (1 << (NCP_BLOCK_SHIFT)) - -int ncp_negotiate_buffersize(struct ncp_server *, int, int *); -int ncp_negotiate_size_and_options(struct ncp_server *server, int size, - int options, int *ret_size, int *ret_options); - -int ncp_get_volume_info_with_number(struct ncp_server* server, int n, - struct ncp_volume_info *target); - -int ncp_get_directory_info(struct ncp_server* server, __u8 dirhandle, - struct ncp_volume_info* target); - -int ncp_close_file(struct ncp_server *, const char *); -static inline int ncp_read_bounce_size(__u32 size) { - return sizeof(struct ncp_reply_header) + 2 + 2 + size + 8; -}; -int ncp_read_bounce(struct ncp_server *, const char *, __u32, __u16, - struct iov_iter *, int *, void *bounce, __u32 bouncelen); -int ncp_read_kernel(struct ncp_server *, const char *, __u32, __u16, - char *, int *); -int ncp_write_kernel(struct ncp_server *, const char *, __u32, __u16, - const char *, int *); - -static inline void ncp_inode_close(struct inode *inode) { - atomic_dec(&NCP_FINFO(inode)->opened); -} - -void ncp_extract_file_info(const void* src, struct nw_info_struct* target); -int ncp_obtain_info(struct ncp_server *server, struct inode *, const char *, - struct nw_info_struct *target); -int ncp_obtain_nfs_info(struct ncp_server *server, struct nw_info_struct *target); -int ncp_update_known_namespace(struct ncp_server *server, __u8 volume, int *ret_ns); -int ncp_get_volume_root(struct ncp_server *server, const char *volname, - __u32 *volume, __le32 *dirent, __le32 *dosdirent); -int ncp_lookup_volume(struct ncp_server *, const char *, struct nw_info_struct *); -int ncp_modify_file_or_subdir_dos_info(struct ncp_server *, struct inode *, - __le32, const struct nw_modify_dos_info *info); -int ncp_modify_file_or_subdir_dos_info_path(struct ncp_server *, struct inode *, - const char* path, __le32, const struct nw_modify_dos_info *info); -int ncp_modify_nfs_info(struct ncp_server *, __u8 volnum, __le32 dirent, - __u32 mode, __u32 rdev); - -int ncp_del_file_or_subdir2(struct ncp_server *, struct dentry*); -int ncp_del_file_or_subdir(struct ncp_server *, struct inode *, const char *); -int ncp_open_create_file_or_subdir(struct ncp_server *, struct inode *, const char *, - int, __le32, __le16, struct ncp_entry_info *); - -int ncp_initialize_search(struct ncp_server *, struct inode *, - struct nw_search_sequence *target); -int ncp_search_for_fileset(struct ncp_server *server, - struct nw_search_sequence *seq, - int* more, int* cnt, - char* buffer, size_t bufsize, - char** rbuf, size_t* rsize); - -int ncp_ren_or_mov_file_or_subdir(struct ncp_server *server, - struct inode *, const char *, struct inode *, const char *); - - -int -ncp_LogPhysicalRecord(struct ncp_server *server, - const char *file_id, __u8 locktype, - __u32 offset, __u32 length, __u16 timeout); - -#ifdef CONFIG_NCPFS_IOCTL_LOCKING -int -ncp_ClearPhysicalRecord(struct ncp_server *server, - const char *file_id, - __u32 offset, __u32 length); -#endif /* CONFIG_NCPFS_IOCTL_LOCKING */ - -int -ncp_mount_subdir(struct ncp_server *, __u8, __u8, __le32, - __u32* volume, __le32* dirent, __le32* dosdirent); -int ncp_dirhandle_alloc(struct ncp_server *, __u8 vol, __le32 dirent, __u8 *dirhandle); -int ncp_dirhandle_free(struct ncp_server *, __u8 dirhandle); - -int ncp_create_new(struct inode *dir, struct dentry *dentry, - umode_t mode, dev_t rdev, __le32 attributes); - -static inline int ncp_is_nfs_extras(struct ncp_server* server, unsigned int volnum) { -#ifdef CONFIG_NCPFS_NFS_NS - return (server->m.flags & NCP_MOUNT_NFS_EXTRAS) && - (server->name_space[volnum] == NW_NS_NFS); -#else - return 0; -#endif -} - -#ifdef CONFIG_NCPFS_NLS - -int ncp__io2vol(struct ncp_server *, unsigned char *, unsigned int *, - const unsigned char *, unsigned int, int); -int ncp__vol2io(struct ncp_server *, unsigned char *, unsigned int *, - const unsigned char *, unsigned int, int); - -#define NCP_ESC ':' -#define NCP_IO_TABLE(sb) (NCP_SBP(sb)->nls_io) -#define ncp_tolower(t, c) nls_tolower(t, c) -#define ncp_toupper(t, c) nls_toupper(t, c) -#define ncp_strnicmp(t, s1, s2, len) \ - nls_strnicmp(t, s1, s2, len) -#define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(S,m,i,n,k,U) -#define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(S,m,i,n,k,U) - -#else - -int ncp__io2vol(unsigned char *, unsigned int *, - const unsigned char *, unsigned int, int); -int ncp__vol2io(unsigned char *, unsigned int *, - const unsigned char *, unsigned int, int); - -#define NCP_IO_TABLE(sb) NULL -#define ncp_tolower(t, c) tolower(c) -#define ncp_toupper(t, c) toupper(c) -#define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U) -#define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U) - - -static inline int ncp_strnicmp(const struct nls_table *t, - const unsigned char *s1, const unsigned char *s2, int len) -{ - while (len--) { - if (tolower(*s1++) != tolower(*s2++)) - return 1; - } - - return 0; -} - -#endif /* CONFIG_NCPFS_NLS */ - -#define NCP_GET_AGE(dentry) (jiffies - (dentry)->d_time) -#define NCP_MAX_AGE(server) atomic_read(&(server)->dentry_ttl) -#define NCP_TEST_AGE(server,dentry) (NCP_GET_AGE(dentry) < NCP_MAX_AGE(server)) - -static inline void -ncp_age_dentry(struct ncp_server* server, struct dentry* dentry) -{ - dentry->d_time = jiffies - NCP_MAX_AGE(server); -} - -static inline void -ncp_new_dentry(struct dentry* dentry) -{ - dentry->d_time = jiffies; -} - -struct ncp_cache_head { - time_t mtime; - unsigned long time; /* cache age */ - unsigned long end; /* last valid fpos in cache */ - int eof; -}; - -#define NCP_DIRCACHE_SIZE ((int)(PAGE_SIZE/sizeof(struct dentry *))) -union ncp_dir_cache { - struct ncp_cache_head head; - struct dentry *dentry[NCP_DIRCACHE_SIZE]; -}; - -#define NCP_FIRSTCACHE_SIZE ((int)((NCP_DIRCACHE_SIZE * \ - sizeof(struct dentry *) - sizeof(struct ncp_cache_head)) / \ - sizeof(struct dentry *))) - -#define NCP_DIRCACHE_START (NCP_DIRCACHE_SIZE - NCP_FIRSTCACHE_SIZE) - -struct ncp_cache_control { - struct ncp_cache_head head; - struct page *page; - union ncp_dir_cache *cache; - unsigned long fpos, ofs; - int filled, valid, idx; -}; - -#endif /* _NCPLIB_H */ diff --git a/fs/ncpfs/ncpsign_kernel.c b/fs/ncpfs/ncpsign_kernel.c deleted file mode 100644 index 8085b1a3ba47..000000000000 --- a/fs/ncpfs/ncpsign_kernel.c +++ /dev/null @@ -1,128 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ncpsign_kernel.c - * - * Arne de Bruijn (arne@knoware.nl), 1997 - * - */ - - -#ifdef CONFIG_NCPFS_PACKET_SIGNING - -#include <linux/string.h> -#include <linux/ncp.h> -#include <linux/bitops.h> -#include "ncp_fs.h" -#include "ncpsign_kernel.h" - -/* i386: 32-bit, little endian, handles mis-alignment */ -#ifdef __i386__ -#define GET_LE32(p) (*(const int *)(p)) -#define PUT_LE32(p,v) { *(int *)(p)=v; } -#else -/* from include/ncplib.h */ -#define BVAL(buf,pos) (((const __u8 *)(buf))[pos]) -#define PVAL(buf,pos) ((unsigned)BVAL(buf,pos)) -#define BSET(buf,pos,val) (((__u8 *)(buf))[pos] = (val)) - -static inline __u16 -WVAL_LH(const __u8 * buf, int pos) -{ - return PVAL(buf, pos) | PVAL(buf, pos + 1) << 8; -} -static inline __u32 -DVAL_LH(const __u8 * buf, int pos) -{ - return WVAL_LH(buf, pos) | WVAL_LH(buf, pos + 2) << 16; -} -static inline void -WSET_LH(__u8 * buf, int pos, __u16 val) -{ - BSET(buf, pos, val & 0xff); - BSET(buf, pos + 1, val >> 8); -} -static inline void -DSET_LH(__u8 * buf, int pos, __u32 val) -{ - WSET_LH(buf, pos, val & 0xffff); - WSET_LH(buf, pos + 2, val >> 16); -} - -#define GET_LE32(p) DVAL_LH(p,0) -#define PUT_LE32(p,v) DSET_LH(p,0,v) -#endif - -static void nwsign(char *r_data1, char *r_data2, char *outdata) { - int i; - unsigned int w0,w1,w2,w3; - static int rbit[4]={0, 2, 1, 3}; -#ifdef __i386__ - unsigned int *data2=(unsigned int *)r_data2; -#else - unsigned int data2[16]; - for (i=0;i<16;i++) - data2[i]=GET_LE32(r_data2+(i<<2)); -#endif - w0=GET_LE32(r_data1); - w1=GET_LE32(r_data1+4); - w2=GET_LE32(r_data1+8); - w3=GET_LE32(r_data1+12); - for (i=0;i<16;i+=4) { - w0=rol32(w0 + ((w1 & w2) | ((~w1) & w3)) + data2[i+0],3); - w3=rol32(w3 + ((w0 & w1) | ((~w0) & w2)) + data2[i+1],7); - w2=rol32(w2 + ((w3 & w0) | ((~w3) & w1)) + data2[i+2],11); - w1=rol32(w1 + ((w2 & w3) | ((~w2) & w0)) + data2[i+3],19); - } - for (i=0;i<4;i++) { - w0=rol32(w0 + (((w2 | w3) & w1) | (w2 & w3)) + 0x5a827999 + data2[i+0],3); - w3=rol32(w3 + (((w1 | w2) & w0) | (w1 & w2)) + 0x5a827999 + data2[i+4],5); - w2=rol32(w2 + (((w0 | w1) & w3) | (w0 & w1)) + 0x5a827999 + data2[i+8],9); - w1=rol32(w1 + (((w3 | w0) & w2) | (w3 & w0)) + 0x5a827999 + data2[i+12],13); - } - for (i=0;i<4;i++) { - w0=rol32(w0 + ((w1 ^ w2) ^ w3) + 0x6ed9eba1 + data2[rbit[i]+0],3); - w3=rol32(w3 + ((w0 ^ w1) ^ w2) + 0x6ed9eba1 + data2[rbit[i]+8],9); - w2=rol32(w2 + ((w3 ^ w0) ^ w1) + 0x6ed9eba1 + data2[rbit[i]+4],11); - w1=rol32(w1 + ((w2 ^ w3) ^ w0) + 0x6ed9eba1 + data2[rbit[i]+12],15); - } - PUT_LE32(outdata,(w0+GET_LE32(r_data1)) & 0xffffffff); - PUT_LE32(outdata+4,(w1+GET_LE32(r_data1+4)) & 0xffffffff); - PUT_LE32(outdata+8,(w2+GET_LE32(r_data1+8)) & 0xffffffff); - PUT_LE32(outdata+12,(w3+GET_LE32(r_data1+12)) & 0xffffffff); -} - -/* Make a signature for the current packet and add it at the end of the */ -/* packet. */ -void __sign_packet(struct ncp_server *server, const char *packet, size_t size, __u32 totalsize, void *sign_buff) { - unsigned char data[64]; - - memcpy(data, server->sign_root, 8); - *(__u32*)(data + 8) = totalsize; - if (size < 52) { - memcpy(data + 12, packet, size); - memset(data + 12 + size, 0, 52 - size); - } else { - memcpy(data + 12, packet, 52); - } - nwsign(server->sign_last, data, server->sign_last); - memcpy(sign_buff, server->sign_last, 8); -} - -int sign_verify_reply(struct ncp_server *server, const char *packet, size_t size, __u32 totalsize, const void *sign_buff) { - unsigned char data[64]; - unsigned char hash[16]; - - memcpy(data, server->sign_root, 8); - *(__u32*)(data + 8) = totalsize; - if (size < 52) { - memcpy(data + 12, packet, size); - memset(data + 12 + size, 0, 52 - size); - } else { - memcpy(data + 12, packet, 52); - } - nwsign(server->sign_last, data, hash); - return memcmp(sign_buff, hash, 8); -} - -#endif /* CONFIG_NCPFS_PACKET_SIGNING */ - diff --git a/fs/ncpfs/ncpsign_kernel.h b/fs/ncpfs/ncpsign_kernel.h deleted file mode 100644 index 57ff0a0650b8..000000000000 --- a/fs/ncpfs/ncpsign_kernel.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ncpsign_kernel.h - * - * Arne de Bruijn (arne@knoware.nl), 1997 - * - */ - -#ifndef _NCPSIGN_KERNEL_H -#define _NCPSIGN_KERNEL_H - -#ifdef CONFIG_NCPFS_PACKET_SIGNING -void __sign_packet(struct ncp_server *server, const char *data, size_t size, __u32 totalsize, void *sign_buff); -int sign_verify_reply(struct ncp_server *server, const char *data, size_t size, __u32 totalsize, const void *sign_buff); -#endif - -static inline size_t sign_packet(struct ncp_server *server, const char *data, size_t size, __u32 totalsize, void *sign_buff) { -#ifdef CONFIG_NCPFS_PACKET_SIGNING - if (server->sign_active) { - __sign_packet(server, data, size, totalsize, sign_buff); - return 8; - } -#endif - return 0; -} - -#endif diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c deleted file mode 100644 index 4c13174d85b7..000000000000 --- a/fs/ncpfs/sock.c +++ /dev/null @@ -1,855 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/fs/ncpfs/sock.c - * - * Copyright (C) 1992, 1993 Rick Sladkey - * - * Modified 1995, 1996 by Volker Lendecke to be usable for ncp - * Modified 1997 Peter Waltenberg, Bill Hawes, David Woodhouse for 2.1 dcache - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/time.h> -#include <linux/errno.h> -#include <linux/socket.h> -#include <linux/fcntl.h> -#include <linux/stat.h> -#include <linux/string.h> -#include <linux/sched/signal.h> -#include <linux/uaccess.h> -#include <linux/in.h> -#include <linux/net.h> -#include <linux/mm.h> -#include <linux/netdevice.h> -#include <linux/signal.h> -#include <linux/slab.h> -#include <net/scm.h> -#include <net/sock.h> -#include <linux/ipx.h> -#include <linux/poll.h> -#include <linux/file.h> - -#include "ncp_fs.h" - -#include "ncpsign_kernel.h" - -static int _recv(struct socket *sock, void *buf, int size, unsigned flags) -{ - struct msghdr msg = {NULL, }; - struct kvec iov = {buf, size}; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, size); - return sock_recvmsg(sock, &msg, flags); -} - -static int _send(struct socket *sock, const void *buff, int len) -{ - struct msghdr msg = { .msg_flags = 0 }; - struct kvec vec = {.iov_base = (void *)buff, .iov_len = len}; - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &vec, 1, len); - return sock_sendmsg(sock, &msg); -} - -struct ncp_request_reply { - struct list_head req; - wait_queue_head_t wq; - atomic_t refs; - unsigned char* reply_buf; - size_t datalen; - int result; - enum { RQ_DONE, RQ_INPROGRESS, RQ_QUEUED, RQ_IDLE, RQ_ABANDONED } status; - struct iov_iter from; - struct kvec tx_iov[3]; - u_int16_t tx_type; - u_int32_t sign[6]; -}; - -static inline struct ncp_request_reply* ncp_alloc_req(void) -{ - struct ncp_request_reply *req; - - req = kmalloc(sizeof(struct ncp_request_reply), GFP_KERNEL); - if (!req) - return NULL; - - init_waitqueue_head(&req->wq); - atomic_set(&req->refs, (1)); - req->status = RQ_IDLE; - - return req; -} - -static void ncp_req_get(struct ncp_request_reply *req) -{ - atomic_inc(&req->refs); -} - -static void ncp_req_put(struct ncp_request_reply *req) -{ - if (atomic_dec_and_test(&req->refs)) - kfree(req); -} - -void ncp_tcp_data_ready(struct sock *sk) -{ - struct ncp_server *server = sk->sk_user_data; - - server->data_ready(sk); - schedule_work(&server->rcv.tq); -} - -void ncp_tcp_error_report(struct sock *sk) -{ - struct ncp_server *server = sk->sk_user_data; - - server->error_report(sk); - schedule_work(&server->rcv.tq); -} - -void ncp_tcp_write_space(struct sock *sk) -{ - struct ncp_server *server = sk->sk_user_data; - - /* We do not need any locking: we first set tx.creq, and then we do sendmsg, - not vice versa... */ - server->write_space(sk); - if (server->tx.creq) - schedule_work(&server->tx.tq); -} - -void ncpdgram_timeout_call(struct timer_list *t) -{ - struct ncp_server *server = from_timer(server, t, timeout_tm); - - schedule_work(&server->timeout_tq); -} - -static inline void ncp_finish_request(struct ncp_server *server, struct ncp_request_reply *req, int result) -{ - req->result = result; - if (req->status != RQ_ABANDONED) - memcpy(req->reply_buf, server->rxbuf, req->datalen); - req->status = RQ_DONE; - wake_up_all(&req->wq); - ncp_req_put(req); -} - -static void __abort_ncp_connection(struct ncp_server *server) -{ - struct ncp_request_reply *req; - - ncp_invalidate_conn(server); - del_timer(&server->timeout_tm); - while (!list_empty(&server->tx.requests)) { - req = list_entry(server->tx.requests.next, struct ncp_request_reply, req); - - list_del_init(&req->req); - ncp_finish_request(server, req, -EIO); - } - req = server->rcv.creq; - if (req) { - server->rcv.creq = NULL; - ncp_finish_request(server, req, -EIO); - server->rcv.ptr = NULL; - server->rcv.state = 0; - } - req = server->tx.creq; - if (req) { - server->tx.creq = NULL; - ncp_finish_request(server, req, -EIO); - } -} - -static inline int get_conn_number(struct ncp_reply_header *rp) -{ - return rp->conn_low | (rp->conn_high << 8); -} - -static inline void __ncp_abort_request(struct ncp_server *server, struct ncp_request_reply *req, int err) -{ - /* If req is done, we got signal, but we also received answer... */ - switch (req->status) { - case RQ_IDLE: - case RQ_DONE: - break; - case RQ_QUEUED: - list_del_init(&req->req); - ncp_finish_request(server, req, err); - break; - case RQ_INPROGRESS: - req->status = RQ_ABANDONED; - break; - case RQ_ABANDONED: - break; - } -} - -static inline void ncp_abort_request(struct ncp_server *server, struct ncp_request_reply *req, int err) -{ - mutex_lock(&server->rcv.creq_mutex); - __ncp_abort_request(server, req, err); - mutex_unlock(&server->rcv.creq_mutex); -} - -static inline void __ncptcp_abort(struct ncp_server *server) -{ - __abort_ncp_connection(server); -} - -static int ncpdgram_send(struct socket *sock, struct ncp_request_reply *req) -{ - struct msghdr msg = { .msg_iter = req->from, .msg_flags = MSG_DONTWAIT }; - return sock_sendmsg(sock, &msg); -} - -static void __ncptcp_try_send(struct ncp_server *server) -{ - struct ncp_request_reply *rq; - struct msghdr msg = { .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT }; - int result; - - rq = server->tx.creq; - if (!rq) - return; - - msg.msg_iter = rq->from; - result = sock_sendmsg(server->ncp_sock, &msg); - - if (result == -EAGAIN) - return; - - if (result < 0) { - pr_err("tcp: Send failed: %d\n", result); - __ncp_abort_request(server, rq, result); - return; - } - if (!msg_data_left(&msg)) { - server->rcv.creq = rq; - server->tx.creq = NULL; - return; - } - rq->from = msg.msg_iter; -} - -static inline void ncp_init_header(struct ncp_server *server, struct ncp_request_reply *req, struct ncp_request_header *h) -{ - req->status = RQ_INPROGRESS; - h->conn_low = server->connection; - h->conn_high = server->connection >> 8; - h->sequence = ++server->sequence; -} - -static void ncpdgram_start_request(struct ncp_server *server, struct ncp_request_reply *req) -{ - size_t signlen, len = req->tx_iov[1].iov_len; - struct ncp_request_header *h = req->tx_iov[1].iov_base; - - ncp_init_header(server, req, h); - signlen = sign_packet(server, - req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1, - len - sizeof(struct ncp_request_header) + 1, - cpu_to_le32(len), req->sign); - if (signlen) { - /* NCP over UDP appends signature */ - req->tx_iov[2].iov_base = req->sign; - req->tx_iov[2].iov_len = signlen; - } - iov_iter_kvec(&req->from, WRITE | ITER_KVEC, - req->tx_iov + 1, signlen ? 2 : 1, len + signlen); - server->rcv.creq = req; - server->timeout_last = server->m.time_out; - server->timeout_retries = server->m.retry_count; - ncpdgram_send(server->ncp_sock, req); - mod_timer(&server->timeout_tm, jiffies + server->m.time_out); -} - -#define NCP_TCP_XMIT_MAGIC (0x446D6454) -#define NCP_TCP_XMIT_VERSION (1) -#define NCP_TCP_RCVD_MAGIC (0x744E6350) - -static void ncptcp_start_request(struct ncp_server *server, struct ncp_request_reply *req) -{ - size_t signlen, len = req->tx_iov[1].iov_len; - struct ncp_request_header *h = req->tx_iov[1].iov_base; - - ncp_init_header(server, req, h); - signlen = sign_packet(server, req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1, - len - sizeof(struct ncp_request_header) + 1, - cpu_to_be32(len + 24), req->sign + 4) + 16; - - req->sign[0] = htonl(NCP_TCP_XMIT_MAGIC); - req->sign[1] = htonl(len + signlen); - req->sign[2] = htonl(NCP_TCP_XMIT_VERSION); - req->sign[3] = htonl(req->datalen + 8); - /* NCP over TCP prepends signature */ - req->tx_iov[0].iov_base = req->sign; - req->tx_iov[0].iov_len = signlen; - iov_iter_kvec(&req->from, WRITE | ITER_KVEC, - req->tx_iov, 2, len + signlen); - - server->tx.creq = req; - __ncptcp_try_send(server); -} - -static inline void __ncp_start_request(struct ncp_server *server, struct ncp_request_reply *req) -{ - /* we copy the data so that we do not depend on the caller - staying alive */ - memcpy(server->txbuf, req->tx_iov[1].iov_base, req->tx_iov[1].iov_len); - req->tx_iov[1].iov_base = server->txbuf; - - if (server->ncp_sock->type == SOCK_STREAM) - ncptcp_start_request(server, req); - else - ncpdgram_start_request(server, req); -} - -static int ncp_add_request(struct ncp_server *server, struct ncp_request_reply *req) -{ - mutex_lock(&server->rcv.creq_mutex); - if (!ncp_conn_valid(server)) { - mutex_unlock(&server->rcv.creq_mutex); - pr_err("tcp: Server died\n"); - return -EIO; - } - ncp_req_get(req); - if (server->tx.creq || server->rcv.creq) { - req->status = RQ_QUEUED; - list_add_tail(&req->req, &server->tx.requests); - mutex_unlock(&server->rcv.creq_mutex); - return 0; - } - __ncp_start_request(server, req); - mutex_unlock(&server->rcv.creq_mutex); - return 0; -} - -static void __ncp_next_request(struct ncp_server *server) -{ - struct ncp_request_reply *req; - - server->rcv.creq = NULL; - if (list_empty(&server->tx.requests)) { - return; - } - req = list_entry(server->tx.requests.next, struct ncp_request_reply, req); - list_del_init(&req->req); - __ncp_start_request(server, req); -} - -static void info_server(struct ncp_server *server, unsigned int id, const void * data, size_t len) -{ - if (server->info_sock) { - struct msghdr msg = { .msg_flags = MSG_NOSIGNAL }; - __be32 hdr[2] = {cpu_to_be32(len + 8), cpu_to_be32(id)}; - struct kvec iov[2] = { - {.iov_base = hdr, .iov_len = 8}, - {.iov_base = (void *)data, .iov_len = len}, - }; - - iov_iter_kvec(&msg.msg_iter, ITER_KVEC | WRITE, - iov, 2, len + 8); - - sock_sendmsg(server->info_sock, &msg); - } -} - -void ncpdgram_rcv_proc(struct work_struct *work) -{ - struct ncp_server *server = - container_of(work, struct ncp_server, rcv.tq); - struct socket* sock; - - sock = server->ncp_sock; - - while (1) { - struct ncp_reply_header reply; - int result; - - result = _recv(sock, &reply, sizeof(reply), MSG_PEEK | MSG_DONTWAIT); - if (result < 0) { - break; - } - if (result >= sizeof(reply)) { - struct ncp_request_reply *req; - - if (reply.type == NCP_WATCHDOG) { - unsigned char buf[10]; - - if (server->connection != get_conn_number(&reply)) { - goto drop; - } - result = _recv(sock, buf, sizeof(buf), MSG_DONTWAIT); - if (result < 0) { - ncp_dbg(1, "recv failed with %d\n", result); - continue; - } - if (result < 10) { - ncp_dbg(1, "too short (%u) watchdog packet\n", result); - continue; - } - if (buf[9] != '?') { - ncp_dbg(1, "bad signature (%02X) in watchdog packet\n", buf[9]); - continue; - } - buf[9] = 'Y'; - _send(sock, buf, sizeof(buf)); - continue; - } - if (reply.type != NCP_POSITIVE_ACK && reply.type != NCP_REPLY) { - result = _recv(sock, server->unexpected_packet.data, sizeof(server->unexpected_packet.data), MSG_DONTWAIT); - if (result < 0) { - continue; - } - info_server(server, 0, server->unexpected_packet.data, result); - continue; - } - mutex_lock(&server->rcv.creq_mutex); - req = server->rcv.creq; - if (req && (req->tx_type == NCP_ALLOC_SLOT_REQUEST || (server->sequence == reply.sequence && - server->connection == get_conn_number(&reply)))) { - if (reply.type == NCP_POSITIVE_ACK) { - server->timeout_retries = server->m.retry_count; - server->timeout_last = NCP_MAX_RPC_TIMEOUT; - mod_timer(&server->timeout_tm, jiffies + NCP_MAX_RPC_TIMEOUT); - } else if (reply.type == NCP_REPLY) { - result = _recv(sock, server->rxbuf, req->datalen, MSG_DONTWAIT); -#ifdef CONFIG_NCPFS_PACKET_SIGNING - if (result >= 0 && server->sign_active && req->tx_type != NCP_DEALLOC_SLOT_REQUEST) { - if (result < 8 + 8) { - result = -EIO; - } else { - unsigned int hdrl; - - result -= 8; - hdrl = sock->sk->sk_family == AF_INET ? 8 : 6; - if (sign_verify_reply(server, server->rxbuf + hdrl, result - hdrl, cpu_to_le32(result), server->rxbuf + result)) { - pr_info("Signature violation\n"); - result = -EIO; - } - } - } -#endif - del_timer(&server->timeout_tm); - server->rcv.creq = NULL; - ncp_finish_request(server, req, result); - __ncp_next_request(server); - mutex_unlock(&server->rcv.creq_mutex); - continue; - } - } - mutex_unlock(&server->rcv.creq_mutex); - } -drop:; - _recv(sock, &reply, sizeof(reply), MSG_DONTWAIT); - } -} - -static void __ncpdgram_timeout_proc(struct ncp_server *server) -{ - /* If timer is pending, we are processing another request... */ - if (!timer_pending(&server->timeout_tm)) { - struct ncp_request_reply* req; - - req = server->rcv.creq; - if (req) { - int timeout; - - if (server->m.flags & NCP_MOUNT_SOFT) { - if (server->timeout_retries-- == 0) { - __ncp_abort_request(server, req, -ETIMEDOUT); - return; - } - } - /* Ignore errors */ - ncpdgram_send(server->ncp_sock, req); - timeout = server->timeout_last << 1; - if (timeout > NCP_MAX_RPC_TIMEOUT) { - timeout = NCP_MAX_RPC_TIMEOUT; - } - server->timeout_last = timeout; - mod_timer(&server->timeout_tm, jiffies + timeout); - } - } -} - -void ncpdgram_timeout_proc(struct work_struct *work) -{ - struct ncp_server *server = - container_of(work, struct ncp_server, timeout_tq); - mutex_lock(&server->rcv.creq_mutex); - __ncpdgram_timeout_proc(server); - mutex_unlock(&server->rcv.creq_mutex); -} - -static int do_tcp_rcv(struct ncp_server *server, void *buffer, size_t len) -{ - int result; - - if (buffer) { - result = _recv(server->ncp_sock, buffer, len, MSG_DONTWAIT); - } else { - static unsigned char dummy[1024]; - - if (len > sizeof(dummy)) { - len = sizeof(dummy); - } - result = _recv(server->ncp_sock, dummy, len, MSG_DONTWAIT); - } - if (result < 0) { - return result; - } - if (result > len) { - pr_err("tcp: bug in recvmsg (%u > %zu)\n", result, len); - return -EIO; - } - return result; -} - -static int __ncptcp_rcv_proc(struct ncp_server *server) -{ - /* We have to check the result, so store the complete header */ - while (1) { - int result; - struct ncp_request_reply *req; - int datalen; - int type; - - while (server->rcv.len) { - result = do_tcp_rcv(server, server->rcv.ptr, server->rcv.len); - if (result == -EAGAIN) { - return 0; - } - if (result <= 0) { - req = server->rcv.creq; - if (req) { - __ncp_abort_request(server, req, -EIO); - } else { - __ncptcp_abort(server); - } - if (result < 0) { - pr_err("tcp: error in recvmsg: %d\n", result); - } else { - ncp_dbg(1, "tcp: EOF\n"); - } - return -EIO; - } - if (server->rcv.ptr) { - server->rcv.ptr += result; - } - server->rcv.len -= result; - } - switch (server->rcv.state) { - case 0: - if (server->rcv.buf.magic != htonl(NCP_TCP_RCVD_MAGIC)) { - pr_err("tcp: Unexpected reply type %08X\n", ntohl(server->rcv.buf.magic)); - __ncptcp_abort(server); - return -EIO; - } - datalen = ntohl(server->rcv.buf.len) & 0x0FFFFFFF; - if (datalen < 10) { - pr_err("tcp: Unexpected reply len %d\n", datalen); - __ncptcp_abort(server); - return -EIO; - } -#ifdef CONFIG_NCPFS_PACKET_SIGNING - if (server->sign_active) { - if (datalen < 18) { - pr_err("tcp: Unexpected reply len %d\n", datalen); - __ncptcp_abort(server); - return -EIO; - } - server->rcv.buf.len = datalen - 8; - server->rcv.ptr = (unsigned char*)&server->rcv.buf.p1; - server->rcv.len = 8; - server->rcv.state = 4; - break; - } -#endif - type = ntohs(server->rcv.buf.type); -#ifdef CONFIG_NCPFS_PACKET_SIGNING -cont:; -#endif - if (type != NCP_REPLY) { - if (datalen - 8 <= sizeof(server->unexpected_packet.data)) { - *(__u16*)(server->unexpected_packet.data) = htons(type); - server->unexpected_packet.len = datalen - 8; - - server->rcv.state = 5; - server->rcv.ptr = server->unexpected_packet.data + 2; - server->rcv.len = datalen - 10; - break; - } - ncp_dbg(1, "tcp: Unexpected NCP type %02X\n", type); -skipdata2:; - server->rcv.state = 2; -skipdata:; - server->rcv.ptr = NULL; - server->rcv.len = datalen - 10; - break; - } - req = server->rcv.creq; - if (!req) { - ncp_dbg(1, "Reply without appropriate request\n"); - goto skipdata2; - } - if (datalen > req->datalen + 8) { - pr_err("tcp: Unexpected reply len %d (expected at most %zd)\n", datalen, req->datalen + 8); - server->rcv.state = 3; - goto skipdata; - } - req->datalen = datalen - 8; - ((struct ncp_reply_header*)server->rxbuf)->type = NCP_REPLY; - server->rcv.ptr = server->rxbuf + 2; - server->rcv.len = datalen - 10; - server->rcv.state = 1; - break; -#ifdef CONFIG_NCPFS_PACKET_SIGNING - case 4: - datalen = server->rcv.buf.len; - type = ntohs(server->rcv.buf.type2); - goto cont; -#endif - case 1: - req = server->rcv.creq; - if (req->tx_type != NCP_ALLOC_SLOT_REQUEST) { - if (((struct ncp_reply_header*)server->rxbuf)->sequence != server->sequence) { - pr_err("tcp: Bad sequence number\n"); - __ncp_abort_request(server, req, -EIO); - return -EIO; - } - if ((((struct ncp_reply_header*)server->rxbuf)->conn_low | (((struct ncp_reply_header*)server->rxbuf)->conn_high << 8)) != server->connection) { - pr_err("tcp: Connection number mismatch\n"); - __ncp_abort_request(server, req, -EIO); - return -EIO; - } - } -#ifdef CONFIG_NCPFS_PACKET_SIGNING - if (server->sign_active && req->tx_type != NCP_DEALLOC_SLOT_REQUEST) { - if (sign_verify_reply(server, server->rxbuf + 6, req->datalen - 6, cpu_to_be32(req->datalen + 16), &server->rcv.buf.type)) { - pr_err("tcp: Signature violation\n"); - __ncp_abort_request(server, req, -EIO); - return -EIO; - } - } -#endif - ncp_finish_request(server, req, req->datalen); - nextreq:; - __ncp_next_request(server); - case 2: - next:; - server->rcv.ptr = (unsigned char*)&server->rcv.buf; - server->rcv.len = 10; - server->rcv.state = 0; - break; - case 3: - ncp_finish_request(server, server->rcv.creq, -EIO); - goto nextreq; - case 5: - info_server(server, 0, server->unexpected_packet.data, server->unexpected_packet.len); - goto next; - } - } -} - -void ncp_tcp_rcv_proc(struct work_struct *work) -{ - struct ncp_server *server = - container_of(work, struct ncp_server, rcv.tq); - - mutex_lock(&server->rcv.creq_mutex); - __ncptcp_rcv_proc(server); - mutex_unlock(&server->rcv.creq_mutex); -} - -void ncp_tcp_tx_proc(struct work_struct *work) -{ - struct ncp_server *server = - container_of(work, struct ncp_server, tx.tq); - - mutex_lock(&server->rcv.creq_mutex); - __ncptcp_try_send(server); - mutex_unlock(&server->rcv.creq_mutex); -} - -static int do_ncp_rpc_call(struct ncp_server *server, int size, - unsigned char* reply_buf, int max_reply_size) -{ - int result; - struct ncp_request_reply *req; - - req = ncp_alloc_req(); - if (!req) - return -ENOMEM; - - req->reply_buf = reply_buf; - req->datalen = max_reply_size; - req->tx_iov[1].iov_base = server->packet; - req->tx_iov[1].iov_len = size; - req->tx_type = *(u_int16_t*)server->packet; - - result = ncp_add_request(server, req); - if (result < 0) - goto out; - - if (wait_event_interruptible(req->wq, req->status == RQ_DONE)) { - ncp_abort_request(server, req, -EINTR); - result = -EINTR; - goto out; - } - - result = req->result; - -out: - ncp_req_put(req); - - return result; -} - -/* - * We need the server to be locked here, so check! - */ - -static int ncp_do_request(struct ncp_server *server, int size, - void* reply, int max_reply_size) -{ - int result; - - if (server->lock == 0) { - pr_err("Server not locked!\n"); - return -EIO; - } - if (!ncp_conn_valid(server)) { - return -EIO; - } - { - sigset_t old_set; - unsigned long mask, flags; - - spin_lock_irqsave(¤t->sighand->siglock, flags); - old_set = current->blocked; - if (current->flags & PF_EXITING) - mask = 0; - else - mask = sigmask(SIGKILL); - if (server->m.flags & NCP_MOUNT_INTR) { - /* FIXME: This doesn't seem right at all. So, like, - we can't handle SIGINT and get whatever to stop? - What if we've blocked it ourselves? What about - alarms? Why, in fact, are we mucking with the - sigmask at all? -- r~ */ - if (current->sighand->action[SIGINT - 1].sa.sa_handler == SIG_DFL) - mask |= sigmask(SIGINT); - if (current->sighand->action[SIGQUIT - 1].sa.sa_handler == SIG_DFL) - mask |= sigmask(SIGQUIT); - } - siginitsetinv(¤t->blocked, mask); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - - result = do_ncp_rpc_call(server, size, reply, max_reply_size); - - spin_lock_irqsave(¤t->sighand->siglock, flags); - current->blocked = old_set; - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - } - - ncp_dbg(2, "do_ncp_rpc_call returned %d\n", result); - - return result; -} - -/* ncp_do_request assures that at least a complete reply header is - * received. It assumes that server->current_size contains the ncp - * request size - */ -int ncp_request2(struct ncp_server *server, int function, - void* rpl, int size) -{ - struct ncp_request_header *h; - struct ncp_reply_header* reply = rpl; - int result; - - h = (struct ncp_request_header *) (server->packet); - if (server->has_subfunction != 0) { - *(__u16 *) & (h->data[0]) = htons(server->current_size - sizeof(*h) - 2); - } - h->type = NCP_REQUEST; - /* - * The server shouldn't know or care what task is making a - * request, so we always use the same task number. - */ - h->task = 2; /* (current->pid) & 0xff; */ - h->function = function; - - result = ncp_do_request(server, server->current_size, reply, size); - if (result < 0) { - ncp_dbg(1, "ncp_request_error: %d\n", result); - goto out; - } - server->completion = reply->completion_code; - server->conn_status = reply->connection_state; - server->reply_size = result; - server->ncp_reply_size = result - sizeof(struct ncp_reply_header); - - result = reply->completion_code; - - if (result != 0) - ncp_vdbg("completion code=%x\n", result); -out: - return result; -} - -int ncp_connect(struct ncp_server *server) -{ - struct ncp_request_header *h; - int result; - - server->connection = 0xFFFF; - server->sequence = 255; - - h = (struct ncp_request_header *) (server->packet); - h->type = NCP_ALLOC_SLOT_REQUEST; - h->task = 2; /* see above */ - h->function = 0; - - result = ncp_do_request(server, sizeof(*h), server->packet, server->packet_size); - if (result < 0) - goto out; - server->connection = h->conn_low + (h->conn_high * 256); - result = 0; -out: - return result; -} - -int ncp_disconnect(struct ncp_server *server) -{ - struct ncp_request_header *h; - - h = (struct ncp_request_header *) (server->packet); - h->type = NCP_DEALLOC_SLOT_REQUEST; - h->task = 2; /* see above */ - h->function = 0; - - return ncp_do_request(server, sizeof(*h), server->packet, server->packet_size); -} - -void ncp_lock_server(struct ncp_server *server) -{ - mutex_lock(&server->mutex); - if (server->lock) - pr_warn("%s: was locked!\n", __func__); - server->lock = 1; -} - -void ncp_unlock_server(struct ncp_server *server) -{ - if (!server->lock) { - pr_warn("%s: was not locked!\n", __func__); - return; - } - server->lock = 0; - mutex_unlock(&server->mutex); -} diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c deleted file mode 100644 index b6e16da4837a..000000000000 --- a/fs/ncpfs/symlink.c +++ /dev/null @@ -1,182 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/fs/ncpfs/symlink.c - * - * Code for allowing symbolic links on NCPFS (i.e. NetWare) - * Symbolic links are not supported on native NetWare, so we use an - * infrequently-used flag (Sh) and store a two-word magic header in - * the file to make sure we don't accidentally use a non-link file - * as a link. - * - * When using the NFS namespace, we set the mode to indicate a symlink and - * don't bother with the magic numbers. - * - * from linux/fs/ext2/symlink.c - * - * Copyright (C) 1998-99, Frank A. Vorstenbosch - * - * ncpfs symlink handling code - * NLS support (c) 1999 Petr Vandrovec - * Modified 2000 Ben Harris, University of Cambridge for NFS NS meta-info - * - */ - - -#include <linux/uaccess.h> - -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/time.h> -#include <linux/slab.h> -#include <linux/mm.h> -#include <linux/stat.h> -#include "ncp_fs.h" - -/* these magic numbers must appear in the symlink file -- this makes it a bit - more resilient against the magic attributes being set on random files. */ - -#define NCP_SYMLINK_MAGIC0 cpu_to_le32(0x6c6d7973) /* "symlnk->" */ -#define NCP_SYMLINK_MAGIC1 cpu_to_le32(0x3e2d6b6e) - -/* ----- read a symbolic link ------------------------------------------ */ - -static int ncp_symlink_readpage(struct file *file, struct page *page) -{ - struct inode *inode = page->mapping->host; - int error, length, len; - char *link, *rawlink; - char *buf = kmap(page); - - error = -ENOMEM; - rawlink = kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_KERNEL); - if (!rawlink) - goto fail; - - if (ncp_make_open(inode,O_RDONLY)) - goto failEIO; - - error=ncp_read_kernel(NCP_SERVER(inode),NCP_FINFO(inode)->file_handle, - 0,NCP_MAX_SYMLINK_SIZE,rawlink,&length); - - ncp_inode_close(inode); - /* Close file handle if no other users... */ - ncp_make_closed(inode); - if (error) - goto failEIO; - - if (NCP_FINFO(inode)->flags & NCPI_KLUDGE_SYMLINK) { - if (length<NCP_MIN_SYMLINK_SIZE || - ((__le32 *)rawlink)[0]!=NCP_SYMLINK_MAGIC0 || - ((__le32 *)rawlink)[1]!=NCP_SYMLINK_MAGIC1) - goto failEIO; - link = rawlink + 8; - length -= 8; - } else { - link = rawlink; - } - - len = NCP_MAX_SYMLINK_SIZE; - error = ncp_vol2io(NCP_SERVER(inode), buf, &len, link, length, 0); - kfree(rawlink); - if (error) - goto fail; - SetPageUptodate(page); - kunmap(page); - unlock_page(page); - return 0; - -failEIO: - error = -EIO; - kfree(rawlink); -fail: - SetPageError(page); - kunmap(page); - unlock_page(page); - return error; -} - -/* - * symlinks can't do much... - */ -const struct address_space_operations ncp_symlink_aops = { - .readpage = ncp_symlink_readpage, -}; - -/* ----- create a new symbolic link -------------------------------------- */ - -int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct inode *inode; - char *rawlink; - int length, err, i, outlen; - int kludge; - umode_t mode; - __le32 attr; - unsigned int hdr; - - ncp_dbg(1, "dir=%p, dentry=%p, symname=%s\n", dir, dentry, symname); - - if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) - kludge = 0; - else -#ifdef CONFIG_NCPFS_EXTRAS - if (NCP_SERVER(dir)->m.flags & NCP_MOUNT_SYMLINKS) - kludge = 1; - else -#endif - /* EPERM is returned by VFS if symlink procedure does not exist */ - return -EPERM; - - rawlink = kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_KERNEL); - if (!rawlink) - return -ENOMEM; - - if (kludge) { - mode = 0; - attr = aSHARED | aHIDDEN; - ((__le32 *)rawlink)[0]=NCP_SYMLINK_MAGIC0; - ((__le32 *)rawlink)[1]=NCP_SYMLINK_MAGIC1; - hdr = 8; - } else { - mode = S_IFLNK | S_IRWXUGO; - attr = 0; - hdr = 0; - } - - length = strlen(symname); - /* map to/from server charset, do not touch upper/lower case as - symlink can point out of ncp filesystem */ - outlen = NCP_MAX_SYMLINK_SIZE - hdr; - err = ncp_io2vol(NCP_SERVER(dir), rawlink + hdr, &outlen, symname, length, 0); - if (err) - goto failfree; - - outlen += hdr; - - err = -EIO; - if (ncp_create_new(dir,dentry,mode,0,attr)) { - goto failfree; - } - - inode=d_inode(dentry); - - if (ncp_make_open(inode, O_WRONLY)) - goto failfree; - - if (ncp_write_kernel(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle, - 0, outlen, rawlink, &i) || i!=outlen) { - goto fail; - } - - ncp_inode_close(inode); - ncp_make_closed(inode); - kfree(rawlink); - return 0; -fail:; - ncp_inode_close(inode); - ncp_make_closed(inode); -failfree:; - kfree(rawlink); - return err; -} - -/* ----- EOF ----- */ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ceeaf0fb6657..7d893543cf3b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1314,7 +1314,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) - && !inode_cmp_iversion_raw(inode, fattr->pre_change_attr)) { + && inode_eq_iversion_raw(inode, fattr->pre_change_attr)) { inode_set_iversion_raw(inode, fattr->change_attr); if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); @@ -1373,7 +1373,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (!nfs_file_has_buffered_writers(nfsi)) { /* Verify a few of the more important attributes */ - if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && inode_cmp_iversion_raw(inode, fattr->change_attr)) + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr)) invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE; if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) @@ -1803,7 +1803,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { - if (inode_cmp_iversion_raw(inode, fattr->change_attr)) { + if (!inode_eq_iversion_raw(inode, fattr->change_attr)) { dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); /* Could it be a race with writeback? */ diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 2758480555fa..1a70581e1cb2 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -251,6 +251,34 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) } /* + * Fill in the pre_op attr for the wcc data + */ +void fill_pre_wcc(struct svc_fh *fhp) +{ + struct inode *inode; + struct kstat stat; + __be32 err; + + if (fhp->fh_pre_saved) + return; + + inode = d_inode(fhp->fh_dentry); + err = fh_getattr(fhp, &stat); + if (err) { + /* Grab the times from inode anyway */ + stat.mtime = inode->i_mtime; + stat.ctime = inode->i_ctime; + stat.size = inode->i_size; + } + + fhp->fh_pre_mtime = stat.mtime; + fhp->fh_pre_ctime = stat.ctime; + fhp->fh_pre_size = stat.size; + fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); + fhp->fh_pre_saved = true; +} + +/* * Fill in the post_op attr for the wcc data */ void fill_post_wcc(struct svc_fh *fhp) @@ -261,7 +289,8 @@ void fill_post_wcc(struct svc_fh *fhp) printk("nfsd: inode locked twice during operation.\n"); err = fh_getattr(fhp, &fhp->fh_post_attr); - fhp->fh_post_change = nfsd4_change_attribute(d_inode(fhp->fh_dentry)); + fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, + d_inode(fhp->fh_dentry)); if (err) { fhp->fh_post_saved = false; /* Grab the ctime anyway - set_change_info might use it */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 008ea0b627d0..a0bed2b2004d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1363,14 +1363,14 @@ nfsd4_layoutget(struct svc_rqst *rqstp, const struct nfsd4_layout_ops *ops; struct nfs4_layout_stateid *ls; __be32 nfserr; - int accmode; + int accmode = NFSD_MAY_READ_IF_EXEC; switch (lgp->lg_seg.iomode) { case IOMODE_READ: - accmode = NFSD_MAY_READ; + accmode |= NFSD_MAY_READ; break; case IOMODE_RW: - accmode = NFSD_MAY_READ | NFSD_MAY_WRITE; + accmode |= NFSD_MAY_READ | NFSD_MAY_WRITE; break; default: dprintk("%s: invalid iomode %d\n", @@ -1703,6 +1703,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) status = nfserr_minor_vers_mismatch; if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0) goto out; + status = nfserr_resource; + if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND) + goto out; status = nfs41_check_op_ordering(args); if (status) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b29b5a185a2c..150521c9671b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3590,6 +3590,7 @@ nfsd4_verify_open_stid(struct nfs4_stid *s) switch (s->sc_type) { default: break; + case 0: case NFS4_CLOSED_STID: case NFS4_CLOSED_DELEG_STID: ret = nfserr_bad_stateid; @@ -5182,7 +5183,6 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s) lockowner(stp->st_stateowner))) goto out; - stp->st_stid.sc_type = NFS4_CLOSED_STID; release_lock_stateid(stp); ret = nfs_ok; @@ -6078,10 +6078,8 @@ out: * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. */ - if (status && new) { - lock_stp->st_stid.sc_type = NFS4_CLOSED_STID; + if (status && new) release_lock_stateid(lock_stp); - } mutex_unlock(&lock_stp->st_mutex); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2c61c6b8ae09..e502fd16246b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -455,8 +455,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, } label->len = 0; -#ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { + if (IS_ENABLED(CONFIG_NFSD_V4_SECURITY_LABEL) && + bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { READ_BUF(4); len += 4; dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */ @@ -476,7 +476,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (!label->data) return nfserr_jukebox; } -#endif if (bmval[2] & FATTR4_WORD2_MODE_UMASK) { if (!umask) goto xdr_error; @@ -1918,8 +1917,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (argp->taglen > NFSD4_MAX_TAGLEN) goto xdr_error; - if (argp->opcnt > 100) - goto xdr_error; + /* + * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS + * here, so we return success at the xdr level so that + * nfsd4_proc can handle this is an NFS-level error. + */ + if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND) + return 0; if (argp->opcnt > ARRAY_SIZE(argp->iops)) { argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); @@ -1991,7 +1995,7 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time)); *p++ = 0; } else if (IS_I_VERSION(inode)) { - p = xdr_encode_hyper(p, nfsd4_change_attribute(inode)); + p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode)); } else { *p++ = cpu_to_be32(stat->ctime.tv_sec); *p++ = cpu_to_be32(stat->ctime.tv_nsec); diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index b8444189223b..755e256a9103 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -253,36 +253,20 @@ fh_clear_wcc(struct svc_fh *fhp) * By using both ctime and the i_version counter we guarantee that as * long as time doesn't go backwards we never reuse an old value. */ -static inline u64 nfsd4_change_attribute(struct inode *inode) +static inline u64 nfsd4_change_attribute(struct kstat *stat, + struct inode *inode) { u64 chattr; - chattr = inode->i_ctime.tv_sec; + chattr = stat->ctime.tv_sec; chattr <<= 30; - chattr += inode->i_ctime.tv_nsec; + chattr += stat->ctime.tv_nsec; chattr += inode_query_iversion(inode); return chattr; } -/* - * Fill in the pre_op attr for the wcc data - */ -static inline void -fill_pre_wcc(struct svc_fh *fhp) -{ - struct inode *inode; - - inode = d_inode(fhp->fh_dentry); - if (!fhp->fh_pre_saved) { - fhp->fh_pre_mtime = inode->i_mtime; - fhp->fh_pre_ctime = inode->i_ctime; - fhp->fh_pre_size = inode->i_size; - fhp->fh_pre_change = nfsd4_change_attribute(inode); - fhp->fh_pre_saved = true; - } -} - -extern void fill_post_wcc(struct svc_fh *); +extern void fill_pre_wcc(struct svc_fh *fhp); +extern void fill_post_wcc(struct svc_fh *fhp); #else #define fh_clear_wcc(ignored) #define fill_pre_wcc(ignored) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 644a0342f0e0..79b6064f8977 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -188,6 +188,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, *p++ = htonl((u32) stat->ino); *p++ = htonl((u32) stat->atime.tv_sec); *p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0); + time = stat->mtime; lease_get_mtime(d_inode(dentry), &time); *p++ = htonl((u32) time.tv_sec); *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 6c5009cc4e6f..68cb9e4740b4 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -130,7 +130,7 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf, } int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned int flags, - time_t ctime, __u64 cno) + time64_t ctime, __u64 cno) { int err; diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 7bbccc099709..10e16935fff6 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -46,7 +46,7 @@ struct nilfs_segsum_info { unsigned long nfileblk; u64 seg_seq; __u64 cno; - time_t ctime; + time64_t ctime; sector_t next; }; @@ -120,7 +120,7 @@ void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf, struct nilfs_segment_buffer *prev); void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, struct the_nilfs *); -int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned int, time_t, +int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned int, time64_t, __u64); int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *); int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *, diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 9f3ffba41533..0953635e7d48 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2040,7 +2040,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) goto out; /* Update time stamp */ - sci->sc_seg_ctime = get_seconds(); + sci->sc_seg_ctime = ktime_get_real_seconds(); err = nilfs_segctor_collect(sci, nilfs, mode); if (unlikely(err)) diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 84084a4d9b3e..04634e3e3d58 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -157,7 +157,7 @@ struct nilfs_sc_info { unsigned long sc_blk_cnt; unsigned long sc_datablk_cnt; unsigned long sc_nblk_this_inc; - time_t sc_seg_ctime; + time64_t sc_seg_ctime; __u64 sc_cno; unsigned long sc_flags; diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 1341a41e7b43..c7fa139d50e8 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -526,7 +526,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) * @modtime: modification time (option) */ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, - unsigned long nblocks, time_t modtime) + unsigned long nblocks, time64_t modtime) { struct buffer_head *bh; struct nilfs_segment_usage *su; diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 158a9190c8ec..673a891350f4 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -35,7 +35,7 @@ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end); int nilfs_sufile_alloc(struct inode *, __u64 *); int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, - unsigned long nblocks, time_t modtime); + unsigned long nblocks, time64_t modtime); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned int, size_t); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 3073b646e1ba..6ffeca84d7c3 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -283,10 +283,10 @@ int nilfs_commit_super(struct super_block *sb, int flag) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp = nilfs->ns_sbp; - time_t t; + time64_t t; /* nilfs->ns_sem must be locked by the caller. */ - t = get_seconds(); + t = ktime_get_real_seconds(); nilfs->ns_sbwtime = t; sbp[0]->s_wtime = cpu_to_le64(t); sbp[0]->s_sum = 0; diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 490303e3d517..4b25837e7724 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -31,7 +31,7 @@ static struct kset *nilfs_kset; #define NILFS_SHOW_TIME(time_t_val, buf) ({ \ struct tm res; \ int count = 0; \ - time_to_tm(time_t_val, 0, &res); \ + time64_to_tm(time_t_val, 0, &res); \ res.tm_year += 1900; \ res.tm_mon += 1; \ count = scnprintf(buf, PAGE_SIZE, \ @@ -579,7 +579,7 @@ nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr, struct the_nilfs *nilfs, char *buf) { - time_t ctime; + time64_t ctime; down_read(&nilfs->ns_segctor_sem); ctime = nilfs->ns_ctime; @@ -593,13 +593,13 @@ nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr, struct the_nilfs *nilfs, char *buf) { - time_t ctime; + time64_t ctime; down_read(&nilfs->ns_segctor_sem); ctime = nilfs->ns_ctime; up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)ctime); + return snprintf(buf, PAGE_SIZE, "%llu\n", ctime); } static ssize_t @@ -607,7 +607,7 @@ nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr, struct the_nilfs *nilfs, char *buf) { - time_t nongc_ctime; + time64_t nongc_ctime; down_read(&nilfs->ns_segctor_sem); nongc_ctime = nilfs->ns_nongc_ctime; @@ -621,14 +621,13 @@ nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr, struct the_nilfs *nilfs, char *buf) { - time_t nongc_ctime; + time64_t nongc_ctime; down_read(&nilfs->ns_segctor_sem); nongc_ctime = nilfs->ns_nongc_ctime; up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)nongc_ctime); + return snprintf(buf, PAGE_SIZE, "%llu\n", nongc_ctime); } static ssize_t @@ -728,7 +727,7 @@ nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr, struct the_nilfs *nilfs, char *buf) { - time_t sbwtime; + time64_t sbwtime; down_read(&nilfs->ns_sem); sbwtime = nilfs->ns_sbwtime; @@ -742,13 +741,13 @@ nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr, struct the_nilfs *nilfs, char *buf) { - time_t sbwtime; + time64_t sbwtime; down_read(&nilfs->ns_sem); sbwtime = nilfs->ns_sbwtime; up_read(&nilfs->ns_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)sbwtime); + return snprintf(buf, PAGE_SIZE, "%llu\n", sbwtime); } static ssize_t diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 883d732b0259..36da1779f976 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -116,7 +116,7 @@ struct the_nilfs { */ struct buffer_head *ns_sbh[2]; struct nilfs_super_block *ns_sbp[2]; - time_t ns_sbwtime; + time64_t ns_sbwtime; unsigned int ns_sbwcount; unsigned int ns_sbsize; unsigned int ns_mount_state; @@ -131,8 +131,8 @@ struct the_nilfs { __u64 ns_nextnum; unsigned long ns_pseg_offset; __u64 ns_cno; - time_t ns_ctime; - time_t ns_nongc_ctime; + time64_t ns_ctime; + time64_t ns_nongc_ctime; atomic_t ns_ndirtyblks; /* @@ -267,7 +267,7 @@ struct nilfs_root { static inline int nilfs_sb_need_update(struct the_nilfs *nilfs) { - u64 t = get_seconds(); + u64 t = ktime_get_real_seconds(); return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq; diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 40b5cc97f7b0..917fadca8a7b 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -311,7 +311,9 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type) if (had_lock < 0) return ERR_PTR(had_lock); + down_read(&OCFS2_I(inode)->ip_xattr_sem); acl = ocfs2_get_acl_nolock(inode, type, di_bh); + up_read(&OCFS2_I(inode)->ip_xattr_sem); ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock); brelse(di_bh); @@ -330,7 +332,9 @@ int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh) if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) return 0; + down_read(&OCFS2_I(inode)->ip_xattr_sem); acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh); + up_read(&OCFS2_I(inode)->ip_xattr_sem); if (IS_ERR(acl) || !acl) return PTR_ERR(acl); ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); @@ -361,8 +365,10 @@ int ocfs2_init_acl(handle_t *handle, if (!S_ISLNK(inode->i_mode)) { if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { + down_read(&OCFS2_I(dir)->ip_xattr_sem); acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT, dir_bh); + up_read(&OCFS2_I(dir)->ip_xattr_sem); if (IS_ERR(acl)) return PTR_ERR(acl); } diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index ab5105f9767e..9a876bb07cac 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -165,6 +165,13 @@ static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et, struct ocfs2_extent_rec *rec); static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et); static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); + +static int ocfs2_reuse_blk_from_dealloc(handle_t *handle, + struct ocfs2_extent_tree *et, + struct buffer_head **new_eb_bh, + int blk_wanted, int *blk_given); +static int ocfs2_is_dealloc_empty(struct ocfs2_extent_tree *et); + static const struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, @@ -448,6 +455,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, if (!obj) obj = (void *)bh->b_data; et->et_object = obj; + et->et_dealloc = NULL; et->et_ops->eo_fill_root_el(et); if (!et->et_ops->eo_fill_max_leaf_clusters) @@ -1158,7 +1166,7 @@ static int ocfs2_add_branch(handle_t *handle, struct buffer_head **last_eb_bh, struct ocfs2_alloc_context *meta_ac) { - int status, new_blocks, i; + int status, new_blocks, i, block_given = 0; u64 next_blkno, new_last_eb_blk; struct buffer_head *bh; struct buffer_head **new_eb_bhs = NULL; @@ -1213,11 +1221,31 @@ static int ocfs2_add_branch(handle_t *handle, goto bail; } - status = ocfs2_create_new_meta_bhs(handle, et, new_blocks, - meta_ac, new_eb_bhs); - if (status < 0) { - mlog_errno(status); - goto bail; + /* Firstyly, try to reuse dealloc since we have already estimated how + * many extent blocks we may use. + */ + if (!ocfs2_is_dealloc_empty(et)) { + status = ocfs2_reuse_blk_from_dealloc(handle, et, + new_eb_bhs, new_blocks, + &block_given); + if (status < 0) { + mlog_errno(status); + goto bail; + } + } + + BUG_ON(block_given > new_blocks); + + if (block_given < new_blocks) { + BUG_ON(!meta_ac); + status = ocfs2_create_new_meta_bhs(handle, et, + new_blocks - block_given, + meta_ac, + &new_eb_bhs[block_given]); + if (status < 0) { + mlog_errno(status); + goto bail; + } } /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be @@ -1340,15 +1368,25 @@ static int ocfs2_shift_tree_depth(handle_t *handle, struct ocfs2_alloc_context *meta_ac, struct buffer_head **ret_new_eb_bh) { - int status, i; + int status, i, block_given = 0; u32 new_clusters; struct buffer_head *new_eb_bh = NULL; struct ocfs2_extent_block *eb; struct ocfs2_extent_list *root_el; struct ocfs2_extent_list *eb_el; - status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac, - &new_eb_bh); + if (!ocfs2_is_dealloc_empty(et)) { + status = ocfs2_reuse_blk_from_dealloc(handle, et, + &new_eb_bh, 1, + &block_given); + } else if (meta_ac) { + status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac, + &new_eb_bh); + + } else { + BUG(); + } + if (status < 0) { mlog_errno(status); goto bail; @@ -1511,7 +1549,7 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et, int depth = le16_to_cpu(el->l_tree_depth); struct buffer_head *bh = NULL; - BUG_ON(meta_ac == NULL); + BUG_ON(meta_ac == NULL && ocfs2_is_dealloc_empty(et)); shift = ocfs2_find_branch_target(et, &bh); if (shift < 0) { @@ -2598,11 +2636,8 @@ static void ocfs2_unlink_subtree(handle_t *handle, int i; struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el; - struct ocfs2_extent_list *el; struct ocfs2_extent_block *eb; - el = path_leaf_el(left_path); - eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data; for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++) @@ -3938,7 +3973,7 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle, struct ocfs2_path *path, struct ocfs2_extent_rec *insert_rec) { - int ret, i, next_free; + int i, next_free; struct buffer_head *bh; struct ocfs2_extent_list *el; struct ocfs2_extent_rec *rec; @@ -3955,7 +3990,6 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle, ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), "Owner %llu has a bad extent list\n", (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); - ret = -EIO; return; } @@ -5057,7 +5091,6 @@ int ocfs2_split_extent(handle_t *handle, struct buffer_head *last_eb_bh = NULL; struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; struct ocfs2_merge_ctxt ctxt; - struct ocfs2_extent_list *rightmost_el; if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) || ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) < @@ -5093,9 +5126,7 @@ int ocfs2_split_extent(handle_t *handle, } eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; - rightmost_el = &eb->h_list; - } else - rightmost_el = path_root_el(path); + } if (rec->e_cpos == split_rec->e_cpos && rec->e_leaf_clusters == split_rec->e_leaf_clusters) @@ -6585,6 +6616,154 @@ ocfs2_find_per_slot_free_list(int type, return fl; } +static struct ocfs2_per_slot_free_list * +ocfs2_find_preferred_free_list(int type, + int preferred_slot, + int *real_slot, + struct ocfs2_cached_dealloc_ctxt *ctxt) +{ + struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator; + + while (fl) { + if (fl->f_inode_type == type && fl->f_slot == preferred_slot) { + *real_slot = fl->f_slot; + return fl; + } + + fl = fl->f_next_suballocator; + } + + /* If we can't find any free list matching preferred slot, just use + * the first one. + */ + fl = ctxt->c_first_suballocator; + *real_slot = fl->f_slot; + + return fl; +} + +/* Return Value 1 indicates empty */ +static int ocfs2_is_dealloc_empty(struct ocfs2_extent_tree *et) +{ + struct ocfs2_per_slot_free_list *fl = NULL; + + if (!et->et_dealloc) + return 1; + + fl = et->et_dealloc->c_first_suballocator; + if (!fl) + return 1; + + if (!fl->f_first) + return 1; + + return 0; +} + +/* If extent was deleted from tree due to extent rotation and merging, and + * no metadata is reserved ahead of time. Try to reuse some extents + * just deleted. This is only used to reuse extent blocks. + * It is supposed to find enough extent blocks in dealloc if our estimation + * on metadata is accurate. + */ +static int ocfs2_reuse_blk_from_dealloc(handle_t *handle, + struct ocfs2_extent_tree *et, + struct buffer_head **new_eb_bh, + int blk_wanted, int *blk_given) +{ + int i, status = 0, real_slot; + struct ocfs2_cached_dealloc_ctxt *dealloc; + struct ocfs2_per_slot_free_list *fl; + struct ocfs2_cached_block_free *bf; + struct ocfs2_extent_block *eb; + struct ocfs2_super *osb = + OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci)); + + *blk_given = 0; + + /* If extent tree doesn't have a dealloc, this is not faulty. Just + * tell upper caller dealloc can't provide any block and it should + * ask for alloc to claim more space. + */ + dealloc = et->et_dealloc; + if (!dealloc) + goto bail; + + for (i = 0; i < blk_wanted; i++) { + /* Prefer to use local slot */ + fl = ocfs2_find_preferred_free_list(EXTENT_ALLOC_SYSTEM_INODE, + osb->slot_num, &real_slot, + dealloc); + /* If no more block can be reused, we should claim more + * from alloc. Just return here normally. + */ + if (!fl) { + status = 0; + break; + } + + bf = fl->f_first; + fl->f_first = bf->free_next; + + new_eb_bh[i] = sb_getblk(osb->sb, bf->free_blk); + if (new_eb_bh[i] == NULL) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + + mlog(0, "Reusing block(%llu) from " + "dealloc(local slot:%d, real slot:%d)\n", + bf->free_blk, osb->slot_num, real_slot); + + ocfs2_set_new_buffer_uptodate(et->et_ci, new_eb_bh[i]); + + status = ocfs2_journal_access_eb(handle, et->et_ci, + new_eb_bh[i], + OCFS2_JOURNAL_ACCESS_CREATE); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + memset(new_eb_bh[i]->b_data, 0, osb->sb->s_blocksize); + eb = (struct ocfs2_extent_block *) new_eb_bh[i]->b_data; + + /* We can't guarantee that buffer head is still cached, so + * polutlate the extent block again. + */ + strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); + eb->h_blkno = cpu_to_le64(bf->free_blk); + eb->h_fs_generation = cpu_to_le32(osb->fs_generation); + eb->h_suballoc_slot = cpu_to_le16(real_slot); + eb->h_suballoc_loc = cpu_to_le64(bf->free_bg); + eb->h_suballoc_bit = cpu_to_le16(bf->free_bit); + eb->h_list.l_count = + cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); + + /* We'll also be dirtied by the caller, so + * this isn't absolutely necessary. + */ + ocfs2_journal_dirty(handle, new_eb_bh[i]); + + if (!fl->f_first) { + dealloc->c_first_suballocator = fl->f_next_suballocator; + kfree(fl); + } + kfree(bf); + } + + *blk_given = i; + +bail: + if (unlikely(status < 0)) { + for (i = 0; i < blk_wanted; i++) + brelse(new_eb_bh[i]); + } + + return status; +} + int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, int type, int slot, u64 suballoc, u64 blkno, unsigned int bit) @@ -7382,6 +7561,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) struct buffer_head *gd_bh = NULL; struct ocfs2_dinode *main_bm; struct ocfs2_group_desc *gd = NULL; + struct ocfs2_trim_fs_info info, *pinfo = NULL; start = range->start >> osb->s_clustersize_bits; len = range->len >> osb->s_clustersize_bits; @@ -7419,6 +7599,42 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) trace_ocfs2_trim_fs(start, len, minlen); + ocfs2_trim_fs_lock_res_init(osb); + ret = ocfs2_trim_fs_lock(osb, NULL, 1); + if (ret < 0) { + if (ret != -EAGAIN) { + mlog_errno(ret); + ocfs2_trim_fs_lock_res_uninit(osb); + goto out_unlock; + } + + mlog(ML_NOTICE, "Wait for trim on device (%s) to " + "finish, which is running from another node.\n", + osb->dev_str); + ret = ocfs2_trim_fs_lock(osb, &info, 0); + if (ret < 0) { + mlog_errno(ret); + ocfs2_trim_fs_lock_res_uninit(osb); + goto out_unlock; + } + + if (info.tf_valid && info.tf_success && + info.tf_start == start && info.tf_len == len && + info.tf_minlen == minlen) { + /* Avoid sending duplicated trim to a shared device */ + mlog(ML_NOTICE, "The same trim on device (%s) was " + "just done from node (%u), return.\n", + osb->dev_str, info.tf_nodenum); + range->len = info.tf_trimlen; + goto out_trimunlock; + } + } + + info.tf_nodenum = osb->node_num; + info.tf_start = start; + info.tf_len = len; + info.tf_minlen = minlen; + /* Determine first and last group to examine based on start and len */ first_group = ocfs2_which_cluster_group(main_bm_inode, start); if (first_group == osb->first_cluster_group_blkno) @@ -7463,6 +7679,13 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg); } range->len = trimmed * sb->s_blocksize; + + info.tf_trimlen = range->len; + info.tf_success = (ret ? 0 : 1); + pinfo = &info; +out_trimunlock: + ocfs2_trim_fs_unlock(osb, pinfo); + ocfs2_trim_fs_lock_res_uninit(osb); out_unlock: ocfs2_inode_unlock(main_bm_inode, 0); brelse(main_bm_bh); diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 27b75cf32cfa..250bcacdf9e9 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -61,6 +61,7 @@ struct ocfs2_extent_tree { ocfs2_journal_access_func et_root_journal_access; void *et_object; unsigned int et_max_leaf_clusters; + struct ocfs2_cached_dealloc_ctxt *et_dealloc; }; /* diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index d1516327b787..e8e205bf2e41 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -797,6 +797,7 @@ struct ocfs2_write_ctxt { struct ocfs2_cached_dealloc_ctxt w_dealloc; struct list_head w_unwritten_list; + unsigned int w_unwritten_count; }; void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) @@ -1386,6 +1387,7 @@ retry: desc->c_clear_unwritten = 0; list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list); list_add_tail(&new->ue_node, &wc->w_unwritten_list); + wc->w_unwritten_count++; new = NULL; unlock: spin_unlock(&oi->ip_lock); @@ -2256,7 +2258,7 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, ue->ue_phys = desc->c_phys; list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list); - dwc->dw_zero_count++; + dwc->dw_zero_count += wc->w_unwritten_count; } ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc); @@ -2330,6 +2332,12 @@ static int ocfs2_dio_end_io_write(struct inode *inode, ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); + /* Attach dealloc with extent tree in case that we may reuse extents + * which are already unlinked from current extent tree due to extent + * rotation and merging. + */ + et.et_dealloc = &dealloc; + ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2, &data_ac, &meta_ac); if (ret) { diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index 62e8ec619b4c..af2e7473956e 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c @@ -314,12 +314,13 @@ void o2quo_conn_err(u8 node) node, qs->qs_connected); clear_bit(node, qs->qs_conn_bm); + + if (test_bit(node, qs->qs_hb_bm)) + o2quo_set_hold(qs, node); } mlog(0, "node %u, %d total\n", node, qs->qs_connected); - if (test_bit(node, qs->qs_hb_bm)) - o2quo_set_hold(qs, node); spin_unlock(&qs->qs_lock); } diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index b95e7df5b76a..0276f7f8d5e6 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -196,7 +196,7 @@ struct o2net_msg_handler { u32 nh_msg_type; u32 nh_key; o2net_msg_handler_func *nh_func; - o2net_msg_handler_func *nh_func_data; + void *nh_func_data; o2net_post_msg_handler_func *nh_post_func; struct kref nh_kref; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 32f9c72dff17..977763d4c27d 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1776,7 +1776,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ - if (inode_cmp_iversion(inode, *f_version)) { + if (!inode_eq_iversion(inode, *f_version)) { for (i = 0; i < i_size_read(inode) && i < offset; ) { de = (struct ocfs2_dir_entry *) (data->id_data + i); @@ -1870,7 +1870,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ - if (inode_cmp_iversion(inode, *f_version)) { + if (!inode_eq_iversion(inode, *f_version)) { for (i = 0; i < sb->s_blocksize && i < offset; ) { de = (struct ocfs2_dir_entry *) (bh->b_data + i); /* It's too expensive to do a full @@ -1958,7 +1958,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx) trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); - error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level); + error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level, 1); if (lock_level && error >= 0) { /* We release EX lock which used to update atime * and get PR lock again to reduce contention diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 9c3e0f13ca87..a7df226f9449 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1122,13 +1122,6 @@ recheck: /* sleep if we haven't finished voting yet */ if (sleep) { unsigned long timeo = msecs_to_jiffies(DLM_MASTERY_TIMEOUT_MS); - - /* - if (kref_read(&mle->mle_refs) < 2) - mlog(ML_ERROR, "mle (%p) refs=%d, name=%.*s\n", mle, - kref_read(&mle->mle_refs), - res->lockname.len, res->lockname.name); - */ atomic_set(&mle->woken, 0); (void)wait_event_timeout(mle->wq, (atomic_read(&mle->woken) == 1), diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 4689940a953c..9479f99c2145 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -259,6 +259,10 @@ static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { .flags = 0, }; +static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = { + .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, +}; + static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; @@ -676,6 +680,24 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, &ocfs2_nfs_sync_lops, osb); } +void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb) +{ + struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; + + ocfs2_lock_res_init_once(lockres); + ocfs2_build_lock_name(OCFS2_LOCK_TYPE_TRIM_FS, 0, 0, lockres->l_name); + ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_TRIM_FS, + &ocfs2_trim_fs_lops, osb); +} + +void ocfs2_trim_fs_lock_res_uninit(struct ocfs2_super *osb) +{ + struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; + + ocfs2_simple_drop_lockres(osb, lockres); + ocfs2_lock_res_free(lockres); +} + static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, struct ocfs2_super *osb) { @@ -1742,6 +1764,27 @@ int ocfs2_rw_lock(struct inode *inode, int write) return status; } +int ocfs2_try_rw_lock(struct inode *inode, int write) +{ + int status, level; + struct ocfs2_lock_res *lockres; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + mlog(0, "inode %llu try to take %s RW lock\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + write ? "EXMODE" : "PRMODE"); + + if (ocfs2_mount_local(osb)) + return 0; + + lockres = &OCFS2_I(inode)->ip_rw_lockres; + + level = write ? DLM_LOCK_EX : DLM_LOCK_PR; + + status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0); + return status; +} + void ocfs2_rw_unlock(struct inode *inode, int write) { int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; @@ -2486,6 +2529,15 @@ int ocfs2_inode_lock_with_page(struct inode *inode, ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); if (ret == -EAGAIN) { unlock_page(page); + /* + * If we can't get inode lock immediately, we should not return + * directly here, since this will lead to a softlockup problem. + * The method is to get a blocking lock and immediately unlock + * before returning, this can avoid CPU resource waste due to + * lots of retries, and benefits fairness in getting lock. + */ + if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) + ocfs2_inode_unlock(inode, ex); ret = AOP_TRUNCATED_PAGE; } @@ -2494,13 +2546,18 @@ int ocfs2_inode_lock_with_page(struct inode *inode, int ocfs2_inode_lock_atime(struct inode *inode, struct vfsmount *vfsmnt, - int *level) + int *level, int wait) { int ret; - ret = ocfs2_inode_lock(inode, NULL, 0); + if (wait) + ret = ocfs2_inode_lock(inode, NULL, 0); + else + ret = ocfs2_try_inode_lock(inode, NULL, 0); + if (ret < 0) { - mlog_errno(ret); + if (ret != -EAGAIN) + mlog_errno(ret); return ret; } @@ -2512,9 +2569,14 @@ int ocfs2_inode_lock_atime(struct inode *inode, struct buffer_head *bh = NULL; ocfs2_inode_unlock(inode, 0); - ret = ocfs2_inode_lock(inode, &bh, 1); + if (wait) + ret = ocfs2_inode_lock(inode, &bh, 1); + else + ret = ocfs2_try_inode_lock(inode, &bh, 1); + if (ret < 0) { - mlog_errno(ret); + if (ret != -EAGAIN) + mlog_errno(ret); return ret; } *level = 1; @@ -2745,6 +2807,70 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) ex ? LKM_EXMODE : LKM_PRMODE); } +int ocfs2_trim_fs_lock(struct ocfs2_super *osb, + struct ocfs2_trim_fs_info *info, int trylock) +{ + int status; + struct ocfs2_trim_fs_lvb *lvb; + struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; + + if (info) + info->tf_valid = 0; + + if (ocfs2_is_hard_readonly(osb)) + return -EROFS; + + if (ocfs2_mount_local(osb)) + return 0; + + status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, + trylock ? DLM_LKF_NOQUEUE : 0, 0); + if (status < 0) { + if (status != -EAGAIN) + mlog_errno(status); + return status; + } + + if (info) { + lvb = ocfs2_dlm_lvb(&lockres->l_lksb); + if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && + lvb->lvb_version == OCFS2_TRIMFS_LVB_VERSION) { + info->tf_valid = 1; + info->tf_success = lvb->lvb_success; + info->tf_nodenum = be32_to_cpu(lvb->lvb_nodenum); + info->tf_start = be64_to_cpu(lvb->lvb_start); + info->tf_len = be64_to_cpu(lvb->lvb_len); + info->tf_minlen = be64_to_cpu(lvb->lvb_minlen); + info->tf_trimlen = be64_to_cpu(lvb->lvb_trimlen); + } + } + + return status; +} + +void ocfs2_trim_fs_unlock(struct ocfs2_super *osb, + struct ocfs2_trim_fs_info *info) +{ + struct ocfs2_trim_fs_lvb *lvb; + struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; + + if (ocfs2_mount_local(osb)) + return; + + if (info) { + lvb = ocfs2_dlm_lvb(&lockres->l_lksb); + lvb->lvb_version = OCFS2_TRIMFS_LVB_VERSION; + lvb->lvb_success = info->tf_success; + lvb->lvb_nodenum = cpu_to_be32(info->tf_nodenum); + lvb->lvb_start = cpu_to_be64(info->tf_start); + lvb->lvb_len = cpu_to_be64(info->tf_len); + lvb->lvb_minlen = cpu_to_be64(info->tf_minlen); + lvb->lvb_trimlen = cpu_to_be64(info->tf_trimlen); + } + + ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); +} + int ocfs2_dentry_lock(struct dentry *dentry, int ex) { int ret; diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index a7fc18ba0dc1..256e0a9067b8 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -70,6 +70,29 @@ struct ocfs2_orphan_scan_lvb { __be32 lvb_os_seqno; }; +#define OCFS2_TRIMFS_LVB_VERSION 1 + +struct ocfs2_trim_fs_lvb { + __u8 lvb_version; + __u8 lvb_success; + __u8 lvb_reserved[2]; + __be32 lvb_nodenum; + __be64 lvb_start; + __be64 lvb_len; + __be64 lvb_minlen; + __be64 lvb_trimlen; +}; + +struct ocfs2_trim_fs_info { + u8 tf_valid; /* lvb is valid, or not */ + u8 tf_success; /* trim is successful, or not */ + u32 tf_nodenum; /* osb node number */ + u64 tf_start; /* trim start offset in clusters */ + u64 tf_len; /* trim end offset in clusters */ + u64 tf_minlen; /* trim minimum contiguous free clusters */ + u64 tf_trimlen; /* trimmed length in bytes */ +}; + struct ocfs2_lock_holder { struct list_head oh_list; struct pid *oh_owner_pid; @@ -116,13 +139,14 @@ void ocfs2_lock_res_free(struct ocfs2_lock_res *res); int ocfs2_create_new_inode_locks(struct inode *inode); int ocfs2_drop_inode_locks(struct inode *inode); int ocfs2_rw_lock(struct inode *inode, int write); +int ocfs2_try_rw_lock(struct inode *inode, int write); void ocfs2_rw_unlock(struct inode *inode, int write); int ocfs2_open_lock(struct inode *inode); int ocfs2_try_open_lock(struct inode *inode, int write); void ocfs2_open_unlock(struct inode *inode); int ocfs2_inode_lock_atime(struct inode *inode, struct vfsmount *vfsmnt, - int *level); + int *level, int wait); int ocfs2_inode_lock_full_nested(struct inode *inode, struct buffer_head **ret_bh, int ex, @@ -140,6 +164,9 @@ int ocfs2_inode_lock_with_page(struct inode *inode, /* 99% of the time we don't want to supply any additional flags -- * those are for very specific cases only. */ #define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL) +#define ocfs2_try_inode_lock(i, b, e)\ + ocfs2_inode_lock_full_nested(i, b, e, OCFS2_META_LOCK_NOQUEUE,\ + OI_LS_NORMAL) void ocfs2_inode_unlock(struct inode *inode, int ex); int ocfs2_super_lock(struct ocfs2_super *osb, @@ -153,6 +180,12 @@ int ocfs2_rename_lock(struct ocfs2_super *osb); void ocfs2_rename_unlock(struct ocfs2_super *osb); int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex); void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex); +void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb); +void ocfs2_trim_fs_lock_res_uninit(struct ocfs2_super *osb); +int ocfs2_trim_fs_lock(struct ocfs2_super *osb, + struct ocfs2_trim_fs_info *info, int trylock); +void ocfs2_trim_fs_unlock(struct ocfs2_super *osb, + struct ocfs2_trim_fs_info *info); int ocfs2_dentry_lock(struct dentry *dentry, int ex); void ocfs2_dentry_unlock(struct dentry *dentry, int ex); int ocfs2_file_lock(struct file *file, int ex, int trylock); diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index e4719e0a3f99..06cb96462bf9 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -38,6 +38,7 @@ #include "inode.h" #include "super.h" #include "symlink.h" +#include "aops.h" #include "ocfs2_trace.h" #include "buffer_head_io.h" @@ -832,6 +833,50 @@ out: return ret; } +/* Is IO overwriting allocated blocks? */ +int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh, + u64 map_start, u64 map_len) +{ + int ret = 0, is_last; + u32 mapping_end, cpos; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_extent_rec rec; + + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len)) + return ret; + else + return -EAGAIN; + } + + cpos = map_start >> osb->s_clustersize_bits; + mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, + map_start + map_len); + is_last = 0; + while (cpos < mapping_end && !is_last) { + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, + NULL, &rec, &is_last); + if (ret) { + mlog_errno(ret); + goto out; + } + + if (rec.e_blkno == 0ULL) + break; + + if (rec.e_flags & OCFS2_EXT_REFCOUNTED) + break; + + cpos = le32_to_cpu(rec.e_cpos) + + le16_to_cpu(rec.e_leaf_clusters); + } + + if (cpos < mapping_end) + ret = -EAGAIN; +out: + return ret; +} + int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) { struct inode *inode = file->f_mapping->host; diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index 67ea57d2fd59..1057586ec19f 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h @@ -53,6 +53,9 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 map_start, u64 map_len); +int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh, + u64 map_start, u64 map_len); + int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin); int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index a1d051055472..5d1784a365a3 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -140,6 +140,8 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) spin_unlock(&oi->ip_lock); } + file->f_mode |= FMODE_NOWAIT; + leave: return status; } @@ -2132,12 +2134,12 @@ out: } static int ocfs2_prepare_inode_for_write(struct file *file, - loff_t pos, - size_t count) + loff_t pos, size_t count, int wait) { - int ret = 0, meta_level = 0; + int ret = 0, meta_level = 0, overwrite_io = 0; struct dentry *dentry = file->f_path.dentry; struct inode *inode = d_inode(dentry); + struct buffer_head *di_bh = NULL; loff_t end; /* @@ -2145,13 +2147,40 @@ static int ocfs2_prepare_inode_for_write(struct file *file, * if we need to make modifications here. */ for(;;) { - ret = ocfs2_inode_lock(inode, NULL, meta_level); + if (wait) + ret = ocfs2_inode_lock(inode, NULL, meta_level); + else + ret = ocfs2_try_inode_lock(inode, + overwrite_io ? NULL : &di_bh, meta_level); if (ret < 0) { meta_level = -1; - mlog_errno(ret); + if (ret != -EAGAIN) + mlog_errno(ret); goto out; } + /* + * Check if IO will overwrite allocated blocks in case + * IOCB_NOWAIT flag is set. + */ + if (!wait && !overwrite_io) { + overwrite_io = 1; + if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) { + ret = -EAGAIN; + goto out_unlock; + } + + ret = ocfs2_overwrite_io(inode, di_bh, pos, count); + brelse(di_bh); + di_bh = NULL; + up_read(&OCFS2_I(inode)->ip_alloc_sem); + if (ret < 0) { + if (ret != -EAGAIN) + mlog_errno(ret); + goto out_unlock; + } + } + /* Clear suid / sgid if necessary. We do this here * instead of later in the write path because * remove_suid() calls ->setattr without any hint that @@ -2199,7 +2228,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file, out_unlock: trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, - pos, count); + pos, count, wait); + + brelse(di_bh); if (meta_level >= 0) ocfs2_inode_unlock(inode, meta_level); @@ -2211,7 +2242,7 @@ out: static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { - int direct_io, rw_level; + int rw_level; ssize_t written = 0; ssize_t ret; size_t count = iov_iter_count(from); @@ -2223,6 +2254,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, void *saved_ki_complete = NULL; int append_write = ((iocb->ki_pos + count) >= i_size_read(inode) ? 1 : 0); + int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; + int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0; trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -2230,12 +2263,17 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, file->f_path.dentry->d_name.name, (unsigned int)from->nr_segs); /* GRRRRR */ + if (!direct_io && nowait) + return -EOPNOTSUPP; + if (count == 0) return 0; - direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; - - inode_lock(inode); + if (nowait) { + if (!inode_trylock(inode)) + return -EAGAIN; + } else + inode_lock(inode); /* * Concurrent O_DIRECT writes are allowed with @@ -2244,9 +2282,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, */ rw_level = (!direct_io || full_coherency || append_write); - ret = ocfs2_rw_lock(inode, rw_level); + if (nowait) + ret = ocfs2_try_rw_lock(inode, rw_level); + else + ret = ocfs2_rw_lock(inode, rw_level); if (ret < 0) { - mlog_errno(ret); + if (ret != -EAGAIN) + mlog_errno(ret); goto out_mutex; } @@ -2260,9 +2302,13 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, * other nodes to drop their caches. Buffered I/O * already does this in write_begin(). */ - ret = ocfs2_inode_lock(inode, NULL, 1); + if (nowait) + ret = ocfs2_try_inode_lock(inode, NULL, 1); + else + ret = ocfs2_inode_lock(inode, NULL, 1); if (ret < 0) { - mlog_errno(ret); + if (ret != -EAGAIN) + mlog_errno(ret); goto out; } @@ -2277,9 +2323,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, } count = ret; - ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count); + ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait); if (ret < 0) { - mlog_errno(ret); + if (ret != -EAGAIN) + mlog_errno(ret); goto out; } @@ -2355,6 +2402,8 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, int ret = 0, rw_level = -1, lock_level = 0; struct file *filp = iocb->ki_filp; struct inode *inode = file_inode(filp); + int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; + int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0; trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -2369,14 +2418,22 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, goto bail; } + if (!direct_io && nowait) + return -EOPNOTSUPP; + /* * buffered reads protect themselves in ->readpage(). O_DIRECT reads * need locks to protect pending reads from racing with truncate. */ - if (iocb->ki_flags & IOCB_DIRECT) { - ret = ocfs2_rw_lock(inode, 0); + if (direct_io) { + if (nowait) + ret = ocfs2_try_rw_lock(inode, 0); + else + ret = ocfs2_rw_lock(inode, 0); + if (ret < 0) { - mlog_errno(ret); + if (ret != -EAGAIN) + mlog_errno(ret); goto bail; } rw_level = 0; @@ -2393,9 +2450,11 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, * like i_size. This allows the checks down below * generic_file_aio_read() a chance of actually working. */ - ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level); + ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level, + !nowait); if (ret < 0) { - mlog_errno(ret); + if (ret != -EAGAIN) + mlog_errno(ret); goto bail; } ocfs2_inode_unlock(inode, lock_level); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 36304434eacf..e5dcea6cee5f 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -666,23 +666,24 @@ static int __ocfs2_journal_access(handle_t *handle, /* we can safely remove this assertion after testing. */ if (!buffer_uptodate(bh)) { mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n"); - mlog(ML_ERROR, "b_blocknr=%llu\n", - (unsigned long long)bh->b_blocknr); + mlog(ML_ERROR, "b_blocknr=%llu, b_state=0x%lx\n", + (unsigned long long)bh->b_blocknr, bh->b_state); lock_buffer(bh); /* - * A previous attempt to write this buffer head failed. - * Nothing we can do but to retry the write and hope for - * the best. + * A previous transaction with a couple of buffer heads fail + * to checkpoint, so all the bhs are marked as BH_Write_EIO. + * For current transaction, the bh is just among those error + * bhs which previous transaction handle. We can't just clear + * its BH_Write_EIO and reuse directly, since other bhs are + * not written to disk yet and that will cause metadata + * inconsistency. So we should set fs read-only to avoid + * further damage. */ if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) { - clear_buffer_write_io_error(bh); - set_buffer_uptodate(bh); - } - - if (!buffer_uptodate(bh)) { unlock_buffer(bh); - return -EIO; + return ocfs2_error(osb->sb, "A previous attempt to " + "write this buffer head failed\n"); } unlock_buffer(bh); } diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 098f5c712569..fb9a20e3d608 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -184,7 +184,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) int ret = 0, lock_level = 0; ret = ocfs2_inode_lock_atime(file_inode(file), - file->f_path.mnt, &lock_level); + file->f_path.mnt, &lock_level, 1); if (ret < 0) { mlog_errno(ret); goto out; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 9a50f222ac97..6867eef2e06b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -404,6 +404,7 @@ struct ocfs2_super struct ocfs2_lock_res osb_super_lockres; struct ocfs2_lock_res osb_rename_lockres; struct ocfs2_lock_res osb_nfs_sync_lockres; + struct ocfs2_lock_res osb_trim_fs_lockres; struct ocfs2_dlm_debug *osb_dlm_debug; struct dentry *osb_debug_root; diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index d277aabf5dfb..7051b994c776 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h @@ -50,6 +50,7 @@ enum ocfs2_lock_type { OCFS2_LOCK_TYPE_NFS_SYNC, OCFS2_LOCK_TYPE_ORPHAN_SCAN, OCFS2_LOCK_TYPE_REFCOUNT, + OCFS2_LOCK_TYPE_TRIM_FS, OCFS2_NUM_LOCK_TYPES }; @@ -93,6 +94,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) case OCFS2_LOCK_TYPE_REFCOUNT: c = 'T'; break; + case OCFS2_LOCK_TYPE_TRIM_FS: + c = 'I'; + break; default: c = '\0'; } @@ -115,6 +119,7 @@ static char *ocfs2_lock_type_strings[] = { [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync", [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", [OCFS2_LOCK_TYPE_REFCOUNT] = "Refcount", + [OCFS2_LOCK_TYPE_TRIM_FS] = "TrimFs", }; static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index a0b5d00ef0a9..e2a11aaece10 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -1449,20 +1449,22 @@ DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_remove_inode_range); TRACE_EVENT(ocfs2_prepare_inode_for_write, TP_PROTO(unsigned long long ino, unsigned long long saved_pos, - unsigned long count), - TP_ARGS(ino, saved_pos, count), + unsigned long count, int wait), + TP_ARGS(ino, saved_pos, count, wait), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned long long, saved_pos) __field(unsigned long, count) + __field(int, wait) ), TP_fast_assign( __entry->ino = ino; __entry->saved_pos = saved_pos; __entry->count = count; + __entry->wait = wait; ), - TP_printk("%llu %llu %lu", __entry->ino, - __entry->saved_pos, __entry->count) + TP_printk("%llu %llu %lu %d", __entry->ino, + __entry->saved_pos, __entry->count, __entry->wait) ); DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 9f0b95abc09f..d8f5f6ce99dc 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -2438,6 +2438,8 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, } le16_add_cpu(&bg->bg_free_bits_count, num_bits); if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { + if (undo_fn) + jbd_unlock_bh_state(group_bh); return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n", (unsigned long long)le64_to_cpu(bg->bg_blkno), le16_to_cpu(bg->bg_bits), @@ -2563,16 +2565,16 @@ static int _ocfs2_free_clusters(handle_t *handle, int status; u16 bg_start_bit; u64 bg_blkno; - struct ocfs2_dinode *fe; /* You can't ever have a contiguous set of clusters * bigger than a block group bitmap so we never have to worry * about looping on them. * This is expensive. We can safely remove once this stuff has * gotten tested really well. */ - BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk))); + BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, + ocfs2_blocks_to_clusters(bitmap_inode->i_sb, + start_blk))); - fe = (struct ocfs2_dinode *) bitmap_bh->b_data; ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno, &bg_start_bit); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 80efa5699fb0..ffa4952d432b 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -474,9 +474,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); if (!new) { ocfs2_release_system_inodes(osb); - status = -EINVAL; + status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL; mlog_errno(status); - /* FIXME: Should ERROR_RO_FS */ mlog(ML_ERROR, "Unable to load system inode %d, " "possibly corrupt fs?", i); goto bail; @@ -505,7 +504,7 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb) new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); if (!new) { ocfs2_release_system_inodes(osb); - status = -EINVAL; + status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL; mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n", status, i, osb->slot_num); goto bail; @@ -1208,14 +1207,15 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) read_super_error: brelse(bh); + if (status) + mlog_errno(status); + if (osb) { atomic_set(&osb->vol_state, VOLUME_DISABLED); wake_up(&osb->osb_mount_event); ocfs2_dismount_volume(sb, 1); } - if (status) - mlog_errno(status); return status; } @@ -1843,6 +1843,9 @@ static int ocfs2_mount_volume(struct super_block *sb) status = ocfs2_dlm_init(osb); if (status < 0) { mlog_errno(status); + if (status == -EBADR && ocfs2_userspace_stack(osb)) + mlog(ML_ERROR, "couldn't mount because cluster name on" + " disk does not match the running cluster name.\n"); goto leave; } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index c5898c59d411..c261c1dfd374 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -638,14 +638,17 @@ int ocfs2_calc_xattr_init(struct inode *dir, si->value_len); if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { + down_read(&OCFS2_I(dir)->ip_xattr_sem); acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, "", NULL, 0); + up_read(&OCFS2_I(dir)->ip_xattr_sem); if (acl_len > 0) { a_size = ocfs2_xattr_entry_real_size(0, acl_len); if (S_ISDIR(mode)) a_size <<= 1; } else if (acl_len != 0 && acl_len != -ENODATA) { + ret = acl_len; mlog_errno(ret); return ret; } @@ -6415,7 +6418,7 @@ static int ocfs2_reflink_xattr_header(handle_t *handle, * and then insert the extents one by one. */ if (xv->xr_list.l_tree_depth) { - memcpy(new_xv, &def_xv, sizeof(def_xv)); + memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE); vb->vb_xv = new_xv; vb->vb_bh = value_bh; ocfs2_init_xattr_value_extent_tree(&data_et, diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index ae782df5c063..fe484cf93e5c 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -33,7 +33,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) new_op->upcall.req.lookup.parent_refn = parent->refn; strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, - ORANGEFS_NAME_MAX); + ORANGEFS_NAME_MAX - 1); gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d interrupt flag [%d]\n", @@ -118,8 +118,12 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) return 0; /* We do not need to continue with negative dentries. */ - if (!dentry->d_inode) - goto out; + if (!dentry->d_inode) { + gossip_debug(GOSSIP_DCACHE_DEBUG, + "%s: negative dentry or positive dentry and inode valid.\n", + __func__); + return 1; + } /* Now we must perform a getattr to validate the inode contents. */ @@ -129,14 +133,7 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) __FILE__, __func__, __LINE__); return 0; } - if (ret == 0) - return 0; - -out: - gossip_debug(GOSSIP_DCACHE_DEBUG, - "%s: negative dentry or positive dentry and inode valid.\n", - __func__); - return 1; + return !ret; } const struct dentry_operations orangefs_dentry_operations = { diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index c98bba2dbc94..6e3134e6d98a 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -41,7 +41,7 @@ static int orangefs_create(struct inode *dir, ORANGEFS_TYPE_METAFILE, mode); strncpy(new_op->upcall.req.create.d_name, - dentry->d_name.name, ORANGEFS_NAME_MAX); + dentry->d_name.name, ORANGEFS_NAME_MAX - 1); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ -142,7 +142,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, new_op->upcall.req.lookup.parent_refn = parent->refn; strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, - ORANGEFS_NAME_MAX); + ORANGEFS_NAME_MAX - 1); gossip_debug(GOSSIP_NAME_DEBUG, "%s: doing lookup on %s under %pU,%d\n", @@ -244,7 +244,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry) new_op->upcall.req.remove.parent_refn = parent->refn; strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name, - ORANGEFS_NAME_MAX); + ORANGEFS_NAME_MAX - 1); ret = service_operation(new_op, "orangefs_unlink", get_interruptible_flag(inode)); @@ -300,8 +300,8 @@ static int orangefs_symlink(struct inode *dir, strncpy(new_op->upcall.req.sym.entry_name, dentry->d_name.name, - ORANGEFS_NAME_MAX); - strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX); + ORANGEFS_NAME_MAX - 1); + strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX - 1); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ -372,7 +372,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode ORANGEFS_TYPE_DIRECTORY, mode); strncpy(new_op->upcall.req.mkdir.d_name, - dentry->d_name.name, ORANGEFS_NAME_MAX); + dentry->d_name.name, ORANGEFS_NAME_MAX - 1); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ -453,10 +453,10 @@ static int orangefs_rename(struct inode *old_dir, strncpy(new_op->upcall.req.rename.d_old_name, old_dentry->d_name.name, - ORANGEFS_NAME_MAX); + ORANGEFS_NAME_MAX - 1); strncpy(new_op->upcall.req.rename.d_new_name, new_dentry->d_name.name, - ORANGEFS_NAME_MAX); + ORANGEFS_NAME_MAX - 1); ret = service_operation(new_op, "orangefs_rename", diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 1c59dff530de..6e35f2f3c897 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -328,7 +328,7 @@ static int help_show(struct seq_file *m, void *v) /* * initialize the client-debug file. */ -int orangefs_client_debug_init(void) +static int orangefs_client_debug_init(void) { int rc = -ENOMEM; @@ -1056,7 +1056,7 @@ int orangefs_debugfs_new_debug(void __user *arg) client_debug_string, llu(mask_info.mask_value)); } else { - gossip_lerr("Invalid mask type....\n"); + gossip_err("Invalid mask type....\n"); return -EINVAL; } diff --git a/fs/orangefs/orangefs-debugfs.h b/fs/orangefs/orangefs-debugfs.h index b5fd9cd4960f..51147f9ce3d6 100644 --- a/fs/orangefs/orangefs-debugfs.h +++ b/fs/orangefs/orangefs-debugfs.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ int orangefs_debugfs_init(int); void orangefs_debugfs_cleanup(void); -int orangefs_client_debug_init(void); int orangefs_prepare_debugfs_help_string(int); int orangefs_debugfs_new_client_mask(void __user *); int orangefs_debugfs_new_client_string(void __user *); diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 2595453fe737..eebbaece85ef 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -56,11 +56,7 @@ #include "orangefs-dev-proto.h" -#ifdef ORANGEFS_KERNEL_DEBUG -#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS 10 -#else #define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS 20 -#endif #define ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS 30 @@ -104,11 +100,11 @@ enum orangefs_vfs_op_states { * orangefs kernel memory related flags */ -#if ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) +#if (defined CONFIG_DEBUG_SLAB) #define ORANGEFS_CACHE_CREATE_FLAGS SLAB_RED_ZONE #else #define ORANGEFS_CACHE_CREATE_FLAGS 0 -#endif /* ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */ +#endif extern int orangefs_init_acl(struct inode *inode, struct inode *dir); extern const struct xattr_handler *orangefs_xattr_handlers[]; @@ -471,8 +467,6 @@ int orangefs_inode_check_changed(struct inode *inode); int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr); -void orangefs_make_bad_inode(struct inode *inode); - int orangefs_unmount_sb(struct super_block *sb); bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op); diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 97fe93129f38..ea6256d136d1 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -230,25 +230,42 @@ static int orangefs_inode_type(enum orangefs_ds_type objtype) return -1; } -static int orangefs_inode_is_stale(struct inode *inode, int new, +static void orangefs_make_bad_inode(struct inode *inode) +{ + if (is_root_handle(inode)) { + /* + * if this occurs, the pvfs2-client-core was killed but we + * can't afford to lose the inode operations and such + * associated with the root handle in any case. + */ + gossip_debug(GOSSIP_UTILS_DEBUG, + "*** NOT making bad root inode %pU\n", + get_khandle_from_ino(inode)); + } else { + gossip_debug(GOSSIP_UTILS_DEBUG, + "*** making bad inode %pU\n", + get_khandle_from_ino(inode)); + make_bad_inode(inode); + } +} + +static int orangefs_inode_is_stale(struct inode *inode, struct ORANGEFS_sys_attr_s *attrs, char *link_target) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); int type = orangefs_inode_type(attrs->objtype); - if (!new) { - /* - * If the inode type or symlink target have changed then this - * inode is stale. - */ - if (type == -1 || !(inode->i_mode & type)) { - orangefs_make_bad_inode(inode); - return 1; - } - if (type == S_IFLNK && strncmp(orangefs_inode->link_target, - link_target, ORANGEFS_NAME_MAX)) { - orangefs_make_bad_inode(inode); - return 1; - } + /* + * If the inode type or symlink target have changed then this + * inode is stale. + */ + if (type == -1 || !(inode->i_mode & type)) { + orangefs_make_bad_inode(inode); + return 1; + } + if (type == S_IFLNK && strncmp(orangefs_inode->link_target, + link_target, ORANGEFS_NAME_MAX)) { + orangefs_make_bad_inode(inode); + return 1; } return 0; } @@ -294,16 +311,18 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass, if (ret != 0) goto out; - type = orangefs_inode_type(new_op-> - downcall.resp.getattr.attributes.objtype); - ret = orangefs_inode_is_stale(inode, new, - &new_op->downcall.resp.getattr.attributes, - new_op->downcall.resp.getattr.link_target); - if (ret) { - ret = -ESTALE; - goto out; + if (!new) { + ret = orangefs_inode_is_stale(inode, + &new_op->downcall.resp.getattr.attributes, + new_op->downcall.resp.getattr.link_target); + if (ret) { + ret = -ESTALE; + goto out; + } } + type = orangefs_inode_type(new_op-> + downcall.resp.getattr.attributes.objtype); switch (type) { case S_IFREG: inode->i_flags = orangefs_inode_flags(&new_op-> @@ -348,6 +367,12 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass, inode->i_link = orangefs_inode->link_target; } break; + /* i.e. -1 */ + default: + /* XXX: ESTALE? This is what is done if it is not new. */ + orangefs_make_bad_inode(inode); + ret = -ESTALE; + goto out; } inode->i_uid = make_kuid(&init_user_ns, new_op-> @@ -401,7 +426,7 @@ int orangefs_inode_check_changed(struct inode *inode) if (ret != 0) goto out; - ret = orangefs_inode_is_stale(inode, 0, + ret = orangefs_inode_is_stale(inode, &new_op->downcall.resp.getattr.attributes, new_op->downcall.resp.getattr.link_target); out: @@ -444,25 +469,6 @@ int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr) return ret; } -void orangefs_make_bad_inode(struct inode *inode) -{ - if (is_root_handle(inode)) { - /* - * if this occurs, the pvfs2-client-core was killed but we - * can't afford to lose the inode operations and such - * associated with the root handle in any case. - */ - gossip_debug(GOSSIP_UTILS_DEBUG, - "*** NOT making bad root inode %pU\n", - get_khandle_from_ino(inode)); - } else { - gossip_debug(GOSSIP_UTILS_DEBUG, - "*** making bad inode %pU\n", - get_khandle_from_ino(inode)); - make_bad_inode(inode); - } -} - /* * The following is a very dirty hack that is now a permanent part of the * ORANGEFS protocol. See protocol.h for more error definitions. @@ -537,6 +543,7 @@ int orangefs_normalize_to_errno(__s32 error_code) */ } else { gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n"); + error_code = -EINVAL; } return error_code; } diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index e0bf5e4dce0d..dc6e3e6269c3 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -395,13 +395,6 @@ struct ORANGEFS_dev_map_desc { /* gossip.h *****************************************************************/ -#ifdef GOSSIP_DISABLE_DEBUG -#define gossip_debug(mask, fmt, ...) \ -do { \ - if (0) \ - printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ -} while (0) -#else extern __u64 orangefs_gossip_debug_mask; /* try to avoid function call overhead by checking masks in macro */ @@ -410,13 +403,5 @@ do { \ if (orangefs_gossip_debug_mask & (mask)) \ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ } while (0) -#endif /* GOSSIP_DISABLE_DEBUG */ - -/* do file and line number printouts w/ the GNU preprocessor */ -#define gossip_ldebug(mask, fmt, ...) \ - gossip_debug(mask, "%s: " fmt, __func__, ##__VA_ARGS__) #define gossip_err pr_err -#define gossip_lerr(fmt, ...) \ - gossip_err("%s line %d: " fmt, \ - __FILE__, __LINE__, ##__VA_ARGS__) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 36f1390b5ed7..3ae5fdba0225 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -335,7 +335,7 @@ static int orangefs_encode_fh(struct inode *inode, struct orangefs_object_kref refn; if (*max_len < len) { - gossip_lerr("fh buffer is too small for encoding\n"); + gossip_err("fh buffer is too small for encoding\n"); *max_len = len; type = 255; goto out; @@ -383,7 +383,7 @@ static int orangefs_unmount(int id, __s32 fs_id, const char *devname) op->upcall.req.fs_umount.id = id; op->upcall.req.fs_umount.fs_id = fs_id; strncpy(op->upcall.req.fs_umount.orangefs_config_server, - devname, ORANGEFS_MAX_SERVER_ADDR_LEN); + devname, ORANGEFS_MAX_SERVER_ADDR_LEN - 1); r = service_operation(op, "orangefs_fs_umount", 0); /* Not much to do about an error here. */ if (r) @@ -478,7 +478,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, strncpy(new_op->upcall.req.fs_mount.orangefs_config_server, devname, - ORANGEFS_MAX_SERVER_ADDR_LEN); + ORANGEFS_MAX_SERVER_ADDR_LEN - 1); gossip_debug(GOSSIP_SUPER_DEBUG, "Attempting ORANGEFS Mount via host %s\n", @@ -520,7 +520,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, */ strncpy(ORANGEFS_SB(sb)->devname, devname, - ORANGEFS_MAX_SERVER_ADDR_LEN); + ORANGEFS_MAX_SERVER_ADDR_LEN - 1); /* mount_pending must be cleared */ ORANGEFS_SB(sb)->mount_pending = 0; @@ -610,11 +610,16 @@ void orangefs_kill_sb(struct super_block *sb) int orangefs_inode_cache_initialize(void) { - orangefs_inode_cache = kmem_cache_create("orangefs_inode_cache", - sizeof(struct orangefs_inode_s), - 0, - ORANGEFS_CACHE_CREATE_FLAGS, - orangefs_inode_cache_ctor); + orangefs_inode_cache = kmem_cache_create_usercopy( + "orangefs_inode_cache", + sizeof(struct orangefs_inode_s), + 0, + ORANGEFS_CACHE_CREATE_FLAGS, + offsetof(struct orangefs_inode_s, + link_target), + sizeof_field(struct orangefs_inode_s, + link_target), + orangefs_inode_cache_ctor); if (!orangefs_inode_cache) { gossip_err("Cannot create orangefs_inode_cache\n"); diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 5ac415466861..406e72de88f6 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -47,9 +47,28 @@ config OVERLAY_FS_INDEX The inodes index feature prevents breaking of lower hardlinks on copy up. - Note, that the inodes index feature is read-only backward compatible. - That is, mounting an overlay which has an index dir on a kernel that - doesn't support this feature read-only, will not have any negative - outcomes. However, mounting the same overlay with an old kernel - read-write and then mounting it again with a new kernel, will have - unexpected results. + Note, that the inodes index feature is not backward compatible. + That is, mounting an overlay which has an inodes index on a kernel + that doesn't support this feature will have unexpected results. + +config OVERLAY_FS_NFS_EXPORT + bool "Overlayfs: turn on NFS export feature by default" + depends on OVERLAY_FS + depends on OVERLAY_FS_INDEX + help + If this config option is enabled then overlay filesystems will use + the inodes index dir to decode overlay NFS file handles by default. + In this case, it is still possible to turn off NFS export support + globally with the "nfs_export=off" module option or on a filesystem + instance basis with the "nfs_export=off" mount option. + + The NFS export feature creates an index on copy up of every file and + directory. This full index is used to detect overlay filesystems + inconsistencies on lookup, like redirect from multiple upper dirs to + the same lower dir. The full index may incur some overhead on mount + time, especially when verifying that directory file handles are not + stale. + + Note, that the NFS export feature is not backward compatible. + That is, mounting an overlay which has a full index on a kernel + that doesn't support this feature will have unexpected results. diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 99373bbc1478..30802347a020 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -4,4 +4,5 @@ obj-$(CONFIG_OVERLAY_FS) += overlay.o -overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o +overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o \ + export.o diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index eb3b8d39fb61..d855f508fa20 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -232,13 +232,13 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) return err; } -struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper) +struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper) { struct ovl_fh *fh; int fh_type, fh_len, dwords; void *buf; int buflen = MAX_HANDLE_SZ; - uuid_t *uuid = &lower->d_sb->s_uuid; + uuid_t *uuid = &real->d_sb->s_uuid; buf = kmalloc(buflen, GFP_KERNEL); if (!buf) @@ -250,7 +250,7 @@ struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper) * the price or reconnecting the dentry. */ dwords = buflen >> 2; - fh_type = exportfs_encode_fh(lower, buf, &dwords, 0); + fh_type = exportfs_encode_fh(real, buf, &dwords, 0); buflen = (dwords << 2); fh = ERR_PTR(-EIO); @@ -288,8 +288,8 @@ out: return fh; } -static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, - struct dentry *upper) +int ovl_set_origin(struct dentry *dentry, struct dentry *lower, + struct dentry *upper) { const struct ovl_fh *fh = NULL; int err; @@ -315,6 +315,94 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, return err; } +/* Store file handle of @upper dir in @index dir entry */ +static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index) +{ + const struct ovl_fh *fh; + int err; + + fh = ovl_encode_fh(upper, true); + if (IS_ERR(fh)) + return PTR_ERR(fh); + + err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh, fh->len, 0); + + kfree(fh); + return err; +} + +/* + * Create and install index entry. + * + * Caller must hold i_mutex on indexdir. + */ +static int ovl_create_index(struct dentry *dentry, struct dentry *origin, + struct dentry *upper) +{ + struct dentry *indexdir = ovl_indexdir(dentry->d_sb); + struct inode *dir = d_inode(indexdir); + struct dentry *index = NULL; + struct dentry *temp = NULL; + struct qstr name = { }; + int err; + + /* + * For now this is only used for creating index entry for directories, + * because non-dir are copied up directly to index and then hardlinked + * to upper dir. + * + * TODO: implement create index for non-dir, so we can call it when + * encoding file handle for non-dir in case index does not exist. + */ + if (WARN_ON(!d_is_dir(dentry))) + return -EIO; + + /* Directory not expected to be indexed before copy up */ + if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry)))) + return -EIO; + + err = ovl_get_index_name(origin, &name); + if (err) + return err; + + temp = ovl_lookup_temp(indexdir); + if (IS_ERR(temp)) + goto temp_err; + + err = ovl_do_mkdir(dir, temp, S_IFDIR, true); + if (err) + goto out; + + err = ovl_set_upper_fh(upper, temp); + if (err) + goto out_cleanup; + + index = lookup_one_len(name.name, indexdir, name.len); + if (IS_ERR(index)) { + err = PTR_ERR(index); + } else { + err = ovl_do_rename(dir, temp, dir, index, 0); + dput(index); + } + + if (err) + goto out_cleanup; + +out: + dput(temp); + kfree(name.name); + return err; + +temp_err: + err = PTR_ERR(temp); + temp = NULL; + goto out; + +out_cleanup: + ovl_cleanup(dir, temp); + goto out; +} + struct ovl_copy_up_ctx { struct dentry *parent; struct dentry *dentry; @@ -327,6 +415,7 @@ struct ovl_copy_up_ctx { struct dentry *workdir; bool tmpfile; bool origin; + bool indexed; }; static int ovl_link_up(struct ovl_copy_up_ctx *c) @@ -361,7 +450,10 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) } } inode_unlock(udir); - ovl_set_nlink_upper(c->dentry); + if (err) + return err; + + err = ovl_set_nlink_upper(c->dentry); return err; } @@ -498,6 +590,12 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c) if (err) goto out_cleanup; + if (S_ISDIR(c->stat.mode) && c->indexed) { + err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp); + if (err) + goto out_cleanup; + } + if (c->tmpfile) { inode_lock_nested(udir, I_MUTEX_PARENT); err = ovl_install_temp(c, temp, &newdentry); @@ -536,20 +634,33 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { int err; struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info; - bool indexed = false; + bool to_index = false; - if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) && - c->stat.nlink > 1) - indexed = true; + /* + * Indexed non-dir is copied up directly to the index entry and then + * hardlinked to upper dir. Indexed dir is copied up to indexdir, + * then index entry is created and then copied up dir installed. + * Copying dir up to indexdir instead of workdir simplifies locking. + */ + if (ovl_need_index(c->dentry)) { + c->indexed = true; + if (S_ISDIR(c->stat.mode)) + c->workdir = ovl_indexdir(c->dentry->d_sb); + else + to_index = true; + } - if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed) + if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) c->origin = true; - if (indexed) { + if (to_index) { c->destdir = ovl_indexdir(c->dentry->d_sb); err = ovl_get_index_name(c->lowerpath.dentry, &c->destname); if (err) return err; + } else if (WARN_ON(!c->parent)) { + /* Disconnected dentry must be copied up to index dir */ + return -EIO; } else { /* * Mark parent "impure" because it may now contain non-pure @@ -572,11 +683,17 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) } } - if (indexed) { - if (!err) - ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); - kfree(c->destname.name); - } else if (!err) { + + if (err) + goto out; + + if (c->indexed) + ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); + + if (to_index) { + /* Initialize nlink for copy up of disconnected dentry */ + err = ovl_set_nlink_upper(c->dentry); + } else { struct inode *udir = d_inode(c->destdir); /* Restore timestamps on parent (best effort) */ @@ -587,6 +704,9 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); } +out: + if (to_index) + kfree(c->destname.name); return err; } @@ -611,14 +731,17 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, if (err) return err; - ovl_path_upper(parent, &parentpath); - ctx.destdir = parentpath.dentry; - ctx.destname = dentry->d_name; + if (parent) { + ovl_path_upper(parent, &parentpath); + ctx.destdir = parentpath.dentry; + ctx.destname = dentry->d_name; - err = vfs_getattr(&parentpath, &ctx.pstat, - STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); - if (err) - return err; + err = vfs_getattr(&parentpath, &ctx.pstat, + STATX_ATIME | STATX_MTIME, + AT_STATX_SYNC_AS_STAT); + if (err) + return err; + } /* maybe truncate regular file. this has no effect on dirs */ if (flags & O_TRUNC) @@ -639,7 +762,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } else { if (!ovl_dentry_upper(dentry)) err = ovl_do_copy_up(&ctx); - if (!err && !ovl_dentry_has_upper_alias(dentry)) + if (!err && parent && !ovl_dentry_has_upper_alias(dentry)) err = ovl_link_up(&ctx); ovl_copy_up_end(dentry); } @@ -652,10 +775,19 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) { int err = 0; const struct cred *old_cred = ovl_override_creds(dentry->d_sb); + bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED); + + /* + * With NFS export, copy up can get called for a disconnected non-dir. + * In this case, we will copy up lower inode to index dir without + * linking it to upper dir. + */ + if (WARN_ON(disconnected && d_is_dir(dentry))) + return -EIO; while (!err) { struct dentry *next; - struct dentry *parent; + struct dentry *parent = NULL; /* * Check if copy-up has happened as well as for upper alias (in @@ -671,12 +803,12 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) * with rename. */ if (ovl_dentry_upper(dentry) && - ovl_dentry_has_upper_alias(dentry)) + (ovl_dentry_has_upper_alias(dentry) || disconnected)) break; next = dget(dentry); /* find the topmost dentry not yet copied up */ - for (;;) { + for (; !disconnected;) { parent = dget_parent(next); if (ovl_dentry_upper(parent)) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index f9788bc116a8..839709c7803a 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -63,8 +63,7 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir) } /* caller holds i_mutex on workdir */ -static struct dentry *ovl_whiteout(struct dentry *workdir, - struct dentry *dentry) +static struct dentry *ovl_whiteout(struct dentry *workdir) { int err; struct dentry *whiteout; @@ -83,6 +82,38 @@ static struct dentry *ovl_whiteout(struct dentry *workdir, return whiteout; } +/* Caller must hold i_mutex on both workdir and dir */ +int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir, + struct dentry *dentry) +{ + struct inode *wdir = workdir->d_inode; + struct dentry *whiteout; + int err; + int flags = 0; + + whiteout = ovl_whiteout(workdir); + err = PTR_ERR(whiteout); + if (IS_ERR(whiteout)) + return err; + + if (d_is_dir(dentry)) + flags = RENAME_EXCHANGE; + + err = ovl_do_rename(wdir, whiteout, dir, dentry, flags); + if (err) + goto kill_whiteout; + if (flags) + ovl_cleanup(wdir, dentry); + +out: + dput(whiteout); + return err; + +kill_whiteout: + ovl_cleanup(wdir, whiteout); + goto out; +} + int ovl_create_real(struct inode *dir, struct dentry *newdentry, struct cattr *attr, struct dentry *hardlink, bool debug) { @@ -181,11 +212,6 @@ static bool ovl_type_origin(struct dentry *dentry) return OVL_TYPE_ORIGIN(ovl_path_type(dentry)); } -static bool ovl_may_have_whiteouts(struct dentry *dentry) -{ - return ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry)); -} - static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct cattr *attr, struct dentry *hardlink) { @@ -301,37 +327,6 @@ out: return ERR_PTR(err); } -static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) -{ - int err; - struct dentry *ret = NULL; - LIST_HEAD(list); - - err = ovl_check_empty_dir(dentry, &list); - if (err) { - ret = ERR_PTR(err); - goto out_free; - } - - /* - * When removing an empty opaque directory, then it makes no sense to - * replace it with an exact replica of itself. - * - * If upperdentry has whiteouts, clear them. - * - * Can race with copy-up, since we don't hold the upperdir mutex. - * Doesn't matter, since copy-up can't create a non-empty directory - * from an empty one. - */ - if (!list_empty(&list)) - ret = ovl_clear_empty(dentry, &list); - -out_free: - ovl_cache_free(&list); - - return ret; -} - static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name, const struct posix_acl *acl) { @@ -623,23 +618,20 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper) return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper); } -static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) +static int ovl_remove_and_whiteout(struct dentry *dentry, + struct list_head *list) { struct dentry *workdir = ovl_workdir(dentry); - struct inode *wdir = workdir->d_inode; struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); - struct inode *udir = upperdir->d_inode; - struct dentry *whiteout; struct dentry *upper; struct dentry *opaquedir = NULL; int err; - int flags = 0; if (WARN_ON(!workdir)) return -EROFS; - if (is_dir) { - opaquedir = ovl_check_empty_and_clear(dentry); + if (!list_empty(list)) { + opaquedir = ovl_clear_empty(dentry, list); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out; @@ -662,24 +654,13 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) goto out_dput_upper; } - whiteout = ovl_whiteout(workdir, dentry); - err = PTR_ERR(whiteout); - if (IS_ERR(whiteout)) - goto out_dput_upper; - - if (d_is_dir(upper)) - flags = RENAME_EXCHANGE; - - err = ovl_do_rename(wdir, whiteout, udir, upper, flags); + err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper); if (err) - goto kill_whiteout; - if (flags) - ovl_cleanup(wdir, upper); + goto out_d_drop; ovl_dentry_version_inc(dentry->d_parent, true); out_d_drop: d_drop(dentry); - dput(whiteout); out_dput_upper: dput(upper); out_unlock: @@ -688,13 +669,10 @@ out_dput: dput(opaquedir); out: return err; - -kill_whiteout: - ovl_cleanup(wdir, whiteout); - goto out_d_drop; } -static int ovl_remove_upper(struct dentry *dentry, bool is_dir) +static int ovl_remove_upper(struct dentry *dentry, bool is_dir, + struct list_head *list) { struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct inode *dir = upperdir->d_inode; @@ -702,10 +680,8 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir) struct dentry *opaquedir = NULL; int err; - /* Redirect/origin dir can be !ovl_lower_positive && not clean */ - if (is_dir && (ovl_dentry_get_redirect(dentry) || - ovl_may_have_whiteouts(dentry))) { - opaquedir = ovl_check_empty_and_clear(dentry); + if (!list_empty(list)) { + opaquedir = ovl_clear_empty(dentry, list); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out; @@ -746,11 +722,26 @@ out: return err; } +static bool ovl_pure_upper(struct dentry *dentry) +{ + return !ovl_dentry_lower(dentry) && + !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry)); +} + static int ovl_do_remove(struct dentry *dentry, bool is_dir) { int err; bool locked = false; const struct cred *old_cred; + bool lower_positive = ovl_lower_positive(dentry); + LIST_HEAD(list); + + /* No need to clean pure upper removed by vfs_rmdir() */ + if (is_dir && (lower_positive || !ovl_pure_upper(dentry))) { + err = ovl_check_empty_dir(dentry, &list); + if (err) + goto out; + } err = ovl_want_write(dentry); if (err) @@ -765,10 +756,10 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) goto out_drop_write; old_cred = ovl_override_creds(dentry->d_sb); - if (!ovl_lower_positive(dentry)) - err = ovl_remove_upper(dentry, is_dir); + if (!lower_positive) + err = ovl_remove_upper(dentry, is_dir, &list); else - err = ovl_remove_and_whiteout(dentry, is_dir); + err = ovl_remove_and_whiteout(dentry, &list); revert_creds(old_cred); if (!err) { if (is_dir) @@ -780,6 +771,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) out_drop_write: ovl_drop_write(dentry); out: + ovl_cache_free(&list); return err; } @@ -915,6 +907,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, bool samedir = olddir == newdir; struct dentry *opaquedir = NULL; const struct cred *old_cred = NULL; + LIST_HEAD(list); err = -EINVAL; if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) @@ -929,6 +922,27 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (!overwrite && !ovl_can_move(new)) goto out; + if (overwrite && new_is_dir && !ovl_pure_upper(new)) { + err = ovl_check_empty_dir(new, &list); + if (err) + goto out; + } + + if (overwrite) { + if (ovl_lower_positive(old)) { + if (!ovl_dentry_is_whiteout(new)) { + /* Whiteout source */ + flags |= RENAME_WHITEOUT; + } else { + /* Switch whiteouts */ + flags |= RENAME_EXCHANGE; + } + } else if (is_dir && ovl_dentry_is_whiteout(new)) { + flags |= RENAME_EXCHANGE; + cleanup_whiteout = true; + } + } + err = ovl_want_write(old); if (err) goto out; @@ -952,9 +966,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, old_cred = ovl_override_creds(old->d_sb); - if (overwrite && new_is_dir && (ovl_type_merge_or_lower(new) || - ovl_may_have_whiteouts(new))) { - opaquedir = ovl_check_empty_and_clear(new); + if (!list_empty(&list)) { + opaquedir = ovl_clear_empty(new, &list); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) { opaquedir = NULL; @@ -962,21 +975,6 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, } } - if (overwrite) { - if (ovl_lower_positive(old)) { - if (!ovl_dentry_is_whiteout(new)) { - /* Whiteout source */ - flags |= RENAME_WHITEOUT; - } else { - /* Switch whiteouts */ - flags |= RENAME_EXCHANGE; - } - } else if (is_dir && ovl_dentry_is_whiteout(new)) { - flags |= RENAME_EXCHANGE; - cleanup_whiteout = true; - } - } - old_upperdir = ovl_dentry_upper(old->d_parent); new_upperdir = ovl_dentry_upper(new->d_parent); @@ -1094,6 +1092,7 @@ out_drop_write: ovl_drop_write(old); out: dput(opaquedir); + ovl_cache_free(&list); return err; } diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c new file mode 100644 index 000000000000..bb94ce9da5c8 --- /dev/null +++ b/fs/overlayfs/export.c @@ -0,0 +1,715 @@ +/* + * Overlayfs NFS export support. + * + * Amir Goldstein <amir73il@gmail.com> + * + * Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/fs.h> +#include <linux/cred.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/xattr.h> +#include <linux/exportfs.h> +#include <linux/ratelimit.h> +#include "overlayfs.h" + +/* + * We only need to encode origin if there is a chance that the same object was + * encoded pre copy up and then we need to stay consistent with the same + * encoding also after copy up. If non-pure upper is not indexed, then it was + * copied up before NFS export was enabled. In that case we don't need to worry + * about staying consistent with pre copy up encoding and we encode an upper + * file handle. Overlay root dentry is a private case of non-indexed upper. + * + * The following table summarizes the different file handle encodings used for + * different overlay object types: + * + * Object type | Encoding + * -------------------------------- + * Pure upper | U + * Non-indexed upper | U + * Indexed upper | L (*) + * Non-upper | L (*) + * + * U = upper file handle + * L = lower file handle + * + * (*) Connecting an overlay dir from real lower dentry is not always + * possible when there are redirects in lower layers. To mitigate this case, + * we copy up the lower dir first and then encode an upper dir file handle. + */ +static bool ovl_should_encode_origin(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + + if (!ovl_dentry_lower(dentry)) + return false; + + /* + * Decoding a merge dir, whose origin's parent is under a redirected + * lower dir is not always possible. As a simple aproximation, we do + * not encode lower dir file handles when overlay has multiple lower + * layers and origin is below the topmost lower layer. + * + * TODO: copy up only the parent that is under redirected lower. + */ + if (d_is_dir(dentry) && ofs->upper_mnt && + OVL_E(dentry)->lowerstack[0].layer->idx > 1) + return false; + + /* Decoding a non-indexed upper from origin is not implemented */ + if (ovl_dentry_upper(dentry) && + !ovl_test_flag(OVL_INDEX, d_inode(dentry))) + return false; + + return true; +} + +static int ovl_encode_maybe_copy_up(struct dentry *dentry) +{ + int err; + + if (ovl_dentry_upper(dentry)) + return 0; + + err = ovl_want_write(dentry); + if (err) + return err; + + err = ovl_copy_up(dentry); + + ovl_drop_write(dentry); + return err; +} + +static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen) +{ + struct dentry *origin = ovl_dentry_lower(dentry); + struct ovl_fh *fh = NULL; + int err; + + /* + * If we should not encode a lower dir file handle, copy up and encode + * an upper dir file handle. + */ + if (!ovl_should_encode_origin(dentry)) { + err = ovl_encode_maybe_copy_up(dentry); + if (err) + goto fail; + + origin = NULL; + } + + /* Encode an upper or origin file handle */ + fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin); + err = PTR_ERR(fh); + if (IS_ERR(fh)) + goto fail; + + err = -EOVERFLOW; + if (fh->len > buflen) + goto fail; + + memcpy(buf, (char *)fh, fh->len); + err = fh->len; + +out: + kfree(fh); + return err; + +fail: + pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n", + dentry, err, buflen, fh ? (int)fh->len : 0, + fh ? fh->type : 0); + goto out; +} + +static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len) +{ + int res, len = *max_len << 2; + + res = ovl_d_to_fh(dentry, (char *)fid, len); + if (res <= 0) + return FILEID_INVALID; + + len = res; + + /* Round up to dwords */ + *max_len = (len + 3) >> 2; + return OVL_FILEID; +} + +static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len, + struct inode *parent) +{ + struct dentry *dentry; + int type; + + /* TODO: encode connectable file handles */ + if (parent) + return FILEID_INVALID; + + dentry = d_find_any_alias(inode); + if (WARN_ON(!dentry)) + return FILEID_INVALID; + + type = ovl_dentry_to_fh(dentry, fid, max_len); + + dput(dentry); + return type; +} + +/* + * Find or instantiate an overlay dentry from real dentries and index. + */ +static struct dentry *ovl_obtain_alias(struct super_block *sb, + struct dentry *upper_alias, + struct ovl_path *lowerpath, + struct dentry *index) +{ + struct dentry *lower = lowerpath ? lowerpath->dentry : NULL; + struct dentry *upper = upper_alias ?: index; + struct dentry *dentry; + struct inode *inode; + struct ovl_entry *oe; + + /* We get overlay directory dentries with ovl_lookup_real() */ + if (d_is_dir(upper ?: lower)) + return ERR_PTR(-EIO); + + inode = ovl_get_inode(sb, dget(upper), lower, index, !!lower); + if (IS_ERR(inode)) { + dput(upper); + return ERR_CAST(inode); + } + + if (index) + ovl_set_flag(OVL_INDEX, inode); + + dentry = d_find_any_alias(inode); + if (!dentry) { + dentry = d_alloc_anon(inode->i_sb); + if (!dentry) + goto nomem; + oe = ovl_alloc_entry(lower ? 1 : 0); + if (!oe) + goto nomem; + + if (lower) { + oe->lowerstack->dentry = dget(lower); + oe->lowerstack->layer = lowerpath->layer; + } + dentry->d_fsdata = oe; + if (upper_alias) + ovl_dentry_set_upper_alias(dentry); + } + + return d_instantiate_anon(dentry, inode); + +nomem: + iput(inode); + dput(dentry); + return ERR_PTR(-ENOMEM); +} + +/* Get the upper or lower dentry in stach whose on layer @idx */ +static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx) +{ + struct ovl_entry *oe = dentry->d_fsdata; + int i; + + if (!idx) + return ovl_dentry_upper(dentry); + + for (i = 0; i < oe->numlower; i++) { + if (oe->lowerstack[i].layer->idx == idx) + return oe->lowerstack[i].dentry; + } + + return NULL; +} + +/* + * Lookup a child overlay dentry to get a connected overlay dentry whose real + * dentry is @real. If @real is on upper layer, we lookup a child overlay + * dentry with the same name as the real dentry. Otherwise, we need to consult + * index for lookup. + */ +static struct dentry *ovl_lookup_real_one(struct dentry *connected, + struct dentry *real, + struct ovl_layer *layer) +{ + struct inode *dir = d_inode(connected); + struct dentry *this, *parent = NULL; + struct name_snapshot name; + int err; + + /* + * Lookup child overlay dentry by real name. The dir mutex protects us + * from racing with overlay rename. If the overlay dentry that is above + * real has already been moved to a parent that is not under the + * connected overlay dir, we return -ECHILD and restart the lookup of + * connected real path from the top. + */ + inode_lock_nested(dir, I_MUTEX_PARENT); + err = -ECHILD; + parent = dget_parent(real); + if (ovl_dentry_real_at(connected, layer->idx) != parent) + goto fail; + + /* + * We also need to take a snapshot of real dentry name to protect us + * from racing with underlying layer rename. In this case, we don't + * care about returning ESTALE, only from dereferencing a free name + * pointer because we hold no lock on the real dentry. + */ + take_dentry_name_snapshot(&name, real); + this = lookup_one_len(name.name, connected, strlen(name.name)); + err = PTR_ERR(this); + if (IS_ERR(this)) { + goto fail; + } else if (!this || !this->d_inode) { + dput(this); + err = -ENOENT; + goto fail; + } else if (ovl_dentry_real_at(this, layer->idx) != real) { + dput(this); + err = -ESTALE; + goto fail; + } + +out: + release_dentry_name_snapshot(&name); + dput(parent); + inode_unlock(dir); + return this; + +fail: + pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", + real, layer->idx, connected, err); + this = ERR_PTR(err); + goto out; +} + +static struct dentry *ovl_lookup_real(struct super_block *sb, + struct dentry *real, + struct ovl_layer *layer); + +/* + * Lookup an indexed or hashed overlay dentry by real inode. + */ +static struct dentry *ovl_lookup_real_inode(struct super_block *sb, + struct dentry *real, + struct ovl_layer *layer) +{ + struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; + struct dentry *index = NULL; + struct dentry *this = NULL; + struct inode *inode; + + /* + * Decoding upper dir from index is expensive, so first try to lookup + * overlay dentry in inode/dcache. + */ + inode = ovl_lookup_inode(sb, real, !layer->idx); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (inode) { + this = d_find_any_alias(inode); + iput(inode); + } + + /* + * For decoded lower dir file handle, lookup index by origin to check + * if lower dir was copied up and and/or removed. + */ + if (!this && layer->idx && ofs->indexdir && !WARN_ON(!d_is_dir(real))) { + index = ovl_lookup_index(ofs, NULL, real, false); + if (IS_ERR(index)) + return index; + } + + /* Get connected upper overlay dir from index */ + if (index) { + struct dentry *upper = ovl_index_upper(ofs, index); + + dput(index); + if (IS_ERR_OR_NULL(upper)) + return upper; + + /* + * ovl_lookup_real() in lower layer may call recursively once to + * ovl_lookup_real() in upper layer. The first level call walks + * back lower parents to the topmost indexed parent. The second + * recursive call walks back from indexed upper to the topmost + * connected/hashed upper parent (or up to root). + */ + this = ovl_lookup_real(sb, upper, &upper_layer); + dput(upper); + } + + if (!this) + return NULL; + + if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) { + dput(this); + this = ERR_PTR(-EIO); + } + + return this; +} + +/* + * Lookup an indexed or hashed overlay dentry, whose real dentry is an + * ancestor of @real. + */ +static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb, + struct dentry *real, + struct ovl_layer *layer) +{ + struct dentry *next, *parent = NULL; + struct dentry *ancestor = ERR_PTR(-EIO); + + if (real == layer->mnt->mnt_root) + return dget(sb->s_root); + + /* Find the topmost indexed or hashed ancestor */ + next = dget(real); + for (;;) { + parent = dget_parent(next); + + /* + * Lookup a matching overlay dentry in inode/dentry + * cache or in index by real inode. + */ + ancestor = ovl_lookup_real_inode(sb, next, layer); + if (ancestor) + break; + + if (parent == layer->mnt->mnt_root) { + ancestor = dget(sb->s_root); + break; + } + + /* + * If @real has been moved out of the layer root directory, + * we will eventully hit the real fs root. This cannot happen + * by legit overlay rename, so we return error in that case. + */ + if (parent == next) { + ancestor = ERR_PTR(-EXDEV); + break; + } + + dput(next); + next = parent; + } + + dput(parent); + dput(next); + + return ancestor; +} + +/* + * Lookup a connected overlay dentry whose real dentry is @real. + * If @real is on upper layer, we lookup a child overlay dentry with the same + * path the real dentry. Otherwise, we need to consult index for lookup. + */ +static struct dentry *ovl_lookup_real(struct super_block *sb, + struct dentry *real, + struct ovl_layer *layer) +{ + struct dentry *connected; + int err = 0; + + connected = ovl_lookup_real_ancestor(sb, real, layer); + if (IS_ERR(connected)) + return connected; + + while (!err) { + struct dentry *next, *this; + struct dentry *parent = NULL; + struct dentry *real_connected = ovl_dentry_real_at(connected, + layer->idx); + + if (real_connected == real) + break; + + /* Find the topmost dentry not yet connected */ + next = dget(real); + for (;;) { + parent = dget_parent(next); + + if (parent == real_connected) + break; + + /* + * If real has been moved out of 'real_connected', + * we will not find 'real_connected' and hit the layer + * root. In that case, we need to restart connecting. + * This game can go on forever in the worst case. We + * may want to consider taking s_vfs_rename_mutex if + * this happens more than once. + */ + if (parent == layer->mnt->mnt_root) { + dput(connected); + connected = dget(sb->s_root); + break; + } + + /* + * If real file has been moved out of the layer root + * directory, we will eventully hit the real fs root. + * This cannot happen by legit overlay rename, so we + * return error in that case. + */ + if (parent == next) { + err = -EXDEV; + break; + } + + dput(next); + next = parent; + } + + if (!err) { + this = ovl_lookup_real_one(connected, next, layer); + if (IS_ERR(this)) + err = PTR_ERR(this); + + /* + * Lookup of child in overlay can fail when racing with + * overlay rename of child away from 'connected' parent. + * In this case, we need to restart the lookup from the + * top, because we cannot trust that 'real_connected' is + * still an ancestor of 'real'. There is a good chance + * that the renamed overlay ancestor is now in cache, so + * ovl_lookup_real_ancestor() will find it and we can + * continue to connect exactly from where lookup failed. + */ + if (err == -ECHILD) { + this = ovl_lookup_real_ancestor(sb, real, + layer); + err = IS_ERR(this) ? PTR_ERR(this) : 0; + } + if (!err) { + dput(connected); + connected = this; + } + } + + dput(parent); + dput(next); + } + + if (err) + goto fail; + + return connected; + +fail: + pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", + real, layer->idx, connected, err); + dput(connected); + return ERR_PTR(err); +} + +/* + * Get an overlay dentry from upper/lower real dentries and index. + */ +static struct dentry *ovl_get_dentry(struct super_block *sb, + struct dentry *upper, + struct ovl_path *lowerpath, + struct dentry *index) +{ + struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt }; + struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer; + struct dentry *real = upper ?: (index ?: lowerpath->dentry); + + /* + * Obtain a disconnected overlay dentry from a non-dir real dentry + * and index. + */ + if (!d_is_dir(real)) + return ovl_obtain_alias(sb, upper, lowerpath, index); + + /* Removed empty directory? */ + if ((real->d_flags & DCACHE_DISCONNECTED) || d_unhashed(real)) + return ERR_PTR(-ENOENT); + + /* + * If real dentry is connected and hashed, get a connected overlay + * dentry whose real dentry is @real. + */ + return ovl_lookup_real(sb, real, layer); +} + +static struct dentry *ovl_upper_fh_to_d(struct super_block *sb, + struct ovl_fh *fh) +{ + struct ovl_fs *ofs = sb->s_fs_info; + struct dentry *dentry; + struct dentry *upper; + + if (!ofs->upper_mnt) + return ERR_PTR(-EACCES); + + upper = ovl_decode_fh(fh, ofs->upper_mnt); + if (IS_ERR_OR_NULL(upper)) + return upper; + + dentry = ovl_get_dentry(sb, upper, NULL, NULL); + dput(upper); + + return dentry; +} + +static struct dentry *ovl_lower_fh_to_d(struct super_block *sb, + struct ovl_fh *fh) +{ + struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_path origin = { }; + struct ovl_path *stack = &origin; + struct dentry *dentry = NULL; + struct dentry *index = NULL; + struct inode *inode = NULL; + bool is_deleted = false; + int err; + + /* First lookup indexed upper by fh */ + if (ofs->indexdir) { + index = ovl_get_index_fh(ofs, fh); + err = PTR_ERR(index); + if (IS_ERR(index)) { + if (err != -ESTALE) + return ERR_PTR(err); + + /* Found a whiteout index - treat as deleted inode */ + is_deleted = true; + index = NULL; + } + } + + /* Then try to get upper dir by index */ + if (index && d_is_dir(index)) { + struct dentry *upper = ovl_index_upper(ofs, index); + + err = PTR_ERR(upper); + if (IS_ERR_OR_NULL(upper)) + goto out_err; + + dentry = ovl_get_dentry(sb, upper, NULL, NULL); + dput(upper); + goto out; + } + + /* Then lookup origin by fh */ + err = ovl_check_origin_fh(ofs, fh, NULL, &stack); + if (err) { + goto out_err; + } else if (index) { + err = ovl_verify_origin(index, origin.dentry, false); + if (err) + goto out_err; + } else if (is_deleted) { + /* Lookup deleted non-dir by origin inode */ + if (!d_is_dir(origin.dentry)) + inode = ovl_lookup_inode(sb, origin.dentry, false); + err = -ESTALE; + if (!inode || atomic_read(&inode->i_count) == 1) + goto out_err; + + /* Deleted but still open? */ + index = dget(ovl_i_dentry_upper(inode)); + } + + dentry = ovl_get_dentry(sb, NULL, &origin, index); + +out: + dput(origin.dentry); + dput(index); + iput(inode); + return dentry; + +out_err: + dentry = ERR_PTR(err); + goto out; +} + +static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + struct dentry *dentry = NULL; + struct ovl_fh *fh = (struct ovl_fh *) fid; + int len = fh_len << 2; + unsigned int flags = 0; + int err; + + err = -EINVAL; + if (fh_type != OVL_FILEID) + goto out_err; + + err = ovl_check_fh_len(fh, len); + if (err) + goto out_err; + + flags = fh->flags; + dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ? + ovl_upper_fh_to_d(sb, fh) : + ovl_lower_fh_to_d(sb, fh); + err = PTR_ERR(dentry); + if (IS_ERR(dentry) && err != -ESTALE) + goto out_err; + + return dentry; + +out_err: + pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n", + len, fh_type, flags, err); + return ERR_PTR(err); +} + +static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n"); + return ERR_PTR(-EACCES); +} + +static int ovl_get_name(struct dentry *parent, char *name, + struct dentry *child) +{ + /* + * ovl_fh_to_dentry() returns connected dir overlay dentries and + * ovl_fh_to_parent() is not implemented, so we should not get here. + */ + WARN_ON_ONCE(1); + return -EIO; +} + +static struct dentry *ovl_get_parent(struct dentry *dentry) +{ + /* + * ovl_fh_to_dentry() returns connected dir overlay dentries, so we + * should not get here. + */ + WARN_ON_ONCE(1); + return ERR_PTR(-EIO); +} + +const struct export_operations ovl_export_operations = { + .encode_fh = ovl_encode_inode_fh, + .fh_to_dentry = ovl_fh_to_dentry, + .fh_to_parent = ovl_fh_to_parent, + .get_name = ovl_get_name, + .get_parent = ovl_get_parent, +}; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 00b6b294272a..fcd97b783fa1 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -105,12 +105,20 @@ int ovl_getattr(const struct path *path, struct kstat *stat, * Lower hardlinks may be broken on copy up to different * upper files, so we cannot use the lower origin st_ino * for those different files, even for the same fs case. + * + * Similarly, several redirected dirs can point to the + * same dir on a lower layer. With the "verify_lower" + * feature, we do not use the lower origin st_ino, if + * we haven't verified that this redirect is unique. + * * With inodes index enabled, it is safe to use st_ino - * of an indexed hardlinked origin. The index validates - * that the upper hardlink is not broken. + * of an indexed origin. The index validates that the + * upper hardlink is not broken and that a redirected + * dir is the only redirect to that origin. */ - if (is_dir || lowerstat.nlink == 1 || - ovl_test_flag(OVL_INDEX, d_inode(dentry))) + if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || + (!ovl_verify_lower(dentry->d_sb) && + (is_dir || lowerstat.nlink == 1))) stat->ino = lowerstat.ino; if (samefs) @@ -343,8 +351,10 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type) static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) { + /* Copy up of disconnected dentry does not set upper alias */ if (ovl_dentry_upper(dentry) && - ovl_dentry_has_upper_alias(dentry)) + (ovl_dentry_has_upper_alias(dentry) || + (dentry->d_flags & DCACHE_DISCONNECTED))) return false; if (special_file(d_inode(dentry)->i_mode)) @@ -604,9 +614,25 @@ static int ovl_inode_set(struct inode *inode, void *data) } static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, - struct dentry *upperdentry) + struct dentry *upperdentry, bool strict) { /* + * For directories, @strict verify from lookup path performs consistency + * checks, so NULL lower/upper in dentry must match NULL lower/upper in + * inode. Non @strict verify from NFS handle decode path passes NULL for + * 'unknown' lower/upper. + */ + if (S_ISDIR(inode->i_mode) && strict) { + /* Real lower dir moved to upper layer under us? */ + if (!lowerdentry && ovl_inode_lower(inode)) + return false; + + /* Lookup of an uncovered redirect origin? */ + if (!upperdentry && ovl_inode_upper(inode)) + return false; + } + + /* * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. * This happens when finding a copied up overlay inode for a renamed * or hardlinked overlay dentry and lower dentry cannot be followed @@ -625,14 +651,35 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, return true; } -struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, - struct dentry *index) +struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, + bool is_upper) { - struct dentry *lowerdentry = ovl_dentry_lower(dentry); + struct inode *inode, *key = d_inode(real); + + inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); + if (!inode) + return NULL; + + if (!ovl_verify_inode(inode, is_upper ? NULL : real, + is_upper ? real : NULL, false)) { + iput(inode); + return ERR_PTR(-ESTALE); + } + + return inode; +} + +struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, + struct dentry *lowerdentry, struct dentry *index, + unsigned int numlower) +{ + struct ovl_fs *ofs = sb->s_fs_info; struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; /* Already indexed or could be indexed on copy up? */ - bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry)); + bool indexed = (index || (ovl_indexdir(sb) && !upperdentry)); + struct dentry *origin = indexed ? lowerdentry : NULL; + bool is_dir; if (WARN_ON(upperdentry && indexed && !lowerdentry)) return ERR_PTR(-EIO); @@ -641,17 +688,22 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, realinode = d_inode(lowerdentry); /* - * Copy up origin (lower) may exist for non-indexed upper, but we must - * not use lower as hash key in that case. - * Hash inodes that are or could be indexed by origin inode and - * non-indexed upper inodes that could be hard linked by upper inode. + * Copy up origin (lower) may exist for non-indexed non-dir upper, but + * we must not use lower as hash key in that case. + * Hash non-dir that is or could be indexed by origin inode. + * Hash dir that is or could be merged by origin inode. + * Hash pure upper and non-indexed non-dir by upper inode. + * Hash non-indexed dir by upper inode for NFS export. */ - if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) { - struct inode *key = d_inode(indexed ? lowerdentry : - upperdentry); - unsigned int nlink; + is_dir = S_ISDIR(realinode->i_mode); + if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt)) + origin = lowerdentry; + + if (upperdentry || origin) { + struct inode *key = d_inode(origin ?: upperdentry); + unsigned int nlink = is_dir ? 1 : realinode->i_nlink; - inode = iget5_locked(dentry->d_sb, (unsigned long) key, + inode = iget5_locked(sb, (unsigned long) key, ovl_inode_test, ovl_inode_set, key); if (!inode) goto out_nomem; @@ -660,7 +712,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, * Verify that the underlying files stored in the inode * match those in the dentry. */ - if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) { + if (!ovl_verify_inode(inode, lowerdentry, upperdentry, + true)) { iput(inode); inode = ERR_PTR(-ESTALE); goto out; @@ -670,11 +723,12 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, goto out; } - nlink = ovl_get_nlink(lowerdentry, upperdentry, - realinode->i_nlink); + /* Recalculate nlink for non-dir due to indexing */ + if (!is_dir) + nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); } else { - inode = new_inode(dentry->d_sb); + inode = new_inode(sb); if (!inode) goto out_nomem; } @@ -685,10 +739,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, ovl_set_flag(OVL_IMPURE, inode); /* Check for non-merge dir that may have whiteouts */ - if (S_ISDIR(realinode->i_mode)) { - struct ovl_entry *oe = dentry->d_fsdata; - - if (((upperdentry && lowerdentry) || oe->numlower > 1) || + if (is_dir) { + if (((upperdentry && lowerdentry) || numlower > 1) || ovl_check_origin_xattr(upperdentry ?: lowerdentry)) { ovl_set_flag(OVL_WHITEOUTS, inode); } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index beb945e1963c..de3e6da1d5a5 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -9,6 +9,7 @@ #include <linux/fs.h> #include <linux/cred.h> +#include <linux/ctype.h> #include <linux/namei.h> #include <linux/xattr.h> #include <linux/ratelimit.h> @@ -84,15 +85,54 @@ invalid: static int ovl_acceptable(void *ctx, struct dentry *dentry) { - return 1; + /* + * A non-dir origin may be disconnected, which is fine, because + * we only need it for its unique inode number. + */ + if (!d_is_dir(dentry)) + return 1; + + /* Don't decode a deleted empty directory */ + if (d_unhashed(dentry)) + return 0; + + /* Check if directory belongs to the layer we are decoding from */ + return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root); } -static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) +/* + * Check validity of an overlay file handle buffer. + * + * Return 0 for a valid file handle. + * Return -ENODATA for "origin unknown". + * Return <0 for an invalid file handle. + */ +int ovl_check_fh_len(struct ovl_fh *fh, int fh_len) { - int res; + if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len) + return -EINVAL; + + if (fh->magic != OVL_FH_MAGIC) + return -EINVAL; + + /* Treat larger version and unknown flags as "origin unknown" */ + if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL) + return -ENODATA; + + /* Treat endianness mismatch as "origin unknown" */ + if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) && + (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) + return -ENODATA; + + return 0; +} + +static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name) +{ + int res, err; struct ovl_fh *fh = NULL; - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); + res = vfs_getxattr(dentry, name, NULL, 0); if (res < 0) { if (res == -ENODATA || res == -EOPNOTSUPP) return NULL; @@ -102,28 +142,20 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) if (res == 0) return NULL; - fh = kzalloc(res, GFP_KERNEL); + fh = kzalloc(res, GFP_KERNEL); if (!fh) return ERR_PTR(-ENOMEM); - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); + res = vfs_getxattr(dentry, name, fh, res); if (res < 0) goto fail; - if (res < sizeof(struct ovl_fh) || res < fh->len) - goto invalid; - - if (fh->magic != OVL_FH_MAGIC) + err = ovl_check_fh_len(fh, res); + if (err < 0) { + if (err == -ENODATA) + goto out; goto invalid; - - /* Treat larger version and unknown flags as "origin unknown" */ - if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL) - goto out; - - /* Treat endianness mismatch as "origin unknown" */ - if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) && - (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) - goto out; + } return fh; @@ -139,47 +171,41 @@ invalid: goto out; } -static struct dentry *ovl_get_origin(struct dentry *dentry, - struct vfsmount *mnt) +struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt) { - struct dentry *origin = NULL; - struct ovl_fh *fh = ovl_get_origin_fh(dentry); + struct dentry *real; int bytes; - if (IS_ERR_OR_NULL(fh)) - return (struct dentry *)fh; - /* * Make sure that the stored uuid matches the uuid of the lower * layer where file handle will be decoded. */ if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid)) - goto out; + return NULL; bytes = (fh->len - offsetof(struct ovl_fh, fid)); - origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, - bytes >> 2, (int)fh->type, - ovl_acceptable, NULL); - if (IS_ERR(origin)) { - /* Treat stale file handle as "origin unknown" */ - if (origin == ERR_PTR(-ESTALE)) - origin = NULL; - goto out; + real = exportfs_decode_fh(mnt, (struct fid *)fh->fid, + bytes >> 2, (int)fh->type, + ovl_acceptable, mnt); + if (IS_ERR(real)) { + /* + * Treat stale file handle to lower file as "origin unknown". + * upper file handle could become stale when upper file is + * unlinked and this information is needed to handle stale + * index entries correctly. + */ + if (real == ERR_PTR(-ESTALE) && + !(fh->flags & OVL_FH_FLAG_PATH_UPPER)) + real = NULL; + return real; } - if (ovl_dentry_weird(origin) || - ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) - goto invalid; - -out: - kfree(fh); - return origin; + if (ovl_dentry_weird(real)) { + dput(real); + return NULL; + } -invalid: - pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin); - dput(origin); - origin = NULL; - goto out; + return real; } static bool ovl_is_opaquedir(struct dentry *dentry) @@ -284,47 +310,81 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, } -static int ovl_check_origin(struct dentry *upperdentry, - struct ovl_path *lower, unsigned int numlower, - struct ovl_path **stackp, unsigned int *ctrp) +int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, + struct dentry *upperdentry, struct ovl_path **stackp) { - struct vfsmount *mnt; struct dentry *origin = NULL; int i; - for (i = 0; i < numlower; i++) { - mnt = lower[i].layer->mnt; - origin = ovl_get_origin(upperdentry, mnt); - if (IS_ERR(origin)) - return PTR_ERR(origin); - + for (i = 0; i < ofs->numlower; i++) { + origin = ovl_decode_fh(fh, ofs->lower_layers[i].mnt); if (origin) break; } if (!origin) - return 0; + return -ESTALE; + else if (IS_ERR(origin)) + return PTR_ERR(origin); + + if (upperdentry && !ovl_is_whiteout(upperdentry) && + ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT)) + goto invalid; - BUG_ON(*ctrp); if (!*stackp) *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL); if (!*stackp) { dput(origin); return -ENOMEM; } - **stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer}; - *ctrp = 1; + **stackp = (struct ovl_path){ + .dentry = origin, + .layer = &ofs->lower_layers[i] + }; + + return 0; + +invalid: + pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n", + upperdentry, d_inode(upperdentry)->i_mode & S_IFMT, + d_inode(origin)->i_mode & S_IFMT); + dput(origin); + return -EIO; +} + +static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, + struct ovl_path **stackp, unsigned int *ctrp) +{ + struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN); + int err; + + if (IS_ERR_OR_NULL(fh)) + return PTR_ERR(fh); + + err = ovl_check_origin_fh(ofs, fh, upperdentry, stackp); + kfree(fh); + + if (err) { + if (err == -ESTALE) + return 0; + return err; + } + + if (WARN_ON(*ctrp)) + return -EIO; + *ctrp = 1; return 0; } /* - * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN. + * Verify that @fh matches the file handle stored in xattr @name. * Return 0 on match, -ESTALE on mismatch, < 0 on error. */ -static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) +static int ovl_verify_fh(struct dentry *dentry, const char *name, + const struct ovl_fh *fh) { - struct ovl_fh *ofh = ovl_get_origin_fh(dentry); + struct ovl_fh *ofh = ovl_get_fh(dentry, name); int err = 0; if (!ofh) @@ -341,28 +401,28 @@ static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) } /* - * Verify that an inode matches the origin file handle stored in upper inode. + * Verify that @real dentry matches the file handle stored in xattr @name. * - * If @set is true and there is no stored file handle, encode and store origin - * file handle in OVL_XATTR_ORIGIN. + * If @set is true and there is no stored file handle, encode @real and store + * file handle in xattr @name. * - * Return 0 on match, -ESTALE on mismatch, < 0 on error. + * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error. */ -int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, - bool is_upper, bool set) +int ovl_verify_set_fh(struct dentry *dentry, const char *name, + struct dentry *real, bool is_upper, bool set) { struct inode *inode; struct ovl_fh *fh; int err; - fh = ovl_encode_fh(origin, is_upper); + fh = ovl_encode_fh(real, is_upper); err = PTR_ERR(fh); if (IS_ERR(fh)) goto fail; - err = ovl_verify_origin_fh(dentry, fh); + err = ovl_verify_fh(dentry, name, fh); if (set && err == -ENODATA) - err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0); + err = ovl_do_setxattr(dentry, name, fh, fh->len, 0); if (err) goto fail; @@ -371,45 +431,71 @@ out: return err; fail: - inode = d_inode(origin); - pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n", - origin, inode ? inode->i_ino : 0, err); + inode = d_inode(real); + pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n", + is_upper ? "upper" : "origin", real, + inode ? inode->i_ino : 0, err); goto out; } +/* Get upper dentry from index */ +struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index) +{ + struct ovl_fh *fh; + struct dentry *upper; + + if (!d_is_dir(index)) + return dget(index); + + fh = ovl_get_fh(index, OVL_XATTR_UPPER); + if (IS_ERR_OR_NULL(fh)) + return ERR_CAST(fh); + + upper = ovl_decode_fh(fh, ofs->upper_mnt); + kfree(fh); + + if (IS_ERR_OR_NULL(upper)) + return upper ?: ERR_PTR(-ESTALE); + + if (!d_is_dir(upper)) { + pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n", + index, upper); + dput(upper); + return ERR_PTR(-EIO); + } + + return upper; +} + +/* Is this a leftover from create/whiteout of directory index entry? */ +static bool ovl_is_temp_index(struct dentry *index) +{ + return index->d_name.name[0] == '#'; +} + /* * Verify that an index entry name matches the origin file handle stored in * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error. */ -int ovl_verify_index(struct dentry *index, struct ovl_path *lower, - unsigned int numlower) +int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) { struct ovl_fh *fh = NULL; size_t len; struct ovl_path origin = { }; struct ovl_path *stack = &origin; - unsigned int ctr = 0; + struct dentry *upper = NULL; int err; if (!d_inode(index)) return 0; - /* - * Directory index entries are going to be used for looking up - * redirected upper dirs by lower dir fh when decoding an overlay - * file handle of a merge dir. Whiteout index entries are going to be - * used as an indication that an exported overlay file handle should - * be treated as stale (i.e. after unlink of the overlay inode). - * We don't know the verification rules for directory and whiteout - * index entries, because they have not been implemented yet, so return - * EINVAL if those entries are found to abort the mount to avoid - * corrupting an index that was created by a newer kernel. - */ - err = -EINVAL; - if (d_is_dir(index) || ovl_is_whiteout(index)) + /* Cleanup leftover from index create/cleanup attempt */ + err = -ESTALE; + if (ovl_is_temp_index(index)) goto fail; + err = -EINVAL; if (index->d_name.len < sizeof(struct ovl_fh)*2) goto fail; @@ -420,26 +506,68 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower, goto fail; err = -EINVAL; - if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len) + if (hex2bin((u8 *)fh, index->d_name.name, len)) goto fail; - err = ovl_verify_origin_fh(index, fh); + err = ovl_check_fh_len(fh, len); if (err) goto fail; - err = ovl_check_origin(index, lower, numlower, &stack, &ctr); - if (!err && !ctr) - err = -ESTALE; + /* + * Whiteout index entries are used as an indication that an exported + * overlay file handle should be treated as stale (i.e. after unlink + * of the overlay inode). These entries contain no origin xattr. + */ + if (ovl_is_whiteout(index)) + goto out; + + /* + * Verifying directory index entries are not stale is expensive, so + * only verify stale dir index if NFS export is enabled. + */ + if (d_is_dir(index) && !ofs->config.nfs_export) + goto out; + + /* + * Directory index entries should have 'upper' xattr pointing to the + * real upper dir. Non-dir index entries are hardlinks to the upper + * real inode. For non-dir index, we can read the copy up origin xattr + * directly from the index dentry, but for dir index we first need to + * decode the upper directory. + */ + upper = ovl_index_upper(ofs, index); + if (IS_ERR_OR_NULL(upper)) { + err = PTR_ERR(upper); + /* + * Directory index entries with no 'upper' xattr need to be + * removed. When dir index entry has a stale 'upper' xattr, + * we assume that upper dir was removed and we treat the dir + * index as orphan entry that needs to be whited out. + */ + if (err == -ESTALE) + goto orphan; + else if (!err) + err = -ESTALE; + goto fail; + } + + err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh); + dput(upper); if (err) goto fail; - /* Check if index is orphan and don't warn before cleaning it */ - if (d_inode(index)->i_nlink == 1 && - ovl_get_nlink(origin.dentry, index, 0) == 0) - err = -ENOENT; + /* Check if non-dir index is orphan and don't warn before cleaning it */ + if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) { + err = ovl_check_origin_fh(ofs, fh, index, &stack); + if (err) + goto fail; + + if (ovl_get_nlink(origin.dentry, index, 0) == 0) + goto orphan; + } - dput(origin.dentry); out: + dput(origin.dentry); kfree(fh); return err; @@ -447,6 +575,28 @@ fail: pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n", index, d_inode(index)->i_mode & S_IFMT, err); goto out; + +orphan: + pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n", + index, d_inode(index)->i_mode & S_IFMT, + d_inode(index)->i_nlink); + err = -ENOENT; + goto out; +} + +static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name) +{ + char *n, *s; + + n = kzalloc(fh->len * 2, GFP_KERNEL); + if (!n) + return -ENOMEM; + + s = bin2hex(n, fh, fh->len); + *name = (struct qstr) QSTR_INIT(n, s - n); + + return 0; + } /* @@ -466,35 +616,58 @@ fail: */ int ovl_get_index_name(struct dentry *origin, struct qstr *name) { - int err; struct ovl_fh *fh; - char *n, *s; + int err; fh = ovl_encode_fh(origin, false); if (IS_ERR(fh)) return PTR_ERR(fh); - err = -ENOMEM; - n = kzalloc(fh->len * 2, GFP_KERNEL); - if (n) { - s = bin2hex(n, fh, fh->len); - *name = (struct qstr) QSTR_INIT(n, s - n); - err = 0; - } - kfree(fh); + err = ovl_get_index_name_fh(fh, name); + kfree(fh); return err; +} + +/* Lookup index by file handle for NFS export */ +struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh) +{ + struct dentry *index; + struct qstr name; + int err; + + err = ovl_get_index_name_fh(fh, &name); + if (err) + return ERR_PTR(err); + + index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); + kfree(name.name); + if (IS_ERR(index)) { + if (PTR_ERR(index) == -ENOENT) + index = NULL; + return index; + } + if (d_is_negative(index)) + err = 0; + else if (ovl_is_whiteout(index)) + err = -ESTALE; + else if (ovl_dentry_weird(index)) + err = -EIO; + else + return index; + + dput(index); + return ERR_PTR(err); } -static struct dentry *ovl_lookup_index(struct dentry *dentry, - struct dentry *upper, - struct dentry *origin) +struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, + struct dentry *origin, bool verify) { - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; struct dentry *index; struct inode *inode; struct qstr name; + bool is_dir = d_is_dir(origin); int err; err = ovl_get_index_name(origin, &name); @@ -518,8 +691,16 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, inode = d_inode(index); if (d_is_negative(index)) { goto out_dput; - } else if (upper && d_inode(upper) != inode) { - goto out_dput; + } else if (ovl_is_whiteout(index) && !verify) { + /* + * When index lookup is called with !verify for decoding an + * overlay file handle, a whiteout index implies that decode + * should treat file handle as stale and no need to print a + * warning about it. + */ + dput(index); + index = ERR_PTR(-ESTALE); + goto out; } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { /* @@ -533,8 +714,25 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, index, d_inode(index)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); goto fail; - } + } else if (is_dir && verify) { + if (!upper) { + pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n", + origin, index); + goto fail; + } + /* Verify that dir index 'upper' xattr points to upper dir */ + err = ovl_verify_upper(index, upper, false); + if (err) { + if (err == -ESTALE) { + pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n", + upper, origin, index); + } + goto fail; + } + } else if (upper && d_inode(upper) != inode) { + goto out_dput; + } out: kfree(name.name); return index; @@ -572,16 +770,25 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path) return (idx < oe->numlower) ? idx + 1 : -1; } -static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path) +/* Fix missing 'origin' xattr */ +static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower, + struct dentry *upper) { - int i; + int err; - for (i = 0; i < ofs->numlower; i++) { - if (ofs->lower_layers[i].mnt == path->layer->mnt) - break; - } + if (ovl_check_origin_xattr(upper)) + return 0; + + err = ovl_want_write(dentry); + if (err) + return err; + + err = ovl_set_origin(dentry, lower, upper); + if (!err) + err = ovl_set_impure(dentry->d_parent, upper->d_parent); - return i; + ovl_drop_write(dentry); + return err; } struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, @@ -594,6 +801,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; struct ovl_path *stack = NULL; struct dentry *upperdir, *upperdentry = NULL; + struct dentry *origin = NULL; struct dentry *index = NULL; unsigned int ctr = 0; struct inode *inode = NULL; @@ -638,8 +846,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, * number - it's the same as if we held a reference * to a dentry in lower layer that was moved under us. */ - err = ovl_check_origin(upperdentry, roe->lowerstack, - roe->numlower, &stack, &ctr); + err = ovl_check_origin(ofs, upperdentry, &stack, &ctr); if (err) goto out_put_upper; } @@ -674,6 +881,34 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (!this) continue; + /* + * If no origin fh is stored in upper of a merge dir, store fh + * of lower dir and set upper parent "impure". + */ + if (upperdentry && !ctr && !ofs->noxattr) { + err = ovl_fix_origin(dentry, this, upperdentry); + if (err) { + dput(this); + goto out_put; + } + } + + /* + * When "verify_lower" feature is enabled, do not merge with a + * lower dir that does not match a stored origin xattr. In any + * case, only verified origin is used for index lookup. + */ + if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) { + err = ovl_verify_origin(upperdentry, this, false); + if (err) { + dput(this); + break; + } + + /* Bless lower dir as verified origin */ + origin = this; + } + stack[ctr].dentry = this; stack[ctr].layer = lower.layer; ctr++; @@ -693,25 +928,30 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, */ err = -EPERM; if (d.redirect && !ofs->config.redirect_follow) { - pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry); + pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n", + dentry); goto out_put; } if (d.redirect && d.redirect[0] == '/' && poe != roe) { poe = roe; - /* Find the current layer on the root dentry */ - i = ovl_find_layer(ofs, &lower); - if (WARN_ON(i == ofs->numlower)) - break; + i = lower.layer->idx - 1; } } - /* Lookup index by lower inode and verify it matches upper inode */ - if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) { - struct dentry *origin = stack[0].dentry; + /* + * Lookup index by lower inode and verify it matches upper inode. + * We only trust dir index if we verified that lower dir matches + * origin, otherwise dir index entries may be inconsistent and we + * ignore them. Always lookup index of non-dir and non-upper. + */ + if (ctr && (!upperdentry || !d.is_dir)) + origin = stack[0].dentry; - index = ovl_lookup_index(dentry, upperdentry, origin); + if (origin && ovl_indexdir(dentry->d_sb) && + (!d.is_dir || ovl_index_all(dentry->d_sb))) { + index = ovl_lookup_index(ofs, upperdentry, origin, true); if (IS_ERR(index)) { err = PTR_ERR(index); index = NULL; @@ -724,17 +964,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (!oe) goto out_put; - oe->opaque = upperopaque; memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr); dentry->d_fsdata = oe; + if (upperopaque) + ovl_dentry_set_opaque(dentry); + if (upperdentry) ovl_dentry_set_upper_alias(dentry); else if (index) upperdentry = dget(index); if (upperdentry || ctr) { - inode = ovl_get_inode(dentry, upperdentry, index); + if (ctr) + origin = stack[0].dentry; + inode = ovl_get_inode(dentry->d_sb, upperdentry, origin, index, + ctr); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_free_oe; @@ -748,9 +993,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, dput(index); kfree(stack); kfree(d.redirect); - d_add(dentry, inode); - - return NULL; + return d_splice_alias(inode, dentry); out_free_oe: dentry->d_fsdata = NULL; @@ -771,9 +1014,9 @@ out: bool ovl_lower_positive(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; struct ovl_entry *poe = dentry->d_parent->d_fsdata; const struct qstr *name = &dentry->d_name; + const struct cred *old_cred; unsigned int i; bool positive = false; bool done = false; @@ -783,12 +1026,13 @@ bool ovl_lower_positive(struct dentry *dentry) * whiteout. */ if (!dentry->d_inode) - return oe->opaque; + return ovl_dentry_is_opaque(dentry); /* Negative upper -> positive lower */ if (!ovl_dentry_upper(dentry)) return true; + old_cred = ovl_override_creds(dentry->d_sb); /* Positive upper -> have to look up lower to see whether it exists */ for (i = 0; !done && !positive && i < poe->numlower; i++) { struct dentry *this; @@ -818,6 +1062,7 @@ bool ovl_lower_positive(struct dentry *dentry) dput(this); } } + revert_creds(old_cred); return positive; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index b489099ccd49..0df25a9c94bd 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -27,8 +27,9 @@ enum ovl_path_type { #define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin" #define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" #define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink" +#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper" -enum ovl_flag { +enum ovl_inode_flag { /* Pure upper dir that may contain non pure upper entries */ OVL_IMPURE, /* Non-merge dir that may contain whiteout entries */ @@ -36,6 +37,11 @@ enum ovl_flag { OVL_INDEX, }; +enum ovl_entry_flag { + OVL_E_UPPER_ALIAS, + OVL_E_OPAQUE, +}; + /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, * where: @@ -62,6 +68,9 @@ enum ovl_flag { #error Endianness not defined #endif +/* The type returned by overlay exportfs ops when encoding an ovl_fh handle */ +#define OVL_FILEID 0xfb + /* On-disk and in-memeory format for redirect by file handle */ struct ovl_fh { u8 version; /* 0 */ @@ -194,6 +203,8 @@ const struct cred *ovl_override_creds(struct super_block *sb); struct super_block *ovl_same_sb(struct super_block *sb); bool ovl_can_decode_fh(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb); +bool ovl_index_all(struct super_block *sb); +bool ovl_verify_lower(struct super_block *sb); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); bool ovl_dentry_remote(struct dentry *dentry); bool ovl_dentry_weird(struct dentry *dentry); @@ -210,6 +221,9 @@ struct inode *ovl_inode_lower(struct inode *inode); struct inode *ovl_inode_real(struct inode *inode); struct ovl_dir_cache *ovl_dir_cache(struct inode *inode); void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache); +void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry); +void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry); +bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry); bool ovl_dentry_is_opaque(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); @@ -238,6 +252,7 @@ void ovl_clear_flag(unsigned long flag, struct inode *inode); bool ovl_test_flag(unsigned long flag, struct inode *inode); bool ovl_inuse_trylock(struct dentry *dentry); void ovl_inuse_unlock(struct dentry *dentry); +bool ovl_need_index(struct dentry *dentry); int ovl_nlink_start(struct dentry *dentry, bool *locked); void ovl_nlink_end(struct dentry *dentry, bool locked); int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); @@ -249,15 +264,35 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) /* namei.c */ -int ovl_verify_origin(struct dentry *dentry, struct dentry *origin, - bool is_upper, bool set); -int ovl_verify_index(struct dentry *index, struct ovl_path *lower, - unsigned int numlower); +int ovl_check_fh_len(struct ovl_fh *fh, int fh_len); +struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt); +int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, + struct dentry *upperdentry, struct ovl_path **stackp); +int ovl_verify_set_fh(struct dentry *dentry, const char *name, + struct dentry *real, bool is_upper, bool set); +struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index); +int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); int ovl_get_index_name(struct dentry *origin, struct qstr *name); +struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); +struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, + struct dentry *origin, bool verify); int ovl_path_next(int idx, struct dentry *dentry, struct path *path); -struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); +static inline int ovl_verify_origin(struct dentry *upper, + struct dentry *origin, bool set) +{ + return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set); +} + +static inline int ovl_verify_upper(struct dentry *index, + struct dentry *upper, bool set) +{ + return ovl_verify_set_fh(index, OVL_XATTR_UPPER, upper, true, set); +} + /* readdir.c */ extern const struct file_operations ovl_dir_operations; int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); @@ -267,8 +302,7 @@ void ovl_dir_cache_free(struct inode *inode); int ovl_check_d_type_supported(struct path *realpath); void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, struct dentry *dentry, int level); -int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, - struct ovl_path *lower, unsigned int numlower); +int ovl_indexdir_cleanup(struct ovl_fs *ofs); /* inode.c */ int ovl_set_nlink_upper(struct dentry *dentry); @@ -291,8 +325,11 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); -struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, - struct dentry *index); +struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, + bool is_upper); +struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, + struct dentry *lowerdentry, struct dentry *index, + unsigned int numlower); static inline void ovl_copyattr(struct inode *from, struct inode *to) { to->i_uid = from->i_uid; @@ -306,6 +343,8 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) /* dir.c */ extern const struct inode_operations ovl_dir_inode_operations; struct dentry *ovl_lookup_temp(struct dentry *workdir); +int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir, + struct dentry *dentry); struct cattr { dev_t rdev; umode_t mode; @@ -321,4 +360,9 @@ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); -struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper); +struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper); +int ovl_set_origin(struct dentry *dentry, struct dentry *lower, + struct dentry *upper); + +/* export.c */ +extern const struct export_operations ovl_export_operations; diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 9d0bc03bf6e4..bfef6edcc111 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -17,11 +17,14 @@ struct ovl_config { bool redirect_follow; const char *redirect_mode; bool index; + bool nfs_export; }; struct ovl_layer { struct vfsmount *mnt; dev_t pseudo_dev; + /* Index of this layer in fs root (upper == 0) */ + int idx; }; struct ovl_path { @@ -58,8 +61,7 @@ struct ovl_fs { struct ovl_entry { union { struct { - unsigned long has_upper; - bool opaque; + unsigned long flags; }; struct rcu_head rcu; }; @@ -69,6 +71,11 @@ struct ovl_entry { struct ovl_entry *ovl_alloc_entry(unsigned int numlower); +static inline struct ovl_entry *OVL_E(struct dentry *dentry) +{ + return (struct ovl_entry *) dentry->d_fsdata; +} + struct ovl_inode { struct ovl_dir_cache *cache; const char *redirect; diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 8c98578d27a1..c11f5c0906c3 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -593,8 +593,15 @@ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path) return ERR_PTR(res); } if (list_empty(&cache->entries)) { - /* Good oportunity to get rid of an unnecessary "impure" flag */ - ovl_do_removexattr(ovl_dentry_upper(dentry), OVL_XATTR_IMPURE); + /* + * A good opportunity to get rid of an unneeded "impure" flag. + * Removing the "impure" xattr is best effort. + */ + if (!ovl_want_write(dentry)) { + ovl_do_removexattr(ovl_dentry_upper(dentry), + OVL_XATTR_IMPURE); + ovl_drop_write(dentry); + } ovl_clear_flag(OVL_IMPURE, d_inode(dentry)); kfree(cache); return NULL; @@ -769,10 +776,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, struct dentry *dentry = file->f_path.dentry; struct file *realfile = od->realfile; + /* Nothing to sync for lower */ + if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) + return 0; + /* * Need to check if we started out being a lower dir, but got copied up */ - if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { + if (!od->is_upper) { struct inode *inode = file_inode(file); realfile = READ_ONCE(od->upperfile); @@ -858,8 +869,11 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) int err; struct ovl_cache_entry *p, *n; struct rb_root root = RB_ROOT; + const struct cred *old_cred; + old_cred = ovl_override_creds(dentry->d_sb); err = ovl_dir_read_merged(dentry, list, &root); + revert_creds(old_cred); if (err) return err; @@ -1016,13 +1030,13 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, } } -int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, - struct ovl_path *lower, unsigned int numlower) +int ovl_indexdir_cleanup(struct ovl_fs *ofs) { int err; + struct dentry *indexdir = ofs->indexdir; struct dentry *index = NULL; - struct inode *dir = dentry->d_inode; - struct path path = { .mnt = mnt, .dentry = dentry }; + struct inode *dir = indexdir->d_inode; + struct path path = { .mnt = ofs->upper_mnt, .dentry = indexdir }; LIST_HEAD(list); struct rb_root root = RB_ROOT; struct ovl_cache_entry *p; @@ -1046,19 +1060,40 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, if (p->len == 2 && p->name[1] == '.') continue; } - index = lookup_one_len(p->name, dentry, p->len); + index = lookup_one_len(p->name, indexdir, p->len); if (IS_ERR(index)) { err = PTR_ERR(index); index = NULL; break; } - err = ovl_verify_index(index, lower, numlower); - /* Cleanup stale and orphan index entries */ - if (err && (err == -ESTALE || err == -ENOENT)) + err = ovl_verify_index(ofs, index); + if (!err) { + goto next; + } else if (err == -ESTALE) { + /* Cleanup stale index entries */ + err = ovl_cleanup(dir, index); + } else if (err != -ENOENT) { + /* + * Abort mount to avoid corrupting the index if + * an incompatible index entry was found or on out + * of memory. + */ + break; + } else if (ofs->config.nfs_export) { + /* + * Whiteout orphan index to block future open by + * handle after overlay nlink dropped to zero. + */ + err = ovl_cleanup_and_whiteout(indexdir, dir, index); + } else { + /* Cleanup orphan index entries */ err = ovl_cleanup(dir, index); + } + if (err) break; +next: dput(index); index = NULL; } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 76440feb79f6..9ee37c76091d 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -45,6 +45,11 @@ module_param_named(index, ovl_index_def, bool, 0644); MODULE_PARM_DESC(ovl_index_def, "Default to on or off for the inodes index feature"); +static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT); +module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644); +MODULE_PARM_DESC(ovl_nfs_export_def, + "Default to on or off for the NFS export feature"); + static void ovl_entry_stack_free(struct ovl_entry *oe) { unsigned int i; @@ -211,6 +216,7 @@ static void ovl_destroy_inode(struct inode *inode) struct ovl_inode *oi = OVL_I(inode); dput(oi->__upperdentry); + iput(oi->lower); kfree(oi->redirect); ovl_dir_cache_free(inode); mutex_destroy(&oi->lock); @@ -341,6 +347,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode); if (ofs->config.index != ovl_index_def) seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off"); + if (ofs->config.nfs_export != ovl_nfs_export_def) + seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? + "on" : "off"); return 0; } @@ -373,6 +382,8 @@ enum { OPT_REDIRECT_DIR, OPT_INDEX_ON, OPT_INDEX_OFF, + OPT_NFS_EXPORT_ON, + OPT_NFS_EXPORT_OFF, OPT_ERR, }; @@ -384,6 +395,8 @@ static const match_table_t ovl_tokens = { {OPT_REDIRECT_DIR, "redirect_dir=%s"}, {OPT_INDEX_ON, "index=on"}, {OPT_INDEX_OFF, "index=off"}, + {OPT_NFS_EXPORT_ON, "nfs_export=on"}, + {OPT_NFS_EXPORT_OFF, "nfs_export=off"}, {OPT_ERR, NULL} }; @@ -490,6 +503,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->index = false; break; + case OPT_NFS_EXPORT_ON: + config->nfs_export = true; + break; + + case OPT_NFS_EXPORT_OFF: + config->nfs_export = false; + break; + default: pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; @@ -520,10 +541,6 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, bool retried = false; bool locked = false; - err = mnt_want_write(mnt); - if (err) - goto out_err; - inode_lock_nested(dir, I_MUTEX_PARENT); locked = true; @@ -588,7 +605,6 @@ retry: goto out_err; } out_unlock: - mnt_drop_write(mnt); if (locked) inode_unlock(dir); @@ -700,12 +716,16 @@ static int ovl_lower_dir(const char *name, struct path *path, *remote = true; /* - * The inodes index feature needs to encode and decode file - * handles, so it requires that all layers support them. + * The inodes index feature and NFS export need to encode and decode + * file handles, so they require that all layers support them. */ - if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) { + if ((ofs->config.nfs_export || + (ofs->config.index && ofs->config.upperdir)) && + !ovl_can_decode_fh(path->dentry->d_sb)) { ofs->config.index = false; - pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); + ofs->config.nfs_export = false; + pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", + name); } return 0; @@ -929,12 +949,17 @@ out: static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) { + struct vfsmount *mnt = ofs->upper_mnt; struct dentry *temp; int err; + err = mnt_want_write(mnt); + if (err) + return err; + ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); if (!ofs->workdir) - return 0; + goto out; /* * Upper should support d_type, else whiteouts are visible. Given @@ -944,7 +969,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) */ err = ovl_check_d_type_supported(workpath); if (err < 0) - return err; + goto out; /* * We allowed this configuration and don't want to break users over @@ -967,7 +992,9 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0); if (err) { ofs->noxattr = true; - pr_warn("overlayfs: upper fs does not support xattr.\n"); + ofs->config.index = false; + pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n"); + err = 0; } else { vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); } @@ -979,7 +1006,15 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); } - return 0; + /* NFS export of r/w mount depends on index */ + if (ofs->config.nfs_export && !ofs->config.index) { + pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n"); + ofs->config.nfs_export = false; + } + +out: + mnt_drop_write(mnt); + return err; } static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath) @@ -1026,11 +1061,16 @@ out: static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, struct path *upperpath) { + struct vfsmount *mnt = ofs->upper_mnt; int err; + err = mnt_want_write(mnt); + if (err) + return err; + /* Verify lower root is upper root origin */ err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, - false, true); + true); if (err) { pr_err("overlayfs: failed to verify upper root origin\n"); goto out; @@ -1038,23 +1078,33 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); if (ofs->indexdir) { - /* Verify upper root is index dir origin */ - err = ovl_verify_origin(ofs->indexdir, upperpath->dentry, - true, true); + /* + * Verify upper root is exclusively associated with index dir. + * Older kernels stored upper fh in "trusted.overlay.origin" + * xattr. If that xattr exists, verify that it is a match to + * upper dir file handle. In any case, verify or set xattr + * "trusted.overlay.upper" to indicate that index may have + * directory entries. + */ + if (ovl_check_origin_xattr(ofs->indexdir)) { + err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN, + upperpath->dentry, true, false); + if (err) + pr_err("overlayfs: failed to verify index dir 'origin' xattr\n"); + } + err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true); if (err) - pr_err("overlayfs: failed to verify index dir origin\n"); + pr_err("overlayfs: failed to verify index dir 'upper' xattr\n"); /* Cleanup bad/stale/orphan index entries */ if (!err) - err = ovl_indexdir_cleanup(ofs->indexdir, - ofs->upper_mnt, - oe->lowerstack, - oe->numlower); + err = ovl_indexdir_cleanup(ofs); } if (err || !ofs->indexdir) pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); out: + mnt_drop_write(mnt); return err; } @@ -1094,6 +1144,7 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack, ofs->lower_layers[ofs->numlower].mnt = mnt; ofs->lower_layers[ofs->numlower].pseudo_dev = dev; + ofs->lower_layers[ofs->numlower].idx = i + 1; ofs->numlower++; /* Check if all lower layers are on same sb */ @@ -1131,6 +1182,10 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, } else if (!ofs->config.upperdir && stacklen == 1) { pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); goto out_err; + } else if (!ofs->config.upperdir && ofs->config.nfs_export && + ofs->config.redirect_follow) { + pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); + ofs->config.nfs_export = false; } err = -ENOMEM; @@ -1207,6 +1262,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_err; ofs->config.index = ovl_index_def; + ofs->config.nfs_export = ovl_nfs_export_def; err = ovl_parse_opt((char *) data, &ofs->config); if (err) goto out_err; @@ -1257,13 +1313,26 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_free_oe; - if (!ofs->indexdir) + /* Force r/o mount with no index dir */ + if (!ofs->indexdir) { + dput(ofs->workdir); + ofs->workdir = NULL; sb->s_flags |= SB_RDONLY; + } + } - /* Show index=off/on in /proc/mounts for any of the reasons above */ - if (!ofs->indexdir) + /* Show index=off in /proc/mounts for forced r/o mount */ + if (!ofs->indexdir) { ofs->config.index = false; + if (ofs->upper_mnt && ofs->config.nfs_export) { + pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n"); + ofs->config.nfs_export = false; + } + } + + if (ofs->config.nfs_export) + sb->s_export_op = &ovl_export_operations; /* Never override disk quota limits or use reserved space */ cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); @@ -1279,15 +1348,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!root_dentry) goto out_free_oe; + root_dentry->d_fsdata = oe; + mntput(upperpath.mnt); if (upperpath.dentry) { - oe->has_upper = true; + ovl_dentry_set_upper_alias(root_dentry); if (ovl_is_impuredir(upperpath.dentry)) ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); } - root_dentry->d_fsdata = oe; - /* Root is always merge -> can have whiteouts */ ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); ovl_inode_init(d_inode(root_dentry), upperpath.dentry, diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index d6bb1c9f5e7a..930784a26623 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -63,6 +63,22 @@ struct dentry *ovl_indexdir(struct super_block *sb) return ofs->indexdir; } +/* Index all files on copy up. For now only enabled for NFS export */ +bool ovl_index_all(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->config.nfs_export && ofs->config.index; +} + +/* Verify lower origin on lookup. For now only enabled for NFS export */ +bool ovl_verify_lower(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->config.nfs_export && ofs->config.index; +} + struct ovl_entry *ovl_alloc_entry(unsigned int numlower) { size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); @@ -194,10 +210,24 @@ void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache) OVL_I(inode)->cache = cache; } +void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry) +{ + set_bit(flag, &OVL_E(dentry)->flags); +} + +void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry) +{ + clear_bit(flag, &OVL_E(dentry)->flags); +} + +bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry) +{ + return test_bit(flag, &OVL_E(dentry)->flags); +} + bool ovl_dentry_is_opaque(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - return oe->opaque; + return ovl_dentry_test_flag(OVL_E_OPAQUE, dentry); } bool ovl_dentry_is_whiteout(struct dentry *dentry) @@ -207,28 +237,23 @@ bool ovl_dentry_is_whiteout(struct dentry *dentry) void ovl_dentry_set_opaque(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - oe->opaque = true; + ovl_dentry_set_flag(OVL_E_OPAQUE, dentry); } /* - * For hard links it's possible for ovl_dentry_upper() to return positive, while - * there's no actual upper alias for the inode. Copy up code needs to know - * about the existence of the upper alias, so it can't use ovl_dentry_upper(). + * For hard links and decoded file handles, it's possible for ovl_dentry_upper() + * to return positive, while there's no actual upper alias for the inode. + * Copy up code needs to know about the existence of the upper alias, so it + * can't use ovl_dentry_upper(). */ bool ovl_dentry_has_upper_alias(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - return oe->has_upper; + return ovl_dentry_test_flag(OVL_E_UPPER_ALIAS, dentry); } void ovl_dentry_set_upper_alias(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - oe->has_upper = true; + ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry); } bool ovl_redirect_dir(struct super_block *sb) @@ -257,7 +282,7 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, if (upperdentry) OVL_I(inode)->__upperdentry = upperdentry; if (lowerdentry) - OVL_I(inode)->lower = d_inode(lowerdentry); + OVL_I(inode)->lower = igrab(d_inode(lowerdentry)); ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode); } @@ -273,7 +298,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) */ smp_wmb(); OVL_I(inode)->__upperdentry = upperdentry; - if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) { + if (inode_unhashed(inode)) { inode->i_private = upperinode; __insert_inode_hash(inode, (unsigned long) upperinode); } @@ -447,10 +472,32 @@ void ovl_inuse_unlock(struct dentry *dentry) } } +/* + * Does this overlay dentry need to be indexed on copy up? + */ +bool ovl_need_index(struct dentry *dentry) +{ + struct dentry *lower = ovl_dentry_lower(dentry); + + if (!lower || !ovl_indexdir(dentry->d_sb)) + return false; + + /* Index all files for NFS export and consistency verification */ + if (ovl_index_all(dentry->d_sb)) + return true; + + /* Index only lower hardlinks on copy up */ + if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1) + return true; + + return false; +} + /* Caller must hold OVL_I(inode)->lock */ static void ovl_cleanup_index(struct dentry *dentry) { - struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode; + struct dentry *indexdir = ovl_indexdir(dentry->d_sb); + struct inode *dir = indexdir->d_inode; struct dentry *lowerdentry = ovl_dentry_lower(dentry); struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *index = NULL; @@ -463,7 +510,7 @@ static void ovl_cleanup_index(struct dentry *dentry) goto fail; inode = d_inode(upperdentry); - if (inode->i_nlink != 1) { + if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) { pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", upperdentry, inode->i_ino, inode->i_nlink); /* @@ -481,13 +528,17 @@ static void ovl_cleanup_index(struct dentry *dentry) } inode_lock_nested(dir, I_MUTEX_PARENT); - /* TODO: whiteout instead of cleanup to block future open by handle */ - index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len); + index = lookup_one_len(name.name, indexdir, name.len); err = PTR_ERR(index); - if (!IS_ERR(index)) - err = ovl_cleanup(dir, index); - else + if (IS_ERR(index)) { index = NULL; + } else if (ovl_index_all(dentry->d_sb)) { + /* Whiteout orphan index to block future open by handle */ + err = ovl_cleanup_and_whiteout(indexdir, dir, index); + } else { + /* Cleanup orphan index entries */ + err = ovl_cleanup(dir, index); + } inode_unlock(dir); if (err) @@ -512,16 +563,16 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked) const struct cred *old_cred; int err; - if (!d_inode(dentry) || d_is_dir(dentry)) + if (!d_inode(dentry)) return 0; /* * With inodes index is enabled, we store the union overlay nlink - * in an xattr on the index inode. When whiting out lower hardlinks + * in an xattr on the index inode. When whiting out an indexed lower, * we need to decrement the overlay persistent nlink, but before the * first copy up, we have no upper index inode to store the xattr. * - * As a workaround, before whiteout/rename over of a lower hardlink, + * As a workaround, before whiteout/rename over an indexed lower, * copy up to create the upper index. Creating the upper index will * initialize the overlay nlink, so it could be dropped if unlink * or rename succeeds. @@ -529,8 +580,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked) * TODO: implement metadata only index copy up when called with * ovl_copy_up_flags(dentry, O_PATH). */ - if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) && - d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) { + if (ovl_need_index(dentry) && !ovl_dentry_has_upper_alias(dentry)) { err = ovl_copy_up(dentry); if (err) return err; @@ -540,7 +590,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked) if (err) return err; - if (!ovl_test_flag(OVL_INDEX, d_inode(dentry))) + if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, d_inode(dentry))) goto out; old_cred = ovl_override_creds(dentry->d_sb); diff --git a/fs/pipe.c b/fs/pipe.c index a449ca0ec0c6..0913aed7fd0d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -35,11 +35,6 @@ */ unsigned int pipe_max_size = 1048576; -/* - * Minimum pipe size, as required by POSIX - */ -unsigned int pipe_min_size = PAGE_SIZE; - /* Maximum allocatable pages per user. Hard limit is unset by default, soft * matches default values. */ @@ -610,12 +605,21 @@ static unsigned long account_pipe_buffers(struct user_struct *user, static bool too_many_pipe_buffers_soft(unsigned long user_bufs) { - return pipe_user_pages_soft && user_bufs >= pipe_user_pages_soft; + unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft); + + return soft_limit && user_bufs > soft_limit; } static bool too_many_pipe_buffers_hard(unsigned long user_bufs) { - return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard; + unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard); + + return hard_limit && user_bufs > hard_limit; +} + +static bool is_unprivileged_user(void) +{ + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); } struct pipe_inode_info *alloc_pipe_info(void) @@ -624,22 +628,23 @@ struct pipe_inode_info *alloc_pipe_info(void) unsigned long pipe_bufs = PIPE_DEF_BUFFERS; struct user_struct *user = get_current_user(); unsigned long user_bufs; + unsigned int max_size = READ_ONCE(pipe_max_size); pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT); if (pipe == NULL) goto out_free_uid; - if (pipe_bufs * PAGE_SIZE > pipe_max_size && !capable(CAP_SYS_RESOURCE)) - pipe_bufs = pipe_max_size >> PAGE_SHIFT; + if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE)) + pipe_bufs = max_size >> PAGE_SHIFT; user_bufs = account_pipe_buffers(user, 0, pipe_bufs); - if (too_many_pipe_buffers_soft(user_bufs)) { + if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) { user_bufs = account_pipe_buffers(user, pipe_bufs, 1); pipe_bufs = 1; } - if (too_many_pipe_buffers_hard(user_bufs)) + if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user()) goto out_revert_acct; pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer), @@ -1020,18 +1025,16 @@ const struct file_operations pipefifo_fops = { * Currently we rely on the pipe array holding a power-of-2 number * of pages. Returns 0 on error. */ -unsigned int round_pipe_size(unsigned int size) +unsigned int round_pipe_size(unsigned long size) { - unsigned long nr_pages; - - if (size < pipe_min_size) - size = pipe_min_size; - - nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (nr_pages == 0) + if (size > (1U << 31)) return 0; - return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; + /* Minimum pipe size, as required by POSIX */ + if (size < PAGE_SIZE) + return PAGE_SIZE; + + return roundup_pow_of_two(size); } /* @@ -1046,8 +1049,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) long ret = 0; size = round_pipe_size(arg); - if (size == 0) - return -EINVAL; nr_pages = size >> PAGE_SHIFT; if (!nr_pages) @@ -1069,7 +1070,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) if (nr_pages > pipe->buffers && (too_many_pipe_buffers_hard(user_bufs) || too_many_pipe_buffers_soft(user_bufs)) && - !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { + is_unprivileged_user()) { ret = -EPERM; goto out_revert_acct; } @@ -1125,16 +1126,6 @@ out_revert_acct: } /* - * This should work even if CONFIG_PROC_FS isn't set, as proc_dopipe_max_size - * will return an error. - */ -int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, - size_t *lenp, loff_t *ppos) -{ - return proc_dopipe_max_size(table, write, buf, lenp, ppos); -} - -/* * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same * location, so checking ->i_pipe is not enough to verify that this is a * pipe. diff --git a/fs/proc/array.c b/fs/proc/array.c index d67a72dcb92c..598803576e4c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -736,16 +736,10 @@ static int children_seq_open(struct inode *inode, struct file *file) return ret; } -int children_seq_release(struct inode *inode, struct file *file) -{ - seq_release(inode, file); - return 0; -} - const struct file_operations proc_tid_children_operations = { .open = children_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = children_seq_release, + .release = seq_release, }; #endif /* CONFIG_PROC_CHILDREN */ diff --git a/fs/proc/base.c b/fs/proc/base.c index 60316b52d659..9298324325ed 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -75,6 +75,7 @@ #include <linux/ptrace.h> #include <linux/tracehook.h> #include <linux/printk.h> +#include <linux/cache.h> #include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/audit.h> @@ -100,6 +101,8 @@ #include "internal.h" #include "fd.h" +#include "../../lib/kstrtox.h" + /* NOTE: * Implementing inode permission operations in /proc is almost * certainly an error. Permission checks need to happen during @@ -110,8 +113,8 @@ * in /proc for a task before it execs a suid executable. */ -static u8 nlink_tid; -static u8 nlink_tgid; +static u8 nlink_tid __ro_after_init; +static u8 nlink_tgid __ro_after_init; struct pid_entry { const char *name; @@ -1370,7 +1373,7 @@ static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf, task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; - WRITE_ONCE(task->fail_nth, n); + task->fail_nth = n; put_task_struct(task); return count; @@ -1386,8 +1389,7 @@ static ssize_t proc_fail_nth_read(struct file *file, char __user *buf, task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; - len = snprintf(numbuf, sizeof(numbuf), "%u\n", - READ_ONCE(task->fail_nth)); + len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth); len = simple_read_from_buffer(buf, count, ppos, numbuf, len); put_task_struct(task); @@ -1907,8 +1909,33 @@ end_instantiate: static int dname_to_vma_addr(struct dentry *dentry, unsigned long *start, unsigned long *end) { - if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) + const char *str = dentry->d_name.name; + unsigned long long sval, eval; + unsigned int len; + + len = _parse_integer(str, 16, &sval); + if (len & KSTRTOX_OVERFLOW) + return -EINVAL; + if (sval != (unsigned long)sval) return -EINVAL; + str += len; + + if (*str != '-') + return -EINVAL; + str++; + + len = _parse_integer(str, 16, &eval); + if (len & KSTRTOX_OVERFLOW) + return -EINVAL; + if (eval != (unsigned long)eval) + return -EINVAL; + str += len; + + if (*str != '\0') + return -EINVAL; + + *start = sval; + *end = eval; return 0; } @@ -2000,9 +2027,9 @@ out: } struct map_files_info { + unsigned long start; + unsigned long end; fmode_t mode; - unsigned int len; - unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ }; /* @@ -2172,10 +2199,9 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) if (++pos <= ctx->pos) continue; + info.start = vma->vm_start; + info.end = vma->vm_end; info.mode = vma->vm_file->f_mode; - info.len = snprintf(info.name, - sizeof(info.name), "%lx-%lx", - vma->vm_start, vma->vm_end); if (flex_array_put(fa, i++, &info, GFP_KERNEL)) BUG(); } @@ -2183,9 +2209,13 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) up_read(&mm->mmap_sem); for (i = 0; i < nr_files; i++) { + char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ + unsigned int len; + p = flex_array_get(fa, i); + len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end); if (!proc_fill_cache(file, ctx, - p->name, p->len, + buf, len, proc_map_files_instantiate, task, (void *)(unsigned long)p->mode)) @@ -3018,11 +3048,11 @@ static const struct inode_operations proc_tgid_base_inode_operations = { static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) { struct dentry *dentry, *leader, *dir; - char buf[PROC_NUMBUF]; + char buf[10 + 1]; struct qstr name; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", pid); + name.len = snprintf(buf, sizeof(buf), "%u", pid); /* no ->d_hash() rejects on procfs */ dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { @@ -3034,7 +3064,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) return; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", tgid); + name.len = snprintf(buf, sizeof(buf), "%u", tgid); leader = d_hash_and_lookup(mnt->mnt_root, &name); if (!leader) goto out; @@ -3046,7 +3076,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) goto out_put_leader; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", pid); + name.len = snprintf(buf, sizeof(buf), "%u", pid); dentry = d_hash_and_lookup(dir, &name); if (dentry) { d_invalidate(dentry); @@ -3225,14 +3255,14 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { - char name[PROC_NUMBUF]; + char name[10 + 1]; int len; cond_resched(); if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) continue; - len = snprintf(name, sizeof(name), "%d", iter.tgid); + len = snprintf(name, sizeof(name), "%u", iter.tgid); ctx->pos = iter.tgid + TGID_OFFSET; if (!proc_fill_cache(file, ctx, name, len, proc_pid_instantiate, iter.task, NULL)) { @@ -3560,10 +3590,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { - char name[PROC_NUMBUF]; + char name[10 + 1]; int len; tid = task_pid_nr_ns(task, ns); - len = snprintf(name, sizeof(name), "%d", tid); + len = snprintf(name, sizeof(name), "%u", tid); if (!proc_fill_cache(file, ctx, name, len, proc_task_instantiate, task, NULL)) { /* returning this tgid failed, save it as the first diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index 290ba85cb900..a8ac48aebd59 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -55,8 +55,7 @@ static int show_console_dev(struct seq_file *m, void *v) if (dev) seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev)); - seq_printf(m, "\n"); - + seq_putc(m, '\n'); return 0; } diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 96fc70225e54..6b80cd1e419a 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -236,7 +236,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, for (fd = ctx->pos - 2; fd < files_fdtable(files)->max_fds; fd++, ctx->pos++) { - char name[PROC_NUMBUF]; + char name[10 + 1]; int len; if (!fcheck_files(files, fd)) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 793a67574668..5d709fa8f3a2 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -28,7 +28,7 @@ static DEFINE_RWLOCK(proc_subdir_lock); -static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) +static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len) { if (len < de->namelen) return -1; @@ -60,7 +60,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, struct proc_dir_entry *de = rb_entry(node, struct proc_dir_entry, subdir_node); - int result = proc_match(len, name, de); + int result = proc_match(name, de, len); if (result < 0) node = node->rb_left; @@ -84,7 +84,7 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir, struct proc_dir_entry *this = rb_entry(*new, struct proc_dir_entry, subdir_node); - int result = proc_match(de->namelen, de->name, this); + int result = proc_match(de->name, this, de->namelen); parent = *new; if (result < 0) @@ -211,8 +211,8 @@ void proc_free_inum(unsigned int inum) * Don't create negative dentries here, return -ENOENT by hand * instead. */ -struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, - struct dentry *dentry) +struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, + struct proc_dir_entry *de) { struct inode *inode; @@ -235,7 +235,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - return proc_lookup_de(PDE(dir), dir, dentry); + return proc_lookup_de(dir, dentry, PDE(dir)); } /* @@ -247,8 +247,8 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, * value of the readdir() call, as long as it's non-negative * for success.. */ -int proc_readdir_de(struct proc_dir_entry *de, struct file *file, - struct dir_context *ctx) +int proc_readdir_de(struct file *file, struct dir_context *ctx, + struct proc_dir_entry *de) { int i; @@ -292,7 +292,7 @@ int proc_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); - return proc_readdir_de(PDE(inode), file, ctx); + return proc_readdir_de(file, ctx, PDE(inode)); } /* diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8dacaabb9f37..6e8724958116 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -5,6 +5,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ +#include <linux/cache.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/kernel.h> @@ -52,7 +53,7 @@ static void proc_evict_inode(struct inode *inode) } } -static struct kmem_cache * proc_inode_cachep; +static struct kmem_cache *proc_inode_cachep __ro_after_init; static struct inode *proc_alloc_inode(struct super_block *sb) { @@ -128,12 +129,12 @@ enum {BIAS = -1U<<31}; static inline int use_pde(struct proc_dir_entry *pde) { - return atomic_inc_unless_negative(&pde->in_use); + return likely(atomic_inc_unless_negative(&pde->in_use)); } static void unuse_pde(struct proc_dir_entry *pde) { - if (atomic_dec_return(&pde->in_use) == BIAS) + if (unlikely(atomic_dec_return(&pde->in_use) == BIAS)) complete(pde->pde_unload_completion); } @@ -166,7 +167,7 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) spin_lock(&pde->pde_unload_lock); /* After ->release. */ list_del(&pdeo->lh); - if (pdeo->c) + if (unlikely(pdeo->c)) complete(pdeo->c); kfree(pdeo); } @@ -420,7 +421,7 @@ static const char *proc_get_link(struct dentry *dentry, struct delayed_call *done) { struct proc_dir_entry *pde = PDE(inode); - if (unlikely(!use_pde(pde))) + if (!use_pde(pde)) return ERR_PTR(-EINVAL); set_delayed_call(done, proc_put_link, pde); return pde->data; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 4a67188c8d74..d697c8ab0a14 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -31,24 +31,28 @@ struct mempolicy; * subdir_node is used to build the rb tree "subdir" of the parent. */ struct proc_dir_entry { + /* + * number of callers into module in progress; + * negative -> it's going away RSN + */ + atomic_t in_use; + atomic_t count; /* use count */ + struct list_head pde_openers; /* who did ->open, but not ->release */ + /* protects ->pde_openers and all struct pde_opener instances */ + spinlock_t pde_unload_lock; + struct completion *pde_unload_completion; + const struct inode_operations *proc_iops; + const struct file_operations *proc_fops; + void *data; unsigned int low_ino; - umode_t mode; nlink_t nlink; kuid_t uid; kgid_t gid; loff_t size; - const struct inode_operations *proc_iops; - const struct file_operations *proc_fops; struct proc_dir_entry *parent; struct rb_root_cached subdir; struct rb_node subdir_node; - void *data; - atomic_t count; /* use count */ - atomic_t in_use; /* number of callers into module in progress; */ - /* negative -> it's going away RSN */ - struct completion *pde_unload_completion; - struct list_head pde_openers; /* who did ->open, but not ->release */ - spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ + umode_t mode; u8 namelen; char name[]; } __randomize_layout; @@ -149,10 +153,9 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i * generic.c */ extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); -extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, - struct dentry *); +struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *); extern int proc_readdir(struct file *, struct dir_context *); -extern int proc_readdir_de(struct proc_dir_entry *, struct file *, struct dir_context *); +int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *); static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) { diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 4bc85cb8be6a..e8a93bc8285d 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -512,23 +512,15 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) return -EFAULT; } else { if (kern_addr_valid(start)) { - unsigned long n; - /* * Using bounce buffer to bypass the * hardened user copy kernel text checks. */ - memcpy(buf, (char *) start, tsz); - n = copy_to_user(buffer, buf, tsz); - /* - * We cannot distinguish between fault on source - * and fault on destination. When this happens - * we clear too and hope it will trigger the - * EFAULT again. - */ - if (n) { - if (clear_user(buffer + tsz - n, - n)) + if (probe_kernel_read(buf, (void *) start, tsz)) { + if (clear_user(buffer, tsz)) + return -EFAULT; + } else { + if (copy_to_user(buffer, buf, tsz)) return -EFAULT; } } else { diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index a2bf369c923d..68c06ae7888c 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -135,7 +135,7 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir, de = ERR_PTR(-ENOENT); net = get_proc_task_net(dir); if (net != NULL) { - de = proc_lookup_de(net->proc_net, dir, dentry); + de = proc_lookup_de(dir, dentry, net->proc_net); put_net(net); } return de; @@ -172,7 +172,7 @@ static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx) ret = -EINVAL; net = get_proc_task_net(file_inode(file)); if (net != NULL) { - ret = proc_readdir_de(net->proc_net, file, ctx); + ret = proc_readdir_de(file, ctx, net->proc_net); put_net(net); } return ret; diff --git a/fs/proc/self.c b/fs/proc/self.c index 31326bb23b8b..4d7d061696b3 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/cache.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/pid_namespace.h> @@ -17,11 +18,11 @@ static const char *proc_self_get_link(struct dentry *dentry, if (!tgid) return ERR_PTR(-ENOENT); - /* 11 for max length of signed int in decimal + NULL term */ - name = kmalloc(12, dentry ? GFP_KERNEL : GFP_ATOMIC); + /* max length of unsigned int in decimal + NULL term */ + name = kmalloc(10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC); if (unlikely(!name)) return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD); - sprintf(name, "%d", tgid); + sprintf(name, "%u", tgid); set_delayed_call(done, kfree_link, name); return name; } @@ -30,7 +31,7 @@ static const struct inode_operations proc_self_inode_operations = { .get_link = proc_self_get_link, }; -static unsigned self_inum; +static unsigned self_inum __ro_after_init; int proc_setup_self(struct super_block *s) { diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 339e4c1c044d..ec6d2983a5cb 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -47,8 +47,11 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) if (hiwater_rss < mm->hiwater_rss) hiwater_rss = mm->hiwater_rss; - text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; - lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; + /* split executable areas between text and lib */ + text = PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK); + text = min(text, mm->exec_vm << PAGE_SHIFT); + lib = (mm->exec_vm << PAGE_SHIFT) - text; + swap = get_mm_counter(mm, MM_SWAPENTS); seq_printf(m, "VmPeak:\t%8lu kB\n" @@ -76,7 +79,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) file << (PAGE_SHIFT-10), shmem << (PAGE_SHIFT-10), mm->data_vm << (PAGE_SHIFT-10), - mm->stack_vm << (PAGE_SHIFT-10), text, lib, + mm->stack_vm << (PAGE_SHIFT-10), + text >> 10, + lib >> 10, mm_pgtables_bytes(mm) >> 10, swap << (PAGE_SHIFT-10)); hugetlb_report_usage(m, mm); @@ -977,14 +982,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = *pmdp; + pmd_t old, pmd = *pmdp; if (pmd_present(pmd)) { /* See comment in change_huge_pmd() */ - pmdp_invalidate(vma, addr, pmdp); - if (pmd_dirty(*pmdp)) + old = pmdp_invalidate(vma, addr, pmdp); + if (pmd_dirty(old)) pmd = pmd_mkdirty(pmd); - if (pmd_young(*pmdp)) + if (pmd_young(old)) pmd = pmd_mkyoung(pmd); pmd = pmd_wrprotect(pmd); diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index b813e3b529f2..9d2efaca499f 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/cache.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/pid_namespace.h> @@ -18,11 +19,10 @@ static const char *proc_thread_self_get_link(struct dentry *dentry, if (!pid) return ERR_PTR(-ENOENT); - name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, - dentry ? GFP_KERNEL : GFP_ATOMIC); + name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC); if (unlikely(!name)) return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD); - sprintf(name, "%d/task/%d", tgid, pid); + sprintf(name, "%u/task/%u", tgid, pid); set_delayed_call(done, kfree_link, name); return name; } @@ -31,7 +31,7 @@ static const struct inode_operations proc_thread_self_inode_operations = { .get_link = proc_thread_self_get_link, }; -static unsigned thread_self_inum; +static unsigned thread_self_inum __ro_after_init; int proc_setup_thread_self(struct super_block *s) { diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 885d445afa0d..a45f0af22a60 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -1178,18 +1178,16 @@ fs_initcall(vmcore_init); /* Cleanup function for vmcore module. */ void vmcore_cleanup(void) { - struct list_head *pos, *next; - if (proc_vmcore) { proc_remove(proc_vmcore); proc_vmcore = NULL; } /* clear the vmcore list. */ - list_for_each_safe(pos, next, &vmcore_list) { + while (!list_empty(&vmcore_list)) { struct vmcore *m; - m = list_entry(pos, struct vmcore, list); + m = list_first_entry(&vmcore_list, struct vmcore, list); list_del(&m->list); kfree(m); } diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 691032107f8c..c3129b131e4d 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -41,7 +41,6 @@ #include <linux/timer.h> #include <linux/slab.h> #include <linux/uaccess.h> -#include <linux/hardirq.h> #include <linux/jiffies.h> #include <linux/workqueue.h> diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig index 7cd46666ba2c..86e71c0caf48 100644 --- a/fs/reiserfs/Kconfig +++ b/fs/reiserfs/Kconfig @@ -57,8 +57,7 @@ config REISERFS_FS_XATTR depends on REISERFS_FS help Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - <http://acl.bestbits.at/> for details). + the kernel or by users (see the attr(5) manual page for details). If unsure, say N. @@ -70,9 +69,6 @@ config REISERFS_FS_POSIX_ACL Posix Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - To learn more about Access Control Lists, visit the Posix ACLs for - Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N config REISERFS_FS_SECURITY diff --git a/fs/seq_file.c b/fs/seq_file.c index 4be761c1a03d..eea09f6d8830 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -181,8 +181,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) * if request is to read from zero offset, reset iterator to first * record as it might have been already advanced by previous requests */ - if (*ppos == 0) + if (*ppos == 0) { m->index = 0; + m->version = 0; + m->count = 0; + } /* Don't assume *ppos is where we left it */ if (unlikely(*ppos != m->read_pos)) { diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 2b67bda2021b..58eba92a0e41 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/sysfs/dir.c - sysfs core and dir operation implementation * @@ -5,12 +6,10 @@ * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo <teheo@suse.de> * - * This file is released under the GPLv2. - * * Please see Documentation/filesystems/sysfs.txt for more information. */ -#undef DEBUG +#define pr_fmt(fmt) "sysfs: " fmt #include <linux/fs.h> #include <linux/kobject.h> @@ -27,8 +26,8 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name) if (buf) kernfs_path(parent, buf, PATH_MAX); - WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n", - buf, name); + pr_warn("cannot create duplicate filename '%s/%s'\n", buf, name); + dump_stack(); kfree(buf); } diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 39c75a86c67f..5c13f29bfcdb 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/sysfs/file.c - sysfs regular (text) file implementation * @@ -5,14 +6,11 @@ * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo <teheo@suse.de> * - * This file is released under the GPLv2. - * * Please see Documentation/filesystems/sysfs.txt for more information. */ #include <linux/module.h> #include <linux/kobject.h> -#include <linux/kallsyms.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/mutex.h> @@ -70,8 +68,8 @@ static int sysfs_kf_seq_show(struct seq_file *sf, void *v) * indicate truncated result or overflow in normal use cases. */ if (count >= (ssize_t)PAGE_SIZE) { - print_symbol("fill_read_buffer: %s returned bad count\n", - (unsigned long)ops->show); + printk("fill_read_buffer: %pS returned bad count\n", + ops->show); /* Try to struggle along */ count = PAGE_SIZE - 1; } diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index ac2de0ed69ad..4802ec0e1e3a 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/sysfs/group.c - Operations for adding/removing multiple files at once. * @@ -5,9 +6,6 @@ * Copyright (c) 2003 Open Source Development Lab * Copyright (c) 2013 Greg Kroah-Hartman * Copyright (c) 2013 The Linux Foundation - * - * This file is released undert the GPL v2. - * */ #include <linux/kobject.h> @@ -406,6 +404,6 @@ int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, kernfs_put(entry); kernfs_put(target); - return IS_ERR(link) ? PTR_ERR(link) : 0; + return PTR_ERR_OR_ZERO(link); } EXPORT_SYMBOL_GPL(__compat_only_sysfs_link_entry_to_kobj); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index fb49510c5dcf..b428d317ae92 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/sysfs/symlink.c - operations for initializing and mounting sysfs * @@ -5,13 +6,9 @@ * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo <teheo@suse.de> * - * This file is released under the GPLv2. - * * Please see Documentation/filesystems/sysfs.txt for more information. */ -#define DEBUG - #include <linux/fs.h> #include <linux/magic.h> #include <linux/mount.h> diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index aecb15f84557..8664db25a9a6 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/sysfs/symlink.c - sysfs symlink implementation * @@ -5,8 +6,6 @@ * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo <teheo@suse.de> * - * This file is released under the GPLv2. - * * Please see Documentation/filesystems/sysfs.txt for more information. */ diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 0e2f1cccb812..d098e015fcc9 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -1,11 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/sysfs/sysfs.h - sysfs internal header file * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo <teheo@suse.de> - * - * This file is released under the GPLv2. */ #ifndef __SYSFS_INTERNAL_H diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index a2ea4856e67b..9d7fb88e172e 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1138,38 +1138,24 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, struct ubifs_info *c = dir->i_sb->s_fs_info; int err, len = strlen(symname); int sz_change = CALC_DENT_SIZE(len); - struct fscrypt_str disk_link = FSTR_INIT((char *)symname, len + 1); - struct fscrypt_symlink_data *sd = NULL; + struct fscrypt_str disk_link; struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, .new_ino_d = ALIGN(len, 8), .dirtied_ino = 1 }; struct fscrypt_name nm; - if (ubifs_crypt_is_encrypted(dir)) { - err = fscrypt_get_encryption_info(dir); - if (err) - goto out_budg; - - if (!fscrypt_has_encryption_key(dir)) { - err = -EPERM; - goto out_budg; - } + dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry, + symname, dir->i_ino); - disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + - sizeof(struct fscrypt_symlink_data)); - } + err = fscrypt_prepare_symlink(dir, symname, len, UBIFS_MAX_INO_DATA, + &disk_link); + if (err) + return err; /* * Budget request settings: new inode, new direntry and changing parent * directory inode. */ - - dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry, - symname, dir->i_ino); - - if (disk_link.len > UBIFS_MAX_INO_DATA) - return -ENAMETOOLONG; - err = ubifs_budget_space(c, &req); if (err) return err; @@ -1191,38 +1177,20 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, goto out_inode; } - if (ubifs_crypt_is_encrypted(dir)) { - struct qstr istr = QSTR_INIT(symname, len); - struct fscrypt_str ostr; - - sd = kzalloc(disk_link.len, GFP_NOFS); - if (!sd) { - err = -ENOMEM; - goto out_inode; - } - - ostr.name = sd->encrypted_path; - ostr.len = disk_link.len; - - err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); - if (err) { - kfree(sd); + if (IS_ENCRYPTED(inode)) { + disk_link.name = ui->data; /* encrypt directly into ui->data */ + err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link); + if (err) goto out_inode; - } - - sd->len = cpu_to_le16(ostr.len); - disk_link.name = (char *)sd; } else { + memcpy(ui->data, disk_link.name, disk_link.len); inode->i_link = ui->data; } - memcpy(ui->data, disk_link.name, disk_link.len); - ((char *)ui->data)[disk_link.len - 1] = '\0'; - /* * The terminating zero byte is not written to the flash media and it * is put just to make later in-memory string processing simpler. Thus, - * data length is @len, not @len + %1. + * data length is @disk_link.len - 1, not @disk_link.len. */ ui->data_len = disk_link.len - 1; inode->i_size = ubifs_inode(inode)->ui_size = disk_link.len - 1; @@ -1240,11 +1208,10 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, goto out_cancel; mutex_unlock(&dir_ui->ui_mutex); - ubifs_release_budget(c, &req); insert_inode_hash(inode); d_instantiate(dentry, inode); - fscrypt_free_filename(&nm); - return 0; + err = 0; + goto out_fname; out_cancel: dir->i_size -= sz_change; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 9fe194a4fa9b..cf348ba99238 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1629,49 +1629,17 @@ static const char *ubifs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - int err; - struct fscrypt_symlink_data *sd; struct ubifs_inode *ui = ubifs_inode(inode); - struct fscrypt_str cstr; - struct fscrypt_str pstr; - if (!ubifs_crypt_is_encrypted(inode)) + if (!IS_ENCRYPTED(inode)) return ui->data; if (!dentry) return ERR_PTR(-ECHILD); - err = fscrypt_get_encryption_info(inode); - if (err) - return ERR_PTR(err); - - sd = (struct fscrypt_symlink_data *)ui->data; - cstr.name = sd->encrypted_path; - cstr.len = le16_to_cpu(sd->len); - - if (cstr.len == 0) - return ERR_PTR(-ENOENT); - - if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > ui->data_len) - return ERR_PTR(-EIO); - - err = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); - if (err) - return ERR_PTR(err); - - err = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); - if (err) { - fscrypt_fname_free_buffer(&pstr); - return ERR_PTR(err); - } - - pstr.name[pstr.len] = '\0'; - - set_delayed_call(done, kfree_link, pstr.name); - return pstr.name; + return fscrypt_get_symlink(inode, ui->data, ui->data_len, done); } - const struct address_space_operations ubifs_file_address_operations = { .readpage = ubifs_readpage, .writepage = ubifs_writepage, diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 0beb285b143d..b16ef162344a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -379,9 +379,7 @@ out: } done: clear_inode(inode); -#ifdef CONFIG_UBIFS_FS_ENCRYPTION - fscrypt_put_encryption_info(inode, NULL); -#endif + fscrypt_put_encryption_info(inode); } static void ubifs_dirty_inode(struct inode *inode, int flags) diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index 14626b34d13e..0927a4b2ecaf 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c @@ -62,6 +62,11 @@ udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src) dest->tv_sec -= offset * 60; dest->tv_nsec = 1000 * (src.centiseconds * 10000 + src.hundredsOfMicroseconds * 100 + src.microseconds); + /* + * Sanitize nanosecond field since reportedly some filesystems are + * recorded with bogus sub-second values. + */ + dest->tv_nsec %= NSEC_PER_SEC; return dest; } diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 50dfce000864..b721d0bda5e5 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -429,7 +429,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx) unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); - bool need_revalidate = inode_cmp_iversion(inode, file->f_version); + bool need_revalidate = !inode_eq_iversion(inode, file->f_version); unsigned flags = UFS_SB(sb)->s_flags; UFSD("BEGIN\n"); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index b6ba80e05bff..8254b8b3690f 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1467,11 +1467,14 @@ static void init_once(void *foo) static int __init init_inodecache(void) { - ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", - sizeof(struct ufs_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), - init_once); + ufs_inode_cachep = kmem_cache_create_usercopy("ufs_inode_cache", + sizeof(struct ufs_inode_info), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| + SLAB_ACCOUNT), + offsetof(struct ufs_inode_info, i_u1.i_symlink), + sizeof_field(struct ufs_inode_info, + i_u1.i_symlink), + init_once); if (ufs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 743eaa646898..87a13a7c8270 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -294,10 +294,13 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, * pmd_trans_unstable) of the pmd. */ _pmd = READ_ONCE(*pmd); - if (!pmd_present(_pmd)) + if (pmd_none(_pmd)) goto out; ret = false; + if (!pmd_present(_pmd)) + goto out; + if (pmd_trans_huge(_pmd)) goto out; @@ -985,24 +988,14 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *ctx, struct uffd_msg *msg) { int fd; - struct file *file; - unsigned int flags = new->flags & UFFD_SHARED_FCNTL_FLAGS; - fd = get_unused_fd_flags(flags); + fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, new, + O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS)); if (fd < 0) return fd; - file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, new, - O_RDWR | flags); - if (IS_ERR(file)) { - put_unused_fd(fd); - return PTR_ERR(file); - } - - fd_install(fd, file); msg->arg.reserved.reserved1 = 0; msg->arg.fork.ufd = fd; - return 0; } @@ -1884,24 +1877,10 @@ static void init_once_userfaultfd_ctx(void *mem) seqcount_init(&ctx->refile_seq); } -/** - * userfaultfd_file_create - Creates a userfaultfd file pointer. - * @flags: Flags for the userfaultfd file. - * - * This function creates a userfaultfd file pointer, w/out installing - * it into the fd table. This is useful when the userfaultfd file is - * used during the initialization of data structures that require - * extra setup after the userfaultfd creation. So the userfaultfd - * creation is split into the file pointer creation phase, and the - * file descriptor installation phase. In this way races with - * userspace closing the newly installed file descriptor can be - * avoided. Returns a userfaultfd file pointer, or a proper error - * pointer. - */ -static struct file *userfaultfd_file_create(int flags) +SYSCALL_DEFINE1(userfaultfd, int, flags) { - struct file *file; struct userfaultfd_ctx *ctx; + int fd; BUG_ON(!current->mm); @@ -1909,14 +1888,12 @@ static struct file *userfaultfd_file_create(int flags) BUILD_BUG_ON(UFFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(UFFD_NONBLOCK != O_NONBLOCK); - file = ERR_PTR(-EINVAL); if (flags & ~UFFD_SHARED_FCNTL_FLAGS) - goto out; + return -EINVAL; - file = ERR_PTR(-ENOMEM); ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL); if (!ctx) - goto out; + return -ENOMEM; atomic_set(&ctx->refcount, 1); ctx->flags = flags; @@ -1927,39 +1904,13 @@ static struct file *userfaultfd_file_create(int flags) /* prevent the mm struct to be freed */ mmgrab(ctx->mm); - file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx, - O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); - if (IS_ERR(file)) { + fd = anon_inode_getfd("[userfaultfd]", &userfaultfd_fops, ctx, + O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); + if (fd < 0) { mmdrop(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); } -out: - return file; -} - -SYSCALL_DEFINE1(userfaultfd, int, flags) -{ - int fd, error; - struct file *file; - - error = get_unused_fd_flags(flags & UFFD_SHARED_FCNTL_FLAGS); - if (error < 0) - return error; - fd = error; - - file = userfaultfd_file_create(flags); - if (IS_ERR(file)) { - error = PTR_ERR(file); - goto err_put_unused_fd; - } - fd_install(fd, file); - return fd; - -err_put_unused_fd: - put_unused_fd(fd); - - return error; } static int __init userfaultfd_init(void) diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index f42fcf1b5465..46bcf0e649f5 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -48,9 +48,6 @@ config XFS_POSIX_ACL POSIX Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N. config XFS_RT diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 46af6aa60a8e..a55f7a45fa78 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -118,8 +118,8 @@ xfs_mount_validate_sb( bool check_inprogress, bool check_version) { - u32 agcount = 0; - u32 rem; + uint32_t agcount = 0; + uint32_t rem; if (sbp->sb_magicnum != XFS_SB_MAGIC) { xfs_warn(mp, "bad magic number"); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8601275cc5e6..9ea08326f876 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1048,7 +1048,7 @@ __xfs_filemap_fault( if (IS_DAX(inode)) { pfn_t pfn; - ret = dax_iomap_fault(vmf, pe_size, &pfn, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &xfs_iomap_ops); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, pe_size, pfn); } else { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f3e0001f9992..7aba628dc527 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1666,9 +1666,12 @@ xfs_fs_fill_super( "DAX unsupported by block device. Turning off DAX."); mp->m_flags &= ~XFS_MOUNT_DAX; } - if (xfs_sb_version_hasreflink(&mp->m_sb)) + if (xfs_sb_version_hasreflink(&mp->m_sb)) { xfs_alert(mp, "DAX and reflink cannot be used together!"); + error = -EINVAL; + goto out_filestream_unmount; + } } if (mp->m_flags & XFS_MOUNT_DISCARD) { @@ -1681,15 +1684,18 @@ xfs_fs_fill_super( } } - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { - if (mp->m_sb.sb_rblocks) { - xfs_alert(mp, - "EXPERIMENTAL reverse mapping btree not compatible with realtime device!"); - error = -EINVAL; - goto out_filestream_unmount; - } + if (xfs_sb_version_hasreflink(&mp->m_sb) && mp->m_sb.sb_rblocks) { + xfs_alert(mp, + "reflink not compatible with realtime device!"); + error = -EINVAL; + goto out_filestream_unmount; + } + + if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) { xfs_alert(mp, - "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!"); + "reverse mapping btree not compatible with realtime device!"); + error = -EINVAL; + goto out_filestream_unmount; } error = xfs_mountfs(mp); diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index fcc5dfc70aa0..8cee8e8050e3 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -44,6 +44,12 @@ extern void xfs_qm_exit(void); # define XFS_REALTIME_STRING #endif +#ifdef CONFIG_XFS_ONLINE_SCRUB +# define XFS_SCRUB_STRING "scrub, " +#else +# define XFS_SCRUB_STRING +#endif + #ifdef DEBUG # define XFS_DBG_STRING "debug" #else @@ -54,6 +60,7 @@ extern void xfs_qm_exit(void); #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ XFS_SECURITY_STRING \ XFS_REALTIME_STRING \ + XFS_SCRUB_STRING \ XFS_DBG_STRING /* DBG must be last */ struct xfs_inode; |