summaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-03-26 18:49:48 +0100
committerIngo Molnar <mingo@elte.hu>2009-03-26 18:49:48 +0100
commit18ffa418aead13c56515ac74cd26105102128aca (patch)
tree2096ea8db3b2594bd25ad39a70edc691219f669b /fs/ocfs2
parentab76f3d771590d5c89faa3219559c5d3fc0ce0c2 (diff)
parent8e0ee43bc2c3e19db56a4adaa9a9b04ce885cd84 (diff)
downloadlinux-18ffa418aead13c56515ac74cd26105102128aca.tar.bz2
Merge commit 'v2.6.29' into x86/setup-lzma
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/Kconfig85
-rw-r--r--fs/ocfs2/alloc.c33
-rw-r--r--fs/ocfs2/aops.c7
-rw-r--r--fs/ocfs2/dcache.c42
-rw-r--r--fs/ocfs2/dcache.h9
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c12
-rw-r--r--fs/ocfs2/dlm/dlmthread.c3
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c4
-rw-r--r--fs/ocfs2/dlmglue.c15
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/namei.c3
-rw-r--r--fs/ocfs2/ocfs2.h9
-rw-r--r--fs/ocfs2/ocfs2_fs.h6
-rw-r--r--fs/ocfs2/quota_global.c173
-rw-r--r--fs/ocfs2/super.c11
-rw-r--r--fs/ocfs2/xattr.c74
16 files changed, 269 insertions, 223 deletions
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
new file mode 100644
index 000000000000..701b7a3a872e
--- /dev/null
+++ b/fs/ocfs2/Kconfig
@@ -0,0 +1,85 @@
+config OCFS2_FS
+ tristate "OCFS2 file system support"
+ depends on NET && SYSFS
+ select CONFIGFS_FS
+ select JBD2
+ select CRC32
+ select QUOTA
+ select QUOTA_TREE
+ help
+ OCFS2 is a general purpose extent based shared disk cluster file
+ system with many similarities to ext3. It supports 64 bit inode
+ numbers, and has automatically extending metadata groups which may
+ also make it attractive for non-clustered use.
+
+ You'll want to install the ocfs2-tools package in order to at least
+ get "mount.ocfs2".
+
+ Project web page: http://oss.oracle.com/projects/ocfs2
+ Tools web page: http://oss.oracle.com/projects/ocfs2-tools
+ OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
+
+ For more information on OCFS2, see the file
+ <file:Documentation/filesystems/ocfs2.txt>.
+
+config OCFS2_FS_O2CB
+ tristate "O2CB Kernelspace Clustering"
+ depends on OCFS2_FS
+ default y
+ help
+ OCFS2 includes a simple kernelspace clustering package, the OCFS2
+ Cluster Base. It only requires a very small userspace component
+ to configure it. This comes with the standard ocfs2-tools package.
+ O2CB is limited to maintaining a cluster for OCFS2 file systems.
+ It cannot manage any other cluster applications.
+
+ It is always safe to say Y here, as the clustering method is
+ run-time selectable.
+
+config OCFS2_FS_USERSPACE_CLUSTER
+ tristate "OCFS2 Userspace Clustering"
+ depends on OCFS2_FS && DLM
+ default y
+ help
+ This option will allow OCFS2 to use userspace clustering services
+ in conjunction with the DLM in fs/dlm. If you are using a
+ userspace cluster manager, say Y here.
+
+ It is safe to say Y, as the clustering method is run-time
+ selectable.
+
+config OCFS2_FS_STATS
+ bool "OCFS2 statistics"
+ depends on OCFS2_FS
+ default y
+ help
+ This option allows some fs statistics to be captured. Enabling
+ this option may increase the memory consumption.
+
+config OCFS2_DEBUG_MASKLOG
+ bool "OCFS2 logging support"
+ depends on OCFS2_FS
+ default y
+ help
+ The ocfs2 filesystem has an extensive logging system. The system
+ allows selection of events to log via files in /sys/o2cb/logmask/.
+ This option will enlarge your kernel, but it allows debugging of
+ ocfs2 filesystem issues.
+
+config OCFS2_DEBUG_FS
+ bool "OCFS2 expensive checks"
+ depends on OCFS2_FS
+ default n
+ help
+ This option will enable expensive consistency checks. Enable
+ this option for debugging only as it is likely to decrease
+ performance of the filesystem.
+
+config OCFS2_FS_POSIX_ACL
+ bool "OCFS2 POSIX Access Control Lists"
+ depends on OCFS2_FS
+ select FS_POSIX_ACL
+ default n
+ help
+ Posix Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d861096c9d81..19e3a96aa02c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -176,7 +176,8 @@ static int ocfs2_dinode_insert_check(struct inode *inode,
BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
- (OCFS2_I(inode)->ip_clusters != rec->e_cpos),
+ (OCFS2_I(inode)->ip_clusters !=
+ le32_to_cpu(rec->e_cpos)),
"Device %s, asking for sparse allocation: inode %llu, "
"cpos %u, clusters %u\n",
osb->dev_str,
@@ -4796,6 +4797,29 @@ out:
return ret;
}
+static int ocfs2_replace_extent_rec(struct inode *inode,
+ handle_t *handle,
+ struct ocfs2_path *path,
+ struct ocfs2_extent_list *el,
+ int split_index,
+ struct ocfs2_extent_rec *split_rec)
+{
+ int ret;
+
+ ret = ocfs2_path_bh_journal_access(handle, inode, path,
+ path_num_items(path) - 1);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ el->l_recs[split_index] = *split_rec;
+
+ ocfs2_journal_dirty(handle, path_leaf_bh(path));
+out:
+ return ret;
+}
+
/*
* Mark part or all of the extent record at split_index in the leaf
* pointed to by path as written. This removes the unwritten
@@ -4885,7 +4909,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
if (ctxt.c_contig_type == CONTIG_NONE) {
if (ctxt.c_split_covers_rec)
- el->l_recs[split_index] = *split_rec;
+ ret = ocfs2_replace_extent_rec(inode, handle,
+ path, el,
+ split_index, split_rec);
else
ret = ocfs2_split_and_insert(inode, handle, path, et,
&last_eb_bh, split_index,
@@ -5390,6 +5416,9 @@ int ocfs2_remove_btree_range(struct inode *inode,
goto out;
}
+ vfs_dq_free_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(inode->i_sb, len));
+
ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
dealloc);
if (ret) {
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index a067a6cffb01..8e1709a679b7 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -227,7 +227,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
size = i_size_read(inode);
if (size > PAGE_CACHE_SIZE ||
- size > ocfs2_max_inline_data(inode->i_sb)) {
+ size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) {
ocfs2_error(inode->i_sb,
"Inode %llu has with inline data has bad size: %Lu",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1555,6 +1555,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
int ret, written = 0;
loff_t end = pos + len;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ struct ocfs2_dinode *di = NULL;
mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",
(unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,
@@ -1587,7 +1588,9 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
/*
* Check whether the write can fit.
*/
- if (mmap_page || end > ocfs2_max_inline_data(inode->i_sb))
+ di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
+ if (mmap_page ||
+ end > ocfs2_max_inline_data_with_xattr(inode->i_sb, di))
return 0;
do_inline_write:
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b1cc7c381e88..e9d7c2038c0f 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -38,6 +38,7 @@
#include "dlmglue.h"
#include "file.h"
#include "inode.h"
+#include "super.h"
static int ocfs2_dentry_revalidate(struct dentry *dentry,
@@ -294,6 +295,34 @@ out_attach:
return ret;
}
+static DEFINE_SPINLOCK(dentry_list_lock);
+
+/* We limit the number of dentry locks to drop in one go. We have
+ * this limit so that we don't starve other users of ocfs2_wq. */
+#define DL_INODE_DROP_COUNT 64
+
+/* Drop inode references from dentry locks */
+void ocfs2_drop_dl_inodes(struct work_struct *work)
+{
+ struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
+ dentry_lock_work);
+ struct ocfs2_dentry_lock *dl;
+ int drop_count = DL_INODE_DROP_COUNT;
+
+ spin_lock(&dentry_list_lock);
+ while (osb->dentry_lock_list && drop_count--) {
+ dl = osb->dentry_lock_list;
+ osb->dentry_lock_list = dl->dl_next;
+ spin_unlock(&dentry_list_lock);
+ iput(dl->dl_inode);
+ kfree(dl);
+ spin_lock(&dentry_list_lock);
+ }
+ if (osb->dentry_lock_list)
+ queue_work(ocfs2_wq, &osb->dentry_lock_work);
+ spin_unlock(&dentry_list_lock);
+}
+
/*
* ocfs2_dentry_iput() and friends.
*
@@ -318,16 +347,23 @@ out_attach:
static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl)
{
- iput(dl->dl_inode);
ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
ocfs2_lock_res_free(&dl->dl_lockres);
- kfree(dl);
+
+ /* We leave dropping of inode reference to ocfs2_wq as that can
+ * possibly lead to inode deletion which gets tricky */
+ spin_lock(&dentry_list_lock);
+ if (!osb->dentry_lock_list)
+ queue_work(ocfs2_wq, &osb->dentry_lock_work);
+ dl->dl_next = osb->dentry_lock_list;
+ osb->dentry_lock_list = dl;
+ spin_unlock(&dentry_list_lock);
}
void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl)
{
- int unlock = 0;
+ int unlock;
BUG_ON(dl->dl_count == 0);
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index c091c34d9883..d06e16c06640 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -29,8 +29,13 @@
extern struct dentry_operations ocfs2_dentry_ops;
struct ocfs2_dentry_lock {
+ /* Use count of dentry lock */
unsigned int dl_count;
- u64 dl_parent_blkno;
+ union {
+ /* Linked list of dentry locks to release */
+ struct ocfs2_dentry_lock *dl_next;
+ u64 dl_parent_blkno;
+ };
/*
* The ocfs2_dentry_lock keeps an inode reference until
@@ -47,6 +52,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl);
+void ocfs2_drop_dl_inodes(struct work_struct *work);
+
struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
int skip_unhashed);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 54e182a27caf..0a2813947853 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1849,12 +1849,12 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
if (!mle) {
if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN &&
res->owner != assert->node_idx) {
- mlog(ML_ERROR, "assert_master from "
- "%u, but current owner is "
- "%u! (%.*s)\n",
- assert->node_idx, res->owner,
- namelen, name);
- goto kill;
+ mlog(ML_ERROR, "DIE! Mastery assert from %u, "
+ "but current owner is %u! (%.*s)\n",
+ assert->node_idx, res->owner, namelen,
+ name);
+ __dlm_print_one_lock_resource(res);
+ BUG();
}
} else if (mle->type != DLM_MLE_MIGRATION) {
if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index d1295203029f..4060bb328bc8 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -181,8 +181,7 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock);
/* This ensures that clear refmap is sent after the set */
- __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG |
- DLM_LOCK_RES_MIGRATING));
+ __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
spin_unlock(&res->spinlock);
/* clear our bit from the master's refmap, ignore errors */
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 86ca085ef324..fcf879ed6930 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -117,11 +117,11 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
else
BUG_ON(res->owner == dlm->node_num);
- spin_lock(&dlm->spinlock);
+ spin_lock(&dlm->ast_lock);
/* We want to be sure that we're not freeing a lock
* that still has AST's pending... */
in_use = !list_empty(&lock->ast_list);
- spin_unlock(&dlm->spinlock);
+ spin_unlock(&dlm->ast_lock);
if (in_use) {
mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock "
"while waiting for an ast!", res->lockname.len,
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index b0c4cadd4c45..7219a86d34cc 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -320,9 +320,14 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
int convert);
-#define ocfs2_log_dlm_error(_func, _err, _lockres) do { \
- mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \
- _err, _func, _lockres->l_name); \
+#define ocfs2_log_dlm_error(_func, _err, _lockres) do { \
+ if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \
+ mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \
+ _err, _func, _lockres->l_name); \
+ else \
+ mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n", \
+ _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \
+ (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \
} while (0)
static int ocfs2_downconvert_thread(void *arg);
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
@@ -2860,6 +2865,10 @@ static void ocfs2_unlock_ast(void *opaque, int error)
case OCFS2_UNLOCK_CANCEL_CONVERT:
mlog(0, "Cancel convert success for %s\n", lockres->l_name);
lockres->l_action = OCFS2_AST_INVALID;
+ /* Downconvert thread may have requeued this lock, we
+ * need to wake it. */
+ if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
+ ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
break;
case OCFS2_UNLOCK_DROP_LOCK:
lockres->l_level = DLM_LOCK_IV;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3c3532e1307c..172850a9a12a 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
- return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode,
- new_size);
+ return jbd2_journal_begin_ordered_truncate(
+ OCFS2_SB(inode->i_sb)->journal->j_journal,
+ &OCFS2_I(inode)->ip_jinode,
+ new_size);
}
#endif /* OCFS2_JOURNAL_H */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 084aba86c3b2..4b11762f249e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -532,7 +532,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
- fe->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(osb->sb));
+ fe->id2.i_data.id_count = cpu_to_le16(
+ ocfs2_max_inline_data_with_xattr(osb->sb, fe));
} else {
fel = &fe->id2.i_list;
fel->l_tree_depth = 0;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index ad5c24a29edd..946d3c34b90b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -210,6 +210,7 @@ struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
struct ocfs2_quota_recovery;
+struct ocfs2_dentry_lock;
struct ocfs2_super
{
struct task_struct *commit_task;
@@ -325,6 +326,11 @@ struct ocfs2_super
struct list_head blocked_lock_list;
unsigned long blocked_lock_count;
+ /* List of dentry locks to release. Anyone can add locks to
+ * the list, ocfs2_wq processes the list */
+ struct ocfs2_dentry_lock *dentry_lock_list;
+ struct work_struct dentry_lock_work;
+
wait_queue_head_t osb_mount_event;
/* Truncate log info */
@@ -335,6 +341,9 @@ struct ocfs2_super
struct ocfs2_node_map osb_recovering_orphan_dirs;
unsigned int *osb_orphan_wipes;
wait_queue_head_t osb_wipe_event;
+
+ /* used to protect metaecc calculation check of xattr. */
+ spinlock_t osb_xattr_lock;
};
#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c7ae45aaa36c..2332ef740f4f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1070,12 +1070,6 @@ static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
offsetof(struct ocfs2_dinode, id2.i_symlink);
}
-static inline int ocfs2_max_inline_data(struct super_block *sb)
-{
- return sb->s_blocksize -
- offsetof(struct ocfs2_dinode, id2.i_data.id_data);
-}
-
static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb,
struct ocfs2_dinode *di)
{
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 6aff8f2d3e49..1ed0f7c86869 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -754,7 +754,9 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
if (dquot->dq_flags & mask)
sync = 1;
spin_unlock(&dq_data_lock);
- if (!sync) {
+ /* This is a slight hack but we can't afford getting global quota
+ * lock if we already have a transaction started. */
+ if (!sync || journal_current_handle()) {
status = ocfs2_write_dquot(dquot);
goto out;
}
@@ -810,171 +812,6 @@ out:
return status;
}
-/* This is difficult. We have to lock quota inode and start transaction
- * in this function but we don't want to take the penalty of exlusive
- * quota file lock when we are just going to use cached structures. So
- * we just take read lock check whether we have dquot cached and if so,
- * we don't have to take the write lock... */
-static int ocfs2_dquot_initialize(struct inode *inode, int type)
-{
- handle_t *handle = NULL;
- int status = 0;
- struct super_block *sb = inode->i_sb;
- struct ocfs2_mem_dqinfo *oinfo;
- int exclusive = 0;
- int cnt;
- qid_t id;
-
- mlog_entry_void();
-
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (type != -1 && cnt != type)
- continue;
- if (!sb_has_quota_active(sb, cnt))
- continue;
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- status = ocfs2_lock_global_qf(oinfo, 0);
- if (status < 0)
- goto out;
- /* This is just a performance optimization not a reliable test.
- * Since we hold an inode lock, noone can actually release
- * the structure until we are finished with initialization. */
- if (inode->i_dquot[cnt] != NODQUOT) {
- ocfs2_unlock_global_qf(oinfo, 0);
- continue;
- }
- /* When we have inode lock, we know that no dquot_release() can
- * run and thus we can safely check whether we need to
- * read+modify global file to get quota information or whether
- * our node already has it. */
- if (cnt == USRQUOTA)
- id = inode->i_uid;
- else if (cnt == GRPQUOTA)
- id = inode->i_gid;
- else
- BUG();
- /* Obtain exclusion from quota off... */
- down_write(&sb_dqopt(sb)->dqptr_sem);
- exclusive = !dquot_is_cached(sb, id, cnt);
- up_write(&sb_dqopt(sb)->dqptr_sem);
- if (exclusive) {
- status = ocfs2_lock_global_qf(oinfo, 1);
- if (status < 0) {
- exclusive = 0;
- mlog_errno(status);
- goto out_ilock;
- }
- handle = ocfs2_start_trans(OCFS2_SB(sb),
- ocfs2_calc_qinit_credits(sb, cnt));
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto out_ilock;
- }
- }
- dquot_initialize(inode, cnt);
- if (exclusive) {
- ocfs2_commit_trans(OCFS2_SB(sb), handle);
- ocfs2_unlock_global_qf(oinfo, 1);
- }
- ocfs2_unlock_global_qf(oinfo, 0);
- }
- mlog_exit(0);
- return 0;
-out_ilock:
- if (exclusive)
- ocfs2_unlock_global_qf(oinfo, 1);
- ocfs2_unlock_global_qf(oinfo, 0);
-out:
- mlog_exit(status);
- return status;
-}
-
-static int ocfs2_dquot_drop_slow(struct inode *inode)
-{
- int status = 0;
- int cnt;
- int got_lock[MAXQUOTAS] = {0, 0};
- handle_t *handle;
- struct super_block *sb = inode->i_sb;
- struct ocfs2_mem_dqinfo *oinfo;
-
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!sb_has_quota_active(sb, cnt))
- continue;
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- status = ocfs2_lock_global_qf(oinfo, 1);
- if (status < 0)
- goto out;
- got_lock[cnt] = 1;
- }
- handle = ocfs2_start_trans(OCFS2_SB(sb),
- ocfs2_calc_qinit_credits(sb, USRQUOTA) +
- ocfs2_calc_qinit_credits(sb, GRPQUOTA));
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto out;
- }
- dquot_drop(inode);
- ocfs2_commit_trans(OCFS2_SB(sb), handle);
-out:
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (got_lock[cnt]) {
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- ocfs2_unlock_global_qf(oinfo, 1);
- }
- return status;
-}
-
-/* See the comment before ocfs2_dquot_initialize. */
-static int ocfs2_dquot_drop(struct inode *inode)
-{
- int status = 0;
- struct super_block *sb = inode->i_sb;
- struct ocfs2_mem_dqinfo *oinfo;
- int exclusive = 0;
- int cnt;
- int got_lock[MAXQUOTAS] = {0, 0};
-
- mlog_entry_void();
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!sb_has_quota_active(sb, cnt))
- continue;
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- status = ocfs2_lock_global_qf(oinfo, 0);
- if (status < 0)
- goto out;
- got_lock[cnt] = 1;
- }
- /* Lock against anyone releasing references so that when when we check
- * we know we are not going to be last ones to release dquot */
- down_write(&sb_dqopt(sb)->dqptr_sem);
- /* Urgh, this is a terrible hack :( */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] != NODQUOT &&
- atomic_read(&inode->i_dquot[cnt]->dq_count) > 1) {
- exclusive = 1;
- break;
- }
- }
- if (!exclusive)
- dquot_drop_locked(inode);
- up_write(&sb_dqopt(sb)->dqptr_sem);
-out:
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (got_lock[cnt]) {
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- ocfs2_unlock_global_qf(oinfo, 0);
- }
- /* In case we bailed out because we had to do expensive locking
- * do it now... */
- if (exclusive)
- status = ocfs2_dquot_drop_slow(inode);
- mlog_exit(status);
- return status;
-}
-
static struct dquot *ocfs2_alloc_dquot(struct super_block *sb, int type)
{
struct ocfs2_dquot *dquot =
@@ -991,8 +828,8 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
}
struct dquot_operations ocfs2_quota_operations = {
- .initialize = ocfs2_dquot_initialize,
- .drop = ocfs2_dquot_drop,
+ .initialize = dquot_initialize,
+ .drop = dquot_drop,
.alloc_space = dquot_alloc_space,
.alloc_inode = dquot_alloc_inode,
.free_space = dquot_free_space,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 43ed11345b59..7ac83a81ee55 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1537,6 +1537,13 @@ static int ocfs2_get_sector(struct super_block *sb,
unlock_buffer(*bh);
ll_rw_block(READ, 1, bh);
wait_on_buffer(*bh);
+ if (!buffer_uptodate(*bh)) {
+ mlog_errno(-EIO);
+ brelse(*bh);
+ *bh = NULL;
+ return -EIO;
+ }
+
return 0;
}
@@ -1747,6 +1754,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
INIT_LIST_HEAD(&osb->blocked_lock_list);
osb->blocked_lock_count = 0;
spin_lock_init(&osb->osb_lock);
+ spin_lock_init(&osb->osb_xattr_lock);
ocfs2_init_inode_steal_slot(osb);
atomic_set(&osb->alloc_stats.moves, 0);
@@ -1887,6 +1895,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
journal->j_state = OCFS2_JOURNAL_FREE;
+ INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes);
+ osb->dentry_lock_list = NULL;
+
/* get some pseudo constants for clustersize bits */
osb->s_clustersize_bits =
le32_to_cpu(di->id2.i_super.s_clustersize_bits);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e1d638af6ac3..2563df89fc2a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -82,13 +82,14 @@ struct ocfs2_xattr_set_ctxt {
#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
#define OCFS2_XATTR_INLINE_SIZE 80
+#define OCFS2_XATTR_HEADER_GAP 4
#define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \
- sizeof(struct ocfs2_xattr_header) \
- - sizeof(__u32))
+ - OCFS2_XATTR_HEADER_GAP)
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \
- sizeof(struct ocfs2_xattr_block) \
- sizeof(struct ocfs2_xattr_header) \
- - sizeof(__u32))
+ - OCFS2_XATTR_HEADER_GAP)
static struct ocfs2_xattr_def_value_root def_xv = {
.xv.xr_list.l_count = cpu_to_le16(1),
@@ -274,10 +275,12 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
bucket->bu_blocks, bucket->bu_bhs, 0,
NULL);
if (!rc) {
+ spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
bucket->bu_bhs,
bucket->bu_blocks,
&bucket_xh(bucket)->xh_check);
+ spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
if (rc)
mlog_errno(rc);
}
@@ -310,9 +313,11 @@ static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
{
int i;
+ spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
bucket->bu_bhs, bucket->bu_blocks,
&bucket_xh(bucket)->xh_check);
+ spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
for (i = 0; i < bucket->bu_blocks; i++)
ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
@@ -542,8 +547,12 @@ int ocfs2_calc_xattr_init(struct inode *dir,
* when blocksize = 512, may reserve one more cluser for
* xattr bucket, otherwise reserve one metadata block
* for them is ok.
+ * If this is a new directory with inline data,
+ * we choose to reserve the entire inline area for
+ * directory contents and force an external xattr block.
*/
if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
+ (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
(s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
if (ret) {
@@ -1507,7 +1516,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
last += 1;
}
- free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
+ free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
if (free < 0)
return -EIO;
@@ -2190,7 +2199,7 @@ static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
last += 1;
}
- free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
+ free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
if (free < 0)
return 0;
@@ -2592,8 +2601,9 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
if (!ret) {
/* Update inode ctime. */
- ret = ocfs2_journal_access(ctxt->handle, inode, xis->inode_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_journal_access_di(ctxt->handle, inode,
+ xis->inode_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4729,13 +4739,6 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
vb.vb_xv = (struct ocfs2_xattr_value_root *)
(vb.vb_bh->b_data + offset % blocksize);
- ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
/*
* From here on out we have to dirty the bucket. The generic
* value calls only modify one of the bucket's bhs, but we need
@@ -4748,12 +4751,18 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
if (ret) {
mlog_errno(ret);
- goto out_dirty;
+ goto out;
+ }
+
+ ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
}
xe->xe_value_size = cpu_to_le64(len);
-out_dirty:
ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
out:
@@ -4786,19 +4795,33 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
char *val,
int value_len)
{
- int offset;
+ int ret, offset, block_off;
struct ocfs2_xattr_value_root *xv;
struct ocfs2_xattr_entry *xe = xs->here;
+ struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
+ void *base;
BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
- offset = le16_to_cpu(xe->xe_name_offset) +
- OCFS2_XATTR_SIZE(xe->xe_name_len);
+ ret = ocfs2_xattr_bucket_get_name_value(inode, xh,
+ xe - xh->xh_entries,
+ &block_off,
+ &offset);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
- xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
+ base = bucket_block(xs->bucket, block_off);
+ xv = (struct ocfs2_xattr_value_root *)(base + offset +
+ OCFS2_XATTR_SIZE(xe->xe_name_len));
- return __ocfs2_xattr_set_value_outside(inode, handle,
- xv, val, value_len);
+ ret = __ocfs2_xattr_set_value_outside(inode, handle,
+ xv, val, value_len);
+ if (ret)
+ mlog_errno(ret);
+out:
+ return ret;
}
static int ocfs2_rm_xattr_cluster(struct inode *inode,
@@ -5061,8 +5084,8 @@ try_again:
xh_free_start = le16_to_cpu(xh->xh_free_start);
header_size = sizeof(struct ocfs2_xattr_header) +
count * sizeof(struct ocfs2_xattr_entry);
- max_free = OCFS2_XATTR_BUCKET_SIZE -
- le16_to_cpu(xh->xh_name_value_len) - header_size;
+ max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
+ le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
"of %u which exceed block size\n",
@@ -5095,7 +5118,7 @@ try_again:
need = 0;
}
- free = xh_free_start - header_size;
+ free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
/*
* We need to make sure the new name/value pair
* can exist in the same block.
@@ -5128,7 +5151,8 @@ try_again:
}
xh_free_start = le16_to_cpu(xh->xh_free_start);
- free = xh_free_start - header_size;
+ free = xh_free_start - header_size
+ - OCFS2_XATTR_HEADER_GAP;
if (xh_free_start % blocksize < need)
free -= xh_free_start % blocksize;