From 02b9984d640873b7b3809e63f81a0d7e13496886 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 13 Mar 2014 10:14:33 -0400 Subject: fs: push sync_filesystem() down to the file system's remount_fs() Previously, the no-op "mount -o mount /dev/xxx" operation when the file system is already mounted read-write causes an implied, unconditional syncfs(). This seems pretty stupid, and it's certainly documented or guaraunteed to do this, nor is it particularly useful, except in the case where the file system was mounted rw and is getting remounted read-only. However, it's possible that there might be some file systems that are actually depending on this behavior. In most file systems, it's probably fine to only call sync_filesystem() when transitioning from read-write to read-only, and there are some file systems where this is not needed at all (for example, for a pseudo-filesystem or something like romfs). Signed-off-by: "Theodore Ts'o" Cc: linux-fsdevel@vger.kernel.org Cc: Christoph Hellwig Cc: Artem Bityutskiy Cc: Adrian Hunter Cc: Evgeniy Dushistov Cc: Jan Kara Cc: OGAWA Hirofumi Cc: Anders Larsen Cc: Phillip Lougher Cc: Kees Cook Cc: Mikulas Patocka Cc: Petr Vandrovec Cc: xfs@oss.sgi.com Cc: linux-btrfs@vger.kernel.org Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Cc: codalist@coda.cs.cmu.edu Cc: linux-ext4@vger.kernel.org Cc: linux-f2fs-devel@lists.sourceforge.net Cc: fuse-devel@lists.sourceforge.net Cc: cluster-devel@redhat.com Cc: linux-mtd@lists.infradead.org Cc: jfs-discussion@lists.sourceforge.net Cc: linux-nfs@vger.kernel.org Cc: linux-nilfs@vger.kernel.org Cc: linux-ntfs-dev@lists.sourceforge.net Cc: ocfs2-devel@oss.oracle.com Cc: reiserfs-devel@vger.kernel.org --- fs/ocfs2/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ocfs2/super.c') diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 49d84f80f36c..5f9bf8f9dfa7 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -631,6 +631,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) struct ocfs2_super *osb = OCFS2_SB(sb); u32 tmp; + sync_filesystem(sb); + if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || !ocfs2_check_set_options(sb, &parsed_options)) { ret = -EINVAL; -- cgit v1.2.3 From c18ceab01240fd4c354b78d877571b729908e4a3 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Thu, 3 Apr 2014 14:46:46 -0700 Subject: ocfs2: change ip_unaligned_aio to of type mutex from atomit_t There is a problem that waitqueue_active() may check stale data thus miss a wakeup of threads waiting on ip_unaligned_aio. The valid value of ip_unaligned_aio is only 0 and 1 so we can change it to be of type mutex thus the above prolem is avoid. Another benifit is that mutex which works as FIFO is fairer than wake_up_all(). Signed-off-by: Wengang Wang Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/super.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2/super.c') diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 49d84f80f36c..d17145552097 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1612,14 +1612,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) return 0; } -wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ]; - static int __init ocfs2_init(void) { - int status, i; - - for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++) - init_waitqueue_head(&ocfs2__ioend_wq[i]); + int status; status = init_ocfs2_uptodate_cache(); if (status < 0) @@ -1761,7 +1756,7 @@ static void ocfs2_inode_init_once(void *data) ocfs2_extent_map_init(&oi->vfs_inode); INIT_LIST_HEAD(&oi->ip_io_markers); oi->ip_dir_start_lookup = 0; - atomic_set(&oi->ip_unaligned_aio, 0); + mutex_init(&oi->ip_unaligned_aio); init_rwsem(&oi->ip_alloc_sem); init_rwsem(&oi->ip_xattr_sem); mutex_init(&oi->ip_io_mutex); -- cgit v1.2.3 From a75fe48cad2fb81e0e2671c73aea6c78ce5626d4 Mon Sep 17 00:00:00 2001 From: "joyce.xue" Date: Thu, 3 Apr 2014 14:46:47 -0700 Subject: ocfs2: remove unused variable uuid_net_key in ocfs2_initialize_super Variable uuid_net_key in ocfs2_initialize_super() is not used. Clean it up. Signed-off-by: joyce.xue Signed-off-by: Joseph Qi Acked-by: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/super.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/ocfs2/super.c') diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index d17145552097..d7190b2cfd40 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2072,7 +2072,6 @@ static int ocfs2_initialize_super(struct super_block *sb, struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; struct inode *inode = NULL; struct ocfs2_journal *journal; - __le32 uuid_net_key; struct ocfs2_super *osb; u64 total_blocks; @@ -2306,8 +2305,6 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); - strncpy(osb->vol_label, di->id2.i_super.s_label, 63); osb->vol_label[63] = '\0'; osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); -- cgit v1.2.3 From 2931cdcb49194503b19345c597b68fdcf78396f8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 3 Apr 2014 14:46:48 -0700 Subject: ocfs2: improve fsync efficiency and fix deadlock between aio_write and sync_file Currently, ocfs2_sync_file grabs i_mutex and forces the current journal transaction to complete. This isn't terribly efficient, since sync_file really only needs to wait for the last transaction involving that inode to complete, and this doesn't require i_mutex. Therefore, implement the necessary bits to track the newest tid associated with an inode, and teach sync_file to wait for that instead of waiting for everything in the journal to commit. Furthermore, only issue the flush request to the drive if jbd2 hasn't already done so. This also eliminates the deadlock between ocfs2_file_aio_write() and ocfs2_sync_file(). aio_write takes i_mutex then calls ocfs2_aiodio_wait() to wait for unaligned dio writes to finish. However, if that dio completion involves calling fsync, then we can get into trouble when some ocfs2_sync_file tries to take i_mutex. Signed-off-by: Darrick J. Wong Reviewed-by: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/super.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ocfs2/super.c') diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index d7190b2cfd40..9fef73da1ca5 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -561,6 +561,9 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) if (!oi) return NULL; + oi->i_sync_tid = 0; + oi->i_datasync_tid = 0; + jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); return &oi->vfs_inode; } -- cgit v1.2.3 From e3a767b60fd8a9f5e133f42f4970cff77ec43173 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 Apr 2014 14:46:56 -0700 Subject: ocfs2: implement delayed dropping of last dquot reference We cannot drop last dquot reference from downconvert thread as that creates the following deadlock: NODE 1 NODE2 holds dentry lock for 'foo' holds inode lock for GLOBAL_BITMAP_SYSTEM_INODE dquot_initialize(bar) ocfs2_dquot_acquire() ocfs2_inode_lock(USER_QUOTA_SYSTEM_INODE) ... downconvert thread (triggered from another node or a different process from NODE2) ocfs2_dentry_post_unlock() ... iput(foo) ocfs2_evict_inode(foo) ocfs2_clear_inode(foo) dquot_drop(inode) ... ocfs2_dquot_release() ocfs2_inode_lock(USER_QUOTA_SYSTEM_INODE) - blocks finds we need more space in quota file ... ocfs2_extend_no_holes() ocfs2_inode_lock(GLOBAL_BITMAP_SYSTEM_INODE) - deadlocks waiting for downconvert thread We solve the problem by postponing dropping of the last dquot reference to a workqueue if it happens from the downconvert thread. Signed-off-by: Jan Kara Reviewed-by: Mark Fasheh Reviewed-by: Srinivas Eeda Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/ocfs2/super.c') diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 9fef73da1ca5..b800a1f78d78 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1941,6 +1941,11 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) ocfs2_disable_quotas(osb); + /* All dquots should be freed by now */ + WARN_ON(!llist_empty(&osb->dquot_drop_list)); + /* Wait for worker to be done with the work structure in osb */ + cancel_work_sync(&osb->dquot_drop_work); + ocfs2_shutdown_local_alloc(osb); /* This will disable recovery and flush any recovery work. */ @@ -2276,6 +2281,9 @@ static int ocfs2_initialize_super(struct super_block *sb, INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes); osb->dentry_lock_list = NULL; + INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs); + init_llist_head(&osb->dquot_drop_list); + /* get some pseudo constants for clustersize bits */ osb->s_clustersize_bits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); -- cgit v1.2.3 From 8ed6b23709b346f7bfc1edab47003a205a6a9f69 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Thu, 3 Apr 2014 14:46:59 -0700 Subject: ocfs2: revert iput deferring code in ocfs2_drop_dentry_lock The following patches are reverted in this patch because these patches caused performance regression in the remote unlink() calls. ea455f8ab683 - ocfs2: Push out dropping of dentry lock to ocfs2_wq f7b1aa69be13 - ocfs2: Fix deadlock on umount 5fd131893793 - ocfs2: Don't oops in ocfs2_kill_sb on a failed mount Previous patches in this series removed the possible deadlocks from downconvert thread so the above patches shouldn't be needed anymore. The regression is caused because these patches delay the iput() in case of dentry unlocks. This also delays the unlocking of the open lockres. The open lockresource is required to test if the inode can be wiped from disk or not. When the deleting node does not get the open lock, it marks it as orphan (even though it is not in use by another node/process) and causes a journal checkpoint. This delays operations following the inode eviction. This also moves the inode to the orphaned inode which further causes more I/O and a lot of unneccessary orphans. The following script can be used to generate the load causing issues: declare -a create declare -a remove declare -a iterations=(1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384) unique="`mktemp -u XXXXX`" script="/tmp/idontknow-${unique}.sh" cat < "${script}" for n in {1..8}; do mkdir -p test/dir\${n} eval touch test/dir\${n}/foo{1.."\$1"} done EOF chmod 700 "${script}" function fcreate () { exec 2>&1 /usr/bin/time --format=%E "${script}" "$1" } function fremove () { exec 2>&1 /usr/bin/time --format=%E ssh node2 "cd `pwd`; rm -Rf test*" } function fcp () { exec 2>&1 /usr/bin/time --format=%E ssh node3 "cd `pwd`; cp -R test test.new" } echo ------------------------------------------------- echo "| # files | create #s | copy #s | remove #s |" echo ------------------------------------------------- for ((x=0; x < ${#iterations[*]} ; x++)) do create[$x]="`fcreate ${iterations[$x]}`" copy[$x]="`fcp ${iterations[$x]}`" remove[$x]="`fremove`" printf "| %8d | %9s | %9s | %9s |\n" ${iterations[$x]} ${create[$x]} ${copy[$x]} ${remove[$x]} done rm "${script}" echo "------------------------" Signed-off-by: Srinivas Eeda Signed-off-by: Goldwyn Rodrigues Signed-off-by: Jan Kara Reviewed-by: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/super.c | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) (limited to 'fs/ocfs2/super.c') diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index b800a1f78d78..888a1457af96 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1241,30 +1241,11 @@ static struct dentry *ocfs2_mount(struct file_system_type *fs_type, return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); } -static void ocfs2_kill_sb(struct super_block *sb) -{ - struct ocfs2_super *osb = OCFS2_SB(sb); - - /* Failed mount? */ - if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED) - goto out; - - /* Prevent further queueing of inode drop events */ - spin_lock(&dentry_list_lock); - ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); - spin_unlock(&dentry_list_lock); - /* Wait for work to finish and/or remove it */ - cancel_work_sync(&osb->dentry_lock_work); -out: - kill_block_super(sb); -} - static struct file_system_type ocfs2_fs_type = { .owner = THIS_MODULE, .name = "ocfs2", .mount = ocfs2_mount, - .kill_sb = ocfs2_kill_sb, - + .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, .next = NULL }; @@ -1930,12 +1911,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) debugfs_remove(osb->osb_ctxt); - /* - * Flush inode dropping work queue so that deletes are - * performed while the filesystem is still working - */ - ocfs2_drop_all_dl_inodes(osb); - /* Orphan scan should be stopped as early as possible */ ocfs2_orphan_scan_stop(osb); @@ -2278,9 +2253,6 @@ static int ocfs2_initialize_super(struct super_block *sb, INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); journal->j_state = OCFS2_JOURNAL_FREE; - INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes); - osb->dentry_lock_list = NULL; - INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs); init_llist_head(&osb->dquot_drop_list); -- cgit v1.2.3 From 43b10a20372d9a1c08391f33f1c8bd86179ddc5f Mon Sep 17 00:00:00 2001 From: jiangyiwen Date: Thu, 3 Apr 2014 14:47:13 -0700 Subject: ocfs2: avoid system inode ref confusion by adding mutex lock The following case may lead to the same system inode ref in confusion. A thread B thread ocfs2_get_system_file_inode ->get_local_system_inode ->_ocfs2_get_system_file_inode because of *arr == NULL, ocfs2_get_system_file_inode ->get_local_system_inode ->_ocfs2_get_system_file_inode gets first ref thru _ocfs2_get_system_file_inode, gets second ref thru igrab and set *arr = inode at the moment, B thread also gets two refs, so lead to one more inode ref. So add mutex lock to avoid multi thread set two inode ref once at the same time. Signed-off-by: jiangyiwen Reviewed-by: Joseph Qi Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ocfs2/super.c') diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 888a1457af96..1aecd626e645 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2100,6 +2100,8 @@ static int ocfs2_initialize_super(struct super_block *sb, spin_lock_init(&osb->osb_xattr_lock); ocfs2_init_steal_slots(osb); + mutex_init(&osb->system_file_mutex); + atomic_set(&osb->alloc_stats.moves, 0); atomic_set(&osb->alloc_stats.local_data, 0); atomic_set(&osb->alloc_stats.bitmap_data, 0); -- cgit v1.2.3