From 9e38d86ff2d8a8db99570e982230861046df32b5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 23 Oct 2010 06:55:17 -0400 Subject: fs: Implement lazy LRU updates for inodes Convert the inode LRU to use lazy updates to reduce lock and cacheline traffic. We avoid moving inodes around in the LRU list during iget/iput operations so these frequent operations don't need to access the LRUs. Instead, we defer the refcount checks to reclaim-time and use a per-inode state flag, I_REFERENCED, to tell reclaim that iget has touched the inode in the past. This means that only reclaim should be touching the LRU with any frequency, hence significantly reducing lock acquisitions and the amount contention on LRU updates. This also removes the inode_in_use list, which means we now only have one list for tracking the inode LRU status. This makes it much simpler to split out the LRU list operations under it's own lock. Signed-off-by: Nick Piggin Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/writeback.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..242b6f812ba6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -10,8 +10,6 @@ struct backing_dev_info; extern spinlock_t inode_lock; -extern struct list_head inode_in_use; -extern struct list_head inode_unused; /* * fs/fs-writeback.c -- cgit v1.2.3 From 92c09c041f15fc88b35f8628e07639f52e1fbb38 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 26 Oct 2010 14:22:03 -0700 Subject: mm: declare some external symbols Declare 'bdi_pending_list' and 'tag_pages_for_writeback()' to remove following sparse warnings: mm/backing-dev.c:46:1: warning: symbol 'bdi_pending_list' was not declared. Should it be static? mm/page-writeback.c:825:6: warning: symbol 'tag_pages_for_writeback' was not declared. Should it be static? Signed-off-by: Namhyung Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..c7299d2ace6b 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -149,6 +149,8 @@ int write_cache_pages(struct address_space *mapping, int do_writepages(struct address_space *mapping, struct writeback_control *wbc); void set_page_dirty_balance(struct page *page, int page_mkwrite); void writeback_set_ratelimit(void); +void tag_pages_for_writeback(struct address_space *mapping, + pgoff_t start, pgoff_t end); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl -- cgit v1.2.3 From 5b41d92437f1ae19b3f3ffa3b16589fd5df50ac0 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 27 Oct 2010 21:30:13 -0400 Subject: ext4: implement writeback livelock avoidance using page tagging This is analogous to Jan Kara's commit, f446daaea9d4a420d16c606f755f3689dcb2d0ce mm: implement writeback livelock avoidance using page tagging but since we forked write_cache_pages, we need to reimplement it there (and in ext4_da_writepages, since range_cyclic handling was moved to there) If you start a large buffered IO to a file, and then set fsync after it, you'll find that fsync does not complete until the other IO stops. If you continue re-dirtying the file (say, putting dd with conv=notrunc in a loop), when fsync finally completes (after all IO is done), it reports via tracing that it has written many more pages than the file contains; in other words it has synced and re-synced pages in the file multiple times. This then leads to problems with our writeback_index update, since it advances it by pages written, and essentially sets writeback_index off the end of the file... With the following patch, we only sync as much as was dirty at the time of the sync. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..3d132bfb4f3d 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -143,6 +143,8 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, int generic_writepages(struct address_space *mapping, struct writeback_control *wbc); +void tag_pages_for_writeback(struct address_space *mapping, + pgoff_t start, pgoff_t end); int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); -- cgit v1.2.3 From 3259f8bed2f0f57c2fdcdac1b510c3fa319ef97e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 29 Oct 2010 11:16:17 -0400 Subject: Add new functions for triggering inode writeback When btrfs is running low on metadata space, it needs to force delayed allocation pages to disk. It currently does this with a suboptimal walk of a private list of inodes with delayed allocation, and it would be much better if we used the generic flusher threads. writeback_inodes_sb_if_idle would be ideal, but it waits for the flusher thread to start IO on all the dirty pages in the FS before it returns. This adds variants of writeback_inodes_sb* that allow the caller to control how many pages get sent down. Signed-off-by: Chris Mason --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..a4cf84511e79 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -60,7 +60,9 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +void writeback_inodes_sb_nr(struct super_block *, unsigned long nr); int writeback_inodes_sb_if_idle(struct super_block *); +int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr); void sync_inodes_sb(struct super_block *); void writeback_inodes_wb(struct bdi_writeback *wb, struct writeback_control *wbc); -- cgit v1.2.3