From 7d0fa3ecba2f12ceef93fffe615e5dd9b50bb794 Mon Sep 17 00:00:00 2001 From: Alain Renaud Date: Fri, 8 Jun 2012 15:34:46 -0400 Subject: xfs: xfs_vm_writepage clear iomap_valid when !buffer_uptodate (REV2) On filesytems with a block size smaller than PAGE_SIZE we currently have a problem with unwritten extents. If a we have multi-block page for which an unwritten extent has been allocated, and only some of the buffers have been written to, and they are not contiguous, we can expose stale data from disk in the blocks between the writes after extent conversion. Example of a page with unwritten and real data. buffer content 0 empty b_state = 0 1 DATA b_state = 0x1023 Uptodate,Dirty,Mapped,Unwritten 2 DATA b_state = 0x1023 Uptodate,Dirty,Mapped,Unwritten 3 empty b_state = 0 4 empty b_state = 0 5 DATA b_state = 0x1023 Uptodate,Dirty,Mapped,Unwritten 6 DATA b_state = 0x1023 Uptodate,Dirty,Mapped,Unwritten 7 empty b_state = 0 Buffers 1, 2, 5, and 6 have been written to, leaving 0, 3, 4, and 7 empty. Currently buffers 1, 2, 5, and 6 are added to a single ioend, and when IO has completed, extent conversion creates a real extent from block 1 through block 6, leaving 0 and 7 unwritten. However buffers 3 and 4 were not written to disk, so stale data is exposed from those blocks on a subsequent read. Fix this by setting iomap_valid = 0 when we find a buffer that is not Uptodate. This ensures that buffers 5 and 6 are not added to the same ioend as buffers 1 and 2. Later these blocks will be converted into two separate real extents, leaving the blocks in between unwritten. Signed-off-by: Alain Renaud Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_aops.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'fs/xfs/xfs_aops.c') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index ae31c313a79e..8dad722c0041 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -981,10 +981,15 @@ xfs_vm_writepage( imap_valid = 0; } } else { - if (PageUptodate(page)) { + if (PageUptodate(page)) ASSERT(buffer_mapped(bh)); - imap_valid = 0; - } + /* + * This buffer is not uptodate and will not be + * written to disk. Ensure that we will put any + * subsequent writeable buffers into a new + * ioend. + */ + imap_valid = 0; continue; } -- cgit v1.2.3 From d2c2819117176e139dc761873c664aaa770c79c9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Jun 2012 15:44:53 +1000 Subject: xfs: m_maxioffset is redundant The m_maxioffset field in the struct xfs_mount contains the same value as the superblock s_maxbytes field. There is no need to carry two copies of this limit around, so use the VFS superblock version. Signed-off-by: Dave Chinner Reviewed-by: Eric Sandeen Signed-off-by: Ben Myers --- fs/xfs/xfs_aops.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/xfs/xfs_aops.c') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 8dad722c0041..84e372596d56 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -323,10 +323,10 @@ xfs_map_blocks( ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || (ip->i_df.if_flags & XFS_IFEXTENTS)); - ASSERT(offset <= mp->m_maxioffset); + ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + count > mp->m_maxioffset) - count = mp->m_maxioffset - offset; + if (offset + count > mp->m_super->s_maxbytes) + count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, @@ -1162,9 +1162,9 @@ __xfs_get_blocks( lockmode = xfs_ilock_map_shared(ip); } - ASSERT(offset <= mp->m_maxioffset); - if (offset + size > mp->m_maxioffset) - size = mp->m_maxioffset - offset; + ASSERT(offset <= mp->m_super->s_maxbytes); + if (offset + size > mp->m_super->s_maxbytes) + size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); -- cgit v1.2.3 From 6b7a03f03a2f8b1629133e35729eba4727fae3cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Jul 2012 12:20:00 -0400 Subject: xfs: handle EOF correctly in xfs_vm_writepage We need to zero out part of a page which beyond EOF before setting uptodate, otherwise, mapread or write will see non-zero data beyond EOF. Based on the code in fs/buffer.c and the following ext4 commit: ext4: handle EOF correctly in ext4_bio_write_page() And yes, I wish we had a good test case for it. Signed-off-by: Christoph Hellwig Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_aops.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_aops.c') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 84e372596d56..91d77ac51bba 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -927,11 +927,26 @@ xfs_vm_writepage( end_index = offset >> PAGE_CACHE_SHIFT; last_index = (offset - 1) >> PAGE_CACHE_SHIFT; if (page->index >= end_index) { - if ((page->index >= end_index + 1) || - !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { + unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1); + + /* + * Just skip the page if it is fully outside i_size, e.g. due + * to a truncate operation that is in progress. + */ + if (page->index >= end_index + 1 || offset_into_page == 0) { unlock_page(page); return 0; } + + /* + * The page straddles i_size. It must be zeroed out on each + * and every writepage invocation because it may be mmapped. + * "A file is mapped in multiples of the page size. For a file + * that is not a multiple of the page size, the remaining + * memory is zeroed when mapped, and writes to that region are + * not written out to the file." + */ + zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE); } end_offset = min_t(unsigned long long, -- cgit v1.2.3 From 0d882a360b9012bc7a7e921c935774c3fba1bfd9 Mon Sep 17 00:00:00 2001 From: Alain Renaud Date: Tue, 22 May 2012 15:56:21 -0500 Subject: Prefix IO_XX flags with XFS_IO_XX to avoid namespace colision. Add a XFS_ prefix to IO_DIRECT,XFS_IO_DELALLOC, XFS_IO_UNWRITTEN and XFS_IO_OVERWRITE. This to avoid namespace conflict with other modules. Signed-off-by: Alain Renaud Reviewed-by: Rich Johnston Signed-off-by: Ben Myers --- fs/xfs/xfs_aops.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'fs/xfs/xfs_aops.c') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 91d77ac51bba..15052ff916ec 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -179,7 +179,7 @@ xfs_finish_ioend( if (atomic_dec_and_test(&ioend->io_remaining)) { struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; - if (ioend->io_type == IO_UNWRITTEN) + if (ioend->io_type == XFS_IO_UNWRITTEN) queue_work(mp->m_unwritten_workqueue, &ioend->io_work); else if (ioend->io_append_trans) queue_work(mp->m_data_workqueue, &ioend->io_work); @@ -210,7 +210,7 @@ xfs_end_io( * For unwritten extents we need to issue transactions to convert a * range to normal written extens after the data I/O has finished. */ - if (ioend->io_type == IO_UNWRITTEN) { + if (ioend->io_type == XFS_IO_UNWRITTEN) { /* * For buffered I/O we never preallocate a transaction when * doing the unwritten extent conversion, but for direct I/O @@ -312,7 +312,7 @@ xfs_map_blocks( if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); - if (type == IO_UNWRITTEN) + if (type == XFS_IO_UNWRITTEN) bmapi_flags |= XFS_BMAPI_IGSTATE; if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { @@ -336,7 +336,7 @@ xfs_map_blocks( if (error) return -XFS_ERROR(error); - if (type == IO_DELALLOC && + if (type == XFS_IO_DELALLOC && (!nimaps || isnullstartblock(imap->br_startblock))) { error = xfs_iomap_write_allocate(ip, offset, count, imap); if (!error) @@ -345,7 +345,7 @@ xfs_map_blocks( } #ifdef DEBUG - if (type == IO_UNWRITTEN) { + if (type == XFS_IO_UNWRITTEN) { ASSERT(nimaps); ASSERT(imap->br_startblock != HOLESTARTBLOCK); ASSERT(imap->br_startblock != DELAYSTARTBLOCK); @@ -634,11 +634,11 @@ xfs_check_page_type( bh = head = page_buffers(page); do { if (buffer_unwritten(bh)) - acceptable += (type == IO_UNWRITTEN); + acceptable += (type == XFS_IO_UNWRITTEN); else if (buffer_delay(bh)) - acceptable += (type == IO_DELALLOC); + acceptable += (type == XFS_IO_DELALLOC); else if (buffer_dirty(bh) && buffer_mapped(bh)) - acceptable += (type == IO_OVERWRITE); + acceptable += (type == XFS_IO_OVERWRITE); else break; } while ((bh = bh->b_this_page) != head); @@ -721,11 +721,11 @@ xfs_convert_page( if (buffer_unwritten(bh) || buffer_delay(bh) || buffer_mapped(bh)) { if (buffer_unwritten(bh)) - type = IO_UNWRITTEN; + type = XFS_IO_UNWRITTEN; else if (buffer_delay(bh)) - type = IO_DELALLOC; + type = XFS_IO_DELALLOC; else - type = IO_OVERWRITE; + type = XFS_IO_OVERWRITE; if (!xfs_imap_valid(inode, imap, offset)) { done = 1; @@ -733,7 +733,7 @@ xfs_convert_page( } lock_buffer(bh); - if (type != IO_OVERWRITE) + if (type != XFS_IO_OVERWRITE) xfs_map_at_offset(inode, bh, imap, offset); xfs_add_to_ioend(inode, bh, offset, type, ioendp, done); @@ -831,7 +831,7 @@ xfs_aops_discard_page( struct buffer_head *bh, *head; loff_t offset = page_offset(page); - if (!xfs_check_page_type(page, IO_DELALLOC)) + if (!xfs_check_page_type(page, XFS_IO_DELALLOC)) goto out_invalidate; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) @@ -956,7 +956,7 @@ xfs_vm_writepage( bh = head = page_buffers(page); offset = page_offset(page); - type = IO_OVERWRITE; + type = XFS_IO_OVERWRITE; if (wbc->sync_mode == WB_SYNC_NONE) nonblocking = 1; @@ -981,18 +981,18 @@ xfs_vm_writepage( } if (buffer_unwritten(bh)) { - if (type != IO_UNWRITTEN) { - type = IO_UNWRITTEN; + if (type != XFS_IO_UNWRITTEN) { + type = XFS_IO_UNWRITTEN; imap_valid = 0; } } else if (buffer_delay(bh)) { - if (type != IO_DELALLOC) { - type = IO_DELALLOC; + if (type != XFS_IO_DELALLOC) { + type = XFS_IO_DELALLOC; imap_valid = 0; } } else if (buffer_uptodate(bh)) { - if (type != IO_OVERWRITE) { - type = IO_OVERWRITE; + if (type != XFS_IO_OVERWRITE) { + type = XFS_IO_OVERWRITE; imap_valid = 0; } } else { @@ -1028,7 +1028,7 @@ xfs_vm_writepage( } if (imap_valid) { lock_buffer(bh); - if (type != IO_OVERWRITE) + if (type != XFS_IO_OVERWRITE) xfs_map_at_offset(inode, bh, &imap, offset); xfs_add_to_ioend(inode, bh, offset, type, &ioend, new_ioend); @@ -1069,7 +1069,7 @@ xfs_vm_writepage( * Reserve log space if we might write beyond the on-disk * inode size. */ - if (ioend->io_type != IO_UNWRITTEN && + if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend)) { err = xfs_setfilesize_trans_alloc(ioend); if (err) @@ -1366,7 +1366,7 @@ xfs_end_io_direct_write( ioend->io_iocb = iocb; ioend->io_result = ret; if (private && size > 0) - ioend->io_type = IO_UNWRITTEN; + ioend->io_type = XFS_IO_UNWRITTEN; if (is_async) { ioend->io_isasync = 1; @@ -1398,7 +1398,7 @@ xfs_vm_direct_IO( * and converts at least on unwritten extent we will cancel * the still clean transaction after the I/O has finished. */ - iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT); + iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); if (offset + size > XFS_I(inode)->i_d.di_size) { ret = xfs_setfilesize_trans_alloc(ioend); if (ret) -- cgit v1.2.3 From d9457dc056249913a7abe8b71dc09e427e590e35 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:39 +0200 Subject: xfs: Convert to new freezing code Generic code now blocks all writers from standard write paths. So we add blocking of all writers coming from ioctl (we get a protection of ioctl against racing remount read-only as a bonus) and convert xfs_file_aio_write() to a non-racy freeze protection. We also keep freeze protection on transaction start to block internal filesystem writes such as removal of preallocated blocks. CC: Ben Myers CC: Alex Elder CC: xfs@oss.sgi.com Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/xfs/xfs_aops.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'fs/xfs/xfs_aops.c') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 8dad722c0041..daa42383ebd9 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -123,6 +123,12 @@ xfs_setfilesize_trans_alloc( ioend->io_append_trans = tp; + /* + * We will pass freeze protection with a transaction. So tell lockdep + * we released it. + */ + rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], + 1, _THIS_IP_); /* * We hand off the transaction to the completion thread now, so * clear the flag here. @@ -199,6 +205,15 @@ xfs_end_io( struct xfs_inode *ip = XFS_I(ioend->io_inode); int error = 0; + if (ioend->io_append_trans) { + /* + * We've got freeze protection passed with the transaction. + * Tell lockdep about it. + */ + rwsem_acquire_read( + &ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], + 0, 1, _THIS_IP_); + } if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { ioend->io_error = -EIO; goto done; @@ -1410,6 +1425,9 @@ out_trans_cancel: if (ioend->io_append_trans) { current_set_flags_nested(&ioend->io_append_trans->t_pflags, PF_FSTRANS); + rwsem_acquire_read( + &inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], + 0, 1, _THIS_IP_); xfs_trans_cancel(ioend->io_append_trans, 0); } out_destroy_ioend: -- cgit v1.2.3