summaryrefslogtreecommitdiff
path: root/fs/ext4/ialloc.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-06 12:59:41 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-06 12:59:41 -0700
commitbe6297e9be118d89fa477a60ddfbf0e0b2dfacec (patch)
tree1c710355e97a9c1916c28c570cab260519fada74 /fs/ext4/ialloc.c
parent5791577963426c5a2db51fff57e9fcd72061e2c3 (diff)
parentb5f515735bea4ae71c248aea3e049073f8852889 (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: "Scalability improvements when allocating inodes, and some miscellaneous bug fixes and cleanups" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: ext4: avoid Y2038 overflow in recently_deleted() ext4: fix fault handling when mounted with -o dax,ro ext4: fix quota inconsistency during orphan cleanup for read-only mounts ext4: fix incorrect quotaoff if the quota feature is enabled ext4: remove useless test and assignment in strtohash functions ext4: backward compatibility support for Lustre ea_inode implementation ext4: remove timebomb in ext4_decode_extra_time() ext4: use sizeof(*ptr) ext4: in ext4_seek_{hole,data}, return -ENXIO for negative offsets ext4: reduce lock contention in __ext4_new_inode ext4: cleanup goto next group ext4: do not unnecessarily allocate buffer in recently_deleted()
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r--fs/ext4/ialloc.c93
1 files changed, 60 insertions, 33 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 507bfb3344d4..71e93a23cec3 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -692,24 +692,25 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
* somewhat arbitrary...)
*/
#define RECENTCY_MIN 5
-#define RECENTCY_DIRTY 30
+#define RECENTCY_DIRTY 300
static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
{
struct ext4_group_desc *gdp;
struct ext4_inode *raw_inode;
struct buffer_head *bh;
- unsigned long dtime, now;
- int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
- int offset, ret = 0, recentcy = RECENTCY_MIN;
+ int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
+ int offset, ret = 0;
+ int recentcy = RECENTCY_MIN;
+ u32 dtime, now;
gdp = ext4_get_group_desc(sb, group, NULL);
if (unlikely(!gdp))
return 0;
- bh = sb_getblk(sb, ext4_inode_table(sb, gdp) +
+ bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) +
(ino / inodes_per_block));
- if (unlikely(!bh) || !buffer_uptodate(bh))
+ if (!bh || !buffer_uptodate(bh))
/*
* If the block is not in the buffer cache, then it
* must have been written out.
@@ -718,18 +719,45 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb);
raw_inode = (struct ext4_inode *) (bh->b_data + offset);
+
+ /* i_dtime is only 32 bits on disk, but we only care about relative
+ * times in the range of a few minutes (i.e. long enough to sync a
+ * recently-deleted inode to disk), so using the low 32 bits of the
+ * clock (a 68 year range) is enough, see time_before32() */
dtime = le32_to_cpu(raw_inode->i_dtime);
- now = get_seconds();
+ now = ktime_get_real_seconds();
if (buffer_dirty(bh))
recentcy += RECENTCY_DIRTY;
- if (dtime && (dtime < now) && (now < dtime + recentcy))
+ if (dtime && time_before32(dtime, now) &&
+ time_before32(now, dtime + recentcy))
ret = 1;
out:
brelse(bh);
return ret;
}
+static int find_inode_bit(struct super_block *sb, ext4_group_t group,
+ struct buffer_head *bitmap, unsigned long *ino)
+{
+next:
+ *ino = ext4_find_next_zero_bit((unsigned long *)
+ bitmap->b_data,
+ EXT4_INODES_PER_GROUP(sb), *ino);
+ if (*ino >= EXT4_INODES_PER_GROUP(sb))
+ return 0;
+
+ if ((EXT4_SB(sb)->s_journal == NULL) &&
+ recently_deleted(sb, group, *ino)) {
+ *ino = *ino + 1;
+ if (*ino < EXT4_INODES_PER_GROUP(sb))
+ goto next;
+ return 0;
+ }
+
+ return 1;
+}
+
/*
* There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both
@@ -892,19 +920,13 @@ got_group:
/*
* Check free inodes count before loading bitmap.
*/
- if (ext4_free_inodes_count(sb, gdp) == 0) {
- if (++group == ngroups)
- group = 0;
- continue;
- }
+ if (ext4_free_inodes_count(sb, gdp) == 0)
+ goto next_group;
grp = ext4_get_group_info(sb, group);
/* Skip groups with already-known suspicious inode tables */
- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
- if (++group == ngroups)
- group = 0;
- continue;
- }
+ if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
+ goto next_group;
brelse(inode_bitmap_bh);
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
@@ -912,27 +934,20 @@ got_group:
if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) ||
IS_ERR(inode_bitmap_bh)) {
inode_bitmap_bh = NULL;
- if (++group == ngroups)
- group = 0;
- continue;
+ goto next_group;
}
repeat_in_this_group:
- ino = ext4_find_next_zero_bit((unsigned long *)
- inode_bitmap_bh->b_data,
- EXT4_INODES_PER_GROUP(sb), ino);
- if (ino >= EXT4_INODES_PER_GROUP(sb))
+ ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
+ if (!ret2)
goto next_group;
- if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
+
+ if (group == 0 && (ino + 1) < EXT4_FIRST_INO(sb)) {
ext4_error(sb, "reserved inode found cleared - "
"inode=%lu", ino + 1);
- continue;
- }
- if ((EXT4_SB(sb)->s_journal == NULL) &&
- recently_deleted(sb, group, ino)) {
- ino++;
- goto next_inode;
+ goto next_group;
}
+
if (!handle) {
BUG_ON(nblocks <= 0);
handle = __ext4_journal_start_sb(dir->i_sb, line_no,
@@ -952,11 +967,23 @@ repeat_in_this_group:
}
ext4_lock_group(sb, group);
ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
+ if (ret2) {
+ /* Someone already took the bit. Repeat the search
+ * with lock held.
+ */
+ ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
+ if (ret2) {
+ ext4_set_bit(ino, inode_bitmap_bh->b_data);
+ ret2 = 0;
+ } else {
+ ret2 = 1; /* we didn't grab the inode */
+ }
+ }
ext4_unlock_group(sb, group);
ino++; /* the inode bitmap is zero-based */
if (!ret2)
goto got; /* we grabbed the inode! */
-next_inode:
+
if (ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
next_group: