diff options
Diffstat (limited to 'fs')
59 files changed, 621 insertions, 275 deletions
diff --git a/fs/attr.c b/fs/attr.c index 1449adb14ef6..8dd5825ec708 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -182,11 +182,6 @@ int notify_change(struct dentry * dentry, struct iattr * attr) return -EPERM; } - if ((ia_valid & ATTR_SIZE) && IS_I_VERSION(inode)) { - if (attr->ia_size != inode->i_size) - inode_inc_iversion(inode); - } - if ((ia_valid & ATTR_MODE)) { umode_t amode = attr->ia_mode; /* Flag setting protected by i_mutex */ diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 45e944fe52a6..8dccf73025b3 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -114,6 +114,14 @@ void bio_integrity_free(struct bio *bio) } EXPORT_SYMBOL(bio_integrity_free); +static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip) +{ + if (bip->bip_slab == BIO_POOL_NONE) + return BIP_INLINE_VECS; + + return bvec_nr_vecs(bip->bip_slab); +} + /** * bio_integrity_add_page - Attach integrity metadata * @bio: bio to update @@ -129,7 +137,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) { + if (bip->bip_vcnt >= bip_integrity_vecs(bip)) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index e15d2b0d8d3b..0890c83643e9 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -229,7 +229,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, if (ret > 0) { /* we need an acl */ ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); - } else { + } else if (ret < 0) { cache_no_acl(inode); } } else { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3b6d20bc2388..bbafa05519da 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7491,7 +7491,7 @@ out: */ if (root_dropped == false) btrfs_add_dead_root(root); - if (err) + if (err && err != -EAGAIN) btrfs_std_error(root->fs_info, err); return err; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1e2288dc5346..8fcd2424e7f9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2419,10 +2419,23 @@ out_unlock: return ret; } +static void free_sa_defrag_extent(struct new_sa_defrag_extent *new) +{ + struct old_sa_defrag_extent *old, *tmp; + + if (!new) + return; + + list_for_each_entry_safe(old, tmp, &new->head, list) { + list_del(&old->list); + kfree(old); + } + kfree(new); +} + static void relink_file_extents(struct new_sa_defrag_extent *new) { struct btrfs_path *path; - struct old_sa_defrag_extent *old, *tmp; struct sa_defrag_extent_backref *backref; struct sa_defrag_extent_backref *prev = NULL; struct inode *inode; @@ -2465,16 +2478,11 @@ static void relink_file_extents(struct new_sa_defrag_extent *new) kfree(prev); btrfs_free_path(path); - - list_for_each_entry_safe(old, tmp, &new->head, list) { - list_del(&old->list); - kfree(old); - } out: + free_sa_defrag_extent(new); + atomic_dec(&root->fs_info->defrag_running); wake_up(&root->fs_info->transaction_wait); - - kfree(new); } static struct new_sa_defrag_extent * @@ -2484,7 +2492,7 @@ record_old_file_extents(struct inode *inode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path; struct btrfs_key key; - struct old_sa_defrag_extent *old, *tmp; + struct old_sa_defrag_extent *old; struct new_sa_defrag_extent *new; int ret; @@ -2532,7 +2540,7 @@ record_old_file_extents(struct inode *inode, if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret < 0) - goto out_free_list; + goto out_free_path; else if (ret > 0) break; continue; @@ -2561,7 +2569,7 @@ record_old_file_extents(struct inode *inode, old = kmalloc(sizeof(*old), GFP_NOFS); if (!old) - goto out_free_list; + goto out_free_path; offset = max(new->file_pos, key.offset); end = min(new->file_pos + new->len, key.offset + num_bytes); @@ -2583,15 +2591,10 @@ next: return new; -out_free_list: - list_for_each_entry_safe(old, tmp, &new->head, list) { - list_del(&old->list); - kfree(old); - } out_free_path: btrfs_free_path(path); out_kfree: - kfree(new); + free_sa_defrag_extent(new); return NULL; } @@ -2652,7 +2655,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) EXTENT_DEFRAG, 1, cached_state); if (ret) { u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item); - if (last_snapshot >= BTRFS_I(inode)->generation) + if (0 && last_snapshot >= BTRFS_I(inode)->generation) /* the inode is shared */ new = record_old_file_extents(inode, ordered_extent); @@ -2743,8 +2746,14 @@ out: btrfs_remove_ordered_extent(inode, ordered_extent); /* for snapshot-aware defrag */ - if (new) - relink_file_extents(new); + if (new) { + if (ret) { + free_sa_defrag_extent(new); + atomic_dec(&root->fs_info->defrag_running); + } else { + relink_file_extents(new); + } + } /* once for us */ btrfs_put_ordered_extent(ordered_extent); @@ -4518,8 +4527,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) * these flags set. For all other operations the VFS set these flags * explicitly if it wants a timestamp update. */ - if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) - inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); + if (newsize != oldsize) { + inode_inc_iversion(inode); + if (!(mask & (ATTR_CTIME | ATTR_MTIME))) + inode->i_ctime = inode->i_mtime = + current_fs_time(inode->i_sb); + } if (newsize > oldsize) { truncate_pagecache(inode, oldsize, newsize); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8dedf4019672..783906c687b5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1528,6 +1528,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, printk(KERN_INFO "btrfs: Snapshot src from " "another FS\n"); ret = -EINVAL; + } else if (!inode_owner_or_capable(src_inode)) { + /* + * Subvolume creation is not restricted, but snapshots + * are limited to own subvolumes only + */ + ret = -EPERM; } else { ret = btrfs_mksubvol(&file->f_path, name, namelen, BTRFS_I(src_inode)->root, @@ -2093,7 +2099,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); if (err == -EINTR) - goto out; + goto out_drop_write; dentry = lookup_one_len(vol_args->name, parent, namelen); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); @@ -2235,6 +2241,7 @@ out_dput: dput(dentry); out_unlock_dir: mutex_unlock(&dir->i_mutex); +out_drop_write: mnt_drop_write_file(file); out: kfree(vol_args); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 09ea0bdde65f..256a9a46d544 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4623,8 +4623,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) } if (!access_ok(VERIFY_READ, arg->clone_sources, - sizeof(*arg->clone_sources * - arg->clone_sources_count))) { + sizeof(*arg->clone_sources) * + arg->clone_sources_count)) { ret = -EFAULT; goto out; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cf68596b51fb..bca436330681 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3314,7 +3314,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, btrfs_set_token_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG, &token); - if (em->block_start == 0) + if (em->block_start == EXTENT_MAP_HOLE) skip_csum = true; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8bffb9174afb..b6c23c4abae2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4248,6 +4248,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got " "%Lu-%Lu\n", logical, logical+len, em->start, em->start + em->len); + free_extent_map(em); return 1; } @@ -4429,6 +4430,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, " "found %Lu-%Lu\n", logical, em->start, em->start + em->len); + free_extent_map(em); return -EINVAL; } diff --git a/fs/buffer.c b/fs/buffer.c index d2a4d1bb2d57..75964d734444 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -620,14 +620,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); static void __set_page_dirty(struct page *page, struct address_space *mapping, int warn) { - spin_lock_irq(&mapping->tree_lock); + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !PageUptodate(page)); account_page_dirtied(page, mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } - spin_unlock_irq(&mapping->tree_lock); + spin_unlock_irqrestore(&mapping->tree_lock, flags); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 3e68ac101040..5da06f020986 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -213,9 +213,13 @@ static int readpage_nounlock(struct file *filp, struct page *page) if (err < 0) { SetPageError(page); goto out; - } else if (err < PAGE_CACHE_SIZE) { + } else { + if (err < PAGE_CACHE_SIZE) { /* zero fill remainder of page */ - zero_user_segment(page, err, PAGE_CACHE_SIZE); + zero_user_segment(page, err, PAGE_CACHE_SIZE); + } else { + flush_dcache_page(page); + } } SetPageUptodate(page); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 656e16907430..5de16f5ac7e9 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -313,9 +313,9 @@ static int striped_read(struct inode *inode, { struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); - u64 pos, this_len; + u64 pos, this_len, left; int io_align, page_align; - int left, pages_left; + int pages_left; int read; struct page **page_pos; int ret; @@ -346,47 +346,40 @@ more: ret = 0; hit_stripe = this_len < left; was_short = ret >= 0 && ret < this_len; - dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, + dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read, ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); - if (ret > 0) { - int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; - - if (read < pos - off) { - dout(" zero gap %llu to %llu\n", off + read, pos); - ceph_zero_page_vector_range(page_align + read, - pos - off - read, pages); + if (ret >= 0) { + int didpages; + if (was_short && (pos + ret < inode->i_size)) { + u64 tmp = min(this_len - ret, + inode->i_size - pos - ret); + dout(" zero gap %llu to %llu\n", + pos + ret, pos + ret + tmp); + ceph_zero_page_vector_range(page_align + read + ret, + tmp, pages); + ret += tmp; } + + didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; pos += ret; read = pos - off; left -= ret; page_pos += didpages; pages_left -= didpages; - /* hit stripe? */ - if (left && hit_stripe) + /* hit stripe and need continue*/ + if (left && hit_stripe && pos < inode->i_size) goto more; } - if (was_short) { + if (read > 0) { + ret = read; /* did we bounce off eof? */ if (pos + left > inode->i_size) *checkeof = 1; - - /* zero trailing bytes (inside i_size) */ - if (left > 0 && pos < inode->i_size) { - if (pos + left > inode->i_size) - left = inode->i_size - pos; - - dout("zero tail %d\n", left); - ceph_zero_page_vector_range(page_align + read, left, - pages); - read += left; - } } - if (ret >= 0) - ret = read; dout("striped_read returns %d\n", ret); return ret; } @@ -618,6 +611,8 @@ out: if (check_caps) ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL); + } else if (ret != -EOLDSNAPC && written > 0) { + ret = written; } return ret; } diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index a5ce62eb7806..669622fd1ae3 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -211,8 +211,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx", ceph_ino(inode), dl.object_no); - ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap, - ceph_file_layout_pg_pool(ci->i_layout)); + r = ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap, + ceph_file_layout_pg_pool(ci->i_layout)); + if (r < 0) { + up_read(&osdc->map_sem); + return r; + } dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); if (dl.osd >= 0) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4d2920304be8..d6a536886472 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -414,6 +414,9 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, { struct ceph_mds_session *s; + if (mds >= mdsc->mdsmap->m_max_mds) + return ERR_PTR(-EINVAL); + s = kzalloc(sizeof(*s), GFP_NOFS); if (!s) return ERR_PTR(-ENOMEM); @@ -639,6 +642,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc, req->r_unsafe_dir = NULL; } + complete_all(&req->r_safe_completion); + ceph_mdsc_put_request(req); } @@ -1840,8 +1845,11 @@ static int __do_request(struct ceph_mds_client *mdsc, int mds = -1; int err = -EAGAIN; - if (req->r_err || req->r_got_result) + if (req->r_err || req->r_got_result) { + if (req->r_aborted) + __unregister_request(mdsc, req); goto out; + } if (req->r_timeout && time_after_eq(jiffies, req->r_started + req->r_timeout)) { @@ -2151,7 +2159,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) if (head->safe) { req->r_got_safe = true; __unregister_request(mdsc, req); - complete_all(&req->r_safe_completion); if (req->r_got_unsafe) { /* @@ -3040,8 +3047,10 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) fsc->mdsc = mdsc; mutex_init(&mdsc->mutex); mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); - if (mdsc->mdsmap == NULL) + if (mdsc->mdsmap == NULL) { + kfree(mdsc); return -ENOMEM; + } init_completion(&mdsc->safe_umount_waiters); init_waitqueue_head(&mdsc->session_close_wq); diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 9278dec9e940..d4d38977dcbb 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -138,6 +138,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) m->m_info[mds].export_targets = kcalloc(num_export_targets, sizeof(u32), GFP_NOFS); + if (m->m_info[mds].export_targets == NULL) + goto badmem; for (j = 0; j < num_export_targets; j++) m->m_info[mds].export_targets[j] = ceph_decode_32(&pexport_targets); @@ -170,7 +172,7 @@ bad: DUMP_PREFIX_OFFSET, 16, 1, start, end - start, true); ceph_mdsmap_destroy(m); - return ERR_PTR(-EINVAL); + return ERR_PTR(err); } void ceph_mdsmap_destroy(struct ceph_mdsmap *m) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 7d377c9a5e35..6627b26a800c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -357,7 +357,7 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, } err = -EINVAL; dev_name_end--; /* back up to ':' separator */ - if (*dev_name_end != ':') { + if (dev_name_end < dev_name || *dev_name_end != ':') { pr_err("device name is missing path (no : separator in %s)\n", dev_name); goto out; diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 51f5e0ee7237..494b68349667 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1027,15 +1027,30 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, __u32 secdesclen = 0; struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */ struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); + struct cifs_tcon *tcon; + + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + tcon = tlink_tcon(tlink); cifs_dbg(NOISY, "set ACL from mode for %s\n", path); /* Get the security descriptor */ - pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen); + + if (tcon->ses->server->ops->get_acl == NULL) { + cifs_put_tlink(tlink); + return -EOPNOTSUPP; + } + + pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path, + &secdesclen); if (IS_ERR(pntsd)) { rc = PTR_ERR(pntsd); cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc); - goto out; + cifs_put_tlink(tlink); + return rc; } /* @@ -1048,6 +1063,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, pnntsd = kmalloc(secdesclen, GFP_KERNEL); if (!pnntsd) { kfree(pntsd); + cifs_put_tlink(tlink); return -ENOMEM; } @@ -1056,14 +1072,18 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc); + if (tcon->ses->server->ops->set_acl == NULL) + rc = -EOPNOTSUPP; + if (!rc) { /* Set the security descriptor */ - rc = set_cifs_acl(pnntsd, secdesclen, inode, path, aclflag); + rc = tcon->ses->server->ops->set_acl(pnntsd, secdesclen, inode, + path, aclflag); cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc); } + cifs_put_tlink(tlink); kfree(pnntsd); kfree(pntsd); -out: return rc; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ea3a0b3018a5..e2c2d96491fa 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -370,6 +370,16 @@ struct smb_version_operations { void (*new_lease_key)(struct cifs_fid *fid); int (*calc_signature)(struct smb_rqst *rqst, struct TCP_Server_Info *server); + ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *, + const unsigned char *, const unsigned char *, char *, + size_t, const struct nls_table *, int); + int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *, + const char *, const void *, const __u16, + const struct nls_table *, int); + struct cifs_ntsd * (*get_acl)(struct cifs_sb_info *, struct inode *, + const char *, u32 *); + int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *, + int); }; struct smb_version_values { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index c2934f8701da..8b0c656f2ab2 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2353,7 +2353,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *poffset) { unsigned long nr_pages, i; - size_t copied, len, cur_len; + size_t bytes, copied, len, cur_len; ssize_t total_written = 0; loff_t offset; struct iov_iter it; @@ -2408,14 +2408,45 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, save_len = cur_len; for (i = 0; i < nr_pages; i++) { - copied = min_t(const size_t, cur_len, PAGE_SIZE); + bytes = min_t(const size_t, cur_len, PAGE_SIZE); copied = iov_iter_copy_from_user(wdata->pages[i], &it, - 0, copied); + 0, bytes); cur_len -= copied; iov_iter_advance(&it, copied); + /* + * If we didn't copy as much as we expected, then that + * may mean we trod into an unmapped area. Stop copying + * at that point. On the next pass through the big + * loop, we'll likely end up getting a zero-length + * write and bailing out of it. + */ + if (copied < bytes) + break; } cur_len = save_len - cur_len; + /* + * If we have no data to send, then that probably means that + * the copy above failed altogether. That's most likely because + * the address in the iovec was bogus. Set the rc to -EFAULT, + * free anything we allocated and bail out. + */ + if (!cur_len) { + for (i = 0; i < nr_pages; i++) + put_page(wdata->pages[i]); + kfree(wdata); + rc = -EFAULT; + break; + } + + /* + * i + 1 now represents the number of pages we actually used in + * the copy phase above. Bring nr_pages down to that, and free + * any pages that we didn't use. + */ + for ( ; nr_pages > i + 1; nr_pages--) + put_page(wdata->pages[nr_pages - 1]); + wdata->sync_mode = WB_SYNC_ALL; wdata->nr_pages = nr_pages; wdata->offset = (__u64)offset; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 449b6cf09b09..9d463501348f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -490,10 +490,15 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, return PTR_ERR(tlink); tcon = tlink_tcon(tlink); - rc = CIFSSMBQAllEAs(xid, tcon, path, "SETFILEBITS", - ea_value, 4 /* size of buf */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + if (tcon->ses->server->ops->query_all_EAs == NULL) { + cifs_put_tlink(tlink); + return -EOPNOTSUPP; + } + + rc = tcon->ses->server->ops->query_all_EAs(xid, tcon, path, + "SETFILEBITS", ea_value, 4 /* size of buf */, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); cifs_put_tlink(tlink); if (rc < 0) return (int)rc; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 3efdb9d5c0b8..4885a40f3210 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -948,6 +948,14 @@ struct smb_version_operations smb1_operations = { .mand_lock = cifs_mand_lock, .mand_unlock_range = cifs_unlock_range, .push_mand_locks = cifs_push_mandatory_locks, +#ifdef CONFIG_CIFS_XATTR + .query_all_EAs = CIFSSMBQAllEAs, + .set_EA = CIFSSMBSetEA, +#endif /* CIFS_XATTR */ +#ifdef CONFIG_CIFS_ACL + .get_acl = get_cifs_acl, + .set_acl = set_cifs_acl, +#endif /* CIFS_ACL */ }; struct smb_version_values smb1_values = { diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 7c0e2143e775..cc592ef6584a 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -55,4 +55,7 @@ #define SMB2_NTLMV2_SESSKEY_SIZE (16) #define SMB2_HMACSHA256_SIZE (32) +/* Maximum buffer size value we can send with 1 credit */ +#define SMB2_MAX_BUFFER_SIZE 65536 + #endif /* _SMB2_GLOB_H */ diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f2e76f3b0c61..e2756bb40b4d 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -181,11 +181,8 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) /* start with specified wsize, or default */ wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE; wsize = min_t(unsigned int, wsize, server->max_write); - /* - * limit write size to 2 ** 16, because we don't support multicredit - * requests now. - */ - wsize = min_t(unsigned int, wsize, 2 << 15); + /* set it to the maximum buffer size value we can send with 1 credit */ + wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); return wsize; } @@ -199,11 +196,8 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) /* start with specified rsize, or default */ rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE; rsize = min_t(unsigned int, rsize, server->max_read); - /* - * limit write size to 2 ** 16, because we don't support multicredit - * requests now. - */ - rsize = min_t(unsigned int, rsize, 2 << 15); + /* set it to the maximum buffer size value we can send with 1 credit */ + rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE); return rsize; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2b95ce2b54e8..c7a6fd87bb6e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -408,6 +408,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) server->dialect = le16_to_cpu(rsp->DialectRevision); server->maxBuf = le32_to_cpu(rsp->MaxTransactSize); + /* set it to the maximum buffer size value we can send with 1 credit */ + server->maxBuf = min_t(unsigned int, le32_to_cpu(rsp->MaxTransactSize), + SMB2_MAX_BUFFER_SIZE); server->max_read = le32_to_cpu(rsp->MaxReadSize); server->max_write = le32_to_cpu(rsp->MaxWriteSize); /* BB Do we need to validate the SecurityMode? */ diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 09afda4cc58e..5ac836a86b18 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -82,9 +82,11 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) goto remove_ea_exit; ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */ - rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, NULL, - (__u16)0, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->server->ops->set_EA) + rc = pTcon->ses->server->ops->set_EA(xid, pTcon, + full_path, ea_name, NULL, (__u16)0, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } remove_ea_exit: kfree(full_path); @@ -149,18 +151,22 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, cifs_dbg(FYI, "attempt to set cifs inode metadata\n"); ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */ - rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, - (__u16)value_size, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->server->ops->set_EA) + rc = pTcon->ses->server->ops->set_EA(xid, pTcon, + full_path, ea_name, ea_value, (__u16)value_size, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto set_ea_exit; ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */ - rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, - (__u16)value_size, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->server->ops->set_EA) + rc = pTcon->ses->server->ops->set_EA(xid, pTcon, + full_path, ea_name, ea_value, (__u16)value_size, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL, strlen(CIFS_XATTR_CIFS_ACL)) == 0) { #ifdef CONFIG_CIFS_ACL @@ -170,8 +176,12 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, rc = -ENOMEM; } else { memcpy(pacl, ea_value, value_size); - rc = set_cifs_acl(pacl, value_size, - direntry->d_inode, full_path, CIFS_ACL_DACL); + if (pTcon->ses->server->ops->set_acl) + rc = pTcon->ses->server->ops->set_acl(pacl, + value_size, direntry->d_inode, + full_path, CIFS_ACL_DACL); + else + rc = -EOPNOTSUPP; if (rc == 0) /* force revalidate of the inode */ CIFS_I(direntry->d_inode)->time = 0; kfree(pacl); @@ -272,17 +282,21 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, /* revalidate/getattr then populate from inode */ } /* BB add else when above is implemented */ ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */ - rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value, - buf_size, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->server->ops->query_all_EAs) + rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, + full_path, ea_name, ea_value, buf_size, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto get_ea_exit; ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */ - rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value, - buf_size, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->server->ops->query_all_EAs) + rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, + full_path, ea_name, ea_value, buf_size, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else if (strncmp(ea_name, POSIX_ACL_XATTR_ACCESS, strlen(POSIX_ACL_XATTR_ACCESS)) == 0) { #ifdef CONFIG_CIFS_POSIX @@ -313,8 +327,11 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, u32 acllen; struct cifs_ntsd *pacl; - pacl = get_cifs_acl(cifs_sb, direntry->d_inode, - full_path, &acllen); + if (pTcon->ses->server->ops->get_acl == NULL) + goto get_ea_exit; /* rc already EOPNOTSUPP */ + + pacl = pTcon->ses->server->ops->get_acl(cifs_sb, + direntry->d_inode, full_path, &acllen); if (IS_ERR(pacl)) { rc = PTR_ERR(pacl); cifs_dbg(VFS, "%s: error %zd getting sec desc\n", @@ -400,11 +417,12 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) /* if proc/fs/cifs/streamstoxattr is set then search server for EAs or streams to returns as xattrs */ - rc = CIFSSMBQAllEAs(xid, pTcon, full_path, NULL, data, - buf_size, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->server->ops->query_all_EAs) + rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, + full_path, NULL, data, buf_size, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); list_ea_exit: kfree(full_path); free_xid(xid); diff --git a/fs/dcache.c b/fs/dcache.c index da89cdfb21ab..9a59653d3449 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2686,8 +2686,13 @@ char *d_path(const struct path *path, char *buf, int buflen) * thus don't need to be hashed. They also don't need a name until a * user wants to identify the object in /proc/pid/fd/. The little hack * below allows us to generate a name for these objects on demand: + * + * Some pseudo inodes are mountable. When they are mounted + * path->dentry == path->mnt->mnt_root. In that case don't call d_dname + * and instead have d_path return the mounted path. */ - if (path->dentry->d_op && path->dentry->d_op->d_dname) + if (path->dentry->d_op && path->dentry->d_op->d_dname && + (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); diff --git a/fs/dcookies.c b/fs/dcookies.c index ab5954b50267..ac44a69fbea9 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -204,7 +204,7 @@ out: } #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, size_t, len) +COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, compat_size_t, len) { #ifdef __BIG_ENDIAN return sys_lookup_dcookie(((u64)w0 << 32) | w1, buf, len); diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index b74422888604..85cde3e76290 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -103,7 +103,7 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout) layout->max_io_length = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) * - layout->group_width; + (layout->group_width - layout->parity); if (layout->parity) { unsigned stripe_length = (layout->group_width - layout->parity) * @@ -286,7 +286,8 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, if (length) { ore_calc_stripe_info(layout, offset, length, &ios->si); ios->length = ios->si.length; - ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE; + ios->nr_pages = ((ios->offset & (PAGE_SIZE - 1)) + + ios->length + PAGE_SIZE - 1) / PAGE_SIZE; if (layout->parity) _ore_post_alloc_raid_stuff(ios); } @@ -536,6 +537,7 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset, u64 H = LmodS - G * T; u32 N = div_u64(H, U); + u32 Nlast; /* "H - (N * U)" is just "H % U" so it's bound to u32 */ u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width; @@ -568,6 +570,10 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset, si->length = T - H; if (si->length > length) si->length = length; + + Nlast = div_u64(H + si->length + U - 1, U); + si->maxdevUnits = Nlast - N; + si->M = M; } EXPORT_SYMBOL(ore_calc_stripe_info); @@ -583,13 +589,16 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, int ret; if (per_dev->bio == NULL) { - unsigned pages_in_stripe = ios->layout->group_width * - (ios->layout->stripe_unit / PAGE_SIZE); - unsigned nr_pages = ios->nr_pages * ios->layout->group_width / - (ios->layout->group_width - - ios->layout->parity); - unsigned bio_size = (nr_pages + pages_in_stripe) / - ios->layout->group_width; + unsigned bio_size; + + if (!ios->reading) { + bio_size = ios->si.maxdevUnits; + } else { + bio_size = (ios->si.maxdevUnits + 1) * + (ios->layout->group_width - ios->layout->parity) / + ios->layout->group_width; + } + bio_size *= (ios->layout->stripe_unit / PAGE_SIZE); per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); if (unlikely(!per_dev->bio)) { @@ -609,8 +618,12 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, added_len = bio_add_pc_page(q, per_dev->bio, pages[pg], pglen, pgbase); if (unlikely(pglen != added_len)) { - ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u\n", - per_dev->bio->bi_vcnt); + /* If bi_vcnt == bi_max then this is a SW BUG */ + ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=0x%x " + "bi_max=0x%x BIO_MAX=0x%x cur_len=0x%x\n", + per_dev->bio->bi_vcnt, + per_dev->bio->bi_max_vecs, + BIO_MAX_PAGES_KMALLOC, cur_len); ret = -ENOMEM; goto out; } @@ -1098,7 +1111,7 @@ int ore_truncate(struct ore_layout *layout, struct ore_components *oc, size_attr->attr = g_attr_logical_length; size_attr->attr.val_ptr = &size_attr->newsize; - ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", + ORE_DBGMSG2("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", _LLU(oc->comps->obj.id), _LLU(obj_size), i); ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, &size_attr->attr); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5aae3d12d400..790b14c5f262 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -280,6 +280,16 @@ struct ext4_io_submit { /* Translate # of blks to # of clusters */ #define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \ (sbi)->s_cluster_bits) +/* Mask out the low bits to get the starting block of the cluster */ +#define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \ + ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) +#define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \ + ~((ext4_lblk_t) (s)->s_cluster_ratio - 1)) +/* Get the cluster offset */ +#define EXT4_PBLK_COFF(s, pblk) ((pblk) & \ + ((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) +#define EXT4_LBLK_COFF(s, lblk) ((lblk) & \ + ((ext4_lblk_t) (s)->s_cluster_ratio - 1)) /* * Structure of a blocks group descriptor @@ -764,6 +774,8 @@ do { \ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ (einode)->xtime.tv_sec = \ (signed)le32_to_cpu((raw_inode)->xtime); \ + else \ + (einode)->xtime.tv_sec = 0; \ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ ext4_decode_extra_time(&(einode)->xtime, \ raw_inode->xtime ## _extra); \ diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 1c88061da526..1be3996b5942 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -223,6 +223,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, if (WARN_ON_ONCE(err)) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); + ext4_error_inode(inode, where, line, + bh->b_blocknr, + "journal_dirty_metadata failed: " + "handle type %u started at line %u, " + "credits %u/%u, errcode %d", + handle->h_type, + handle->h_line_no, + handle->h_requested_credits, + handle->h_buffer_credits, err); } } else { if (inode) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index dc1e03047226..a2b625e279db 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) { ext4_fsblk_t block = ext4_ext_pblock(ext); int len = ext4_ext_get_actual_len(ext); + ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); + ext4_lblk_t last = lblock + len - 1; - if (len == 0) + if (lblock > last) return 0; return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } @@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + ext4_fsblk_t pblock = 0; + ext4_lblk_t lblock = 0; + ext4_lblk_t prev = 0; + int len = 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; + + /* Check for overlapping extents */ + lblock = le32_to_cpu(ext->ee_block); + len = ext4_ext_get_actual_len(ext); + if ((lblock <= prev) && prev) { + pblock = ext4_ext_pblock(ext); + es->s_last_error_block = cpu_to_le64(pblock); + return 0; + } ext++; entries--; + prev = lblock + len - 1; } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); @@ -1755,8 +1772,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, depth = ext_depth(inode); if (!path[depth].p_ext) goto out; - b2 = le32_to_cpu(path[depth].p_ext->ee_block); - b2 &= ~(sbi->s_cluster_ratio - 1); + b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block)); /* * get the next allocated block if the extent in the path @@ -1766,7 +1782,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, b2 = ext4_ext_next_allocated_block(path); if (b2 == EXT_MAX_BLOCKS) goto out; - b2 &= ~(sbi->s_cluster_ratio - 1); + b2 = EXT4_LBLK_CMASK(sbi, b2); } /* check for wrap through zero on extent logical start block*/ @@ -2427,7 +2443,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, * truncate operation has removed all of the blocks in * the cluster. */ - if (pblk & (sbi->s_cluster_ratio - 1) && + if (EXT4_PBLK_COFF(sbi, pblk) && (ee_len == num)) *partial_cluster = EXT4_B2C(sbi, pblk); else @@ -3658,7 +3674,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ext4_lblk_t lblk_start, lblk_end; - lblk_start = lblk & (~(sbi->s_cluster_ratio - 1)); + lblk_start = EXT4_LBLK_CMASK(sbi, lblk); lblk_end = lblk_start + sbi->s_cluster_ratio - 1; return ext4_find_delalloc_range(inode, lblk_start, lblk_end); @@ -3717,9 +3733,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); /* Check towards left side */ - c_offset = lblk_start & (sbi->s_cluster_ratio - 1); + c_offset = EXT4_LBLK_COFF(sbi, lblk_start); if (c_offset) { - lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1)); + lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start); lblk_to = lblk_from + c_offset - 1; if (ext4_find_delalloc_range(inode, lblk_from, lblk_to)) @@ -3727,7 +3743,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, } /* Now check towards right. */ - c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1); + c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks); if (allocated_clusters && c_offset) { lblk_from = lblk_start + num_blks; lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; @@ -3935,7 +3951,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, struct ext4_ext_path *path) { struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); + ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); ext4_lblk_t ex_cluster_start, ex_cluster_end; ext4_lblk_t rr_cluster_start; ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); @@ -3953,8 +3969,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, (rr_cluster_start == ex_cluster_start)) { if (rr_cluster_start == ex_cluster_end) ee_start += ee_len - 1; - map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) + - c_offset; + map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset; map->m_len = min(map->m_len, (unsigned) sbi->s_cluster_ratio - c_offset); /* @@ -4108,7 +4123,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, */ map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; newex.ee_block = cpu_to_le32(map->m_lblk); - cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1); + cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); /* * If we are doing bigalloc, check to see if the extent returned @@ -4176,7 +4191,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * needed so that future calls to get_implied_cluster_alloc() * work correctly. */ - offset = map->m_lblk & (sbi->s_cluster_ratio - 1); + offset = EXT4_LBLK_COFF(sbi, map->m_lblk); ar.len = EXT4_NUM_B2C(sbi, offset+allocated); ar.goal -= offset; ar.logical -= offset; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 33331b4c2178..e350be6c7ac6 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1957,9 +1957,11 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) } /* Clear the content within i_blocks. */ - if (i_size < EXT4_MIN_INLINE_DATA_SIZE) - memset(ext4_raw_inode(&is.iloc)->i_block + i_size, 0, - EXT4_MIN_INLINE_DATA_SIZE - i_size); + if (i_size < EXT4_MIN_INLINE_DATA_SIZE) { + void *p = (void *) ext4_raw_inode(&is.iloc)->i_block; + memset(p + i_size, 0, + EXT4_MIN_INLINE_DATA_SIZE - i_size); + } EXT4_I(inode)->i_inline_size = i_size < EXT4_MIN_INLINE_DATA_SIZE ? diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 904ca1a21dce..21dff8f236f6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1263,7 +1263,6 @@ static int ext4_journalled_write_end(struct file *file, */ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) { - int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); unsigned int md_needed; @@ -1275,7 +1274,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) * in order to allocate nrblocks * worse case is one extent per block */ -repeat: spin_lock(&ei->i_block_reservation_lock); /* * ext4_calc_metadata_amount() has side effects, which we have @@ -1295,10 +1293,6 @@ repeat: ei->i_da_metadata_calc_len = save_len; ei->i_da_metadata_calc_last_lblock = save_last_lblock; spin_unlock(&ei->i_block_reservation_lock); - if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - cond_resched(); - goto repeat; - } return -ENOSPC; } ei->i_reserved_meta_blocks += md_needed; @@ -1312,7 +1306,6 @@ repeat: */ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) { - int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); unsigned int md_needed; @@ -1334,7 +1327,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) * in order to allocate nrblocks * worse case is one extent per block */ -repeat: spin_lock(&ei->i_block_reservation_lock); /* * ext4_calc_metadata_amount() has side effects, which we have @@ -1354,10 +1346,6 @@ repeat: ei->i_da_metadata_calc_len = save_len; ei->i_da_metadata_calc_last_lblock = save_last_lblock; spin_unlock(&ei->i_block_reservation_lock); - if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - cond_resched(); - goto repeat; - } dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); return -ENOSPC; } @@ -4716,6 +4704,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_size > sbi->s_bitmap_maxbytes) return -EFBIG; } + + if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) + inode_inc_iversion(inode); + if (S_ISREG(inode->i_mode) && (attr->ia_size < inode->i_size)) { if (ext4_should_order_data(inode)) { diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index c0427e2f6648..42624a995b00 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -145,7 +145,7 @@ static long swap_inode_boot_loader(struct super_block *sb, handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2); if (IS_ERR(handle)) { err = -EINVAL; - goto swap_boot_out; + goto journal_err_out; } /* Protect extent tree against block allocations via delalloc */ @@ -203,6 +203,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ext4_double_up_write_data_sem(inode, inode_bl); +journal_err_out: ext4_inode_resume_unlocked_dio(inode); ext4_inode_resume_unlocked_dio(inode_bl); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 59c6750b894f..fba960ee26de 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3423,6 +3423,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head) { struct ext4_prealloc_space *pa; pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); + + BUG_ON(atomic_read(&pa->pa_count)); + BUG_ON(pa->pa_deleted == 0); kmem_cache_free(ext4_pspace_cachep, pa); } @@ -3436,11 +3439,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, ext4_group_t grp; ext4_fsblk_t grp_blk; - if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) - return; - /* in this short window concurrent discard can set pa_deleted */ spin_lock(&pa->pa_lock); + if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) { + spin_unlock(&pa->pa_lock); + return; + } + if (pa->pa_deleted == 1) { spin_unlock(&pa->pa_lock); return; @@ -4102,7 +4107,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, ext4_get_group_no_and_offset(sb, goal, &group, &block); /* set up allocation goals */ - ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); + ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical); ac->ac_status = AC_STATUS_CONTINUE; ac->ac_sb = sb; ac->ac_inode = ar->inode; @@ -4639,7 +4644,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * blocks at the beginning or the end unless we are explicitly * requested to avoid doing so. */ - overflow = block & (sbi->s_cluster_ratio - 1); + overflow = EXT4_PBLK_COFF(sbi, block); if (overflow) { if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { overflow = sbi->s_cluster_ratio - overflow; @@ -4653,7 +4658,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, count += overflow; } } - overflow = count & (sbi->s_cluster_ratio - 1); + overflow = EXT4_LBLK_COFF(sbi, count); if (overflow) { if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { if (count > overflow) @@ -4766,8 +4771,8 @@ do_more: " group:%d block:%d count:%lu failed" " with %d", block_group, bit, count, err); - } - + } else + EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info); ext4_lock_group(sb, block_group); mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 49d3c01eabf8..c503850a61a8 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -238,6 +238,7 @@ static int ext4_alloc_group_tables(struct super_block *sb, ext4_group_t group; ext4_group_t last_group; unsigned overhead; + __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0; BUG_ON(flex_gd->count == 0 || group_data == NULL); @@ -261,7 +262,7 @@ next_group: src_group++; for (; src_group <= last_group; src_group++) { overhead = ext4_group_overhead_blocks(sb, src_group); - if (overhead != 0) + if (overhead == 0) last_blk += group_data[src_group - group].blocks_count; else break; @@ -275,8 +276,7 @@ next_group: group = ext4_get_group_number(sb, start_blk - 1); group -= group_data[0].group; group_data[group].free_blocks_count--; - if (flexbg_size > 1) - flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + flex_gd->bg_flags[group] &= uninit_mask; } /* Allocate inode bitmaps */ @@ -287,22 +287,30 @@ next_group: group = ext4_get_group_number(sb, start_blk - 1); group -= group_data[0].group; group_data[group].free_blocks_count--; - if (flexbg_size > 1) - flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + flex_gd->bg_flags[group] &= uninit_mask; } /* Allocate inode tables */ for (; it_index < flex_gd->count; it_index++) { - if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk) + unsigned int itb = EXT4_SB(sb)->s_itb_per_group; + ext4_fsblk_t next_group_start; + + if (start_blk + itb > last_blk) goto next_group; group_data[it_index].inode_table = start_blk; - group = ext4_get_group_number(sb, start_blk - 1); + group = ext4_get_group_number(sb, start_blk); + next_group_start = ext4_group_first_block_no(sb, group + 1); group -= group_data[0].group; - group_data[group].free_blocks_count -= - EXT4_SB(sb)->s_itb_per_group; - if (flexbg_size > 1) - flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + if (start_blk + itb > next_group_start) { + flex_gd->bg_flags[group + 1] &= uninit_mask; + overhead = start_blk + itb - next_group_start; + group_data[group + 1].free_blocks_count -= overhead; + itb -= overhead; + } + + group_data[group].free_blocks_count -= itb; + flex_gd->bg_flags[group] &= uninit_mask; start_blk += EXT4_SB(sb)->s_itb_per_group; } @@ -396,7 +404,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, start = ext4_group_first_block_no(sb, group); group -= flex_gd->groups[0].group; - count2 = sb->s_blocksize * 8 - (block - start); + count2 = EXT4_BLOCKS_PER_GROUP(sb) - (block - start); if (count2 > count) count2 = count; @@ -615,7 +623,7 @@ handle_ib: if (err) goto out; count = group_table_count[j]; - start = group_data[i].block_bitmap; + start = (&group_data[i].block_bitmap)[j]; block = start; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b3907f1ee202..468e26500dfa 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3213,11 +3213,19 @@ int ext4_calculate_overhead(struct super_block *sb) } -static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) +static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) { ext4_fsblk_t resv_clusters; /* + * There's no need to reserve anything when we aren't using extents. + * The space estimates are exact, there are no unwritten extents, + * hole punching doesn't need new metadata... This is needed especially + * to keep ext2/3 backward compatibility. + */ + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) + return 0; + /* * By default we reserve 2% or 4096 clusters, whichever is smaller. * This should cover the situations where we can not afford to run * out of space like for example punch hole, or converting @@ -3225,7 +3233,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) * allocation would require 1, or 2 blocks, higher numbers are * very rare. */ - resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits; + resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> + EXT4_SB(sb)->s_cluster_bits; do_div(resv_clusters, 50); resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); @@ -3583,16 +3592,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; - i = le32_to_cpu(es->s_flags); - if (i & EXT2_FLAGS_UNSIGNED_HASH) - sbi->s_hash_unsigned = 3; - else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { + if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { + i = le32_to_cpu(es->s_flags); + if (i & EXT2_FLAGS_UNSIGNED_HASH) + sbi->s_hash_unsigned = 3; + else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { #ifdef __CHAR_UNSIGNED__ - es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); - sbi->s_hash_unsigned = 3; + if (!(sb->s_flags & MS_RDONLY)) + es->s_flags |= + cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); + sbi->s_hash_unsigned = 3; #else - es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); + if (!(sb->s_flags & MS_RDONLY)) + es->s_flags |= + cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); #endif + } } /* Handle clustersize */ @@ -3969,10 +3984,10 @@ no_journal: "available"); } - err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi)); + err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); if (err) { ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " - "reserved pool", ext4_calculate_resv_clusters(sbi)); + "reserved pool", ext4_calculate_resv_clusters(sb)); goto failed_mount4a; } diff --git a/fs/file.c b/fs/file.c index 4a78f981557a..9de20265a78c 100644 --- a/fs/file.c +++ b/fs/file.c @@ -34,7 +34,7 @@ static void *alloc_fdmem(size_t size) * vmalloc() if the allocation size will be considered "large" by the VM. */ if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { - void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN); + void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY); if (data != NULL) return data; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index b6b092696a5d..a941933cb0e2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -505,13 +505,16 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, } WARN_ON(inode->i_state & I_SYNC); /* - * Skip inode if it is clean. We don't want to mess with writeback - * lists in this function since flusher thread may be doing for example - * sync in parallel and if we move the inode, it could get skipped. So - * here we make sure inode is on some writeback list and leave it there - * unless we have completely cleaned the inode. + * Skip inode if it is clean and we have no outstanding writeback in + * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this + * function since flusher thread may be doing for example sync in + * parallel and if we move the inode, it could get skipped. So here we + * make sure inode is on some writeback list and leave it there unless + * we have completely cleaned the inode. */ - if (!(inode->i_state & I_DIRTY)) + if (!(inode->i_state & I_DIRTY) && + (wbc->sync_mode != WB_SYNC_ALL || + !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) goto out; inode->i_state |= I_SYNC; spin_unlock(&inode->i_lock); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 0e16fc1cfcd3..8eb5a0992658 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1300,22 +1300,6 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs)); } -static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - return 1; -} - -static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = { - .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, - .confirm = generic_pipe_buf_confirm, - .release = generic_pipe_buf_release, - .steal = fuse_dev_pipe_buf_steal, - .get = generic_pipe_buf_get, -}; - static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) @@ -1362,7 +1346,11 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, buf->page = bufs[page_nr].page; buf->offset = bufs[page_nr].offset; buf->len = bufs[page_nr].len; - buf->ops = &fuse_dev_pipe_buf_ops; + /* + * Need to be careful about this. Having buf->ops in module + * code can Oops if the buffer persists after module unload. + */ + buf->ops = &nosteal_pipe_buf_ops; pipe->nrbufs++; page_nr++; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 0bad69ed6336..76251600cbea 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -999,6 +999,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + struct address_space *mapping = inode->i_mapping; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int rv; @@ -1019,6 +1020,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, if (rv != 1) goto out; /* dio not valid, fall back to buffered i/o */ + /* + * Now since we are holding a deferred (CW) lock at this point, you + * might be wondering why this is ever needed. There is a case however + * where we've granted a deferred local lock against a cached exclusive + * glock. That is ok provided all granted local locks are deferred, but + * it also means that it is possible to encounter pages which are + * cached and possibly also mapped. So here we check for that and sort + * them out ahead of the dio. The glock state machine will take care of + * everything else. + * + * If in fact the cached glock state (gl->gl_state) is deferred (CW) in + * the first place, mapping->nr_pages will always be zero. + */ + if (mapping->nrpages) { + loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); + loff_t len = iov_length(iov, nr_segs); + loff_t end = PAGE_ALIGN(offset + len) - 1; + + rv = 0; + if (len == 0) + goto out; + if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) + unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); + rv = filemap_write_and_wait_range(mapping, lstart, end); + if (rv) + return rv; + truncate_inode_pages_range(mapping, lstart, end); + } + rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, gfs2_get_block_direct, NULL, NULL, 0); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 62b484e4a9e4..bc5dac400125 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1536,10 +1536,22 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) ogid = ngid = NO_GID_QUOTA_CHANGE; - error = gfs2_quota_lock(ip, nuid, ngid); + error = get_write_access(inode); if (error) return error; + error = gfs2_rs_alloc(ip); + if (error) + goto out; + + error = gfs2_rindex_update(sdp); + if (error) + goto out; + + error = gfs2_quota_lock(ip, nuid, ngid); + if (error) + goto out; + if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { error = gfs2_quota_check(ip, nuid, ngid); @@ -1566,6 +1578,8 @@ out_end_trans: gfs2_trans_end(sdp); out_gunlock_q: gfs2_quota_unlock(ip); +out: + put_write_access(inode); return error; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 60ede2a0f43f..f7dd3b4f8ab0 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1317,8 +1317,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, if (IS_ERR(s)) goto error_bdev; - if (s->s_root) + if (s->s_root) { + /* + * s_umount nests inside bd_mutex during + * __invalidate_device(). blkdev_put() acquires + * bd_mutex and can't be called under s_umount. Drop + * s_umount temporarily. This is safe as we're + * holding an active reference. + */ + up_write(&s->s_umount); blkdev_put(bdev, mode); + down_write(&s->s_umount); + } memset(&args, 0, sizeof(args)); args.ar_quota = GFS2_QUOTA_DEFAULT; diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e0c0bc275924..a6917125f215 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1151,7 +1151,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) * once a transaction -bzzz */ jh->b_modified = 1; - J_ASSERT_JH(jh, handle->h_buffer_credits > 0); + if (handle->h_buffer_credits <= 0) { + ret = -ENOSPC; + goto out_unlock_bh; + } handle->h_buffer_credits--; } @@ -1234,7 +1237,6 @@ out_unlock_bh: jbd2_journal_put_journal_head(jh); out: JBUFFER_TRACE(jh, "exit"); - WARN_ON(ret); /* All errors are bugs, so dump the stack */ return ret; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 8ebd3f551e0c..ffc4045fc62e 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -767,6 +767,7 @@ nlmsvc_grant_blocked(struct nlm_block *block) struct nlm_file *file = block->b_file; struct nlm_lock *lock = &block->b_call->a_args.lock; int error; + loff_t fl_start, fl_end; dprintk("lockd: grant blocked lock %p\n", block); @@ -784,9 +785,16 @@ nlmsvc_grant_blocked(struct nlm_block *block) } /* Try the lock operation again */ + /* vfs_lock_file() can mangle fl_start and fl_end, but we need + * them unchanged for the GRANT_MSG + */ lock->fl.fl_flags |= FL_SLEEP; + fl_start = lock->fl.fl_start; + fl_end = lock->fl.fl_end; error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); lock->fl.fl_flags &= ~FL_SLEEP; + lock->fl.fl_start = fl_start; + lock->fl.fl_end = fl_end; switch (error) { case 0: diff --git a/fs/mount.h b/fs/mount.h index 64a858143ff9..68d80bdcd081 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -73,7 +73,7 @@ static inline int mnt_has_parent(struct mount *mnt) static inline int is_mounted(struct vfsmount *mnt) { /* neither detached nor internal? */ - return !IS_ERR_OR_NULL(real_mount(mnt)); + return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns); } extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int); diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 9c3e117c3ed1..4d0161442565 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -44,7 +44,7 @@ static inline sector_t normalize(sector_t s, int base) { sector_t tmp = s; /* Since do_div modifies its argument */ - return s - do_div(tmp, base); + return s - sector_div(tmp, base); } static inline sector_t normalize_up(sector_t s, int base) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 4cbad5d6b276..02773aab43c5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -240,13 +240,11 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, error = nfs4_discover_server_trunking(clp, &old); if (error < 0) goto error; - nfs_put_client(clp); - if (clp != old) { - clp->cl_preserve_clid = true; - clp = old; - } - return clp; + if (clp != old) + clp->cl_preserve_clid = true; + nfs_put_client(clp); + return old; error: nfs_mark_client_ready(clp, error); @@ -324,9 +322,10 @@ int nfs40_walk_client_list(struct nfs_client *new, prev = pos; status = nfs_wait_client_init_complete(pos); - spin_lock(&nn->nfs_client_lock); if (status < 0) - continue; + goto out; + status = -NFS4ERR_STALE_CLIENTID; + spin_lock(&nn->nfs_client_lock); } if (pos->cl_cons_state != NFS_CS_READY) continue; @@ -464,7 +463,8 @@ int nfs41_walk_client_list(struct nfs_client *new, } spin_lock(&nn->nfs_client_lock); if (status < 0) - continue; + break; + status = -NFS4ERR_STALE_CLIENTID; } if (pos->cl_cons_state != NFS_CS_READY) continue; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 10a0dde7731a..7ccc30ecd744 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4222,8 +4222,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - task->tk_status = 0; - return -EAGAIN; + goto wait_on_recovery; #endif /* CONFIG_NFS_V4_1 */ case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); @@ -6233,9 +6232,9 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) struct nfs_server *server = NFS_SERVER(inode); struct pnfs_layout_hdr *lo; struct nfs4_state *state = NULL; - unsigned long timeo, giveup; + unsigned long timeo, now, giveup; - dprintk("--> %s\n", __func__); + dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); if (!nfs41_sequence_done(task, &lgp->res.seq_res)) goto out; @@ -6243,12 +6242,38 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case 0: goto out; + /* + * NFS4ERR_LAYOUTTRYLATER is a conflict with another client + * (or clients) writing to the same RAID stripe + */ case -NFS4ERR_LAYOUTTRYLATER: + /* + * NFS4ERR_RECALLCONFLICT is when conflict with self (must recall + * existing layout before getting a new one). + */ case -NFS4ERR_RECALLCONFLICT: timeo = rpc_get_timeout(task->tk_client); giveup = lgp->args.timestamp + timeo; - if (time_after(giveup, jiffies)) - task->tk_status = -NFS4ERR_DELAY; + now = jiffies; + if (time_after(giveup, now)) { + unsigned long delay; + + /* Delay for: + * - Not less then NFS4_POLL_RETRY_MIN. + * - One last time a jiffie before we give up + * - exponential backoff (time_now minus start_attempt) + */ + delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN, + min((giveup - now - 1), + now - lgp->args.timestamp)); + + dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n", + __func__, delay); + rpc_delay(task, delay); + task->tk_status = 0; + rpc_restart_call_prepare(task); + goto out; /* Do not call nfs4_async_handle_error() */ + } break; case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: @@ -6684,7 +6709,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, switch (err) { case 0: case -NFS4ERR_WRONGSEC: - case -NFS4ERR_NOTSUPP: + case -ENOTSUPP: goto out; default: err = nfs4_handle_exception(server, err, &exception); @@ -6716,7 +6741,7 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, * Fall back on "guess and check" method if * the server doesn't support SECINFO_NO_NAME */ - if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) { + if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) { err = nfs4_find_root_sec(server, fhandle, info); goto out_freepage; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4be8d135ed61..988efb4caac0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3002,7 +3002,8 @@ out_overflow: return -EIO; } -static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) +static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected, + int *nfs_retval) { __be32 *p; uint32_t opnum; @@ -3012,19 +3013,32 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) if (unlikely(!p)) goto out_overflow; opnum = be32_to_cpup(p++); - if (opnum != expected) { - dprintk("nfs: Server returned operation" - " %d but we issued a request for %d\n", - opnum, expected); - return -EIO; - } + if (unlikely(opnum != expected)) + goto out_bad_operation; nfserr = be32_to_cpup(p); - if (nfserr != NFS_OK) - return nfs4_stat_to_errno(nfserr); - return 0; + if (nfserr == NFS_OK) + *nfs_retval = 0; + else + *nfs_retval = nfs4_stat_to_errno(nfserr); + return true; +out_bad_operation: + dprintk("nfs: Server returned operation" + " %d but we issued a request for %d\n", + opnum, expected); + *nfs_retval = -EREMOTEIO; + return false; out_overflow: print_overflow_msg(__func__, xdr); - return -EIO; + *nfs_retval = -EIO; + return false; +} + +static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) +{ + int retval; + + __decode_op_hdr(xdr, expected, &retval); + return retval; } /* Dummy routine */ @@ -4842,11 +4856,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) uint32_t savewords, bmlen, i; int status; - status = decode_op_hdr(xdr, OP_OPEN); - if (status != -EIO) - nfs_increment_open_seqid(status, res->seqid); - if (!status) - status = decode_stateid(xdr, &res->stateid); + if (!__decode_op_hdr(xdr, OP_OPEN, &status)) + return status; + nfs_increment_open_seqid(status, res->seqid); + if (status) + return status; + status = decode_stateid(xdr, &res->stateid); if (unlikely(status)) return status; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index e76244edd748..ec8d97ddc635 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -129,6 +129,13 @@ nfsd_reply_cache_alloc(void) } static void +nfsd_reply_cache_unhash(struct svc_cacherep *rp) +{ + hlist_del_init(&rp->c_hash); + list_del_init(&rp->c_lru); +} + +static void nfsd_reply_cache_free_locked(struct svc_cacherep *rp) { if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { @@ -403,7 +410,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); if (nfsd_cache_entry_expired(rp) || num_drc_entries >= max_drc_entries) { - lru_put_end(rp); + nfsd_reply_cache_unhash(rp); prune_cache_entries(); goto search_cache; } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index cbd66188a28b..958a5b57ed4a 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1440,17 +1440,19 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, nilfs_clear_logs(&sci->sc_segbufs); - err = nilfs_segctor_extend_segments(sci, nilfs, nadd); - if (unlikely(err)) - return err; - if (sci->sc_stage.flags & NILFS_CF_SUFREED) { err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, sci->sc_freesegs, sci->sc_nfreesegs, NULL); WARN_ON(err); /* do not happen */ + sci->sc_stage.flags &= ~NILFS_CF_SUFREED; } + + err = nilfs_segctor_extend_segments(sci, nilfs, nadd); + if (unlikely(err)) + return err; + nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); sci->sc_stage = prev_stage; } diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 77cc85dd0db0..f1680cdbd88b 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -867,9 +867,9 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, { return sys_fanotify_mark(fanotify_fd, flags, #ifdef __BIG_ENDIAN - ((__u64)mask1 << 32) | mask0, -#else ((__u64)mask0 << 32) | mask1, +#else + ((__u64)mask1 << 32) | mask0, #endif dfd, pathname); } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 3e64169ef527..38802d683969 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -581,9 +581,17 @@ int dquot_scan_active(struct super_block *sb, dqstats_inc(DQST_LOOKUPS); dqput(old_dquot); old_dquot = dquot; - ret = fn(dquot, priv); - if (ret < 0) - goto out; + /* + * ->release_dquot() can be racing with us. Our reference + * protects us from new calls to it so just wait for any + * outstanding call and recheck the DQ_ACTIVE_B after that. + */ + wait_on_dquot(dquot); + if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { + ret = fn(dquot, priv); + if (ret < 0) + goto out; + } spin_lock(&dq_list_lock); /* We are safe to continue now because our dquot could not * be moved out of the inuse list while we hold the reference */ diff --git a/fs/read_write.c b/fs/read_write.c index 2cefa417be34..f6b7c600eb7f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -947,9 +947,9 @@ out: return ret; } -COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd, +COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, const struct compat_iovec __user *,vec, - unsigned long, vlen) + compat_ulong_t, vlen) { struct fd f = fdget(fd); ssize_t ret; @@ -983,9 +983,9 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, return ret; } -COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd, +COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, const struct compat_iovec __user *,vec, - unsigned long, vlen, u32, pos_low, u32, pos_high) + compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; return compat_sys_preadv64(fd, vec, vlen, pos); @@ -1013,9 +1013,9 @@ out: return ret; } -COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd, +COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, const struct compat_iovec __user *, vec, - unsigned long, vlen) + compat_ulong_t, vlen) { struct fd f = fdget(fd); ssize_t ret; @@ -1049,9 +1049,9 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, return ret; } -COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd, +COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, const struct compat_iovec __user *,vec, - unsigned long, vlen, u32, pos_low, u32, pos_high) + compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; return compat_sys_pwritev64(fd, vec, vlen, pos); diff --git a/fs/splice.c b/fs/splice.c index d37431dd60a1..4b5a5fac3383 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -555,6 +555,24 @@ static const struct pipe_buf_operations default_pipe_buf_ops = { .get = generic_pipe_buf_get, }; +static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return 1; +} + +/* Pipe buffer operations for a socket and similar. */ +const struct pipe_buf_operations nosteal_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = generic_pipe_buf_release, + .steal = generic_pipe_buf_nosteal, + .get = generic_pipe_buf_get, +}; +EXPORT_SYMBOL(nosteal_pipe_buf_ops); + static ssize_t kernel_readv(struct file *file, const struct iovec *vec, unsigned long vlen, loff_t offset) { diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 3c3644ea825b..2288db4e1784 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -216,6 +216,8 @@ xfs_growfs_data_private( */ nfree = 0; for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { + __be32 *agfl_bno; + /* * AG freespace header block */ @@ -275,8 +277,10 @@ xfs_growfs_data_private( agfl->agfl_seqno = cpu_to_be32(agno); uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid); } + + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp); for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) - agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); + agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); error = xfs_bwrite(bp); xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index ca01d830e989..83dfe6e73235 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -409,7 +409,8 @@ xfs_attrlist_by_handle( return -XFS_ERROR(EPERM); if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) return -XFS_ERROR(EFAULT); - if (al_hreq.buflen > XATTR_LIST_MAX) + if (al_hreq.buflen < sizeof(struct attrlist) || + al_hreq.buflen > XATTR_LIST_MAX) return -XFS_ERROR(EINVAL); /* diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index c0c66259cc91..68799d7f02cc 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -359,7 +359,8 @@ xfs_compat_attrlist_by_handle( if (copy_from_user(&al_hreq, arg, sizeof(compat_xfs_fsop_attrlist_handlereq_t))) return -XFS_ERROR(EFAULT); - if (al_hreq.buflen > XATTR_LIST_MAX) + if (al_hreq.buflen < sizeof(struct attrlist) || + al_hreq.buflen > XATTR_LIST_MAX) return -XFS_ERROR(EINVAL); /* |