diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/llite')
-rw-r--r-- | drivers/staging/lustre/lustre/llite/Makefile | 5 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/dcache.c | 34 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/dir.c | 24 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/file.c | 657 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/llite_internal.h | 76 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/llite_lib.c | 76 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/lloop.c | 3 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/lproc_llite.c | 41 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/namei.c | 14 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/super25.c | 4 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/vvp_io.c | 61 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/vvp_object.c | 2 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/xattr.c | 104 | ||||
-rw-r--r-- | drivers/staging/lustre/lustre/llite/xattr_cache.c | 617 |
14 files changed, 1511 insertions, 207 deletions
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile index f493e0740004..c76f3cfedab0 100644 --- a/drivers/staging/lustre/lustre/llite/Makefile +++ b/drivers/staging/lustre/lustre/llite/Makefile @@ -1,12 +1,13 @@ obj-$(CONFIG_LUSTRE_FS) += lustre.o obj-$(CONFIG_LUSTRE_LLITE_LLOOP) += llite_lloop.o lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \ - rw.o lproc_llite.o namei.o symlink.o llite_mmap.o \ - xattr.o remote_perm.o llite_rmtacl.o llite_capa.o \ + rw.o namei.o symlink.o llite_mmap.o \ + xattr.o xattr_cache.o remote_perm.o llite_rmtacl.o llite_capa.o \ rw26.o super25.o statahead.o \ ../lclient/glimpse.o ../lclient/lcommon_cl.o ../lclient/lcommon_misc.o \ vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o +lustre-$(CONFIG_PROC_FS) += lproc_llite.o llite_lloop-y := lloop.o diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index e7629be39739..cbd663ed030c 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -404,7 +404,6 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, struct inode *inode = de->d_inode; struct ll_inode_info *lli = ll_i2info(inode); struct obd_client_handle **och_p; - __u64 *och_usecount; __u64 ibits; /* @@ -418,37 +417,32 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, */ - if (it->it_flags & FMODE_WRITE) { + if (it->it_flags & FMODE_WRITE) och_p = &lli->lli_mds_write_och; - och_usecount = &lli->lli_open_fd_write_count; - } else if (it->it_flags & FMODE_EXEC) { + else if (it->it_flags & FMODE_EXEC) och_p = &lli->lli_mds_exec_och; - och_usecount = &lli->lli_open_fd_exec_count; - } else { + else och_p = &lli->lli_mds_read_och; - och_usecount = &lli->lli_open_fd_read_count; - } + /* Check for the proper lock. */ ibits = MDS_INODELOCK_LOOKUP; if (!ll_have_md_lock(inode, &ibits, LCK_MINMODE)) goto do_lock; mutex_lock(&lli->lli_och_mutex); if (*och_p) { /* Everything is open already, do nothing */ - /*(*och_usecount)++; Do not let them steal our open - handle from under us */ - SET_BUT_UNUSED(och_usecount); - /* XXX The code above was my original idea, but in case - we have the handle, but we cannot use it due to later - checks (e.g. O_CREAT|O_EXCL flags set), nobody - would decrement counter increased here. So we just - hope the lock won't be invalidated in between. But - if it would be, we'll reopen the open request to - MDS later during file open path */ + /* Originally it was idea to do not let them steal our + * open handle from under us by (*och_usecount)++ here. + * But in case we have the handle, but we cannot use it + * due to later checks (e.g. O_CREAT|O_EXCL flags set), + * nobody would decrement counter increased here. So we + * just hope the lock won't be invalidated in between. + * But if it would be, we'll reopen the open request to + * MDS later during file open path. + */ mutex_unlock(&lli->lli_och_mutex); return 1; - } else { - mutex_unlock(&lli->lli_och_mutex); } + mutex_unlock(&lli->lli_och_mutex); } if (it->it_op == IT_GETATTR) { diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index 1f079034bd8f..22d0acc95bc5 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -1809,8 +1809,28 @@ out_rmdir: return -EFAULT; } - rc = obd_iocontrol(cmd, ll_i2mdexp(inode), totalsize, - hur, NULL); + if (hur->hur_request.hr_action == HUA_RELEASE) { + const struct lu_fid *fid; + struct inode *f; + int i; + + for (i = 0; i < hur->hur_request.hr_itemcount; i++) { + fid = &hur->hur_user_item[i].hui_fid; + f = search_inode_for_lustre(inode->i_sb, fid); + if (IS_ERR(f)) { + rc = PTR_ERR(f); + break; + } + + rc = ll_hsm_release(f); + iput(f); + if (rc != 0) + break; + } + } else { + rc = obd_iocontrol(cmd, ll_i2mdexp(inode), totalsize, + hur, NULL); + } OBD_FREE_LARGE(hur, totalsize); diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index fb85a58db058..c12821aedc2f 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -115,7 +115,8 @@ out: static int ll_close_inode_openhandle(struct obd_export *md_exp, struct inode *inode, - struct obd_client_handle *och) + struct obd_client_handle *och, + const __u64 *data_version) { struct obd_export *exp = ll_i2mdexp(inode); struct md_op_data *op_data; @@ -139,6 +140,13 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here. ll_prepare_close(inode, op_data, och); + if (data_version != NULL) { + /* Pass in data_version implies release. */ + op_data->op_bias |= MDS_HSM_RELEASE; + op_data->op_data_version = *data_version; + op_data->op_lease_handle = och->och_lease_handle; + op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; + } epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE); rc = md_close(md_exp, op_data, och->och_mod, &req); if (rc == -EAGAIN) { @@ -167,14 +175,20 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, spin_unlock(&lli->lli_lock); } - ll_finish_md_op_data(op_data); - if (rc == 0) { rc = ll_objects_destroy(req, inode); if (rc) CERROR("inode %lu ll_objects destroy: rc = %d\n", inode->i_ino, rc); } + if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) { + struct mdt_body *body; + body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); + if (!(body->valid & OBD_MD_FLRELEASED)) + rc = -EBUSY; + } + + ll_finish_md_op_data(op_data); out: if (exp_connect_som(exp) && !epoch_close && @@ -224,7 +238,7 @@ int ll_md_real_close(struct inode *inode, int flags) if (och) { /* There might be a race and somebody have freed this och already */ rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, - inode, och); + inode, och, NULL); } return rc; @@ -241,6 +255,24 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode, if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid); + if (fd->fd_lease_och != NULL) { + bool lease_broken; + + /* Usually the lease is not released when the + * application crashed, we need to release here. */ + rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken); + CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n", + PFID(&lli->lli_fid), rc, lease_broken); + + fd->fd_lease_och = NULL; + } + + if (fd->fd_och != NULL) { + rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL); + fd->fd_och = NULL; + GOTO(out, rc); + } + /* Let's see if we have good enough OPEN lock on the file and if we can skip talking to MDS */ if (file->f_dentry->d_inode) { /* Can this ever be false? */ @@ -277,6 +309,7 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode, file, file->f_dentry, file->f_dentry->d_name.name); } +out: LUSTRE_FPRIVATE(file) = NULL; ll_file_data_put(fd); ll_capa_close(inode); @@ -431,22 +464,18 @@ void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch) } } -static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli, - struct lookup_intent *it, struct obd_client_handle *och) +static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it, + struct obd_client_handle *och) { struct ptlrpc_request *req = it->d.lustre.it_data; struct mdt_body *body; - LASSERT(och); - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - LASSERT(body != NULL); /* reply already checked out */ - - memcpy(&och->och_fh, &body->handle, sizeof(body->handle)); + och->och_fh = body->handle; + och->och_fid = body->fid1; + och->och_lease_handle.cookie = it->d.lustre.it_lock_handle; och->och_magic = OBD_CLIENT_HANDLE_MAGIC; - och->och_fid = lli->lli_fid; och->och_flags = it->it_flags; - ll_ioepoch_open(lli, body->ioepoch); return md_set_open_replay_data(md_exp, och, req); } @@ -466,20 +495,17 @@ int ll_local_open(struct file *file, struct lookup_intent *it, struct mdt_body *body; int rc; - rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och); - if (rc) + rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och); + if (rc != 0) return rc; body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - if ((it->it_flags & FMODE_WRITE) && - (body->valid & OBD_MD_FLSIZE)) - CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n", - lli->lli_ioepoch, PFID(&lli->lli_fid)); + ll_ioepoch_open(lli, body->ioepoch); } LUSTRE_FPRIVATE(file) = fd; ll_readahead_init(inode, &fd->fd_ras); - fd->fd_omode = it->it_flags; + fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); return 0; } @@ -681,6 +707,198 @@ out_openerr: return rc; } +static int ll_md_blocking_lease_ast(struct ldlm_lock *lock, + struct ldlm_lock_desc *desc, void *data, int flag) +{ + int rc; + struct lustre_handle lockh; + + switch (flag) { + case LDLM_CB_BLOCKING: + ldlm_lock2handle(lock, &lockh); + rc = ldlm_cli_cancel(&lockh, LCF_ASYNC); + if (rc < 0) { + CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc); + return rc; + } + break; + case LDLM_CB_CANCELING: + /* do nothing */ + break; + } + return 0; +} + +/** + * Acquire a lease and open the file. + */ +struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file, + fmode_t fmode, __u64 open_flags) +{ + struct lookup_intent it = { .it_op = IT_OPEN }; + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct md_op_data *op_data; + struct ptlrpc_request *req; + struct lustre_handle old_handle = { 0 }; + struct obd_client_handle *och = NULL; + int rc; + int rc2; + + if (fmode != FMODE_WRITE && fmode != FMODE_READ) + return ERR_PTR(-EINVAL); + + if (file != NULL) { + struct ll_inode_info *lli = ll_i2info(inode); + struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct obd_client_handle **och_p; + __u64 *och_usecount; + + if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC)) + return ERR_PTR(-EPERM); + + /* Get the openhandle of the file */ + rc = -EBUSY; + mutex_lock(&lli->lli_och_mutex); + if (fd->fd_lease_och != NULL) { + mutex_unlock(&lli->lli_och_mutex); + return ERR_PTR(rc); + } + + if (fd->fd_och == NULL) { + if (file->f_mode & FMODE_WRITE) { + LASSERT(lli->lli_mds_write_och != NULL); + och_p = &lli->lli_mds_write_och; + och_usecount = &lli->lli_open_fd_write_count; + } else { + LASSERT(lli->lli_mds_read_och != NULL); + och_p = &lli->lli_mds_read_och; + och_usecount = &lli->lli_open_fd_read_count; + } + if (*och_usecount == 1) { + fd->fd_och = *och_p; + *och_p = NULL; + *och_usecount = 0; + rc = 0; + } + } + mutex_unlock(&lli->lli_och_mutex); + if (rc < 0) /* more than 1 opener */ + return ERR_PTR(rc); + + LASSERT(fd->fd_och != NULL); + old_handle = fd->fd_och->och_fh; + } + + OBD_ALLOC_PTR(och); + if (och == NULL) + return ERR_PTR(-ENOMEM); + + op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0, + LUSTRE_OPC_ANY, NULL); + if (IS_ERR(op_data)) + GOTO(out, rc = PTR_ERR(op_data)); + + /* To tell the MDT this openhandle is from the same owner */ + op_data->op_handle = old_handle; + + it.it_flags = fmode | open_flags; + it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE; + rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req, + ll_md_blocking_lease_ast, + /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise + * it can be cancelled which may mislead applications that the lease is + * broken; + * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal + * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast + * doesn't deal with openhandle, so normal openhandle will be leaked. */ + LDLM_FL_NO_LRU | LDLM_FL_EXCL); + ll_finish_md_op_data(op_data); + if (req != NULL) { + ptlrpc_req_finished(req); + it_clear_disposition(&it, DISP_ENQ_COMPLETE); + } + if (rc < 0) + GOTO(out_release_it, rc); + + if (it_disposition(&it, DISP_LOOKUP_NEG)) + GOTO(out_release_it, rc = -ENOENT); + + rc = it_open_error(DISP_OPEN_OPEN, &it); + if (rc) + GOTO(out_release_it, rc); + + LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF)); + ll_och_fill(sbi->ll_md_exp, &it, och); + + if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */ + GOTO(out_close, rc = -EOPNOTSUPP); + + /* already get lease, handle lease lock */ + ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL); + if (it.d.lustre.it_lock_mode == 0 || + it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) { + /* open lock must return for lease */ + CERROR(DFID "lease granted but no open lock, %d/%llu.\n", + PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode, + it.d.lustre.it_lock_bits); + GOTO(out_close, rc = -EPROTO); + } + + ll_intent_release(&it); + return och; + +out_close: + rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL); + if (rc2) + CERROR("Close openhandle returned %d\n", rc2); + + /* cancel open lock */ + if (it.d.lustre.it_lock_mode != 0) { + ldlm_lock_decref_and_cancel(&och->och_lease_handle, + it.d.lustre.it_lock_mode); + it.d.lustre.it_lock_mode = 0; + } +out_release_it: + ll_intent_release(&it); +out: + OBD_FREE_PTR(och); + return ERR_PTR(rc); +} +EXPORT_SYMBOL(ll_lease_open); + +/** + * Release lease and close the file. + * It will check if the lease has ever broken. + */ +int ll_lease_close(struct obd_client_handle *och, struct inode *inode, + bool *lease_broken) +{ + struct ldlm_lock *lock; + bool cancelled = true; + int rc; + + lock = ldlm_handle2lock(&och->och_lease_handle); + if (lock != NULL) { + lock_res_and_lock(lock); + cancelled = ldlm_is_cancel(lock); + unlock_res_and_lock(lock); + ldlm_lock_put(lock); + } + + CDEBUG(D_INODE, "lease for "DFID" broken? %d\n", + PFID(&ll_i2info(inode)->lli_fid), cancelled); + + if (!cancelled) + ldlm_cli_cancel(&och->och_lease_handle, 0); + if (lease_broken != NULL) + *lease_broken = cancelled; + + rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och, + NULL); + return rc; +} +EXPORT_SYMBOL(ll_lease_close); + /* Fills the obdo with the attributes for the lsm */ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, struct obd_capa *capa, struct obdo *obdo, @@ -905,7 +1123,7 @@ out: cl_io_fini(env, io); /* If any bit been read/written (result != 0), we just return * short read/write instead of restart io. */ - if (result == 0 && io->ci_need_restart) { + if ((result == 0 || result == -ENODATA) && io->ci_need_restart) { CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n", iot == CIT_READ ? "read" : "write", file->f_dentry->d_name.name, *ppos, count); @@ -930,48 +1148,16 @@ out: return result; } - -/* - * XXX: exact copy from kernel code (__generic_file_aio_write_nolock) - */ -static int ll_file_get_iov_count(const struct iovec *iov, - unsigned long *nr_segs, size_t *count) -{ - size_t cnt = 0; - unsigned long seg; - - for (seg = 0; seg < *nr_segs; seg++) { - const struct iovec *iv = &iov[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - cnt += iv->iov_len; - if (unlikely((ssize_t)(cnt|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - *nr_segs = seg; - cnt -= iv->iov_len; /* This segment is no good */ - break; - } - *count = cnt; - return 0; -} - static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct lu_env *env; struct vvp_io_args *args; - size_t count; + size_t count = 0; ssize_t result; int refcheck; - result = ll_file_get_iov_count(iov, &nr_segs, &count); + result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); if (result) return result; @@ -1026,11 +1212,11 @@ static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov, { struct lu_env *env; struct vvp_io_args *args; - size_t count; + size_t count = 0; ssize_t result; int refcheck; - result = ll_file_get_iov_count(iov, &nr_segs, &count); + result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); if (result) return result; @@ -1482,12 +1668,11 @@ int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it) if (!och) GOTO(out, rc = -ENOMEM); - ll_och_fill(ll_i2sbi(inode)->ll_md_exp, - ll_i2info(inode), it, och); + ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och); rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, - inode, och); - out: + inode, och, NULL); +out: /* this one is in place of ll_file_open */ if (it_disposition(it, DISP_ENQ_OPEN_REF)) { ptlrpc_req_finished(it->d.lustre.it_data); @@ -1692,6 +1877,53 @@ out: return rc; } +/* + * Trigger a HSM release request for the provided inode. + */ +int ll_hsm_release(struct inode *inode) +{ + struct cl_env_nest nest; + struct lu_env *env; + struct obd_client_handle *och = NULL; + __u64 data_version = 0; + int rc; + + + CDEBUG(D_INODE, "%s: Releasing file "DFID".\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(&ll_i2info(inode)->lli_fid)); + + och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE); + if (IS_ERR(och)) + GOTO(out, rc = PTR_ERR(och)); + + /* Grab latest data_version and [am]time values */ + rc = ll_data_version(inode, &data_version, 1); + if (rc != 0) + GOTO(out, rc); + + env = cl_env_nested_get(&nest); + if (IS_ERR(env)) + GOTO(out, rc = PTR_ERR(env)); + + ll_merge_lvb(env, inode); + cl_env_nested_put(&nest, env); + + /* Release the file. + * NB: lease lock handle is released in mdc_hsm_release_pack() because + * we still need it to pack l_remote_handle to MDT. */ + rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och, + &data_version); + och = NULL; + + +out: + if (och != NULL && !IS_ERR(och)) /* close the file */ + ll_lease_close(och, inode, NULL); + + return rc; +} + struct ll_swap_stack { struct iattr ia1, ia2; __u64 dv1, dv2; @@ -1853,6 +2085,86 @@ free: return rc; } +static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss) +{ + struct md_op_data *op_data; + int rc; + + /* Non-root users are forbidden to set or clear flags which are + * NOT defined in HSM_USER_MASK. */ + if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) && + !cfs_capable(CFS_CAP_SYS_ADMIN)) + return -EPERM; + + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY, hss); + if (IS_ERR(op_data)) + return PTR_ERR(op_data); + + rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode), + sizeof(*op_data), op_data, NULL); + + ll_finish_md_op_data(op_data); + + return rc; +} + +static int ll_hsm_import(struct inode *inode, struct file *file, + struct hsm_user_import *hui) +{ + struct hsm_state_set *hss = NULL; + struct iattr *attr = NULL; + int rc; + + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + /* set HSM flags */ + OBD_ALLOC_PTR(hss); + if (hss == NULL) + GOTO(out, rc = -ENOMEM); + + hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID; + hss->hss_archive_id = hui->hui_archive_id; + hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED; + rc = ll_hsm_state_set(inode, hss); + if (rc != 0) + GOTO(out, rc); + + OBD_ALLOC_PTR(attr); + if (attr == NULL) + GOTO(out, rc = -ENOMEM); + + attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO); + attr->ia_mode |= S_IFREG; + attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid); + attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid); + attr->ia_size = hui->hui_size; + attr->ia_mtime.tv_sec = hui->hui_mtime; + attr->ia_mtime.tv_nsec = hui->hui_mtime_ns; + attr->ia_atime.tv_sec = hui->hui_atime; + attr->ia_atime.tv_nsec = hui->hui_atime_ns; + + attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE | + ATTR_UID | ATTR_GID | + ATTR_MTIME | ATTR_MTIME_SET | + ATTR_ATIME | ATTR_ATIME_SET; + + rc = ll_setattr_raw(file->f_dentry, attr, true); + if (rc == -ENODATA) + rc = 0; + +out: + if (hss != NULL) + OBD_FREE_PTR(hss); + + if (attr != NULL) + OBD_FREE_PTR(attr); + + return rc; +} + long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file->f_dentry->d_inode; @@ -2014,37 +2326,19 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return rc; } case LL_IOC_HSM_STATE_SET: { - struct md_op_data *op_data; struct hsm_state_set *hss; int rc; OBD_ALLOC_PTR(hss); if (hss == NULL) return -ENOMEM; + if (copy_from_user(hss, (char *)arg, sizeof(*hss))) { OBD_FREE_PTR(hss); return -EFAULT; } - /* Non-root users are forbidden to set or clear flags which are - * NOT defined in HSM_USER_MASK. */ - if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) - && !cfs_capable(CFS_CAP_SYS_ADMIN)) { - OBD_FREE_PTR(hss); - return -EPERM; - } - - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, hss); - if (IS_ERR(op_data)) { - OBD_FREE_PTR(hss); - return PTR_ERR(op_data); - } - - rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data), - op_data, NULL); - - ll_finish_md_op_data(op_data); + rc = ll_hsm_state_set(inode, hss); OBD_FREE_PTR(hss); return rc; @@ -2075,6 +2369,107 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) OBD_FREE_PTR(hca); return rc; } + case LL_IOC_SET_LEASE: { + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_client_handle *och = NULL; + bool lease_broken; + fmode_t mode = 0; + + switch (arg) { + case F_WRLCK: + if (!(file->f_mode & FMODE_WRITE)) + return -EPERM; + mode = FMODE_WRITE; + break; + case F_RDLCK: + if (!(file->f_mode & FMODE_READ)) + return -EPERM; + mode = FMODE_READ; + break; + case F_UNLCK: + mutex_lock(&lli->lli_och_mutex); + if (fd->fd_lease_och != NULL) { + och = fd->fd_lease_och; + fd->fd_lease_och = NULL; + } + mutex_unlock(&lli->lli_och_mutex); + + if (och != NULL) { + mode = och->och_flags & + (FMODE_READ|FMODE_WRITE); + rc = ll_lease_close(och, inode, &lease_broken); + if (rc == 0 && lease_broken) + mode = 0; + } else { + rc = -ENOLCK; + } + + /* return the type of lease or error */ + return rc < 0 ? rc : (int)mode; + default: + return -EINVAL; + } + + CDEBUG(D_INODE, "Set lease with mode %d\n", mode); + + /* apply for lease */ + och = ll_lease_open(inode, file, mode, 0); + if (IS_ERR(och)) + return PTR_ERR(och); + + rc = 0; + mutex_lock(&lli->lli_och_mutex); + if (fd->fd_lease_och == NULL) { + fd->fd_lease_och = och; + och = NULL; + } + mutex_unlock(&lli->lli_och_mutex); + if (och != NULL) { + /* impossible now that only excl is supported for now */ + ll_lease_close(och, inode, &lease_broken); + rc = -EBUSY; + } + return rc; + } + case LL_IOC_GET_LEASE: { + struct ll_inode_info *lli = ll_i2info(inode); + struct ldlm_lock *lock = NULL; + + rc = 0; + mutex_lock(&lli->lli_och_mutex); + if (fd->fd_lease_och != NULL) { + struct obd_client_handle *och = fd->fd_lease_och; + + lock = ldlm_handle2lock(&och->och_lease_handle); + if (lock != NULL) { + lock_res_and_lock(lock); + if (!ldlm_is_cancel(lock)) + rc = och->och_flags & + (FMODE_READ | FMODE_WRITE); + unlock_res_and_lock(lock); + ldlm_lock_put(lock); + } + } + mutex_unlock(&lli->lli_och_mutex); + return rc; + } + case LL_IOC_HSM_IMPORT: { + struct hsm_user_import *hui; + + OBD_ALLOC_PTR(hui); + if (hui == NULL) + return -ENOMEM; + + if (copy_from_user(hui, (void *)arg, sizeof(*hui))) { + OBD_FREE_PTR(hui); + return -EFAULT; + } + + rc = ll_hsm_import(inode, file, hui); + + OBD_FREE_PTR(hui); + return rc; + } default: { int err; @@ -2435,7 +2830,8 @@ int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode) } ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits, - struct lustre_handle *lockh, __u64 flags) + struct lustre_handle *lockh, __u64 flags, + ldlm_mode_t mode) { ldlm_policy_data_t policy = { .l_inodebits = {bits}}; struct lu_fid *fid; @@ -2445,8 +2841,8 @@ ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits, CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid)); rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags, - fid, LDLM_IBITS, &policy, - LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh); + fid, LDLM_IBITS, &policy, mode, lockh); + return rc; } @@ -2581,7 +2977,15 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it, LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime; LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime; } else { - rc = ll_glimpse_size(inode); + /* In case of restore, the MDT has the right size and has + * already send it back without granting the layout lock, + * inode is up-to-date so glimpse is useless. + * Also to glimpse we need the layout, in case of a running + * restore the MDT holds the layout lock so the glimpse will + * block up to the end of restore (getattr will block) + */ + if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING)) + rc = ll_glimpse_size(inode); } return rc; } @@ -2628,6 +3032,38 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) return ll_getattr_it(mnt, de, &it, stat); } +int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len) +{ + int rc; + size_t num_bytes; + struct ll_user_fiemap *fiemap; + unsigned int extent_count = fieinfo->fi_extents_max; + + num_bytes = sizeof(*fiemap) + (extent_count * + sizeof(struct ll_fiemap_extent)); + OBD_ALLOC_LARGE(fiemap, num_bytes); + + if (fiemap == NULL) + return -ENOMEM; + + fiemap->fm_flags = fieinfo->fi_flags; + fiemap->fm_extent_count = fieinfo->fi_extents_max; + fiemap->fm_start = start; + fiemap->fm_length = len; + memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start, + sizeof(struct ll_fiemap_extent)); + + rc = ll_do_fiemap(inode, fiemap, num_bytes); + + fieinfo->fi_flags = fiemap->fm_flags; + fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents; + memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0], + fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent)); + + OBD_FREE_LARGE(fiemap, num_bytes); + return rc; +} struct posix_acl * ll_get_acl(struct inode *inode, int type) { @@ -2676,17 +3112,12 @@ int ll_inode_permission(struct inode *inode, int mask) return rc; } -#define READ_METHOD aio_read -#define READ_FUNCTION ll_file_aio_read -#define WRITE_METHOD aio_write -#define WRITE_FUNCTION ll_file_aio_write - /* -o localflock - only provides locally consistent flock locks */ struct file_operations ll_file_operations = { .read = ll_file_read, - .READ_METHOD = READ_FUNCTION, + .aio_read = ll_file_aio_read, .write = ll_file_write, - .WRITE_METHOD = WRITE_FUNCTION, + .aio_write = ll_file_aio_write, .unlocked_ioctl = ll_file_ioctl, .open = ll_file_open, .release = ll_file_release, @@ -2699,9 +3130,9 @@ struct file_operations ll_file_operations = { struct file_operations ll_file_operations_flock = { .read = ll_file_read, - .READ_METHOD = READ_FUNCTION, + .aio_read = ll_file_aio_read, .write = ll_file_write, - .WRITE_METHOD = WRITE_FUNCTION, + .aio_write = ll_file_aio_write, .unlocked_ioctl = ll_file_ioctl, .open = ll_file_open, .release = ll_file_release, @@ -2717,9 +3148,9 @@ struct file_operations ll_file_operations_flock = { /* These are for -o noflock - to return ENOSYS on flock calls */ struct file_operations ll_file_operations_noflock = { .read = ll_file_read, - .READ_METHOD = READ_FUNCTION, + .aio_read = ll_file_aio_read, .write = ll_file_write, - .WRITE_METHOD = WRITE_FUNCTION, + .aio_write = ll_file_aio_write, .unlocked_ioctl = ll_file_ioctl, .open = ll_file_open, .release = ll_file_release, @@ -2740,6 +3171,7 @@ struct inode_operations ll_file_inode_operations = { .getxattr = ll_getxattr, .listxattr = ll_listxattr, .removexattr = ll_removexattr, + .fiemap = ll_fiemap, .get_acl = ll_get_acl, }; @@ -3086,7 +3518,8 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) /* mostly layout lock is caching on the local side, so try to match * it before grabbing layout lock mutex. */ - mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0); + mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0, + LCK_CR | LCK_CW | LCK_PR | LCK_PW); if (mode != 0) { /* hit cached lock */ rc = ll_layout_lock_set(&lockh, mode, inode, gen, false); if (rc == 0) @@ -3101,7 +3534,8 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) again: /* try again. Maybe somebody else has done this. */ - mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0); + mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0, + LCK_CR | LCK_CW | LCK_PR | LCK_PW); if (mode != 0) { /* hit cached lock */ rc = ll_layout_lock_set(&lockh, mode, inode, gen, true); if (rc == -EAGAIN) @@ -3150,3 +3584,30 @@ again: return rc; } + +/** + * This function send a restore request to the MDT + */ +int ll_layout_restore(struct inode *inode) +{ + struct hsm_user_request *hur; + int len, rc; + + len = sizeof(struct hsm_user_request) + + sizeof(struct hsm_user_item); + OBD_ALLOC(hur, len); + if (hur == NULL) + return -ENOMEM; + + hur->hur_request.hr_action = HUA_RESTORE; + hur->hur_request.hr_archive_id = 0; + hur->hur_request.hr_flags = 0; + memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid, + sizeof(hur->hur_user_item[0].hui_fid)); + hur->hur_user_item[0].hui_extent.length = -1; + hur->hur_request.hr_itemcount = 1; + rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp, + len, hur, NULL); + OBD_FREE(hur, len); + return rc; +} diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index 47e443d90fe1..7ee5c02783f9 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -46,6 +46,8 @@ #include <lclient.h> #include <lustre_mdc.h> #include <linux/lustre_intent.h> +#include <linux/compat.h> +#include <linux/posix_acl_xattr.h> #ifndef FMODE_EXEC #define FMODE_EXEC 0 @@ -124,6 +126,10 @@ enum lli_flags { LLIF_SRVLOCK = (1 << 5), /* File data is modified. */ LLIF_DATA_MODIFIED = (1 << 6), + /* File is being restored */ + LLIF_FILE_RESTORING = (1 << 7), + /* Xattr cache is attached to the file */ + LLIF_XATTR_CACHE = (1 << 8), }; struct ll_inode_info { @@ -276,8 +282,27 @@ struct ll_inode_info { struct mutex lli_layout_mutex; /* valid only inside LAYOUT ibits lock, protected by lli_layout_mutex */ __u32 lli_layout_gen; + + struct rw_semaphore lli_xattrs_list_rwsem; + struct mutex lli_xattrs_enq_lock; + struct list_head lli_xattrs;/* ll_xattr_entry->xe_list */ }; +int ll_xattr_cache_destroy(struct inode *inode); + +int ll_xattr_cache_get(struct inode *inode, + const char *name, + char *buffer, + size_t size, + __u64 valid); + +int ll_xattr_cache_update(struct inode *inode, + const char *name, + const char *newval, + size_t size, + __u64 valid, + int flags); + /* * Locking to guarantee consistency of non-atomic updates to long long i_size, * consistency between file size and KMS. @@ -399,6 +424,7 @@ enum stats_track_type { #define LL_SBI_VERBOSE 0x10000 /* verbose mount/umount */ #define LL_SBI_LAYOUT_LOCK 0x20000 /* layout lock support */ #define LL_SBI_USER_FID2PATH 0x40000 /* allow fid2path by unprivileged users */ +#define LL_SBI_XATTR_CACHE 0x80000 /* support for xattr cache */ #define LL_SBI_FLAGS { \ "nolck", \ @@ -406,6 +432,7 @@ enum stats_track_type { "flock", \ "xattr", \ "acl", \ + "???", \ "rmt_client", \ "mds_capa", \ "oss_capa", \ @@ -418,7 +445,9 @@ enum stats_track_type { "agl", \ "verbose", \ "layout", \ - "user_fid2path" } + "user_fid2path",\ + "xattr", \ +} /* default value for ll_sb_info->contention_time */ #define SBI_DEFAULT_CONTENTION_SECONDS 60 @@ -458,7 +487,8 @@ struct ll_sb_info { struct lu_fid ll_root_fid; /* root object fid */ int ll_flags; - int ll_umounting:1; + unsigned int ll_umounting:1, + ll_xattr_cache_enabled:1; struct list_head ll_conn_chain; /* per-conn chain of SBs */ struct lustre_client_ocd ll_lco; @@ -607,10 +637,14 @@ extern struct kmem_cache *ll_file_data_slab; struct lustre_handle; struct ll_file_data { struct ll_readahead_state fd_ras; - int fd_omode; struct ccc_grouplock fd_grouplock; __u64 lfd_pos; __u32 fd_flags; + fmode_t fd_omode; + /* openhandle if lease exists for this file. + * Borrow lli->lli_och_mutex to protect assignment */ + struct obd_client_handle *fd_lease_och; + struct obd_client_handle *fd_och; struct file *fd_file; /* Indicate whether need to report failure when close. * true: failure is known, not report again. @@ -643,7 +677,12 @@ static inline int ll_need_32bit_api(struct ll_sb_info *sbi) #if BITS_PER_LONG == 32 return 1; #else - return unlikely(current_is_32bit() || (sbi->ll_flags & LL_SBI_32BIT_API)); + return unlikely( +#ifdef CONFIG_COMPAT + is_compat_task() || +#endif + (sbi->ll_flags & LL_SBI_32BIT_API) + ); #endif } @@ -663,15 +702,22 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi); void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count); void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars); +void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, + struct ll_file_data *file, loff_t pos, + size_t count, int rw); #else static inline int lprocfs_register_mountpoint(struct proc_dir_entry *parent, struct super_block *sb, char *osc, char *mdc){return 0;} static inline void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) {} -static void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) {} -static void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars) +static inline +void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) {} +static inline void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars) { memset(lvars, 0, sizeof(*lvars)); } +static inline void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, + struct ll_file_data *file, loff_t pos, + size_t count, int rw) {} #endif @@ -720,7 +766,8 @@ extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *, extern int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode); extern ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits, - struct lustre_handle *lockh, __u64 flags); + struct lustre_handle *lockh, __u64 flags, + ldlm_mode_t mode); int __ll_inode_revalidate_it(struct dentry *, struct lookup_intent *, __u64 bits); int ll_revalidate_nd(struct dentry *dentry, unsigned int flags); @@ -746,9 +793,6 @@ int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data, struct md_open_data **mod); void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, struct lustre_handle *fh); -extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, - struct ll_file_data *file, loff_t pos, - size_t count, int rw); int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, struct lookup_intent *it, struct kstat *stat); int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat); @@ -775,6 +819,12 @@ int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg); int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg); int ll_fid2path(struct inode *inode, void *arg); int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock); +int ll_hsm_release(struct inode *inode); + +struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file, + fmode_t mode, __u64 flags); +int ll_lease_close(struct obd_client_handle *och, struct inode *inode, + bool *lease_broken); /* llite/dcache.c */ @@ -801,7 +851,7 @@ void ll_kill_super(struct super_block *sb); struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock); struct inode *ll_inode_from_lock(struct ldlm_lock *lock); void ll_clear_inode(struct inode *inode); -int ll_setattr_raw(struct dentry *dentry, struct iattr *attr); +int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import); int ll_setattr(struct dentry *de, struct iattr *attr); int ll_statfs(struct dentry *de, struct kstatfs *sfs); int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, @@ -1578,5 +1628,9 @@ enum { int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf); int ll_layout_refresh(struct inode *inode, __u32 *gen); +int ll_layout_restore(struct inode *inode); + +int ll_xattr_init(void); +void ll_xattr_fini(void); #endif /* LLITE_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index fd584ff7e2df..6cfdb9e4b74b 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -56,6 +56,7 @@ #include "llite_internal.h" struct kmem_cache *ll_file_data_slab; +struct proc_dir_entry *proc_lustre_fs_root; LIST_HEAD(ll_super_blocks); DEFINE_SPINLOCK(ll_sb_lock); @@ -209,7 +210,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH| OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE | - OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS; + OBD_CONNECT_LAYOUTLOCK | + OBD_CONNECT_PINGLESS | OBD_CONNECT_MAX_EASIZE; if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) data->ocd_connect_flags |= OBD_CONNECT_SOM; @@ -383,6 +385,17 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, sbi->ll_flags |= LL_SBI_LAYOUT_LOCK; } + if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) { + if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) { + LCONSOLE_INFO( + "%s: disabling xattr cache due to unknown maximum xattr size.\n", + dt); + } else { + sbi->ll_flags |= LL_SBI_XATTR_CACHE; + sbi->ll_xattr_cache_enabled = 1; + } + } + obd = class_name2obd(dt); if (!obd) { CERROR("DT %s: not setup or attached\n", dt); @@ -922,6 +935,9 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_layout_gen = LL_LAYOUT_GEN_NONE; lli->lli_clob = NULL; + init_rwsem(&lli->lli_xattrs_list_rwsem); + mutex_init(&lli->lli_xattrs_enq_lock); + LASSERT(lli->lli_vfs_inode.i_mode != 0); if (S_ISDIR(lli->lli_vfs_inode.i_mode)) { mutex_init(&lli->lli_readdir_mutex); @@ -1194,6 +1210,8 @@ void ll_clear_inode(struct inode *inode) lli->lli_symlink_name = NULL; } + ll_xattr_cache_destroy(inode); + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { LASSERT(lli->lli_posix_acl == NULL); if (lli->lli_remote_perms) { @@ -1346,19 +1364,24 @@ static int ll_setattr_ost(struct inode *inode, struct iattr *attr) * to the OST with the punch RPC, otherwise we do an explicit setattr RPC. * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE * at the same time. + * + * In case of HSMimport, we only set attr on MDS. */ -int ll_setattr_raw(struct dentry *dentry, struct iattr *attr) +int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) { struct inode *inode = dentry->d_inode; struct ll_inode_info *lli = ll_i2info(inode); struct md_op_data *op_data = NULL; struct md_open_data *mod = NULL; + bool file_is_released = false; int rc = 0, rc1 = 0; - CDEBUG(D_VFSTRACE, "%s: setattr inode %p/fid:"DFID" from %llu to %llu, " - "valid %x\n", ll_get_fsname(inode->i_sb, NULL, 0), inode, + CDEBUG(D_VFSTRACE, + "%s: setattr inode %p/fid:"DFID + " from %llu to %llu, valid %x, hsm_import %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), inode, PFID(&lli->lli_fid), i_size_read(inode), attr->ia_size, - attr->ia_valid); + attr->ia_valid, hsm_import); if (attr->ia_valid & ATTR_SIZE) { /* Check new size against VFS/VM file size limit and rlimit */ @@ -1436,10 +1459,40 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr) (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) op_data->op_flags = MF_EPOCH_OPEN; + /* truncate on a released file must failed with -ENODATA, + * so size must not be set on MDS for released file + * but other attributes must be set + */ + if (S_ISREG(inode->i_mode)) { + struct lov_stripe_md *lsm; + __u32 gen; + + ll_layout_refresh(inode, &gen); + lsm = ccc_inode_lsm_get(inode); + if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED) + file_is_released = true; + ccc_inode_lsm_put(inode, lsm); + } + + /* if not in HSM import mode, clear size attr for released file + * we clear the attribute send to MDT in op_data, not the original + * received from caller in attr which is used later to + * decide return code */ + if (file_is_released && (attr->ia_valid & ATTR_SIZE) && !hsm_import) + op_data->op_attr.ia_valid &= ~ATTR_SIZE; + rc = ll_md_setattr(dentry, op_data, &mod); if (rc) GOTO(out, rc); + /* truncate failed (only when non HSM import), others succeed */ + if (file_is_released) { + if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) + GOTO(out, rc = -ENODATA); + else + GOTO(out, rc = 0); + } + /* RPC to MDT is sent, cancel data modification flag */ if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) { spin_lock(&lli->lli_lock); @@ -1473,7 +1526,7 @@ out: if (!S_ISDIR(inode->i_mode)) { up_write(&lli->lli_trunc_sem); mutex_lock(&inode->i_mutex); - if (attr->ia_valid & ATTR_SIZE) + if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) inode_dio_wait(inode); } @@ -1508,7 +1561,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr) !(attr->ia_valid & ATTR_KILL_SGID)) attr->ia_valid |= ATTR_KILL_SGID; - return ll_setattr_raw(de, attr); + return ll_setattr_raw(de, attr, false); } int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, @@ -1721,7 +1774,9 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) * lock on the client and set LLIF_MDS_SIZE_LOCK holding * it. */ mode = ll_take_md_lock(inode, MDS_INODELOCK_UPDATE, - &lockh, LDLM_FL_CBPENDING); + &lockh, LDLM_FL_CBPENDING, + LCK_CR | LCK_CW | + LCK_PR | LCK_PW); if (mode) { if (lli->lli_flags & (LLIF_DONE_WRITING | LLIF_EPOCH_PENDING | @@ -1761,6 +1816,11 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) LASSERT(md->oss_capa); ll_add_capa(inode, md->oss_capa); } + + if (body->valid & OBD_MD_TSTATE) { + if (body->t_state & MS_RESTORE) + lli->lli_flags |= LLIF_FILE_RESTORING; + } } void ll_read_inode2(struct inode *inode, void *opaque) diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c index e2421ea61352..5338e8d4c50f 100644 --- a/drivers/staging/lustre/lustre/llite/lloop.c +++ b/drivers/staging/lustre/lustre/llite/lloop.c @@ -856,7 +856,8 @@ static void lloop_exit(void) module_init(lloop_init); module_exit(lloop_exit); -CFS_MODULE_PARM(max_loop, "i", int, 0444, "maximum of lloop_device"); +module_param(max_loop, int, 0444); +MODULE_PARM_DESC(max_loop, "maximum of lloop_device"); MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>"); MODULE_DESCRIPTION("Lustre virtual block device"); MODULE_LICENSE("GPL"); diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c index 4bf09c4a0c9d..a9a104a6a4ee 100644 --- a/drivers/staging/lustre/lustre/llite/lproc_llite.c +++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c @@ -42,9 +42,6 @@ #include "llite_internal.h" -struct proc_dir_entry *proc_lustre_fs_root; - -#ifdef LPROCFS /* /proc/lustre/llite mount point registration */ extern struct file_operations vvp_dump_pgcache_file_ops; struct file_operations ll_rw_extents_stats_fops; @@ -723,6 +720,41 @@ static int ll_sbi_flags_seq_show(struct seq_file *m, void *v) } LPROC_SEQ_FOPS_RO(ll_sbi_flags); +static int ll_xattr_cache_seq_show(struct seq_file *m, void *v) +{ + struct super_block *sb = m->private; + struct ll_sb_info *sbi = ll_s2sbi(sb); + int rc; + + rc = seq_printf(m, "%u\n", sbi->ll_xattr_cache_enabled); + + return rc; +} + +static ssize_t ll_xattr_cache_seq_write(struct file *file, const char *buffer, + size_t count, loff_t *off) +{ + struct seq_file *seq = file->private_data; + struct super_block *sb = seq->private; + struct ll_sb_info *sbi = ll_s2sbi(sb); + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val != 0 && val != 1) + return -ERANGE; + + if (val == 1 && !(sbi->ll_flags & LL_SBI_XATTR_CACHE)) + return -ENOTSUPP; + + sbi->ll_xattr_cache_enabled = val; + + return count; +} +LPROC_SEQ_FOPS(ll_xattr_cache); + static struct lprocfs_vars lprocfs_llite_obd_vars[] = { { "uuid", &ll_sb_uuid_fops, 0, 0 }, //{ "mntpt_path", ll_rd_path, 0, 0 }, @@ -751,6 +783,7 @@ static struct lprocfs_vars lprocfs_llite_obd_vars[] = { { "lazystatfs", &ll_lazystatfs_fops, 0 }, { "max_easize", &ll_maxea_size_fops, 0, 0 }, { "sbi_flags", &ll_sbi_flags_fops, 0, 0 }, + { "xattr_cache", &ll_xattr_cache_fops, 0, 0 }, { 0 } }; @@ -802,6 +835,7 @@ struct llite_file_opcode { { LPROC_LL_ALLOC_INODE, LPROCFS_TYPE_REGS, "alloc_inode" }, { LPROC_LL_SETXATTR, LPROCFS_TYPE_REGS, "setxattr" }, { LPROC_LL_GETXATTR, LPROCFS_TYPE_REGS, "getxattr" }, + { LPROC_LL_GETXATTR_HITS, LPROCFS_TYPE_REGS, "getxattr_hits" }, { LPROC_LL_LISTXATTR, LPROCFS_TYPE_REGS, "listxattr" }, { LPROC_LL_REMOVEXATTR, LPROCFS_TYPE_REGS, "removexattr" }, { LPROC_LL_INODE_PERM, LPROCFS_TYPE_REGS, "inode_permission" }, @@ -1367,4 +1401,3 @@ void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars) lvars->module_vars = NULL; lvars->obd_vars = lprocfs_llite_obd_vars; } -#endif /* LPROCFS */ diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c index 90bbdae824ac..fc8d264f6c9a 100644 --- a/drivers/staging/lustre/lustre/llite/namei.c +++ b/drivers/staging/lustre/lustre/llite/namei.c @@ -223,6 +223,10 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, break; LASSERT(lock->l_flags & LDLM_FL_CANCELING); + + if (bits & MDS_INODELOCK_XATTR) + ll_xattr_cache_destroy(inode); + /* For OPEN locks we differentiate between lock modes * LCK_CR, LCK_CW, LCK_PR - bug 22891 */ if (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE | @@ -233,12 +237,9 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, ll_have_md_lock(inode, &bits, mode); fid = ll_inode2fid(inode); - if (lock->l_resource->lr_name.name[0] != fid_seq(fid) || - lock->l_resource->lr_name.name[1] != fid_oid(fid) || - lock->l_resource->lr_name.name[2] != fid_ver(fid)) { + if (!fid_res_name_eq(fid, &lock->l_resource->lr_name)) LDLM_ERROR(lock, "data mismatch with object " DFID" (%p)", PFID(fid), inode); - } if (bits & MDS_INODELOCK_OPEN) { int flags = 0; @@ -526,8 +527,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, icbd.icbd_childp = &dentry; icbd.icbd_parent = parent; - if (it->it_op & IT_CREAT || - (it->it_op & IT_OPEN && it->it_create_mode & O_CREAT)) + if (it->it_op & IT_CREAT) opc = LUSTRE_OPC_CREATE; else opc = LUSTRE_OPC_ANY; @@ -626,7 +626,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, return -ENOMEM; it->it_op = IT_OPEN; - if (mode) { + if (open_flags & O_CREAT) { it->it_op |= IT_CREAT; lookup_flags |= LOOKUP_CREATE; } diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c index 0beaf4e76b4b..e21e1c760a8e 100644 --- a/drivers/staging/lustre/lustre/llite/super25.c +++ b/drivers/staging/lustre/lustre/llite/super25.c @@ -187,11 +187,15 @@ static int __init init_lustre_lite(void) if (rc == 0) rc = vvp_global_init(); + if (rc == 0) + rc = ll_xattr_init(); + return rc; } static void __exit exit_lustre_lite(void) { + ll_xattr_fini(); vvp_global_fini(); del_timer(&ll_capa_timer); ll_capa_thread_stop(); diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index 3ff664ce7503..93cbfbb7e7f7 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c @@ -121,8 +121,38 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) CLOBINVRNT(env, obj, ccc_object_invariant(obj)); - CDEBUG(D_VFSTRACE, "ignore/verify layout %d/%d, layout version %d.\n", - io->ci_ignore_layout, io->ci_verify_layout, cio->cui_layout_gen); + CDEBUG(D_VFSTRACE, DFID + " ignore/verify layout %d/%d, layout version %d restore needed %d\n", + PFID(lu_object_fid(&obj->co_lu)), + io->ci_ignore_layout, io->ci_verify_layout, + cio->cui_layout_gen, io->ci_restore_needed); + + if (io->ci_restore_needed == 1) { + int rc; + + /* file was detected release, we need to restore it + * before finishing the io + */ + rc = ll_layout_restore(ccc_object_inode(obj)); + /* if restore registration failed, no restart, + * we will return -ENODATA */ + /* The layout will change after restore, so we need to + * block on layout lock hold by the MDT + * as MDT will not send new layout in lvb (see LU-3124) + * we have to explicitly fetch it, all this will be done + * by ll_layout_refresh() + */ + if (rc == 0) { + io->ci_restore_needed = 0; + io->ci_need_restart = 1; + io->ci_verify_layout = 1; + } else { + io->ci_restore_needed = 1; + io->ci_need_restart = 0; + io->ci_verify_layout = 0; + io->ci_result = rc; + } + } if (!io->ci_ignore_layout && io->ci_verify_layout) { __u32 gen = 0; @@ -130,9 +160,17 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) /* check layout version */ ll_layout_refresh(ccc_object_inode(obj), &gen); io->ci_need_restart = cio->cui_layout_gen != gen; - if (io->ci_need_restart) - CDEBUG(D_VFSTRACE, "layout changed from %d to %d.\n", - cio->cui_layout_gen, gen); + if (io->ci_need_restart) { + CDEBUG(D_VFSTRACE, + DFID" layout changed from %d to %d.\n", + PFID(lu_object_fid(&obj->co_lu)), + cio->cui_layout_gen, gen); + /* today successful restore is the only possible + * case */ + /* restore was done, clear restoring state */ + ll_i2info(ccc_object_inode(obj))->lli_flags &= + ~LLIF_FILE_RESTORING; + } } } @@ -590,8 +628,11 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, vmf); if (vmf->page) { - LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n", - vmf->virtual_address); + CDEBUG(D_PAGE, + "page %p map %p index %lu flags %lx count %u priv %0lx: got addr %p type NOPAGE\n", + vmf->page, vmf->page->mapping, vmf->page->index, + (long)vmf->page->flags, page_count(vmf->page), + page_private(vmf->page), vmf->virtual_address); if (unlikely(!(cfio->fault.ft_flags & VM_FAULT_LOCKED))) { lock_page(vmf->page); cfio->fault.ft_flags &= VM_FAULT_LOCKED; @@ -1111,6 +1152,12 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CDEBUG(D_VFSTRACE, DFID + " ignore/verify layout %d/%d, layout version %d restore needed %d\n", + PFID(lu_object_fid(&obj->co_lu)), + io->ci_ignore_layout, io->ci_verify_layout, + cio->cui_layout_gen, io->ci_restore_needed); + CL_IO_SLICE_CLEAN(cio, cui_cl); cl_io_slice_add(io, &cio->cui_cl, obj, &vvp_io_ops); vio->cui_ra_window_set = 0; diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c index 33173fce478f..25973dedd9a2 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_object.c +++ b/drivers/staging/lustre/lustre/llite/vvp_object.c @@ -138,7 +138,7 @@ int vvp_conf_set(const struct lu_env *env, struct cl_object *obj, lli->lli_layout_gen, conf->u.coc_md->lsm->lsm_layout_gen); - lli->lli_has_smd = true; + lli->lli_has_smd = lsm_has_objects(conf->u.coc_md->lsm); lli->lli_layout_gen = conf->u.coc_md->lsm->lsm_layout_gen; } else { CDEBUG(D_VFSTRACE, "layout lock destroyed: %u.\n", diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c index bcf86bac30a9..3a7d03c12dd9 100644 --- a/drivers/staging/lustre/lustre/llite/xattr.c +++ b/drivers/staging/lustre/lustre/llite/xattr.c @@ -109,12 +109,12 @@ int ll_setxattr_common(struct inode *inode, const char *name, int flags, __u64 valid) { struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ptlrpc_request *req; + struct ptlrpc_request *req = NULL; int xattr_type, rc; struct obd_capa *oc; + struct rmtacl_ctl_entry *rce = NULL; #ifdef CONFIG_FS_POSIX_ACL posix_acl_xattr_header *new_value = NULL; - struct rmtacl_ctl_entry *rce = NULL; ext_acl_xattr_header *acl = NULL; #endif const char *pv = value; @@ -183,11 +183,17 @@ int ll_setxattr_common(struct inode *inode, const char *name, valid |= rce_ops2valid(rce->rce_ops); } #endif - oc = ll_mdscapa_get(inode); - rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, - valid, name, pv, size, 0, flags, ll_i2suppgid(inode), - &req); - capa_put(oc); + if (sbi->ll_xattr_cache_enabled && + (rce == NULL || rce->rce_ops == RMT_LSETFACL)) { + rc = ll_xattr_cache_update(inode, name, pv, size, valid, flags); + } else { + oc = ll_mdscapa_get(inode); + rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, + valid, name, pv, size, 0, flags, + ll_i2suppgid(inode), &req); + capa_put(oc); + } + #ifdef CONFIG_FS_POSIX_ACL if (new_value != NULL) lustre_posix_acl_xattr_free(new_value, size); @@ -352,48 +358,54 @@ int ll_getxattr_common(struct inode *inode, const char *name, #endif do_getxattr: - oc = ll_mdscapa_get(inode); - rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, - valid | (rce ? rce_ops2valid(rce->rce_ops) : 0), - name, NULL, 0, size, 0, &req); - capa_put(oc); - if (rc) { - if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) { - LCONSOLE_INFO("Disabling user_xattr feature because " - "it is not supported on the server\n"); - sbi->ll_flags &= ~LL_SBI_USER_XATTR; - } - return rc; - } + if (sbi->ll_xattr_cache_enabled && (rce == NULL || + rce->rce_ops == RMT_LGETFACL || + rce->rce_ops == RMT_LSETFACL)) { + rc = ll_xattr_cache_get(inode, name, buffer, size, valid); + if (rc < 0) + GOTO(out_xattr, rc); + } else { + oc = ll_mdscapa_get(inode); + rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, + valid | (rce ? rce_ops2valid(rce->rce_ops) : 0), + name, NULL, 0, size, 0, &req); + capa_put(oc); - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - LASSERT(body); + if (rc < 0) + GOTO(out_xattr, rc); - /* only detect the xattr size */ - if (size == 0) - GOTO(out, rc = body->eadatasize); + body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); + LASSERT(body); - if (size < body->eadatasize) { - CERROR("server bug: replied size %u > %u\n", - body->eadatasize, (int)size); - GOTO(out, rc = -ERANGE); - } + /* only detect the xattr size */ + if (size == 0) + GOTO(out, rc = body->eadatasize); + + if (size < body->eadatasize) { + CERROR("server bug: replied size %u > %u\n", + body->eadatasize, (int)size); + GOTO(out, rc = -ERANGE); + } - if (body->eadatasize == 0) - GOTO(out, rc = -ENODATA); + if (body->eadatasize == 0) + GOTO(out, rc = -ENODATA); - /* do not need swab xattr data */ - xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, - body->eadatasize); - if (!xdata) - GOTO(out, rc = -EFAULT); + /* do not need swab xattr data */ + xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, + body->eadatasize); + if (!xdata) + GOTO(out, rc = -EFAULT); + + memcpy(buffer, xdata, body->eadatasize); + rc = body->eadatasize; + } #ifdef CONFIG_FS_POSIX_ACL - if (body->eadatasize >= 0 && rce && rce->rce_ops == RMT_LSETFACL) { + if (rce && rce->rce_ops == RMT_LSETFACL) { ext_acl_xattr_header *acl; - acl = lustre_posix_acl_xattr_2ext((posix_acl_xattr_header *)xdata, - body->eadatasize); + acl = lustre_posix_acl_xattr_2ext( + (posix_acl_xattr_header *)buffer, rc); if (IS_ERR(acl)) GOTO(out, rc = PTR_ERR(acl)); @@ -406,12 +418,12 @@ do_getxattr: } #endif - if (body->eadatasize == 0) { - rc = -ENODATA; - } else { - LASSERT(buffer); - memcpy(buffer, xdata, body->eadatasize); - rc = body->eadatasize; +out_xattr: + if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) { + LCONSOLE_INFO( + "%s: disabling user_xattr feature because it is not supported on the server: rc = %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), rc); + sbi->ll_flags &= ~LL_SBI_USER_XATTR; } out: ptlrpc_req_finished(req); diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c new file mode 100644 index 000000000000..3e3be1f13502 --- /dev/null +++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c @@ -0,0 +1,617 @@ +/* + * Copyright 2012 Xyratex Technology Limited + * + * Author: Andrew Perepechko <Andrew_Perepechko@xyratex.com> + * + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <obd_support.h> +#include <lustre_lite.h> +#include <lustre_dlm.h> +#include <lustre_ver.h> +#include "llite_internal.h" + +/* If we ever have hundreds of extended attributes, we might want to consider + * using a hash or a tree structure instead of list for faster lookups. + */ +struct ll_xattr_entry { + struct list_head xe_list; /* protected with + * lli_xattrs_list_rwsem */ + char *xe_name; /* xattr name, \0-terminated */ + char *xe_value; /* xattr value */ + unsigned xe_namelen; /* strlen(xe_name) + 1 */ + unsigned xe_vallen; /* xattr value length */ +}; + +static struct kmem_cache *xattr_kmem; +static struct lu_kmem_descr xattr_caches[] = { + { + .ckd_cache = &xattr_kmem, + .ckd_name = "xattr_kmem", + .ckd_size = sizeof(struct ll_xattr_entry) + }, + { + .ckd_cache = NULL + } +}; + +int ll_xattr_init(void) +{ + return lu_kmem_init(xattr_caches); +} + +void ll_xattr_fini(void) +{ + lu_kmem_fini(xattr_caches); +} + +/** + * Initializes xattr cache for an inode. + * + * This initializes the xattr list and marks cache presence. + */ +static void ll_xattr_cache_init(struct ll_inode_info *lli) +{ + + + LASSERT(lli != NULL); + + INIT_LIST_HEAD(&lli->lli_xattrs); + lli->lli_flags |= LLIF_XATTR_CACHE; +} + +/** + * This looks for a specific extended attribute. + * + * Find in @cache and return @xattr_name attribute in @xattr, + * for the NULL @xattr_name return the first cached @xattr. + * + * \retval 0 success + * \retval -ENODATA if not found + */ +static int ll_xattr_cache_find(struct list_head *cache, + const char *xattr_name, + struct ll_xattr_entry **xattr) +{ + struct ll_xattr_entry *entry; + + + + list_for_each_entry(entry, cache, xe_list) { + /* xattr_name == NULL means look for any entry */ + if (xattr_name == NULL || + strcmp(xattr_name, entry->xe_name) == 0) { + *xattr = entry; + CDEBUG(D_CACHE, "find: [%s]=%.*s\n", + entry->xe_name, entry->xe_vallen, + entry->xe_value); + return 0; + } + } + + return -ENODATA; +} + +/** + * This adds or updates an xattr. + * + * Add @xattr_name attr with @xattr_val value and @xattr_val_len length, + * if the attribute already exists, then update its value. + * + * \retval 0 success + * \retval -ENOMEM if no memory could be allocated for the cached attr + */ +static int ll_xattr_cache_add(struct list_head *cache, + const char *xattr_name, + const char *xattr_val, + unsigned xattr_val_len) +{ + struct ll_xattr_entry *xattr; + + + + if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) { + /* Found a cached EA, update it */ + + if (xattr_val_len != xattr->xe_vallen) { + char *val; + OBD_ALLOC(val, xattr_val_len); + if (val == NULL) { + CDEBUG(D_CACHE, + "failed to allocate %u bytes for xattr %s update\n", + xattr_val_len, xattr_name); + return -ENOMEM; + } + OBD_FREE(xattr->xe_value, xattr->xe_vallen); + xattr->xe_value = val; + xattr->xe_vallen = xattr_val_len; + } + memcpy(xattr->xe_value, xattr_val, xattr_val_len); + + CDEBUG(D_CACHE, "update: [%s]=%.*s\n", xattr_name, + xattr_val_len, xattr_val); + + return 0; + } + + OBD_SLAB_ALLOC_PTR_GFP(xattr, xattr_kmem, __GFP_IO); + if (xattr == NULL) { + CDEBUG(D_CACHE, "failed to allocate xattr\n"); + return -ENOMEM; + } + + xattr->xe_namelen = strlen(xattr_name) + 1; + + OBD_ALLOC(xattr->xe_name, xattr->xe_namelen); + if (!xattr->xe_name) { + CDEBUG(D_CACHE, "failed to alloc xattr name %u\n", + xattr->xe_namelen); + goto err_name; + } + OBD_ALLOC(xattr->xe_value, xattr_val_len); + if (!xattr->xe_value) { + CDEBUG(D_CACHE, "failed to alloc xattr value %d\n", + xattr_val_len); + goto err_value; + } + + memcpy(xattr->xe_name, xattr_name, xattr->xe_namelen); + memcpy(xattr->xe_value, xattr_val, xattr_val_len); + xattr->xe_vallen = xattr_val_len; + list_add(&xattr->xe_list, cache); + + CDEBUG(D_CACHE, "set: [%s]=%.*s\n", xattr_name, + xattr_val_len, xattr_val); + + return 0; +err_value: + OBD_FREE(xattr->xe_name, xattr->xe_namelen); +err_name: + OBD_SLAB_FREE_PTR(xattr, xattr_kmem); + + return -ENOMEM; +} + +/** + * This removes an extended attribute from cache. + * + * Remove @xattr_name attribute from @cache. + * + * \retval 0 success + * \retval -ENODATA if @xattr_name is not cached + */ +static int ll_xattr_cache_del(struct list_head *cache, + const char *xattr_name) +{ + struct ll_xattr_entry *xattr; + + + + CDEBUG(D_CACHE, "del xattr: %s\n", xattr_name); + + if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) { + list_del(&xattr->xe_list); + OBD_FREE(xattr->xe_name, xattr->xe_namelen); + OBD_FREE(xattr->xe_value, xattr->xe_vallen); + OBD_SLAB_FREE_PTR(xattr, xattr_kmem); + + return 0; + } + + return -ENODATA; +} + +/** + * This iterates cached extended attributes. + * + * Walk over cached attributes in @cache and + * fill in @xld_buffer or only calculate buffer + * size if @xld_buffer is NULL. + * + * \retval >= 0 buffer list size + * \retval -ENODATA if the list cannot fit @xld_size buffer + */ +static int ll_xattr_cache_list(struct list_head *cache, + char *xld_buffer, + int xld_size) +{ + struct ll_xattr_entry *xattr, *tmp; + int xld_tail = 0; + + + + list_for_each_entry_safe(xattr, tmp, cache, xe_list) { + CDEBUG(D_CACHE, "list: buffer=%p[%d] name=%s\n", + xld_buffer, xld_tail, xattr->xe_name); + + if (xld_buffer) { + xld_size -= xattr->xe_namelen; + if (xld_size < 0) + break; + memcpy(&xld_buffer[xld_tail], + xattr->xe_name, xattr->xe_namelen); + } + xld_tail += xattr->xe_namelen; + } + + if (xld_size < 0) + return -ERANGE; + + return xld_tail; +} + +/** + * Check if the xattr cache is initialized (filled). + * + * \retval 0 @cache is not initialized + * \retval 1 @cache is initialized + */ +int ll_xattr_cache_valid(struct ll_inode_info *lli) +{ + return !!(lli->lli_flags & LLIF_XATTR_CACHE); +} + +/** + * This finalizes the xattr cache. + * + * Free all xattr memory. @lli is the inode info pointer. + * + * \retval 0 no error occured + */ +static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli) +{ + + + if (!ll_xattr_cache_valid(lli)) + return 0; + + while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0) + ; /* empty loop */ + lli->lli_flags &= ~LLIF_XATTR_CACHE; + + return 0; +} + +int ll_xattr_cache_destroy(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + int rc; + + + + down_write(&lli->lli_xattrs_list_rwsem); + rc = ll_xattr_cache_destroy_locked(lli); + up_write(&lli->lli_xattrs_list_rwsem); + + return rc; +} + +/** + * Match or enqueue a PR or PW LDLM lock. + * + * Find or request an LDLM lock with xattr data. + * Since LDLM does not provide API for atomic match_or_enqueue, + * the function handles it with a separate enq lock. + * If successful, the function exits with the list lock held. + * + * \retval 0 no error occured + * \retval -ENOMEM not enough memory + */ +static int ll_xattr_find_get_lock(struct inode *inode, + struct lookup_intent *oit, + struct ptlrpc_request **req) +{ + ldlm_mode_t mode; + struct lustre_handle lockh = { 0 }; + struct md_op_data *op_data; + struct ll_inode_info *lli = ll_i2info(inode); + struct ldlm_enqueue_info einfo = { .ei_type = LDLM_IBITS, + .ei_mode = it_to_lock_mode(oit), + .ei_cb_bl = ll_md_blocking_ast, + .ei_cb_cp = ldlm_completion_ast }; + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct obd_export *exp = sbi->ll_md_exp; + int rc; + + + + mutex_lock(&lli->lli_xattrs_enq_lock); + /* Try matching first. */ + mode = ll_take_md_lock(inode, MDS_INODELOCK_XATTR, &lockh, 0, + oit->it_op == IT_SETXATTR ? LCK_PW : + (LCK_PR | LCK_PW)); + if (mode != 0) { + /* fake oit in mdc_revalidate_lock() manner */ + oit->d.lustre.it_lock_handle = lockh.cookie; + oit->d.lustre.it_lock_mode = mode; + goto out; + } + + /* Enqueue if the lock isn't cached locally. */ + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY, NULL); + if (IS_ERR(op_data)) { + mutex_unlock(&lli->lli_xattrs_enq_lock); + return PTR_ERR(op_data); + } + + op_data->op_valid = OBD_MD_FLXATTR | OBD_MD_FLXATTRLS | + OBD_MD_FLXATTRLOCKED; +#ifdef CONFIG_FS_POSIX_ACL + /* If working with ACLs, we would like to cache local ACLs */ + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) + op_data->op_valid |= OBD_MD_FLRMTLGETFACL; +#endif + + rc = md_enqueue(exp, &einfo, oit, op_data, &lockh, NULL, 0, NULL, 0); + ll_finish_md_op_data(op_data); + + if (rc < 0) { + CDEBUG(D_CACHE, + "md_intent_lock failed with %d for fid "DFID"\n", + rc, PFID(ll_inode2fid(inode))); + mutex_unlock(&lli->lli_xattrs_enq_lock); + return rc; + } + + *req = (struct ptlrpc_request *)oit->d.lustre.it_data; +out: + down_write(&lli->lli_xattrs_list_rwsem); + mutex_unlock(&lli->lli_xattrs_enq_lock); + + return 0; +} + +/** + * Refill the xattr cache. + * + * Fetch and cache the whole of xattrs for @inode, acquiring + * a read or a write xattr lock depending on operation in @oit. + * Intent is dropped on exit unless the operation is setxattr. + * + * \retval 0 no error occured + * \retval -EPROTO network protocol error + * \retval -ENOMEM not enough memory for the cache + */ +static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit) +{ + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ptlrpc_request *req = NULL; + const char *xdata, *xval, *xtail, *xvtail; + struct ll_inode_info *lli = ll_i2info(inode); + struct mdt_body *body; + __u32 *xsizes; + int rc = 0, i; + + + + rc = ll_xattr_find_get_lock(inode, oit, &req); + if (rc) + GOTO(out_no_unlock, rc); + + /* Do we have the data at this point? */ + if (ll_xattr_cache_valid(lli)) { + ll_stats_ops_tally(sbi, LPROC_LL_GETXATTR_HITS, 1); + GOTO(out_maybe_drop, rc = 0); + } + + /* Matched but no cache? Cancelled on error by a parallel refill. */ + if (unlikely(req == NULL)) { + CDEBUG(D_CACHE, "cancelled by a parallel getxattr\n"); + GOTO(out_maybe_drop, rc = -EIO); + } + + if (oit->d.lustre.it_status < 0) { + CDEBUG(D_CACHE, "getxattr intent returned %d for fid "DFID"\n", + oit->d.lustre.it_status, PFID(ll_inode2fid(inode))); + GOTO(out_destroy, rc = oit->d.lustre.it_status); + } + + body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); + if (body == NULL) { + CERROR("no MDT BODY in the refill xattr reply\n"); + GOTO(out_destroy, rc = -EPROTO); + } + /* do not need swab xattr data */ + xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, + body->eadatasize); + xval = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS, + body->aclsize); + xsizes = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS_LENS, + body->max_mdsize * sizeof(__u32)); + if (xdata == NULL || xval == NULL || xsizes == NULL) { + CERROR("wrong setxattr reply\n"); + GOTO(out_destroy, rc = -EPROTO); + } + + xtail = xdata + body->eadatasize; + xvtail = xval + body->aclsize; + + CDEBUG(D_CACHE, "caching: xdata=%p xtail=%p\n", xdata, xtail); + + ll_xattr_cache_init(lli); + + for (i = 0; i < body->max_mdsize; i++) { + CDEBUG(D_CACHE, "caching [%s]=%.*s\n", xdata, *xsizes, xval); + /* Perform consistency checks: attr names and vals in pill */ + if (memchr(xdata, 0, xtail - xdata) == NULL) { + CERROR("xattr protocol violation (names are broken)\n"); + rc = -EPROTO; + } else if (xval + *xsizes > xvtail) { + CERROR("xattr protocol violation (vals are broken)\n"); + rc = -EPROTO; + } else if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_XATTR_ENOMEM)) { + rc = -ENOMEM; + } else { + rc = ll_xattr_cache_add(&lli->lli_xattrs, xdata, xval, + *xsizes); + } + if (rc < 0) { + ll_xattr_cache_destroy_locked(lli); + GOTO(out_destroy, rc); + } + xdata += strlen(xdata) + 1; + xval += *xsizes; + xsizes++; + } + + if (xdata != xtail || xval != xvtail) + CERROR("a hole in xattr data\n"); + + ll_set_lock_data(sbi->ll_md_exp, inode, oit, NULL); + + GOTO(out_maybe_drop, rc); +out_maybe_drop: + /* drop lock on error or getxattr */ + if (rc != 0 || oit->it_op != IT_SETXATTR) + ll_intent_drop_lock(oit); + + if (rc != 0) + up_write(&lli->lli_xattrs_list_rwsem); +out_no_unlock: + ptlrpc_req_finished(req); + + return rc; + +out_destroy: + up_write(&lli->lli_xattrs_list_rwsem); + + ldlm_lock_decref_and_cancel((struct lustre_handle *) + &oit->d.lustre.it_lock_handle, + oit->d.lustre.it_lock_mode); + + goto out_no_unlock; +} + +/** + * Get an xattr value or list xattrs using the write-through cache. + * + * Get the xattr value (@valid has OBD_MD_FLXATTR set) of @name or + * list xattr names (@valid has OBD_MD_FLXATTRLS set) for @inode. + * The resulting value/list is stored in @buffer if the former + * is not larger than @size. + * + * \retval 0 no error occured + * \retval -EPROTO network protocol error + * \retval -ENOMEM not enough memory for the cache + * \retval -ERANGE the buffer is not large enough + * \retval -ENODATA no such attr or the list is empty + */ +int ll_xattr_cache_get(struct inode *inode, + const char *name, + char *buffer, + size_t size, + __u64 valid) +{ + struct lookup_intent oit = { .it_op = IT_GETXATTR }; + struct ll_inode_info *lli = ll_i2info(inode); + int rc = 0; + + + + LASSERT(!!(valid & OBD_MD_FLXATTR) ^ !!(valid & OBD_MD_FLXATTRLS)); + + down_read(&lli->lli_xattrs_list_rwsem); + if (!ll_xattr_cache_valid(lli)) { + up_read(&lli->lli_xattrs_list_rwsem); + rc = ll_xattr_cache_refill(inode, &oit); + if (rc) + return rc; + downgrade_write(&lli->lli_xattrs_list_rwsem); + } else { + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR_HITS, 1); + } + + if (valid & OBD_MD_FLXATTR) { + struct ll_xattr_entry *xattr; + + rc = ll_xattr_cache_find(&lli->lli_xattrs, name, &xattr); + if (rc == 0) { + rc = xattr->xe_vallen; + /* zero size means we are only requested size in rc */ + if (size != 0) { + if (size >= xattr->xe_vallen) + memcpy(buffer, xattr->xe_value, + xattr->xe_vallen); + else + rc = -ERANGE; + } + } + } else if (valid & OBD_MD_FLXATTRLS) { + rc = ll_xattr_cache_list(&lli->lli_xattrs, + size ? buffer : NULL, size); + } + + GOTO(out, rc); +out: + up_read(&lli->lli_xattrs_list_rwsem); + + return rc; +} + + +/** + * Set/update an xattr value or remove xattr using the write-through cache. + * + * Set/update the xattr value (if @valid has OBD_MD_FLXATTR) of @name to @newval + * or + * remove the xattr @name (@valid has OBD_MD_FLXATTRRM set) from @inode. + * @flags is either XATTR_CREATE or XATTR_REPLACE as defined by setxattr(2) + * + * \retval 0 no error occured + * \retval -EPROTO network protocol error + * \retval -ENOMEM not enough memory for the cache + * \retval -ERANGE the buffer is not large enough + * \retval -ENODATA no such attr (in the removal case) + */ +int ll_xattr_cache_update(struct inode *inode, + const char *name, + const char *newval, + size_t size, + __u64 valid, + int flags) +{ + struct lookup_intent oit = { .it_op = IT_SETXATTR }; + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ptlrpc_request *req = NULL; + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *oc; + int rc; + + + + LASSERT(!!(valid & OBD_MD_FLXATTR) ^ !!(valid & OBD_MD_FLXATTRRM)); + + rc = ll_xattr_cache_refill(inode, &oit); + if (rc) + return rc; + + oc = ll_mdscapa_get(inode); + rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, + valid | OBD_MD_FLXATTRLOCKED, name, newval, + size, 0, flags, ll_i2suppgid(inode), &req); + capa_put(oc); + + if (rc) { + ll_intent_drop_lock(&oit); + GOTO(out, rc); + } + + if (valid & OBD_MD_FLXATTR) + rc = ll_xattr_cache_add(&lli->lli_xattrs, name, newval, size); + else if (valid & OBD_MD_FLXATTRRM) + rc = ll_xattr_cache_del(&lli->lli_xattrs, name); + + ll_intent_drop_lock(&oit); + GOTO(out, rc); +out: + up_write(&lli->lli_xattrs_list_rwsem); + ptlrpc_req_finished(req); + + return rc; +} |