summary | refs | log | tree | commit | diff
path: root/drivers/staging/lustre/lustre/llite
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/lustre/lustre/llite')
-rw-r--r-- drivers/staging/lustre/lustre/llite/Makefile 5
-rw-r--r-- drivers/staging/lustre/lustre/llite/dcache.c 34
-rw-r--r-- drivers/staging/lustre/lustre/llite/dir.c 24
-rw-r--r-- drivers/staging/lustre/lustre/llite/file.c 657
-rw-r--r-- drivers/staging/lustre/lustre/llite/llite_internal.h 76
-rw-r--r-- drivers/staging/lustre/lustre/llite/llite_lib.c 76
-rw-r--r-- drivers/staging/lustre/lustre/llite/lloop.c 3
-rw-r--r-- drivers/staging/lustre/lustre/llite/lproc_llite.c 41
-rw-r--r-- drivers/staging/lustre/lustre/llite/namei.c 14
-rw-r--r-- drivers/staging/lustre/lustre/llite/super25.c 4
-rw-r--r-- drivers/staging/lustre/lustre/llite/vvp_io.c 61
-rw-r--r-- drivers/staging/lustre/lustre/llite/vvp_object.c 2
-rw-r--r-- drivers/staging/lustre/lustre/llite/xattr.c 104
-rw-r--r-- drivers/staging/lustre/lustre/llite/xattr_cache.c 617
14 files changed, 1511 insertions, 207 deletions
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile
index f493e0740004..c76f3cfedab0 100644
--- a/drivers/staging/lustre/lustre/llite/Makefile
+++ b/drivers/staging/lustre/lustre/llite/Makefile
@@ -1,12 +1,13 @@
obj-$(CONFIG_LUSTRE_FS) += lustre.o
obj-$(CONFIG_LUSTRE_LLITE_LLOOP) += llite_lloop.o
lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \
- rw.o lproc_llite.o namei.o symlink.o llite_mmap.o \
- xattr.o remote_perm.o llite_rmtacl.o llite_capa.o \
+ rw.o namei.o symlink.o llite_mmap.o \
+ xattr.o xattr_cache.o remote_perm.o llite_rmtacl.o llite_capa.o \
rw26.o super25.o statahead.o \
../lclient/glimpse.o ../lclient/lcommon_cl.o ../lclient/lcommon_misc.o \
vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o
+lustre-$(CONFIG_PROC_FS) += lproc_llite.o
llite_lloop-y := lloop.o
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
index e7629be39739..cbd663ed030c 100644
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -404,7 +404,6 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags,
struct inode *inode = de->d_inode;
struct ll_inode_info *lli = ll_i2info(inode);
struct obd_client_handle **och_p;
- __u64 *och_usecount;
__u64 ibits;
/*
@@ -418,37 +417,32 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags,
*/
- if (it->it_flags & FMODE_WRITE) {
+ if (it->it_flags & FMODE_WRITE)
och_p = &lli->lli_mds_write_och;
- och_usecount = &lli->lli_open_fd_write_count;
- } else if (it->it_flags & FMODE_EXEC) {
+ else if (it->it_flags & FMODE_EXEC)
och_p = &lli->lli_mds_exec_och;
- och_usecount = &lli->lli_open_fd_exec_count;
- } else {
+ else
och_p = &lli->lli_mds_read_och;
- och_usecount = &lli->lli_open_fd_read_count;
- }
+
/* Check for the proper lock. */
ibits = MDS_INODELOCK_LOOKUP;
if (!ll_have_md_lock(inode, &ibits, LCK_MINMODE))
goto do_lock;
mutex_lock(&lli->lli_och_mutex);
if (*och_p) { /* Everything is open already, do nothing */
- /*(*och_usecount)++; Do not let them steal our open
- handle from under us */
- SET_BUT_UNUSED(och_usecount);
- /* XXX The code above was my original idea, but in case
- we have the handle, but we cannot use it due to later
- checks (e.g. O_CREAT|O_EXCL flags set), nobody
- would decrement counter increased here. So we just
- hope the lock won't be invalidated in between. But
- if it would be, we'll reopen the open request to
- MDS later during file open path */
+ /* Originally it was idea to do not let them steal our
+ * open handle from under us by (*och_usecount)++ here.
+ * But in case we have the handle, but we cannot use it
+ * due to later checks (e.g. O_CREAT|O_EXCL flags set),
+ * nobody would decrement counter increased here. So we
+ * just hope the lock won't be invalidated in between.
+ * But if it would be, we'll reopen the open request to
+ * MDS later during file open path.
+ */
mutex_unlock(&lli->lli_och_mutex);
return 1;
- } else {
- mutex_unlock(&lli->lli_och_mutex);
}
+ mutex_unlock(&lli->lli_och_mutex);
}
if (it->it_op == IT_GETATTR) {
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index 1f079034bd8f..22d0acc95bc5 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -1809,8 +1809,28 @@ out_rmdir:
return -EFAULT;
}
- rc = obd_iocontrol(cmd, ll_i2mdexp(inode), totalsize,
- hur, NULL);
+ if (hur->hur_request.hr_action == HUA_RELEASE) {
+ const struct lu_fid *fid;
+ struct inode *f;
+ int i;
+
+ for (i = 0; i < hur->hur_request.hr_itemcount; i++) {
+ fid = &hur->hur_user_item[i].hui_fid;
+ f = search_inode_for_lustre(inode->i_sb, fid);
+ if (IS_ERR(f)) {
+ rc = PTR_ERR(f);
+ break;
+ }
+
+ rc = ll_hsm_release(f);
+ iput(f);
+ if (rc != 0)
+ break;
+ }
+ } else {
+ rc = obd_iocontrol(cmd, ll_i2mdexp(inode), totalsize,
+ hur, NULL);
+ }
OBD_FREE_LARGE(hur, totalsize);
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index fb85a58db058..c12821aedc2f 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -115,7 +115,8 @@ out:
static int ll_close_inode_openhandle(struct obd_export *md_exp,
struct inode *inode,
- struct obd_client_handle *och)
+ struct obd_client_handle *och,
+ const __u64 *data_version)
{
struct obd_export *exp = ll_i2mdexp(inode);
struct md_op_data *op_data;
@@ -139,6 +140,13 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
ll_prepare_close(inode, op_data, och);
+ if (data_version != NULL) {
+ /* Pass in data_version implies release. */
+ op_data->op_bias |= MDS_HSM_RELEASE;
+ op_data->op_data_version = *data_version;
+ op_data->op_lease_handle = och->och_lease_handle;
+ op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
+ }
epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
rc = md_close(md_exp, op_data, och->och_mod, &req);
if (rc == -EAGAIN) {
@@ -167,14 +175,20 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
spin_unlock(&lli->lli_lock);
}
- ll_finish_md_op_data(op_data);
-
if (rc == 0) {
rc = ll_objects_destroy(req, inode);
if (rc)
CERROR("inode %lu ll_objects destroy: rc = %d\n",
inode->i_ino, rc);
}
+ if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
+ struct mdt_body *body;
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ if (!(body->valid & OBD_MD_FLRELEASED))
+ rc = -EBUSY;
+ }
+
+ ll_finish_md_op_data(op_data);
out:
if (exp_connect_som(exp) && !epoch_close &&
@@ -224,7 +238,7 @@ int ll_md_real_close(struct inode *inode, int flags)
if (och) { /* There might be a race and somebody have freed this och
already */
rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
- inode, och);
+ inode, och, NULL);
}
return rc;
@@ -241,6 +255,24 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode,
if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
+ if (fd->fd_lease_och != NULL) {
+ bool lease_broken;
+
+ /* Usually the lease is not released when the
+ * application crashed, we need to release here. */
+ rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
+ CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
+ PFID(&lli->lli_fid), rc, lease_broken);
+
+ fd->fd_lease_och = NULL;
+ }
+
+ if (fd->fd_och != NULL) {
+ rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
+ fd->fd_och = NULL;
+ GOTO(out, rc);
+ }
+
/* Let's see if we have good enough OPEN lock on the file and if
we can skip talking to MDS */
if (file->f_dentry->d_inode) { /* Can this ever be false? */
@@ -277,6 +309,7 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode,
file, file->f_dentry, file->f_dentry->d_name.name);
}
+out:
LUSTRE_FPRIVATE(file) = NULL;
ll_file_data_put(fd);
ll_capa_close(inode);
@@ -431,22 +464,18 @@ void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
}
}
-static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
- struct lookup_intent *it, struct obd_client_handle *och)
+static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
+ struct obd_client_handle *och)
{
struct ptlrpc_request *req = it->d.lustre.it_data;
struct mdt_body *body;
- LASSERT(och);
-
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- LASSERT(body != NULL); /* reply already checked out */
-
- memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
+ och->och_fh = body->handle;
+ och->och_fid = body->fid1;
+ och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
- och->och_fid = lli->lli_fid;
och->och_flags = it->it_flags;
- ll_ioepoch_open(lli, body->ioepoch);
return md_set_open_replay_data(md_exp, och, req);
}
@@ -466,20 +495,17 @@ int ll_local_open(struct file *file, struct lookup_intent *it,
struct mdt_body *body;
int rc;
- rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
- if (rc)
+ rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
+ if (rc != 0)
return rc;
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if ((it->it_flags & FMODE_WRITE) &&
- (body->valid & OBD_MD_FLSIZE))
- CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
- lli->lli_ioepoch, PFID(&lli->lli_fid));
+ ll_ioepoch_open(lli, body->ioepoch);
}
LUSTRE_FPRIVATE(file) = fd;
ll_readahead_init(inode, &fd->fd_ras);
- fd->fd_omode = it->it_flags;
+ fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
return 0;
}
@@ -681,6 +707,198 @@ out_openerr:
return rc;
}
+/*
+ * Blocking callback that ll_lease_open() hands to md_intent_lock() for the
+ * lease lock.
+ *
+ * On LDLM_CB_BLOCKING the lock is cancelled asynchronously through its
+ * handle; every other notification (LDLM_CB_CANCELING included) is ignored.
+ *
+ * Returns 0, or the negative value reported by ldlm_cli_cancel().
+ */
+static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
+				    struct ldlm_lock_desc *desc, void *data,
+				    int flag)
+{
+	struct lustre_handle handle;
+	int err;
+
+	/* Only a blocking notification requires any work. */
+	if (flag != LDLM_CB_BLOCKING)
+		return 0;
+
+	ldlm_lock2handle(lock, &handle);
+	err = ldlm_cli_cancel(&handle, LCF_ASYNC);
+	if (err >= 0)
+		return 0;
+
+	CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", err);
+	return err;
+}
+
+/**
+ * Acquire a lease and open the file.
+ *
+ * A fresh open handle is requested from the MDT with MDS_OPEN_LEASE set; the
+ * open lock returned with it is recorded in och_lease_handle and represents
+ * the lease.
+ *
+ * inode:      inode to take the lease on.
+ * file:       open file that will own the lease, or NULL (ll_hsm_release()
+ *             passes NULL).  When given, the file's existing MDS open handle
+ *             is moved into its ll_file_data so the MDT can be told that the
+ *             new handle comes from the same owner.
+ * fmode:      lease mode, exactly FMODE_READ or FMODE_WRITE.
+ * open_flags: additional open flags OR'ed into the intent.
+ *
+ * Returns the new open handle, or an ERR_PTR():
+ *   -EINVAL      fmode is neither FMODE_READ nor FMODE_WRITE;
+ *   -EPERM       file was not opened with fmode, or was opened for exec;
+ *   -EBUSY       file already has a lease, or its open handle could not be
+ *                taken over exclusively;
+ *   -ENOMEM      allocation failure;
+ *   -ENOENT      the lookup came back negative;
+ *   -EOPNOTSUPP  the server did not grant a lease;
+ *   -EPROTO      a lease was granted without the open lock;
+ *   or whatever md_intent_lock() / it_open_error() reported.
+ */
+struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
+					fmode_t fmode, __u64 open_flags)
+{
+	struct lookup_intent it = { .it_op = IT_OPEN };
+	struct ll_sb_info *sbi = ll_i2sbi(inode);
+	struct md_op_data *op_data;
+	/* Must start as NULL: it is tested below even when md_intent_lock()
+	 * fails, and nothing here guarantees it was written in that case. */
+	struct ptlrpc_request *req = NULL;
+	struct lustre_handle old_handle = { 0 };
+	struct obd_client_handle *och = NULL;
+	int rc;
+	int rc2;
+
+	if (fmode != FMODE_WRITE && fmode != FMODE_READ)
+		return ERR_PTR(-EINVAL);
+
+	if (file != NULL) {
+		struct ll_inode_info *lli = ll_i2info(inode);
+		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+		struct obd_client_handle **och_p;
+		__u64 *och_usecount;
+
+		if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
+			return ERR_PTR(-EPERM);
+
+		/* Get the openhandle of the file */
+		rc = -EBUSY;
+		mutex_lock(&lli->lli_och_mutex);
+		if (fd->fd_lease_och != NULL) {
+			mutex_unlock(&lli->lli_och_mutex);
+			return ERR_PTR(rc);
+		}
+
+		if (fd->fd_och == NULL) {
+			if (file->f_mode & FMODE_WRITE) {
+				LASSERT(lli->lli_mds_write_och != NULL);
+				och_p = &lli->lli_mds_write_och;
+				och_usecount = &lli->lli_open_fd_write_count;
+			} else {
+				LASSERT(lli->lli_mds_read_och != NULL);
+				och_p = &lli->lli_mds_read_och;
+				och_usecount = &lli->lli_open_fd_read_count;
+			}
+			/* Only a sole opener may take the shared handle away
+			 * from the inode and keep it private to this file. */
+			if (*och_usecount == 1) {
+				fd->fd_och = *och_p;
+				*och_p = NULL;
+				*och_usecount = 0;
+				rc = 0;
+			}
+		}
+		mutex_unlock(&lli->lli_och_mutex);
+		if (rc < 0) /* more than 1 opener */
+			return ERR_PTR(rc);
+
+		LASSERT(fd->fd_och != NULL);
+		old_handle = fd->fd_och->och_fh;
+	}
+
+	OBD_ALLOC_PTR(och);
+	if (och == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
+				     LUSTRE_OPC_ANY, NULL);
+	if (IS_ERR(op_data))
+		GOTO(out, rc = PTR_ERR(op_data));
+
+	/* To tell the MDT this openhandle is from the same owner */
+	op_data->op_handle = old_handle;
+
+	it.it_flags = fmode | open_flags;
+	it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
+	rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
+				ll_md_blocking_lease_ast,
+	/* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
+	 * it can be cancelled which may mislead applications that the lease is
+	 * broken;
+	 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
+	 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
+	 * doesn't deal with openhandle, so normal openhandle will be leaked. */
+				LDLM_FL_NO_LRU | LDLM_FL_EXCL);
+	ll_finish_md_op_data(op_data);
+	if (req != NULL) {
+		ptlrpc_req_finished(req);
+		it_clear_disposition(&it, DISP_ENQ_COMPLETE);
+	}
+	if (rc < 0)
+		GOTO(out_release_it, rc);
+
+	if (it_disposition(&it, DISP_LOOKUP_NEG))
+		GOTO(out_release_it, rc = -ENOENT);
+
+	rc = it_open_error(DISP_OPEN_OPEN, &it);
+	if (rc)
+		GOTO(out_release_it, rc);
+
+	LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
+	ll_och_fill(sbi->ll_md_exp, &it, och);
+
+	if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
+		GOTO(out_close, rc = -EOPNOTSUPP);
+
+	/* already get lease, handle lease lock */
+	ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
+	if (it.d.lustre.it_lock_mode == 0 ||
+	    it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
+		/* open lock must return for lease */
+		CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
+			PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
+			it.d.lustre.it_lock_bits);
+		GOTO(out_close, rc = -EPROTO);
+	}
+
+	ll_intent_release(&it);
+	return och;
+
+out_close:
+	/* Cancel the open lock BEFORE closing the handle: the close call
+	 * takes over och (other callers, e.g. ll_hsm_release(), drop their
+	 * pointer right after it), so och_lease_handle must not be read once
+	 * ll_close_inode_openhandle() has run. */
+	if (it.d.lustre.it_lock_mode != 0) {
+		ldlm_lock_decref_and_cancel(&och->och_lease_handle,
+					    it.d.lustre.it_lock_mode);
+		it.d.lustre.it_lock_mode = 0;
+	}
+
+	rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
+	if (rc2)
+		CERROR("Close openhandle returned %d\n", rc2);
+	/* The handle is no longer ours; keep the cleanup below off it. */
+	och = NULL;
+out_release_it:
+	ll_intent_release(&it);
+out:
+	if (och != NULL)
+		OBD_FREE_PTR(och);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL(ll_lease_open);
+
+/**
+ * Release lease and close the file.
+ *
+ * The lease counts as broken when its lock can no longer be looked up from
+ * och_lease_handle, or when that lock is already flagged as cancelled.  A
+ * lease that is still intact is cancelled here before the open handle is
+ * closed.
+ *
+ * och:          open handle obtained from ll_lease_open().
+ * inode:        inode the lease was taken on.
+ * lease_broken: optional; when not NULL it receives whether the lease was
+ *               found broken.
+ *
+ * Returns the result of ll_close_inode_openhandle().
+ */
+int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
+		   bool *lease_broken)
+{
+	struct ldlm_lock *lease_lock;
+	bool broken = true;
+
+	lease_lock = ldlm_handle2lock(&och->och_lease_handle);
+	if (lease_lock != NULL) {
+		/* Sample the cancel flag under the resource/lock pair. */
+		lock_res_and_lock(lease_lock);
+		broken = ldlm_is_cancel(lease_lock);
+		unlock_res_and_lock(lease_lock);
+		ldlm_lock_put(lease_lock);
+	}
+
+	CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
+		PFID(&ll_i2info(inode)->lli_fid), broken);
+
+	if (lease_broken != NULL)
+		*lease_broken = broken;
+	if (!broken)
+		ldlm_cli_cancel(&och->och_lease_handle, 0);
+
+	return ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode,
+					 och, NULL);
+}
+EXPORT_SYMBOL(ll_lease_close);
+
/* Fills the obdo with the attributes for the lsm */
static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
struct obd_capa *capa, struct obdo *obdo,
@@ -905,7 +1123,7 @@ out:
cl_io_fini(env, io);
/* If any bit been read/written (result != 0), we just return
* short read/write instead of restart io. */
- if (result == 0 && io->ci_need_restart) {
+ if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
iot == CIT_READ ? "read" : "write",
file->f_dentry->d_name.name, *ppos, count);
@@ -930,48 +1148,16 @@ out:
return result;
}
-
-/*
- * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
- */
-static int ll_file_get_iov_count(const struct iovec *iov,
- unsigned long *nr_segs, size_t *count)
-{
- size_t cnt = 0;
- unsigned long seg;
-
- for (seg = 0; seg < *nr_segs; seg++) {
- const struct iovec *iv = &iov[seg];
-
- /*
- * If any segment has a negative length, or the cumulative
- * length ever wraps negative then return -EINVAL.
- */
- cnt += iv->iov_len;
- if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
- return -EINVAL;
- if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
- continue;
- if (seg == 0)
- return -EFAULT;
- *nr_segs = seg;
- cnt -= iv->iov_len; /* This segment is no good */
- break;
- }
- *count = cnt;
- return 0;
-}
-
static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct lu_env *env;
struct vvp_io_args *args;
- size_t count;
+ size_t count = 0;
ssize_t result;
int refcheck;
- result = ll_file_get_iov_count(iov, &nr_segs, &count);
+ result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
if (result)
return result;
@@ -1026,11 +1212,11 @@ static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
{
struct lu_env *env;
struct vvp_io_args *args;
- size_t count;
+ size_t count = 0;
ssize_t result;
int refcheck;
- result = ll_file_get_iov_count(iov, &nr_segs, &count);
+ result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
if (result)
return result;
@@ -1482,12 +1668,11 @@ int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
if (!och)
GOTO(out, rc = -ENOMEM);
- ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
- ll_i2info(inode), it, och);
+ ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
- inode, och);
- out:
+ inode, och, NULL);
+out:
/* this one is in place of ll_file_open */
if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
ptlrpc_req_finished(it->d.lustre.it_data);
@@ -1692,6 +1877,53 @@ out:
return rc;
}
+/*
+ * Trigger a HSM release request for the provided inode.
+ *
+ * Sequence: take a write lease opened with MDS_OPEN_RELEASE, fetch the
+ * current data version, merge the LVB into the inode, then close the lease
+ * handle while passing the data version along (ll_close_inode_openhandle()
+ * treats a non-NULL data version as a release).  If any step before that
+ * close fails, the lease is given back through ll_lease_close().
+ *
+ * Returns 0 or a negative errno from the failing step.
+ */
+int ll_hsm_release(struct inode *inode)
+{
+	struct obd_client_handle *och = NULL;
+	struct cl_env_nest nest;
+	struct lu_env *env;
+	__u64 dv = 0;
+	int rc;
+
+	CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
+	       ll_get_fsname(inode->i_sb, NULL, 0),
+	       PFID(&ll_i2info(inode)->lli_fid));
+
+	och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
+	if (IS_ERR(och))
+		GOTO(out, rc = PTR_ERR(och));
+
+	/* Grab latest data_version and [am]time values */
+	rc = ll_data_version(inode, &dv, 1);
+	if (rc != 0)
+		GOTO(out, rc);
+
+	env = cl_env_nested_get(&nest);
+	if (IS_ERR(env))
+		GOTO(out, rc = PTR_ERR(env));
+
+	ll_merge_lvb(env, inode);
+	cl_env_nested_put(&nest, env);
+
+	/* Release the file.
+	 * NB: lease lock handle is released in mdc_hsm_release_pack() because
+	 * we still need it to pack l_remote_handle to MDT. */
+	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
+				       &dv);
+	/* The handle has been handed over; nothing left to close below. */
+	och = NULL;
+
+out:
+	if (och == NULL || IS_ERR(och))
+		return rc;
+
+	/* A step failed while the lease was still held: close the file. */
+	ll_lease_close(och, inode, NULL);
+	return rc;
+}
+
+
struct ll_swap_stack {
struct iattr ia1, ia2;
__u64 dv1, dv2;
@@ -1853,6 +2085,86 @@ free:
return rc;
}
+/*
+ * Send an HSM state change for @inode to the MDT.
+ *
+ * @hss carries the flags to set and to clear.  Returns -EPERM when a caller
+ * without CFS_CAP_SYS_ADMIN touches flags outside HSM_USER_MASK, the error
+ * from ll_prep_md_op_data() if the request cannot be prepared, otherwise the
+ * result of the LL_IOC_HSM_STATE_SET iocontrol.
+ */
+static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
+{
+	struct md_op_data *mdop;
+	int result;
+
+	/* Non-root users are forbidden to set or clear flags which are
+	 * NOT defined in HSM_USER_MASK. */
+	if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) {
+		if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+			return -EPERM;
+	}
+
+	mdop = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+				  LUSTRE_OPC_ANY, hss);
+	if (IS_ERR(mdop))
+		return PTR_ERR(mdop);
+
+	result = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
+			       sizeof(*mdop), mdop, NULL);
+	ll_finish_md_op_data(mdop);
+
+	return result;
+}
+
+
+/*
+ * Import an archived file described by @hui into a regular file.
+ *
+ * First marks the inode HS_ARCHIVED | HS_EXISTS | HS_RELEASED under the
+ * archive id from @hui, then forces mode, owner, group, size and
+ * [am]time from @hui onto the file through ll_setattr_raw().  An -ENODATA
+ * result from that setattr is treated as success.
+ *
+ * Returns 0, -EINVAL for anything that is not a regular file, -ENOMEM, or
+ * the error of the failing step.
+ */
+static int ll_hsm_import(struct inode *inode, struct file *file,
+			 struct hsm_user_import *hui)
+{
+	struct hsm_state_set *state = NULL;
+	struct iattr *ia = NULL;
+	int rc;
+
+	if (!S_ISREG(inode->i_mode))
+		return -EINVAL;
+
+	/* set HSM flags */
+	OBD_ALLOC_PTR(state);
+	if (state == NULL)
+		GOTO(out, rc = -ENOMEM);
+
+	state->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
+	state->hss_archive_id = hui->hui_archive_id;
+	state->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
+	rc = ll_hsm_state_set(inode, state);
+	if (rc != 0)
+		GOTO(out, rc);
+
+	OBD_ALLOC_PTR(ia);
+	if (ia == NULL)
+		GOTO(out, rc = -ENOMEM);
+
+	ia->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
+		       ATTR_UID | ATTR_GID |
+		       ATTR_MTIME | ATTR_MTIME_SET |
+		       ATTR_ATIME | ATTR_ATIME_SET;
+
+	/* Keep only the permission bits and force a regular file type. */
+	ia->ia_mode = (hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) |
+		      S_IFREG;
+	ia->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
+	ia->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
+	ia->ia_size = hui->hui_size;
+	ia->ia_atime.tv_sec = hui->hui_atime;
+	ia->ia_atime.tv_nsec = hui->hui_atime_ns;
+	ia->ia_mtime.tv_sec = hui->hui_mtime;
+	ia->ia_mtime.tv_nsec = hui->hui_mtime_ns;
+
+	rc = ll_setattr_raw(file->f_dentry, ia, true);
+	if (rc == -ENODATA)
+		rc = 0;
+
+out:
+	if (state != NULL)
+		OBD_FREE_PTR(state);
+
+	if (ia != NULL)
+		OBD_FREE_PTR(ia);
+
+	return rc;
+}
+
+
long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file->f_dentry->d_inode;
@@ -2014,37 +2326,19 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return rc;
}
case LL_IOC_HSM_STATE_SET: {
- struct md_op_data *op_data;
struct hsm_state_set *hss;
int rc;
OBD_ALLOC_PTR(hss);
if (hss == NULL)
return -ENOMEM;
+
if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
OBD_FREE_PTR(hss);
return -EFAULT;
}
- /* Non-root users are forbidden to set or clear flags which are
- * NOT defined in HSM_USER_MASK. */
- if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK)
- && !cfs_capable(CFS_CAP_SYS_ADMIN)) {
- OBD_FREE_PTR(hss);
- return -EPERM;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hss);
- if (IS_ERR(op_data)) {
- OBD_FREE_PTR(hss);
- return PTR_ERR(op_data);
- }
-
- rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
- op_data, NULL);
-
- ll_finish_md_op_data(op_data);
+ rc = ll_hsm_state_set(inode, hss);
OBD_FREE_PTR(hss);
return rc;
@@ -2075,6 +2369,107 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
OBD_FREE_PTR(hca);
return rc;
}
+ case LL_IOC_SET_LEASE: {
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_client_handle *och = NULL;
+ bool lease_broken;
+ fmode_t mode = 0;
+
+ switch (arg) {
+ case F_WRLCK:
+ if (!(file->f_mode & FMODE_WRITE))
+ return -EPERM;
+ mode = FMODE_WRITE;
+ break;
+ case F_RDLCK:
+ if (!(file->f_mode & FMODE_READ))
+ return -EPERM;
+ mode = FMODE_READ;
+ break;
+ case F_UNLCK:
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_lease_och != NULL) {
+ och = fd->fd_lease_och;
+ fd->fd_lease_och = NULL;
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+
+ if (och != NULL) {
+ mode = och->och_flags &
+ (FMODE_READ|FMODE_WRITE);
+ rc = ll_lease_close(och, inode, &lease_broken);
+ if (rc == 0 && lease_broken)
+ mode = 0;
+ } else {
+ rc = -ENOLCK;
+ }
+
+ /* return the type of lease or error */
+ return rc < 0 ? rc : (int)mode;
+ default:
+ return -EINVAL;
+ }
+
+ CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
+
+ /* apply for lease */
+ och = ll_lease_open(inode, file, mode, 0);
+ if (IS_ERR(och))
+ return PTR_ERR(och);
+
+ rc = 0;
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_lease_och == NULL) {
+ fd->fd_lease_och = och;
+ och = NULL;
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+ if (och != NULL) {
+ /* impossible now that only excl is supported for now */
+ ll_lease_close(och, inode, &lease_broken);
+ rc = -EBUSY;
+ }
+ return rc;
+ }
+ case LL_IOC_GET_LEASE: {
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ldlm_lock *lock = NULL;
+
+ rc = 0;
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_lease_och != NULL) {
+ struct obd_client_handle *och = fd->fd_lease_och;
+
+ lock = ldlm_handle2lock(&och->och_lease_handle);
+ if (lock != NULL) {
+ lock_res_and_lock(lock);
+ if (!ldlm_is_cancel(lock))
+ rc = och->och_flags &
+ (FMODE_READ | FMODE_WRITE);
+ unlock_res_and_lock(lock);
+ ldlm_lock_put(lock);
+ }
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+ return rc;
+ }
+ case LL_IOC_HSM_IMPORT: {
+ struct hsm_user_import *hui;
+
+ OBD_ALLOC_PTR(hui);
+ if (hui == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
+ OBD_FREE_PTR(hui);
+ return -EFAULT;
+ }
+
+ rc = ll_hsm_import(inode, file, hui);
+
+ OBD_FREE_PTR(hui);
+ return rc;
+ }
default: {
int err;
@@ -2435,7 +2830,8 @@ int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
}
ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
- struct lustre_handle *lockh, __u64 flags)
+ struct lustre_handle *lockh, __u64 flags,
+ ldlm_mode_t mode)
{
ldlm_policy_data_t policy = { .l_inodebits = {bits}};
struct lu_fid *fid;
@@ -2445,8 +2841,8 @@ ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
- fid, LDLM_IBITS, &policy,
- LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
+ fid, LDLM_IBITS, &policy, mode, lockh);
+
return rc;
}
@@ -2581,7 +2977,15 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
} else {
- rc = ll_glimpse_size(inode);
+ /* In case of restore, the MDT has the right size and has
+ * already send it back without granting the layout lock,
+ * inode is up-to-date so glimpse is useless.
+ * Also to glimpse we need the layout, in case of a running
+ * restore the MDT holds the layout lock so the glimpse will
+ * block up to the end of restore (getattr will block)
+ */
+ if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
+ rc = ll_glimpse_size(inode);
}
return rc;
}
@@ -2628,6 +3032,38 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
return ll_getattr_it(mnt, de, &it, stat);
}
+/*
+ * ->fiemap inode operation: translate the VFS request in @fieinfo into a
+ * struct ll_user_fiemap, run it through ll_do_fiemap() and hand the mapped
+ * extents back.
+ *
+ * inode:   file to map.
+ * fieinfo: VFS request; fi_extents_max gives the capacity of the caller's
+ *          extent array, fi_extents_start points at it.
+ * start:   first byte of the range to map.
+ * len:     length of the range to map.
+ *
+ * The first extent of the caller's array is copied in before the call, and
+ * fm_mapped_extents extents are copied back after it.  Both copies are
+ * skipped when the caller supplied no extent array (fi_extents_max == 0):
+ * the temporary buffer has no extent slot in that case, so touching
+ * fm_extents[0] would run past the allocation.
+ *
+ * fi_extents_start is a userspace array, hence copy_{from,to}_user()
+ * rather than memcpy().
+ *
+ * Returns the result of ll_do_fiemap(), -ENOMEM if the temporary buffer
+ * cannot be allocated, or -EFAULT if the caller's array is not accessible.
+ */
+int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+	      __u64 start, __u64 len)
+{
+	int rc;
+	size_t num_bytes;
+	struct ll_user_fiemap *fiemap;
+	unsigned int extent_count = fieinfo->fi_extents_max;
+
+	num_bytes = sizeof(*fiemap) + (extent_count *
+				       sizeof(struct ll_fiemap_extent));
+	OBD_ALLOC_LARGE(fiemap, num_bytes);
+
+	if (fiemap == NULL)
+		return -ENOMEM;
+
+	fiemap->fm_flags = fieinfo->fi_flags;
+	fiemap->fm_extent_count = fieinfo->fi_extents_max;
+	fiemap->fm_start = start;
+	fiemap->fm_length = len;
+	if (extent_count > 0 &&
+	    copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
+			   sizeof(struct ll_fiemap_extent)) != 0)
+		GOTO(out, rc = -EFAULT);
+
+	rc = ll_do_fiemap(inode, fiemap, num_bytes);
+
+	fieinfo->fi_flags = fiemap->fm_flags;
+	fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
+	if (extent_count > 0 &&
+	    copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
+			 fiemap->fm_mapped_extents *
+			 sizeof(struct ll_fiemap_extent)) != 0)
+		GOTO(out, rc = -EFAULT);
+
+out:
+	OBD_FREE_LARGE(fiemap, num_bytes);
+	return rc;
+}
struct posix_acl * ll_get_acl(struct inode *inode, int type)
{
@@ -2676,17 +3112,12 @@ int ll_inode_permission(struct inode *inode, int mask)
return rc;
}
-#define READ_METHOD aio_read
-#define READ_FUNCTION ll_file_aio_read
-#define WRITE_METHOD aio_write
-#define WRITE_FUNCTION ll_file_aio_write
-
/* -o localflock - only provides locally consistent flock locks */
struct file_operations ll_file_operations = {
.read = ll_file_read,
- .READ_METHOD = READ_FUNCTION,
+ .aio_read = ll_file_aio_read,
.write = ll_file_write,
- .WRITE_METHOD = WRITE_FUNCTION,
+ .aio_write = ll_file_aio_write,
.unlocked_ioctl = ll_file_ioctl,
.open = ll_file_open,
.release = ll_file_release,
@@ -2699,9 +3130,9 @@ struct file_operations ll_file_operations = {
struct file_operations ll_file_operations_flock = {
.read = ll_file_read,
- .READ_METHOD = READ_FUNCTION,
+ .aio_read = ll_file_aio_read,
.write = ll_file_write,
- .WRITE_METHOD = WRITE_FUNCTION,
+ .aio_write = ll_file_aio_write,
.unlocked_ioctl = ll_file_ioctl,
.open = ll_file_open,
.release = ll_file_release,
@@ -2717,9 +3148,9 @@ struct file_operations ll_file_operations_flock = {
/* These are for -o noflock - to return ENOSYS on flock calls */
struct file_operations ll_file_operations_noflock = {
.read = ll_file_read,
- .READ_METHOD = READ_FUNCTION,
+ .aio_read = ll_file_aio_read,
.write = ll_file_write,
- .WRITE_METHOD = WRITE_FUNCTION,
+ .aio_write = ll_file_aio_write,
.unlocked_ioctl = ll_file_ioctl,
.open = ll_file_open,
.release = ll_file_release,
@@ -2740,6 +3171,7 @@ struct inode_operations ll_file_inode_operations = {
.getxattr = ll_getxattr,
.listxattr = ll_listxattr,
.removexattr = ll_removexattr,
+ .fiemap = ll_fiemap,
.get_acl = ll_get_acl,
};
@@ -3086,7 +3518,8 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
/* mostly layout lock is caching on the local side, so try to match
* it before grabbing layout lock mutex. */
- mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0);
+ mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
+ LCK_CR | LCK_CW | LCK_PR | LCK_PW);
if (mode != 0) { /* hit cached lock */
rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
if (rc == 0)
@@ -3101,7 +3534,8 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
again:
/* try again. Maybe somebody else has done this. */
- mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0);
+ mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
+ LCK_CR | LCK_CW | LCK_PR | LCK_PW);
if (mode != 0) { /* hit cached lock */
rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
if (rc == -EAGAIN)
@@ -3150,3 +3584,30 @@ again:
return rc;
}
+
+/**
+ * Send a restore request for @inode to the MDT.
+ *
+ * Builds a one-item hsm_user_request with action HUA_RESTORE for the
+ * inode's FID, with the extent length set to -1, and submits it through
+ * the LL_IOC_HSM_REQUEST iocontrol on the MD export.
+ *
+ * Returns -ENOMEM if the request cannot be allocated, otherwise the result
+ * of obd_iocontrol().
+ */
+int ll_layout_restore(struct inode *inode)
+{
+	struct hsm_user_request *hur;
+	int hur_size;
+	int result;
+
+	/* Request header plus room for exactly one item. */
+	hur_size = sizeof(*hur) + sizeof(struct hsm_user_item);
+	OBD_ALLOC(hur, hur_size);
+	if (hur == NULL)
+		return -ENOMEM;
+
+	hur->hur_request.hr_action = HUA_RESTORE;
+	hur->hur_request.hr_archive_id = 0;
+	hur->hur_request.hr_flags = 0;
+	hur->hur_request.hr_itemcount = 1;
+
+	memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
+	       sizeof(hur->hur_user_item[0].hui_fid));
+	hur->hur_user_item[0].hui_extent.length = -1;
+
+	result = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,
+			       hur_size, hur, NULL);
+	OBD_FREE(hur, hur_size);
+	return result;
+}
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 47e443d90fe1..7ee5c02783f9 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -46,6 +46,8 @@
#include <lclient.h>
#include <lustre_mdc.h>
#include <linux/lustre_intent.h>
+#include <linux/compat.h>
+#include <linux/posix_acl_xattr.h>
#ifndef FMODE_EXEC
#define FMODE_EXEC 0
@@ -124,6 +126,10 @@ enum lli_flags {
LLIF_SRVLOCK = (1 << 5),
/* File data is modified. */
LLIF_DATA_MODIFIED = (1 << 6),
+ /* File is being restored */
+ LLIF_FILE_RESTORING = (1 << 7),
+ /* Xattr cache is attached to the file */
+ LLIF_XATTR_CACHE = (1 << 8),
};
struct ll_inode_info {
@@ -276,8 +282,27 @@ struct ll_inode_info {
struct mutex lli_layout_mutex;
/* valid only inside LAYOUT ibits lock, protected by lli_layout_mutex */
__u32 lli_layout_gen;
+
+ struct rw_semaphore lli_xattrs_list_rwsem;
+ struct mutex lli_xattrs_enq_lock;
+ struct list_head lli_xattrs;/* ll_xattr_entry->xe_list */
};
+int ll_xattr_cache_destroy(struct inode *inode);
+
+int ll_xattr_cache_get(struct inode *inode,
+ const char *name,
+ char *buffer,
+ size_t size,
+ __u64 valid);
+
+int ll_xattr_cache_update(struct inode *inode,
+ const char *name,
+ const char *newval,
+ size_t size,
+ __u64 valid,
+ int flags);
+
/*
* Locking to guarantee consistency of non-atomic updates to long long i_size,
* consistency between file size and KMS.
@@ -399,6 +424,7 @@ enum stats_track_type {
#define LL_SBI_VERBOSE 0x10000 /* verbose mount/umount */
#define LL_SBI_LAYOUT_LOCK 0x20000 /* layout lock support */
#define LL_SBI_USER_FID2PATH 0x40000 /* allow fid2path by unprivileged users */
+#define LL_SBI_XATTR_CACHE 0x80000 /* support for xattr cache */
#define LL_SBI_FLAGS { \
"nolck", \
@@ -406,6 +432,7 @@ enum stats_track_type {
"flock", \
"xattr", \
"acl", \
+ "???", \
"rmt_client", \
"mds_capa", \
"oss_capa", \
@@ -418,7 +445,9 @@ enum stats_track_type {
"agl", \
"verbose", \
"layout", \
- "user_fid2path" }
+ "user_fid2path",\
+ "xattr", \
+}
/* default value for ll_sb_info->contention_time */
#define SBI_DEFAULT_CONTENTION_SECONDS 60
@@ -458,7 +487,8 @@ struct ll_sb_info {
struct lu_fid ll_root_fid; /* root object fid */
int ll_flags;
- int ll_umounting:1;
+ unsigned int ll_umounting:1,
+ ll_xattr_cache_enabled:1;
struct list_head ll_conn_chain; /* per-conn chain of SBs */
struct lustre_client_ocd ll_lco;
@@ -607,10 +637,14 @@ extern struct kmem_cache *ll_file_data_slab;
struct lustre_handle;
struct ll_file_data {
struct ll_readahead_state fd_ras;
- int fd_omode;
struct ccc_grouplock fd_grouplock;
__u64 lfd_pos;
__u32 fd_flags;
+ fmode_t fd_omode;
+ /* openhandle if lease exists for this file.
+ * Borrow lli->lli_och_mutex to protect assignment */
+ struct obd_client_handle *fd_lease_och;
+ struct obd_client_handle *fd_och;
struct file *fd_file;
/* Indicate whether need to report failure when close.
* true: failure is known, not report again.
@@ -643,7 +677,12 @@ static inline int ll_need_32bit_api(struct ll_sb_info *sbi)
#if BITS_PER_LONG == 32
return 1;
#else
- return unlikely(current_is_32bit() || (sbi->ll_flags & LL_SBI_32BIT_API));
+ return unlikely(
+#ifdef CONFIG_COMPAT
+ is_compat_task() ||
+#endif
+ (sbi->ll_flags & LL_SBI_32BIT_API)
+ );
#endif
}
@@ -663,15 +702,22 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count);
void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars);
+void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
+ struct ll_file_data *file, loff_t pos,
+ size_t count, int rw);
#else
static inline int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
struct super_block *sb, char *osc, char *mdc){return 0;}
static inline void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) {}
-static void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) {}
-static void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
+static inline
+void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) {}
+static inline void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
{
memset(lvars, 0, sizeof(*lvars));
}
+static inline void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
+ struct ll_file_data *file, loff_t pos,
+ size_t count, int rw) {}
#endif
@@ -720,7 +766,8 @@ extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *,
extern int ll_have_md_lock(struct inode *inode, __u64 *bits,
ldlm_mode_t l_req_mode);
extern ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
- struct lustre_handle *lockh, __u64 flags);
+ struct lustre_handle *lockh, __u64 flags,
+ ldlm_mode_t mode);
int __ll_inode_revalidate_it(struct dentry *, struct lookup_intent *,
__u64 bits);
int ll_revalidate_nd(struct dentry *dentry, unsigned int flags);
@@ -746,9 +793,6 @@ int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
struct md_open_data **mod);
void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
struct lustre_handle *fh);
-extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
- struct ll_file_data *file, loff_t pos,
- size_t count, int rw);
int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
struct lookup_intent *it, struct kstat *stat);
int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
@@ -775,6 +819,12 @@ int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg);
int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
int ll_fid2path(struct inode *inode, void *arg);
int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock);
+int ll_hsm_release(struct inode *inode);
+
+struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
+ fmode_t mode, __u64 flags);
+int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
+ bool *lease_broken);
/* llite/dcache.c */
@@ -801,7 +851,7 @@ void ll_kill_super(struct super_block *sb);
struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock);
struct inode *ll_inode_from_lock(struct ldlm_lock *lock);
void ll_clear_inode(struct inode *inode);
-int ll_setattr_raw(struct dentry *dentry, struct iattr *attr);
+int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import);
int ll_setattr(struct dentry *de, struct iattr *attr);
int ll_statfs(struct dentry *de, struct kstatfs *sfs);
int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
@@ -1578,5 +1628,9 @@ enum {
int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
int ll_layout_refresh(struct inode *inode, __u32 *gen);
+int ll_layout_restore(struct inode *inode);
+
+int ll_xattr_init(void);
+void ll_xattr_fini(void);
#endif /* LLITE_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index fd584ff7e2df..6cfdb9e4b74b 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -56,6 +56,7 @@
#include "llite_internal.h"
struct kmem_cache *ll_file_data_slab;
+struct proc_dir_entry *proc_lustre_fs_root;
LIST_HEAD(ll_super_blocks);
DEFINE_SPINLOCK(ll_sb_lock);
@@ -209,7 +210,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH|
OBD_CONNECT_EINPROGRESS |
OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
- OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS;
+ OBD_CONNECT_LAYOUTLOCK |
+ OBD_CONNECT_PINGLESS | OBD_CONNECT_MAX_EASIZE;
if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
data->ocd_connect_flags |= OBD_CONNECT_SOM;
@@ -383,6 +385,17 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
}
+ if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
+ if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
+ LCONSOLE_INFO(
+ "%s: disabling xattr cache due to unknown maximum xattr size.\n",
+ dt);
+ } else {
+ sbi->ll_flags |= LL_SBI_XATTR_CACHE;
+ sbi->ll_xattr_cache_enabled = 1;
+ }
+ }
+
obd = class_name2obd(dt);
if (!obd) {
CERROR("DT %s: not setup or attached\n", dt);
@@ -922,6 +935,9 @@ void ll_lli_init(struct ll_inode_info *lli)
lli->lli_layout_gen = LL_LAYOUT_GEN_NONE;
lli->lli_clob = NULL;
+ init_rwsem(&lli->lli_xattrs_list_rwsem);
+ mutex_init(&lli->lli_xattrs_enq_lock);
+
LASSERT(lli->lli_vfs_inode.i_mode != 0);
if (S_ISDIR(lli->lli_vfs_inode.i_mode)) {
mutex_init(&lli->lli_readdir_mutex);
@@ -1194,6 +1210,8 @@ void ll_clear_inode(struct inode *inode)
lli->lli_symlink_name = NULL;
}
+ ll_xattr_cache_destroy(inode);
+
if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
LASSERT(lli->lli_posix_acl == NULL);
if (lli->lli_remote_perms) {
@@ -1346,19 +1364,24 @@ static int ll_setattr_ost(struct inode *inode, struct iattr *attr)
* to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
* I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
* at the same time.
+ *
+ * In case of HSM import, we only set attr on MDS.
*/
-int ll_setattr_raw(struct dentry *dentry, struct iattr *attr)
+int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
{
struct inode *inode = dentry->d_inode;
struct ll_inode_info *lli = ll_i2info(inode);
struct md_op_data *op_data = NULL;
struct md_open_data *mod = NULL;
+ bool file_is_released = false;
int rc = 0, rc1 = 0;
- CDEBUG(D_VFSTRACE, "%s: setattr inode %p/fid:"DFID" from %llu to %llu, "
- "valid %x\n", ll_get_fsname(inode->i_sb, NULL, 0), inode,
+ CDEBUG(D_VFSTRACE,
+ "%s: setattr inode %p/fid:"DFID
+ " from %llu to %llu, valid %x, hsm_import %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0), inode,
PFID(&lli->lli_fid), i_size_read(inode), attr->ia_size,
- attr->ia_valid);
+ attr->ia_valid, hsm_import);
if (attr->ia_valid & ATTR_SIZE) {
/* Check new size against VFS/VM file size limit and rlimit */
@@ -1436,10 +1459,40 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr)
(attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
op_data->op_flags = MF_EPOCH_OPEN;
+ /* truncate on a released file must fail with -ENODATA,
+ * so size must not be set on MDS for released file
+ * but other attributes must be set
+ */
+ if (S_ISREG(inode->i_mode)) {
+ struct lov_stripe_md *lsm;
+ __u32 gen;
+
+ ll_layout_refresh(inode, &gen);
+ lsm = ccc_inode_lsm_get(inode);
+ if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED)
+ file_is_released = true;
+ ccc_inode_lsm_put(inode, lsm);
+ }
+
+ /* if not in HSM import mode, clear size attr for released file
+ * we clear the attribute sent to MDT in op_data, not the original
+ * received from caller in attr which is used later to
+ * decide return code */
+ if (file_is_released && (attr->ia_valid & ATTR_SIZE) && !hsm_import)
+ op_data->op_attr.ia_valid &= ~ATTR_SIZE;
+
rc = ll_md_setattr(dentry, op_data, &mod);
if (rc)
GOTO(out, rc);
+ /* truncate failed (only when non HSM import), others succeed */
+ if (file_is_released) {
+ if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
+ GOTO(out, rc = -ENODATA);
+ else
+ GOTO(out, rc = 0);
+ }
+
/* RPC to MDT is sent, cancel data modification flag */
if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
spin_lock(&lli->lli_lock);
@@ -1473,7 +1526,7 @@ out:
if (!S_ISDIR(inode->i_mode)) {
up_write(&lli->lli_trunc_sem);
mutex_lock(&inode->i_mutex);
- if (attr->ia_valid & ATTR_SIZE)
+ if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
inode_dio_wait(inode);
}
@@ -1508,7 +1561,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
!(attr->ia_valid & ATTR_KILL_SGID))
attr->ia_valid |= ATTR_KILL_SGID;
- return ll_setattr_raw(de, attr);
+ return ll_setattr_raw(de, attr, false);
}
int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
@@ -1721,7 +1774,9 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
* lock on the client and set LLIF_MDS_SIZE_LOCK holding
* it. */
mode = ll_take_md_lock(inode, MDS_INODELOCK_UPDATE,
- &lockh, LDLM_FL_CBPENDING);
+ &lockh, LDLM_FL_CBPENDING,
+ LCK_CR | LCK_CW |
+ LCK_PR | LCK_PW);
if (mode) {
if (lli->lli_flags & (LLIF_DONE_WRITING |
LLIF_EPOCH_PENDING |
@@ -1761,6 +1816,11 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
LASSERT(md->oss_capa);
ll_add_capa(inode, md->oss_capa);
}
+
+ if (body->valid & OBD_MD_TSTATE) {
+ if (body->t_state & MS_RESTORE)
+ lli->lli_flags |= LLIF_FILE_RESTORING;
+ }
}
void ll_read_inode2(struct inode *inode, void *opaque)
diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c
index e2421ea61352..5338e8d4c50f 100644
--- a/drivers/staging/lustre/lustre/llite/lloop.c
+++ b/drivers/staging/lustre/lustre/llite/lloop.c
@@ -856,7 +856,8 @@ static void lloop_exit(void)
module_init(lloop_init);
module_exit(lloop_exit);
-CFS_MODULE_PARM(max_loop, "i", int, 0444, "maximum of lloop_device");
+module_param(max_loop, int, 0444);
+MODULE_PARM_DESC(max_loop, "maximum of lloop_device");
MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre virtual block device");
MODULE_LICENSE("GPL");
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
index 4bf09c4a0c9d..a9a104a6a4ee 100644
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -42,9 +42,6 @@
#include "llite_internal.h"
-struct proc_dir_entry *proc_lustre_fs_root;
-
-#ifdef LPROCFS
/* /proc/lustre/llite mount point registration */
extern struct file_operations vvp_dump_pgcache_file_ops;
struct file_operations ll_rw_extents_stats_fops;
@@ -723,6 +720,41 @@ static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
}
LPROC_SEQ_FOPS_RO(ll_sbi_flags);
+static int ll_xattr_cache_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ int rc; /* seq_printf() result, returned as is */
+
+ rc = seq_printf(m, "%u\n", sbi->ll_xattr_cache_enabled); /* 1-bit flag: 0 or 1 */
+
+ return rc;
+}
+
+static ssize_t ll_xattr_cache_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct seq_file *seq = file->private_data;
+ struct super_block *sb = seq->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ int val, rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val); /* parses the written text into val */
+ if (rc)
+ return rc; /* helper's error is passed through */
+
+ if (val != 0 && val != 1)
+ return -ERANGE; /* only 0 (off) and 1 (on) are accepted */
+
+ if (val == 1 && !(sbi->ll_flags & LL_SBI_XATTR_CACHE))
+ return -ENOTSUPP; /* LL_SBI_XATTR_CACHE not set: enabling is refused */
+
+ sbi->ll_xattr_cache_enabled = val;
+
+ return count; /* the whole write is reported as consumed */
+}
+LPROC_SEQ_FOPS(ll_xattr_cache);
+
static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
{ "uuid", &ll_sb_uuid_fops, 0, 0 },
//{ "mntpt_path", ll_rd_path, 0, 0 },
@@ -751,6 +783,7 @@ static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
{ "lazystatfs", &ll_lazystatfs_fops, 0 },
{ "max_easize", &ll_maxea_size_fops, 0, 0 },
{ "sbi_flags", &ll_sbi_flags_fops, 0, 0 },
+ { "xattr_cache", &ll_xattr_cache_fops, 0, 0 },
{ 0 }
};
@@ -802,6 +835,7 @@ struct llite_file_opcode {
{ LPROC_LL_ALLOC_INODE, LPROCFS_TYPE_REGS, "alloc_inode" },
{ LPROC_LL_SETXATTR, LPROCFS_TYPE_REGS, "setxattr" },
{ LPROC_LL_GETXATTR, LPROCFS_TYPE_REGS, "getxattr" },
+ { LPROC_LL_GETXATTR_HITS, LPROCFS_TYPE_REGS, "getxattr_hits" },
{ LPROC_LL_LISTXATTR, LPROCFS_TYPE_REGS, "listxattr" },
{ LPROC_LL_REMOVEXATTR, LPROCFS_TYPE_REGS, "removexattr" },
{ LPROC_LL_INODE_PERM, LPROCFS_TYPE_REGS, "inode_permission" },
@@ -1367,4 +1401,3 @@ void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
lvars->module_vars = NULL;
lvars->obd_vars = lprocfs_llite_obd_vars;
}
-#endif /* LPROCFS */
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index 90bbdae824ac..fc8d264f6c9a 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -223,6 +223,10 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
break;
LASSERT(lock->l_flags & LDLM_FL_CANCELING);
+
+ if (bits & MDS_INODELOCK_XATTR)
+ ll_xattr_cache_destroy(inode);
+
/* For OPEN locks we differentiate between lock modes
* LCK_CR, LCK_CW, LCK_PR - bug 22891 */
if (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
@@ -233,12 +237,9 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
ll_have_md_lock(inode, &bits, mode);
fid = ll_inode2fid(inode);
- if (lock->l_resource->lr_name.name[0] != fid_seq(fid) ||
- lock->l_resource->lr_name.name[1] != fid_oid(fid) ||
- lock->l_resource->lr_name.name[2] != fid_ver(fid)) {
+ if (!fid_res_name_eq(fid, &lock->l_resource->lr_name))
LDLM_ERROR(lock, "data mismatch with object "
DFID" (%p)", PFID(fid), inode);
- }
if (bits & MDS_INODELOCK_OPEN) {
int flags = 0;
@@ -526,8 +527,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
icbd.icbd_childp = &dentry;
icbd.icbd_parent = parent;
- if (it->it_op & IT_CREAT ||
- (it->it_op & IT_OPEN && it->it_create_mode & O_CREAT))
+ if (it->it_op & IT_CREAT)
opc = LUSTRE_OPC_CREATE;
else
opc = LUSTRE_OPC_ANY;
@@ -626,7 +626,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
return -ENOMEM;
it->it_op = IT_OPEN;
- if (mode) {
+ if (open_flags & O_CREAT) {
it->it_op |= IT_CREAT;
lookup_flags |= LOOKUP_CREATE;
}
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
index 0beaf4e76b4b..e21e1c760a8e 100644
--- a/drivers/staging/lustre/lustre/llite/super25.c
+++ b/drivers/staging/lustre/lustre/llite/super25.c
@@ -187,11 +187,15 @@ static int __init init_lustre_lite(void)
if (rc == 0)
rc = vvp_global_init();
+ if (rc == 0)
+ rc = ll_xattr_init();
+
return rc;
}
static void __exit exit_lustre_lite(void)
{
+ ll_xattr_fini();
vvp_global_fini();
del_timer(&ll_capa_timer);
ll_capa_thread_stop();
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 3ff664ce7503..93cbfbb7e7f7 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -121,8 +121,38 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
CLOBINVRNT(env, obj, ccc_object_invariant(obj));
- CDEBUG(D_VFSTRACE, "ignore/verify layout %d/%d, layout version %d.\n",
- io->ci_ignore_layout, io->ci_verify_layout, cio->cui_layout_gen);
+ CDEBUG(D_VFSTRACE, DFID
+ " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
+ PFID(lu_object_fid(&obj->co_lu)),
+ io->ci_ignore_layout, io->ci_verify_layout,
+ cio->cui_layout_gen, io->ci_restore_needed);
+
+ if (io->ci_restore_needed == 1) {
+ int rc;
+
+ /* file was detected as released, we need to restore it
+ * before finishing the io
+ */
+ rc = ll_layout_restore(ccc_object_inode(obj));
+ /* if restore registration failed, no restart,
+ * we will return -ENODATA */
+ /* The layout will change after restore, so we need to
+ * block on layout lock held by the MDT
+ * as MDT will not send new layout in lvb (see LU-3124)
+ * we have to explicitly fetch it, all this will be done
+ * by ll_layout_refresh()
+ */
+ if (rc == 0) {
+ io->ci_restore_needed = 0;
+ io->ci_need_restart = 1;
+ io->ci_verify_layout = 1;
+ } else {
+ io->ci_restore_needed = 1;
+ io->ci_need_restart = 0;
+ io->ci_verify_layout = 0;
+ io->ci_result = rc;
+ }
+ }
if (!io->ci_ignore_layout && io->ci_verify_layout) {
__u32 gen = 0;
@@ -130,9 +160,17 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
/* check layout version */
ll_layout_refresh(ccc_object_inode(obj), &gen);
io->ci_need_restart = cio->cui_layout_gen != gen;
- if (io->ci_need_restart)
- CDEBUG(D_VFSTRACE, "layout changed from %d to %d.\n",
- cio->cui_layout_gen, gen);
+ if (io->ci_need_restart) {
+ CDEBUG(D_VFSTRACE,
+ DFID" layout changed from %d to %d.\n",
+ PFID(lu_object_fid(&obj->co_lu)),
+ cio->cui_layout_gen, gen);
+ /* today successful restore is the only possible
+ * case */
+ /* restore was done, clear restoring state */
+ ll_i2info(ccc_object_inode(obj))->lli_flags &=
+ ~LLIF_FILE_RESTORING;
+ }
}
}
@@ -590,8 +628,11 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, vmf);
if (vmf->page) {
- LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n",
- vmf->virtual_address);
+ CDEBUG(D_PAGE,
+ "page %p map %p index %lu flags %lx count %u priv %0lx: got addr %p type NOPAGE\n",
+ vmf->page, vmf->page->mapping, vmf->page->index,
+ (long)vmf->page->flags, page_count(vmf->page),
+ page_private(vmf->page), vmf->virtual_address);
if (unlikely(!(cfio->fault.ft_flags & VM_FAULT_LOCKED))) {
lock_page(vmf->page);
cfio->fault.ft_flags &= VM_FAULT_LOCKED;
@@ -1111,6 +1152,12 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+ CDEBUG(D_VFSTRACE, DFID
+ " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
+ PFID(lu_object_fid(&obj->co_lu)),
+ io->ci_ignore_layout, io->ci_verify_layout,
+ cio->cui_layout_gen, io->ci_restore_needed);
+
CL_IO_SLICE_CLEAN(cio, cui_cl);
cl_io_slice_add(io, &cio->cui_cl, obj, &vvp_io_ops);
vio->cui_ra_window_set = 0;
diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c
index 33173fce478f..25973dedd9a2 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_object.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_object.c
@@ -138,7 +138,7 @@ int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
lli->lli_layout_gen,
conf->u.coc_md->lsm->lsm_layout_gen);
- lli->lli_has_smd = true;
+ lli->lli_has_smd = lsm_has_objects(conf->u.coc_md->lsm);
lli->lli_layout_gen = conf->u.coc_md->lsm->lsm_layout_gen;
} else {
CDEBUG(D_VFSTRACE, "layout lock destroyed: %u.\n",
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
index bcf86bac30a9..3a7d03c12dd9 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -109,12 +109,12 @@ int ll_setxattr_common(struct inode *inode, const char *name,
int flags, __u64 valid)
{
struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req;
+ struct ptlrpc_request *req = NULL;
int xattr_type, rc;
struct obd_capa *oc;
+ struct rmtacl_ctl_entry *rce = NULL;
#ifdef CONFIG_FS_POSIX_ACL
posix_acl_xattr_header *new_value = NULL;
- struct rmtacl_ctl_entry *rce = NULL;
ext_acl_xattr_header *acl = NULL;
#endif
const char *pv = value;
@@ -183,11 +183,17 @@ int ll_setxattr_common(struct inode *inode, const char *name,
valid |= rce_ops2valid(rce->rce_ops);
}
#endif
- oc = ll_mdscapa_get(inode);
- rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
- valid, name, pv, size, 0, flags, ll_i2suppgid(inode),
- &req);
- capa_put(oc);
+ if (sbi->ll_xattr_cache_enabled &&
+ (rce == NULL || rce->rce_ops == RMT_LSETFACL)) {
+ rc = ll_xattr_cache_update(inode, name, pv, size, valid, flags);
+ } else {
+ oc = ll_mdscapa_get(inode);
+ rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
+ valid, name, pv, size, 0, flags,
+ ll_i2suppgid(inode), &req);
+ capa_put(oc);
+ }
+
#ifdef CONFIG_FS_POSIX_ACL
if (new_value != NULL)
lustre_posix_acl_xattr_free(new_value, size);
@@ -352,48 +358,54 @@ int ll_getxattr_common(struct inode *inode, const char *name,
#endif
do_getxattr:
- oc = ll_mdscapa_get(inode);
- rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
- valid | (rce ? rce_ops2valid(rce->rce_ops) : 0),
- name, NULL, 0, size, 0, &req);
- capa_put(oc);
- if (rc) {
- if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
- LCONSOLE_INFO("Disabling user_xattr feature because "
- "it is not supported on the server\n");
- sbi->ll_flags &= ~LL_SBI_USER_XATTR;
- }
- return rc;
- }
+ if (sbi->ll_xattr_cache_enabled && (rce == NULL ||
+ rce->rce_ops == RMT_LGETFACL ||
+ rce->rce_ops == RMT_LSETFACL)) {
+ rc = ll_xattr_cache_get(inode, name, buffer, size, valid);
+ if (rc < 0)
+ GOTO(out_xattr, rc);
+ } else {
+ oc = ll_mdscapa_get(inode);
+ rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
+ valid | (rce ? rce_ops2valid(rce->rce_ops) : 0),
+ name, NULL, 0, size, 0, &req);
+ capa_put(oc);
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- LASSERT(body);
+ if (rc < 0)
+ GOTO(out_xattr, rc);
- /* only detect the xattr size */
- if (size == 0)
- GOTO(out, rc = body->eadatasize);
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ LASSERT(body);
- if (size < body->eadatasize) {
- CERROR("server bug: replied size %u > %u\n",
- body->eadatasize, (int)size);
- GOTO(out, rc = -ERANGE);
- }
+ /* only detect the xattr size */
+ if (size == 0)
+ GOTO(out, rc = body->eadatasize);
+
+ if (size < body->eadatasize) {
+ CERROR("server bug: replied size %u > %u\n",
+ body->eadatasize, (int)size);
+ GOTO(out, rc = -ERANGE);
+ }
- if (body->eadatasize == 0)
- GOTO(out, rc = -ENODATA);
+ if (body->eadatasize == 0)
+ GOTO(out, rc = -ENODATA);
- /* do not need swab xattr data */
- xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
- body->eadatasize);
- if (!xdata)
- GOTO(out, rc = -EFAULT);
+ /* do not need swab xattr data */
+ xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
+ body->eadatasize);
+ if (!xdata)
+ GOTO(out, rc = -EFAULT);
+
+ memcpy(buffer, xdata, body->eadatasize);
+ rc = body->eadatasize;
+ }
#ifdef CONFIG_FS_POSIX_ACL
- if (body->eadatasize >= 0 && rce && rce->rce_ops == RMT_LSETFACL) {
+ if (rce && rce->rce_ops == RMT_LSETFACL) {
ext_acl_xattr_header *acl;
- acl = lustre_posix_acl_xattr_2ext((posix_acl_xattr_header *)xdata,
- body->eadatasize);
+ acl = lustre_posix_acl_xattr_2ext(
+ (posix_acl_xattr_header *)buffer, rc);
if (IS_ERR(acl))
GOTO(out, rc = PTR_ERR(acl));
@@ -406,12 +418,12 @@ do_getxattr:
}
#endif
- if (body->eadatasize == 0) {
- rc = -ENODATA;
- } else {
- LASSERT(buffer);
- memcpy(buffer, xdata, body->eadatasize);
- rc = body->eadatasize;
+out_xattr:
+ if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
+ LCONSOLE_INFO(
+ "%s: disabling user_xattr feature because it is not supported on the server: rc = %d\n",
+ ll_get_fsname(inode->i_sb, NULL, 0), rc);
+ sbi->ll_flags &= ~LL_SBI_USER_XATTR;
}
out:
ptlrpc_req_finished(req);
diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c
new file mode 100644
index 000000000000..3e3be1f13502
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c
@@ -0,0 +1,617 @@
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Author: Andrew Perepechko <Andrew_Perepechko@xyratex.com>
+ *
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <obd_support.h>
+#include <lustre_lite.h>
+#include <lustre_dlm.h>
+#include <lustre_ver.h>
+#include "llite_internal.h"
+
+/* One cached extended attribute (name/value pair) of an inode.
+ *
+ * xe_name and xe_value are separate allocations owned by the entry; they are
+ * sized by xe_namelen and xe_vallen, which are also the sizes passed when the
+ * buffers are freed.
+ *
+ * If we ever have hundreds of extended attributes, we might want to consider
+ * using a hash or a tree structure instead of list for faster lookups.
+ */
+struct ll_xattr_entry {
+ struct list_head xe_list; /* link in lli->lli_xattrs, protected with
+ * lli_xattrs_list_rwsem */
+ char *xe_name; /* xattr name, \0-terminated */
+ char *xe_value; /* xattr value, xe_vallen bytes; no terminator
+ * is stored */
+ unsigned xe_namelen; /* strlen(xe_name) + 1 */
+ unsigned xe_vallen; /* xattr value length in bytes */
+};
+
+static struct kmem_cache *xattr_kmem; /* slab for struct ll_xattr_entry */
+static struct lu_kmem_descr xattr_caches[] = {
+ { /* descriptor for xattr_kmem, one object per cached xattr */
+ .ckd_cache = &xattr_kmem,
+ .ckd_name = "xattr_kmem",
+ .ckd_size = sizeof(struct ll_xattr_entry)
+ },
+ {
+ .ckd_cache = NULL /* presumably the end-of-table marker */
+ }
+};
+
+int ll_xattr_init(void)
+{
+ return lu_kmem_init(xattr_caches); /* called from init_lustre_lite(); result returned as is */
+}
+
+void ll_xattr_fini(void)
+{
+ lu_kmem_fini(xattr_caches); /* counterpart of ll_xattr_init(), called from exit_lustre_lite() */
+}
+
+/**
+ * Initializes xattr cache for an inode.
+ *
+ * This resets @lli->lli_xattrs to an empty list and sets LLIF_XATTR_CACHE in
+ * @lli->lli_flags, which is the bit ll_xattr_cache_valid() tests.
+ *
+ * No lock is taken here, so any serialization is up to the caller.
+ *
+ * \param lli inode info to initialize, must not be NULL (asserted)
+ */
+static void ll_xattr_cache_init(struct ll_inode_info *lli)
+{
+
+
+ LASSERT(lli != NULL);
+
+ INIT_LIST_HEAD(&lli->lli_xattrs);
+ lli->lli_flags |= LLIF_XATTR_CACHE;
+}
+
+/**
+ * This looks for a specific extended attribute.
+ *
+ * Walk @cache from its head and return in @xattr the first entry whose name
+ * equals @xattr_name; for the NULL @xattr_name return the first cached entry.
+ * @xattr is written only on success. No lock is taken here.
+ *
+ * \param cache list head of ll_xattr_entry items linked through xe_list
+ * \param xattr_name name to look up, or NULL to match any entry
+ * \param xattr where the pointer to the matching entry is stored
+ *
+ * \retval 0 success
+ * \retval -ENODATA if not found (including an empty @cache)
+ */
+static int ll_xattr_cache_find(struct list_head *cache,
+ const char *xattr_name,
+ struct ll_xattr_entry **xattr)
+{
+ struct ll_xattr_entry *entry;
+
+
+
+ list_for_each_entry(entry, cache, xe_list) {
+ /* xattr_name == NULL means look for any entry */
+ if (xattr_name == NULL ||
+ strcmp(xattr_name, entry->xe_name) == 0) {
+ *xattr = entry;
+ CDEBUG(D_CACHE, "find: [%s]=%.*s\n",
+ entry->xe_name, entry->xe_vallen,
+ entry->xe_value);
+ return 0;
+ }
+ }
+
+ return -ENODATA;
+}
+
+/**
+ * This adds or updates an xattr.
+ *
+ * Add @xattr_name attr with @xattr_val value and @xattr_val_len length,
+ * if the attribute already exists, then update its value.
+ *
+ * On update the value buffer is reallocated only when the length changes; if
+ * that allocation fails the existing entry is left untouched. A new entry is
+ * inserted at the head of @cache. No lock is taken here.
+ *
+ * \param cache list head of ll_xattr_entry items
+ * \param xattr_name \0-terminated attribute name; it is copied
+ * \param xattr_val attribute value; @xattr_val_len bytes are copied
+ * \param xattr_val_len length of @xattr_val in bytes
+ *
+ * \retval 0 success
+ * \retval -ENOMEM if no memory could be allocated for the cached attr
+ */
+static int ll_xattr_cache_add(struct list_head *cache,
+ const char *xattr_name,
+ const char *xattr_val,
+ unsigned xattr_val_len)
+{
+ struct ll_xattr_entry *xattr;
+
+
+
+ if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) {
+ /* Found a cached EA, update it */
+
+ if (xattr_val_len != xattr->xe_vallen) {
+ char *val;
+ OBD_ALLOC(val, xattr_val_len);
+ if (val == NULL) {
+ CDEBUG(D_CACHE,
+ "failed to allocate %u bytes for xattr %s update\n",
+ xattr_val_len, xattr_name);
+ return -ENOMEM;
+ }
+ /* the old buffer is released only once the new one exists */
+ OBD_FREE(xattr->xe_value, xattr->xe_vallen);
+ xattr->xe_value = val;
+ xattr->xe_vallen = xattr_val_len;
+ }
+ memcpy(xattr->xe_value, xattr_val, xattr_val_len);
+
+ CDEBUG(D_CACHE, "update: [%s]=%.*s\n", xattr_name,
+ xattr_val_len, xattr_val);
+
+ return 0;
+ }
+
+ /* NOTE(review): only __GFP_IO is passed as the allocation mask here;
+ * confirm this is the intended mask */
+ OBD_SLAB_ALLOC_PTR_GFP(xattr, xattr_kmem, __GFP_IO);
+ if (xattr == NULL) {
+ CDEBUG(D_CACHE, "failed to allocate xattr\n");
+ return -ENOMEM;
+ }
+
+ xattr->xe_namelen = strlen(xattr_name) + 1;
+
+ OBD_ALLOC(xattr->xe_name, xattr->xe_namelen);
+ if (!xattr->xe_name) {
+ CDEBUG(D_CACHE, "failed to alloc xattr name %u\n",
+ xattr->xe_namelen);
+ goto err_name;
+ }
+ OBD_ALLOC(xattr->xe_value, xattr_val_len);
+ if (!xattr->xe_value) {
+ /* xattr_val_len is unsigned, so print it with %u */
+ CDEBUG(D_CACHE, "failed to alloc xattr value %u\n",
+ xattr_val_len);
+ goto err_value;
+ }
+
+ memcpy(xattr->xe_name, xattr_name, xattr->xe_namelen);
+ memcpy(xattr->xe_value, xattr_val, xattr_val_len);
+ xattr->xe_vallen = xattr_val_len;
+ list_add(&xattr->xe_list, cache);
+
+ CDEBUG(D_CACHE, "set: [%s]=%.*s\n", xattr_name,
+ xattr_val_len, xattr_val);
+
+ return 0;
+err_value:
+ OBD_FREE(xattr->xe_name, xattr->xe_namelen);
+err_name:
+ OBD_SLAB_FREE_PTR(xattr, xattr_kmem);
+
+ return -ENOMEM;
+}
+
+/**
+ * This removes an extended attribute from cache.
+ *
+ * Remove @xattr_name attribute from @cache: the entry is unlinked and its
+ * name buffer, value buffer and the entry itself are freed. The name is
+ * handed to ll_xattr_cache_find() unchanged, so a NULL @xattr_name removes
+ * the first cached entry. No lock is taken here.
+ *
+ * \retval 0 success
+ * \retval -ENODATA if @xattr_name is not cached
+ */
+static int ll_xattr_cache_del(struct list_head *cache,
+ const char *xattr_name)
+{
+ struct ll_xattr_entry *xattr;
+
+
+
+ CDEBUG(D_CACHE, "del xattr: %s\n", xattr_name);
+
+ if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) {
+ list_del(&xattr->xe_list);
+ OBD_FREE(xattr->xe_name, xattr->xe_namelen);
+ OBD_FREE(xattr->xe_value, xattr->xe_vallen);
+ OBD_SLAB_FREE_PTR(xattr, xattr_kmem);
+
+ return 0;
+ }
+
+ return -ENODATA;
+}
+
+/**
+ * This iterates cached extended attributes.
+ *
+ * Walk over cached attributes in @cache and
+ * fill in @xld_buffer or only calculate buffer
+ * size if @xld_buffer is NULL.
+ *
+ * Names are copied back to back, each including its terminating \0
+ * (xe_namelen bytes per entry). @xld_size is only consulted when
+ * @xld_buffer is not NULL.
+ *
+ * \retval >= 0 buffer list size
+ * \retval -ERANGE if the list cannot fit @xld_size buffer
+ */
+static int ll_xattr_cache_list(struct list_head *cache,
+ char *xld_buffer,
+ int xld_size)
+{
+ struct ll_xattr_entry *xattr, *tmp;
+ int xld_tail = 0;
+
+
+
+ list_for_each_entry_safe(xattr, tmp, cache, xe_list) {
+ CDEBUG(D_CACHE, "list: buffer=%p[%d] name=%s\n",
+ xld_buffer, xld_tail, xattr->xe_name);
+
+ if (xld_buffer) {
+ xld_size -= xattr->xe_namelen;
+ if (xld_size < 0)
+ break;
+ memcpy(&xld_buffer[xld_tail],
+ xattr->xe_name, xattr->xe_namelen);
+ }
+ xld_tail += xattr->xe_namelen;
+ }
+
+ if (xld_size < 0)
+ return -ERANGE;
+
+ return xld_tail;
+}
+
+/**
+ * Check if the xattr cache is initialized (filled).
+ *
+ * This only tests the LLIF_XATTR_CACHE bit in @lli->lli_flags.
+ *
+ * NOTE(review): the function is not static although no prototype for it is
+ * visible in the llite_internal.h changes -- confirm the linkage is intended.
+ *
+ * \retval 0 the cache of @lli is not initialized
+ * \retval 1 the cache of @lli is initialized
+ */
+int ll_xattr_cache_valid(struct ll_inode_info *lli)
+{
+ return !!(lli->lli_flags & LLIF_XATTR_CACHE);
+}
+
+/**
+ * This finalizes the xattr cache.
+ *
+ * Free all xattr memory. @lli is the inode info pointer.
+ *
+ * Nothing is done when LLIF_XATTR_CACHE is not set; otherwise every entry
+ * is deleted and the flag is cleared. No lock is taken here:
+ * ll_xattr_cache_destroy() calls this with lli_xattrs_list_rwsem held for
+ * writing.
+ *
+ * \retval 0 no error occurred
+ */
+static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli)
+{
+
+
+ if (!ll_xattr_cache_valid(lli))
+ return 0;
+
+ while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0)
+ ; /* NULL name deletes the first entry; stops once the list is empty */
+ lli->lli_flags &= ~LLIF_XATTR_CACHE;
+
+ return 0;
+}
+
+int ll_xattr_cache_destroy(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int rc;
+
+ /* Drop every xattr cached for @inode; used by ll_clear_inode() and ll_md_blocking_ast(). */
+ /* The list rwsem is held for writing around the teardown; the helper's result is returned. */
+ down_write(&lli->lli_xattrs_list_rwsem);
+ rc = ll_xattr_cache_destroy_locked(lli);
+ up_write(&lli->lli_xattrs_list_rwsem);
+
+ return rc;
+}
+
+/**
+ * Match or enqueue a PR or PW LDLM lock.
+ *
+ * Find or request an LDLM lock with xattr data.
+ * Since LDLM does not provide API for atomic match_or_enqueue,
+ * the function handles it with a separate enq lock.
+ * If successful, the function exits with the list lock held.
+ *
+ * Lock state on exit: on success lli_xattrs_list_rwsem is held for writing
+ * and lli_xattrs_enq_lock has been released; on failure neither is held.
+ *
+ * @req is written only when a lock had to be enqueued; when a cached lock
+ * is matched, only the lock handle and mode in @oit are filled in and @req
+ * is left as the caller set it.
+ *
+ * \param inode inode whose xattr lock is wanted
+ * \param oit intent; IT_SETXATTR restricts matching to LCK_PW
+ * \param req where the enqueue request taken from @oit is stored
+ *
+ * \retval 0 no error occurred
+ * \retval -ENOMEM not enough memory
+ * \retval < 0 any other error from ll_prep_md_op_data() or md_enqueue()
+ */
+static int ll_xattr_find_get_lock(struct inode *inode,
+ struct lookup_intent *oit,
+ struct ptlrpc_request **req)
+{
+ ldlm_mode_t mode;
+ struct lustre_handle lockh = { 0 };
+ struct md_op_data *op_data;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ldlm_enqueue_info einfo = { .ei_type = LDLM_IBITS,
+ .ei_mode = it_to_lock_mode(oit),
+ .ei_cb_bl = ll_md_blocking_ast,
+ .ei_cb_cp = ldlm_completion_ast };
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct obd_export *exp = sbi->ll_md_exp;
+ int rc;
+
+
+
+ mutex_lock(&lli->lli_xattrs_enq_lock);
+ /* Try matching first: PW only for setxattr, PR or PW otherwise. */
+ mode = ll_take_md_lock(inode, MDS_INODELOCK_XATTR, &lockh, 0,
+ oit->it_op == IT_SETXATTR ? LCK_PW :
+ (LCK_PR | LCK_PW));
+ if (mode != 0) {
+ /* fake oit in mdc_revalidate_lock() manner */
+ oit->d.lustre.it_lock_handle = lockh.cookie;
+ oit->d.lustre.it_lock_mode = mode;
+ goto out;
+ }
+
+ /* Enqueue if the lock isn't cached locally. */
+ op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data)) {
+ mutex_unlock(&lli->lli_xattrs_enq_lock);
+ return PTR_ERR(op_data);
+ }
+
+ op_data->op_valid = OBD_MD_FLXATTR | OBD_MD_FLXATTRLS |
+ OBD_MD_FLXATTRLOCKED;
+#ifdef CONFIG_FS_POSIX_ACL
+ /* If working with ACLs, we would like to cache local ACLs */
+ if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
+ op_data->op_valid |= OBD_MD_FLRMTLGETFACL;
+#endif
+
+ rc = md_enqueue(exp, &einfo, oit, op_data, &lockh, NULL, 0, NULL, 0);
+ ll_finish_md_op_data(op_data); /* op_data is released on success and failure alike */
+
+ if (rc < 0) {
+ CDEBUG(D_CACHE,
+ "md_intent_lock failed with %d for fid "DFID"\n",
+ rc, PFID(ll_inode2fid(inode)));
+ mutex_unlock(&lli->lli_xattrs_enq_lock);
+ return rc;
+ }
+
+ *req = (struct ptlrpc_request *)oit->d.lustre.it_data;
+out:
+ down_write(&lli->lli_xattrs_list_rwsem); /* stays held on return */
+ mutex_unlock(&lli->lli_xattrs_enq_lock);
+
+ return 0;
+}
+
+/**
+ * Refill the xattr cache.
+ *
+ * Fetch and cache the whole of xattrs for @inode, acquiring
+ * a read or a write xattr lock depending on operation in @oit.
+ * Intent is dropped on exit unless the operation is setxattr.
+ *
+ * Locking: on success (0) the function returns with
+ * lli_xattrs_list_rwsem still held for writing (it was taken by
+ * ll_xattr_find_get_lock()); the caller must release or downgrade it.
+ * On any error the rwsem has been released before returning.
+ *
+ * The reply buffers come from the server, so every name and value is
+ * bounds-checked against its buffer before it is read, logged or cached.
+ *
+ * \retval 0 no error occurred
+ * \retval -EIO a matched lock had no cache (cancelled by a parallel refill)
+ * \retval -EPROTO network protocol error
+ * \retval -ENOMEM not enough memory for the cache
+ */
+static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit)
+{
+	struct ll_sb_info *sbi = ll_i2sbi(inode);
+	struct ptlrpc_request *req = NULL;
+	const char *xdata, *xval, *xtail, *xvtail;
+	struct ll_inode_info *lli = ll_i2info(inode);
+	struct mdt_body *body;
+	__u32 *xsizes;
+	int rc = 0, i;
+
+	rc = ll_xattr_find_get_lock(inode, oit, &req);
+	if (rc)
+		GOTO(out_no_unlock, rc);
+
+	/* Do we have the data at this point? */
+	if (ll_xattr_cache_valid(lli)) {
+		ll_stats_ops_tally(sbi, LPROC_LL_GETXATTR_HITS, 1);
+		GOTO(out_maybe_drop, rc = 0);
+	}
+
+	/* Matched but no cache? Cancelled on error by a parallel refill. */
+	if (unlikely(req == NULL)) {
+		CDEBUG(D_CACHE, "cancelled by a parallel getxattr\n");
+		GOTO(out_maybe_drop, rc = -EIO);
+	}
+
+	if (oit->d.lustre.it_status < 0) {
+		CDEBUG(D_CACHE, "getxattr intent returned %d for fid "DFID"\n",
+		       oit->d.lustre.it_status, PFID(ll_inode2fid(inode)));
+		GOTO(out_destroy, rc = oit->d.lustre.it_status);
+	}
+
+	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+	if (body == NULL) {
+		CERROR("no MDT BODY in the refill xattr reply\n");
+		GOTO(out_destroy, rc = -EPROTO);
+	}
+	/* do not need swab xattr data */
+	xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
+					     body->eadatasize);
+	xval = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS,
+					    body->aclsize);
+	xsizes = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS_LENS,
+					      body->max_mdsize * sizeof(__u32));
+	if (xdata == NULL || xval == NULL || xsizes == NULL) {
+		CERROR("wrong setxattr reply\n");
+		GOTO(out_destroy, rc = -EPROTO);
+	}
+
+	/* one-past-the-end markers of the name and value buffers */
+	xtail = xdata + body->eadatasize;
+	xvtail = xval + body->aclsize;
+
+	CDEBUG(D_CACHE, "caching: xdata=%p xtail=%p\n", xdata, xtail);
+
+	ll_xattr_cache_init(lli);
+
+	/* max_mdsize is used as the number of (name, value, length) triples */
+	for (i = 0; i < body->max_mdsize; i++) {
+		/* Perform consistency checks: attr names and vals in pill */
+		if (memchr(xdata, 0, xtail - xdata) == NULL) {
+			CERROR("xattr protocol violation (names are broken)\n");
+			rc = -EPROTO;
+		} else if (*xsizes > (size_t)(xvtail - xval)) {
+			/* compare against the remaining bytes rather than
+			 * forming xval + *xsizes, which could overflow */
+			CERROR("xattr protocol violation (vals are broken)\n");
+			rc = -EPROTO;
+		} else if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_XATTR_ENOMEM)) {
+			rc = -ENOMEM;
+		} else {
+			/* log only after validation: %s and %.*s would read
+			 * past the buffers on a malformed reply */
+			CDEBUG(D_CACHE, "caching [%s]=%.*s\n",
+			       xdata, *xsizes, xval);
+			rc = ll_xattr_cache_add(&lli->lli_xattrs, xdata, xval,
+						*xsizes);
+		}
+		if (rc < 0) {
+			ll_xattr_cache_destroy_locked(lli);
+			GOTO(out_destroy, rc);
+		}
+		/* advance to the next name, value and length */
+		xdata += strlen(xdata) + 1;
+		xval += *xsizes;
+		xsizes++;
+	}
+
+	if (xdata != xtail || xval != xvtail)
+		CERROR("a hole in xattr data\n");
+
+	ll_set_lock_data(sbi->ll_md_exp, inode, oit, NULL);
+
+	GOTO(out_maybe_drop, rc);
+out_maybe_drop:
+	/* drop lock on error or getxattr */
+	if (rc != 0 || oit->it_op != IT_SETXATTR)
+		ll_intent_drop_lock(oit);
+
+	/* on success the list rwsem stays write-held for the caller */
+	if (rc != 0)
+		up_write(&lli->lli_xattrs_list_rwsem);
+out_no_unlock:
+	ptlrpc_req_finished(req);
+
+	return rc;
+
+out_destroy:
+	up_write(&lli->lli_xattrs_list_rwsem);
+
+	/* the reply was unusable: release and cancel the LDLM lock */
+	ldlm_lock_decref_and_cancel((struct lustre_handle *)
+					&oit->d.lustre.it_lock_handle,
+					oit->d.lustre.it_lock_mode);
+
+	goto out_no_unlock;
+}
+
+/**
+ * Get an xattr value or list xattrs using the write-through cache.
+ *
+ * Exactly one of OBD_MD_FLXATTR (fetch the value of @name) and
+ * OBD_MD_FLXATTRLS (list the attribute names) must be set in @valid.
+ * The cache of @inode is refilled first when it is not valid.  The result
+ * is copied to @buffer when it fits in @size bytes; a @size of zero only
+ * asks for the required length.
+ *
+ * \retval >=0 length of the value / of the name list
+ * \retval -EPROTO network protocol error
+ * \retval -ENOMEM not enough memory for the cache
+ * \retval -ERANGE the buffer is not large enough
+ * \retval -ENODATA no such attr or the list is empty
+ */
+int ll_xattr_cache_get(struct inode *inode,
+			const char *name,
+			char *buffer,
+			size_t size,
+			__u64 valid)
+{
+	struct lookup_intent oit = { .it_op = IT_GETXATTR };
+	struct ll_inode_info *lli = ll_i2info(inode);
+	int rc = 0;
+
+	LASSERT(!!(valid & OBD_MD_FLXATTR) ^ !!(valid & OBD_MD_FLXATTRLS));
+
+	down_read(&lli->lli_xattrs_list_rwsem);
+	if (ll_xattr_cache_valid(lli)) {
+		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR_HITS, 1);
+	} else {
+		/* a successful refill returns with the rwsem write-held,
+		 * so give up the read side first and downgrade afterwards */
+		up_read(&lli->lli_xattrs_list_rwsem);
+		rc = ll_xattr_cache_refill(inode, &oit);
+		if (rc != 0)
+			return rc;
+		downgrade_write(&lli->lli_xattrs_list_rwsem);
+	}
+
+	if (valid & OBD_MD_FLXATTR) {
+		struct ll_xattr_entry *entry;
+
+		rc = ll_xattr_cache_find(&lli->lli_xattrs, name, &entry);
+		if (rc == 0) {
+			if (size != 0 && size < entry->xe_vallen) {
+				rc = -ERANGE;
+			} else {
+				/* zero size means only the length is wanted */
+				if (size != 0)
+					memcpy(buffer, entry->xe_value,
+					       entry->xe_vallen);
+				rc = entry->xe_vallen;
+			}
+		}
+	} else if (valid & OBD_MD_FLXATTRLS) {
+		char *dst = size != 0 ? buffer : NULL;
+
+		rc = ll_xattr_cache_list(&lli->lli_xattrs, dst, size);
+	}
+
+	GOTO(out, rc);
+out:
+	up_read(&lli->lli_xattrs_list_rwsem);
+
+	return rc;
+}
+
+
+/**
+ * Set/update an xattr value or remove xattr using the write-through cache.
+ *
+ * Exactly one of OBD_MD_FLXATTR (set @name to the @size bytes at @newval)
+ * and OBD_MD_FLXATTRRM (remove @name) must be set in @valid.  The change is
+ * first sent with md_setxattr() and, only if that succeeds, mirrored into
+ * the local cache of @inode.
+ * @flags is either XATTR_CREATE or XATTR_REPLACE as defined by setxattr(2)
+ *
+ * \retval 0 no error occurred
+ * \retval -EPROTO network protocol error
+ * \retval -ENOMEM not enough memory for the cache
+ * \retval -ERANGE the buffer is not large enough
+ * \retval -ENODATA no such attr (in the removal case)
+ */
+int ll_xattr_cache_update(struct inode *inode,
+			const char *name,
+			const char *newval,
+			size_t size,
+			__u64 valid,
+			int flags)
+{
+	struct lookup_intent oit = { .it_op = IT_SETXATTR };
+	struct ll_sb_info *sbi = ll_i2sbi(inode);
+	struct ptlrpc_request *req = NULL;
+	struct ll_inode_info *lli = ll_i2info(inode);
+	struct obd_capa *oc;
+	int rc;
+
+	LASSERT(!!(valid & OBD_MD_FLXATTR) ^ !!(valid & OBD_MD_FLXATTRRM));
+
+	/* on success this leaves the list rwsem write-held and, because the
+	 * intent is IT_SETXATTR, keeps the intent lock */
+	rc = ll_xattr_cache_refill(inode, &oit);
+	if (rc != 0)
+		return rc;
+
+	oc = ll_mdscapa_get(inode);
+	rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
+			 valid | OBD_MD_FLXATTRLOCKED, name, newval,
+			 size, 0, flags, ll_i2suppgid(inode), &req);
+	capa_put(oc);
+
+	/* touch the cache only after the remote update went through */
+	if (rc == 0) {
+		if (valid & OBD_MD_FLXATTR)
+			rc = ll_xattr_cache_add(&lli->lli_xattrs, name,
+						newval, size);
+		else if (valid & OBD_MD_FLXATTRRM)
+			rc = ll_xattr_cache_del(&lli->lli_xattrs, name);
+	}
+
+	ll_intent_drop_lock(&oit);
+	GOTO(out, rc);
+out:
+	up_write(&lli->lli_xattrs_list_rwsem);
+	ptlrpc_req_finished(req);
+
+	return rc;
+}