From 00cd8dd3bf95f2cc8435b4cac01d9995635c6d0b Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sun, 10 Jun 2012 17:13:09 -0400
Subject: stop passing nameidata to ->lookup()

Just the flags; only NFS cares even about that, but there are
legitimate uses for such argument.  And getting rid of that completely
would require splitting ->lookup() into a couple of methods (at least),
so let's leave that alone for now...

Signed-off-by: Al Viro
---
 kernel/cgroup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b303dfc7dce0..0cd1314acdaf 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -822,7 +822,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock);
  */
 static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
-static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
 static int cgroup_populate_dir(struct cgroup *cgrp);
 static const struct inode_operations cgroup_dir_inode_operations;
@@ -2570,7 +2570,7 @@ static const struct inode_operations cgroup_dir_inode_operations = {
 	.rename = cgroup_rename,
 };
 
-static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
--
cgit v1.2.3


From 79714f72d3b964611997de512cb29198c9f2dbbb Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Fri, 15 Jun 2012 03:01:42 +0400
Subject: get rid of kern_path_parent()

all callers want the same thing, actually - a kinda-sorta analog of
kern_path_create().  I.e. they want parent vfsmount/dentry (with
->i_mutex held, to make sure the child dentry is still their child)
+ the child dentry.

Signed-off-by: Al Viro
---
 kernel/audit_watch.c | 25 +++----------------------
 1 file changed, 3 insertions(+), 22 deletions(-)

(limited to 'kernel')

diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index e683869365d9..3823281401b5 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -355,34 +355,15 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 
 /* Get path information necessary for adding watches. */
 static int audit_get_nd(struct audit_watch *watch, struct path *parent)
 {
-	struct nameidata nd;
-	struct dentry *d;
-	int err;
-
-	err = kern_path_parent(watch->path, &nd);
-	if (err)
-		return err;
-
-	if (nd.last_type != LAST_NORM) {
-		path_put(&nd.path);
-		return -EINVAL;
-	}
-
-	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
-	d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
-	if (IS_ERR(d)) {
-		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-		path_put(&nd.path);
+	struct dentry *d = kern_path_locked(watch->path, parent);
+	if (IS_ERR(d))
 		return PTR_ERR(d);
-	}
+	mutex_unlock(&parent->dentry->d_inode->i_mutex);
 	if (d->d_inode) {
 		/* update watch filter fields */
 		watch->dev = d->d_inode->i_sb->s_dev;
 		watch->ino = d->d_inode->i_ino;
 	}
-	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
-
-	*parent = nd.path;
 	dput(d);
 	return 0;
 }
--
cgit v1.2.3

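A quick illustration of the helper the patch above switches to: kern_path_locked()
resolves the parent, takes its i_mutex, and hands back the (possibly negative)
child dentry, so the kern_path_parent()/LAST_NORM/lookup_one_len() boilerplate
disappears from callers.  The sketch below is illustrative only and not part of
the series; it assumes the 3.5-era prototype
struct dentry *kern_path_locked(const char *, struct path *), and
example_stat_child() is a made-up caller.

#include <linux/namei.h>
#include <linux/path.h>
#include <linux/dcache.h>
#include <linux/mutex.h>
#include <linux/err.h>
#include <linux/printk.h>

/* Hypothetical caller, modelled on audit_get_nd() above. */
static int example_stat_child(const char *pathname)
{
	struct path parent;
	struct dentry *d = kern_path_locked(pathname, &parent);

	if (IS_ERR(d))			/* parent is not pinned on error */
		return PTR_ERR(d);

	if (d->d_inode)			/* the child may be negative */
		pr_info("child inode %lu\n", d->d_inode->i_ino);

	mutex_unlock(&parent.dentry->d_inode->i_mutex);
	dput(d);
	path_put(&parent);
	return 0;
}
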
From be34d1a3bc4b6f357a49acb55ae870c81337e4f0 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Mon, 25 Jun 2012 12:55:18 +0100
Subject: VFS: Make clone_mnt()/copy_tree()/collect_mounts() return errors

copy_tree() can theoretically fail in a case other than ENOMEM, but always
returns NULL which is interpreted by callers as -ENOMEM.  Change it to return
an explicit error.

Also change clone_mnt() for consistency and because union mounts will add new
error cases.

Thanks to Andreas Gruenbacher for a bug fix.
[AV: folded braino fix by Dan Carpenter]

Original-author: Valerie Aurora
Signed-off-by: David Howells
Cc: Valerie Aurora
Cc: Andreas Gruenbacher
Signed-off-by: Al Viro
---
 kernel/audit_tree.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 5bf0790497e7..3a5ca582ba1e 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -595,7 +595,7 @@ void audit_trim_trees(void)
 
 		root_mnt = collect_mounts(&path);
 		path_put(&path);
-		if (!root_mnt)
+		if (IS_ERR(root_mnt))
 			goto skip_it;
 
 		spin_lock(&hash_lock);
@@ -669,8 +669,8 @@ int audit_add_tree_rule(struct audit_krule *rule)
 		goto Err;
 	mnt = collect_mounts(&path);
 	path_put(&path);
-	if (!mnt) {
-		err = -ENOMEM;
+	if (IS_ERR(mnt)) {
+		err = PTR_ERR(mnt);
 		goto Err;
 	}
 
@@ -719,8 +719,8 @@ int audit_tag_tree(char *old, char *new)
 		return err;
 	tagged = collect_mounts(&path2);
 	path_put(&path2);
-	if (!tagged)
-		return -ENOMEM;
+	if (IS_ERR(tagged))
+		return PTR_ERR(tagged);
 
 	err = kern_path(old, 0, &path1);
 	if (err) {
--
cgit v1.2.3

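The point of the convention change above is that collect_mounts() callers now
propagate the real error instead of assuming -ENOMEM.  A minimal caller-side
sketch, illustrative only: example_collect() is a made-up function, and the
assumed prototype is the 3.5-era struct vfsmount *collect_mounts(struct path *).

#include <linux/err.h>
#include <linux/mount.h>
#include <linux/path.h>

/* Hypothetical caller: NULL checks become IS_ERR()/PTR_ERR(). */
static int example_collect(struct path *path, struct vfsmount **res)
{
	struct vfsmount *mnt = collect_mounts(path);

	if (IS_ERR(mnt))	/* was: if (!mnt) return -ENOMEM; */
		return PTR_ERR(mnt);
	*res = mnt;		/* release later with drop_collected_mounts() */
	return 0;
}
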
From 9249e17fe094d853d1ef7475dd559a2cc7e23d42 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Mon, 25 Jun 2012 12:55:37 +0100
Subject: VFS: Pass mount flags to sget()

Pass mount flags to sget() so that it can use them in initialising a new
superblock before the set function is called.  They could also be passed
to the compare function.

Signed-off-by: David Howells
Signed-off-by: Al Viro
---
 kernel/cgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0cd1314acdaf..af2b5641fc8b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1587,7 +1587,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	opts.new_root = new_root;
 
 	/* Locate an existing or new sb for this hierarchy */
-	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
+	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
 		cgroup_drop_root(opts.new_root);
--
cgit v1.2.3


From 7266702805f9d824f92ce5c4069eca65d0f21d28 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sun, 15 Jul 2012 14:10:52 +0400
Subject: signal: make sure we don't get stopped with pending task_work

Signed-off-by: Al Viro
---
 kernel/signal.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index 677102789cf2..be4f856d52f8 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1971,6 +1971,13 @@ static void ptrace_do_notify(int signr, int exit_code, int why)
 void ptrace_notify(int exit_code)
 {
 	BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
+	if (unlikely(current->task_works)) {
+		if (test_and_clear_ti_thread_flag(current_thread_info(),
+						  TIF_NOTIFY_RESUME)) {
+			smp_mb__after_clear_bit();
+			task_work_run();
+		}
+	}
 
 	spin_lock_irq(&current->sighand->siglock);
 	ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED);
@@ -2191,6 +2198,14 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
 	struct signal_struct *signal = current->signal;
 	int signr;
 
+	if (unlikely(current->task_works)) {
+		if (test_and_clear_ti_thread_flag(current_thread_info(),
+						  TIF_NOTIFY_RESUME)) {
+			smp_mb__after_clear_bit();
+			task_work_run();
+		}
+	}
+
 	if (unlikely(uprobe_deny_signal()))
 		return 0;
--
cgit v1.2.3


From 41f9d29f09ca0b22c3631e8a39676e74cda9bcc0 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Tue, 26 Jun 2012 22:10:04 +0400
Subject: trimming task_work: kill ->data

get rid of the only user of ->data; this is _not_ the final variant - in the
end we'll have task_work and rcu_head identical and just use cred->rcu, at
which point the separate allocation will be gone completely.

Signed-off-by: Al Viro
---
 kernel/irq/manage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 8c548232ba39..d1dd54734ce7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -830,7 +830,7 @@ static int irq_thread(void *data)
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
 
-	init_task_work(&on_exit_work, irq_thread_dtor, NULL);
+	init_task_work(&on_exit_work, irq_thread_dtor);
 	task_work_add(current, &on_exit_work, false);
 
 	while (!irq_wait_for_interrupt(action)) {
--
cgit v1.2.3


From 158e1645e07f3e9f7e4962d7a0997f5c3b98311b Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Wed, 27 Jun 2012 09:24:13 +0400
Subject: trim task_work: get rid of hlist

layout based on Oleg's suggestion; single-linked list, task->task_works
points to the last element, forward pointer from said last element points
to head.  I'd still prefer much more regular scheme with two pointers
in task_work, but...

Signed-off-by: Al Viro
---
 kernel/fork.c      |  2 +-
 kernel/task_work.c | 64 ++++++++++++++++++++++++++++--------------------
 2 files changed, 34 insertions(+), 32 deletions(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index ab5211b9e622..bebabad59202 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1415,7 +1415,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->thread_group);
-	INIT_HLIST_HEAD(&p->task_works);
+	p->task_works = NULL;
 
 	/* Now that the task is set up, run cgroup callbacks if
 	 * necessary. We need to run them before the task is visible
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 82d1c794066d..9b8948dbdc60 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -19,7 +19,12 @@ task_work_add(struct task_struct *task, struct task_work *twork, bool notify)
 	 */
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
 	if (likely(!(task->flags & PF_EXITING))) {
-		hlist_add_head(&twork->hlist, &task->task_works);
+		struct task_work *last = task->task_works;
+		struct task_work *first = last ? last->next : twork;
+		twork->next = first;
+		if (last)
+			last->next = twork;
+		task->task_works = twork;
 		err = 0;
 	}
 	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
@@ -34,51 +39,48 @@ struct task_work *
 task_work_cancel(struct task_struct *task, task_work_func_t func)
 {
 	unsigned long flags;
-	struct task_work *twork;
-	struct hlist_node *pos;
+	struct task_work *last, *res = NULL;
 
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
-	hlist_for_each_entry(twork, pos, &task->task_works, hlist) {
-		if (twork->func == func) {
-			hlist_del(&twork->hlist);
-			goto found;
+	last = task->task_works;
+	if (last) {
+		struct task_work *q = last, *p = q->next;
+		while (1) {
+			if (p->func == func) {
+				q->next = p->next;
+				if (p == last)
+					task->task_works = q == p ? NULL : q;
+				res = p;
+				break;
+			}
+			if (p == last)
+				break;
+			q = p;
+			p = q->next;
 		}
 	}
-	twork = NULL;
- found:
 	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-
-	return twork;
+	return res;
 }
 
 void task_work_run(void)
 {
 	struct task_struct *task = current;
-	struct hlist_head task_works;
-	struct hlist_node *pos;
+	struct task_work *p, *q;
 
 	raw_spin_lock_irq(&task->pi_lock);
-	hlist_move_list(&task->task_works, &task_works);
+	p = task->task_works;
+	task->task_works = NULL;
 	raw_spin_unlock_irq(&task->pi_lock);
 
-	if (unlikely(hlist_empty(&task_works)))
+	if (unlikely(!p))
 		return;
 
-	/*
-	 * We use hlist to save the space in task_struct, but we want fifo.
-	 * Find the last entry, the list should be short, then process them
-	 * in reverse order.
-	 */
-	for (pos = task_works.first; pos->next; pos = pos->next)
-		;
-
-	for (;;) {
-		struct hlist_node **pprev = pos->pprev;
-		struct task_work *twork = container_of(pos, struct task_work,
-							hlist);
-		twork->func(twork);
-		if (pprev == &task_works.first)
-			break;
-		pos = container_of(pprev, struct hlist_node, next);
+	q = p->next; /* head */
+	p->next = NULL; /* cut it */
+	while (q) {
+		p = q->next;
+		q->func(q);
+		q = p;
 	}
 }
--
cgit v1.2.3

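The list discipline introduced above is compact but easy to misread:
task->task_works points at the most recently queued (last) element, and that
element's ->next points back at the first one, so an append is O(1) and a FIFO
walk starts from last->next once the cycle is cut.  Below is a small standalone
sketch of the same idea; it is illustrative only, plain userspace C with
simplified names rather than the kernel structures.

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-ins for struct task_work / task->task_works. */
struct work {
	struct work *next;
	void (*func)(struct work *);
};

static struct work *works;	/* points to the LAST queued element */

static void add(struct work *w)	/* O(1) append, as in task_work_add() */
{
	struct work *last = works;
	struct work *first = last ? last->next : w;

	w->next = first;	/* new tail points back at the head */
	if (last)
		last->next = w;
	works = w;
}

static void run(void)		/* FIFO walk, as in task_work_run() */
{
	struct work *p = works, *q;

	works = NULL;
	if (!p)
		return;
	q = p->next;		/* head */
	p->next = NULL;		/* cut the cycle */
	while (q) {
		p = q->next;
		q->func(q);
		q = p;
	}
}

static void hello(struct work *w) { printf("%p ran\n", (void *)w); }

int main(void)
{
	struct work a = { .func = hello }, b = { .func = hello };

	add(&a);
	add(&b);
	run();			/* runs a, then b: FIFO order */
	return 0;
}
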
From 67d1214551e800f9fe7dc7c47a346d2df0fafed5 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Wed, 27 Jun 2012 11:07:19 +0400
Subject: merge task_work and rcu_head, get rid of separate allocation for keyring case

task_work and rcu_head are identical now; merge them (calling the result
struct callback_head, rcu_head #define'd to it), kill separate allocation
in security/keys since we can just use cred->rcu now.

Signed-off-by: Al Viro
---
 kernel/irq/manage.c |  4 ++--
 kernel/task_work.c  | 14 +++++++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d1dd54734ce7..814c9ef6bba1 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -781,7 +781,7 @@ static void wake_threads_waitq(struct irq_desc *desc)
 		wake_up(&desc->wait_for_threads);
 }
 
-static void irq_thread_dtor(struct task_work *unused)
+static void irq_thread_dtor(struct callback_head *unused)
 {
 	struct task_struct *tsk = current;
 	struct irq_desc *desc;
@@ -813,7 +813,7 @@ static void irq_thread_dtor(struct task_work *unused)
  */
 static int irq_thread(void *data)
 {
-	struct task_work on_exit_work;
+	struct callback_head on_exit_work;
 	static const struct sched_param param = {
 		.sched_priority = MAX_USER_RT_PRIO/2,
 	};
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 9b8948dbdc60..76266fb665dc 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -3,7 +3,7 @@
 #include <linux/tracehook.h>
 
 int
-task_work_add(struct task_struct *task, struct task_work *twork, bool notify)
+task_work_add(struct task_struct *task, struct callback_head *twork, bool notify)
 {
 	unsigned long flags;
 	int err = -ESRCH;
@@ -19,8 +19,8 @@ task_work_add(struct task_struct *task, struct task_work *twork, bool notify)
 	 */
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
 	if (likely(!(task->flags & PF_EXITING))) {
-		struct task_work *last = task->task_works;
-		struct task_work *first = last ? last->next : twork;
+		struct callback_head *last = task->task_works;
+		struct callback_head *first = last ? last->next : twork;
 		twork->next = first;
 		if (last)
 			last->next = twork;
@@ -35,16 +35,16 @@ task_work_add(struct task_struct *task, struct task_work *twork, bool notify)
 	return err;
 }
 
-struct task_work *
+struct callback_head *
 task_work_cancel(struct task_struct *task, task_work_func_t func)
 {
 	unsigned long flags;
-	struct task_work *last, *res = NULL;
+	struct callback_head *last, *res = NULL;
 
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
 	last = task->task_works;
 	if (last) {
-		struct task_work *q = last, *p = q->next;
+		struct callback_head *q = last, *p = q->next;
 		while (1) {
 			if (p->func == func) {
 				q->next = p->next;
@@ -66,7 +66,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
 void task_work_run(void)
 {
 	struct task_struct *task = current;
-	struct task_work *p, *q;
+	struct callback_head *p, *q;
 
 	raw_spin_lock_irq(&task->pi_lock);
 	p = task->task_works;
--
cgit v1.2.3

From ed3e694d78cc75fa79bf29698631b146fd27aa35 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Wed, 27 Jun 2012 11:31:24 +0400
Subject: move exit_task_work() past exit_files() et.al.

... and get rid of PF_EXITING check in task_work_add().

Signed-off-by: Al Viro
---
 kernel/exit.c      |  6 ++----
 kernel/task_work.c | 30 +++++++++++-------------------
 2 files changed, 13 insertions(+), 23 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 2f59cc334516..d17f6c4ddfa9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -953,14 +953,11 @@ void do_exit(long code)
 	exit_signals(tsk);  /* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
-	 * an exiting task cleaning up the robust pi futexes, and in
-	 * task_work_add() to avoid the race with exit_task_work().
+	 * an exiting task cleaning up the robust pi futexes.
 	 */
 	smp_mb();
 	raw_spin_unlock_wait(&tsk->pi_lock);
 
-	exit_task_work(tsk);
-
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
 				current->comm, task_pid_nr(current),
@@ -995,6 +992,7 @@ void do_exit(long code)
 	exit_shm(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
+	exit_task_work(tsk);
 	check_stack_usage();
 	exit_thread();
 
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 76266fb665dc..fb396089f66a 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -5,34 +5,26 @@
 int
 task_work_add(struct task_struct *task, struct callback_head *twork, bool notify)
 {
+	struct callback_head *last, *first;
 	unsigned long flags;
-	int err = -ESRCH;
 
-#ifndef TIF_NOTIFY_RESUME
-	if (notify)
-		return -ENOTSUPP;
-#endif
 	/*
-	 * We must not insert the new work if the task has already passed
-	 * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait()
-	 * and check PF_EXITING under pi_lock.
+	 * Not inserting the new work if the task has already passed
+	 * exit_task_work() is the responsibility of callers.
 	 */
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
-	if (likely(!(task->flags & PF_EXITING))) {
-		struct callback_head *last = task->task_works;
-		struct callback_head *first = last ? last->next : twork;
-		twork->next = first;
-		if (last)
-			last->next = twork;
-		task->task_works = twork;
-		err = 0;
-	}
+	last = task->task_works;
+	first = last ? last->next : twork;
+	twork->next = first;
+	if (last)
+		last->next = twork;
+	task->task_works = twork;
 	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 
 	/* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
-	if (likely(!err) && notify)
+	if (notify)
 		set_notify_resume(task);
-	return err;
+	return 0;
 }
 
 struct callback_head *
--
cgit v1.2.3

From a2d4c71d1559426155e5da8db3265bfa0d8d398d Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Wed, 27 Jun 2012 11:33:29 +0400
Subject: deal with task_work callbacks adding more work

It doesn't matter on normal return to userland path (we'll recheck the
NOTIFY_RESUME flag anyway), but in case of exit_task_work() we'll need
that as soon as we get callbacks capable of triggering more
task_work_add().

Signed-off-by: Al Viro
---
 kernel/task_work.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/kernel/task_work.c b/kernel/task_work.c
index fb396089f66a..91d4e1742a0c 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -60,19 +60,21 @@ void task_work_run(void)
 	struct task_struct *task = current;
 	struct callback_head *p, *q;
 
-	raw_spin_lock_irq(&task->pi_lock);
-	p = task->task_works;
-	task->task_works = NULL;
-	raw_spin_unlock_irq(&task->pi_lock);
+	while (1) {
+		raw_spin_lock_irq(&task->pi_lock);
+		p = task->task_works;
+		task->task_works = NULL;
+		raw_spin_unlock_irq(&task->pi_lock);
 
-	if (unlikely(!p))
-		return;
+		if (unlikely(!p))
+			return;
 
-	q = p->next; /* head */
-	p->next = NULL; /* cut it */
-	while (q) {
-		p = q->next;
-		q->func(q);
-		q = p;
+		q = p->next; /* head */
+		p->next = NULL; /* cut it */
+		while (q) {
+			p = q->next;
+			q->func(q);
+			q = p;
+		}
 	}
 }
--
cgit v1.2.3
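
With the series applied, the queue layout from "trim task_work: get rid of
hlist" survives unchanged, only spelled as struct callback_head, and do_exit()
now runs it via exit_task_work() after exit_files()/exit_fs().  A rough sketch
of how a kernel-side user drives the resulting interface; illustrative only,
example_thread()/example_dtor() are made-up names, modelled on the irq_thread()
usage earlier in the series and assuming the 3.6-era prototypes.

#include <linux/task_work.h>
#include <linux/sched.h>
#include <linux/printk.h>

/* Hypothetical callback: runs in the queuing task's own context. */
static void example_dtor(struct callback_head *unused)
{
	pr_info("example task_work ran for %s\n", current->comm);
}

static int example_thread(void *data)
{
	struct callback_head on_exit_work;

	/* Two-argument form after "trimming task_work: kill ->data". */
	init_task_work(&on_exit_work, example_dtor);
	/* false: don't raise TIF_NOTIFY_RESUME; let it run at exit. */
	task_work_add(current, &on_exit_work, false);

	/* ... main loop ... */

	/*
	 * Pending callbacks fire from task_work_run(), either on return
	 * to userspace or from exit_task_work() in do_exit(); cancel
	 * explicitly if the on-stack callback_head is about to go away.
	 */
	task_work_cancel(current, example_dtor);
	return 0;
}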