From f03542a7019c600163ac4441d8a826c92c1bd510 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 26 Jul 2012 08:55:34 +0800 Subject: sched: recover SD_WAKE_AFFINE in select_task_rq_fair and code clean up Since power saving code was removed from sched now, the implement code is out of service in this function, and even pollute other logical. like, 'want_sd' never has chance to be set '0', that remove the effect of SD_WAKE_AFFINE here. So, clean up the obsolete code, includes SD_PREFER_LOCAL. Signed-off-by: Alex Shi Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/5028F431.6000306@intel.com Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index b8c86648a2f9..f3eebc121ebc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -860,7 +860,6 @@ enum cpu_idle_type { #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_PREFER_LOCAL 0x0040 /* Prefer to keep tasks local to this domain */ #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ -- cgit v1.2.3 From f8ac4ec9c064b330dcc49e03c450fe74298c4622 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Aug 2012 17:11:42 +0200 Subject: uprobes: Introduce MMF_HAS_UPROBES Add the new MMF_HAS_UPROBES flag. It is set by install_breakpoint() and it is copied by dup_mmap(), uprobe_pre_sstep_notifier() checks it to avoid the slow path if the task was never probed. Perhaps it makes sense to check it in valid_vma(is_register => false) as well. This needs the new dup_mmap()->uprobe_dup_mmap() hook. We can't use uprobe_reset_state() or put MMF_HAS_UPROBES into MMF_INIT_MASK, we need oldmm->mmap_sem to avoid the race with uprobe_register() or mmap() from another thread. Currently we never clear this bit, it can be false-positive after uprobe_unregister() or uprobe_munmap() or if dup_mmap() hits the probed VM_DONTCOPY vma. But this is fine correctness-wise and has no effect unless the task hits the non-uprobe breakpoint. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index b8c86648a2f9..3667c332e61d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -446,6 +446,8 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ +#define MMF_HAS_UPROBES 19 /* might have uprobes */ + #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) struct sighand_struct { -- cgit v1.2.3 From f3e947867478af9a12b9956bcd000ac7613a8a95 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Sep 2012 11:22:00 +0200 Subject: sched: Remove __ARCH_WANT_INTERRUPTS_ON_CTXSW Now that the last architecture to use this has stopped doing so (ARM, thanks Catalin!) we can remove this complexity from the scheduler core. Signed-off-by: Peter Zijlstra Cc: Oleg Nesterov Cc: Catalin Marinas Link: http://lkml.kernel.org/n/tip-g9p2a1w81xxbrze25v9zpzbf@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index f3eebc121ebc..60e5e38eee2a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -678,11 +678,6 @@ struct signal_struct { * (notably. ptrace) */ }; -/* Context switch must be unlocked if interrupts are to be enabled */ -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW -# define __ARCH_WANT_UNLOCKED_CTXSW -#endif - /* * Bits in flags field of signal_struct. */ -- cgit v1.2.3 From c1cc017c59c44d9ede7003631c43adc0cfdce2f9 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Mon, 10 Sep 2012 15:10:58 +0800 Subject: sched/nohz: Clean up select_nohz_load_balancer() There is no load_balancer to be selected now. It just sets the state of the nohz tick to stop. So rename the function, pass the 'cpu' as a parameter and then remove the useless call from tick_nohz_restart_sched_tick(). [ s/set_nohz_tick_stopped/nohz_balance_enter_idle/g s/clear_nohz_tick_stopped/nohz_balance_exit_idle/g ] Signed-off-by: Alex Shi Acked-by: Suresh Siddha Cc: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1347261059-24747-1-git-send-email-alex.shi@intel.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 60e5e38eee2a..8c38df07ac3a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -273,11 +273,11 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(int cpu); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) -extern void select_nohz_load_balancer(int stop_tick); +extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); #else -static inline void select_nohz_load_balancer(int stop_tick) { } +static inline void nohz_balance_enter_idle(int cpu) { } static inline void set_cpu_sd_state_idle(void) { } #endif -- cgit v1.2.3 From 9f68f672c47b9bd4cfe0a667ecb0b1382c61e2de Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 19 Aug 2012 16:15:09 +0200 Subject: uprobes: Introduce MMF_RECALC_UPROBES Add the new MMF_RECALC_UPROBES flag, it means that MMF_HAS_UPROBES can be false positive after remove_breakpoint() or uprobe_munmap(). It is also set by uprobe_dup_mmap(), this is not optimal but simple. We could add the new hook, uprobe_dup_vma(), to set MMF_HAS_UPROBES only if the new mm actually has uprobes, but I don't think this makes sense. The next patch will use this flag to clear MMF_HAS_UPROBES. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3667c332e61d..255661d48834 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -446,7 +446,8 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ -#define MMF_HAS_UPROBES 19 /* might have uprobes */ +#define MMF_HAS_UPROBES 19 /* has uprobes */ +#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) -- cgit v1.2.3 From e1760bd5ffae8cb98cffb030ee8e631eba28f3d8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Sep 2012 22:39:43 -0700 Subject: userns: Convert the audit loginuid to be a kuid Always store audit loginuids in type kuid_t. Print loginuids by converting them into uids in the appropriate user namespace, and then printing the resulting uid. Modify audit_get_loginuid to return a kuid_t. Modify audit_set_loginuid to take a kuid_t. Modify /proc//loginuid on read to convert the loginuid into the user namespace of the opener of the file. Modify /proc//loginud on write to convert the loginuid rom the user namespace of the opener of the file. Cc: Al Viro Cc: Eric Paris Cc: Paul Moore ? Cc: David Miller Signed-off-by: Eric W. Biederman --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index c147e7024f11..f64d092f2bed 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1426,7 +1426,7 @@ struct task_struct { struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL - uid_t loginuid; + kuid_t loginuid; unsigned int sessionid; #endif struct seccomp seccomp; -- cgit v1.2.3 From 5640f7685831e088fe6c2e1f863a6805962f8e81 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Sep 2012 23:04:42 +0000 Subject: net: use a per task frag allocator We currently use a per socket order-0 page cache for tcp_sendmsg() operations. This page is used to build fragments for skbs. Its done to increase probability of coalescing small write() into single segments in skbs still in write queue (not yet sent) But it wastes a lot of memory for applications handling many mostly idle sockets, since each socket holds one page in sk->sk_sndmsg_page Its also quite inefficient to build TSO 64KB packets, because we need about 16 pages per skb on arches where PAGE_SIZE = 4096, so we hit page allocator more than wanted. This patch adds a per task frag allocator and uses bigger pages, if available. An automatic fallback is done in case of memory pressure. (up to 32768 bytes per frag, thats order-3 pages on x86) This increases TCP stream performance by 20% on loopback device, but also benefits on other network devices, since 8x less frags are mapped on transmit and unmapped on tx completion. Alexander Duyck mentioned a probable performance win on systems with IOMMU enabled. Its possible some SG enabled hardware cant cope with bigger fragments, but their ndo_start_xmit() should already handle this, splitting a fragment in sub fragments, since some arches have PAGE_SIZE=65536 Successfully tested on various ethernet devices. (ixgbe, igb, bnx2x, tg3, mellanox mlx4) Signed-off-by: Eric Dumazet Cc: Ben Hutchings Cc: Vijay Subramanian Cc: Alexander Duyck Tested-by: Vijay Subramanian Signed-off-by: David S. Miller --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index b8c86648a2f9..a8e2413f6bc3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1530,6 +1530,9 @@ struct task_struct { * cache last used pipe for splice */ struct pipe_inode_info *splice_pipe; + + struct page_frag task_frag; + #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info *delays; #endif -- cgit v1.2.3 From 04e7e951532b390b16feb070be9972b8fad2fc57 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 16 Jul 2012 15:06:40 -0700 Subject: rcu: Switch task's syscall hooks on context switch Clear the syscalls hook of a task when it's scheduled out so that if the task migrates, it doesn't run the syscall slow path on a CPU that might not need it. Also set the syscalls hook on the next task if needed. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Josh Triplett Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 23bddac4bad8..335720a1fc33 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1885,6 +1885,14 @@ static inline void rcu_copy_process(struct task_struct *p) #endif +static inline void rcu_switch(struct task_struct *prev, + struct task_struct *next) +{ +#ifdef CONFIG_RCU_USER_QS + rcu_user_hooks_switch(prev, next); +#endif +} + static inline void tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { -- cgit v1.2.3