From 95d97adb2bb85d964bae4538e0574e742e522dda Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Thu, 17 Dec 2015 23:56:52 +0000 Subject: x86/signal: Cleanup get_nr_restart_syscall() Check for TS_COMPAT instead of TIF_IA32 to distinguish ia32 tasks from 64-bit tasks. Check for __X32_SYSCALL_BIT iff CONFIG_X86_X32_ABI is defined. Suggested-by: Andy Lutomirski Signed-off-by: Dmitry V. Levin Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Elvira Khabirova Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160111145515.GB29007@altlinux.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index cb6282c3638f..c07ff5ddbd47 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -692,12 +692,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { -#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64) +#ifdef CONFIG_X86_64 + if (is_ia32_task()) + return __NR_ia32_restart_syscall; +#endif +#ifdef CONFIG_X86_X32_ABI + return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); +#else return __NR_restart_syscall; -#else /* !CONFIG_X86_32 && CONFIG_X86_64 */ - return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : - __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); -#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */ +#endif } /* -- cgit v1.2.3 From 14365449b6ce34cf6a3040ff8ebbb39d89d67159 Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Tue, 26 Jan 2016 18:21:21 +0600 Subject: x86/asm: Remove unused L3_PAGE_OFFSET L3_PAGE_OFFSET was introduced in commit a6523748bd (paravirt/x86, 64-bit: move __PAGE_OFFSET to leave a space for hypervisor), but has no users. Signed-off-by: Alexander Kuleshov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: Andrey Ryabinin Link: http://lkml.kernel.org/r/1453810881-30622-1-git-send-email-kuleshovmail@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head_64.S | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index ffdc0e860390..2e974680f5ad 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -38,7 +38,6 @@ #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) -L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET) L4_START_KERNEL = pgd_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) -- cgit v1.2.3 From 32324ce15ea8cb4c8acc28acb2fd36fabf73e9db Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 28 Jan 2016 15:11:22 -0800 Subject: x86/syscalls: Remove __SYSCALL_COMMON and __SYSCALL_X32 The common/64/x32 distinction has no effect other than determining which kernels actually support the syscall. Move the logic into syscalltbl.sh. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/58d4a95f40e43b894f93288b4a3633963d0ee22e.1454022279.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets_64.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f2edafb5f24e..29db3b3f550c 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -5,12 +5,6 @@ #include #define __SYSCALL_64(nr, sym, compat) [nr] = 1, -#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1, -#ifdef CONFIG_X86_X32_ABI -# define __SYSCALL_X32(nr, sym, compat) [nr] = 1, -#else -# define __SYSCALL_X32(nr, sym, compat) /* nothing */ -#endif static char syscalls_64[] = { #include }; -- cgit v1.2.3 From 3e65654e3db6df6aba9c5b895f8b8e6a8d8eb508 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 28 Jan 2016 15:11:23 -0800 Subject: x86/syscalls: Move compat syscall entry handling into syscalltbl.sh Rather than duplicating the compat entry handling in all consumers of syscalls_BITS.h, handle it directly in syscalltbl.sh. Now we generate entries in syscalls_32.h like: __SYSCALL_I386(5, sys_open) __SYSCALL_I386(5, compat_sys_open) and all of its consumers implicitly get the right entry point. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/b7c2b501dc0e6e43050e916b95807c3e2e16e9bb.1454022279.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets_32.c | 2 +- arch/x86/kernel/asm-offsets_64.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 6ce39025f467..abec4c9f1c97 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -7,7 +7,7 @@ #include #include "../../../drivers/lguest/lg.h" -#define __SYSCALL_I386(nr, sym, compat) [nr] = 1, +#define __SYSCALL_I386(nr, sym) [nr] = 1, static char syscalls[] = { #include }; diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 29db3b3f550c..9677bf9a616f 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -4,11 +4,11 @@ #include -#define __SYSCALL_64(nr, sym, compat) [nr] = 1, +#define __SYSCALL_64(nr, sym) [nr] = 1, static char syscalls_64[] = { #include }; -#define __SYSCALL_I386(nr, sym, compat) [nr] = 1, +#define __SYSCALL_I386(nr, sym) [nr] = 1, static char syscalls_ia32[] = { #include }; -- cgit v1.2.3 From cfcbadb49dabb05efa23e1a0f95f3391c0a815bc Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 28 Jan 2016 15:11:24 -0800 Subject: x86/syscalls: Add syscall entry qualifiers This will let us specify something like 'sys_xyz/foo' instead of 'sys_xyz' in the syscall table, where the 'foo' qualifier conveys some extra information to the C code. The intent is to allow things like sys_execve/ptregs to indicate that sys_execve() touches pt_regs. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2de06e33dce62556b3ec662006fcb295504e296e.1454022279.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets_32.c | 2 +- arch/x86/kernel/asm-offsets_64.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index abec4c9f1c97..fdeb0ce07c16 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -7,7 +7,7 @@ #include #include "../../../drivers/lguest/lg.h" -#define __SYSCALL_I386(nr, sym) [nr] = 1, +#define __SYSCALL_I386(nr, sym, qual) [nr] = 1, static char syscalls[] = { #include }; diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 9677bf9a616f..d875f97d4e0b 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -4,11 +4,11 @@ #include -#define __SYSCALL_64(nr, sym) [nr] = 1, +#define __SYSCALL_64(nr, sym, qual) [nr] = 1, static char syscalls_64[] = { #include }; -#define __SYSCALL_I386(nr, sym) [nr] = 1, +#define __SYSCALL_I386(nr, sym, qual) [nr] = 1, static char syscalls_ia32[] = { #include }; -- cgit v1.2.3 From cd4d09ec6f6c12a2cc3db5b7d8876a325a53545b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 26 Jan 2016 22:12:04 +0100 Subject: x86/cpufeature: Carve out X86_FEATURE_* Move them to a separate header and have the following dependency: x86/cpufeatures.h <- x86/processor.h <- x86/cpufeature.h This makes it easier to use the header in asm code and not include the whole cpufeature.h and add guards for asm. Suggested-by: H. Peter Anvin Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1453842730-28463-5-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/centaur.c | 2 +- arch/x86/kernel/cpu/cyrix.c | 1 + arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/cpu/intel_cacheinfo.c | 2 +- arch/x86/kernel/cpu/match.c | 2 +- arch/x86/kernel/cpu/mkcapflags.sh | 6 +++--- arch/x86/kernel/cpu/mtrr/main.c | 2 +- arch/x86/kernel/cpu/transmeta.c | 2 +- arch/x86/kernel/e820.c | 1 + arch/x86/kernel/head_32.S | 2 +- arch/x86/kernel/hpet.c | 1 + arch/x86/kernel/msr.c | 2 +- arch/x86/kernel/verify_cpu.S | 2 +- 14 files changed, 16 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 58031303e304..faa7b5204129 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -64,7 +64,7 @@ ifdef CONFIG_X86_FEATURE_NAMES quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@ -cpufeature = $(src)/../../include/asm/cpufeature.h +cpufeature = $(src)/../../include/asm/cpufeatures.h targets += capflags.c $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index ae20be6e483c..6608c03c2126 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index aaf152e79637..15e47c1cd412 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -8,6 +8,7 
@@ #include #include #include +#include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 565648bc1a0a..9299e3bdfad6 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 0b6c52388cf4..341449c49f34 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index afa9f0d487ea..fbb5e90557a5 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index 3f20710a5b23..6988c74409a8 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h +# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h # IN=$1 @@ -49,8 +49,8 @@ dump_array() trap 'rm "$OUT"' EXIT ( - echo "#ifndef _ASM_X86_CPUFEATURE_H" - echo "#include " + echo "#ifndef _ASM_X86_CPUFEATURES_H" + echo "#include " echo "#endif" echo "" diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 5c3d149ee91c..74f1d90f9c29 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -47,7 +47,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 252da7aceca6..a19a663282b5 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include "cpu.h" diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 569c1e4f96fe..b3c2a697820a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * The e820 map is the map that gets modified e.g. with command line parameters diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 6bc9ae24b6d2..af1112980dd4 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index b8e6ff5cd5d0..be0ebbb6d1d1 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 64f9616f93f1..7f3550acde1b 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -40,7 +40,7 @@ #include #include -#include +#include #include static struct class *msr_class; diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 07efb35ee4bc..014ea59aa153 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -30,7 +30,7 @@ * appropriately. Either display a message or halt. 
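 * (verify_cpu.S is included into early-boot and 32-bit assembly contexts, so it
 * only ever needs the bare X86_FEATURE_* constants; that is exactly what the
 * cpufeatures.h split above provides, while the full cpufeature.h pulls in
 * C-only code.)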
*/ -#include +#include #include verify_cpu: -- cgit v1.2.3 From bc696ca05f5a8927329ec276a892341e006b00ba Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 26 Jan 2016 22:12:05 +0100 Subject: x86/cpufeature: Replace the old static_cpu_has() with safe variant So the old one didn't work properly before alternatives had run. And it was supposed to provide an optimized JMP because the assumption was that the offset it is jumping to is within a signed byte and thus a two-byte JMP. So I did an x86_64 allyesconfig build and dumped all possible sites where static_cpu_has() was used. The optimization amounted to all in all 12(!) places where static_cpu_has() had generated a 2-byte JMP. Which has saved us a whopping 36 bytes! This clearly is not worth the trouble so we can remove it. The only place where the optimization might count - in __switch_to() - we will handle differently. But that's not subject of this patch. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1453842730-28463-6-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic_numachip.c | 4 ++-- arch/x86/kernel/cpu/common.c | 12 ++---------- arch/x86/kernel/vm86_32.c | 2 +- 3 files changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index c80c02c6ec49..ab5c2c685a3c 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x) unsigned long value; unsigned int id = (x >> 24) & 0xff; - if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { + if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { rdmsrl(MSR_FAM10H_NODE_ID, value); id |= (value << 2) & 0xff00; } @@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) this_cpu_write(cpu_llc_id, node); /* Account for nodes per socket in multi-core-module processors */ - if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { + if (static_cpu_has(X86_FEATURE_NODEID_MSR)) { rdmsrl(MSR_FAM10H_NODE_ID, val); nodes = ((val >> 3) & 7) + 1; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 37830de8f60a..ee499817f3f5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1475,19 +1475,11 @@ void cpu_init(void) } #endif -#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS -void warn_pre_alternatives(void) -{ - WARN(1, "You're using static_cpu_has before alternatives have run!\n"); -} -EXPORT_SYMBOL_GPL(warn_pre_alternatives); -#endif - -inline bool __static_cpu_has_safe(u16 bit) +inline bool __static_cpu_has(u16 bit) { return boot_cpu_has(bit); } -EXPORT_SYMBOL_GPL(__static_cpu_has_safe); +EXPORT_SYMBOL_GPL(__static_cpu_has); static void bsp_resume(void) { diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index e574b8546518..3dce1ca0a653 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) /* make room for real-mode segments */ tsk->thread.sp0 += 16; - if (static_cpu_has_safe(X86_FEATURE_SEP)) + if (static_cpu_has(X86_FEATURE_SEP)) tsk->thread.sysenter_cs = 0; load_sp0(tss, &tsk->thread); -- cgit v1.2.3 From 337e4cc84021212a87b04b77b65cccc49304909e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 26 Jan 
2016 22:12:07 +0100 Subject: x86/alternatives: Add an auxiliary section Add .altinstr_aux for additional instructions which will be used before and/or during patching. All stuff which needs more sophisticated patching should go there. See next patch. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1453842730-28463-8-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 74e4bf11f562..92dc211c11db 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -195,6 +195,17 @@ SECTIONS :init #endif + /* + * Section for code used exclusively before alternatives are run. All + * references to such code must be patched out by alternatives, normally + * by using X86_FEATURE_ALWAYS CPU feature bit. + * + * See static_cpu_has() for an example. + */ + .altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) { + *(.altinstr_aux) + } + INIT_DATA_SECTION(16) .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { -- cgit v1.2.3 From 2476f2fa20568bd5d9e09cd35bcd73e99a6f4cc6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 27 Jan 2016 09:45:25 +0100 Subject: x86/alternatives: Discard dynamic check after init Move the code to do the dynamic check to the altinstr_aux section so that it is discarded after alternatives have run and a static branch has been chosen. This way we're changing the dynamic branch from C code to assembly, which makes it *substantially* smaller while avoiding a completely unnecessary call to an out of line function. Signed-off-by: Brian Gerst [ Changed it to do TESTB, as hpa suggested. ] Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Kristen Carlson Accardi Cc: Laura Abbott Cc: Linus Torvalds Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Ross Zwisler Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1452972124-7380-1-git-send-email-brgerst@gmail.com Link: http://lkml.kernel.org/r/20160127084525.GC30712@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ee499817f3f5..079d83fc6488 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1475,12 +1475,6 @@ void cpu_init(void) } #endif -inline bool __static_cpu_has(u16 bit) -{ - return boot_cpu_has(bit); -} -EXPORT_SYMBOL_GPL(__static_cpu_has); - static void bsp_resume(void) { if (this_cpu->c_bsp_resume) -- cgit v1.2.3 From d99e1bd175f4291ddb6e62b22bb5bdbe3976389a Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Mon, 25 Jan 2016 20:41:46 +0100 Subject: x86/entry/traps: Refactor preemption and interrupt flag handling Make the preemption and interrupt flag handling more readable by removing preempt_conditional_sti() and preempt_conditional_cli() helpers and using preempt_disable() and preempt_enable_no_resched() instead. Rename conditional_sti() and conditional_cli() to the more understandable cond_local_irq_enable() and cond_local_irq_disable() respectively, while at it.
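The call-site transformation follows one fixed pattern; a minimal sketch distilled from the traps.c hunks below (do_int3() shown, no new logic):

	/* Before: */
	preempt_conditional_sti(regs);
	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
	preempt_conditional_cli(regs);

	/* After: the preemption and IRQ-flag steps are spelled out explicitly. */
	preempt_disable();
	cond_local_irq_enable(regs);
	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
	cond_local_irq_disable(regs);
	preempt_enable_no_resched();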
Suggested-by: Borislav Petkov Signed-off-by: Alexander Kuleshov [ Boris: massage text. ] Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Tony Luck Cc: Wang Nan Link: http://lkml.kernel.org/r/1453750913-4781-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 47 +++++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ade185a46b1d..410e8e2700c5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -83,30 +83,16 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_bss; DECLARE_BITMAP(used_vectors, NR_VECTORS); EXPORT_SYMBOL_GPL(used_vectors); -static inline void conditional_sti(struct pt_regs *regs) +static inline void cond_local_irq_enable(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); } -static inline void preempt_conditional_sti(struct pt_regs *regs) -{ - preempt_count_inc(); - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); -} - -static inline void conditional_cli(struct pt_regs *regs) -{ - if (regs->flags & X86_EFLAGS_IF) - local_irq_disable(); -} - -static inline void preempt_conditional_cli(struct pt_regs *regs) +static inline void cond_local_irq_disable(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_disable(); - preempt_count_dec(); } void ist_enter(struct pt_regs *regs) @@ -286,7 +272,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { - conditional_sti(regs); + cond_local_irq_enable(regs); do_trap(trapnr, signr, str, regs, error_code, fill_trap_info(regs, signr, trapnr, &info)); } @@ -368,7 +354,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) if (notify_die(DIE_TRAP, "bounds", regs, error_code, X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) return; - conditional_sti(regs); + cond_local_irq_enable(regs); if (!user_mode(regs)) die("bounds", regs, error_code); @@ -443,7 +429,7 @@ do_general_protection(struct pt_regs *regs, long error_code) struct task_struct *tsk; RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - conditional_sti(regs); + cond_local_irq_enable(regs); if (v8086_mode(regs)) { local_irq_enable(); @@ -517,9 +503,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) * as we may switch to the interrupt stack. 
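 * (The debug_stack_usage_inc()/debug_stack_usage_dec() calls below bracket
 * exactly that window.)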
*/ debug_stack_usage_inc(); - preempt_conditional_sti(regs); + preempt_disable(); + cond_local_irq_enable(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); - preempt_conditional_cli(regs); + cond_local_irq_disable(regs); + preempt_enable_no_resched(); debug_stack_usage_dec(); exit: ist_exit(regs); @@ -648,12 +636,14 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_inc(); /* It's safe to allow irq's after DR6 has been saved */ - preempt_conditional_sti(regs); + preempt_disable(); + cond_local_irq_enable(regs); if (v8086_mode(regs)) { handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, X86_TRAP_DB); - preempt_conditional_cli(regs); + cond_local_irq_disable(regs); + preempt_enable_no_resched(); debug_stack_usage_dec(); goto exit; } @@ -673,7 +663,8 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) si_code = get_si_code(tsk->thread.debugreg6); if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); - preempt_conditional_cli(regs); + cond_local_irq_disable(regs); + preempt_enable_no_resched(); debug_stack_usage_dec(); exit: @@ -696,7 +687,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) return; - conditional_sti(regs); + cond_local_irq_enable(regs); if (!user_mode(regs)) { if (!fixup_exception(regs)) { @@ -743,7 +734,7 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code) dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) { - conditional_sti(regs); + cond_local_irq_enable(regs); } dotraplinkage void @@ -756,7 +747,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) if (read_cr0() & X86_CR0_EM) { struct math_emu_info info = { }; - conditional_sti(regs); + cond_local_irq_enable(regs); info.regs = regs; math_emulate(&info); @@ -765,7 +756,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) #endif fpu__restore(¤t->thread.fpu); /* interrupts still off */ #ifdef CONFIG_X86_32 - conditional_sti(regs); + cond_local_irq_enable(regs); #endif } NOKPROBE_SYMBOL(do_device_not_available); -- cgit v1.2.3 From 8ff5bd2e1e2767fbf737f84d5f92668dafe7e7b0 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 16 Feb 2016 15:09:02 -0800 Subject: x86/signal/64: Fix SS if needed when delivering a 64-bit signal Signals are always delivered to 64-bit tasks with CS set to a long mode segment. In long mode, SS doesn't matter as long as it's a present writable segment. If SS starts out invalid (this can happen if the signal was caused by an IRET fault or was delivered on the way out of set_thread_area or modify_ldt), then IRET to the signal handler can fail, eventually killing the task. The straightforward fix would be to simply reset SS when delivering a signal. That breaks DOSEMU, though: 64-bit builds of DOSEMU rely on SS being set to the faulting SS when signals are delivered. As a compromise, this patch leaves SS alone so long as it's valid. The net effect should be that the behavior of successfully delivered signals is unchanged. Some signals that would previously have failed to be delivered will now be delivered successfully. This has no effect for x32 or 32-bit tasks: their signal handlers were already called with SS == __USER_DS. 
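In code terms the compromise is tiny; a sketch distilled from the frame-setup hunk in the diff below (nothing here is new code):

	regs->cs = __USER_CS;
	if (unlikely(regs->ss != __USER_DS))
		force_valid_ss(regs);	/* replace SS only if it would fault on IRET */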
(On Xen, there's a slight hole: if a task sets SS to a writable *kernel* data segment, then we will fail to identify it as invalid and we'll still kill the task. If anyone cares, this could be fixed with a new paravirt hook.) Signed-off-by: Andy Lutomirski Acked-by: Borislav Petkov Cc: Al Viro Cc: Andy Lutomirski Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Pavel Emelyanov Cc: Peter Zijlstra Cc: Stas Sergeev Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/163c6e1eacde41388f3ff4d2fe6769be651d7b6e.1455664054.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index c07ff5ddbd47..52f82c7ef57d 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -61,6 +61,35 @@ regs->seg = GET_SEG(seg) | 3; \ } while (0) +#ifdef CONFIG_X86_64 +/* + * If regs->ss will cause an IRET fault, change it. Otherwise leave it + * alone. Using this generally makes no sense unless + * user_64bit_mode(regs) would return true. + */ +static void force_valid_ss(struct pt_regs *regs) +{ + u32 ar; + asm volatile ("lar %[old_ss], %[ar]\n\t" + "jz 1f\n\t" /* If invalid: */ + "xorl %[ar], %[ar]\n\t" /* set ar = 0 */ + "1:" + : [ar] "=r" (ar) + : [old_ss] "rm" ((u16)regs->ss)); + + /* + * For a valid 64-bit user context, we need DPL 3, type + * read-write data or read-write exp-down data, and S and P + * set. We can't use VERW because VERW doesn't check the + * P bit. + */ + ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK; + if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) && + ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN)) + regs->ss = __USER_DS; +} +#endif + int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { unsigned long buf_val; @@ -459,10 +488,28 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, regs->sp = (unsigned long)frame; - /* Set up the CS register to run signal handlers in 64-bit mode, - even if the handler happens to be interrupting 32-bit code. */ + /* + * Set up the CS and SS registers to run signal handlers in + * 64-bit mode, even if the handler happens to be interrupting + * 32-bit or 16-bit code. + * + * SS is subtle. In 64-bit mode, we don't need any particular + * SS descriptor, but we do need SS to be valid. It's possible + * that the old SS is entirely bogus -- this can happen if the + * signal we're trying to deliver is #GP or #SS caused by a bad + * SS value. We also have a compatibility issue here: DOSEMU + * relies on the contents of the SS register indicating the + * SS value at the time of the signal, even though that code in + * DOSEMU predates sigreturn's ability to restore SS. (DOSEMU + * avoids relying on sigreturn to restore SS; instead it uses + * a trampoline.) So we do our best: if the old SS was valid, + * we keep it. Otherwise we replace it. 
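+ * (The validity check is force_valid_ss()'s single LAR instruction;
+ * LAR sets ZF only when the selector's access rights can be loaded.)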
+ */ regs->cs = __USER_CS; + if (unlikely(regs->ss != __USER_DS)) + force_valid_ss(regs); + return 0; } #endif /* CONFIG_X86_32 */ -- cgit v1.2.3 From 6c25da5ad55d48c41b8909bc1f4e3cd5d85bb499 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 16 Feb 2016 15:09:03 -0800 Subject: x86/signal/64: Re-add support for SS in the 64-bit signal context This is a second attempt to make the improvements from c6f2062935c8 ("x86/signal/64: Fix SS handling for signals delivered to 64-bit programs"), which was reverted by 51adbfbba5c6 ("x86/signal/64: Add support for SS in the 64-bit signal context"). This adds two new uc_flags flags. UC_SIGCONTEXT_SS will be set for all 64-bit signals (including x32). It indicates that the saved SS field is valid and that the kernel supports the new behavior. The goal is to fix a problem with signal handling in 64-bit tasks: SS wasn't saved in the 64-bit signal context, making it awkward to determine what SS was at the time of signal delivery and making it impossible to return to a non-flat SS (as calling sigreturn clobbers SS). This also made it extremely difficult for 64-bit tasks to return to fully-defined 16-bit contexts, because only the kernel can easily do espfix64, but sigreturn was unable to set a non-flat SS:ESP. (DOSEMU has a monstrous hack to partially work around this limitation.) If we could go back in time, the correct fix would be to make 64-bit signals work just like 32-bit signals with respect to SS: save it in signal context, reset it when delivering a signal, and restore it in sigreturn. Unfortunately, doing that (as I tried originally) breaks DOSEMU: DOSEMU wouldn't reset the signal context's SS when clearing the LDT and changing the saved CS to 64-bit mode, since it predates the SS context field existing in the first place. This patch is a bit more complicated, and it tries to balance a bunch of goals. It makes most cases of changing ucontext->ss during signal handling work as expected. I do this by special-casing the interesting case. On sigreturn, ucontext->ss will be honored by default, unless the ucontext was created from scratch by an old program and had a 64-bit CS (unfortunately, CRIU can do this) or was the result of changing a 32-bit signal context to 64-bit without resetting SS (as DOSEMU does). For the benefit of new 64-bit software that uses segmentation (new versions of DOSEMU might), the new behavior can be detected with a new ucontext flag UC_SIGCONTEXT_SS. To avoid compilation issues, __pad0 is left as an alias for ss in ucontext. The nitty-gritty details are documented in the header file. This patch also re-enables the sigreturn_64 and ldt_gdt_64 selftests, as the kernel change allows both of them to pass. Tested-by: Stas Sergeev Signed-off-by: Andy Lutomirski Acked-by: Borislav Petkov Cc: Al Viro Cc: Andy Lutomirski Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Pavel Emelyanov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/749149cbfc3e75cd7fcdad69a854b399d792cc6f.1455664054.git.luto@kernel.org [ Small readability edit. 
] Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal.c | 63 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 52f82c7ef57d..548ddf7d6fd2 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -90,7 +90,9 @@ static void force_valid_ss(struct pt_regs *regs) } #endif -int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) +static int restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc, + unsigned long uc_flags) { unsigned long buf_val; void __user *buf; @@ -123,15 +125,18 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) COPY(r15); #endif /* CONFIG_X86_64 */ -#ifdef CONFIG_X86_32 COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); -#else /* !CONFIG_X86_32 */ - /* Kernel saves and restores only the CS segment register on signals, - * which is the bare minimum needed to allow mixed 32/64-bit code. - * App's signal handler can save/restore other segments if needed. */ - COPY_SEG_CPL3(cs); -#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_X86_64 + /* + * Fix up SS if needed for the benefit of old DOSEMU and + * CRIU. + */ + if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && + user_64bit_mode(regs))) + force_valid_ss(regs); +#endif get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); @@ -194,6 +199,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, put_user_ex(regs->cs, &sc->cs); put_user_ex(0, &sc->gs); put_user_ex(0, &sc->fs); + put_user_ex(regs->ss, &sc->ss); #endif /* CONFIG_X86_32 */ put_user_ex(fpstate, &sc->fpstate); @@ -432,6 +438,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, return 0; } #else /* !CONFIG_X86_32 */ +static unsigned long frame_uc_flags(struct pt_regs *regs) +{ + unsigned long flags; + + if (cpu_has_xsave) + flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS; + else + flags = UC_SIGCONTEXT_SS; + + if (likely(user_64bit_mode(regs))) + flags |= UC_STRICT_RESTORE_SS; + + return flags; +} + static int __setup_rt_frame(int sig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { @@ -451,10 +472,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, put_user_try { /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); save_altstack_ex(&frame->uc.uc_stack, regs->sp); @@ -536,10 +554,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, put_user_try { /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); put_user_ex(0, &frame->uc.uc__pad0); @@ -601,7 +616,11 @@ asmlinkage unsigned long sys_sigreturn(void) set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->sc)) + /* + * x86_32 has no uc_flags bits relevant to restore_sigcontext. + * Save a few cycles by skipping the __get_user. 
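+ * (This is sys_sigreturn(), the CONFIG_X86_32-only path; the rt_sigreturn
+ * variants below do fetch uc_flags with __get_user().)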
+ */ + if (restore_sigcontext(regs, &frame->sc, 0)) goto badframe; return regs->ax; @@ -617,16 +636,19 @@ asmlinkage long sys_rt_sigreturn(void) struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; sigset_t set; + unsigned long uc_flags; frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; + if (__get_user(uc_flags, &frame->uc.uc_flags)) + goto badframe; set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) goto badframe; if (restore_altstack(&frame->uc.uc_stack)) @@ -813,6 +835,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void) struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_x32 __user *frame; sigset_t set; + unsigned long uc_flags; frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); @@ -820,10 +843,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; + if (__get_user(uc_flags, &frame->uc.uc_flags)) + goto badframe; set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) goto badframe; if (compat_restore_altstack(&frame->uc.uc_stack)) -- cgit v1.2.3 From f1b92bb6b5a4e17b508f128b084fa00e0eda590c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 16 Feb 2016 09:43:21 +0100 Subject: x86/ftrace, x86/asm: Kill ftrace_caller_end label One of ftrace_caller_end and ftrace_return is redundant so unify them. Rename ftrace_return to ftrace_epilogue to mean that everything after that label represents, like an afterword, work which happens *after* the ftrace call, e.g., the function graph tracer for one. Steve wants this to rather mean "[a]n event which reflects meaningfully on a recently ended conflict or struggle." I can imagine that ftrace can be a struggle sometimes. Anyway, beef up the comment about the code contents and layout before ftrace_epilogue label. Signed-off-by: Borislav Petkov Reviewed-by: Steven Rostedt Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1455612202-14414-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 11 +++++------ arch/x86/kernel/mcount_64.S | 14 ++++++++------ 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 29408d6d6626..04f9641e0cb6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -697,9 +697,8 @@ static inline void tramp_free(void *tramp) { } #endif /* Defined as markers to the end of the ftrace default trampolines */ -extern void ftrace_caller_end(void); extern void ftrace_regs_caller_end(void); -extern void ftrace_return(void); +extern void ftrace_epilogue(void); extern void ftrace_caller_op_ptr(void); extern void ftrace_regs_caller_op_ptr(void); @@ -746,7 +745,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) op_offset = (unsigned long)ftrace_regs_caller_op_ptr; } else { start_offset = (unsigned long)ftrace_caller; - end_offset = (unsigned long)ftrace_caller_end; + end_offset = (unsigned long)ftrace_epilogue; op_offset = (unsigned long)ftrace_caller_op_ptr; } @@ -754,7 +753,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* * Allocate enough size to store the ftrace_caller code, - * the jmp to ftrace_return, as well as the address of + * the jmp to ftrace_epilogue, as well as the address of * the ftrace_ops this trampoline is used for. */ trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *)); @@ -772,8 +771,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = (unsigned long)trampoline + size; - /* The trampoline ends with a jmp to ftrace_return */ - jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return); + /* The trampoline ends with a jmp to ftrace_epilogue */ + jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue); memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE); /* diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 87e1762e2bca..ed48a9f465f8 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -168,12 +168,14 @@ GLOBAL(ftrace_call) restore_mcount_regs /* - * The copied trampoline must call ftrace_return as it + * The copied trampoline must call ftrace_epilogue as it * still may need to call the function graph tracer. + * + * The code up to this label is copied into trampolines so + * think twice before adding any new code or changing the + * layout here. */ -GLOBAL(ftrace_caller_end) - -GLOBAL(ftrace_return) +GLOBAL(ftrace_epilogue) #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -244,14 +246,14 @@ GLOBAL(ftrace_regs_call) popfq /* - * As this jmp to ftrace_return can be a short jump + * As this jmp to ftrace_epilogue can be a short jump * it must not be copied into the trampoline. * The trampoline will add the code to jump * to the return. 
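 * (create_trampoline() appends an equivalent jmp when it copies this code;
 * see the ftrace.c hunk above.)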
*/ GLOBAL(ftrace_regs_caller_end) - jmp ftrace_return + jmp ftrace_epilogue END(ftrace_regs_caller) -- cgit v1.2.3 From 0f68c088c0adb3c3bbeb487c4ebcde91fd5d34be Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Feb 2016 10:20:13 -0800 Subject: x86/cpufeature: Create a new synthetic cpu capability for machine check recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Intel Software Developer Manual describes bit 24 in the MCG_CAP MSR: MCG_SER_P (software error recovery support present) flag, bit 24 — Indicates (when set) that the processor supports software error recovery But only some models with this capability bit set will actually generate recoverable machine checks. Check the model name and set a synthetic capability bit. Provide a command line option to set this bit anyway in case the kernel doesn't recognise the model name. Signed-off-by: Tony Luck Reviewed-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2e5bfb23c89800a036fb8a45fa97a74bb16bc362.1455732970.git.tony.luck@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a006f4cd792b..b5b187c8cc07 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1576,6 +1576,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model == 45) quirk_no_way_out = quirk_sandybridge_ifu; + /* + * MCG_CAP.MCG_SER_P is necessary but not sufficient to know + * whether this processor will actually generate recoverable + * machine checks. Check to see if this is an E7 model Xeon. + * We can't do a model number check because E5 and E7 use the + * same model number. E5 doesn't support recovery, E7 does. + */ + if (mca_cfg.recovery || (mca_cfg.ser && + !strncmp(c->x86_model_id, + "Intel(R) Xeon(R) CPU E7-", 24))) + set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY); } if (cfg->monarch_timeout < 0) cfg->monarch_timeout = 0; @@ -2028,6 +2039,8 @@ static int __init mcheck_enable(char *str) cfg->bootlog = (str[0] == 'b'); else if (!strcmp(str, "bios_cmci_threshold")) cfg->bios_cmci_threshold = true; + else if (!strcmp(str, "recovery")) + cfg->recovery = true; else if (isdigit(str[0])) { if (get_option(&str, &cfg->tolerant) == 2) get_option(&str, &(cfg->monarch_timeout)); -- cgit v1.2.3 From 58a5aac5331388a175a42b6ed2154f0559cefb21 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 29 Feb 2016 15:50:19 -0800 Subject: x86/entry/32: Introduce and use X86_BUG_ESPFIX instead of paravirt_enabled x86_64 has very clean espfix handling on paravirt: espfix64 is set up in native_iret, so paravirt systems that override iret bypass espfix64 automatically. This is robust and straightforward. x86_32 is messier. espfix is set up before the IRET paravirt patch point, so it can't be directly conditionalized on whether we use native_iret. We also can't easily move it into native_iret without regressing performance due to a bizarre consideration. Specifically, on 64-bit kernels, the logic is: if (regs->ss & 0x4) setup_espfix; On 32-bit kernels, the logic is: if ((regs->ss & 0x4) && (regs->cs & 0x3) == 3 && (regs->flags & X86_EFLAGS_VM) == 0) setup_espfix; The performance of setup_espfix itself is essentially irrelevant, but the comparison happens on every IRET so its performance matters. 
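The eventual fix, described below, reduces the 32-bit check to a single patched jump; a sketch of the ALTERNATIVE usage (the local label is illustrative, not the tree's):

	/* Patched to fall through only on CPUs that actually have the bug: */
	ALTERNATIVE("jmp .Lno_espfix", "", X86_BUG_ESPFIX)
	/* ... SS & 0x4, CPL and VM86 checks, then espfix stack setup ... */
.Lno_espfix: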
On x86_64, there's no need for any registers except flags to implement the comparison, so we fold the whole thing into native_iret. On x86_32, we don't do that because we need a free register to implement the comparison efficiently. We therefore do espfix setup before restoring registers on x86_32. This patch gets rid of the explicit paravirt_enabled check by introducing X86_BUG_ESPFIX on 32-bit systems and using an ALTERNATIVE to skip espfix on paravirt systems where iret != native_iret. This is also messy, but it's at least in line with other things we do. This improves espfix performance by removing a branch, but no one cares. More importantly, it removes a paravirt_enabled user, which is good because paravirt_enabled is ill-defined and is going away. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: david.vrabel@citrix.com Cc: konrad.wilk@oracle.com Cc: lguest@lists.ozlabs.org Cc: xen-devel@lists.xensource.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 079d83fc6488..d8337f34b5f4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -802,6 +802,31 @@ static void detect_nopl(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_NOPL); #else set_cpu_cap(c, X86_FEATURE_NOPL); +#endif + + /* + * ESPFIX is a strange bug. All real CPUs have it. Paravirt + * systems that run Linux at CPL > 0 may or may not have the + * issue, but, even if they have the issue, there's absolutely + * nothing we can do about it because we can't use the real IRET + * instruction. + * + * NB: For the time being, only 32-bit kernels support + * X86_BUG_ESPFIX as such. 64-bit kernels directly choose + * whether to apply espfix using paravirt hooks. If any + * non-paravirt system ever shows up that does *not* have the + * ESPFIX issue, we can change this. + */ +#ifdef CONFIG_X86_32 +#ifdef CONFIG_PARAVIRT + do { + extern void native_iret(void); + if (pv_cpu_ops.iret == native_iret) + set_cpu_bug(c, X86_BUG_ESPFIX); + } while (0); +#else + set_cpu_bug(c, X86_BUG_ESPFIX); +#endif #endif } -- cgit v1.2.3 From 0dd0036f6e07f741a1356b424b84a3164b6e59cf Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 29 Feb 2016 15:50:20 -0800 Subject: x86/asm-offsets: Remove PARAVIRT_enabled It no longer has any users. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: david.vrabel@citrix.com Cc: konrad.wilk@oracle.com Cc: lguest@lists.ozlabs.org Cc: xen-devel@lists.xensource.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 84a7524b202c..5c042466f274 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -59,7 +59,6 @@ void common(void) { #ifdef CONFIG_PARAVIRT BLANK(); - OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); -- cgit v1.2.3 From 81edd9f69a6fd214fdbe66b43de6aa1610c84c63 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 9 Mar 2016 19:00:28 -0800 Subject: x86/entry/traps: Clear TIF_BLOCKSTEP on all debug exceptions The SDM says that debug exceptions clear BTF, and we need to keep TIF_BLOCKSTEP in sync with BTF. Clear it unconditionally and improve the comment. I suspect that the fact that kmemcheck could cause TIF_BLOCKSTEP not to be cleared was just an oversight. Signed-off-by: Andy Lutomirski Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/fa86e55d196e6dde5b38839595bde2a292c52fdc.1457578375.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 410e8e2700c5..1af56217fb5d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -597,6 +597,13 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) /* Filter out all the reserved bits which are preset to 1 */ dr6 &= ~DR6_RESERVED; + /* + * The SDM says "The processor clears the BTF flag when it + * generates a debug exception." Clear TIF_BLOCKSTEP to keep + * TIF_BLOCKSTEP in sync with the hardware BTF flag. + */ + clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); + /* * If dr6 has no reason to give us about the origin of this trap, * then it's very likely the result of an icebp/int01 trap. @@ -612,11 +619,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) /* DR6 may or may not be cleared by the CPU */ set_debugreg(0, 6); - /* - * The processor cleared BTF, so don't mark that we need it set. - */ - clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); - /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; -- cgit v1.2.3 From 8bb5643686d2898bec181c407651cf84e77d413f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 9 Mar 2016 19:00:29 -0800 Subject: x86/entry/traps: Clear DR6 early in do_debug() and improve the comment Leaving any bits set in DR6 on return from a debug exception is asking for trouble. Prevent it by writing zero right away and clarify the comment. Signed-off-by: Andy Lutomirski Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/3857676e1be8fb27db4b89bbb1e2052b7f435ff4.1457578375.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1af56217fb5d..7394be8b1a66 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -593,6 +593,18 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) ist_enter(regs); get_debugreg(dr6, 6); + /* + * The Intel SDM says: + * + * Certain debug exceptions may clear bits 0-3. The remaining + * contents of the DR6 register are never cleared by the + * processor. To avoid confusion in identifying debug + * exceptions, debug handlers should clear the register before + * returning to the interrupted task. + * + * Keep it simple: clear DR6 immediately. + */ + set_debugreg(0, 6); /* Filter out all the reserved bits which are preset to 1 */ dr6 &= ~DR6_RESERVED; @@ -616,9 +628,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) goto exit; - /* DR6 may or may not be cleared by the CPU */ - set_debugreg(0, 6); - /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; -- cgit v1.2.3 From f2b375756c839ff655a3e0b45e339f8fbf973093 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 9 Mar 2016 19:00:30 -0800 Subject: x86/entry: Vastly simplify SYSENTER TF (single-step) handling Due to a blatant design error, SYSENTER doesn't clear TF (single-step). As a result, if a user does SYSENTER with TF set, we will single-step through the kernel until something clears TF. There is absolutely nothing we can do to prevent this short of turning off SYSENTER [1]. Simplify the handling considerably with two changes: 1. We already sanitize EFLAGS in SYSENTER to clear NT and AC. We can add TF to that list of flags to sanitize with no overhead whatsoever. 2. Teach do_debug() to ignore single-step traps in the SYSENTER prologue. That's all we need to do. Don't get too excited -- our handling is still buggy on 32-bit kernels. There's nothing wrong with the SYSENTER code itself, but the #DB prologue has a clever fixup for traps on the very first instruction of entry_SYSENTER_32, and the fixup doesn't work quite correctly. The next two patches will fix that. [1] We could probably prevent it by forcing BTF on at all times and making sure we clear TF before any branches in the SYSENTER code. Needless to say, this is a bad idea. Signed-off-by: Andy Lutomirski Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a30d2ea06fe4b621fe6a9ef911b02c0f38feb6f2.1457578375.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 52 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7394be8b1a66..b0ddb81926f9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -559,6 +559,29 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) NOKPROBE_SYMBOL(fixup_bad_iret); #endif +static bool is_sysenter_singlestep(struct pt_regs *regs) +{ + /* + * We don't try for precision here. 
If we're anywhere in the region of + * code that can be single-stepped in the SYSENTER entry path, then + * assume that this is a useless single-step trap due to SYSENTER + * being invoked with TF set. (We don't know in advance exactly + * which instructions will be hit because BTF could plausibly + * be set.) + */ +#ifdef CONFIG_X86_32 + return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) < + (unsigned long)__end_SYSENTER_singlestep_region - + (unsigned long)__begin_SYSENTER_singlestep_region; +#elif defined(CONFIG_IA32_EMULATION) + return (regs->ip - (unsigned long)entry_SYSENTER_compat) < + (unsigned long)__end_entry_SYSENTER_compat - + (unsigned long)entry_SYSENTER_compat; +#else + return false; +#endif +} + /* * Our handling of the processor debug registers is non-trivial. * We do not clear them on entry and exit from the kernel. Therefore @@ -616,6 +639,18 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) */ clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); + if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) && + is_sysenter_singlestep(regs))) { + dr6 &= ~DR_STEP; + if (!dr6) + goto exit; + /* + * else we might have gotten a single-step trap and hit a + * watchpoint at the same time, in which case we should fall + * through and handle the watchpoint. + */ + } + /* * If dr6 has no reason to give us about the origin of this trap, * then it's very likely the result of an icebp/int01 trap. @@ -624,7 +659,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) if (!dr6 && user_mode(regs)) user_icebp = 1; - /* Catch kmemcheck conditions first of all! */ + /* Catch kmemcheck conditions! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) goto exit; @@ -659,14 +694,13 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) goto exit; } - /* - * Single-stepping through system calls: ignore any exceptions in - * kernel space, but re-enable TF when returning to user mode. - * - * We already checked v86 mode above, so we can check for kernel mode - * by just checking the CPL of CS. - */ - if ((dr6 & DR_STEP) && !user_mode(regs)) { + if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) { + /* + * Historical junk that used to handle SYSENTER single-stepping. + * This should be unreachable now. If we survive for a while + * without anyone hitting this warning, we'll turn this into + * an oops. + */ tsk->thread.debugreg6 &= ~DR_STEP; set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->flags &= ~X86_EFLAGS_TF; -- cgit v1.2.3 From 7536656f08d0c1a3b4c487d00785c5186ec6f533 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 9 Mar 2016 19:00:32 -0800 Subject: x86/entry/32: Simplify and fix up the SYSENTER stack #DB/NMI fixup Right after SYSENTER, we can get a #DB or NMI. On x86_32, there's no IST, so the exception handler is invoked on the temporary SYSENTER stack. Because the SYSENTER stack is very small, we have a fixup to switch off the stack quickly when this happens. The old fixup had several issues: 1. It checked the interrupt frame's CS and EIP. This wasn't obviously correct on Xen or if vm86 mode was in use [1]. 2. In the NMI handler, it did some frightening digging into the stack frame. I'm not convinced this digging was correct. 3. The fixup didn't switch stacks and then switch back. Instead, it synthesized a brand new stack frame that would redirect the IRET back to the SYSENTER code. That frame was highly questionable. 
For one thing, if NMI nested inside #DB, we would effectively abort the #DB prologue, which was probably safe but was frightening. For another, the code used PUSHFL to write the FLAGS portion of the frame, which was simply bogus -- by the time PUSHFL was called, at least TF, NT, VM, and all of the arithmetic flags were clobbered. Simplify this considerably. Instead of looking at the saved frame to see where we came from, check the hardware ESP register against the SYSENTER stack directly. Malicious user code cannot spoof the kernel ESP register, and by moving the check after SAVE_ALL, we can use normal PER_CPU accesses to find all the relevant addresses. With this patch applied, the improved syscall_nt_32 test finally passes on 32-bit kernels. [1] It isn't obviously correct, but it is nonetheless safe from vm86 shenanigans as far as I can tell. A user can't point EIP at entry_SYSENTER_32 while in vm86 mode because entry_SYSENTER_32, like all kernel addresses, is greater than 0xffff and would thus violate the CS segment limit. Signed-off-by: Andy Lutomirski Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/b2cdbc037031c07ecf2c40a96069318aec0e7971.1457578375.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets_32.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index fdeb0ce07c16..ecdc1d217dc0 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -52,6 +52,11 @@ void foo(void) DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - offsetofend(struct tss_struct, SYSENTER_stack)); + /* Offset from cpu_tss to SYSENTER_stack */ + OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); + /* Size of SYSENTER_stack */ + DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); + #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) BLANK(); OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); -- cgit v1.2.3 From 2a41aa4feb25af3ead60b740c43df80c576efea2 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 9 Mar 2016 19:00:33 -0800 Subject: x86/entry/32: Add and check a stack canary for the SYSENTER stack The first instruction of the SYSENTER entry runs on its own tiny stack. That stack can be used if a #DB or NMI is delivered before the SYSENTER prologue switches to a real stack. We have code in place to prevent us from overflowing the tiny stack. For added paranoia, add a canary to the stack and check it in do_debug() -- that way, if something goes wrong with the #DB logic, we'll eventually notice. Signed-off-by: Andy Lutomirski Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/6ff9a806f39098b166dc2c41c1db744df5272f29.1457578375.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 3 +++ arch/x86/kernel/traps.c | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9f7c21c22477..ee9a9792caeb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -57,6 +57,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { */ .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, #endif +#ifdef CONFIG_X86_32 + .SYSENTER_stack_canary = STACK_END_MAGIC, +#endif }; EXPORT_PER_CPU_SYMBOL(cpu_tss); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b0ddb81926f9..49e2e775f507 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -713,6 +713,14 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: +#if defined(CONFIG_X86_32) + /* + * This is the most likely code path that involves non-trivial use + * of the SYSENTER stack. Check that we haven't overrun it. + */ + WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC, + "Overran or corrupted SYSENTER stack\n"); +#endif ist_exit(regs); } NOKPROBE_SYMBOL(do_debug); -- cgit v1.2.3 From a798f091113ef4999277dbe0483d37d04fa35b2e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 9 Mar 2016 13:24:32 -0800 Subject: x86/entry/32: Change INT80 to be an interrupt gate We want all of the syscall entries to run with interrupts off so that we can efficiently run context tracking before enabling interrupts. This will regress int $0x80 performance on 32-bit kernels by a couple of cycles. This shouldn't matter much -- int $0x80 is not a fast path. This effectively reverts: 657c1eea0019 ("x86/entry/32: Fix entry_INT80_32() to expect interrupts to be on") ... and fixes the same issue differently. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Frédéric Weisbecker Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/59b4f90c9ebfccd8c937305dbbbca680bc74b905.1457558566.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 49e2e775f507..5fae2d840e64 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -912,7 +912,7 @@ void __init trap_init(void) #endif #ifdef CONFIG_X86_32 - set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32); + set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32); set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif -- cgit v1.2.3
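[ Note on the trap-gate/interrupt-gate distinction the patch above relies on: the two gate types differ only in the type field of the IDT descriptor, and with an interrupt gate the CPU clears EFLAGS.IF when it delivers the vector, so the handler starts with interrupts off. The sketch below is illustrative only -- the layout follows the Intel SDM's 32-bit gate format, but pack_idt_gate() is a hypothetical helper, not the kernel's pack_gate(). ]

#include <stdint.h>

/* Gate types from the Intel SDM, Vol. 3 (32-bit descriptors). */
#define GATE_INTERRUPT 0xE  /* CPU clears EFLAGS.IF on delivery */
#define GATE_TRAP      0xF  /* CPU leaves EFLAGS.IF unchanged   */

struct idt_gate {
	uint16_t offset_low;   /* handler address, bits 15:0  */
	uint16_t segment;      /* code segment selector       */
	uint8_t  reserved;
	uint8_t  type_attr;    /* present bit, DPL, gate type */
	uint16_t offset_high;  /* handler address, bits 31:16 */
} __attribute__((packed));

/* Hypothetical helper; the kernel's own pack_gate() differs. */
static void pack_idt_gate(struct idt_gate *g, uint32_t handler,
			  uint8_t type, uint8_t dpl, uint16_t seg)
{
	g->offset_low  = handler & 0xffff;
	g->offset_high = handler >> 16;
	g->segment     = seg;
	g->reserved    = 0;
	g->type_attr   = 0x80 | (dpl << 5) | type;  /* P=1 | DPL | type */
}

[ With dpl = 3 and type = GATE_INTERRUPT the attribute byte comes out as 0xEE: a gate that user code may invoke via int $0x80, yet whose handler is entered with interrupts disabled, which is exactly the property wanted for context tracking. ]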
From 10ee73865e9e4705ba8b3f4d6149e8e68d902bb7 Mon Sep 17 00:00:00 2001 From: Jianyu Zhan Date: Thu, 10 Mar 2016 20:19:58 +0800 Subject: x86/entry/traps: Show unhandled signal for i386 in do_trap() Commit abd4f7505baf ("x86: i386-show-unhandled-signals-v3") turned on the show-unhandled-signals behaviour for i386 in some exception handlers, but do_trap() was left out for no obvious reason (my naive guess is that turning it on for do_trap() would have been too noisy, since do_trap() is shared by several exceptions). The same commit also made "show_unhandled_signals" a debug tunable (in /proc/sys/debug/exception-trace), which x86 turns on by default. So it is surprising for i386 users who turn it on manually and expect to see the unhandled-signal output in the log, but get nothing. This patch turns it on for i386 in do_trap() as well. Signed-off-by: Jianyu Zhan Reviewed-by: Jan Beulich Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@suse.de Cc: dave.hansen@linux.intel.com Cc: heukelum@fastmail.fm Cc: jbeulich@novell.com Cc: jdike@addtoit.com Cc: joe@perches.com Cc: luto@kernel.org Link: http://lkml.kernel.org/r/1457612398-4568-1-git-send-email-nasa4836@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5fae2d840e64..1baf08187ddf 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -248,7 +248,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; -#ifdef CONFIG_X86_64 if (show_unhandled_signals && unhandled_signal(tsk, signr) && printk_ratelimit()) { pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", @@ -257,7 +256,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, print_vma_addr(" in ", regs->ip); pr_cont("\n"); } -#endif force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); } -- cgit v1.2.3 From d05004944206cbbf1c453e179768163731c7c6f1 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 10 Mar 2016 19:38:18 -0800 Subject: x86/cpufeature: Enable new AVX-512 features A few new AVX-512 instruction groups/features are added in cpufeatures.h for enumeration: AVX512DQ, AVX512BW, and AVX512VL. Clear the flags in fpu__xstate_clear_all_cpu_caps(). The specification for the latest AVX-512 extensions, including these features, can be found at: https://software.intel.com/sites/default/files/managed/07/b7/319433-023.pdf Note: I didn't enable the flags in KVM. Hopefully the KVM guys can pick up the flags and enable them in KVM. Signed-off-by: Fenghua Yu Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: Denys Vlasenko Cc: Gleb Natapov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V Shankar Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1457667498-37357-1-git-send-email-fenghua.yu@intel.com [ Added more detailed feature descriptions. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d425cda5ae6d..6e8354f5a593 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -51,6 +51,9 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_AVX512PF); setup_clear_cpu_cap(X86_FEATURE_AVX512ER); setup_clear_cpu_cap(X86_FEATURE_AVX512CD); + setup_clear_cpu_cap(X86_FEATURE_AVX512DQ); + setup_clear_cpu_cap(X86_FEATURE_AVX512BW); + setup_clear_cpu_cap(X86_FEATURE_AVX512VL); setup_clear_cpu_cap(X86_FEATURE_MPX); setup_clear_cpu_cap(X86_FEATURE_XGETBV1); } -- cgit v1.2.3
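[ Note: the three new feature bits are enumerated by CPUID leaf 7, subleaf 0, in EBX; per the Intel SDM, AVX512DQ is bit 17, AVX512BW bit 30 and AVX512VL bit 31. A minimal user-space sketch to read them follows. It only shows what the CPU reports, not whether the kernel has enabled the AVX-512 xstate -- which is exactly the distinction fpu__xstate_clear_all_cpu_caps() exists to enforce. ]

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Make sure CPUID leaf 7 exists at all. */
	__cpuid(0, eax, ebx, ecx, edx);
	if (eax < 7)
		return 1;

	/* Structured extended feature flags: leaf 7, subleaf 0. */
	__cpuid_count(7, 0, eax, ebx, ecx, edx);

	printf("AVX512DQ: %u\n", (ebx >> 17) & 1);
	printf("AVX512BW: %u\n", (ebx >> 30) & 1);
	printf("AVX512VL: %u\n", (ebx >> 31) & 1);
	return 0;
}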