summaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorHugh Dickins <hughd@google.com>2017-08-17 15:00:37 -0700
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-01-05 15:44:25 +0100
commit0731188fc74cc2237975a2b5bedd36e2463ef10b (patch)
tree173ba3edfd505193360d2ec9ba36071d1f972880 /arch/x86/mm
parenteb82151d0b1df53d1ad8d060ecd554ca12eb552a (diff)
kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush user
We have many machines (Westmere, Sandybridge, Ivybridge) supporting PCID but not INVPCID: on these load_new_mm_cr3() simply crashed. Flushing user context inside load_new_mm_cr3() without the use of invpcid is difficult: momentarily switch from kernel to user context and back to do so? I'm not sure whether that can be safely done at all, and would risk polluting user context with kernel internals, and kernel context with stale user externals. Instead, follow the hint in the comment that was there: change X86_CR3_PCID_USER_VAR to be a per-cpu variable, then load_new_mm_cr3() can leave a note in it, for SWITCH_USER_CR3 on return to userspace to flush user context TLB, instead of default X86_CR3_PCID_USER_NOFLUSH. Which works well enough that there's no need to do it this way only when invpcid is unsupported: it's a good alternative to invpcid here. But there's a couple of inlines in asm/tlbflush.h that need to do the same trick, so it's best to localize all this per-cpu business in mm/kaiser.c: moving that part of the initialization from setup_pcid() to kaiser_setup_pcid(); with kaiser_flush_tlb_on_return_to_user() the function for noting an X86_CR3_PCID_USER_FLUSH. And let's keep a KAISER_SHADOW_PGD_OFFSET in there, to avoid the extra OR on exit. I did try to make the feature tests in asm/tlbflush.h more consistent with each other: there seem to be far too many ways of performing such tests, and I don't have a good grasp of their differences. At first I converted them all to be static_cpu_has(): but that proved to be a mistake, as the comment in __native_flush_tlb_single() hints; so then I reversed and made them all this_cpu_has(). Probably all gratuitous change, but that's the way it's working at present. I am slightly bothered by the way non-per-cpu X86_CR3_PCID_KERN_VAR gets re-initialized by each cpu (before and after these changes): no problem when (as usual) all cpus on a machine have the same features, but in principle incorrect. However, my experiment to per-cpu-ify that one did not end well... Signed-off-by: Hugh Dickins <hughd@google.com> Acked-by: Jiri Kosina <jkosina@suse.cz> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/kaiser.c50
-rw-r--r--arch/x86/mm/tlb.c46
2 files changed, 62 insertions, 34 deletions
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
index 290a52e6017d..bf3ec221a90b 100644
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -12,12 +12,26 @@
#include <linux/ftrace.h>
#include <asm/kaiser.h>
+#include <asm/tlbflush.h> /* to verify its kaiser declarations */
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/desc.h>
+
#ifdef CONFIG_KAISER
+__visible
+DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+/*
+ * These can have bit 63 set, so we can not just use a plain "or"
+ * instruction to get their value or'd into CR3. It would take
+ * another register. So, we use a memory reference to these instead.
+ *
+ * This is also handy because systems that do not support PCIDs
+ * just end up or'ing a 0 into their CR3, which does no harm.
+ */
+__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR;
+DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
-__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
/*
* At runtime, the only things we map are some things for CPU
* hotplug, and stacks for new processes. No two CPUs will ever
@@ -239,9 +253,6 @@ static void __init kaiser_init_all_pgds(void)
WARN_ON(__ret); \
} while (0)
-extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
-extern unsigned long X86_CR3_PCID_KERN_VAR;
-extern unsigned long X86_CR3_PCID_USER_VAR;
/*
* If anything in here fails, we will likely die on one of the
* first kernel->user transitions and init will die. But, we
@@ -295,8 +306,6 @@ void __init kaiser_init(void)
kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
__PAGE_KERNEL);
- kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
- __PAGE_KERNEL);
}
/* Add a mapping to the shadow mapping, and synchronize the mappings */
@@ -361,4 +370,33 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
}
return pgd;
}
+
+void kaiser_setup_pcid(void)
+{
+ unsigned long kern_cr3 = 0;
+ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
+
+ if (this_cpu_has(X86_FEATURE_PCID)) {
+ kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH;
+ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
+ }
+ /*
+ * These variables are used by the entry/exit
+ * code to change PCID and pgd and TLB flushing.
+ */
+ X86_CR3_PCID_KERN_VAR = kern_cr3;
+ this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3);
+}
+
+/*
+ * Make a note that this cpu will need to flush USER tlb on return to user.
+ * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling:
+ * if cpu does not, then the NOFLUSH bit will never have been set.
+ */
+void kaiser_flush_tlb_on_return_to_user(void)
+{
+ this_cpu_write(X86_CR3_PCID_USER_VAR,
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+}
+EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
#endif /* CONFIG_KAISER */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index aed0b704de3d..99a5d1b95527 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,13 +6,14 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/cpu.h>
+#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
-#include <linux/debugfs.h>
+#include <asm/kaiser.h>
/*
* TLB flushing, formerly SMP-only
@@ -38,34 +39,23 @@ static void load_new_mm_cr3(pgd_t *pgdir)
{
unsigned long new_mm_cr3 = __pa(pgdir);
- /*
- * KAISER, plus PCIDs needs some extra work here. But,
- * if either of features is not present, we need no
- * PCIDs here and just do a normal, full TLB flush with
- * the write_cr3()
- */
- if (!IS_ENABLED(CONFIG_KAISER) ||
- !cpu_feature_enabled(X86_FEATURE_PCID))
- goto out_set_cr3;
- /*
- * We reuse the same PCID for different tasks, so we must
- * flush all the entires for the PCID out when we change
- * tasks.
- */
- new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
-
- /*
- * The flush from load_cr3() may leave old TLB entries
- * for userspace in place. We must flush that context
- * separately. We can theoretically delay doing this
- * until we actually load up the userspace CR3, but
- * that's a bit tricky. We have to have the "need to
- * flush userspace PCID" bit per-cpu and check it in the
- * exit-to-userspace paths.
- */
- invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
+#ifdef CONFIG_KAISER
+ if (this_cpu_has(X86_FEATURE_PCID)) {
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entries for the PCID out when we change tasks.
+ * Flush KERN below, flush USER when returning to userspace in
+ * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro.
+ *
+ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
+ * do it here, but can only be used if X86_FEATURE_INVPCID is
+ * available - and many machines support pcid without invpcid.
+ */
+ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
+ kaiser_flush_tlb_on_return_to_user();
+ }
+#endif /* CONFIG_KAISER */
-out_set_cr3:
/*
* Caution: many callers of this function expect
* that load_cr3() is serializing and orders TLB