summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/efi.h6
-rw-r--r--arch/x86/include/asm/xen/interface.h4
-rw-r--r--arch/x86/kernel/apic/io_apic.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event.c10
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c48
-rw-r--r--arch/x86/kernel/cpu/perf_event_knc.c93
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c127
-rw-r--r--arch/x86/kernel/e820.c3
-rw-r--r--arch/x86/kernel/entry_32.S8
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/kvm.c3
-rw-r--r--arch/x86/kernel/setup.c27
-rw-r--r--arch/x86/kernel/signal.c4
-rw-r--r--arch/x86/kernel/uprobes.c16
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/mmu.c3
-rw-r--r--arch/x86/mm/init.c58
-rw-r--r--arch/x86/mm/init_64.c7
-rw-r--r--arch/x86/oprofile/nmi_int.c2
-rw-r--r--arch/x86/platform/efi/efi.c47
-rw-r--r--arch/x86/platform/efi/efi_64.c7
-rw-r--r--arch/x86/xen/enlighten.c2
22 files changed, 348 insertions, 134 deletions
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index c9dcc181d4d1..6e8fdf5ad113 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -35,7 +35,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
efi_call_virt(f, a1, a2, a3, a4, a5, a6)
-#define efi_ioremap(addr, size, type) ioremap_cache(addr, size)
+#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size)
#else /* !CONFIG_X86_32 */
@@ -89,7 +89,7 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
(u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
- u32 type);
+ u32 type, u64 attribute);
#endif /* CONFIG_X86_32 */
@@ -98,6 +98,8 @@ extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int efi_memblock_x86_reserve_range(void);
extern void efi_call_phys_prelog(void);
extern void efi_call_phys_epilog(void);
+extern void efi_unmap_memmap(void);
+extern void efi_memory_uc(u64 addr, unsigned long size);
#ifndef CONFIG_EFI
/*
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 6d2f75a82a14..54d52ff1304a 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -51,14 +51,14 @@
* with Xen so that on ARM we can have one ABI that works for 32 and 64
* bit guests. */
typedef unsigned long xen_pfn_t;
+#define PRI_xen_pfn "lx"
typedef unsigned long xen_ulong_t;
+#define PRI_xen_ulong "lx"
/* Guest handles for primitive C types. */
__DEFINE_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_GUEST_HANDLE(uint, unsigned int);
-__DEFINE_GUEST_HANDLE(ulong, unsigned long);
DEFINE_GUEST_HANDLE(char);
DEFINE_GUEST_HANDLE(int);
-DEFINE_GUEST_HANDLE(long);
DEFINE_GUEST_HANDLE(void);
DEFINE_GUEST_HANDLE(uint64_t);
DEFINE_GUEST_HANDLE(uint32_t);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index c265593ec2cd..1817fa911024 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2257,6 +2257,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
continue;
cfg = irq_cfg(irq);
+ if (!cfg)
+ continue;
+
raw_spin_lock(&desc->lock);
/*
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3373f84d1397..4a3374e61a93 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -208,12 +208,14 @@ static bool check_hw_exists(void)
}
/*
- * Now write a value and read it back to see if it matches,
- * this is needed to detect certain hardware emulators (qemu/kvm)
- * that don't trap on the MSR access and always return 0s.
+ * Read the current value, change it and read it back to see if it
+ * matches, this is needed to detect certain hardware emulators
+ * (qemu/kvm) that don't trap on the MSR access and always return 0s.
*/
- val = 0xabcdUL;
reg = x86_pmu_event_addr(0);
+ if (rdmsrl_safe(reg, &val))
+ goto msr_fail;
+ val ^= 0xffffUL;
ret = wrmsrl_safe(reg, val);
ret |= rdmsrl_safe(reg, &val_new);
if (ret || val != val_new)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 99d96a4978b5..3cf3d97cce3a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -118,22 +118,24 @@ static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
- u32 config;
+ u32 config = 0;
- pci_read_config_dword(pdev, box_ctl, &config);
- config |= SNBEP_PMON_BOX_CTL_FRZ;
- pci_write_config_dword(pdev, box_ctl, config);
+ if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+ config |= SNBEP_PMON_BOX_CTL_FRZ;
+ pci_write_config_dword(pdev, box_ctl, config);
+ }
}
static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
- u32 config;
+ u32 config = 0;
- pci_read_config_dword(pdev, box_ctl, &config);
- config &= ~SNBEP_PMON_BOX_CTL_FRZ;
- pci_write_config_dword(pdev, box_ctl, config);
+ if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+ config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+ pci_write_config_dword(pdev, box_ctl, config);
+ }
}
static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
@@ -156,7 +158,7 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct pe
{
struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw;
- u64 count;
+ u64 count = 0;
pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
@@ -603,11 +605,12 @@ static struct pci_driver snbep_uncore_pci_driver = {
/*
* build pci bus to socket mapping
*/
-static void snbep_pci2phy_map_init(void)
+static int snbep_pci2phy_map_init(void)
{
struct pci_dev *ubox_dev = NULL;
int i, bus, nodeid;
- u32 config;
+ int err = 0;
+ u32 config = 0;
while (1) {
/* find the UBOX device */
@@ -618,10 +621,14 @@ static void snbep_pci2phy_map_init(void)
break;
bus = ubox_dev->bus->number;
/* get the Node ID of the local register */
- pci_read_config_dword(ubox_dev, 0x40, &config);
+ err = pci_read_config_dword(ubox_dev, 0x40, &config);
+ if (err)
+ break;
nodeid = config;
/* get the Node ID mapping */
- pci_read_config_dword(ubox_dev, 0x54, &config);
+ err = pci_read_config_dword(ubox_dev, 0x54, &config);
+ if (err)
+ break;
/*
* every three bits in the Node ID mapping register maps
* to a particular node.
@@ -633,7 +640,11 @@ static void snbep_pci2phy_map_init(void)
}
}
};
- return;
+
+ if (ubox_dev)
+ pci_dev_put(ubox_dev);
+
+ return err ? pcibios_err_to_errno(err) : 0;
}
/* end of Sandy Bridge-EP uncore support */
@@ -1547,7 +1558,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- int port;
/* adjust the main event selector and extra register index */
if (reg1->idx % 2) {
@@ -1559,7 +1569,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
}
/* adjust extra register config */
- port = reg1->idx / 6 + box->pmu->pmu_idx * 4;
switch (reg1->idx % 6) {
case 2:
/* shift the 8~15 bits to the 0~7 bits */
@@ -2578,9 +2587,11 @@ static int __init uncore_pci_init(void)
switch (boot_cpu_data.x86_model) {
case 45: /* Sandy Bridge-EP */
+ ret = snbep_pci2phy_map_init();
+ if (ret)
+ return ret;
pci_uncores = snbep_pci_uncores;
uncore_pci_driver = &snbep_uncore_pci_driver;
- snbep_pci2phy_map_init();
break;
default:
return 0;
@@ -2926,6 +2937,9 @@ static int __init intel_uncore_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -ENODEV;
+ if (cpu_has_hypervisor)
+ return -ENODEV;
+
ret = uncore_pci_init();
if (ret)
goto fail;
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
index 7c46bfdbc373..4b7731bf23a8 100644
--- a/arch/x86/kernel/cpu/perf_event_knc.c
+++ b/arch/x86/kernel/cpu/perf_event_knc.c
@@ -3,6 +3,8 @@
#include <linux/perf_event.h>
#include <linux/types.h>
+#include <asm/hardirq.h>
+
#include "perf_event.h"
static const u64 knc_perfmon_event_map[] =
@@ -173,30 +175,100 @@ static void knc_pmu_enable_all(int added)
static inline void
knc_pmu_disable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val;
val = hwc->config;
- if (cpuc->enabled)
- val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
}
static void knc_pmu_enable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val;
val = hwc->config;
- if (cpuc->enabled)
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
}
+static inline u64 knc_pmu_get_status(void)
+{
+ u64 status;
+
+ rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
+
+ return status;
+}
+
+static inline void knc_pmu_ack_status(u64 ack)
+{
+ wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
+}
+
+static int knc_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ int handled = 0;
+ int bit, loops;
+ u64 status;
+
+ cpuc = &__get_cpu_var(cpu_hw_events);
+
+ knc_pmu_disable_all();
+
+ status = knc_pmu_get_status();
+ if (!status) {
+ knc_pmu_enable_all(0);
+ return handled;
+ }
+
+ loops = 0;
+again:
+ knc_pmu_ack_status(status);
+ if (++loops > 100) {
+ WARN_ONCE(1, "perf: irq loop stuck!\n");
+ perf_event_print_debug();
+ goto done;
+ }
+
+ inc_irq_stat(apic_perf_irqs);
+
+ for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+ struct perf_event *event = cpuc->events[bit];
+
+ handled++;
+
+ if (!test_bit(bit, cpuc->active_mask))
+ continue;
+
+ if (!intel_pmu_save_and_restart(event))
+ continue;
+
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+ }
+
+ /*
+ * Repeat if there is more work to be done:
+ */
+ status = knc_pmu_get_status();
+ if (status)
+ goto again;
+
+done:
+ knc_pmu_enable_all(0);
+
+ return handled;
+}
+
+
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -214,7 +286,7 @@ static struct attribute *intel_knc_formats_attr[] = {
static __initconst struct x86_pmu knc_pmu = {
.name = "knc",
- .handle_irq = x86_pmu_handle_irq,
+ .handle_irq = knc_pmu_handle_irq,
.disable_all = knc_pmu_disable_all,
.enable_all = knc_pmu_enable_all,
.enable = knc_pmu_enable_event,
@@ -226,12 +298,11 @@ static __initconst struct x86_pmu knc_pmu = {
.event_map = knc_pmu_event_map,
.max_events = ARRAY_SIZE(knc_perfmon_event_map),
.apic = 1,
- .max_period = (1ULL << 31) - 1,
+ .max_period = (1ULL << 39) - 1,
.version = 0,
.num_counters = 2,
- /* in theory 40 bits, early silicon is buggy though */
- .cntval_bits = 32,
- .cntval_mask = (1ULL << 32) - 1,
+ .cntval_bits = 40,
+ .cntval_mask = (1ULL << 40) - 1,
.get_event_constraints = x86_get_event_constraints,
.event_constraints = knc_event_constraints,
.format_attrs = intel_knc_formats_attr,
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index e4dd0f7a0453..7d0270bd793e 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -8,13 +8,106 @@
*/
static const u64 p6_perfmon_event_map[] =
{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
- [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, /* CPU_CLK_UNHALTED */
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, /* INST_RETIRED */
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, /* L2_RQSTS:M:E:S:I */
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, /* L2_RQSTS:I */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, /* BR_INST_RETIRED */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, /* BR_MISS_PRED_RETIRED */
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, /* BUS_DRDY_CLOCKS */
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2, /* RESOURCE_STALLS */
+
+};
+
+static __initconst u64 p6_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */
+ [ C(RESULT_MISS) ] = 0x0045, /* DCU_LINES_IN */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0f29, /* L2_LD:M:E:S:I */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */
+ [ C(RESULT_MISS) ] = 0x0f28, /* L2_IFETCH:M:E:S:I */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0025, /* L2_M_LINES_INM */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */
+ [ C(RESULT_MISS) ] = 0x0085, /* ITLB_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISS_PRED_RETIRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
};
static u64 p6_pmu_event_map(int hw_event)
@@ -34,7 +127,7 @@ static struct event_constraint p6_event_constraints[] =
{
INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
- INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
+ INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
@@ -64,25 +157,25 @@ static void p6_pmu_enable_all(int added)
static inline void
p6_pmu_disable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val = P6_NOP_EVENT;
- if (cpuc->enabled)
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
-
(void)wrmsrl_safe(hwc->config_base, val);
}
static void p6_pmu_enable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val;
val = hwc->config;
- if (cpuc->enabled)
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+ /*
+ * p6 only has a global event enable, set on PerfEvtSel0
+ * We "disable" events by programming P6_NOP_EVENT
+ * and we rely on p6_pmu_enable_all() being called
+ * to actually enable the events.
+ */
(void)wrmsrl_safe(hwc->config_base, val);
}
@@ -158,5 +251,9 @@ __init int p6_pmu_init(void)
x86_pmu = p6_pmu;
+ memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+
return 0;
}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ed858e9e9a74..df06ade26bef 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1077,6 +1077,9 @@ void __init memblock_x86_fill(void)
memblock_add(ei->addr, ei->size);
}
+ /* throw away partial pages */
+ memblock_trim_memory(PAGE_SIZE);
+
memblock_dump_all();
}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index a1193aef6d7d..88b725aa1d52 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1035,7 +1035,7 @@ ENTRY(xen_sysenter_target)
ENTRY(xen_hypervisor_callback)
CFI_STARTPROC
- pushl_cfi $0
+ pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
TRACE_IRQS_OFF
@@ -1077,14 +1077,16 @@ ENTRY(xen_failsafe_callback)
2: mov 8(%esp),%es
3: mov 12(%esp),%fs
4: mov 16(%esp),%gs
+ /* EAX == 0 => Category 1 (Bad segment)
+ EAX != 0 => Category 2 (Bad IRET) */
testl %eax,%eax
popl_cfi %eax
lea 16(%esp),%esp
CFI_ADJUST_CFA_OFFSET -16
jz 5f
addl $16,%esp
- jmp iret_exc # EAX != 0 => Category 2 (Bad IRET)
-5: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment)
+ jmp iret_exc
+5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
jmp ret_from_exception
CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 0c58952d64e8..b51b2c7ee51f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1435,7 +1435,7 @@ ENTRY(xen_failsafe_callback)
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
- pushq_cfi $0
+ pushq_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
jmp error_exit
CFI_ENDPROC
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b3e5e51bc907..4180a874c764 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -247,7 +247,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
+ rcu_irq_enter();
+ exit_idle();
kvm_async_pf_task_wait((u32)read_cr2());
+ rcu_irq_exit();
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 468e98dfd44e..ca45696f30fb 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -921,18 +921,19 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_64
if (max_pfn > max_low_pfn) {
int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
+ unsigned long start, end;
+ unsigned long start_pfn, end_pfn;
- if (ei->addr + ei->size <= 1UL << 32)
- continue;
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
+ NULL) {
- if (ei->type == E820_RESERVED)
+ end = PFN_PHYS(end_pfn);
+ if (end <= (1UL<<32))
continue;
+ start = PFN_PHYS(start_pfn);
max_pfn_mapped = init_memory_mapping(
- ei->addr < 1UL << 32 ? 1UL << 32 : ei->addr,
- ei->addr + ei->size);
+ max((1UL<<32), start), end);
}
/* can we preseve max_low_pfn ?*/
@@ -1048,6 +1049,18 @@ void __init setup_arch(char **cmdline_p)
arch_init_ideal_nops();
register_refined_jiffies(CLOCK_TICK_RATE);
+
+#ifdef CONFIG_EFI
+ /* Once setup is done above, disable efi_enabled on mismatched
+ * firmware/kernel archtectures since there is no support for
+ * runtime services.
+ */
+ if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) {
+ pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
+ efi_unmap_memmap();
+ efi_enabled = 0;
+ }
+#endif
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 29ad351804e9..70b27ee6118e 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -824,10 +824,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
mce_notify_process();
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
- if (thread_info_flags & _TIF_UPROBE) {
- clear_thread_flag(TIF_UPROBE);
+ if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
- }
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 9538f00827a9..aafa5557b396 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -651,31 +651,19 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
/*
* Skip these instructions as per the currently known x86 ISA.
- * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
+ * rep=0x66*; nop=0x90
*/
static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
int i;
for (i = 0; i < MAX_UINSN_BYTES; i++) {
- if ((auprobe->insn[i] == 0x66))
+ if (auprobe->insn[i] == 0x66)
continue;
if (auprobe->insn[i] == 0x90)
return true;
- if (i == (MAX_UINSN_BYTES - 1))
- break;
-
- if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x1f))
- return true;
-
- if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x19))
- return true;
-
- if ((auprobe->insn[i] == 0x87) && (auprobe->insn[i+1] == 0xc0))
- return true;
-
break;
}
return false;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c6e6b721b6ee..43e9fadca5d0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1311,7 +1311,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
vcpu->arch.apic_base = value;
if (apic_x2apic_mode(apic)) {
u32 id = kvm_apic_id(apic);
- u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
+ u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
kvm_apic_set_ldr(apic, ldr);
}
apic->base_address = apic->vcpu->arch.apic_base &
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d289fee1ffb8..6f85fe0bf958 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2497,8 +2497,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}
}
- if (!is_error_pfn(pfn))
- kvm_release_pfn_clean(pfn);
+ kvm_release_pfn_clean(pfn);
}
static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ab1f6a93b527..d7aea41563b3 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -35,40 +35,44 @@ struct map_range {
unsigned page_size_mask;
};
-static void __init find_early_table_space(struct map_range *mr, unsigned long end,
- int use_pse, int use_gbpages)
+/*
+ * First calculate space needed for kernel direct mapping page tables to cover
+ * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB
+ * pages. Then find enough contiguous space for those page tables.
+ */
+static void __init find_early_table_space(struct map_range *mr, int nr_range)
{
- unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
+ int i;
+ unsigned long puds = 0, pmds = 0, ptes = 0, tables;
+ unsigned long start = 0, good_end;
phys_addr_t base;
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
-
- if (use_gbpages) {
- unsigned long extra;
-
- extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
- pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
- } else
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+ for (i = 0; i < nr_range; i++) {
+ unsigned long range, extra;
- tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
+ range = mr[i].end - mr[i].start;
+ puds += (range + PUD_SIZE - 1) >> PUD_SHIFT;
- if (use_pse) {
- unsigned long extra;
+ if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) {
+ extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT);
+ pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT;
+ } else {
+ pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT;
+ }
- extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
+ if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) {
+ extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT);
#ifdef CONFIG_X86_32
- extra += PMD_SIZE;
+ extra += PMD_SIZE;
#endif
- /* The first 2/4M doesn't use large pages. */
- if (mr->start < PMD_SIZE)
- extra += mr->end - mr->start;
-
- ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
- } else
- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ } else {
+ ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ }
+ }
+ tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
+ tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
#ifdef CONFIG_X86_32
@@ -86,7 +90,7 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en
pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n",
- end - 1, pgt_buf_start << PAGE_SHIFT,
+ mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT,
(pgt_buf_top << PAGE_SHIFT) - 1);
}
@@ -267,7 +271,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
* nodes are discovered.
*/
if (!after_bootmem)
- find_early_table_space(&mr[0], end, use_pse, use_gbpages);
+ find_early_table_space(mr, nr_range);
for (i = 0; i < nr_range; i++)
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 2b6b4a3c8beb..3baff255adac 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -386,7 +386,8 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
* these mappings are more intelligent.
*/
if (pte_val(*pte)) {
- pages++;
+ if (!after_bootmem)
+ pages++;
continue;
}
@@ -451,6 +452,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
* attributes.
*/
if (page_size_mask & (1 << PG_LEVEL_2M)) {
+ if (!after_bootmem)
+ pages++;
last_map_addr = next;
continue;
}
@@ -526,6 +529,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
* attributes.
*/
if (page_size_mask & (1 << PG_LEVEL_1G)) {
+ if (!after_bootmem)
+ pages++;
last_map_addr = next;
continue;
}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 26b8a8514ee5..48768df2471a 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -55,7 +55,7 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
val |= counter_config->extra;
event &= model->event_mask ? model->event_mask : 0xFF;
val |= event & 0xFF;
- val |= (event & 0x0F00) << 24;
+ val |= (u64)(event & 0x0F00) << 24;
return val;
}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index aded2a91162a..ad4439145f85 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -70,11 +70,15 @@ EXPORT_SYMBOL(efi);
struct efi_memory_map memmap;
bool efi_64bit;
-static bool efi_native;
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
+static inline bool efi_is_native(void)
+{
+ return IS_ENABLED(CONFIG_X86_64) == efi_64bit;
+}
+
static int __init setup_noefi(char *arg)
{
efi_enabled = 0;
@@ -420,7 +424,7 @@ void __init efi_reserve_boot_services(void)
}
}
-static void __init efi_unmap_memmap(void)
+void __init efi_unmap_memmap(void)
{
if (memmap.map) {
early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
@@ -432,7 +436,7 @@ void __init efi_free_boot_services(void)
{
void *p;
- if (!efi_native)
+ if (!efi_is_native())
return;
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -684,12 +688,10 @@ void __init efi_init(void)
return;
}
efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
- efi_native = !efi_64bit;
#else
efi_phys.systab = (efi_system_table_t *)
(boot_params.efi_info.efi_systab |
((__u64)boot_params.efi_info.efi_systab_hi<<32));
- efi_native = efi_64bit;
#endif
if (efi_systab_init(efi_phys.systab)) {
@@ -723,7 +725,7 @@ void __init efi_init(void)
* that doesn't match the kernel 32/64-bit mode.
*/
- if (!efi_native)
+ if (!efi_is_native())
pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
else if (efi_runtime_init()) {
efi_enabled = 0;
@@ -735,7 +737,7 @@ void __init efi_init(void)
return;
}
#ifdef CONFIG_X86_32
- if (efi_native) {
+ if (efi_is_native()) {
x86_platform.get_wallclock = efi_get_time;
x86_platform.set_wallclock = efi_set_rtc_mmss;
}
@@ -810,6 +812,16 @@ void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
return NULL;
}
+void efi_memory_uc(u64 addr, unsigned long size)
+{
+ unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
+ u64 npages;
+
+ npages = round_up(size, page_shift) / page_shift;
+ memrange_efi_to_native(&addr, &npages);
+ set_memory_uc(addr, npages);
+}
+
/*
* This function will switch the EFI runtime services to virtual mode.
* Essentially, look through the EFI memmap and map every region that
@@ -823,7 +835,7 @@ void __init efi_enter_virtual_mode(void)
efi_memory_desc_t *md, *prev_md = NULL;
efi_status_t status;
unsigned long size;
- u64 end, systab, addr, npages, end_pfn;
+ u64 end, systab, end_pfn;
void *p, *va, *new_memmap = NULL;
int count = 0;
@@ -834,7 +846,7 @@ void __init efi_enter_virtual_mode(void)
* non-native EFI
*/
- if (!efi_native) {
+ if (!efi_is_native()) {
efi_unmap_memmap();
return;
}
@@ -879,10 +891,14 @@ void __init efi_enter_virtual_mode(void)
end_pfn = PFN_UP(end);
if (end_pfn <= max_low_pfn_mapped
|| (end_pfn > (1UL << (32 - PAGE_SHIFT))
- && end_pfn <= max_pfn_mapped))
+ && end_pfn <= max_pfn_mapped)) {
va = __va(md->phys_addr);
- else
- va = efi_ioremap(md->phys_addr, size, md->type);
+
+ if (!(md->attribute & EFI_MEMORY_WB))
+ efi_memory_uc((u64)(unsigned long)va, size);
+ } else
+ va = efi_ioremap(md->phys_addr, size,
+ md->type, md->attribute);
md->virt_addr = (u64) (unsigned long) va;
@@ -892,13 +908,6 @@ void __init efi_enter_virtual_mode(void)
continue;
}
- if (!(md->attribute & EFI_MEMORY_WB)) {
- addr = md->virt_addr;
- npages = md->num_pages;
- memrange_efi_to_native(&addr, &npages);
- set_memory_uc(addr, npages);
- }
-
systab = (u64) (unsigned long) efi_phys.systab;
if (md->phys_addr <= systab && systab < end) {
systab += md->virt_addr - md->phys_addr;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac3aa54e2654..95fd505dfeb6 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -82,7 +82,7 @@ void __init efi_call_phys_epilog(void)
}
void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
- u32 type)
+ u32 type, u64 attribute)
{
unsigned long last_map_pfn;
@@ -92,8 +92,11 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
unsigned long top = last_map_pfn << PAGE_SHIFT;
- efi_ioremap(top, size - (top - phys_addr), type);
+ efi_ioremap(top, size - (top - phys_addr), type, attribute);
}
+ if (!(attribute & EFI_MEMORY_WB))
+ efi_memory_uc((u64)(unsigned long)__va(phys_addr), size);
+
return (void __iomem *)__va(phys_addr);
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e3497f240eab..586d83812b67 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -81,8 +81,6 @@
#include "smp.h"
#include "multicalls.h"
-#include <xen/events.h>
-
EXPORT_SYMBOL_GPL(hypercall_page);
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);