summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
authorJason Liu <jason.hui.liu@nxp.com>2022-09-26 12:56:22 -0500
committerJason Liu <jason.hui.liu@nxp.com>2022-09-26 12:56:22 -0500
commit2b53d4d52360709aebc699b18fde867a571b8c39 (patch)
treec2ff72465262fbd327e6e09361154c52047ef792 /arch/x86/kvm/x86.c
parent36363d8623ba60858e2632b7d2b70dae932c9a8b (diff)
parent3e98e33d345e981800e03dd29f6f6343286d30b6 (diff)
Merge tag 'v5.15.70' into lf-5.15.y
This is the 5.15.70 stable release * tag 'v5.15.70': (2444 commits) Linux 5.15.70 ALSA: hda/sigmatel: Fix unused variable warning for beep power change cgroup: Add missing cpus_read_lock() to cgroup_attach_task_all() ... Signed-off-by: Jason Liu <jason.hui.liu@nxp.com> Conflicts: arch/arm/boot/dts/imx6ul.dtsi arch/arm/mm/mmu.c arch/arm64/boot/dts/freescale/imx8mp-evk.dts drivers/gpu/drm/imx/dcss/dcss-kms.c drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.c drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.h drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c drivers/soc/fsl/Kconfig drivers/soc/imx/gpcv2.c drivers/usb/dwc3/host.c net/dsa/slave.c sound/soc/fsl/imx-card.c
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c170
1 files changed, 111 insertions, 59 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 39aaa21e28f7..11e73d02fb3a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -277,6 +277,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, nested_run),
STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
STATS_DESC_COUNTER(VCPU, directed_yield_successful),
+ STATS_DESC_COUNTER(VCPU, preemption_reported),
+ STATS_DESC_COUNTER(VCPU, preemption_other),
STATS_DESC_ICOUNTER(VCPU, guest_mode)
};
@@ -1029,7 +1031,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
-bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
if (cr4 & cr4_reserved_bits)
return false;
@@ -1037,9 +1039,15 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
return false;
- return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
+ return true;
+}
+EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4);
+
+static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+ return __kvm_is_valid_cr4(vcpu, cr4) &&
+ static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
}
-EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
{
@@ -1457,12 +1465,32 @@ static const u32 msr_based_features_all[] = {
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
static unsigned int num_msr_based_features;
+/*
+ * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
+ * does not yet virtualize. These include:
+ * 10 - MISC_PACKAGE_CTRLS
+ * 11 - ENERGY_FILTERING_CTL
+ * 12 - DOITM
+ * 18 - FB_CLEAR_CTRL
+ * 21 - XAPIC_DISABLE_STATUS
+ * 23 - OVERCLOCKING_STATUS
+ */
+
+#define KVM_SUPPORTED_ARCH_CAP \
+ (ARCH_CAP_RDCL_NO | ARCH_CAP_IBRS_ALL | ARCH_CAP_RSBA | \
+ ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
+ ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
+ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
+ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
+
static u64 kvm_get_arch_capabilities(void)
{
u64 data = 0;
- if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
+ data &= KVM_SUPPORTED_ARCH_CAP;
+ }
/*
* If nx_huge_pages is enabled, KVM's shadow paging will ensure that
@@ -1510,9 +1538,6 @@ static u64 kvm_get_arch_capabilities(void)
*/
}
- /* Guests don't need to know "Fill buffer clear control" exists */
- data &= ~ARCH_CAP_FB_CLEAR_CTRL;
-
return data;
}
@@ -3094,17 +3119,20 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* only 0 or all 1s can be written to IA32_MCi_CTL
* some Linux kernels though clear bit 10 in bank 4 to
* workaround a BIOS/GART TBL issue on AMD K8s, ignore
- * this to avoid an uncatched #GP in the guest
+ * this to avoid an uncatched #GP in the guest.
+ *
+ * UNIXWARE clears bit 0 of MC1_CTL to ignore
+ * correctable, single-bit ECC data errors.
*/
if ((offset & 0x3) == 0 &&
- data != 0 && (data | (1 << 10)) != ~(u64)0)
- return -1;
+ data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
+ return 1;
/* MCi_STATUS */
if (!msr_info->host_initiated &&
(offset & 0x3) == 1 && data != 0) {
if (!can_set_mci_status(vcpu))
- return -1;
+ return 1;
}
vcpu->arch.mce_banks[offset] = data;
@@ -3236,6 +3264,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
struct kvm_steal_time __user *st;
struct kvm_memslots *slots;
+ gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
u64 steal;
u32 version;
@@ -3253,13 +3282,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
slots = kvm_memslots(vcpu->kvm);
if (unlikely(slots->generation != ghc->generation ||
+ gpa != ghc->gpa ||
kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
- gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
-
/* We rely on the fact that it fits in a single page. */
BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) ||
kvm_is_error_hva(ghc->hva) || !ghc->memslot)
return;
}
@@ -4370,7 +4398,21 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
struct kvm_steal_time __user *st;
struct kvm_memslots *slots;
static const u8 preempted = KVM_VCPU_PREEMPTED;
+ gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
+ /*
+ * The vCPU can be marked preempted if and only if the VM-Exit was on
+ * an instruction boundary and will not trigger guest emulation of any
+ * kind (see vcpu_run). Vendor specific code controls (conservatively)
+ * when this is true, for example allowing the vCPU to be marked
+ * preempted if and only if the VM-Exit was due to a host interrupt.
+ */
+ if (!vcpu->arch.at_instruction_boundary) {
+ vcpu->stat.preemption_other++;
+ return;
+ }
+
+ vcpu->stat.preemption_reported++;
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -4384,6 +4426,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
slots = kvm_memslots(vcpu->kvm);
if (unlikely(slots->generation != ghc->generation ||
+ gpa != ghc->gpa ||
kvm_is_error_hva(ghc->hva) || !ghc->memslot))
return;
@@ -4400,19 +4443,21 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
int idx;
- if (vcpu->preempted && !vcpu->arch.guest_state_protected)
- vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+ if (vcpu->preempted) {
+ if (!vcpu->arch.guest_state_protected)
+ vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
- /*
- * Take the srcu lock as memslots will be accessed to check the gfn
- * cache generation against the memslots generation.
- */
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (kvm_xen_msr_enabled(vcpu->kvm))
- kvm_xen_runstate_set_preempted(vcpu);
- else
- kvm_steal_time_set_preempted(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ /*
+ * Take the srcu lock as memslots will be accessed to check the gfn
+ * cache generation against the memslots generation.
+ */
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (kvm_xen_msr_enabled(vcpu->kvm))
+ kvm_xen_runstate_set_preempted(vcpu);
+ else
+ kvm_steal_time_set_preempted(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ }
static_call(kvm_x86_vcpu_put)(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
@@ -6894,15 +6939,8 @@ static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
exception, &write_emultor);
}
-#define CMPXCHG_TYPE(t, ptr, old, new) \
- (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
-
-#ifdef CONFIG_X86_64
-# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
-#else
-# define CMPXCHG64(ptr, old, new) \
- (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
-#endif
+#define emulator_try_cmpxchg_user(t, ptr, old, new) \
+ (__try_cmpxchg_user((t __user *)(ptr), (t *)(old), *(t *)(new), efault ## t))
static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
unsigned long addr,
@@ -6911,12 +6949,11 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
unsigned int bytes,
struct x86_exception *exception)
{
- struct kvm_host_map map;
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
u64 page_line_mask;
+ unsigned long hva;
gpa_t gpa;
- char *kaddr;
- bool exchanged;
+ int r;
/* guests cmpxchg8b have to be emulated atomically */
if (bytes > 8 || (bytes & (bytes - 1)))
@@ -6940,31 +6977,32 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
goto emul_write;
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
+ hva = kvm_vcpu_gfn_to_hva(vcpu, gpa_to_gfn(gpa));
+ if (kvm_is_error_hva(hva))
goto emul_write;
- kaddr = map.hva + offset_in_page(gpa);
+ hva += offset_in_page(gpa);
switch (bytes) {
case 1:
- exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
+ r = emulator_try_cmpxchg_user(u8, hva, old, new);
break;
case 2:
- exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
+ r = emulator_try_cmpxchg_user(u16, hva, old, new);
break;
case 4:
- exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
+ r = emulator_try_cmpxchg_user(u32, hva, old, new);
break;
case 8:
- exchanged = CMPXCHG64(kaddr, old, new);
+ r = emulator_try_cmpxchg_user(u64, hva, old, new);
break;
default:
BUG();
}
- kvm_vcpu_unmap(vcpu, &map, true);
-
- if (!exchanged)
+ if (r < 0)
+ goto emul_write;
+ if (r)
return X86EMUL_CMPXCHG_FAILED;
kvm_page_track_write(vcpu, gpa, new, bytes);
@@ -8713,15 +8751,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
*/
static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
{
- struct kvm_lapic_irq lapic_irq;
-
- lapic_irq.shorthand = APIC_DEST_NOSHORT;
- lapic_irq.dest_mode = APIC_DEST_PHYSICAL;
- lapic_irq.level = 0;
- lapic_irq.dest_id = apicid;
- lapic_irq.msi_redir_hint = false;
+ /*
+ * All other fields are unused for APIC_DM_REMRD, but may be consumed by
+ * common code, e.g. for tracing. Defer initialization to the compiler.
+ */
+ struct kvm_lapic_irq lapic_irq = {
+ .delivery_mode = APIC_DM_REMRD,
+ .dest_mode = APIC_DEST_PHYSICAL,
+ .shorthand = APIC_DEST_NOSHORT,
+ .dest_id = apicid,
+ };
- lapic_irq.delivery_mode = APIC_DM_REMRD;
kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
}
@@ -9517,6 +9557,11 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
}
+void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
+{
+ static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
+}
+
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
@@ -9939,6 +9984,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.l1tf_flush_l1d = true;
for (;;) {
+ /*
+ * If another guest vCPU requests a PV TLB flush in the middle
+ * of instruction emulation, the rest of the emulation could
+ * use a stale page translation. Assume that any code after
+ * this point can start executing an instruction.
+ */
+ vcpu->arch.at_instruction_boundary = false;
if (kvm_vcpu_running(vcpu)) {
r = vcpu_enter_guest(vcpu);
} else {
@@ -11552,7 +11604,7 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
if (slot->arch.rmap[i])
continue;
- slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
+ slot->arch.rmap[i] = __vcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
if (!slot->arch.rmap[i]) {
memslot_rmap_free(slot);
return -ENOMEM;
@@ -11633,7 +11685,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
lpages = __kvm_mmu_slot_lpages(slot, npages, level);
- linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
+ linfo = __vcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
if (!linfo)
goto out_free;
@@ -12177,9 +12229,9 @@ void kvm_arch_end_assignment(struct kvm *kvm)
}
EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
-bool kvm_arch_has_assigned_device(struct kvm *kvm)
+bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
{
- return atomic_read(&kvm->arch.assigned_device_count);
+ return arch_atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);