Discussion:
[edk2] [PATCH 1/3] KVM: MTRR: fix memory type handling if MTRR is completely disabled
Alex Williamson
2015-07-16 04:10:18 UTC
Permalink
Currently the code uses the default memory type if MTRR is fully disabled;
fix it by using UC instead
---
Seems to work for me. I don't see a 0th patch, but for the series:

Tested-by: Alex Williamson <***@redhat.com>

Thanks!
arch/x86/kvm/mtrr.c | 21 ++++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index de1d2d8..e275013 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -120,6 +120,16 @@ static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
}
+static u8 mtrr_disabled_type(void)
+{
+ /*
+ * Intel SDM 11.11.2.2: all MTRRs are disabled when
+ * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC
+ * memory type is applied to all of physical memory.
+ */
+ return MTRR_TYPE_UNCACHABLE;
+}
+
/*
* - segment, it indicates the address segments covered by fixed MTRRs.
@@ -434,6 +444,8 @@ struct mtrr_iter {
/* output fields. */
int mem_type;
+ /* mtrr is completely disabled? */
+ bool mtrr_disabled;
/* [start, end) is not fully covered in MTRRs? */
bool partial_map;
@@ -549,7 +561,7 @@ static void mtrr_lookup_var_next(struct mtrr_iter *iter)
static void mtrr_lookup_start(struct mtrr_iter *iter)
{
if (!mtrr_is_enabled(iter->mtrr_state)) {
- iter->partial_map = true;
+ iter->mtrr_disabled = true;
return;
}
@@ -563,6 +575,7 @@ static void mtrr_lookup_init(struct mtrr_iter *iter,
iter->mtrr_state = mtrr_state;
iter->start = start;
iter->end = end;
+ iter->mtrr_disabled = false;
iter->partial_map = false;
iter->fixed = false;
iter->range = NULL;
@@ -656,6 +669,9 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
return MTRR_TYPE_WRBACK;
}
+ if (iter.mtrr_disabled)
+ return mtrr_disabled_type();
+
/* It is not covered by MTRRs. */
if (iter.partial_map) {
/*
@@ -689,6 +705,9 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
return false;
}
+ if (iter.mtrr_disabled)
+ return true;
+
if (!iter.partial_map)
return true;
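
The effect of the change can be modeled outside the kernel. A minimal
user-space sketch (illustrative only; the bit layout follows the SDM,
the function names are not kernel API):

#include <stdio.h>

/* Illustrative stand-ins for the x86 memory-type encodings. */
enum { MTRR_TYPE_UNCACHABLE = 0, MTRR_TYPE_WRBACK = 6 };

/*
 * Models the post-patch decision: per Intel SDM 11.11.2.2, when
 * IA32_MTRR_DEF_TYPE.E (bit 11) is clear all MTRRs are disabled and
 * UC applies to all physical memory, regardless of the default-type
 * field in the low bits.
 */
static int effective_default_type(unsigned long long deftype)
{
	if (!(deftype & (1ULL << 11)))		/* E bit clear */
		return MTRR_TYPE_UNCACHABLE;	/* mtrr_disabled_type() */
	return deftype & 0xff;			/* mtrr_default_type() */
}

int main(void)
{
	/* E clear, default WB: the old code wrongly returned WB (6). */
	printf("disabled -> %d (want 0)\n",
	       effective_default_type(0x06));
	/* E set, default WB: the default type legitimately applies. */
	printf("enabled  -> %d (want 6)\n",
	       effective_default_type((1ULL << 11) | 0x06));
	return 0;
}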
Paolo Bonzini
2015-07-23 05:56:01 UTC
Permalink
Current firmware depends on WB to boot fast; please refer to
https://lkml.org/lkml/2015/7/12/115
Let's use WB if CR0.CD is set to make this kind of firmware happy.
This quirk can be dropped via the KVM_ENABLE_CAP API with
KVM_CAP_DISABLE_QUIRKS once the broken firmware is gone.
Your patch is actually *enabling* the quirk if KVM_QUIRK_CD_AS_WB is
included in the disabled quirks. I'm squashing in this change:

diff --cc arch/x86/kvm/x86.h
index edc8cdcd786b,ea99928d9d09..000000000000
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@@ -147,9 +147,9 @@@ static inline void kvm_register_writel(
return ktime_get_boot_ns();
}

-static inline bool kvm_check_disabled_quirks(struct kvm *kvm, u64 quirk)
+static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
{
- return !!(kvm->arch.disabled_quirks & quirk);
+ return !(kvm->arch.disabled_quirks & quirk);
}

void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0d77b20d628a..1c425443a41a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1595,7 +1595,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
for (i = 0; i < APIC_LVT_NUM; i++)
apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
apic_update_lvtt(apic);
- if (!kvm_check_disabled_quirks(vcpu->kvm, KVM_QUIRK_LINT0_REENABLED))
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_QUIRK_LINT0_REENABLED))
apic_set_reg(apic, APIC_LVT0,
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0));
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index cac9ee6abea7..8cbec765b08d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1672,7 +1672,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
* does not do it - this results in some delay at
* reboot
*/
- if (!kvm_check_disabled_quirks(vcpu->kvm, KVM_QUIRK_CD_NW_CLEARED))
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_QUIRK_CD_NW_CLEARED))
cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
svm->vmcb->save.cr0 = cr0;
mark_dirty(svm->vmcb, VMCB_CR);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7247891526ae..6b4890623146 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8650,7 +8650,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)

if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
ipat = VMX_EPT_IPAT_BIT;
- if (kvm_check_disabled_quirks(vcpu->kvm, KVM_QUIRK_CD_AS_WB))
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_QUIRK_CD_AS_WB))
cache = MTRR_TYPE_WRBACK;
else
cache = MTRR_TYPE_UNCACHABLE;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 549051784955..5ef2560075bf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3765,11 +3765,6 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
return r;
}

-static void kvm_init_disabled_quirks(struct kvm *kvm)
-{
- kvm->arch.disabled_quirks = KVM_QUIRK_CD_AS_WB;
-}
-
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -7671,8 +7666,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);

- kvm_init_disabled_quirks(kvm);
-
return 0;
}


to avoid the double negation and invert the meaning of KVM_QUIRK_CD_AS_WB.
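
In other words, callers now ask whether a quirk is still active rather
than whether it has been disabled. A self-contained sketch of the
renamed helper's logic (bit assignments as in the uapi header of that
era; plain integers stand in for the kernel types):

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define KVM_QUIRK_LINT0_REENABLED	(1ULL << 0)
#define KVM_QUIRK_CD_NW_CLEARED		(1ULL << 1)

/*
 * Models kvm_check_has_quirk(): every quirk is active by default and
 * stays active unless userspace sets its bit in disabled_quirks
 * through KVM_ENABLE_CAP/KVM_CAP_DISABLE_QUIRKS.
 */
static bool check_has_quirk(uint64_t disabled_quirks, uint64_t quirk)
{
	return !(disabled_quirks & quirk);
}

int main(void)
{
	uint64_t disabled = KVM_QUIRK_CD_NW_CLEARED; /* userspace opted out */

	printf("LINT0 quirk active: %d (want 1)\n",
	       check_has_quirk(disabled, KVM_QUIRK_LINT0_REENABLED));
	printf("CD/NW quirk active: %d (want 0)\n",
	       check_has_quirk(disabled, KVM_QUIRK_CD_NW_CLEARED));
	return 0;
}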

Paolo

------------------------------------------------------------------------------
Paolo Bonzini
2015-07-23 06:21:08 UTC
Permalink
Currently the code uses the default memory type if MTRR is fully disabled;
fix it by using UC instead
---
In fact this is the same quirk already implemented for SVM as
KVM_QUIRK_CD_NW_CLEARED, so we can reuse the bit.
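
A sketch of what the VMX CR0.CD branch looks like with the bit reused
(this simply substitutes the existing SVM bit into the vmx.c hunk
above; it is a user-space model, not the final committed form):

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define KVM_QUIRK_CD_NW_CLEARED	(1ULL << 1)	/* SVM's existing bit */
enum { MTRR_TYPE_UNCACHABLE = 0, MTRR_TYPE_WRBACK = 6 };

static bool check_has_quirk(uint64_t disabled_quirks, uint64_t quirk)
{
	return !(disabled_quirks & quirk);
}

/*
 * Models the VMX CR0.CD branch with the SVM bit reused in place of the
 * short-lived KVM_QUIRK_CD_AS_WB: quirk active -> pretend WB so old
 * firmware boots fast; quirk disabled -> honest UC.
 */
static int cd_memory_type(uint64_t disabled_quirks)
{
	if (check_has_quirk(disabled_quirks, KVM_QUIRK_CD_NW_CLEARED))
		return MTRR_TYPE_WRBACK;
	return MTRR_TYPE_UNCACHABLE;
}

int main(void)
{
	printf("default:  %d (want 6)\n", cd_memory_type(0));
	printf("disabled: %d (want 0)\n",
	       cd_memory_type(KVM_QUIRK_CD_NW_CLEARED));
	return 0;
}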

Paolo

------------------------------------------------------------------------------
Paolo Bonzini
2015-07-23 07:18:55 UTC
Permalink
Post by Paolo Bonzini
In fact this is the same quirk already implemented for SVM as
KVM_QUIRK_CD_NW_CLEARED, so we can reuse the bit.
Post by Xiao Guangrong
Sounds good to me. I will drop the new bit and reuse it as you suggest.
And I think we need to document all of this in API.txt ...
No problem, I've already pushed it to kvm/queue. I'm running tests now
and will send a pull request to Linus tomorrow if all goes well.

Paolo

------------------------------------------------------------------------------
Alex Williamson
2015-07-29 19:07:13 UTC
Permalink
Hi Paolo,

Something bad happened to this patch. This is the version I provided:
kvm_mtrr_get_guest_memory_type never returns -1 which is implied
in the current code since if @type = -1 (means no MTRR contains the
range), iter.partial_map must be true
Simplify the code to indicate this fact
---
arch/x86/kvm/mtrr.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index e275013..9e8bf13 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -672,15 +672,16 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
if (iter.mtrr_disabled)
return mtrr_disabled_type();
- /* It is not covered by MTRRs. */
- if (iter.partial_map) {
- /*
- * We just check one page, partially covered by MTRRs is
- * impossible.
- */
- WARN_ON(type != -1);
- type = mtrr_default_type(mtrr_state);
- }
+ /* not contained in any MTRRs. */
+ if (type == -1)
+ return mtrr_default_type(mtrr_state);
+
+ /*
+ * We just check one page, partially covered by MTRRs is
+ * impossible.
+ */
+ WARN_ON(iter.partial_map);
+
return type;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
This is the version that was committed for v4.2-rc4:

commit 3e5d2fdceda172554e681b68c853bf5d08205bbf
Author: Xiao Guangrong <***@intel.com>
Date: Thu Jul 16 03:25:55 2015 +0800

KVM: MTRR: simplify kvm_mtrr_get_guest_memory_type

kvm_mtrr_get_guest_memory_type never returns -1 which is implied
in the current code since if @type = -1 (means no MTRR contains the
range), iter.partial_map must be true

Simplify the code to indicate this fact

Signed-off-by: Xiao Guangrong <***@intel.com>
Tested-by: Alex Williamson <***@redhat.com>
Signed-off-by: Paolo Bonzini <***@redhat.com>

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index e275013..dc0a84a 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -672,15 +672,16 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
if (iter.mtrr_disabled)
return mtrr_disabled_type();

- /* It is not covered by MTRRs. */
- if (iter.partial_map) {
- /*
- * We just check one page, partially covered by MTRRs is
- * impossible.
- */
- WARN_ON(type != -1);
- type = mtrr_default_type(mtrr_state);
- }
+ /*
+ * We just check one page, partially covered by MTRRs is
+ * impossible.
+ */
+ WARN_ON(iter.partial_map);
+
+ /* not contained in any MTRRs. */
+ if (type == -1)
+ return mtrr_default_type(mtrr_state);
+
return type;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);

The WARN_ON() now comes before the type == -1 test and I hit that at
*very* high frequency when trying to test device assignment. Restoring
the ordering to what Xiao originally proposed resolves the problem.
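
The failure mode is easy to model in user space (illustrative only;
the constant 6 for WB stands in for mtrr_default_type()):

#include <stdio.h>
#include <stdbool.h>

/*
 * Models the tail of kvm_mtrr_get_guest_memory_type for one page: if
 * no MTRR matched, type is -1 AND iter.partial_map is true, so the
 * order of the two checks decides whether the warning fires.
 */
static int tail_committed(int type, bool partial_map)
{
	if (partial_map)		/* committed order: warn first */
		fprintf(stderr, "WARN_ON hit\n");
	if (type == -1)
		return 6;		/* mtrr_default_type() stand-in */
	return type;
}

static int tail_original(int type, bool partial_map)
{
	if (type == -1)			/* original order: return first */
		return 6;
	if (partial_map)
		fprintf(stderr, "WARN_ON hit\n");
	return type;
}

int main(void)
{
	/* A page covered by no MTRR at all: */
	tail_committed(-1, true);	/* spurious warning on every lookup */
	tail_original(-1, true);	/* silent, as intended */
	return 0;
}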
Thanks,

Alex


------------------------------------------------------------------------------
Paolo Bonzini
2015-07-30 07:21:53 UTC
Permalink
Post by Alex Williamson
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index e275013..dc0a84a 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -672,15 +672,16 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
if (iter.mtrr_disabled)
return mtrr_disabled_type();
- /* It is not covered by MTRRs. */
- if (iter.partial_map) {
- /*
- * We just check one page, partially covered by MTRRs is
- * impossible.
- */
- WARN_ON(type != -1);
- type = mtrr_default_type(mtrr_state);
- }
+ /*
+ * We just check one page, partially covered by MTRRs is
+ * impossible.
+ */
+ WARN_ON(iter.partial_map);
+
+ /* not contained in any MTRRs. */
+ if (type == -1)
+ return mtrr_default_type(mtrr_state);
+
return type;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
The WARN_ON() now comes before the type == -1 test and I hit that at
*very* high frequency when trying to test device assignment. Restoring
the ordering to what Xiao originally proposed resolves the problem.
Thanks,
And I remember testing the change and seeing the warning. So, in short,
I screwed up. Probably I tested on one machine and pushed from another. :/

Paolo

------------------------------------------------------------------------------