[PATCH 43/50] KVM: MMU: Simplify page table walker

Simplify the walker's level loop so that it carries less state from one
iteration to the next.  Besides being complex, the old scheme made the
kmap_atomic() critical sections difficult to manage.

As a result of this change, the kmap_atomic() sections are limited to
actually touching the guest pte, which allows the other functions called
from the walker to perform operations that may sleep.  This will be needed
once we enable swapping.
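
To illustrate the resulting access pattern, here is a minimal user-space
sketch (not part of the patch and not kernel code): pte64, fake_guest_table,
kmap_stub()/kunmap_stub() and read_guest_pte() are illustrative stand-ins
for pt_element_t, a guest page-table page, kmap_atomic()/kunmap_atomic()
and the walker's pte read.  The mapping is held only across the single copy
of the guest pte, so everything after the copy is free to sleep.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t pte64;                       /* stand-in for pt_element_t */

static pte64 fake_guest_table[512];           /* pretend guest page-table page */

static pte64 *kmap_stub(pte64 *page)  { return page; }  /* models kmap_atomic()   */
static void   kunmap_stub(pte64 *page) { (void)page; }  /* models kunmap_atomic() */

/* Read one guest pte with the mapping held only across the copy. */
static pte64 read_guest_pte(pte64 *page, unsigned index)
{
	pte64 *table = kmap_stub(page);       /* begin atomic section        */
	pte64 pte = table[index];             /* only access under the map   */
	kunmap_stub(table);                   /* end atomic section          */
	return pte;                           /* caller may now sleep        */
}

int main(void)
{
	fake_guest_table[3] = 0x1000 | 1;     /* a present pte at index 3 */
	printf("pte = %#llx\n",
	       (unsigned long long)read_guest_pte(fake_guest_table, 3));
	return 0;
}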

Signed-off-by: Avi Kivity <[email protected]>
---
 drivers/kvm/paging_tmpl.h |  124 +++++++++++++++++---------------------------
 1 files changed, 48 insertions(+), 76 deletions(-)

diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index a9e687b..bab1b7f 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -59,32 +59,12 @@
 struct guest_walker {
 	int level;
 	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
-	pt_element_t *table;
 	pt_element_t pte;
-	pt_element_t *ptep;
-	struct page *page;
-	int index;
 	pt_element_t inherited_ar;
 	gfn_t gfn;
 	u32 error_code;
 };
 
-static void FNAME(update_dirty_bit)(struct kvm_vcpu *vcpu,
-				    int write_fault,
-				    pt_element_t *ptep,
-				    gfn_t table_gfn)
-{
-	gpa_t pte_gpa;
-
-	if (write_fault && !is_dirty_pte(*ptep)) {
-		mark_page_dirty(vcpu->kvm, table_gfn);
-		*ptep |= PT_DIRTY_MASK;
-		pte_gpa = ((gpa_t)table_gfn << PAGE_SHIFT);
-		pte_gpa += offset_in_page(ptep);
-		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)ptep, sizeof(*ptep));
-	}
-}
-
 /*
  * Fetch a guest pte for a guest virtual address
  */
@@ -94,105 +74,99 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 {
 	hpa_t hpa;
 	struct kvm_memory_slot *slot;
-	pt_element_t *ptep;
-	pt_element_t root;
+	struct page *page;
+	pt_element_t *table;
+	pt_element_t pte;
 	gfn_t table_gfn;
+	unsigned index;
+	gpa_t pte_gpa;
 
 	pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
 	walker->level = vcpu->mmu.root_level;
-	walker->table = NULL;
-	walker->page = NULL;
-	walker->ptep = NULL;
-	root = vcpu->cr3;
+	pte = vcpu->cr3;
 #if PTTYPE == 64
 	if (!is_long_mode(vcpu)) {
-		walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
-		root = *walker->ptep;
-		walker->pte = root;
-		if (!(root & PT_PRESENT_MASK))
+		pte = vcpu->pdptrs[(addr >> 30) & 3];
+		if (!is_present_pte(pte))
 			goto not_present;
 		--walker->level;
 	}
 #endif
-	table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
-	walker->table_gfn[walker->level - 1] = table_gfn;
-	pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
-		 walker->level - 1, table_gfn);
-	slot = gfn_to_memslot(vcpu->kvm, table_gfn);
-	hpa = safe_gpa_to_hpa(vcpu->kvm, root & PT64_BASE_ADDR_MASK);
-	walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
-	walker->table = kmap_atomic(walker->page, KM_USER0);
-
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 	       (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
 
 	walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
 
 	for (;;) {
-		int index = PT_INDEX(addr, walker->level);
-		hpa_t paddr;
+		index = PT_INDEX(addr, walker->level);
 
-		ptep = &walker->table[index];
-		walker->index = index;
-		ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
-		       ((unsigned long)ptep & PAGE_MASK));
+		table_gfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+		walker->table_gfn[walker->level - 1] = table_gfn;
+		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+			 walker->level - 1, table_gfn);
+
+		slot = gfn_to_memslot(vcpu->kvm, table_gfn);
+		hpa = safe_gpa_to_hpa(vcpu->kvm, pte & PT64_BASE_ADDR_MASK);
+		page = pfn_to_page(hpa >> PAGE_SHIFT);
 
-		if (!is_present_pte(*ptep))
+		table = kmap_atomic(page, KM_USER0);
+		pte = table[index];
+		kunmap_atomic(table, KM_USER0);
+
+		if (!is_present_pte(pte))
 			goto not_present;
 
-		if (write_fault && !is_writeble_pte(*ptep))
+		if (write_fault && !is_writeble_pte(pte))
 			if (user_fault || is_write_protection(vcpu))
 				goto access_error;
 
-		if (user_fault && !(*ptep & PT_USER_MASK))
+		if (user_fault && !(pte & PT_USER_MASK))
 			goto access_error;
 
 #if PTTYPE == 64
-		if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK))
+		if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
 			goto access_error;
 #endif
 
-		if (!(*ptep & PT_ACCESSED_MASK)) {
+		if (!(pte & PT_ACCESSED_MASK)) {
 			mark_page_dirty(vcpu->kvm, table_gfn);
-			*ptep |= PT_ACCESSED_MASK;
+			pte |= PT_ACCESSED_MASK;
+			table = kmap_atomic(page, KM_USER0);
+			table[index] = pte;
+			kunmap_atomic(table, KM_USER0);
 		}
 
 		if (walker->level == PT_PAGE_TABLE_LEVEL) {
-			walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
-				>> PAGE_SHIFT;
-			FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
-						table_gfn);
+			walker->gfn = (pte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
 			break;
 		}
 
 		if (walker->level == PT_DIRECTORY_LEVEL
-		    && (*ptep & PT_PAGE_SIZE_MASK)
+		    && (pte & PT_PAGE_SIZE_MASK)
 		    && (PTTYPE == 64 || is_pse(vcpu))) {
-			walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
+			walker->gfn = (pte & PT_DIR_BASE_ADDR_MASK)
 				>> PAGE_SHIFT;
 			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
-			FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
-						table_gfn);
 			break;
 		}
 
-		walker->inherited_ar &= walker->table[index];
-		table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
-		kunmap_atomic(walker->table, KM_USER0);
-		paddr = safe_gpa_to_hpa(vcpu->kvm, table_gfn << PAGE_SHIFT);
-		walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
-		walker->table = kmap_atomic(walker->page, KM_USER0);
+		walker->inherited_ar &= pte;
 		--walker->level;
-		walker->table_gfn[walker->level - 1] = table_gfn;
-		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
-			 walker->level - 1, table_gfn);
 	}
-	walker->pte = *ptep;
-	if (walker->page)
-		walker->ptep = NULL;
-	if (walker->table)
-		kunmap_atomic(walker->table, KM_USER0);
-	pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
+
+	if (write_fault && !is_dirty_pte(pte)) {
+		mark_page_dirty(vcpu->kvm, table_gfn);
+		pte |= PT_DIRTY_MASK;
+		table = kmap_atomic(page, KM_USER0);
+		table[index] = pte;
+		kunmap_atomic(table, KM_USER0);
+		pte_gpa = table_gfn << PAGE_SHIFT;
+		pte_gpa += index * sizeof(pt_element_t);
+		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+	}
+
+	walker->pte = pte;
+	pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)pte);
 	return 1;
 
 not_present:
@@ -209,8 +183,6 @@ err:
 		walker->error_code |= PFERR_USER_MASK;
 	if (fetch_fault)
 		walker->error_code |= PFERR_FETCH_MASK;
-	if (walker->table)
-		kunmap_atomic(walker->table, KM_USER0);
 	return 0;
 }
 
-- 
1.5.3.7
