
From: William Lee Irwin III <wli@holomorphy.com>

i386 hardware walks the pagetables itself, so pte_clear() can't be used
on present ptes: it writes the upper half of the hugepte before it
writes the lower half (which includes the valid bit).  That is, there is
a window in which, with a hugepage mapped at 56GB, doing pte_clear() in
unmap_hugepage_range() allows other threads of the process to see a
hugepage at 0 in place of the original hugepage at 56GB.

This patch corrects the situation by using ptep_get_and_clear(), which
clears the lower word of the pte prior to clearing the upper word.

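There is another nasty issue: huge_page_release() needs to wait for TLB
flushes before returning the hugepages to the free pool, analogous to the
issue that tlb_remove_page() and tlb_flush_mm() repair.  This patch does
not address it; huge_page_release() is still called before
flush_tlb_range().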


---

 25-akpm/arch/i386/mm/hugetlbpage.c |    9 ++++-----
 1 files changed, 4 insertions(+), 5 deletions(-)

diff -puN arch/i386/mm/hugetlbpage.c~i386-hugetlb-tlb-correction arch/i386/mm/hugetlbpage.c
--- 25/arch/i386/mm/hugetlbpage.c~i386-hugetlb-tlb-correction	2004-04-21 00:56:17.534242536 -0700
+++ 25-akpm/arch/i386/mm/hugetlbpage.c	2004-04-21 00:56:17.539241776 -0700
@@ -209,19 +209,18 @@ void unmap_hugepage_range(struct vm_area
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
-	pte_t *pte;
+	pte_t pte;
 	struct page *page;
 
 	BUG_ON(start & (HPAGE_SIZE - 1));
 	BUG_ON(end & (HPAGE_SIZE - 1));
 
 	for (address = start; address < end; address += HPAGE_SIZE) {
-		pte = huge_pte_offset(mm, address);
-		if (pte_none(*pte))
+		pte = ptep_get_and_clear(huge_pte_offset(mm, address));
+		if (pte_none(pte))
 			continue;
-		page = pte_page(*pte);
+		page = pte_page(pte);
 		huge_page_release(page);
-		pte_clear(pte);
 	}
 	mm->rss -= (end - start) >> PAGE_SHIFT;
 	flush_tlb_range(vma, start, end);

_
