
From: Hugh Dickins <hugh@veritas.com>




 25-akpm/mm/rmap.c |  147 ++++++++++++++++++++++++++++++++----------------------
 1 files changed, 89 insertions(+), 58 deletions(-)

diff -puN mm/rmap.c~page_convert_anon-locking-fix mm/rmap.c
--- 25/mm/rmap.c~page_convert_anon-locking-fix	Fri Mar 28 14:25:55 2003
+++ 25-akpm/mm/rmap.c	Fri Mar 28 14:25:55 2003
@@ -784,106 +784,137 @@ int page_convert_anon(struct page *page)
 	struct vm_area_struct *vma;
 	struct pte_chain *pte_chain = NULL, *ptec;
 	pte_t *pte;
-	pte_addr_t pte_paddr;
+	pte_addr_t pte_paddr = 0;
 	int mapcount;
-	int index = 0;
+	int ptecount;
+	int index = 1;
 	int err = 0;
 
+	down(&mapping->i_shared_sem);
+	pte_chain_lock(page);
+
+	/*
+	 * Has someone else done it for us before we got the lock?
+	 * If so, pte.direct or pte.chain has replaced pte.mapcount.
+	 */
 	if (PageAnon(page))
-		goto out;
+		goto out_unlock;
 
-retry:
 	/*
 	 * Preallocate the pte_chains outside the lock.
+	 * If mapcount grows while we're allocating here, retry.
+	 * If mapcount shrinks, we free the excess before returning.
 	 */
 	mapcount = page->pte.mapcount;
-	if (mapcount > 1) {
+	while (index < mapcount) {
+		pte_chain_unlock(page);
+		up(&mapping->i_shared_sem);
 		for (; index < mapcount; index += NRPTE) {
+			if (index == 1)
+				index = 0;
 			ptec = pte_chain_alloc(GFP_KERNEL);
 			if (!ptec) {
-				while (pte_chain) {
-					ptec = pte_chain->next;
-					pte_chain_free(pte_chain);
-					pte_chain = ptec;
-				}
 				err = -ENOMEM;
-				goto out;
+				goto out_free;
 			}
 			ptec->next = pte_chain;
 			pte_chain = ptec;
 		}
+		down(&mapping->i_shared_sem);
+		pte_chain_lock(page);
+		/*
+		 * Has someone else done it while we were allocating?
+		 */
+		if (PageAnon(page))
+			goto out_unlock;
+		mapcount = page->pte.mapcount;
 	}
-	down(&mapping->i_shared_sem);
-	pte_chain_lock(page);
+	if (!mapcount)
+		goto set_anon;
 
+again:
 	/*
-	 * Check to make sure the number of mappings didn't change.  If they
-	 * did, either retry or free enough pte_chains to compensate.
+	 * We don't try for page_table_lock (what would we do when a
+	 * trylock fails?), therefore there's a small chance that we
+	 * catch a vma just as it's being unmapped and its page tables
+	 * freed.  Our pte_chain_lock prevents that on vmas that really
+	 * contain our page, but not on the others we look at.  So we
+	 * might locate junk that looks just like our page's pfn.  It's
+	 * a transient and very unlikely occurrence (much less likely
+	 * than a trylock failing), so check how many maps we find,
+	 * and if too many, start all over again.
 	 */
-	if (mapcount < page->pte.mapcount) {
-		pte_chain_unlock(page);
-		up(&mapping->i_shared_sem);
-		goto retry;
-	} else if ((mapcount > page->pte.mapcount) && (mapcount > 1)) {
-		mapcount = page->pte.mapcount;
-		while ((index - NRPTE) > mapcount) {
-			index -= NRPTE;
-			ptec = pte_chain->next;
-			pte_chain_free(pte_chain);
-			pte_chain = ptec;
-		}
-		if (mapcount <= 1)
-			pte_chain_free(pte_chain);
-	}
-	SetPageAnon(page);
+	ptecount = 0;
+	ptec = pte_chain;
 
-	if (mapcount == 0)
-		goto out_unlock;
-	else if (mapcount == 1) {
-		SetPageDirect(page);
-		page->pte.direct = 0;
-	} else
-		page->pte.chain = pte_chain;
+	/*
+	 * Arrange for the first pte_chain to be partially filled at
+	 * the top, and the last (and any intermediates) to be full.
+	 */
+	index = mapcount % NRPTE;
+	if (index)
+		index = NRPTE - index;
 
-	index = NRPTE-1;
 	list_for_each_entry(vma, &mapping->i_mmap, shared) {
-		/* FIXME: unsafe without page_table_lock */
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
+			ptecount++;
+			if (unlikely(ptecount > mapcount))
+				goto again;
 			pte_paddr = ptep_to_paddr(pte);
 			pte_unmap(pte);
-			if (PageDirect(page)) {
-				page->pte.direct = pte_paddr;
-				goto out_unlock;
-			}
-			pte_chain->ptes[index] = pte_paddr;
-			if (!--index) {
-				pte_chain = pte_chain->next;
-				index = NRPTE-1;
+			if (ptec) {
+				ptec->ptes[index] = pte_paddr;
+				index++;
+				if (index == NRPTE) {
+					ptec = ptec->next;
+					index = 0;
+				}
 			}
 		}
 	}
 	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
-		/* FIXME: unsafe without page_table_lock */
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
+			ptecount++;
+			if (unlikely(ptecount > mapcount))
+				goto again;
 			pte_paddr = ptep_to_paddr(pte);
 			pte_unmap(pte);
-			if (PageDirect(page)) {
-				page->pte.direct = pte_paddr;
-				goto out_unlock;
-			}
-			pte_chain->ptes[index] = pte_paddr;
-			if (!--index) {
-				pte_chain = pte_chain->next;
-				index = NRPTE-1;
+			if (ptec) {
+				ptec->ptes[index] = pte_paddr;
+				index++;
+				if (index == NRPTE) {
+					ptec = ptec->next;
+					index = 0;
+				}
 			}
 		}
 	}
+
+	BUG_ON(ptecount != mapcount);
+	if (mapcount == 1) {
+		SetPageDirect(page);
+		page->pte.direct = pte_paddr;
+		/* If pte_chain is set, it's all excess to be freed */
+	} else {
+		page->pte.chain = pte_chain;
+		/* Point pte_chain to any excess to be freed */
+		pte_chain = ptec;
+		BUG_ON(index);
+	}
+
+set_anon:
+	SetPageAnon(page);
 out_unlock:
 	pte_chain_unlock(page);
 	up(&mapping->i_shared_sem);
-out:
+out_free:
+	while (pte_chain) {
+		ptec = pte_chain->next;
+		pte_chain_free(pte_chain);
+		pte_chain = ptec;
+	}
 	return err;
 }
 

_
