

From Dave.  Crappy name, but: this is the object-based reverse mapping
("objrmap") patch.

Instead of keeping a pte_chain entry for every pte which maps a pagecache
page, file-backed pages are found at reclaim time by walking the vmas on
their address_space's i_mmap and i_mmap_shared lists, which are now kept
sorted by vm_pgoff.  A new PG_anon page flag marks anonymous pages, which
continue to use the existing pte_chain-based rmap; for file-backed pages,
page->pte instead holds a simple mapcount.  Pages which gain a nonlinear
mapping via sys_remap_file_pages() no longer sit at the file offset their
vma implies, so install_page() converts them back to pte_chain-based rmap
with page_convert_anon().
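
To illustrate the core of the object-based lookup (find_pte() in mm/rmap.c
below): for a linearly mapped file page, the user virtual address inside a
vma follows directly from page->index, vma->vm_pgoff and vma->vm_start.
The stand-alone sketch below shows only that arithmetic; the struct name,
helper name and constants are invented for the demo and are not part of
this patch or of the kernel API.

	#include <stdio.h>

	#define PAGE_SHIFT		12
	#define PAGE_CACHE_SHIFT	12	/* pagecache unit == hardware page here */

	struct vma_demo {
		unsigned long vm_start;	/* first mapped user address */
		unsigned long vm_end;	/* one past the last mapped address */
		unsigned long vm_pgoff;	/* file offset of vm_start, in pages */
	};

	/*
	 * Same arithmetic as find_pte(): the address at which page_index
	 * would be mapped in this vma, or 0 if it falls outside the vma.
	 */
	static unsigned long page_address_in_vma(unsigned long page_index,
						 const struct vma_demo *vma)
	{
		unsigned long loffset = page_index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
		unsigned long address = vma->vm_start +
					((loffset - vma->vm_pgoff) << PAGE_SHIFT);

		if (address < vma->vm_start || address >= vma->vm_end)
			return 0;
		return address;
	}

	int main(void)
	{
		/* a vma mapping file pages 16..31 at 0x40000000..0x40010000 */
		struct vma_demo vma = {
			.vm_start = 0x40000000UL,
			.vm_end   = 0x40010000UL,
			.vm_pgoff = 16,
		};

		printf("page 20 -> %#lx\n", page_address_in_vma(20, &vma)); /* 0x40004000 */
		printf("page 40 -> %#lx\n", page_address_in_vma(40, &vma)); /* 0: not in this vma */
		return 0;
	}

This is also why install_page() converts a page to pte_chain-based ("anon")
rmap when it is installed nonlinearly: once page->index no longer matches
the offset implied by the vma, the arithmetic above cannot find the mapping,
so a pte_chain has to record it explicitly.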


 fs/exec.c                    |    1 
 include/asm-i386/mman.h      |    0 
 include/asm-ppc64/mman.h     |    0 
 include/linux/mm.h           |    1 
 include/linux/page-flags.h   |    5 
 include/linux/rmap-locking.h |    0 
 include/linux/swap.h         |    2 
 mm/filemap.c                 |    3 
 mm/fremap.c                  |   22 +-
 mm/memory.c                  |    8 
 mm/mmap.c                    |   16 +
 mm/page_alloc.c              |    2 
 mm/rmap.c                    |  404 ++++++++++++++++++++++++++++++++++++++++++-
 mm/swapfile.c                |  116 +++++++-----
 14 files changed, 520 insertions(+), 60 deletions(-)

diff -puN fs/exec.c~objrmap fs/exec.c
--- 25/fs/exec.c~objrmap	2003-04-08 03:16:49.000000000 -0700
+++ 25-akpm/fs/exec.c	2003-04-08 03:16:49.000000000 -0700
@@ -315,6 +315,7 @@ void put_dirty_page(struct task_struct *
 	lru_cache_add_active(page);
 	flush_dcache_page(page);
 	flush_page_to_ram(page);
+	SetPageAnon(page);
 	set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY))));
 	pte_chain = page_add_rmap(page, pte, pte_chain);
 	pte_unmap(pte);
diff -puN include/asm-i386/mman.h~objrmap include/asm-i386/mman.h
diff -puN include/linux/mm.h~objrmap include/linux/mm.h
--- 25/include/linux/mm.h~objrmap	2003-04-08 03:16:49.000000000 -0700
+++ 25-akpm/include/linux/mm.h	2003-04-08 03:16:49.000000000 -0700
@@ -170,6 +170,7 @@ struct page {
 		struct pte_chain *chain;/* Reverse pte mapping pointer.
 					 * protected by PG_chainlock */
 		pte_addr_t direct;
+		int mapcount;
 	} pte;
 	unsigned long private;		/* mapping-private opaque data */
 
diff -puN include/linux/page-flags.h~objrmap include/linux/page-flags.h
--- 25/include/linux/page-flags.h~objrmap	2003-04-08 03:16:49.000000000 -0700
+++ 25-akpm/include/linux/page-flags.h	2003-04-08 03:16:49.000000000 -0700
@@ -74,6 +74,7 @@
 #define PG_mappedtodisk		17	/* Has blocks allocated on-disk */
 #define PG_reclaim		18	/* To be reclaimed asap */
 #define PG_compound		19	/* Part of a compound page */
+#define PG_anon			20	/* Anonymous page */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -254,6 +255,10 @@ extern void get_full_page_state(struct p
 #define SetPageCompound(page)	set_bit(PG_compound, &(page)->flags)
 #define ClearPageCompound(page)	clear_bit(PG_compound, &(page)->flags)
 
+#define PageAnon(page)		test_bit(PG_anon, &(page)->flags)
+#define SetPageAnon(page)	set_bit(PG_anon, &(page)->flags)
+#define ClearPageAnon(page)	clear_bit(PG_anon, &(page)->flags)
+
 /*
  * The PageSwapCache predicate doesn't use a PG_flag at this time,
  * but it may again do so one day.
diff -puN mm/fremap.c~objrmap mm/fremap.c
--- 25/mm/fremap.c~objrmap	2003-04-08 03:16:49.000000000 -0700
+++ 25-akpm/mm/fremap.c	2003-04-08 03:16:49.000000000 -0700
@@ -60,10 +60,26 @@ int install_page(struct mm_struct *mm, s
 	pgd_t *pgd;
 	pmd_t *pmd;
 	struct pte_chain *pte_chain;
+	unsigned long pgidx;
 
 	pte_chain = pte_chain_alloc(GFP_KERNEL);
 	if (!pte_chain)
 		goto err;
+
+	/*
+	 * Convert this page to anon for objrmap if it's nonlinear
+	 */
+	pgidx = (addr - vma->vm_start) >> PAGE_SHIFT;
+	pgidx += vma->vm_pgoff;
+	pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
+	if (!PageAnon(page) && (page->index != pgidx)) {
+		lock_page(page);
+		err = page_convert_anon(page);
+		unlock_page(page);
+		if (err < 0)
+			goto err_free;
+	}
+
 	pgd = pgd_offset(mm, addr);
 	spin_lock(&mm->page_table_lock);
 
@@ -86,12 +102,10 @@ int install_page(struct mm_struct *mm, s
 	if (flush)
 		flush_tlb_page(vma, addr);
 
-	spin_unlock(&mm->page_table_lock);
-	pte_chain_free(pte_chain);
-	return 0;
-
+	err = 0;
 err_unlock:
 	spin_unlock(&mm->page_table_lock);
+err_free:
 	pte_chain_free(pte_chain);
 err:
 	return err;
diff -puN mm/memory.c~objrmap mm/memory.c
--- 25/mm/memory.c~objrmap	2003-04-08 03:16:49.000000000 -0700
+++ 25-akpm/mm/memory.c	2003-04-08 03:16:49.000000000 -0700
@@ -1012,6 +1012,7 @@ static int do_wp_page(struct mm_struct *
 			++mm->rss;
 		page_remove_rmap(old_page, page_table);
 		break_cow(vma, new_page, address, page_table);
+		SetPageAnon(new_page);
 		pte_chain = page_add_rmap(new_page, page_table, pte_chain);
 		lru_cache_add_active(new_page);
 
@@ -1221,6 +1222,7 @@ static int do_swap_page(struct mm_struct
 	flush_page_to_ram(page);
 	flush_icache_page(vma, page);
 	set_pte(page_table, pte);
+	SetPageAnon(page);
 	pte_chain = page_add_rmap(page, page_table, pte_chain);
 
 	/* No need to invalidate - it was non-present before */
@@ -1287,6 +1289,7 @@ do_anonymous_page(struct mm_struct *mm, 
 		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
 		lru_cache_add_active(page);
 		mark_page_accessed(page);
+		SetPageAnon(page);
 	}
 
 	set_pte(page_table, entry);
@@ -1346,6 +1349,10 @@ do_no_page(struct mm_struct *mm, struct 
 	if (!pte_chain)
 		goto oom;
 
+	/* See if nopage returned an anon page */
+	if (!new_page->mapping || PageSwapCache(new_page))
+		SetPageAnon(new_page);
+
 	/*
 	 * Should we do an early C-O-W break?
 	 */
@@ -1358,6 +1365,7 @@ do_no_page(struct mm_struct *mm, struct 
 		copy_user_highpage(page, new_page, address);
 		page_cache_release(new_page);
 		lru_cache_add_active(page);
+		SetPageAnon(page);
 		new_page = page;
 	}
 
diff -puN mm/mmap.c~objrmap mm/mmap.c
--- 25/mm/mmap.c~objrmap	2003-04-08 03:16:49.000000000 -0700
+++ 25-akpm/mm/mmap.c	2003-04-08 03:16:49.000000000 -0700
@@ -311,14 +311,26 @@ static inline void __vma_link_file(struc
 	if (file) {
 		struct inode * inode = file->f_dentry->d_inode;
 		struct address_space *mapping = inode->i_mapping;
+		struct list_head *vmlist, *vmhead;
 
 		if (vma->vm_flags & VM_DENYWRITE)
 			atomic_dec(&inode->i_writecount);
 
 		if (vma->vm_flags & VM_SHARED)
-			list_add_tail(&vma->shared, &mapping->i_mmap_shared);
+			vmhead = &mapping->i_mmap_shared;
 		else
-			list_add_tail(&vma->shared, &mapping->i_mmap);
+			vmhead = &mapping->i_mmap;
+
+		list_for_each(vmlist, vmhead) {
+			struct vm_area_struct *vmtemp;
+			vmtemp = list_entry(vmlist, struct vm_area_struct, shared);
+			if (vmtemp->vm_pgoff >= vma->vm_pgoff)
+				break;
+		}
+		/* Insert before vmlist so the list stays sorted by vm_pgoff
+		 * (if we walked off the end, vmlist == vmhead and this just
+		 * adds at the tail). */
+		list_add_tail(&vma->shared, vmlist);
 	}
 }
 
diff -puN mm/page_alloc.c~objrmap mm/page_alloc.c
--- 25/mm/page_alloc.c~objrmap	2003-04-08 03:16:49.000000000 -0700
+++ 25-akpm/mm/page_alloc.c	2003-04-08 03:16:49.000000000 -0700
@@ -220,6 +220,8 @@ static inline void free_pages_check(cons
 		bad_page(function, page);
 	if (PageDirty(page))
 		ClearPageDirty(page);
+	if (PageAnon(page))
+		ClearPageAnon(page);
 }
 
 /*
diff -puN mm/rmap.c~objrmap mm/rmap.c
--- 25/mm/rmap.c~objrmap	2003-04-08 03:16:49.000000000 -0700
+++ 25-akpm/mm/rmap.c	2003-04-08 03:16:49.000000000 -0700
@@ -14,8 +14,8 @@
 /*
  * Locking:
  * - the page->pte.chain is protected by the PG_chainlock bit,
- *   which nests within the zone->lru_lock, then the
- *   mm->page_table_lock, and then the page lock.
+ *   which nests within the mm->page_table_lock,
+ *   which nests within the page lock.
  * - because swapout locking is opposite to the locking order
  *   in the page fault path, the swapout path uses trylocks
  *   on the mm->page_table_lock
@@ -102,6 +102,140 @@ pte_chain_encode(struct pte_chain *pte_c
  **/
 
 /**
+ * find_pte - Find a pte pointer given a vma and a struct page.
+ * @vma: the vma to search
+ * @page: the page to find
+ *
+ * Determine if this page is mapped in this vma.  If it is, map and return
+ * the pte pointer associated with it.  Return NULL if the page is not
+ * mapped in this vma for any reason.
+ *
+ * This is strictly an internal helper function for the object-based rmap
+ * functions.
+ * 
+ * It is the caller's responsibility to unmap the pte if it is returned.
+ */
+static inline pte_t *
+find_pte(struct vm_area_struct *vma, struct page *page, unsigned long *addr)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long loffset;
+	unsigned long address;
+
+	loffset = (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT));
+	address = vma->vm_start + ((loffset - vma->vm_pgoff) << PAGE_SHIFT);
+	if (address < vma->vm_start || address >= vma->vm_end)
+		goto out;
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		goto out;
+
+	pmd = pmd_offset(pgd, address);
+	if (!pmd_present(*pmd))
+		goto out;
+
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte))
+		goto out_unmap;
+
+	if (page_to_pfn(page) != pte_pfn(*pte))
+		goto out_unmap;
+
+	if (addr)
+		*addr = address;
+
+	return pte;
+
+out_unmap:
+	pte_unmap(pte);
+out:
+	return NULL;
+}
+
+/**
+ * page_referenced_obj_one - referenced check for object-based rmap
+ * @vma: the vma to look in.
+ * @page: the page we're working on.
+ *
+ * Find a pte entry for a page/vma pair, then check and clear the referenced
+ * bit.
+ *
+ * This is strictly a helper function for page_referenced_obj.
+ */
+static int
+page_referenced_obj_one(struct vm_area_struct *vma, struct page *page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t *pte;
+	int referenced = 0;
+
+	if (!spin_trylock(&mm->page_table_lock))
+		return 1;
+
+	pte = find_pte(vma, page, NULL);
+	if (pte) {
+		if (ptep_test_and_clear_young(pte))
+			referenced++;
+		pte_unmap(pte);
+	}
+
+	spin_unlock(&mm->page_table_lock);
+	return referenced;
+}
+
+/**
+ * page_referenced_obj - referenced check for object-based rmap
+ * @page: the page we're checking references on.
+ *
+ * For an object-based mapped page, find all the places it is mapped and
+ * check/clear the referenced flag.  This is done by following the page->mapping
+ * pointer, then walking the chain of vmas it holds.  It returns the number
+ * of references it found.
+ *
+ * This function is only called from page_referenced for object-based pages.
+ *
+ * The semaphore address_space->i_shared_sem is tried.  If it can't be gotten,
+ * assume a reference count of 1.
+ */
+static int
+page_referenced_obj(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct vm_area_struct *vma;
+	int referenced = 0;
+
+	if (!page->pte.mapcount)
+		return 0;
+
+	if (!mapping)
+		BUG();
+
+	if (PageSwapCache(page))
+		BUG();
+
+	if (down_trylock(&mapping->i_shared_sem))
+		return 1;
+	
+	list_for_each_entry(vma, &mapping->i_mmap, shared) {
+		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
+			break;
+		referenced += page_referenced_obj_one(vma, page);
+	}
+	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
+		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
+			break;
+		referenced += page_referenced_obj_one(vma, page);
+	}
+	up(&mapping->i_shared_sem);
+
+	return referenced;
+}
+
+/**
  * page_referenced - test if the page was referenced
  * @page: the page to test
  *
@@ -120,6 +254,10 @@ int page_referenced(struct page * page)
 	if (TestClearPageReferenced(page))
 		referenced++;
 
+	if (!PageAnon(page)) {
+		referenced += page_referenced_obj(page);
+		goto out;
+	}
 	if (PageDirect(page)) {
 		pte_t *pte = rmap_ptep_map(page->pte.direct);
 		if (ptep_test_and_clear_young(pte))
@@ -153,6 +291,7 @@ int page_referenced(struct page * page)
 			__pte_chain_free(pc);
 		}
 	}
+out:
 	return referenced;
 }
 
@@ -175,6 +314,21 @@ page_add_rmap(struct page *page, pte_t *
 	if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
 		goto out;
 
+	/*
+	 * If this is an object-based page, just count it.  We can
+ 	 * find the mappings by walking the object vma chain for that object.
+	 */
+	if (!PageAnon(page)) {
+		if (!page->mapping)
+			BUG();
+		if (PageSwapCache(page))
+			BUG();
+		if (!page->pte.mapcount)
+			inc_page_state(nr_mapped);
+		page->pte.mapcount++;
+		goto out;
+	}
+
 	if (page->pte.direct == 0) {
 		page->pte.direct = pte_paddr;
 		SetPageDirect(page);
@@ -231,8 +385,25 @@ void page_remove_rmap(struct page *page,
 		goto out_unlock;
 
 	if (!page_mapped(page))
-		goto out_unlock;	/* remap_page_range() from a driver? */
+		goto out_unlock;
 
+	/*
+	 * If this is an object-based page, just uncount it.  We can
+	 * find the mappings by walking the object vma chain for that object.
+	 */
+	if (!PageAnon(page)) {
+		if (!page->mapping)
+			BUG();
+		if (PageSwapCache(page))
+			BUG();
+		if (!page->pte.mapcount)
+			BUG();
+		page->pte.mapcount--;
+		if (!page->pte.mapcount)
+			dec_page_state(nr_mapped);
+		goto out_unlock;
+	}
+  
 	if (PageDirect(page)) {
 		if (page->pte.direct == pte_paddr) {
 			page->pte.direct = 0;
@@ -279,6 +450,106 @@ out_unlock:
 }
 
 /**
+ * try_to_unmap_obj_one - unmap a page from one vma using object-based rmap
+ * @page: the page to unmap
+ *
+ * Determine whether a page is mapped in a given vma and unmap it if it's found.
+ *
+ * This function is strictly a helper function for try_to_unmap_obj.
+ */
+static inline int
+try_to_unmap_obj_one(struct vm_area_struct *vma, struct page *page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long address;
+	pte_t *pte;
+	pte_t pteval;
+	int ret = SWAP_AGAIN;
+
+	if (!spin_trylock(&mm->page_table_lock))
+		return ret;
+
+	pte = find_pte(vma, page, &address);
+	if (!pte)
+		goto out;
+
+	if (vma->vm_flags & VM_LOCKED) {
+		ret =  SWAP_FAIL;
+		goto out_unmap;
+	}
+
+	flush_cache_page(vma, address);
+	pteval = ptep_get_and_clear(pte);
+	flush_tlb_page(vma, address);
+
+	if (pte_dirty(pteval))
+		set_page_dirty(page);
+
+	if (!page->pte.mapcount)
+		BUG();
+
+	mm->rss--;
+	page->pte.mapcount--;
+	page_cache_release(page);
+
+out_unmap:
+	pte_unmap(pte);
+
+out:
+	spin_unlock(&mm->page_table_lock);
+	return ret;
+}
+
+/**
+ * try_to_unmap_obj - unmap a page using the object-based rmap method
+ * @page: the page to unmap
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the address_space struct it points to.
+ *
+ * This function is only called from try_to_unmap for object-based pages.
+ *
+ * The semaphore address_space->i_shared_sem is tried.  If it can't be gotten,
+ * return a temporary error.
+ */
+static int
+try_to_unmap_obj(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct vm_area_struct *vma;
+	int ret = SWAP_AGAIN;
+
+	if (!mapping)
+		BUG();
+
+	if (PageSwapCache(page))
+		BUG();
+
+	if (down_trylock(&mapping->i_shared_sem))
+		return ret;
+	
+	list_for_each_entry(vma, &mapping->i_mmap, shared) {
+		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
+			break;
+		ret = try_to_unmap_obj_one(vma, page);
+		if (ret == SWAP_FAIL || !page->pte.mapcount)
+			goto out;
+	}
+
+	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
+		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
+			break;
+		ret = try_to_unmap_obj_one(vma, page);
+		if (ret == SWAP_FAIL || !page->pte.mapcount)
+			goto out;
+	}
+
+out:
+	up(&mapping->i_shared_sem);
+	return ret;
+}
+
+/**
  * try_to_unmap_one - worker function for try_to_unmap
  * @page: page to unmap
  * @ptep: page table entry to unmap from page
@@ -287,9 +558,8 @@ out_unlock:
  * table entry mapping a page. Because locking order here is opposite
  * to the locking order used by the page fault path, we use trylocks.
  * Locking:
- *	zone->lru_lock			page_launder()
- *	    page lock			page_launder(), trylock
- *		pte_chain_lock		page_launder()
+ *	    page lock			shrink_list(), trylock
+ *		pte_chain_lock		shrink_list()
  *		    mm->page_table_lock	try_to_unmap_one(), trylock
  */
 static int FASTCALL(try_to_unmap_one(struct page *, pte_addr_t));
@@ -376,8 +646,8 @@ out_unlock:
  * @page: the page to get unmapped
  *
  * Tries to remove all the page table entries which are mapping this
- * page, used in the pageout path.  Caller must hold zone->lru_lock
- * and the page lock.  Return values are:
+ * page, used in the pageout path.  Caller must hold the page lock
+ * and its pte chain lock.  Return values are:
  *
  * SWAP_SUCCESS	- we succeeded in removing all mappings
  * SWAP_AGAIN	- we missed a trylock, try again later
@@ -398,6 +668,15 @@ int try_to_unmap(struct page * page)
 	if (!page->mapping)
 		BUG();
 
+	/*
+	 * If it's an object-based page, use the object vma chain to find all
+	 * the mappings.
+	 */
+	if (!PageAnon(page)) {
+		ret = try_to_unmap_obj(page);
+		goto out;
+	}
+
 	if (PageDirect(page)) {
 		ret = try_to_unmap_one(page, page->pte.direct);
 		if (ret == SWAP_SUCCESS) {
@@ -453,12 +732,119 @@ int try_to_unmap(struct page * page)
 		}
 	}
 out:
-	if (!page_mapped(page))
+	if (!page_mapped(page)) {
 		dec_page_state(nr_mapped);
+		ret = SWAP_SUCCESS;
+	}
 	return ret;
 }
 
 /**
+ * page_convert_anon - Convert an object-based mapped page to pte_chain-based.
+ * @page: the page to convert
+ *
+ * Find all the mappings for an object-based page and convert them
+ * to 'anonymous', ie create a pte_chain and store all the pte pointers there.
+ *
+ * This function takes the address_space->i_shared_sem, sets the PageAnon flag,
+ * then sets the mm->page_table_lock for each vma and calls page_add_rmap. This
+ * means there is a period when PageAnon is set, but still has some mappings
+ * with no pte_chain entry.  This is in fact safe, since page_remove_rmap will
+ * simply not find it.  try_to_unmap might erroneously return success, but it
+ * will never be called because the page_convert_anon() caller has locked the
+ * page.
+ *
+ * page_referenced() may fail to scan all the appropriate pte's and may return
+ * an inaccurate result.  This is so rare that it does not matter.
+ */
+int page_convert_anon(struct page *page)
+{
+	struct address_space *mapping;
+	struct vm_area_struct *vma;
+	struct pte_chain *pte_chain = NULL;
+	pte_t *pte;
+	int err = 0;
+
+	mapping = page->mapping;
+	if (mapping == NULL)
+		goto out;		/* truncate won the lock_page() race */
+
+	down(&mapping->i_shared_sem);
+	pte_chain_lock(page);
+
+	/*
+	 * Has someone else done it for us before we got the lock?
+	 * If so, pte.direct or pte.chain has replaced pte.mapcount.
+	 */
+	if (PageAnon(page)) {
+		pte_chain_unlock(page);
+		goto out_unlock;
+	}
+
+	SetPageAnon(page);
+	if (page->pte.mapcount == 0) {
+		pte_chain_unlock(page);
+		goto out_unlock;
+	}
+	/* This will be re-incremented by page_add_rmap() below */
+	dec_page_state(nr_mapped);
+	page->pte.mapcount = 0;
+
+	/*
+	 * Now that the page is marked as anon, unlock it.  page_add_rmap will
+	 * lock it as necessary.
+	 */
+	pte_chain_unlock(page);
+
+	list_for_each_entry(vma, &mapping->i_mmap, shared) {
+		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
+			break;
+		if (!pte_chain) {
+			pte_chain = pte_chain_alloc(GFP_KERNEL);
+			if (!pte_chain) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+		}
+		spin_lock(&vma->vm_mm->page_table_lock);
+		pte = find_pte(vma, page, NULL);
+		if (pte) {
+			/* Make sure this isn't a duplicate */
+			page_remove_rmap(page, pte);
+			pte_chain = page_add_rmap(page, pte, pte_chain);
+			pte_unmap(pte);
+		}
+		spin_unlock(&vma->vm_mm->page_table_lock);
+	}
+	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
+		if (vma->vm_pgoff > (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT)))
+			break;
+		if (!pte_chain) {
+			pte_chain = pte_chain_alloc(GFP_KERNEL);
+			if (!pte_chain) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+		}
+		spin_lock(&vma->vm_mm->page_table_lock);
+		pte = find_pte(vma, page, NULL);
+		if (pte) {
+			/* Make sure this isn't a duplicate */
+			page_remove_rmap(page, pte);
+			pte_chain = page_add_rmap(page, pte, pte_chain);
+			pte_unmap(pte);
+		}
+		spin_unlock(&vma->vm_mm->page_table_lock);
+	}
+
+out_unlock:
+	pte_chain_free(pte_chain);
+	up(&mapping->i_shared_sem);
+out:
+	return err;
+}
+
+/**
  ** No more VM stuff below this comment, only pte_chain helper
  ** functions.
  **/
diff -puN mm/swapfile.c~objrmap mm/swapfile.c
--- 25/mm/swapfile.c~objrmap	2003-04-08 03:16:49.000000000 -0700
+++ 25-akpm/mm/swapfile.c	2003-04-08 03:16:49.000000000 -0700
@@ -377,41 +377,34 @@ void free_swap_and_cache(swp_entry_t ent
  * share this swap entry, so be cautious and let do_wp_page work out
  * what to do if a write is requested later.
  */
-/* mmlist_lock and vma->vm_mm->page_table_lock are held */
+/* vma->vm_mm->page_table_lock is held */
 static void
 unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
 	swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp)
 {
-	pte_t pte = *dir;
-
-	if (pte_file(pte))
-		return;
-	if (likely(pte_to_swp_entry(pte).val != entry.val))
-		return;
-	if (unlikely(pte_none(pte) || pte_present(pte)))
-		return;
+	vma->vm_mm->rss++;
 	get_page(page);
 	set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
+	SetPageAnon(page);
 	*pte_chainp = page_add_rmap(page, dir, *pte_chainp);
 	swap_free(entry);
-	++vma->vm_mm->rss;
 }
 
-/* mmlist_lock and vma->vm_mm->page_table_lock are held */
-static void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
+/* vma->vm_mm->page_table_lock is held */
+static int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
 	unsigned long address, unsigned long size, unsigned long offset,
-	swp_entry_t entry, struct page* page)
+	swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp)
 {
 	pte_t * pte;
 	unsigned long end;
-	struct pte_chain *pte_chain = NULL;
+	pte_t swp_pte = swp_entry_to_pte(entry);
 
 	if (pmd_none(*dir))
-		return;
+		return 0;
 	if (pmd_bad(*dir)) {
 		pmd_ERROR(*dir);
 		pmd_clear(dir);
-		return;
+		return 0;
 	}
 	pte = pte_offset_map(dir, address);
 	offset += address & PMD_MASK;
@@ -421,33 +414,36 @@ static void unuse_pmd(struct vm_area_str
 		end = PMD_SIZE;
 	do {
 		/*
-		 * FIXME: handle pte_chain_alloc() failures
+		 * swapoff spends a _lot_ of time in this loop!
+		 * Test inline before going to call unuse_pte.
 		 */
-		if (pte_chain == NULL)
-			pte_chain = pte_chain_alloc(GFP_ATOMIC);
-		unuse_pte(vma, offset+address-vma->vm_start,
-				pte, entry, page, &pte_chain);
+		if (unlikely(pte_same(*pte, swp_pte))) {
+			unuse_pte(vma, offset + address, pte,
+					entry, page, pte_chainp);
+			pte_unmap(pte);
+			return 1;
+		}
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
 	pte_unmap(pte - 1);
-	pte_chain_free(pte_chain);
+	return 0;
 }
 
-/* mmlist_lock and vma->vm_mm->page_table_lock are held */
-static void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
+/* vma->vm_mm->page_table_lock is held */
+static int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
 	unsigned long address, unsigned long size,
-	swp_entry_t entry, struct page* page)
+	swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp)
 {
 	pmd_t * pmd;
 	unsigned long offset, end;
 
 	if (pgd_none(*dir))
-		return;
+		return 0;
 	if (pgd_bad(*dir)) {
 		pgd_ERROR(*dir);
 		pgd_clear(dir);
-		return;
+		return 0;
 	}
 	pmd = pmd_offset(dir, address);
 	offset = address & PGDIR_MASK;
@@ -458,32 +454,42 @@ static void unuse_pgd(struct vm_area_str
 	if (address >= end)
 		BUG();
 	do {
-		unuse_pmd(vma, pmd, address, end - address, offset, entry,
-			  page);
+		if (unuse_pmd(vma, pmd, address, end - address,
+				offset, entry, page, pte_chainp))
+			return 1;
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
+	return 0;
 }
 
-/* mmlist_lock and vma->vm_mm->page_table_lock are held */
-static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
-			swp_entry_t entry, struct page* page)
+/* vma->vm_mm->page_table_lock is held */
+static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
+	swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp)
 {
 	unsigned long start = vma->vm_start, end = vma->vm_end;
 
 	if (start >= end)
 		BUG();
 	do {
-		unuse_pgd(vma, pgdir, start, end - start, entry, page);
+		if (unuse_pgd(vma, pgdir, start, end - start,
+				entry, page, pte_chainp))
+			return 1;
 		start = (start + PGDIR_SIZE) & PGDIR_MASK;
 		pgdir++;
 	} while (start && (start < end));
+	return 0;
 }
 
-static void unuse_process(struct mm_struct * mm,
+static int unuse_process(struct mm_struct * mm,
 			swp_entry_t entry, struct page* page)
 {
 	struct vm_area_struct* vma;
+	struct pte_chain *pte_chain;
+
+	pte_chain = pte_chain_alloc(GFP_KERNEL);
+	if (!pte_chain)
+		return -ENOMEM;
 
 	/*
 	 * Go through process' page directory.
@@ -491,10 +497,12 @@ static void unuse_process(struct mm_stru
 	spin_lock(&mm->page_table_lock);
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		pgd_t * pgd = pgd_offset(mm, vma->vm_start);
-		unuse_vma(vma, pgd, entry, page);
+		if (unuse_vma(vma, pgd, entry, page, &pte_chain))
+			break;
 	}
 	spin_unlock(&mm->page_table_lock);
-	return;
+	pte_chain_free(pte_chain);
+	return 0;
 }
 
 /*
@@ -638,36 +646,54 @@ static int try_to_unuse(unsigned int typ
 			if (start_mm == &init_mm)
 				shmem = shmem_unuse(entry, page);
 			else
-				unuse_process(start_mm, entry, page);
+				retval = unuse_process(start_mm, entry, page);
 		}
 		if (*swap_map > 1) {
 			int set_start_mm = (*swap_map >= swcount);
 			struct list_head *p = &start_mm->mmlist;
 			struct mm_struct *new_start_mm = start_mm;
+			struct mm_struct *prev_mm = start_mm;
 			struct mm_struct *mm;
 
+			atomic_inc(&new_start_mm->mm_users);
+			atomic_inc(&prev_mm->mm_users);
 			spin_lock(&mmlist_lock);
-			while (*swap_map > 1 &&
+			while (*swap_map > 1 && !retval &&
 					(p = p->next) != &start_mm->mmlist) {
 				mm = list_entry(p, struct mm_struct, mmlist);
+				atomic_inc(&mm->mm_users);
+				spin_unlock(&mmlist_lock);
+				mmput(prev_mm);
+				prev_mm = mm;
+
+				cond_resched();
+
 				swcount = *swap_map;
-				if (mm == &init_mm) {
+				if (swcount <= 1)
+					;
+				else if (mm == &init_mm) {
 					set_start_mm = 1;
-					spin_unlock(&mmlist_lock);
 					shmem = shmem_unuse(entry, page);
-					spin_lock(&mmlist_lock);
 				} else
-					unuse_process(mm, entry, page);
+					retval = unuse_process(mm, entry, page);
 				if (set_start_mm && *swap_map < swcount) {
+					mmput(new_start_mm);
+					atomic_inc(&mm->mm_users);
 					new_start_mm = mm;
 					set_start_mm = 0;
 				}
+				spin_lock(&mmlist_lock);
 			}
-			atomic_inc(&new_start_mm->mm_users);
 			spin_unlock(&mmlist_lock);
+			mmput(prev_mm);
 			mmput(start_mm);
 			start_mm = new_start_mm;
 		}
+		if (retval) {
+			unlock_page(page);
+			page_cache_release(page);
+			break;
+		}
 
 		/*
 		 * How could swap count reach 0x7fff when the maximum
@@ -691,7 +717,7 @@ static int try_to_unuse(unsigned int typ
 
 		/*
 		 * If a reference remains (rare), we would like to leave
-		 * the page in the swap cache; but try_to_swap_out could
+		 * the page in the swap cache; but try_to_unmap could
 		 * then re-duplicate the entry once we drop page lock,
 		 * so we might loop indefinitely; also, that page could
 		 * not be swapped out to other storage meanwhile.  So:
@@ -727,7 +753,7 @@ static int try_to_unuse(unsigned int typ
 		/*
 		 * So we could skip searching mms once swap count went
 		 * to 1, we did not mark any present ptes as dirty: must
-		 * mark page dirty so try_to_swap_out will preserve it.
+		 * mark page dirty so shrink_list will preserve it.
 		 */
 		SetPageDirty(page);
 		unlock_page(page);
diff -puN include/asm-ppc64/mman.h~objrmap include/asm-ppc64/mman.h
diff -puN include/linux/rmap-locking.h~objrmap include/linux/rmap-locking.h
diff -puN include/linux/swap.h~objrmap include/linux/swap.h
--- 25/include/linux/swap.h~objrmap	2003-04-08 03:16:49.000000000 -0700
+++ 25-akpm/include/linux/swap.h	2003-04-08 03:16:49.000000000 -0700
@@ -178,6 +178,8 @@ struct pte_chain *FASTCALL(page_add_rmap
 void FASTCALL(page_remove_rmap(struct page *, pte_t *));
 int FASTCALL(try_to_unmap(struct page *));
 
+int page_convert_anon(struct page *);
+
 /* linux/mm/shmem.c */
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
 #else
diff -puN mm/filemap.c~objrmap mm/filemap.c
--- 25/mm/filemap.c~objrmap	2003-04-08 03:16:49.000000000 -0700
+++ 25-akpm/mm/filemap.c	2003-04-08 03:16:49.000000000 -0700
@@ -64,6 +64,9 @@
  *  ->mmap_sem
  *    ->i_shared_sem		(various places)
  *
+ *  ->lock_page
+ *    ->i_shared_sem		(page_convert_anon)
+ *
  *  ->inode_lock
  *    ->sb_lock			(fs/fs-writeback.c)
  *    ->mapping->page_lock	(__sync_single_inode)

_
