
[ppc64] __hash_page rewrite, from Ben Herrenschmidt

Rewrite the __hash_page function in assembly in such a way that we no
longer need the page table lock.  We now rely on a BUSY bit in the
Linux PTE, on which we spin when doing an update of the PTE.
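
For illustration, the lock-free protocol amounts to the following C
sketch (pte_update_sketch is a hypothetical name, and cmpxchg() stands
in for the ldarx/stdcx. reservation pair that the real code uses in
arch/ppc64/mm/hash_low.S and in pte_update() in
include/asm-ppc64/pgtable.h):

	static unsigned long pte_update_sketch(unsigned long *ptep,
					       unsigned long clr,
					       unsigned long set)
	{
		unsigned long old, new;

		for (;;) {
			old = *ptep;		/* ldarx in the real code */
			if (old & _PAGE_BUSY)
				continue;	/* spin: another CPU owns the PTE */
			new = (old & ~clr) | set;
			if (cmpxchg(ptep, old, new) == old)
				break;		/* stdcx. succeeded */
		}
		return old;
	}

__hash_page uses the same pattern to check access rights and set
_PAGE_BUSY atomically before touching the hash table, and clears the
bit again with a plain store once the HPTE slot bits have been written
back into the PTE.  The ppc_md.hpte_insert/remove/updatepp calls on
the new fast path are direct "bl" instructions patched in at boot by
htab_finish_init(), which avoids loading the function pointers on
every fault.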


---

 /dev/null                       |  494 ----------------------------------------
 arch/ppc64/kernel/Makefile      |    2 
 arch/ppc64/kernel/setup.c       |    4 
 arch/ppc64/kernel/vmlinux.lds.S |    2 
 arch/ppc64/mm/Makefile          |    2 
 arch/ppc64/mm/hash_low.S        |  283 ++++++++++++++++++++++
 arch/ppc64/mm/hash_utils.c      |  358 ++++++++++++++++++++++++++++
 arch/ppc64/mm/hugetlbpage.c     |   10 
 arch/ppc64/mm/init.c            |    7 
 include/asm-ppc64/mmu.h         |   12 
 include/asm-ppc64/pgtable.h     |   66 +++--
 11 files changed, 717 insertions(+), 523 deletions(-)

diff -puN arch/ppc64/kernel/Makefile~ppc64-hash_page_rewrite arch/ppc64/kernel/Makefile
--- 25/arch/ppc64/kernel/Makefile~ppc64-hash_page_rewrite	2004-01-13 23:23:06.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/Makefile	2004-01-13 23:23:06.000000000 -0800
@@ -7,7 +7,7 @@ extra-y		:= head.o vmlinux.lds.s
 
 obj-y               :=	setup.o entry.o traps.o irq.o idle.o \
 			time.o process.o signal.o syscalls.o misc.o ptrace.o \
-			align.o semaphore.o bitops.o stab.o htab.o pacaData.o \
+			align.o semaphore.o bitops.o stab.o pacaData.o \
 			udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \
 			ptrace32.o signal32.o pmc.o rtc.o init_task.o \
 			lmb.o cputable.o
diff -puN -L arch/ppc64/kernel/htab.c arch/ppc64/kernel/htab.c~ppc64-hash_page_rewrite /dev/null
--- 25/arch/ppc64/kernel/htab.c
+++ /dev/null	2002-08-30 16:31:37.000000000 -0700
@@ -1,494 +0,0 @@
-/*
- * PowerPC64 port by Mike Corrigan and Dave Engebretsen
- *   {mikejc|engebret}@us.ibm.com
- *
- *    Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
- *
- * SMP scalability work:
- *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
- * 
- *    Module name: htab.c
- *
- *    Description:
- *      PowerPC Hashed Page Table functions
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/config.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/sysctl.h>
-#include <linux/ctype.h>
-#include <linux/cache.h>
-
-#include <asm/ppcdebug.h>
-#include <asm/processor.h>
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <asm/mmu_context.h>
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/naca.h>
-#include <asm/pmc.h>
-#include <asm/machdep.h>
-#include <asm/lmb.h>
-#include <asm/abs_addr.h>
-#include <asm/tlbflush.h>
-#include <asm/io.h>
-#include <asm/eeh.h>
-#include <asm/tlb.h>
-#include <asm/cacheflush.h>
-#include <asm/cputable.h>
-
-/*
- * Note:  pte   --> Linux PTE
- *        HPTE  --> PowerPC Hashed Page Table Entry
- *
- * Execution context:
- *   htab_initialize is called with the MMU off (of course), but
- *   the kernel has been copied down to zero so it can directly
- *   reference global data.  At this point it is very difficult
- *   to print debug info.
- *
- */
-
-HTAB htab_data = {NULL, 0, 0, 0, 0};
-
-extern unsigned long _SDR1;
-
-#define KB (1024)
-#define MB (1024*KB)
-
-static inline void
-loop_forever(void)
-{
-	volatile unsigned long x = 1;
-	for(;x;x|=1)
-		;
-}
-
-#ifdef CONFIG_PPC_PSERIES
-static inline void
-create_pte_mapping(unsigned long start, unsigned long end,
-		   unsigned long mode, int large)
-{
-	unsigned long addr;
-	unsigned int step;
-
-	if (large)
-		step = 16*MB;
-	else
-		step = 4*KB;
-
-	for (addr = start; addr < end; addr += step) {
-		unsigned long vpn, hash, hpteg;
-		unsigned long vsid = get_kernel_vsid(addr);
-		unsigned long va = (vsid << 28) | (addr & 0xfffffff);
-		int ret;
-
-		if (large)
-			vpn = va >> LARGE_PAGE_SHIFT;
-		else
-			vpn = va >> PAGE_SHIFT;
-
-		hash = hpt_hash(vpn, large);
-
-		hpteg = ((hash & htab_data.htab_hash_mask)*HPTES_PER_GROUP);
-
-		if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
-			ret = pSeries_lpar_hpte_insert(hpteg, va,
-				(unsigned long)__v2a(addr) >> PAGE_SHIFT,
-				0, mode, 1, large);
-		else
-			ret = pSeries_hpte_insert(hpteg, va,
-				(unsigned long)__v2a(addr) >> PAGE_SHIFT,
-				0, mode, 1, large);
-
-		if (ret == -1) {
-			ppc64_terminate_msg(0x20, "create_pte_mapping");
-			loop_forever();
-		}
-	}
-}
-
-void
-htab_initialize(void)
-{
-	unsigned long table, htab_size_bytes;
-	unsigned long pteg_count;
-	unsigned long mode_rw;
-
-	/*
-	 * Calculate the required size of the htab.  We want the number of
-	 * PTEGs to equal one half the number of real pages.
-	 */ 
-	htab_size_bytes = 1UL << naca->pftSize;
-	pteg_count = htab_size_bytes >> 7;
-
-	/* For debug, make the HTAB 1/8 as big as it normally would be. */
-	ifppcdebug(PPCDBG_HTABSIZE) {
-		pteg_count >>= 3;
-		htab_size_bytes = pteg_count << 7;
-	}
-
-	htab_data.htab_num_ptegs = pteg_count;
-	htab_data.htab_hash_mask = pteg_count - 1;
-
-	if (systemcfg->platform == PLATFORM_PSERIES) {
-		/* Find storage for the HPT.  Must be contiguous in
-		 * the absolute address space.
-		 */
-		table = lmb_alloc(htab_size_bytes, htab_size_bytes);
-		if ( !table ) {
-			ppc64_terminate_msg(0x20, "hpt space");
-			loop_forever();
-		}
-		htab_data.htab = (HPTE *)__a2v(table);
-
-		/* htab absolute addr + encoded htabsize */
-		_SDR1 = table + __ilog2(pteg_count) - 11;
-
-		/* Initialize the HPT with no entries */
-		memset((void *)table, 0, htab_size_bytes);
-	} else {
-		/* Using a hypervisor which owns the htab */
-		htab_data.htab = NULL;
-		_SDR1 = 0; 
-	}
-
-	mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
-
-	/* XXX we currently map kernel text rw, should fix this */
-	if ((cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE)
-	    && systemcfg->physicalMemorySize > 256*MB) {
-		create_pte_mapping((unsigned long)KERNELBASE, 
-				   KERNELBASE + 256*MB, mode_rw, 0);
-		create_pte_mapping((unsigned long)KERNELBASE + 256*MB, 
-				   KERNELBASE + (systemcfg->physicalMemorySize), 
-				   mode_rw, 1);
-	} else {
-		create_pte_mapping((unsigned long)KERNELBASE, 
-				   KERNELBASE+(systemcfg->physicalMemorySize), 
-				   mode_rw, 0);
-	}
-}
-#undef KB
-#undef MB
-#endif
-
-/*
- * find_linux_pte returns the address of a linux pte for a given 
- * effective address and directory.  If not found, it returns zero.
- */
-pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
-{
-	pgd_t *pg;
-	pmd_t *pm;
-	pte_t *pt = NULL;
-	pte_t pte;
-
-	pg = pgdir + pgd_index(ea);
-	if (!pgd_none(*pg)) {
-
-		pm = pmd_offset(pg, ea);
-		if (pmd_present(*pm)) { 
-			pt = pte_offset_kernel(pm, ea);
-			pte = *pt;
-			if (!pte_present(pte))
-				pt = NULL;
-		}
-	}
-
-	return pt;
-}
-
-static inline unsigned long computeHptePP(unsigned long pte)
-{
-	return (pte & _PAGE_USER) |
-		(((pte & _PAGE_USER) >> 1) &
-		 ((~((pte >> 2) &	/* _PAGE_RW */
-		     (pte >> 7))) &	/* _PAGE_DIRTY */
-		  1));
-}
-
-/*
- * Handle a fault by adding an HPTE. If the address can't be determined
- * to be valid via Linux page tables, return 1. If handled return 0
- */
-int __hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
-		pte_t *ptep, unsigned long trap, int local)
-{
-	unsigned long va, vpn;
-	unsigned long newpp, prpn;
-	unsigned long hpteflags;
-	long slot;
-	pte_t old_pte, new_pte;
-
-	/* XXX fix for large ptes */
-	int large = 0;
-
-	/* Search the Linux page table for a match with va */
-	va = (vsid << 28) | (ea & 0x0fffffff);
-
-	if (large)
-		vpn = va >> LARGE_PAGE_SHIFT;
-	else
-		vpn = va >> PAGE_SHIFT;
-
-	/*
-	 * If no pte found or not present, send the problem up to
-	 * do_page_fault
-	 */
-	if (unlikely(!ptep || !pte_present(*ptep)))
-		return 1;
-
-	/* 
-	 * Check the user's access rights to the page.  If access should be
-	 * prevented then send the problem up to do_page_fault.
-	 */
-	access |= _PAGE_PRESENT;
-	if (unlikely(access & ~(pte_val(*ptep))))
-		return 1;
-
-	/*
-	 * At this point, we have a pte (old_pte) which can be used to build
-	 * or update an HPTE. There are 2 cases:
-	 *
-	 * 1. There is a valid (present) pte with no associated HPTE (this is 
-	 *	the most common case)
-	 * 2. There is a valid (present) pte with an associated HPTE. The
-	 *	current values of the pp bits in the HPTE prevent access
-	 *	because we are doing software DIRTY bit management and the
-	 *	page is currently not DIRTY. 
-	 */
-
-	old_pte = *ptep;
-	new_pte = old_pte;
-	/* If the attempted access was a store */
-	if (access & _PAGE_RW)
-		pte_val(new_pte) |= _PAGE_ACCESSED | _PAGE_DIRTY;
-	else
-		pte_val(new_pte) |= _PAGE_ACCESSED;
-
-	newpp = computeHptePP(pte_val(new_pte));
-
-#define PPC64_HWNOEXEC (1 << 2)
-
-	/* We do lazy icache flushing on cpus that support it */
-	if (unlikely((cur_cpu_spec->cpu_features & CPU_FTR_NOEXECUTE)
-		     && pfn_valid(pte_pfn(new_pte)))) {
-		struct page *page = pte_page(new_pte);
-
-		/* page is dirty */
-		if (!PageReserved(page) &&
-		    !test_bit(PG_arch_1, &page->flags)) {
-			if (trap == 0x400) {
-				__flush_dcache_icache(page_address(page));
-				set_bit(PG_arch_1, &page->flags);
-			} else {
-				newpp |= PPC64_HWNOEXEC;
-			}
-		}
-	}
-
-	/* Check if pte already has an hpte (case 2) */
-	if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
-		/* There MIGHT be an HPTE for this pte */
-		unsigned long hash, slot, secondary;
-
-		hash = hpt_hash(vpn, large);
-		secondary = (pte_val(old_pte) & _PAGE_SECONDARY) >> 15;
-		if (secondary)
-			hash = ~hash;
-		slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
-		slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
-
-		if (ppc_md.hpte_updatepp(slot, newpp, va, large, local) == -1)
-			pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
-		else
-			if (!pte_same(old_pte, new_pte))
-				*ptep = new_pte;
-	}
-
-	if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
-		unsigned long hash = hpt_hash(vpn, large);
-		unsigned long hpte_group;
-		prpn = pte_val(old_pte) >> PTE_SHIFT;
-
-repeat:
-		hpte_group = ((hash & htab_data.htab_hash_mask) *
-			      HPTES_PER_GROUP) & ~0x7UL;
-
-		/* Update the linux pte with the HPTE slot */
-		pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
-		pte_val(new_pte) |= _PAGE_HASHPTE;
-
-		/* copy appropriate flags from linux pte */
-		hpteflags = (pte_val(new_pte) & 0x1f8) | newpp;
-
-		slot = ppc_md.hpte_insert(hpte_group, va, prpn, 0,
-					  hpteflags, 0, large);
-
-		/* Primary is full, try the secondary */
-		if (unlikely(slot == -1)) {
-			pte_val(new_pte) |= 1 << 15;
-			hpte_group = ((~hash & htab_data.htab_hash_mask) *
-				      HPTES_PER_GROUP) & ~0x7UL; 
-			slot = ppc_md.hpte_insert(hpte_group, va, prpn,
-						  1, hpteflags, 0, large);
-			if (slot == -1) {
-				if (mftb() & 0x1)
-					hpte_group = ((hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
-
-				ppc_md.hpte_remove(hpte_group);
-				goto repeat;
-                        }
-		}
-
-		if (unlikely(slot == -2))
-			panic("hash_page: pte_insert failed\n");
-
-		pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
-
-		/* 
-		 * No need to use ldarx/stdcx here because all who
-		 * might be updating the pte will hold the
-		 * page_table_lock or the hash_table_lock
-		 * (we hold both)
-		 */
-		*ptep = new_pte;
-	}
-
-	return 0;
-}
-
-int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
-{
-	void *pgdir;
-	unsigned long vsid;
-	struct mm_struct *mm;
-	pte_t *ptep;
-	int ret;
-	int user_region = 0;
-	int local = 0;
-	cpumask_t tmp;
-
-	/* Check for invalid addresses. */
-	if (!IS_VALID_EA(ea))
-		return 1;
-
- 	switch (REGION_ID(ea)) {
-	case USER_REGION_ID:
-		user_region = 1;
-		mm = current->mm;
-		if (mm == NULL)
-			return 1;
-
-		vsid = get_vsid(mm->context, ea);
-		break;
-	case IO_REGION_ID:
-		mm = &ioremap_mm;
-		vsid = get_kernel_vsid(ea);
-		break;
-	case VMALLOC_REGION_ID:
-		mm = &init_mm;
-		vsid = get_kernel_vsid(ea);
-		break;
-#if 0
-	case EEH_REGION_ID:
-		/*
-		 * Should only be hit if there is an access to MMIO space
-		 * which is protected by EEH.
-		 * Send the problem up to do_page_fault 
-		 */
-	case KERNEL_REGION_ID:
-		/*
-		 * Should never get here - entire 0xC0... region is bolted.
-		 * Send the problem up to do_page_fault 
-		 */
-#endif
-	default:
-		/* Not a valid range
-		 * Send the problem up to do_page_fault 
-		 */
-		return 1;
-		break;
-	}
-
-	pgdir = mm->pgd;
-
-	if (pgdir == NULL)
-		return 1;
-
-	/*
-	 * Lock the Linux page table to prevent mmap and kswapd
-	 * from modifying entries while we search and update
-	 */
-	spin_lock(&mm->page_table_lock);
-
-	tmp = cpumask_of_cpu(smp_processor_id());
-	if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
-		local = 1;
-
-	ret = hash_huge_page(mm, access, ea, vsid, local);
-	if (ret < 0) {
-		ptep = find_linux_pte(pgdir, ea);
-		ret = __hash_page(ea, access, vsid, ptep, trap, local);
-	}
-
-	spin_unlock(&mm->page_table_lock);
-
-	return ret;
-}
-
-void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
-		     int local)
-{
-	unsigned long vsid, vpn, va, hash, secondary, slot;
-
-	/* XXX fix for large ptes */
-	unsigned long large = 0;
-
-	if ((ea >= USER_START) && (ea <= USER_END))
-		vsid = get_vsid(context, ea);
-	else
-		vsid = get_kernel_vsid(ea);
-
-	va = (vsid << 28) | (ea & 0x0fffffff);
-	if (large)
-		vpn = va >> LARGE_PAGE_SHIFT;
-	else
-		vpn = va >> PAGE_SHIFT;
-	hash = hpt_hash(vpn, large);
-	secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
-	if (secondary)
-		hash = ~hash;
-	slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
-	slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
-
-	ppc_md.hpte_invalidate(slot, va, large, local);
-}
-
-void flush_hash_range(unsigned long context, unsigned long number, int local)
-{
-	if (ppc_md.flush_hash_range) {
-		ppc_md.flush_hash_range(context, number, local);
-	} else {
-		int i;
-		struct ppc64_tlb_batch *batch =
-			&ppc64_tlb_batch[smp_processor_id()];
-
-		for (i = 0; i < number; i++)
-			flush_hash_page(context, batch->addr[i], batch->pte[i],
-					local);
-	}
-}
diff -puN arch/ppc64/kernel/setup.c~ppc64-hash_page_rewrite arch/ppc64/kernel/setup.c
--- 25/arch/ppc64/kernel/setup.c~ppc64-hash_page_rewrite	2004-01-13 23:23:06.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/setup.c	2004-01-13 23:23:06.000000000 -0800
@@ -213,6 +213,10 @@ void setup_system(unsigned long r3, unsi
 #endif
 	}
 #endif
+	/* Finish initializing the hash table (do the dynamic
+	 * patching for the fast-path hash_low.S code)
+	 */
+	htab_finish_init();
 
 	printk("Starting Linux PPC64 %s\n", UTS_RELEASE);
 
diff -puN arch/ppc64/kernel/vmlinux.lds.S~ppc64-hash_page_rewrite arch/ppc64/kernel/vmlinux.lds.S
--- 25/arch/ppc64/kernel/vmlinux.lds.S~ppc64-hash_page_rewrite	2004-01-13 23:23:06.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/vmlinux.lds.S	2004-01-13 23:23:06.000000000 -0800
@@ -53,7 +53,6 @@ SECTIONS
     *(.data1)
     *(.sdata)
     *(.sdata2)
-    *(.got.plt) *(.got)
     *(.dynamic)
     CONSTRUCTORS
   }
@@ -126,6 +125,7 @@ SECTIONS
   /* freed after init ends here */
 
   __toc_start = .;
+  .got : { *(.got.plt) *(.got) }
   .toc : { *(.toc) }
   . = ALIGN(4096);
   __toc_end = .;
diff -puN arch/ppc64/mm/Makefile~ppc64-hash_page_rewrite arch/ppc64/mm/Makefile
--- 25/arch/ppc64/mm/Makefile~ppc64-hash_page_rewrite	2004-01-13 23:23:06.000000000 -0800
+++ 25-akpm/arch/ppc64/mm/Makefile	2004-01-13 23:23:06.000000000 -0800
@@ -4,6 +4,6 @@
 
 EXTRA_CFLAGS += -mno-minimal-toc
 
-obj-y := fault.o init.o extable.o imalloc.o
+obj-y := fault.o init.o extable.o imalloc.o hash_utils.o hash_low.o
 obj-$(CONFIG_DISCONTIGMEM) += numa.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff -puN /dev/null arch/ppc64/mm/hash_low.S
--- /dev/null	2002-08-30 16:31:37.000000000 -0700
+++ 25-akpm/arch/ppc64/mm/hash_low.S	2004-01-13 23:23:06.000000000 -0800
@@ -0,0 +1,283 @@
+/*
+ * ppc64 MMU hashtable management routines
+ *
+ * (c) Copyright IBM Corp. 2003
+ *
+ * Maintained by: Benjamin Herrenschmidt
+ *                <benh@kernel.crashing.org>
+ *
+ * This file is covered by the GNU Public Licence v2 as
+ * described in the kernel's COPYING file.
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/ppc_asm.h>
+#include <asm/offsets.h>
+#include <asm/cputable.h>
+
+	.text
+
+/*
+ * Stackframe:
+ *		
+ *         +-> Back chain			(SP + 256)
+ *         |   General register save area	(SP + 112)
+ *         |   Parameter save area		(SP + 48)
+ *         |   TOC save area			(SP + 40)
+ *         |   link editor doubleword		(SP + 32)
+ *         |   compiler doubleword		(SP + 24)
+ *         |   LR save area			(SP + 16)
+ *         |   CR save area			(SP + 8)
+ * SP ---> +-- Back chain			(SP + 0)
+ */
+#define STACKFRAMESIZE	256
+
+/* Save parameters offsets */
+#define STK_PARM(i)	(STACKFRAMESIZE + 48 + ((i)-3)*8)
+
+/* Save non-volatile offsets */
+#define STK_REG(i)	(112 + ((i)-14)*8)
+
+/*
+ * __hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
+ *		pte_t *ptep, unsigned long trap, int local)
+ *
+ * Adds a page to the hash table. This is the non-LPAR version for now
+ */
+
+_GLOBAL(__hash_page)
+	mflr	r0
+	std	r0,16(r1)
+	stdu	r1,-STACKFRAMESIZE(r1)
+	/* Save all params that we need after a function call */
+	std	r6,STK_PARM(r6)(r1)
+	std	r8,STK_PARM(r8)(r1)
+	
+	/* Add _PAGE_PRESENT to access */
+	ori	r4,r4,_PAGE_PRESENT
+
+	/* Save non-volatile registers.
+	 * r31 will hold "old PTE"
+	 * r30 is "new PTE"
+	 * r29 is "va"
+	 * r28 is a hash value
+	 * r27 is hashtab mask (maybe dynamically patched instead?)
+	 */
+	std	r27,STK_REG(r27)(r1)
+	std	r28,STK_REG(r28)(r1)
+	std	r29,STK_REG(r29)(r1)
+	std	r30,STK_REG(r30)(r1)
+	std	r31,STK_REG(r31)(r1)
+	
+	/* Step 1:
+	 *
+	 * Check permissions, atomically mark the linux PTE busy
+	 * and hashed.
+	 */ 
+1:
+	ldarx	r31,0,r6
+	/* Check access rights (access & ~(pte_val(*ptep))) */
+	andc.	r0,r4,r31
+	bne-	htab_wrong_access
+	/* Check if PTE is busy */
+	andi.	r0,r31,_PAGE_BUSY
+	bne-	1b
+	/* Prepare new PTE value (turn access RW into DIRTY, then
+	 * add BUSY, HASHPTE and ACCESSED)
+	 */
+	rlwinm	r30,r4,5,24,24	/* _PAGE_RW -> _PAGE_DIRTY */
+	or	r30,r30,r31
+	ori	r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+	/* Write the linux PTE atomically (setting busy) */
+	stdcx.	r30,0,r6
+	bne-	1b
+	
+
+	/* Step 2:
+	 *
+	 * Insert/Update the HPTE in the hash table. At this point,
+	 * r4 (access) is re-useable, we use it for the new HPTE flags
+	 */
+
+	/* Calc va and put it in r29 */
+	rldicr	r29,r5,28,63-28
+	rldicl	r3,r3,0,36
+	or	r29,r3,r29
+
+	/* Calculate hash value for primary slot and store it in r28 */
+	rldicl	r5,r5,0,25		/* vsid & 0x0000007fffffffff */
+	rldicl	r0,r3,64-12,48		/* (ea >> 12) & 0xffff */
+	xor	r28,r5,r0
+	
+	/* Convert linux PTE bits into HW equivalents
+	 */
+	andi.	r3,r30,0x1fa		/* Get basic set of flags */
+	rlwinm	r0,r30,32-2+1,30,30	/* _PAGE_RW -> _PAGE_USER (r0) */
+	rlwinm	r4,r30,32-7+1,30,30	/* _PAGE_DIRTY -> _PAGE_USER (r4) */
+	and	r0,r0,r4		/* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
+	andc	r0,r30,r0		/* r0 = pte & ~r0 */
+	rlwimi	r3,r0,32-1,31,31	/* Insert result into PP lsb */
+
+	/* If needed, we do the icache sync here (maybe inline that
+	 * code rather than call a C function...)
+	 */
+BEGIN_FTR_SECTION
+	mr	r4,r30
+	mr	r5,r7
+	bl	.hash_page_do_lazy_icache
+END_FTR_SECTION_IFSET(CPU_FTR_NOEXECUTE)
+
+	/* At this point, r3 contains new PP bits, save them in
+	 * place of "access" in the param area (sic)
+	 */
+	std	r3,STK_PARM(r4)(r1)
+
+	/* Get htab_hash_mask */
+	ld	r4,htab_data@got(2)
+	ld	r27,16(r4)	/* htab_data.htab_hash_mask -> r27 */
+
+	/* Check if we may already be in the hash table; in this case,
+	 * we go to out-of-line code to try to modify the HPTE
+	 */
+	andi.	r0,r31,_PAGE_HASHPTE
+	bne	htab_modify_pte
+
+htab_insert_pte:
+	/* Clear hpte bits in new pte (we also clear BUSY btw) and
+	 * add _PAGE_HASHPTE
+	 */
+	lis	r0,_PAGE_HPTEFLAGS@h
+	ori	r0,r0,_PAGE_HPTEFLAGS@l
+	andc	r30,r30,r0
+	ori	r30,r30,_PAGE_HASHPTE
+
+	/* page number in r5 */
+	rldicl	r5,r31,64-PTE_SHIFT,PTE_SHIFT
+
+	/* Calculate primary group hash */
+	and	r0,r28,r27
+	rldicr	r3,r0,3,63-3	/* r0 = (hash & mask) << 3 */
+
+	/* Call ppc_md.hpte_insert */
+	ld	r7,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
+	mr	r4,r29			/* Retrieve va */
+	li	r6,0			/* primary slot */
+	li	r8,0			/* not bolted and not large */
+	li	r9,0
+_GLOBAL(htab_call_hpte_insert1)
+	bl	.			/* Will be patched by htab_finish_init() */
+	cmpi	0,r3,0
+	bge	htab_pte_insert_ok	/* Insertion successful */
+	cmpi	0,r3,-2			/* Critical failure */
+	beq-	htab_pte_insert_failure
+
+	/* Now try secondary slot */
+	ori	r30,r30,_PAGE_SECONDARY
+	
+	/* page number in r5 */
+	rldicl	r5,r31,64-PTE_SHIFT,PTE_SHIFT
+
+	/* Calculate secondary group hash */
+	andc	r0,r27,r28
+	rldicr	r3,r0,3,63-3	/* r0 = (~hash & mask) << 3 */
+	
+	/* Call ppc_md.hpte_insert */
+	ld	r7,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
+	mr	r4,r29			/* Retrieve va */
+	li	r6,1			/* secondary slot */
+	li	r8,0			/* not bolted and not large */
+	li	r9,0
+_GLOBAL(htab_call_hpte_insert2)
+	bl	.			/* Will be patched by htab_finish_init() */
+	cmpi	0,r3,0
+	bge+	htab_pte_insert_ok	/* Insertion successful */
+	cmpi	0,r3,-2			/* Critical failure */
+	beq-	htab_pte_insert_failure
+
+	/* Both are full, we need to evict something */
+	mftb	r0
+	/* Pick a random group based on TB */
+	andi.	r0,r0,1
+	mr	r5,r28
+	bne	2f
+	not	r5,r5
+2:	and	r0,r5,r27
+	rldicr	r3,r0,3,63-3	/* r0 = (hash & mask) << 3 */	
+	/* Call ppc_md.hpte_remove */
+_GLOBAL(htab_call_hpte_remove)
+	bl	.			/* Will be patched by htab_finish_init() */
+
+	/* Try all again */
+	b	htab_insert_pte	
+
+htab_pte_insert_ok:
+	/* Insert slot number in PTE */
+	rldimi	r30,r3,12,63-14
+		
+	/* Write out the PTE with a normal write
+	 * (maybe adding an eieio would still be good?)
+	 */
+htab_write_out_pte:
+	ld	r6,STK_PARM(r6)(r1)
+	std	r30,0(r6)
+	li	r3, 0
+bail:
+	ld	r27,STK_REG(r27)(r1)
+	ld	r28,STK_REG(r28)(r1)
+	ld	r29,STK_REG(r29)(r1)
+	ld      r30,STK_REG(r30)(r1)
+	ld      r31,STK_REG(r31)(r1)
+	addi    r1,r1,STACKFRAMESIZE
+	ld      r0,16(r1)
+	mtlr    r0
+	blr
+
+htab_modify_pte:
+	/* Keep PP bits in r4 and slot idx from the PTE around in r3 */
+	mr	r4,r3
+	rlwinm	r3,r31,32-12,29,31
+
+	/* Secondary group? If yes, get an inverted hash value */
+	mr	r5,r28
+	andi.	r0,r31,_PAGE_SECONDARY
+	beq	1f
+	not	r5,r5
+1:
+	/* Calculate proper slot value for ppc_md.hpte_updatepp */
+	and	r0,r5,r27
+	rldicr	r0,r0,3,63-3	/* r0 = (hash & mask) << 3 */
+	add	r3,r0,r3	/* add slot idx */
+
+	/* Call ppc_md.hpte_updatepp */
+	mr	r5,r29			/* va */
+	li	r6,0			/* large is 0 */
+	ld	r7,STK_PARM(r8)(r1)	/* get "local" param */
+_GLOBAL(htab_call_hpte_updatepp)
+	bl	.			/* Will be patched by htab_finish_init() */
+
+	/* If we failed, it's typically because the HPTE wasn't really
+	 * there, so we try an insertion.
+	 */
+	cmpi	0,r3,-1
+	beq-	htab_insert_pte
+
+	/* Clear the BUSY bit and write out the PTE */
+	li	r0,_PAGE_BUSY
+	andc	r30,r30,r0
+	b	htab_write_out_pte
+
+htab_wrong_access:
+	/* Bail out clearing reservation */
+	stdcx.	r31,0,r6
+	li	r3,1
+	b	bail
+
+htab_pte_insert_failure:
+	b	.htab_insert_failure
+
+
diff -puN /dev/null arch/ppc64/mm/hash_utils.c
--- /dev/null	2002-08-30 16:31:37.000000000 -0700
+++ 25-akpm/arch/ppc64/mm/hash_utils.c	2004-01-13 23:23:06.000000000 -0800
@@ -0,0 +1,358 @@
+/*
+ * PowerPC64 port by Mike Corrigan and Dave Engebretsen
+ *   {mikejc|engebret}@us.ibm.com
+ *
+ *    Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
+ *
+ * SMP scalability work:
+ *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * 
+ *    Module name: hash_utils.c
+ *
+ *    Description:
+ *      PowerPC Hashed Page Table functions
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/sysctl.h>
+#include <linux/ctype.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+
+#include <asm/ppcdebug.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/naca.h>
+#include <asm/pmc.h>
+#include <asm/machdep.h>
+#include <asm/lmb.h>
+#include <asm/abs_addr.h>
+#include <asm/tlbflush.h>
+#include <asm/io.h>
+#include <asm/eeh.h>
+#include <asm/tlb.h>
+#include <asm/cacheflush.h>
+#include <asm/cputable.h>
+/*
+ * Note:  pte   --> Linux PTE
+ *        HPTE  --> PowerPC Hashed Page Table Entry
+ *
+ * Execution context:
+ *   htab_initialize is called with the MMU off (of course), but
+ *   the kernel has been copied down to zero so it can directly
+ *   reference global data.  At this point it is very difficult
+ *   to print debug info.
+ *
+ */
+
+HTAB htab_data = {NULL, 0, 0, 0, 0};
+
+extern unsigned long _SDR1;
+
+#define KB (1024)
+#define MB (1024*KB)
+
+static inline void loop_forever(void)
+{
+	volatile unsigned long x = 1;
+	for(;x;x|=1)
+		;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+static inline void create_pte_mapping(unsigned long start, unsigned long end,
+				      unsigned long mode, int large)
+{
+	unsigned long addr;
+	unsigned int step;
+
+	if (large)
+		step = 16*MB;
+	else
+		step = 4*KB;
+
+	for (addr = start; addr < end; addr += step) {
+		unsigned long vpn, hash, hpteg;
+		unsigned long vsid = get_kernel_vsid(addr);
+		unsigned long va = (vsid << 28) | (addr & 0xfffffff);
+		int ret;
+
+		if (large)
+			vpn = va >> LARGE_PAGE_SHIFT;
+		else
+			vpn = va >> PAGE_SHIFT;
+
+		hash = hpt_hash(vpn, large);
+
+		hpteg = ((hash & htab_data.htab_hash_mask)*HPTES_PER_GROUP);
+
+		if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
+			ret = pSeries_lpar_hpte_insert(hpteg, va,
+				(unsigned long)__v2a(addr) >> PAGE_SHIFT,
+				0, mode, 1, large);
+		else
+			ret = pSeries_hpte_insert(hpteg, va,
+				(unsigned long)__v2a(addr) >> PAGE_SHIFT,
+				0, mode, 1, large);
+
+		if (ret == -1) {
+			ppc64_terminate_msg(0x20, "create_pte_mapping");
+			loop_forever();
+		}
+	}
+}
+
+void __init htab_initialize(void)
+{
+	unsigned long table, htab_size_bytes;
+	unsigned long pteg_count;
+	unsigned long mode_rw;
+
+	/*
+	 * Calculate the required size of the htab.  We want the number of
+	 * PTEGs to equal one half the number of real pages.
+	 */ 
+	htab_size_bytes = 1UL << naca->pftSize;
+	pteg_count = htab_size_bytes >> 7;
+
+	/* For debug, make the HTAB 1/8 as big as it normally would be. */
+	ifppcdebug(PPCDBG_HTABSIZE) {
+		pteg_count >>= 3;
+		htab_size_bytes = pteg_count << 7;
+	}
+
+	htab_data.htab_num_ptegs = pteg_count;
+	htab_data.htab_hash_mask = pteg_count - 1;
+
+	if (systemcfg->platform == PLATFORM_PSERIES) {
+		/* Find storage for the HPT.  Must be contiguous in
+		 * the absolute address space.
+		 */
+		table = lmb_alloc(htab_size_bytes, htab_size_bytes);
+		if ( !table ) {
+			ppc64_terminate_msg(0x20, "hpt space");
+			loop_forever();
+		}
+		htab_data.htab = (HPTE *)__a2v(table);
+
+		/* htab absolute addr + encoded htabsize */
+		_SDR1 = table + __ilog2(pteg_count) - 11;
+
+		/* Initialize the HPT with no entries */
+		memset((void *)table, 0, htab_size_bytes);
+	} else {
+		/* Using a hypervisor which owns the htab */
+		htab_data.htab = NULL;
+		_SDR1 = 0; 
+	}
+
+	mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
+
+	/* XXX we currently map kernel text rw, should fix this */
+	if ((cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE)
+	    && systemcfg->physicalMemorySize > 256*MB) {
+		create_pte_mapping((unsigned long)KERNELBASE, 
+				   KERNELBASE + 256*MB, mode_rw, 0);
+		create_pte_mapping((unsigned long)KERNELBASE + 256*MB, 
+				   KERNELBASE + (systemcfg->physicalMemorySize), 
+				   mode_rw, 1);
+	} else {
+		create_pte_mapping((unsigned long)KERNELBASE, 
+				   KERNELBASE+(systemcfg->physicalMemorySize), 
+				   mode_rw, 0);
+	}
+}
+#undef KB
+#undef MB
+#endif
+
+/*
+ * Called by asm hash_low.S for doing lazy icache flush
+ */
+unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
+{
+	struct page *page;
+
+#define PPC64_HWNOEXEC (1 << 2)
+
+	if (!pfn_valid(pte_pfn(pte)))
+		return pp;
+
+	page = pte_page(pte);
+
+	/* page is dirty */
+	if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
+		if (trap == 0x400) {
+			__flush_dcache_icache(page_address(page));
+			set_bit(PG_arch_1, &page->flags);
+		} else
+			pp |= PPC64_HWNOEXEC;
+	}
+	return pp;
+}
+
+/*
+ * Called by asm hash_low.S in case of critical insert failure
+ */
+void htab_insert_failure(void)
+{
+	panic("hash_page: pte_insert failed\n");
+}
+
+int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
+{
+	void *pgdir;
+	unsigned long vsid;
+	struct mm_struct *mm;
+	pte_t *ptep;
+	int ret;
+	int user_region = 0;
+	int local = 0;
+	cpumask_t tmp;
+
+	/* Check for invalid addresses. */
+	if (!IS_VALID_EA(ea))
+		return 1;
+
+ 	switch (REGION_ID(ea)) {
+	case USER_REGION_ID:
+		user_region = 1;
+		mm = current->mm;
+		if (mm == NULL)
+			return 1;
+
+		vsid = get_vsid(mm->context, ea);
+		break;
+	case IO_REGION_ID:
+		mm = &ioremap_mm;
+		vsid = get_kernel_vsid(ea);
+		break;
+	case VMALLOC_REGION_ID:
+		mm = &init_mm;
+		vsid = get_kernel_vsid(ea);
+		break;
+#if 0
+	case EEH_REGION_ID:
+		/*
+		 * Should only be hit if there is an access to MMIO space
+		 * which is protected by EEH.
+		 * Send the problem up to do_page_fault 
+		 */
+	case KERNEL_REGION_ID:
+		/*
+		 * Should never get here - entire 0xC0... region is bolted.
+		 * Send the problem up to do_page_fault 
+		 */
+#endif
+	default:
+		/* Not a valid range
+		 * Send the problem up to do_page_fault 
+		 */
+		return 1;
+		break;
+	}
+
+	pgdir = mm->pgd;
+
+	if (pgdir == NULL)
+		return 1;
+
+	tmp = cpumask_of_cpu(smp_processor_id());
+	if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
+		local = 1;
+
+	/* Is this a huge page? */
+	if (unlikely(in_hugepage_area(mm->context, ea)))
+		ret = hash_huge_page(mm, access, ea, vsid, local);
+	else {
+		ptep = find_linux_pte(pgdir, ea);
+		if (ptep == NULL)
+			return 1;
+		ret = __hash_page(ea, access, vsid, ptep, trap, local);
+	}
+
+
+	return ret;
+}
+
+void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
+		     int local)
+{
+	unsigned long vsid, vpn, va, hash, secondary, slot;
+
+	/* XXX fix for large ptes */
+	unsigned long large = 0;
+
+	if ((ea >= USER_START) && (ea <= USER_END))
+		vsid = get_vsid(context, ea);
+	else
+		vsid = get_kernel_vsid(ea);
+
+	va = (vsid << 28) | (ea & 0x0fffffff);
+	if (large)
+		vpn = va >> LARGE_PAGE_SHIFT;
+	else
+		vpn = va >> PAGE_SHIFT;
+	hash = hpt_hash(vpn, large);
+	secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
+	if (secondary)
+		hash = ~hash;
+	slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP;
+	slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
+
+	ppc_md.hpte_invalidate(slot, va, large, local);
+}
+
+void flush_hash_range(unsigned long context, unsigned long number, int local)
+{
+	if (ppc_md.flush_hash_range) {
+		ppc_md.flush_hash_range(context, number, local);
+	} else {
+		int i;
+		struct ppc64_tlb_batch *batch =
+			&ppc64_tlb_batch[smp_processor_id()];
+
+		for (i = 0; i < number; i++)
+			flush_hash_page(context, batch->addr[i], batch->pte[i],
+					local);
+	}
+}
+
+static inline void make_bl(unsigned int *insn_addr, void *func)
+{
+	unsigned long funcp = *((unsigned long *)func);
+	int offset = funcp - (unsigned long)insn_addr;
+
+	*insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc));
+	flush_icache_range((unsigned long)insn_addr,
+			   (unsigned long)insn_addr + 4);
+}
+
+void __init htab_finish_init(void)
+{
+	extern unsigned int *htab_call_hpte_insert1;
+	extern unsigned int *htab_call_hpte_insert2;
+	extern unsigned int *htab_call_hpte_remove;
+	extern unsigned int *htab_call_hpte_updatepp;
+
+	make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
+	make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
+	make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
+	make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp);
+}
diff -puN arch/ppc64/mm/hugetlbpage.c~ppc64-hash_page_rewrite arch/ppc64/mm/hugetlbpage.c
--- 25/arch/ppc64/mm/hugetlbpage.c~ppc64-hash_page_rewrite	2004-01-13 23:23:06.000000000 -0800
+++ 25-akpm/arch/ppc64/mm/hugetlbpage.c	2004-01-13 23:23:06.000000000 -0800
@@ -652,13 +652,9 @@ int hash_huge_page(struct mm_struct *mm,
 	unsigned long va, vpn;
 	int is_write;
 	hugepte_t old_pte, new_pte;
-	unsigned long hpteflags, prpn;
+	unsigned long hpteflags, prpn, flags;
 	long slot;
 
-	/* Is this for us? */
-	if (!in_hugepage_area(mm->context, ea))
-		return -1;
-
 	ea &= ~(HPAGE_SIZE-1);
 
 	/* We have to find the first hugepte in the batch, since
@@ -698,6 +694,8 @@ int hash_huge_page(struct mm_struct *mm,
 	 *	page is currently not DIRTY. 
 	 */
 
+	spin_lock_irqsave(&mm->page_table_lock, flags);
+
 	old_pte = *ptep;
 	new_pte = old_pte;
 
@@ -769,6 +767,8 @@ repeat:
 		*ptep = new_pte;
 	}
 
+	spin_unlock_irqrestore(&mm->page_table_lock, flags);
+
 	return 0;
 }
 
diff -puN include/asm-ppc64/mmu.h~ppc64-hash_page_rewrite include/asm-ppc64/mmu.h
--- 25/include/asm-ppc64/mmu.h~ppc64-hash_page_rewrite	2004-01-13 23:23:06.000000000 -0800
+++ 25-akpm/include/asm-ppc64/mmu.h	2004-01-13 23:23:06.000000000 -0800
@@ -13,6 +13,8 @@
 #ifndef _PPC64_MMU_H_
 #define _PPC64_MMU_H_
 
+#include <asm/page.h>
+
 #ifndef __ASSEMBLY__
 
 /* Default "unsigned long" context */
@@ -245,6 +247,16 @@ static inline void tlbiel(unsigned long 
 	asm volatile("ptesync": : :"memory");
 }
 
+/*
+ * Handle a fault by adding an HPTE. If the address can't be determined
+ * to be valid via Linux page tables, return 1. If handled return 0
+ */
+extern int __hash_page(unsigned long ea, unsigned long access,
+		       unsigned long vsid, pte_t *ptep, unsigned long trap,
+		       int local);
+
+extern void htab_finish_init(void);
+
 #endif /* __ASSEMBLY__ */
 
 /*
diff -puN include/asm-ppc64/pgtable.h~ppc64-hash_page_rewrite include/asm-ppc64/pgtable.h
--- 25/include/asm-ppc64/pgtable.h~ppc64-hash_page_rewrite	2004-01-13 23:23:06.000000000 -0800
+++ 25-akpm/include/asm-ppc64/pgtable.h	2004-01-13 23:23:06.000000000 -0800
@@ -7,6 +7,7 @@
  */
 
 #ifndef __ASSEMBLY__
+#include <linux/stddef.h>
 #include <asm/processor.h>		/* For TASK_SIZE */
 #include <asm/mmu.h>
 #include <asm/page.h>
@@ -74,22 +75,23 @@
  * Bits in a linux-style PTE.  These match the bits in the
  * (hardware-defined) PowerPC PTE as closely as possible.
  */
-#define _PAGE_PRESENT	0x001UL	/* software: pte contains a translation */
-#define _PAGE_USER	0x002UL	/* matches one of the PP bits */
-#define _PAGE_RW	0x004UL	/* software: user write access allowed */
-#define _PAGE_GUARDED	0x008UL
-#define _PAGE_COHERENT	0x010UL	/* M: enforce memory coherence (SMP systems) */
-#define _PAGE_NO_CACHE	0x020UL	/* I: cache inhibit */
-#define _PAGE_WRITETHRU	0x040UL	/* W: cache write-through */
-#define _PAGE_DIRTY	0x080UL	/* C: page changed */
-#define _PAGE_ACCESSED	0x100UL	/* R: page referenced */
-#define _PAGE_FILE	0x200UL /* software: pte holds file offset */
-#define _PAGE_HASHPTE	0x400UL	/* software: pte has an associated HPTE */
-#define _PAGE_EXEC	0x800UL	/* software: i-cache coherence required */
-#define _PAGE_SECONDARY 0x8000UL /* software: HPTE is in secondary group */
-#define _PAGE_GROUP_IX  0x7000UL /* software: HPTE index within group */
+#define _PAGE_PRESENT	0x0001 /* software: pte contains a translation */
+#define _PAGE_USER	0x0002 /* matches one of the PP bits */
+#define _PAGE_FILE	0x0002 /* (!present only) software: pte holds file offset */
+#define _PAGE_RW	0x0004 /* software: user write access allowed */
+#define _PAGE_GUARDED	0x0008
+#define _PAGE_COHERENT	0x0010 /* M: enforce memory coherence (SMP systems) */
+#define _PAGE_NO_CACHE	0x0020 /* I: cache inhibit */
+#define _PAGE_WRITETHRU	0x0040 /* W: cache write-through */
+#define _PAGE_DIRTY	0x0080 /* C: page changed */
+#define _PAGE_ACCESSED	0x0100 /* R: page referenced */
+#define _PAGE_EXEC	0x0200 /* software: i-cache coherence required */
+#define _PAGE_HASHPTE	0x0400 /* software: pte has an associated HPTE */
+#define _PAGE_BUSY	0x0800 /* software: PTE & hash are busy */ 
+#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
+#define _PAGE_GROUP_IX  0x7000 /* software: HPTE index within group */
 /* Bits 0x7000 identify the index within an HPT Group */
-#define _PAGE_HPTEFLAGS (_PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX)
+#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX)
 /* PAGE_MASK gives the right answer below, but only by accident */
 /* It should be preserving the high 48 bits and then specifically */
 /* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
@@ -157,8 +159,10 @@ extern unsigned long empty_zero_page[PAG
 #define _PMD_HUGEPAGE	0x00000001U
 #define HUGEPTE_BATCH_SIZE (1<<(HPAGE_SHIFT-PMD_SHIFT))
 
+#ifndef __ASSEMBLY__
 int hash_huge_page(struct mm_struct *mm, unsigned long access,
 		   unsigned long ea, unsigned long vsid, int local);
+#endif /* __ASSEMBLY__ */
 
 #define HAVE_ARCH_UNMAPPED_AREA
 #else
@@ -288,15 +292,17 @@ static inline unsigned long pte_update( 
 					unsigned long set )
 {
 	unsigned long old, tmp;
-
+	
 	__asm__ __volatile__(
 	"1:	ldarx	%0,0,%3		# pte_update\n\
+	andi.	%1,%0,%7\n\
+	bne-	1b \n\
 	andc	%1,%0,%4 \n\
 	or	%1,%1,%5 \n\
 	stdcx.	%1,0,%3 \n\
 	bne-	1b"
 	: "=&r" (old), "=&r" (tmp), "=m" (*p)
-	: "r" (p), "r" (clr), "r" (set), "m" (*p)
+	: "r" (p), "r" (clr), "r" (set), "m" (*p), "i" (_PAGE_BUSY)
 	: "cc" );
 	return old;
 }
@@ -422,5 +428,31 @@ long pSeries_hpte_insert(unsigned long h
 			 unsigned long prpn, int secondary,
 			 unsigned long hpteflags, int bolted, int large);
 
+/*
+ * find_linux_pte returns the address of a linux pte for a given 
+ * effective address and directory.  If not found, it returns zero.
+ */
+static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
+{
+	pgd_t *pg;
+	pmd_t *pm;
+	pte_t *pt = NULL;
+	pte_t pte;
+
+	pg = pgdir + pgd_index(ea);
+	if (!pgd_none(*pg)) {
+
+		pm = pmd_offset(pg, ea);
+		if (pmd_present(*pm)) { 
+			pt = pte_offset_kernel(pm, ea);
+			pte = *pt;
+			if (!pte_present(pte))
+				pt = NULL;
+		}
+	}
+
+	return pt;
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* _PPC64_PGTABLE_H */
diff -puN arch/ppc64/mm/init.c~ppc64-hash_page_rewrite arch/ppc64/mm/init.c
--- 25/arch/ppc64/mm/init.c~ppc64-hash_page_rewrite	2004-01-13 23:23:06.000000000 -0800
+++ 25-akpm/arch/ppc64/mm/init.c	2004-01-13 23:23:06.000000000 -0800
@@ -901,10 +901,6 @@ void flush_icache_user_range(struct vm_a
 	flush_icache_range(maddr, maddr + len);
 }
 
-extern pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea);
-int __hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
-		pte_t *ptep, unsigned long trap, int local);
-
 /*
  * This is called at the end of handling a user page fault, when the
  * fault has been handled by updating a PTE in the linux page tables.
@@ -944,6 +940,9 @@ void update_mmu_cache(struct vm_area_str
 		return;
 
 	ptep = find_linux_pte(pgdir, ea);
+	if (!ptep)
+		return;
+
 	vsid = get_vsid(vma->vm_mm->context, ea);
 
 	tmp = cpumask_of_cpu(smp_processor_id());

_
