
From: Olof Johansson <olof@austin.ibm.com>

Earlier cleanup efforts of the ppc64 IOMMU code have mostly been targeted
at simplifying the allocation schemes and modularising things for the
various platforms.  The IOMMU init functions are still a mess.  This is an
attempt to clean them up and make them somewhat easier to follow.

The new rules are:

1. iommu_init_early_<arch> is called before any PCI/VIO init is done
2. The pcibios fixup routines will call the iommu_{bus,dev}_setup functions
   appropriately as devices are added.

TCE space allocation has changed somewhat:

* On LPARs, nothing is really different. ibm,dma-window properties are still
  used to determine table sizes.
* On pSeries SMP-mode (non-LPAR), the full TCE space per PHB is split up
  in 256MB chunks, each handed out to one child bus/slot as needed. This
  makes the current maximum 7 child buses per PHB, a limit we're currently
  below on all machine models I'm aware of.
* Exception to the above: Pre-POWER4 machines with Python PHBs have a full
  GB of DMA space allocated at the PHB level, since there are no EADS-level
  tables on such systems.
* PowerMac and Maple still work like before: all buses/slots share one table.
* VIO works like before, ibm,my-dma-window is used like before.
* iSeries has not been touched much at all, besides the changed unit of
  the it_size variable in struct iommu_table.

Other things changed:
* PowerMac and Maple PCI/IOMMU inits have been changed a bit to conform to
  the new init structure
* pci_dma_direct.c has been renamed pci_direct_iommu.c to match
  pci_iommu.c (see separate patch)
* Likewise, a couple of the pci direct init functions have been renamed.

Signed-off-by: Olof Johansson <olof@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/ppc64/kernel/Makefile           |    2 
 25-akpm/arch/ppc64/kernel/iSeries_iommu.c    |   11 -
 25-akpm/arch/ppc64/kernel/iSeries_setup.c    |    3 
 25-akpm/arch/ppc64/kernel/iommu.c            |   21 --
 25-akpm/arch/ppc64/kernel/maple_pci.c        |    3 
 25-akpm/arch/ppc64/kernel/maple_setup.c      |    7 
 25-akpm/arch/ppc64/kernel/pSeries_iommu.c    |  283 +++++++++++++--------------
 25-akpm/arch/ppc64/kernel/pSeries_pci.c      |    5 
 25-akpm/arch/ppc64/kernel/pSeries_setup.c    |    5 
 25-akpm/arch/ppc64/kernel/pci.c              |    5 
 25-akpm/arch/ppc64/kernel/pci_direct_iommu.c |    2 
 25-akpm/arch/ppc64/kernel/pmac_pci.c         |    2 
 25-akpm/arch/ppc64/kernel/pmac_setup.c       |    7 
 25-akpm/arch/ppc64/kernel/prom.c             |   11 -
 25-akpm/arch/ppc64/kernel/u3_iommu.c         |  104 ++++++---
 25-akpm/arch/ppc64/kernel/vio.c              |   18 -
 25-akpm/drivers/pci/hotplug/rpaphp_pci.c     |    4 
 25-akpm/include/asm-ppc64/iommu.h            |   13 -
 25-akpm/include/asm-ppc64/machdep.h          |    2 
 25-akpm/include/asm-ppc64/pci-bridge.h       |    8 
 20 files changed, 265 insertions(+), 251 deletions(-)

diff -puN arch/ppc64/kernel/iommu.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/iommu.c
--- 25/arch/ppc64/kernel/iommu.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.873873456 -0800
+++ 25-akpm/arch/ppc64/kernel/iommu.c	2005-01-05 16:24:51.917866768 -0800
@@ -87,7 +87,7 @@ static unsigned long iommu_range_alloc(s
 		start = largealloc ? tbl->it_largehint : tbl->it_hint;
 
 	/* Use only half of the table for small allocs (15 pages or less) */
-	limit = largealloc ? tbl->it_mapsize : tbl->it_halfpoint;
+	limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
 
 	if (largealloc && start < tbl->it_halfpoint)
 		start = tbl->it_halfpoint;
@@ -114,7 +114,7 @@ static unsigned long iommu_range_alloc(s
 			 * Second failure, rescan the other half of the table.
 			 */
 			start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
-			limit = pass ? tbl->it_mapsize : limit;
+			limit = pass ? tbl->it_size : limit;
 			pass++;
 			goto again;
 		} else {
@@ -194,7 +194,7 @@ static void __iommu_free(struct iommu_ta
 	entry = dma_addr >> PAGE_SHIFT;
 	free_entry = entry - tbl->it_offset;
 
-	if (((free_entry + npages) > tbl->it_mapsize) ||
+	if (((free_entry + npages) > tbl->it_size) ||
 	    (entry < tbl->it_offset)) {
 		if (printk_ratelimit()) {
 			printk(KERN_INFO "iommu_free: invalid entry\n");
@@ -202,7 +202,7 @@ static void __iommu_free(struct iommu_ta
 			printk(KERN_INFO "\tdma_addr  = 0x%lx\n", (u64)dma_addr);
 			printk(KERN_INFO "\tTable     = 0x%lx\n", (u64)tbl);
 			printk(KERN_INFO "\tbus#      = 0x%lx\n", (u64)tbl->it_busno);
-			printk(KERN_INFO "\tmapsize   = 0x%lx\n", (u64)tbl->it_mapsize);
+			printk(KERN_INFO "\tsize      = 0x%lx\n", (u64)tbl->it_size);
 			printk(KERN_INFO "\tstartOff  = 0x%lx\n", (u64)tbl->it_offset);
 			printk(KERN_INFO "\tindex     = 0x%lx\n", (u64)tbl->it_index);
 			WARN_ON(1);
@@ -407,14 +407,11 @@ struct iommu_table *iommu_init_table(str
 	unsigned long sz;
 	static int welcomed = 0;
 
-	/* it_size is in pages, it_mapsize in number of entries */
-	tbl->it_mapsize = (tbl->it_size << PAGE_SHIFT) / tbl->it_entrysize;
-
 	/* Set aside 1/4 of the table for large allocations. */
-	tbl->it_halfpoint = tbl->it_mapsize * 3 / 4;
+	tbl->it_halfpoint = tbl->it_size * 3 / 4;
 
 	/* number of bytes needed for the bitmap */
-	sz = (tbl->it_mapsize + 7) >> 3;
+	sz = (tbl->it_size + 7) >> 3;
 
 	tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
 	if (!tbl->it_map)
@@ -448,8 +445,8 @@ void iommu_free_table(struct device_node
 	}
 
 	/* verify that table contains no entries */
-	/* it_mapsize is in entries, and we're examining 64 at a time */
-	for (i = 0; i < (tbl->it_mapsize/64); i++) {
+	/* it_size is in entries, and we're examining 64 at a time */
+	for (i = 0; i < (tbl->it_size/64); i++) {
 		if (tbl->it_map[i] != 0) {
 			printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
 				__FUNCTION__, dn->full_name);
@@ -458,7 +455,7 @@ void iommu_free_table(struct device_node
 	}
 
 	/* calculate bitmap size in bytes */
-	bitmap_sz = (tbl->it_mapsize + 7) / 8;
+	bitmap_sz = (tbl->it_size + 7) / 8;
 
 	/* free bitmap */
 	order = get_order(bitmap_sz);
diff -puN arch/ppc64/kernel/iSeries_iommu.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/iSeries_iommu.c
--- 25/arch/ppc64/kernel/iSeries_iommu.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.875873152 -0800
+++ 25-akpm/arch/ppc64/kernel/iSeries_iommu.c	2005-01-05 16:24:51.908868136 -0800
@@ -132,11 +132,11 @@ static void iommu_table_getparms(struct 
 	if (parms->itc_size == 0)
 		panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms);
 
-	tbl->it_size = parms->itc_size;
+	/* itc_size is in pages worth of table, it_size is in # of entries */
+	tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry);
 	tbl->it_busno = parms->itc_busno;
 	tbl->it_offset = parms->itc_offset;
 	tbl->it_index = parms->itc_index;
-	tbl->it_entrysize = sizeof(union tce_entry);
 	tbl->it_blocksize = 1;
 	tbl->it_type = TCE_PCI;
 
@@ -160,11 +160,16 @@ void iommu_devnode_init_iSeries(struct i
 		kfree(tbl);
 }
 
+static void iommu_dev_setup_iSeries(struct pci_dev *dev) { }
+static void iommu_bus_setup_iSeries(struct pci_bus *bus) { }
 
-void tce_init_iSeries(void)
+void iommu_init_early_iSeries(void)
 {
 	ppc_md.tce_build = tce_build_iSeries;
 	ppc_md.tce_free  = tce_free_iSeries;
 
+	ppc_md.iommu_dev_setup = iommu_dev_setup_iSeries;
+	ppc_md.iommu_bus_setup = iommu_bus_setup_iSeries;
+
 	pci_iommu_init();
 }
diff -puN arch/ppc64/kernel/iSeries_setup.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/iSeries_setup.c
--- 25/arch/ppc64/kernel/iSeries_setup.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.877872848 -0800
+++ 25-akpm/arch/ppc64/kernel/iSeries_setup.c	2005-01-05 16:24:51.913867376 -0800
@@ -68,7 +68,6 @@ extern void hvlog(char *fmt, ...);
 
 /* Function Prototypes */
 extern void ppcdbg_initialize(void);
-extern void tce_init_iSeries(void);
 
 static void build_iSeries_Memory_Map(void);
 static void setup_iSeries_cache_sizes(void);
@@ -344,7 +343,7 @@ static void __init iSeries_parse_cmdline
 	/*
 	 * Initialize the DMA/TCE management
 	 */
-	tce_init_iSeries();
+	iommu_init_early_iSeries();
 
 	/*
 	 * Initialize the table which translate Linux physical addresses to
diff -puN arch/ppc64/kernel/Makefile~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/Makefile
--- 25/arch/ppc64/kernel/Makefile~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.878872696 -0800
+++ 25-akpm/arch/ppc64/kernel/Makefile	2005-01-05 16:24:51.918866616 -0800
@@ -16,7 +16,7 @@ obj-y               :=	setup.o entry.o t
 obj-$(CONFIG_PPC_OF) +=	of_device.o
 
 pci-obj-$(CONFIG_PPC_ISERIES)	+= iSeries_pci.o iSeries_pci_reset.o
-pci-obj-$(CONFIG_PPC_MULTIPLATFORM)	+= pci_dn.o pci_dma_direct.o
+pci-obj-$(CONFIG_PPC_MULTIPLATFORM)	+= pci_dn.o pci_direct_iommu.o
 
 obj-$(CONFIG_PCI)	+= pci.o pci_iommu.o iomap.o $(pci-obj-y)
 
diff -puN arch/ppc64/kernel/maple_pci.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/maple_pci.c
--- 25/arch/ppc64/kernel/maple_pci.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.879872544 -0800
+++ 25-akpm/arch/ppc64/kernel/maple_pci.c	2005-01-05 16:24:51.914867224 -0800
@@ -385,9 +385,6 @@ void __init maple_pcibios_fixup(void)
 	/* Fixup the pci_bus sysdata pointers */
 	pci_fix_bus_sysdata();
 
-	/* Setup the iommu */
-	iommu_setup_u3();
-
 	DBG(" <- maple_pcibios_fixup\n");
 }
 
diff -puN arch/ppc64/kernel/maple_setup.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/maple_setup.c
--- 25/arch/ppc64/kernel/maple_setup.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.881872240 -0800
+++ 25-akpm/arch/ppc64/kernel/maple_setup.c	2005-01-05 16:24:51.915867072 -0800
@@ -111,11 +111,6 @@ void __init maple_setup_arch(void)
 #ifdef CONFIG_SMP
 	smp_ops = &maple_smp_ops;
 #endif
-	/* Setup the PCI DMA to "direct" by default. May be overriden
-	 * by iommu later on
-	 */
-	pci_dma_init_direct();
-
 	/* Lookup PCI hosts */
        	maple_pci_init();
 
@@ -159,6 +154,8 @@ static void __init maple_init_early(void
 	/* Setup interrupt mapping options */
 	ppc64_interrupt_controller = IC_OPEN_PIC;
 
+	iommu_init_early_u3();
+
 	DBG(" <- maple_init_early\n");
 }
 
diff -puN arch/ppc64/kernel/pci.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/pci.c
--- 25/arch/ppc64/kernel/pci.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.882872088 -0800
+++ 25-akpm/arch/ppc64/kernel/pci.c	2005-01-05 16:24:51.904868744 -0800
@@ -845,6 +845,11 @@ void __devinit pcibios_fixup_bus(struct 
 		pcibios_fixup_device_resources(dev, bus);
 	}
 
+	ppc_md.iommu_bus_setup(bus);
+
+	list_for_each_entry(dev, &bus->devices, bus_list)
+		ppc_md.iommu_dev_setup(dev);
+
 	if (!pci_probe_only)
 		return;
 
diff -puN arch/ppc64/kernel/pci_direct_iommu.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/pci_direct_iommu.c
--- 25/arch/ppc64/kernel/pci_direct_iommu.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.883871936 -0800
+++ 25-akpm/arch/ppc64/kernel/pci_direct_iommu.c	2005-01-05 16:24:51.918866616 -0800
@@ -78,7 +78,7 @@ static void pci_direct_unmap_sg(struct p
 {
 }
 
-void __init pci_dma_init_direct(void)
+void __init pci_direct_iommu_init(void)
 {
 	pci_dma_ops.pci_alloc_consistent = pci_direct_alloc_consistent;
 	pci_dma_ops.pci_free_consistent = pci_direct_free_consistent;
diff -puN arch/ppc64/kernel/pmac_pci.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/pmac_pci.c
--- 25/arch/ppc64/kernel/pmac_pci.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.885871632 -0800
+++ 25-akpm/arch/ppc64/kernel/pmac_pci.c	2005-01-05 16:24:51.914867224 -0800
@@ -666,8 +666,6 @@ void __init pmac_pcibios_fixup(void)
 		pci_read_irq_line(dev);
 
 	pci_fix_bus_sysdata();
-
-	iommu_setup_u3();
 }
 
 static void __init pmac_fixup_phb_resources(void)
diff -puN arch/ppc64/kernel/pmac_setup.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/pmac_setup.c
--- 25/arch/ppc64/kernel/pmac_setup.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.886871480 -0800
+++ 25-akpm/arch/ppc64/kernel/pmac_setup.c	2005-01-05 16:24:51.915867072 -0800
@@ -166,11 +166,6 @@ void __init pmac_setup_arch(void)
 	pmac_setup_smp();
 #endif
 
-	/* Setup the PCI DMA to "direct" by default. May be overriden
-	 * by iommu later on
-	 */
-	pci_dma_init_direct();
-
 	/* Lookup PCI hosts */
        	pmac_pci_init();
 
@@ -317,6 +312,8 @@ void __init pmac_init_early(void)
 	/* Setup interrupt mapping options */
 	ppc64_interrupt_controller = IC_OPEN_PIC;
 
+	iommu_init_early_u3();
+
 	DBG(" <- pmac_init_early\n");
 }
 
diff -puN arch/ppc64/kernel/prom.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/prom.c
--- 25/arch/ppc64/kernel/prom.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.887871328 -0800
+++ 25-akpm/arch/ppc64/kernel/prom.c	2005-01-05 16:24:51.911867680 -0800
@@ -1743,17 +1743,6 @@ static int of_finish_dynamic_node(struct
 		node->devfn = (regs[0] >> 8) & 0xff;
 	}
 
-	/* fixing up iommu_table */
-
-#ifdef CONFIG_PPC_PSERIES
-	if (strcmp(node->name, "pci") == 0 &&
-	    get_property(node, "ibm,dma-window", NULL)) {
-		node->bussubno = node->busno;
-		iommu_devnode_init_pSeries(node);
-	} else
-		node->iommu_table = parent->iommu_table;
-#endif /* CONFIG_PPC_PSERIES */
-
 out:
 	of_node_put(parent);
 	return err;
diff -puN arch/ppc64/kernel/pSeries_iommu.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/pSeries_iommu.c
--- 25/arch/ppc64/kernel/pSeries_iommu.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.889871024 -0800
+++ 25-akpm/arch/ppc64/kernel/pSeries_iommu.c	2005-01-05 16:24:51.906868440 -0800
@@ -46,6 +46,9 @@
 #include <asm/systemcfg.h>
 #include "pci.h"
 
+#define DBG(fmt...)
+
+extern int is_python(struct device_node *);
 
 static void tce_build_pSeries(struct iommu_table *tbl, long index, 
 			      long npages, unsigned long uaddr, 
@@ -121,7 +124,7 @@ static void tce_build_pSeriesLP(struct i
 	}
 }
 
-DEFINE_PER_CPU(void *, tce_page) = NULL;
+static DEFINE_PER_CPU(void *, tce_page) = NULL;
 
 static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 				     long npages, unsigned long uaddr,
@@ -233,85 +236,6 @@ static void tce_freemulti_pSeriesLP(stru
 	}
 }
 
-
-static void iommu_buses_init(void)
-{
-	struct pci_controller *phb, *tmp;
-	struct device_node *dn, *first_dn;
-	int num_slots, num_slots_ilog2;
-	int first_phb = 1;
-	unsigned long tcetable_ilog2;
-
-	/*
-	 * We default to a TCE table that maps 2GB (4MB table, 22 bits),
-	 * however some machines have a 3GB IO hole and for these we
-	 * create a table that maps 1GB (2MB table, 21 bits)
-	 */
-	if (io_hole_start < 0x80000000UL)
-		tcetable_ilog2 = 21;
-	else
-		tcetable_ilog2 = 22;
-
-	/* XXX Should we be using pci_root_buses instead?  -ojn
-	 */
-
-	list_for_each_entry_safe(phb, tmp, &hose_list, list_node) {
-		first_dn = ((struct device_node *)phb->arch_data)->child;
-
-		/* Carve 2GB into the largest dma_window_size possible */
-		for (dn = first_dn, num_slots = 0; dn != NULL; dn = dn->sibling)
-			num_slots++;
-		num_slots_ilog2 = __ilog2(num_slots);
-
-		if ((1<<num_slots_ilog2) != num_slots)
-			num_slots_ilog2++;
-
-		phb->dma_window_size = 1 << (tcetable_ilog2 - num_slots_ilog2);
-
-		/* Reserve 16MB of DMA space on the first PHB.
-		 * We should probably be more careful and use firmware props.
-		 * In reality this space is remapped, not lost.  But we don't
-		 * want to get that smart to handle it -- too much work.
-		 */
-		phb->dma_window_base_cur = first_phb ? (1 << 12) : 0;
-		first_phb = 0;
-
-		for (dn = first_dn; dn != NULL; dn = dn->sibling)
-			iommu_devnode_init_pSeries(dn);
-	}
-}
-
-
-static void iommu_buses_init_lpar(struct list_head *bus_list)
-{
-	struct list_head *ln;
-	struct pci_bus *bus;
-	struct device_node *busdn;
-	unsigned int *dma_window;
-
-	for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
-		bus = pci_bus_b(ln);
-
-		if (bus->self)
-			busdn = pci_device_to_OF_node(bus->self);
-		else
-			busdn = bus->sysdata;   /* must be a phb */
-
-		dma_window = (unsigned int *)get_property(busdn, "ibm,dma-window", NULL);
-		if (dma_window) {
-			/* Bussubno hasn't been copied yet.
-			 * Do it now because iommu_table_setparms_lpar needs it.
-			 */
-			busdn->bussubno = bus->number;
-			iommu_devnode_init_pSeries(busdn);
-		}
-
-		/* look for a window on a bridge even if the PHB had one */
-		iommu_buses_init_lpar(&bus->children);
-	}
-}
-
-
 static void iommu_table_setparms(struct pci_controller *phb,
 				 struct device_node *dn,
 				 struct iommu_table *tbl) 
@@ -336,27 +260,18 @@ static void iommu_table_setparms(struct 
 	tbl->it_busno = phb->bus->number;
 	
 	/* Units of tce entries */
-	tbl->it_offset = phb->dma_window_base_cur;
-	
-	/* Adjust the current table offset to the next
-	 * region.  Measured in TCE entries. Force an
-	 * alignment to the size allotted per IOA. This
-	 * makes it easier to remove the 1st 16MB.
-      	 */
-	phb->dma_window_base_cur += (phb->dma_window_size>>3);
-	phb->dma_window_base_cur &= 
-		~((phb->dma_window_size>>3)-1);
-	
-	/* Set the tce table size - measured in pages */
-	tbl->it_size = ((phb->dma_window_base_cur -
-			 tbl->it_offset) << 3) >> PAGE_SHIFT;
+	tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT;
 	
 	/* Test if we are going over 2GB of DMA space */
-	if (phb->dma_window_base_cur > (1 << 19))
+	if (phb->dma_window_base_cur + phb->dma_window_size > (1L << 31))
 		panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); 
 	
+	phb->dma_window_base_cur += phb->dma_window_size;
+
+	/* Set the tce table size - measured in entries */
+	tbl->it_size = phb->dma_window_size >> PAGE_SHIFT;
+
 	tbl->it_index = 0;
-	tbl->it_entrysize = sizeof(union tce_entry);
 	tbl->it_blocksize = 16;
 	tbl->it_type = TCE_PCI;
 }
@@ -375,82 +290,174 @@ static void iommu_table_setparms(struct 
  */
 static void iommu_table_setparms_lpar(struct pci_controller *phb,
 				      struct device_node *dn,
-				      struct iommu_table *tbl)
+				      struct iommu_table *tbl,
+				      unsigned int *dma_window)
 {
-	unsigned int *dma_window;
-
-	dma_window = (unsigned int *)get_property(dn, "ibm,dma-window", NULL);
-
 	if (!dma_window)
 		panic("iommu_table_setparms_lpar: device %s has no"
 		      " ibm,dma-window property!\n", dn->full_name);
 
 	tbl->it_busno  = dn->bussubno;
-	tbl->it_size   = (((((unsigned long)dma_window[4] << 32) | 
-			   (unsigned long)dma_window[5]) >> PAGE_SHIFT) << 3) >> PAGE_SHIFT;
-	tbl->it_offset = ((((unsigned long)dma_window[2] << 32) | 
-			   (unsigned long)dma_window[3]) >> 12);
+
+	/* TODO: Parse field size properties properly. */
+	tbl->it_size   = (((unsigned long)dma_window[4] << 32) |
+			   (unsigned long)dma_window[5]) >> PAGE_SHIFT;
+	tbl->it_offset = (((unsigned long)dma_window[2] << 32) |
+			   (unsigned long)dma_window[3]) >> PAGE_SHIFT;
 	tbl->it_base   = 0;
 	tbl->it_index  = dma_window[0];
-	tbl->it_entrysize = sizeof(union tce_entry);
 	tbl->it_blocksize  = 16;
 	tbl->it_type = TCE_PCI;
 }
 
+static void iommu_bus_setup_pSeries(struct pci_bus *bus)
+{
+	struct device_node *dn, *pdn;
+
+	DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self);
+
+	/* For each (root) bus, we carve up the available DMA space in 256MB
+	 * pieces. Since each piece is used by one (sub) bus/device, that would
+	 * give a maximum of 7 devices per PHB. In most cases, this is plenty.
+	 *
+	 * The exception is on Python PHBs (pre-POWER4). Here we don't have EADS
+	 * bridges below the PHB to allocate the sectioned tables to, so instead
+	 * we allocate a 1GB table at the PHB level.
+	 */
+
+	dn = pci_bus_to_OF_node(bus);
+
+	if (!bus->self) {
+		/* Root bus */
+		if (is_python(dn)) {
+			struct iommu_table *tbl;
+
+			DBG("Python root bus %s\n", bus->name);
+
+			/* 1GB window by default */
+			dn->phb->dma_window_size = 1 << 30;
+			dn->phb->dma_window_base_cur = 0;
+
+			tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
+
+			iommu_table_setparms(dn->phb, dn, tbl);
+			dn->iommu_table = iommu_init_table(tbl);
+		} else {
+			/* 256 MB window by default */
+			dn->phb->dma_window_size = 1 << 28;
+			/* always skip the first 256MB */
+			dn->phb->dma_window_base_cur = 1 << 28;
+
+			/* No table at PHB level for non-python PHBs */
+		}
+	} else {
+		pdn = pci_bus_to_OF_node(bus->parent);
+
+		if (!pdn->iommu_table) {
+			struct iommu_table *tbl;
+			/* First child, allocate new table (256MB window) */
+
+			tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
+
+			iommu_table_setparms(dn->phb, dn, tbl);
+
+			dn->iommu_table = iommu_init_table(tbl);
+		} else {
+			/* Lower than first child or under python, copy parent table */
+			dn->iommu_table = pdn->iommu_table;
+		}
+	}
+}
+
 
-void iommu_devnode_init_pSeries(struct device_node *dn)
+static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus)
 {
 	struct iommu_table *tbl;
+	struct device_node *dn, *pdn;
+	unsigned int *dma_window = NULL;
 
-	tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), 
-					    GFP_KERNEL);
-	
-	if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
-		iommu_table_setparms_lpar(dn->phb, dn, tbl);
-	else
-		iommu_table_setparms(dn->phb, dn, tbl);
+	dn = pci_bus_to_OF_node(bus);
+
+	/* Find nearest ibm,dma-window, walking up the device tree */
+	for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
+		dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL);
+		if (dma_window != NULL)
+			break;
+	}
+
+	WARN_ON(dma_window == NULL);
+
+	if (!pdn->iommu_table) {
+		/* Bussubno hasn't been copied yet.
+		 * Do it now because iommu_table_setparms_lpar needs it.
+		 */
+		pdn->bussubno = bus->number;
+
+		tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table),
+						    GFP_KERNEL);
 	
-	dn->iommu_table = iommu_init_table(tbl);
+		iommu_table_setparms_lpar(pdn->phb, pdn, tbl, dma_window);
+
+		pdn->iommu_table = iommu_init_table(tbl);
+	}
+
+	if (pdn != dn)
+		dn->iommu_table = pdn->iommu_table;
 }
 
-void iommu_setup_pSeries(void)
+
+static void iommu_dev_setup_pSeries(struct pci_dev *dev)
 {
-	struct pci_dev *dev = NULL;
 	struct device_node *dn, *mydn;
 
-	if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
-		iommu_buses_init_lpar(&pci_root_buses);
-	else
-		iommu_buses_init();
-
-	/* Now copy the iommu_table ptr from the bus devices down to every
+	DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, dev->pretty_name);
+	/* Now copy the iommu_table ptr from the bus device down to the
 	 * pci device_node.  This means get_iommu_table() won't need to search
 	 * up the device tree to find it.
 	 */
-	for_each_pci_dev(dev) {
-		mydn = dn = pci_device_to_OF_node(dev);
+	mydn = dn = pci_device_to_OF_node(dev);
 
-		while (dn && dn->iommu_table == NULL)
-			dn = dn->parent;
-		if (dn)
-			mydn->iommu_table = dn->iommu_table;
-	}
+	while (dn && dn->iommu_table == NULL)
+		dn = dn->parent;
+
+	WARN_ON(!dn);
+
+	if (dn)
+		mydn->iommu_table = dn->iommu_table;
 }
 
+static void iommu_bus_setup_null(struct pci_bus *b) { }
+static void iommu_dev_setup_null(struct pci_dev *d) { }
+
 /* These are called very early. */
-void tce_init_pSeries(void)
+void iommu_init_early_pSeries(void)
 {
-	if (!(systemcfg->platform & PLATFORM_LPAR)) {
+	if (of_chosen && get_property(of_chosen, "linux,iommu-off", NULL)) {
+		/* Direct I/O, IOMMU off */
+		ppc_md.iommu_dev_setup = iommu_dev_setup_null;
+		ppc_md.iommu_bus_setup = iommu_bus_setup_null;
+		pci_direct_iommu_init();
+
+		return;
+	}
+
+	if (systemcfg->platform & PLATFORM_LPAR) {
+		if (cur_cpu_spec->firmware_features & FW_FEATURE_MULTITCE) {
+			ppc_md.tce_build = tce_buildmulti_pSeriesLP;
+			ppc_md.tce_free	 = tce_freemulti_pSeriesLP;
+		} else {
+			ppc_md.tce_build = tce_build_pSeriesLP;
+			ppc_md.tce_free	 = tce_free_pSeriesLP;
+		}
+		ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP;
+	} else {
 		ppc_md.tce_build = tce_build_pSeries;
 		ppc_md.tce_free  = tce_free_pSeries;
-	} else if (cur_cpu_spec->firmware_features & FW_FEATURE_MULTITCE) {
-		ppc_md.tce_build = tce_buildmulti_pSeriesLP;
-		ppc_md.tce_free	 = tce_freemulti_pSeriesLP;
-	} else {
-		ppc_md.tce_build = tce_build_pSeriesLP;
-		ppc_md.tce_free	 = tce_free_pSeriesLP;
+		ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries;
 	}
 
+	ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries;
+
 	pci_iommu_init();
 }
 
diff -puN arch/ppc64/kernel/pSeries_pci.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/pSeries_pci.c
--- 25/arch/ppc64/kernel/pSeries_pci.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.890870872 -0800
+++ 25-akpm/arch/ppc64/kernel/pSeries_pci.c	2005-01-05 16:24:51.909867984 -0800
@@ -148,7 +148,7 @@ struct pci_ops rtas_pci_ops = {
 	rtas_pci_write_config
 };
 
-static int is_python(struct device_node *dev)
+int is_python(struct device_node *dev)
 {
 	char *model = (char *)get_property(dev, "model", NULL);
 
@@ -554,9 +554,6 @@ void __init pSeries_final_fixup(void)
 	pSeries_request_regions();
 	pci_fix_bus_sysdata();
 
-	if (!of_chosen || !get_property(of_chosen, "linux,iommu-off", NULL))
-		iommu_setup_pSeries();
-
 	pci_addr_cache_build();
 }
 
diff -puN arch/ppc64/kernel/pSeries_setup.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/pSeries_setup.c
--- 25/arch/ppc64/kernel/pSeries_setup.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.891870720 -0800
+++ 25-akpm/arch/ppc64/kernel/pSeries_setup.c	2005-01-05 16:24:51.912867528 -0800
@@ -375,10 +375,7 @@ static void __init pSeries_init_early(vo
 	}
 
 
-	if (iommu_off)
-		pci_dma_init_direct();
-	else
-		tce_init_pSeries();
+	iommu_init_early_pSeries();
 
 	pSeries_discover_pic();
 
diff -puN arch/ppc64/kernel/u3_iommu.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/u3_iommu.c
--- 25/arch/ppc64/kernel/u3_iommu.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.893870416 -0800
+++ 25-akpm/arch/ppc64/kernel/u3_iommu.c	2005-01-05 16:24:51.908868136 -0800
@@ -91,6 +91,7 @@ static unsigned int *dart; 
 static unsigned int dart_emptyval;
 
 static struct iommu_table iommu_table_u3;
+static int iommu_table_u3_inited;
 static int dart_dirty;
 
 #define DBG(...)
@@ -192,7 +193,6 @@ static int dart_init(struct device_node 
 	unsigned int regword;
 	unsigned int i;
 	unsigned long tmp;
-	struct page *p;
 
 	if (dart_tablebase == 0 || dart_tablesize == 0) {
 		printk(KERN_INFO "U3-DART: table not allocated, using direct DMA\n");
@@ -209,16 +209,15 @@ static int dart_init(struct device_node 
 	 * that to work around what looks like a problem with the HT bridge
 	 * prefetching into invalid pages and corrupting data
 	 */
-	tmp = __get_free_pages(GFP_ATOMIC, 1);
-	if (tmp == 0)
-		panic("U3-DART: Cannot allocate spare page !");
-	dart_emptyval = DARTMAP_VALID |
-		((virt_to_abs(tmp) >> PAGE_SHIFT) & DARTMAP_RPNMASK);
+	tmp = lmb_alloc(PAGE_SIZE, PAGE_SIZE);
+	if (!tmp)
+		panic("U3-DART: Cannot allocate spare page!");
+	dart_emptyval = DARTMAP_VALID | ((tmp >> PAGE_SHIFT) & DARTMAP_RPNMASK);
 
 	/* Map in DART registers. FIXME: Use device node to get base address */
 	dart = ioremap(DART_BASE, 0x7000);
 	if (dart == NULL)
-		panic("U3-DART: Cannot map registers !");
+		panic("U3-DART: Cannot map registers!");
 
 	/* Set initial control register contents: table base, 
 	 * table size and enable bit
@@ -227,7 +226,6 @@ static int dart_init(struct device_node 
 		((dart_tablebase >> PAGE_SHIFT) << DARTCNTL_BASE_SHIFT) |
 		(((dart_tablesize >> PAGE_SHIFT) & DARTCNTL_SIZE_MASK)
 				 << DARTCNTL_SIZE_SHIFT);
-	p = virt_to_page(dart_tablebase);
 	dart_vbase = ioremap(virt_to_abs(dart_tablebase), dart_tablesize);
 
 	/* Fill initial table */
@@ -240,35 +238,67 @@ static int dart_init(struct device_node 
 	/* Invalidate DART to get rid of possible stale TLBs */
 	dart_tlb_invalidate_all();
 
+	printk(KERN_INFO "U3/CPC925 DART IOMMU initialized\n");
+
+	return 0;
+}
+
+static void iommu_table_u3_setup(void)
+{
 	iommu_table_u3.it_busno = 0;
-	
-	/* Units of tce entries */
 	iommu_table_u3.it_offset = 0;
-	
-	/* Set the tce table size - measured in pages */
-	iommu_table_u3.it_size = dart_tablesize >> PAGE_SHIFT;
+	/* it_size is in number of entries */
+	iommu_table_u3.it_size = dart_tablesize / sizeof(u32);
 
 	/* Initialize the common IOMMU code */
 	iommu_table_u3.it_base = (unsigned long)dart_vbase;
 	iommu_table_u3.it_index = 0;
 	iommu_table_u3.it_blocksize = 1;
-	iommu_table_u3.it_entrysize = sizeof(u32);
 	iommu_init_table(&iommu_table_u3);
 
 	/* Reserve the last page of the DART to avoid possible prefetch
 	 * past the DART mapped area
 	 */
-	set_bit(iommu_table_u3.it_mapsize - 1, iommu_table_u3.it_map);
+	set_bit(iommu_table_u3.it_size - 1, iommu_table_u3.it_map);
+}
 
-	printk(KERN_INFO "U3/CPC925 DART IOMMU initialized\n");
+static void iommu_dev_setup_u3(struct pci_dev *dev)
+{
+	struct device_node *dn;
 
-	return 0;
+	/* We only have one iommu table on the mac for now, which makes
+	 * things simple. Setup all PCI devices to point to this table
+	 *
+	 * We must use pci_device_to_OF_node() to make sure that
+	 * we get the real "final" pointer to the device in the
+	 * pci_dev sysdata and not the temporary PHB one
+	 */
+	dn = pci_device_to_OF_node(dev);
+
+	if (dn)
+		dn->iommu_table = &iommu_table_u3;
+}
+
+static void iommu_bus_setup_u3(struct pci_bus *bus)
+{
+	struct device_node *dn;
+
+	if (!iommu_table_u3_inited) {
+		iommu_table_u3_inited = 1;
+		iommu_table_u3_setup();
+	}
+
+	dn = pci_bus_to_OF_node(bus);
+
+	if (dn)
+		dn->iommu_table = &iommu_table_u3;
 }
 
-void iommu_setup_u3(void)
+static void iommu_dev_setup_null(struct pci_dev *dev) { }
+static void iommu_bus_setup_null(struct pci_bus *bus) { }
+
+void iommu_init_early_u3(void)
 {
-	struct pci_controller *phb, *tmp;
-	struct pci_dev *dev = NULL;
 	struct device_node *dn;
 
 	/* Find the DART in the device-tree */
@@ -282,31 +312,23 @@ void iommu_setup_u3(void)
 	ppc_md.tce_flush = dart_flush;
 
 	/* Initialize the DART HW */
-	if (dart_init(dn))
-		return;
+	if (dart_init(dn)) {
+		/* If init failed, use direct iommu and null setup functions */
+		ppc_md.iommu_dev_setup = iommu_dev_setup_null;
+		ppc_md.iommu_bus_setup = iommu_bus_setup_null;
+
+		/* Setup pci_dma ops */
+		pci_direct_iommu_init();
+	} else {
+		ppc_md.iommu_dev_setup = iommu_dev_setup_u3;
+		ppc_md.iommu_bus_setup = iommu_bus_setup_u3;
 
-	/* Setup pci_dma ops */
-	pci_iommu_init();
-
-	/* We only have one iommu table on the mac for now, which makes
-	 * things simple. Setup all PCI devices to point to this table
-	 */
-	for_each_pci_dev(dev) {
-		/* We must use pci_device_to_OF_node() to make sure that
-		 * we get the real "final" pointer to the device in the
-		 * pci_dev sysdata and not the temporary PHB one
-		 */
-		struct device_node *dn = pci_device_to_OF_node(dev);
-		if (dn)
-			dn->iommu_table = &iommu_table_u3;
-	}
-	/* We also make sure we set all PHBs ... */
-	list_for_each_entry_safe(phb, tmp, &hose_list, list_node) {
-		dn = (struct device_node *)phb->arch_data;
-		dn->iommu_table = &iommu_table_u3;
+		/* Setup pci_dma ops */
+		pci_iommu_init();
 	}
 }
 
+
 void __init alloc_u3_dart_table(void)
 {
 	/* Only reserve DART space if machine has more than 2GB of RAM
diff -puN arch/ppc64/kernel/vio.c~ppc64-iommu-cleanups-main-cleanup-patch arch/ppc64/kernel/vio.c
--- 25/arch/ppc64/kernel/vio.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.894870264 -0800
+++ 25-akpm/arch/ppc64/kernel/vio.c	2005-01-05 16:24:51.916866920 -0800
@@ -158,6 +158,7 @@ void __init iommu_vio_init(void)
 	struct iommu_table *t;
 	struct iommu_table_cb cb;
 	unsigned long cbp;
+	unsigned long itc_entries;
 
 	cb.itc_busno = 255;    /* Bus 255 is the virtual bus */
 	cb.itc_virtbus = 0xff; /* Ask for virtual bus */
@@ -165,12 +166,12 @@ void __init iommu_vio_init(void)
 	cbp = virt_to_abs(&cb);
 	HvCallXm_getTceTableParms(cbp);
 
-	veth_iommu_table.it_size        = cb.itc_size / 2;
+	itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
+	veth_iommu_table.it_size        = itc_entries / 2;
 	veth_iommu_table.it_busno       = cb.itc_busno;
 	veth_iommu_table.it_offset      = cb.itc_offset;
 	veth_iommu_table.it_index       = cb.itc_index;
 	veth_iommu_table.it_type        = TCE_VB;
-	veth_iommu_table.it_entrysize	= sizeof(union tce_entry);
 	veth_iommu_table.it_blocksize	= 1;
 
 	t = iommu_init_table(&veth_iommu_table);
@@ -178,13 +179,12 @@ void __init iommu_vio_init(void)
 	if (!t)
 		printk("Virtual Bus VETH TCE table failed.\n");
 
-	vio_iommu_table.it_size         = cb.itc_size - veth_iommu_table.it_size;
+	vio_iommu_table.it_size         = itc_entries - veth_iommu_table.it_size;
 	vio_iommu_table.it_busno        = cb.itc_busno;
 	vio_iommu_table.it_offset       = cb.itc_offset +
-		veth_iommu_table.it_size * (PAGE_SIZE/sizeof(union tce_entry));
+					  veth_iommu_table.it_size;
 	vio_iommu_table.it_index        = cb.itc_index;
 	vio_iommu_table.it_type         = TCE_VB;
-	vio_iommu_table.it_entrysize	= sizeof(union tce_entry);
 	vio_iommu_table.it_blocksize	= 1;
 
 	t = iommu_init_table(&vio_iommu_table);
@@ -511,7 +511,6 @@ static struct iommu_table * vio_build_io
 	unsigned int *dma_window;
 	struct iommu_table *newTceTable;
 	unsigned long offset;
-	unsigned long size;
 	int dma_window_property_size;
 
 	dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size);
@@ -521,21 +520,18 @@ static struct iommu_table * vio_build_io
 
 	newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
 
-	size = ((dma_window[4] >> PAGE_SHIFT) << 3) >> PAGE_SHIFT;
-
 	/*  There should be some code to extract the phys-encoded offset
 		using prom_n_addr_cells(). However, according to a comment
 		on earlier versions, it's always zero, so we don't bother */
 	offset = dma_window[1] >>  PAGE_SHIFT;
 
-	/* TCE table size - measured in units of pages of tce table */
-	newTceTable->it_size		= size;
+	/* TCE table size - measured in tce entries */
+	newTceTable->it_size		= dma_window[4] >> PAGE_SHIFT;
 	/* offset for VIO should always be 0 */
 	newTceTable->it_offset		= offset;
 	newTceTable->it_busno		= 0;
 	newTceTable->it_index		= (unsigned long)dma_window[0];
 	newTceTable->it_type		= TCE_VB;
-	newTceTable->it_entrysize	= sizeof(union tce_entry);
 
 	return iommu_init_table(newTceTable);
 }
diff -puN drivers/pci/hotplug/rpaphp_pci.c~ppc64-iommu-cleanups-main-cleanup-patch drivers/pci/hotplug/rpaphp_pci.c
--- 25/drivers/pci/hotplug/rpaphp_pci.c~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.895870112 -0800
+++ 25-akpm/drivers/pci/hotplug/rpaphp_pci.c	2005-01-05 16:24:51.909867984 -0800
@@ -25,6 +25,7 @@
 #include <linux/pci.h>
 #include <asm/pci-bridge.h>
 #include <asm/rtas.h>
+#include <asm/machdep.h>
 #include "../pci.h"		/* for pci_add_new_bus */
 
 #include "rpaphp.h"
@@ -168,6 +169,9 @@ rpaphp_fixup_new_pci_devices(struct pci_
 		if (list_empty(&dev->global_list)) {
 			int i;
 			
+			/* Need to setup IOMMU tables */
+			ppc_md.iommu_dev_setup(dev);
+
 			if(fix_bus)
 				pcibios_fixup_device_resources(dev, bus);
 			pci_read_irq_line(dev);
diff -puN include/asm-ppc64/iommu.h~ppc64-iommu-cleanups-main-cleanup-patch include/asm-ppc64/iommu.h
--- 25/include/asm-ppc64/iommu.h~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.897869808 -0800
+++ 25-akpm/include/asm-ppc64/iommu.h	2005-01-05 16:24:51.912867528 -0800
@@ -69,18 +69,16 @@ union tce_entry {
 
 struct iommu_table {
 	unsigned long  it_busno;     /* Bus number this table belongs to */
-	unsigned long  it_size;      /* Size in pages of iommu table */
+	unsigned long  it_size;      /* Size of iommu table in entries */
 	unsigned long  it_offset;    /* Offset into global table */
 	unsigned long  it_base;      /* mapped address of tce table */
 	unsigned long  it_index;     /* which iommu table this is */
 	unsigned long  it_type;      /* type: PCI or Virtual Bus */
-	unsigned long  it_entrysize; /* Size of an entry in bytes */
 	unsigned long  it_blocksize; /* Entries in each block (cacheline) */
 	unsigned long  it_hint;      /* Hint for next alloc */
 	unsigned long  it_largehint; /* Hint for large allocs */
 	unsigned long  it_halfpoint; /* Breaking point for small/large allocs */
 	spinlock_t     it_lock;      /* Protects it_map */
-	unsigned long  it_mapsize;   /* Size of map in # of entries (bits) */
 	unsigned long *it_map;       /* A simple allocation bitmap for now */
 };
 
@@ -156,14 +154,13 @@ extern dma_addr_t iommu_map_single(struc
 extern void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
 		size_t size, enum dma_data_direction direction);
 
-extern void tce_init_pSeries(void);
-extern void tce_init_iSeries(void);
+extern void iommu_init_early_pSeries(void);
+extern void iommu_init_early_iSeries(void);
+extern void iommu_init_early_u3(void);
 
 extern void pci_iommu_init(void);
-extern void pci_dma_init_direct(void);
+extern void pci_direct_iommu_init(void);
 
 extern void alloc_u3_dart_table(void);
 
-extern int ppc64_iommu_off;
-
 #endif /* _ASM_IOMMU_H */
diff -puN include/asm-ppc64/machdep.h~ppc64-iommu-cleanups-main-cleanup-patch include/asm-ppc64/machdep.h
--- 25/include/asm-ppc64/machdep.h~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.898869656 -0800
+++ 25-akpm/include/asm-ppc64/machdep.h	2005-01-05 16:24:51.904868744 -0800
@@ -70,6 +70,8 @@ struct machdep_calls {
 				    long index,
 				    long npages);
 	void		(*tce_flush)(struct iommu_table *tbl);
+	void		(*iommu_dev_setup)(struct pci_dev *dev);
+	void		(*iommu_bus_setup)(struct pci_bus *bus);
 
 	int		(*probe)(int platform);
 	void		(*setup_arch)(void);
diff -puN include/asm-ppc64/pci-bridge.h~ppc64-iommu-cleanups-main-cleanup-patch include/asm-ppc64/pci-bridge.h
--- 25/include/asm-ppc64/pci-bridge.h~ppc64-iommu-cleanups-main-cleanup-patch	2005-01-05 16:24:51.900869352 -0800
+++ 25-akpm/include/asm-ppc64/pci-bridge.h	2005-01-05 16:24:51.911867680 -0800
@@ -79,6 +79,14 @@ static inline struct device_node *pci_de
 		return fetch_dev_dn(dev);
 }
 
+static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
+{
+	if (bus->self)
+		return pci_device_to_OF_node(bus->self);
+	else
+		return bus->sysdata; /* Must be root bus (PHB) */
+}
+
 extern void pci_process_bridge_OF_ranges(struct pci_controller *hose,
 					 struct device_node *dev);
 
_
