
[ppc64] Fix {pte,pmd}_free vs. hash_page race by delaying actual deallocation with RCU, from Ben Herrenschmidt


---

 arch/ppc64/mm/init.c        |   46 ++++++++++++++++++++++++++++++++++++
 include/asm-ppc64/pgalloc.h |   55 +++++++++++++++++++++++++++++++++++++++-----
 include/asm-ppc64/tlb.h     |    4 +++
 3 files changed, 99 insertions(+), 6 deletions(-)

diff -puN arch/ppc64/mm/init.c~ppc64-hash_page_race arch/ppc64/mm/init.c
--- 25/arch/ppc64/mm/init.c~ppc64-hash_page_race	2004-01-13 23:23:05.000000000 -0800
+++ 25-akpm/arch/ppc64/mm/init.c	2004-01-13 23:23:05.000000000 -0800
@@ -94,6 +94,52 @@ unsigned long __max_memory;
  * include/asm-ppc64/tlb.h file -- tgall
  */
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+unsigned long pte_freelist_forced_free;
+
+static void pte_free_smp_sync(void *arg)
+{
+	/* Empty on purpose: only the cross-CPU call itself matters (see pte_free_now) */
+}
+
+/* Fallback used by __pte_free_tlb() when it cannot allocate a batch page:
+ * sync with the other CPUs, then free the PTE page immediately (no RCU).
+ */
+void pte_free_now(struct page *ptepage)
+{
+	pte_freelist_forced_free++;	/* count frees that bypassed batching */
+
+	smp_call_function(pte_free_smp_sync, NULL, 0, 1);
+
+	pte_free(ptepage);
+}
+
+static void pte_free_rcu_callback(void *arg)	/* arg: the submitted batch */
+{
+	struct pte_freelist_batch *batch = arg;
+	unsigned int i;
+
+	for (i = 0; i < batch->index; i++)	/* index == number of queued pages */
+		pte_free(batch->pages[i]);
+	free_page((unsigned long)batch);	/* batch came from __get_free_page() */
+}
+
+void pte_free_submit(struct pte_freelist_batch *batch)
+{
+	INIT_RCU_HEAD(&batch->rcu);
+	call_rcu(&batch->rcu, pte_free_rcu_callback, batch);	/* callback frees pages + batch */
+}
+
+void pte_free_finish(void)
+{
+	/* Touching this CPU's batch pointer is safe: page_table_lock is held */
+	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+	
+	if (*batchp == NULL)		/* nothing queued on this CPU */
+		return;
+	pte_free_submit(*batchp);	/* hand the partial batch to RCU */
+	*batchp = NULL;			/* next __pte_free_tlb() starts a fresh batch */
+}
 
 void show_mem(void)
 {
diff -puN include/asm-ppc64/pgalloc.h~ppc64-hash_page_race include/asm-ppc64/pgalloc.h
--- 25/include/asm-ppc64/pgalloc.h~ppc64-hash_page_race	2004-01-13 23:23:05.000000000 -0800
+++ 25-akpm/include/asm-ppc64/pgalloc.h	2004-01-13 23:23:05.000000000 -0800
@@ -3,7 +3,10 @@
 
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
 #include <asm/processor.h>
+#include <asm/tlb.h>
 
 extern kmem_cache_t *zero_cache;
 
@@ -40,8 +43,6 @@ pmd_free(pmd_t *pmd)
 	kmem_cache_free(zero_cache, pmd);
 }
 
-#define __pmd_free_tlb(tlb, pmd)	pmd_free(pmd)
-
 #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte)
 #define pmd_populate(mm, pmd, pte_page) \
 	pmd_populate_kernel(mm, pmd, page_address(pte_page))
@@ -62,15 +63,57 @@ pte_alloc_one(struct mm_struct *mm, unsi
 
 	return NULL;
 }
-
-static inline void
-pte_free_kernel(pte_t *pte)
+		
+static inline void pte_free_kernel(pte_t *pte)
 {
 	kmem_cache_free(zero_cache, pte);
 }
 
 #define pte_free(pte_page)	pte_free_kernel(page_address(pte_page))
-#define __pte_free_tlb(tlb, pte)	pte_free(pte)
+
+struct pte_freelist_batch
+{
+	struct rcu_head	rcu;		/* handed to call_rcu() by pte_free_submit() */
+	unsigned int	index;		/* number of entries used in pages[] */
+	struct page *	pages[0];	/* queued pages; batch is allocated as one page */
+};
+
+#define PTE_FREELIST_SIZE	((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \
+				  sizeof(struct page *))	/* pages[] slots that fit in one page */
+
+extern void pte_free_now(struct page *ptepage);
+extern void pte_free_submit(struct pte_freelist_batch *batch);
+
+DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
+{
+	/* Per-CPU batch access below is safe as we are holding page_table_lock */
+        cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
+	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+
+	if (atomic_read(&tlb->mm->mm_users) < 2 ||
+	    cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
+		pte_free(ptepage);	/* single user or local-CPU-only mm: free at once */
+		return;
+	}
+
+	if (*batchp == NULL) {		/* no batch in progress on this CPU */
+		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
+		if (*batchp == NULL) {
+			pte_free_now(ptepage);	/* no page for a batch: sync and free now */
+			return;
+		}
+		(*batchp)->index = 0;
+	}
+	(*batchp)->pages[(*batchp)->index++] = ptepage;	/* queue for deferred free */
+	if ((*batchp)->index == PTE_FREELIST_SIZE) {	/* batch full */
+		pte_free_submit(*batchp);
+		*batchp = NULL;
+	}
+}
+
+#define __pmd_free_tlb(tlb, pmd)	__pte_free_tlb(tlb, virt_to_page(pmd))
 
 #define check_pgt_cache()	do { } while (0)
 
diff -puN include/asm-ppc64/tlb.h~ppc64-hash_page_race include/asm-ppc64/tlb.h
--- 25/include/asm-ppc64/tlb.h~ppc64-hash_page_race	2004-01-13 23:23:05.000000000 -0800
+++ 25-akpm/include/asm-ppc64/tlb.h	2004-01-13 23:23:05.000000000 -0800
@@ -74,6 +74,8 @@ static inline void __tlb_remove_tlb_entr
 	batch->index = i;
 }
 
+extern void pte_free_finish(void);
+
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	int cpu = smp_processor_id();
@@ -86,6 +88,8 @@ static inline void tlb_flush(struct mmu_
 
 	flush_hash_range(tlb->mm->context, batch->index, local);
 	batch->index = 0;
+
+	pte_free_finish();
 }
 
 #endif /* _PPC64_TLB_H */

_
