

This is a cleanup patch.

There are quite a lot of places in the kernel which will retry a memory
allocation indefinitely.

Generally, they get it wrong.  Some do yield(), the semantics of which have
changed over time.  Some do schedule(), which can lock up if the caller is
SCHED_FIFO/RR.  Some do schedule_timeout(), etc.

And often it is unnecessary, because the page allocator will do the retry
internally anyway.  But we cannot rely on that - this behaviour may change
(-aa and -rmap kernels do not do this, for instance).

So it is good to formalise and to centralise this operation.  If an
allocation specifies __GFP_REPEAT then the page allocator must retry the
allocation indefinitely, even for orders above 3, which otherwise do not
loop.



 include/linux/gfp.h  |    1 +
 include/linux/slab.h |    2 +-
 mm/page_alloc.c      |    7 ++++---
 3 files changed, 6 insertions(+), 4 deletions(-)

diff -puN include/linux/gfp.h~gfp_repeat include/linux/gfp.h
--- 25/include/linux/gfp.h~gfp_repeat	2003-04-10 21:45:42.000000000 -0700
+++ 25-akpm/include/linux/gfp.h	2003-04-10 21:46:09.000000000 -0700
@@ -18,6 +18,7 @@
 #define __GFP_FS	0x80	/* Can call down to low-level FS? */
 #define __GFP_COLD	0x100	/* Cache-cold page required */
 #define __GFP_NOWARN	0x200	/* Suppress page allocation failure warning */
+#define __GFP_REPEAT	0x400	/* Retry the allocation */
 
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_NOIO	(__GFP_WAIT)
diff -puN include/linux/slab.h~gfp_repeat include/linux/slab.h
--- 25/include/linux/slab.h~gfp_repeat	2003-04-10 21:46:49.000000000 -0700
+++ 25-akpm/include/linux/slab.h	2003-04-10 21:47:08.000000000 -0700
@@ -22,7 +22,7 @@ typedef struct kmem_cache_s kmem_cache_t
 #define	SLAB_KERNEL		GFP_KERNEL
 #define	SLAB_DMA		GFP_DMA
 
-#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN)
+#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT)
 #define	SLAB_NO_GROW		0x00001000UL	/* don't grow a cache */
 
 /* flags to pass to kmem_cache_create().
diff -puN mm/page_alloc.c~gfp_repeat mm/page_alloc.c
--- 25/mm/page_alloc.c~gfp_repeat	2003-04-10 21:47:03.000000000 -0700
+++ 25-akpm/mm/page_alloc.c	2003-04-10 21:54:44.000000000 -0700
@@ -633,10 +633,11 @@ rebalance:
 	}
 
 	/*
-	 * Don't let big-order allocations loop.  Yield for kswapd, try again.
+	 * Don't let big-order allocations loop unless the caller explicitly
+	 * requests that.  Wait for some write requests to complete then retry.
 	 */
-	if (order <= 3) {
-		yield();
+	if ((order <= 3) || (gfp_mask & __GFP_REPEAT)) {
+		blk_congestion_wait(WRITE, HZ/50);
 		goto rebalance;
 	}
 

_
