

An NFS mount of localhost hangs the system under heavy writeout loads.

This is because knfsd gets stuck in balance_dirty_pages().  It is not allowed
to exit from there until the amount of dirty+writeback+unstable memory
subsides.  But it will never subside because knfsd itself is responsible for
cleaning the memory.

This is just like the drivers/block/loop.c hang, only more complex.  We
cannot simply disable knfsd's throttling because it would then swamp the
machine under real loads when the clients are remote.

So we introduce the concept of a "less throttled" process.  These processes
are allowed to exceed the preset dirty memory limits by a little.  This
allows knfsd to make progress in writing things out while the local NFS
clients are throttled.  It also ensures that knfsd will not swamp the machine
when working on behalf of remote clients.

Note that even though knfsd is allowed to exceed the default system-wide
dirty memory threshold, this does _not_ cause other memory-dirtying tasks to
get starved out.  This is because they are allowed to exit
balance_dirty_pages() after having written their quota of pages, regardless
of the current dirty memory state.



 fs/nfsd/nfssvc.c      |    8 ++++++++
 include/linux/sched.h |    1 +
 mm/page-writeback.c   |   14 +++++++++++---
 3 files changed, 20 insertions(+), 3 deletions(-)

diff -puN fs/nfsd/nfssvc.c~nfs-to-localhost-lockup-fix fs/nfsd/nfssvc.c
--- 25/fs/nfsd/nfssvc.c~nfs-to-localhost-lockup-fix	2003-06-09 22:02:58.000000000 -0700
+++ 25-akpm/fs/nfsd/nfssvc.c	2003-06-09 22:02:58.000000000 -0700
@@ -188,6 +188,14 @@ nfsd(struct svc_rqst *rqstp)
 	list_add(&me.list, &nfsd_list);
 
 	unlock_kernel();
+
+	/*
+	 * We want less throttling in balance_dirty_pages() so that nfs to
+	 * localhost doesn't cause nfsd to lock up due to all the client's
+	 * dirty pages.
+	 */
+	current->flags |= PF_LESS_THROTTLE;
+
 	/*
 	 * The main request loop
 	 */
diff -puN include/linux/sched.h~nfs-to-localhost-lockup-fix include/linux/sched.h
--- 25/include/linux/sched.h~nfs-to-localhost-lockup-fix	2003-06-09 22:02:58.000000000 -0700
+++ 25-akpm/include/linux/sched.h	2003-06-09 22:39:23.000000000 -0700
@@ -480,6 +480,7 @@ do { if (atomic_dec_and_test(&(tsk)->usa
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
 #define PF_KSWAPD	0x00040000	/* I am kswapd */
 #define PF_SWAPOFF	0x00080000	/* I am in swapoff */
+#define PF_LESS_THROTTLE 0x01000000	/* Throttle me less: I clean memory */
 
 #ifdef CONFIG_SMP
 extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
diff -puN mm/page-writeback.c~nfs-to-localhost-lockup-fix mm/page-writeback.c
--- 25/mm/page-writeback.c~nfs-to-localhost-lockup-fix	2003-06-09 22:02:58.000000000 -0700
+++ 25-akpm/mm/page-writeback.c	2003-06-09 22:39:02.000000000 -0700
@@ -104,11 +104,13 @@ static void background_writeout(unsigned
  * clamping level.
  */
 static void
-get_dirty_limits(struct page_state *ps, long *background, long *dirty)
+get_dirty_limits(struct page_state *ps, long *pbackground, long *pdirty)
 {
 	int background_ratio;		/* Percentages */
 	int dirty_ratio;
 	int unmapped_ratio;
+	long background;
+	long dirty;
 
 	get_page_state(ps);
 
@@ -125,8 +127,14 @@ get_dirty_limits(struct page_state *ps, 
 	if (background_ratio >= dirty_ratio)
 		background_ratio = dirty_ratio / 2;
 
-	*background = (background_ratio * total_pages) / 100;
-	*dirty = (dirty_ratio * total_pages) / 100;
+	background = (background_ratio * total_pages) / 100;
+	dirty = (dirty_ratio * total_pages) / 100;
+	if (current->flags & PF_LESS_THROTTLE) {
+		background += background / 4;
+		dirty += dirty / 4;
+	}
+	*pbackground = background;
+	*pdirty = dirty;
 }
 
 /*

_
