
From: Neil Brown <neilb@cse.unsw.edu.au>

Don't remove inode from hash until filesystem has deleted it.

There is a small race with knfsd using iget to get an inode that is
currently being deleted.  This is because it is removed from the hash table
*before* the filesystem gets to delete it.  If nfsd does an iget in this
window it will cause a read_inode which will return an apparently valid
inode.  However that inode will shortly be deleted from disc without knfsd
noticing...  until it is too late.

With this patch, the inode being deleted is left on the hash table, and if
a lookup find an inode being freed in the hashtable, it waits in the inode
waitqueue for the inode to be fully deleted.



 fs/fs-writeback.c |    3 ++-
 fs/inode.c        |   29 ++++++++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff -puN fs/fs-writeback.c~inode-unhashing-fix-2 fs/fs-writeback.c
--- 25/fs/fs-writeback.c~inode-unhashing-fix-2	2003-05-11 21:39:52.000000000 -0700
+++ 25-akpm/fs/fs-writeback.c	2003-05-11 21:39:52.000000000 -0700
@@ -90,7 +90,8 @@ void __mark_inode_dirty(struct inode *in
 		 * Only add valid (hashed) inodes to the superblock's
 		 * dirty list.  Add blockdev inodes as well.
 		 */
-		if (hlist_unhashed(&inode->i_hash) && !S_ISBLK(inode->i_mode))
+		if ((hlist_unhashed(&inode->i_hash) || (inode->i_state & (I_FREEING|I_CLEAR)))
+		    && !S_ISBLK(inode->i_mode))
 			goto out;
 
 		/*
diff -puN fs/inode.c~inode-unhashing-fix-2 fs/inode.c
--- 25/fs/inode.c~inode-unhashing-fix-2	2003-05-11 21:39:52.000000000 -0700
+++ 25-akpm/fs/inode.c	2003-05-11 21:39:52.000000000 -0700
@@ -466,6 +466,7 @@ static int shrink_icache_memory(int nr, 
 	return inodes_stat.nr_unused;
 }
 
+void __wait_on_freeing_inode(struct inode *inode);
 /*
  * Called with the inode lock held.
  * NOTE: we are not increasing the inode-refcount, you must call __iget()
@@ -477,6 +478,7 @@ static struct inode * find_inode(struct 
 	struct hlist_node *node;
 	struct inode * inode = NULL;
 
+repeat:
 	hlist_for_each (node, head) { 
 		prefetch(node->next);
 		inode = hlist_entry(node, struct inode, i_hash);
@@ -484,6 +486,10 @@ static struct inode * find_inode(struct 
 			continue;
 		if (!test(inode, data))
 			continue;
+		if (inode->i_state & (I_FREEING|I_CLEAR)) {
+			__wait_on_freeing_inode(inode);
+			goto repeat;
+		}
 		break;
 	}
 	return node ? inode : NULL;
@@ -498,6 +504,7 @@ static struct inode * find_inode_fast(st
 	struct hlist_node *node;
 	struct inode * inode = NULL;
 
+repeat:
 	hlist_for_each (node, head) {
 		prefetch(node->next);
 		inode = list_entry(node, struct inode, i_hash);
@@ -505,6 +512,10 @@ static struct inode * find_inode_fast(st
 			continue;
 		if (inode->i_sb != sb)
 			continue;
+		if (inode->i_state & (I_FREEING|I_CLEAR)) {
+			__wait_on_freeing_inode(inode);
+			goto repeat;
+		}
 		break;
 	}
 	return node ? inode : NULL;
@@ -937,7 +948,6 @@ void generic_delete_inode(struct inode *
 {
 	struct super_operations *op = inode->i_sb->s_op;
 
-	hlist_del_init(&inode->i_hash);
 	list_del_init(&inode->i_list);
 	inode->i_state|=I_FREEING;
 	inodes_stat.nr_inodes--;
@@ -956,6 +966,10 @@ void generic_delete_inode(struct inode *
 		delete(inode);
 	} else
 		clear_inode(inode);
+	spin_lock(&inode_lock);
+	hlist_del_init(&inode->i_hash);
+	spin_unlock(&inode_lock);
+	wake_up_inode(inode);
 	if (inode->i_state != I_CLEAR)
 		BUG();
 	destroy_inode(inode);
@@ -1229,6 +1243,19 @@ repeat:
 	__set_current_state(TASK_RUNNING);
 }
 
+void __wait_on_freeing_inode(struct inode *inode)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	wait_queue_head_t *wq = i_waitq_head(inode);
+
+	add_wait_queue(wq, &wait);
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	spin_unlock(&inode_lock);
+	schedule();
+	remove_wait_queue(wq, &wait);
+	spin_lock(&inode_lock);
+}
+
 void wake_up_inode(struct inode *inode)
 {
 	wait_queue_head_t *wq = i_waitq_head(inode);

_
