
From: Maneesh Soni <maneesh@in.ibm.com>

Here is a patch that uses a seqlock to close the real_lookup() race with
d_lookup(), as suggested by Linus. The race can result in a duplicate dentry
when d_lookup() fails because of a concurrent d_move() in some unrelated
directory.

Apart from real_lookup(), lookup_hash()->cached_lookup() can also fail for
the same reason, so there I retry with d_lookup(), as sketched below.
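
For reference, the fast/slow path in cached_lookup() ends up looking like
this (condensed from the fs/namei.c hunk below; the d_revalidate handling is
left out of this sketch):

	static struct dentry * cached_lookup(struct dentry * parent,
					     struct qstr * name, int flags)
	{
		/* lockless RCU walk: may return a false NULL if d_move() runs */
		struct dentry * dentry = __d_lookup(parent, name);

		/* retry under rename_lock before concluding the name is absent */
		if (!dentry)
			dentry = d_lookup(parent, name);

		return dentry;
	}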

There are now two variants: __d_lookup() (the lockless version, called from
do_lookup() during pathwalk) and d_lookup(), which uses the seqlock to
protect against the rename race.
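
The reader side retries the lockless walk whenever the rename_lock sequence
changed underneath it, while d_move() takes the write side
(write_seqlock()/write_sequnlock()) around the hash move. Condensed from the
fs/dcache.c hunks below:

	struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
	{
		struct dentry * dentry = NULL;
		unsigned long seq;

		do {
			/* snapshot the rename sequence, then walk locklessly */
			seq = read_seqbegin(&rename_lock);
			dentry = __d_lookup(parent, name);
			if (dentry)
				break;
			/* a miss only counts if no rename ran concurrently */
		} while (read_seqretry(&rename_lock, seq));
		return dentry;
	}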

dcachebench numbers (lower is better) show no significant difference on a
4-way PIII Xeon SMP box:

base-2565
Average usec/iteration  19059.4
Standard Deviation      503.07

base-2565 + seq_lock
Average usec/iteration  18843.2
Standard Deviation      450.57




 fs/dcache.c            |   23 ++++++++++++++++++++++-
 fs/namei.c             |   17 ++++++++++-------
 include/linux/dcache.h |    1 +
 3 files changed, 33 insertions(+), 8 deletions(-)

diff -puN fs/dcache.c~real_lookup-race-fix fs/dcache.c
--- 25/fs/dcache.c~real_lookup-race-fix	2003-03-28 02:52:33.000000000 -0800
+++ 25-akpm/fs/dcache.c	2003-03-28 02:52:33.000000000 -0800
@@ -27,12 +27,14 @@
 #include <linux/file.h>
 #include <asm/uaccess.h>
 #include <linux/security.h>
+#include <linux/seqlock.h>
 
 #define DCACHE_PARANOIA 1
 /* #define DCACHE_DEBUG 1 */
 
 spinlock_t dcache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 rwlock_t dparent_lock __cacheline_aligned_in_smp = RW_LOCK_UNLOCKED;
+seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
 
 static kmem_cache_t *dentry_cache; 
 
@@ -926,7 +928,7 @@ struct dentry *d_splice_alias(struct ino
  * is returned. The caller must use d_put to free the entry when it has
  * finished using it. %NULL is returned on failure.
  *
- * d_lookup is now, dcache_lock free. The hash list is protected using RCU.
+ * __d_lookup is dcache_lock free. The hash list is protected using RCU.
  * Memory barriers are used while updating and doing lockless traversal. 
  * To avoid races with d_move while rename is happening, d_move_count is 
  * used. 
@@ -939,10 +941,27 @@ struct dentry *d_splice_alias(struct ino
  *
  * d_lru list is not updated, which can leave non-zero d_count dentries
  * around in d_lru list.
+ *
+ * d_lookup() is protected against concurrent renames in some unrelated
+ * directory using the seqlock_t rename_lock.
  */
 
 struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
 {
+	struct dentry * dentry = NULL;
+	unsigned long seq;
+
+	do {
+		seq = read_seqbegin(&rename_lock);
+		dentry = __d_lookup(parent, name);
+		if (dentry)
+			break;
+	} while (read_seqretry(&rename_lock, seq));
+	return dentry;
+}
+
+struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
+{
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
 	const unsigned char *str = name->name;
@@ -1185,6 +1204,7 @@ void d_move(struct dentry * dentry, stru
 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
 
 	spin_lock(&dcache_lock);
+	write_seqlock(&rename_lock);
 	spin_lock(&dentry->d_lock);
 
 	/* Move the dentry to the target hash queue, if on different bucket */
@@ -1222,6 +1242,7 @@ void d_move(struct dentry * dentry, stru
 	list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
 	dentry->d_move_count++;
 	spin_unlock(&dentry->d_lock);
+	write_sequnlock(&rename_lock);
 	spin_unlock(&dcache_lock);
 }
 
diff -puN fs/namei.c~real_lookup-race-fix fs/namei.c
--- 25/fs/namei.c~real_lookup-race-fix	2003-03-28 02:52:33.000000000 -0800
+++ 25-akpm/fs/namei.c	2003-03-28 02:52:33.000000000 -0800
@@ -275,8 +275,14 @@ void path_release(struct nameidata *nd)
  */
 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
 {
-	struct dentry * dentry = d_lookup(parent, name);
-	
+	struct dentry * dentry = __d_lookup(parent, name);
+
+	/* lockless __d_lookup() may fail due to a concurrent d_move()
+	 * in some unrelated directory, so retry with d_lookup()
+	 */
+	if (!dentry)
+		dentry = d_lookup(parent, name);
+
 	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
 		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
 			dput(dentry);
@@ -348,12 +354,9 @@ static struct dentry * real_lookup(struc
 	 * negatives from the RCU list walk here, unlike the optimistic
 	 * fast walk).
 	 *
-	 * We really should do a sequence number thing to avoid this
-	 * all.
+	 * So we do d_lookup() (with the seqlock) instead of the lockless __d_lookup().
 	 */
-	spin_lock(&dcache_lock);
 	result = d_lookup(parent, name);
-	spin_unlock(&dcache_lock);
 	if (!result) {
 		struct dentry * dentry = d_alloc(parent, name);
 		result = ERR_PTR(-ENOMEM);
@@ -524,7 +527,7 @@ static int do_lookup(struct nameidata *n
 		     struct path *path, int flags)
 {
 	struct vfsmount *mnt = nd->mnt;
-	struct dentry *dentry = d_lookup(nd->dentry, name);
+	struct dentry *dentry = __d_lookup(nd->dentry, name);
 
 	if (!dentry)
 		goto need_lookup;
diff -puN include/linux/dcache.h~real_lookup-race-fix include/linux/dcache.h
--- 25/include/linux/dcache.h~real_lookup-race-fix	2003-03-28 02:52:33.000000000 -0800
+++ 25-akpm/include/linux/dcache.h	2003-03-28 02:52:33.000000000 -0800
@@ -238,6 +238,7 @@ extern void d_move(struct dentry *, stru
 
 /* appendix may either be NULL or be used for transname suffixes */
 extern struct dentry * d_lookup(struct dentry *, struct qstr *);
+extern struct dentry * __d_lookup(struct dentry *, struct qstr *);
 
 /* validate "insecure" dentry pointer */
 extern int d_validate(struct dentry *, struct dentry *);

_
