
From: "Paul E. McKenney" <paulmck@us.ibm.com>

This patch uses the rcu_assign_pointer() API to eliminate a number of
explicit memory barriers from the SysV IPC code that uses RCU.  It also
restructures the ipc_ids structure so that the array size is stored in the
same memory block as the array itself (see the new struct ipc_id_ary). 
This prevents the race that the earlier code was subject to, where a reader
could see a mismatch between the size and the actual array.  With the size
stored with the array, the possibility of mismatch is eliminated -- with
out the need for careful ordering and explicit memory barriers.  This has
been tested successfully on i386 and ppc64.

Signed-off-by: <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/ipc/msg.c  |    2 -
 25-akpm/ipc/sem.c  |    2 -
 25-akpm/ipc/util.c |   80 +++++++++++++++++++++++------------------------------
 25-akpm/ipc/util.h |   13 ++++----
 4 files changed, 45 insertions(+), 52 deletions(-)

diff -puN ipc/msg.c~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc ipc/msg.c
--- 25/ipc/msg.c~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc	2004-10-24 03:23:49.798841488 -0700
+++ 25-akpm/ipc/msg.c	2004-10-24 03:23:49.819838296 -0700
@@ -381,7 +381,7 @@ asmlinkage long sys_msgctl (int msqid, i
 		int success_return;
 		if (!buf)
 			return -EFAULT;
-		if(cmd == MSG_STAT && msqid >= msg_ids.size)
+		if(cmd == MSG_STAT && msqid >= msg_ids.entries->size)
 			return -EINVAL;
 
 		memset(&tbuf,0,sizeof(tbuf));
diff -puN ipc/sem.c~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc ipc/sem.c
--- 25/ipc/sem.c~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc	2004-10-24 03:23:49.799841336 -0700
+++ 25-akpm/ipc/sem.c	2004-10-24 03:23:49.820838144 -0700
@@ -524,7 +524,7 @@ static int semctl_nolock(int semid, int 
 		struct semid64_ds tbuf;
 		int id;
 
-		if(semid >= sem_ids.size)
+		if(semid >= sem_ids.entries->size)
 			return -EINVAL;
 
 		memset(&tbuf,0,sizeof(tbuf));
diff -puN ipc/util.c~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc ipc/util.c
--- 25/ipc/util.c~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc	2004-10-24 03:23:49.801841032 -0700
+++ 25-akpm/ipc/util.c	2004-10-24 03:23:49.821837992 -0700
@@ -62,7 +62,6 @@ void __init ipc_init_ids(struct ipc_ids*
 
 	if(size > IPCMNI)
 		size = IPCMNI;
-	ids->size = size;
 	ids->in_use = 0;
 	ids->max_id = -1;
 	ids->seq = 0;
@@ -74,14 +73,17 @@ void __init ipc_init_ids(struct ipc_ids*
 		 	ids->seq_max = seq_limit;
 	}
 
-	ids->entries = ipc_rcu_alloc(sizeof(struct ipc_id)*size);
+	ids->entries = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*size +
+				     sizeof(struct ipc_id_ary));
 
 	if(ids->entries == NULL) {
 		printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n");
-		ids->size = 0;
+		size = 0;
+		ids->entries = &ids->nullentry;
 	}
-	for(i=0;i<ids->size;i++)
-		ids->entries[i].p = NULL;
+	ids->entries->size = size;
+	for(i=0;i<size;i++)
+		ids->entries->p[i] = NULL;
 }
 
 /**
@@ -104,7 +106,7 @@ int ipc_findkey(struct ipc_ids* ids, key
 	 * since ipc_ids.sem is held
 	 */
 	for (id = 0; id <= max_id; id++) {
-		p = ids->entries[id].p;
+		p = ids->entries->p[id];
 		if(p==NULL)
 			continue;
 		if (key == p->key)
@@ -118,36 +120,36 @@ int ipc_findkey(struct ipc_ids* ids, key
  */
 static int grow_ary(struct ipc_ids* ids, int newsize)
 {
-	struct ipc_id* new;
-	struct ipc_id* old;
+	struct ipc_id_ary* new;
+	struct ipc_id_ary* old;
 	int i;
+	int size = ids->entries->size;
 
 	if(newsize > IPCMNI)
 		newsize = IPCMNI;
-	if(newsize <= ids->size)
+	if(newsize <= size)
 		return newsize;
 
-	new = ipc_rcu_alloc(sizeof(struct ipc_id)*newsize);
+	new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
+			    sizeof(struct ipc_id_ary));
 	if(new == NULL)
-		return ids->size;
-	memcpy(new, ids->entries, sizeof(struct ipc_id)*ids->size);
-	for(i=ids->size;i<newsize;i++) {
-		new[i].p = NULL;
+		return size;
+	new->size = newsize;
+	memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size +
+					sizeof(struct ipc_id_ary));
+	for(i=size;i<newsize;i++) {
+		new->p[i] = NULL;
 	}
 	old = ids->entries;
 
 	/*
-	 * before setting the ids->entries to the new array, there must be a
-	 * smp_wmb() to make sure the memcpyed contents of the new array are
-	 * visible before the new array becomes visible.
+	 * Use rcu_assign_pointer() to make sure the memcpyed contents
+	 * of the new array are visible before the new array becomes visible.
 	 */
-	smp_wmb();	/* prevent seeing new array uninitialized. */
-	ids->entries = new;
-	smp_wmb();	/* prevent indexing into old array based on new size. */
-	ids->size = newsize;
+	rcu_assign_pointer(ids->entries, new);
 
 	ipc_rcu_putref(old);
-	return ids->size;
+	return newsize;
 }
 
 /**
@@ -175,7 +177,7 @@ int ipc_addid(struct ipc_ids* ids, struc
 	 * ipc_ids.sem is held
 	 */
 	for (id = 0; id < size; id++) {
-		if(ids->entries[id].p == NULL)
+		if(ids->entries->p[id] == NULL)
 			goto found;
 	}
 	return -1;
@@ -195,7 +197,7 @@ found:
 	new->deleted = 0;
 	rcu_read_lock();
 	spin_lock(&new->lock);
-	ids->entries[id].p = new;
+	ids->entries->p[id] = new;
 	return id;
 }
 
@@ -216,15 +218,15 @@ struct kern_ipc_perm* ipc_rmid(struct ip
 {
 	struct kern_ipc_perm* p;
 	int lid = id % SEQ_MULTIPLIER;
-	if(lid >= ids->size)
+	if(lid >= ids->entries->size)
 		BUG();
 
 	/* 
 	 * do not need a rcu_dereference()() here to force ordering
 	 * on Alpha, since the ipc_ids.sem is held.
 	 */	
-	p = ids->entries[lid].p;
-	ids->entries[lid].p = NULL;
+	p = ids->entries->p[lid];
+	ids->entries->p[lid] = NULL;
 	if(p==NULL)
 		BUG();
 	ids->in_use--;
@@ -234,7 +236,7 @@ struct kern_ipc_perm* ipc_rmid(struct ip
 			lid--;
 			if(lid == -1)
 				break;
-		} while (ids->entries[lid].p == NULL);
+		} while (ids->entries->p[lid] == NULL);
 		ids->max_id = lid;
 	}
 	p->deleted = 1;
@@ -493,9 +495,9 @@ struct kern_ipc_perm* ipc_get(struct ipc
 {
 	struct kern_ipc_perm* out;
 	int lid = id % SEQ_MULTIPLIER;
-	if(lid >= ids->size)
+	if(lid >= ids->entries->size)
 		return NULL;
-	out = ids->entries[lid].p;
+	out = ids->entries->p[lid];
 	return out;
 }
 
@@ -503,25 +505,15 @@ struct kern_ipc_perm* ipc_lock(struct ip
 {
 	struct kern_ipc_perm* out;
 	int lid = id % SEQ_MULTIPLIER;
-	struct ipc_id* entries;
+	struct ipc_id_ary* entries;
 
 	rcu_read_lock();
-	if(lid >= ids->size) {
+	entries = rcu_dereference(ids->entries);
+	if(lid >= entries->size) {
 		rcu_read_unlock();
 		return NULL;
 	}
-
-	/* 
-	 * Note: The following two read barriers are corresponding
-	 * to the two write barriers in grow_ary(). They guarantee 
-	 * the writes are seen in the same order on the read side. 
-	 * smp_rmb() has effect on all CPUs.  rcu_dereference()
-	 * is used if there are data dependency between two reads, and 
-	 * has effect only on Alpha.
-	 */
-	smp_rmb(); /* prevent indexing old array with new size */
-	entries = rcu_dereference(ids->entries);
-	out = entries[lid].p;
+	out = entries->p[lid];
 	if(out == NULL) {
 		rcu_read_unlock();
 		return NULL;
diff -puN ipc/util.h~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc ipc/util.h
--- 25/ipc/util.h~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc	2004-10-24 03:23:49.811839512 -0700
+++ 25-akpm/ipc/util.h	2004-10-24 03:23:49.822837840 -0700
@@ -15,18 +15,19 @@ void sem_init (void);
 void msg_init (void);
 void shm_init (void);
 
-struct ipc_ids {
+struct ipc_id_ary {
 	int size;
+	struct kern_ipc_perm *p[0];
+};
+
+struct ipc_ids {
 	int in_use;
 	int max_id;
 	unsigned short seq;
 	unsigned short seq_max;
 	struct semaphore sem;	
-	struct ipc_id* entries;
-};
-
-struct ipc_id {
-	struct kern_ipc_perm* p;
+	struct ipc_id_ary nullentry;
+	struct ipc_id_ary* entries;
 };
 
 void __init ipc_init_ids(struct ipc_ids* ids, int size);
_
