
From: Corey Minyard <minyard@acm.org>

* On a 32-bit architecture, the idr code will cease to work if you add
  more than 2^20 entries.  You will not be able to find many of the
  entries.  The problem is that the IDR code uses 5-bit chunks of the
  number and the lower portion used by IDR is 24 bits, so you have one bit
  that leaks over into the comparisons that should not be there.  The
  solution is to mask off that bit before doing IDR processing.  This
  actually causes the POSIX timer code to crash if you create that many
  timers.  I have included an idr_test.tar.gz file that demonstrates this
  with and without the fix, in case you need more evidence :).

* When the IDR fills up, it returns -1.  However, there was no way to
  check for this condition.  This patch adds the ability to check for the
  idr being full and fixes all the users.  It also fixes a problem in
  fs/super.c where the idr code wasn't checking for -1.

* There was a race condition creating POSIX timers.  The timer was added
  to a task struct for another process then the data for the timer was
  filled out.  The other task could use/destroy time timer as soon as it is
  in the task's queue and the lock is released.  This moves settup up the
  timer data to before the timer is enqueued or (for some data) into the
  lock.


---

 25-akpm/fs/proc/generic.c        |    4 +++
 25-akpm/fs/super.c               |    8 ++++++
 25-akpm/include/linux/idr.h      |    7 +++++
 25-akpm/kernel/posix-timers.c    |   51 ++++++++++++++++++++++++++-------------
 25-akpm/lib/idr.c                |   19 +++++++++++++-
 25-akpm/net/sctp/sm_make_chunk.c |    5 +++
 6 files changed, 76 insertions(+), 18 deletions(-)

diff -puN fs/proc/generic.c~idr-overflow-fixes fs/proc/generic.c
--- 25/fs/proc/generic.c~idr-overflow-fixes	Fri May  7 15:20:31 2004
+++ 25-akpm/fs/proc/generic.c	Fri May  7 15:20:31 2004
@@ -296,6 +296,10 @@ retry:
 
 	spin_lock(&proc_inum_lock);
 	i = idr_get_new(&proc_inum_idr, NULL);
+	if ((i == -1) && idr_full(&proc_inum_idr)) {
+		spin_unlock(&proc_inum_lock);
+		return 0;
+	}
 	spin_unlock(&proc_inum_lock);
 
 	if (i == -1)
diff -puN fs/super.c~idr-overflow-fixes fs/super.c
--- 25/fs/super.c~idr-overflow-fixes	Fri May  7 15:20:31 2004
+++ 25-akpm/fs/super.c	Fri May  7 15:20:31 2004
@@ -556,13 +556,21 @@ int set_anon_super(struct super_block *s
 {
 	int dev;
 
+ retry:
 	spin_lock(&unnamed_dev_lock);
 	if (idr_pre_get(&unnamed_dev_idr, GFP_ATOMIC) == 0) {
 		spin_unlock(&unnamed_dev_lock);
 		return -ENOMEM;
 	}
 	dev = idr_get_new(&unnamed_dev_idr, NULL);
+	if ((dev == -1) && idr_full(&unnamed_dev_idr)) {
+		spin_unlock(&unnamed_dev_lock);
+		return -EAGAIN;
+	}
 	spin_unlock(&unnamed_dev_lock);
+	if (dev == -1)
+		/* We raced and lost with another CPU. */
+		goto retry;
 
 	if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
 		spin_lock(&unnamed_dev_lock);
diff -puN include/linux/idr.h~idr-overflow-fixes include/linux/idr.h
--- 25/include/linux/idr.h~idr-overflow-fixes	Fri May  7 15:20:31 2004
+++ 25-akpm/include/linux/idr.h	Fri May  7 15:20:31 2004
@@ -16,9 +16,15 @@
 #if BITS_PER_LONG == 32
 # define IDR_BITS 5
 # define IDR_FULL 0xffffffff
+/* We can only use half the bits in the top level because there are
+   only four possible bits in the top level (5 bits * 4 levels = 25
+   bits, but you only use 24 bits in the id). */
+# define TOP_LEVEL_FULL (IDR_FULL >> 16)
 #elif BITS_PER_LONG == 64
 # define IDR_BITS 6
 # define IDR_FULL 0xffffffffffffffff
+/* We can use all the bits in a 64-bit long at the top level. */
+# define TOP_LEVEL_FULL IDR_FULL
 #else
 # error "BITS_PER_LONG is not 32 or 64"
 #endif
@@ -74,6 +80,7 @@ int idr_pre_get(struct idr *idp, unsigne
 int idr_get_new(struct idr *idp, void *ptr);
 void idr_remove(struct idr *idp, int id);
 void idr_init(struct idr *idp);
+int idr_full(struct idr *idp);
 
 extern kmem_cache_t *idr_layer_cache;
 
diff -puN kernel/posix-timers.c~idr-overflow-fixes kernel/posix-timers.c
--- 25/kernel/posix-timers.c~idr-overflow-fixes	Fri May  7 15:20:31 2004
+++ 25-akpm/kernel/posix-timers.c	Fri May  7 15:21:19 2004
@@ -443,7 +443,7 @@ sys_timer_create(clockid_t which_clock,
 		return -EAGAIN;
 
 	spin_lock_init(&new_timer->it_lock);
-	do {
+	for (;;) {
 		if (unlikely(!idr_pre_get(&posix_timers_id, GFP_KERNEL))) {
 			error = -EAGAIN;
 			new_timer->it_id = (timer_t)-1;
@@ -452,10 +452,30 @@ sys_timer_create(clockid_t which_clock,
 		spin_lock_irq(&idr_lock);
 		new_timer_id = (timer_t) idr_get_new(&posix_timers_id,
 							(void *) new_timer);
+		if (new_timer_id == -1) {
+			if (idr_full(&posix_timers_id)) {
+				spin_unlock_irq(&idr_lock);
+				error = -EAGAIN;
+				new_timer->it_id = (timer_t)-1;
+				goto out;
+			}
+			spin_unlock_irq(&idr_lock);
+			continue;
+		}
 		spin_unlock_irq(&idr_lock);
-	} while (unlikely(new_timer_id == -1));
+		break;
+	}
 
 	new_timer->it_id = new_timer_id;
+	new_timer->it_clock = which_clock;
+	new_timer->it_incr = 0;
+	new_timer->it_overrun = -1;
+	init_timer(&new_timer->it_timer);
+	new_timer->it_timer.expires = 0;
+	new_timer->it_timer.data = (unsigned long) new_timer;
+	new_timer->it_timer.function = posix_timer_fn;
+	set_timer_inactive(new_timer);
+
 	/*
 	 * return the timer_id now.  The next step is hard to
 	 * back out if there is an error.
@@ -470,6 +490,10 @@ sys_timer_create(clockid_t which_clock,
 			error = -EFAULT;
 			goto out;
 		}
+		new_timer->it_sigev_notify = event.sigev_notify;
+		new_timer->it_sigev_signo = event.sigev_signo;
+		new_timer->it_sigev_value = event.sigev_value;
+
 		read_lock(&tasklist_lock);
 		if ((process = good_sigevent(&event))) {
 			/*
@@ -489,6 +513,7 @@ sys_timer_create(clockid_t which_clock,
 			 */
 			spin_lock_irqsave(&process->sighand->siglock, flags);
 			if (!(process->flags & PF_EXITING)) {
+				new_timer->it_process = process;
 				list_add(&new_timer->list,
 					 &process->signal->posix_timers);
 				spin_unlock_irqrestore(&process->sighand->siglock, flags);
@@ -503,32 +528,24 @@ sys_timer_create(clockid_t which_clock,
 			error = -EINVAL;
 			goto out;
 		}
-		new_timer->it_sigev_notify = event.sigev_notify;
-		new_timer->it_sigev_signo = event.sigev_signo;
-		new_timer->it_sigev_value = event.sigev_value;
 	} else {
 		new_timer->it_sigev_notify = SIGEV_SIGNAL;
 		new_timer->it_sigev_signo = SIGALRM;
 		new_timer->it_sigev_value.sival_int = new_timer->it_id;
 		process = current->group_leader;
 		spin_lock_irqsave(&process->sighand->siglock, flags);
+		new_timer->it_process = process;
 		list_add(&new_timer->list, &process->signal->posix_timers);
 		spin_unlock_irqrestore(&process->sighand->siglock, flags);
 	}
 
-	new_timer->it_clock = which_clock;
-	new_timer->it_incr = 0;
-	new_timer->it_overrun = -1;
-	init_timer(&new_timer->it_timer);
-	new_timer->it_timer.expires = 0;
-	new_timer->it_timer.data = (unsigned long) new_timer;
-	new_timer->it_timer.function = posix_timer_fn;
-	set_timer_inactive(new_timer);
-
-	/*
-	 * Once we set the process, it can be found so do it last...
+ 	/*
+	 * In the case of the timer belonging to another task, after
+	 * the task is unlocked, the timer is owned by the other task
+	 * and may cease to exist at any time.  Don't use or modify
+	 * new_timer after the unlock call.
 	 */
-	new_timer->it_process = process;
+
 out:
 	if (error)
 		release_posix_timer(new_timer);
diff -puN lib/idr.c~idr-overflow-fixes lib/idr.c
--- 25/lib/idr.c~idr-overflow-fixes	Fri May  7 15:20:31 2004
+++ 25-akpm/lib/idr.c	Fri May  7 15:20:31 2004
@@ -75,7 +75,8 @@
  *   This is the allocate id function.  It should be called with any
  *   required locks.  In fact, in the SMP case, you MUST lock prior to
  *   calling this function to avoid possible out of memory problems.  If
- *   memory is required, it will return a -1, in which case you should
+ *   memory is required or if the idr is full, it will return a -1, in
+ *   which case you should check to see if the idr is full and if not
  *   unlock and go back to the idr_pre_get() call.  ptr is the pointer
  *   you want associated with the id.  In other words:
 
@@ -92,6 +93,10 @@
  *   removes the given id, freeing that slot and any memory that may
  *   now be unused.  See idr_find() for locking restrictions.
 
+ * int idr_full(struct idr *idp);
+
+ *   Returns true if the idr is full and false if not.
+
  */
 
 
@@ -276,6 +281,12 @@ build_up:
 }
 EXPORT_SYMBOL(idr_get_new_above);
 
+int idr_full(struct idr *idp)
+{
+	return ((idp->layers >= MAX_LEVEL)
+		&& (idp->top->bitmap == TOP_LEVEL_FULL));
+}
+
 int idr_get_new(struct idr *idp, void *ptr)
 {
 	return idr_get_new_above(idp, ptr, 0);
@@ -315,6 +326,9 @@ void idr_remove(struct idr *idp, int id)
 {
 	struct idr_layer *p;
 
+	/* Mask off upper bits we don't use for the search. */
+	id &= MAX_ID_MASK;
+
 	sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
 	if ( idp->top && idp->top->count == 1 && 
 	     (idp->layers > 1) &&
@@ -350,6 +364,9 @@ void *idr_find(struct idr *idp, int id)
 	if ( unlikely( (id & ~(~0 << MAX_ID_SHIFT)) >> (n + IDR_BITS)))
 	     return NULL;
 #endif
+	/* Mask off upper bits we don't use for the search. */
+	id &= MAX_ID_MASK;
+
 	while (n > 0 && p) {
 		n -= IDR_BITS;
 		p = p->ary[(id >> n) & IDR_MASK];
diff -puN net/sctp/sm_make_chunk.c~idr-overflow-fixes net/sctp/sm_make_chunk.c
--- 25/net/sctp/sm_make_chunk.c~idr-overflow-fixes	Fri May  7 15:20:31 2004
+++ 25-akpm/net/sctp/sm_make_chunk.c	Fri May  7 15:20:31 2004
@@ -1847,6 +1847,11 @@ int sctp_process_init(struct sctp_associ
 			spin_lock_bh(&sctp_assocs_id_lock);
 			assoc_id = (sctp_assoc_t)idr_get_new(&sctp_assocs_id,
 							     (void *)asoc);
+			if ((assoc_id == -1) && idr_full(&sctp_assocs_id)) {
+				spin_unlock_bh(&sctp_assocs_id_lock);
+				goto clean_up;
+			}
+
 			spin_unlock_bh(&sctp_assocs_id_lock);
 		} while (unlikely((int)assoc_id == -1));
 

_
