
From: Andi Kleen <ak@muc.de>

Copy ioperm bitmaps more efficiently at context switch time.  Only copy
up to the highest used port.

Originally from Ingo Molnar
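
The gist, as a rough standalone sketch (hypothetical userspace code with
made-up names such as fake_thread and switch_copy, not the kernel
implementation in the diff below): each thread remembers io_bitmap_max,
the number of bytes covering its highest used port, and the context
switch copies only max(prev->io_bitmap_max, next->io_bitmap_max) bytes
instead of the full bitmap.

	#include <stdio.h>
	#include <string.h>

	#define BITS_PER_LONG	(8 * sizeof(long))
	#define IO_BITMAP_LONGS	(65536 / BITS_PER_LONG)
	#define IO_BITMAP_BYTES	(IO_BITMAP_LONGS * sizeof(long))

	struct fake_thread {
		unsigned long io_bitmap[IO_BITMAP_LONGS];	/* cleared bit == port allowed */
		unsigned int io_bitmap_max;			/* bytes up to highest used port */
	};

	/* Recompute io_bitmap_max after the bitmap has been changed. */
	static void update_bitmap_max(struct fake_thread *t)
	{
		unsigned int i, max_long = 0;

		for (i = 0; i < IO_BITMAP_LONGS; i++)
			if (t->io_bitmap[i] != ~0UL)
				max_long = i;
		/* In this sketch at least one long is always covered. */
		t->io_bitmap_max = (max_long + 1) * sizeof(long);
	}

	/* "Context switch": copy only the range either task may have touched. */
	static void switch_copy(unsigned long *tss_bitmap,
				const struct fake_thread *prev,
				const struct fake_thread *next)
	{
		unsigned int bytes = prev->io_bitmap_max > next->io_bitmap_max ?
				     prev->io_bitmap_max : next->io_bitmap_max;

		memcpy(tss_bitmap, next->io_bitmap, bytes);
	}

	int main(void)
	{
		static unsigned long tss_bitmap[IO_BITMAP_LONGS];
		static struct fake_thread prev, next;

		memset(prev.io_bitmap, 0xff, IO_BITMAP_BYTES);
		memset(next.io_bitmap, 0xff, IO_BITMAP_BYTES);
		/* "next" was granted port 0x60 by a hypothetical ioperm() call. */
		next.io_bitmap[0x60 / BITS_PER_LONG] &= ~(1UL << (0x60 % BITS_PER_LONG));
		update_bitmap_max(&prev);
		update_bitmap_max(&next);

		switch_copy(tss_bitmap, &prev, &next);
		printf("copied %u of %zu bytes\n",
		       prev.io_bitmap_max > next.io_bitmap_max ?
		       prev.io_bitmap_max : next.io_bitmap_max,
		       IO_BITMAP_BYTES);
		return 0;
	}

The patch below does the equivalent bookkeeping in sys_ioperm(),
copy_thread(), exit_thread() and __switch_to().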

Signed-off-by: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/x86_64/kernel/ioport.c    |   34 +++++++++++++++++++------
 25-akpm/arch/x86_64/kernel/process.c   |   44 ++++++++++++++++++---------------
 25-akpm/arch/x86_64/kernel/setup64.c   |    8 +++---
 25-akpm/include/asm-x86_64/processor.h |    1 
 4 files changed, 57 insertions(+), 30 deletions(-)

diff -puN arch/x86_64/kernel/ioport.c~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch arch/x86_64/kernel/ioport.c
--- 25/arch/x86_64/kernel/ioport.c~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch	2004-09-15 02:33:21.557569592 -0700
+++ 25-akpm/arch/x86_64/kernel/ioport.c	2004-09-15 02:33:21.567568072 -0700
@@ -28,12 +28,12 @@ static void set_bitmap(unsigned long *bi
 			clear_bit(i, bitmap); 
 }
 
-
 /*
  * this changes the io permissions bitmap in the current task.
  */
 asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 {
+	unsigned int i, max_long, bytes, bytes_updated;
 	struct thread_struct * t = &current->thread;
 	struct tss_struct * tss;
 	unsigned long *bitmap;
@@ -59,16 +59,34 @@ asmlinkage long sys_ioperm(unsigned long
 
 	/*
 	 * do it in the per-thread copy and in the TSS ...
+	 *
+	 * Disable preemption via get_cpu() - we must not switch away
+	 * because the ->io_bitmap_max value must match the bitmap
+	 * contents:
 	 */
-	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
 	tss = &per_cpu(init_tss, get_cpu());
-	if (tss->io_bitmap_base == IO_BITMAP_OFFSET) { /* already active? */
-		set_bitmap(tss->io_bitmap, from, num, !turn_on);
-	} else {
-		memcpy(tss->io_bitmap, t->io_bitmap_ptr, IO_BITMAP_BYTES);
-		tss->io_bitmap_base = IO_BITMAP_OFFSET; /* Activate it in the TSS */
-	}
+
+	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+
+	/*
+	 * Search for a (possibly new) maximum. This is simple and stupid,
+	 * to keep it obviously correct:
+	 */
+	max_long = 0;
+	for (i = 0; i < IO_BITMAP_LONGS; i++)
+		if (t->io_bitmap_ptr[i] != ~0UL)
+			max_long = i;
+
+	bytes = (max_long + 1) * sizeof(long);
+	bytes_updated = max(bytes, t->io_bitmap_max);
+
+	t->io_bitmap_max = bytes;
+
+	/* Update the TSS: */
+	memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
+
 	put_cpu();
+
 	return 0;
 }
 
diff -puN arch/x86_64/kernel/process.c~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch arch/x86_64/kernel/process.c
--- 25/arch/x86_64/kernel/process.c~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch	2004-09-15 02:33:21.558569440 -0700
+++ 25-akpm/arch/x86_64/kernel/process.c	2004-09-15 02:33:21.566568224 -0700
@@ -250,11 +250,17 @@ void show_regs(struct pt_regs *regs)
 void exit_thread(void)
 {
 	struct task_struct *me = current;
+	struct thread_struct *t = &me->thread;
 	if (me->thread.io_bitmap_ptr) { 
-		struct tss_struct *tss = &per_cpu(init_tss,get_cpu());
-		kfree(me->thread.io_bitmap_ptr); 
-		me->thread.io_bitmap_ptr = NULL;
-		tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
+		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+
+		kfree(t->io_bitmap_ptr);
+		t->io_bitmap_ptr = NULL;
+		/*
+		 * Careful, clear this in the TSS too:
+		 */
+		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
+		t->io_bitmap_max = 0;
 		put_cpu();
 	}
 }
@@ -362,8 +368,10 @@ int copy_thread(int nr, unsigned long cl
 
 	if (unlikely(me->thread.io_bitmap_ptr != NULL)) { 
 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
-		if (!p->thread.io_bitmap_ptr) 
+		if (!p->thread.io_bitmap_ptr) {
+			p->thread.io_bitmap_max = 0;
 			return -ENOMEM;
+		}
 		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
 	} 
 
@@ -382,8 +390,10 @@ int copy_thread(int nr, unsigned long cl
 	}
 	err = 0;
 out:
-	if (err && p->thread.io_bitmap_ptr)
+	if (err && p->thread.io_bitmap_ptr) {
 		kfree(p->thread.io_bitmap_ptr);
+		p->thread.io_bitmap_max = 0;
+	}
 	return err;
 }
 
@@ -490,22 +500,18 @@ struct task_struct *__switch_to(struct t
 	 * Handle the IO bitmap 
 	 */ 
 	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
-		if (next->io_bitmap_ptr) {
+		if (next->io_bitmap_ptr)
 			/*
-			 * 2 cachelines copy ... not good, but not that
-			 * bad either. Anyone got something better?
-			 * This only affects processes which use ioperm().
-			 */
-			memcpy(tss->io_bitmap, next->io_bitmap_ptr, IO_BITMAP_BYTES);
-			tss->io_bitmap_base = IO_BITMAP_OFFSET;
-		} else {
+			 * Copy the relevant range of the IO bitmap.
+			 * Normally this is 128 bytes or less:
+			 */
+			memcpy(tss->io_bitmap, next->io_bitmap_ptr,
+				max(prev->io_bitmap_max, next->io_bitmap_max));
+		else {
 			/*
-			 * a bitmap offset pointing outside of the TSS limit
-			 * causes a nicely controllable SIGSEGV if a process
-			 * tries to use a port IO instruction. The first
-			 * sys_ioperm() call sets up the bitmap properly.
+			 * Clear any possible leftover bits:
 			 */
-			tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
+			memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 		}
 	}
 
diff -puN arch/x86_64/kernel/setup64.c~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch arch/x86_64/kernel/setup64.c
--- 25/arch/x86_64/kernel/setup64.c~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch	2004-09-15 02:33:21.560569136 -0700
+++ 25-akpm/arch/x86_64/kernel/setup64.c	2004-09-15 02:33:21.565568376 -0700
@@ -241,6 +241,7 @@ void __init cpu_init (void)
 	unsigned long v; 
 	char *estacks = NULL; 
 	struct task_struct *me;
+	int i;
 
 	/* CPU 0 is initialised in head64.c */
 	if (cpu != 0) {
@@ -304,12 +305,13 @@ void __init cpu_init (void)
 		t->ist[v] = (unsigned long)estacks;
 	}
 
-	t->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
+	t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
 	/*
-	 * This is required because the CPU will access up to
+	 * <= is required because the CPU will access up to
 	 * 8 bits beyond the end of the IO permission bitmap.
 	 */
-	t->io_bitmap[IO_BITMAP_LONGS] = ~0UL;
+	for (i = 0; i <= IO_BITMAP_LONGS; i++)
+		t->io_bitmap[i] = ~0UL;
 
 	atomic_inc(&init_mm.mm_count);
 	me->active_mm = &init_mm;
diff -puN include/asm-x86_64/processor.h~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch include/asm-x86_64/processor.h
--- 25/include/asm-x86_64/processor.h~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch	2004-09-15 02:33:21.561568984 -0700
+++ 25-akpm/include/asm-x86_64/processor.h	2004-09-15 02:33:21.567568072 -0700
@@ -254,6 +254,7 @@ struct thread_struct {
    switch faster for a limited number of ioperm using tasks. -AK */
 	int		ioperm;
 	unsigned long	*io_bitmap_ptr;
+	unsigned io_bitmap_max;
 /* cached TLS descriptors. */
 	u64 tls_array[GDT_ENTRY_TLS_ENTRIES];
 } __attribute__((aligned(16)));
_
