
From: Manfred Spraul <manfred@colorfullife.com>

This patch performs the kmalloc() cache lookup at compile time for calls
with a constant size.  The point is that the lookup loop in kmalloc()
takes a significant amount of time, and for a call such as
kmalloc(4096, GFP_KERNEL) the right cache can be selected entirely at
compile time.
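
For a non-constant size the lookup is still a linear walk over the
malloc_sizes[] table at run time.  Roughly (a sketch only; the real
mm/slab.c code differs in details), the out-of-line path looks like:

    void *__kmalloc(size_t size, int flags)
    {
            struct cache_sizes *csizep = malloc_sizes;

            /* walk the table until the first cache that is large enough */
            for (; csizep->cs_size; csizep++) {
                    if (size > csizep->cs_size)
                            continue;
                    return kmem_cache_alloc((flags & GFP_DMA) ?
                            csizep->cs_dmacachep : csizep->cs_cachep, flags);
            }
            return NULL;    /* larger than the largest general cache */
    }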

The problem is the implementation: gcc's brain is lossy, i.e. the obvious

    if (__builtin_constant_p(size)) {
          if (size <= 32) return kmem_cache_alloc(...);
          if (size <= 64) return kmem_cache_alloc(...);
          if (size <= 96) return kmem_cache_alloc(...);
          if (size <= 128) return kmem_cache_alloc(...);
          ...
    }

doesn't work: gcc constant-folds only the first two or three comparisons
and then gives up and emits the rest as run-time code.  I've solved that
with a switch/case statement, but the source is not pretty.
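
For illustration only (not part of the patch): with a hypothetical table
of 32, 64 and 128 byte caches, the switch in the new inline kmalloc()
preprocesses into roughly

    switch (size) {
    case 0 ... (32): j++;           /* fall through */
    case (32+1) ... (64): j++;      /* fall through */
    case (64+1) ... (128): j++; break;
    default:
            __you_cannot_kmalloc_that_much();
    }

so j counts the caches that are at least as large as size, i counts all
caches (3 here), and malloc_sizes[i-j] is the smallest cache that fits;
e.g. size == 50 gives j == 2 and selects the 64 byte cache.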



 25-akpm/include/linux/kmalloc_sizes.h |   26 ++++++++++++++++--
 25-akpm/include/linux/slab.h          |   47 +++++++++++++++++++++++++++++++++-
 25-akpm/kernel/ksyms.c                |    3 +-
 25-akpm/mm/slab.c                     |    8 +----
 4 files changed, 73 insertions(+), 11 deletions(-)

diff -puN include/linux/kmalloc_sizes.h~fixed-size-kmalloc-speedup include/linux/kmalloc_sizes.h
--- 25/include/linux/kmalloc_sizes.h~fixed-size-kmalloc-speedup	Mon Jun  9 16:54:24 2003
+++ 25-akpm/include/linux/kmalloc_sizes.h	Mon Jun  9 16:54:24 2003
@@ -1,3 +1,14 @@
+/*
+ * kmalloc cache sizes.
+ * - CACHE(x) is called for every entry except the last
+ * - for the last entry, LCACHE is called. LCACHE defaults
+ *   to CACHE.
+ */
+#ifndef LCACHE
+#define LCACHE(x)	CACHE(x)
+#define __LCACHE_DEFINED
+#endif
+
 #if (PAGE_SIZE == 4096)
 	CACHE(32)
 #endif
@@ -18,16 +29,25 @@
 	CACHE(16384)
 	CACHE(32768)
 	CACHE(65536)
+#ifdef CONFIG_MMU
+	LCACHE(131072)
+#else
 	CACHE(131072)
-#ifndef CONFIG_MMU
 	CACHE(262144)
 	CACHE(524288)
+#ifndef CONFIG_LARGE_ALLOCS
+	LCACHE(1048576)
+#else
 	CACHE(1048576)
-#ifdef CONFIG_LARGE_ALLOCS
 	CACHE(2097152)
 	CACHE(4194304)
 	CACHE(8388608)
 	CACHE(16777216)
-	CACHE(33554432)
+	LCACHE(33554432)
 #endif /* CONFIG_LARGE_ALLOCS */
 #endif /* CONFIG_MMU */
+
+#ifdef __LCACHE_DEFINED
+#undef __LCACHE_DEFINED
+#undef LCACHE
+#endif
diff -puN include/linux/slab.h~fixed-size-kmalloc-speedup include/linux/slab.h
--- 25/include/linux/slab.h~fixed-size-kmalloc-speedup	Mon Jun  9 16:54:24 2003
+++ 25-akpm/include/linux/slab.h	Mon Jun  9 16:54:24 2003
@@ -62,7 +62,52 @@ extern void *kmem_cache_alloc(kmem_cache
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
 
-extern void *kmalloc(size_t, int);
+/* Size description struct for general caches. */
+struct cache_sizes {
+	size_t		 cs_size;
+	kmem_cache_t	*cs_cachep;
+	kmem_cache_t	*cs_dmacachep;
+};
+extern struct cache_sizes malloc_sizes[];
+extern void *__kmalloc(size_t, int);
+
+/*
+ * gcc's brain is lossy: it forgets that a number is known at compile
+ * time after a few accesses and produces bogus code if a sequence of
+ * if clauses is used.  This is avoided by using a switch statement.
+ */
+static inline void * kmalloc(size_t size, int flags)
+{
+	if (__builtin_constant_p(size)) {
+extern void __you_cannot_kmalloc_that_much(void);
+		unsigned int i,j;
+		j = 0;
+		switch(size) {
+		case 0 ...
+#define CACHE(x) \
+			(x): j++; \
+		case (x+1) ...
+#define LCACHE(x) \
+			(x): j++; break;
+#include "kmalloc_sizes.h"
+#undef CACHE
+#undef LCACHE
+		default:
+				__you_cannot_kmalloc_that_much();
+		}
+		i = 0;
+#define CACHE(x) \
+		i++;
+#include "kmalloc_sizes.h"
+#undef CACHE
+		return kmem_cache_alloc( (flags & GFP_DMA)?
+					malloc_sizes[i-j].cs_dmacachep
+					: malloc_sizes[i-j].cs_cachep,
+					flags);
+	}
+	return __kmalloc(size,flags);
+}
+
 extern void kfree(const void *);
 extern unsigned int ksize(const void *);
 
diff -puN kernel/ksyms.c~fixed-size-kmalloc-speedup kernel/ksyms.c
--- 25/kernel/ksyms.c~fixed-size-kmalloc-speedup	Mon Jun  9 16:54:24 2003
+++ 25-akpm/kernel/ksyms.c	Mon Jun  9 16:54:24 2003
@@ -95,7 +95,8 @@ EXPORT_SYMBOL(kmem_cache_free);
 EXPORT_SYMBOL(kmem_cache_size);
 EXPORT_SYMBOL(set_shrinker);
 EXPORT_SYMBOL(remove_shrinker);
-EXPORT_SYMBOL(kmalloc);
+EXPORT_SYMBOL(malloc_sizes);
+EXPORT_SYMBOL(__kmalloc);
 EXPORT_SYMBOL(kfree);
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(__alloc_percpu);
diff -puN mm/slab.c~fixed-size-kmalloc-speedup mm/slab.c
--- 25/mm/slab.c~fixed-size-kmalloc-speedup	Mon Jun  9 16:54:24 2003
+++ 25-akpm/mm/slab.c	Mon Jun  9 16:54:24 2003
@@ -385,11 +385,7 @@ static int slab_break_gfp_order = BREAK_
 #define	GET_PAGE_SLAB(pg)     ((struct slab *)(pg)->list.prev)
 
 /* These are the default caches for kmalloc. Custom caches can have other sizes. */
-static struct cache_sizes {
-	size_t		 cs_size;
-	kmem_cache_t	*cs_cachep;
-	kmem_cache_t	*cs_dmacachep;
-} malloc_sizes[] = {
+struct cache_sizes malloc_sizes[] = {
 #define CACHE(x) { .cs_size = (x) },
 #include <linux/kmalloc_sizes.h>
 	{ 0, }
@@ -1967,7 +1963,7 @@ void * kmem_cache_alloc (kmem_cache_t *c
  * platforms.  For example, on i386, it means that the memory must come
  * from the first 16MB.
  */
-void * kmalloc (size_t size, int flags)
+void * __kmalloc (size_t size, int flags)
 {
 	struct cache_sizes *csizep = malloc_sizes;
 

_
