
From: Manfred Spraul and Brian Gerst

The patch performs the kmalloc cache lookup at compile time for calls with
a constant size.  The idea is that the cache-selection loop in kmalloc
takes a significant amount of time, and for a call such as
kmalloc(4096, GFP_KERNEL) that lookup can happen entirely at compile time.
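
For a constant size the compiler can reduce the whole call to a direct
cache allocation.  Illustratively (the array index depends on the
configured cache table, so the 9 below is made up):

    /* what kmalloc(4096, GFP_KERNEL) effectively compiles down to: */
    p = kmem_cache_alloc(malloc_sizes[9].cs_cachep, GFP_KERNEL);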

A problem has been seen with gcc-3.2.2-5 from Red Hat.  This code:

    if (__builtin_constant_p(size)) {
          if (size <= 32) return kmem_cache_alloc(...);
          if (size <= 64) return kmem_cache_alloc(...);
          if (size <= 96) return kmem_cache_alloc(...);
          if (size <= 128) return kmem_cache_alloc(...);
          ...
    }

doesn't fold as intended: gcc optimizes away only the first two or three
comparisons, then gives up and emits the remaining ones as run-time code.

But we did it that way anyway.  Apparently the problem is fixed in later
compilers.
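
A minimal stand-alone test case (all names here are made up) shows the
pattern; with a compiler that handles it, building with gcc -O2 -S leaves
no comparisons in the generated assembly and f() folds to a constant:

    static inline int pick(unsigned long size)
    {
          if (__builtin_constant_p(size)) {
                if (size <= 32) return 0;
                if (size <= 64) return 1;
                if (size <= 128) return 2;
                if (size <= 256) return 3;
          }
          return -1;  /* non-constant size: fall back to the runtime loop */
    }

    int f(void) { return pick(200); }  /* should fold to "return 3" */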


 include/linux/slab.h |   33 ++++++++++++++++++++++++++++++++-
 kernel/ksyms.c       |    3 ++-
 mm/slab.c            |    8 ++------
 3 files changed, 36 insertions(+), 8 deletions(-)

diff -puN include/linux/slab.h~fixed-size-kmalloc-speedup include/linux/slab.h
--- 25/include/linux/slab.h~fixed-size-kmalloc-speedup	2003-06-10 23:33:20.000000000 -0700
+++ 25-akpm/include/linux/slab.h	2003-06-10 23:33:20.000000000 -0700
@@ -62,7 +62,38 @@ extern void *kmem_cache_alloc(kmem_cache
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
 
-extern void *kmalloc(size_t, int);
+/* Size description struct for general caches. */
+struct cache_sizes {
+	size_t		 cs_size;
+	kmem_cache_t	*cs_cachep;
+	kmem_cache_t	*cs_dmacachep;
+};
+extern struct cache_sizes malloc_sizes[];
+extern void *__kmalloc(size_t, int);
+
+static inline void *kmalloc(size_t size, int flags)
+{
+	if (__builtin_constant_p(size)) {
+		int i = 0;
+#define CACHE(x) \
+		if (size <= x) \
+			goto found; \
+		else \
+			i++;
+#include "kmalloc_sizes.h"
+#undef CACHE
+		{
+			extern void __you_cannot_kmalloc_that_much(void);
+			__you_cannot_kmalloc_that_much();
+		}
+found:
+		return kmem_cache_alloc((flags & GFP_DMA) ?
+			malloc_sizes[i].cs_dmacachep :
+			malloc_sizes[i].cs_cachep, flags);
+	}
+	return __kmalloc(size, flags);
+}
+
 extern void kfree(const void *);
 extern unsigned int ksize(const void *);
 
diff -puN kernel/ksyms.c~fixed-size-kmalloc-speedup kernel/ksyms.c
--- 25/kernel/ksyms.c~fixed-size-kmalloc-speedup	2003-06-10 23:33:20.000000000 -0700
+++ 25-akpm/kernel/ksyms.c	2003-06-10 23:33:20.000000000 -0700
@@ -95,7 +95,8 @@ EXPORT_SYMBOL(kmem_cache_free);
 EXPORT_SYMBOL(kmem_cache_size);
 EXPORT_SYMBOL(set_shrinker);
 EXPORT_SYMBOL(remove_shrinker);
-EXPORT_SYMBOL(kmalloc);
+EXPORT_SYMBOL(malloc_sizes);
+EXPORT_SYMBOL(__kmalloc);
 EXPORT_SYMBOL(kfree);
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(__alloc_percpu);
diff -puN mm/slab.c~fixed-size-kmalloc-speedup mm/slab.c
--- 25/mm/slab.c~fixed-size-kmalloc-speedup	2003-06-10 23:33:20.000000000 -0700
+++ 25-akpm/mm/slab.c	2003-06-10 23:33:20.000000000 -0700
@@ -385,11 +385,7 @@ static int slab_break_gfp_order = BREAK_
 #define	GET_PAGE_SLAB(pg)     ((struct slab *)(pg)->list.prev)
 
 /* These are the default caches for kmalloc. Custom caches can have other sizes. */
-static struct cache_sizes {
-	size_t		 cs_size;
-	kmem_cache_t	*cs_cachep;
-	kmem_cache_t	*cs_dmacachep;
-} malloc_sizes[] = {
+struct cache_sizes malloc_sizes[] = {
 #define CACHE(x) { .cs_size = (x) },
 #include <linux/kmalloc_sizes.h>
 	{ 0, }
@@ -1967,7 +1963,7 @@ void * kmem_cache_alloc (kmem_cache_t *c
  * platforms.  For example, on i386, it means that the memory must come
  * from the first 16MB.
  */
-void * kmalloc (size_t size, int flags)
+void * __kmalloc (size_t size, int flags)
 {
 	struct cache_sizes *csizep = malloc_sizes;
 

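For reference, the CACHE(x) construct above is a plain X-macro:
kmalloc_sizes.h is just a list of CACHE(32) CACHE(64) ... entries, so the
same list generates both the malloc_sizes[] table in mm/slab.c and the
unrolled comparison chain in the inline kmalloc.  A stand-alone sketch of
the technique (the sizes are illustrative, not the kernel's table):

    #include <stdio.h>

    /* one list, expanded two different ways below */
    #define SIZES CACHE(32) CACHE(64) CACHE(128) CACHE(256)

    static const size_t sizes[] = {
    #define CACHE(x) (x),
          SIZES
    #undef CACHE
    };

    static int cache_index(size_t size)
    {
          int i = 0;
    #define CACHE(x) if (size <= (x)) goto found; else i++;
          SIZES
    #undef CACHE
          return -1;        /* too large for any cache */
    found:
          return i;
    }

    int main(void)
    {
          /* 100 bytes lands in the 128-byte cache, index 2 */
          printf("100 -> cache %d (%zu bytes)\n",
                cache_index(100), sizes[cache_index(100)]);
          return 0;
    }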
_
