# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	v2.5.74 -> 1.1384 
#	      kernel/ksyms.c	1.206   -> 1.207  
#	arch/x86_64/ia32/ia32_binfmt.c	1.14    -> 1.15   
#	arch/alpha/mm/numa.c	1.12    -> 1.13   
#	  include/linux/mm.h	1.121   -> 1.123  
#	arch/i386/kernel/cpu/common.c	1.21    -> 1.22   
#	           fs/open.c	1.42    -> 1.43   
#	include/asm-mips64/mmzone.h	1.7     -> 1.8    
#	     mm/page_alloc.c	1.163   -> 1.166  
#	  arch/arm/mm/init.c	1.22    -> 1.23   
#	       kernel/fork.c	1.127   -> 1.128  
#	drivers/block/ll_rw_blk.c	1.174   -> 1.175  
#	include/linux/mman.h	1.3     -> 1.4    
#	           fs/attr.c	1.17    -> 1.18   
#	 fs/proc/proc_misc.c	1.80    -> 1.81   
#	   arch/i386/Kconfig	1.64    -> 1.65   
#	arch/x86_64/mm/init.c	1.19    -> 1.20   
#	       mm/swapfile.c	1.79    -> 1.80   
#	    security/dummy.c	1.26    -> 1.27   
#	include/asm-i386/timer.h	1.7     -> 1.9    
#	fs/jbd/transaction.c	1.68    -> 1.69   
#	arch/i386/kernel/io_apic.c	1.74    -> 1.75   
#	    fs/ramfs/inode.c	1.34    -> 1.35   
#	security/capability.c	1.18    -> 1.19   
#	           fs/exec.c	1.85    -> 1.87   
#	           mm/swap.c	1.51    -> 1.52   
#	      fs/coda/file.c	1.10    -> 1.11   
#	       mm/mprotect.c	1.22    -> 1.23   
#	          mm/shmem.c	1.127   -> 1.128  
#	 arch/ia64/mm/init.c	1.44    -> 1.45   
#	arch/i386/mm/pageattr.c	1.4     -> 1.5    
#	drivers/net/e100/e100_main.c	1.78    -> 1.79   
#	arch/i386/kernel/time.c	1.37    -> 1.38   
#	include/asm-i386/mmzone.h	1.12    -> 1.13   
#	    fs/ext2/ialloc.c	1.34    -> 1.35   
#	include/asm-i386/cacheflush.h	1.3     -> 1.4    
#	         mm/mremap.c	1.29    -> 1.31   
#	include/linux/mmzone.h	1.39    -> 1.40   
#	arch/ia64/kernel/sys_ia64.c	1.23    -> 1.24   
#	arch/ppc64/mm/init.c	1.46    -> 1.47   
#	           mm/mmap.c	1.87    -> 1.88   
#	       fs/nfs/file.c	1.28    -> 1.29   
#	arch/i386/kernel/timers/timer_tsc.c	1.19    -> 1.21   
#	arch/ppc64/mm/numa.c	1.7     -> 1.8    
#	arch/i386/kernel/timers/timer_cyclone.c	1.8     -> 1.9    
#	include/linux/security.h	1.24    -> 1.25   
#	      fs/proc/root.c	1.13    -> 1.14   
#	           mm/slab.c	1.90    -> 1.92   
#	        init/Kconfig	1.16    -> 1.17   
#	arch/x86_64/mm/numa.c	1.3     -> 1.4    
#	include/asm-x86_64/mmzone.h	1.3     -> 1.4    
#	 drivers/net/Kconfig	1.34    -> 1.35   
#	          mm/nommu.c	1.3     -> 1.4    
#	       kernel/exit.c	1.104   -> 1.105  
#	arch/s390/kernel/compat_exec.c	1.2     -> 1.3    
#	     fs/jbd/commit.c	1.36    -> 1.37   
#	Documentation/filesystems/Locking	1.42    -> 1.43   
#	arch/i386/lib/delay.c	1.4     -> 1.5    
#	include/asm-alpha/mmzone.h	1.8     -> 1.9    
#	arch/mips/kernel/sysirix.c	1.11    -> 1.12   
#	arch/ia64/ia32/binfmt_elf32.c	1.13    -> 1.14   
#	arch/arm26/mm/init.c	1.1     -> 1.2    
#	include/asm-ppc64/mmzone.h	1.11    -> 1.12   
#	arch/i386/kernel/timers/timer.c	1.8     -> 1.10   
#	drivers/block/cciss.c	1.82    -> 1.83   
#	include/linux/slab.h	1.25    -> 1.27   
#	      fs/block_dev.c	1.133   -> 1.134  
#	arch/i386/mm/pgtable.c	1.12    -> 1.13   
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/07/02	torvalds@home.osdl.org	1.1361
# Linux 2.5.74
# --------------------------------------------
# 03/07/02	ilmari@ilmari.org	1.1362
# [PATCH] Allow modular DM
# 
# With the recent fixes, io_schedule needs to be exported for modular dm
# to work.
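# 
# The export itself is a one-liner; schematically (the kernel/ksyms.c hunk
# falls outside the portion of the diff quoted here):
# 
# 	EXPORT_SYMBOL(io_schedule);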
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1363
# [PATCH] move_vma() make_pages_present() fix
# 
# From: Hugh Dickins <hugh@veritas.com>
# 
# mremap's move_vma VM_LOCKED case was still wrong.
# 
# If the do_munmap unmaps a part of new_vma, then its vm_start and vm_end
# from before cannot both be the right addresses for the make_pages_present
# range, and may BUG() there.
# 
# We need [new_addr, new_addr+new_len) to be locked down; but
# move_page_tables already transferred the locked pages [new_addr,
# new_addr+old_len), and they're either held in a VM_LOCKED vma throughout,
# or temporarily in no vma: in neither case can they be swapped out, so there
# is no need to run over that range again.
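# 
# In code terms only the tail of the new range still needs faulting in -- a
# sketch, since the mm/mremap.c hunk falls outside the quoted diff:
# 
# 	/* [new_addr, new_addr+old_len) is already locked down */
# 	if (new_len > old_len)
# 		make_pages_present(new_addr + old_len,
# 				   new_addr + new_len);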
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1364
# [PATCH] page unmapping debug
# 
# From: Manfred Spraul <manfred@colorfullife.com>
# 
# Manfred's latest page unmapping debug patch.
# 
# The patch adds support for a special debug mode to both the page and the slab
# allocator: Unused pages are removed from the kernel linear mapping.  This
# means that now any access to freed memory will cause an immediate exception.
# Right now, read accesses remain totally unnoticed and write accesses may be
# caught by the slab poisoning, but usually far too late for a meaningful bug
# report.
# 
# The implementation is based on a new arch-dependent function,
# kernel_map_pages(), that removes the pages from the linear mapping.  It's
# right now only implemented for i386.
# 
# Changelog:
# 
# - Add kernel_map_pages() for i386, based on change_page_attr.  If
#   DEBUG_PAGEALLOC is not set, then the function is an empty stub.  The stub
#   is in <linux/mm.h>, i.e.  it exists for all archs.
# 
# - Make change_page_attr irq safe.  Note that it's not fully irq safe due to
#   the lack of the tlb flush ipi, but it's good enough for kernel_map_pages().
#   Another problem is that kernel_map_pages() is not permitted to fail, thus
#   PSE is disabled if DEBUG_PAGEALLOC is enabled.
# 
# - use kernel_map_pages() for the page allocator.
# 
# - use kernel_map_pages() for the slab allocator.
# 
#   I couldn't resist and added additional debugging support into mm/slab.c:
# 
#   * at kfree time, the complete backtrace of the kfree caller is stored
#     in the freed object.
# 
#   * a ptrinfo() function that dumps all known data about a kernel virtual
#     address: the pte value and, if it belongs to a slab cache, the cache
#     name and additional info.
# 
#   * merging of common code: new helper functions obj_dbglen and obj_dbghdr
#     for the conversion between the user visible object pointers/len and the
#     actual, internal addresses and len values.
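# 
# For illustration, the call-site shape the allocators are expected to use
# (a sketch; the mm/page_alloc.c and mm/slab.c hunks fall outside the quoted
# diff, while the i386 implementation is in arch/i386/mm/pageattr.c below):
# 
# 	kernel_map_pages(page, 1 << order, 0);	/* on free: unmap */
# 	kernel_map_pages(page, 1 << order, 1);	/* on alloc: map back in */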
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1365
# [PATCH] NUMA memory reporting fix
# 
# From: Dave Hansen <haveblue@us.ibm.com>
# 
# The current numa meminfo code exports (via sysfs) pgdat->node_size as
# totalram.  This variable is consistently used elsewhere to mean "the number
# of physical pages that this particular node spans".  This is _not_ what we
# want to see from meminfo, which is: "how much actual memory does this node
# have?"
# 
# The following patch removes pgdat->node_size, and replaces it with
# ->node_spanned_pages.  This is to avoid confusion with a new variable,
# node_present_pages, which is the _actual_ value that we want to export in
# meminfo.  Most of the patch is a simple s/node_size/node_spanned_pages/.
# The node_size() macro is also removed, and replaced with new ones for
# node_{spanned,present}_pages() to avoid confusion.
# 
# We were bitten by this problem in this bug:
# 	http://bugme.osdl.org/show_bug.cgi?id=818
# 
# Compiled and tested on NUMA-Q.
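# 
# The distinction, in pgdat terms (see the include/linux/mmzone.h hunk
# below):
# 
# 	pgdat->node_spanned_pages;	/* size of the pfn range, holes included */
# 	pgdat->node_present_pages;	/* pages that physically exist */
# 
# meminfo should report node_present_pages; loops that walk a node's pfn
# range want node_spanned_pages.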
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1366
# [PATCH] ramfs: use generic_file_llseek
# 
# Teach ramfs to use generic_file_llseek: default_llseek takes lock_kernel().
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1367
# [PATCH] inode_change_ok(): remove lock_kernel()
# 
# `attr' is on the stack, and the inode's contents can change as soon as we
# return from inode_change_ok() anyway.  I can't see anything which is actually
# being locked in there.
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1368
# [PATCH] nommu vmtruncate: remove lock_kernel()
# 
# lock_kernel() need not be held across truncate.
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1369
# [PATCH] procfs: remove some unneeded lock_kernel()s
# 
# From: William Lee Irwin III <wli@holomorphy.com>
# 
# Remove spurious BKL acquisitions in /proc/.  The BKL is not required to
# access nr_threads for reporting, and get_locks_status() takes it
# internally, wrapping all operations with it.
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1370
# [PATCH] remove lock_kernel() from file_ops.flush()
# 
# Rework the file_ops.flush() API so that it is no longer called under
# lock_kernel().  Push lock_kernel() down to all implementations except CIFS,
# which doesn't want it.
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1371
# [PATCH] block_llseek(): remove lock_kernel()
# 
# Replace it with the blockdev inode's i_sem.  And we only really need that for
# atomic access to file->f_pos.
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1372
# [PATCH] Make CONFIG_TC35815 depend on CONFIG_TOSHIBA_JMR3927
# 
# From: Adrian Bunk <bunk@fs.tum.de>
# 
# I got an error at the final linking with CONFIG_TC35815 enabled since
# the variables tc_readl and tc_writel are not available.
# 
# The only place where they are defined is arch/mips/pci/ops-jmr3927.c.
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1373
# [PATCH] Report detached thread exit to the debugger
# 
# From: Daniel Jacobowitz <dan@debian.org>
# 
# Right now, CLONE_DETACHED threads silently vanish from GDB's sight when
# they exit.  This patch lets the thread report its exit to the debugger, and
# then be auto-reaped as soon as it is collected, instead of being reaped as
# soon as it exits and not reported at all.
# 
# GDB works either way, but this is more correct and will be useful for some
# later GDB patches.
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1374
# [PATCH] timer renaming and cleanups
# 
# From: john stultz <johnstul@us.ibm.com>
# 
# This renames the bad "timer" variable to "cur_timer" and moves externs to
# .h files.
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1375
# [PATCH] fix lost_tick detector for speedstep
# 
# From: john stultz <johnstul@us.ibm.com>
# 
# The patch tries to resolve issues caused by running the TSC based lost
# tick compensation code on CPUs that change frequency (speedstep, etc).
# 
# Should the CPU be in slow mode when calibrate_tsc() executes, the kernel
# will assume we have so many cycles per tick.  Later, when the cpu speeds up,
# the kernel will start noticing that too many cycles have passed since the last
# interrupt.  Since this can occasionally happen, the lost tick compensation
# code then tries to fix this by incrementing jiffies.  Thus every tick we
# end up incrementing jiffies many times, causing timers to expire too
# quickly and time to rush ahead.
# 
# This patch detects when there have been 100 consecutive interrupts where we
# had to compensate for lost ticks.  If this occurs, we spit out a warning
# and fall back to using the PIT as a time source.
# 
# I've tested this on my speedstep-enabled laptop with success, and other
# laptop users seeing this problem have reported it works for them.  Also, to
# ensure we don't fall back to the slower PIT too quickly, I tested the code
# on a system I have that loses ~30 ticks about every second, and it can
# still manage to use the TSC as a good time source.
# 
# This solves most of the "time doubling" problems seen on laptops.
# Additionally this revision has been modified to use the cleanups made in
# rename-timer_A1.
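# 
# The detection, in outline (this restates the timer_tsc.c hunk below):
# 
# 	if (lost >= 2) {
# 		jiffies += lost - 1;
# 		if (lost_count++ > 100)
# 			clock_fallback();	/* cur_timer = &timer_pit */
# 	} else
# 		lost_count = 0;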
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1376
# [PATCH] fix lost-tick compensation corner-case
# 
# From: john stultz <johnstul@us.ibm.com>
# 
# This patch catches a corner case in the lost-tick compensation code.
# 
# There is a check to see if we overflowed between reads of the two time
# sources; however, should the high-res time source be slightly slower than
# what we calibrated, it's possible to trigger this code when no ticks have
# been lost.
# 
# This patch adds an extra check to ensure we have seen more than one tick
# before we check for this overflow.  This seems to resolve the remaining
# "time doubling" issues that I've seen reported.
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1377
# [PATCH] cleanup and generalise lowmem_page_address
# 
# From: William Lee Irwin III <wli@holomorphy.com>
# 
# This patch allows architectures to micro-optimize lowmem_page_address() at
# their whims.  Roman Zippel originally wrote and/or suggested this back when
# dependencies on page->virtual existing were being shaken out.  That's
# long-settled, so it's fine to do this now.
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1378
# [PATCH] Security hook for vm_enough_memory
# 
# From: Stephen Smalley <sds@epoch.ncsc.mil>
# 
# This patch against 2.5.73 replaces vm_enough_memory with a security hook
# per Alan Cox's suggestion so that security modules can completely replace
# the logic if desired.
# 
# Note that the patch changes the interface to follow the convention of the
# other security hooks, i.e.  return 0 if ok or -errno on failure (-ENOMEM in
# this case) rather than returning a boolean.  It also exports various
# variables and functions required for the vm_enough_memory logic.
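# 
# The call-site convention change, schematically (see the binfmt and
# fs/exec.c hunks below):
# 
# 	/* before: boolean, non-zero means OK */
# 	if (!vm_enough_memory(pages))
# 		return -ENOMEM;
# 
# 	/* after: 0 means OK, -errno on failure */
# 	if (security_vm_enough_memory(pages))
# 		return -ENOMEM;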
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1379
# [PATCH] ext2: inode allocation race fix
# 
# ext2's inode allocator will call find_group_orlov(), which will return a
# suitable blockgroup in which the inode should be allocated.  But by the time
# we actually try to allocate an inode in the blockgroup, other CPUs could have
# used them all up.
# 
# ext2 will bogusly fail with "ext2_new_inode: Free inodes count corrupted in
# group NN".
# 
# 
# To fix this we just advance onto the next blockgroup if the rare race
# happens.  If we've scanned all blockgroups then return -ENOSPC.
# 
# 
# (This is a bit inaccurate: after we've scanned all blockgroups, there may
# still be available inodes due to inode freeing activity in other blockgroups.
#  This cannot be fixed without fs-wide locking.  The effect is a slightly
# early ENOSPC in a nearly-full filesystem).
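# 
# The retry loop, in outline (a condensed sketch of the fs/ext2/ialloc.c
# hunk below):
# 
# 	for (i = 0; i < sbi->s_groups_count; i++) {
# 		bit = ext2_find_first_zero_bit(bitmap, inodes_per_group);
# 		if (bit >= inodes_per_group) {
# 			/* raced: this group filled up; try the next one */
# 			if (++group == sbi->s_groups_count)
# 				group = 0;
# 			continue;
# 		}
# 		goto got;	/* claim the bit atomically, then proceed */
# 	}
# 	err = -ENOSPC;		/* every group scanned */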
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1380
# [PATCH] fix double mmdrop() on exec path
# 
# If load_elf_binary() (and the other binary handlers) fail after
# flush_old_exec() (for example, in setup_arg_pages()) then do_execve() will go
# through and do mmdrop(bprm.mm).
# 
# But bprm.mm is now current->mm.  We've just freed the current process's mm.
# The kernel dies in a most ghastly manner.
# 
# Fix that up by nulling out bprm.mm in flush_old_exec(), at the point where we
# consumed the mm.  Handle the null pointer in the do_execve() error path.
# 
# Also: don't open-code free_arg_pages() in do_execve(): call it instead.
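# 
# Schematically (see the fs/exec.c hunks below):
# 
# 	/* flush_old_exec(), once the mm has been installed: */
# 	bprm->mm = NULL;	/* it is current->mm now */
# 
# 	/* do_execve() error path: */
# 	if (bprm.mm)
# 		mmdrop(bprm.mm);	/* only if we still own it */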
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1381
# [PATCH] ext3: fix journal_release_buffer() race
# 
# 		CPU0				CPU1
# 
# 	journal_get_write_access(bh)
# 	 (Add buffer to t_reserved_list)
# 
# 					journal_get_write_access(bh)
# 					 (It's already on t_reserved_list:
# 					  nothing to do)
# 
# 	 (We decide we don't want to
# 	  journal the buffer after all)
# 	journal_release_buffer()
# 	 (It gets pulled off the transaction)
# 
# 
# 					journal_dirty_metadata()
# 					 (The buffer isn't on the reserved
# 					  list!  The kernel explodes)
# 
# 
# Simple fix: just leave the buffer on t_reserved_list in
# journal_release_buffer().  If nobody ends up claiming the buffer then it will
# get thrown away at start of transaction commit.
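# 
# After the fix the function boils down to (see the fs/jbd/transaction.c
# hunk below):
# 
# 	void journal_release_buffer(handle_t *handle, struct buffer_head *bh,
# 				    int credits)
# 	{
# 		/* leave bh on t_reserved_list; if nobody claims it, commit
# 		 * will throw it away */
# 		handle->h_buffer_credits += credits;
# 	}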
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1382
# [PATCH] Set limits on CONFIG_LOG_BUF_SHIFT
# 
# From: bert hubert <ahu@ds9a.nl>
# 
# Attached patch adds a range check to LOG_BUF_SHIFT and clarifies the
# configuration somewhat.  I managed to build a non-booting kernel because I
# thought 64 was a nice power of two, which led to the kernel blocking when
# it tried to actually use or allocate a 2^64-byte buffer.
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1383
# [PATCH] Fix cciss hang
# 
# From: Jens Axboe <axboe@suse.de>
# 
# This fixes a hang when performing large I/Os.  It has been tested and acked
# by the maintainer, "Wiran, Francis" <francis.wiran@hp.com>.
# --------------------------------------------
# 03/07/02	akpm@osdl.org	1.1384
# [PATCH] e100 use-after-free fix
# 
# I thought Scott had recently merged this, but it seems not.  We'll be
# needing this patch if you merge Manfred's page unmapping debug patch.
# --------------------------------------------
#
diff -Nru a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
--- a/Documentation/filesystems/Locking	Thu Jul  3 01:12:43 2003
+++ b/Documentation/filesystems/Locking	Thu Jul  3 01:12:43 2003
@@ -318,7 +318,7 @@
 ioctl:		yes	(see below)
 mmap:		no
 open:		maybe	(see below)
-flush:		yes
+flush:		no
 release:	no
 fsync:		yes	(see below)
 fasync:		yes	(see below)
diff -Nru a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
--- a/arch/alpha/mm/numa.c	Thu Jul  3 01:12:43 2003
+++ b/arch/alpha/mm/numa.c	Thu Jul  3 01:12:43 2003
@@ -338,7 +338,7 @@
 
 		lmem_map = node_mem_map(nid);
 		pfn = NODE_DATA(nid)->node_start_pfn;
-		for (i = 0; i < node_size(nid); i++, pfn++)
+		for (i = 0; i < node_spanned_pages(nid); i++, pfn++)
 			if (page_is_ram(pfn) && PageReserved(lmem_map+i))
 				reservedpages++;
 	}
@@ -372,7 +372,7 @@
 	printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
 	for (nid = 0; nid < numnodes; nid++) {
 		struct page * lmem_map = node_mem_map(nid);
-		i = node_size(nid);
+		i = node_spanned_pages(nid);
 		while (i-- > 0) {
 			total++;
 			if (PageReserved(lmem_map+i))
diff -Nru a/arch/arm/mm/init.c b/arch/arm/mm/init.c
--- a/arch/arm/mm/init.c	Thu Jul  3 01:12:43 2003
+++ b/arch/arm/mm/init.c	Thu Jul  3 01:12:43 2003
@@ -79,7 +79,7 @@
 		struct page *page, *end;
 
 		page = NODE_MEM_MAP(node);
-		end  = page + NODE_DATA(node)->node_size;
+		end  = page + NODE_DATA(node)->node_spanned_pages;
 
 		do {
 			total++;
@@ -576,7 +576,7 @@
 	for (node = 0; node < numnodes; node++) {
 		pg_data_t *pgdat = NODE_DATA(node);
 
-		if (pgdat->node_size != 0)
+		if (pgdat->node_spanned_pages != 0)
 			totalram_pages += free_all_bootmem_node(pgdat);
 	}
 
diff -Nru a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c
--- a/arch/arm26/mm/init.c	Thu Jul  3 01:12:43 2003
+++ b/arch/arm26/mm/init.c	Thu Jul  3 01:12:43 2003
@@ -68,7 +68,7 @@
 
 
 	page = NODE_MEM_MAP(0);
-	end  = page + NODE_DATA(0)->node_size;
+	end  = page + NODE_DATA(0)->node_spanned_pages;
 
 	do {
 		total++;
@@ -353,7 +353,7 @@
 	max_mapnr   = virt_to_page(high_memory) - mem_map;
 
 	/* this will put all unused low memory onto the freelists */
-	if (pgdat->node_size != 0)
+	if (pgdat->node_spanned_pages != 0)
 		totalram_pages += free_all_bootmem_node(pgdat);
 
 	printk(KERN_INFO "Memory:");
diff -Nru a/arch/i386/Kconfig b/arch/i386/Kconfig
--- a/arch/i386/Kconfig	Thu Jul  3 01:12:43 2003
+++ b/arch/i386/Kconfig	Thu Jul  3 01:12:43 2003
@@ -1339,6 +1339,14 @@
 	  best used in conjunction with the NMI watchdog so that spinlock
 	  deadlocks are also debuggable.
 
+config DEBUG_PAGEALLOC
+	bool "Page alloc debugging"
+	depends on DEBUG_KERNEL
+	help
+	  Unmap pages from the kernel linear mapping after free_pages().
+	  This results in a large slowdown, but helps to find certain types
+	  of memory corruptions.
+
 config DEBUG_HIGHMEM
 	bool "Highmem debugging"
 	depends on DEBUG_KERNEL && HIGHMEM
diff -Nru a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
--- a/arch/i386/kernel/cpu/common.c	Thu Jul  3 01:12:43 2003
+++ b/arch/i386/kernel/cpu/common.c	Thu Jul  3 01:12:43 2003
@@ -430,6 +430,14 @@
 	rise_init_cpu();
 	nexgen_init_cpu();
 	umc_init_cpu();
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/* pse is not compatible with on-the-fly unmapping,
+	 * disable it even if the cpus claim to support it.
+	 */
+	clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+	disable_pse = 1;
+#endif
 }
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
diff -Nru a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
--- a/arch/i386/kernel/io_apic.c	Thu Jul  3 01:12:43 2003
+++ b/arch/i386/kernel/io_apic.c	Thu Jul  3 01:12:43 2003
@@ -35,6 +35,7 @@
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
+#include <asm/timer.h>
 
 #include <mach_apic.h>
 
@@ -2052,7 +2053,6 @@
  */
 static inline void check_timer(void)
 {
-	extern int timer_ack;
 	int pin1, pin2;
 	int vector;
 
diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c	Thu Jul  3 01:12:43 2003
+++ b/arch/i386/kernel/time.c	Thu Jul  3 01:12:43 2003
@@ -80,8 +80,7 @@
 spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
 EXPORT_SYMBOL(i8253_lock);
 
-extern struct timer_opts timer_none;
-struct timer_opts* timer = &timer_none;
+struct timer_opts *cur_timer = &timer_none;
 
 /*
  * This version of gettimeofday has microsecond resolution
@@ -93,14 +92,14 @@
 	unsigned long usec, sec;
 
 	do {
+		unsigned long lost;
+
 		seq = read_seqbegin(&xtime_lock);
 
-		usec = timer->get_offset();
-		{
-			unsigned long lost = jiffies - wall_jiffies;
-			if (lost)
-				usec += lost * (1000000 / HZ);
-		}
+		usec = cur_timer->get_offset();
+		lost = jiffies - wall_jiffies;
+		if (lost)
+			usec += lost * (1000000 / HZ);
 		sec = xtime.tv_sec;
 		usec += (xtime.tv_nsec / 1000);
 	} while (read_seqretry(&xtime_lock, seq));
@@ -126,7 +125,7 @@
 	 * wall time.  Discover what correction gettimeofday() would have
 	 * made, and then undo it!
 	 */
-	tv->tv_nsec -= timer->get_offset() * NSEC_PER_USEC;
+	tv->tv_nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
 	tv->tv_nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
 
 	while (tv->tv_nsec < 0) {
@@ -180,7 +179,7 @@
  */
 unsigned long long monotonic_clock(void)
 {
-	return timer->monotonic_clock();
+	return cur_timer->monotonic_clock();
 }
 EXPORT_SYMBOL(monotonic_clock);
 
@@ -189,7 +188,8 @@
  * timer_interrupt() needs to keep up the real-time clock,
  * as well as call the "do_timer()" routine every clocktick
  */
-static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static inline void do_timer_interrupt(int irq, void *dev_id,
+					struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_IO_APIC
 	if (timer_ack) {
@@ -259,7 +259,7 @@
 	 */
 	write_seqlock(&xtime_lock);
 
-	timer->mark_offset();
+	cur_timer->mark_offset();
  
 	do_timer_interrupt(irq, NULL, regs);
 
@@ -301,16 +301,13 @@
 
 device_initcall(time_init_device);
 
-
 void __init time_init(void)
 {
-	
 	xtime.tv_sec = get_cmos_time();
 	wall_to_monotonic.tv_sec = -xtime.tv_sec;
 	xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
 	wall_to_monotonic.tv_nsec = -xtime.tv_nsec;
 
-
-	timer = select_timer();
+	cur_timer = select_timer();
 	time_init_hook();
 }
diff -Nru a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
--- a/arch/i386/kernel/timers/timer.c	Thu Jul  3 01:12:43 2003
+++ b/arch/i386/kernel/timers/timer.c	Thu Jul  3 01:12:43 2003
@@ -3,12 +3,6 @@
 #include <linux/string.h>
 #include <asm/timer.h>
 
-/* list of externed timers */
-extern struct timer_opts timer_pit;
-extern struct timer_opts timer_tsc;
-#ifdef CONFIG_X86_CYCLONE_TIMER
-extern struct timer_opts timer_cyclone;
-#endif
 /* list of timers, ordered by preference, NULL terminated */
 static struct timer_opts* timers[] = {
 #ifdef CONFIG_X86_CYCLONE_TIMER
@@ -28,6 +22,15 @@
 	return 1;
 }
 __setup("clock=", clock_setup);
+
+
+/* The chosen timesource has been found to be bad.
+ * Fall back to a known good timesource (the PIT)
+ */
+void clock_fallback(void)
+{
+	cur_timer = &timer_pit;
+}
 
 /* iterates through the list of timers, returning the first 
  * one that initializes successfully.
diff -Nru a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
--- a/arch/i386/kernel/timers/timer_cyclone.c	Thu Jul  3 01:12:43 2003
+++ b/arch/i386/kernel/timers/timer_cyclone.c	Thu Jul  3 01:12:43 2003
@@ -88,7 +88,7 @@
 	 * between cyclone and pit reads (as noted when 
 	 * usec delta is > 90% # of usecs/tick)
 	 */
-	if (abs(delay - delay_at_last_interrupt) > (900000/HZ)) 
+	if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
 		jiffies++;
 }
 
diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
--- a/arch/i386/kernel/timers/timer_tsc.c	Thu Jul  3 01:12:43 2003
+++ b/arch/i386/kernel/timers/timer_tsc.c	Thu Jul  3 01:12:43 2003
@@ -124,6 +124,7 @@
 	int countmp;
 	static int count1 = 0;
 	unsigned long long this_offset, last_offset;
+	static int lost_count = 0;
 	
 	write_lock(&monotonic_lock);
 	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
@@ -178,9 +179,19 @@
 	delta += delay_at_last_interrupt;
 	lost = delta/(1000000/HZ);
 	delay = delta%(1000000/HZ);
-	if (lost >= 2)
+	if (lost >= 2) {
 		jiffies += lost-1;
 
+		/* sanity check to ensure we're not always losing ticks */
+		if (lost_count++ > 100) {
+			printk(KERN_WARNING "Loosing too many ticks!\n");
+			printk(KERN_WARNING "TSC cannot be used as a timesource."
+					" (Are you running with SpeedStep?)\n");
+			printk(KERN_WARNING "Falling back to a sane timesource.\n");
+			clock_fallback();
+		}
+	} else
+		lost_count = 0;
 	/* update the monotonic base value */
 	this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
 	monotonic_base += cycles_2_ns(this_offset - last_offset);
@@ -194,7 +205,7 @@
 	 * between tsc and pit reads (as noted when 
 	 * usec delta is > 90% # of usecs/tick)
 	 */
-	if (abs(delay - delay_at_last_interrupt) > (900000/HZ))
+	if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
 		jiffies++;
 }
 
diff -Nru a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
--- a/arch/i386/lib/delay.c	Thu Jul  3 01:12:43 2003
+++ b/arch/i386/lib/delay.c	Thu Jul  3 01:12:43 2003
@@ -25,7 +25,7 @@
 
 void __delay(unsigned long loops)
 {
-	timer->delay(loops);
+	cur_timer->delay(loops);
 }
 
 inline void __const_udelay(unsigned long xloops)
diff -Nru a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
--- a/arch/i386/mm/pageattr.c	Thu Jul  3 01:12:43 2003
+++ b/arch/i386/mm/pageattr.c	Thu Jul  3 01:12:43 2003
@@ -13,6 +13,10 @@
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 
+static spinlock_t cpa_lock = SPIN_LOCK_UNLOCKED;
+static struct list_head df_list = LIST_HEAD_INIT(df_list);
+
+
 static inline pte_t *lookup_address(unsigned long address) 
 { 
 	pgd_t *pgd = pgd_offset_k(address); 
@@ -31,10 +35,15 @@
 { 
 	int i; 
 	unsigned long addr;
-	struct page *base = alloc_pages(GFP_KERNEL, 0);
+	struct page *base;
 	pte_t *pbase;
+
+	spin_unlock_irq(&cpa_lock);
+	base = alloc_pages(GFP_KERNEL, 0);
+	spin_lock_irq(&cpa_lock);
 	if (!base) 
 		return NULL;
+
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK; 
 	pbase = (pte_t *)page_address(base);
@@ -87,7 +96,7 @@
 }
 
 static int
-__change_page_attr(struct page *page, pgprot_t prot, struct page **oldpage) 
+__change_page_attr(struct page *page, pgprot_t prot)
 { 
 	pte_t *kpte; 
 	unsigned long address;
@@ -123,7 +132,7 @@
 	}
 
 	if (cpu_has_pse && (atomic_read(&kpte_page->count) == 1)) { 
-		*oldpage = kpte_page;
+		list_add(&kpte_page->list, &df_list);
 		revert_page(kpte_page, address);
 	} 
 	return 0;
@@ -134,12 +143,6 @@
 	on_each_cpu(flush_kernel_map, NULL, 1, 1);
 }
 
-struct deferred_page { 
-	struct deferred_page *next; 
-	struct page *fpage;
-}; 
-static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
-
 /*
  * Change the page attributes of an page in the linear mapping.
  *
@@ -156,47 +159,54 @@
 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 {
 	int err = 0; 
-	struct page *fpage; 
 	int i; 
+	unsigned long flags;
 
-	down_write(&init_mm.mmap_sem);
+	spin_lock_irqsave(&cpa_lock, flags);
 	for (i = 0; i < numpages; i++, page++) { 
-		fpage = NULL;
-		err = __change_page_attr(page, prot, &fpage); 
+		err = __change_page_attr(page, prot);
 		if (err) 
 			break; 
-		if (fpage) { 
-			struct deferred_page *df;
-			df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL); 
-			if (!df) {
-				flush_map();
-				__free_page(fpage);
-			} else { 
-				df->next = df_list;
-				df->fpage = fpage;				
-				df_list = df;
-			} 			
-		} 
 	} 	
-	up_write(&init_mm.mmap_sem); 
+	spin_unlock_irqrestore(&cpa_lock, flags);
 	return err;
 }
 
 void global_flush_tlb(void)
 { 
-	struct deferred_page *df, *next_df;
+	LIST_HEAD(l);
+	struct list_head* n;
 
-	down_read(&init_mm.mmap_sem);
-	df = xchg(&df_list, NULL);
-	up_read(&init_mm.mmap_sem);
+	BUG_ON(irqs_disabled());
+
+	spin_lock_irq(&cpa_lock);
+	list_splice_init(&df_list, &l);
+	spin_unlock_irq(&cpa_lock);
 	flush_map();
-	for (; df; df = next_df) { 
-		next_df = df->next;
-		if (df->fpage) 
-			__free_page(df->fpage);
-		kfree(df);
-	} 
+	n = l.next;
+	while (n != &l) {
+		struct page *pg = list_entry(n, struct page, list);
+		n = n->next;
+		__free_page(pg);
+	}
 } 
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	if (PageHighMem(page))
+		return;
+	/* the return value is ignored - the calls cannot fail,
+	 * large pages are disabled at boot time.
+	 */
+	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+	/* we should perform an IPI and flush all tlbs,
+	 * but that can deadlock, so flush only the current cpu.
+	 */
+	__flush_tlb_all();
+}
+EXPORT_SYMBOL(kernel_map_pages);
+#endif
 
 EXPORT_SYMBOL(change_page_attr);
 EXPORT_SYMBOL(global_flush_tlb);
diff -Nru a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
--- a/arch/i386/mm/pgtable.c	Thu Jul  3 01:12:43 2003
+++ b/arch/i386/mm/pgtable.c	Thu Jul  3 01:12:43 2003
@@ -34,7 +34,7 @@
 	show_free_areas();
 	printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
-		for (i = 0; i < pgdat->node_size; ++i) {
+		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
 			page = pgdat->node_mem_map + i;
 			total++;
 			if (PageHighMem(page))
diff -Nru a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
--- a/arch/ia64/ia32/binfmt_elf32.c	Thu Jul  3 01:12:43 2003
+++ b/arch/ia64/ia32/binfmt_elf32.c	Thu Jul  3 01:12:43 2003
@@ -13,6 +13,7 @@
 
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/security.h>
 
 #include <asm/param.h>
 #include <asm/signal.h>
@@ -177,7 +178,7 @@
 	if (!mpnt)
 		return -ENOMEM;
 
-	if (!vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
+	if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
 		kmem_cache_free(vm_area_cachep, mpnt);
 		return -ENOMEM;
 	}
diff -Nru a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
--- a/arch/ia64/kernel/sys_ia64.c	Thu Jul  3 01:12:43 2003
+++ b/arch/ia64/kernel/sys_ia64.c	Thu Jul  3 01:12:43 2003
@@ -100,7 +100,6 @@
 asmlinkage unsigned long
 ia64_brk (unsigned long brk)
 {
-	extern int vm_enough_memory (long pages);
 	unsigned long rlim, retval, newbrk, oldbrk;
 	struct mm_struct *mm = current->mm;
 
diff -Nru a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
--- a/arch/ia64/mm/init.c	Thu Jul  3 01:12:43 2003
+++ b/arch/ia64/mm/init.c	Thu Jul  3 01:12:43 2003
@@ -232,7 +232,7 @@
 		printk("Free swap:       %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 		for_each_pgdat(pgdat) {
 			printk("Node ID: %d\n", pgdat->node_id);
-			for(i = 0; i < pgdat->node_size; i++) {
+			for(i = 0; i < pgdat->node_spanned_pages; i++) {
 				if (PageReserved(pgdat->node_mem_map+i))
 					reserved++;
 				else if (PageSwapCache(pgdat->node_mem_map+i))
@@ -240,7 +240,7 @@
 				else if (page_count(pgdat->node_mem_map + i))
 					shared += page_count(pgdat->node_mem_map + i) - 1;
 			}
-			printk("\t%d pages of RAM\n", pgdat->node_size);
+			printk("\t%d pages of RAM\n", pgdat->node_spanned_pages);
 			printk("\t%d reserved pages\n", reserved);
 			printk("\t%d pages shared\n", shared);
 			printk("\t%d pages swap cached\n", cached);
diff -Nru a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
--- a/arch/mips/kernel/sysirix.c	Thu Jul  3 01:12:43 2003
+++ b/arch/mips/kernel/sysirix.c	Thu Jul  3 01:12:43 2003
@@ -28,6 +28,7 @@
 #include <linux/vfs.h>
 #include <linux/namei.h>
 #include <linux/socket.h>
+#include <linux/security.h>
 
 #include <asm/ptrace.h>
 #include <asm/page.h>
@@ -527,8 +528,6 @@
 	return get_seconds();
 }
 
-int vm_enough_memory(long pages);
-
 /*
  * IRIX is completely broken... it returns 0 on success, otherwise
  * ENOMEM.
@@ -585,7 +584,7 @@
 	/*
 	 * Check if we have enough memory..
 	 */
-	if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) {
+	if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) {
 		ret = -ENOMEM;
 		goto out;
 	}
diff -Nru a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
--- a/arch/ppc64/mm/init.c	Thu Jul  3 01:12:43 2003
+++ b/arch/ppc64/mm/init.c	Thu Jul  3 01:12:43 2003
@@ -109,7 +109,7 @@
 	show_free_areas();
 	printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
-		for (i = 0; i < pgdat->node_size; i++) {
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
 			page = pgdat->node_mem_map + i;
 			total++;
 			if (PageReserved(page))
@@ -564,7 +564,7 @@
 	int nid;
 
         for (nid = 0; nid < numnodes; nid++) {
-		if (node_data[nid].node_size != 0) {
+		if (node_data[nid].node_spanned_pages != 0) {
 			printk("freeing bootmem node %x\n", nid);
 			totalram_pages +=
 				free_all_bootmem_node(NODE_DATA(nid));
diff -Nru a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c
--- a/arch/ppc64/mm/numa.c	Thu Jul  3 01:12:43 2003
+++ b/arch/ppc64/mm/numa.c	Thu Jul  3 01:12:43 2003
@@ -160,21 +160,21 @@
 		 * this simple case and complain if there is a gap in
 		 * memory
 		 */
-		if (node_data[numa_domain].node_size) {
+		if (node_data[numa_domain].node_spanned_pages) {
 			unsigned long shouldstart =
 				node_data[numa_domain].node_start_pfn + 
-				node_data[numa_domain].node_size;
+				node_data[numa_domain].node_spanned_pages;
 			if (shouldstart != (start / PAGE_SIZE)) {
 				printk(KERN_ERR "Hole in node, disabling "
 						"region start %lx length %lx\n",
 						start, size);
 				continue;
 			}
-			node_data[numa_domain].node_size += size / PAGE_SIZE;
+			node_data[numa_domain].node_spanned_pages += size / PAGE_SIZE;
 		} else {
 			node_data[numa_domain].node_start_pfn =
 				start / PAGE_SIZE;
-			node_data[numa_domain].node_size = size / PAGE_SIZE;
+			node_data[numa_domain].node_spanned_pages = size / PAGE_SIZE;
 		}
 
 		for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
@@ -202,7 +202,7 @@
 		map_cpu_to_node(i, 0);
 
 	node_data[0].node_start_pfn = 0;
-	node_data[0].node_size = lmb_end_of_DRAM() / PAGE_SIZE;
+	node_data[0].node_spanned_pages = lmb_end_of_DRAM() / PAGE_SIZE;
 
 	for (i = 0 ; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT)
 		numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
@@ -224,12 +224,12 @@
 		unsigned long bootmem_paddr;
 		unsigned long bootmap_pages;
 
-		if (node_data[nid].node_size == 0)
+		if (node_data[nid].node_spanned_pages == 0)
 			continue;
 
 		start_paddr = node_data[nid].node_start_pfn * PAGE_SIZE;
 		end_paddr = start_paddr + 
-				(node_data[nid].node_size * PAGE_SIZE);
+				(node_data[nid].node_spanned_pages * PAGE_SIZE);
 
 		dbg("node %d\n", nid);
 		dbg("start_paddr = %lx\n", start_paddr);
@@ -311,7 +311,7 @@
 		unsigned long start_pfn;
 		unsigned long end_pfn;
 
-		if (node_data[nid].node_size == 0)
+		if (node_data[nid].node_spanned_pages == 0)
 			continue;
 
 		start_pfn = plat_node_bdata[nid].node_boot_start >> PAGE_SHIFT;
diff -Nru a/arch/s390/kernel/compat_exec.c b/arch/s390/kernel/compat_exec.c
--- a/arch/s390/kernel/compat_exec.c	Thu Jul  3 01:12:43 2003
+++ b/arch/s390/kernel/compat_exec.c	Thu Jul  3 01:12:43 2003
@@ -23,6 +23,7 @@
 #include <linux/spinlock.h>
 #include <linux/binfmts.h>
 #include <linux/module.h>
+#include <linux/security.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
@@ -55,7 +56,7 @@
 	if (!mpnt) 
 		return -ENOMEM; 
 	
-	if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
+	if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
 		kmem_cache_free(vm_area_cachep, mpnt);
 		return -ENOMEM;
 	}
diff -Nru a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
--- a/arch/x86_64/ia32/ia32_binfmt.c	Thu Jul  3 01:12:43 2003
+++ b/arch/x86_64/ia32/ia32_binfmt.c	Thu Jul  3 01:12:43 2003
@@ -14,6 +14,8 @@
 #include <linux/string.h>
 #include <linux/binfmts.h>
 #include <linux/mm.h>
+#include <linux/security.h>
+
 #include <asm/segment.h> 
 #include <asm/ptrace.h>
 #include <asm/processor.h>
@@ -339,7 +341,7 @@
 	if (!mpnt) 
 		return -ENOMEM; 
 	
-	if (!vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
+	if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
 		kmem_cache_free(vm_area_cachep, mpnt);
 		return -ENOMEM;
 	}
diff -Nru a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
--- a/arch/x86_64/mm/init.c	Thu Jul  3 01:12:43 2003
+++ b/arch/x86_64/mm/init.c	Thu Jul  3 01:12:43 2003
@@ -64,7 +64,7 @@
 	printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
 
 	for_each_pgdat(pgdat) {
-               for (i = 0; i < pgdat->node_size; ++i) {
+               for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        page = pgdat->node_mem_map + i;
 		total++;
                        if (PageReserved(page))
diff -Nru a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
--- a/arch/x86_64/mm/numa.c	Thu Jul  3 01:12:43 2003
+++ b/arch/x86_64/mm/numa.c	Thu Jul  3 01:12:43 2003
@@ -86,7 +86,7 @@
 	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
 	NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
 	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
-	NODE_DATA(nodeid)->node_size = end_pfn - start_pfn;
+	NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
 
 	/* Find a place for the bootmem map */
 	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 
diff -Nru a/drivers/block/cciss.c b/drivers/block/cciss.c
--- a/drivers/block/cciss.c	Thu Jul  3 01:12:43 2003
+++ b/drivers/block/cciss.c	Thu Jul  3 01:12:43 2003
@@ -1887,7 +1887,7 @@
                 BUG();
 
 	if (( c = cmd_alloc(h, 1)) == NULL)
-		goto startio;
+		goto full;
 
 	blkdev_dequeue_request(creq);
 
@@ -1960,8 +1960,9 @@
 		h->maxQsinceinit = h->Qdepth; 
 
 	goto queue;
-startio:
+full:
 	blk_stop_queue(q);
+startio:
 	start_io(h);
 }
 
diff -Nru a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
--- a/drivers/block/ll_rw_blk.c	Thu Jul  3 01:12:43 2003
+++ b/drivers/block/ll_rw_blk.c	Thu Jul  3 01:12:43 2003
@@ -1072,8 +1072,8 @@
  **/
 void blk_start_queue(request_queue_t *q)
 {
-	if (test_and_clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
-		schedule_work(&q->unplug_work);
+	clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
+	schedule_work(&q->unplug_work);
 }
 
 /**
diff -Nru a/drivers/net/Kconfig b/drivers/net/Kconfig
--- a/drivers/net/Kconfig	Thu Jul  3 01:12:43 2003
+++ b/drivers/net/Kconfig	Thu Jul  3 01:12:43 2003
@@ -1397,7 +1397,7 @@
 
 config TC35815
 	tristate "TOSHIBA TC35815 Ethernet support"
-	depends on NET_PCI && PCI
+	depends on NET_PCI && PCI && TOSHIBA_JMR3927
 
 config DGRS
 	tristate "Digi Intl. RightSwitch SE-X support"
diff -Nru a/drivers/net/e100/e100_main.c b/drivers/net/e100/e100_main.c
--- a/drivers/net/e100/e100_main.c	Thu Jul  3 01:12:43 2003
+++ b/drivers/net/e100/e100_main.c	Thu Jul  3 01:12:43 2003
@@ -1085,9 +1085,9 @@
 		goto exit1;
 	}
 
-	e100_prepare_xmit_buff(bdp, skb);
-
 	bdp->drv_stats.net_stats.tx_bytes += skb->len;
+
+	e100_prepare_xmit_buff(bdp, skb);
 
 	dev->trans_start = jiffies;
 
diff -Nru a/fs/attr.c b/fs/attr.c
--- a/fs/attr.c	Thu Jul  3 01:12:43 2003
+++ b/fs/attr.c	Thu Jul  3 01:12:43 2003
@@ -22,8 +22,6 @@
 	int retval = -EPERM;
 	unsigned int ia_valid = attr->ia_valid;
 
-	lock_kernel();
-
 	/* If force is set do it anyway. */
 	if (ia_valid & ATTR_FORCE)
 		goto fine;
@@ -58,7 +56,6 @@
 fine:
 	retval = 0;
 error:
-	unlock_kernel();
 	return retval;
 }
 
diff -Nru a/fs/block_dev.c b/fs/block_dev.c
--- a/fs/block_dev.c	Thu Jul  3 01:12:43 2003
+++ b/fs/block_dev.c	Thu Jul  3 01:12:43 2003
@@ -155,11 +155,13 @@
  */
 static loff_t block_llseek(struct file *file, loff_t offset, int origin)
 {
-	/* ewww */
-	loff_t size = file->f_dentry->d_inode->i_bdev->bd_inode->i_size;
+	struct inode *bd_inode;
+	loff_t size;
 	loff_t retval;
 
-	lock_kernel();
+	bd_inode = file->f_dentry->d_inode->i_bdev->bd_inode;
+	down(&bd_inode->i_sem);
+	size = bd_inode->i_size;
 
 	switch (origin) {
 		case 2:
@@ -175,7 +177,7 @@
 		}
 		retval = offset;
 	}
-	unlock_kernel();
+	up(&bd_inode->i_sem);
 	return retval;
 }
 	
diff -Nru a/fs/coda/file.c b/fs/coda/file.c
--- a/fs/coda/file.c	Thu Jul  3 01:12:43 2003
+++ b/fs/coda/file.c	Thu Jul  3 01:12:43 2003
@@ -153,19 +153,22 @@
 	struct inode *coda_inode;
 	int err = 0, fcnt;
 
+	lock_kernel();
+
 	coda_vfs_stat.flush++;
 
 	/* last close semantics */
 	fcnt = file_count(coda_file);
-	if (fcnt > 1) return 0;
+	if (fcnt > 1)
+		goto out;
 
 	/* No need to make an upcall when we have not made any modifications
 	 * to the file */
 	if ((coda_file->f_flags & O_ACCMODE) == O_RDONLY)
-		return 0;
+		goto out;
 
 	if (use_coda_close)
-		return 0;
+		goto out;
 
 	cfi = CODA_FTOC(coda_file);
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
@@ -180,6 +183,8 @@
 		err = 0;
 	}
 
+out:
+	unlock_kernel();
 	return err;
 }
 
diff -Nru a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c	Thu Jul  3 01:12:43 2003
+++ b/fs/exec.c	Thu Jul  3 01:12:43 2003
@@ -392,7 +392,7 @@
 	if (!mpnt)
 		return -ENOMEM;
 
-	if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
+	if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
 		kmem_cache_free(vm_area_cachep, mpnt);
 		return -ENOMEM;
 	}
@@ -441,9 +441,9 @@
 {
 	int i;
 
-	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
+	for (i = 0; i < MAX_ARG_PAGES; i++) {
 		if (bprm->page[i])
-		__free_page(bprm->page[i]);
+			__free_page(bprm->page[i]);
 		bprm->page[i] = NULL;
 	}
 }
@@ -772,6 +772,8 @@
 	if (retval)
 		goto out;
 
+	bprm->mm = NULL;		/* We're using it now */
+
 	/* This is the point of no return */
 
 	current->sas_ss_sp = current->sas_ss_size = 0;
@@ -999,7 +1001,7 @@
 			}
 			read_lock(&binfmt_lock);
 			put_binfmt(fmt);
-			if (retval != -ENOEXEC)
+			if (retval != -ENOEXEC || bprm->mm == NULL)
 				break;
 			if (!bprm->file) {
 				read_unlock(&binfmt_lock);
@@ -1007,7 +1009,7 @@
 			}
 		}
 		read_unlock(&binfmt_lock);
-		if (retval != -ENOEXEC) {
+		if (retval != -ENOEXEC || bprm->mm == NULL) {
 			break;
 #ifdef CONFIG_KMOD
 		}else{
@@ -1035,7 +1037,6 @@
 	struct linux_binprm bprm;
 	struct file *file;
 	int retval;
-	int i;
 
 	sched_balance_exec();
 
@@ -1103,17 +1104,14 @@
 
 out:
 	/* Something went wrong, return the inode and free the argument pages*/
-	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
-		struct page * page = bprm.page[i];
-		if (page)
-			__free_page(page);
-	}
+	free_arg_pages(&bprm);
 
 	if (bprm.security)
 		security_bprm_free(&bprm);
 
 out_mm:
-	mmdrop(bprm.mm);
+	if (bprm.mm)
+		mmdrop(bprm.mm);
 
 out_file:
 	if (bprm.file) {
diff -Nru a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
--- a/fs/ext2/ialloc.c	Thu Jul  3 01:12:43 2003
+++ b/fs/ext2/ialloc.c	Thu Jul  3 01:12:43 2003
@@ -489,17 +489,18 @@
 	return group;
 }
 
-struct inode * ext2_new_inode(struct inode * dir, int mode)
+struct inode *ext2_new_inode(struct inode *dir, int mode)
 {
 	struct super_block *sb;
 	struct buffer_head *bitmap_bh = NULL;
 	struct buffer_head *bh2;
 	int group, i;
-	ino_t ino;
+	ino_t ino = 0;
 	struct inode * inode;
-	struct ext2_group_desc * desc;
-	struct ext2_super_block * es;
+	struct ext2_group_desc *gdp;
+	struct ext2_super_block *es;
 	struct ext2_inode_info *ei;
+	struct ext2_sb_info *sbi;
 	int err;
 
 	sb = dir->i_sb;
@@ -508,36 +509,62 @@
 		return ERR_PTR(-ENOMEM);
 
 	ei = EXT2_I(inode);
-	es = EXT2_SB(sb)->s_es;
+	sbi = EXT2_SB(sb);
+	es = sbi->s_es;
 repeat:
 	if (S_ISDIR(mode)) {
-		if (test_opt (sb, OLDALLOC))
+		if (test_opt(sb, OLDALLOC))
 			group = find_group_dir(sb, dir);
 		else
 			group = find_group_orlov(sb, dir);
 	} else 
 		group = find_group_other(sb, dir);
 
-	err = -ENOSPC;
-	if (group == -1)
+	if (group == -1) {
+		err = -ENOSPC;
 		goto fail;
+	}
 
-	err = -EIO;
-	bitmap_bh = read_inode_bitmap(sb, group);
-	if (!bitmap_bh)
-		goto fail2;
-
-	i = ext2_find_first_zero_bit((unsigned long *)bitmap_bh->b_data,
-				      EXT2_INODES_PER_GROUP(sb));
-	if (i >= EXT2_INODES_PER_GROUP(sb))
-		goto bad_count;
-	if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group),
-			i, (void *) bitmap_bh->b_data)) {
+	for (i = 0; i < sbi->s_groups_count; i++) {
+		gdp = ext2_get_group_desc(sb, group, &bh2);
 		brelse(bitmap_bh);
-		ext2_release_inode(sb, group, S_ISDIR(mode));
-		goto repeat;
+		bitmap_bh = read_inode_bitmap(sb, group);
+		if (!bitmap_bh) {
+			err = -EIO;
+			goto fail2;
+		}
+
+		i = ext2_find_first_zero_bit((unsigned long *)bitmap_bh->b_data,
+					      EXT2_INODES_PER_GROUP(sb));
+		if (i >= EXT2_INODES_PER_GROUP(sb)) {
+			/*
+			 * Rare race: find_group_xx() decided that there were
+			 * free inodes in this group, but by the time we tried
+			 * to allocate one, they're all gone.  This can also
+			 * occur because the counters which find_group_orlov()
+			 * uses are approximate.  So just go and search the
+			 * next block group.
+			 */
+			if (++group == sbi->s_groups_count)
+				group = 0;
+			continue;
+		}
+		if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group),
+						i, bitmap_bh->b_data)) {
+			brelse(bitmap_bh);
+			bitmap_bh = NULL;
+			ext2_release_inode(sb, group, S_ISDIR(mode));
+			goto repeat;
+		}
+		goto got;
 	}
 
+	/*
+	 * Scanned all blockgroups.
+	 */
+	err = -ENOSPC;
+	goto fail2;
+got:
 	mark_buffer_dirty(bitmap_bh);
 	if (sb->s_flags & MS_SYNCHRONOUS)
 		sync_dirty_buffer(bitmap_bh);
@@ -605,8 +632,9 @@
 	inode->i_generation = EXT2_SB(sb)->s_next_generation++;
 	insert_inode_hash(inode);
 
-	if(DQUOT_ALLOC_INODE(inode)) {
+	if (DQUOT_ALLOC_INODE(inode)) {
 		DQUOT_DROP(inode);
+		err = -ENOSPC;
 		goto fail3;
 	}
 	err = ext2_init_acl(inode, dir);
@@ -631,21 +659,6 @@
 	make_bad_inode(inode);
 	iput(inode);
 	return ERR_PTR(err);
-
-bad_count:
-	brelse(bitmap_bh);
-	ext2_error (sb, "ext2_new_inode",
-		    "Free inodes count corrupted in group %d",
-		    group);
-	/* Is it really ENOSPC? */
-	err = -ENOSPC;
-	if (sb->s_flags & MS_RDONLY)
-		goto fail;
-
-	desc = ext2_get_group_desc (sb, group, &bh2);
-	desc->bg_free_inodes_count = 0;
-	mark_buffer_dirty(bh2);
-	goto repeat;
 }
 
 unsigned long ext2_count_free_inodes (struct super_block * sb)
diff -Nru a/fs/jbd/commit.c b/fs/jbd/commit.c
--- a/fs/jbd/commit.c	Thu Jul  3 01:12:43 2003
+++ b/fs/jbd/commit.c	Thu Jul  3 01:12:43 2003
@@ -169,10 +169,23 @@
 	 * that multiple journal_get_write_access() calls to the same
 	 * buffer are perfectly permissable.
 	 */
-	while (commit_transaction->t_reserved_list) {
-		jh = commit_transaction->t_reserved_list;
-		JBUFFER_TRACE(jh, "reserved, unused: refile");
-		journal_refile_buffer(journal, jh);
+	{
+		int nr = 0;
+		while (commit_transaction->t_reserved_list) {
+			jh = commit_transaction->t_reserved_list;
+			JBUFFER_TRACE(jh, "reserved, unused: refile");
+			journal_refile_buffer(journal, jh);
+			nr++;
+		}
+		if (nr) {
+			static int noisy;
+
+			if (noisy < 10) {
+				noisy++;
+				printk("%s: freed %d reserved buffers\n",
+					__FUNCTION__, nr);
+			}
+		}
 	}
 
 	/*
diff -Nru a/fs/jbd/transaction.c b/fs/jbd/transaction.c
--- a/fs/jbd/transaction.c	Thu Jul  3 01:12:43 2003
+++ b/fs/jbd/transaction.c	Thu Jul  3 01:12:43 2003
@@ -1168,37 +1168,24 @@
  * journal_release_buffer: undo a get_write_access without any buffer
  * updates, if the update decided in the end that it didn't need access.
  *
- * journal_get_write_access() can block, so it is quite possible for a
- * journaling component to decide after the write access is returned
- * that global state has changed and the update is no longer required.
- *
  * The caller passes in the number of credits which should be put back for
  * this buffer (zero or one).
+ *
+ * We leave the buffer attached to t_reserved_list because even though this
+ * handle doesn't want it, some other concurrent handle may want to journal
+ * this buffer.  If that handle is currently in between get_write_access() and
+ * journal_dirty_metadata() then it expects the buffer to be reserved.  If
+ * we were to rip it off t_reserved_list here, the other handle will explode
+ * when journal_dirty_metadata is presented with a non-reserved buffer.
+ *
+ * If nobody really wants to journal this buffer then it will be thrown
+ * away at the start of commit.
  */
 void
 journal_release_buffer(handle_t *handle, struct buffer_head *bh, int credits)
 {
-	transaction_t *transaction = handle->h_transaction;
-	journal_t *journal = transaction->t_journal;
-	struct journal_head *jh = bh2jh(bh);
-
-	JBUFFER_TRACE(jh, "entry");
-
-	/* If the buffer is reserved but not modified by this
-	 * transaction, then it is safe to release it.  In all other
-	 * cases, just leave the buffer as it is. */
-
-	jbd_lock_bh_state(bh);
-	spin_lock(&journal->j_list_lock);
-	if (jh->b_jlist == BJ_Reserved && jh->b_transaction == transaction &&
-	    !buffer_jbddirty(jh2bh(jh))) {
-		JBUFFER_TRACE(jh, "unused: refiling it");
-		__journal_refile_buffer(jh);
-	}
-	spin_unlock(&journal->j_list_lock);
-	jbd_unlock_bh_state(bh);
+	BUFFER_TRACE(bh, "entry");
 	handle->h_buffer_credits += credits;
-	JBUFFER_TRACE(jh, "exit");
 }
 
 /** 
diff -Nru a/fs/nfs/file.c b/fs/nfs/file.c
--- a/fs/nfs/file.c	Thu Jul  3 01:12:43 2003
+++ b/fs/nfs/file.c	Thu Jul  3 01:12:43 2003
@@ -104,11 +104,13 @@
 
 	dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
 
+	lock_kernel();
 	status = nfs_wb_file(inode, file);
 	if (!status) {
 		status = file->f_error;
 		file->f_error = 0;
 	}
+	unlock_kernel();
 	return status;
 }
 
diff -Nru a/fs/open.c b/fs/open.c
--- a/fs/open.c	Thu Jul  3 01:12:43 2003
+++ b/fs/open.c	Thu Jul  3 01:12:43 2003
@@ -952,11 +952,8 @@
 		return 0;
 	}
 	retval = 0;
-	if (filp->f_op && filp->f_op->flush) {
-		lock_kernel();
+	if (filp->f_op && filp->f_op->flush)
 		retval = filp->f_op->flush(filp);
-		unlock_kernel();
-	}
 	dnotify_flush(filp, id);
 	locks_remove_posix(filp, id);
 	fput(filp);
diff -Nru a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
--- a/fs/proc/proc_misc.c	Thu Jul  3 01:12:43 2003
+++ b/fs/proc/proc_misc.c	Thu Jul  3 01:12:43 2003
@@ -497,11 +497,10 @@
 static int locks_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
-	int len;
-	lock_kernel();
-	len = get_locks_status(page, start, off, count);
-	unlock_kernel();
-	if (len < count) *eof = 1;
+	int len = get_locks_status(page, start, off, count);
+
+	if (len < count)
+		*eof = 1;
 	return len;
 }
 
diff -Nru a/fs/proc/root.c b/fs/proc/root.c
--- a/fs/proc/root.c	Thu Jul  3 01:12:43 2003
+++ b/fs/proc/root.c	Thu Jul  3 01:12:43 2003
@@ -81,11 +81,13 @@
 
 static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry)
 {
-	if (dir->i_ino == PROC_ROOT_INO) { /* check for safety... */
-		lock_kernel();
+	/*
+	 * nr_threads is actually protected by the tasklist_lock;
+	 * however, it's conventional to do reads, especially for
+	 * reporting, without any locking whatsoever.
+	 */
+	if (dir->i_ino == PROC_ROOT_INO) /* check for safety... */
 		dir->i_nlink = proc_root.nlink + nr_threads;
-		unlock_kernel();
-	}
 
 	if (!proc_lookup(dir, dentry)) {
 		return NULL;
diff -Nru a/fs/ramfs/inode.c b/fs/ramfs/inode.c
--- a/fs/ramfs/inode.c	Thu Jul  3 01:12:43 2003
+++ b/fs/ramfs/inode.c	Thu Jul  3 01:12:43 2003
@@ -146,6 +146,7 @@
 	.mmap		= generic_file_mmap,
 	.fsync		= simple_sync_file,
 	.sendfile	= generic_file_sendfile,
+	.llseek		= generic_file_llseek,
 };
 
 static struct inode_operations ramfs_file_inode_operations = {
diff -Nru a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h
--- a/include/asm-alpha/mmzone.h	Thu Jul  3 01:12:43 2003
+++ b/include/asm-alpha/mmzone.h	Thu Jul  3 01:12:43 2003
@@ -31,7 +31,6 @@
 
 #define pa_to_nid(pa)		alpha_pa_to_nid(pa)
 #define NODE_DATA(nid)		(&node_data[(nid)])
-#define node_size(nid)		(NODE_DATA(nid)->node_size)
 
 #define node_localnr(pfn, nid)	((pfn) - NODE_DATA(nid)->node_start_pfn)
 
@@ -124,7 +123,7 @@
 #define pfn_to_nid(pfn)		pa_to_nid(((u64)pfn << PAGE_SHIFT))
 #define pfn_valid(pfn)							\
 	(((pfn) - node_start_pfn(pfn_to_nid(pfn))) <			\
-	 node_size(pfn_to_nid(pfn)))					\
+	 node_spanned_pages(pfn_to_nid(pfn)))					\
 
 #define virt_addr_valid(kaddr)	pfn_valid((__pa(kaddr) >> PAGE_SHIFT))
 
diff -Nru a/include/asm-i386/cacheflush.h b/include/asm-i386/cacheflush.h
--- a/include/asm-i386/cacheflush.h	Thu Jul  3 01:12:43 2003
+++ b/include/asm-i386/cacheflush.h	Thu Jul  3 01:12:43 2003
@@ -17,4 +17,9 @@
 void global_flush_tlb(void); 
 int change_page_attr(struct page *page, int numpages, pgprot_t prot);
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+/* internal debugging function */
+void kernel_map_pages(struct page *page, int numpages, int enable);
+#endif
+
 #endif /* _I386_CACHEFLUSH_H */
diff -Nru a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h
--- a/include/asm-i386/mmzone.h	Thu Jul  3 01:12:43 2003
+++ b/include/asm-i386/mmzone.h	Thu Jul  3 01:12:43 2003
@@ -32,8 +32,7 @@
 #define alloc_bootmem_low_pages_node(ignore, x) \
 	__alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0)
 
-#define node_size(nid)		(node_data[nid]->node_size)
-#define node_localnr(pfn, nid)	((pfn) - node_data[nid]->node_start_pfn)
+#define node_localnr(pfn, nid)		((pfn) - node_data[nid]->node_start_pfn)
 
 /*
  * Following are macros that each numa implmentation must define.
@@ -54,7 +53,7 @@
 #define node_end_pfn(nid)						\
 ({									\
 	pg_data_t *__pgdat = NODE_DATA(nid);				\
-	__pgdat->node_start_pfn + __pgdat->node_size;			\
+	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;		\
 })
 
 #define local_mapnr(kvaddr)						\
diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h
--- a/include/asm-i386/timer.h	Thu Jul  3 01:12:43 2003
+++ b/include/asm-i386/timer.h	Thu Jul  3 01:12:43 2003
@@ -21,8 +21,21 @@
 #define TICK_SIZE (tick_nsec / 1000)
 
 extern struct timer_opts* select_timer(void);
+extern void clock_fallback(void);
 
 /* Modifiers for buggy PIT handling */
 
 extern int pit_latch_buggy;
+
+extern struct timer_opts *cur_timer;
+extern int timer_ack;
+
+/* list of externed timers */
+extern struct timer_opts timer_none;
+extern struct timer_opts timer_pit;
+extern struct timer_opts timer_tsc;
+#ifdef CONFIG_X86_CYCLONE_TIMER
+extern struct timer_opts timer_cyclone;
+#endif
+
 #endif
diff -Nru a/include/asm-mips64/mmzone.h b/include/asm-mips64/mmzone.h
--- a/include/asm-mips64/mmzone.h	Thu Jul  3 01:12:43 2003
+++ b/include/asm-mips64/mmzone.h	Thu Jul  3 01:12:43 2003
@@ -24,7 +24,7 @@
 
 #define PHYSADDR_TO_NID(pa)		NASID_TO_COMPACT_NODEID(NASID_GET(pa))
 #define PLAT_NODE_DATA(n)		(plat_node_data[n])
-#define PLAT_NODE_DATA_SIZE(n)	     (PLAT_NODE_DATA(n)->gendata.node_size)
+#define PLAT_NODE_DATA_SIZE(n)	     (PLAT_NODE_DATA(n)->gendata.node_spanned_pages)
 #define PLAT_NODE_DATA_LOCALNR(p, n) \
 		(((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn)
 
diff -Nru a/include/asm-ppc64/mmzone.h b/include/asm-ppc64/mmzone.h
--- a/include/asm-ppc64/mmzone.h	Thu Jul  3 01:12:43 2003
+++ b/include/asm-ppc64/mmzone.h	Thu Jul  3 01:12:43 2003
@@ -54,7 +54,6 @@
  */
 #define NODE_DATA(nid)		(&node_data[nid])
 
-#define node_size(nid)		(NODE_DATA(nid)->node_size)
 #define node_localnr(pfn, nid)	((pfn) - NODE_DATA(nid)->node_start_pfn)
 
 /*
diff -Nru a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h
--- a/include/asm-x86_64/mmzone.h	Thu Jul  3 01:12:43 2003
+++ b/include/asm-x86_64/mmzone.h	Thu Jul  3 01:12:43 2003
@@ -40,8 +40,7 @@
 #define node_mem_map(nid)	(NODE_DATA(nid)->node_mem_map)
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
 #define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
-				 NODE_DATA(nid)->node_size)
-#define node_size(nid)		(NODE_DATA(nid)->node_size)
+				 NODE_DATA(nid)->node_spanned_pages)
 
 #define local_mapnr(kvaddr) \
 	( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) )
diff -Nru a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h	Thu Jul  3 01:12:43 2003
+++ b/include/linux/mm.h	Thu Jul  3 01:12:43 2003
@@ -339,9 +339,14 @@
 	page->flags |= zone_num << ZONE_SHIFT;
 }
 
-static inline void * lowmem_page_address(struct page *page)
+#ifndef CONFIG_DISCONTIGMEM
+/* The array of struct pages - for discontigmem use pgdat->lmem_map */
+extern struct page *mem_map;
+#endif
+
+static inline void *lowmem_page_address(struct page *page)
 {
-	return __va( ( (page - page_zone(page)->zone_mem_map)	+ page_zone(page)->zone_start_pfn) << PAGE_SHIFT);
+	return __va(page_to_pfn(page) << PAGE_SHIFT);
 }
 
 #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
@@ -395,11 +400,6 @@
 #define VM_FAULT_MINOR	1
 #define VM_FAULT_MAJOR	2
 
-#ifndef CONFIG_DISCONTIGMEM
-/* The array of struct pages - for discontigmem use pgdat->lmem_map */
-extern struct page *mem_map;
-#endif 
-
 extern void show_free_areas(void);
 
 struct page *shmem_nopage(struct vm_area_struct * vma,
@@ -609,5 +609,13 @@
 		int write);
 extern int remap_page_range(struct vm_area_struct *vma, unsigned long from,
 		unsigned long to, unsigned long size, pgprot_t prot);
+
+#ifndef CONFIG_DEBUG_PAGEALLOC
+static inline void
+kernel_map_pages(struct page *page, int numpages, int enable)
+{
+}
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
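
kernel_map_pages() now has two halves: a real declaration in
asm-i386/cacheflush.h under CONFIG_DEBUG_PAGEALLOC, and the empty inline above
when the option is off, so callers never need an #ifdef. A sketch of the
calling pattern (free_one_sketch is an illustrative name):

	#include <linux/mm.h>

	/* Compiles to nothing with CONFIG_DEBUG_PAGEALLOC=n; with it on,
	 * the i386 implementation unmaps the page from the kernel linear
	 * mapping so a later stray access faults immediately. */
	static void free_one_sketch(struct page *page, int order)
	{
		kernel_map_pages(page, 1 << order, 0);	/* enable=0: unmap */
	}
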
diff -Nru a/include/linux/mman.h b/include/linux/mman.h
--- a/include/linux/mman.h	Thu Jul  3 01:12:43 2003
+++ b/include/linux/mman.h	Thu Jul  3 01:12:43 2003
@@ -9,7 +9,8 @@
 #define MREMAP_MAYMOVE	1
 #define MREMAP_FIXED	2
 
-extern int vm_enough_memory(long pages);
+extern int sysctl_overcommit_memory;
+extern int sysctl_overcommit_ratio;
 extern atomic_t vm_committed_space;
 
 #ifdef CONFIG_SMP
diff -Nru a/include/linux/mmzone.h b/include/linux/mmzone.h
--- a/include/linux/mmzone.h	Thu Jul  3 01:12:43 2003
+++ b/include/linux/mmzone.h	Thu Jul  3 01:12:43 2003
@@ -184,11 +184,16 @@
 	unsigned long *valid_addr_bitmap;
 	struct bootmem_data *bdata;
 	unsigned long node_start_pfn;
-	unsigned long node_size;
+	unsigned long node_present_pages; /* total number of physical pages */
+	unsigned long node_spanned_pages; /* total size of physical page
+					     range, including holes */
 	int node_id;
 	struct pglist_data *pgdat_next;
 	wait_queue_head_t       kswapd_wait;
 } pg_data_t;
+
+#define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
+#define node_spanned_pages(nid)	(NODE_DATA(nid)->node_spanned_pages)
 
 extern int numnodes;
 extern struct pglist_data *pgdat_list;
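
Splitting node_size lets nodes with holes be described exactly:
node_spanned_pages covers the node's whole pfn range, node_present_pages only
the pages that actually exist. A sketch of the invariant, using just the fields
added above (the function name is illustrative):

	#include <linux/mmzone.h>

	/* For every node: present <= spanned, and the pfn one past the
	 * node is start + spanned, holes included, which is what the
	 * per-arch node_end_pfn(nid) macros now compute. */
	static void node_range_sketch(pg_data_t *pgdat)
	{
		unsigned long end = pgdat->node_start_pfn +
				    pgdat->node_spanned_pages;

		BUG_ON(pgdat->node_present_pages > pgdat->node_spanned_pages);
		(void)end;
	}
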
diff -Nru a/include/linux/security.h b/include/linux/security.h
--- a/include/linux/security.h	Thu Jul  3 01:12:43 2003
+++ b/include/linux/security.h	Thu Jul  3 01:12:43 2003
@@ -49,6 +49,7 @@
 extern int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags);
 extern void cap_task_reparent_to_init (struct task_struct *p);
 extern int cap_syslog (int type);
+extern int cap_vm_enough_memory (long pages);
 
 static inline int cap_netlink_send (struct sk_buff *skb)
 {
@@ -958,6 +959,10 @@
  *	See the syslog(2) manual page for an explanation of the @type values.  
  *	@type contains the type of action.
  *	Return 0 if permission is granted.
+ * @vm_enough_memory:
+ *	Check permissions for allocating a new virtual mapping.
+ *	@pages contains the number of pages.
+ *	Return 0 if permission is granted.
  *
  * @register_security:
  * 	allow module stacking.
@@ -989,6 +994,7 @@
 	int (*quotactl) (int cmds, int type, int id, struct super_block * sb);
 	int (*quota_on) (struct file * f);
 	int (*syslog) (int type);
+	int (*vm_enough_memory) (long pages);
 
 	int (*bprm_alloc_security) (struct linux_binprm * bprm);
 	void (*bprm_free_security) (struct linux_binprm * bprm);
@@ -1238,6 +1244,11 @@
 	return security_ops->syslog(type);
 }
 
+static inline int security_vm_enough_memory(long pages)
+{
+	return security_ops->vm_enough_memory(pages);
+}
+
 static inline int security_bprm_alloc (struct linux_binprm *bprm)
 {
 	return security_ops->bprm_alloc_security (bprm);
@@ -1896,6 +1907,11 @@
 static inline int security_syslog(int type)
 {
 	return cap_syslog(type);
+}
+
+static inline int security_vm_enough_memory(long pages)
+{
+	return cap_vm_enough_memory(pages);
 }
 
 static inline int security_bprm_alloc (struct linux_binprm *bprm)
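
Both variants of security_vm_enough_memory() account the pages before
deciding, and the hook unaccounts them itself on refusal, so a caller only has
to unaccount on its own later failure paths. A sketch of the call-site
contract (charge_sketch is an illustrative name):

	#include <linux/security.h>

	static int charge_sketch(long pages)
	{
		if (security_vm_enough_memory(pages))
			return -ENOMEM;	/* 0 is success; nothing left accounted */
		/* pages are now accounted against vm_committed_space */
		return 0;
	}
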
diff -Nru a/include/linux/slab.h b/include/linux/slab.h
--- a/include/linux/slab.h	Thu Jul  3 01:12:43 2003
+++ b/include/linux/slab.h	Thu Jul  3 01:12:43 2003
@@ -114,6 +114,10 @@
 extern kmem_cache_t	*sighand_cachep;
 extern kmem_cache_t	*bio_cachep;
 
+void ptrinfo(unsigned long addr);
+
+extern atomic_t slab_reclaim_pages;
+
 #endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_SLAB_H */
diff -Nru a/init/Kconfig b/init/Kconfig
--- a/init/Kconfig	Thu Jul  3 01:12:43 2003
+++ b/init/Kconfig	Thu Jul  3 01:12:43 2003
@@ -93,7 +93,8 @@
 	  limited in memory.
 
 config LOG_BUF_SHIFT
-	int "Kernel log buffer size" if DEBUG_KERNEL
+	int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL
+	range 12 20
 	default 17 if ARCH_S390
 	default 16 if X86_NUMAQ || IA64
 	default 15 if SMP
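
The new prompt hint follows from the log buffer being 2^LOG_BUF_SHIFT bytes,
per the definition in kernel/printk.c, so 1 << 16 = 65536 (64KB) and
1 << 17 = 131072 (128KB); the added range bounds it between 4KB (12) and
1MB (20):

	/* kernel/printk.c */
	#define LOG_BUF_LEN	(1 << CONFIG_LOG_BUF_SHIFT)
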
diff -Nru a/kernel/exit.c b/kernel/exit.c
--- a/kernel/exit.c	Thu Jul  3 01:12:43 2003
+++ b/kernel/exit.c	Thu Jul  3 01:12:43 2003
@@ -651,6 +651,8 @@
 	if (tsk->exit_signal != -1) {
 		int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD;
 		do_notify_parent(tsk, signal);
+	} else if (tsk->ptrace) {
+		do_notify_parent(tsk, SIGCHLD);
 	}
 
 	tsk->state = TASK_ZOMBIE;
@@ -715,7 +717,7 @@
 	tsk->exit_code = code;
 	exit_notify(tsk);
 
-	if (tsk->exit_signal == -1)
+	if (tsk->exit_signal == -1 && tsk->ptrace == 0)
 		release_task(tsk);
 
 	schedule();
@@ -859,7 +861,7 @@
 		BUG_ON(state != TASK_DEAD);
 		return 0;
 	}
-	if (unlikely(p->exit_signal == -1))
+	if (unlikely(p->exit_signal == -1 && p->ptrace == 0))
 		/*
 		 * This can only happen in a race with a ptraced thread
 		 * dying on another processor.
@@ -889,8 +891,12 @@
 		/* Double-check with lock held.  */
 		if (p->real_parent != p->parent) {
 			__ptrace_unlink(p);
-			do_notify_parent(p, p->exit_signal);
 			p->state = TASK_ZOMBIE;
+			/* If this is a detached thread, this is where it goes away.  */
+			if (p->exit_signal == -1)
+				release_task(p);
+			else
+				do_notify_parent(p, p->exit_signal);
 			p = NULL;
 		}
 		write_unlock_irq(&tasklist_lock);
diff -Nru a/kernel/fork.c b/kernel/fork.c
--- a/kernel/fork.c	Thu Jul  3 01:12:43 2003
+++ b/kernel/fork.c	Thu Jul  3 01:12:43 2003
@@ -286,7 +286,7 @@
 			continue;
 		if (mpnt->vm_flags & VM_ACCOUNT) {
 			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
-			if (!vm_enough_memory(len))
+			if (security_vm_enough_memory(len))
 				goto fail_nomem;
 			charge += len;
 		}
diff -Nru a/kernel/ksyms.c b/kernel/ksyms.c
--- a/kernel/ksyms.c	Thu Jul  3 01:12:43 2003
+++ b/kernel/ksyms.c	Thu Jul  3 01:12:43 2003
@@ -462,6 +462,7 @@
 #endif
 EXPORT_SYMBOL(schedule_timeout);
 EXPORT_SYMBOL(yield);
+EXPORT_SYMBOL(io_schedule);
 EXPORT_SYMBOL(__cond_resched);
 EXPORT_SYMBOL(set_user_nice);
 EXPORT_SYMBOL(task_nice);
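
With io_schedule exported, modular block drivers can sleep in the I/O wait
state instead of calling plain schedule(), keeping iowait accounting correct.
A sketch of the usual pattern (wait_sketch is an illustrative name):

	#include <linux/sched.h>

	static void wait_sketch(void)
	{
		set_current_state(TASK_UNINTERRUPTIBLE);
		io_schedule();	/* like schedule(), but counted as iowait */
	}
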
diff -Nru a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c	Thu Jul  3 01:12:43 2003
+++ b/mm/mmap.c	Thu Jul  3 01:12:43 2003
@@ -18,6 +18,7 @@
 #include <linux/security.h>
 #include <linux/hugetlb.h>
 #include <linux/profile.h>
+#include <linux/module.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
@@ -53,65 +54,9 @@
 int sysctl_overcommit_ratio = 50;	/* default is 50% */
 atomic_t vm_committed_space = ATOMIC_INIT(0);
 
-/*
- * Check that a process has enough memory to allocate a new virtual
- * mapping. 1 means there is enough memory for the allocation to
- * succeed and 0 implies there is not.
- *
- * We currently support three overcommit policies, which are set via the
- * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-acounting
- *
- * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
- * Additional code 2002 Jul 20 by Robert Love.
- */
-extern atomic_t slab_reclaim_pages;
-int vm_enough_memory(long pages)
-{
-	unsigned long free, allowed;
-
-	vm_acct_memory(pages);
-
-        /*
-	 * Sometimes we want to use more memory than we have
-	 */
-	if (sysctl_overcommit_memory == 1)
-		return 1;
-
-	if (sysctl_overcommit_memory == 0) {
-		free = get_page_cache_size();
-		free += nr_free_pages();
-		free += nr_swap_pages;
-
-		/*
-		 * Any slabs which are created with the
-		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
-		 * which are reclaimable, under pressure.  The dentry
-		 * cache and most inode caches should fall into this
-		 */
-		free += atomic_read(&slab_reclaim_pages);
-
-		/*
-		 * Leave the last 3% for root
-		 */
-		if (!capable(CAP_SYS_ADMIN))
-			free -= free / 32;
-		
-		if (free > pages)
-			return 1;
-		vm_unacct_memory(pages);
-		return 0;
-	}
-
-	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
-	allowed += total_swap_pages;
-
-	if (atomic_read(&vm_committed_space) < allowed)
-		return 1;
-
-	vm_unacct_memory(pages);
-
-	return 0;
-}
+EXPORT_SYMBOL(sysctl_overcommit_memory);
+EXPORT_SYMBOL(sysctl_overcommit_ratio);
+EXPORT_SYMBOL(vm_committed_space);
 
 /*
  * Requires inode->i_mapping->i_shared_sem
@@ -646,7 +591,7 @@
 			 * Private writable mapping: check memory availability
 			 */
 			charged = len >> PAGE_SHIFT;
-			if (!vm_enough_memory(charged))
+			if (security_vm_enough_memory(charged))
 				return -ENOMEM;
 			vm_flags |= VM_ACCOUNT;
 		}
@@ -950,7 +895,7 @@
 	grow = (address - vma->vm_end) >> PAGE_SHIFT;
 
 	/* Overcommit.. */
-	if (!vm_enough_memory(grow)) {
+	if (security_vm_enough_memory(grow)) {
 		spin_unlock(&vma->vm_mm->page_table_lock);
 		return -ENOMEM;
 	}
@@ -1004,7 +949,7 @@
 	grow = (vma->vm_start - address) >> PAGE_SHIFT;
 
 	/* Overcommit.. */
-	if (!vm_enough_memory(grow)) {
+	if (security_vm_enough_memory(grow)) {
 		spin_unlock(&vma->vm_mm->page_table_lock);
 		return -ENOMEM;
 	}
@@ -1376,7 +1321,7 @@
 	if (mm->map_count > MAX_MAP_COUNT)
 		return -ENOMEM;
 
-	if (!vm_enough_memory(len >> PAGE_SHIFT))
+	if (security_vm_enough_memory(len >> PAGE_SHIFT))
 		return -ENOMEM;
 
 	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
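
Since vm_enough_memory() returned 1 on success while the security hook returns
0, every converted test flips from !vm_enough_memory(x) to
security_vm_enough_memory(x). The accounting pattern the call sites above
follow, sketched (map_charge_sketch is an illustrative name):

	#include <linux/mman.h>
	#include <linux/security.h>

	static int map_charge_sketch(struct vm_area_struct *vma, unsigned long len)
	{
		long charged = len >> PAGE_SHIFT;

		if (security_vm_enough_memory(charged))
			return -ENOMEM;		/* hook already unaccounted */
		vma->vm_flags |= VM_ACCOUNT;	/* so munmap/exit gives it back */
		/* any later error path must call vm_unacct_memory(charged) */
		return 0;
	}
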
diff -Nru a/mm/mprotect.c b/mm/mprotect.c
--- a/mm/mprotect.c	Thu Jul  3 01:12:43 2003
+++ b/mm/mprotect.c	Thu Jul  3 01:12:43 2003
@@ -175,7 +175,7 @@
 	if (newflags & VM_WRITE) {
 		if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
 			charged = (end - start) >> PAGE_SHIFT;
-			if (!vm_enough_memory(charged))
+			if (security_vm_enough_memory(charged))
 				return -ENOMEM;
 			newflags |= VM_ACCOUNT;
 		}
diff -Nru a/mm/mremap.c b/mm/mremap.c
--- a/mm/mremap.c	Thu Jul  3 01:12:43 2003
+++ b/mm/mremap.c	Thu Jul  3 01:12:43 2003
@@ -16,6 +16,7 @@
 #include <linux/fs.h>
 #include <linux/highmem.h>
 #include <linux/rmap-locking.h>
+#include <linux/security.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
@@ -244,9 +245,7 @@
 	}
 
 	if (!move_page_tables(vma, new_addr, addr, old_len)) {
-		unsigned long must_fault_in;
-		unsigned long fault_in_start;
-		unsigned long fault_in_end;
+		unsigned long vm_locked = vma->vm_flags & VM_LOCKED;
 
 		if (allocated_vma) {
 			*new_vma = *vma;
@@ -272,14 +271,8 @@
 		} else
 			vma = NULL;		/* nothing more to do */
 
-		must_fault_in = new_vma->vm_flags & VM_LOCKED;
-		fault_in_start = new_vma->vm_start;
-		fault_in_end = new_vma->vm_end;
-
 		do_munmap(current->mm, addr, old_len);
 
-		/* new_vma could have been invalidated by do_munmap */
-
 		/* Restore VM_ACCOUNT if one or two pieces of vma left */
 		if (vma) {
 			vma->vm_flags |= VM_ACCOUNT;
@@ -288,9 +281,11 @@
 		}
 
 		current->mm->total_vm += new_len >> PAGE_SHIFT;
-		if (must_fault_in) {
+		if (vm_locked) {
 			current->mm->locked_vm += new_len >> PAGE_SHIFT;
-			make_pages_present(fault_in_start, fault_in_end);
+			if (new_len > old_len)
+				make_pages_present(new_addr + old_len,
+						   new_addr + new_len);
 		}
 		return new_addr;
 	}
@@ -391,7 +386,7 @@
 
 	if (vma->vm_flags & VM_ACCOUNT) {
 		charged = (new_len - old_len) >> PAGE_SHIFT;
-		if (!vm_enough_memory(charged))
+		if (security_vm_enough_memory(charged))
 			goto out_nc;
 	}
 
diff -Nru a/mm/nommu.c b/mm/nommu.c
--- a/mm/nommu.c	Thu Jul  3 01:12:43 2003
+++ b/mm/nommu.c	Thu Jul  3 01:12:43 2003
@@ -62,11 +62,8 @@
 	inode->i_size = offset;
 
 out_truncate:
-	if (inode->i_op && inode->i_op->truncate) {
-		lock_kernel();
+	if (inode->i_op && inode->i_op->truncate)
 		inode->i_op->truncate(inode);
-		unlock_kernel();
-	}
 	return 0;
 out_sig:
 	send_sig(SIGXFSZ, current, 0);
diff -Nru a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c	Thu Jul  3 01:12:43 2003
+++ b/mm/page_alloc.c	Thu Jul  3 01:12:43 2003
@@ -32,6 +32,8 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 
+#include <asm/tlbflush.h>
+
 DECLARE_BITMAP(node_online_map, MAX_NUMNODES);
 DECLARE_BITMAP(memblk_online_map, MAX_NR_MEMBLKS);
 struct pglist_data *pgdat_list;
@@ -41,6 +43,9 @@
 int numnodes = 1;
 int sysctl_lower_zone_protection = 0;
 
+EXPORT_SYMBOL(totalram_pages);
+EXPORT_SYMBOL(nr_swap_pages);
+
 /*
  * Used by page_zone() to look up the address of the struct zone whose
  * id is encoded in the upper bits of page->flags
@@ -265,6 +270,7 @@
 	mod_page_state(pgfree, 1 << order);
 	free_pages_check(__FUNCTION__, page);
 	list_add(&page->list, &list);
+	kernel_map_pages(page, 1<<order, 0);
 	free_pages_bulk(page_zone(page), 1, &list, order);
 }
 
@@ -440,6 +446,7 @@
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
 
+	kernel_map_pages(page, 1, 0);
 	inc_page_state(pgfree);
 	free_pages_check(__FUNCTION__, page);
 	pcp = &zone->pageset[get_cpu()].pcp[cold];
@@ -556,7 +563,7 @@
 				(!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
-				return page;
+				goto got_pg;
 		}
 		min += z->pages_low * sysctl_lower_zone_protection;
 	}
@@ -579,7 +586,7 @@
 				(!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
-				return page;
+				goto got_pg;
 		}
 		min += local_min * sysctl_lower_zone_protection;
 	}
@@ -594,7 +601,7 @@
 
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
-				return page;
+				goto got_pg;
 		}
 		goto nopage;
 	}
@@ -622,7 +629,7 @@
 				(!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
-				return page;
+				goto got_pg;
 		}
 		min += z->pages_low * sysctl_lower_zone_protection;
 	}
@@ -653,6 +660,9 @@
 			current->comm, order, gfp_mask);
 	}
 	return NULL;
+got_pg:
+	kernel_map_pages(page, 1 << order, 1);
+	return page;
 }
 
 /*
@@ -726,6 +736,7 @@
 
 	return sum;
 }
+EXPORT_SYMBOL(nr_free_pages);
 
 unsigned int nr_used_zone_pages(void)
 {
@@ -818,6 +829,7 @@
 EXPORT_PER_CPU_SYMBOL(page_states);
 
 atomic_t nr_pagecache = ATOMIC_INIT(0);
+EXPORT_SYMBOL(nr_pagecache);
 #ifdef CONFIG_SMP
 DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
 #endif
@@ -896,7 +908,7 @@
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
 
-	val->totalram = pgdat->node_size;
+	val->totalram = pgdat->node_present_pages;
 	val->freeram = nr_free_pages_pgdat(pgdat);
 	val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
 	val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
@@ -1131,12 +1143,13 @@
 
 	for (i = 0; i < MAX_NR_ZONES; i++)
 		totalpages += zones_size[i];
-	pgdat->node_size = totalpages;
+	pgdat->node_spanned_pages = totalpages;
 
 	realtotalpages = totalpages;
 	if (zholes_size)
 		for (i = 0; i < MAX_NR_ZONES; i++)
 			realtotalpages -= zholes_size[i];
+	pgdat->node_present_pages = realtotalpages;
 	printk("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
 }
 
@@ -1342,7 +1355,7 @@
 	pgdat->node_start_pfn = node_start_pfn;
 	calculate_zone_totalpages(pgdat, zones_size, zholes_size);
 	if (!node_mem_map) {
-		size = (pgdat->node_size + 1) * sizeof(struct page); 
+		size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
 		node_mem_map = alloc_bootmem_node(pgdat, size);
 	}
 	pgdat->node_mem_map = node_mem_map;
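
Under CONFIG_DEBUG_PAGEALLOC the allocator now unmaps pages as they enter the
free lists (both in __free_pages_ok() and the per-cpu free path) and maps them
back at got_pg: on allocation. The payoff, sketched (uaf_sketch is
illustrative and deliberately buggy):

	#include <linux/mm.h>

	static void uaf_sketch(void)
	{
		struct page *page = alloc_pages(GFP_KERNEL, 0);
		int *p = page_address(page);

		__free_pages(page, 0);	/* page leaves the linear mapping */
		*p = 1;			/* faults here instead of silently
					 * corrupting the page's next owner */
	}
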
diff -Nru a/mm/shmem.c b/mm/shmem.c
--- a/mm/shmem.c	Thu Jul  3 01:12:43 2003
+++ b/mm/shmem.c	Thu Jul  3 01:12:43 2003
@@ -36,6 +36,7 @@
 #include <linux/writeback.h>
 #include <linux/vfs.h>
 #include <linux/blkdev.h>
+#include <linux/security.h>
 #include <asm/uaccess.h>
 #include <asm/div64.h>
 
@@ -507,7 +508,7 @@
 	 	 */
 		change = VM_ACCT(attr->ia_size) - VM_ACCT(inode->i_size);
 		if (change > 0) {
-			if (!vm_enough_memory(change))
+			if (security_vm_enough_memory(change))
 				return -ENOMEM;
 		} else if (attr->ia_size < inode->i_size) {
 			vm_unacct_memory(-change);
@@ -1139,7 +1140,7 @@
 	maxpos = inode->i_size;
 	if (maxpos < pos + count) {
 		maxpos = pos + count;
-		if (!vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) {
+		if (security_vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) {
 			err = -ENOMEM;
 			goto out;
 		}
@@ -1493,7 +1494,7 @@
 		memcpy(info, symname, len);
 		inode->i_op = &shmem_symlink_inline_operations;
 	} else {
-		if (!vm_enough_memory(VM_ACCT(1))) {
+		if (security_vm_enough_memory(VM_ACCT(1))) {
 			iput(inode);
 			return -ENOMEM;
 		}
@@ -1887,7 +1888,7 @@
 	if (size > SHMEM_MAX_BYTES)
 		return ERR_PTR(-EINVAL);
 
-	if ((flags & VM_ACCOUNT) && !vm_enough_memory(VM_ACCT(size)))
+	if ((flags & VM_ACCOUNT) && security_vm_enough_memory(VM_ACCT(size)))
 		return ERR_PTR(-ENOMEM);
 
 	error = -ENOMEM;
diff -Nru a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c	Thu Jul  3 01:12:43 2003
+++ b/mm/slab.c	Thu Jul  3 01:12:43 2003
@@ -89,7 +89,12 @@
 #include	<linux/notifier.h>
 #include	<linux/kallsyms.h>
 #include	<linux/cpu.h>
+#include	<linux/sysctl.h>
+#include	<linux/module.h>
+
 #include	<asm/uaccess.h>
+#include	<asm/cacheflush.h>
+#include	<asm/tlbflush.h>
 
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
@@ -351,6 +356,34 @@
 #define POISON_AFTER	0x6b	/* for use-after-free poisoning */
 #define	POISON_END	0xa5	/* end-byte of poisoning */
 
+static inline int obj_dbghead(kmem_cache_t *cachep)
+{
+	if (cachep->flags & SLAB_RED_ZONE)
+		return BYTES_PER_WORD;
+	return 0;
+}
+
+static inline int obj_dbglen(kmem_cache_t *cachep)
+{
+	int len = 0;
+
+	if (cachep->flags & SLAB_RED_ZONE) {
+		len += 2*BYTES_PER_WORD;
+	}
+	if (cachep->flags & SLAB_STORE_USER) {
+		len += BYTES_PER_WORD;
+	}
+	return len;
+}
+#else
+static inline int obj_dbghead(kmem_cache_t *cachep)
+{
+	return 0;
+}
+static inline int obj_dbglen(kmem_cache_t *cachep)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -430,6 +463,7 @@
  * SLAB_RECLAIM_ACCOUNT turns this on per-slab
  */
 atomic_t slab_reclaim_pages;
+EXPORT_SYMBOL(slab_reclaim_pages);
 
 /*
  * chicken and egg problem: delay the per-cpu array allocation
@@ -765,16 +799,45 @@
 }
 
 #if DEBUG
-static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val)
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, unsigned long caller)
 {
-	int size = cachep->objsize;
-	if (cachep->flags & SLAB_RED_ZONE) {
-		addr += BYTES_PER_WORD;
-		size -= 2*BYTES_PER_WORD;
-	}
-	if (cachep->flags & SLAB_STORE_USER) {
-		size -= BYTES_PER_WORD;
+	int size = cachep->objsize-obj_dbglen(cachep);
+
+	addr = (unsigned long *)&((char*)addr)[obj_dbghead(cachep)];
+
+	if (size < 5*sizeof(unsigned long))
+		return;
+
+	*addr++=0x12345678;
+	*addr++=caller;
+	*addr++=smp_processor_id();
+	size -= 3*sizeof(unsigned long);
+	{
+		unsigned long *sptr = &caller;
+		unsigned long svalue;
+
+		while (((long) sptr & (THREAD_SIZE-1)) != 0) {
+			svalue = *sptr++;
+			if (kernel_text_address(svalue)) {
+				*addr++=svalue;
+				size -= sizeof(unsigned long);
+				if (size <= sizeof(unsigned long))
+					break;
+			}
+		}
+
 	}
+	*addr++=0x87654321;
+}
+#endif
+
+static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val)
+{
+	int size = cachep->objsize-obj_dbglen(cachep);
+	addr = &((char*)addr)[obj_dbghead(cachep)];
+
 	memset(addr, val, size);
 	*(unsigned char *)(addr+size-1) = POISON_END;
 }
@@ -796,15 +859,11 @@
 
 static void check_poison_obj(kmem_cache_t *cachep, void *addr)
 {
-	int size = cachep->objsize;
 	void *end;
-	if (cachep->flags & SLAB_RED_ZONE) {
-		addr += BYTES_PER_WORD;
-		size -= 2*BYTES_PER_WORD;
-	}
-	if (cachep->flags & SLAB_STORE_USER) {
-		size -= BYTES_PER_WORD;
-	}
+	int size = cachep->objsize-obj_dbglen(cachep);
+
+	addr = &((char*)addr)[obj_dbghead(cachep)];
+
 	end = scan_poisoned_obj(addr, size);
 	if (end) {
 		int s;
@@ -858,8 +917,16 @@
 		void *objp = slabp->s_mem + cachep->objsize * i;
 		int objlen = cachep->objsize;
 
-		if (cachep->flags & SLAB_POISON)
+		if (cachep->flags & SLAB_POISON) {
+#ifdef CONFIG_DEBUG_PAGEALLOC
+			if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep))
+				kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE,1);
+			else
+				check_poison_obj(cachep, objp);
+#else
 			check_poison_obj(cachep, objp);
+#endif
+		}
 		if (cachep->flags & SLAB_STORE_USER)
 			objlen -= BYTES_PER_WORD;
 
@@ -952,6 +1019,10 @@
 	}
 
 #if FORCED_DEBUG
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	if (size < PAGE_SIZE-3*BYTES_PER_WORD && size > 128)
+		size = PAGE_SIZE-3*BYTES_PER_WORD;
+#endif
 	/*
 	 * Enable redzoning and last user accounting, except
 	 * - for caches with forced alignment: redzoning would violate the
@@ -1404,6 +1475,8 @@
 				slab_error(cachep, "constructor overwrote the"
 							" start of an object");
 		}
+		if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
+			kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0);
 #else
 		if (cachep->ctor)
 			cachep->ctor(objp, cachep, ctor_flags);
@@ -1584,25 +1657,28 @@
 		 * caller can perform a verify of its state (debugging).
 		 * Called without the cache-lock held.
 		 */
-		if (cachep->flags & SLAB_RED_ZONE) {
-			cachep->ctor(objp+BYTES_PER_WORD,
+		cachep->ctor(objp+obj_dbghead(cachep),
 					cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
-		} else {
-			cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
-		}
 	}
 	if (cachep->flags & SLAB_POISON && cachep->dtor) {
 		/* we want to cache poison the object,
 		 * call the destruction callback
 		 */
-		if (cachep->flags & SLAB_RED_ZONE)
-			cachep->dtor(objp+BYTES_PER_WORD, cachep, 0);
-		else
-			cachep->dtor(objp, cachep, 0);
+		cachep->dtor(objp+obj_dbghead(cachep), cachep, 0);
 	}
-	if (cachep->flags & SLAB_POISON)
+	if (cachep->flags & SLAB_POISON) {
+#ifdef CONFIG_DEBUG_PAGEALLOC
+		if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) {
+			store_stackinfo(cachep, objp, POISON_AFTER);
+			kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0);
+		} else {
+			poison_obj(cachep, objp, POISON_AFTER);
+		}
+#else
 		poison_obj(cachep, objp, POISON_AFTER);
 #endif
+	}
+#endif
 	return objp;
 }
 
@@ -1617,6 +1693,7 @@
 	for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
 		entries++;
 		BUG_ON(entries > cachep->num);
+		BUG_ON(i < 0 || i >= cachep->num);
 	}
 	BUG_ON(entries != cachep->num - slabp->inuse);
 #endif
@@ -1746,9 +1823,16 @@
 
 	if (!objp)	
 		return objp;
 	if (cachep->flags & SLAB_POISON) {
+#ifdef CONFIG_DEBUG_PAGEALLOC
+		if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
+			kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 1);
+		else
+			check_poison_obj(cachep, objp);
+#else
 		check_poison_obj(cachep, objp);
- 		poison_obj(cachep, objp, POISON_BEFORE);
+#endif
+		poison_obj(cachep, objp, POISON_BEFORE);
 	}
 	if (cachep->flags & SLAB_STORE_USER) {
 		objlen -= BYTES_PER_WORD;
@@ -2085,16 +2169,7 @@
 
 unsigned int kmem_cache_size(kmem_cache_t *cachep)
 {
-	unsigned int objlen = cachep->objsize;
-
-#if DEBUG
-	if (cachep->flags & SLAB_RED_ZONE)
-		objlen -= 2*BYTES_PER_WORD;
-	if (cachep->flags & SLAB_STORE_USER)
-		objlen -= BYTES_PER_WORD;
-#endif
-
-	return objlen;
+	return cachep->objsize-obj_dbglen(cachep);
 }
 
 kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
@@ -2626,3 +2701,70 @@
 	return size;
 }
 
+void ptrinfo(unsigned long addr)
+{
+	struct page *page;
+
+	printk("Dumping data about address %p.\n", (void*)addr);
+	if (!virt_addr_valid((void*)addr)) {
+		printk("virt addr invalid.\n");
+		return;
+	}
+	do {
+		pgd_t *pgd = pgd_offset_k(addr);
+		pmd_t *pmd;
+		if (pgd_none(*pgd)) {
+			printk("No pgd.\n");
+			break;
+		}
+		pmd = pmd_offset(pgd, addr);
+		if (pmd_none(*pmd)) {
+			printk("No pmd.\n");
+			break;
+		}
+#ifdef CONFIG_X86
+		if (pmd_large(*pmd)) {
+			printk("Large page.\n");
+			break;
+		}
+#endif
+		printk("normal page, pte_val 0x%llx\n",
+		  (unsigned long long)pte_val(*pte_offset_kernel(pmd, addr)));
+	} while(0);
+
+	page = virt_to_page((void*)addr);
+	printk("struct page at %p, flags %lxh.\n", page, page->flags);
+	if (PageSlab(page)) {
+		kmem_cache_t *c;
+		struct slab *s;
+		unsigned long flags;
+		int objnr;
+		void *objp;
+
+		c = GET_PAGE_CACHE(page);
+		printk("belongs to cache %s.\n",c->name);
+
+		spin_lock_irqsave(&c->spinlock, flags);
+		s = GET_PAGE_SLAB(page);
+		printk("slabp %p with %d inuse objects (from %d).\n",
+			s, s->inuse, c->num);
+		check_slabp(c,s);
+
+		objnr = (addr-(unsigned long)s->s_mem)/c->objsize;
+		objp = s->s_mem+c->objsize*objnr;
+		printk("points into object no %d, starting at %p, len %d.\n",
+			objnr, objp, c->objsize);
+		if (objnr >= c->num) {
+			printk("Bad obj number.\n");
+		} else {
+			kernel_map_pages(virt_to_page(objp), c->objsize/PAGE_SIZE, 1);
+
+			printk("redzone: %lxh/%lxh/%lxh.\n",
+				((unsigned long*)objp)[0],
+				((unsigned long*)(objp+c->objsize))[-2],
+				((unsigned long*)(objp+c->objsize))[-1]);
+		}
+		spin_unlock_irqrestore(&c->spinlock, flags);
+
+	}
+}
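
ptrinfo(), declared in linux/slab.h above, is a debugging aid: given any
address it walks the page tables, dumps the struct page, and for slab pages
prints the owning cache, the object, and (after mapping the object's pages
back in) its redzone words. A hedged usage sketch from some debug-only error
path (inspect_sketch is an illustrative name):

	#include <linux/slab.h>

	/* Dump everything the allocator knows about a suspect pointer. */
	static void inspect_sketch(void *obj)
	{
		ptrinfo((unsigned long)obj);
	}
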
diff -Nru a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c	Thu Jul  3 01:12:43 2003
+++ b/mm/swap.c	Thu Jul  3 01:12:43 2003
@@ -20,6 +20,7 @@
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
 #include <linux/init.h>
+#include <linux/module.h>
 #include <linux/mm_inline.h>
 #include <linux/buffer_head.h>	/* for try_to_release_page() */
 #include <linux/percpu.h>
@@ -370,6 +371,7 @@
 	}
 	preempt_enable();
 }
+EXPORT_SYMBOL(vm_acct_memory);
 #endif
 
 
diff -Nru a/mm/swapfile.c b/mm/swapfile.c
--- a/mm/swapfile.c	Thu Jul  3 01:12:43 2003
+++ b/mm/swapfile.c	Thu Jul  3 01:12:43 2003
@@ -20,7 +20,9 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
+#include <linux/module.h>
 #include <linux/rmap-locking.h>
+#include <linux/security.h>
 
 #include <asm/pgtable.h>
 #include <linux/swapops.h>
@@ -30,6 +32,8 @@
 int total_swap_pages;
 static int swap_overflow;
 
+EXPORT_SYMBOL(total_swap_pages);
+
 static const char Bad_file[] = "Bad swap file entry ";
 static const char Unused_file[] = "Unused swap file entry ";
 static const char Bad_offset[] = "Bad swap offset entry ";
@@ -1042,7 +1046,7 @@
 		swap_list_unlock();
 		goto out_dput;
 	}
-	if (vm_enough_memory(p->pages))
+	if (!security_vm_enough_memory(p->pages))
 		vm_unacct_memory(p->pages);
 	else {
 		err = -ENOMEM;
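
This hunk is the one call site where the inverted return convention is easy to
misread, since swapoff wants the accounting probe to succeed. Side by side:

	/* old: nonzero meant success */
	if (vm_enough_memory(p->pages))
		vm_unacct_memory(p->pages);

	/* new: zero means success, hence the added '!' */
	if (!security_vm_enough_memory(p->pages))
		vm_unacct_memory(p->pages);
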
diff -Nru a/security/capability.c b/security/capability.c
--- a/security/capability.c	Thu Jul  3 01:12:43 2003
+++ b/security/capability.c	Thu Jul  3 01:12:43 2003
@@ -15,6 +15,9 @@
 #include <linux/security.h>
 #include <linux/file.h>
 #include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
 #include <linux/smp_lock.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
@@ -275,6 +278,65 @@
 	return 0;
 }
 
+/*
+ * Check that a process has enough memory to allocate a new virtual
+ * mapping. 0 means there is enough memory for the allocation to
+ * succeed and -ENOMEM implies there is not.
+ *
+ * We currently support three overcommit policies, which are set via the
+ * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
+ *
+ * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
+ * Additional code 2002 Jul 20 by Robert Love.
+ */
+int cap_vm_enough_memory(long pages)
+{
+	unsigned long free, allowed;
+
+	vm_acct_memory(pages);
+
+	/*
+	 * Sometimes we want to use more memory than we have
+	 */
+	if (sysctl_overcommit_memory == 1)
+		return 0;
+
+	if (sysctl_overcommit_memory == 0) {
+		free = get_page_cache_size();
+		free += nr_free_pages();
+		free += nr_swap_pages;
+
+		/*
+		 * Any slabs which are created with the
+		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
+		 * which are reclaimable, under pressure.  The dentry
+		 * cache and most inode caches should fall into this category.
+		 */
+		free += atomic_read(&slab_reclaim_pages);
+
+		/*
+		 * Leave the last 3% for root
+		 */
+		if (!capable(CAP_SYS_ADMIN))
+			free -= free / 32;
+
+		if (free > pages)
+			return 0;
+		vm_unacct_memory(pages);
+		return -ENOMEM;
+	}
+
+	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
+	allowed += total_swap_pages;
+
+	if (atomic_read(&vm_committed_space) < allowed)
+		return 0;
+
+	vm_unacct_memory(pages);
+
+	return -ENOMEM;
+}
+
 EXPORT_SYMBOL(cap_capable);
 EXPORT_SYMBOL(cap_ptrace);
 EXPORT_SYMBOL(cap_capget);
@@ -286,6 +348,7 @@
 EXPORT_SYMBOL(cap_task_post_setuid);
 EXPORT_SYMBOL(cap_task_reparent_to_init);
 EXPORT_SYMBOL(cap_syslog);
+EXPORT_SYMBOL(cap_vm_enough_memory);
 
 #ifdef CONFIG_SECURITY
 
@@ -307,6 +370,8 @@
 	.task_reparent_to_init =	cap_task_reparent_to_init,
 
 	.syslog =                       cap_syslog,
+
+	.vm_enough_memory =             cap_vm_enough_memory,
 };
 
 #if defined(CONFIG_SECURITY_CAPABILITIES_MODULE)
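
The strict branch of the default policy (any vm.overcommit_memory value other
than 0 or 1) caps committed address space at a ratio of RAM plus swap; every
symbol used below is one this patch exports. A sketch of the limit it enforces
(commit_limit_sketch is an illustrative name):

	#include <linux/mman.h>
	#include <linux/swap.h>

	static unsigned long commit_limit_sketch(void)
	{
		unsigned long allowed;

		allowed = totalram_pages * sysctl_overcommit_ratio / 100;
		allowed += total_swap_pages;
		return allowed;	/* vm_committed_space must stay below this */
	}
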
diff -Nru a/security/dummy.c b/security/dummy.c
--- a/security/dummy.c	Thu Jul  3 01:12:43 2003
+++ b/security/dummy.c	Thu Jul  3 01:12:43 2003
@@ -17,6 +17,9 @@
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/mman.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
 #include <linux/security.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
@@ -97,6 +100,54 @@
 	return 0;
 }
 
+static int dummy_vm_enough_memory(long pages)
+{
+	unsigned long free, allowed;
+
+	vm_acct_memory(pages);
+
+	/*
+	 * Sometimes we want to use more memory than we have
+	 */
+	if (sysctl_overcommit_memory == 1)
+		return 0;
+
+	if (sysctl_overcommit_memory == 0) {
+		free = get_page_cache_size();
+		free += nr_free_pages();
+		free += nr_swap_pages;
+
+		/*
+		 * Any slabs which are created with the
+		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
+		 * which are reclaimable, under pressure.  The dentry
+		 * cache and most inode caches should fall into this category.
+		 */
+		free += atomic_read(&slab_reclaim_pages);
+
+		/*
+		 * Leave the last 3% for root
+		 */
+		if (current->euid)
+			free -= free / 32;
+
+		if (free > pages)
+			return 0;
+		vm_unacct_memory(pages);
+		return -ENOMEM;
+	}
+
+	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
+	allowed += total_swap_pages;
+
+	if (atomic_read(&vm_committed_space) < allowed)
+		return 0;
+
+	vm_unacct_memory(pages);
+
+	return -ENOMEM;
+}
+
 static int dummy_bprm_alloc_security (struct linux_binprm *bprm)
 {
 	return 0;
@@ -793,6 +844,7 @@
 	set_to_dummy_if_null(ops, quota_on);
 	set_to_dummy_if_null(ops, sysctl);
 	set_to_dummy_if_null(ops, syslog);
+	set_to_dummy_if_null(ops, vm_enough_memory);
 	set_to_dummy_if_null(ops, bprm_alloc_security);
 	set_to_dummy_if_null(ops, bprm_free_security);
 	set_to_dummy_if_null(ops, bprm_compute_creds);
