
[ppc64] NVRAM error logging/buffering patch, from Jake Moilanen

This is a port of the nvram buffering/error logging code from 2.4
to 2.6.  It includes moving /proc/rtas to /proc/ppc64/rtas and making
/proc/rtas a symlink to /proc/ppc64/rtas.  It also splits up the
/dev/nvram device read/write functions from the basic nvram access
functions, and adds ppc_md fields for the nvram access functions.


---

 arch/ppc64/Kconfig             |   14 
 arch/ppc64/kernel/Makefile     |    2 
 arch/ppc64/kernel/chrp_setup.c |    6 
 arch/ppc64/kernel/nvram.c      |  681 ++++++++++++++++++++++++++++++++++++++---
 arch/ppc64/kernel/ppc_ksyms.c  |    2 
 arch/ppc64/kernel/proc_pmc.c   |   17 -
 arch/ppc64/kernel/proc_ppc64.c |   19 -
 arch/ppc64/kernel/ras.c        |   17 -
 arch/ppc64/kernel/rtas-proc.c  |   22 -
 arch/ppc64/kernel/rtas.c       |    5 
 arch/ppc64/kernel/rtasd.c      |  262 ++++++++++++---
 arch/ppc64/kernel/scanlog.c    |   13 
 include/asm-ppc64/machdep.h    |   12 
 include/asm-ppc64/nvram.h      |   39 ++
 include/asm-ppc64/proc_fs.h    |    1 
 include/asm-ppc64/rtas.h       |   36 +-
 16 files changed, 1013 insertions(+), 135 deletions(-)

diff -puN arch/ppc64/Kconfig~ppc64-nvram_rewrite arch/ppc64/Kconfig
--- 25/arch/ppc64/Kconfig~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/arch/ppc64/Kconfig	2004-01-13 23:22:49.000000000 -0800
@@ -130,18 +130,18 @@ config MSCHUNKS
 	depends on PPC_ISERIES
 	default y
 
-config RTAS_FLASH
-	tristate "Firmware flash interface"
-	depends on !PPC_ISERIES
-
-config SCANLOG
-	tristate "Scanlog dump interface"
-	depends on !PPC_ISERIES
 
 config PPC_RTAS
 	bool "Proc interface to RTAS"
 	depends on !PPC_ISERIES
 
+config RTAS_FLASH
+	tristate "Firmware flash interface"
+	depends on PPC_RTAS
+
+config SCANLOG
+	tristate "Scanlog dump interface"
+	depends on PPC_RTAS
 endmenu
 
 
diff -puN arch/ppc64/kernel/Makefile~ppc64-nvram_rewrite arch/ppc64/kernel/Makefile
--- 25/arch/ppc64/kernel/Makefile~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/Makefile	2004-01-13 23:22:49.000000000 -0800
@@ -20,7 +20,7 @@ obj-$(CONFIG_PPC_ISERIES) += iSeries_pci
 			     mf.o HvLpEvent.o iSeries_proc.o 
 
 obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \
-			     eeh.o rtasd.o nvram.o ras.o
+			     eeh.o nvram.o rtasd.o ras.o
 
 # Change this to pSeries only once we've got iSeries up to date
 obj-y			  += open_pic.o xics.o pSeries_htab.o rtas.o \
diff -puN arch/ppc64/kernel/chrp_setup.c~ppc64-nvram_rewrite arch/ppc64/kernel/chrp_setup.c
--- 25/arch/ppc64/kernel/chrp_setup.c~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/chrp_setup.c	2004-01-13 23:22:49.000000000 -0800
@@ -57,6 +57,7 @@
 #include <asm/irq.h>
 #include <asm/naca.h>
 #include <asm/time.h>
+#include <asm/nvram.h>
 
 #include "i8259.h"
 #include "open_pic.h"
@@ -271,7 +272,10 @@ chrp_init(unsigned long r3, unsigned lon
 	ppc_md.set_rtc_time   = pSeries_set_rtc_time;
 	ppc_md.calibrate_decr = pSeries_calibrate_decr;
 
-	ppc_md.progress = chrp_progress;
+	ppc_md.progress       = chrp_progress;
+
+	ppc_md.nvram_read     = pSeries_nvram_read;
+	ppc_md.nvram_write    = pSeries_nvram_write;
 
         /* Build up the firmware_features bitmask field
          * using contents of device-tree/ibm,hypertas-functions.
diff -puN arch/ppc64/kernel/nvram.c~ppc64-nvram_rewrite arch/ppc64/kernel/nvram.c
--- 25/arch/ppc64/kernel/nvram.c~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/nvram.c	2004-01-13 23:22:49.000000000 -0800
@@ -20,23 +20,50 @@
 #include <linux/fcntl.h>
 #include <linux/nvram.h>
 #include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <asm/uaccess.h>
 #include <asm/nvram.h>
 #include <asm/rtas.h>
 #include <asm/prom.h>
+#include <asm/machdep.h>
 
-static unsigned int rtas_nvram_size;
+#define DEBUG_NVRAM
+
+static int nvram_scan_partitions(void);
+static int nvram_setup_partition(void);
+static int nvram_create_os_partition(void);
+static int nvram_remove_os_partition(void);
+static unsigned char nvram_checksum(struct nvram_header *p);
+static int nvram_write_header(struct nvram_partition * part);
+
+static unsigned int nvram_size;
 static unsigned int nvram_fetch, nvram_store;
-static char nvram_buf[4];	/* assume this is in the first 4GB */
+static char nvram_buf[NVRW_CNT];	/* assume this is in the first 4GB */
+static struct nvram_partition * nvram_part;
+static long nvram_error_log_index = -1;
+static long nvram_error_log_size = 0;
+static spinlock_t nvram_lock = SPIN_LOCK_UNLOCKED;
+
+volatile int no_more_logging = 1; /* Until we initialize everything,
+				   * make sure we don't try logging
+				   * anything */
+
+extern volatile int error_log_cnt;
+
+struct err_log_info {
+	int error_type;
+	unsigned int seq_num;
+};
 
-static loff_t nvram_llseek(struct file *file, loff_t offset, int origin)
+static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
 {
 	switch (origin) {
 	case 1:
 		offset += file->f_pos;
 		break;
 	case 2:
-		offset += rtas_nvram_size;
+		offset += nvram_size;
 		break;
 	}
 	if (offset < 0)
@@ -46,53 +73,76 @@ static loff_t nvram_llseek(struct file *
 }
 
 
-static ssize_t read_nvram(struct file *file, char *buf,
+static ssize_t dev_nvram_read(struct file *file, char *buf,
 			  size_t count, loff_t *ppos)
 {
-	unsigned int i;
-	unsigned long len;
-	char *p = buf;
+	ssize_t len;		/* whatever ppc_md.nvram_read returns: byte count, 0 or a negative error */
+	char *tmp_buffer;	/* kernel bounce buffer; its contents are then copied to user space */
 
 	if (verify_area(VERIFY_WRITE, buf, count))
 		return -EFAULT;
-	if (*ppos >= rtas_nvram_size)
+	if (*ppos >= nvram_size)
 		return 0;
-	for (i = *ppos; count > 0 && i < rtas_nvram_size; ++i, ++p, --count) {
-		if ((rtas_call(nvram_fetch, 3, 2, &len, i, __pa(nvram_buf), 1) != 0) ||
-		    len != 1)
-			return -EIO;
-		if (__put_user(nvram_buf[0], p))
-			return -EFAULT;
+	if (count > nvram_size) 	/* never allocate more than NVRAM holds */
+		count = nvram_size;
+
+	tmp_buffer = (char *) kmalloc(count, GFP_KERNEL);
+	if (!tmp_buffer) {
+		printk(KERN_ERR "dev_read_nvram: kmalloc failed\n");
+		return -ENOMEM;
 	}
-	*ppos = i;
-	return p - buf;
+
+	len = ppc_md.nvram_read(tmp_buffer, count, ppos);	/* the file offset is handed to the backend */
+	if ((long)len <= 0) {	/* nothing read, or an error: pass it straight back */
+		kfree(tmp_buffer);
+		return len;
+	}
+
+	if (copy_to_user(buf, tmp_buffer, len)) {
+		kfree(tmp_buffer);
+		return -EFAULT;
+	}
+
+	kfree(tmp_buffer);
+	return len;
+
 }
 
-static ssize_t write_nvram(struct file *file, const char *buf,
+static ssize_t dev_nvram_write(struct file *file, const char *buf,
 			   size_t count, loff_t *ppos)
 {
-	unsigned int i;
-	unsigned long len;
-	const char *p = buf;
-	char c;
+	ssize_t len;		/* whatever ppc_md.nvram_write returns: byte count, 0 or a negative error */
+	char * tmp_buffer;	/* kernel copy of the user data, handed to the backend */
 
 	if (verify_area(VERIFY_READ, buf, count))
 		return -EFAULT;
-	if (*ppos >= rtas_nvram_size)
+	if (*ppos >= nvram_size)
 		return 0;
-	for (i = *ppos; count > 0 && i < rtas_nvram_size; ++i, ++p, --count) {
-		if (__get_user(c, p))
-			return -EFAULT;
-		nvram_buf[0] = c;
-		if ((rtas_call(nvram_store, 3, 2, &len, i, __pa(nvram_buf), 1) != 0) ||
-		    len != 1)
-			return -EIO;
+	if (count > nvram_size)	/* never allocate more than NVRAM holds */
+		count = nvram_size;
+
+	tmp_buffer = (char *) kmalloc(count, GFP_KERNEL);
+	if (!tmp_buffer) {
+		printk(KERN_ERR "dev_nvram_write: kmalloc failed\n");
+		return -ENOMEM;
 	}
-	*ppos = i;
-	return p - buf;
+	
+	if (copy_from_user(tmp_buffer, buf, count)) {
+		kfree(tmp_buffer);
+		return -EFAULT;
+	}
+
+	len = ppc_md.nvram_write(tmp_buffer, count, ppos);	/* the file offset is handed to the backend */
+	if ((long)len <= 0) {	/* nothing written, or an error: pass it straight back */
+		kfree(tmp_buffer);
+		return len;
+	}
+
+	kfree(tmp_buffer);
+	return len;
 }
 
-static int nvram_ioctl(struct inode *inode, struct file *file,
+static int dev_nvram_ioctl(struct inode *inode, struct file *file,
 	unsigned int cmd, unsigned long arg)
 {
 	return -EINVAL;
@@ -100,10 +150,10 @@ static int nvram_ioctl(struct inode *ino
 
 struct file_operations nvram_fops = {
 	.owner =	THIS_MODULE,
-	.llseek =	nvram_llseek,
-	.read =		read_nvram,
-	.write =	write_nvram,
-	.ioctl =	nvram_ioctl,
+	.llseek =	dev_nvram_llseek,
+	.read =		dev_nvram_read,
+	.write =	dev_nvram_write,
+	.ioctl =	dev_nvram_ioctl,
 };
 
 static struct miscdevice nvram_dev = {
@@ -112,22 +162,135 @@ static struct miscdevice nvram_dev = {
 	&nvram_fops
 };
 
+ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
+{
+	unsigned int i;		/* current byte offset into NVRAM */
+	unsigned long len, done;	/* len: bytes asked for this pass; done: value RTAS hands back, must equal len */
+	unsigned long flags;
+	char *p = buf;
+
+	if (*index >= nvram_size)
+		return 0;	/* at or past the end: nothing to read */
+
+	i = *index;
+	if (i + count > nvram_size)
+		count = nvram_size - i;	/* clamp the request to the end of NVRAM */
+
+	spin_lock_irqsave(&nvram_lock, flags);	/* held while the static nvram_buf is in use */
+
+	for (; count != 0; count -= len) {
+		len = count;
+		if (len > NVRW_CNT)	/* nvram_buf holds NVRW_CNT bytes */
+			len = NVRW_CNT;
+		
+		if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf),
+			       len) != 0) || len != done) {
+			spin_unlock_irqrestore(&nvram_lock, flags);
+			return -EIO;	/* *index is left untouched on failure */
+		}
+		
+		memcpy(p, nvram_buf, len);
+
+		p += len;
+		i += len;
+	}
+
+	spin_unlock_irqrestore(&nvram_lock, flags);
+	
+	*index = i;	/* advance the caller's offset past what was read */
+	return p - buf;	/* number of bytes copied into buf */
+}
+
+ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index)
+{
+	unsigned int i;		/* current byte offset into NVRAM */
+	unsigned long len, done;	/* len: bytes sent this pass; done: value RTAS hands back, must equal len */
+	unsigned long flags;
+	const char *p = buf;
+
+	if (*index >= nvram_size)
+		return 0;	/* at or past the end: nothing is written */
+
+	i = *index;
+	if (i + count > nvram_size)
+		count = nvram_size - i;	/* clamp the request to the end of NVRAM */
+
+	spin_lock_irqsave(&nvram_lock, flags);	/* held while the static nvram_buf is in use */
+
+	for (; count != 0; count -= len) {
+		len = count;
+		if (len > NVRW_CNT)	/* nvram_buf holds NVRW_CNT bytes */
+			len = NVRW_CNT;
+
+		memcpy(nvram_buf, p, len);	/* stage the chunk in nvram_buf, whose address is passed to RTAS */
+
+		if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf),
+			       len) != 0) || len != done) {
+			spin_unlock_irqrestore(&nvram_lock, flags);
+			return -EIO;	/* *index is left untouched on failure */
+		}
+		
+		p += len;
+		i += len;
+	}
+	spin_unlock_irqrestore(&nvram_lock, flags);
+	
+	*index = i;	/* advance the caller's offset past what was written */
+	return p - buf;	/* number of bytes taken from buf */
+}
+ 
 int __init nvram_init(void)
 {
 	struct device_node *nvram;
 	unsigned int *nbytes_p, proplen;
+	int error;
+	int rc;		/* result of misc_register(), returned once partition setup is done */
+	
 	if ((nvram = of_find_node_by_type(NULL, "nvram")) != NULL) {
 		nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen);
 		if (nbytes_p && proplen == sizeof(unsigned int)) {
-			rtas_nvram_size = *nbytes_p;
+			nvram_size = *nbytes_p;
+		} else {
+			of_node_put(nvram);	/* drop the node reference on this exit too */
+			return -EIO;
 		}
 	}
 	nvram_fetch = rtas_token("nvram-fetch");
 	nvram_store = rtas_token("nvram-store");
-	printk(KERN_INFO "PPC64 nvram contains %d bytes\n", rtas_nvram_size);
+	printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size);
 	of_node_put(nvram);
 
-	return misc_register(&nvram_dev);
+  	rc = misc_register(&nvram_dev);
+  
+  	/* If we don't know how big NVRAM is then we shouldn't touch
+  	   the nvram partitions */
+	if (nvram == NULL)	/* only the pointer value is tested; the node was released above */
+		return rc;
+  	
+  	/* initialize our anchor for the nvram partition list */
+  	nvram_part = (struct nvram_partition *) kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+  	if (!nvram_part) {
+  		printk(KERN_ERR "nvram_init: Failed kmalloc\n");
+  		return -ENOMEM;
+  	}
+  	INIT_LIST_HEAD(&nvram_part->partition);
+  
+  	/* Get all the NVRAM partitions */
+  	error = nvram_scan_partitions();
+  	if (error) {
+  		printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n");
+  		return error;
+  	}
+  		
+  	if(nvram_setup_partition()) 
+  		printk(KERN_WARNING "nvram_init: Could not find nvram partition"
+  		       " for nvram buffered error logging.\n");
+  
+#ifdef DEBUG_NVRAM
+	nvram_print_partitions("NVRAM Partitions");
+#endif
+
+  	return rc;
 }
 
 void __exit nvram_cleanup(void)
@@ -135,6 +298,444 @@ void __exit nvram_cleanup(void)
         misc_deregister( &nvram_dev );
 }
 
+/* Walk NVRAM from offset 0, queueing each partition header on nvram_part. */
+static int nvram_scan_partitions(void)
+{
+	loff_t cur_index = 0;
+	struct nvram_header phead;
+	struct nvram_partition * tmp_part;
+	unsigned char c_sum;
+	char * header;
+	long size;
+
+	header = (char *) kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL);
+	if (!header) {
+		printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n");
+		return -ENOMEM;
+	}
+
+	while (cur_index < nvram_size) {
+		size = ppc_md.nvram_read(header, NVRAM_HEADER_LEN, &cur_index);
+		if (size != NVRAM_HEADER_LEN) {
+			printk(KERN_ERR "nvram_scan_partitions: Error parsing "
+			       "nvram partitions\n");
+			kfree(header);
+			return size < 0 ? size : -EIO; /* short read is an error too */
+		}
+		cur_index -= NVRAM_HEADER_LEN; /* nvram_read will advance us */
+		memcpy(&phead, header, NVRAM_HEADER_LEN);
+		c_sum = nvram_checksum(&phead);
+		if (c_sum != phead.checksum)
+			printk(KERN_WARNING "WARNING: nvram partition checksum "
+			       "was %02x, should be %02x!\n", phead.checksum, c_sum);
+		/* length 0 would never advance cur_index: stop, don't loop forever */
+		if (!phead.length) {
+			printk(KERN_WARNING "WARNING: nvram corruption detected: "
+			       "0-length partition\n");
+			break;
+		}
+
+		tmp_part = (struct nvram_partition *)
+			kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+		if (!tmp_part) {
+			printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
+			kfree(header);
+			return -ENOMEM;
+		}
+		memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN);
+		tmp_part->index = cur_index;
+		list_add_tail(&tmp_part->partition, &nvram_part->partition);
+		cur_index += phead.length * NVRAM_BLOCK_LEN;
+	}
+	kfree(header);
+	return 0;
+}
+
+/* nvram_setup_partition
+ *
+ * This will setup the partition we need for buffering the
+ * error logs and cleanup partitions if needed.
+ *
+ * The general strategy is the following:
+ * 1.) If there is a ppc64,linux partition large enough then use it.
+ * 2.) If there is not a ppc64,linux partition large enough, search
+ * for a free partition that is large enough.
+ * 3.) If there is not a free partition large enough remove 
+ * _all_ OS partitions and consolidate the space.
+ * 4.) Will first try getting a chunk that will satisfy the maximum
+ * error log size (NVRAM_MAX_REQ).
+ * 5.) If the max chunk cannot be allocated then try finding a chunk
+ * that will satisfy the minimum needed (NVRAM_MIN_REQ).
+ */
+static int nvram_setup_partition(void)
+{
+	struct list_head * p;
+	struct nvram_partition * part;
+	int rc;
+
+	/* see if we have an OS partition that meets our needs.
+	   will try getting the max we need.  If not we'll delete
+	   partitions and try again. */
+	list_for_each(p, &nvram_part->partition) {
+		part = list_entry(p, struct nvram_partition, partition);
+		if (part->header.signature != NVRAM_SIG_OS)
+			continue;
+
+		if (strcmp(part->header.name, "ppc64,linux"))	/* non-zero: not our partition */
+			continue;
+
+		if (part->header.length >= NVRAM_MIN_REQ) {
+			/* found our partition */
+			nvram_error_log_index = part->index + NVRAM_HEADER_LEN;	/* log data starts right after the header */
+			nvram_error_log_size = ((part->header.length - 1) *
+						NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
+			return 0;
+		}
+	}
+	
+	/* try creating a partition with the free space we have */
+	rc = nvram_create_os_partition();
+	if (!rc) {
+		return 0;
+	}
+		
+	/* need to free up some space */
+	rc = nvram_remove_os_partition();
+	if (rc) {
+		return rc;
+	}
+	
+	/* create a partition in this new space */
+	rc = nvram_create_os_partition();
+	if (rc) {
+		printk(KERN_ERR "nvram_create_os_partition: Could not find a "
+		       "NVRAM partition large enough\n");
+		return rc;
+	}
+	
+	return 0;
+}
+
+/* Free every OS partition, merging it with adjacent free partitions. */
+static int nvram_remove_os_partition(void)
+{
+	struct list_head *i;
+	struct list_head *j;
+	struct nvram_partition * part;
+	struct nvram_partition * cur_part;
+	int rc;
+
+	list_for_each(i, &nvram_part->partition) {
+		part = list_entry(i, struct nvram_partition, partition);
+		if (part->header.signature != NVRAM_SIG_OS)
+			continue;
+		
+		/* Make os partition a free partition */
+		part->header.signature = NVRAM_SIG_FREE;
+		/* name is a fixed 12-byte field: fill it without a trailing NUL */
+		memcpy(part->header.name, "wwwwwwwwwwww", 12);
+		part->header.checksum = nvram_checksum(&part->header);
+
+		/* Merge contiguous free partitions backwards */
+		list_for_each_prev(j, &part->partition) {
+			cur_part = list_entry(j, struct nvram_partition, partition);
+			if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
+				break;
+			}
+			part->header.length += cur_part->header.length;
+			part->header.checksum = nvram_checksum(&part->header);
+			part->index = cur_part->index;	/* merged partition now starts where its predecessor did */
+
+			list_del(&cur_part->partition);
+			kfree(cur_part);
+			j = &part->partition; /* fixup our loop */
+		}
+		
+		/* Merge contiguous free partitions forwards */
+		list_for_each(j, &part->partition) {
+			cur_part = list_entry(j, struct nvram_partition, partition);
+			if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
+				break;
+			}
+
+			part->header.length += cur_part->header.length;
+			part->header.checksum = nvram_checksum(&part->header);
+
+			list_del(&cur_part->partition);
+			kfree(cur_part);
+			j = &part->partition; /* fixup our loop */
+		}
+		
+		rc = nvram_write_header(part);
+		if (rc <= 0) {
+			printk(KERN_ERR "nvram_remove_os_partition: nvram_write failed (%d)\n", rc);
+			return rc ? rc : -EIO;	/* 0 bytes written must not look like success */
+		}
+	}
+	
+	return 0;
+}
+
+/* nvram_create_os_partition
+ *
+ * Create an OS linux partition to buffer error logs.
+ * Will create a partition starting at the first free
+ * space found if space has enough room.
+ */
+static int nvram_create_os_partition(void)
+{
+	struct list_head * p;
+	struct nvram_partition * part;
+	struct nvram_partition * new_part = NULL;
+	struct nvram_partition * free_part = NULL;	/* free partition the space is carved from */
+	int seq_init[2] = { 0, 0 };
+	loff_t tmp_index;
+	long size = 0;	/* length of the new partition, same units as header.length */
+	int rc;
+	
+	/* Find a free partition that will give us the maximum needed size.
+	   If we can't find one, take the first that gives the minimum size needed */
+	list_for_each(p, &nvram_part->partition) {
+		part = list_entry(p, struct nvram_partition, partition);
+		if (part->header.signature != NVRAM_SIG_FREE)
+			continue;
+
+		if (part->header.length >= NVRAM_MAX_REQ) {
+			size = NVRAM_MAX_REQ;
+			free_part = part;
+			break;
+		}
+		if (!size && part->header.length >= NVRAM_MIN_REQ) {
+			size = NVRAM_MIN_REQ;
+			free_part = part;
+		}
+	}
+	if (!size)
+		return -ENOSPC;
+	
+	/* Create our OS partition */
+	new_part = (struct nvram_partition *)
+		kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+	if (!new_part) {
+		printk(KERN_ERR "nvram_create_os_partition: kmalloc failed\n");
+		return -ENOMEM;
+	}
+
+	new_part->index = free_part->index;
+	new_part->header.signature = NVRAM_SIG_OS;
+	new_part->header.length = size;
+	sprintf(new_part->header.name, "ppc64,linux");
+	new_part->header.checksum = nvram_checksum(&new_part->header);
+
+	rc = nvram_write_header(new_part);
+	if (rc <= 0) {
+		printk(KERN_ERR "nvram_create_os_partition: nvram_write_header "
+		       "failed (%d)\n", rc);
+		kfree(new_part);	/* not on the list yet, so nobody else frees it */
+		return rc ? rc : -EIO;	/* 0 bytes written is a failure, not success */
+	}
+
+	/* make sure and initialize to zero the sequence number and the error
+	   type logged */
+	tmp_index = new_part->index + NVRAM_HEADER_LEN;
+	rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index);
+	if (rc <= 0) {
+		printk(KERN_ERR "nvram_create_os_partition: nvram_write failed (%d)\n", rc);
+		kfree(new_part);
+		return rc ? rc : -EIO;
+	}
+	
+	nvram_error_log_index = new_part->index + NVRAM_HEADER_LEN;
+	/* size the log area from the partition just created, not the loop cursor */
+	nvram_error_log_size = ((new_part->header.length - 1) *
+				NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
+	list_add_tail(&new_part->partition, &free_part->partition);	/* i.e. insert just before free_part */
+
+	if (free_part->header.length <= size) {
+		list_del(&free_part->partition);
+		kfree(free_part);
+		return 0;
+	} 
+	/* Adjust the partition we stole the space from */
+	free_part->index += size * NVRAM_BLOCK_LEN;
+	free_part->header.length -= size;
+	free_part->header.checksum = nvram_checksum(&free_part->header);
+	
+	rc = nvram_write_header(free_part);
+	if (rc <= 0) {
+		printk(KERN_ERR "nvram_create_os_partition: nvram_write_header "
+		       "failed (%d)\n", rc);
+		return rc ? rc : -EIO;
+	}
+
+	return 0;
+}
+
+
+void nvram_print_partitions(char * label)	/* label: text printed in the banner line */
+{
+	struct list_head * p;
+	struct nvram_partition * tmp_part;
+	
+	printk(KERN_WARNING "--------%s---------\n", label);
+	printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n");
+	list_for_each(p, &nvram_part->partition) {	/* one output row per entry on the in-memory list */
+		tmp_part = list_entry(p, struct nvram_partition, partition);
+		printk(KERN_WARNING "%d    \t%02x\t%02x\t%d\t%s\n",
+		       tmp_part->index, tmp_part->header.signature,
+		       tmp_part->header.checksum, tmp_part->header.length,
+		       tmp_part->header.name);	/* NOTE(review): %s relies on name being NUL-terminated - confirm */
+	}
+}
+
+/* nvram_write_error_log
+ *
+ * We need to buffer the error logs into nvram to ensure that we have
+ * the failure information to decode.  If we have a severe error there
+ * is no way to guarantee that the OS or the machine is in a state to
+ * get back to user land and write the error to disk.  For example if
+ * the SCSI device driver causes a Machine Check by writing to a bad
+ * IO address, there is no way of guaranteeing that the device driver
+ * is in any state in which it would also be able to write the error data
+ * captured to disk, thus we buffer it in NVRAM for analysis on the
+ * next boot.
+ *
+ * In NVRAM the partition containing the error log buffer will look like:
+ * Header (in bytes):
+ * +-----------+----------+--------+------------+------------------+
+ * | signature | checksum | length | name       | data             |
+ * |0          |1         |2      3|4         15|16        length-1|
+ * +-----------+----------+--------+------------+------------------+
+ *
+ * The 'data' section would look like (in bytes):
+ * +--------------+------------+-----------------------------------+
+ * | event_logged | sequence # | error log                         |
+ * |0            3|4          7|8            nvram_error_log_size-1|
+ * +--------------+------------+-----------------------------------+
+ *
+ * event_logged: 0 if event has not been logged to syslog, 1 if it has
+ * sequence #: The unique sequence # for each event. (until it wraps)
+ * error log: The error log from event_scan
+ */
+int nvram_write_error_log(char * buff, int length, unsigned int err_type)
+{
+	int rc;
+	loff_t tmp_index;	/* running NVRAM offset, advanced by each nvram_write call */
+	struct err_log_info info;
+	
+	if (no_more_logging) {
+		return -EPERM;
+	}
+
+	if (nvram_error_log_index == -1) {	/* -1 is the initial value: no log partition set up */
+		return -ESPIPE;
+	}
+
+	if (length > nvram_error_log_size) {	/* truncate logs that do not fit the partition */
+		length = nvram_error_log_size;
+	}
+
+	info.error_type = err_type;	/* stored in the first word of the data section */
+	info.seq_num = error_log_cnt;
+
+	tmp_index = nvram_error_log_index;
+
+	rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
+	if (rc <= 0) {
+		printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
+		return rc;
+	}
+
+	rc = ppc_md.nvram_write(buff, length, &tmp_index);	/* log body follows the info words */
+	if (rc <= 0) {
+		printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
+		return rc;
+	}
+	
+	return 0;
+}
+
+/* nvram_read_error_log
+ *
+ * Reads nvram for error log for at most 'length'
+ */
+int nvram_read_error_log(char * buff, int length, unsigned int * err_type)
+{
+	int rc;
+	loff_t tmp_index;	/* running NVRAM offset, advanced by each nvram_read call */
+	struct err_log_info info;
+	
+	if (nvram_error_log_index == -1)	/* -1 is the initial value: no log partition set up */
+		return -1;
+
+	if (length > nvram_error_log_size)
+		length = nvram_error_log_size;
+
+	tmp_index = nvram_error_log_index;
+
+	rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
+	if (rc <= 0) {
+		printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+		return rc;
+	}
+
+	rc = ppc_md.nvram_read(buff, length, &tmp_index);	/* log body follows the info words */
+	if (rc <= 0) {
+		printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+		return rc;
+	}
+
+	error_log_cnt = info.seq_num;	/* side effect: the global sequence counter takes the stored value */
+	*err_type = info.error_type;
+
+	return 0;
+}
+
+/* This doesn't actually zero anything, but it sets the event_logged
+ * word to tell that this event is safely in syslog.
+ */
+int nvram_clear_error_log(void)
+{
+	loff_t tmp_index;
+	int clear_word = ERR_FLAG_ALREADY_LOGGED;
+	int rc;
+
+	if (nvram_error_log_index == -1)	/* no log partition: never write at offset -1 */
+		return -ESPIPE;
+	tmp_index = nvram_error_log_index;	/* first word of the data section */
+	rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
+	if (rc <= 0) {
+		printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
+		return rc;
+	}
+	return 0;
+}
+
+static int nvram_write_header(struct nvram_partition * part)	/* returns ppc_md.nvram_write's result unchanged */
+{
+	loff_t tmp_index;	/* scratch offset, so part->index itself is never modified here */
+	int rc;
+	
+	tmp_index = part->index;
+	rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index); 
+
+	return rc;
+}
+
+static unsigned char nvram_checksum(struct nvram_header *p)	/* computes the header checksum byte; *p is only read */
+{
+	unsigned int c_sum, c_sum2;
+	unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */
+	c_sum = p->signature + p->length + sp[0] + sp[1] + sp[2] + sp[3] + sp[4] + sp[5];	/* p->checksum itself is not summed */
+
+	/* The sum may have spilled into the 3rd byte.  Fold it back. */
+	c_sum = ((c_sum & 0xffff) + (c_sum >> 16)) & 0xffff;
+	/* The sum cannot exceed 2 bytes.  Fold it into a checksum */
+	c_sum2 = (c_sum >> 8) + (c_sum << 8);	/* high byte in bits 0-7, low byte in bits 8-15 */
+	c_sum = ((c_sum + c_sum2) >> 8) & 0xff;	/* high byte + low byte, plus the carry out of their 8-bit sum */
+	return c_sum;
+}
+
 module_init(nvram_init);
 module_exit(nvram_cleanup);
 MODULE_LICENSE("GPL");
diff -puN arch/ppc64/kernel/ppc_ksyms.c~ppc64-nvram_rewrite arch/ppc64/kernel/ppc_ksyms.c
--- 25/arch/ppc64/kernel/ppc_ksyms.c~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/ppc_ksyms.c	2004-01-13 23:22:49.000000000 -0800
@@ -39,6 +39,7 @@
 #include <asm/hw_irq.h>
 #include <asm/abs_addr.h>
 #include <asm/cacheflush.h>
+#include <asm/proc_fs.h>
 #ifdef CONFIG_PPC_ISERIES
 #include <asm/iSeries/iSeries_pci.h>
 #include <asm/iSeries/iSeries_proc.h>
@@ -222,3 +223,4 @@ EXPORT_SYMBOL(debugger_fault_handler);
 
 EXPORT_SYMBOL(tb_ticks_per_usec);
 EXPORT_SYMBOL(paca);
+EXPORT_SYMBOL(proc_ppc64);
diff -puN arch/ppc64/kernel/proc_pmc.c~ppc64-nvram_rewrite arch/ppc64/kernel/proc_pmc.c
--- 25/arch/ppc64/kernel/proc_pmc.c~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/proc_pmc.c	2004-01-13 23:22:49.000000000 -0800
@@ -95,15 +95,21 @@ void proc_ppc64_init(void)
 	 *   /proc/ppc64/pmc/cpu0 
 	 */
 	spin_lock(&proc_ppc64_lock);
-	proc_ppc64_root = proc_mkdir("ppc64", 0);
-	if (!proc_ppc64_root) return;
+	if (proc_ppc64.root == NULL) {
+		proc_ppc64_init();
+		if (!proc_ppc64.root) {
+			spin_unlock(&proc_ppc64_lock);
+			return;
+		}
+	}
 	spin_unlock(&proc_ppc64_lock);
 
 	/* Placeholder for rtas interfaces. */
-	rtas_proc_dir = proc_mkdir("rtas", proc_ppc64_root);
-
+	if (proc_ppc64.rtas == NULL) {
+		return;
+	}
 
-	proc_ppc64_pmc_root = proc_mkdir("pmc", proc_ppc64_root);
+	proc_ppc64_pmc_root = proc_mkdir("pmc", proc_ppc64.root);
 
 	proc_ppc64_pmc_system_root = proc_mkdir("system", proc_ppc64_pmc_root);
 	for (i = 0; i < NR_CPUS; i++) {
@@ -114,7 +120,6 @@ void proc_ppc64_init(void)
 		}
 	}
 
-
 	/* Create directories for the software counters. */
 	for (i = 0; i < NR_CPUS; i++) {
 		if (!cpu_online(i))
diff -puN arch/ppc64/kernel/proc_ppc64.c~ppc64-nvram_rewrite arch/ppc64/kernel/proc_ppc64.c
--- 25/arch/ppc64/kernel/proc_ppc64.c~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/proc_ppc64.c	2004-01-13 23:22:49.000000000 -0800
@@ -67,14 +67,18 @@ static struct file_operations ofdt_fops 
 	.write = ofdt_write
 };
 
-static int __init proc_ppc64_init(void)
+int __init proc_ppc64_init(void)
 {
 
-	printk(KERN_INFO "proc_ppc64: Creating /proc/ppc64/\n");
 
-	proc_ppc64.root = proc_mkdir("ppc64", 0);
-	if (!proc_ppc64.root)
+	if (proc_ppc64.root == NULL) {
+		printk(KERN_INFO "proc_ppc64: Creating /proc/ppc64/\n");
+		proc_ppc64.root = proc_mkdir("ppc64", 0);
+		if (!proc_ppc64.root)
+			return 0;
+	} else {
 		return 0;
+	}
 
 	proc_ppc64.naca = create_proc_entry("naca", S_IRUSR, proc_ppc64.root);
 	if ( proc_ppc64.naca ) {
@@ -105,7 +109,11 @@ static int __init proc_ppc64_init(void)
 	}
 
 	/* Placeholder for rtas interfaces. */
-	proc_ppc64.rtas = proc_mkdir("rtas", proc_ppc64.root);
+	if (proc_ppc64.rtas == NULL)
+		proc_ppc64.rtas = proc_mkdir("rtas", proc_ppc64.root);
+
+	if (proc_ppc64.rtas)
+		proc_symlink("rtas", 0, "ppc64/rtas");
 
 	proc_ppc64_create_ofdt(proc_ppc64.root);
 
@@ -411,4 +419,3 @@ static void release_prop_list(const stru
 }
 
 fs_initcall(proc_ppc64_init);
-
diff -puN arch/ppc64/kernel/ras.c~ppc64-nvram_rewrite arch/ppc64/kernel/ras.c
--- 25/arch/ppc64/kernel/ras.c~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/ras.c	2004-01-13 23:22:49.000000000 -0800
@@ -122,6 +122,10 @@ ras_epow_interrupt(int irq, void *dev_id
 		    *((unsigned long *)&log_entry), status); 
 	printk(KERN_WARNING 
 		"EPOW <0x%lx 0x%lx>\n",*((unsigned long *)&log_entry), status);
+
+	/* format and print the extended information */
+	log_error((char *)&log_entry, ERR_TYPE_RTAS_LOG, 0);
+	
 	return IRQ_HANDLED;
 }
 
@@ -139,6 +143,7 @@ ras_error_interrupt(int irq, void *dev_i
 	struct rtas_error_log log_entry;
 	unsigned int size = sizeof(log_entry);
 	long status = 0xdeadbeef;
+	int fatal;
 
 	status = rtas_call(rtas_token("check-exception"), 6, 1, NULL, 
 			   0x500, irq, 
@@ -146,8 +151,15 @@ ras_error_interrupt(int irq, void *dev_i
 			   1, /* Time Critical */
 			   __pa(&log_entry), size);
 
-	if((status != 1) && 
-	   (log_entry.severity >= SEVERITY_ERROR_SYNC)) {
+	if ((status == 0) && (log_entry.severity >= SEVERITY_ERROR_SYNC)) 
+		fatal = 1;
+	else
+		fatal = 0;
+
+	/* format and print the extended information */
+	log_error((char *)&log_entry, ERR_TYPE_RTAS_LOG, fatal); 
+
+	if (fatal) {
 		udbg_printf("HW Error <0x%lx 0x%lx>\n",
 			    *((unsigned long *)&log_entry), status);
 		printk(KERN_EMERG 
@@ -157,6 +169,7 @@ ras_error_interrupt(int irq, void *dev_i
 #ifndef DEBUG
 		/* Don't actually power off when debugging so we can test
 		 * without actually failing while injecting errors.
+		 * Error data will not be logged to syslog.
 		 */
 		ppc_md.power_off();
 #endif
diff -puN arch/ppc64/kernel/rtas-proc.c~ppc64-nvram_rewrite arch/ppc64/kernel/rtas-proc.c
--- 25/arch/ppc64/kernel/rtas-proc.c~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/rtas-proc.c	2004-01-13 23:22:49.000000000 -0800
@@ -20,6 +20,7 @@
 #include <linux/ctype.h>
 #include <linux/time.h>
 #include <linux/string.h>
+#include <linux/init.h>
 
 #include <asm/uaccess.h>
 #include <asm/bitops.h>
@@ -27,6 +28,7 @@
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
+#include <asm/proc_fs.h>
 #include <asm/machdep.h> /* for ppc_md */
 #include <asm/time.h>
 
@@ -211,36 +213,36 @@ void proc_rtas_init(void)
 		return;
 	}
 	
-	if (proc_rtas == NULL) {
-		proc_rtas = proc_mkdir("rtas", 0);
+	if (proc_ppc64.rtas == NULL) {
+		proc_ppc64_init();
 	}
 
-	if (proc_rtas == NULL) {
+	if (proc_ppc64.rtas == NULL) {
 		printk(KERN_ERR "Failed to create /proc/rtas in proc_rtas_init\n");
 		return;
 	}
 
 	/* /proc/rtas entries */
 
-	entry = create_proc_entry("progress", S_IRUGO|S_IWUSR, proc_rtas);
+	entry = create_proc_entry("progress", S_IRUGO|S_IWUSR, proc_ppc64.rtas);
 	if (entry) entry->proc_fops = &ppc_rtas_progress_operations;
 
-	entry = create_proc_entry("clock", S_IRUGO|S_IWUSR, proc_rtas); 
+	entry = create_proc_entry("clock", S_IRUGO|S_IWUSR, proc_ppc64.rtas); 
 	if (entry) entry->proc_fops = &ppc_rtas_clock_operations;
 
-	entry = create_proc_entry("poweron", S_IWUSR|S_IRUGO, proc_rtas); 
+	entry = create_proc_entry("poweron", S_IWUSR|S_IRUGO, proc_ppc64.rtas); 
 	if (entry) entry->proc_fops = &ppc_rtas_poweron_operations;
 
-	create_proc_read_entry("sensors", S_IRUGO, proc_rtas, 
+	create_proc_read_entry("sensors", S_IRUGO, proc_ppc64.rtas, 
 			ppc_rtas_sensor_read, NULL);
 	
-	entry = create_proc_entry("frequency", S_IWUSR|S_IRUGO, proc_rtas); 
+	entry = create_proc_entry("frequency", S_IWUSR|S_IRUGO, proc_ppc64.rtas); 
 	if (entry) entry->proc_fops = &ppc_rtas_tone_freq_operations;
 
-	entry = create_proc_entry("volume", S_IWUSR|S_IRUGO, proc_rtas); 
+	entry = create_proc_entry("volume", S_IWUSR|S_IRUGO, proc_ppc64.rtas); 
 	if (entry) entry->proc_fops = &ppc_rtas_tone_volume_operations;
 
-	entry = create_proc_entry("rmo_buffer", S_IRUSR, proc_rtas);
+	entry = create_proc_entry("rmo_buffer", S_IRUSR, proc_ppc64.rtas);
 	if (entry) entry->proc_fops = &ppc_rtas_rmo_buf_ops;
 }
 
diff -puN arch/ppc64/kernel/rtas.c~ppc64-nvram_rewrite arch/ppc64/kernel/rtas.c
--- 25/arch/ppc64/kernel/rtas.c~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/rtas.c	2004-01-13 23:22:49.000000000 -0800
@@ -16,6 +16,7 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/module.h>
+#include <linux/init.h>
 
 #include <asm/prom.h>
 #include <asm/proc_fs.h>
@@ -61,7 +62,7 @@ struct rtas_t rtas = { 
 extern unsigned long reloc_offset(void);
 
 spinlock_t rtas_data_buf_lock = SPIN_LOCK_UNLOCKED;
-char rtas_data_buf[RTAS_DATA_BUF_SIZE];
+char rtas_data_buf[RTAS_DATA_BUF_SIZE]__page_aligned;
 
 void
 phys_call_rtas(int token, int nargs, int nret, ...)
@@ -422,7 +423,7 @@ asmlinkage int ppc_rtas(struct rtas_args
 	return 0;
 }
 
-EXPORT_SYMBOL(proc_ppc64);
+
 EXPORT_SYMBOL(rtas_firmware_flash_list);
 EXPORT_SYMBOL(rtas_token);
 EXPORT_SYMBOL(rtas_call);
diff -puN arch/ppc64/kernel/rtasd.c~ppc64-nvram_rewrite arch/ppc64/kernel/rtasd.c
--- 25/arch/ppc64/kernel/rtasd.c~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/rtasd.c	2004-01-13 23:22:49.000000000 -0800
@@ -17,11 +17,15 @@
 #include <linux/proc_fs.h>
 #include <linux/init.h>
 #include <linux/vmalloc.h>
+#include <linux/spinlock.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/rtas.h>
 #include <asm/prom.h>
+#include <asm/nvram.h>
+#include <asm/atomic.h>
+#include <asm/proc_fs.h>
 
 #if 0
 #define DEBUG(A...)	printk(KERN_ERR A)
@@ -29,13 +33,10 @@
 #define DEBUG(A...)
 #endif
 
-static spinlock_t rtas_log_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t log_lock = SPIN_LOCK_UNLOCKED;
 
 DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait);
 
-#define LOG_NUMBER		64		/* must be a power of two */
-#define LOG_NUMBER_MASK		(LOG_NUMBER-1)
-
 static char *rtas_log_buf;
 static unsigned long rtas_log_start;
 static unsigned long rtas_log_size;
@@ -43,21 +44,173 @@ static unsigned long rtas_log_size;
 static int surveillance_requested;
 static unsigned int rtas_event_scan_rate;
 static unsigned int rtas_error_log_max;
+static unsigned int rtas_error_log_buffer_max;
 
-#define SURVEILLANCE_TOKEN	9000
-#define SURVEILLANCE_TIMEOUT	1
-#define SURVEILLANCE_SCANRATE	1
+extern spinlock_t proc_ppc64_lock;
+extern volatile int no_more_logging;
 
-struct proc_dir_entry *proc_rtas;
+volatile int error_log_cnt = 0;
 
 /*
  * Since we use 32 bit RTAS, the physical address of this must be below
  * 4G or else bad things happen. Allocate this in the kernel data and
  * make it big enough.
  */
-#define RTAS_ERROR_LOG_MAX 1024
 static unsigned char logdata[RTAS_ERROR_LOG_MAX];
 
+/* To see this info, grep RTAS /var/log/messages and each entry
+ * will be collected together with obvious begin/end.
+ * There will be a unique identifier on the begin and end lines.
+ * This will persist across reboots.
+ *
+ * format of error logs returned from RTAS:
+ * bytes	(size)	: contents
+ * --------------------------------------------------------
+ * 0-7		(8)	: rtas_error_log
+ * 8-47		(40)	: extended info
+ * 48-51	(4)	: vendor id
+ * 52-1023 (vendor specific) : location code and debug data
+ */
+static void printk_log_rtas(char *buf, int len)
+{
+	const int perline = 16;		/* log bytes shown per output line */
+	char * label = "RTAS event";
+	char line[64];
+	int pos, col, used;
+
+	printk(RTAS_ERR "%d -------- %s begin --------\n", error_log_cnt, label);
+
+	/*
+	 * Hex-dump the event, perline bytes per output line.  Each line
+	 * begins with "RTAS" and an incrementing line number so that
+	 * syslogd still prints lines whose data is otherwise identical.
+	 * A space precedes every group of 4 bytes.
+	 */
+	for (pos = 0; pos < len; pos++) {
+		col = pos % perline;
+
+		/* first byte of an output line: reset it, write the prefix */
+		if (!col) {
+			memset(line, 0, sizeof(line));
+			used = sprintf(line, "RTAS %d:", pos / perline);
+		}
+		if (!(pos % 4))
+			used += sprintf(line + used, " ");
+		used += sprintf(line + used, "%02x", (unsigned char)buf[pos]);
+
+		if (col == perline - 1)
+			printk(KERN_ERR "%s\n", line);
+	}
+
+	/* emit a final, partially filled line */
+	if (pos % perline)
+		printk(KERN_ERR "%s\n", line);
+	printk(RTAS_ERR "%d -------- %s end ----------\n", error_log_cnt, label);
+}
+
+/*
+ * Return the number of valid bytes in the RTAS error log at buf: the
+ * 8 byte fixed header plus the extended log length it advertises,
+ * never more than RTAS_ERROR_LOG_MAX.
+ */
+static int log_rtas_len(char * buf)
+{
+	struct rtas_error_log *err = (struct rtas_error_log *)buf;
+	unsigned long ext_len = err->extended_log_length;
+
+	/*
+	 * Bound the extended length before adding the fixed header so a
+	 * corrupt length cannot wrap or go negative once stored in an int.
+	 */
+	if (ext_len > RTAS_ERROR_LOG_MAX - 8)
+		return RTAS_ERROR_LOG_MAX;
+	return 8 + ext_len;
+}
+
+/*
+ * First write to nvram, if fatal error, that is the only
+ * place we log the info.  The error will be picked up
+ * on the next reboot by rtasd.  If not fatal, run the
+ * method for the type of error.  Currently, only RTAS
+ * errors have methods implemented, but in the future
+ * there might be a need to store data in nvram before a
+ * call to panic().
+ *
+ * XXX We write to nvram periodically, to indicate error has
+ * been written and sync'd, but there is a possibility
+ * that if we don't shutdown correctly, a duplicate error
+ * record will be created on next reboot.
+ */
+void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
+{
+	unsigned long offset;
+	unsigned long s;
+	unsigned int room;
+	int len = 0;
+
+	DEBUG("logging event\n");
+	if (buf == NULL)
+		return;
+
+	spin_lock_irqsave(&log_lock, s);
+
+	/* get length and increase count */
+	switch (err_type & ERR_TYPE_MASK) {
+	case ERR_TYPE_RTAS_LOG:
+		len = log_rtas_len(buf);
+		if (!(err_type & ERR_FLAG_BOOT))
+			error_log_cnt++;
+		break;
+	case ERR_TYPE_KERNEL_PANIC:
+	default:
+		spin_unlock_irqrestore(&log_lock, s);
+		return;
+	}
+
+	/* Write error to NVRAM */
+	if (!no_more_logging && !(err_type & ERR_FLAG_BOOT))
+		nvram_write_error_log(buf, len, err_type);
+
+	/* Check to see if we need to or have stopped logging */
+	if (fatal || no_more_logging) {
+		no_more_logging = 1;
+		spin_unlock_irqrestore(&log_lock, s);
+		return;
+	}
+
+	/* call type specific method for error */
+	switch (err_type & ERR_TYPE_MASK) {
+	case ERR_TYPE_RTAS_LOG:
+		/* put into syslog and error_log file */
+		printk_log_rtas(buf, len);
+
+		offset = rtas_error_log_buffer_max *
+			((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK);
+		/* Clamp the payload so it cannot run past the end of its slot */
+		room = rtas_error_log_buffer_max > sizeof(int) ?
+			rtas_error_log_buffer_max - sizeof(int) : 0;
+		if ((unsigned int)len > room)
+			len = room;
+
+		/* First copy over sequence number, then the error log data */
+		memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int));
+		memcpy(&rtas_log_buf[offset + sizeof(int)], buf, len);
+
+		if (rtas_log_size < LOG_NUMBER)
+			rtas_log_size += 1;
+		else
+			rtas_log_start += 1;
+
+		spin_unlock_irqrestore(&log_lock, s);
+		wake_up_interruptible(&rtas_log_wait);
+		break;
+	case ERR_TYPE_KERNEL_PANIC:
+	default:
+		spin_unlock_irqrestore(&log_lock, s);
+		return;
+	}
+}
+
+
 static int rtas_log_open(struct inode * inode, struct file * file)
 {
 	return 0;
@@ -68,36 +221,50 @@ static int rtas_log_release(struct inode
 	return 0;
 }
 
+/* Check whether all events have been logged; if so, the events held in
+ * NVRAM can safely be cleared.  Then sleep until there is something
+ * else to log.
+ */
 static ssize_t rtas_log_read(struct file * file, char * buf,
 			 size_t count, loff_t *ppos)
 {
 	int error;
 	char *tmp;
+	unsigned long s;
 	unsigned long offset;
 
-	if (!buf || count < rtas_error_log_max)
+	if (!buf || count < rtas_error_log_buffer_max)
 		return -EINVAL;
 
-	count = rtas_error_log_max;
+	count = rtas_error_log_buffer_max;
 
 	error = verify_area(VERIFY_WRITE, buf, count);
 	if (error)
-		return -EINVAL;
+		return -EFAULT;
 
-	tmp = kmalloc(rtas_error_log_max, GFP_KERNEL);
+	tmp = kmalloc(count, GFP_KERNEL);
 	if (!tmp)
 		return -ENOMEM;
 
+
+	spin_lock_irqsave(&log_lock, s);
+	/* if it's 0, then we know we got the last one (the one in NVRAM) */
+	if (rtas_log_size == 0 && !no_more_logging)
+		nvram_clear_error_log();
+	spin_unlock_irqrestore(&log_lock, s);
+
+
 	error = wait_event_interruptible(rtas_log_wait, rtas_log_size);
 	if (error)
 		goto out;
 
-	spin_lock(&rtas_log_lock);
-	offset = rtas_error_log_max * (rtas_log_start & LOG_NUMBER_MASK);
+	spin_lock_irqsave(&log_lock, s);
+	offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK);
 	memcpy(tmp, &rtas_log_buf[offset], count);
+
 	rtas_log_start += 1;
 	rtas_log_size -= 1;
-	spin_unlock(&rtas_log_lock);
+	spin_unlock_irqrestore(&log_lock, s);
 
 	error = copy_to_user(buf, tmp, count) ? -EFAULT : count;
 out:
@@ -120,28 +287,6 @@ struct file_operations proc_rtas_log_ope
 	.release =	rtas_log_release,
 };
 
-static void log_rtas(char *buf)
-{
-	unsigned long offset;
-
-	DEBUG("logging rtas event\n");
-
-	spin_lock(&rtas_log_lock);
-
-	offset = rtas_error_log_max *
-			((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK);
-
-	memcpy(&rtas_log_buf[offset], buf, rtas_error_log_max);
-
-	if (rtas_log_size < LOG_NUMBER)
-		rtas_log_size += 1;
-	else
-		rtas_log_start += 1;
-
-	spin_unlock(&rtas_log_lock);
-	wake_up_interruptible(&rtas_log_wait);
-}
-
 static int enable_surveillance(void)
 {
 	int error;
@@ -197,10 +342,12 @@ extern long sys_sched_get_priority_max(i
 
 static int rtasd(void *unused)
 {
+	unsigned int err_type;
 	int cpu = 0;
 	int error;
 	int first_pass = 1;
 	int event_scan = rtas_token("event-scan");
+	int rc;
 
 	if (event_scan == RTAS_UNKNOWN_SERVICE || get_eventscan_parms() == -1)
 		goto error;
@@ -211,6 +358,9 @@ static int rtasd(void *unused)
 		goto error;
 	}
 
+	/* We can use rtas_log_buf now */
+	no_more_logging = 0;
+
 	DEBUG("will sleep for %d jiffies\n", (HZ*60/rtas_event_scan_rate) / 2);
 
 	daemonize("rtasd");
@@ -221,6 +371,16 @@ static int rtasd(void *unused)
 	current->nice = sys_sched_get_priority_max(SCHED_FIFO) + 1;
 #endif
 
+	/* See if we have any error stored in NVRAM */
+	memset(logdata, 0, rtas_error_log_max);
+
+	rc = nvram_read_error_log(logdata, rtas_error_log_max, &err_type);
+	if (!rc) {
+		if (err_type != ERR_FLAG_ALREADY_LOGGED) {
+			pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0);
+		}
+	}
+
 repeat:
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
 		if (!cpu_online(cpu))
@@ -241,7 +401,7 @@ repeat:
 			}
 
 			if (error == 0)
-				log_rtas(logdata);
+				pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0);
 
 		} while(error == 0);
 
@@ -275,25 +435,29 @@ static int __init rtas_init(void)
 {
 	struct proc_dir_entry *entry;
 
-	if (proc_rtas == NULL) {
-		proc_rtas = proc_mkdir("rtas", 0);
+	if (proc_ppc64.rtas == NULL) {
+		proc_ppc64_init();
 	}
 
-	if (proc_rtas == NULL) {
-		printk(KERN_ERR "Failed to create /proc/rtas in rtas_init\n");
-	} else {
-		entry = create_proc_entry("error_log", S_IRUSR, proc_rtas);
-		if (entry)
-			entry->proc_fops = &proc_rtas_log_operations;
-		else
-			printk(KERN_ERR "Failed to create rtas/error_log proc entry\n");
+	if (proc_ppc64.rtas == NULL) {
+		printk(KERN_ERR "rtas_init: /proc/ppc64/rtas does not exist.\n");
+		return -EIO;
 	}
 
+	entry = create_proc_entry("error_log", S_IRUSR, proc_ppc64.rtas);
+	if (entry)
+		entry->proc_fops = &proc_rtas_log_operations;
+	else
+		printk(KERN_ERR "Failed to create rtas/error_log proc entry\n");
+
 	if (kernel_thread(rtasd, 0, CLONE_FS) < 0)
 		printk(KERN_ERR "Failed to start RTAS daemon\n");
 
 	printk(KERN_ERR "RTAS daemon started\n");
 
+	/* Make room for the sequence number (NOTE(review): rtasd already runs) */
+	rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int);
+
 	return 0;
 }
 
diff -puN arch/ppc64/kernel/scanlog.c~ppc64-nvram_rewrite arch/ppc64/kernel/scanlog.c
--- 25/arch/ppc64/kernel/scanlog.c~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/scanlog.c	2004-01-13 23:22:49.000000000 -0800
@@ -28,6 +28,7 @@
 #include <asm/uaccess.h>
 #include <asm/rtas.h>
 #include <asm/prom.h>
+#include <asm/proc_fs.h>
 
 #define MODULE_VERSION "1.0"
 #define MODULE_NAME "scanlog"
@@ -43,9 +44,6 @@ static int scanlog_debug;
 static unsigned int ibm_scan_log_dump;			/* RTAS token */
 static struct proc_dir_entry *proc_ppc64_scan_log_dump;	/* The proc file */
 
-extern struct proc_dir_entry *proc_rtas;
-
-
 static ssize_t scanlog_read(struct file *file, char *buf,
 			    size_t count, loff_t *ppos)
 {
@@ -214,15 +212,16 @@ int __init scanlog_init(void)
 		return -EIO;
 	}
 
-	if (proc_rtas == NULL)
-                proc_rtas = proc_mkdir("rtas", 0);
+	if (proc_ppc64.rtas == NULL) {
+		proc_ppc64_init();
+	}
 
-	if (proc_rtas == NULL) {
+	if (proc_ppc64.rtas == NULL) {
 		printk(KERN_ERR "Failed to create /proc/rtas in scanlog_init\n");
 		return -EIO;
 	}
 
-        ent = create_proc_entry("scan-log-dump",  S_IRUSR, proc_rtas);
+        ent = create_proc_entry("scan-log-dump",  S_IRUSR, proc_ppc64.rtas);
 	if (ent) {
 		ent->proc_fops = &scanlog_fops;
 		/* Ideally we could allocate a buffer < 4G */
diff -puN include/asm-ppc64/machdep.h~ppc64-nvram_rewrite include/asm-ppc64/machdep.h
--- 25/include/asm-ppc64/machdep.h~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/include/asm-ppc64/machdep.h	2004-01-13 23:22:49.000000000 -0800
@@ -89,6 +89,12 @@ struct machdep_calls {
 	unsigned char	(*udbg_getc)(void);
 	int		(*udbg_getc_poll)(void);
 
+	/* Interface for platform error logging */
+	void 		(*log_error)(char *buf, unsigned int err_type, int fatal);
+
+	ssize_t		(*nvram_write)(char *buf, size_t count, loff_t *index);
+	ssize_t		(*nvram_read)(char *buf, size_t count, loff_t *index);	
+
 #ifdef CONFIG_SMP
 	/* functions for dealing with other cpus */
 	struct smp_ops_t smp_ops;
@@ -113,5 +119,11 @@ void ppc64_attention_msg(unsigned int sr
 /* Print a dump progress message. */
 void ppc64_dump_msg(unsigned int src, const char *msg);
 
+static inline void log_error(char *buf, unsigned int err_type, int fatal)
+{
+	if (ppc_md.log_error != NULL)	/* no-op when no hook is installed */
+		(*ppc_md.log_error)(buf, err_type, fatal);
+}
+
 #endif /* _PPC64_MACHDEP_H */
 #endif /* __KERNEL__ */
diff -puN include/asm-ppc64/nvram.h~ppc64-nvram_rewrite include/asm-ppc64/nvram.h
--- 25/include/asm-ppc64/nvram.h~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/include/asm-ppc64/nvram.h	2004-01-13 23:22:49.000000000 -0800
@@ -11,6 +11,12 @@
 #ifndef _PPC64_NVRAM_H
 #define _PPC64_NVRAM_H
 
+#define NVRW_CNT 0x20
+#define NVRAM_HEADER_LEN 16 /* sizeof(struct nvram_header) */
+#define NVRAM_BLOCK_LEN 16
+#define NVRAM_MAX_REQ (2080/NVRAM_BLOCK_LEN)
+#define NVRAM_MIN_REQ (1056/NVRAM_BLOCK_LEN)
+
 #define NVRAM_AS0  0x74
 #define NVRAM_AS1  0x75
 #define NVRAM_DATA 0x77
@@ -28,4 +34,37 @@
 #define MOTO_RTC_CONTROLA       0x1FF8
 #define MOTO_RTC_CONTROLB       0x1FF9
 
+#define NVRAM_SIG_SP	0x02	/* support processor */
+#define NVRAM_SIG_OF	0x50	/* open firmware config */
+#define NVRAM_SIG_FW	0x51	/* general firmware */
+#define NVRAM_SIG_HW	0x52	/* hardware (VPD) */
+#define NVRAM_SIG_SYS	0x70	/* system env vars */
+#define NVRAM_SIG_CFG	0x71	/* config data */
+#define NVRAM_SIG_ELOG	0x72	/* error log */
+#define NVRAM_SIG_VEND	0x7e	/* vendor defined */
+#define NVRAM_SIG_FREE	0x7f	/* Free space */
+#define NVRAM_SIG_OS	0xa0	/* OS defined */
+
+/* If you change the size of this struct, also change the size of NVNAME_LEN */
+struct nvram_header {
+	unsigned char signature;	/* presumably one of the NVRAM_SIG_* values -- confirm */
+	unsigned char checksum;
+	unsigned short length;	/* NOTE(review): units not shown here (bytes vs. blocks) -- confirm */
+	char name[12];
+};
+
+struct nvram_partition {
+	struct list_head partition;	/* list linkage */
+	struct nvram_header header;
+	unsigned int index;	/* presumably the partition's position in NVRAM -- confirm */
+};
+
+
+ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index);
+ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index);
+int nvram_write_error_log(char * buff, int length, unsigned int err_type);
+int nvram_read_error_log(char * buff, int length, unsigned int * err_type);
+int nvram_clear_error_log(void);
+void nvram_print_partitions(char * label);
+
 #endif /* _PPC64_NVRAM_H */
diff -puN include/asm-ppc64/proc_fs.h~ppc64-nvram_rewrite include/asm-ppc64/proc_fs.h
--- 25/include/asm-ppc64/proc_fs.h~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/include/asm-ppc64/proc_fs.h	2004-01-13 23:22:49.000000000 -0800
@@ -34,5 +34,6 @@ struct proc_ppc64_t {
 };
 
 extern struct proc_ppc64_t proc_ppc64;
+extern int proc_ppc64_init(void);
 
 #endif /* _PPC64_PROC_FS_H */
diff -puN include/asm-ppc64/rtas.h~ppc64-nvram_rewrite include/asm-ppc64/rtas.h
--- 25/include/asm-ppc64/rtas.h~ppc64-nvram_rewrite	2004-01-13 23:22:49.000000000 -0800
+++ 25-akpm/include/asm-ppc64/rtas.h	2004-01-13 23:22:49.000000000 -0800
@@ -22,6 +22,13 @@
 /* Buffer size for ppc_rtas system call. */
 #define RTAS_RMOBUF_MAX (64 * 1024)
 
+/* RTAS return codes */
+#define RTAS_BUSY		-2	/* RTAS Return Status - Busy */
+#define RTAS_EXTENDED_DELAY_MIN 9900
+#define RTAS_EXTENDED_DELAY_MAX 9905
+
+#define RTAS_UNKNOWN_OP		-1099	/* Return Status - Unknown RTAS Token */
+
 /*
  * In general to call RTAS use rtas_token("string") to lookup
  * an RTAS token for the given string (e.g. "event-scan").
@@ -179,16 +186,37 @@ static inline int rtas_is_extended_busy(
 	return status >= 9900 && status <= 9909;
 }
 
+extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
+
+/* Error types logged.  */
+#define ERR_FLAG_ALREADY_LOGGED	0x0
+#define ERR_FLAG_BOOT		0x1 	/* log was pulled from NVRAM on boot */
+#define ERR_TYPE_RTAS_LOG	0x2	/* from rtas event-scan */
+#define ERR_TYPE_KERNEL_PANIC	0x4	/* from panic() */
+
+/* All the types and not flags */
+#define ERR_TYPE_MASK	(ERR_TYPE_RTAS_LOG | ERR_TYPE_KERNEL_PANIC)
+
+#define RTAS_ERR KERN_ERR "RTAS: "
+ 
+#define RTAS_ERROR_LOG_MAX 2048
+ 
+ 
+/* Event Scan Parameters */
+#define EVENT_SCAN_ALL_EVENTS	0xf0000000
+#define SURVEILLANCE_TOKEN	9000
+#define SURVEILLANCE_TIMEOUT	1
+#define SURVEILLANCE_SCANRATE	1
+#define LOG_NUMBER		64		/* must be a power of two */
+#define LOG_NUMBER_MASK		(LOG_NUMBER-1)
+
 /* Some RTAS ops require a data buffer and that buffer must be < 4G.
  * Rather than having a memory allocator, just use this buffer
  * (get the lock first), make the RTAS call.  Copy the data instead
  * of holding the buffer for long.
  */
-#define RTAS_DATA_BUF_SIZE 1024
-
-#define RTAS_UNKNOWN_OP	-1099	/* Return Status - Unknown RTAS Token */
-#define RTAS_BUSY	-2	/* RTAS Return Status - Busy */
 
+#define RTAS_DATA_BUF_SIZE 4096
 extern spinlock_t rtas_data_buf_lock;
 extern char rtas_data_buf[RTAS_DATA_BUF_SIZE];
 

_
