http://jfs.bkbits.net/linux-2.5
shaggy@austin.ibm.com|ChangeSet|20040707213039|22166 shaggy

# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2004/07/07 16:04:45-05:00 shaggy@austin.ibm.com 
#   JFS: Check for dmap corruption before using leafidx
#   
#   Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
# 
# fs/jfs/jfs_dmap.c
#   2004/07/07 16:04:26-05:00 shaggy@austin.ibm.com +34 -0
#   Check for dmap corruption before using leafidx
# 
# ChangeSet
#   2004/07/07 15:56:52-05:00 shaggy@austin.ibm.com 
#   JFS: prevent concurrent calls to txCommit on the imap inode
#   
#   Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
# 
# fs/jfs/jfs_imap.c
#   2004/07/07 15:56:10-05:00 shaggy@austin.ibm.com +6 -0
#   Don't allow diFree & diNewIAG to call txCommit at the same time
# 
# ChangeSet
#   2004/07/07 15:34:11-05:00 shaggy@austin.ibm.com 
#   JFS: Protect active_ag with a spinlock
#   
#   Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
# 
# fs/jfs/super.c
#   2004/07/07 15:33:27-05:00 shaggy@austin.ibm.com +4 -0
#   protect active_ag with a spinlock
# 
# fs/jfs/jfs_incore.h
#   2004/07/07 15:33:27-05:00 shaggy@austin.ibm.com +1 -0
#   protect active_ag with a spinlock
# 
# fs/jfs/jfs_extent.c
#   2004/07/07 15:33:27-05:00 shaggy@austin.ibm.com +2 -0
#   protect active_ag with a spinlock
# 
# fs/jfs/file.c
#   2004/07/07 15:33:27-05:00 shaggy@austin.ibm.com +4 -0
#   protect active_ag with a spinlock
# 
# ChangeSet
#   2004/07/06 08:31:31-05:00 shaggy@austin.ibm.com 
#   JFS: Updated field isn't always written to disk during truncate
#   
#   There is a possibility that a change to header.next is not logged
#   or written to disk if it is the only change to an xtree leaf page.
#   
#   Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
# 
# fs/jfs/jfs_xtree.c
#   2004/07/06 08:31:13-05:00 shaggy@austin.ibm.com +11 -1
#   Make sure header.next change is logged & written
# 
# ChangeSet
#   2004/06/30 15:41:40-05:00 shaggy@austin.ibm.com 
#   JFS: Error path released metadata page it shouldn't have
#   
#   Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
# 
# fs/jfs/jfs_xtree.c
#   2004/06/30 15:41:20-05:00 shaggy@austin.ibm.com +8 -17
#   Remove extra XT_PUTPAGE(rcmp) on err path
# 
# ChangeSet
#   2004/06/14 15:19:58-05:00 shaggy@austin.ibm.com 
#   JFS: Don't allow reading beyond the inode map's EOF
#   
#   If we try to read inodes that are beyond the size of the inode map,
#   __read_metapages would read uninitialized pages into the inode map's
#   address space.  If the inode map is later grown in order to allocate
#   more inodes, the page is initialized and written under a different
#   address space.  Having the stale page in the page cache prevents the
#   properly initialized page from being read, and results in errors.
#   
#   This problem can be provoked by an nfs client trying to read an inode
#   that does not exist.
#   
#   Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
# 
# fs/jfs/jfs_metapage.c
#   2004/06/14 15:19:21-05:00 shaggy@austin.ibm.com +9 -1
#   Don't allow reading pages outside of imap
# 
diff -Nru a/fs/jfs/file.c b/fs/jfs/file.c
--- a/fs/jfs/file.c	2004-07-08 18:15:13 -07:00
+++ b/fs/jfs/file.c	2004-07-08 18:15:13 -07:00
@@ -65,11 +65,13 @@
 	if (S_ISREG(inode->i_mode) && file->f_mode & FMODE_WRITE &&
 	    (inode->i_size == 0)) {
 		struct jfs_inode_info *ji = JFS_IP(inode);
+		spin_lock_irq(&ji->ag_lock);
 		if (ji->active_ag == -1) {
 			ji->active_ag = ji->agno;
 			atomic_inc(
 			    &JFS_SBI(inode->i_sb)->bmap->db_active[ji->agno]);
 		}
+		spin_unlock_irq(&ji->ag_lock);
 	}
 
 	return 0;
@@ -78,11 +80,13 @@
 {
 	struct jfs_inode_info *ji = JFS_IP(inode);
 
+	spin_lock_irq(&ji->ag_lock);
 	if (ji->active_ag != -1) {
 		struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
 		atomic_dec(&bmap->db_active[ji->active_ag]);
 		ji->active_ag = -1;
 	}
+	spin_unlock_irq(&ji->ag_lock);
 
 	return 0;
 }
diff -Nru a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
--- a/fs/jfs/jfs_dmap.c	2004-07-08 18:15:13 -07:00
+++ b/fs/jfs/jfs_dmap.c	2004-07-08 18:15:13 -07:00
@@ -1204,6 +1204,12 @@
 	s8 *leaf;
 	u32 mask;
 
+	if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
+		jfs_error(bmp->db_ipbmap->i_sb,
+			  "dbAllocNext: Corrupt dmap page");
+		return -EIO;
+	}
+
 	/* pick up a pointer to the leaves of the dmap tree.
 	 */
 	leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
@@ -1327,6 +1333,13 @@
 	    struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results)
 {
 	int word, lword, rc;
+
+	if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
+		jfs_error(bmp->db_ipbmap->i_sb,
+			  "dbAllocNear: Corrupt dmap page");
+		return -EIO;
+	}
+
 	s8 *leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
 
 	/* determine the word within the dmap that holds the hint
@@ -1489,6 +1502,13 @@
 	dcp = (struct dmapctl *) mp->data;
 	budmin = dcp->budmin;
 
+	if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
+		jfs_error(bmp->db_ipbmap->i_sb,
+			  "dbAllocAG: Corrupt dmapctl page");
+		release_metapage(mp);
+		return -EIO;
+	}
+
 	/* search the subtree(s) of the dmap control page that describes
 	 * the allocation group, looking for sufficient free space.  to begin,
 	 * determine how many allocation groups are represented in a dmap
@@ -1697,6 +1717,13 @@
 		dcp = (struct dmapctl *) mp->data;
 		budmin = dcp->budmin;
 
+		if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
+			jfs_error(bmp->db_ipbmap->i_sb,
+				  "dbFindCtl: Corrupt dmapctl page");
+			release_metapage(mp);
+			return -EIO;
+		}
+
 		/* search the tree within the dmap control page for
 		 * sufficent free space.  if sufficient free space is found,
 		 * dbFindLeaf() returns the index of the leaf at which
@@ -2458,6 +2485,13 @@
 	if (mp == NULL)
 		return -EIO;
 	dcp = (struct dmapctl *) mp->data;
+
+	if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
+		jfs_error(bmp->db_ipbmap->i_sb,
+			  "dbAdjCtl: Corrupt dmapctl page");
+		release_metapage(mp);
+		return -EIO;
+	}
 
 	/* determine the leaf number corresponding to the block and
 	 * the index within the dmap control tree.
diff -Nru a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
--- a/fs/jfs/jfs_extent.c	2004-07-08 18:15:13 -07:00
+++ b/fs/jfs/jfs_extent.c	2004-07-08 18:15:13 -07:00
@@ -553,6 +553,7 @@
 
 	if (S_ISREG(ip->i_mode) && (ji->fileset == FILESYSTEM_I)) {
 		ag = BLKTOAG(daddr, sbi);
+		spin_lock_irq(&ji->ag_lock);
 		if (ji->active_ag == -1) {
 			atomic_inc(&bmp->db_active[ag]);
 			ji->active_ag = ag;
@@ -561,6 +562,7 @@
 			atomic_inc(&bmp->db_active[ag]);
 			ji->active_ag = ag;
 		}
+		spin_unlock_irq(&ji->ag_lock);
 	}
 
 	return (0);
diff -Nru a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
--- a/fs/jfs/jfs_imap.c	2004-07-08 18:15:13 -07:00
+++ b/fs/jfs/jfs_imap.c	2004-07-08 18:15:13 -07:00
@@ -1280,6 +1280,7 @@
 	 * to be freed by the transaction;  
 	 */
 	tid = txBegin(ipimap->i_sb, COMMIT_FORCE);
+	down(&JFS_IP(ipimap)->commit_sem);
 
 	/* acquire tlock of the iag page of the freed ixad 
 	 * to force the page NOHOMEOK (even though no data is
@@ -1312,6 +1313,7 @@
 	rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
 
 	txEnd(tid);
+	up(&JFS_IP(ipimap)->commit_sem);
 
 	/* unlock the AG inode map information */
 	AG_UNLOCK(imap, agno);
@@ -2622,10 +2624,13 @@
 		 */
 #endif				/*  _STILL_TO_PORT */
 		tid = txBegin(sb, COMMIT_FORCE);
+		down(&JFS_IP(ipimap)->commit_sem);
 
 		/* update the inode map addressing structure to point to it */
 		if ((rc =
 		     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
+			txEnd(tid);
+			up(&JFS_IP(ipimap)->commit_sem);
 			/* Free the blocks allocated for the iag since it was
 			 * not successfully added to the inode map
 			 */
@@ -2650,6 +2655,7 @@
 		rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
 
 		txEnd(tid);
+		up(&JFS_IP(ipimap)->commit_sem);
 
 		duplicateIXtree(sb, blkno, xlen, &xaddr);
 
diff -Nru a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
--- a/fs/jfs/jfs_incore.h	2004-07-08 18:15:13 -07:00
+++ b/fs/jfs/jfs_incore.h	2004-07-08 18:15:13 -07:00
@@ -53,6 +53,7 @@
 	lid_t	blid;		/* lid of pseudo buffer?	*/
 	lid_t	atlhead;	/* anonymous tlock list head	*/
 	lid_t	atltail;	/* anonymous tlock list tail	*/
+	spinlock_t ag_lock;	/* protects active_ag		*/
 	struct list_head anon_inode_list; /* inodes having anonymous txns */
 	/*
 	 * rdwrlock serializes xtree between reads & writes and synchronizes
diff -Nru a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
--- a/fs/jfs/jfs_metapage.c	2004-07-08 18:15:13 -07:00
+++ b/fs/jfs/jfs_metapage.c	2004-07-08 18:15:13 -07:00
@@ -225,8 +225,16 @@
 
 	if (absolute)
 		mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
-	else
+	else {
+		/*
+		 * If an nfs client tries to read an inode that is larger
+		 * than any existing inodes, we may try to read past the
+		 * end of the inode map
+		 */
+		if ((lblock << inode->i_blkbits) >= inode->i_size)
+			return NULL;
 		mapping = inode->i_mapping;
+	}
 
 	hash_ptr = meta_hash(mapping, lblock);
 again:
diff -Nru a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
--- a/fs/jfs/jfs_xtree.c	2004-07-08 18:15:13 -07:00
+++ b/fs/jfs/jfs_xtree.c	2004-07-08 18:15:13 -07:00
@@ -1071,8 +1071,10 @@
 		 */
 		/* get/pin the parent page <sp> */
 		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
-		if (rc)
-			goto errout2;
+		if (rc) {
+			XT_PUTPAGE(rcmp);
+			return rc;
+		}
 
 		/*
 		 * The new key entry goes ONE AFTER the index of parent entry,
@@ -1106,8 +1108,10 @@
 			rc = (sp->header.flag & BT_ROOT) ?
 			    xtSplitRoot(tid, ip, split, &rmp) :
 			    xtSplitPage(tid, ip, split, &rmp, &rbn);
-			if (rc)
-				goto errout1;
+			if (rc) {
+				XT_PUTPAGE(smp);
+				return rc;
+			}
 
 			XT_PUTPAGE(smp);
 			/* keep new child page <rp> pinned */
@@ -1170,19 +1174,6 @@
 	XT_PUTPAGE(rmp);
 
 	return 0;
-
-	/*
-	 * If something fails in the above loop we were already walking back
-	 * up the tree and the tree is now inconsistent.
-	 * release all pages we're holding.
-	 */
-      errout1:
-	XT_PUTPAGE(smp);
-
-      errout2:
-	XT_PUTPAGE(rcmp);
-
-	return rc;
 }
 
 
@@ -3504,7 +3495,17 @@
 	 * a page that was formerly to the right, let's make sure that the
 	 * next pointer is zero.
 	 */
-	p->header.next = 0;
+	if (p->header.next) {
+		if (log)
+			/*
+			 * Make sure this change to the header is logged.
+			 * If we really truncate this leaf, the flag
+			 * will be changed to tlckTRUNCATE
+			 */
+			tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
+		BT_MARK_DIRTY(mp, ip);
+		p->header.next = 0;
+	}
 
 	freed = 0;
 
diff -Nru a/fs/jfs/super.c b/fs/jfs/super.c
--- a/fs/jfs/super.c	2004-07-08 18:15:13 -07:00
+++ b/fs/jfs/super.c	2004-07-08 18:15:13 -07:00
@@ -141,10 +141,13 @@
 {
 	struct jfs_inode_info *ji = JFS_IP(inode);
 
+	spin_lock_irq(&ji->ag_lock);
 	if (ji->active_ag != -1) {
 		struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
 		atomic_dec(&bmap->db_active[ji->active_ag]);
+		ji->active_ag = -1;
 	}
+	spin_unlock_irq(&ji->ag_lock);
 
 #ifdef CONFIG_JFS_POSIX_ACL
 	if (ji->i_acl != JFS_ACL_NOT_CACHED) {
@@ -559,6 +562,7 @@
 		init_rwsem(&jfs_ip->rdwrlock);
 		init_MUTEX(&jfs_ip->commit_sem);
 		init_rwsem(&jfs_ip->xattr_sem);
+		spin_lock_init(&jfs_ip->ag_lock);
 		jfs_ip->active_ag = -1;
 #ifdef CONFIG_JFS_POSIX_ACL
 		jfs_ip->i_acl = JFS_ACL_NOT_CACHED;
