
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>

We found a problem in hugetlbfs file system quota when using huge pages
via mmap.  The mmap method in hugetlbfs_file_operations always charges
quota on every mmap, even for pages that are already allocated on that
inode.  This charges the same huge page multiple times and causes mmap
on an existing file to fail when the quota mistakenly runs out.

Fix it by moving the quota accounting into hugetlb_prefault(): charge
the quota only when a new huge page is actually allocated for the
inode, release it again if the allocation or page cache insertion
fails, and give it back when pages are truncated.
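To illustrate the failure, here is a minimal user-space sketch (not part
of the patch).  It assumes a hugetlbfs mount at /mnt/huge with a size
limit of one huge page and a 2MB huge page size; the path, sizes, and
file name are illustrative only.  Under the old per-mmap accounting the
second mmap of the already-populated file is charged again and fails
with ENOMEM.

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define HPAGE_SIZE	(2UL * 1024 * 1024)	/* assumed huge page size */

int main(void)
{
	void *a, *b;
	int fd;

	/* assumed mount: mount -t hugetlbfs -o size=2M none /mnt/huge */
	fd = open("/mnt/huge/testfile", O_CREAT | O_RDWR, 0600);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* first mapping allocates the huge page and charges quota */
	a = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (a == MAP_FAILED)
		perror("first mmap");

	/*
	 * Second mapping of the same, already-populated file should not
	 * consume additional quota, but per-mmap accounting charges it
	 * again and fails with ENOMEM once free_blocks is exhausted.
	 */
	b = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (b == MAP_FAILED)
		perror("second mmap");

	close(fd);
	return 0;
}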



 25-akpm/arch/i386/mm/hugetlbpage.c |    7 +++++++
 25-akpm/arch/ia64/mm/hugetlbpage.c |    7 +++++++
 25-akpm/fs/hugetlbfs/inode.c       |   28 +---------------------------
 25-akpm/include/linux/hugetlb.h    |   30 ++++++++++++++++++++++++++++++
 4 files changed, 45 insertions(+), 27 deletions(-)

diff -puN arch/i386/mm/hugetlbpage.c~hugetlbfs-accounting-fix arch/i386/mm/hugetlbpage.c
--- 25/arch/i386/mm/hugetlbpage.c~hugetlbfs-accounting-fix	Wed Sep 17 14:06:21 2003
+++ 25-akpm/arch/i386/mm/hugetlbpage.c	Wed Sep 17 14:06:22 2003
@@ -355,14 +355,21 @@ int hugetlb_prefault(struct address_spac
 			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
 		page = find_get_page(mapping, idx);
 		if (!page) {
+			/* charge the fs quota first */
+			if (hugetlb_get_quota(mapping)) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			page = alloc_hugetlb_page();
 			if (!page) {
+				hugetlb_put_quota(mapping);
 				ret = -ENOMEM;
 				goto out;
 			}
 			ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
 			unlock_page(page);
 			if (ret) {
+				hugetlb_put_quota(mapping);
 				free_huge_page(page);
 				goto out;
 			}
diff -puN arch/ia64/mm/hugetlbpage.c~hugetlbfs-accounting-fix arch/ia64/mm/hugetlbpage.c
--- 25/arch/ia64/mm/hugetlbpage.c~hugetlbfs-accounting-fix	Wed Sep 17 14:06:21 2003
+++ 25-akpm/arch/ia64/mm/hugetlbpage.c	Wed Sep 17 14:06:22 2003
@@ -297,14 +297,21 @@ int hugetlb_prefault(struct address_spac
 			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
 		page = find_get_page(mapping, idx);
 		if (!page) {
+			/* charge the fs quota first */
+			if (hugetlb_get_quota(mapping)) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			page = alloc_hugetlb_page();
 			if (!page) {
+				hugetlb_put_quota(mapping);
 				ret = -ENOMEM;
 				goto out;
 			}
 			ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
 			unlock_page(page);
 			if (ret) {
+				hugetlb_put_quota(mapping);
 				free_huge_page(page);
 				goto out;
 			}
diff -puN fs/hugetlbfs/inode.c~hugetlbfs-accounting-fix fs/hugetlbfs/inode.c
--- 25/fs/hugetlbfs/inode.c~hugetlbfs-accounting-fix	Wed Sep 17 14:06:21 2003
+++ 25-akpm/fs/hugetlbfs/inode.c	Wed Sep 17 14:06:22 2003
@@ -47,7 +47,6 @@ static int hugetlbfs_file_mmap(struct fi
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(inode->i_sb);
 	loff_t len, vma_len;
 	int ret;
 
@@ -61,19 +60,8 @@ static int hugetlbfs_file_mmap(struct fi
 		return -EINVAL;
 
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
-	if (sbinfo->free_blocks >= 0) { /* Check if there is any size limit. */
-		spin_lock(&sbinfo->stat_lock);
-		if ((vma_len >> HPAGE_SHIFT) <= sbinfo->free_blocks) {
-			sbinfo->free_blocks -= (vma_len >> HPAGE_SHIFT);
-			spin_unlock(&sbinfo->stat_lock);
-		} else {
-			spin_unlock(&sbinfo->stat_lock);
-			return -ENOMEM;
-		}
-	}
 
 	down(&inode->i_sem);
-
 	update_atime(inode);
 	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
 	vma->vm_ops = &hugetlb_vm_ops;
@@ -83,15 +71,6 @@ static int hugetlbfs_file_mmap(struct fi
 		inode->i_size = len;
 	up(&inode->i_sem);
 
-	/*
-	 * If the huge page allocation has failed then increment free_blocks.
-	 */
-	if ((ret != 0) && (sbinfo->free_blocks >= 0)) {
-		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_blocks += (vma_len >> HPAGE_SHIFT);
-		spin_unlock(&sbinfo->stat_lock);
-	}
-
 	return ret;
 }
 
@@ -178,7 +157,6 @@ void truncate_huge_page(struct page *pag
 
 void truncate_hugepages(struct address_space *mapping, loff_t lstart)
 {
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
 	const pgoff_t start = lstart >> HPAGE_SHIFT;
 	struct pagevec pvec;
 	pgoff_t next;
@@ -203,11 +181,7 @@ void truncate_hugepages(struct address_s
 			++next;
 			truncate_huge_page(page);
 			unlock_page(page);
-			if (sbinfo->free_blocks >= 0) {
-				spin_lock(&sbinfo->stat_lock);
-				sbinfo->free_blocks++;
-				spin_unlock(&sbinfo->stat_lock);
-			}
+			hugetlb_put_quota(mapping);
 		}
 		huge_pagevec_release(&pvec);
 	}
diff -puN include/linux/hugetlb.h~hugetlbfs-accounting-fix include/linux/hugetlb.h
--- 25/include/linux/hugetlb.h~hugetlbfs-accounting-fix	Wed Sep 17 14:06:21 2003
+++ 25-akpm/include/linux/hugetlb.h	Wed Sep 17 14:06:22 2003
@@ -93,6 +93,36 @@ static inline struct hugetlbfs_sb_info *
 	return sb->s_fs_info;
 }
 
+static inline int hugetlb_get_quota(struct address_space *mapping)
+{
+	int ret = 0;
+	struct hugetlbfs_sb_info *sbinfo =
+		HUGETLBFS_SB(mapping->host->i_sb);
+
+	if (sbinfo->free_blocks > -1) {
+		spin_lock(&sbinfo->stat_lock);
+		if (sbinfo->free_blocks > 0)
+			sbinfo->free_blocks--;
+		else
+			ret = -ENOMEM;
+		spin_unlock(&sbinfo->stat_lock);
+	}
+
+	return ret;
+}
+
+static inline void hugetlb_put_quota(struct address_space *mapping)
+{
+	struct hugetlbfs_sb_info *sbinfo =
+		HUGETLBFS_SB(mapping->host->i_sb);
+
+	if (sbinfo->free_blocks > -1) {
+		spin_lock(&sbinfo->stat_lock);
+		sbinfo->free_blocks++;
+		spin_unlock(&sbinfo->stat_lock);
+	}
+}
+
 #define PSEUDO_DIRENT_SIZE	20
 
 extern struct file_operations hugetlbfs_file_operations;

_
