

msync(MS_ASYNC) currently waits on previously-submitted I/O, then starts new
I/O and does not wait on it.  This can cause undesirable blocking if msync is
called rapidly against the same memory.

So instead, change msync(MS_ASYNC) to not start any IO at all.  Just flush
the pte dirty bits into the pageframe and leave it at that.

The IO _will_ happen within a kupdate period.  And the application can use
fsync() or fadvise(FADV_DONTNEED) if it actually wants to schedule the IO
immediately.



 25-akpm/mm/msync.c |   30 +++++++++++++++---------------
 1 files changed, 15 insertions(+), 15 deletions(-)

diff -puN mm/msync.c~MS_ASYNC-more-async mm/msync.c
--- 25/mm/msync.c~MS_ASYNC-more-async	Mon Apr  7 13:13:37 2003
+++ 25-akpm/mm/msync.c	Mon Apr  7 13:17:13 2003
@@ -125,11 +125,13 @@ static int filemap_sync(struct vm_area_s
 /*
  * MS_SYNC syncs the entire file - including mappings.
  *
- * MS_ASYNC initiates writeout of just the dirty mapped data.
- * This provides no guarantee of file integrity - things like indirect
- * blocks may not have started writeout.  MS_ASYNC is primarily useful
- * where the application knows that it has finished with the data and
- * wishes to intelligently schedule its own I/O traffic.
+ * MS_ASYNC does not start I/O (it used to, up to 2.5.67).  Instead, it just
+ * marks the relevant pages dirty.  The application may now run fsync() to
+ * write out the dirty pages and wait on the writeout and check the result.
+ * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
+ * async writeout immediately.
+ * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
+ * applications.
  */
 static int msync_interval(struct vm_area_struct * vma,
 	unsigned long start, unsigned long end, int flags)
@@ -143,22 +145,20 @@ static int msync_interval(struct vm_area
 	if (file && (vma->vm_flags & VM_SHARED)) {
 		ret = filemap_sync(vma, start, end-start, flags);
 
-		if (!ret && (flags & (MS_SYNC|MS_ASYNC))) {
-			struct inode * inode = file->f_dentry->d_inode;
+		if (!ret && (flags & MS_SYNC)) {
+			struct inode *inode = file->f_dentry->d_inode;
 			int err;
 
 			down(&inode->i_sem);
 			ret = filemap_fdatawrite(inode->i_mapping);
-			if (flags & MS_SYNC) {
-				if (file->f_op && file->f_op->fsync) {
-					err = file->f_op->fsync(file, file->f_dentry, 1);
-					if (err && !ret)
-						ret = err;
-				}
-				err = filemap_fdatawait(inode->i_mapping);
-				if (!ret)
+			if (file->f_op && file->f_op->fsync) {
+				err = file->f_op->fsync(file,file->f_dentry,1);
+				if (err && !ret)
 					ret = err;
 			}
+			err = filemap_fdatawait(inode->i_mapping);
+			if (!ret)
+				ret = err;
 			up(&inode->i_sem);
 		}
 	}

_
