bk://linux-ntfs.bkbits.net/ntfs-2.6-devel
aia21@cantab.net|ChangeSet|20040608103654|38230 aia21

# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2004/06/08 11:36:54+01:00 aia21@cantab.net 
#   NTFS: 2.1.13 - Enable overwriting of resident files and housekeeping of system files.
#   - Mark the volume dirty when (re)mounting read-write and mark it clean
#     when unmounting or remounting read-only.  If any volume errors are
#     found, the volume is left marked dirty to force chkdsk to run.
#   - Add code to set the NT4 compatibility flag when (re)mounting
#     read-write for newer NTFS versions but leave it commented out for now
#     since we do not make any modifications that are NTFS 1.2 specific yet
#     and since setting this flag breaks Captive-NTFS which is not nice.
#     This code must be enabled once we start writing NTFS 1.2 specific
#     changes otherwise Windows NTFS driver might crash / cause corruption.
#   - Fix a silly bug that caused a deadlock in ntfs_mft_writepage().
#     For inode 0, i.e. $MFT itself, we cannot use ilookup5() from
#     there because the inode is already locked by the kernel
#     (fs/fs-writeback.c::__sync_single_inode()) and ilookup5() waits
#     until the inode is unlocked before returning it and it never gets
#     unlocked because ntfs_mft_writepage() never returns.  )-:
#     Fortunately, we have inode 0 pinned in icache for the duration
#     of the mount so we can access it directly.
#   
#   Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
# 
# fs/ntfs/super.c
#   2004/06/08 11:36:48+01:00 aia21@cantab.net +139 -35
#   - Mark the volume dirty when (re)mounting read-write and mark it clean
#     when unmounting or remounting read-only.  If any volume errors are
#     found, the volume is left marked dirty to force chkdsk to run.
#   - Add code to set the NT4 compatibility flag when (re)mounting
#     read-write for newer NTFS versions but leave it commented out for now
#     since we do not make any modifications that are NTFS 1.2 specific yet
#     and since setting this flag breaks Captive-NTFS which is not nice.
#     This code must be enabled once we start writing NTFS 1.2 specific
#     changes otherwise Windows NTFS driver might crash / cause corruption.
# 
# fs/ntfs/mft.c
#   2004/06/08 11:36:48+01:00 aia21@cantab.net +41 -5
#   Fix a silly bug that caused a deadlock in ntfs_mft_writepage().
#   For inode 0, i.e. $MFT itself, we cannot use ilookup5() from
#   there because the inode is already locked by the kernel
#   (fs/fs-writeback.c::__sync_single_inode()) and ilookup5() waits
#   until the inode is unlocked before returning it and it never gets
#   unlocked because ntfs_mft_writepage() never returns.  )-:
#   Fortunately, we have inode 0 pinned in icache for the duration
#   of the mount so we can access it directly.
# 
# fs/ntfs/ChangeLog
#   2004/06/08 11:36:48+01:00 aia21@cantab.net +1 -1
#   Missed a line.
# 
# fs/ntfs/Makefile
#   2004/06/08 09:36:50+01:00 aia21@cantab.net +1 -1
#   Bump version to 2.1.13.
# 
# fs/ntfs/ChangeLog
#   2004/06/08 09:36:50+01:00 aia21@cantab.net +12 -1
#   Update for 2.1.13 release.
# 
# Documentation/filesystems/ntfs.txt
#   2004/06/08 09:36:50+01:00 aia21@cantab.net +13 -0
#   Update for 2.1.13 release.
# 
# ChangeSet
#   2004/06/07 10:40:50+01:00 aia21@cantab.net 
#   NTFS: Add functions ntfs_{clear,set}_volume_flags(), to modify the volume
#         information flags (fs/ntfs/super.c).
#   
#   Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
# 
# fs/ntfs/super.c
#   2004/06/07 10:40:44+01:00 aia21@cantab.net +95 -0
#   Add functions ntfs_{clear,set}_volume_flags(), to modify the volume
#   information flags.
# 
# fs/ntfs/ChangeLog
#   2004/06/07 10:40:44+01:00 aia21@cantab.net +2 -0
#   Update
# 
# ChangeSet
#   2004/06/04 16:59:48+01:00 aia21@cantab.net 
#   NTFS: Implement ntfs_mft_writepage() so it now checks if any of the mft
#         records in the page are dirty and if so redirties the page and
#         returns.  Otherwise it just returns (after doing set_page_writeback(),
#         unlock_page(), end_page_writeback() or the radix-tree tag
#         PAGECACHE_TAG_DIRTY remains set even though the page is clean), thus
#         allowing the VM to do with the page as it pleases.  Also, at umount
#         time, now only throw away dirty mft (meta)data pages if dirty inodes
#         are present and ask the user to email us if they see this happening.
#   
#   Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
# 
# fs/ntfs/super.c
#   2004/06/04 16:59:41+01:00 aia21@cantab.net +29 -11
#   Only throw away dirty mft (meta)data page cache pages if dirty
#   inodes are present as this should never happen any more with the
#   new ntfs_mft_writepage() implementation.  Ask the user to email
#   us if they see this happening.
# 
# fs/ntfs/mft.c
#   2004/06/04 16:59:41+01:00 aia21@cantab.net +167 -5
#   Implement ntfs_mft_writepage() so that it checks if any of the mft
#   records in the page are dirty and if so redirties the page before 
#   unlocking it.  Otherwise it just returns (after doing
#   set_page_writeback(), unlock_page(), end_page_writeback() or the
#   radix-tree tag PAGECACHE_TAG_DIRTY remains set even though the
#   page is clean).
# 
# fs/ntfs/ChangeLog
#   2004/06/04 16:59:41+01:00 aia21@cantab.net +8 -0
#   Update
# 
# ChangeSet
#   2004/06/04 16:35:54+01:00 aia21@cantab.net 
#   NTFS: Use set_page_writeback()/end_page_writeback() in ntfs_writepage()
#         resident attribute write code path as otherwise the radix-tree tag
#         PAGECACHE_TAG_DIRTY remains set even though the page is clean.
#   
#   Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
# 
# fs/ntfs/aops.c
#   2004/06/04 16:35:48+01:00 aia21@cantab.net +15 -12
#   - Use set_page_writeback()/end_page_writeback() in ntfs_writepage()
#     resident attribute write code path as otherwise the radix-tree tag
#     PAGECACHE_TAG_DIRTY remains set even though the page is clean.
#   - Cleanup some debug output.
# 
# fs/ntfs/ChangeLog
#   2004/06/04 16:35:47+01:00 aia21@cantab.net +4 -0
#   Update
# 
# ChangeSet
#   2004/06/01 17:00:58+01:00 aia21@cantab.net 
#   NTFS: - Implement fs/ntfs/mft.[hc]::{,__}mark_mft_record_dirty() and make
#           fs/ntfs/aops.c::ntfs_writepage() and ntfs_commit_write() use it, thus
#           finally enabling resident file overwrite!  (-8  This also includes a
#           placeholder for ->writepage (ntfs_mft_writepage()), which for now
#           just redirties the page and returns.  Also, at umount time, we for
#           now throw away all mft data page cache pages after the last call to
#           ntfs_commit_inode() in the hope that all inodes will have been
#           written out by then and hence no dirty (meta)data will be lost.  We
#           also check for this case and emit an error message telling the user
#           to run chkdsk.
#         - If the user is trying to enable (dir)atime updates, warn about the
#           fact that we are disabling them.
#   
#   Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
# 
# fs/ntfs/super.c
#   2004/06/01 17:00:52+01:00 aia21@cantab.net +27 -2
#   - At umount time, we for now throw away all mft data page cache
#     pages after the last call to ntfs_commit_inode() in the hope
#     that all inodes will have been written out by then and hence
#     no dirty (meta)data will be lost.  We also check for this case
#     and emit an error message telling the user to run chkdsk.
#   - If the user is trying to enable (dir)atime updates, warn about
#     the fact that we are disabling them.
# 
# fs/ntfs/mft.h
#   2004/06/01 17:00:52+01:00 aia21@cantab.net +19 -0
#   Implement {,__}mark_mft_record_dirty().
# 
# fs/ntfs/mft.c
#   2004/06/01 17:00:52+01:00 aia21@cantab.net +77 -0
#   Implement __mark_mft_record_dirty() and a placeholder for
#   ->writepage (ntfs_mft_writepage()), which for now just
#   redirties the page and returns.
# 
# fs/ntfs/aops.c
#   2004/06/01 17:00:52+01:00 aia21@cantab.net +14 -19
#   Use mark_mft_record_dirty() in ntfs_writepage() and ntfs_commit_write(),
#   thus finally enabling resident file overwrite!  (-8
# 
# fs/ntfs/ChangeLog
#   2004/06/01 17:00:52+01:00 aia21@cantab.net +10 -1
#   Update
# 
# ChangeSet
#   2004/05/28 16:24:23+01:00 aia21@cantab.net 
#   NTFS: Implement ->write_inode (fs/ntfs/inode.c::ntfs_write_inode()) for the
#         ntfs super operations.  This gives us inode writing via the VFS inode
#         dirty code paths.  Note:  Access time updates are not implemented yet.
#   
#   Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
# 
# fs/ntfs/super.c
#   2004/05/28 16:24:17+01:00 aia21@cantab.net +2 -2
#   Set ntfs_write_inode() to be our sops->write_inode.
# 
# fs/ntfs/inode.c
#   2004/05/28 16:24:17+01:00 aia21@cantab.net +101 -16
#   Implement ntfs_write_inode().
# 
# fs/ntfs/ChangeLog
#   2004/05/28 16:24:17+01:00 aia21@cantab.net +7 -1
#   Update.
# 
# ChangeSet
#   2004/05/28 12:38:37+01:00 aia21@cantab.net 
#   NTFS: Commit open system inodes at umount time.  This should make it
#         virtually impossible for sync_mft_mirror_umount() to ever be needed.
#   
#   Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
# 
# fs/ntfs/super.c
#   2004/05/28 12:38:31+01:00 aia21@cantab.net +35 -0
#   Commit open system inodes at umount time.
# 
# fs/ntfs/ChangeLog
#   2004/05/28 12:38:31+01:00 aia21@cantab.net +2 -0
#   Update.
# 
# ChangeSet
#   2004/05/28 12:29:35+01:00 aia21@cantab.net 
#   NTFS: Implement writing of mft records (fs/ntfs/mft.[hc]), which includes
#         keeping the mft mirror in sync with the mft when mirrored mft records
#         are written.  The functions are write_mft_record{,_nolock}().  The
#         implementation is quite rudimentary for now with lots of things not
#         implemented yet but I am not sure any of them can actually occur so
#         I will wait for people to hit each one and only then implement it.
#   
#   Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
# 
# fs/ntfs/mft.h
#   2004/05/28 12:29:29+01:00 aia21@cantab.net +35 -0
#   Add write_mft_record{,_nolock}().
# 
# fs/ntfs/mft.c
#   2004/05/28 12:29:29+01:00 aia21@cantab.net +387 -0
#   Add write_mft_record{,_nolock}().
# 
# fs/ntfs/compress.c
#   2004/05/28 12:29:29+01:00 aia21@cantab.net +2 -2
#   Error messages typo fixes.
# 
# fs/ntfs/attrib.c
#   2004/05/28 12:29:29+01:00 aia21@cantab.net +2 -2
#   Debug and error messages typo fixes.
# 
# fs/ntfs/aops.c
#   2004/05/28 12:29:29+01:00 aia21@cantab.net +1 -1
#   Debug message typo fix.
# 
# fs/ntfs/Makefile
#   2004/05/28 12:29:29+01:00 aia21@cantab.net +1 -1
#   Update.
# 
# fs/ntfs/ChangeLog
#   2004/05/28 12:29:29+01:00 aia21@cantab.net +15 -0
#   Update.
# 
diff -Nru a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
--- a/Documentation/filesystems/ntfs.txt	2004-06-08 21:54:24 -07:00
+++ b/Documentation/filesystems/ntfs.txt	2004-06-08 21:54:24 -07:00
@@ -273,6 +273,19 @@
 
 Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
 
+2.1.13:
+	- Implement writing of inodes (access time updates are not implemented
+	  yet so mounting with -o noatime,nodiratime is enforced).
+	- Enable writing out of resident files so you can now overwrite any
+	  uncompressed, unencrypted, nonsparse file as long as you do not
+	  change the file size.
+	- Add housekeeping of ntfs system files so that ntfsfix no longer needs
+	  to be run after writing to an NTFS volume.
+	  NOTE:  This still leaves quota tracking and user space journalling on
+	  the side but they should not cause data corruption.  In the worst
+	  case the charged quotas will be out of date ($Quota) and some
+	  userspace applications might get confused due to the out of date
+	  userspace journal ($UsnJrnl).
 2.1.12:
 	- Fix the second fix to the decompression engine from the 2.1.9 release
 	  and some further internals cleanups.
diff -Nru a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
--- a/fs/ntfs/ChangeLog	2004-06-08 21:54:24 -07:00
+++ b/fs/ntfs/ChangeLog	2004-06-08 21:54:24 -07:00
@@ -1,4 +1,4 @@
-ToDo:
+ToDo/Notes:
 	- Find and fix bugs.
 	- Either invalidate quotas or update the quota charges on NTFS 3.x
 	  volumes with quota tracking enabled ($Quota).
@@ -11,8 +11,10 @@
 	  pages as nothing can dirty a page other than ourselves. Should this
 	  change, we will really need to roll our own ->set_page_dirty().
 	- Implement sops->dirty_inode() to implement {a,m,c}time updates and
-	  such things.
-	- Implement sops->write_inode().
+	  such things.  This should probably just flag the ntfs inode such that
+	  sops->write_inode(), i.e. ntfs_write_inode(), will copy the times
+	  when it is invoked rather than having to update the mft record
+	  every time.
 	- In between ntfs_prepare/commit_write, need exclusion between
 	  simultaneous file extensions. Need perhaps an NInoResizeUnderway()
 	  flag which we can set in ntfs_prepare_write() and clear again in
@@ -24,6 +26,61 @@
 	  OTOH, perhaps i_sem, which is held accross generic_file_write is
 	  sufficient for synchronisation here. We then just need to make sure
 	  ntfs_readpage/writepage/truncate interoperate properly with us.
+	- Implement mft.c::sync_mft_mirror_umount().  We currently will just
+	  leave the volume dirty on umount if the final iput(vol->mft_ino)
+	  causes a write of any mirrored mft records due to the mft mirror
+	  inode having been discarded already.  Whether this can actually ever
+	  happen is unclear however so it is worth waiting until someone hits
+	  the problem.
+	- Enable the code for setting the NT4 compatibility flag when we start
+	  making NTFS 1.2 specific modifications.
+
+2.1.13 - Enable overwriting of resident files and housekeeping of system files.
+
+	- Implement writing of mft records (fs/ntfs/mft.[hc]), which includes
+	  keeping the mft mirror in sync with the mft when mirrored mft records
+	  are written.  The functions are write_mft_record{,_nolock}().  The
+	  implementation is quite rudimentary for now with lots of things not
+	  implemented yet but I am not sure any of them can actually occur so
+	  I will wait for people to hit each one and only then implement it.
+	- Commit open system inodes at umount time.  This should make it
+	  virtually impossible for sync_mft_mirror_umount() to ever be needed.
+	- Implement ->write_inode (fs/ntfs/inode.c::ntfs_write_inode()) for the
+	  ntfs super operations.  This gives us inode writing via the VFS inode
+	  dirty code paths.  Note:  Access time updates are not implemented yet.
+	- Implement fs/ntfs/mft.[hc]::{,__}mark_mft_record_dirty() and make
+	  fs/ntfs/aops.c::ntfs_writepage() and ntfs_commit_write() use it, thus
+	  finally enabling resident file overwrite!  (-8  This also includes a
+	  placeholder for ->writepage (ntfs_mft_writepage()), which for now
+	  just redirties the page and returns.  Also, at umount time, we for
+	  now throw away all mft data page cache pages after the last call to
+	  ntfs_commit_inode() in the hope that all inodes will have been
+	  written out by then and hence no dirty (meta)data will be lost.  We
+	  also check for this case and emit an error message telling the user
+	  to run chkdsk.
+	- Use set_page_writeback() and end_page_writeback() in the resident
+	  attribute code path of fs/ntfs/aops.c::ntfs_writepage() otherwise
+	  the radix-tree tag PAGECACHE_TAG_DIRTY remains set even though the
+	  page is clean.
+	- Implement ntfs_mft_writepage() so it now checks if any of the mft
+	  records in the page are dirty and if so redirties the page and
+	  returns.  Otherwise it just returns (after doing set_page_writeback(),
+	  unlock_page(), end_page_writeback() or the radix-tree tag
+	  PAGECACHE_TAG_DIRTY remains set even though the page is clean), thus
+	  allowing the VM to do with the page as it pleases.  Also, at umount
+	  time, now only throw away dirty mft (meta)data pages if dirty inodes
+	  are present and ask the user to email us if they see this happening.
+	- Add functions ntfs_{clear,set}_volume_flags(), to modify the volume
+	  information flags (fs/ntfs/super.c).
+	- Mark the volume dirty when (re)mounting read-write and mark it clean
+	  when unmounting or remounting read-only.  If any volume errors are
+	  found, the volume is left marked dirty to force chkdsk to run.
+	- Add code to set the NT4 compatibility flag when (re)mounting
+	  read-write for newer NTFS versions but leave it commented out for now
+	  since we do not make any modifications that are NTFS 1.2 specific yet
+	  and since setting this flag breaks Captive-NTFS which is not nice.
+	  This code must be enabled once we start writing NTFS 1.2 specific
+	  changes otherwise Windows NTFS driver might crash / cause corruption.
 
 2.1.12 - Fix the second fix to the decompression engine and some cleanups.
 
diff -Nru a/fs/ntfs/Makefile b/fs/ntfs/Makefile
--- a/fs/ntfs/Makefile	2004-06-08 21:54:24 -07:00
+++ b/fs/ntfs/Makefile	2004-06-08 21:54:24 -07:00
@@ -5,7 +5,7 @@
 ntfs-objs := aops.o attrib.o compress.o debug.o dir.o file.o inode.o mft.o \
 	     mst.o namei.o super.o sysctl.o unistr.o upcase.o
 
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.12\"
+EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.13\"
 
 ifeq ($(CONFIG_NTFS_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
diff -Nru a/fs/ntfs/aops.c b/fs/ntfs/aops.c
--- a/fs/ntfs/aops.c	2004-06-08 21:54:24 -07:00
+++ b/fs/ntfs/aops.c	2004-06-08 21:54:24 -07:00
@@ -478,8 +478,8 @@
 	ni = NTFS_I(vi);
 	vol = ni->vol;
 
-	ntfs_debug("Entering for inode %li, attribute type 0x%x, page index "
-			"0x%lx.\n", vi->i_ino, ni->type, page->index);
+	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
+			"0x%lx.", vi->i_ino, ni->type, page->index);
 
 	BUG_ON(!NInoNonResident(ni));
 	BUG_ON(NInoMstProtected(ni));
@@ -778,9 +778,8 @@
  *
  * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
  * the data to the mft record (which at this stage is most likely in memory).
- * Thus, in this case, I/O is synchronous, as even if the mft record is not
- * cached at this point in time, we need to wait for it to be read in before we
- * can do the copy.
+ * The mft record is then marked dirty and written out asynchronously via the
+ * vfs inode dirty code path.
  *
  * Note the caller clears the page dirty flag before calling ntfs_writepage().
  *
@@ -875,16 +874,6 @@
 	BUG_ON(page_has_buffers(page));
 	BUG_ON(!PageUptodate(page));
 
-	// TODO: Consider using PageWriteback() + unlock_page() in 2.5 once the
-	// "VM fiddling has ended". Note, don't forget to replace all the
-	// unlock_page() calls further below with end_page_writeback() ones.
-	// FIXME: Make sure it is ok to SetPageError() on unlocked page under
-	// writeback before doing the change!
-#if 0
-	set_page_writeback(page);
-	unlock_page(page);
-#endif
-
 	if (!NInoAttr(ni))
 		base_ni = ni;
 	else
@@ -935,6 +924,14 @@
 		bytes = PAGE_CACHE_SIZE;
 
 	/*
+	 * Keep the VM happy.  This must be done otherwise the radix-tree tag
+	 * PAGECACHE_TAG_DIRTY remains set even though the page is clean.
+	 */
+	BUG_ON(PageWriteback(page));
+	set_page_writeback(page);
+	unlock_page(page);
+
+	/*
 	 * Here, we don't need to zero the out of bounds area everytime because
 	 * the below memcpy() already takes care of the mmap-at-end-of-file
 	 * requirements. If the file is converted to a non-resident one, then
@@ -948,7 +945,10 @@
 	 * expose data to userspace/disk which should never have been exposed.
 	 *
 	 * FIXME: Ensure that i_size increases do the zeroing/overwriting and
-	 * if we cannot guarantee that, then enable the zeroing below.
+	 * if we cannot guarantee that, then enable the zeroing below.  If the
+	 * zeroing below is enabled, we MUST move the unlock_page() from above
+	 * to after the kunmap_atomic(), i.e. just before the
+	 * end_page_writeback().
 	 */
 
 	kaddr = kmap_atomic(page, KM_USER0);
@@ -966,11 +966,10 @@
 #endif
 	kunmap_atomic(kaddr, KM_USER0);
 
-	unlock_page(page);
+	end_page_writeback(page);
 
-	// TODO: Mark mft record dirty so it gets written back.
-	ntfs_error(vi->i_sb, "Writing to resident files is not supported yet. "
-			"Wrote to memory only...");
+	/* Mark the mft record dirty, so it gets written back. */
+	mark_mft_record_dirty(ctx->ntfs_ino);
 
 	put_attr_search_ctx(ctx);
 	unmap_mft_record(base_ni);
@@ -1022,7 +1021,7 @@
 	ni = NTFS_I(vi);
 	vol = ni->vol;
 
-	ntfs_debug("Entering for inode %li, attribute type 0x%x, page index "
+	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
 			"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
 			page->index, from, to);
 
@@ -1379,7 +1378,7 @@
 	struct inode *vi = page->mapping->host;
 	ntfs_inode   *ni = NTFS_I(vi);
 
-	ntfs_debug("Entering for inode %li, attribute type 0x%x, page index "
+	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
 			"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
 			page->index, from, to);
 
@@ -1487,7 +1486,7 @@
 
 	vi = page->mapping->host;
 
-	ntfs_debug("Entering for inode %li, attribute type 0x%x, page index "
+	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
 			"0x%lx, from = %u, to = %u.", vi->i_ino,
 			NTFS_I(vi)->type, page->index, from, to);
 
@@ -1583,7 +1582,7 @@
 	vi = page->mapping->host;
 	ni = NTFS_I(vi);
 
-	ntfs_debug("Entering for inode %li, attribute type 0x%x, page index "
+	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
 			"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
 			page->index, from, to);
 
@@ -1734,9 +1733,8 @@
 	}
 	kunmap_atomic(kaddr, KM_USER0);
 
-	// TODO: Mark mft record dirty so it gets written back.
-	ntfs_error(vi->i_sb, "Writing to resident files is not supported yet. "
-			"Wrote to memory only...");
+	/* Mark the mft record dirty, so it gets written back. */
+	mark_mft_record_dirty(ctx->ntfs_ino);
 
 	put_attr_search_ctx(ctx);
 	unmap_mft_record(base_ni);
diff -Nru a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
--- a/fs/ntfs/attrib.c	2004-06-08 21:54:24 -07:00
+++ b/fs/ntfs/attrib.c	2004-06-08 21:54:24 -07:00
@@ -624,7 +624,7 @@
 
 			if (drl[ds].vcn == marker_vcn) {
 				ntfs_debug("Old marker = 0x%llx, replacing "
-						"with LCN_ENOENT.\n",
+						"with LCN_ENOENT.",
 						(unsigned long long)
 						drl[ds].lcn);
 				drl[ds].lcn = (LCN)LCN_ENOENT;
@@ -1565,7 +1565,7 @@
 		goto do_next_attr_loop;
 	}
 	ntfs_error(base_ni->vol->sb, "Inode contains corrupt attribute list "
-			"attribute.\n");
+			"attribute.");
 	if (ni != base_ni) {
 		unmap_extent_mft_record(ni);
 		ctx->ntfs_ino = base_ni;
diff -Nru a/fs/ntfs/compress.c b/fs/ntfs/compress.c
--- a/fs/ntfs/compress.c	2004-06-08 21:54:24 -07:00
+++ b/fs/ntfs/compress.c	2004-06-08 21:54:24 -07:00
@@ -433,7 +433,7 @@
 	goto do_next_tag;
 
 return_overflow:
-	ntfs_error(NULL, "Failed. Returning -EOVERFLOW.\n");
+	ntfs_error(NULL, "Failed. Returning -EOVERFLOW.");
 	goto return_error;
 }
 
@@ -851,7 +851,7 @@
 		if (err) {
 			ntfs_error(vol->sb, "ntfs_decompress() failed in inode "
 					"0x%lx with error code %i. Skipping "
-					"this compression block.\n",
+					"this compression block.",
 					ni->mft_no, -err);
 			/* Release the unfinished pages. */
 			for (; prev_cur_page < cur_page; prev_cur_page++) {
diff -Nru a/fs/ntfs/inode.c b/fs/ntfs/inode.c
--- a/fs/ntfs/inode.c	2004-06-08 21:54:24 -07:00
+++ b/fs/ntfs/inode.c	2004-06-08 21:54:24 -07:00
@@ -1960,49 +1960,134 @@
 	return err;
 }
 
+/**
+ * ntfs_write_inode - write out a dirty inode
+ * @vi:		inode to write out
+ * @sync:	if true, write out synchronously
+ *
+ * Write out a dirty inode to disk including any extent inodes if present.
+ *
+ * If @sync is true, commit the inode to disk and wait for io completion.  This
+ * is done using write_mft_record().
+ *
+ * If @sync is false, just schedule the write to happen but do not wait for i/o
+ * completion.  In 2.6 kernels, scheduling usually happens just by virtue of
+ * marking the page (and in this case mft record) dirty but we do not implement
+ * this yet as write_mft_record() largely ignores the @sync parameter and
+ * always performs synchronous writes.
+ */
 void ntfs_write_inode(struct inode *vi, int sync)
 {
 	ntfs_inode *ni = NTFS_I(vi);
+#if 0
+	attr_search_context *ctx;
+#endif
+	MFT_RECORD *m;
+	int err = 0;
 
 	ntfs_debug("Entering for %sinode 0x%lx.", NInoAttr(ni) ? "attr " : "",
 			vi->i_ino);
-
 	/*
 	 * Dirty attribute inodes are written via their real inodes so just
-	 * clean them here.
+	 * clean them here.  TODO:  Take care of access time updates.
 	 */
 	if (NInoAttr(ni)) {
 		NInoClearDirty(ni);
 		return;
 	}
-
-	/* Write this base mft record. */
-	if (NInoDirty(ni)) {
-		ntfs_warning(vi->i_sb, "Cleaning dirty inode 0x%lx without "
-				"writing to disk as this is not yet "
-				"implemented.", vi->i_ino);
-		NInoClearDirty(ni);
+	/* Map, pin, and lock the mft record belonging to the inode. */
+	m = map_mft_record(ni);
+	if (unlikely(IS_ERR(m))) {
+		err = PTR_ERR(m);
+		goto err_out;
 	}
-
+#if 0
+	/* Obtain the standard information attribute. */
+	ctx = get_attr_search_ctx(ni, m);
+	if (unlikely(!ctx)) {
+		err = -ENOMEM;
+		goto unm_err_out;
+	}
+	if (unlikely(!lookup_attr(AT_STANDARD_INFORMATION, NULL, 0,
+			IGNORE_CASE, 0, NULL, 0, ctx))) {
+		put_attr_search_ctx(ctx);
+		err = -ENOENT;
+		goto unm_err_out;
+	}
+	// TODO:  Update the access times in the standard information attribute
+	// which is now in ctx->attr.
+	// - Probably want to use sops->dirty_inode() to set a flag that
+	//   we need to update the times here rather than having to blindly do
+	//   it every time.  Or even don't do it here at all and do it in
+	//   sops->dirty_inode() instead.  Problem with this would be that
+	//   sops->dirty_inode() must be atomic under certain circumstances
+	//   and mapping mft records and such like is not atomic.
+	// - For atime updates also need to check whether they are enabled in
+	//   the superblock flags.
+	ntfs_warning(vi->i_sb, "Access time updates not implement yet.");
+	/*
+	 * We just modified the mft record containing the standard information
+	 * attribute.  So need to mark the mft record dirty, too, but we do it
+	 * manually so that mark_inode_dirty() is not called again.
+	 * TODO:  Only do this if there was a change in any of the times!
+	 */
+	if (!NInoTestSetDirty(ctx->ntfs_ino))
+		__set_page_dirty_nobuffers(ctx->ntfs_ino->page);
+	put_attr_search_ctx(ctx);
+#endif
+	/* Write this base mft record. */
+	if (NInoDirty(ni))
+		err = write_mft_record(ni, m, sync);
 	/* Write all attached extent mft records. */
 	down(&ni->extent_lock);
 	if (ni->nr_extents > 0) {
-		int i;
 		ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos;
+		int i;
 
+		ntfs_debug("Writing %i extent inodes.", ni->nr_extents);
 		for (i = 0; i < ni->nr_extents; i++) {
 			ntfs_inode *tni = extent_nis[i];
 
 			if (NInoDirty(tni)) {
-				ntfs_warning(vi->i_sb, "Cleaning dirty extent "
-						"inode 0x%lx without writing "
-						"to disk as this is not yet "
-						"implemented.", tni->mft_no);
-				NInoClearDirty(tni);
+				MFT_RECORD *tm = map_mft_record(tni);
+				int ret;
+
+				if (unlikely(IS_ERR(tm))) {
+					if (!err || err == -ENOMEM)
+						err = PTR_ERR(tm);
+					continue;
+				}
+				ret = write_mft_record(tni, tm, sync);
+				unmap_mft_record(tni);
+				if (unlikely(ret)) {
+					if (!err || err == -ENOMEM)
+						err = ret;
+				}
 			}
 		}
 	}
 	up(&ni->extent_lock);
+	unmap_mft_record(ni);
+	if (unlikely(err))
+		goto err_out;
+	ntfs_debug("Done.");
+	return;
+#if 0
+unm_err_out:
+	unmap_mft_record(ni);
+#endif
+err_out:
+	if (err == -ENOMEM) {
+		ntfs_warning(vi->i_sb, "Not enough memory to write inode.  "
+				"Marking the inode dirty again, so the VFS "
+				"retries later.");
+		mark_inode_dirty(vi);
+	} else {
+		ntfs_error(vi->i_sb, "Failed (error code %i):  Marking inode "
+				"as bad.  You should run chkdsk.", -err);
+		make_bad_inode(vi);
+	}
+	return;
 }
 
 #endif /* NTFS_RW */
diff -Nru a/fs/ntfs/mft.c b/fs/ntfs/mft.c
--- a/fs/ntfs/mft.c	2004-06-08 21:54:24 -07:00
+++ b/fs/ntfs/mft.c	2004-06-08 21:54:24 -07:00
@@ -102,6 +102,13 @@
  */
 extern int ntfs_readpage(struct file *, struct page *);
 
+#ifdef NTFS_RW
+/**
+ * ntfs_mft_writepage - forward declaration, function is further below
+ */
+static int ntfs_mft_writepage(struct page *page, struct writeback_control *wbc);
+#endif /* NTFS_RW */
+
 /**
  * ntfs_mft_aops - address space operations for access to $MFT
  *
@@ -112,6 +119,10 @@
 	.readpage	= ntfs_readpage,	/* Fill page with data. */
 	.sync_page	= block_sync_page,	/* Currently, just unplugs the
 						   disk request queue. */
+#ifdef NTFS_RW
+	.writepage	= ntfs_mft_writepage,	/* Write out the dirty mft
+						   records in a page. */
+#endif /* NTFS_RW */
 };
 
 /**
@@ -429,3 +440,654 @@
 		ntfs_clear_extent_inode(ni);
 	return m;
 }
+
+#ifdef NTFS_RW
+
+/**
+ * __mark_mft_record_dirty - set the mft record and the page containing it dirty
+ * @ni:		ntfs inode describing the mapped mft record
+ *
+ * Internal function.  Users should call mark_mft_record_dirty() instead.
+ *
+ * Set the mapped (extent) mft record of the (base or extent) ntfs inode @ni,
+ * as well as the page containing the mft record, dirty.  Also, mark the base
+ * vfs inode dirty.  This ensures that any changes to the mft record are
+ * written out to disk.
+ *
+ * NOTE:  We only set I_DIRTY_SYNC and I_DIRTY_DATASYNC (and not I_DIRTY_PAGES)
+ * on the base vfs inode, because even though file data may have been modified,
+ * it is dirty in the inode meta data rather than the data page cache of the
+ * inode, and thus there are no data pages that need writing out.  Therefore, a
+ * full mark_inode_dirty() is overkill.  A mark_inode_dirty_sync(), on the
+ * other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to
+ * ensure ->write_inode is called from generic_osync_inode() and this needs to
+ * happen or the file data would not necessarily hit the device synchronously,
+ * even though the vfs inode has the O_SYNC flag set.  Also, I_DIRTY_DATASYNC
+ * simply "feels" better than just I_DIRTY_SYNC, since the file data has not
+ * actually hit the block device yet, which is not what I_DIRTY_SYNC on its own
+ * would suggest.
+ */
+void __mark_mft_record_dirty(ntfs_inode *ni)
+{
+	struct page *page = ni->page;
+	ntfs_inode *base_ni;
+
+	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
+	BUG_ON(!page);
+	BUG_ON(NInoAttr(ni));
+
+	/*
+	 * Set the page containing the mft record dirty.  This also marks the
+	 * $MFT inode dirty (I_DIRTY_PAGES).
+	 */
+	__set_page_dirty_nobuffers(page);
+
+	/* Determine the base vfs inode and mark it dirty, too. */
+	down(&ni->extent_lock);
+	if (likely(ni->nr_extents >= 0))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+	up(&ni->extent_lock);
+	__mark_inode_dirty(VFS_I(base_ni), I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+}
+
+static const char *ntfs_please_email = "Please email "
+		"linux-ntfs-dev@lists.sourceforge.net and say that you saw "
+		"this message.  Thank you.";
+
+/**
+ * sync_mft_mirror_umount - synchronize an mft record to the mft mirror
+ * @ni:		ntfs inode whose mft record to synchronize
+ * @m:		mapped, mst protected (extent) mft record to synchronize
+ *
+ * Write the mapped, mst protected (extent) mft record @m described by the
+ * (regular or extent) ntfs inode @ni to the mft mirror ($MFTMirr) bypassing
+ * the page cache and the $MFTMirr inode itself.
+ *
+ * This function is only for use at umount time when the mft mirror inode has
+ * already been disposed of.  We BUG() if we are called while the mft mirror
+ * inode is still attached to the volume.
+ *
+ * On success return 0.  On error return -errno.
+ *
+ * NOTE:  This function is not implemented yet as I am not convinced it can
+ * actually be triggered considering the sequence of commits we do in super.c::
+ * ntfs_put_super().  But just in case we provide this place holder as the
+ * alternative would be either to BUG() or to get a NULL pointer dereference
+ * and Oops.
+ */
+static int sync_mft_mirror_umount(ntfs_inode *ni, MFT_RECORD *m)
+{
+	ntfs_volume *vol = ni->vol;
+
+	BUG_ON(vol->mftmirr_ino);
+	ntfs_error(vol->sb, "Umount time mft mirror syncing is not "
+			"implemented yet.  %s", ntfs_please_email);
+	return -EOPNOTSUPP;
+}
+
+/**
+ * sync_mft_mirror - synchronize an mft record to the mft mirror
+ * @ni:		ntfs inode whose mft record to synchronize
+ * @m:		mapped, mst protected (extent) mft record to synchronize
+ * @sync:	if true, wait for i/o completion
+ *
+ * Write the mapped, mst protected (extent) mft record @m described by the
+ * (regular or extent) ntfs inode @ni to the mft mirror ($MFTMirr).
+ *
+ * On success return 0.  On error return -errno and set the volume errors flag
+ * in the ntfs_volume to which @ni belongs.
+ *
+ * NOTE:  We always perform synchronous i/o and ignore the @sync parameter.
+ *
+ * TODO:  If @sync is false, want to do truly asynchronous i/o, i.e. just
+ * schedule i/o via ->writepage or do it via kntfsd or whatever.
+ */
+static int sync_mft_mirror(ntfs_inode *ni, MFT_RECORD *m, int sync)
+{
+	ntfs_volume *vol = ni->vol;
+	struct page *page;
+	unsigned int blocksize = vol->sb->s_blocksize;
+	int max_bhs = vol->mft_record_size / blocksize;
+	struct buffer_head *bhs[max_bhs];
+	struct buffer_head *bh, *head;
+	u8 *kmirr;
+	unsigned int block_start, block_end, m_start, m_end;
+	int i_bhs, nr_bhs, err = 0;
+
+	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
+	BUG_ON(!max_bhs);
+	if (unlikely(!vol->mftmirr_ino)) {
+		/* This could happen during umount... */
+		err = sync_mft_mirror_umount(ni, m);
+		if (likely(!err))
+			return err;
+		goto err_out;
+	}
+	/* Get the page containing the mirror copy of the mft record @m. */
+	page = ntfs_map_page(vol->mftmirr_ino->i_mapping, ni->mft_no >>
+			(PAGE_CACHE_SHIFT - vol->mft_record_size_bits));
+	if (unlikely(IS_ERR(page))) {
+		ntfs_error(vol->sb, "Failed to map mft mirror page.");
+		err = PTR_ERR(page);
+		goto err_out;
+	}
+	/*
+	 * Exclusion against other writers.   This should never be a problem
+	 * since the page in which the mft record @m resides is also locked and
+	 * hence any other writers would be held up there but it is better to
+	 * make sure no one is writing from elsewhere.
+	 */
+	lock_page(page);
+	/* The address in the page of the mirror copy of the mft record @m. */
+	kmirr = page_address(page) + ((ni->mft_no << vol->mft_record_size_bits)
+			& ~PAGE_CACHE_MASK);
+	/* Copy the mst protected mft record to the mirror. */
+	memcpy(kmirr, m, vol->mft_record_size);
+	/* Make sure we have mapped buffers. */
+	if (!page_has_buffers(page)) {
+no_buffers_err_out:
+		ntfs_error(vol->sb, "Writing mft mirror records without "
+				"existing buffers is not implemented yet.  %s",
+				ntfs_please_email);
+		err = -EOPNOTSUPP;
+		goto unlock_err_out;
+	}
+	bh = head = page_buffers(page);
+	if (!bh)
+		goto no_buffers_err_out;
+	nr_bhs = 0;
+	block_start = 0;
+	m_start = kmirr - (u8*)page_address(page);
+	m_end = m_start + vol->mft_record_size;
+	do {
+		block_end = block_start + blocksize;
+		/*
+		 * If the buffer is outside the mft record, just skip it,
+		 * clearing it if it is dirty to make sure it is not written
+		 * out.  It should never be marked dirty but better be safe.
+		 */
+		if ((block_end <= m_start) || (block_start >= m_end)) {
+			if (buffer_dirty(bh)) {
+				ntfs_warning(vol->sb, "Clearing dirty mft "
+						"record page buffer.  %s",
+						ntfs_please_email);
+				clear_buffer_dirty(bh);
+			}
+			continue;
+		}
+		if (!buffer_mapped(bh)) {
+			ntfs_error(vol->sb, "Writing mft mirror records "
+					"without existing mapped buffers is "
+					"not implemented yet.  %s",
+					ntfs_please_email);
+			err = -EOPNOTSUPP;
+			continue;
+		}
+		if (!buffer_uptodate(bh)) {
+			ntfs_error(vol->sb, "Writing mft mirror records "
+					"without existing uptodate buffers is "
+					"not implemented yet.  %s",
+					ntfs_please_email);
+			err = -EOPNOTSUPP;
+			continue;
+		}
+		BUG_ON(!nr_bhs && (m_start != block_start));
+		BUG_ON(nr_bhs >= max_bhs);
+		bhs[nr_bhs++] = bh;
+		BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));
+	} while (block_start = block_end, (bh = bh->b_this_page) != head);
+	if (likely(!err)) {
+		/* Lock buffers and start synchronous write i/o on them. */
+		for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
+			struct buffer_head *tbh = bhs[i_bhs];
+
+			if (unlikely(test_set_buffer_locked(tbh)))
+				BUG();
+			BUG_ON(!buffer_uptodate(tbh));
+			if (buffer_dirty(tbh))
+				clear_buffer_dirty(tbh);
+			get_bh(tbh);
+			tbh->b_end_io = end_buffer_write_sync;
+			submit_bh(WRITE, tbh);
+		}
+		/* Wait on i/o completion of buffers. */
+		for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
+			struct buffer_head *tbh = bhs[i_bhs];
+
+			wait_on_buffer(tbh);
+			if (unlikely(!buffer_uptodate(tbh))) {
+				err = -EIO;
+				/*
+				 * Set the buffer uptodate so the page & buffer
+				 * states don't become out of sync.
+				 */
+				if (PageUptodate(page))
+					set_buffer_uptodate(tbh);
+			}
+		}
+	} else /* if (unlikely(err)) */ {
+		/* Clean the buffers. */
+		for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++)
+			clear_buffer_dirty(bhs[i_bhs]);
+	}
+unlock_err_out:
+	/* Current state: all buffers are clean, unlocked, and uptodate. */
+	/* Remove the mst protection fixups again. */
+	post_write_mst_fixup((NTFS_RECORD*)kmirr);
+	flush_dcache_page(page);
+	unlock_page(page);
+	ntfs_unmap_page(page);
+	if (unlikely(err)) {
+		/* I/O error during writing.  This is really bad! */
+		ntfs_error(vol->sb, "I/O error while writing mft mirror "
+				"record 0x%lx!  You should unmount the volume "
+				"and run chkdsk or ntfsfix.", ni->mft_no);
+		goto err_out;
+	}
+	ntfs_debug("Done.");
+	return 0;
+err_out:
+	ntfs_error(vol->sb, "Failed to synchronize $MFTMirr (error code %i).  "
+			"Volume will be left marked dirty on umount.  Run "
+			"ntfsfix on the partition after umounting to correct "
+			"this.", -err);
+	/* We don't want to clear the dirty bit on umount. */
+	NVolSetErrors(vol);
+	return err;
+}
+
+/**
+ * write_mft_record_nolock - write out a mapped (extent) mft record
+ * @ni:		ntfs inode describing the mapped (extent) mft record
+ * @m:		mapped (extent) mft record to write
+ * @sync:	if true, wait for i/o completion
+ *
+ * Write the mapped (extent) mft record @m described by the (regular or extent)
+ * ntfs inode @ni to backing store.  If the mft record @m has a counterpart in
+ * the mft mirror, that is also updated.
+ *
+ * On success, clean the mft record and return 0.  On error, leave the mft
+ * record dirty and return -errno.  The caller should call make_bad_inode() on
+ * the base inode to ensure no more access happens to this inode.  We do not do
+ * it here as the caller may want to finish writing other extent mft records
+ * first to minimize on-disk metadata inconsistencies.
+ *
+ * NOTE:  We always perform synchronous i/o and ignore the @sync parameter.
+ * However, if the mft record has a counterpart in the mft mirror and @sync is
+ * true, we write the mft record, wait for i/o completion, and only then write
+ * the mft mirror copy.  This ensures that if the system crashes either the mft
+ * or the mft mirror will contain a self-consistent mft record @m.  If @sync is
+ * false on the other hand, we start i/o on both and then wait for completion
+ * on them.  This provides a speedup but no longer guarantees that you will end
+ * up with a self-consistent mft record in the case of a crash but if you asked
+ * for asynchronous writing you probably do not care about that anyway.
+ *
+ * TODO:  If @sync is false, want to do truly asynchronous i/o, i.e. just
+ * schedule i/o via ->writepage or do it via kntfsd or whatever.
+ */
+int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
+{
+	ntfs_volume *vol = ni->vol;
+	struct page *page = ni->page;
+	unsigned int blocksize = vol->sb->s_blocksize;
+	int max_bhs = vol->mft_record_size / blocksize;
+	struct buffer_head *bhs[max_bhs];
+	struct buffer_head *bh, *head;
+	unsigned int block_start, block_end, m_start, m_end;
+	int i_bhs, nr_bhs, err = 0;
+
+	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
+	BUG_ON(NInoAttr(ni));
+	BUG_ON(!max_bhs);
+	BUG_ON(!page);
+	BUG_ON(!PageLocked(page));
+	/*
+	 * If the ntfs_inode is clean no need to do anything.  If it is dirty,
+	 * mark it as clean now so that it can be redirtied later on if needed.
+	 * There is no danger of races as long as the caller is holding the
+	 * locks for the mft record @m and the page it is in.
+	 */
+	if (!NInoTestClearDirty(ni))
+		goto done;
+	/* Make sure we have mapped buffers. */
+	if (!page_has_buffers(page)) {
+no_buffers_err_out:
+		ntfs_error(vol->sb, "Writing mft records without existing "
+				"buffers is not implemented yet.  %s",
+				ntfs_please_email);
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+	bh = head = page_buffers(page);
+	if (!bh)
+		goto no_buffers_err_out;
+	nr_bhs = 0;
+	block_start = 0;
+	m_start = ni->page_ofs;
+	m_end = m_start + vol->mft_record_size;
+	do {
+		block_end = block_start + blocksize;
+		/*
+		 * If the buffer is outside the mft record, just skip it,
+		 * clearing it if it is dirty to make sure it is not written
+		 * out.  It should never be marked dirty but better be safe.
+		 */
+		if ((block_end <= m_start) || (block_start >= m_end)) {
+			if (buffer_dirty(bh)) {
+				ntfs_warning(vol->sb, "Clearing dirty mft "
+						"record page buffer.  %s",
+						ntfs_please_email);
+				clear_buffer_dirty(bh);
+			}
+			continue;
+		}
+		if (!buffer_mapped(bh)) {
+			ntfs_error(vol->sb, "Writing mft records without "
+					"existing mapped buffers is not "
+					"implemented yet.  %s",
+					ntfs_please_email);
+			err = -EOPNOTSUPP;
+			continue;
+		}
+		if (!buffer_uptodate(bh)) {
+			ntfs_error(vol->sb, "Writing mft records without "
+					"existing uptodate buffers is not "
+					"implemented yet.  %s",
+					ntfs_please_email);
+			err = -EOPNOTSUPP;
+			continue;
+		}
+		BUG_ON(!nr_bhs && (m_start != block_start));
+		BUG_ON(nr_bhs >= max_bhs);
+		bhs[nr_bhs++] = bh;
+		BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));
+	} while (block_start = block_end, (bh = bh->b_this_page) != head);
+	if (unlikely(err))
+		goto cleanup_out;
+	/* Apply the mst protection fixups. */
+	err = pre_write_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size);
+	if (err) {
+		ntfs_error(vol->sb, "Failed to apply mst fixups!");
+		goto cleanup_out;
+	}
+	flush_dcache_mft_record_page(ni);
+	/* Lock buffers and start synchronous write i/o on them. */
+	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
+		struct buffer_head *tbh = bhs[i_bhs];
+
+		if (unlikely(test_set_buffer_locked(tbh)))
+			BUG();
+		BUG_ON(!buffer_uptodate(tbh));
+		if (buffer_dirty(tbh))
+			clear_buffer_dirty(tbh);
+		get_bh(tbh);
+		tbh->b_end_io = end_buffer_write_sync;
+		submit_bh(WRITE, tbh);
+	}
+	/* Synchronize the mft mirror now if not @sync. */
+	if (!sync && ni->mft_no < vol->mftmirr_size)
+		sync_mft_mirror(ni, m, sync);
+	/* Wait on i/o completion of buffers. */
+	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
+		struct buffer_head *tbh = bhs[i_bhs];
+
+		wait_on_buffer(tbh);
+		if (unlikely(!buffer_uptodate(tbh))) {
+			err = -EIO;
+			/*
+			 * Set the buffer uptodate so the page & buffer states
+			 * don't become out of sync.
+			 */
+			if (PageUptodate(page))
+				set_buffer_uptodate(tbh);
+		}
+	}
+	/* If @sync, now synchronize the mft mirror. */
+	if (sync && ni->mft_no < vol->mftmirr_size)
+		sync_mft_mirror(ni, m, sync);
+	/* Remove the mst protection fixups again. */
+	post_write_mst_fixup((NTFS_RECORD*)m);
+	flush_dcache_mft_record_page(ni);
+	if (unlikely(err)) {
+		/* I/O error during writing.  This is really bad! */
+		ntfs_error(vol->sb, "I/O error while writing mft record "
+				"0x%lx!  Marking base inode as bad.  You "
+				"should unmount the volume and run chkdsk.",
+				ni->mft_no);
+		goto err_out;
+	}
+done:
+	ntfs_debug("Done.");
+	return 0;
+cleanup_out:
+	/* Clean the buffers. */
+	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++)
+		clear_buffer_dirty(bhs[i_bhs]);
+err_out:
+	/*
+	 * Current state: all buffers are clean, unlocked, and uptodate.
+	 * The caller should mark the base inode as bad so that no more i/o
+	 * happens.  ->clear_inode() will still be invoked so all extent inodes
+	 * and other allocated memory will be freed.
+	 */
+	if (err == -ENOMEM) {
+		ntfs_error(vol->sb, "Not enough memory to write mft record.  "
+				"Redirtying so the write is retried later.");
+		mark_mft_record_dirty(ni);
+		err = 0;
+	}
+	return err;
+}
+
+/**
+ * ntfs_mft_writepage - check if a metadata page contains dirty mft records
+ * @page:	metadata page possibly containing dirty mft records
+ * @wbc:	writeback control structure
+ *
+ * This is called from the VM when it wants to have a dirty $MFT/$DATA metadata
+ * page cache page cleaned.  The VM has already locked the page and marked it
+ * clean.  Instead of writing the page as a conventional ->writepage function
+ * would do, we check if the page still contains any dirty mft records (it must
+ * have done at some point in the past since the page was marked dirty) and if
+ * none are found, i.e. all mft records are clean, we unlock the page and
+ * return.  The VM is then free to do with the page as it pleases.  If on the
+ * other hand we do find any dirty mft records in the page, we redirty the page
+ * before unlocking it and returning so the VM knows that the page is still
+ * busy and cannot be thrown out.
+ *
+ * Note, we do not actually write any dirty mft records here because they are
+ * dirty inodes and hence will be written by the VFS inode dirty code paths.
+ * There is no need to write them from the VM page dirty code paths, too and in
+ * fact once we implement journalling it would be a complete nightmare having
+ * two code paths leading to mft record writeout.
+ */
+static int ntfs_mft_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct inode *mft_vi = page->mapping->host;
+	struct super_block *sb = mft_vi->i_sb;
+	ntfs_volume *vol = NTFS_SB(sb);
+	u8 *maddr;
+	MFT_RECORD *m;
+	ntfs_inode **extent_nis;
+	unsigned long mft_no;
+	int nr, i, j;
+	BOOL is_dirty = FALSE;
+
+	BUG_ON(mft_vi != vol->mft_ino);
+	/* The first mft record number in the page. */
+	mft_no = page->index << (PAGE_CACHE_SHIFT - vol->mft_record_size_bits);
+	/* Number of mft records in the page. */
+	nr = PAGE_CACHE_SIZE >> vol->mft_record_size_bits;
+	BUG_ON(!nr);
+	ntfs_debug("Entering for %i inodes starting at 0x%lx.", nr, mft_no);
+	/* Iterate over the mft records in the page looking for a dirty one. */
+	maddr = (u8*)kmap(page);
+	for (i = 0; i < nr; ++i, ++mft_no, maddr += vol->mft_record_size) {
+		struct inode *vi;
+		ntfs_inode *ni, *eni;
+		ntfs_attr na;
+
+		na.mft_no = mft_no;
+		na.name = NULL;
+		na.name_len = 0;
+		na.type = AT_UNUSED;
+		/*
+		 * Check if the inode corresponding to this mft record is in
+		 * the VFS inode cache and obtain a reference to it if it is.
+		 */
+		ntfs_debug("Looking for inode 0x%lx in icache.", mft_no);
+		/*
+		 * For inode 0, i.e. $MFT itself, we cannot use ilookup5() from
+		 * here or we deadlock because the inode is already locked by
+		 * the kernel (fs/fs-writeback.c::__sync_single_inode()) and
+		 * ilookup5() waits until the inode is unlocked before
+		 * returning it and it never gets unlocked because
+		 * ntfs_mft_writepage() never returns.  )-:  Fortunately, we
+		 * have inode 0 pinned in icache for the duration of the mount
+		 * so we can access it directly.
+		 */
+		if (!mft_no) {
+			/* Balance the below iput(). */
+			vi = igrab(mft_vi);
+			BUG_ON(vi != mft_vi);
+		} else
+			vi = ilookup5(sb, mft_no, (test_t)ntfs_test_inode, &na);
+		if (vi) {
+			ntfs_debug("Inode 0x%lx is in icache.", mft_no);
+			/* The inode is in icache.  Check if it is dirty. */
+			ni = NTFS_I(vi);
+			if (!NInoDirty(ni)) {
+				/* The inode is not dirty, skip this record. */
+				ntfs_debug("Inode 0x%lx is not dirty, "
+						"continuing search.", mft_no);
+				iput(vi);
+				continue;
+			}
+			ntfs_debug("Inode 0x%lx is dirty, aborting search.",
+					mft_no);
+			/* The inode is dirty, no need to search further. */
+			iput(vi);
+			is_dirty = TRUE;
+			break;
+		}
+		ntfs_debug("Inode 0x%lx is not in icache.", mft_no);
+		/* The inode is not in icache. */
+		/* Skip the record if it is not a mft record (type "FILE"). */
+		if (!ntfs_is_mft_recordp(maddr)) {
+			ntfs_debug("Mft record 0x%lx is not a FILE record, "
+					"continuing search.", mft_no);
+			continue;
+		}
+		m = (MFT_RECORD*)maddr;
+		/*
+		 * Skip the mft record if it is not in use.  FIXME:  What about
+		 * deleted/deallocated (extent) inodes?  (AIA)
+		 */
+		if (!(m->flags & MFT_RECORD_IN_USE)) {
+			ntfs_debug("Mft record 0x%lx is not in use, "
+					"continuing search.", mft_no);
+			continue;
+		}
+		/* Skip the mft record if it is a base inode. */
+		if (!m->base_mft_record) {
+			ntfs_debug("Mft record 0x%lx is a base record, "
+					"continuing search.", mft_no);
+			continue;
+		}
+		/*
+		 * This is an extent mft record.  Check if the inode
+		 * corresponding to its base mft record is in icache.
+		 */
+		na.mft_no = MREF_LE(m->base_mft_record);
+		ntfs_debug("Mft record 0x%lx is an extent record.  Looking "
+				"for base inode 0x%lx in icache.", mft_no,
+				na.mft_no);
+		vi = ilookup5(sb, na.mft_no, (test_t)ntfs_test_inode,
+				&na);
+		if (!vi) {
+			/*
+			 * The base inode is not in icache.  Skip this extent
+			 * mft record.
+			 */
+			ntfs_debug("Base inode 0x%lx is not in icache, "
+					"continuing search.", na.mft_no);
+			continue;
+		}
+		ntfs_debug("Base inode 0x%lx is in icache.", na.mft_no);
+		/*
+		 * The base inode is in icache.  Check if it has the extent
+		 * inode corresponding to this extent mft record attached.
+		 */
+		ni = NTFS_I(vi);
+		down(&ni->extent_lock);
+		if (ni->nr_extents <= 0) {
+			/*
+			 * The base inode has no attached extent inodes.  Skip
+			 * this extent mft record.
+			 */
+			up(&ni->extent_lock);
+			iput(vi);
+			continue;
+		}
+		/* Iterate over the attached extent inodes. */
+		extent_nis = ni->ext.extent_ntfs_inos;
+		for (eni = NULL, j = 0; j < ni->nr_extents; ++j) {
+			if (mft_no == extent_nis[j]->mft_no) {
+				/*
+				 * Found the extent inode corresponding to this
+				 * extent mft record.
+				 */
+				eni = extent_nis[j];
+				break;
+			}
+		}
+		/*
+		 * If the extent inode was not attached to the base inode, skip
+		 * this extent mft record.
+		 */
+		if (!eni) {
+			up(&ni->extent_lock);
+			iput(vi);
+			continue;
+		}
+		/*
+		 * Found the extent inode corresponding to this extent mft
+		 * record.  If it is dirty, no need to search further.
+		 */
+		if (NInoDirty(eni)) {
+			up(&ni->extent_lock);
+			iput(vi);
+			is_dirty = TRUE;
+			break;
+		}
+		/* The extent inode is not dirty, so do the next record. */
+		up(&ni->extent_lock);
+		iput(vi);
+	}
+	kunmap(page);
+	/* If a dirty mft record was found, redirty the page. */
+	if (is_dirty) {
+		ntfs_debug("Inode 0x%lx is dirty.  Redirtying the page "
+				"starting at inode 0x%lx.", mft_no,
+				page->index << (PAGE_CACHE_SHIFT -
+				vol->mft_record_size_bits));
+		redirty_page_for_writepage(wbc, page);
+		unlock_page(page);
+	} else {
+		/*
+		 * Keep the VM happy.  This must be done otherwise the
+		 * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
+		 * the page is clean.
+		 */
+		BUG_ON(PageWriteback(page));
+		set_page_writeback(page);
+		unlock_page(page);
+		end_page_writeback(page);
+	}
+	ntfs_debug("Done.");
+	return 0;
+}
+
+#endif /* NTFS_RW */
diff -Nru a/fs/ntfs/mft.h b/fs/ntfs/mft.h
--- a/fs/ntfs/mft.h	2004-06-08 21:54:24 -07:00
+++ b/fs/ntfs/mft.h	2004-06-08 21:54:24 -07:00
@@ -57,6 +57,60 @@
 	flush_dcache_page(ni->page);
 }
 
+extern void __mark_mft_record_dirty(ntfs_inode *ni);
+
+/**
+ * mark_mft_record_dirty - set the mft record and the page containing it dirty
+ * @ni:		ntfs inode describing the mapped mft record
+ *
+ * Set the mapped (extent) mft record of the (base or extent) ntfs inode @ni,
+ * as well as the page containing the mft record, dirty.  Also, mark the base
+ * vfs inode dirty.  This ensures that any changes to the mft record are
+ * written out to disk.
+ *
+ * NOTE:  Do not do anything if the mft record is already marked dirty.
+ */
+static inline void mark_mft_record_dirty(ntfs_inode *ni)
+{
+	if (!NInoTestSetDirty(ni))
+		__mark_mft_record_dirty(ni);
+}
+
+extern int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync);
+
+/**
+ * write_mft_record - write out a mapped (extent) mft record
+ * @ni:		ntfs inode describing the mapped (extent) mft record
+ * @m:		mapped (extent) mft record to write
+ * @sync:	if true, wait for i/o completion
+ *
+ * This is just a wrapper for write_mft_record_nolock() (see mft.c), which
+ * locks the page for the duration of the write.  This ensures that there are
+ * no race conditions between writing the mft record via the dirty inode code
+ * paths and via the page cache write back code paths or between writing
+ * neighbouring mft records residing in the same page.
+ *
+ * Locking the page also serializes us against ->readpage() if the page is not
+ * uptodate.
+ *
+ * On success, clean the mft record and return 0.  On error, leave the mft
+ * record dirty and return -errno.  The caller should call make_bad_inode() on
+ * the base inode to ensure no more access happens to this inode.  We do not do
+ * it here as the caller may want to finish writing other extent mft records
+ * first to minimize on-disk metadata inconsistencies.
+ */
+static inline int write_mft_record(ntfs_inode *ni, MFT_RECORD *m, int sync)
+{
+	struct page *page = ni->page;
+	int err;
+
+	BUG_ON(!page);
+	lock_page(page);
+	err = write_mft_record_nolock(ni, m, sync);
+	unlock_page(page);
+	return err;
+}
+
 #endif /* NTFS_RW */
 
 #endif /* _LINUX_NTFS_MFT_H */
diff -Nru a/fs/ntfs/super.c b/fs/ntfs/super.c
--- a/fs/ntfs/super.c	2004-06-08 21:54:24 -07:00
+++ b/fs/ntfs/super.c	2004-06-08 21:54:24 -07:00
@@ -291,6 +291,101 @@
 	return FALSE;
 }
 
+#ifdef NTFS_RW
+
+/**
+ * ntfs_write_volume_flags - write new flags to the volume information flags
+ * @vol:	ntfs volume on which to modify the flags
+ * @flags:	new flags value for the volume information flags
+ *
+ * Internal function.  You probably want to use ntfs_{set,clear}_volume_flags()
+ * instead (see below).
+ *
+ * Replace the volume information flags on the volume @vol with the value
+ * supplied in @flags.  Note, this overwrites the volume information flags, so
+ * make sure to combine the flags you want to modify with the old flags and use
+ * the result when calling ntfs_write_volume_flags().
+ *
+ * Return 0 on success and -errno on error.
+ */
+static int ntfs_write_volume_flags(ntfs_volume *vol, const VOLUME_FLAGS flags)
+{
+	ntfs_inode *ni = NTFS_I(vol->vol_ino);
+	MFT_RECORD *m;
+	VOLUME_INFORMATION *vi;
+	attr_search_context *ctx;
+	int err;
+
+	ntfs_debug("Entering, old flags = 0x%x, new flags = 0x%x.",
+			vol->vol_flags, flags);
+	if (vol->vol_flags == flags)
+		goto done;
+	BUG_ON(!ni);
+	m = map_mft_record(ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		goto err_out;
+	}
+	ctx = get_attr_search_ctx(ni, m);
+	if (!ctx) {
+		err = -ENOMEM;
+		goto put_unm_err_out;
+	}
+	if (!lookup_attr(AT_VOLUME_INFORMATION, NULL, 0, 0, 0, NULL, 0, ctx)) {
+		err = -EIO;
+		goto put_unm_err_out;
+	}
+	vi = (VOLUME_INFORMATION*)((u8*)ctx->attr +
+			le16_to_cpu(ctx->attr->data.resident.value_offset));
+	vol->vol_flags = vi->flags = flags;
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+	put_attr_search_ctx(ctx);
+	unmap_mft_record(ni);
+done:
+	ntfs_debug("Done.");
+	return 0;
+put_unm_err_out:
+	if (ctx)
+		put_attr_search_ctx(ctx);
+	unmap_mft_record(ni);
+err_out:
+	ntfs_error(vol->sb, "Failed with error code %i.", -err);
+	return err;
+}
+
+/**
+ * ntfs_set_volume_flags - set bits in the volume information flags
+ * @vol:	ntfs volume on which to modify the flags
+ * @flags:	flags to set on the volume
+ *
+ * Set the bits in @flags in the volume information flags on the volume @vol.
+ *
+ * Return 0 on success and -errno on error.
+ */
+static inline int ntfs_set_volume_flags(ntfs_volume *vol, VOLUME_FLAGS flags)
+{
+	flags &= VOLUME_FLAGS_MASK;
+	return ntfs_write_volume_flags(vol, vol->vol_flags | flags);
+}
+
+/**
+ * ntfs_clear_volume_flags - clear bits in the volume information flags
+ * @vol:	ntfs volume on which to modify the flags
+ * @flags:	flags to clear on the volume
+ *
+ * Clear the bits in @flags in the volume information flags on the volume @vol.
+ *
+ * Return 0 on success and -errno on error.
+ */
+static inline int ntfs_clear_volume_flags(ntfs_volume *vol, VOLUME_FLAGS flags)
+{
+	flags &= VOLUME_FLAGS_MASK;
+	return ntfs_write_volume_flags(vol, vol->vol_flags & ~flags);
+}
+
+#endif /* NTFS_RW */
+
 /**
  * ntfs_remount - change the mount options of a mounted ntfs filesystem
  * @sb:		superblock of mounted ntfs filesystem
@@ -316,30 +411,72 @@
 	 * For the read-write compiled driver, if we are remounting read-write,
 	 * make sure there are no volume errors and that no unsupported volume
 	 * flags are set.  Also, empty the logfile journal as it would become
-	 * stale as soon as something is written to the volume.
+	 * stale as soon as something is written to the volume and mark the
+	 * volume dirty so that chkdsk is run if the volume is not umounted
+	 * cleanly.
+	 *
+	 * When remounting read-only, mark the volume clean if no volume errors
+	 * have occurred.
 	 */
 	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
 		static const char *es = ".  Cannot remount read-write.";
 
+		/* Remounting read-write. */
 		if (NVolErrors(vol)) {
 			ntfs_error(sb, "Volume has errors and is read-only%s",
 					es);
 			return -EROFS;
 		}
+		if (vol->vol_flags & VOLUME_IS_DIRTY) {
+			ntfs_error(sb, "Volume is dirty and read-only%s", es);
+			return -EROFS;
+		}
 		if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
 			ntfs_error(sb, "Volume has unsupported flags set and "
 					"is read-only%s", es);
 			return -EROFS;
 		}
+		if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
+			ntfs_error(sb, "Failed to set dirty bit in volume "
+					"information flags%s", es);
+			return -EROFS;
+		}
+#if 0
+		// TODO: Enable this code once we start modifying anything that
+		//	 is different between NTFS 1.2 and 3.x...
+		/* Set NT4 compatibility flag on newer NTFS version volumes. */
+		if ((vol->major_ver > 1)) {
+			if (ntfs_set_volume_flags(vol, VOLUME_MOUNTED_ON_NT4)) {
+				ntfs_error(sb, "Failed to set NT4 "
+						"compatibility flag%s", es);
+				NVolSetErrors(vol);
+				return -EROFS;
+			}
+		}
+#endif
 		if (!ntfs_empty_logfile(vol->logfile_ino)) {
 			ntfs_error(sb, "Failed to empty journal $LogFile%s",
 					es);
 			NVolSetErrors(vol);
 			return -EROFS;
 		}
+	} else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
+		/* Remounting read-only. */
+		if (!NVolErrors(vol)) {
+			if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY))
+				ntfs_warning(sb, "Failed to clear dirty bit "
+						"in volume information "
+						"flags.  Run chkdsk.");
+		}
 	}
 	// TODO:  For now we enforce no atime and dir atime updates as they are
 	// not implemented.
+	if ((sb->s_flags & MS_NOATIME) && !(*flags & MS_NOATIME))
+		ntfs_warning(sb, "Atime updates are not implemented yet.  "
+				"Leaving them disabled.");
+	else if ((sb->s_flags & MS_NODIRATIME) && !(*flags & MS_NODIRATIME))
+		ntfs_warning(sb, "Directory atime updates are not implemented "
+				"yet.  Leaving them disabled.");
 	*flags |= MS_NOATIME | MS_NODIRATIME;
 #endif /* ! NTFS_RW */
 
@@ -1131,7 +1268,7 @@
 			le32_to_cpu(ctx->attr->data.resident.value_length) >
 			(u8*)ctx->attr + le32_to_cpu(ctx->attr->length))
 		goto err_put_vol;
-	/* Setup volume flags and version. */
+	/* Copy the volume flags and version to the ntfs_volume structure. */
 	vol->vol_flags = vi->flags;
 	vol->major_ver = vi->major_ver;
 	vol->minor_ver = vi->minor_ver;
@@ -1142,9 +1279,12 @@
 #ifdef NTFS_RW
 	/* Make sure that no unsupported volume flags are set. */
 	if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
-		static const char *es1 = "Volume has unsupported flags set";
+		static const char *es1a = "Volume is dirty";
+		static const char *es1b = "Volume has unsupported flags set";
 		static const char *es2 = ".  Run chkdsk and mount in Windows.";
-
+		const char *es1;
+		
+		es1 = vol->vol_flags & VOLUME_IS_DIRTY ? es1a : es1b;
 		/* If a read-write mount, convert it to a read-only mount. */
 		if (!(sb->s_flags & MS_RDONLY)) {
 			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
@@ -1171,10 +1311,12 @@
 	 */
 	if (!load_and_check_logfile(vol) ||
 			!ntfs_is_logfile_clean(vol->logfile_ino)) {
-		static const char *es1 = "Failed to load $LogFile";
-		static const char *es2 = "$LogFile is not clean";
-		static const char *es3 = ".  Mount in Windows.";
+		static const char *es1a = "Failed to load $LogFile";
+		static const char *es1b = "$LogFile is not clean";
+		static const char *es2 = ".  Mount in Windows.";
+		const char *es1;
 
+		es1 = !vol->logfile_ino ? es1a : es1b;
 		/* If a read-write mount, convert it to a read-only mount. */
 		if (!(sb->s_flags & MS_RDONLY)) {
 			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
@@ -1182,21 +1324,66 @@
 				ntfs_error(sb, "%s and neither on_errors="
 						"continue nor on_errors="
 						"remount-ro was specified%s",
-						!vol->logfile_ino ? es1 : es2,
-						es3);
+						es1, es2);
 				goto iput_logfile_err_out;
 			}
 			sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
-			ntfs_error(sb, "%s.  Mounting read-only%s",
-					!vol->logfile_ino ? es1 : es2, es3);
+			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
 		} else
 			ntfs_warning(sb, "%s.  Will not be able to remount "
-					"read-write%s",
-					!vol->logfile_ino ? es1 : es2, es3);
+					"read-write%s", es1, es2);
 		/* This will prevent a read-write remount. */
 		NVolSetErrors(vol);
-	/* If a read-write mount, empty the logfile. */
-	} else if (!(sb->s_flags & MS_RDONLY) &&
+	}
+	/* If (still) a read-write mount, mark the volume dirty. */
+	if (!(sb->s_flags & MS_RDONLY) &&
+			ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
+		static const char *es1 = "Failed to set dirty bit in volume "
+				"information flags";
+		static const char *es2 = ".  Run chkdsk.";
+
+		/* Convert to a read-only mount. */
+		if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
+				ON_ERRORS_CONTINUE))) {
+			ntfs_error(sb, "%s and neither on_errors=continue nor "
+					"on_errors=remount-ro was specified%s",
+					es1, es2);
+			goto iput_logfile_err_out;
+		}
+		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
+		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+		/*
+		 * Do not set NVolErrors() because ntfs_remount() might manage
+		 * to set the dirty flag in which case all would be well.
+		 */
+	}
+#if 0
+	// TODO: Enable this code once we start modifying anything that is
+	//	 different between NTFS 1.2 and 3.x...
+	/*
+	 * If (still) a read-write mount, set the NT4 compatibility flag on
+	 * newer NTFS version volumes.
+	 */
+	if (!(sb->s_flags & MS_RDONLY) && (vol->major_ver > 1) &&
+			ntfs_set_volume_flags(vol, VOLUME_MOUNTED_ON_NT4)) {
+		static const char *es1 = "Failed to set NT4 compatibility flag";
+		static const char *es2 = ".  Run chkdsk.";
+
+		/* Convert to a read-only mount. */
+		if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
+				ON_ERRORS_CONTINUE))) {
+			ntfs_error(sb, "%s and neither on_errors=continue nor "
+					"on_errors=remount-ro was specified%s",
+					es1, es2);
+			goto iput_logfile_err_out;
+		}
+		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
+		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+		NVolSetErrors(vol);
+	}
+#endif
+	/* If (still) a read-write mount, empty the logfile. */
+	if (!(sb->s_flags & MS_RDONLY) &&
 			!ntfs_empty_logfile(vol->logfile_ino)) {
 		static const char *es1 = "Failed to empty $LogFile";
 		static const char *es2 = ".  Mount in Windows.";
@@ -1209,12 +1396,11 @@
 					es1, es2);
 			goto iput_logfile_err_out;
 		}
-		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
 		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
-		/* This will prevent a read-write remount. */
+		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
 		NVolSetErrors(vol);
 	}
-#endif
+#endif /* NTFS_RW */
 	/*
 	 * Get the inode for the attribute definitions file and parse the
 	 * attribute definitions.
@@ -1289,18 +1475,69 @@
 
 /**
  * ntfs_put_super - called by the vfs to unmount a volume
- * @vfs_sb:	vfs superblock of volume to unmount
+ * @sb:		vfs superblock of volume to unmount
  *
  * ntfs_put_super() is called by the VFS (from fs/super.c::do_umount()) when
  * the volume is being unmounted (umount system call has been invoked) and it
  * releases all inodes and memory belonging to the NTFS specific part of the
  * super block.
  */
-static void ntfs_put_super(struct super_block *vfs_sb)
+static void ntfs_put_super(struct super_block *sb)
 {
-	ntfs_volume *vol = NTFS_SB(vfs_sb);
+	ntfs_volume *vol = NTFS_SB(sb);
 
 	ntfs_debug("Entering.");
+#ifdef NTFS_RW
+	/*
+	 * Commit all inodes while they are still open in case some of them
+	 * cause others to be dirtied.
+	 */
+	ntfs_commit_inode(vol->vol_ino);
+
+	/* NTFS 3.0+ specific. */
+	if (vol->major_ver >= 3) {
+		if (vol->secure_ino)
+			ntfs_commit_inode(vol->secure_ino);
+	}
+
+	ntfs_commit_inode(vol->root_ino);
+
+	down_write(&vol->lcnbmp_lock);
+	ntfs_commit_inode(vol->lcnbmp_ino);
+	up_write(&vol->lcnbmp_lock);
+
+	down_write(&vol->mftbmp_lock);
+	ntfs_commit_inode(vol->mftbmp_ino);
+	up_write(&vol->mftbmp_lock);
+
+	if (vol->logfile_ino)
+		ntfs_commit_inode(vol->logfile_ino);
+
+	if (vol->mftmirr_ino)
+		ntfs_commit_inode(vol->mftmirr_ino);
+	ntfs_commit_inode(vol->mft_ino);
+
+	/*
+	 * If a read-write mount and no volume errors have occurred, mark the
+	 * volume clean.  Also, re-commit all affected inodes.
+	 */
+	if (!(sb->s_flags & MS_RDONLY)) {
+		if (!NVolErrors(vol)) {
+			if (ntfs_clear_volume_flags(vol, VOLUME_IS_DIRTY))
+				ntfs_warning(sb, "Failed to clear dirty bit "
+						"in volume information "
+						"flags.  Run chkdsk.");
+			ntfs_commit_inode(vol->vol_ino);
+			ntfs_commit_inode(vol->root_ino);
+			if (vol->mftmirr_ino)
+				ntfs_commit_inode(vol->mftmirr_ino);
+			ntfs_commit_inode(vol->mft_ino);
+		} else {
+			ntfs_warning(sb, "Volume has errors.  Leaving volume "
+					"marked dirty.  Run chkdsk.");
+		}
+	}
+#endif /* NTFS_RW */
 
 	iput(vol->vol_ino);
 	vol->vol_ino = NULL;
@@ -1331,11 +1568,47 @@
 		iput(vol->logfile_ino);
 		vol->logfile_ino = NULL;
 	}
-
 	if (vol->mftmirr_ino) {
+		/* Re-commit the mft mirror and mft just in case. */
+		ntfs_commit_inode(vol->mftmirr_ino);
+		ntfs_commit_inode(vol->mft_ino);
 		iput(vol->mftmirr_ino);
 		vol->mftmirr_ino = NULL;
 	}
+	/*
+	 * If any dirty inodes are left, throw away all mft data page cache
+	 * pages to allow a clean umount.  This should never happen any more
+	 * due to mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
+	 * the underlying mft records are written out and cleaned.  If it does
+	 * happen anyway, we want to know...
+	 */
+	ntfs_commit_inode(vol->mft_ino);
+	write_inode_now(vol->mft_ino, 1);
+	if (!list_empty(&sb->s_dirty)) {
+		const char *s1, *s2;
+
+		down(&vol->mft_ino->i_sem);
+		truncate_inode_pages(vol->mft_ino->i_mapping, 0);
+		up(&vol->mft_ino->i_sem);
+		write_inode_now(vol->mft_ino, 1);
+		if (!list_empty(&sb->s_dirty)) {
+			static const char *_s1 = "inodes";
+			static const char *_s2 = "";
+			s1 = _s1;
+			s2 = _s2;
+		} else {
+			static const char *_s1 = "mft pages";
+			static const char *_s2 = "They have been thrown "
+					"away.  ";
+			s1 = _s1;
+			s2 = _s2;
+		}
+		ntfs_error(sb, "Dirty %s found at umount time.  %sYou should "
+				"run chkdsk.  Please email "
+				"linux-ntfs-dev@lists.sourceforge.net and say "
+				"that you saw this message.  Thank you.", s1,
+				s2);
+	}
 #endif /* NTFS_RW */
 
 	iput(vol->mft_ino);
@@ -1344,7 +1617,7 @@
 	vol->upcase_len = 0;
 	/*
 	 * Decrease the number of mounts and destroy the global default upcase
-	 * table if necessary. Also decrease the number of upcase users if we
+	 * table if necessary.  Also decrease the number of upcase users if we
 	 * are a user.
 	 */
 	down(&ntfs_lock);
@@ -1368,7 +1641,7 @@
 		unload_nls(vol->nls_map);
 		vol->nls_map = NULL;
 	}
-	vfs_sb->s_fs_info = NULL;
+	sb->s_fs_info = NULL;
 	kfree(vol);
 	return;
 }
@@ -1629,8 +1902,8 @@
 #ifdef NTFS_RW
 	//.dirty_inode	= NULL,			/* VFS: Called from
 	//					   __mark_inode_dirty(). */
-	//.write_inode	= NULL,			/* VFS: Write dirty inode to
-	//					   disk. */
+	.write_inode	= ntfs_write_inode,	/* VFS: Write dirty inode to
+						   disk. */
 	//.drop_inode	= NULL,			/* VFS: Called just after the
 	//					   inode reference count has
 	//					   been decreased to zero.
@@ -1719,8 +1992,12 @@
 #ifndef NTFS_RW
 	sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
 #else
-	// TODO:  For now we enforce no atime and dir atime updates as they are
-	// not implemented.
+	if (!(sb->s_flags & MS_NOATIME))
+		ntfs_warning(sb, "Atime updates are not implemented yet.  "
+				"Disabling them.");
+	else if (!(sb->s_flags & MS_NODIRATIME))
+		ntfs_warning(sb, "Directory atime updates are not implemented "
+				"yet.  Disabling them.");
 	sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
 #endif
 	/* Allocate a new ntfs_volume and place it in sb->s_fs_info. */
