

RAID5 is calling copy_data() under sh->lock, which is a spinlock.  But
copy_data() does kmap(), which can sleep - and sleeping while holding a
spinlock is a bug.

The best fix is to use kmap_atomic() in there.  It is faster than kmap() and
does not block.  (It's a bit sad that raid5 is copying tons of data inside a
spinlock, though.)
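
To make the bug concrete, here is a minimal before/after sketch (illustrative
only, not the exact raid5 code; page, dst and len stand in for the real
locals):

	spin_lock(&sh->lock);
	/* BROKEN: kmap() may sleep while sh->lock is held */
	ptr = kmap(page);
	memcpy(dst, ptr, len);
	kunmap(page);
	spin_unlock(&sh->lock);

	spin_lock(&sh->lock);
	/* OK: kmap_atomic() never sleeps, so it is safe under the lock */
	ptr = kmap_atomic(page, KM_USER0);
	memcpy(dst, ptr, len);
	kunmap_atomic(ptr, KM_USER0);
	spin_unlock(&sh->lock);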

The patch removes the unused bio_kmap()/bio_kunmap() pair and replaces
__bio_kmap()/__bio_kunmap() with __bio_kmap_atomic()/__bio_kunmap_atomic().
I think it's best to withdraw the sleeping-and-slow bio_kmap() from the
kernel API before someone else tries to use it.
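
For reference, the replacement pair is used like this (a sketch of the same
conversion the raid5 hunk below performs):

	char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
	/* ... copy to/from segment i of the bio ... */
	__bio_kunmap_atomic(ba, KM_USER0);

Note that the unmap takes the address returned by the map rather than the
bio, because kunmap_atomic() operates on the mapped address.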


Also, I noticed that bio_kmap_irq() was using local_save_flags().  I assume
this is a bug - local_save_flags() only records the current interrupt state,
it does not disable interrupts!  I converted that to local_irq_save().  These
names are terribly chosen.
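
For anyone else bitten by the naming, the semantics of the stock macros are:

	unsigned long flags;

	local_save_flags(flags);	/* record IRQ state; IRQs left as-is */
	local_irq_save(flags);		/* record IRQ state AND disable IRQs */
	local_irq_restore(flags);	/* restore whatever state was recorded */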



Presumably bio_kmap_irq() exists to protect the KM_BIO_SRC_IRQ kmap slot: if
an interrupt arrives while the slot is in use and the handler maps the same
slot, the first mapping is clobbered.  On highmem machines bio_kmap_irq()
will disable interrupts, and on non-highmem machines it will not, because no
temporary mapping is taken there.

This is fairly poor from an API perspective, because the name implies that
interrupts have been disabled on return, which is only true on highmem
builds.  But for non-highmem machines there is no need to incur the
additional interrupt latency, so no obvious solution suggests itself.
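
The CONFIG_HIGHMEM split has roughly this shape (a simplified sketch - the
real bio.h inline also BUG()s on misaligned mappings, and the exact
non-highmem definition here is from memory):

	#ifdef CONFIG_HIGHMEM
	extern inline char *bio_kmap_irq(struct bio *bio, unsigned long *flags)
	{
		/* disable irqs so a handler cannot reuse KM_BIO_SRC_IRQ */
		local_irq_save(*flags);
		return (char *) kmap_atomic(bio_page(bio), KM_BIO_SRC_IRQ) +
			bio_offset(bio);
	}
	#else
	/* lowmem pages are permanently mapped: nothing to protect */
	#define bio_kmap_irq(bio, flags)	(bio_data(bio))
	#endif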




 Documentation/block/biodoc.txt |    8 ++++----
 drivers/block/ll_rw_blk.c      |    2 +-
 drivers/md/raid5.c             |    5 +++--
 include/linux/bio.h            |   11 ++++++-----
 4 files changed, 14 insertions(+), 12 deletions(-)

diff -puN Documentation/block/biodoc.txt~bio_kmap-fix Documentation/block/biodoc.txt
--- 25/Documentation/block/biodoc.txt~bio_kmap-fix	2003-04-02 22:51:00.000000000 -0800
+++ 25-akpm/Documentation/block/biodoc.txt	2003-04-02 22:51:00.000000000 -0800
@@ -204,7 +204,7 @@ may need to abort DMA operations and rev
 which case a virtual mapping of the page is required. For SCSI it is also
 done in some scenarios where the low level driver cannot be trusted to
 handle a single sg entry correctly. The driver is expected to perform the
-kmaps as needed on such occasions using the bio_kmap and bio_kmap_irq
+kmaps as needed on such occasions using the __bio_kmap_atomic and bio_kmap_irq
 routines as appropriate. A driver could also use the blk_queue_bounce()
 routine on its own to bounce highmem i/o to low memory for specific requests
 if so desired.
@@ -1147,9 +1147,9 @@ use blk_rq_map_sg for scatter gather) to
 PIO drivers (or drivers that need to revert to PIO transfer once in a
 while (IDE for example)), where the CPU is doing the actual data
 transfer a virtual mapping is needed. If the driver supports highmem I/O,
-(Sec 1.1, (ii) ) it needs to use bio_kmap and bio_kmap_irq to temporarily
-map a bio into the virtual address space. See how IDE handles this with
-ide_map_buffer.
+(Sec 1.1, (ii) ) it needs to use __bio_kmap_atomic and bio_kmap_irq to
+temporarily map a bio into the virtual address space. See how IDE handles
+this with ide_map_buffer.
 
 
 8. Prior/Related/Impacted patches
diff -puN drivers/block/ll_rw_blk.c~bio_kmap-fix drivers/block/ll_rw_blk.c
--- 25/drivers/block/ll_rw_blk.c~bio_kmap-fix	2003-04-02 22:51:00.000000000 -0800
+++ 25-akpm/drivers/block/ll_rw_blk.c	2003-04-02 22:51:00.000000000 -0800
@@ -188,7 +188,7 @@ void blk_queue_merge_bvec(request_queue_
  * Caveat:
  *    The driver that does this *must* be able to deal appropriately
  *    with buffers in "highmemory". This can be accomplished by either calling
- *    bio_kmap() to get a temporary kernel mapping, or by calling
+ *    __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
  *    blk_queue_bounce() to create a buffer in normal memory.
  **/
 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
diff -puN drivers/md/raid5.c~bio_kmap-fix drivers/md/raid5.c
--- 25/drivers/md/raid5.c~bio_kmap-fix	2003-04-02 22:51:00.000000000 -0800
+++ 25-akpm/drivers/md/raid5.c	2003-04-02 22:51:00.000000000 -0800
@@ -21,6 +21,7 @@
 #include <linux/slab.h>
 #include <linux/raid/raid5.h>
 #include <linux/bio.h>
+#include <linux/highmem.h>
 #include <asm/bitops.h>
 #include <asm/atomic.h>
 
@@ -634,12 +635,12 @@ static void copy_data(int frombio, struc
 			else clen = len;
 			
 			if (clen > 0) {
-				char *ba = __bio_kmap(bio, i);
+				char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
 				if (frombio)
 					memcpy(pa+page_offset, ba+b_offset, clen);
 				else
 					memcpy(ba+b_offset, pa+page_offset, clen);
-				__bio_kunmap(bio, i);
+				__bio_kunmap_atomic(ba, KM_USER0);
 			}	
 			if (clen < len) /* hit end of page */
 				break;
diff -puN include/linux/bio.h~bio_kmap-fix include/linux/bio.h
--- 25/include/linux/bio.h~bio_kmap-fix	2003-04-02 22:51:00.000000000 -0800
+++ 25-akpm/include/linux/bio.h	2003-04-02 22:51:00.000000000 -0800
@@ -148,10 +148,11 @@ struct bio {
  * permanent PIO fall back, user is probably better off disabling highmem
  * I/O completely on that queue (see ide-dma for example)
  */
-#define __bio_kmap(bio, idx) (kmap(bio_iovec_idx((bio), (idx))->bv_page) + bio_iovec_idx((bio), (idx))->bv_offset)
-#define bio_kmap(bio)	__bio_kmap((bio), (bio)->bi_idx)
-#define __bio_kunmap(bio, idx)	kunmap(bio_iovec_idx((bio), (idx))->bv_page)
-#define bio_kunmap(bio)		__bio_kunmap((bio), (bio)->bi_idx)
+#define __bio_kmap_atomic(bio, idx, kmtype)				\
+	(kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page, kmtype) +	\
+		bio_iovec_idx((bio), (idx))->bv_offset)
+
+#define __bio_kunmap_atomic(addr, kmtype) kunmap_atomic(addr, kmtype)
 
 /*
  * merge helpers etc
@@ -238,7 +239,7 @@ extern inline char *bio_kmap_irq(struct 
 	 * might not be a highmem page, but the preempt/irq count
 	 * balancing is a lot nicer this way
 	 */
-	local_save_flags(*flags);
+	local_irq_save(*flags);
 	addr = (unsigned long) kmap_atomic(bio_page(bio), KM_BIO_SRC_IRQ);
 
 	if (addr & ~PAGE_MASK)

_
