diff -urN ref/drivers/char/mem.c 2.4.3aa/drivers/char/mem.c
--- ref/drivers/char/mem.c	Fri Apr  6 15:36:11 2001
+++ 2.4.3aa/drivers/char/mem.c	Fri Apr  6 05:10:16 2001
@@ -613,7 +613,6 @@
 		printk("unable to get major %d for memory devs\n", MEM_MAJOR);
 	memory_devfs_register();
 	rand_initialize();
-	raw_init();
 #ifdef CONFIG_I2C
 	i2c_init_all();
 #endif
diff -urN ref/drivers/char/raw.c 2.4.3aa/drivers/char/raw.c
--- ref/drivers/char/raw.c	Fri Apr  6 15:36:11 2001
+++ 2.4.3aa/drivers/char/raw.c	Fri Apr  6 17:37:59 2001
@@ -19,10 +19,15 @@
 
 #define dprintk(x...) 
 
-static struct block_device *raw_device_bindings[256];
-static int raw_device_inuse[256];
-static int raw_device_sector_size[256];
-static int raw_device_sector_bits[256];
+typedef struct raw_device_data_s {
+	struct kiobuf * iobuf;
+	int iobuf_lock;
+	struct block_device *binding;
+	int inuse, sector_size, sector_bits;
+	struct semaphore mutex;
+} raw_device_data_t;
+
+static raw_device_data_t raw_devices[256];
 
 static ssize_t rw_raw_dev(int rw, struct file *, char *, size_t, loff_t *);
 
@@ -45,11 +50,19 @@
 	open:		raw_open,
 };
 
-void __init raw_init(void)
+int __init raw_init(void)
 {
+	int i;
 	register_chrdev(RAW_MAJOR, "raw", &raw_fops);
+
+	for (i = 0; i < 256; i++)
+		init_MUTEX(&raw_devices[i].mutex);
+
+	return 0;
 }
 
+__initcall(raw_init);
+
 /* 
  * Open/close code for raw IO.
  */
@@ -74,28 +87,43 @@
 		return 0;
 	}
 	
+	down(&raw_devices[minor].mutex);
 	/*
 	 * No, it is a normal raw device.  All we need to do on open is
 	 * to check that the device is bound, and force the underlying
 	 * block device to a sector-size blocksize. 
 	 */
 
-	bdev = raw_device_bindings[minor];
+	bdev = raw_devices[minor].binding;
+	err = -ENODEV;
 	if (!bdev)
-		return -ENODEV;
+		goto out;
 
 	rdev = to_kdev_t(bdev->bd_dev);
 	err = blkdev_get(bdev, filp->f_mode, 0, BDEV_RAW);
 	if (err)
-		return err;
+		goto out;
 	
 	/*
 	 * Don't change the blocksize if we already have users using
 	 * this device 
 	 */
 
-	if (raw_device_inuse[minor]++)
-		return 0;
+	if (raw_devices[minor].inuse++)
+		goto out;
+
+	/* 
+	 * We'll just use one kiobuf
+	 */
+
+	err = alloc_kiovec(1, &raw_devices[minor].iobuf);
+	if (err) {
+		raw_devices[minor].inuse--;
+		up(&raw_devices[minor].mutex);
+		blkdev_put(bdev, BDEV_RAW);
+		return err;
+	}
+
 	
 	/* 
 	 * Don't interfere with mounted devices: we cannot safely set
@@ -112,13 +140,16 @@
 	}
 
 	set_blocksize(rdev, sector_size);
-	raw_device_sector_size[minor] = sector_size;
+	raw_devices[minor].sector_size = sector_size;
 
 	for (sector_bits = 0; !(sector_size & 1); )
 		sector_size>>=1, sector_bits++;
-	raw_device_sector_bits[minor] = sector_bits;
+	raw_devices[minor].sector_bits = sector_bits;
+
+ out:
+	up(&raw_devices[minor].mutex);
 	
-	return 0;
+	return err;
 }
 
 int raw_release(struct inode *inode, struct file *filp)
@@ -127,11 +158,12 @@
 	struct block_device *bdev;
 	
 	minor = MINOR(inode->i_rdev);
-	lock_kernel();
-	bdev = raw_device_bindings[minor];
+	down(&raw_devices[minor].mutex);
+	bdev = raw_devices[minor].binding;
+	if (!--raw_devices[minor].inuse)
+		free_kiovec(1, &raw_devices[minor].iobuf);
+	up(&raw_devices[minor].mutex);
 	blkdev_put(bdev, BDEV_RAW);
-	raw_device_inuse[minor]--;
-	unlock_kernel();
 	return 0;
 }
 
@@ -184,26 +216,30 @@
 			 * major/minor numbers make sense. 
 			 */
 
-			if (rq.block_major == NODEV || 
+			if ((rq.block_major == NODEV && 
+			     rq.block_minor != NODEV) ||
 			    rq.block_major > MAX_BLKDEV ||
 			    rq.block_minor > MINORMASK) {
 				err = -EINVAL;
 				break;
 			}
 			
-			if (raw_device_inuse[minor]) {
+			down(&raw_devices[minor].mutex);
+			if (raw_devices[minor].inuse) {
+				up(&raw_devices[minor].mutex);
 				err = -EBUSY;
 				break;
 			}
-			if (raw_device_bindings[minor])
-				bdput(raw_device_bindings[minor]);
-			raw_device_bindings[minor] = 
+			if (raw_devices[minor].binding)
+				bdput(raw_devices[minor].binding);
+			raw_devices[minor].binding = 
 				bdget(kdev_t_to_nr(MKDEV(rq.block_major, rq.block_minor)));
+			up(&raw_devices[minor].mutex);
 		} else {
 			struct block_device *bdev;
 			kdev_t dev;
 
-			bdev = raw_device_bindings[minor];
+			bdev = raw_devices[minor].binding;
 			if (bdev) {
 				dev = to_kdev_t(bdev->bd_dev);
 				rq.block_major = MAJOR(dev);
@@ -244,9 +280,9 @@
 		   size_t size, loff_t *offp)
 {
 	struct kiobuf * iobuf;
-	int		err;
+	int		new_iobuf;
+	int		err = 0;
 	unsigned long	blocknr, blocks;
-	unsigned long	b[KIO_MAX_SECTORS];
 	size_t		transferred;
 	int		iosize;
 	int		i;
@@ -262,9 +298,23 @@
 	 */
 
 	minor = MINOR(filp->f_dentry->d_inode->i_rdev);
-	dev = to_kdev_t(raw_device_bindings[minor]->bd_dev);
-	sector_size = raw_device_sector_size[minor];
-	sector_bits = raw_device_sector_bits[minor];
+
+	new_iobuf = 0;
+	iobuf = raw_devices[minor].iobuf;
+	if (test_and_set_bit(0, &raw_devices[minor].iobuf_lock)) {
+		/*
+		 * A parallel read/write is using the preallocated iobuf
+		 * so just run slow and allocate a new one.
+		 */
+		err = alloc_kiovec(1, &iobuf);
+		if (err)
+			goto out;
+		new_iobuf = 1;
+	}
+
+	dev = to_kdev_t(raw_devices[minor].binding->bd_dev);
+	sector_size = raw_devices[minor].sector_size;
+	sector_bits = raw_devices[minor].sector_bits;
 	sector_mask = sector_size- 1;
 	max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9);
 	
@@ -275,18 +325,13 @@
 	dprintk ("rw_raw_dev: dev %d:%d (+%d)\n",
 		 MAJOR(dev), MINOR(dev), limit);
 	
+	err = -EINVAL;
 	if ((*offp & sector_mask) || (size & sector_mask))
-		return -EINVAL;
-	if ((*offp >> sector_bits) > limit)
-		return 0;
+		goto out_free;
 
-	/* 
-	 * We'll just use one kiobuf
-	 */
-
-	err = alloc_kiovec(1, &iobuf);
-	if (err)
-		return err;
+	err = 0;
+	if ((*offp >> sector_bits) > limit)
+		goto out_free;
 
 	/*
 	 * Split the IO into KIO_MAX_SECTORS chunks, mapping and
@@ -310,35 +355,37 @@
 		err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
 		if (err)
 			break;
-#if 0
-		err = lock_kiovec(1, &iobuf, 1);
-		if (err) 
-			break;
-#endif
-	
+
 		for (i=0; i < blocks; i++) 
-			b[i] = blocknr++;
+			iobuf->blocks[i] = blocknr++;
 		
-		err = brw_kiovec(rw, 1, &iobuf, dev, b, sector_size);
+		err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size);
 
+		if (rw == READ && err > 0)
+			mark_dirty_kiobuf(iobuf, err);
+		
 		if (err >= 0) {
 			transferred += err;
 			size -= err;
 			buf += err;
 		}
 
-		unmap_kiobuf(iobuf); /* The unlock_kiobuf is implicit here */
+		unmap_kiobuf(iobuf);
 
 		if (err != iosize)
 			break;
 	}
 	
-	free_kiovec(1, &iobuf);
-
 	if (transferred) {
 		*offp += transferred;
-		return transferred;
+		err = transferred;
 	}
-	
+
+ out_free:
+	if (!new_iobuf)
+		clear_bit(0, &raw_devices[minor].iobuf_lock);
+	else
+		free_kiovec(1, &iobuf);
+ out:	
 	return err;
 }
diff -urN 2.4.3/drivers/md/lvm-snap.c 2.4.3-rawio/drivers/md/lvm-snap.c
--- 2.4.3/drivers/md/lvm-snap.c	Sat Feb 10 02:34:09 2001
+++ 2.4.3-rawio/drivers/md/lvm-snap.c	Fri Apr  6 18:20:00 2001
@@ -246,7 +246,6 @@
 	int length_tmp;
 	ulong snap_pe_start, COW_table_sector_offset,
 	      COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block;
-	ulong blocks[1];
 	const char * reason;
 	kdev_t snap_phys_dev;
 	struct kiobuf * iobuf = lv_snap->lv_iobuf;
@@ -274,7 +273,7 @@
 	COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t));
 
         /* COW table block to write next */
-	blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
+	iobuf->blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
 
 	/* store new COW_table entry */
 	lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[idx].rdev_org));
@@ -290,7 +289,7 @@
 	iobuf->nr_pages = 1;
 
 	if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
-		       blocks, blksize_snap) != blksize_snap)
+		       iobuf->blocks, blksize_snap) != blksize_snap)
 		goto fail_raw_write;
 
 
@@ -309,11 +308,11 @@
 			snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
 			snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
 			blksize_snap = lvm_get_blksize(snap_phys_dev);
-			blocks[0] = snap_pe_start >> (blksize_snap >> 10);
-		} else blocks[0]++;
+			iobuf->blocks[0] = snap_pe_start >> (blksize_snap >> 10);
+		} else iobuf->blocks[0]++;
 
 		if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
-			       blocks, blksize_snap) != blksize_snap)
+			       iobuf->blocks, blksize_snap) != blksize_snap)
 			goto fail_raw_write;
 	}
 
@@ -352,7 +351,6 @@
 	unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off;
 	int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
 	struct kiobuf * iobuf;
-	unsigned long blocks[KIO_MAX_SECTORS];
 	int blksize_snap, blksize_org, min_blksize, max_blksize;
 	int max_sectors, nr_sectors;
 
@@ -402,16 +400,16 @@
 
 		iobuf->length = nr_sectors << 9;
 
-		lvm_snapshot_prepare_blocks(blocks, org_start,
+		lvm_snapshot_prepare_blocks(iobuf->blocks, org_start,
 					    nr_sectors, blksize_org);
 		if (brw_kiovec(READ, 1, &iobuf, org_phys_dev,
-			       blocks, blksize_org) != (nr_sectors<<9))
+			       iobuf->blocks, blksize_org) != (nr_sectors<<9))
 			goto fail_raw_read;
 
-		lvm_snapshot_prepare_blocks(blocks, snap_start,
+		lvm_snapshot_prepare_blocks(iobuf->blocks, snap_start,
 					    nr_sectors, blksize_snap);
 		if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
-			       blocks, blksize_snap) != (nr_sectors<<9))
+			       iobuf->blocks, blksize_snap) != (nr_sectors<<9))
 			goto fail_raw_write;
 	}
 
diff -urN ref/fs/buffer.c 2.4.3aa/fs/buffer.c
--- ref/fs/buffer.c	Fri Apr  6 15:36:11 2001
+++ 2.4.3aa/fs/buffer.c	Fri Apr  6 16:27:08 2001
@@ -1200,10 +1200,10 @@
 		kmem_cache_free(bh_cachep, bh);
 	} else {
 		bh->b_blocknr = -1;
-		init_waitqueue_head(&bh->b_wait);
+		bh->b_this_page = NULL;
+
 		nr_unused_buffer_heads++;
 		bh->b_next_free = unused_list;
-		bh->b_this_page = NULL;
 		unused_list = bh;
 	}
 }
@@ -1232,8 +1232,8 @@
 	 * more buffer-heads itself.  Thus SLAB_BUFFER.
 	 */
 	if((bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER)) != NULL) {
-		memset(bh, 0, sizeof(*bh));
-		init_waitqueue_head(&bh->b_wait);
+		bh->b_blocknr = -1;
+		bh->b_this_page = NULL;
 		return bh;
 	}
 
@@ -2003,21 +2003,18 @@
 
 static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size)
 {
-	int iosize;
+	int iosize, err;
 	int i;
 	struct buffer_head *tmp;
 
-
 	iosize = 0;
-	spin_lock(&unused_list_lock);
+	err = 0;
 
 	for (i = nr; --i >= 0; ) {
 		iosize += size;
 		tmp = bh[i];
 		if (buffer_locked(tmp)) {
-			spin_unlock(&unused_list_lock);
 			wait_on_buffer(tmp);
-			spin_lock(&unused_list_lock);
 		}
 		
 		if (!buffer_uptodate(tmp)) {
@@ -2025,13 +2022,13 @@
                            clearing iosize on error calculates the
                            amount of IO before the first error. */
 			iosize = 0;
+			err = -EIO;
 		}
-		__put_unused_buffer_head(tmp);
 	}
 	
-	spin_unlock(&unused_list_lock);
-
-	return iosize;
+	if (iosize)
+		return iosize;
+	return err;
 }
 
 /*
@@ -2060,7 +2057,7 @@
 	unsigned long	blocknr;
 	struct kiobuf *	iobuf = NULL;
 	struct page *	map;
-	struct buffer_head *tmp, *bh[KIO_MAX_SECTORS];
+	struct buffer_head *tmp, **bhs = NULL;
 
 	if (!nr)
 		return 0;
@@ -2086,22 +2083,20 @@
 		offset = iobuf->offset;
 		length = iobuf->length;
 		iobuf->errno = 0;
+		if (!bhs)
+			bhs = iobuf->bh;
 		
 		for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
 			map  = iobuf->maplist[pageind];
 			if (!map) {
 				err = -EFAULT;
-				goto error;
+				goto finished;
 			}
 			
 			while (length > 0) {
 				blocknr = b[bufind++];
-				tmp = get_unused_buffer_head(0);
-				if (!tmp) {
-					err = -ENOMEM;
-					goto error;
-				}
-				
+				tmp = bhs[bhind++];
+
 				tmp->b_dev = B_FREE;
 				tmp->b_size = size;
 				set_bh_page(tmp, map, offset);
@@ -2115,9 +2110,9 @@
 				if (rw == WRITE) {
 					set_bit(BH_Uptodate, &tmp->b_state);
 					clear_bit(BH_Dirty, &tmp->b_state);
-				}
+				} else
+					set_bit(BH_Uptodate, &tmp->b_state);
 
-				bh[bhind++] = tmp;
 				length -= size;
 				offset += size;
 
@@ -2128,7 +2123,8 @@
 				 * Wait for IO if we have got too much 
 				 */
 				if (bhind >= KIO_MAX_SECTORS) {
-					err = wait_kio(rw, bhind, bh, size);
+					kiobuf_wait_for_io(iobuf); /* wake-one */
+					err = wait_kio(rw, bhind, bhs, size);
 					if (err >= 0)
 						transferred += err;
 					else
@@ -2146,7 +2142,8 @@
 
 	/* Is there any IO still left to submit? */
 	if (bhind) {
-		err = wait_kio(rw, bhind, bh, size);
+		kiobuf_wait_for_io(iobuf); /* wake-one */
+		err = wait_kio(rw, bhind, bhs, size);
 		if (err >= 0)
 			transferred += err;
 		else
@@ -2157,16 +2154,6 @@
 	if (transferred)
 		return transferred;
 	return err;
-
- error:
-	/* We got an error allocating the bh'es.  Just free the current
-           buffer_heads and exit. */
-	spin_lock(&unused_list_lock);
-	for (i = bhind; --i >= 0; ) {
-		__put_unused_buffer_head(bh[i]);
-	}
-	spin_unlock(&unused_list_lock);
-	goto finished;
 }
 
 /*
diff -urN ref/fs/dcache.c 2.4.3aa/fs/dcache.c
--- ref/fs/dcache.c	Fri Apr  6 15:36:11 2001
+++ 2.4.3aa/fs/dcache.c	Fri Apr  6 05:10:16 2001
@@ -1229,6 +1229,18 @@
 	} while (i);
 }
 
+static void init_buffer_head(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR)
+	{
+		struct buffer_head * bh = (struct buffer_head *) foo;
+
+		memset(bh, 0, sizeof(*bh));
+		init_waitqueue_head(&bh->b_wait);
+	}
+}
+
 /* SLAB cache for __getname() consumers */
 kmem_cache_t *names_cachep;
 
@@ -1246,7 +1258,7 @@
 {
 	bh_cachep = kmem_cache_create("buffer_head",
 			sizeof(struct buffer_head), 0,
-			SLAB_HWCACHE_ALIGN, NULL, NULL);
+			SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);
 	if(!bh_cachep)
 		panic("Cannot create buffer head SLAB cache");
 
diff -urN ref/fs/iobuf.c 2.4.3aa/fs/iobuf.c
--- ref/fs/iobuf.c	Fri Apr  6 15:36:11 2001
+++ 2.4.3aa/fs/iobuf.c	Fri Apr  6 19:58:27 2001
@@ -8,9 +8,7 @@
 
 #include <linux/iobuf.h>
 #include <linux/slab.h>
-
-static kmem_cache_t *kiobuf_cachep;
-
+#include <linux/vmalloc.h>
 
 void end_kio_request(struct kiobuf *kiobuf, int uptodate)
 {
@@ -24,17 +22,6 @@
 	}
 }
 
-
-void __init kiobuf_setup(void)
-{
-	kiobuf_cachep =  kmem_cache_create("kiobuf",
-					   sizeof(struct kiobuf),
-					   0,
-					   SLAB_HWCACHE_ALIGN, NULL, NULL);
-	if(!kiobuf_cachep)
-		panic("Cannot create kernel iobuf cache\n");
-}
-
 void kiobuf_init(struct kiobuf *iobuf)
 {
 	memset(iobuf, 0, sizeof(*iobuf));
@@ -43,18 +30,48 @@
 	iobuf->maplist   = iobuf->map_array;
 }
 
+int alloc_kiobuf_bhs(struct kiobuf * kiobuf)
+{
+	int i;
+
+	for (i = 0; i < KIO_MAX_SECTORS; i++)
+		if (!(kiobuf->bh[i] = kmem_cache_alloc(bh_cachep, SLAB_KERNEL))) {
+			while (i--) {
+				kmem_cache_free(bh_cachep, kiobuf->bh[i]);
+				kiobuf->bh[i] = NULL;
+			}
+			return -ENOMEM;
+		}
+	return 0;
+}
+
+void free_kiobuf_bhs(struct kiobuf * kiobuf)
+{
+	int i;
+
+	for (i = 0; i < KIO_MAX_SECTORS; i++) {
+		kmem_cache_free(bh_cachep, kiobuf->bh[i]);
+		kiobuf->bh[i] = NULL;
+	}
+}
+
 int alloc_kiovec(int nr, struct kiobuf **bufp)
 {
 	int i;
 	struct kiobuf *iobuf;
 	
 	for (i = 0; i < nr; i++) {
-		iobuf = kmem_cache_alloc(kiobuf_cachep, SLAB_KERNEL);
+		iobuf = vmalloc(sizeof(struct kiobuf));
 		if (!iobuf) {
 			free_kiovec(i, bufp);
 			return -ENOMEM;
 		}
 		kiobuf_init(iobuf);
+ 		if (alloc_kiobuf_bhs(iobuf)) {
+			vfree(iobuf);
+ 			free_kiovec(i, bufp);
+ 			return -ENOMEM;
+ 		}
 		bufp[i] = iobuf;
 	}
 	
@@ -72,7 +89,8 @@
 			unlock_kiovec(1, &iobuf);
 		if (iobuf->array_len > KIO_STATIC_PAGES)
 			kfree (iobuf->maplist);
-		kmem_cache_free(kiobuf_cachep, bufp[i]);
+		free_kiobuf_bhs(iobuf);
+		vfree(bufp[i]);
 	}
 }
 
@@ -115,11 +133,12 @@
 
 	add_wait_queue(&kiobuf->wait_queue, &wait);
 repeat:
-	run_task_queue(&tq_disk);
 	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 	if (atomic_read(&kiobuf->io_count) != 0) {
+		run_task_queue(&tq_disk);
 		schedule();
-		goto repeat;
+		if (atomic_read(&kiobuf->io_count) != 0)
+			goto repeat;
 	}
 	tsk->state = TASK_RUNNING;
 	remove_wait_queue(&kiobuf->wait_queue, &wait);
diff -urN ref/include/linux/iobuf.h 2.4.3aa/include/linux/iobuf.h
--- ref/include/linux/iobuf.h	Fri Apr  6 15:36:11 2001
+++ 2.4.3aa/include/linux/iobuf.h	Fri Apr  6 19:53:05 2001
@@ -24,8 +24,7 @@
  * entire iovec.
  */
 
-#define KIO_MAX_ATOMIC_IO	64 /* in kb */
-#define KIO_MAX_ATOMIC_BYTES	(64 * 1024)
+#define KIO_MAX_ATOMIC_IO	512 /* in kb */
 #define KIO_STATIC_PAGES	(KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
 #define KIO_MAX_SECTORS		(KIO_MAX_ATOMIC_IO * 2)
 
@@ -47,8 +46,10 @@
 
 	unsigned int	locked : 1;	/* If set, pages has been locked */
 	
-	/* Always embed enough struct pages for 64k of IO */
+	/* Always embed enough struct pages for atomic IO */
 	struct page *	map_array[KIO_STATIC_PAGES];
+	struct buffer_head * bh[KIO_MAX_SECTORS];
+	unsigned long blocks[KIO_MAX_SECTORS];
 
 	/* Dynamic state for IO completion: */
 	atomic_t	io_count;	/* IOs still in progress */
@@ -64,10 +65,10 @@
 void	unmap_kiobuf(struct kiobuf *iobuf);
 int	lock_kiovec(int nr, struct kiobuf *iovec[], int wait);
 int	unlock_kiovec(int nr, struct kiobuf *iovec[]);
+void	mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes);
 
 /* fs/iobuf.c */
 
-void __init kiobuf_setup(void);
 void	kiobuf_init(struct kiobuf *);
 void	end_kio_request(struct kiobuf *, int);
 void	simple_wakeup_kiobuf(struct kiobuf *);
@@ -75,6 +76,8 @@
 void	free_kiovec(int nr, struct kiobuf **);
 int	expand_kiobuf(struct kiobuf *, int);
 void	kiobuf_wait_for_io(struct kiobuf *);
+extern int alloc_kiobuf_bhs(struct kiobuf *);
+extern void free_kiobuf_bhs(struct kiobuf *);
 
 /* fs/buffer.c */
 
diff -urN ref/include/linux/raw.h 2.4.3aa/include/linux/raw.h
--- ref/include/linux/raw.h	Fri Apr  6 05:24:48 2001
+++ 2.4.3aa/include/linux/raw.h	Fri Apr  6 16:00:19 2001
@@ -13,11 +13,4 @@
 	__u64	block_minor;
 };
 
-#ifdef __KERNEL__
-
-/* drivers/char/raw.c */
-extern void raw_init(void);
-
-#endif /* __KERNEL__ */
-
 #endif /* __LINUX_RAW_H */
diff -urN ref/init/main.c 2.4.3aa/init/main.c
--- ref/init/main.c	Sun Apr  1 01:17:33 2001
+++ 2.4.3aa/init/main.c	Fri Apr  6 19:40:44 2001
@@ -583,7 +583,6 @@
 	vfs_caches_init(mempages);
 	buffer_init(mempages);
 	page_cache_init(mempages);
-	kiobuf_setup();
 	signals_init();
 	bdev_init();
 	inode_init(mempages);
diff -urN ref/mm/memory.c 2.4.3aa/mm/memory.c
--- ref/mm/memory.c	Fri Apr  6 15:36:11 2001
+++ 2.4.3aa/mm/memory.c	Fri Apr  6 05:10:16 2001
@@ -389,20 +389,33 @@
 /*
  * Do a quick page-table lookup for a single page. 
  */
-static struct page * follow_page(unsigned long address) 
+static struct page * follow_page(unsigned long address, int write) 
 {
 	pgd_t *pgd;
 	pmd_t *pmd;
+	pte_t *ptep, pte;
 
 	pgd = pgd_offset(current->mm, address);
+	if (pgd_none(*pgd) || pgd_bad(*pgd))
+		goto out;
+
 	pmd = pmd_offset(pgd, address);
-	if (pmd) {
-		pte_t * pte = pte_offset(pmd, address);
-		if (pte && pte_present(*pte))
-			return pte_page(*pte);
+	if (pmd_none(*pmd) || pmd_bad(*pmd))
+		goto out;
+
+	ptep = pte_offset(pmd, address);
+	if (!ptep)
+		goto out;
+
+	pte = *ptep;
+	if (pte_present(pte)) {
+		if (!write ||
+		    (pte_write(pte) && pte_dirty(pte)))
+			return pte_page(pte);
 	}
-	
-	return NULL;
+
+out:
+	return 0;
 }
 
 /* 
@@ -476,15 +489,22 @@
 				goto out_unlock;
 			}
 		}
-		if (handle_mm_fault(current->mm, vma, ptr, datain) <= 0) 
-			goto out_unlock;
 		spin_lock(&mm->page_table_lock);
-		map = follow_page(ptr);
-		if (!map) {
+		while (!(map = follow_page(ptr, datain))) {
+			int ret;
+
 			spin_unlock(&mm->page_table_lock);
-			dprintk (KERN_ERR "Missing page in map_user_kiobuf\n");
-			goto out_unlock;
-		}
+			ret = handle_mm_fault(current->mm, vma, ptr, datain);
+			if (ret <= 0) {
+				if (!ret)
+					goto out_unlock;
+				else {
+					err = -ENOMEM;
+					goto out_unlock;
+				}
+			}
+			spin_lock(&mm->page_table_lock);
+		}			
 		map = get_page_map(map);
 		if (map) {
 			flush_dcache_page(map);
@@ -509,6 +529,37 @@
 	return err;
 }
 
+/*
+ * Mark all of the pages in a kiobuf as dirty 
+ *
+ * We need to be able to deal with short reads from disk: if an IO error
+ * occurs, the number of bytes read into memory may be less than the
+ * size of the kiobuf, so we have to stop marking pages dirty once the
+ * requested byte count has been reached.
+ */
+
+void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes)
+{
+	int index, offset, remaining;
+	struct page *page;
+	
+	index = iobuf->offset >> PAGE_SHIFT;
+	offset = iobuf->offset & ~PAGE_MASK;
+	remaining = bytes;
+	if (remaining > iobuf->length)
+		remaining = iobuf->length;
+	
+	while (remaining > 0 && index < iobuf->nr_pages) {
+		page = iobuf->maplist[index];
+		
+		if (!PageReserved(page))
+			SetPageDirty(page);
+
+		remaining -= (PAGE_SIZE - offset);
+		offset = 0;
+		index++;
+	}
+}
 
 /*
  * Unmap all of the pages referenced by a kiobuf.  We release the pages,