diff -urN linux-2.4.25-imedia/Documentation/Configure.help linux-2.4.25-imedia-testing/Documentation/Configure.help
--- linux-2.4.25-imedia/Documentation/Configure.help	Tue Feb 24 13:53:31 2004
+++ linux-2.4.25-imedia-testing/Documentation/Configure.help	Tue Feb 24 21:06:25 2004
@@ -109,6 +109,23 @@
 
   Unless you know what you are doing you *should not* enable this option.
 
+Low latency scheduling
+CONFIG_LOLAT
+  This enables low latency scheduling, which reduces the scheduling
+  latency of the kernel.  This makes the kernel more responsive, and
+  potentially increases its bandwidth, since threads waste less time
+  waiting for execution.
+
+  If you don't know what to do here, say Y.
+
+Control low latency with sysctl
+CONFIG_LOLAT_SYSCTL
+  If you say Y here, you will be able to control low latency
+  scheduling using /proc/sys/kernel/lowlatency.  It will default
+  to '0': low latency disabled.
+
+  If you say N here, then low latency scheduling is always enabled.
+
 Symmetric Multi-Processing support
 CONFIG_SMP
   This enables support for systems with more than one CPU. If you have
diff -urN linux-2.4.25-imedia/arch/i386/config.in linux-2.4.25-imedia-testing/arch/i386/config.in
--- linux-2.4.25-imedia/arch/i386/config.in	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/arch/i386/config.in	Tue Feb 24 21:06:25 2004
@@ -25,6 +25,9 @@
 
 mainmenu_option next_comment
 comment 'Processor type and features'
+bool 'Low latency scheduling' CONFIG_LOLAT
+dep_bool 'Control low latency with sysctl' CONFIG_LOLAT_SYSCTL $CONFIG_LOLAT
+
 choice 'Processor family' \
 	"386					CONFIG_M386 \
 	 486					CONFIG_M486 \
diff -urN linux-2.4.25-imedia/drivers/block/ll_rw_blk.c linux-2.4.25-imedia-testing/drivers/block/ll_rw_blk.c
--- linux-2.4.25-imedia/drivers/block/ll_rw_blk.c	Tue Feb 24 13:53:06 2004
+++ linux-2.4.25-imedia-testing/drivers/block/ll_rw_blk.c	Tue Feb 24 21:06:25 2004
@@ -1318,6 +1318,7 @@
 			kstat.pgpgin += count;
 			break;
 	}
+	conditional_schedule();
 }
 
 /**
diff -urN linux-2.4.25-imedia/drivers/char/mem.c linux-2.4.25-imedia-testing/drivers/char/mem.c
--- linux-2.4.25-imedia/drivers/char/mem.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/drivers/char/mem.c	Tue Feb 24 21:06:25 2004
@@ -401,7 +401,7 @@
 		if (count > size)
 			count = size;
 
-		zap_page_range(mm, addr, count);
+		zap_page_range(mm, addr, count, 0);
         	zeromap_page_range(addr, count, PAGE_COPY);
 
 		size -= count;
diff -urN linux-2.4.25-imedia/drivers/char/random.c linux-2.4.25-imedia-testing/drivers/char/random.c
--- linux-2.4.25-imedia/drivers/char/random.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/drivers/char/random.c	Tue Feb 24 21:06:25 2004
@@ -1373,6 +1373,11 @@
 		buf += i;
 		ret += i;
 		add_timer_randomness(&extract_timer_state, nbytes);
+#if LOWLATENCY_NEEDED
+		/* This can happen in softirq's, but that's what we want */
+		if (conditional_schedule_needed())
+			break;
+#endif
 	}
 
 	/* Wipe data just returned from memory */
diff -urN linux-2.4.25-imedia/drivers/i2c/i2c-algo-bit.c linux-2.4.25-imedia-testing/drivers/i2c/i2c-algo-bit.c
--- linux-2.4.25-imedia/drivers/i2c/i2c-algo-bit.c	Tue Feb 24 13:53:31 2004
+++ linux-2.4.25-imedia-testing/drivers/i2c/i2c-algo-bit.c	Tue Feb 24 21:06:25 2004
@@ -363,6 +363,7 @@
 			return (retval<0)? retval : -EFAULT;
 			        /* got a better one ?? */
 		}
+		conditional_schedule();
 #if 0
 		/* from asm/delay.h */
 		__delay(adap->mdelay * (loops_per_sec / 1000) );
diff -urN linux-2.4.25-imedia/drivers/i2c/i2c-core.c linux-2.4.25-imedia-testing/drivers/i2c/i2c-core.c
--- linux-2.4.25-imedia/drivers/i2c/i2c-core.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/drivers/i2c/i2c-core.c	Tue Feb 24 21:06:25 2004
@@ -679,6 +679,8 @@
 {
 	int ret;
 
+	conditional_schedule();
+
 	if (adap->algo->master_xfer) {
  	 	DEB2(printk(KERN_DEBUG "i2c-core.o: master_xfer: %s with %d msgs.\n",
 		            adap->name,num));
@@ -701,6 +703,8 @@
 	struct i2c_adapter *adap=client->adapter;
 	struct i2c_msg msg;
 
+	conditional_schedule();
+
 	if (client->adapter->algo->master_xfer) {
 		msg.addr   = client->addr;
 		msg.flags = client->flags & I2C_M_TEN;
@@ -730,6 +734,9 @@
 	struct i2c_adapter *adap=client->adapter;
 	struct i2c_msg msg;
 	int ret;
+
+	conditional_schedule();
+
 	if (client->adapter->algo->master_xfer) {
 		msg.addr   = client->addr;
 		msg.flags = client->flags & I2C_M_TEN;
diff -urN linux-2.4.25-imedia/drivers/video/fbcon-cfb16.c linux-2.4.25-imedia-testing/drivers/video/fbcon-cfb16.c
--- linux-2.4.25-imedia/drivers/video/fbcon-cfb16.c	Mon Oct 15 23:47:13 2001
+++ linux-2.4.25-imedia-testing/drivers/video/fbcon-cfb16.c	Tue Feb 24 21:06:25 2004
@@ -189,6 +189,7 @@
     case 4:
     case 8:
 	while (count--) {
+	    conditional_schedule();
 	    c = scr_readw(s++) & p->charmask;
 	    cdat = p->fontdata + c * fontheight(p);
 	    for (rows = fontheight(p), dest = dest0; rows--; dest += bytes) {
@@ -206,6 +207,7 @@
     case 12:
     case 16:
 	while (count--) {
+	    conditional_schedule();
 	    c = scr_readw(s++) & p->charmask;
 	    cdat = p->fontdata + (c * fontheight(p) << 1);
 	    for (rows = fontheight(p), dest = dest0; rows--; dest += bytes) {
diff -urN linux-2.4.25-imedia/fs/buffer.c linux-2.4.25-imedia-testing/fs/buffer.c
--- linux-2.4.25-imedia/fs/buffer.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/buffer.c	Wed Feb 25 13:13:34 2004
@@ -261,8 +261,10 @@
 
 		if (dev != NODEV && bh->b_dev != dev)
 			continue;
-		if (test_and_set_bit(BH_Lock, &bh->b_state))
+		if (test_and_set_bit(BH_Lock, &bh->b_state)) {
+			__refile_buffer(bh);
 			continue;
+		}
 		if (buffer_delay(bh)) {
 			if (write_buffer_delay(bh)) {
 				if (count)
@@ -278,6 +280,7 @@
 
 			spin_unlock(&lru_list_lock);
 			write_locked_buffers(array, count);
+			conditional_schedule();
 			return -EAGAIN;
 		}
 		unlock_buffer(bh);
@@ -311,12 +314,19 @@
 	struct buffer_head * next;
 	int nr;
 
-	next = lru_list[index];
 	nr = nr_buffers_type[index];
+repeat:
+	next = lru_list[index];
 	while (next && --nr >= 0) {
 		struct buffer_head *bh = next;
 		next = bh->b_next_free;
 
+		if (conditional_schedule_needed()) {
+			spin_unlock(&lru_list_lock);
+			unconditional_schedule();
+			spin_lock(&lru_list_lock);
+			goto repeat;
+		}
 		if (!buffer_locked(bh)) {
 			if (refile)
 				__refile_buffer(bh);
@@ -324,7 +334,6 @@
 		}
 		if (dev != NODEV && bh->b_dev != dev)
 			continue;
-
 		get_bh(bh);
 		spin_unlock(&lru_list_lock);
 		wait_on_buffer (bh);
@@ -357,6 +366,15 @@
 {
 	int err = 0;
 
+#if LOWLATENCY_NEEDED
+	/*
+	 * syncing devA when there are lots of buffers dirty against
+	 * devB is expensive.
+	 */
+	if (enable_lowlatency)
+		dev = NODEV;
+#endif
+
 	/* One pass for no-wait, three for wait:
 	 * 0) write out all dirty, unlocked buffers;
 	 * 1) wait for all dirty locked buffers;
@@ -751,6 +769,7 @@
 	int i, nlist, slept;
 	struct buffer_head * bh, * bh_next;
 	kdev_t dev = to_kdev_t(bdev->bd_dev);	/* will become bdev */
+	int lolat_retry = 0;
 
  retry:
 	slept = 0;
@@ -768,6 +787,17 @@
 			/* Not hashed? */
 			if (!bh->b_pprev)
 				continue;
+
+			if (lolat_retry < 10 && conditional_schedule_needed()) {
+				get_bh(bh);
+				spin_unlock(&lru_list_lock);
+				unconditional_schedule();
+				spin_lock(&lru_list_lock);
+				put_bh(bh);
+				slept = 1;
+				lolat_retry++;
+			}
+
 			if (buffer_locked(bh)) {
 				get_bh(bh);
 				spin_unlock(&lru_list_lock);
@@ -920,12 +950,18 @@
 	struct buffer_head *bh;
 	struct list_head tmp;
 	int err = 0, err2;
-	
+	DEFINE_RESCHED_COUNT;
+
 	INIT_LIST_HEAD(&tmp);
-	
+repeat:
 	spin_lock(&lru_list_lock);
 
 	while (!list_empty(list)) {
+		if (conditional_schedule_needed()) {
+			spin_unlock(&lru_list_lock);
+			unconditional_schedule();
+			goto repeat;
+		}
 		bh = BH_ENTRY(list->next);
 		list_del(&bh->b_inode_buffers);
 		if (!buffer_dirty(bh) && !buffer_locked(bh))
@@ -950,8 +986,18 @@
 				spin_lock(&lru_list_lock);
 			}
 		}
+		if (TEST_RESCHED_COUNT(32)) {
+			RESET_RESCHED_COUNT();
+			if (conditional_schedule_needed()) {
+				spin_unlock(&lru_list_lock);
+				unconditional_schedule();
+				spin_lock(&lru_list_lock);
+			}
+		}
 	}
 
+	RESET_RESCHED_COUNT();
+
 	while (!list_empty(&tmp)) {
 		bh = BH_ENTRY(tmp.prev);
 		remove_inode_queue(bh);
@@ -961,6 +1007,7 @@
 		if (!buffer_uptodate(bh))
 			err = -EIO;
 		brelse(bh);
+		conditional_schedule();
 		spin_lock(&lru_list_lock);
 	}
 	
@@ -988,11 +1035,20 @@
 	struct buffer_head *bh;
 	struct list_head *p;
 	int err = 0;
+	DEFINE_RESCHED_COUNT;
 
+repeat:
+	conditional_schedule();
 	spin_lock(&lru_list_lock);
 	
- repeat:
 	list_for_each_prev(p, list) {
+		if (TEST_RESCHED_COUNT(32)) {
+			RESET_RESCHED_COUNT();
+			if (conditional_schedule_needed()) {
+				spin_unlock(&lru_list_lock);
+				goto repeat;
+			}
+		}
 		bh = BH_ENTRY(p);
 		if (buffer_locked(bh)) {
 			get_bh(bh);
@@ -1001,7 +1057,6 @@
 			if (!buffer_uptodate(bh))
 				err = -EIO;
 			brelse(bh);
-			spin_lock(&lru_list_lock);
 			goto repeat;
 		}
 	}
@@ -1018,12 +1073,24 @@
 void invalidate_inode_buffers(struct inode *inode)
 {
 	struct list_head * entry;
-	
+
+repeat:
+	conditional_schedule();
 	spin_lock(&lru_list_lock);
-	while ((entry = inode->i_dirty_buffers.next) != &inode->i_dirty_buffers)
+	while ((entry = inode->i_dirty_buffers.next) != &inode->i_dirty_buffers) {
+		if (conditional_schedule_needed()) {
+			spin_unlock(&lru_list_lock);
+			goto repeat;
+		}
 		remove_inode_queue(BH_ENTRY(entry));
-	while ((entry = inode->i_dirty_data_buffers.next) != &inode->i_dirty_data_buffers)
+	}
+	while ((entry = inode->i_dirty_data_buffers.next) != &inode->i_dirty_data_buffers) {
+		if (conditional_schedule_needed()) {
+			spin_unlock(&lru_list_lock);
+			goto repeat;
+		}
 		remove_inode_queue(BH_ENTRY(entry));
+	}
 	spin_unlock(&lru_list_lock);
 }
 
@@ -1046,6 +1113,7 @@
 		bh = get_hash_table(dev, block, size);
 		if (bh) {
 			touch_buffer(bh);
+			conditional_schedule();
 			return bh;
 		}
 
diff -urN linux-2.4.25-imedia/fs/dcache.c linux-2.4.25-imedia-testing/fs/dcache.c
--- linux-2.4.25-imedia/fs/dcache.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/dcache.c	Tue Feb 24 21:06:25 2004
@@ -320,11 +320,23 @@
  
 void prune_dcache(int count)
 {
+	DEFINE_RESCHED_COUNT;
+
+redo:
 	spin_lock(&dcache_lock);
 	for (;;) {
 		struct dentry *dentry;
 		struct list_head *tmp;
 
+		if (TEST_RESCHED_COUNT(100)) {
+			RESET_RESCHED_COUNT();
+			if (conditional_schedule_needed()) {
+				spin_unlock(&dcache_lock);
+				unconditional_schedule();
+				goto redo;
+			}
+		}
+
 		tmp = dentry_unused.prev;
 
 		if (tmp == &dentry_unused)
@@ -479,6 +491,7 @@
 	struct dentry *this_parent = parent;
 	struct list_head *next;
 	int found = 0;
+	DEFINE_RESCHED_COUNT;
 
 	spin_lock(&dcache_lock);
 repeat:
@@ -493,6 +506,13 @@
 			list_add(&dentry->d_lru, dentry_unused.prev);
 			found++;
 		}
+
+		if (TEST_RESCHED_COUNT(500) && found > 10) {
+			if (conditional_schedule_needed())	/* Typically sys_rmdir() */
+				goto out;
+			RESET_RESCHED_COUNT();
+		}
+
 		/*
 		 * Descend a level if the d_subdirs list is non-empty.
 		 */
@@ -517,6 +537,7 @@
 #endif
 		goto resume;
 	}
+out:
 	spin_unlock(&dcache_lock);
 	return found;
 }
@@ -532,8 +553,10 @@
 {
 	int found;
 
-	while ((found = select_parent(parent)) != 0)
+	while ((found = select_parent(parent)) != 0) {
 		prune_dcache(found);
+		conditional_schedule();		/* Typically sys_rmdir() */
+	}
 }
 
 /*
diff -urN linux-2.4.25-imedia/fs/exec.c linux-2.4.25-imedia-testing/fs/exec.c
--- linux-2.4.25-imedia/fs/exec.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/exec.c	Tue Feb 24 21:06:25 2004
@@ -245,7 +245,7 @@
 					memset(kaddr+offset+len, 0,
 						PAGE_SIZE-offset-len);
 			}
-			err = copy_from_user(kaddr+offset, str, bytes_to_copy);
+			err = ll_copy_from_user(kaddr+offset, str, bytes_to_copy);
 			if (err) {
 				ret = -EFAULT;
 				goto out;
diff -urN linux-2.4.25-imedia/fs/ext2/dir.c linux-2.4.25-imedia-testing/fs/ext2/dir.c
--- linux-2.4.25-imedia/fs/ext2/dir.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/ext2/dir.c	Tue Feb 24 21:06:25 2004
@@ -153,6 +153,7 @@
 	struct address_space *mapping = dir->i_mapping;
 	struct page *page = read_cache_page(mapping, n,
 				(filler_t*)mapping->a_ops->readpage, NULL);
+	conditional_schedule();		/* Scanning large directories */
 	if (!IS_ERR(page)) {
 		wait_on_page(page);
 		kmap(page);
diff -urN linux-2.4.25-imedia/fs/ext2/inode.c linux-2.4.25-imedia-testing/fs/ext2/inode.c
--- linux-2.4.25-imedia/fs/ext2/inode.c	Tue Feb 24 13:53:07 2004
+++ linux-2.4.25-imedia-testing/fs/ext2/inode.c	Tue Feb 24 21:06:25 2004
@@ -725,8 +725,13 @@
 {
 	unsigned long block_to_free = 0, count = 0;
 	unsigned long nr;
+	DEFINE_RESCHED_COUNT;
 
 	for ( ; p < q ; p++) {
+		if (TEST_RESCHED_COUNT(32)) {
+			RESET_RESCHED_COUNT();
+			conditional_schedule();
+		}
 		nr = le32_to_cpu(*p);
 		if (nr) {
 			*p = 0;
@@ -769,6 +774,7 @@
 	if (depth--) {
 		int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
 		for ( ; p < q ; p++) {
+			conditional_schedule();		/* Deleting large files */
 			nr = le32_to_cpu(*p);
 			if (!nr)
 				continue;
diff -urN linux-2.4.25-imedia/fs/ext3/balloc.c linux-2.4.25-imedia-testing/fs/ext3/balloc.c
--- linux-2.4.25-imedia/fs/ext3/balloc.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/ext3/balloc.c	Tue Feb 24 21:06:25 2004
@@ -363,6 +363,9 @@
 			}
 		}
 #endif
+		/* superblock lock is held, so this is safe */
+		conditional_schedule();
+
 		BUFFER_TRACE(bitmap_bh, "clear bit");
 		if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) {
 			ext3_error(sb, __FUNCTION__,
diff -urN linux-2.4.25-imedia/fs/ext3/inode.c linux-2.4.25-imedia-testing/fs/ext3/inode.c
--- linux-2.4.25-imedia/fs/ext3/inode.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/ext3/inode.c	Tue Feb 24 21:06:25 2004
@@ -929,6 +929,8 @@
 
 	prev_blocks = inode->i_blocks;
 
+	conditional_schedule();		/* Reading large directories */
+
 	bh = ext3_getblk (handle, inode, block, create, err);
 	if (!bh)
 		return bh;
@@ -1632,6 +1634,7 @@
 	 */
 	for (p = first; p < last; p++) {
 		u32 nr = le32_to_cpu(*p);
+		conditional_schedule();
 		if (nr) {
 			struct buffer_head *bh;
 
@@ -1686,6 +1689,7 @@
 	}
 
 	for (p = first; p < last; p++) {
+		conditional_schedule();
 		nr = le32_to_cpu(*p);
 		if (nr) {
 			/* accumulate blocks to free if they're contiguous */
diff -urN linux-2.4.25-imedia/fs/ext3/namei.c linux-2.4.25-imedia-testing/fs/ext3/namei.c
--- linux-2.4.25-imedia/fs/ext3/namei.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/ext3/namei.c	Tue Feb 24 21:06:25 2004
@@ -159,6 +159,7 @@
 		if ((bh = bh_use[ra_ptr++]) == NULL)
 			goto next;
 		wait_on_buffer(bh);
+		conditional_schedule();
 		if (!buffer_uptodate(bh)) {
 			/* read error, skip block & hope for the best */
 			brelse(bh);
diff -urN linux-2.4.25-imedia/fs/inode.c linux-2.4.25-imedia-testing/fs/inode.c
--- linux-2.4.25-imedia/fs/inode.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/inode.c	Tue Feb 24 21:06:25 2004
@@ -347,6 +347,8 @@
 
 	filemap_fdatawait(inode->i_mapping);
 
+	conditional_schedule();
+
 	spin_lock(&inode_lock);
 	inode->i_state &= ~I_LOCK;
 	__refile_inode(inode);
@@ -647,6 +649,7 @@
 	while (!list_empty(head)) {
 		struct inode *inode;
 
+		conditional_schedule();
 		inode = list_entry(head->next, struct inode, i_list);
 		list_del(&inode->i_list);
 
@@ -683,9 +686,22 @@
 		if (tmp == head)
 			break;
 		inode = list_entry(tmp, struct inode, i_list);
+
+		if (conditional_schedule_needed()) {
+			atomic_inc(&inode->i_count);
+			spin_unlock(&inode_lock);
+			unconditional_schedule();
+			spin_lock(&inode_lock);
+			atomic_dec(&inode->i_count);
+		}
+
 		if (inode->i_sb != sb)
 			continue;
+		atomic_inc(&inode->i_count);
+		spin_unlock(&inode_lock);
 		invalidate_inode_buffers(inode);
+		spin_lock(&inode_lock);
+		atomic_dec(&inode->i_count);
 		if (!atomic_read(&inode->i_count)) {
 			list_del_init(&inode->i_hash);
 			list_del(&inode->i_list);
@@ -795,15 +811,28 @@
 	int avg_pages;
 #endif
 	struct inode * inode;
+	int nr_to_scan = inodes_stat.nr_unused;
 
+resume:
 	spin_lock(&inode_lock);
-
 	count = 0;
 	entry = inode_unused.prev;
-	while (entry != &inode_unused)
-	{
+	while (entry != &inode_unused && nr_to_scan--) {
 		struct list_head *tmp = entry;
 
+		if (conditional_schedule_needed()) {
+			/*
+			 * Need to drop the lock.  Reposition
+			 * the list head so we start here next time.
+			 * This can corrupt the LRU nature of the
+			 * unused list, but this isn't very important.
+			 */
+			list_del(&inode_unused);
+			list_add(&inode_unused, entry);
+			spin_unlock(&inode_lock);
+			unconditional_schedule();
+			goto resume;
+		}
 		entry = entry->prev;
 		inode = INODE(tmp);
 		if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
@@ -1005,6 +1034,8 @@
 	if (inode) {
 		struct inode * old;
 
+		conditional_schedule();			/* sync_old_buffers */
+
 		spin_lock(&inode_lock);
 		/* We released the lock, so.. */
 		old = find_inode(sb, ino, head, find_actor, opaque);
diff -urN linux-2.4.25-imedia/fs/jbd/checkpoint.c linux-2.4.25-imedia-testing/fs/jbd/checkpoint.c
--- linux-2.4.25-imedia/fs/jbd/checkpoint.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/jbd/checkpoint.c	Tue Feb 24 21:06:25 2004
@@ -431,7 +431,11 @@
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
 	int ret = 0;
+	int ll_retries = 4;		/* lowlatency addition */
 
+restart:
+	if (ll_retries-- == 0)
+		goto out;
 	transaction = journal->j_checkpoint_transactions;
 	if (transaction == 0)
 		goto out;
@@ -451,6 +455,12 @@
 				jh = next_jh;
 				next_jh = jh->b_cpnext;
 				ret += __try_to_free_cp_buf(jh);
+				if (conditional_schedule_needed()) {
+					spin_unlock(&journal_datalist_lock);
+					unconditional_schedule();
+					spin_lock(&journal_datalist_lock);
+					goto restart;
+				}
 			} while (jh != last_jh);
 		}
 	} while (transaction != last_transaction);
diff -urN linux-2.4.25-imedia/fs/jbd/commit.c linux-2.4.25-imedia-testing/fs/jbd/commit.c
--- linux-2.4.25-imedia/fs/jbd/commit.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/jbd/commit.c	Wed Feb 25 13:18:35 2004
@@ -257,6 +257,16 @@
 				__journal_remove_journal_head(bh);
 				refile_buffer(bh);
 				release_buffer_page(bh);
+ 				if (conditional_schedule_needed()) {
+ 					if (commit_transaction->t_sync_datalist)
+ 						commit_transaction->t_sync_datalist =
+ 							next_jh;
+ 					if (bufs)
+ 						break;
+ 					spin_unlock(&journal_datalist_lock);
+ 					unconditional_schedule();
+ 					goto write_out_data;
+ 				}
 			}
 		}
 		if (bufs == ARRAY_SIZE(wbuf)) {
@@ -280,8 +290,7 @@
 		journal_brelse_array(wbuf, bufs);
 		lock_journal(journal);
 		spin_lock(&journal_datalist_lock);
-		if (bufs)
-			goto write_out_data_locked;
+		goto write_out_data_locked;
 	}
 
 	/*
@@ -317,6 +326,15 @@
 	 */
 	while ((jh = commit_transaction->t_async_datalist)) {
 		struct buffer_head *bh = jh2bh(jh);
+
+		if (conditional_schedule_needed()) {
+			spin_unlock(&journal_datalist_lock);
+			unlock_journal(journal);
+			unconditional_schedule();
+			lock_journal(journal);
+			spin_lock(&journal_datalist_lock);
+			continue;	/* List may have changed */
+		}
 		if (__buffer_state(bh, Freed)) {
 			BUFFER_TRACE(bh, "Cleaning freed buffer");
 			clear_bit(BH_Freed, &bh->b_state);
@@ -536,6 +554,8 @@
  wait_for_iobuf:
 	while (commit_transaction->t_iobuf_list != NULL) {
 		struct buffer_head *bh;
+
+		conditional_schedule();
 		jh = commit_transaction->t_iobuf_list->b_tprev;
 		bh = jh2bh(jh);
 		if (buffer_locked(bh)) {
@@ -695,6 +715,8 @@
 		struct buffer_head *bh;
 		int was_freed = 0;
 		
+		conditional_schedule();         /* journal is locked */
+		
 		jh = commit_transaction->t_forget;
 		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction ||
 			jh->b_transaction == journal->j_running_transaction);
diff -urN linux-2.4.25-imedia/fs/proc/array.c linux-2.4.25-imedia-testing/fs/proc/array.c
--- linux-2.4.25-imedia/fs/proc/array.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/proc/array.c	Tue Feb 24 21:06:25 2004
@@ -417,9 +417,11 @@
 	if (end > PMD_SIZE)
 		end = PMD_SIZE;
 	do {
-		pte_t page = *pte;
+		pte_t page;
 		struct page *ptpage;
 
+		conditional_schedule();		/* For `top' and `ps' */
+		page = *pte;
 		address += PAGE_SIZE;
 		pte++;
 		if (pte_none(page))
diff -urN linux-2.4.25-imedia/fs/proc/generic.c linux-2.4.25-imedia-testing/fs/proc/generic.c
--- linux-2.4.25-imedia/fs/proc/generic.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/proc/generic.c	Tue Feb 24 21:06:25 2004
@@ -98,6 +98,8 @@
 				retval = n;
 			break;
 		}
+
+		conditional_schedule();		/* Some /proc files are large */
 		
 		/* This is a hack to allow mangling of file pos independent
  		 * of actual bytes read.  Simply place the data at page,
diff -urN linux-2.4.25-imedia/fs/reiserfs/buffer2.c linux-2.4.25-imedia-testing/fs/reiserfs/buffer2.c
--- linux-2.4.25-imedia/fs/reiserfs/buffer2.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/fs/reiserfs/buffer2.c	Tue Feb 24 21:06:25 2004
@@ -54,6 +54,7 @@
     PROC_EXP( unsigned int ctx_switches = nr_context_switches(); );
 
     result = bread (super -> s_dev, n_block, n_size);
+    conditional_schedule();
     PROC_INFO_INC( super, breads );
     PROC_EXP( if( nr_context_switches() != ctx_switches ) 
 	      PROC_INFO_INC( super, bread_miss ) );
diff -urN linux-2.4.25-imedia/fs/reiserfs/journal.c linux-2.4.25-imedia-testing/fs/reiserfs/journal.c
--- linux-2.4.25-imedia/fs/reiserfs/journal.c	Mon Aug 25 14:44:43 2003
+++ linux-2.4.25-imedia-testing/fs/reiserfs/journal.c	Tue Feb 24 21:06:25 2004
@@ -574,6 +574,7 @@
 /* lock the current transaction */
 inline static void lock_journal(struct super_block *p_s_sb) {
   PROC_INFO_INC( p_s_sb, journal.lock_journal );
+  conditional_schedule();
   while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) {
     PROC_INFO_INC( p_s_sb, journal.lock_journal_wait );
     sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ;
@@ -704,6 +705,7 @@
 	mark_buffer_dirty(tbh) ;
       }
       ll_rw_block(WRITE, 1, &tbh) ;
+      conditional_schedule();
       count++ ;
       put_bh(tbh) ; /* once for our get_hash */
     } 
@@ -833,6 +835,7 @@
     set_bit(BH_Dirty, &(SB_JOURNAL(p_s_sb)->j_header_bh->b_state)) ;
     ll_rw_block(WRITE, 1, &(SB_JOURNAL(p_s_sb)->j_header_bh)) ;
     wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ; 
+    conditional_schedule();
     if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) {
       reiserfs_warning( p_s_sb, "reiserfs: journal-837: IO error during journal replay\n" );
       return -EIO ;
@@ -2357,6 +2360,7 @@
 }
 
 int journal_begin(struct reiserfs_transaction_handle *th, struct super_block  * p_s_sb, unsigned long nblocks) {
+  conditional_schedule();
   return do_journal_begin_r(th, p_s_sb, nblocks, 0) ;
 }
 
@@ -2497,6 +2501,7 @@
 }
 
 int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) {
+  conditional_schedule();
   return do_journal_end(th, p_s_sb, nblocks, 0) ;
 }
 
@@ -2968,6 +2973,7 @@
       RFALSE( buffer_locked(bh) && cur_tb != NULL,
 	      "waiting while do_balance was running\n") ;
       wait_on_buffer(bh) ;
+      conditional_schedule();
     }
     PROC_INFO_INC( p_s_sb, journal.prepare_retry );
     retry_count++ ;
@@ -3142,6 +3148,7 @@
     /* copy all the real blocks into log area.  dirty log blocks */
     if (test_bit(BH_JDirty, &cn->bh->b_state)) {
       struct buffer_head *tmp_bh ;
+      conditional_schedule();
       tmp_bh =  journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 
 		       ((cur_write_start + jindex) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ;
       mark_buffer_uptodate(tmp_bh, 1) ;
diff -urN linux-2.4.25-imedia/fs/reiserfs/stree.c linux-2.4.25-imedia-testing/fs/reiserfs/stree.c
--- linux-2.4.25-imedia/fs/reiserfs/stree.c	Mon Aug 25 14:44:43 2003
+++ linux-2.4.25-imedia-testing/fs/reiserfs/stree.c	Tue Feb 24 21:06:25 2004
@@ -652,9 +652,8 @@
                                        stop at leaf level - set to
                                        DISK_LEAF_NODE_LEVEL */
     ) {
-    int  n_block_number = SB_ROOT_BLOCK (p_s_sb),
-      expected_level = SB_TREE_HEIGHT (p_s_sb),
-      n_block_size    = p_s_sb->s_blocksize;
+    int n_block_number, expected_level;
+    int n_block_size    = p_s_sb->s_blocksize;
     struct buffer_head  *       p_s_bh;
     struct path_element *       p_s_last_element;
     int				n_node_level, n_retval;
@@ -666,7 +665,8 @@
 #endif
     
     PROC_INFO_INC( p_s_sb, search_by_key );
-    
+    conditional_schedule();
+
     /* As we add each node to a path we increase its count.  This means that
        we must be careful to release all nodes in a path before we either
        discard the path struct or re-use the path struct, as we do here. */
@@ -678,6 +678,8 @@
     /* With each iteration of this loop we search through the items in the
        current node, and calculate the next current node(next path element)
        for the next iteration of this loop.. */
+    n_block_number = SB_ROOT_BLOCK (p_s_sb);
+    expected_level = SB_TREE_HEIGHT (p_s_sb);
     while ( 1 ) {
 
 #ifdef CONFIG_REISERFS_CHECK
@@ -1104,6 +1106,8 @@
 	    for (n_counter = *p_n_removed;
 		 n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) {
 
+		conditional_schedule();
+
 		if (item_moved (&s_ih, p_s_path)) {
 		    need_research = 1 ;
 		    break;
diff -urN linux-2.4.25-imedia/include/linux/low-latency.h linux-2.4.25-imedia-testing/include/linux/low-latency.h
--- linux-2.4.25-imedia/include/linux/low-latency.h	Thu Jan  1 02:00:00 1970
+++ linux-2.4.25-imedia-testing/include/linux/low-latency.h	Tue Feb 24 21:06:25 2004
@@ -0,0 +1,109 @@
+/*
+ * include/linux/low-latency.h
+ * Scheduling-point helpers: conditional_schedule() and related macros.
+ * Andrew Morton <akpm@zip.com.au>
+ */
+
+#ifndef LOW_LATENCY_H_INCLUDED
+#define LOW_LATENCY_H_INCLUDED
+
+#if defined(CONFIG_LOLAT)
+#define LOWLATENCY_NEEDED	1	/* CONFIG_LOLAT set: macros below do real work */
+#else
+#define LOWLATENCY_NEEDED	0	/* CONFIG_LOLAT unset: macros become no-op stubs */
+#endif
+
+#if LOWLATENCY_NEEDED
+
+#include <linux/cache.h>		/* For ____cacheline_aligned */
+
+#ifdef CONFIG_LOLAT_SYSCTL
+extern struct low_latency_enable_struct {
+	int yep;	/* run-time switch, read through enable_lowlatency */
+} ____cacheline_aligned __enable_lowlatency;
+#define enable_lowlatency __enable_lowlatency.yep
+
+#else
+#define enable_lowlatency 1	/* no sysctl: compile-time constant, always on */
+#endif
+
+/*
+ * Set this non-zero to build the instrumented variant, in which every
+ * unconditional_schedule() call site passes a static lolat_stats_t.
+ */
+#define LOWLATENCY_DEBUG		0
+
+/*
+ * Set this non-zero for robustness testing
+ */
+#define LOWLATENCY_ALWAYS_SCHEDULE	0
+
+#if LOWLATENCY_DEBUG
+
+#if LOWLATENCY_ALWAYS_SCHEDULE
+#define conditional_schedule_needed() ((enable_lowlatency == 2) || (enable_lowlatency && current->need_resched))
+#else
+#define conditional_schedule_needed() (enable_lowlatency && current->need_resched)
+#endif
+
+struct lolat_stats_t {
+	unsigned long count;
+	int visited;
+	const char *file;	/* __FILE__ of the unconditional_schedule() call site */
+	int line;		/* __LINE__ of the unconditional_schedule() call site */
+	struct lolat_stats_t *next;
+};
+
+void set_running_and_schedule(struct lolat_stats_t *stats);
+
+#define unconditional_schedule()					\
+	do {								\
+		static struct lolat_stats_t stats = {			\
+			file: __FILE__,					\
+			line: __LINE__,					\
+		};							\
+		set_running_and_schedule(&stats);			\
+	} while (0)
+
+extern void show_lolat_stats(void);
+
+#else	/* LOWLATENCY_DEBUG */
+
+#if LOWLATENCY_ALWAYS_SCHEDULE
+#define conditional_schedule_needed() 1	/* every scheduling point reschedules */
+#else
+#define conditional_schedule_needed() (current->need_resched)	/* NOTE(review): unlike the LOWLATENCY_DEBUG variant, enable_lowlatency is not tested here - confirm intended */
+#endif
+
+void set_running_and_schedule(void);
+#define unconditional_schedule() set_running_and_schedule()
+
+#endif	/* LOWLATENCY_DEBUG */
+
+#define conditional_schedule()						\
+	do {								\
+		if (conditional_schedule_needed())			\
+			unconditional_schedule();			\
+	} while (0)
+
+#define DEFINE_RESCHED_COUNT	int resched_count = 0	/* declares the local counter used by the two macros below */
+#define TEST_RESCHED_COUNT(n)	(enable_lowlatency && (++resched_count > (n)))	/* counter only advances while enable_lowlatency is non-zero */
+#define RESET_RESCHED_COUNT()	resched_count = 0	/* restart the count after a check */
+extern int ll_copy_to_user(void *to_user, const void *from, unsigned long len);
+extern int ll_copy_from_user(void *to, const void *from_user, unsigned long len);
+
+#else	/* LOWLATENCY_NEEDED */
+
+#define conditional_schedule_needed() 0	/* never true: guarded code is dead */
+#define conditional_schedule()		/* expands to nothing */
+#define unconditional_schedule()	/* expands to nothing */
+
+#define DEFINE_RESCHED_COUNT		/* no counter is declared */
+#define TEST_RESCHED_COUNT(n)	0
+#define RESET_RESCHED_COUNT()
+#define ll_copy_to_user(to_user, from, len) copy_to_user((to_user), (from), (len))
+#define ll_copy_from_user(to, from_user, len) copy_from_user((to), (from_user), (len))
+
+#endif	/* LOWLATENCY_NEEDED */
+
+#endif /* LOW_LATENCY_H_INCLUDED */
+
diff -urN linux-2.4.25-imedia/include/linux/mm.h linux-2.4.25-imedia-testing/include/linux/mm.h
--- linux-2.4.25-imedia/include/linux/mm.h	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/include/linux/mm.h	Tue Feb 24 21:06:25 2004
@@ -128,6 +128,8 @@
  */
 extern pgprot_t protection_map[16];
 
+/* Actions for zap_page_range() */
+#define ZPR_COND_RESCHED	1	/* Do a conditional_schedule() occasionally */
 
 /*
  * These are the virtual MM functions - opening of an area, closing and
@@ -488,7 +490,7 @@
 extern void shmem_lock(struct file * file, int lock);
 extern int shmem_zero_setup(struct vm_area_struct *);
 
-extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
+extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions);
 extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
 extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
 extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
diff -urN linux-2.4.25-imedia/include/linux/reiserfs_fs.h linux-2.4.25-imedia-testing/include/linux/reiserfs_fs.h
--- linux-2.4.25-imedia/include/linux/reiserfs_fs.h	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/include/linux/reiserfs_fs.h	Tue Feb 24 21:06:25 2004
@@ -1329,8 +1329,8 @@
 #define fs_generation(s) ((s)->u.reiserfs_sb.s_generation_counter)
 #define get_generation(s) atomic_read (&fs_generation(s))
 #define FILESYSTEM_CHANGED_TB(tb)  (get_generation((tb)->tb_sb) != (tb)->fs_gen)
-#define fs_changed(gen,s) (gen != get_generation (s))
-
+#define __fs_changed(gen,s) (gen != get_generation (s))
+#define fs_changed(gen,s) ({conditional_schedule(); __fs_changed(gen,s);})
 
 /***************************************************************************/
 /*                  FIXATE NODES                                           */
diff -urN linux-2.4.25-imedia/include/linux/sched.h linux-2.4.25-imedia-testing/include/linux/sched.h
--- linux-2.4.25-imedia/include/linux/sched.h	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/include/linux/sched.h	Tue Feb 24 21:06:25 2004
@@ -26,6 +26,7 @@
 #include <linux/signal.h>
 #include <linux/securebits.h>
 #include <linux/fs_struct.h>
+#include <linux/low-latency.h>
 
 struct exec_domain;
 
diff -urN linux-2.4.25-imedia/include/linux/sysctl.h linux-2.4.25-imedia-testing/include/linux/sysctl.h
--- linux-2.4.25-imedia/include/linux/sysctl.h	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/include/linux/sysctl.h	Tue Feb 24 21:06:25 2004
@@ -124,6 +124,7 @@
 	KERN_CORE_USES_PID=52,		/* int: use core or core.%pid */
 	KERN_TAINTED=53,	/* int: various kernel tainted flags */
 	KERN_CADPID=54,		/* int: PID of the process to notify on CAD */
+	KERN_LOWLATENCY=55,     /* int: enable low latency scheduling */
  	KERN_CORE_PATTERN=56,	/* string: pattern for core-files */
 	KERN_PPC_L3CR=57,       /* l3cr register on PPC */
 	KERN_EXCEPTION_TRACE=58, /* boolean: exception trace */
diff -urN linux-2.4.25-imedia/kernel/exit.c linux-2.4.25-imedia-testing/kernel/exit.c
--- linux-2.4.25-imedia/kernel/exit.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/kernel/exit.c	Tue Feb 24 21:06:25 2004
@@ -242,6 +242,7 @@
 			}
 			i++;
 			set >>= 1;
+			conditional_schedule();		/* sys_exit, many files open */
 		}
 	}
 }
diff -urN linux-2.4.25-imedia/kernel/ksyms.c linux-2.4.25-imedia-testing/kernel/ksyms.c
--- linux-2.4.25-imedia/kernel/ksyms.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/kernel/ksyms.c	Tue Feb 24 21:06:25 2004
@@ -494,6 +494,13 @@
 EXPORT_SYMBOL(do_gettimeofday);
 EXPORT_SYMBOL(do_settimeofday);
 
+#if LOWLATENCY_NEEDED
+EXPORT_SYMBOL(set_running_and_schedule);
+#ifdef CONFIG_LOLAT_SYSCTL
+EXPORT_SYMBOL(__enable_lowlatency);
+#endif
+#endif
+
 #if !defined(__ia64__)
 EXPORT_SYMBOL(loops_per_jiffy);
 #endif
diff -urN linux-2.4.25-imedia/kernel/module.c linux-2.4.25-imedia-testing/kernel/module.c
--- linux-2.4.25-imedia/kernel/module.c	Tue Feb 24 13:53:07 2004
+++ linux-2.4.25-imedia-testing/kernel/module.c	Tue Feb 24 21:06:25 2004
@@ -1196,6 +1196,11 @@
 		return ERR_PTR(-ENOMEM);
 	lock_kernel();
 	for (v = module_list, n = *pos; v; n -= v->nsyms, v = v->next) {
+#if 0
+		/* We can't actually do this, because we'd create a
+		 * race against module unload.  Need a semaphore. */
+		conditional_schedule();
+#endif
 		if (n < v->nsyms) {
 			p->mod = v;
 			p->index = n;
diff -urN linux-2.4.25-imedia/kernel/sched.c linux-2.4.25-imedia-testing/kernel/sched.c
--- linux-2.4.25-imedia/kernel/sched.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/kernel/sched.c	Wed Feb 25 13:09:51 2004
@@ -319,6 +319,17 @@
 	set_tsk_need_resched(p);
 	if (!need_resched && (task_cpu(p) != smp_processor_id()))
 		smp_send_reschedule(task_cpu(p));
+#if LOWLATENCY_NEEDED
+ 	if (enable_lowlatency && (p->policy != SCHED_OTHER)) {
+ 		struct task_struct *t;
+ 		for (i = 0; i < smp_num_cpus; i++) {
+ 			cpu = cpu_logical_map(i);
+ 			t = cpu_curr(cpu);
+ 			if (t != tsk)
+ 				t->need_resched = 1;
+ 		}
+ 	}
+#endif
 #else
 	set_tsk_need_resched(p);
 #endif
@@ -630,6 +641,11 @@
 	prio_array_t *array;
 	struct list_head *head, *curr;
 
+	if (unlikely(prev->need_resched)) {
+		prev->need_resched = 0;
+		goto repeat_schedule;
+	}
+
 	/*
 	 * Handle platform specific balancing operations, such as 
 	 * hyperthreading.
@@ -1805,7 +1821,95 @@
 	atomic_inc(&init_mm.mm_count);
 	enter_lazy_tlb(&init_mm, current, smp_processor_id());
 }
-
+#if LOWLATENCY_NEEDED
+#if LOWLATENCY_DEBUG
+ 
+static struct lolat_stats_t *lolat_stats_head;
+static spinlock_t lolat_stats_lock = SPIN_LOCK_UNLOCKED;
+ 
+/* Debug variant: bump the caller's stats record (linking it into the list on first use), then reschedule. */
+void set_running_and_schedule(struct lolat_stats_t *stats)
+{
+	spin_lock(&lolat_stats_lock);
+	if (!stats->visited) {
+		stats->next = lolat_stats_head;
+		lolat_stats_head = stats;
+		stats->visited = 1;
+	}
+	stats->count++;
+	spin_unlock(&lolat_stats_lock);
+	if (current->state != TASK_RUNNING)
+		set_current_state(TASK_RUNNING);
+	schedule();
+}
+ 
+/* Print file:line and hit count for every record on the stats list, zeroing each count. */
+void show_lolat_stats(void)
+{
+	struct lolat_stats_t *entry;
+
+	printk("Low latency scheduling stats:\n");
+	for (entry = lolat_stats_head; entry; entry = entry->next) {
+		printk("%s:%d: %lu\n", entry->file, entry->line, entry->count);
+		entry->count = 0;
+	}
+}
+ 
+#else	/* LOWLATENCY_DEBUG */
+ 
+void set_running_and_schedule(void)	/* non-debug variant: no per-call-site stats */
+{
+	if (current->state != TASK_RUNNING)	/* skip the store when already TASK_RUNNING */
+		__set_current_state(TASK_RUNNING);
+	schedule();
+}
+ 
+#endif	/* LOWLATENCY_DEBUG */
+ 
+/* Copy to user space in chunks of at most 4096 bytes, calling conditional_schedule()
+ * after each chunk.  Returns 0 on success, else the number of bytes left uncopied. */
+int ll_copy_to_user(void *to_user, const void *from, unsigned long len)
+{
+	while (len) {
+		unsigned long n_to_copy = len > 4096 ? 4096 : len;
+		unsigned long remainder;
+
+		remainder = copy_to_user(to_user, from, n_to_copy);
+		if (remainder)	/* len still counts this chunk: do not add it twice */
+			return remainder + (len - n_to_copy);
+		to_user = ((char *)to_user) + n_to_copy;
+		from = ((char *)from) + n_to_copy;
+		len -= n_to_copy;
+		conditional_schedule();
+	}
+	return 0;
+}
+ 
+/* Copy from user space in chunks of at most 4096 bytes, calling conditional_schedule()
+ * after each chunk.  Returns 0 on success, else the number of bytes left uncopied. */
+int ll_copy_from_user(void *to, const void *from_user, unsigned long len)
+{
+	while (len) {
+		unsigned long n_to_copy = len > 4096 ? 4096 : len;
+		unsigned long remainder;
+
+		remainder = copy_from_user(to, from_user, n_to_copy);
+		if (remainder)	/* len still counts this chunk: do not add it twice */
+			return remainder + (len - n_to_copy);
+		to = ((char *)to) + n_to_copy;
+		from_user = ((char *)from_user) + n_to_copy;
+		len -= n_to_copy;
+		conditional_schedule();
+	}
+	return 0;
+}
+ 
+#ifdef CONFIG_LOLAT_SYSCTL
+struct low_latency_enable_struct __enable_lowlatency = { 0, };
+#endif
+ 
+#endif	/* LOWLATENCY_NEEDED */
+ 
 #if CONFIG_SMP
 
 /*
diff -urN linux-2.4.25-imedia/kernel/sysctl.c linux-2.4.25-imedia-testing/kernel/sysctl.c
--- linux-2.4.25-imedia/kernel/sysctl.c	Tue Feb 24 13:53:07 2004
+++ linux-2.4.25-imedia-testing/kernel/sysctl.c	Tue Feb 24 21:06:25 2004
@@ -287,6 +287,10 @@
 	{KERN_EXCEPTION_TRACE,"exception-trace",
 	 &exception_trace,sizeof(int),0644,NULL,&proc_dointvec},
 #endif	
+#ifdef CONFIG_LOLAT_SYSCTL
+	{KERN_LOWLATENCY, "lowlatency", &enable_lowlatency, sizeof (int),
+	 0644, NULL, &proc_dointvec},
+#endif
 	{0}
 };
 
diff -urN linux-2.4.25-imedia/mm/filemap.c linux-2.4.25-imedia-testing/mm/filemap.c
--- linux-2.4.25-imedia/mm/filemap.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/mm/filemap.c	Tue Feb 24 21:06:25 2004
@@ -185,7 +185,9 @@
 {
 	struct list_head *head, *curr;
 	struct page * page;
+	int ll_count = 100;
 
+restart:
 	head = &inode->i_mapping->clean_pages;
 
 	spin_lock(&pagemap_lru_lock);
@@ -196,6 +198,14 @@
 		page = list_entry(curr, struct page, list);
 		curr = curr->next;
 
+		if (conditional_schedule_needed() && ll_count) {
+			spin_unlock(&pagecache_lock);
+			spin_unlock(&pagemap_lru_lock);
+			unconditional_schedule();
+			ll_count--;
+			goto restart;
+		}
+
 		/* We cannot invalidate something in dirty.. */
 		if (PageDirty(page))
 			continue;
@@ -259,8 +269,7 @@
 	page_cache_release(page);
 }
 
-static int FASTCALL(truncate_list_pages(struct list_head *, unsigned long, unsigned *));
-static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial)
+static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial, int *restart_count)
 {
 	struct list_head *curr;
 	struct page * page;
@@ -271,6 +280,17 @@
 	while (curr != head) {
 		unsigned long offset;
 
+		if (conditional_schedule_needed() && *restart_count) {
+			(*restart_count)--;
+			list_del(head);
+			list_add(head, curr);		/* Restart on this page */
+			spin_unlock(&pagecache_lock);
+			unconditional_schedule();
+			spin_lock(&pagecache_lock);
+			unlocked = 1;
+			goto restart;
+		}
+
 		page = list_entry(curr, struct page, list);
 		offset = page->index;
 
@@ -303,13 +323,11 @@
 			} else
  				wait_on_page(page);
 
-			page_cache_release(page);
-
-			if (current->need_resched) {
-				__set_current_state(TASK_RUNNING);
-				schedule();
+			if (LOWLATENCY_NEEDED) {
+				*restart_count = 4;	/* We made progress */
 			}
 
+			page_cache_release(page);
 			spin_lock(&pagecache_lock);
 			goto restart;
 		}
@@ -332,13 +350,14 @@
 {
 	unsigned long start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
+	int restart_count = 4;
 	int unlocked;
 
 	spin_lock(&pagecache_lock);
 	do {
-		unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial);
-		unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial);
-		unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial);
+		unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial, &restart_count);
+		unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial, &restart_count);
+		unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial, &restart_count);
 	} while (unlocked);
 	/* Traversed all three lists without dropping the lock */
 	spin_unlock(&pagecache_lock);
@@ -483,6 +502,7 @@
 
 		page_cache_get(page);
 		spin_unlock(&pagecache_lock);
+		conditional_schedule();		/* sys_msync() (only used by minixfs, udf) */
 		lock_page(page);
 
 		/* The buffers could have been free'd while we waited for the page lock */
@@ -612,12 +632,14 @@
 		list_del(&page->list);
 		list_add(&page->list, &mapping->locked_pages);
 
-		if (!PageDirty(page))
-			continue;
-
 		page_cache_get(page);
 		spin_unlock(&pagecache_lock);
 
+		conditional_schedule();		/* sys_msync() */
+
+		if (!PageDirty(page))
+			goto clean;
+
 		lock_page(page);
 
 		if (PageDirty(page)) {
@@ -628,7 +650,7 @@
 				ret = err;
 		} else
 			UnlockPage(page);
-
+clean:
 		page_cache_release(page);
 		spin_lock(&pagecache_lock);
 	}
@@ -646,7 +668,8 @@
 int filemap_fdatawait(struct address_space * mapping)
 {
 	int ret = 0;
-
+	DEFINE_RESCHED_COUNT;
+restart:
 	spin_lock(&pagecache_lock);
 
         while (!list_empty(&mapping->locked_pages)) {
@@ -655,6 +678,17 @@
 		list_del(&page->list);
 		list_add(&page->list, &mapping->clean_pages);
 
+		if (TEST_RESCHED_COUNT(32)) {
+			RESET_RESCHED_COUNT();
+			if (conditional_schedule_needed()) {
+				page_cache_get(page);
+				spin_unlock(&pagecache_lock);
+				unconditional_schedule();
+				page_cache_release(page);
+				goto restart;
+			}
+		}
+
 		if (!PageLocked(page))
 			continue;
 
@@ -764,8 +798,10 @@
 	spin_lock(&pagecache_lock);
 	page = __find_page_nolock(mapping, offset, *hash);
 	spin_unlock(&pagecache_lock);
-	if (page)
+	if (page) {
+		conditional_schedule();
 		return 0;
+	}
 
 	page = page_cache_alloc(mapping);
 	if (!page)
@@ -1035,6 +1071,11 @@
 	 * the hash-list needs a held write-lock.
 	 */
 repeat:
+	if (conditional_schedule_needed()) {
+		spin_unlock(&pagecache_lock);
+		unconditional_schedule();
+		spin_lock(&pagecache_lock);
+	}
 	page = __find_page_nolock(mapping, offset, hash);
 	if (page) {
 		page_cache_get(page);
@@ -1490,6 +1531,8 @@
 		page_cache_get(page);
 		spin_unlock(&pagecache_lock);
 
+		conditional_schedule();		/* sys_read() */
+
 		if (!Page_Uptodate(page))
 			goto page_not_up_to_date;
 		generic_file_readahead(reada_ok, filp, inode, page);
@@ -2249,6 +2292,12 @@
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
+
+	if (conditional_schedule_needed()) {
+		spin_unlock(&vma->vm_mm->page_table_lock);
+		unconditional_schedule();		/* syncing large mapped files */
+		spin_lock(&vma->vm_mm->page_table_lock);
+	}
 	return error;
 }
 
@@ -2665,7 +2714,9 @@
 	if (vma->vm_flags & VM_LOCKED)
 		return -EINVAL;
 
-	zap_page_range(vma->vm_mm, start, end - start);
+        zap_page_range(vma->vm_mm, start, end - start,
+		ZPR_COND_RESCHED);        /* sys_madvise(MADV_DONTNEED) */
+
 	return 0;
 }
 
@@ -3239,6 +3290,9 @@
 			goto sync_failure;
 		page_fault = __copy_from_user(kaddr+offset, buf, bytes);
 		flush_dcache_page(page);
+
+                conditional_schedule();
+
 		status = mapping->a_ops->commit_write(file, page, offset, offset+bytes);
 		if (page_fault)
 			goto fail_write;
diff -urN linux-2.4.25-imedia/mm/memory.c linux-2.4.25-imedia-testing/mm/memory.c
--- linux-2.4.25-imedia/mm/memory.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/mm/memory.c	Tue Feb 24 21:06:25 2004
@@ -357,7 +357,7 @@
 /*
  * remove user pages in a given range.
  */
-void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
+static void do_zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
 {
 	mmu_gather_t *tlb;
 	pgd_t * dir;
@@ -478,6 +478,10 @@
 			struct page *map;
 			while (!(map = follow_page(mm, start, write))) {
 				spin_unlock(&mm->page_table_lock);
+
+				/* Pinning down many physical pages (kiobufs, mlockall) */
+				conditional_schedule();
+
 				switch (handle_mm_fault(mm, vma, start, write)) {
 				case 1:
 					tsk->min_flt++;
@@ -639,6 +643,21 @@
 	iobuf->locked = 0;
 }
 
+#define MAX_ZAP_BYTES (256*PAGE_SIZE)	/* largest chunk zapped between reschedule points */
+/* Zap size bytes starting at address; with ZPR_COND_RESCHED, work in bounded chunks. */
+void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, int actions)
+{
+	while (size) {
+		unsigned long chunk = size;
+		if ((actions & ZPR_COND_RESCHED) && chunk > MAX_ZAP_BYTES)
+			chunk = MAX_ZAP_BYTES;
+		do_zap_page_range(mm, address, chunk);
+		if (actions & ZPR_COND_RESCHED)
+			conditional_schedule();
+		address += chunk;
+		size -= chunk;
+	}
+}
 
 /*
  * Lock down all of the pages of a kiovec for IO.
@@ -748,11 +767,18 @@
 	return 0;
 }
 
-static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
-                                     unsigned long size, pgprot_t prot)
+static inline void zeromap_pte_range(struct mm_struct *mm, pte_t * pte,
+				unsigned long address, unsigned long size,
+				pgprot_t prot)
 {
 	unsigned long end;
 
+	if (conditional_schedule_needed()) {
+		spin_unlock(&mm->page_table_lock);
+		unconditional_schedule();		/* mmap(/dev/zero) */
+		spin_lock(&mm->page_table_lock);
+	}
+
 	address &= ~PMD_MASK;
 	end = address + size;
 	if (end > PMD_SIZE)
@@ -780,7 +806,7 @@
 		pte_t * pte = pte_alloc(mm, pmd, address);
 		if (!pte)
 			return -ENOMEM;
-		zeromap_pte_range(pte, address, end - address, prot);
+		zeromap_pte_range(mm, pte, address, end - address, prot);
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
@@ -1014,7 +1040,7 @@
 
 		/* mapping wholly truncated? */
 		if (mpnt->vm_pgoff >= pgoff) {
-			zap_page_range(mm, start, len);
+                        zap_page_range(mm, start, len, 0);
 			continue;
 		}
 
@@ -1027,7 +1053,7 @@
 		/* Ok, partially affected.. */
 		start += diff << PAGE_SHIFT;
 		len = (len - diff) << PAGE_SHIFT;
-		zap_page_range(mm, start, len);
+                zap_page_range(mm, start, len, 0);
 	} while ((mpnt = mpnt->vm_next_share) != NULL);
 }
 
diff -urN linux-2.4.25-imedia/mm/mmap.c linux-2.4.25-imedia-testing/mm/mmap.c
--- linux-2.4.25-imedia/mm/mmap.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/mm/mmap.c	Tue Feb 24 21:06:25 2004
@@ -688,7 +688,7 @@
 	fput(file);
 
 	/* Undo any partial mapping done by a device driver. */
-	zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+        zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start, 0);
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
@@ -1187,7 +1187,7 @@
 		remove_shared_vm_struct(mpnt);
 		mm->map_count--;
 
-		zap_page_range(mm, st, size);
+                zap_page_range(mm, st, size, ZPR_COND_RESCHED);   /* sys_munmap() */
 
 		/*
 		 * Fix the mapping, and free the old area if it wasn't reused.
@@ -1361,7 +1361,7 @@
 		}
 		mm->map_count--;
 		remove_shared_vm_struct(mpnt);
-		zap_page_range(mm, start, size);
+		zap_page_range(mm, start, size, ZPR_COND_RESCHED);      /* sys_exit() */
 		if (mpnt->vm_file)
 			fput(mpnt->vm_file);
 		kmem_cache_free(vm_area_cachep, mpnt);
diff -urN linux-2.4.25-imedia/mm/mremap.c linux-2.4.25-imedia-testing/mm/mremap.c
--- linux-2.4.25-imedia/mm/mremap.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/mm/mremap.c	Tue Feb 24 21:06:25 2004
@@ -133,7 +133,7 @@
 	flush_cache_range(mm, new_addr, new_addr + len);
 	while ((offset += PAGE_SIZE) < len)
 		move_one_page(mm, new_addr + offset, old_addr + offset);
-	zap_page_range(mm, new_addr, len);
+        zap_page_range(mm, new_addr, len, 0);
 	return -1;
 }
 
diff -urN linux-2.4.25-imedia/mm/slab.c linux-2.4.25-imedia-testing/mm/slab.c
--- linux-2.4.25-imedia/mm/slab.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/mm/slab.c	Tue Feb 24 21:06:25 2004
@@ -937,6 +937,7 @@
 		list_del(&slabp->list);
 
 		spin_unlock_irq(&cachep->spinlock);
+		conditional_schedule();
 		kmem_slab_destroy(cachep, slabp);
 		ret++;
 		spin_lock_irq(&cachep->spinlock);
@@ -1853,6 +1854,7 @@
 		 */
 		spin_unlock_irq(&best_cachep->spinlock);
 		kmem_slab_destroy(best_cachep, slabp);
+		conditional_schedule();		/* try_to_free_pages() */
 		spin_lock_irq(&best_cachep->spinlock);
 	}
 	spin_unlock_irq(&best_cachep->spinlock);
diff -urN linux-2.4.25-imedia/mm/swapfile.c linux-2.4.25-imedia-testing/mm/swapfile.c
--- linux-2.4.25-imedia/mm/swapfile.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/mm/swapfile.c	Tue Feb 24 21:06:25 2004
@@ -843,7 +843,7 @@
 				len += sprintf(buf + len, "partition\t");
 
 			usedswap = 0;
-			for (j = 0; j < ptr->max; ++j)
+			for (j = 0; j < ptr->max; ++j) {
 				switch (ptr->swap_map[j]) {
 					case SWAP_MAP_BAD:
 					case 0:
@@ -851,6 +851,8 @@
 					default:
 						usedswap++;
 				}
+				conditional_schedule();
+			}
 			len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10), 
 				usedswap << (PAGE_SHIFT - 10), ptr->prio);
 		}
@@ -1149,6 +1151,11 @@
 		if (swap_info[i].flags != SWP_USED)
 			continue;
 		for (j = 0; j < swap_info[i].max; ++j) {
+			if (conditional_schedule_needed()) {
+				swap_list_unlock();
+				conditional_schedule();
+				swap_list_lock();
+			}
 			switch (swap_info[i].swap_map[j]) {
 				case 0:
 				case SWAP_MAP_BAD:
diff -urN linux-2.4.25-imedia/mm/vmscan.c linux-2.4.25-imedia-testing/mm/vmscan.c
--- linux-2.4.25-imedia/mm/vmscan.c	Tue Feb 24 13:53:07 2004
+++ linux-2.4.25-imedia-testing/mm/vmscan.c	Tue Feb 24 21:06:25 2004
@@ -233,6 +233,7 @@
 {
 	pte_t * pte;
 	unsigned long pmd_end;
+	DEFINE_RESCHED_COUNT;
 
 	if (pmd_none(*dir))
 		return count;
@@ -258,11 +259,17 @@
 					address += PAGE_SIZE;
 					break;
 				}
+                                if (TEST_RESCHED_COUNT(4)) {
+                                        if (conditional_schedule_needed())
+						goto out;
+                                        RESET_RESCHED_COUNT();
+                                }
 			}
 		}
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
+out:
 	mm->swap_address = address;
 	return count;
 }
@@ -291,6 +298,8 @@
 		count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone);
 		if (!count)
 			break;
+		if (conditional_schedule_needed())
+			return count;
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
@@ -315,6 +324,8 @@
 		count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone);
 		if (!count)
 			break;
+		if (conditional_schedule_needed())
+			return count;
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		pgdir++;
 	} while (address && (address < end));
@@ -336,6 +347,7 @@
 	 * Find the proper vm-area after freezing the vma chain 
 	 * and ptes.
 	 */
+continue_scan:
 	spin_lock(&mm->page_table_lock);
 	address = mm->swap_address;
 	if (address == TASK_SIZE || swap_mm != mm) {
@@ -353,6 +365,12 @@
 			vma = vma->vm_next;
 			if (!vma)
 				break;
+                        if (conditional_schedule_needed()) {    /* Scanning a large vma */
+                                spin_unlock(&mm->page_table_lock);
+                                unconditional_schedule();
+                                /* Continue from where we left off */
+                                goto continue_scan;
+                        }
 			if (!count)
 				goto out_unlock;
 			address = vma->vm_start;
diff -urN linux-2.4.25-imedia/net/core/iovec.c linux-2.4.25-imedia-testing/net/core/iovec.c
--- linux-2.4.25-imedia/net/core/iovec.c	Tue Feb 24 17:53:42 2004
+++ linux-2.4.25-imedia-testing/net/core/iovec.c	Tue Feb 24 21:06:25 2004
@@ -88,7 +88,7 @@
 		if(iov->iov_len)
 		{
 			int copy = min_t(unsigned int, iov->iov_len, len);
-			if (copy_to_user(iov->iov_base, kdata, copy))
+                        if (ll_copy_to_user(iov->iov_base, kdata, copy))
 				goto out;
 			kdata+=copy;
 			len-=copy;
diff -urN linux-2.4.25-imedia/net/ipv4/tcp_minisocks.c linux-2.4.25-imedia-testing/net/ipv4/tcp_minisocks.c
--- linux-2.4.25-imedia/net/ipv4/tcp_minisocks.c	Mon Aug 25 14:44:44 2003
+++ linux-2.4.25-imedia-testing/net/ipv4/tcp_minisocks.c	Tue Feb 24 21:06:25 2004
@@ -433,6 +433,9 @@
 {
 	struct tcp_tw_bucket *tw;
 	int killed = 0;
+#if LOWLATENCY_NEEDED
+	int max_killed = 0;
+#endif
 
 	/* NOTE: compare this to previous version where lock
 	 * was released after detaching chain. It was racy,
@@ -446,6 +449,13 @@
 		goto out;
 
 	while((tw = tcp_tw_death_row[tcp_tw_death_row_slot]) != NULL) {
+#if LOWLATENCY_NEEDED
+		/* This loop takes ~6 usecs per iteration. */
+		if (killed > 100) {
+			max_killed = 1;
+			break;
+		}
+#endif
 		tcp_tw_death_row[tcp_tw_death_row_slot] = tw->next_death;
 		if (tw->next_death)
 			tw->next_death->pprev_death = tw->pprev_death;
@@ -458,12 +468,24 @@
 		killed++;
 
 		spin_lock(&tw_death_lock);
+
+	}
+
+#if LOWLATENCY_NEEDED
+	if (max_killed) {	/* More to do: do it soon */
+		mod_timer(&tcp_tw_timer, jiffies+2);
+		tcp_tw_count -= killed;
+	}
+	else
+#endif
+	{
+		tcp_tw_death_row_slot =
+			((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
+
+		if ((tcp_tw_count -= killed) != 0)
+			mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
 	}
-	tcp_tw_death_row_slot =
-		((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
 
-	if ((tcp_tw_count -= killed) != 0)
-		mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
 	net_statistics[smp_processor_id()*2].TimeWaited += killed;
 out:
 	spin_unlock(&tw_death_lock);

