diff -urNp x-ref/arch/i386/kernel/process.c x/arch/i386/kernel/process.c
--- x-ref/arch/i386/kernel/process.c	Wed Oct 16 16:45:08 2002
+++ x/arch/i386/kernel/process.c	Wed Oct 16 16:47:52 2002
@@ -129,7 +129,7 @@ void cpu_idle (void)
 		void (*idle)(void) = pm_idle;
 		if (!idle)
 			idle = default_idle;
-		if (!current->need_resched)
+		while (!current->need_resched)
 			idle();
 		schedule();
 		check_pgt_cache();
diff -urNp x-ref/include/asm-i386/smp_balance.h x/include/asm-i386/smp_balance.h
--- x-ref/include/asm-i386/smp_balance.h	Wed Oct 16 16:46:19 2002
+++ x/include/asm-i386/smp_balance.h	Wed Oct 16 16:47:52 2002
@@ -29,10 +29,28 @@ static inline int find_idle_package(int 
 	return -1;	/* not found */
 }
 
+static inline int arch_reschedule_idle_override(task_t * p, int idle)
+{
+	if (unlikely(smp_num_siblings > 1) && !idle_cpu(cpu_sibling_map[idle])) {
+		int true_idle = find_idle_package(idle);
+		if (true_idle >= 0) {
+			if (likely(p->cpus_allowed & (1UL << true_idle)))
+				idle = true_idle;
+			else {
+				true_idle = cpu_sibling_map[true_idle];
+				if (p->cpus_allowed & (1UL << true_idle))
+					idle = true_idle;
+			}
+		}
+	}
+
+	return idle;
+}
+
 static inline int arch_load_balance(int this_cpu, int idle)
 {
 	/* Special hack for hyperthreading */
-       if (unlikely(smp_num_siblings > 1 && idle && !idle_cpu(cpu_sibling_map[this_cpu]))) {
+       if (unlikely(smp_num_siblings > 1 && idle == 2 && !idle_cpu(cpu_sibling_map[this_cpu]))) {
                int found;
                struct runqueue *rq_target;
 
diff -urNp x-ref/include/linux/sched.h x/include/linux/sched.h
--- x-ref/include/linux/sched.h	Wed Oct 16 16:46:19 2002
+++ x/include/linux/sched.h	Wed Oct 16 16:47:52 2002
@@ -145,7 +145,6 @@ typedef struct task_struct task_t;
 
 extern void sched_init(void);
 extern void init_idle(task_t *idle, int cpu);
-extern int idle_cpu(int cpu);
 extern void show_state(void);
 extern void cpu_init (void);
 extern void trap_init(void);
diff -urNp x-ref/include/linux/sched_runqueue.h x/include/linux/sched_runqueue.h
--- x-ref/include/linux/sched_runqueue.h	Wed Oct 16 16:45:54 2002
+++ x/include/linux/sched_runqueue.h	Wed Oct 16 16:47:52 2002
@@ -62,9 +62,12 @@ struct runqueue {
 	task_t *curr, *idle;
 	prio_array_t *active, *expired, arrays[2];
 	long nr_uninterruptible;
+#ifdef CONFIG_SMP
+	long last_jiffy;
 	int prev_nr_running[NR_CPUS];
 	task_t *migration_thread;
 	list_t migration_queue;
+#endif
 } ____cacheline_aligned;
 
 typedef struct runqueue runqueue_t;
diff -urNp x-ref/include/linux/smp_balance.h x/include/linux/smp_balance.h
--- x-ref/include/linux/smp_balance.h	Wed Oct 16 16:46:19 2002
+++ x/include/linux/smp_balance.h	Wed Oct 16 16:47:52 2002
@@ -9,6 +9,7 @@
 #include <asm/smp_balance.h>
 #else
 #define arch_load_balance(x, y)		(0)
+#define arch_reschedule_idle_override(x, idle)	(idle)
 #endif
 
 #endif /* _LINUX_SMP_BALANCE_H */
diff -urNp x-ref/kernel/fork.c x/kernel/fork.c
--- x-ref/kernel/fork.c	Wed Oct 16 16:45:08 2002
+++ x/kernel/fork.c	Wed Oct 16 16:46:52 2002
@@ -639,8 +639,6 @@ int do_fork(unsigned long clone_flags, u
 	if (p->pid == 0 && current->pid != 0)
 		goto bad_fork_cleanup;
 
-	INIT_LIST_HEAD(&p->run_list);
-
 	p->p_cptr = NULL;
 	init_waitqueue_head(&p->wait_chldexit);
 	p->vfork_done = NULL;
@@ -674,7 +672,6 @@ int do_fork(unsigned long clone_flags, u
 		spin_lock_init(&p->sigmask_lock);
 	}
 #endif
-	p->array = NULL;
 	p->lock_depth = -1;		/* -1 = no lock */
 	p->start_time = jiffies;
 
diff -urNp x-ref/kernel/sched.c x/kernel/sched.c
--- x-ref/kernel/sched.c	Wed Oct 16 16:46:19 2002
+++ x/kernel/sched.c	Wed Oct 16 16:47:52 2002
@@ -54,9 +54,8 @@
  */
 #define MIN_TIMESLICE		( 10 * HZ / 1000)
 #define MAX_TIMESLICE		(300 * HZ / 1000)
-#define CHILD_PENALTY		95
+#define CHILD_PENALTY		50
 #define PARENT_PENALTY		100
-#define EXIT_WEIGHT		3
 #define PRIO_BONUS_RATIO	25
 #define INTERACTIVE_DELTA	2
 #define MAX_SLEEP_AVG		(2*HZ)
@@ -119,7 +118,7 @@ runqueue_t runqueues[NR_CPUS] __cachelin
  */
 #ifndef prepare_arch_switch
 # define prepare_arch_switch(rq, next) do { } while(0)
-# define finish_arch_switch(rq, next)  spin_unlock_irq(&(rq)->lock)
+# define finish_arch_switch(rq, prev)  spin_unlock_irq(&(rq)->lock)
 #endif
 
 /*
@@ -157,12 +156,18 @@ static inline void dequeue_task(struct t
 		__clear_bit(p->prio, array->bitmap);
 }
 
-static inline void enqueue_task(struct task_struct *p, prio_array_t *array)
+#define enqueue_task(p, array) __enqueue_task(p, array, NULL)
+static inline void __enqueue_task(struct task_struct *p, prio_array_t *array, task_t * parent)
 {
-	list_add_tail(&p->run_list, array->queue + p->prio);
-	__set_bit(p->prio, array->bitmap);
+	if (!parent) {
+		list_add_tail(&p->run_list, array->queue + p->prio);
+		__set_bit(p->prio, array->bitmap);
+		p->array = array;
+	} else {
+		list_add_tail(&p->run_list, &parent->run_list);
+		array = p->array = parent->array;
+	}
 	array->nr_active++;
-	p->array = array;
 }
 
 static inline int effective_prio(task_t *p)
@@ -191,12 +196,13 @@ static inline int effective_prio(task_t 
 	return prio;
 }
 
-static inline void activate_task(task_t *p, runqueue_t *rq)
+#define activate_task(p, rq) __activate_task(p, rq, NULL)
+static inline void __activate_task(task_t *p, runqueue_t *rq, task_t * parent)
 {
 	unsigned long sleep_time = jiffies - p->sleep_timestamp;
 	prio_array_t *array = rq->active;
 
-	if (!rt_task(p) && sleep_time) {
+	if (!parent && !rt_task(p) && sleep_time) {
 		/*
 		 * This code gives a bonus to interactive tasks. We update
 		 * an 'average sleep time' value here, based on
@@ -204,12 +210,13 @@ static inline void activate_task(task_t 
 		 * the higher the average gets - and the higher the priority
 		 * boost gets as well.
 		 */
+		p->sleep_timestamp = jiffies;
 		p->sleep_avg += sleep_time;
 		if (p->sleep_avg > MAX_SLEEP_AVG)
 			p->sleep_avg = MAX_SLEEP_AVG;
 		p->prio = effective_prio(p);
 	}
-	enqueue_task(p, array);
+	__enqueue_task(p, array, parent);
 	rq->nr_running++;
 }
 
@@ -278,6 +285,10 @@ void kick_if_running(task_t * p)
 }
 #endif
 
+#ifdef CONFIG_SMP
+static int FASTCALL(reschedule_idle(task_t * p));
+#endif
+
 /*
  * Wake up a process. Put it on the run-queue if it's not
  * already there.  The "current" process is always on the
@@ -295,18 +306,27 @@ static int try_to_wake_up(task_t * p, in
 	long old_state;
 	runqueue_t *rq;
 
+#ifdef CONFIG_SMP
 repeat_lock_task:
+#endif
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;
 	if (!p->array) {
-		if (unlikely(sync) &&
-		    rq->curr != p &&
-		    p->cpu != smp_processor_id() &&
-		    p->cpus_allowed & (1UL << smp_processor_id())) {
-			p->cpu = smp_processor_id();
-			task_rq_unlock(rq, &flags);
-			goto repeat_lock_task;
+#ifdef CONFIG_SMP
+		if (likely(rq->curr != p)) {
+			/* can migrate */
+			if (unlikely(sync)) {
+				if (p->cpu != smp_processor_id() &&
+				    p->cpus_allowed & (1UL << smp_processor_id())) {
+					p->cpu = smp_processor_id();
+					goto migrated_task;
+				}
+			} else {
+				if (reschedule_idle(p))
+					goto migrated_task;
+			}
 		}
+#endif
 		if (old_state == TASK_UNINTERRUPTIBLE)
 			rq->nr_uninterruptible--;
 		activate_task(p, rq);
@@ -318,6 +338,12 @@ repeat_lock_task:
 	task_rq_unlock(rq, &flags);
 
 	return success;
+
+#ifdef CONFIG_SMP
+ migrated_task:
+	task_rq_unlock(rq, &flags);
+	goto repeat_lock_task;
+#endif
 }
 
 int wake_up_process(task_t * p)
@@ -328,23 +354,47 @@ int wake_up_process(task_t * p)
 void wake_up_forked_process(task_t * p)
 {
 	runqueue_t *rq;
+	task_t * parent = current;
 
 	rq = this_rq();
 	spin_lock_irq(&rq->lock);
 
 	p->state = TASK_RUNNING;
-	if (!rt_task(p)) {
+	if (likely(!rt_task(p) && parent->array)) {
 		/*
-		 * We decrease the sleep average of forking parents
-		 * and children as well, to keep max-interactive tasks
+		 * We decrease the sleep average of forked
+		 * children, to keep max-interactive tasks
 		 * from forking tasks that are max-interactive.
+		 * CHILD_PENALTY is set to 50% since we have
+		 * no clue if this is still an interactive
+		 * task like the parent or if this will be a
+		 * cpu bound task. The parent isn't touched
+		 * as we don't make assumptions about the parent
+		 * changing behaviour after the child is forked.
 		 */
-		current->sleep_avg = current->sleep_avg * PARENT_PENALTY / 100;
+		parent->sleep_avg = parent->sleep_avg * PARENT_PENALTY / 100;
 		p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
+
+		/*
+		 * For its first schedule keep the child at the same
+		 * priority (i.e. in the same list) of the parent,
+		 * activate_forked_task() will take care to put the
+		 * child in front of the parent (lifo) to guarantee a
+		 * schedule-child-first behaviour after fork.
+		 */
+		p->prio = parent->prio;
+	} else {
+		/*
+		 * Take the usual wakeup path if it's RT or if
+		 * it's a child of the first idle task (during boot
+		 * only).
+		 */
 		p->prio = effective_prio(p);
+		parent = NULL;
 	}
+
 	p->cpu = smp_processor_id();
-	activate_task(p, rq);
+	__activate_task(p, rq, parent);
 	spin_unlock_irq(&rq->lock);
 }
 
@@ -366,13 +416,6 @@ void sched_exit(task_t * p)
 			current->time_slice = MAX_TIMESLICE;
 	}
 	__sti();
-	/*
-	 * If the child was a (relative-) CPU hog then decrease
-	 * the sleep_avg of the parent as well.
-	 */
-	if (p->sleep_avg < current->sleep_avg)
-		current->sleep_avg = (current->sleep_avg * EXIT_WEIGHT +
-			p->sleep_avg) / (EXIT_WEIGHT + 1);
 }
 
 #if CONFIG_SMP
@@ -484,8 +527,39 @@ static inline void pull_task(runqueue_t 
 		set_need_resched();
 }
 
+static inline int idle_cpu_reschedule(task_t * p, int cpu)
+{
+	if (unlikely(!(p->cpus_allowed & (1UL << cpu))))
+		return 0;
+	return idle_cpu(cpu);
+}
+
 #include <linux/smp_balance.h>
 
+static int reschedule_idle(task_t * p)
+{
+	int p_cpu = p->cpu, i;
+
+	if (idle_cpu(p_cpu))
+		return 0;
+
+	p_cpu = cpu_number_map(p_cpu);
+
+	for (i = (p_cpu + 1) % smp_num_cpus;
+	     i != p_cpu;
+	     i = (i + 1) % smp_num_cpus) {
+		int physical = cpu_logical_map(i);
+
+		if (idle_cpu_reschedule(p, physical)) {
+			physical = arch_reschedule_idle_override(p, physical);
+			p->cpu = physical;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Current runqueue is empty, or rebalance tick: if there is an
  * inbalance (current runqueue is too short) then pull from
@@ -509,6 +583,7 @@ static void load_balance(runqueue_t *thi
 	if (arch_load_balance(this_cpu, idle))
 		return;
 
+ retry:
 	/*
 	 * We search all runqueues to find the most busy one.
 	 * We do this lockless to reduce cache-bouncing overhead,
@@ -563,13 +638,13 @@ static void load_balance(runqueue_t *thi
 	if (!idle && (imbalance < (max_load + 3)/4))
 		return;
 
-	nr_running = double_lock_balance(this_rq, busiest, this_cpu, idle, nr_running);
 	/*
-	 * Make sure nothing changed since we checked the
+	 * Make sure nothing significant changed since we checked the
 	 * runqueue length.
 	 */
-	if (busiest->nr_running <= nr_running + 1)
-		goto out_unlock;
+	if (double_lock_balance(this_rq, busiest, this_cpu, idle, nr_running) > nr_running ||
+	    busiest->nr_running < max_load)
+		goto out_unlock_retry;
 
 	/*
 	 * We first consider expired tasks. Those will likely not be
@@ -624,7 +699,7 @@ skip_queue:
 		goto skip_bitmap;
 	}
 	pull_task(busiest, array, tmp, this_rq, this_cpu);
-	if (!idle && --imbalance) {
+	if (--imbalance > 0) {
 		if (curr != head)
 			goto skip_queue;
 		idx++;
@@ -632,6 +707,10 @@ skip_queue:
 	}
 out_unlock:
 	spin_unlock(&busiest->lock);
+	return;
+out_unlock_retry:
+	spin_unlock(&busiest->lock);
+	goto retry;
 }
 
 /*
@@ -640,19 +719,19 @@ out_unlock:
  * frequency and balancing agressivity depends on whether the CPU is
  * idle or not.
  *
- * busy-rebalance every 250 msecs. idle-rebalance every 1 msec. (or on
- * systems with HZ=100, every 10 msecs.)
+ * busy-rebalance every 250 msecs. idle-rebalance every 100 msecs.
  */
 #define BUSY_REBALANCE_TICK (HZ/4 ?: 1)
-#define IDLE_REBALANCE_TICK (HZ/1000 ?: 1)
+#define IDLE_REBALANCE_TICK (HZ/10 ?: 1)
 
 static inline void idle_tick(void)
 {
-	if (jiffies % IDLE_REBALANCE_TICK)
-		return;
-	spin_lock(&this_rq()->lock);
-	load_balance(this_rq(), 1);
-	spin_unlock(&this_rq()->lock);
+	if (unlikely(time_before_eq(this_rq()->last_jiffy + IDLE_REBALANCE_TICK, jiffies))) {
+		spin_lock(&this_rq()->lock);
+		load_balance(this_rq(), 1);
+		spin_unlock(&this_rq()->lock);
+		this_rq()->last_jiffy = jiffies;
+	}
 }
 
 #endif
@@ -743,8 +822,10 @@ void scheduler_tick(int user_tick, int s
 	}
 out:
 #if CONFIG_SMP
-	if (!(jiffies % BUSY_REBALANCE_TICK))
+	if (unlikely(time_before_eq(this_rq()->last_jiffy + BUSY_REBALANCE_TICK, jiffies))) {
 		load_balance(rq, 0);
+		rq->last_jiffy = jiffies;
+	}
 #endif
 	spin_unlock(&rq->lock);
 }
@@ -789,7 +870,8 @@ pick_next_task:
 #endif
 	if (unlikely(!rq->nr_running)) {
 #if CONFIG_SMP
-		load_balance(rq, 1);
+		load_balance(rq, 2);
+		rq->last_jiffy = jiffies;
 		if (rq->nr_running)
 			goto pick_next_task;
 #endif
@@ -1027,7 +1109,7 @@ void set_user_nice(task_t *p, long nice)
 		 * If the task is running and lowered its priority,
 		 * or increased its priority then reschedule its CPU:
 		 */
-		if ((NICE_TO_PRIO(nice) < p->static_prio) || (p == rq->curr))
+		if (p == rq->curr)
 			resched_task(rq->curr);
 	}
 out_unlock:
@@ -1331,31 +1413,55 @@ out_unlock:
 asmlinkage long sys_sched_yield(void)
 {
 	runqueue_t *rq = this_rq();
-	prio_array_t *array = current->array;
+	prio_array_t *array;
 	int i;
 
 	spin_lock_irq(&rq->lock);
 	
+	if (unlikely(rq->nr_running == 1)) {
+		spin_unlock_irq(&rq->lock);
+		return 0;
+	}
+
+	array = current->array;
 	if (unlikely(rt_task(current))) {
 		list_del(&current->run_list);
 		list_add_tail(&current->run_list, array->queue + current->prio);
 		goto out_unlock;
 	}
 
+	if (unlikely(array == rq->expired) && rq->active->nr_active)
+		goto out_unlock;
+
 	list_del(&current->run_list);
 	if (!list_empty(array->queue + current->prio)) {
 		list_add(&current->run_list, array->queue[current->prio].next);
 		goto out_unlock;
 	}
+
 	__clear_bit(current->prio, array->bitmap);
+	if (likely(array == rq->active) && array->nr_active == 1) {
+		/*
+		 * We're the last task in the active queue so
+		 * we must move ourselves to the expired array
+		 * to avoid running again immediately.
+		 */
+		array->nr_active--;
+		array = rq->expired;
+		array->nr_active++;
+	}
 
 	i = sched_find_first_bit(array->bitmap);
 
-	if (i == MAX_PRIO || i <= current->prio)
+	BUG_ON(i == MAX_PRIO);
+	BUG_ON(i == current->prio && array == current->array);
+
+	if (array == current->array && i < current->prio)
 		i = current->prio;
-	else
+	else {
+		current->array = array;
 		current->prio = i;
-
+	}
 	list_add(&current->run_list, array->queue[i].next);
 	__set_bit(i, array->bitmap);
 
@@ -1602,7 +1708,9 @@ void __init sched_init(void)
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
 		spin_lock_init(&rq->lock);
+#ifdef CONFIG_SMP
 		INIT_LIST_HEAD(&rq->migration_queue);
+#endif
 
 		for (j = 0; j < 2; j++) {
 			array = rq->arrays + j;