
From: Andi Kleen <ak@muc.de>

Set up SMT for the domain scheduler on x86-64.  This way the scheduling
works better on HyperThreading aware systems; in particular it will use
both physical CPUs before sharing two virtual CPUs on the same package. 
This improves performance considerably in some cases.

Based on the i386 code and a previous patch from Suresh B. Siddha.


---

 25-akpm/arch/x86_64/Kconfig            |   10 +++
 25-akpm/arch/x86_64/kernel/Makefile    |    1 
 25-akpm/arch/x86_64/kernel/domain.c    |   89 +++++++++++++++++++++++++++++++++
 25-akpm/include/asm-x86_64/processor.h |    5 +
 4 files changed, 105 insertions(+)

diff -puN arch/x86_64/Kconfig~sched-x86_64-sched-domains-support arch/x86_64/Kconfig
--- 25/arch/x86_64/Kconfig~sched-x86_64-sched-domains-support	2004-05-10 01:12:39.625564760 -0700
+++ 25-akpm/arch/x86_64/Kconfig	2004-05-10 01:12:39.634563392 -0700
@@ -239,6 +239,16 @@ config PREEMPT
 	  Say Y here if you are feeling brave and building a kernel for a 
 	  desktop, embedded or real-time system.  Say N if you are unsure. 
 
+config SCHED_SMT
+	bool "SMT (Hyperthreading) scheduler support"
+	depends on SMP
+	default n
+	help
+	  SMT scheduler support improves the CPU scheduler's decision making
+	  when dealing with Intel Pentium 4 chips with HyperThreading at a
+	  cost of slightly increased overhead in some places. If unsure say
+	  N here.
+
 # someone write a better help text please.
 config K8_NUMA
        bool "K8 NUMA support"
diff -puN /dev/null arch/x86_64/kernel/domain.c
--- /dev/null	2003-09-15 06:40:47.000000000 -0700
+++ 25-akpm/arch/x86_64/kernel/domain.c	2004-05-10 01:12:39.633563544 -0700
@@ -0,0 +1,89 @@
+#include <linux/init.h>
+#include <linux/sched.h>
+
+/* Don't do any NUMA setup on Opteron right now. They seem to be
+   better off with flat scheduling. This is just for SMT. */
+
+#ifdef CONFIG_SCHED_SMT
+
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+__init void arch_init_sched_domains(void)
+{
+	int i;
+	struct sched_group *first = NULL, *last = NULL;
+
+	/* Set up domains */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+
+		*cpu_domain = SD_SIBLING_INIT;
+		cpu_domain->span = cpu_sibling_map[i];
+		cpu_domain->parent = phys_domain;
+		cpu_domain->groups = &sched_group_cpus[i];
+
+		*phys_domain = SD_CPU_INIT;
+		phys_domain->span = cpu_possible_map;
+		phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
+	}
+
+	/* Set up CPU (sibling) groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		int j;
+		first = last = NULL;
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		for_each_cpu_mask(j, cpu_domain->span) {
+			struct sched_group *cpu = &sched_group_cpus[j];
+
+			cpus_clear(cpu->cpumask);
+			cpu_set(j, cpu->cpumask);
+			cpu->cpu_power = SCHED_LOAD_SCALE;
+
+			if (!first)
+				first = cpu;
+			if (last)
+				last->next = cpu;
+			last = cpu;
+		}
+		last->next = first;
+	}
+
+	first = last = NULL;
+	/* Set up physical groups */
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		struct sched_group *cpu = &sched_group_phys[i];
+
+		if (i != first_cpu(cpu_domain->span))
+			continue;
+
+		cpu->cpumask = cpu_domain->span;
+		/*
+		 * Make each extra sibling increase power by 10% of
+		 * the basic CPU. This is very arbitrary.
+		 */
+		cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
+
+		if (!first)
+			first = cpu;
+		if (last)
+			last->next = cpu;
+		last = cpu;
+	}
+	last->next = first;
+
+	mb();
+	for_each_cpu(i) {
+		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
+		cpu_attach_domain(cpu_domain, i);
+	}
+}
+
+#endif
diff -puN arch/x86_64/kernel/Makefile~sched-x86_64-sched-domains-support arch/x86_64/kernel/Makefile
--- 25/arch/x86_64/kernel/Makefile~sched-x86_64-sched-domains-support	2004-05-10 01:12:39.626564608 -0700
+++ 25-akpm/arch/x86_64/kernel/Makefile	2004-05-10 01:12:39.633563544 -0700
@@ -25,6 +25,7 @@ obj-$(CONFIG_EARLY_PRINTK)	+= early_prin
 obj-$(CONFIG_GART_IOMMU)	+= pci-gart.o aperture.o
 obj-$(CONFIG_DUMMY_IOMMU)	+= pci-nommu.o pci-dma.o
 obj-$(CONFIG_SWIOTLB)		+= swiotlb.o
+obj-$(CONFIG_SCHED_SMT)		+= domain.o
 
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_KGDB)		+= kgdb_stub.o
diff -puN include/asm-x86_64/processor.h~sched-x86_64-sched-domains-support include/asm-x86_64/processor.h
--- 25/include/asm-x86_64/processor.h~sched-x86_64-sched-domains-support	2004-05-10 01:12:39.628564304 -0700
+++ 25-akpm/include/asm-x86_64/processor.h	2004-05-10 01:12:39.632563696 -0700
@@ -456,4 +456,9 @@ static inline void __mwait(unsigned long
 
 #define cache_line_size() (boot_cpu_data.x86_cache_alignment)
 
+#ifdef CONFIG_SCHED_SMT
+#define ARCH_HAS_SCHED_DOMAIN
+#define ARCH_HAS_SCHED_WAKE_IDLE
+#endif
+
 #endif /* __ASM_X86_64_PROCESSOR_H */

_
