From 70ffc71c5c42c8ac62d951e80d9799bd5764f2f5 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 27 May 2005 12:52:59 -0700 Subject: [PATCH] arch/i386/kernel/cpu/intel_cacheinfo.c: section fix num_cache_leaves is used in __devexit cache_remove_dev() and can therefore not be __devinit. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel_cacheinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index aeb5b4ef8c8b..a710dc4eb20e 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -118,7 +118,7 @@ struct _cpuid4_info { }; #define MAX_CACHE_LEAVES 4 -static unsigned short __devinitdata num_cache_leaves; +static unsigned short num_cache_leaves; static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { -- cgit v1.2.3 From 49f384b82b03416dd7e4fc77847a959fe3247362 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Fri, 27 May 2005 12:53:01 -0700 Subject: [PATCH] x86: fix smp_num_siblings on buggy BIOSes This fixes 'smp_num_siblings' value on the systems with a buggy bios, which sets number of siblings to '2' even when HT is disabled. (more details are at http://bugzilla.kernel.org/show_bug.cgi?id=4359) I am planning to do more cleanup in this area (like moving smp_num_siblings to per cpuinfo) shortly. Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/smpboot.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 35bfe138cb1a..bc1bb6919e6a 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1074,8 +1074,10 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpu_set(cpu, cpu_sibling_map[cpu]); } - if (siblings != smp_num_siblings) + if (siblings != smp_num_siblings) { printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); + smp_num_siblings = siblings; + } if (c->x86_num_cores > 1) { for (i = 0; i < NR_CPUS; i++) { -- cgit v1.2.3 From adaa765d76f58b47e10a4760f2c0bc86de5479b9 Mon Sep 17 00:00:00 2001 From: Alexander Nyberg Date: Tue, 31 May 2005 14:39:27 -0700 Subject: [PATCH] acpi build fix: x86 setup.c This is a neverending story linux/acpi.h contains empty declarations for acpi_boot_init() & acpi_boot_table_init() but they are nested inside #ifdef CONFIG_ACPI. So we'll have to #ifdef in arch/i386/kernel/setup.c: setup_arch() Signed-off-by: Alexander Nyberg Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 945ec73163c8..2bfbddebdbf8 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1502,11 +1502,13 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_map_memmap(); +#ifdef CONFIG_ACPI_BOOT /* * Parse the ACPI tables for possible boot-time SMP configuration. */ acpi_boot_table_init(); acpi_boot_init(); +#endif #ifdef CONFIG_X86_LOCAL_APIC if (smp_found_config) -- cgit v1.2.3 From 7eb53d88230e23f83b2e20a78955e1412fa7bb26 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 31 May 2005 19:03:42 -0700 Subject: [CPUFREQ] powernow-k7: don't print khz element of FSB. Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/powernow-k7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c index 913f652623d9..80f01b00bada 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c @@ -592,7 +592,7 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) printk(KERN_WARNING PFX "can not determine bus frequency\n"); return -EINVAL; } - dprintk("FSB: %3d.%03d MHz\n", fsb/1000, fsb%1000); + dprintk("FSB: %3dMHz\n", fsb/1000); if (dmi_check_system(powernow_dmi_table) || acpi_force) { printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); -- cgit v1.2.3 From 8282864a96ef0a7b88ee9e4b357e08504131394d Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 31 May 2005 19:03:43 -0700 Subject: [CPUFREQ] speedstep-centrino: Pentium 4 - M (HT) support The Pentium 4 - Ms (HT) with CPUID 0xF34 and 0xF41 seem to support centrino-like enhanced speedstep; however, no "table" support is possible. Therefore, put NULL entries into speedstep-centrino.c Signed-off-by: Dominik Brodowski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index 07d5612dc00f..7dcbf70fc16f 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -54,6 +54,8 @@ enum { CPU_DOTHAN_A1, CPU_DOTHAN_A2, CPU_DOTHAN_B0, + CPU_MP4HT_D0, + CPU_MP4HT_E0, }; static const struct cpu_id cpu_ids[] = { @@ -61,6 +63,8 @@ static const struct cpu_id cpu_ids[] = { [CPU_DOTHAN_A1] = { 6, 13, 1 }, [CPU_DOTHAN_A2] = { 6, 13, 2 }, [CPU_DOTHAN_B0] = { 6, 13, 6 }, + [CPU_MP4HT_D0] = {15, 3, 4 }, + [CPU_MP4HT_E0] = {15, 4, 1 }, }; #define N_IDS (sizeof(cpu_ids)/sizeof(cpu_ids[0])) @@ -226,6 +230,8 @@ static struct cpu_model models[] = { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, { NULL, } }; -- cgit v1.2.3 From 6f4095af6df7aa365ecf18473c8b05c5f6c38a78 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 31 May 2005 19:03:44 -0700 Subject: [CPUFREQ] speedstep-smi: it works on at least one P4M The speedstep-smi driver actually works on >=1 notebook with a Pentium 4-M CPU where all other cpufreq drivers fail. Therefore, allow speedstep-smi on P4Ms again, but warn users of likely failure Signed-off-by: Dominik Brodowski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/speedstep-smi.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index 79440b3f087e..b25fb6b635ae 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -357,6 +357,9 @@ static int __init speedstep_init(void) case SPEEDSTEP_PROCESSOR_PIII_C: case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: break; + case SPEEDSTEP_PROCESSOR_P4M: + printk(KERN_INFO "speedstep-smi: you're trying to use this cpufreq driver on a Pentium 4-based CPU. Most likely it will not work.\n"); + break; default: speedstep_processor = 0; } -- cgit v1.2.3 From bf6fc9fd2d848d06b8f6c4caccef1dba9ef8c4c6 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 31 May 2005 19:03:45 -0700 Subject: [CPUFREQ] AMD Elan SC520 cpufreq driver. From: Sean Young Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/Kconfig | 14 ++- arch/i386/kernel/cpu/cpufreq/Makefile | 1 + arch/i386/kernel/cpu/cpufreq/sc520_freq.c | 186 ++++++++++++++++++++++++++++++ 3 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 arch/i386/kernel/cpu/cpufreq/sc520_freq.c (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index f25ffd74235c..0f1eb507233b 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -23,7 +23,7 @@ config X86_ACPI_CPUFREQ If in doubt, say N. config ELAN_CPUFREQ - tristate "AMD Elan" + tristate "AMD Elan SC400 and SC410" select CPU_FREQ_TABLE depends on X86_ELAN ---help--- @@ -38,6 +38,18 @@ config ELAN_CPUFREQ If in doubt, say N. +config SC520_CPUFREQ + tristate "AMD Elan SC520" + select CPU_FREQ_TABLE + depends on X86_ELAN + ---help--- + This adds the CPUFreq driver for AMD Elan SC520 processor. + + For details, take a look at . + + If in doubt, say N. + + config X86_POWERNOW_K6 tristate "AMD Mobile K6-2/K6-3 PowerNow!" select CPU_FREQ_TABLE diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile index a922e97aeedd..2e894f1c8910 100644 --- a/arch/i386/kernel/cpu/cpufreq/Makefile +++ b/arch/i386/kernel/cpu/cpufreq/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_LONGHAUL) += longhaul.o obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o +obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o obj-$(CONFIG_X86_LONGRUN) += longrun.o obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o diff --git a/arch/i386/kernel/cpu/cpufreq/sc520_freq.c b/arch/i386/kernel/cpu/cpufreq/sc520_freq.c new file mode 100644 index 000000000000..ef457d50f4ac --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/sc520_freq.c @@ -0,0 +1,186 @@ +/* + * sc520_freq.c: cpufreq driver for the AMD Elan sc520 + * + * Copyright (C) 2005 Sean Young + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on elanfreq.c + * + * 2005-03-30: - initial revision + */ + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#define MMCR_BASE 0xfffef000 /* The default base address */ +#define OFFS_CPUCTL 0x2 /* CPU Control Register */ + +static __u8 __iomem *cpuctl; + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg) + +static struct cpufreq_frequency_table sc520_freq_table[] = { + {0x01, 100000}, + {0x02, 133000}, + {0, CPUFREQ_TABLE_END}, +}; + +static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) +{ + u8 clockspeed_reg = *cpuctl; + + switch (clockspeed_reg & 0x03) { + default: + printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg); + case 0x01: + return 100000; + case 0x02: + return 133000; + } +} + +static void sc520_freq_set_cpu_state (unsigned int state) +{ + + struct cpufreq_freqs freqs; + u8 clockspeed_reg; + + freqs.old = sc520_freq_get_cpu_frequency(0); + freqs.new = sc520_freq_table[state].frequency; + freqs.cpu = 0; /* AMD Elan is UP */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + dprintk("attempting to set frequency to %i kHz\n", + sc520_freq_table[state].frequency); + + local_irq_disable(); + + clockspeed_reg = *cpuctl & ~0x03; + *cpuctl = clockspeed_reg | sc520_freq_table[state].index; + + local_irq_enable(); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +}; + +static int sc520_freq_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); +} + +static int sc520_freq_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate)) + return -EINVAL; + + sc520_freq_set_cpu_state(newstate); + + return 0; +} + + +/* + * Module init and exit code + */ + +static int sc520_freq_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = cpu_data; + int result; + + /* capability check */ + if (c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) + return -ENODEV; + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = 1000000; /* 1ms */ + policy->cur = sc520_freq_get_cpu_frequency(0); + + result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); + + return 0; +} + + +static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + + +static struct freq_attr* sc520_freq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver sc520_freq_driver = { + .get = sc520_freq_get_cpu_frequency, + .verify = sc520_freq_verify, + .target = sc520_freq_target, + .init = sc520_freq_cpu_init, + .exit = sc520_freq_cpu_exit, + .name = "sc520_freq", + .owner = THIS_MODULE, + .attr = sc520_freq_attr, +}; + + +static int __init sc520_freq_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + /* Test if we have the right hardware */ + if(c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) { + dprintk("no Elan SC520 processor found!\n"); + return -ENODEV; + } + cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); + if(!cpuctl) { + printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); + return -ENOMEM; + } + + return cpufreq_register_driver(&sc520_freq_driver); +} + + +static void __exit sc520_freq_exit(void) +{ + cpufreq_unregister_driver(&sc520_freq_driver); + iounmap(cpuctl); +} + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sean Young "); +MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU"); + +module_init(sc520_freq_init); +module_exit(sc520_freq_exit); + -- cgit v1.2.3 From 91350ed49bf3613e243c2e216228cd4ae8f32516 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 31 May 2005 19:03:45 -0700 Subject: [CPUFREQ] Recalibrate cpu_khz [1/2] We have to recalibrate cpu_khz in order to use the current FID instead the max FID since some BIOS do not put the processor at maximum frequency at POST. Also, some BIOS will change the processor frequency at our back after cpu_khz was calibrate. Finally, this will fix a long standing bug when we do something like this: # rmmod powernow-k7 # modprobe powernow-k7 Signed-off-by: Bruno Ducrot Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/powernow-k7.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c index 80f01b00bada..5c530064eb74 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -586,8 +587,12 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); - /* A K7 with powernow technology is set to max frequency by BIOS */ - fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.MFID]; + /* recalibrate cpu_khz */ + result = recalibrate_cpu_khz(); + if (result) + return result; + + fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; if (!fsb) { printk(KERN_WARNING PFX "can not determine bus frequency\n"); return -EINVAL; -- cgit v1.2.3 From c5d28fb297efaa97c4b90e36f9dff3066e7f2778 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 31 May 2005 19:03:46 -0700 Subject: [CPUFREQ] Recalibrate cpu_khz [2/2] Some cpufreq drivers (at that time, only powernow-k7) need to recalibrate the cpu_khz at runtime. Signed-off-by: Bruno Ducrot Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- arch/i386/kernel/timers/common.c | 6 ++++-- arch/i386/kernel/timers/timer_tsc.c | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c index f7f90005e22e..8e201219f525 100644 --- a/arch/i386/kernel/timers/common.c +++ b/arch/i386/kernel/timers/common.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -24,7 +25,7 @@ #define CALIBRATE_TIME (5 * 1000020/HZ) -unsigned long __init calibrate_tsc(void) +unsigned long calibrate_tsc(void) { mach_prepare_counter(); @@ -139,7 +140,7 @@ bad_calibration: #endif /* calculate cpu_khz */ -void __init init_cpu_khz(void) +void init_cpu_khz(void) { if (cpu_has_tsc) { unsigned long tsc_quotient = calibrate_tsc(); @@ -158,3 +159,4 @@ void __init init_cpu_khz(void) } } } + diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 7926d967be00..180444d87824 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -320,6 +320,26 @@ core_initcall(cpufreq_tsc); static inline void cpufreq_delayed_get(void) { return; } #endif +int recalibrate_cpu_khz(void) +{ +#ifndef CONFIG_SMP + unsigned long cpu_khz_old = cpu_khz; + + if (cpu_has_tsc) { + init_cpu_khz(); + cpu_data[0].loops_per_jiffy = + cpufreq_scale(cpu_data[0].loops_per_jiffy, + cpu_khz_old, + cpu_khz); + return 0; + } else + return -ENODEV; +#else + return -ENODEV; +#endif +} +EXPORT_SYMBOL(recalibrate_cpu_khz); + static void mark_offset_tsc(void) { unsigned long lost,delay; -- cgit v1.2.3 From 065b807ca1f5bdbeb081e3cf75ac8de9be8ac212 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 31 May 2005 19:03:46 -0700 Subject: [CPUFREQ] dual-core powernow-k8 With the release of the dual-core AMD Opterons last week, it's high time that cpufreq supported them. The attached patch applies cleanly to 2.6.12-rc3 and updates powernow-k8 to support the latest Athlon 64 and Opteron processors. Update the driver to version 1.40.0 and provide support for dual-core processors. Signed-off-by: Mark Langsdorf Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 113 +++++++++++++++++++---------- arch/i386/kernel/cpu/cpufreq/powernow-k8.h | 15 ++++ 2 files changed, 91 insertions(+), 37 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index a65ff7e32e5d..10cc096c0ade 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -4,7 +4,7 @@ * GNU general public license version 2. See "COPYING" or * http://www.gnu.org/licenses/gpl.html * - * Support : paul.devriendt@amd.com + * Support : mark.langsdorf@amd.com * * Based on the powernow-k7.c module written by Dave Jones. * (C) 2003 Dave Jones on behalf of SuSE Labs @@ -15,12 +15,13 @@ * * Valuable input gratefully received from Dave Jones, Pavel Machek, * Dominik Brodowski, and others. + * Originally developed by Paul Devriendt. * Processor information obtained from Chapter 9 (Power and Thermal Management) * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD * Opteron Processors" available for download from www.amd.com * * Tables for specific CPUs can be infrerred from - * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf + * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf */ #include @@ -30,6 +31,7 @@ #include #include #include +#include #include #include @@ -42,7 +44,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.00.09e" +#define VERSION "version 1.40.2" #include "powernow-k8.h" /* serialize freq changes */ @@ -50,6 +52,10 @@ static DECLARE_MUTEX(fidvid_sem); static struct powernow_k8_data *powernow_data[NR_CPUS]; +#ifndef CONFIG_SMP +static cpumask_t cpu_core_map[1]; +#endif + /* Return a frequency in MHz, given an input fid */ static u32 find_freq_from_fid(u32 fid) { @@ -274,11 +280,18 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid { u32 rvosteps = data->rvo; u32 savefid = data->currfid; + u32 maxvid, lo; dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqvid, data->rvo); + rdmsr(MSR_FIDVID_STATUS, lo, maxvid); + maxvid = 0x1f & (maxvid >> 16); + dprintk("ph1 maxvid=0x%x\n", maxvid); + if (reqvid < maxvid) /* lower numbers are higher voltages */ + reqvid = maxvid; + while (data->currvid > reqvid) { dprintk("ph1: curr 0x%x, req vid 0x%x\n", data->currvid, reqvid); @@ -286,8 +299,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid return 1; } - while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { - if (data->currvid == 0) { + while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { + if (data->currvid == maxvid) { rvosteps = 0; } else { dprintk("ph1: changing vid for rvo, req 0x%x\n", @@ -671,7 +684,7 @@ static int find_psb_table(struct powernow_k8_data *data) * BIOS and Kernel Developer's Guide, which is available on * www.amd.com */ - printk(KERN_ERR PFX "BIOS error - no PSB\n"); + printk(KERN_INFO PFX "BIOS error - no PSB or ACPI _PSS objects\n"); return -ENODEV; } @@ -695,7 +708,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) struct cpufreq_frequency_table *powernow_table; if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { - dprintk("register performance failed\n"); + dprintk("register performance failed: bad ACPI data\n"); return -EIO; } @@ -746,22 +759,23 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) continue; } - if (fid < HI_FID_TABLE_BOTTOM) { - if (cntlofreq) { - /* if both entries are the same, ignore this - * one... - */ - if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || - (powernow_table[i].index != powernow_table[cntlofreq].index)) { - printk(KERN_ERR PFX "Too many lo freq table entries\n"); - goto err_out_mem; - } - - dprintk("double low frequency table entry, ignoring it.\n"); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } else - cntlofreq = i; + /* verify only 1 entry from the lo frequency table */ + if (fid < HI_FID_TABLE_BOTTOM) { + if (cntlofreq) { + /* if both entries are the same, ignore this + * one... + */ + if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || + (powernow_table[i].index != powernow_table[cntlofreq].index)) { + printk(KERN_ERR PFX "Too many lo freq table entries\n"); + goto err_out_mem; + } + + dprintk("double low frequency table entry, ignoring it.\n"); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } else + cntlofreq = i; } if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { @@ -816,7 +830,7 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde { u32 fid; u32 vid; - int res; + int res, i; struct cpufreq_freqs freqs; dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); @@ -841,7 +855,8 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde } if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk("ignoring illegal change in lo freq table-%x to 0x%x\n", + printk(KERN_ERR PFX + "ignoring illegal change in lo freq table-%x to 0x%x\n", data->currfid, fid); return 1; } @@ -850,18 +865,20 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde smp_processor_id(), fid, vid); freqs.cpu = data->cpu; - freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + for_each_cpu_mask(i, cpu_core_map[data->cpu]) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } - down(&fidvid_sem); res = transition_fid_vid(data, fid, vid); - up(&fidvid_sem); freqs.new = find_khz_freq_from_fid(data->currfid); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - + for_each_cpu_mask(i, cpu_core_map[data->cpu]) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } return res; } @@ -874,6 +891,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi u32 checkvid = data->currvid; unsigned int newstate; int ret = -EIO; + int i; /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; @@ -902,22 +920,41 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi data->currfid, data->currvid); if ((checkvid != data->currvid) || (checkfid != data->currfid)) { - printk(KERN_ERR PFX - "error - out of sync, fid 0x%x 0x%x, vid 0x%x 0x%x\n", - checkfid, data->currfid, checkvid, data->currvid); + printk(KERN_INFO PFX + "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", + checkfid, data->currfid, checkvid, data->currvid); } if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) goto err_out; + down(&fidvid_sem); + + for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { + /* make sure the sibling is initialized */ + if (!powernow_data[i]) { + ret = 0; + up(&fidvid_sem); + goto err_out; + } + } + powernow_k8_acpi_pst_values(data, newstate); if (transition_frequency(data, newstate)) { printk(KERN_ERR PFX "transition frequency failed\n"); ret = 1; + up(&fidvid_sem); goto err_out; } + /* Update all the fid/vids of our siblings */ + for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { + powernow_data[i]->currvid = data->currvid; + powernow_data[i]->currfid = data->currfid; + } + up(&fidvid_sem); + pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; @@ -962,7 +999,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) */ if ((num_online_cpus() != 1) || (num_possible_cpus() != 1)) { - printk(KERN_INFO PFX "MP systems not supported by PSB BIOS structure\n"); + printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); kfree(data); return -ENODEV; } @@ -1003,6 +1040,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) schedule(); pol->governor = CPUFREQ_DEFAULT_GOVERNOR; + pol->cpus = cpu_core_map[pol->cpu]; /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ @@ -1069,7 +1107,7 @@ static unsigned int powernowk8_get (unsigned int cpu) return 0; } preempt_disable(); - + if (query_current_values_with_pending_wait(data)) goto out; @@ -1127,9 +1165,10 @@ static void __exit powernowk8_exit(void) cpufreq_unregister_driver(&cpufreq_amd64_driver); } -MODULE_AUTHOR("Paul Devriendt "); +MODULE_AUTHOR("Paul Devriendt and Mark Langsdorf Date: Tue, 31 May 2005 19:03:51 -0700 Subject: [CPUFREQ] longhaul - disable PCI mastering around transition. The spec states that we have to do this, which is *horrid*. Based on code from: Ken Staton Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 49 ++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 4 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index ab0f9f5aac11..8ea545e35b3a 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -120,6 +121,11 @@ static void do_powersaver(union msr_longhaul *longhaul, unsigned int clock_ratio_index) { int version; + unsigned long flags; + struct pci_dev *dev; + int i; + u16 pci_cmd; + u16 cmd_state[64]; switch (cpu_model) { case CPU_EZRA_T: @@ -137,17 +143,52 @@ static void do_powersaver(union msr_longhaul *longhaul, longhaul->bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; longhaul->bits.EnableSoftBusRatio = 1; longhaul->bits.RevisionKey = 0; - local_irq_disable(); - wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); + + preempt_disable(); + local_irq_save(flags); + + /* + * get current pci bus master state for all devices + * and clear bus master bit + */ + dev = NULL; + i = 0; + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (dev != NULL) { + pci_read_config_word(dev, PCI_COMMAND, &pci_cmd); + cmd_state[i++] = pci_cmd; + pci_cmd &= ~PCI_COMMAND_MASTER; + pci_write_config_word(dev, PCI_COMMAND, pci_cmd); + } + } while (dev != NULL); + local_irq_enable(); + + __hlt(); + wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); __hlt(); + local_irq_disable(); + + /* restore pci bus master state for all devices */ + dev = NULL; + i = 0; + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (dev != NULL) { + pci_cmd = cmd_state[i++]; + pci_write_config_byte(dev, PCI_COMMAND, pci_cmd); + } + } while (dev != NULL); + local_irq_restore(flags); + preempt_enable(); + + /* disable bus ratio bit */ rdmsrl(MSR_VIA_LONGHAUL, longhaul->val); longhaul->bits.EnableSoftBusRatio = 0; longhaul->bits.RevisionKey = version; - local_irq_disable(); wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); - local_irq_enable(); } /** -- cgit v1.2.3 From 1174631418fbb2c0c6946081b0b7d391f5d92861 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 31 May 2005 19:03:51 -0700 Subject: [CPUFREQ] Longhaul: Magic timer frobbing. As mandated by the spec, disable timer around transitions. From code by : Ken Staton --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 8ea545e35b3a..48899f0956f7 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -120,9 +120,10 @@ static int longhaul_get_cpu_mult(void) static void do_powersaver(union msr_longhaul *longhaul, unsigned int clock_ratio_index) { - int version; - unsigned long flags; struct pci_dev *dev; + unsigned long flags; + unsigned int tmp_mask; + int version; int i; u16 pci_cmd; u16 cmd_state[64]; @@ -163,6 +164,10 @@ static void do_powersaver(union msr_longhaul *longhaul, } } while (dev != NULL); + tmp_mask=inb(0x21); /* works on C3. save mask. */ + outb(0xFE,0x21); /* TMR0 only */ + outb(0xFF,0x80); /* delay */ + local_irq_enable(); __hlt(); @@ -171,6 +176,8 @@ static void do_powersaver(union msr_longhaul *longhaul, local_irq_disable(); + outb(tmp_mask,0x21); /* restore mask */ + /* restore pci bus master state for all devices */ dev = NULL; i = 0; -- cgit v1.2.3 From 6778bae0f2f1d4af1b8bb876c992ea094ee958b4 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 31 May 2005 19:03:51 -0700 Subject: [CPUFREQ] longhaul - adjust transition latency. From patch by: Ken Staton Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 48899f0956f7..04e3563da4fe 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -626,7 +626,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) longhaul_setup_voltagescaling(); policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cpuinfo.transition_latency = 200000; /* nsec */ policy->cur = calc_speed(longhaul_get_cpu_mult()); ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table); -- cgit v1.2.3 From f94ea640a28230f82a4395c34e1290748a9f6586 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 31 May 2005 19:03:52 -0700 Subject: [CPUFREQ] Typos. cpfureq developers cant spel. Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/speedstep-lib.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c index 8ba430a9c3a2..d368b3f5fce8 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c @@ -336,7 +336,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, if (!prev_speed) return -EIO; - dprintk("previous seped is %u\n", prev_speed); + dprintk("previous speed is %u\n", prev_speed); local_irq_save(flags); @@ -348,7 +348,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, goto out; } - dprintk("low seped is %u\n", *low_speed); + dprintk("low speed is %u\n", *low_speed); /* switch to high state */ set_state(SPEEDSTEP_HIGH); @@ -358,7 +358,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, goto out; } - dprintk("high seped is %u\n", *high_speed); + dprintk("high speed is %u\n", *high_speed); if (*low_speed == *high_speed) { ret = -ENODEV; -- cgit v1.2.3 From 5754c9b649e414f1e3a3ea2ec15e42ed3e42eeb8 Mon Sep 17 00:00:00 2001 From: Keith Owens Date: Wed, 8 Jun 2005 15:49:07 -0700 Subject: [PATCH] Stop arch/i386/kernel/vsyscall-note.o being rebuilt every time arch/i386/kernel/vsyscall-note.o is not listed as a target so its .cmd file is neither considered as a target nor is it read on the next build. This causes vsyscall-note.o to be rebuilt every time that you run make, which causes vmlinux to be rebuilt every time. Signed-off-by: Keith Owens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 0fbcfe00dd8d..51ecd512603d 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -43,7 +43,7 @@ obj-$(CONFIG_SCx200) += scx200.o # Note: kbuild does not track this dependency due to usage of .incbin $(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so) -targets += vsyscall.lds +targets += vsyscall-note.o vsyscall.lds # The DSO images are built using a special linker script. quiet_cmd_syscall = SYSCALL $@ -- cgit v1.2.3 From 92c6dc59b7c1ca514021502c7eef53b9f2c738fd Mon Sep 17 00:00:00 2001 From: Thomas Hood Date: Mon, 13 Jun 2005 22:58:04 -0700 Subject: [PATCH] apm.c: ignore_normal_resume is set a bit too late This patch causes the ignore_normal_resume flag to be set slightly earlier, before there is a chance that the apm driver will receive the normal resume event from the BIOS. (Addresses Debian bug #310865) Signed-off-by: Thomas Hood Acked-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 45641a872550..0ff65abcd56c 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -1222,6 +1222,7 @@ static int suspend(int vetoable) save_processor_state(); err = set_system_power_state(APM_STATE_SUSPEND); + ignore_normal_resume = 1; restore_processor_state(); local_irq_disable(); @@ -1229,7 +1230,6 @@ static int suspend(int vetoable) spin_lock(&i8253_lock); reinit_timer(); set_time(); - ignore_normal_resume = 1; spin_unlock(&i8253_lock); write_sequnlock(&xtime_lock); -- cgit v1.2.3 From 8874b414ffe037c39e73bb262ddf69653a13c0a4 Mon Sep 17 00:00:00 2001 From: "gregkh@suse.de" Date: Wed, 23 Mar 2005 09:56:34 -0800 Subject: [PATCH] class: convert arch/* to use the new class api instead of class_simple Signed-off-by: Greg Kroah-Hartman --- arch/i386/kernel/cpuid.c | 22 +++++++++++----------- arch/i386/kernel/msr.c | 22 +++++++++++----------- 2 files changed, 22 insertions(+), 22 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 2e2756345bb2..4647db4ad6de 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -45,7 +45,7 @@ #include #include -static struct class_simple *cpuid_class; +static struct class *cpuid_class; #ifdef CONFIG_SMP @@ -158,12 +158,12 @@ static struct file_operations cpuid_fops = { .open = cpuid_open, }; -static int cpuid_class_simple_device_add(int i) +static int cpuid_class_device_create(int i) { int err = 0; struct class_device *class_err; - class_err = class_simple_device_add(cpuid_class, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i); + class_err = class_device_create(cpuid_class, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i); if (IS_ERR(class_err)) err = PTR_ERR(class_err); return err; @@ -175,10 +175,10 @@ static int __devinit cpuid_class_cpu_callback(struct notifier_block *nfb, unsign switch (action) { case CPU_ONLINE: - cpuid_class_simple_device_add(cpu); + cpuid_class_device_create(cpu); break; case CPU_DEAD: - class_simple_device_remove(MKDEV(CPUID_MAJOR, cpu)); + class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); break; } return NOTIFY_OK; @@ -200,13 +200,13 @@ static int __init cpuid_init(void) err = -EBUSY; goto out; } - cpuid_class = class_simple_create(THIS_MODULE, "cpuid"); + cpuid_class = class_create(THIS_MODULE, "cpuid"); if (IS_ERR(cpuid_class)) { err = PTR_ERR(cpuid_class); goto out_chrdev; } for_each_online_cpu(i) { - err = cpuid_class_simple_device_add(i); + err = cpuid_class_device_create(i); if (err != 0) goto out_class; } @@ -218,9 +218,9 @@ static int __init cpuid_init(void) out_class: i = 0; for_each_online_cpu(i) { - class_simple_device_remove(MKDEV(CPUID_MAJOR, i)); + class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, i)); } - class_simple_destroy(cpuid_class); + class_destroy(cpuid_class); out_chrdev: unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); out: @@ -232,8 +232,8 @@ static void __exit cpuid_exit(void) int cpu = 0; for_each_online_cpu(cpu) - class_simple_device_remove(MKDEV(CPUID_MAJOR, cpu)); - class_simple_destroy(cpuid_class); + class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); + class_destroy(cpuid_class); unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); unregister_cpu_notifier(&cpuid_class_cpu_notifier); } diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 05d9f8f363a6..b2f03c39a6fe 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -44,7 +44,7 @@ #include #include -static struct class_simple *msr_class; +static struct class *msr_class; /* Note: "err" is handled in a funny way below. Otherwise one version of gcc or another breaks. */ @@ -260,12 +260,12 @@ static struct file_operations msr_fops = { .open = msr_open, }; -static int msr_class_simple_device_add(int i) +static int msr_class_device_create(int i) { int err = 0; struct class_device *class_err; - class_err = class_simple_device_add(msr_class, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i); + class_err = class_device_create(msr_class, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i); if (IS_ERR(class_err)) err = PTR_ERR(class_err); return err; @@ -277,10 +277,10 @@ static int __devinit msr_class_cpu_callback(struct notifier_block *nfb, unsigned switch (action) { case CPU_ONLINE: - msr_class_simple_device_add(cpu); + msr_class_device_create(cpu); break; case CPU_DEAD: - class_simple_device_remove(MKDEV(MSR_MAJOR, cpu)); + class_device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); break; } return NOTIFY_OK; @@ -302,13 +302,13 @@ static int __init msr_init(void) err = -EBUSY; goto out; } - msr_class = class_simple_create(THIS_MODULE, "msr"); + msr_class = class_create(THIS_MODULE, "msr"); if (IS_ERR(msr_class)) { err = PTR_ERR(msr_class); goto out_chrdev; } for_each_online_cpu(i) { - err = msr_class_simple_device_add(i); + err = msr_class_device_create(i); if (err != 0) goto out_class; } @@ -320,8 +320,8 @@ static int __init msr_init(void) out_class: i = 0; for_each_online_cpu(i) - class_simple_device_remove(MKDEV(MSR_MAJOR, i)); - class_simple_destroy(msr_class); + class_device_destroy(msr_class, MKDEV(MSR_MAJOR, i)); + class_destroy(msr_class); out_chrdev: unregister_chrdev(MSR_MAJOR, "cpu/msr"); out: @@ -332,8 +332,8 @@ static void __exit msr_exit(void) { int cpu = 0; for_each_online_cpu(cpu) - class_simple_device_remove(MKDEV(MSR_MAJOR, cpu)); - class_simple_destroy(msr_class); + class_device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); + class_destroy(msr_class); unregister_chrdev(MSR_MAJOR, "cpu/msr"); unregister_cpu_notifier(&msr_class_cpu_notifier); } -- cgit v1.2.3 From 39c715b71740c4a78ba4769fb54826929bac03cb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 21 Jun 2005 17:14:34 -0700 Subject: [PATCH] smp_processor_id() cleanup This patch implements a number of smp_processor_id() cleanup ideas that Arjan van de Ven and I came up with. The previous __smp_processor_id/_smp_processor_id/smp_processor_id API spaghetti was hard to follow both on the implementational and on the usage side. Some of the complexity arose from picking wrong names, some of the complexity comes from the fact that not all architectures defined __smp_processor_id. In the new code, there are two externally visible symbols: - smp_processor_id(): debug variant. - raw_smp_processor_id(): nondebug variant. Replaces all existing uses of _smp_processor_id() and __smp_processor_id(). Defined by every SMP architecture in include/asm-*/smp.h. There is one new internal symbol, dependent on DEBUG_PREEMPT: - debug_smp_processor_id(): internal debug variant, mapped to smp_processor_id(). Also, i moved debug_smp_processor_id() from lib/kernel_lock.c into a new lib/smp_processor_id.c file. All related comments got updated and/or clarified. I have build/boot tested the following 8 .config combinations on x86: {SMP,UP} x {PREEMPT,!PREEMPT} x {DEBUG_PREEMPT,!DEBUG_PREEMPT} I have also build/boot tested x64 on UP/PREEMPT/DEBUG_PREEMPT. (Other architectures are untested, but should work just fine.) Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 00c63419c06f..83c579e82a81 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -306,7 +306,7 @@ void die(const char * str, struct pt_regs * regs, long err) }; static int die_counter; - if (die.lock_owner != _smp_processor_id()) { + if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); spin_lock_irq(&die.lock); die.lock_owner = smp_processor_id(); -- cgit v1.2.3 From 753ee728964e5afb80c17659cc6c3a6fd0a42fe0 Mon Sep 17 00:00:00 2001 From: Martin Hicks Date: Tue, 21 Jun 2005 17:14:41 -0700 Subject: [PATCH] VM: early zone reclaim This is the core of the (much simplified) early reclaim. The goal of this patch is to reclaim some easily-freed pages from a zone before falling back onto another zone. One of the major uses of this is NUMA machines. With the default allocator behavior the allocator would look for memory in another zone, which might be off-node, before trying to reclaim from the current zone. This adds a zone tuneable to enable early zone reclaim. It is selected on a per-zone basis and is turned on/off via syscall. Adding some extra throttling on the reclaim was also required (patch 4/4). Without the machine would grind to a crawl when doing a "make -j" kernel build. Even with this patch the System Time is higher on average, but it seems tolerable. Here are some numbers for kernbench runs on a 2-node, 4cpu, 8Gig RAM Altix in the "make -j" run: wall user sys %cpu ctx sw. sleeps ---- ---- --- ---- ------ ------ No patch 1009 1384 847 258 298170 504402 w/patch, no reclaim 880 1376 667 288 254064 396745 w/patch & reclaim 1079 1385 926 252 291625 548873 These numbers are the average of 2 runs of 3 "make -j" runs done right after system boot. Run-to-run variability for "make -j" is huge, so these numbers aren't terribly useful except to seee that with reclaim the benchmark still finishes in a reasonable amount of time. I also looked at the NUMA hit/miss stats for the "make -j" runs and the reclaim doesn't make any difference when the machine is thrashing away. Doing a "make -j8" on a single node that is filled with page cache pages takes 700 seconds with reclaim turned on and 735 seconds without reclaim (due to remote memory accesses). The simple zone_reclaim syscall program is at http://www.bork.org/~mort/sgi/zone_reclaim.c Signed-off-by: Martin Hicks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/syscall_table.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 6cd1ed311f02..d408afaf6495 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -251,7 +251,7 @@ ENTRY(sys_call_table) .long sys_io_submit .long sys_io_cancel .long sys_fadvise64 /* 250 */ - .long sys_ni_syscall + .long sys_set_zone_reclaim .long sys_exit_group .long sys_lookup_dcookie .long sys_epoll_create -- cgit v1.2.3 From 05b79bdcb48c18cd9b580c39e3efb9a1ab078151 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 23 Jun 2005 00:07:57 -0700 Subject: [PATCH] sparsemem memory model for i386 Provide the architecture specific implementation for SPARSEMEM for i386 SMP and NUMA systems. Signed-off-by: Andy Whitcroft Signed-off-by: Dave Hansen Signed-off-by: Martin Bligh Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 2bfbddebdbf8..0d689335c8d6 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -1022,7 +1023,7 @@ static void __init reserve_ebda_region(void) reserve_bootmem(addr, PAGE_SIZE); } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES void __init setup_bootmem_allocator(void); static unsigned long __init setup_memory(void) { @@ -1072,9 +1073,9 @@ void __init zone_sizes_init(void) free_area_init(zones_size); } #else -extern unsigned long setup_memory(void); +extern unsigned long __init setup_memory(void); extern void zone_sizes_init(void); -#endif /* !CONFIG_DISCONTIGMEM */ +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ void __init setup_bootmem_allocator(void) { @@ -1475,6 +1476,7 @@ void __init setup_arch(char **cmdline_p) #endif paging_init(); remapped_pgdat_init(); + sparse_init(); zone_sizes_init(); /* -- cgit v1.2.3 From 8a9e1b0f564615bd92ba50162623e25c2904e564 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Thu, 23 Jun 2005 00:08:13 -0700 Subject: [PATCH] Platform SMIs and their interferance with tsc based delay calibration Issue: Current tsc based delay_calibration can result in significant errors in loops_per_jiffy count when the platform events like SMIs (System Management Interrupts that are non-maskable) are present. This could lead to potential kernel panic(). This issue is becoming more visible with 2.6 kernel (as default HZ is 1000) and on platforms with higher SMI handling latencies. During the boot time, SMIs are mostly used by BIOS (for things like legacy keyboard emulation). Description: The psuedocode for current delay calibration with tsc based delay looks like (0) Estimate a value for loops_per_jiffy (1) While (loops_per_jiffy estimate is accurate enough) (2) wait for jiffy transition (jiffy1) (3) Note down current tsc (tsc1) (4) loop until tsc becomes tsc1 + loops_per_jiffy (5) check whether jiffy changed since jiffy1 or not and refine loops_per_jiffy estimate Consider the following cases Case 1: If SMIs happen between (2) and (3) above, we can end up with a loops_per_jiffy value that is too low. This results in shorted delays and kernel can panic () during boot (Mostly at IOAPIC timer initialization timer_irq_works() as we don't have enough timer interrupts in a specified interval). Case 2: If SMIs happen between (3) and (4) above, then we can end up with a loops_per_jiffy value that is too high. And with current i386 code, too high lpj value (greater than 17M) can result in a overflow in delay.c:__const_udelay() again resulting in shorter delay and panic(). Solution: The patch below makes the calibration routine aware of asynchronous events like SMIs. We increase the delay calibration time and also identify any significant errors (greater than 12.5%) in the calibration and notify it to user. Patch below changes both i386 and x86-64 architectures to use this new and improved calibrate_delay_direct() routine. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/timers/common.c | 9 +++++++++ arch/i386/kernel/timers/timer.c | 9 +++++++++ arch/i386/kernel/timers/timer_hpet.c | 1 + arch/i386/kernel/timers/timer_pm.c | 1 + arch/i386/kernel/timers/timer_tsc.c | 1 + 5 files changed, 21 insertions(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c index 8e201219f525..b38cc0d0c71a 100644 --- a/arch/i386/kernel/timers/common.c +++ b/arch/i386/kernel/timers/common.c @@ -139,6 +139,15 @@ bad_calibration: } #endif + +unsigned long read_timer_tsc(void) +{ + unsigned long retval; + rdtscl(retval); + return retval; +} + + /* calculate cpu_khz */ void init_cpu_khz(void) { diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c index a3d6a288088b..7e39ed8e33f8 100644 --- a/arch/i386/kernel/timers/timer.c +++ b/arch/i386/kernel/timers/timer.c @@ -64,3 +64,12 @@ struct timer_opts* __init select_timer(void) panic("select_timer: Cannot find a suitable timer\n"); return NULL; } + +int read_current_timer(unsigned long *timer_val) +{ + if (cur_timer->read_timer) { + *timer_val = cur_timer->read_timer(); + return 0; + } + return -1; +} diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index f778f471a09a..15a7d727bd6f 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -186,6 +186,7 @@ static struct timer_opts timer_hpet = { .get_offset = get_offset_hpet, .monotonic_clock = monotonic_clock_hpet, .delay = delay_hpet, + .read_timer = read_timer_tsc, }; struct init_timer_opts __initdata timer_hpet_init = { diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c index d77f22030fe6..4ef20e663498 100644 --- a/arch/i386/kernel/timers/timer_pm.c +++ b/arch/i386/kernel/timers/timer_pm.c @@ -246,6 +246,7 @@ static struct timer_opts timer_pmtmr = { .get_offset = get_offset_pmtmr, .monotonic_clock = monotonic_clock_pmtmr, .delay = delay_pmtmr, + .read_timer = read_timer_tsc, }; struct init_timer_opts __initdata timer_pmtmr_init = { diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 180444d87824..27f08ae9120b 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -572,6 +572,7 @@ static struct timer_opts timer_tsc = { .get_offset = get_offset_tsc, .monotonic_clock = monotonic_clock_tsc, .delay = delay_tsc, + .read_timer = read_timer_tsc, }; struct init_timer_opts __initdata timer_tsc_init = { -- cgit v1.2.3 From 7c1def1652c6c1a95eafca2991baace34afaed0f Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 23 Jun 2005 00:08:21 -0700 Subject: [PATCH] i386: never block forced SIGSEGV This problem was first noticed on PPC and has already been fixed there. But the exact same issue applies to other platforms in the same way. The signal blocking for sa_mask and the handled signal takes place after the handler setup. When the stack is bogus, the handler setup forces a SIGSEGV. But then this will be blocked, and returning to user mode will fault again and iterate. This patch fixes the problem by checking whether signal handler setup failed, and not doing the signal-blocking if so. This copies what was done in the ppc code. I think all architectures' signal handler setup code follows this pattern and needs the change. Signed-off-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/signal.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index ea46d028af08..344400787c39 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -346,8 +346,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) extern void __user __kernel_sigreturn; extern void __user __kernel_rt_sigreturn; -static void setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs) +static int setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) { void __user *restorer; struct sigframe __user *frame; @@ -429,13 +429,14 @@ static void setup_frame(int sig, struct k_sigaction *ka, current->comm, current->pid, frame, regs->eip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { void __user *restorer; @@ -522,20 +523,23 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->eip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } /* * OK, we're invoking a handler */ -static void +static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs * regs) { + int ret; + /* Are we from a system call? */ if (regs->orig_eax >= 0) { /* If so, check system call restarting.. */ @@ -569,17 +573,19 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, /* Set up the stack frame */ if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame(sig, ka, info, oldset, regs); + ret = setup_rt_frame(sig, ka, info, oldset, regs); else - setup_frame(sig, ka, oldset, regs); + ret = setup_frame(sig, ka, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (ret && !(ka->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); } + + return ret; } /* @@ -622,8 +628,7 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) } /* Whee! Actually deliver the signal. */ - handle_signal(signr, &info, &ka, oldset, regs); - return 1; + return handle_signal(signr, &info, &ka, oldset, regs); } no_signal: -- cgit v1.2.3 From ca05fea6db5259c6d62e517c41d448a4249175f4 Mon Sep 17 00:00:00 2001 From: Natalie Protasevich Date: Thu, 23 Jun 2005 00:08:22 -0700 Subject: [PATCH] Do not enforce unique IO_APIC_ID check for xAPIC systems (i386) This patch is per Andi's request to remove NO_IOAPIC_CHECK from genapic and use heuristics to prevent unique I/O APIC ID check for systems that don't need it. The patch disables unique I/O APIC ID check for Xeon-based and other platforms that don't use serial APIC bus for interrupt delivery. Andi stated that AMD systems don't need unique IO_APIC_IDs either. Signed-off-by: Natalie Protasevich Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/io_apic.c | 10 ++++++---- arch/i386/kernel/mpparse.c | 5 ++++- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 7a324e8b86f9..51f7a5d8721f 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -1658,6 +1658,12 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned char old_id; unsigned long flags; + /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15)) + return; /* * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. @@ -1684,10 +1690,6 @@ static void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; } - /* Don't check I/O APIC IDs for some xAPIC systems. They have - * no meaning without the serial APIC bus. */ - if (NO_IOAPIC_CHECK) - continue; /* * Sanity check, is the ID really free? Every APIC in a * system must have a unique ID or we get lots of nice diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 1347ab4939e7..0a061056b828 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -914,7 +914,10 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) + mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + else + mp_ioapics[idx].mpc_apicid = id; mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); /* -- cgit v1.2.3 From 7fbb4f6e6873593a2defb8f66512f55d08d88106 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 23 Jun 2005 00:08:23 -0700 Subject: [PATCH] adjust i386 watchdog tick calculation Get the i386 watchdog tick calculation into a state where it can also be used on CPUs with frequencies beyond 4GHz, and it consolidates the calculation into a single place (for potential furture adjustments). Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/nmi.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 2c0ee9c2d020..da6c46d667cb 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -28,8 +28,7 @@ #include #include -#include -#include +#include #include #include "mach_traps.h" @@ -324,6 +323,16 @@ static void clear_msr_range(unsigned int base, unsigned int n) wrmsr(base+i, 0, 0); } +static inline void write_watchdog_counter(const char *descr) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if(descr) + Dprintk("setting %s to -0x%08Lx\n", descr, count); + wrmsrl(nmi_perfctr_msr, 0 - count); +} + static void setup_k7_watchdog(void) { unsigned int evntsel; @@ -339,8 +348,7 @@ static void setup_k7_watchdog(void) | K7_NMI_EVENT; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); - Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter("K7_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= K7_EVNTSEL_ENABLE; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); @@ -361,8 +369,7 @@ static void setup_p6_watchdog(void) | P6_NMI_EVENT; wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); - Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0); + write_watchdog_counter("P6_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= P6_EVNTSEL0_ENABLE; wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); @@ -402,8 +409,7 @@ static int setup_p4_watchdog(void) wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); - Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter("P4_IQ_COUNTER0"); apic_write(APIC_LVTPC, APIC_DM_NMI); wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); return 1; @@ -518,7 +524,7 @@ void nmi_watchdog_tick (struct pt_regs * regs) * other P6 variant */ apic_write(APIC_LVTPC, APIC_DM_NMI); } - wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter(NULL); } } -- cgit v1.2.3 From c434b7a6aedfe428ad17cd61b21b125a7b7a29ce Mon Sep 17 00:00:00 2001 From: Natalie Protasevich Date: Thu, 23 Jun 2005 00:08:29 -0700 Subject: [PATCH] x86: avoid wasting IRQs for PCI devices I have submitted the patch for x86_64, this is submission for i386. The patch changes the way IRQs are handed out to PCI devices. Currently, each I/O APIC pin gets associated with an IRQ, no matter if the pin is used or not. This imposes severe limitation on systems that have designs that employ many I/O APICs, only utilizing couple lines of each, such as P64H2 chipset. It is used in ES7000, and currently, there is no way to boot the system with more that 9 I/O APICs. The simple change below allows to boot a system with say 64 (or more) I/O APICs, each providing 1 slot, which otherwise impossible because of the IRQ gaps created for unused lines on each I/O APIC. It does not resolve the problem with number of devices that exceeds number of possible IRQs, but eases up a tension for IRQs on any large system with potentually large number of devices. Signed-off-by: Natalie Protasevich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/mpparse.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 0a061056b828..383a11600d2c 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -1058,11 +1058,20 @@ void __init mp_config_acpi_legacy_irqs (void) } } +#define MAX_GSI_NUM 4096 + int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) { int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; + static int pci_irq = 16; + /* + * Mapping between Global System Interrups, which + * represent all possible interrupts, and IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; #ifdef CONFIG_ACPI_BUS /* Don't set up the ACPI SCI because it's already set up */ @@ -1097,11 +1106,26 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) if ((1< Date: Thu, 23 Jun 2005 00:08:33 -0700 Subject: [PATCH] Remove i386_ksyms.c, almost. * EXPORT_SYMBOL's moved to other files * #include , where needed * #include's in i386_ksyms.c cleaned up * After copy-paste, redundant due to Makefiles rules preprocessor directives removed: #ifdef CONFIG_FOO EXPORT_SYMBOL(foo); #endif obj-$(CONFIG_FOO) += foo.o * Tiny reformat to fit in 80 columns Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/i386_ksyms.c | 160 +----------------------------------------- arch/i386/kernel/i387.c | 3 + arch/i386/kernel/io_apic.c | 3 +- arch/i386/kernel/pci-dma.c | 3 + arch/i386/kernel/process.c | 7 ++ arch/i386/kernel/reboot.c | 5 ++ arch/i386/kernel/setup.c | 20 ++++++ arch/i386/kernel/smp.c | 3 + arch/i386/kernel/smpboot.c | 12 ++++ arch/i386/kernel/time.c | 4 ++ arch/i386/kernel/traps.c | 3 + 11 files changed, 64 insertions(+), 159 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 903190a4b3ff..180f070d03cb 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -1,97 +1,17 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include - -extern void dump_thread(struct pt_regs *, struct user *); -extern spinlock_t rtc_lock; /* This is definitely a GPL-only symbol */ EXPORT_SYMBOL_GPL(cpu_gdt_table); -#if defined(CONFIG_APM_MODULE) -extern void machine_real_restart(unsigned char *, int); -EXPORT_SYMBOL(machine_real_restart); -extern void default_idle(void); -EXPORT_SYMBOL(default_idle); -#endif - -#ifdef CONFIG_SMP -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -#endif - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) -extern struct drive_info_struct drive_info; -EXPORT_SYMBOL(drive_info); -#endif - -extern unsigned long cpu_khz; -extern unsigned long get_cmos_time(void); - -/* platform dependent support */ -EXPORT_SYMBOL(boot_cpu_data); -#ifdef CONFIG_DISCONTIGMEM -EXPORT_SYMBOL(node_data); -EXPORT_SYMBOL(physnode_map); -#endif -#ifdef CONFIG_X86_NUMAQ -EXPORT_SYMBOL(xquad_portio); -#endif -EXPORT_SYMBOL(dump_thread); -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL_GPL(kernel_fpu_begin); -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(ioremap_nocache); -EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(pm_idle); -EXPORT_SYMBOL(pm_power_off); -EXPORT_SYMBOL(get_cmos_time); -EXPORT_SYMBOL(cpu_khz); -EXPORT_SYMBOL(apm_info); - EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); EXPORT_SYMBOL(__down_failed_trylock); EXPORT_SYMBOL(__up_wakeup); /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); -/* Delay loops */ -EXPORT_SYMBOL(__ndelay); -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__delay); -EXPORT_SYMBOL(__const_udelay); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); @@ -105,87 +25,11 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); -EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); -EXPORT_SYMBOL(clear_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__copy_from_user_ll); -EXPORT_SYMBOL(__copy_to_user_ll); -EXPORT_SYMBOL(strnlen_user); - -EXPORT_SYMBOL(dma_alloc_coherent); -EXPORT_SYMBOL(dma_free_coherent); - -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif - -#ifdef CONFIG_PCI_BIOS -EXPORT_SYMBOL(pcibios_set_irq_routing); -EXPORT_SYMBOL(pcibios_get_irq_routing_table); -#endif - -#ifdef CONFIG_X86_USE_3DNOW -EXPORT_SYMBOL(_mmx_memcpy); -EXPORT_SYMBOL(mmx_clear_page); -EXPORT_SYMBOL(mmx_copy_page); -#endif - -#ifdef CONFIG_X86_HT -EXPORT_SYMBOL(smp_num_siblings); -EXPORT_SYMBOL(cpu_sibling_map); -#endif - #ifdef CONFIG_SMP -EXPORT_SYMBOL(cpu_data); -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_callout_map); +extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); +extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__write_lock_failed); EXPORT_SYMBOL(__read_lock_failed); - -/* Global SMP stuff */ -EXPORT_SYMBOL(smp_call_function); - -/* TLB flushing */ -EXPORT_SYMBOL(flush_tlb_page); -#endif - -#ifdef CONFIG_X86_IO_APIC -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -#endif - -#ifdef CONFIG_MCA -EXPORT_SYMBOL(machine_id); -#endif - -#ifdef CONFIG_VT -EXPORT_SYMBOL(screen_info); -#endif - -EXPORT_SYMBOL(get_wchan); - -EXPORT_SYMBOL(rtc_lock); - -EXPORT_SYMBOL_GPL(set_nmi_callback); -EXPORT_SYMBOL_GPL(unset_nmi_callback); - -EXPORT_SYMBOL(register_die_notifier); -#ifdef CONFIG_HAVE_DEC_LOCK -EXPORT_SYMBOL(_atomic_dec_and_lock); -#endif - -EXPORT_SYMBOL(__PAGE_KERNEL); - -#ifdef CONFIG_HIGHMEM -EXPORT_SYMBOL(kmap); -EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); -#endif - -#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) -EXPORT_SYMBOL(ist_info); #endif EXPORT_SYMBOL(csum_partial); diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c index c55e037f08f7..b817168d9c62 100644 --- a/arch/i386/kernel/i387.c +++ b/arch/i386/kernel/i387.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -79,6 +80,7 @@ void kernel_fpu_begin(void) } clts(); } +EXPORT_SYMBOL_GPL(kernel_fpu_begin); void restore_fpu( struct task_struct *tsk ) { @@ -526,6 +528,7 @@ int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu ) return fpvalid; } +EXPORT_SYMBOL(dump_fpu); int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu) { diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 51f7a5d8721f..08540bc4ba3e 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -31,7 +31,7 @@ #include #include #include - +#include #include #include #include @@ -812,6 +812,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) } return best_guess; } +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); /* * This function currently is only a helper for the i386 smp boot process where diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c index 4de2e03c7b45..1e51427cc9eb 100644 --- a/arch/i386/kernel/pci-dma.c +++ b/arch/i386/kernel/pci-dma.c @@ -11,6 +11,7 @@ #include #include #include +#include #include struct dma_coherent_mem { @@ -54,6 +55,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size, } return ret; } +EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) @@ -68,6 +70,7 @@ void dma_free_coherent(struct device *dev, size_t size, } else free_pages((unsigned long)vaddr, order); } +EXPORT_SYMBOL(dma_free_coherent); int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, dma_addr_t device_addr, size_t size, int flags) diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 96e3ea6b17c7..3c3f245cca53 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -73,6 +73,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk) * Powermanagement idle function, if any.. */ void (*pm_idle)(void); +EXPORT_SYMBOL(pm_idle); static DEFINE_PER_CPU(unsigned int, cpu_idle_state); void disable_hlt(void) @@ -105,6 +106,9 @@ void default_idle(void) cpu_relax(); } } +#ifdef CONFIG_APM_MODULE +EXPORT_SYMBOL(default_idle); +#endif /* * On SMP it's slightly faster (but much more power-consuming!) @@ -325,6 +329,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) /* Ok, create the new process.. */ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); } +EXPORT_SYMBOL(kernel_thread); /* * Free current thread data structures etc.. @@ -508,6 +513,7 @@ void dump_thread(struct pt_regs * regs, struct user * dump) dump->u_fpvalid = dump_fpu (regs, &dump->i387); } +EXPORT_SYMBOL(dump_thread); /* * Capture the user space registers if the task is not running (in user space) @@ -731,6 +737,7 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16); return 0; } +EXPORT_SYMBOL(get_wchan); /* * sys_alloc_thread_area: get a yet unused TLS descriptor index. diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 6dc27eb70ee7..db912209a8d3 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -2,6 +2,7 @@ * linux/arch/i386/kernel/reboot.c */ +#include #include #include #include @@ -19,6 +20,7 @@ * Power off function, if any */ void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); static int reboot_mode; static int reboot_thru_bios; @@ -295,6 +297,9 @@ void machine_real_restart(unsigned char *code, int length) : : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100))); } +#ifdef CONFIG_APM_MODULE +EXPORT_SYMBOL(machine_real_restart); +#endif void machine_restart(char * __unused) { diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 0d689335c8d6..30406fd0b64c 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -23,6 +23,7 @@ * This file handles the architecture-dependent parts of initialization */ +#include #include #include #include @@ -74,6 +75,7 @@ EXPORT_SYMBOL(efi_enabled); struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; /* common cpu data for all cpus */ struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; @@ -91,12 +93,18 @@ extern acpi_interrupt_flags acpi_sci_flags; /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; +#ifdef CONFIG_MCA +EXPORT_SYMBOL(machine_id); +#endif unsigned int machine_submodel_id; unsigned int BIOS_revision; unsigned int mca_pentium_flag; /* For PCI or other memory-mapped resources */ unsigned long pci_mem_start = 0x10000000; +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_mem_start); +#endif /* Boot loader ID as an integer, for the benefit of proc_dointvec */ int bootloader_type; @@ -108,14 +116,26 @@ static unsigned int highmem_pages = -1; * Setup options */ struct drive_info_struct { char dummy[32]; } drive_info; +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \ + defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) +EXPORT_SYMBOL(drive_info); +#endif struct screen_info screen_info; +#ifdef CONFIG_VT +EXPORT_SYMBOL(screen_info); +#endif struct apm_info apm_info; +EXPORT_SYMBOL(apm_info); struct sys_desc_table_struct { unsigned short length; unsigned char table[0]; }; struct edid_info edid_info; struct ist_info ist_info; +#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ + defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) +EXPORT_SYMBOL(ist_info); +#endif struct e820map e820; extern void early_cpu_init(void); diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 6223c33ac91c..68be7d0c7238 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -452,6 +453,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) preempt_enable(); } +EXPORT_SYMBOL(flush_tlb_page); static void do_flush_tlb_all(void* info) { @@ -547,6 +549,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, return 0; } +EXPORT_SYMBOL(smp_call_function); static void stop_this_cpu (void * dummy) { diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index bc1bb6919e6a..aaad95e7f879 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -60,6 +60,9 @@ static int __initdata smp_b_stepping; /* Number of siblings per CPU package */ int smp_num_siblings = 1; +#ifdef CONFIG_X86_HT +EXPORT_SYMBOL(smp_num_siblings); +#endif int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ EXPORT_SYMBOL(phys_proc_id); int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */ @@ -67,13 +70,16 @@ EXPORT_SYMBOL(cpu_core_id); /* bitmap of online cpus */ cpumask_t cpu_online_map; +EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; +EXPORT_SYMBOL(cpu_callout_map); static cpumask_t smp_commenced_mask; /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_data); u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff }; @@ -885,8 +891,14 @@ static void smp_tune_scheduling (void) static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; +#ifdef CONFIG_X86_NUMAQ +EXPORT_SYMBOL(xquad_portio); +#endif cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; +#ifdef CONFIG_X86_HT +EXPORT_SYMBOL(cpu_sibling_map); +#endif cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_core_map); diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index a0dcb7c87c30..8bc8363fbb4c 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -78,10 +78,12 @@ u64 jiffies_64 = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +EXPORT_SYMBOL(cpu_khz); extern unsigned long wall_jiffies; DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); @@ -324,6 +326,8 @@ unsigned long get_cmos_time(void) return retval; } +EXPORT_SYMBOL(get_cmos_time); + static void sync_cmos_clock(unsigned long dummy); static struct timer_list sync_cmos_timer = diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 83c579e82a81..7f729665d292 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -104,6 +104,7 @@ int register_die_notifier(struct notifier_block *nb) spin_unlock_irqrestore(&die_notifier_lock, flags); return err; } +EXPORT_SYMBOL(register_die_notifier); static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { @@ -636,11 +637,13 @@ void set_nmi_callback(nmi_callback_t callback) { nmi_callback = callback; } +EXPORT_SYMBOL_GPL(set_nmi_callback); void unset_nmi_callback(void) { nmi_callback = dummy_nmi_callback; } +EXPORT_SYMBOL_GPL(unset_nmi_callback); #ifdef CONFIG_KPROBES fastcall void do_int3(struct pt_regs *regs, long error_code) -- cgit v1.2.3 From a3a255e744dfa672e741dc24306491139d0de2d8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 23 Jun 2005 00:08:34 -0700 Subject: [PATCH] x86: cpu_khz type fix x86_64's cpu_khz is unsigned int and there is no reason why x86 needs to use unsigned long. So make cpu_khz unsigned int on x86 as well. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/proc.c | 2 +- arch/i386/kernel/smpboot.c | 2 +- arch/i386/kernel/time.c | 2 +- arch/i386/kernel/timers/common.c | 3 ++- arch/i386/kernel/timers/timer_hpet.c | 2 +- arch/i386/kernel/timers/timer_tsc.c | 7 ++++--- 6 files changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 7323c19f354e..8bd77d948a84 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -86,7 +86,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if ( cpu_has(c, X86_FEATURE_TSC) ) { - seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", + seq_printf(m, "cpu MHz\t\t: %u.%03u\n", cpu_khz / 1000, (cpu_khz % 1000)); } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index aaad95e7f879..c20d96d5c15c 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -205,7 +205,7 @@ static void __init synchronize_tsc_bp (void) unsigned long long t0; unsigned long long sum, avg; long long delta; - unsigned long one_usec; + unsigned int one_usec; int buggy = 0; printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 8bc8363fbb4c..e68d9fdb0759 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -77,7 +77,7 @@ u64 jiffies_64 = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); -unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +unsigned int cpu_khz; /* Detected as we calibrate the TSC */ EXPORT_SYMBOL(cpu_khz); extern unsigned long wall_jiffies; diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c index b38cc0d0c71a..37353bd31803 100644 --- a/arch/i386/kernel/timers/common.c +++ b/arch/i386/kernel/timers/common.c @@ -163,7 +163,8 @@ void init_cpu_khz(void) :"=a" (cpu_khz), "=d" (edx) :"r" (tsc_quotient), "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + printk("Detected %u.%03u MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); } } } diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index 15a7d727bd6f..d766e0963ac1 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -158,7 +158,7 @@ static int __init init_hpet(char* override) { unsigned long eax=0, edx=1000; ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, eax, edx); - printk("Detected %lu.%03lu MHz processor.\n", + printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } set_cyc2ns_scale(cpu_khz/1000); diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 27f08ae9120b..54c36b182021 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -256,7 +256,7 @@ static unsigned long loops_per_jiffy_ref = 0; #ifndef CONFIG_SMP static unsigned long fast_gettimeoffset_ref = 0; -static unsigned long cpu_khz_ref = 0; +static unsigned int cpu_khz_ref = 0; #endif static int @@ -323,7 +323,7 @@ static inline void cpufreq_delayed_get(void) { return; } int recalibrate_cpu_khz(void) { #ifndef CONFIG_SMP - unsigned long cpu_khz_old = cpu_khz; + unsigned int cpu_khz_old = cpu_khz; if (cpu_has_tsc) { init_cpu_khz(); @@ -534,7 +534,8 @@ static int __init init_tsc(char* override) :"=a" (cpu_khz), "=d" (edx) :"r" (tsc_quotient), "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + printk("Detected %u.%03u MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); } set_cyc2ns_scale(cpu_khz/1000); return 0; -- cgit v1.2.3 From c92c6ffdb16990872acf4ce8b24f82f98fcbbb68 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 23 Jun 2005 00:08:35 -0700 Subject: [PATCH] mtrr size-and-base debugging Consolidate the mtrr sanity checking, add a dump_stack(). Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/mtrr/main.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index e1c2042b9b7e..d66b09e0c820 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -375,6 +375,19 @@ int mtrr_add_page(unsigned long base, unsigned long size, return error; } +static int mtrr_check(unsigned long base, unsigned long size) +{ + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { + printk(KERN_WARNING + "mtrr: size and base must be multiples of 4 kiB\n"); + printk(KERN_DEBUG + "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + dump_stack(); + return -1; + } + return 0; +} + /** * mtrr_add - Add a memory type region * @base: Physical base address of region @@ -415,11 +428,8 @@ int mtrr_add(unsigned long base, unsigned long size, unsigned int type, char increment) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_WARNING "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + if (mtrr_check(base, size)) return -EINVAL; - } return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, increment); } @@ -511,11 +521,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) int mtrr_del(int reg, unsigned long base, unsigned long size) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + if (mtrr_check(base, size)) return -EINVAL; - } return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); } -- cgit v1.2.3 From 1cc6f12e03ebc064b74161c684f987284ce9d0cc Mon Sep 17 00:00:00 2001 From: Vincent Hanquez Date: Thu, 23 Jun 2005 00:08:43 -0700 Subject: [PATCH] xen: x86: Use new macro for debugreg Make use of the 2 new macro set_debugreg and get_debugreg. Signed-off-by: Vincent Hanquez Cc: Ian Pratt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 2 +- arch/i386/kernel/process.c | 12 ++++++------ arch/i386/kernel/signal.c | 2 +- arch/i386/kernel/traps.c | 6 ++---- 4 files changed, 10 insertions(+), 12 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index d199e525680a..b9954248d0aa 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -635,7 +635,7 @@ void __init cpu_init (void) /* Clear all 6 debug registers: */ -#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) ); +#define CD(register) set_debugreg(0, register) CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 3c3f245cca53..2468ab70c386 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -633,13 +633,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas * Now maybe reload the debug registers */ if (unlikely(next->debugreg[7])) { - loaddebug(next, 0); - loaddebug(next, 1); - loaddebug(next, 2); - loaddebug(next, 3); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); /* no 4 and 5 */ - loaddebug(next, 6); - loaddebug(next, 7); + set_debugreg(current->thread.debugreg[6], 6); + set_debugreg(current->thread.debugreg[7], 7); } if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 344400787c39..839d4dc88cd4 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -624,7 +624,7 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * inside the kernel. */ if (unlikely(current->thread.debugreg[7])) { - loaddebug(¤t->thread, 7); + set_debugreg(current->thread.debugreg[7], 7); } /* Whee! Actually deliver the signal. */ diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 7f729665d292..c01d7ba6d7e8 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -685,7 +685,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) unsigned int condition; struct task_struct *tsk = current; - __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) @@ -727,9 +727,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) * the signal is delivered. */ clear_dr7: - __asm__("movl %0,%%db7" - : /* no output */ - : "r" (0)); + set_debugreg(0, 7); return; debug_vm86: -- cgit v1.2.3 From fa1e1bdf78d405f9905b8290ee9211e7a7bbc99b Mon Sep 17 00:00:00 2001 From: Vincent Hanquez Date: Thu, 23 Jun 2005 00:08:44 -0700 Subject: [PATCH] xen: x86: Rename usermode macro Rename user_mode to user_mode_vm and add a user_mode macro similar to the x86-64 one. This is useful for Xen because the linux xen kernel does not runs on the same priviledge that a vanilla linux kernel, and with this we just need to redefine user_mode(). Signed-off-by: Vincent Hanquez Cc: Ian Pratt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 2 +- arch/i386/kernel/ptrace.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index d509836b70c3..8d993fa71754 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -1133,7 +1133,7 @@ inline void smp_local_timer_interrupt(struct pt_regs * regs) } #ifdef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode_vm(regs)); #endif } diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index e34f651fa13c..0da59b42843c 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -668,7 +668,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) info.si_code = TRAP_BRKPT; /* User-mode eip? */ - info.si_addr = user_mode(regs) ? (void __user *) regs->eip : NULL; + info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; /* Send us the fakey SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); -- cgit v1.2.3 From 717b594a415bfaf2dbd5e8266636488f2564c689 Mon Sep 17 00:00:00 2001 From: Vincent Hanquez Date: Thu, 23 Jun 2005 00:08:45 -0700 Subject: [PATCH] xen: x86: Use more usermode macro Use the user_mode macro where it's possible. Signed-off-by: Vincent Hanquez Cc: Ian Pratt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/process.c | 2 +- arch/i386/kernel/signal.c | 2 +- arch/i386/kernel/traps.c | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 2468ab70c386..be3efba7caf7 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -266,7 +266,7 @@ void show_regs(struct pt_regs * regs) printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (regs->xcs & 3) + if (user_mode(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s)\n", regs->eflags, print_tainted(), system_utsname.release); diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 839d4dc88cd4..b9b8f4e20fad 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -605,7 +605,7 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * kernel mode. Just return without doing anything * if so. */ - if ((regs->xcs & 3) != 3) + if (!user_mode(regs)) return 1; if (current->flags & PF_FREEZE) { diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index c01d7ba6d7e8..e4d4e2162c7a 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -210,7 +210,7 @@ void show_registers(struct pt_regs *regs) esp = (unsigned long) (®s->esp); ss = __KERNEL_DS; - if (regs->xcs & 3) { + if (user_mode(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; @@ -266,7 +266,7 @@ static void handle_BUG(struct pt_regs *regs) char c; unsigned long eip; - if (regs->xcs & 3) + if (user_mode(regs)) goto no_bug; /* Not in kernel */ eip = regs->eip; @@ -354,7 +354,7 @@ void die(const char * str, struct pt_regs * regs, long err) static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) { - if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs)) + if (!user_mode_vm(regs)) die(str, regs, err); } @@ -367,7 +367,7 @@ static void do_trap(int trapnr, int signr, char *str, int vm86, goto trap_signal; } - if (!(regs->xcs & 3)) + if (!user_mode(regs)) goto kernel_trap; trap_signal: { @@ -489,7 +489,7 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code) if (regs->eflags & VM_MASK) goto gp_in_vm86; - if (!(regs->xcs & 3)) + if (!user_mode(regs)) goto gp_in_kernel; current->thread.error_code = error_code; @@ -716,7 +716,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) * check for kernel mode by just checking the CPL * of CS. */ - if ((regs->xcs & 3) == 0) + if (!user_mode(regs)) goto clear_TF_reenable; } -- cgit v1.2.3 From b94cce926b2b902b79380ccba370d6f9f2980de0 Mon Sep 17 00:00:00 2001 From: Hien Nguyen Date: Thu, 23 Jun 2005 00:09:19 -0700 Subject: [PATCH] kprobes: function-return probes This patch adds function-return probes to kprobes for the i386 architecture. This enables you to establish a handler to be run when a function returns. 1. API Two new functions are added to kprobes: int register_kretprobe(struct kretprobe *rp); void unregister_kretprobe(struct kretprobe *rp); 2. Registration and unregistration 2.1 Register To register a function-return probe, the user populates the following fields in a kretprobe object and calls register_kretprobe() with the kretprobe address as an argument: kp.addr - the function's address handler - this function is run after the ret instruction executes, but before control returns to the return address in the caller. maxactive - The maximum number of instances of the probed function that can be active concurrently. For example, if the function is non- recursive and is called with a spinlock or mutex held, maxactive = 1 should be enough. If the function is non-recursive and can never relinquish the CPU (e.g., via a semaphore or preemption), NR_CPUS should be enough. maxactive is used to determine how many kretprobe_instance objects to allocate for this particular probed function. If maxactive <= 0, it is set to a default value (if CONFIG_PREEMPT maxactive=max(10, 2 * NR_CPUS) else maxactive=NR_CPUS) For example: struct kretprobe rp; rp.kp.addr = /* entrypoint address */ rp.handler = /*return probe handler */ rp.maxactive = /* e.g., 1 or NR_CPUS or 0, see the above explanation */ register_kretprobe(&rp); The following field may also be of interest: nmissed - Initialized to zero when the function-return probe is registered, and incremented every time the probed function is entered but there is no kretprobe_instance object available for establishing the function-return probe (i.e., because maxactive was set too low). 2.2 Unregister To unregiter a function-return probe, the user calls unregister_kretprobe() with the same kretprobe object as registered previously. If a probed function is running when the return probe is unregistered, the function will return as expected, but the handler won't be run. 3. Limitations 3.1 This patch supports only the i386 architecture, but patches for x86_64 and ppc64 are anticipated soon. 3.2 Return probes operates by replacing the return address in the stack (or in a known register, such as the lr register for ppc). This may cause __builtin_return_address(0), when invoked from the return-probed function, to return the address of the return-probes trampoline. 3.3 This implementation uses the "Multiprobes at an address" feature in 2.6.12-rc3-mm3. 3.4 Due to a limitation in multi-probes, you cannot currently establish a return probe and a jprobe on the same function. A patch to remove this limitation is being tested. This feature is required by SystemTap (http://sourceware.org/systemtap), and reflects ideas contributed by several SystemTap developers, including Will Cohen and Ananth Mavinakayanahalli. Signed-off-by: Hien Nguyen Signed-off-by: Prasanna S Panchamukhi Signed-off-by: Frederik Deweerdt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 102 ++++++++++++++++++++++++++++++++++++++++++++- arch/i386/kernel/process.c | 15 +++++++ 2 files changed, 116 insertions(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 59ff9b455069..048f754bbe23 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -23,6 +23,9 @@ * Rusty Russell). * 2004-July Suparna Bhattacharya added jumper probes * interface to access function arguments. + * 2005-May Hien Nguyen , Jim Keniston + * and Prasanna S Panchamukhi + * added function-return probes. */ #include @@ -91,6 +94,53 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->eip = (unsigned long)&p->ainsn.insn; } +struct task_struct *arch_get_kprobe_task(void *ptr) +{ + return ((struct thread_info *) (((unsigned long) ptr) & + (~(THREAD_SIZE -1))))->task; +} + +void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +{ + unsigned long *sara = (unsigned long *)®s->esp; + struct kretprobe_instance *ri; + static void *orig_ret_addr; + + /* + * Save the return address when the return probe hits + * the first time, and use it to populate the (krprobe + * instance)->ret_addr for subsequent return probes at + * the same addrress since stack address would have + * the kretprobe_trampoline by then. + */ + if (((void*) *sara) != kretprobe_trampoline) + orig_ret_addr = (void*) *sara; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->stack_addr = sara; + ri->ret_addr = orig_ret_addr; + add_rp_inst(ri); + /* Replace the return addr with trampoline addr */ + *sara = (unsigned long) &kretprobe_trampoline; + } else { + rp->nmissed++; + } +} + +void arch_kprobe_flush_task(struct task_struct *tk, spinlock_t *kp_lock) +{ + unsigned long flags = 0; + struct kretprobe_instance *ri; + spin_lock_irqsave(kp_lock, flags); + while ((ri = get_rp_inst_tsk(tk)) != NULL) { + *((unsigned long *)(ri->stack_addr)) = + (unsigned long) ri->ret_addr; + recycle_rp_inst(ri); + } + spin_unlock_irqrestore(kp_lock, flags); +} + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled thorough out this function. @@ -183,6 +233,55 @@ no_kprobe: return ret; } +/* + * For function-return probes, init_kprobes() establishes a probepoint + * here. When a retprobed function returns, this probe is hit and + * trampoline_probe_handler() runs, calling the kretprobe's handler. + */ + void kretprobe_trampoline_holder(void) + { + asm volatile ( ".global kretprobe_trampoline\n" + "kretprobe_trampoline: \n" + "nop\n"); + } + +/* + * Called when we hit the probe point at kretprobe_trampoline + */ +int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct kretprobe_instance *ri; + struct hlist_head *head; + struct hlist_node *node; + unsigned long *sara = ((unsigned long *) ®s->esp) - 1; + + tsk = arch_get_kprobe_task(sara); + head = kretprobe_inst_table_head(tsk); + + hlist_for_each_entry(ri, node, head, hlist) { + if (ri->stack_addr == sara && ri->rp) { + if (ri->rp->handler) + ri->rp->handler(ri, regs); + } + } + return 0; +} + +void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + struct kretprobe_instance *ri; + /* RA already popped */ + unsigned long *sara = ((unsigned long *)®s->esp) - 1; + + while ((ri = get_rp_inst(sara))) { + regs->eip = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + } + regs->eflags &= ~TF_MASK; +} + /* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "int 3" @@ -266,7 +365,8 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (current_kprobe->post_handler) current_kprobe->post_handler(current_kprobe, regs, 0); - resume_execution(current_kprobe, regs); + if (current_kprobe->post_handler != trampoline_post_handler) + resume_execution(current_kprobe, regs); regs->eflags |= kprobe_saved_eflags; unlock_kprobes(); diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index be3efba7caf7..aea2ce1145df 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -339,6 +340,13 @@ void exit_thread(void) struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(tsk); + /* The process may have allocated an io port bitmap... nuke it. */ if (unlikely(NULL != t->io_bitmap_ptr)) { int cpu = get_cpu(); @@ -362,6 +370,13 @@ void flush_thread(void) { struct task_struct *tsk = current; + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(tsk); + memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* -- cgit v1.2.3 From 7e1048b11c5afe79aac46a42e3ccec86b8365c6d Mon Sep 17 00:00:00 2001 From: Rusty Lynch Date: Thu, 23 Jun 2005 00:09:25 -0700 Subject: [PATCH] Move kprobe [dis]arming into arch specific code The architecture independent code of the current kprobes implementation is arming and disarming kprobes at registration time. The problem is that the code is assuming that arming and disarming is a just done by a simple write of some magic value to an address. This is problematic for ia64 where our instructions look more like structures, and we can not insert break points by just doing something like: *p->addr = BREAKPOINT_INSTRUCTION; The following patch to 2.6.12-rc4-mm2 adds two new architecture dependent functions: * void arch_arm_kprobe(struct kprobe *p) * void arch_disarm_kprobe(struct kprobe *p) and then adds the new functions for each of the architectures that already implement kprobes (spar64/ppc64/i386/x86_64). I thought arch_[dis]arm_kprobe was the most descriptive of what was really happening, but each of the architectures already had a disarm_kprobe() function that was really a "disarm and do some other clean-up items as needed when you stumble across a recursive kprobe." So... I took the liberty of changing the code that was calling disarm_kprobe() to call arch_disarm_kprobe(), and then do the cleanup in the block of code dealing with the recursive kprobe case. So far this patch as been tested on i386, x86_64, and ppc64, but still needs to be tested in sparc64. Signed-off-by: Rusty Lynch Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 048f754bbe23..2314d8d306fd 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -71,16 +72,25 @@ int arch_prepare_kprobe(struct kprobe *p) void arch_copy_kprobe(struct kprobe *p) { memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; } -void arch_remove_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +void arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; - regs->eip = (unsigned long)p->addr; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -177,7 +187,8 @@ static int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - disarm_kprobe(p, regs); + arch_disarm_kprobe(p); + regs->eip = (unsigned long)p->addr; ret = 1; } else { p = current_kprobe; -- cgit v1.2.3 From 0aa55e4d7db822059fe8132fe9f2b7773c48216c Mon Sep 17 00:00:00 2001 From: Hien Nguyen Date: Thu, 23 Jun 2005 00:09:26 -0700 Subject: [PATCH] kprobes: moves lock-unlock to non-arch kprobe_flush_task This patch moves the lock/unlock of the arch specific kprobe_flush_task() to the non-arch specific kprobe_flusk_task(). Signed-off-by: Hien Nguyen Acked-by: Prasanna S Panchamukhi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 2314d8d306fd..b8e2bae0ab4f 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -138,17 +138,14 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) } } -void arch_kprobe_flush_task(struct task_struct *tk, spinlock_t *kp_lock) +void arch_kprobe_flush_task(struct task_struct *tk) { - unsigned long flags = 0; struct kretprobe_instance *ri; - spin_lock_irqsave(kp_lock, flags); while ((ri = get_rp_inst_tsk(tk)) != NULL) { *((unsigned long *)(ri->stack_addr)) = (unsigned long) ri->ret_addr; recycle_rp_inst(ri); } - spin_unlock_irqrestore(kp_lock, flags); } /* -- cgit v1.2.3 From 417c8da6511b54843e6b557d94d8112d80e32156 Mon Sep 17 00:00:00 2001 From: Prasanna S Panchamukhi Date: Thu, 23 Jun 2005 00:09:37 -0700 Subject: [PATCH] kprobes: Temporary disarming of reentrant probe for i386 This patch includes i386 architecture specific changes to support temporary disarming on reentrancy of probes. Signed-of-by: Prasanna S Panchamukhi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 62 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 13 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index b8e2bae0ab4f..3762f6b35ab2 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -37,12 +37,10 @@ #include #include -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 - static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev; static struct pt_regs jprobe_saved_regs; static long *jprobe_saved_esp; /* copy of the kernel stack at the probe fire time */ @@ -93,6 +91,31 @@ void arch_remove_kprobe(struct kprobe *p) { } +static inline void save_previous_kprobe(void) +{ + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; + kprobe_old_eflags_prev = kprobe_old_eflags; + kprobe_saved_eflags_prev = kprobe_saved_eflags; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; + kprobe_old_eflags = kprobe_old_eflags_prev; + kprobe_saved_eflags = kprobe_saved_eflags_prev; +} + +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) +{ + current_kprobe = p; + kprobe_saved_eflags = kprobe_old_eflags + = (regs->eflags & (TF_MASK | IF_MASK)); + if (is_IF_modifier(p->opcode)) + kprobe_saved_eflags &= ~IF_MASK; +} + static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { regs->eflags |= TF_MASK; @@ -184,9 +207,18 @@ static int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - arch_disarm_kprobe(p); - regs->eip = (unsigned long)p->addr; - ret = 1; + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p, regs); + p->nmissed++; + prepare_singlestep(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) { @@ -221,11 +253,7 @@ static int kprobe_handler(struct pt_regs *regs) } kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; - kprobe_saved_eflags = kprobe_old_eflags - = (regs->eflags & (TF_MASK | IF_MASK)); - if (is_IF_modifier(p->opcode)) - kprobe_saved_eflags &= ~IF_MASK; + set_current_kprobe(p, regs); if (p->pre_handler && p->pre_handler(p, regs)) /* handler has already set things up, so skip ss setup */ @@ -370,14 +398,22 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } if (current_kprobe->post_handler != trampoline_post_handler) resume_execution(current_kprobe, regs); regs->eflags |= kprobe_saved_eflags; + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } unlock_kprobes(); +out: preempt_enable_no_resched(); /* -- cgit v1.2.3 From e70c9d5e61c6cb2272c866fc1303e62975006752 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 25 Jun 2005 14:54:25 -0700 Subject: [PATCH] I8K: use standard DMI interface I8K: Change to use stock dmi infrastructure instead of homegrown parsing code. The driver now requires box's DMI data to match list of supported models so driver can be safely compiled-in by default without fear of it poking into random SMM BIOS code. DMI checks can be ignored with i8k.ignore_dmi option. Signed-off-by: Dmitry Torokhov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/dmi_scan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 6ed7e28f306c..3facd20212bb 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -414,6 +414,7 @@ static void __init dmi_decode(struct dmi_header *dm) dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7]))); + dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); break; case 2: dmi_printk(("Board Vendor: %s\n", @@ -470,7 +471,6 @@ fail: d++; return count; } - EXPORT_SYMBOL(dmi_check_system); /** @@ -480,8 +480,8 @@ EXPORT_SYMBOL(dmi_check_system); * Returns one DMI data value, can be used to perform * complex DMI data checks. */ -char * dmi_get_system_info(int field) +char *dmi_get_system_info(int field) { return dmi_ident[field]; } - +EXPORT_SYMBOL(dmi_get_system_info); -- cgit v1.2.3 From aea00143a8db8c0b31dca85bff3c325444d93f0f Mon Sep 17 00:00:00 2001 From: Andrey Panin Date: Sat, 25 Jun 2005 14:54:42 -0700 Subject: [PATCH] dmi: move ACPI boot quirk This patch moves ACPI boot quirks out of dmi_scan.c Signed-off-by: Andrey Panin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/acpi/boot.c | 219 ++++++++++++++++++++++++++++++++++++++++++- arch/i386/kernel/dmi_scan.c | 163 -------------------------------- 2 files changed, 217 insertions(+), 165 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 848bb97af7ca..0771596c5496 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -815,6 +816,218 @@ acpi_process_madt(void) return; } +extern int acpi_force; + +#ifdef __i386__ + +#ifdef CONFIG_ACPI_PCI +static int __init disable_acpi_irq(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n", + d->ident); + acpi_noirq_set(); + } + return 0; +} + +static int __init disable_acpi_pci(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", + d->ident); + acpi_disable_pci(); + } + return 0; +} +#endif + +static int __init dmi_disable_acpi(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); + disable_acpi(); + } else { + printk(KERN_NOTICE + "Warning: DMI blacklist says broken, but acpi forced\n"); + } + return 0; +} + +/* + * Limit ACPI to CPU enumeration for HT + */ +static int __init force_acpi_ht(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident); + disable_acpi(); + acpi_ht = 1; + } else { + printk(KERN_NOTICE + "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); + } + return 0; +} + +/* + * If your system is blacklisted here, but you find that acpi=force + * works for you, please contact acpi-devel@sourceforge.net + */ +static struct dmi_system_id __initdata acpi_dmi_table[] = { + /* + * Boxes that need ACPI disabled + */ + { + .callback = dmi_disable_acpi, + .ident = "IBM Thinkpad", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2629H1G"), + }, + }, + + /* + * Boxes that need acpi=ht + */ + { + .callback = force_acpi_ht, + .ident = "FSC Primergy T850", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "DELL GX240", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "HP VISUALIZE NT Workstation", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "Compaq Workstation W8000", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), + DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS P4B266", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P4B266"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS P2B-DS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS CUR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ABIT i440BX-W83977", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ABIT "), + DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM Bladecenter", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eServer xSeries 360", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 330", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 440", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), + }, + }, + +#ifdef CONFIG_ACPI_PCI + /* + * Boxes that need ACPI PCI IRQ routing disabled + */ + { + .callback = disable_acpi_irq, + .ident = "ASUS A7V", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), + DMI_MATCH(DMI_BOARD_NAME, ""), + /* newer BIOS, Revision 1011, does work */ + DMI_MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), + }, + }, + + /* + * Boxes that need ACPI PCI IRQ routing and PCI scan disabled + */ + { /* _BBN 0 bug */ + .callback = disable_acpi_pci, + .ident = "ASUS PR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"), + DMI_MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"), + DMI_MATCH(DMI_BIOS_DATE, "03/21/2003") + }, + }, + { + .callback = disable_acpi_pci, + .ident = "Acer TravelMate 36x Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + }, + }, +#endif +}; + +#endif /* __i386__ */ + /* * acpi_boot_table_init() and acpi_boot_init() * called from setup_arch(), always. @@ -843,6 +1056,10 @@ acpi_boot_table_init(void) { int error; +#ifdef __i386__ + dmi_check_system(acpi_dmi_table); +#endif + /* * If acpi_disabled, bail out * One exception: acpi=ht continues far enough to enumerate LAPICs @@ -870,8 +1087,6 @@ acpi_boot_table_init(void) */ error = acpi_blacklisted(); if (error) { - extern int acpi_force; - if (acpi_force) { printk(KERN_WARNING PREFIX "acpi=force override\n"); } else { diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 3facd20212bb..b4bae60f5065 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -188,59 +188,6 @@ static __init int reset_videomode_after_s3(struct dmi_blacklist *d) #endif -#ifdef CONFIG_ACPI_BOOT -extern int acpi_force; - -static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); - disable_acpi(); - } else { - printk(KERN_NOTICE - "Warning: DMI blacklist says broken, but acpi forced\n"); - } - return 0; -} - -/* - * Limit ACPI to CPU enumeration for HT - */ -static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident); - disable_acpi(); - acpi_ht = 1; - } else { - printk(KERN_NOTICE - "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); - } - return 0; -} -#endif - -#ifdef CONFIG_ACPI_PCI -static __init int disable_acpi_irq(struct dmi_blacklist *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n", - d->ident); - acpi_noirq_set(); - } - return 0; -} -static __init int disable_acpi_pci(struct dmi_blacklist *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", - d->ident); - acpi_disable_pci(); - } - return 0; -} -#endif - /* * Process the DMI blacklists */ @@ -264,116 +211,6 @@ static __initdata struct dmi_blacklist dmi_blacklist[]={ } }, #endif -#ifdef CONFIG_ACPI_BOOT - /* - * If your system is blacklisted here, but you find that acpi=force - * works for you, please contact acpi-devel@sourceforge.net - */ - - /* - * Boxes that need ACPI disabled - */ - - { dmi_disable_acpi, "IBM Thinkpad", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "2629H1G"), - NO_MATCH, NO_MATCH }}, - - /* - * Boxes that need acpi=ht - */ - - { force_acpi_ht, "FSC Primergy T850", { - MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), - MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "DELL GX240", { - MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), - MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "HP VISUALIZE NT Workstation", { - MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), - MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "Compaq Workstation W8000", { - MATCH(DMI_SYS_VENDOR, "Compaq"), - MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "ASUS P4B266", { - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - MATCH(DMI_BOARD_NAME, "P4B266"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "ASUS P2B-DS", { - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - MATCH(DMI_BOARD_NAME, "P2B-DS"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "ASUS CUR-DLS", { - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - MATCH(DMI_BOARD_NAME, "CUR-DLS"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "ABIT i440BX-W83977", { - MATCH(DMI_BOARD_VENDOR, "ABIT "), - MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "IBM Bladecenter", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "IBM eServer xSeries 360", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "IBM eserver xSeries 330", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "IBM eserver xSeries 440", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), - NO_MATCH, NO_MATCH }}, - -#endif // CONFIG_ACPI_BOOT - -#ifdef CONFIG_ACPI_PCI - /* - * Boxes that need ACPI PCI IRQ routing disabled - */ - - { disable_acpi_irq, "ASUS A7V", { - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), - MATCH(DMI_BOARD_NAME, ""), - /* newer BIOS, Revision 1011, does work */ - MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), - NO_MATCH }}, - - /* - * Boxes that need ACPI PCI IRQ routing and PCI scan disabled - */ - { disable_acpi_pci, "ASUS PR-DLS", { /* _BBN 0 bug */ - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - MATCH(DMI_BOARD_NAME, "PR-DLS"), - MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"), - MATCH(DMI_BIOS_DATE, "03/21/2003") }}, - - { disable_acpi_pci, "Acer TravelMate 36x Laptop", { - MATCH(DMI_SYS_VENDOR, "Acer"), - MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), - NO_MATCH, NO_MATCH - } }, - -#endif - { NULL, } }; -- cgit v1.2.3 From 0f8133a8db81ff824a4abbe5bb0f15bf034d31c3 Mon Sep 17 00:00:00 2001 From: Andrey Panin Date: Sat, 25 Jun 2005 14:54:45 -0700 Subject: [PATCH] dmi: move ACPI sleep quirk This patch moves ACPI sleep quirk out of dmi_scan.c Signed-off-by: Andrey Panin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/acpi/boot.c | 1 + arch/i386/kernel/acpi/sleep.c | 27 +++++++++++++++++++++++++++ arch/i386/kernel/dmi_scan.c | 16 ---------------- 3 files changed, 28 insertions(+), 16 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 0771596c5496..9f63ae0f404b 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -1024,6 +1024,7 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { }, }, #endif + { } }; #endif /* __i386__ */ diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c index 28bb0514bb6e..c1af93032ff3 100644 --- a/arch/i386/kernel/acpi/sleep.c +++ b/arch/i386/kernel/acpi/sleep.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -91,3 +92,29 @@ static int __init acpi_sleep_setup(char *str) __setup("acpi_sleep=", acpi_sleep_setup); + + +static __init int reset_videomode_after_s3(struct dmi_system_id *d) +{ + acpi_video_flags |= 2; + return 0; +} + +static __initdata struct dmi_system_id acpisleep_dmi_table[] = { + { /* Reset video mode after returning from ACPI S3 sleep */ + .callback = reset_videomode_after_s3, + .ident = "Toshiba Satellite 4030cdt", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), + }, + }, + { } +}; + +static int __init acpisleep_dmi_init(void) +{ + dmi_check_system(acpisleep_dmi_table); + return 0; +} + +core_initcall(acpisleep_dmi_init); diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index b4bae60f5065..065c30a73c13 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -177,16 +177,6 @@ static __init int broken_toshiba_keyboard(struct dmi_blacklist *d) } -#ifdef CONFIG_ACPI_SLEEP -static __init int reset_videomode_after_s3(struct dmi_blacklist *d) -{ - /* See acpi_wakeup.S */ - extern long acpi_video_flags; - acpi_video_flags |= 2; - return 0; -} -#endif - /* * Process the DMI blacklists @@ -204,12 +194,6 @@ static __initdata struct dmi_blacklist dmi_blacklist[]={ MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), NO_MATCH, NO_MATCH, NO_MATCH } }, -#ifdef CONFIG_ACPI_SLEEP - { reset_videomode_after_s3, "Toshiba Satellite 4030cdt", { /* Reset video mode after returning from ACPI S3 sleep */ - MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), - NO_MATCH, NO_MATCH, NO_MATCH - } }, -#endif { NULL, } }; -- cgit v1.2.3 From b625883f24d018c989173aeb727f6de954fb154d Mon Sep 17 00:00:00 2001 From: Andrey Panin Date: Sat, 25 Jun 2005 14:54:46 -0700 Subject: [PATCH] dmi: remove central blacklist Since last dmi quirk looks useless (it just prints 404 compliant url) we can finally remove central dmi blacklist. Signed-off-by: Andrey Panin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/dmi_scan.c | 45 +-------------------------------------------- 1 file changed, 1 insertion(+), 44 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 065c30a73c13..ad4b369a5837 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -159,51 +159,11 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) printk(KERN_ERR "dmi_save_ident: out of memory.\n"); } -/* - * Ugly compatibility crap. - */ -#define dmi_blacklist dmi_system_id -#define NO_MATCH { DMI_NONE, NULL} -#define MATCH DMI_MATCH - -/* - * Toshiba keyboard likes to repeat keys when they are not repeated. - */ - -static __init int broken_toshiba_keyboard(struct dmi_blacklist *d) -{ - printk(KERN_WARNING "Toshiba with broken keyboard detected. If your keyboard sometimes generates 3 keypresses instead of one, see http://davyd.ucc.asn.au/projects/toshiba/README\n"); - return 0; -} - - - -/* - * Process the DMI blacklists - */ - - -/* - * This will be expanded over time to force things like the APM - * interrupt mask settings according to the laptop - */ - -static __initdata struct dmi_blacklist dmi_blacklist[]={ - - { broken_toshiba_keyboard, "Toshiba Satellite 4030cdt", { /* Keyboard generates spurious repeats */ - MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), - NO_MATCH, NO_MATCH, NO_MATCH - } }, - - { NULL, } -}; - /* * Process a DMI table entry. Right now all we care about are the BIOS * and machine entries. For 2.5 we should pull the smbus controller info * out of here. */ - static void __init dmi_decode(struct dmi_header *dm) { #ifdef DMI_DEBUG @@ -253,10 +213,7 @@ static void __init dmi_decode(struct dmi_header *dm) void __init dmi_scan_machine(void) { - int err = dmi_iterate(dmi_decode); - if(err == 0) - dmi_check_system(dmi_blacklist); - else + if (dmi_iterate(dmi_decode)) printk(KERN_INFO "DMI not present.\n"); } -- cgit v1.2.3 From 1249c5138f573890eae0c01f13d627094edcd55c Mon Sep 17 00:00:00 2001 From: Andrey Panin Date: Sat, 25 Jun 2005 14:54:47 -0700 Subject: [PATCH] dmi: spring cleanup Whitespace and CodingStyle cleanup. No functionality changes. Signed-off-by: Andrey Panin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/dmi_scan.c | 163 +++++++++++++++++++------------------------- 1 file changed, 70 insertions(+), 93 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index ad4b369a5837..a3cdf894302b 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -1,22 +1,15 @@ #include -#include #include #include #include -#include -#include -#include -#include -#include #include #include -struct dmi_header -{ - u8 type; - u8 length; - u16 handle; +struct dmi_header { + u8 type; + u8 length; + u16 handle; }; #undef DMI_DEBUG @@ -29,15 +22,13 @@ struct dmi_header static char * __init dmi_string(struct dmi_header *dm, u8 s) { - u8 *bp=(u8 *)dm; - bp+=dm->length; - if(!s) + u8 *bp = ((u8 *) dm) + dm->length; + + if (!s) return ""; s--; - while(s>0 && *bp) - { - bp+=strlen(bp); - bp++; + while (s > 0 && *bp) { + bp += strlen(bp) + 1; s--; } return bp; @@ -47,16 +38,14 @@ static char * __init dmi_string(struct dmi_header *dm, u8 s) * We have to be cautious here. We have seen BIOSes with DMI pointers * pointing to completely the wrong place for example */ - -static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dmi_header *)) +static int __init dmi_table(u32 base, int len, int num, + void (*decode)(struct dmi_header *)) { - u8 *buf; - struct dmi_header *dm; - u8 *data; - int i=0; + u8 *buf, *data; + int i = 0; buf = bt_ioremap(base, len); - if(buf==NULL) + if (buf == NULL) return -1; data = buf; @@ -65,36 +54,34 @@ static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dm * Stop when we see all the items the table claimed to have * OR we run off the end of the table (also happens) */ - - while(ilength; - while(data-buflength; + while ((data - buf < len - 1) && (data[0] || data[1])) data++; - if(data-buf>4, buf[14]&0x0F); + buf[14] >> 4, buf[14] & 0xF); else printk(KERN_INFO "DMI present.\n"); + dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n", num, len)); - dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", - base)); - if(dmi_table(base,len, num, decode)==0) + dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base)); + + if (dmi_table(base,len, num, decode) == 0) return 0; } } @@ -143,16 +132,17 @@ static char *dmi_ident[DMI_STRING_MAX]; /* * Save a DMI string */ - static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) { char *d = (char*)dm; char *p = dmi_string(dm, d[string]); - if(p==NULL || *p == 0) + + if (p == NULL || *p == 0) return; if (dmi_ident[slot]) return; - dmi_ident[slot] = alloc_bootmem(strlen(p)+1); + + dmi_ident[slot] = alloc_bootmem(strlen(p) + 1); if(dmi_ident[slot]) strcpy(dmi_ident[slot], p); else @@ -166,48 +156,35 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) */ static void __init dmi_decode(struct dmi_header *dm) { -#ifdef DMI_DEBUG - u8 *data = (u8 *)dm; -#endif + u8 *data __attribute__((__unused__)) = (u8 *)dm; - switch(dm->type) - { - case 0: - dmi_printk(("BIOS Vendor: %s\n", - dmi_string(dm, data[4]))); - dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); - dmi_printk(("BIOS Version: %s\n", - dmi_string(dm, data[5]))); - dmi_save_ident(dm, DMI_BIOS_VERSION, 5); - dmi_printk(("BIOS Release: %s\n", - dmi_string(dm, data[8]))); - dmi_save_ident(dm, DMI_BIOS_DATE, 8); - break; - case 1: - dmi_printk(("System Vendor: %s\n", - dmi_string(dm, data[4]))); - dmi_save_ident(dm, DMI_SYS_VENDOR, 4); - dmi_printk(("Product Name: %s\n", - dmi_string(dm, data[5]))); - dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); - dmi_printk(("Version: %s\n", - dmi_string(dm, data[6]))); - dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); - dmi_printk(("Serial Number: %s\n", - dmi_string(dm, data[7]))); - dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); - break; - case 2: - dmi_printk(("Board Vendor: %s\n", - dmi_string(dm, data[4]))); - dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); - dmi_printk(("Board Name: %s\n", - dmi_string(dm, data[5]))); - dmi_save_ident(dm, DMI_BOARD_NAME, 5); - dmi_printk(("Board Version: %s\n", - dmi_string(dm, data[6]))); - dmi_save_ident(dm, DMI_BOARD_VERSION, 6); - break; + switch(dm->type) { + case 0: + dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4]))); + dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); + dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5]))); + dmi_save_ident(dm, DMI_BIOS_VERSION, 5); + dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8]))); + dmi_save_ident(dm, DMI_BIOS_DATE, 8); + break; + case 1: + dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4]))); + dmi_save_ident(dm, DMI_SYS_VENDOR, 4); + dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5]))); + dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); + dmi_printk(("Version: %s\n", dmi_string(dm, data[6]))); + dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); + dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7]))); + dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); + break; + case 2: + dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4]))); + dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); + dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5]))); + dmi_save_ident(dm, DMI_BOARD_NAME, 5); + dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6]))); + dmi_save_ident(dm, DMI_BOARD_VERSION, 6); + break; } } -- cgit v1.2.3 From a13db56624c2a9d6c0dae0a693b25b0e58de9ea3 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sat, 25 Jun 2005 14:54:48 -0700 Subject: [PATCH] CPU hotplug: fix hpet sectioning With hpet enabled, cpu hotplug uses some routines marked with __init. Signed-off-by: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/time_hpet.c | 2 +- arch/i386/kernel/timers/common.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 10a0cbb88e75..658c0629ba6a 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -50,7 +50,7 @@ static void hpet_writel(unsigned long d, unsigned long a) * comparator value and continue. Next tick can be caught by checking * for a change in the comparator value. Used in apic.c. */ -static void __init wait_hpet_tick(void) +static void __devinit wait_hpet_tick(void) { unsigned int start_cmp_val, end_cmp_val; diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c index 37353bd31803..8163fe0cf1f0 100644 --- a/arch/i386/kernel/timers/common.c +++ b/arch/i386/kernel/timers/common.c @@ -86,7 +86,7 @@ bad_ctc: #define CALIBRATE_CNT_HPET (5 * hpet_tick) #define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC) -unsigned long __init calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) +unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) { unsigned long tsc_startlow, tsc_starthigh; unsigned long tsc_endlow, tsc_endhigh; -- cgit v1.2.3 From d92de65cab5980c16d4a1c326c1ef9a591892883 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sat, 25 Jun 2005 14:54:49 -0700 Subject: [PATCH] variable overflow after hundreds round of hotplug CPU I'm doing the cpu hotplug stress test and found a variable ('ready') is overflow after several hundreds rounds of cpu hotplug. Here is a fix. Signed-off-by: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/head.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index e966fc8c44c4..4477bb107098 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -299,7 +299,6 @@ is386: movl $2,%ecx # set MP movl %eax,%cr0 call check_x87 - incb ready lgdt cpu_gdt_descr lidt idt_descr ljmp $(__KERNEL_CS),$1f @@ -316,8 +315,9 @@ is386: movl $2,%ecx # set MP lldt %ax cld # gcc2 wants the direction flag cleared at all times #ifdef CONFIG_SMP - movb ready, %cl - cmpb $1,%cl + movb ready, %cl + movb $1, ready + cmpb $0,%cl je 1f # the first CPU calls start_kernel # all other CPUs call initialize_secondary call initialize_secondary -- cgit v1.2.3 From f370513640492641b4046bfd9a6e4714f6ae530d Mon Sep 17 00:00:00 2001 From: Zwane Mwaikambo Date: Sat, 25 Jun 2005 14:54:50 -0700 Subject: [PATCH] i386 CPU hotplug (The i386 CPU hotplug patch provides infrastructure for some work which Pavel is doing as well as for ACPI S3 (suspend-to-RAM) work which Li Shaohua is doing) The following provides i386 architecture support for safely unregistering and registering processors during runtime, updated for the current -mm tree. In order to avoid dumping cpu hotplug code into kernel/irq/* i dropped the cpu_online check in do_IRQ() by modifying fixup_irqs(). The difference being that on cpu offline, fixup_irqs() is called before we clear the cpu from cpu_online_map and a long delay in order to ensure that we never have any queued external interrupts on the APICs. There are additional changes to s390 and ppc64 to account for this change. 1) Add CONFIG_HOTPLUG_CPU 2) disable local APIC timer on dead cpus. 3) Disable preempt around irq balancing to prevent CPUs going down. 4) Print irq stats for all possible cpus. 5) Debugging check for interrupts on offline cpus. 6) Hacky fixup_irqs() to redirect irqs when cpus go off/online. 7) play_dead() for offline cpus to spin inside. 8) Handle offline cpus set in flush_tlb_others(). 9) Grab lock earlier in smp_call_function() to prevent CPUs going down. 10) Implement __cpu_disable() and __cpu_die(). 11) Enable local interrupts in cpu_enable() after fixup_irqs() 12) Don't fiddle with NMI on dead cpu, but leave intact on other cpus. 13) Program IRQ affinity whilst cpu is still in cpu_online_map on offline. Signed-off-by: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 3 +- arch/i386/kernel/io_apic.c | 2 + arch/i386/kernel/irq.c | 67 +++++++++++++++++++++++++------ arch/i386/kernel/process.c | 39 +++++++++++++++++- arch/i386/kernel/smp.c | 24 +++++++----- arch/i386/kernel/smpboot.c | 98 +++++++++++++++++++++++++++++++++++++++++++--- arch/i386/kernel/traps.c | 8 ++++ 7 files changed, 212 insertions(+), 29 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 8d993fa71754..a28a088f3e75 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1048,7 +1049,7 @@ void __init setup_secondary_APIC_clock(void) setup_APIC_timer(calibration_result); } -void __init disable_APIC_timer(void) +void __devinit disable_APIC_timer(void) { if (using_apic_timer) { unsigned long v; diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 08540bc4ba3e..3c2b3bdfc807 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -576,9 +576,11 @@ static int balanced_irq(void *unused) try_to_freeze(PF_FREEZE); if (time_after(jiffies, prev_balance_time+balanced_irq_interval)) { + preempt_disable(); do_irq_balance(); prev_balance_time = jiffies; time_remaining = balanced_irq_interval; + preempt_enable(); } } return 0; diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 73945a3c53c4..af115004aec5 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp; EXPORT_PER_CPU_SYMBOL(irq_stat); @@ -210,9 +213,8 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for (j=0; jtypename); seq_printf(p, " %s", action->name); @@ -240,16 +241,14 @@ skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", nmi_count(j)); + for_each_cpu(j) + seq_printf(p, "%10u ", nmi_count(j)); seq_putc(p, '\n'); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).apic_timer_irqs); + for_each_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).apic_timer_irqs); seq_putc(p, '\n'); #endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); @@ -259,3 +258,45 @@ skip: } return 0; } + +#ifdef CONFIG_HOTPLUG_CPU +#include + +void fixup_irqs(cpumask_t map) +{ + unsigned int irq; + static int warned; + + for (irq = 0; irq < NR_IRQS; irq++) { + cpumask_t mask; + if (irq == 2) + continue; + + cpus_and(mask, irq_affinity[irq], map); + if (any_online_cpu(mask) == NR_CPUS) { + printk("Breaking affinity for irq %i\n", irq); + mask = map; + } + if (irq_desc[irq].handler->set_affinity) + irq_desc[irq].handler->set_affinity(irq, mask); + else if (irq_desc[irq].action && !(warned++)) + printk("Cannot set affinity for irq %i\n", irq); + } + +#if 0 + barrier(); + /* Ingo Molnar says: "after the IO-APIC masks have been redirected + [note the nop - the interrupt-enable boundary on x86 is two + instructions from sti] - to flush out pending hardirqs and + IPIs. After this point nothing is supposed to reach this CPU." */ + __asm__ __volatile__("sti; nop; cli"); + barrier(); +#else + /* That doesn't seem sufficient. Give it 1ms. */ + local_irq_enable(); + mdelay(1); + local_irq_disable(); +#endif +} +#endif + diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index aea2ce1145df..c1b11e8df60b 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -13,6 +13,7 @@ #include +#include #include #include #include @@ -55,6 +56,9 @@ #include #include +#include +#include + asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); static int hlt_counter; @@ -143,14 +147,44 @@ static void poll_idle (void) } } +#ifdef CONFIG_HOTPLUG_CPU +#include +/* We don't actually take CPU down, just spin without interrupts. */ +static inline void play_dead(void) +{ + /* Ack it */ + __get_cpu_var(cpu_state) = CPU_DEAD; + + /* We shouldn't have to disable interrupts while dead, but + * some interrupts just don't seem to go away, and this makes + * it "work" for testing purposes. */ + /* Death loop */ + while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + cpu_relax(); + + local_irq_disable(); + __flush_tlb_all(); + cpu_set(smp_processor_id(), cpu_online_map); + enable_APIC_timer(); + local_irq_enable(); +} +#else +static inline void play_dead(void) +{ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a * low exit latency (ie sit in a loop waiting for * somebody to say that they'd like to reschedule) */ -void cpu_idle (void) +void cpu_idle(void) { + int cpu = raw_smp_processor_id(); + /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { @@ -165,6 +199,9 @@ void cpu_idle (void) if (!idle) idle = default_idle; + if (cpu_is_offline(cpu)) + play_dead(); + __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 68be7d0c7238..35f521612b20 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -164,7 +165,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) unsigned long flags; local_irq_save(flags); - + WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); /* * Wait for idle. */ @@ -346,21 +347,21 @@ out: static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, unsigned long va) { - cpumask_t tmp; /* * A couple of (to be removed) sanity checks: * - * - we do not send IPIs to not-yet booted CPUs. * - current CPU must not be in mask * - mask must exist :) */ BUG_ON(cpus_empty(cpumask)); - - cpus_and(tmp, cpumask, cpu_online_map); - BUG_ON(!cpus_equal(cpumask, tmp)); BUG_ON(cpu_isset(smp_processor_id(), cpumask)); BUG_ON(!mm); + /* If a CPU which we ran on has gone down, OK. */ + cpus_and(cpumask, cpumask, cpu_online_map); + if (cpus_empty(cpumask)) + return; + /* * i'm not happy about this global shared spinlock in the * MM hot path, but we'll see how contended it is. @@ -476,6 +477,7 @@ void flush_tlb_all(void) */ void smp_send_reschedule(int cpu) { + WARN_ON(cpu_is_offline(cpu)); send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } @@ -516,10 +518,15 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, */ { struct call_data_struct data; - int cpus = num_online_cpus()-1; + int cpus; - if (!cpus) + /* Holding any lock stops cpus from going down. */ + spin_lock(&call_lock); + cpus = num_online_cpus() - 1; + if (!cpus) { + spin_unlock(&call_lock); return 0; + } /* Can deadlock when called with interrupts disabled */ WARN_ON(irqs_disabled()); @@ -531,7 +538,6 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, if (wait) atomic_set(&data.finished, 0); - spin_lock(&call_lock); call_data = &data; mb(); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index c20d96d5c15c..ad74a46e9ef0 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -44,6 +44,9 @@ #include #include #include +#include +#include +#include #include #include @@ -96,6 +99,9 @@ static int trampoline_exec; static void map_cpu_to_logical_apicid(void); +/* State of each CPU. */ +DEFINE_PER_CPU(int, cpu_state) = { 0 }; + /* * Currently trivial. Write the real->protected mode * bootstrap into the page concerned. The caller @@ -1119,6 +1125,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus) who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ void __init smp_prepare_cpus(unsigned int max_cpus) { + smp_commenced_mask = cpumask_of_cpu(0); + cpu_callin_map = cpumask_of_cpu(0); + mb(); smp_boot_cpus(max_cpus); } @@ -1128,20 +1137,99 @@ void __devinit smp_prepare_boot_cpu(void) cpu_set(smp_processor_id(), cpu_callout_map); } -int __devinit __cpu_up(unsigned int cpu) +#ifdef CONFIG_HOTPLUG_CPU + +/* must be called with the cpucontrol mutex held */ +static int __devinit cpu_enable(unsigned int cpu) { - /* This only works at boot for x86. See "rewrite" above. */ - if (cpu_isset(cpu, smp_commenced_mask)) { - local_irq_enable(); - return -ENOSYS; + /* get the target out of its holding state */ + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + wmb(); + + /* wait for the processor to ack it. timeout? */ + while (!cpu_online(cpu)) + cpu_relax(); + + fixup_irqs(cpu_online_map); + /* counter the disable in fixup_irqs() */ + local_irq_enable(); + return 0; +} + +int __cpu_disable(void) +{ + cpumask_t map = cpu_online_map; + int cpu = smp_processor_id(); + + /* + * Perhaps use cpufreq to drop frequency, but that could go + * into generic code. + * + * We won't take down the boot processor on i386 due to some + * interrupts only being able to be serviced by the BSP. + * Especially so if we're not using an IOAPIC -zwane + */ + if (cpu == 0) + return -EBUSY; + + /* We enable the timer again on the exit path of the death loop */ + disable_APIC_timer(); + /* Allow any queued timer interrupts to get serviced */ + local_irq_enable(); + mdelay(1); + local_irq_disable(); + + cpu_clear(cpu, map); + fixup_irqs(map); + /* It's now safe to remove this processor from the online map */ + cpu_clear(cpu, cpu_online_map); + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + /* We don't do anything here: idle task is faking death itself. */ + unsigned int i; + + for (i = 0; i < 10; i++) { + /* They ack this in play_dead by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) + return; + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ/10); } + printk(KERN_ERR "CPU %u didn't die...\n", cpu); +} +#else /* ... !CONFIG_HOTPLUG_CPU */ +int __cpu_disable(void) +{ + return -ENOSYS; +} +void __cpu_die(unsigned int cpu) +{ + /* We said "no" in __cpu_disable */ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +int __devinit __cpu_up(unsigned int cpu) +{ /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { + printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); local_irq_enable(); return -EIO; } +#ifdef CONFIG_HOTPLUG_CPU + /* Already up, and in cpu_quiescent now? */ + if (cpu_isset(cpu, smp_commenced_mask)) { + cpu_enable(cpu); + return 0; + } +#endif + local_irq_enable(); /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index e4d4e2162c7a..207ea8ba7169 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -625,6 +625,14 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) nmi_enter(); cpu = smp_processor_id(); + +#ifdef CONFIG_HOTPLUG_CPU + if (!cpu_online(cpu)) { + nmi_exit(); + return; + } +#endif + ++nmi_count(cpu); if (!nmi_callback(regs, cpu)) -- cgit v1.2.3 From 6fe940d6c300886de4ff1454d8ffd363172af433 Mon Sep 17 00:00:00 2001 From: Li Shaohua Date: Sat, 25 Jun 2005 14:54:53 -0700 Subject: [PATCH] sep initializing rework Make SEP init per-cpu, so it is hotplug safe. Signed-off-by: Li Shaohua Signed-off-by: Ashok Raj Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 3 +++ arch/i386/kernel/smp.c | 10 ++++++++++ arch/i386/kernel/smpboot.c | 11 +++++++++++ arch/i386/kernel/sysenter.c | 12 +++++++----- 4 files changed, 31 insertions(+), 5 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index b9954248d0aa..d58e169fbdbb 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -432,6 +432,9 @@ void __init identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_MCE mcheck_init(c); #endif + if (c == &boot_cpu_data) + sysenter_setup(); + enable_sep_cpu(); } #ifdef CONFIG_X86_HT diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 35f521612b20..cec4bde67161 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -495,6 +495,16 @@ struct call_data_struct { int wait; }; +void lock_ipi_call_lock(void) +{ + spin_lock_irq(&call_lock); +} + +void unlock_ipi_call_lock(void) +{ + spin_unlock_irq(&call_lock); +} + static struct call_data_struct * call_data; /* diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index ad74a46e9ef0..c5517f332309 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -449,7 +449,18 @@ static void __init start_secondary(void *unused) * the local TLBs too. */ local_flush_tlb(); + + /* + * We need to hold call_lock, so there is no inconsistency + * between the time smp_call_function() determines number of + * IPI receipients, and the time when the determination is made + * for which cpus receive the IPI. Holding this + * lock helps us to not include this cpu in a currently in progress + * smp_call_function(). + */ + lock_ipi_call_lock(); cpu_set(smp_processor_id(), cpu_online_map); + unlock_ipi_call_lock(); /* We can take interrupts now: we're officially "up". */ local_irq_enable(); diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 960d8bd137d0..0bada1870bdf 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -21,11 +21,16 @@ extern asmlinkage void sysenter_entry(void); -void enable_sep_cpu(void *info) +void enable_sep_cpu(void) { int cpu = get_cpu(); struct tss_struct *tss = &per_cpu(init_tss, cpu); + if (!boot_cpu_has(X86_FEATURE_SEP)) { + put_cpu(); + return; + } + tss->ss1 = __KERNEL_CS; tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); @@ -41,7 +46,7 @@ void enable_sep_cpu(void *info) extern const char vsyscall_int80_start, vsyscall_int80_end; extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; -static int __init sysenter_setup(void) +int __init sysenter_setup(void) { void *page = (void *)get_zeroed_page(GFP_ATOMIC); @@ -58,8 +63,5 @@ static int __init sysenter_setup(void) &vsyscall_sysenter_start, &vsyscall_sysenter_end - &vsyscall_sysenter_start); - on_each_cpu(enable_sep_cpu, NULL, 1, 1); return 0; } - -__initcall(sysenter_setup); -- cgit v1.2.3 From d720803a9365d360b3e5ea02033f0c11b5b1226a Mon Sep 17 00:00:00 2001 From: Li Shaohua Date: Sat, 25 Jun 2005 14:54:54 -0700 Subject: [PATCH] sibling map initializing rework Make sibling map init per-cpu. Hotplug CPU may change the map at runtime. Signed-off-by: Li Shaohua Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/smpboot.c | 97 ++++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 47 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index c5517f332309..09b4ceb832b2 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -66,11 +66,21 @@ int smp_num_siblings = 1; #ifdef CONFIG_X86_HT EXPORT_SYMBOL(smp_num_siblings); #endif -int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ + +/* Package ID of each logical CPU */ +int phys_proc_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(phys_proc_id); -int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */ + +/* Core ID of each logical CPU */ +int cpu_core_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(cpu_core_id); +cpumask_t cpu_sibling_map[NR_CPUS]; +EXPORT_SYMBOL(cpu_sibling_map); + +cpumask_t cpu_core_map[NR_CPUS]; +EXPORT_SYMBOL(cpu_core_map); + /* bitmap of online cpus */ cpumask_t cpu_online_map; EXPORT_SYMBOL(cpu_online_map); @@ -423,6 +433,38 @@ static void __init smp_callin(void) static int cpucount; +static inline void +set_cpu_sibling_map(int cpu) +{ + int i; + + if (smp_num_siblings > 1) { + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_isset(i, cpu_callout_map)) + continue; + if (cpu_core_id[cpu] == cpu_core_id[i]) { + cpu_set(i, cpu_sibling_map[cpu]); + cpu_set(cpu, cpu_sibling_map[i]); + } + } + } else { + cpu_set(cpu, cpu_sibling_map[cpu]); + } + + if (current_cpu_data.x86_num_cores > 1) { + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_isset(i, cpu_callout_map)) + continue; + if (phys_proc_id[cpu] == phys_proc_id[i]) { + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); + } + } + } else { + cpu_core_map[cpu] = cpu_sibling_map[cpu]; + } +} + /* * Activate a secondary processor. */ @@ -450,6 +492,10 @@ static void __init start_secondary(void *unused) */ local_flush_tlb(); + /* This must be done before setting cpu_online_map */ + set_cpu_sibling_map(raw_smp_processor_id()); + wmb(); + /* * We need to hold call_lock, so there is no inconsistency * between the time smp_call_function() determines number of @@ -912,13 +958,6 @@ void *xquad_portio; EXPORT_SYMBOL(xquad_portio); #endif -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; -#ifdef CONFIG_X86_HT -EXPORT_SYMBOL(cpu_sibling_map); -#endif -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_core_map); - static void __init smp_boot_cpus(unsigned int max_cpus) { int apicid, cpu, bit, kicked; @@ -1082,44 +1121,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpus_clear(cpu_core_map[cpu]); } - for (cpu = 0; cpu < NR_CPUS; cpu++) { - struct cpuinfo_x86 *c = cpu_data + cpu; - int siblings = 0; - int i; - if (!cpu_isset(cpu, cpu_callout_map)) - continue; - - if (smp_num_siblings > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (cpu_core_id[cpu] == cpu_core_id[i]) { - siblings++; - cpu_set(i, cpu_sibling_map[cpu]); - } - } - } else { - siblings++; - cpu_set(cpu, cpu_sibling_map[cpu]); - } - - if (siblings != smp_num_siblings) { - printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); - smp_num_siblings = siblings; - } - - if (c->x86_num_cores > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_set(i, cpu_core_map[cpu]); - } - } - } else { - cpu_core_map[cpu] = cpu_sibling_map[cpu]; - } - } + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); smpboot_setup_io_apic(); -- cgit v1.2.3 From 0bb3184df537002a742bafddf3f4fb482b7fe610 Mon Sep 17 00:00:00 2001 From: Li Shaohua Date: Sat, 25 Jun 2005 14:54:55 -0700 Subject: [PATCH] init call cleanup Trival patch for CPU hotplug. In CPU identify part, only did cleaup for intel CPUs. Need do for other CPUs if they support S3 SMP. Signed-off-by: Li Shaohua Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 14 +++++++------- arch/i386/kernel/cpu/common.c | 30 +++++++++++++++--------------- arch/i386/kernel/cpu/intel.c | 12 ++++++------ arch/i386/kernel/cpu/intel_cacheinfo.c | 4 ++-- arch/i386/kernel/cpu/mcheck/mce.c | 2 +- arch/i386/kernel/cpu/mcheck/p5.c | 2 +- arch/i386/kernel/process.c | 2 +- arch/i386/kernel/setup.c | 2 +- arch/i386/kernel/smpboot.c | 18 +++++++++--------- arch/i386/kernel/timers/timer_tsc.c | 2 +- 10 files changed, 44 insertions(+), 44 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index a28a088f3e75..b905d7bb9a0d 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -364,7 +364,7 @@ void __init init_bsp_APIC(void) apic_write_around(APIC_LVT1, value); } -void __init setup_local_APIC (void) +void __devinit setup_local_APIC(void) { unsigned long oldvalue, value, ver, maxlvt; @@ -635,7 +635,7 @@ static struct sys_device device_lapic = { .cls = &lapic_sysclass, }; -static void __init apic_pm_activate(void) +static void __devinit apic_pm_activate(void) { apic_pm_state.active = 1; } @@ -856,7 +856,7 @@ fake_ioapic_page: * but we do not accept timer interrupts yet. We only allow the BP * to calibrate. */ -static unsigned int __init get_8254_timer_count(void) +static unsigned int __devinit get_8254_timer_count(void) { extern spinlock_t i8253_lock; unsigned long flags; @@ -875,7 +875,7 @@ static unsigned int __init get_8254_timer_count(void) } /* next tick in 8254 can be caught by catching timer wraparound */ -static void __init wait_8254_wraparound(void) +static void __devinit wait_8254_wraparound(void) { unsigned int curr_count, prev_count; @@ -895,7 +895,7 @@ static void __init wait_8254_wraparound(void) * Default initialization for 8254 timers. If we use other timers like HPET, * we override this later */ -void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound; +void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound; /* * This function sets up the local APIC timer, with a timeout of @@ -931,7 +931,7 @@ static void __setup_APIC_LVTT(unsigned int clocks) apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); } -static void __init setup_APIC_timer(unsigned int clocks) +static void __devinit setup_APIC_timer(unsigned int clocks) { unsigned long flags; @@ -1044,7 +1044,7 @@ void __init setup_boot_APIC_clock(void) local_irq_enable(); } -void __init setup_secondary_APIC_clock(void) +void __devinit setup_secondary_APIC_clock(void) { setup_APIC_timer(calibration_result); } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index d58e169fbdbb..aac74758caf4 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -24,9 +24,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_table); DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); -static int cachesize_override __initdata = -1; -static int disable_x86_fxsr __initdata = 0; -static int disable_x86_serial_nr __initdata = 1; +static int cachesize_override __devinitdata = -1; +static int disable_x86_fxsr __devinitdata = 0; +static int disable_x86_serial_nr __devinitdata = 1; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; @@ -59,7 +59,7 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -int __init get_model_name(struct cpuinfo_x86 *c) +int __devinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; @@ -89,7 +89,7 @@ int __init get_model_name(struct cpuinfo_x86 *c) } -void __init display_cacheinfo(struct cpuinfo_x86 *c) +void __devinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; @@ -130,7 +130,7 @@ void __init display_cacheinfo(struct cpuinfo_x86 *c) /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ /* Look up CPU names by table lookup. */ -static char __init *table_lookup_model(struct cpuinfo_x86 *c) +static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) { struct cpu_model_info *info; @@ -151,7 +151,7 @@ static char __init *table_lookup_model(struct cpuinfo_x86 *c) } -void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early) +void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) { char *v = c->x86_vendor_id; int i; @@ -202,7 +202,7 @@ static inline int flag_is_changeable_p(u32 flag) /* Probe for the CPUID instruction */ -static int __init have_cpuid_p(void) +static int __devinit have_cpuid_p(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } @@ -249,7 +249,7 @@ static void __init early_cpu_detect(void) #endif } -void __init generic_identify(struct cpuinfo_x86 * c) +void __devinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; int junk; @@ -296,7 +296,7 @@ void __init generic_identify(struct cpuinfo_x86 * c) } } -static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { /* Disable processor serial number */ @@ -324,7 +324,7 @@ __setup("serialnumber", x86_serial_nr_setup); /* * This does the hard work of actually picking apart the CPU stuff... */ -void __init identify_cpu(struct cpuinfo_x86 *c) +void __devinit identify_cpu(struct cpuinfo_x86 *c) { int i; @@ -438,7 +438,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c) } #ifdef CONFIG_X86_HT -void __init detect_ht(struct cpuinfo_x86 *c) +void __devinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; int index_msb, tmp; @@ -493,7 +493,7 @@ void __init detect_ht(struct cpuinfo_x86 *c) } #endif -void __init print_cpu_info(struct cpuinfo_x86 *c) +void __devinit print_cpu_info(struct cpuinfo_x86 *c) { char *vendor = NULL; @@ -516,7 +516,7 @@ void __init print_cpu_info(struct cpuinfo_x86 *c) printk("\n"); } -cpumask_t cpu_initialized __initdata = CPU_MASK_NONE; +cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE; /* This is hacky. :) * We're emulating future behavior. @@ -563,7 +563,7 @@ void __init early_cpu_init(void) * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. */ -void __init cpu_init (void) +void __devinit cpu_init(void) { int cpu = smp_processor_id(); struct tss_struct * t = &per_cpu(init_tss, cpu); diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 121aa2176e69..96a75d045835 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -28,7 +28,7 @@ extern int trap_init_f00f_bug(void); struct movsl_mask movsl_mask; #endif -void __init early_intel_workaround(struct cpuinfo_x86 *c) +void __devinit early_intel_workaround(struct cpuinfo_x86 *c) { if (c->x86_vendor != X86_VENDOR_INTEL) return; @@ -43,7 +43,7 @@ void __init early_intel_workaround(struct cpuinfo_x86 *c) * This is called before we do cpu ident work */ -int __init ppro_with_ram_bug(void) +int __devinit ppro_with_ram_bug(void) { /* Uses data from early_cpu_detect now */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && @@ -61,7 +61,7 @@ int __init ppro_with_ram_bug(void) * P4 Xeon errata 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ -static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c) +static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; @@ -80,7 +80,7 @@ static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __init num_cpu_cores(struct cpuinfo_x86 *c) +static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax; @@ -98,7 +98,7 @@ static int __init num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -static void __init init_intel(struct cpuinfo_x86 *c) +static void __devinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; char *p = NULL; @@ -204,7 +204,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) return size; } -static struct cpu_dev intel_cpu_dev __initdata = { +static struct cpu_dev intel_cpu_dev __devinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, .c_models = { diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index a710dc4eb20e..1d768b263269 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -28,7 +28,7 @@ struct _cache_table }; /* all the cache descriptor types we care about (no TLB or trace cache entries) */ -static struct _cache_table cache_table[] __initdata = +static struct _cache_table cache_table[] __devinitdata = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ @@ -160,7 +160,7 @@ static int __init find_num_cache_leaves(void) return retval; } -unsigned int __init init_intel_cacheinfo(struct cpuinfo_x86 *c) +unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c) { unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index bf6d1aefafc0..7218a7341fbc 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c @@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_ void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; /* This has to be run for each processor */ -void __init mcheck_init(struct cpuinfo_x86 *c) +void __devinit mcheck_init(struct cpuinfo_x86 *c) { if (mce_disabled==1) return; diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c index c45a1b485c80..ec0614cd2925 100644 --- a/arch/i386/kernel/cpu/mcheck/p5.c +++ b/arch/i386/kernel/cpu/mcheck/p5.c @@ -29,7 +29,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod } /* Set up machine check reporting for processors with Intel style MCE */ -void __init intel_p5_mcheck_init(struct cpuinfo_x86 *c) +void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index c1b11e8df60b..e06f2dc7123d 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -260,7 +260,7 @@ static void mwait_idle(void) } } -void __init select_idle_routine(const struct cpuinfo_x86 *c) +void __devinit select_idle_routine(const struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_MWAIT)) { printk("monitor/mwait feature present.\n"); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 30406fd0b64c..cba67e4ba0af 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -60,7 +60,7 @@ address, and must not be in the .bss segment! */ unsigned long init_pg_tables_end __initdata = ~0UL; -int disable_pse __initdata = 0; +int disable_pse __devinitdata = 0; /* * Machine setup.. diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 09b4ceb832b2..fb0b200d1d85 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -59,7 +59,7 @@ #include /* Set if we find a B stepping CPU */ -static int __initdata smp_b_stepping; +static int __devinitdata smp_b_stepping; /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -118,7 +118,7 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 }; * has made sure it's suitably aligned. */ -static unsigned long __init setup_trampoline(void) +static unsigned long __devinit setup_trampoline(void) { memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); return virt_to_phys(trampoline_base); @@ -148,7 +148,7 @@ void __init smp_alloc_memory(void) * a given CPU */ -static void __init smp_store_cpu_info(int id) +static void __devinit smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = cpu_data + id; @@ -342,7 +342,7 @@ extern void calibrate_delay(void); static atomic_t init_deasserted; -static void __init smp_callin(void) +static void __devinit smp_callin(void) { int cpuid, phys_id; unsigned long timeout; @@ -468,7 +468,7 @@ set_cpu_sibling_map(int cpu) /* * Activate a secondary processor. */ -static void __init start_secondary(void *unused) +static void __devinit start_secondary(void *unused) { /* * Dont put anything before smp_callin(), SMP @@ -521,7 +521,7 @@ static void __init start_secondary(void *unused) * from the task structure * This function must not return. */ -void __init initialize_secondary(void) +void __devinit initialize_secondary(void) { /* * We don't actually need to load the full TSS, @@ -635,7 +635,7 @@ static inline void __inquire_remote_apic(int apicid) * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this * won't ... remember to clear down the APIC, etc later. */ -static int __init +static int __devinit wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; @@ -681,7 +681,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) #endif /* WAKE_SECONDARY_VIA_NMI */ #ifdef WAKE_SECONDARY_VIA_INIT -static int __init +static int __devinit wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; @@ -817,7 +817,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) extern cpumask_t cpu_initialized; -static int __init do_boot_cpu(int apicid) +static int __devinit do_boot_cpu(int apicid) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 54c36b182021..f46e625bab67 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -33,7 +33,7 @@ static struct timer_opts timer_tsc; static inline void cpufreq_delayed_get(void); -int tsc_disable __initdata = 0; +int tsc_disable __devinitdata = 0; extern spinlock_t i8253_lock; -- cgit v1.2.3 From e1367daf3eed5cd619ee88c9907e1e6ddaa58406 Mon Sep 17 00:00:00 2001 From: Li Shaohua Date: Sat, 25 Jun 2005 14:54:56 -0700 Subject: [PATCH] cpu state clean after hot remove Clean CPU states in order to reuse smp boot code for CPU hotplug. Signed-off-by: Li Shaohua Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 12 +++ arch/i386/kernel/irq.c | 5 ++ arch/i386/kernel/process.c | 20 +++-- arch/i386/kernel/smpboot.c | 175 ++++++++++++++++++++++++++++++++++-------- 4 files changed, 170 insertions(+), 42 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index aac74758caf4..2203a9d20212 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -651,3 +651,15 @@ void __devinit cpu_init(void) clear_used_math(); mxcsr_feature_mask_init(); } + +#ifdef CONFIG_HOTPLUG_CPU +void __devinit cpu_uninit(void) +{ + int cpu = raw_smp_processor_id(); + cpu_clear(cpu, cpu_initialized); + + /* lazy TLB state */ + per_cpu(cpu_tlbstate, cpu).state = 0; + per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +} +#endif diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index af115004aec5..ce66dcc26d90 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -156,6 +156,11 @@ void irq_ctx_init(int cpu) cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); } +void irq_ctx_exit(int cpu) +{ + hardirq_ctx[cpu] = NULL; +} + extern asmlinkage void __do_softirq(void); asmlinkage void do_softirq(void) diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index e06f2dc7123d..5f8cfa6b7940 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -152,21 +152,19 @@ static void poll_idle (void) /* We don't actually take CPU down, just spin without interrupts. */ static inline void play_dead(void) { + /* This must be done before dead CPU ack */ + cpu_exit_clear(); + wbinvd(); + mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; - /* We shouldn't have to disable interrupts while dead, but - * some interrupts just don't seem to go away, and this makes - * it "work" for testing purposes. */ - /* Death loop */ - while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) - cpu_relax(); - + /* + * With physical CPU hotplug, we should halt the cpu + */ local_irq_disable(); - __flush_tlb_all(); - cpu_set(smp_processor_id(), cpu_online_map); - enable_APIC_timer(); - local_irq_enable(); + while (1) + __asm__ __volatile__("hlt":::"memory"); } #else static inline void play_dead(void) diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index fb0b200d1d85..d66bf489a2e9 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -90,6 +90,12 @@ cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); static cpumask_t smp_commenced_mask; +/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there + * is no way to resync one AP against BP. TBD: for prescott and above, we + * should use IA64's algorithm + */ +static int __devinitdata tsc_sync_disabled; + /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_data); @@ -427,7 +433,7 @@ static void __devinit smp_callin(void) /* * Synchronize the TSC with the BP */ - if (cpu_has_tsc && cpu_khz) + if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled) synchronize_tsc_ap(); } @@ -507,6 +513,7 @@ static void __devinit start_secondary(void *unused) lock_ipi_call_lock(); cpu_set(smp_processor_id(), cpu_online_map); unlock_ipi_call_lock(); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; /* We can take interrupts now: we're officially "up". */ local_irq_enable(); @@ -816,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) #endif /* WAKE_SECONDARY_VIA_INIT */ extern cpumask_t cpu_initialized; +static inline int alloc_cpu_id(void) +{ + cpumask_t tmp_map; + int cpu; + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + if (cpu >= NR_CPUS) + return -ENODEV; + return cpu; +} + +#ifdef CONFIG_HOTPLUG_CPU +static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS]; +static inline struct task_struct * alloc_idle_task(int cpu) +{ + struct task_struct *idle; + + if ((idle = cpu_idle_tasks[cpu]) != NULL) { + /* initialize thread_struct. we really want to avoid destroy + * idle tread + */ + idle->thread.esp = (unsigned long)(((struct pt_regs *) + (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1); + init_idle(idle, cpu); + return idle; + } + idle = fork_idle(cpu); + + if (!IS_ERR(idle)) + cpu_idle_tasks[cpu] = idle; + return idle; +} +#else +#define alloc_idle_task(cpu) fork_idle(cpu) +#endif -static int __devinit do_boot_cpu(int apicid) +static int __devinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. @@ -826,16 +868,17 @@ static int __devinit do_boot_cpu(int apicid) { struct task_struct *idle; unsigned long boot_error; - int timeout, cpu; + int timeout; unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; - cpu = ++cpucount; + ++cpucount; + /* * We can't use kernel_thread since we must avoid to * reschedule the child. */ - idle = fork_idle(cpu); + idle = alloc_idle_task(cpu); if (IS_ERR(idle)) panic("failed fork for CPU %d", cpu); idle->thread.eip = (unsigned long) start_secondary; @@ -902,13 +945,16 @@ static int __devinit do_boot_cpu(int apicid) inquire_remote_apic(apicid); } } - x86_cpu_to_apicid[cpu] = apicid; + if (boot_error) { /* Try to put things back the way they were before ... */ unmap_cpu_to_logical_apicid(cpu); cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ cpucount--; + } else { + x86_cpu_to_apicid[cpu] = apicid; + cpu_set(cpu, cpu_present_map); } /* mark "stuck" area as not stuck */ @@ -917,6 +963,75 @@ static int __devinit do_boot_cpu(int apicid) return boot_error; } +#ifdef CONFIG_HOTPLUG_CPU +void cpu_exit_clear(void) +{ + int cpu = raw_smp_processor_id(); + + idle_task_exit(); + + cpucount --; + cpu_uninit(); + irq_ctx_exit(cpu); + + cpu_clear(cpu, cpu_callout_map); + cpu_clear(cpu, cpu_callin_map); + cpu_clear(cpu, cpu_present_map); + + cpu_clear(cpu, smp_commenced_mask); + unmap_cpu_to_logical_apicid(cpu); +} + +struct warm_boot_cpu_info { + struct completion *complete; + int apicid; + int cpu; +}; + +static void __devinit do_warm_boot_cpu(void *p) +{ + struct warm_boot_cpu_info *info = p; + do_boot_cpu(info->apicid, info->cpu); + complete(info->complete); +} + +int __devinit smp_prepare_cpu(int cpu) +{ + DECLARE_COMPLETION(done); + struct warm_boot_cpu_info info; + struct work_struct task; + int apicid, ret; + + lock_cpu_hotplug(); + apicid = x86_cpu_to_apicid[cpu]; + if (apicid == BAD_APICID) { + ret = -ENODEV; + goto exit; + } + + info.complete = &done; + info.apicid = apicid; + info.cpu = cpu; + INIT_WORK(&task, do_warm_boot_cpu, &info); + + tsc_sync_disabled = 1; + + /* init low mem mapping */ + memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS); + flush_tlb_all(); + schedule_work(&task); + wait_for_completion(&done); + + tsc_sync_disabled = 0; + zap_low_mappings(); + ret = 0; +exit: + unlock_cpu_hotplug(); + return ret; +} +#endif + static void smp_tune_scheduling (void) { unsigned long cachesize; /* kB */ @@ -1069,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) if (max_cpus <= cpucount+1) continue; - if (do_boot_cpu(apicid)) + if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu)) printk("CPU #%d not responding - cannot use it.\n", apicid); else @@ -1149,25 +1264,24 @@ void __devinit smp_prepare_boot_cpu(void) { cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), cpu_callout_map); + cpu_set(smp_processor_id(), cpu_present_map); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; } #ifdef CONFIG_HOTPLUG_CPU - -/* must be called with the cpucontrol mutex held */ -static int __devinit cpu_enable(unsigned int cpu) +static void +remove_siblinginfo(int cpu) { - /* get the target out of its holding state */ - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - wmb(); - - /* wait for the processor to ack it. timeout? */ - while (!cpu_online(cpu)) - cpu_relax(); - - fixup_irqs(cpu_online_map); - /* counter the disable in fixup_irqs() */ - local_irq_enable(); - return 0; + int sibling; + + for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) + cpu_clear(cpu, cpu_sibling_map[sibling]); + for_each_cpu_mask(sibling, cpu_core_map[cpu]) + cpu_clear(cpu, cpu_core_map[sibling]); + cpus_clear(cpu_sibling_map[cpu]); + cpus_clear(cpu_core_map[cpu]); + phys_proc_id[cpu] = BAD_APICID; + cpu_core_id[cpu] = BAD_APICID; } int __cpu_disable(void) @@ -1193,6 +1307,8 @@ int __cpu_disable(void) mdelay(1); local_irq_disable(); + remove_siblinginfo(cpu); + cpu_clear(cpu, map); fixup_irqs(map); /* It's now safe to remove this processor from the online map */ @@ -1207,8 +1323,10 @@ void __cpu_die(unsigned int cpu) for (i = 0; i < 10; i++) { /* They ack this in play_dead by setting CPU_DEAD */ - if (per_cpu(cpu_state, cpu) == CPU_DEAD) + if (per_cpu(cpu_state, cpu) == CPU_DEAD) { + printk ("CPU %d is now offline\n", cpu); return; + } current->state = TASK_UNINTERRUPTIBLE; schedule_timeout(HZ/10); } @@ -1236,15 +1354,8 @@ int __devinit __cpu_up(unsigned int cpu) return -EIO; } -#ifdef CONFIG_HOTPLUG_CPU - /* Already up, and in cpu_quiescent now? */ - if (cpu_isset(cpu, smp_commenced_mask)) { - cpu_enable(cpu); - return 0; - } -#endif - local_irq_enable(); + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); while (!cpu_isset(cpu, cpu_online_map)) @@ -1258,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus) setup_ioapic_dest(); #endif zap_low_mappings(); +#ifndef CONFIG_HOTPLUG_CPU /* * Disable executability of the SMP trampoline: */ set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); +#endif } void __init smp_intr_init(void) -- cgit v1.2.3 From 5a72e04df5470df0ec646029d31e5528167ab1a7 Mon Sep 17 00:00:00 2001 From: Li Shaohua Date: Sat, 25 Jun 2005 14:55:06 -0700 Subject: [PATCH] suspend/resume SMP support Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use disable/enable_nonboot_cpus API. The S4 part is based on Pavel's original S4 SMP patch. Signed-off-by: Li Shaohua Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/mcheck/k7.c | 2 +- arch/i386/kernel/cpu/mcheck/mce.c | 2 +- arch/i386/kernel/cpu/mcheck/p4.c | 4 ++-- arch/i386/kernel/cpu/mcheck/p6.c | 2 +- arch/i386/kernel/cpu/mcheck/winchip.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c index 8df52e86c4d2..c4abe7657397 100644 --- a/arch/i386/kernel/cpu/mcheck/k7.c +++ b/arch/i386/kernel/cpu/mcheck/k7.c @@ -69,7 +69,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) /* AMD K7 machine check is Intel like */ -void __init amd_mcheck_init(struct cpuinfo_x86 *c) +void __devinit amd_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index 7218a7341fbc..2cf25d2ba0f1 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c @@ -16,7 +16,7 @@ #include "mce.h" -int mce_disabled __initdata = 0; +int mce_disabled __devinitdata = 0; int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index 8b16ceb929b4..0abccb6fdf9e 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c @@ -78,7 +78,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs) } /* P4/Xeon Thermal regulation detect and init */ -static void __init intel_init_thermal(struct cpuinfo_x86 *c) +static void __devinit intel_init_thermal(struct cpuinfo_x86 *c) { u32 l, h; unsigned int cpu = smp_processor_id(); @@ -232,7 +232,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) } -void __init intel_p4_mcheck_init(struct cpuinfo_x86 *c) +void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c index 46640f8c2494..f01b73f947e1 100644 --- a/arch/i386/kernel/cpu/mcheck/p6.c +++ b/arch/i386/kernel/cpu/mcheck/p6.c @@ -80,7 +80,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) } /* Set up machine check reporting for processors with Intel style MCE */ -void __init intel_p6_mcheck_init(struct cpuinfo_x86 *c) +void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c index 753fa7acb984..7bae68fa168f 100644 --- a/arch/i386/kernel/cpu/mcheck/winchip.c +++ b/arch/i386/kernel/cpu/mcheck/winchip.c @@ -23,7 +23,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod } /* Set up machine check reporting on the Winchip C6 series */ -void __init winchip_mcheck_init(struct cpuinfo_x86 *c) +void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c) { u32 lo, hi; machine_check_vector = winchip_machine_check; -- cgit v1.2.3 From 9635b47d910223745258768418003580ef7dba17 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 25 Jun 2005 14:57:41 -0700 Subject: [PATCH] kexec: x86: local apic fix From: "Maciej W. Rozycki" Fix a kexec problem whcih causes local APIC detection failure. The problem is detect_init_APIC() is called early, before the command line have been processed. Therefore "lapic" (and "nolapic") have not been seen, yet. Signed-off-by: Maciej W. Rozycki Signed-off-by: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 25 +++++-------------------- arch/i386/kernel/setup.c | 11 +++++++++++ 2 files changed, 16 insertions(+), 20 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index b905d7bb9a0d..cf45bed96d08 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -40,6 +40,11 @@ #include "io_ports.h" +/* + * Knob to control our willingness to enable the local APIC. + */ +int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ + /* * Debug level */ @@ -666,26 +671,6 @@ static void apic_pm_activate(void) { } * Original code written by Keir Fraser. */ -/* - * Knob to control our willingness to enable the local APIC. - */ -int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ - -static int __init lapic_disable(char *str) -{ - enable_local_apic = -1; - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - return 0; -} -__setup("nolapic", lapic_disable); - -static int __init lapic_enable(char *str) -{ - enable_local_apic = 1; - return 0; -} -__setup("lapic", lapic_enable); - static int __init apic_set_verbosity(char *str) { if (strcmp("debug", str) == 0) diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index cba67e4ba0af..f1ad9fdeaad8 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -44,6 +44,7 @@ #include #include #include