From 56b9aea3b740be7665be100872a913da9bdc653b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 2 Dec 2010 01:19:32 -0500 Subject: intel_idle: recognize ARAT on WSM-EX We erroneously ignored the Always Running APIC Timer on WSM-EX. Move the check for ARAT down so that it can apply to any/all models. Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 41665d2f9f93..c131d58bcb50 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -273,8 +273,6 @@ static int intel_idle_probe(void) pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates); - if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ - lapic_timer_reliable_states = 0xFFFFFFFF; if (boot_cpu_data.x86 != 6) /* family 6 */ return -ENODEV; @@ -286,8 +284,6 @@ static int intel_idle_probe(void) case 0x1F: /* Core i7 and i5 Processor - Nehalem */ case 0x2E: /* Nehalem-EX Xeon */ case 0x2F: /* Westmere-EX Xeon */ - lapic_timer_reliable_states = (1 << 1); /* C1 */ - case 0x25: /* Westmere */ case 0x2C: /* Westmere */ cpuidle_state_table = nehalem_cstates; @@ -295,7 +291,6 @@ static int intel_idle_probe(void) case 0x1C: /* 28 - Atom Processor */ case 0x26: /* 38 - Lincroft Atom Processor */ - lapic_timer_reliable_states = (1 << 1); /* C1 */ cpuidle_state_table = atom_cstates; break; @@ -303,10 +298,6 @@ static int intel_idle_probe(void) case 0x2D: /* SNB Xeon */ cpuidle_state_table = snb_cstates; break; -#ifdef FUTURE_USE - case 0x17: /* 23 - Core 2 Duo */ - lapic_timer_reliable_states = (1 << 2) | (1 << 1); /* C2, C1 */ -#endif default: pr_debug(PREFIX "does not run on family %d model %d\n", @@ -314,6 +305,9 @@ static int intel_idle_probe(void) return -ENODEV; } + if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ + lapic_timer_reliable_states = 0xFFFFFFFF; + pr_debug(PREFIX "v" 
INTEL_IDLE_VERSION " model 0x%X\n", boot_cpu_data.x86_model); -- cgit v1.2.3 From 61a0d49c33c7fd57c14895e5b0760bd02b65ac1f Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 3 Jan 2011 17:50:43 +0100 Subject: perf: Do not export power_frequency, but power_start event power_frequency moved to drivers/cpufreq/cpufreq.c which has to be compiled in, no need to export it. intel_idle can be a module though... Signed-off-by: Thomas Renninger Signed-off-by: Ingo Molnar Acked-by: Jean Pihet Cc: Jean Pihet Cc: Arjan van de Ven Cc: rjw@sisk.pl LKML-Reference: <1294073445-14812-2-git-send-email-trenn@suse.de> Signed-off-by: Ingo Molnar LKML-Reference: <1290072314-31155-2-git-send-email-trenn@suse.de> --- drivers/idle/intel_idle.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index c131d58bcb50..15783d5501a8 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -220,9 +220,7 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) kt_before = ktime_get_real(); stop_critical_timings(); -#ifndef MODULE trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu); -#endif if (!need_resched()) { __monitor((void *)&current_thread_info()->flags, 0, 0); -- cgit v1.2.3 From 25e41933b58777f2d020c3b0186b430ea004ec28 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 3 Jan 2011 17:50:44 +0100 Subject: perf: Clean up power events by introducing new, more generic ones Add these new power trace events: power:cpu_idle power:cpu_frequency power:machine_suspend The old C-state/idle accounting events: power:power_start power:power_end Have now a replacement (but we are still keeping the old tracepoints for compatibility): power:cpu_idle and power:power_frequency is replaced with: power:cpu_frequency power:machine_suspend is newly introduced. Jean Pihet has a patch integrated into the generic layer (kernel/power/suspend.c) which will make use of it. 
The type= field got removed from both; it was never used and the type is differentiated by the event type itself. The perf timechart userspace tool gets adjusted in a separate patch. Signed-off-by: Thomas Renninger Signed-off-by: Ingo Molnar Acked-by: Arjan van de Ven Acked-by: Jean Pihet Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Linus Torvalds Cc: rjw@sisk.pl LKML-Reference: <1294073445-14812-3-git-send-email-trenn@suse.de> Signed-off-by: Ingo Molnar LKML-Reference: <1290072314-31155-2-git-send-email-trenn@suse.de> --- drivers/idle/intel_idle.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 15783d5501a8..56ac09d6c930 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -221,6 +221,7 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) stop_critical_timings(); trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu); + trace_cpu_idle((eax >> 4) + 1, cpu); if (!need_resched()) { __monitor((void *)&current_thread_info()->flags, 0, 0); -- cgit v1.2.3 From ddbd550d503c9cdefcd6674a0ef168d57d3f0917 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 13 Dec 2010 18:28:22 -0500 Subject: intel_idle: update Sandy Bridge core C-state residency targets Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index c131d58bcb50..94a652625ae2 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -122,7 +122,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { .driver_data = (void *) 0x00, .flags = CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, - .target_residency = 4, + .target_residency = 1, .enter = &intel_idle }, { /* MWAIT C2 */ .name = "SNB-C3", @@ -130,7 +130,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { 
.driver_data = (void *) 0x10, .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, - .target_residency = 160, + .target_residency = 211, .enter = &intel_idle }, { /* MWAIT C3 */ .name = "SNB-C6", @@ -138,7 +138,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { .driver_data = (void *) 0x20, .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 104, - .target_residency = 208, + .target_residency = 345, .enter = &intel_idle }, { /* MWAIT C4 */ .name = "SNB-C7", @@ -146,7 +146,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { .driver_data = (void *) 0x30, .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 109, - .target_residency = 300, + .target_residency = 345, .enter = &intel_idle }, }; -- cgit v1.2.3 From d18960494f65ca4fa0d67c865aaca99452070d15 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 3 Nov 2010 17:06:14 +0100 Subject: ACPI, intel_idle: Cleanup idle= internal variables Having four variables for the same thing: idle_halt, idle_nomwait, force_mwait and boot_option_idle_override is rather confusing and unnecessarily complex. If the idle= boot param is passed, only set up one variable: boot_option_idle_override. Introduces the following functional changes/fixes: - intel_idle driver does not register if any idle=xy boot param is passed. - processor_idle.c will also not register a cpuidle driver and get active if idle=halt is passed. Before, a cpuidle driver with one (C1, halt) state got registered. Now the default_idle function will be used which finally uses the same idle call to enter sleep state (safe_halt()), but without registering a whole cpuidle driver. That means the idle= param will always prevent cpuidle drivers from registering, with one exception (same behavior as before): idle=nomwait may still register the acpi_idle cpuidle driver, but C1 will not use mwait, but hlt. 
This can be a workaround for IO based deeper sleep states where C1 mwait causes problems. Signed-off-by: Thomas Renninger cc: x86@kernel.org Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 94a652625ae2..21d387132dbc 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -404,6 +404,10 @@ static int __init intel_idle_init(void) { int retval; + /* Do not load intel_idle at all for now if idle= is passed */ + if (boot_option_idle_override != IDLE_NO_OVERRIDE) + return -ENODEV; + retval = intel_idle_probe(); if (retval) return retval; -- cgit v1.2.3 From 956d033fb2eb3f8818260cdf01644bf4dc1a9e33 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 12 Jan 2011 02:51:20 -0500 Subject: cpuidle: CPUIDLE_FLAG_TLB_FLUSHED is specific to intel_idle Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 21d387132dbc..8256309deaad 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -81,6 +81,14 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state); static struct cpuidle_state *cpuidle_state_table; +/* + * Set this flag for states where the HW flushes the TLB for us + * and so we don't need cross-calls to keep it consistent. + * If this flag is set, SW flushes the TLB, so even if the + * HW doesn't do the flushing, this flag is safe to use. + */ +#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 + /* * States are indexed by the cstate number, * which is also the index into the MWAIT hint array. 
-- cgit v1.2.3 From 2a2d31c8dc6f1ebcf5eab1d93a0cb0fb4ed57c7c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 10 Jan 2011 09:38:12 +0800 Subject: intel_idle: open broadcast clock event The intel_idle driver uses CLOCK_EVT_NOTIFY_BROADCAST_ENTER and CLOCK_EVT_NOTIFY_BROADCAST_EXIT for broadcast clock events. The _ENTER/_EXIT notifications don't really open broadcast clock events; please see processor_idle.c for an example. In some situations, this will cause a boot hang, because some CPUs enter idle but the local APIC timer stalls. Reported-and-tested-by: Yan Zheng Signed-off-by: Shaohua Li cc: stable@kernel.org Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 8256309deaad..fc393586cc70 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -59,6 +59,8 @@ #include /* ktime_get_real() */ #include #include +#include +#include #include #define INTEL_IDLE_VERSION "0.4" @@ -73,6 +75,7 @@ static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1; static unsigned int mwait_substates; +#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF /* Reliable LAPIC Timer States, bit 1 for C1 etc. */ static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */ @@ -252,6 +255,39 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) return usec_delta; } +static void __setup_broadcast_timer(void *arg) +{ + unsigned long reason = (unsigned long)arg; + int cpu = smp_processor_id(); + + reason = reason ? 
+ CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF; + + clockevents_notify(reason, &cpu); +} + +static int __cpuinit setup_broadcast_cpuhp_notify(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + int hotcpu = (unsigned long)hcpu; + + switch (action & 0xf) { + case CPU_ONLINE: + smp_call_function_single(hotcpu, __setup_broadcast_timer, + (void *)true, 1); + break; + case CPU_DOWN_PREPARE: + smp_call_function_single(hotcpu, __setup_broadcast_timer, + (void *)false, 1); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata setup_broadcast_notifier = { + .notifier_call = setup_broadcast_cpuhp_notify, +}; + /* * intel_idle_probe() */ @@ -314,7 +350,11 @@ static int intel_idle_probe(void) } if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ - lapic_timer_reliable_states = 0xFFFFFFFF; + lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; + else { + smp_call_function(__setup_broadcast_timer, (void *)true, 1); + register_cpu_notifier(&setup_broadcast_notifier); + } pr_debug(PREFIX "v" INTEL_IDLE_VERSION " model 0x%X\n", boot_cpu_data.x86_model); @@ -441,6 +481,11 @@ static void __exit intel_idle_exit(void) intel_idle_cpuidle_devices_uninit(); cpuidle_unregister_driver(&intel_idle_driver); + if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) { + smp_call_function(__setup_broadcast_timer, (void *)false, 1); + unregister_cpu_notifier(&setup_broadcast_notifier); + } + return; } -- cgit v1.2.3 From f77cfe4ea21760268c0277fa3e4b02dfd2a2c2f4 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 7 Jan 2011 11:29:44 +0100 Subject: cpuidle/x86/perf: fix power:cpu_idle double end events and throw cpu_idle events from the cpuidle layer Currently intel_idle and acpi_idle driver show double cpu_idle "exit idle" events -> this patch fixes it and makes cpu_idle events throwing less complex. 
It also introduces cpu_idle events for all architectures which use the cpuidle subsystem, namely: - arch/arm/mach-at91/cpuidle.c - arch/arm/mach-davinci/cpuidle.c - arch/arm/mach-kirkwood/cpuidle.c - arch/arm/mach-omap2/cpuidle34xx.c - arch/drivers/acpi/processor_idle.c (for all cases, not only mwait) - arch/x86/kernel/process.c (did throw events before, but was a mess) - drivers/idle/intel_idle.c (did throw events before) Convention should be: Fire cpu_idle events inside the current pm_idle function (not somewhere down the callee tree) to keep things easy. Current possible pm_idle functions in X86: c1e_idle, poll_idle, cpuidle_idle_call, mwait_idle, default_idle -> this is really easy now. This affects userspace: The type field of the cpu_idle power event can now directly get mapped to: /sys/devices/system/cpu/cpuX/cpuidle/stateX/{name,desc,usage,time,...} instead of throwing very CPU/mwait specific values. This change is not visible for the intel_idle driver. For the acpi_idle driver it should only be visible if the vendor misses out C-states in their BIOS. Another (perf timechart) patch reads out cpuidle info of cpu_idle events from: /sys/.../cpuidle/stateX/*, then the cpuidle events are mapped to the correct C-/cpuidle state again, even if e.g. vendors miss out C-states in their BIOS and for example only export C1 and C3. -> everything is fine. 
Signed-off-by: Thomas Renninger CC: Robert Schoene CC: Jean Pihet CC: Arjan van de Ven CC: Ingo Molnar CC: Frederic Weisbecker CC: linux-pm@lists.linux-foundation.org CC: linux-acpi@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: linux-perf-users@vger.kernel.org CC: linux-omap@vger.kernel.org Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 56ac09d6c930..60fa6ecdb41f 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -220,8 +220,6 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) kt_before = ktime_get_real(); stop_critical_timings(); - trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu); - trace_cpu_idle((eax >> 4) + 1, cpu); if (!need_resched()) { __monitor((void *)&current_thread_info()->flags, 0, 0); -- cgit v1.2.3 From ec30f343d61391ab23705e50a525da1d55395780 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 24 Jan 2011 08:00:01 +0000 Subject: fix a shutdown regression in intel_idle Fix a shutdown regression caused by 2a2d31c8dc6f ("intel_idle: open broadcast clock event"). The clockevent framework can automatically shut down broadcast timers for hot-removed CPUs. And we get a shutdown regression when we shut down the broadcast timer for a hot-removed CPU, so just delete some code. Also fix some section mismatches. 
Reported-by: Ari Savolainen Signed-off-by: Shaohua Li Tested-by: Linus Torvalds Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- drivers/idle/intel_idle.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 7acb32e7f817..1fa091e05690 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -263,7 +263,7 @@ static void __setup_broadcast_timer(void *arg) clockevents_notify(reason, &cpu); } -static int __cpuinit setup_broadcast_cpuhp_notify(struct notifier_block *n, +static int setup_broadcast_cpuhp_notify(struct notifier_block *n, unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; @@ -273,15 +273,11 @@ static int __cpuinit setup_broadcast_cpuhp_notify(struct notifier_block *n, smp_call_function_single(hotcpu, __setup_broadcast_timer, (void *)true, 1); break; - case CPU_DOWN_PREPARE: - smp_call_function_single(hotcpu, __setup_broadcast_timer, - (void *)false, 1); - break; } return NOTIFY_OK; } -static struct notifier_block __cpuinitdata setup_broadcast_notifier = { +static struct notifier_block setup_broadcast_notifier = { .notifier_call = setup_broadcast_cpuhp_notify, }; -- cgit v1.2.3