From 9a46d7e5b63903a70cd96c2c1391a7a26a8dbec9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Feb 2008 09:30:32 +0100 Subject: x86: ioremap, remove WARN_ON() Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index ac3c959e271d..8fe576baa148 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -134,8 +134,6 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, return NULL; } - WARN_ON_ONCE(page_is_ram(pfn)); - switch (mode) { case IOR_MODE_UNCACHED: default: -- cgit v1.2.3 From 40f0933d51f4cba26a5c009a26bb230f4514c1b6 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 28 Feb 2008 19:57:07 -0800 Subject: x86: ia32 syscall restart fix The code to restart syscalls after signals depends on checking for a negative orig_ax, and for particular negative -ERESTART* values in ax. These fields are 64 bits and for a 32-bit task they get zero-extended. The syscall restart behavior is lost, a regression from a native 32-bit kernel and from 64-bit tasks' behavior. This patch fixes the problem by doing sign-extension where it matters. For orig_ax, the only time the value should be -1 but winds up as 0x0ffffffff is via a 32-bit ptrace call. So the patch changes ptrace to sign-extend the 32-bit orig_eax value when it's stored; it doesn't change the checks on orig_ax, though it uses the new current_syscall() inline to better document the subtle importance of the used of signedness there. The ax value is stored a lot of ways and it seems hard to get them all sign-extended at their origins. So for that, we use the current_syscall_ret() to sign-extend it only for 32-bit tasks at the time of the -ERESTART* comparisons. Signed-off-by: Roland McGrath Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 9 ++++++++- arch/x86/kernel/signal_64.c | 38 +++++++++++++++++++++++++++++++++----- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 8f64abe699fd..d5904eef1d31 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1055,10 +1055,17 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) R32(esi, si); R32(ebp, bp); R32(eax, ax); - R32(orig_eax, orig_ax); R32(eip, ip); R32(esp, sp); + case offsetof(struct user32, regs.orig_eax): + /* + * Sign-extend the value so that orig_eax = -1 + * causes (long)orig_ax < 0 tests to fire correctly. + */ + regs->orig_ax = (long) (s32) value; + break; + case offsetof(struct user32, regs.eflags): return set_flags(child, value); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 56b72fb67f9b..1c83e5124c65 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -310,6 +310,35 @@ give_sigsegv: return -EFAULT; } +/* + * Return -1L or the syscall number that @regs is executing. + */ +static long current_syscall(struct pt_regs *regs) +{ + /* + * We always sign-extend a -1 value being set here, + * so this is always either -1L or a syscall number. + */ + return regs->orig_ax; +} + +/* + * Return a value that is -EFOO if the system call in @regs->orig_ax + * returned an error. This only works for @regs from @current. + */ +static long current_syscall_ret(struct pt_regs *regs) +{ +#ifdef CONFIG_IA32_EMULATION + if (test_thread_flag(TIF_IA32)) + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. + */ + return (int) regs->ax; +#endif + return regs->ax; +} + /* * OK, we're invoking a handler */ @@ -327,9 +356,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, #endif /* Are we from a system call? */ - if ((long)regs->orig_ax >= 0) { + if (current_syscall(regs) >= 0) { /* If so, check system call restarting.. */ - switch (regs->ax) { + switch (current_syscall_ret(regs)) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: regs->ax = -EINTR; @@ -426,10 +455,9 @@ static void do_signal(struct pt_regs *regs) } /* Did we come from a system call? */ - if ((long)regs->orig_ax >= 0) { + if (current_syscall(regs) >= 0) { /* Restart the system call - no handlers present */ - long res = regs->ax; - switch (res) { + switch (current_syscall_ret(regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: -- cgit v1.2.3 From 985a34bd75cc8c96e43f00dcdda7c3fdb51a3026 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 9 Mar 2008 13:14:37 +0100 Subject: x86: remove quicklists quicklists cause a serious memory leak on 32-bit x86, as documented at: http://bugzilla.kernel.org/show_bug.cgi?id=9991 the reason is that the quicklist pool is a special-purpose cache that grows out of proportion. It is not accounted for anywhere and users have no way to even realize that it's the quicklists that are causing RAM usage spikes. It was supposed to be a relatively small pool, but as demonstrated by KOSAKI Motohiro, they can grow as large as: Quicklists: 1194304 kB given how much trouble this code has caused historically, and given that Andrew objected to its introduction on x86 (years ago), the best option at this point is to remove them. [ any performance benefits of caching constructed pgds should be implemented in a more generic way (possibly within the page allocator), while still allowing constructed pages to be allocated by other workloads. ] Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 --- arch/x86/mm/pgtable_32.c | 18 +++++++++--------- include/asm-x86/pgtable_32.h | 5 ++--- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f41c9538ca30..237fc128143d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -66,9 +66,6 @@ config MMU config ZONE_DMA def_bool y -config QUICKLIST - def_bool X86_32 - config SBUS bool diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 73aba7125203..2f9e9afcb9f4 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -342,12 +342,16 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) pgd_t *pgd_alloc(struct mm_struct *mm) { - pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); + pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - mm->pgd = pgd; /* so that alloc_pd can use it */ + /* so that alloc_pd can use it */ + mm->pgd = pgd; + if (pgd) + pgd_ctor(pgd); if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { - quicklist_free(0, pgd_dtor, pgd); + pgd_dtor(pgd); + free_page((unsigned long)pgd); pgd = NULL; } @@ -357,12 +361,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) void pgd_free(struct mm_struct *mm, pgd_t *pgd) { pgd_mop_up_pmds(mm, pgd); - quicklist_free(0, pgd_dtor, pgd); -} - -void check_pgt_cache(void) -{ - quicklist_trim(0, pgd_dtor, 25, 16); + pgd_dtor(pgd); + free_page((unsigned long)pgd); } void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index a842c7222b1e..4e6a0fca0b47 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -26,10 +26,9 @@ struct mm_struct; struct vm_area_struct; extern pgd_t swapper_pg_dir[1024]; -extern struct kmem_cache *pmd_cache; -void check_pgt_cache(void); -static inline void pgtable_cache_init(void) {} +static inline void pgtable_cache_init(void) { } +static inline void check_pgt_cache(void) { } void paging_init(void); -- cgit v1.2.3