From e4707dd3e9d0cb57597b6568a5e51fea5d6fca41 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 12 Mar 2009 20:11:43 +0100 Subject: [ARM] 5422/1: ARM: MMU: add a Non-cacheable Normal executable memory type This patch adds a Non-cacheable Normal ARM executable memory type, MT_MEMORY_NONCACHED. On OMAP3, this is used for rapid dynamic voltage/frequency scaling in the VDD2 voltage domain. OMAP3's SDRAM controller (SDRC) is in the VDD2 voltage domain, and its clock frequency must change along with voltage. The SDRC clock change code cannot run from SDRAM itself, since SDRAM accesses are paused during the clock change. So the current implementation of the DVFS code executes from OMAP on-chip SRAM, aka "OCM RAM." If the OCM RAM pages are marked as Cacheable, the ARM cache controller will attempt to flush dirty cache lines to the SDRC, so it can fill those lines with OCM RAM instruction code. The problem is that the SDRC is paused during DVFS, and so any SDRAM access causes the ARM MPU subsystem to hang. TI's original solution to this problem was to mark the OCM RAM sections as Strongly Ordered memory, thus preventing caching. This is overkill: since the memory is marked as non-bufferable, OCM RAM writes become needlessly slow. The idea of "Strongly Ordered SRAM" is also conceptually disturbing. Previous LAKML list discussion is here: http://www.spinics.net/lists/arm-kernel/msg54312.html This memory type MT_MEMORY_NONCACHED is used for OCM RAM by a future patch. 
Cc: Richard Woodruff Signed-off-by: Paul Walmsley Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 9b36c5cb5e9f..aa424e1da8a1 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -243,6 +243,10 @@ static struct mem_type mem_types[] = { .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, }, + [MT_MEMORY_NONCACHED] = { + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, + .domain = DOMAIN_KERNEL, + }, }; const struct mem_type *get_mem_type(unsigned int type) @@ -406,9 +410,28 @@ static void __init build_mem_type_table(void) kern_pgprot |= L_PTE_SHARED; vecs_pgprot |= L_PTE_SHARED; mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; #endif } + /* + * Non-cacheable Normal - intended for memory areas that must + * not cause dirty cache line writebacks when used + */ + if (cpu_arch >= CPU_ARCH_ARMv6) { + if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { + /* Non-cacheable Normal is XCB = 001 */ + mem_types[MT_MEMORY_NONCACHED].prot_sect |= + PMD_SECT_BUFFERED; + } else { + /* For both ARMv6 and non-TEX-remapping ARMv7 */ + mem_types[MT_MEMORY_NONCACHED].prot_sect |= + PMD_SECT_TEX(1); + } + } else { + mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE; + } + for (i = 0; i < 16; i++) { unsigned long v = pgprot_val(protection_map[i]); protection_map[i] = __pgprot(v | user_pgprot); -- cgit v1.2.3 From cb88214d726b337d49c1f65cbc5e5ac85837b11b Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Sun, 8 Feb 2009 02:00:50 +0100 Subject: [ARM] MX31/MX35: Add l2x0 cache support Signed-off-by: Sascha Hauer --- arch/arm/mm/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index d490f3773c01..0d8581f11211 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -704,7 +704,8 
@@ config CACHE_FEROCEON_L2_WRITETHROUGH config CACHE_L2X0 bool "Enable the L2x0 outer cache controller" - depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || REALVIEW_EB_A9MP + depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || \ + REALVIEW_EB_A9MP || ARCH_MX35 || ARCH_MX31 default y select OUTER_CACHE help -- cgit v1.2.3 From 5f0fbf9ecaf354fa4bbf266fffdea2ea3d14a0ed Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 16 Sep 2008 13:05:53 -0400 Subject: [ARM] fixmap support This is the minimum fixmap interface expected to be implemented by architectures supporting highmem. We have a second level page table already allocated and covering 0xfff00000-0xffffffff because the exception vector page is located at 0xffff0000, and various cache tricks already use some entries above 0xffff0000. Therefore the PTEs covering 0xfff00000-0xfffeffff are free to be used. However the XScale cache flushing code already uses virtual addresses between 0xfffe0000 and 0xfffeffff. So this reserves the 0xfff00000-0xfffdffff range for fixmap stuff. The Documentation/arm/memory.txt information is updated accordingly, including the information about the actual top of DMA memory mapping region which didn't match the code. 
Signed-off-by: Nicolas Pitre --- arch/arm/mm/mm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 95bbe112965e..c4f6f05198e0 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -1,7 +1,6 @@ -/* the upper-most page table pointer */ - #ifdef CONFIG_MMU +/* the upper-most page table pointer */ extern pmd_t *top_pmd; #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -- cgit v1.2.3 From d73cd42893f4cdc06e6829fea2347bb92cb789d1 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 15 Sep 2008 16:44:55 -0400 Subject: [ARM] kmap support The kmap virtual area borrows a 2MB range at the top of the 16MB area below PAGE_OFFSET currently reserved for kernel modules and/or the XIP kernel. This 2MB corresponds to the range covered by 2 consecutive second-level page tables, or a single pmd entry as seen by the Linux page table abstraction. Because XIP kernels are unlikely to be seen on systems needing highmem support, there shouldn't be any shortage of VM space for modules (14 MB for modules is still way more than twice the typical usage). Because the virtual mapping of highmem pages can go away at any moment after kunmap() is called on them, we need to bypass the delayed cache flushing provided by flush_dcache_page() in that case. The atomic kmap versions are based on fixmaps, and __cpuc_flush_dcache_page() is used directly in that case. 
Signed-off-by: Nicolas Pitre --- arch/arm/mm/Makefile | 1 + arch/arm/mm/flush.c | 2 +- arch/arm/mm/highmem.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++ arch/arm/mm/mmu.c | 13 ++++++ 4 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 arch/arm/mm/highmem.c (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 480f78a3611a..185e7dc7dcf2 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_DISCONTIGMEM) += discontig.o +obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 0fa9bf388f0b..4e283481cee1 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -192,7 +192,7 @@ void flush_dcache_page(struct page *page) struct address_space *mapping = page_mapping(page); #ifndef CONFIG_SMP - if (mapping && !mapping_mapped(mapping)) + if (!PageHighMem(page) && mapping && !mapping_mapped(mapping)) set_bit(PG_dcache_dirty, &page->flags); else #endif diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c new file mode 100644 index 000000000000..a34954d9df7d --- /dev/null +++ b/arch/arm/mm/highmem.c @@ -0,0 +1,116 @@ +/* + * arch/arm/mm/highmem.c -- ARM highmem support + * + * Author: Nicolas Pitre + * Created: september 8, 2008 + * Copyright: Marvell Semiconductors Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "mm.h" + +void *kmap(struct page *page) +{ + might_sleep(); + if (!PageHighMem(page)) + return page_address(page); + return kmap_high(page); +} +EXPORT_SYMBOL(kmap); + +void kunmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} +EXPORT_SYMBOL(kunmap); + +void *kmap_atomic(struct page *page, enum km_type type) +{ + unsigned int idx; + unsigned long vaddr; + + pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); + + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#ifdef CONFIG_DEBUG_HIGHMEM + /* + * With debugging enabled, kunmap_atomic forces that entry to 0. + * Make sure it was indeed properly unmapped. + */ + BUG_ON(!pte_none(*(TOP_PTE(vaddr)))); +#endif + set_pte_ext(TOP_PTE(vaddr), mk_pte(page, kmap_prot), 0); + /* + * When debugging is off, kunmap_atomic leaves the previous mapping + * in place, so this TLB flush ensures the TLB is updated with the + * new mapping. 
+ */ + local_flush_tlb_kernel_page(vaddr); + + return (void *)vaddr; +} +EXPORT_SYMBOL(kmap_atomic); + +void kunmap_atomic(void *kvaddr, enum km_type type) +{ + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + unsigned int idx = type + KM_TYPE_NR * smp_processor_id(); + + if (kvaddr >= (void *)FIXADDR_START) { + __cpuc_flush_dcache_page((void *)vaddr); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); + set_pte_ext(TOP_PTE(vaddr), __pte(0), 0); + local_flush_tlb_kernel_page(vaddr); +#else + (void) idx; /* to kill a warning */ +#endif + } + pagefault_enable(); +} +EXPORT_SYMBOL(kunmap_atomic); + +void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +{ + unsigned int idx; + unsigned long vaddr; + + pagefault_disable(); + + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(!pte_none(*(TOP_PTE(vaddr)))); +#endif + set_pte_ext(TOP_PTE(vaddr), pfn_pte(pfn, kmap_prot), 0); + local_flush_tlb_kernel_page(vaddr); + + return (void *)vaddr; +} + +struct page *kmap_atomic_to_page(const void *ptr) +{ + unsigned long vaddr = (unsigned long)ptr; + pte_t *pte; + + if (vaddr < FIXADDR_START) + return virt_to_page(ptr); + + pte = TOP_PTE(vaddr); + return pte_page(*pte); +} diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index d4d082c5c2d4..4810a4c9ffce 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -895,6 +896,17 @@ static void __init devicemaps_init(struct machine_desc *mdesc) flush_cache_all(); } +static void __init kmap_init(void) +{ +#ifdef CONFIG_HIGHMEM + pmd_t *pmd = pmd_off_k(PKMAP_BASE); + pte_t *pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t)); + BUG_ON(!pmd_none(*pmd) || !pte); + __pmd_populate(pmd, __pa(pte) | _PAGE_KERNEL_TABLE); + pkmap_page_table = pte + PTRS_PER_PTE; +#endif +} + /* * paging_init() sets up the page tables, initialises 
the zone memory * maps, and sets up the zero page, bad page and bad page tables. @@ -908,6 +920,7 @@ void __init paging_init(struct machine_desc *mdesc) prepare_page_table(); bootmem_init(); devicemaps_init(mdesc); + kmap_init(); top_pmd = pmd_off_k(0xffff0000); -- cgit v1.2.3 From 3835f6cb645bdb9a58aa6e062fe1d5777f1a9748 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 17 Sep 2008 15:21:55 -0400 Subject: [ARM] mem_init(): make highmem pages available for use Signed-off-by: Nicolas Pitre --- arch/arm/mm/init.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 80fd3b69ae1f..8277802ec859 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -485,7 +486,7 @@ void __init mem_init(void) int i, node; #ifndef CONFIG_DISCONTIGMEM - max_mapnr = virt_to_page(high_memory) - mem_map; + max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; #endif /* this will put all unused low memory onto the freelists */ @@ -504,6 +505,19 @@ void __init mem_init(void) __phys_to_pfn(__pa(swapper_pg_dir)), NULL); #endif +#ifdef CONFIG_HIGHMEM + /* set highmem page free */ + for_each_online_node(node) { + for_each_nodebank (i, &meminfo, node) { + unsigned long start = bank_pfn_start(&meminfo.bank[i]); + unsigned long end = bank_pfn_end(&meminfo.bank[i]); + if (start >= max_low_pfn + PHYS_PFN_OFFSET) + totalhigh_pages += free_area(start, end, NULL); + } + } + totalram_pages += totalhigh_pages; +#endif + /* * Since our memory may not be contiguous, calculate the * real number of pages we have in this system @@ -521,9 +535,10 @@ void __init mem_init(void) initsize = __init_end - __init_begin; printk(KERN_NOTICE "Memory: %luKB available (%dK code, " - "%dK data, %dK init)\n", + "%dK data, %dK init, %luK highmem)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - codesize >> 10, datasize >> 
10, initsize >> 10); + codesize >> 10, datasize >> 10, initsize >> 10, + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); if (PAGE_SIZE >= 16384 && num_physpages <= 128) { extern int sysctl_overcommit_memory; -- cgit v1.2.3 From 43377453af83b8ff8c1c731da1508bd6b84ebfea Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 12 Mar 2009 22:52:09 -0400 Subject: [ARM] introduce dma_cache_maint_page() This is a helper to be used by the DMA mapping API to handle cache maintenance for memory identified by a page structure instead of a virtual address. Those pages may or may not be highmem pages, and when they're highmem pages, they may or may not be virtually mapped. When they're not mapped then there is no L1 cache to worry about. But even in that case the L2 cache must be processed since unmapped highmem pages can still be L2 cached. Signed-off-by: Nicolas Pitre --- arch/arm/mm/dma-mapping.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f1ef5613ccd4..510c179b0ac8 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -517,6 +518,74 @@ void dma_cache_maint(const void *start, size_t size, int direction) } EXPORT_SYMBOL(dma_cache_maint); +static void dma_cache_maint_contiguous(struct page *page, unsigned long offset, + size_t size, int direction) +{ + void *vaddr; + unsigned long paddr; + void (*inner_op)(const void *, const void *); + void (*outer_op)(unsigned long, unsigned long); + + switch (direction) { + case DMA_FROM_DEVICE: /* invalidate only */ + inner_op = dmac_inv_range; + outer_op = outer_inv_range; + break; + case DMA_TO_DEVICE: /* writeback only */ + inner_op = dmac_clean_range; + outer_op = outer_clean_range; + break; + case DMA_BIDIRECTIONAL: /* writeback and invalidate */ + inner_op = dmac_flush_range; + 
outer_op = outer_flush_range; + break; + default: + BUG(); + } + + if (!PageHighMem(page)) { + vaddr = page_address(page) + offset; + inner_op(vaddr, vaddr + size); + } else { + vaddr = kmap_high_get(page); + if (vaddr) { + vaddr += offset; + inner_op(vaddr, vaddr + size); + kunmap_high(page); + } + } + + paddr = page_to_phys(page) + offset; + outer_op(paddr, paddr + size); +} + +void dma_cache_maint_page(struct page *page, unsigned long offset, + size_t size, int dir) +{ + /* + * A single sg entry may refer to multiple physically contiguous + * pages. But we still need to process highmem pages individually. + * If highmem is not configured then the bulk of this loop gets + * optimized out. + */ + size_t left = size; + do { + size_t len = left; + if (PageHighMem(page) && len + offset > PAGE_SIZE) { + if (offset >= PAGE_SIZE) { + page += offset / PAGE_SIZE; + offset %= PAGE_SIZE; + } + len = PAGE_SIZE - offset; + } + dma_cache_maint_contiguous(page, offset, len, dir); + offset = 0; + page++; + left -= len; + } while (left); +} +EXPORT_SYMBOL(dma_cache_maint_page); + /** * dma_map_sg - map a set of SG buffers for streaming mode DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices @@ -614,7 +683,8 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, continue; if (!arch_is_coherent()) - dma_cache_maint(sg_virt(s), s->length, dir); + dma_cache_maint_page(sg_page(s), s->offset, + s->length, dir); } } EXPORT_SYMBOL(dma_sync_sg_for_device); -- cgit v1.2.3 From 1bb772679ffb0ba1ff1d40d8c6b855ab029f177d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 12 Sep 2008 16:11:51 -0400 Subject: [ARM] Feroceon: add highmem support to L2 cache handling code The choice is between looping over the physical range and performing single cache line operations, or to map highmem pages somewhere, as cache range ops are possible only on virtual addresses. 
Because L2 range ops are much faster, we go with the latter by factoring the physical-to-virtual address conversion and using a fixmap entry for it in the HIGHMEM case. Possible future optimizations to avoid the pte setup cost: - do the pte setup for highmem pages only - determine a threshold for doing a line-by-line processing on physical addresses when the range is small Signed-off-by: Nicolas Pitre --- arch/arm/mm/cache-feroceon-l2.c | 54 ++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 17 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c index 80cd207cbaea..d6dd83826f8a 100644 --- a/arch/arm/mm/cache-feroceon-l2.c +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -14,8 +14,12 @@ #include #include +#include +#include +#include +#include #include - +#include "mm.h" /* * Low-level cache maintenance operations. @@ -34,14 +38,36 @@ * The range operations require two successive cp15 writes, in * between which we don't want to be preempted. */ + +static inline unsigned long l2_start_va(unsigned long paddr) +{ +#ifdef CONFIG_HIGHMEM + /* + * Let's do our own fixmap stuff in a minimal way here. + * Because range ops can't be done on physical addresses, + * we simply install a virtual mapping for it only for the + * TLB lookup to occur, hence no need to flush the untouched + * memory mapping. This is protected with the disabling of + * interrupts by the caller. 
+ */ + unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); + unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0); + local_flush_tlb_kernel_page(vaddr); + return vaddr + (paddr & ~PAGE_MASK); +#else + return __phys_to_virt(paddr); +#endif +} + static inline void l2_clean_pa(unsigned long addr) { __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr)); } -static inline void l2_clean_mva_range(unsigned long start, unsigned long end) +static inline void l2_clean_pa_range(unsigned long start, unsigned long end) { - unsigned long flags; + unsigned long va_start, va_end, flags; /* * Make sure 'start' and 'end' reference the same page, as @@ -51,17 +77,14 @@ static inline void l2_clean_mva_range(unsigned long start, unsigned long end) BUG_ON((start ^ end) >> PAGE_SHIFT); raw_local_irq_save(flags); + va_start = l2_start_va(start); + va_end = va_start + (end - start); __asm__("mcr p15, 1, %0, c15, c9, 4\n\t" "mcr p15, 1, %1, c15, c9, 5" - : : "r" (start), "r" (end)); + : : "r" (va_start), "r" (va_end)); raw_local_irq_restore(flags); } -static inline void l2_clean_pa_range(unsigned long start, unsigned long end) -{ - l2_clean_mva_range(__phys_to_virt(start), __phys_to_virt(end)); -} - static inline void l2_clean_inv_pa(unsigned long addr) { __asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr)); @@ -72,9 +95,9 @@ static inline void l2_inv_pa(unsigned long addr) __asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr)); } -static inline void l2_inv_mva_range(unsigned long start, unsigned long end) +static inline void l2_inv_pa_range(unsigned long start, unsigned long end) { - unsigned long flags; + unsigned long va_start, va_end, flags; /* * Make sure 'start' and 'end' reference the same page, as @@ -84,17 +107,14 @@ static inline void l2_inv_mva_range(unsigned long start, unsigned long end) BUG_ON((start ^ end) >> PAGE_SHIFT); raw_local_irq_save(flags); + va_start = l2_start_va(start); + 
va_end = va_start + (end - start); __asm__("mcr p15, 1, %0, c15, c11, 4\n\t" "mcr p15, 1, %1, c15, c11, 5" - : : "r" (start), "r" (end)); + : : "r" (va_start), "r" (va_end)); raw_local_irq_restore(flags); } -static inline void l2_inv_pa_range(unsigned long start, unsigned long end) -{ - l2_inv_mva_range(__phys_to_virt(start), __phys_to_virt(end)); -} - /* * Linux primitives. -- cgit v1.2.3 From 3902a15e784e9b1efa8e6ad246489c609e0ef880 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 18 Sep 2008 22:55:47 -0400 Subject: [ARM] xsc3: add highmem support to L2 cache handling code On xsc3, L2 cache ops are possible only on virtual addresses. The code is rearranged so to have a linear progression requiring the least amount of pte setups in the highmem case. To protect the virtual mapping so created, interrupts must be disabled currently up to a page worth of address range. The interrupt disabling is done in a way to minimize the overhead within the inner loop. The alternative would consist in separate code for the highmem and non highmem compilation which is less preferable. 
Signed-off-by: Nicolas Pitre --- arch/arm/mm/cache-xsc3l2.c | 107 +++++++++++++++++++++++++++++++++------------ 1 file changed, 80 insertions(+), 27 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c index 464de893a988..5d180cb0bd94 100644 --- a/arch/arm/mm/cache-xsc3l2.c +++ b/arch/arm/mm/cache-xsc3l2.c @@ -17,12 +17,14 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include -#include -#include - #include #include #include +#include +#include +#include +#include +#include "mm.h" #define CR_L2 (1 << 26) @@ -47,21 +49,11 @@ static inline void xsc3_l2_clean_mva(unsigned long addr) __asm__("mcr p15, 1, %0, c7, c11, 1" : : "r" (addr)); } -static inline void xsc3_l2_clean_pa(unsigned long addr) -{ - xsc3_l2_clean_mva(__phys_to_virt(addr)); -} - static inline void xsc3_l2_inv_mva(unsigned long addr) { __asm__("mcr p15, 1, %0, c7, c7, 1" : : "r" (addr)); } -static inline void xsc3_l2_inv_pa(unsigned long addr) -{ - xsc3_l2_inv_mva(__phys_to_virt(addr)); -} - static inline void xsc3_l2_inv_all(void) { unsigned long l2ctype, set_way; @@ -79,50 +71,103 @@ static inline void xsc3_l2_inv_all(void) dsb(); } +#ifdef CONFIG_HIGHMEM +#define l2_map_save_flags(x) raw_local_save_flags(x) +#define l2_map_restore_flags(x) raw_local_irq_restore(x) +#else +#define l2_map_save_flags(x) ((x) = 0) +#define l2_map_restore_flags(x) ((void)(x)) +#endif + +static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va, + unsigned long flags) +{ +#ifdef CONFIG_HIGHMEM + unsigned long va = prev_va & PAGE_MASK; + unsigned long pa_offset = pa << (32 - PAGE_SHIFT); + if (unlikely(pa_offset < (prev_va << (32 - PAGE_SHIFT)))) { + /* + * Switching to a new page. Because cache ops are + * using virtual addresses only, we must put a mapping + * in place for it. We also enable interrupts for a + * short while and disable them again to protect this + * mapping. 
+ */ + unsigned long idx; + raw_local_irq_restore(flags); + idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); + va = __fix_to_virt(FIX_KMAP_BEGIN + idx); + raw_local_irq_restore(flags | PSR_I_BIT); + set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0); + local_flush_tlb_kernel_page(va); + } + return va + (pa_offset >> (32 - PAGE_SHIFT)); +#else + return __phys_to_virt(pa); +#endif +} + static void xsc3_l2_inv_range(unsigned long start, unsigned long end) { + unsigned long vaddr, flags; + if (start == 0 && end == -1ul) { xsc3_l2_inv_all(); return; } + vaddr = -1; /* to force the first mapping */ + l2_map_save_flags(flags); + /* * Clean and invalidate partial first cache line. */ if (start & (CACHE_LINE_SIZE - 1)) { - xsc3_l2_clean_pa(start & ~(CACHE_LINE_SIZE - 1)); - xsc3_l2_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); + vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); start = (start | (CACHE_LINE_SIZE - 1)) + 1; } /* - * Clean and invalidate partial last cache line. + * Invalidate all full cache lines between 'start' and 'end'. */ - if (start < end && (end & (CACHE_LINE_SIZE - 1))) { - xsc3_l2_clean_pa(end & ~(CACHE_LINE_SIZE - 1)); - xsc3_l2_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); - end &= ~(CACHE_LINE_SIZE - 1); + while (start < (end & ~(CACHE_LINE_SIZE - 1))) { + vaddr = l2_map_va(start, vaddr, flags); + xsc3_l2_inv_mva(vaddr); + start += CACHE_LINE_SIZE; } /* - * Invalidate all full cache lines between 'start' and 'end'. + * Clean and invalidate partial last cache line. 
*/ - while (start < end) { - xsc3_l2_inv_pa(start); - start += CACHE_LINE_SIZE; + if (start < end) { + vaddr = l2_map_va(start, vaddr, flags); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); } + l2_map_restore_flags(flags); + dsb(); } static void xsc3_l2_clean_range(unsigned long start, unsigned long end) { + unsigned long vaddr, flags; + + vaddr = -1; /* to force the first mapping */ + l2_map_save_flags(flags); + start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { - xsc3_l2_clean_pa(start); + vaddr = l2_map_va(start, vaddr, flags); + xsc3_l2_clean_mva(vaddr); start += CACHE_LINE_SIZE; } + l2_map_restore_flags(flags); + dsb(); } @@ -148,18 +193,26 @@ static inline void xsc3_l2_flush_all(void) static void xsc3_l2_flush_range(unsigned long start, unsigned long end) { + unsigned long vaddr, flags; + if (start == 0 && end == -1ul) { xsc3_l2_flush_all(); return; } + vaddr = -1; /* to force the first mapping */ + l2_map_save_flags(flags); + start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { - xsc3_l2_clean_pa(start); - xsc3_l2_inv_pa(start); + vaddr = l2_map_va(start, vaddr, flags); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); start += CACHE_LINE_SIZE; } + l2_map_restore_flags(flags); + dsb(); } -- cgit v1.2.3 From 3f973e22160257c5bda85815be5b1540d391a671 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 4 Nov 2008 00:48:42 -0500 Subject: [ARM] ignore high memory with VIPT aliasing caches VIPT aliasing caches have issues of their own which are not yet handled. Usage of discard_old_kernel_data() in copypage-v6.c is not highmem ready, kmap/fixmap stuff doesn't take account of cache colouring, etc. If/when those issues are handled then this could be reverted. 
Signed-off-by: Nicolas Pitre --- arch/arm/mm/mmu.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4810a4c9ffce..cf504885a5fb 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -678,6 +679,10 @@ static void __init sanity_check_meminfo(void) if (meminfo.nr_banks >= NR_BANKS) { printk(KERN_CRIT "NR_BANKS too low, " "ignoring high memory\n"); + } else if (cache_is_vipt_aliasing()) { + printk(KERN_CRIT "HIGHMEM is not yet supported " + "with VIPT aliasing cache, " + "ignoring high memory\n"); } else { memmove(bank + 1, bank, (meminfo.nr_banks - i) * sizeof(*bank)); -- cgit v1.2.3 From 49cbe78637eb0503f45fc9b556ec08918a616534 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Tue, 20 Jan 2009 14:15:18 +0800 Subject: [ARM] pxa: add base support for Marvell's PXA168 processor line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit """The Marvell® PXA168 processor is the first in a family of application processors targeted at mass market opportunities in computing and consumer devices. It balances high computing and multimedia performance with low power consumption to support extended battery life, and includes a wealth of integrated peripherals to reduce overall BOM cost .... """ See http://www.marvell.com/featured/pxa168.jsp for more information. 1. Marvell Mohawk core is a hybrid of xscale3 and its own ARM core, there are many enhancements like instructions for flushing the whole D-cache, and so on 2. Clock reuses Russell's common clkdev, and added the basic support for UART1/2. 3. Devices are a bit different from the 'mach-pxa' way, the platform devices are now dynamically allocated only when necessary (i.e. when pxa_register_device() is called). Description for each device are stored in an array of 'struct pxa_device_desc'. Now that: a. 
this array of device description is marked with __initdata and can be freed up once the system is fully up b. which means board code has to add all needed devices early in its initialization function c. platform specific data can now be marked as __initdata since they are allocated and copied by platform_device_add_data() 4. only the basic UART1/2/3 are added, more devices will come later. Signed-off-by: Jason Chagas Signed-off-by: Eric Miao --- arch/arm/mm/Kconfig | 15 +- arch/arm/mm/Makefile | 1 + arch/arm/mm/proc-mohawk.S | 416 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 430 insertions(+), 2 deletions(-) create mode 100644 arch/arm/mm/proc-mohawk.S (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index d490f3773c01..64086f4f5fcc 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -340,6 +340,17 @@ config CPU_XSC3 select CPU_TLB_V4WBI if MMU select IO_36 +# Marvell PJ1 (Mohawk) +config CPU_MOHAWK + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_PABRT_NOIFAR + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_TLB_V4WBI if MMU + select CPU_COPY_V4WB if MMU + # Feroceon config CPU_FEROCEON bool @@ -569,7 +580,7 @@ comment "Processor Features" config ARM_THUMB bool "Support Thumb user binaries" - depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_V6 || CPU_V7 || CPU_FEROCEON + depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V7 || CPU_FEROCEON default y help Say Y if you want to include kernel support for running user space @@ -653,7 +664,7 @@ config CPU_CACHE_ROUND_ROBIN config CPU_BPREDICT_DISABLE bool "Disable branch prediction" - 
depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7 + depends on CPU_ARM1020 || CPU_V6 || CPU_MOHAWK || CPU_XSC3 || CPU_V7 help Say Y here to disable branch prediction. If unsure, say N. diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 480f78a3611a..64149d9e55a5 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_CPU_SA110) += proc-sa110.o obj-$(CONFIG_CPU_SA1100) += proc-sa1100.o obj-$(CONFIG_CPU_XSCALE) += proc-xscale.o obj-$(CONFIG_CPU_XSC3) += proc-xsc3.o +obj-$(CONFIG_CPU_MOHAWK) += proc-mohawk.o obj-$(CONFIG_CPU_FEROCEON) += proc-feroceon.o obj-$(CONFIG_CPU_V6) += proc-v6.o obj-$(CONFIG_CPU_V7) += proc-v7.o diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S new file mode 100644 index 000000000000..540f5078496b --- /dev/null +++ b/arch/arm/mm/proc-mohawk.S @@ -0,0 +1,416 @@ +/* + * linux/arch/arm/mm/proc-mohawk.S: MMU functions for Marvell PJ1 core + * + * PJ1 (codename Mohawk) is a hybrid of the xscale3 and Marvell's own core. + * + * Heavily based on proc-arm926.S and proc-xsc3.S + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be flushed. 
If the + * area is larger than this, then we flush the whole cache. + */ +#define CACHE_DLIMIT 32768 + +/* + * The cache line size of the L1 D cache. + */ +#define CACHE_DLINESIZE 32 + +/* + * cpu_mohawk_proc_init() + */ +ENTRY(cpu_mohawk_proc_init) + mov pc, lr + +/* + * cpu_mohawk_proc_fin() + */ +ENTRY(cpu_mohawk_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip + bl mohawk_flush_kern_cache_all + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1800 @ ...iz........... + bic r0, r0, #0x0006 @ .............ca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ldmfd sp!, {pc} + +/* + * cpu_mohawk_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + * + * (same as arm926) + */ + .align 5 +ENTRY(cpu_mohawk_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x0007 @ .............cam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 + +/* + * cpu_mohawk_do_idle() + * + * Called with IRQs disabled + */ + .align 5 +ENTRY(cpu_mohawk_do_idle) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt + mov pc, lr + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(mohawk_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. 
+ */ +ENTRY(mohawk_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + * + * (same as arm926) + */ +ENTRY(mohawk_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(mohawk_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. 
+ * + * - start - virtual start address + * - end - virtual end address + * + * (same as arm926) + */ +ENTRY(mohawk_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_page(void *page) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - page aligned address + */ +ENTRY(mohawk_flush_kern_dcache_page) + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(mohawk_dma_inv_range) + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +ENTRY(mohawk_dma_clean_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(mohawk_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +ENTRY(mohawk_cache_fns) + .long mohawk_flush_kern_cache_all + .long mohawk_flush_user_cache_all + .long mohawk_flush_user_cache_range + .long mohawk_coherent_kern_range + .long mohawk_coherent_user_range + .long mohawk_flush_kern_dcache_page + .long mohawk_dma_inv_range + .long mohawk_dma_clean_range + .long mohawk_dma_flush_range + +ENTRY(cpu_mohawk_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * cpu_mohawk_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. 
+ * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_mohawk_switch_mm) + mov ip, #0 + mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + orr r0, r0, #0x18 @ cache the page table in L2 + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mov pc, lr + +/* + * cpu_mohawk_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_mohawk_set_pte_ext) + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + __INIT + + .type __mohawk_setup, #function +__mohawk_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs + orr r4, r4, #0x18 @ cache the page table in L2 + mcr p15, 0, r4, c2, c0, 0 @ load page table pointer + + mov r0, #0 @ don't allow CP access + mcr p15, 0, r0, c15, c1, 0 @ write CP access register + + adr r5, mohawk_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register + bic r0, r0, r5 + orr r0, r0, r6 + mov pc, lr + + .size __mohawk_setup, . - __mohawk_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..00 0101 + * + */ + .type mohawk_crval, #object +mohawk_crval: + crval clear=0x00007f3f, mmuset=0x00003905, ucset=0x00001134 + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type mohawk_processor_functions, #object +mohawk_processor_functions: + .word v5t_early_abort + .word pabort_noifar + .word cpu_mohawk_proc_init + .word cpu_mohawk_proc_fin + .word cpu_mohawk_reset + .word cpu_mohawk_do_idle + .word cpu_mohawk_dcache_clean_area + .word cpu_mohawk_switch_mm + .word cpu_mohawk_set_pte_ext + .size mohawk_processor_functions, . 
- mohawk_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv5te" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v5" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_mohawk_name, #object +cpu_mohawk_name: + .asciz "Marvell 88SV331x" + .size cpu_mohawk_name, . - cpu_mohawk_name + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __88sv331x_proc_info,#object +__88sv331x_proc_info: + .long 0x56158000 @ Marvell 88SV331x (MOHAWK) + .long 0xfffff000 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __mohawk_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_mohawk_name + .long mohawk_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long mohawk_cache_fns + .size __88sv331x_proc_info, . - __88sv331x_proc_info -- cgit v1.2.3 From 28853ac8fe5221de74a14f1182d7b2b383dfd85c Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Wed, 25 Mar 2009 13:10:01 +0200 Subject: ARM: Add support for FA526 v2 Adds support for Faraday FA526 core. 
This core is used at least by: Cortina Systems Gemini and Centroid family Cavium Networks ECONA family Grain Media GM8120 Pixelplus ImageARM Prolific PL-1029 Faraday IP evaluation boards v2: - move TLB_BTB to separate patch - update copyrights Signed-off-by: Paulius Zaleckas --- arch/arm/mm/Kconfig | 35 ++++++- arch/arm/mm/Makefile | 4 + arch/arm/mm/cache-fa.S | 220 ++++++++++++++++++++++++++++++++++++++++ arch/arm/mm/copypage-fa.c | 86 ++++++++++++++++ arch/arm/mm/proc-fa526.S | 248 ++++++++++++++++++++++++++++++++++++++++++++++ arch/arm/mm/tlb-fa.S | 75 ++++++++++++++ 6 files changed, 666 insertions(+), 2 deletions(-) create mode 100644 arch/arm/mm/cache-fa.S create mode 100644 arch/arm/mm/copypage-fa.c create mode 100644 arch/arm/mm/proc-fa526.S create mode 100644 arch/arm/mm/tlb-fa.S (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index d490f3773c01..bc3331863d9d 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -186,6 +186,24 @@ config CPU_ARM926T Say Y if you want support for the ARM926T processor. Otherwise, say N. +# FA526 +config CPU_FA526 + bool + select CPU_32v4 + select CPU_ABRT_EV4 + select CPU_PABRT_NOIFAR + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_CACHE_FA + select CPU_COPY_FA if MMU + select CPU_TLB_FA if MMU + help + The FA526 is a version of the ARMv4 compatible processor with + Branch Target Buffer, Unified TLB and cache line size 16. + + Say Y if you want support for the FA526 processor. + Otherwise, say N. + # ARM940T config CPU_ARM940T bool "Support ARM940T processor" if ARCH_INTEGRATOR @@ -484,6 +502,9 @@ config CPU_CACHE_VIVT config CPU_CACHE_VIPT bool +config CPU_CACHE_FA + bool + if MMU # The copy-page model config CPU_COPY_V3 @@ -498,6 +519,9 @@ config CPU_COPY_V4WB config CPU_COPY_FEROCEON bool +config CPU_COPY_FA + bool + config CPU_COPY_V6 bool @@ -528,6 +552,13 @@ config CPU_TLB_FEROCEON help Feroceon TLB (v4wbi with non-outer-cachable page table walks). 
+config CPU_TLB_FA + bool + help + Faraday ARM FA526 architecture, unified TLB with writeback cache + and invalidate instruction cache entry. Branch target buffer is + also supported. + config CPU_TLB_V6 bool @@ -638,7 +669,7 @@ config CPU_DCACHE_SIZE config CPU_DCACHE_WRITETHROUGH bool "Force write through D-cache" - depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020) && !CPU_DCACHE_DISABLE + depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE default y if CPU_ARM925T help Say Y here to use the data cache in writethrough mode. Unless you @@ -653,7 +684,7 @@ config CPU_CACHE_ROUND_ROBIN config CPU_BPREDICT_DISABLE bool "Disable branch prediction" - depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7 + depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7 || CPU_FA526 help Say Y here to disable branch prediction. If unsure, say N. 
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 480f78a3611a..40f941c2245c 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o +obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o @@ -41,6 +42,7 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o context.o obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o +obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o @@ -49,6 +51,7 @@ obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o +obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o @@ -62,6 +65,7 @@ obj-$(CONFIG_CPU_ARM925T) += proc-arm925.o obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o obj-$(CONFIG_CPU_ARM940T) += proc-arm940.o obj-$(CONFIG_CPU_ARM946E) += proc-arm946.o +obj-$(CONFIG_CPU_FA526) += proc-fa526.o obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S new file mode 100644 index 000000000000..b63a8f7b95cf --- /dev/null +++ b/arch/arm/mm/cache-fa.S @@ -0,0 +1,220 @@ +/* + * linux/arch/arm/mm/cache-fa.S + * + * Copyright (C) 2005 Faraday Corp. 
+ * Copyright (C) 2008-2009 Paulius Zaleckas + * + * Based on cache-v4wb.S: + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Processors: FA520 FA526 FA626 + */ +#include +#include +#include +#include + +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 16 + +/* + * The total size of the data cache. + */ +#ifdef CONFIG_ARCH_GEMINI +#define CACHE_DSIZE 8192 +#else +#define CACHE_DSIZE 16384 +#endif + +/* FIXME: put optimal value here. The current one is just an estimate */ +#define CACHE_DLIMIT (CACHE_DSIZE * 2) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular address + * space. + */ +ENTRY(fa_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(fa_flush_kern_cache_all) + mov ip, #0 + mov r2, #VM_EXEC +__flush_whole_cache: + mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vm_area_struct flags describing address space + */ +ENTRY(fa_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT @ total size >= limit?
+ bhs __flush_whole_cache @ flush whole D cache + +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_kern_range) + /* fall through */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_kern_dcache_page(kaddr) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. 
+ * + * - kaddr - kernel address (guaranteed to be page aligned) + */ +ENTRY(fa_flush_kern_dcache_page) + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_dma_inv_range) + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + tst r1, #CACHE_DLINESIZE - 1 + bic r1, r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean (write back) the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_dma_clean_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(fa_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + + __INITDATA + + .type fa_cache_fns, #object +ENTRY(fa_cache_fns) + .long fa_flush_kern_cache_all + .long fa_flush_user_cache_all + .long fa_flush_user_cache_range + .long fa_coherent_kern_range + .long fa_coherent_user_range + .long fa_flush_kern_dcache_page + .long fa_dma_inv_range + .long fa_dma_clean_range + .long fa_dma_flush_range + .size fa_cache_fns, . - fa_cache_fns diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c new file mode 100644 index 000000000000..b2a6008b0111 --- /dev/null +++ b/arch/arm/mm/copypage-fa.c @@ -0,0 +1,86 @@ +/* + * linux/arch/arm/mm/copypage-fa.c + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas + * + * Based on copypage-v4wb.S: + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ +#include +#include + +/* + * Faraday optimised copy_user_page + */ +static void __naked +fa_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4, lr} @ 2\n\ + mov r2, %0 @ 1\n\ +1: ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add r0, r0, #16 @ 1\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add r0, r0, #16 @ 1\n\ + subs r2, r2, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r2, c7, c10, 4 @ 1 drain WB\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "I" (PAGE_SIZE / 32)); +} + +void fa_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to, KM_USER0); + kfrom = kmap_atomic(from, KM_USER1); + fa_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom, KM_USER1); + kunmap_atomic(kto, KM_USER0); +} + +/* + * Faraday optimised clear_user_page + * + * Same story as above. 
+ */ +void fa_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page, KM_USER0); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr, KM_USER0); +} + +struct cpu_user_fns fa_user_fns __initdata = { + .cpu_clear_user_highpage = fa_clear_user_highpage, + .cpu_copy_user_highpage = fa_copy_user_highpage, +}; diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S new file mode 100644 index 000000000000..08b8a955d5d7 --- /dev/null +++ b/arch/arm/mm/proc-fa526.S @@ -0,0 +1,248 @@ +/* + * linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526 + * + * Written by : Luke Lee + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * + * These are the low level assembler for performing cache and TLB + * functions on the fa526. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proc-macros.S" + +#define CACHE_DLINESIZE 16 + + .text +/* + * cpu_fa526_proc_init() + */ +ENTRY(cpu_fa526_proc_init) + mov pc, lr + +/* + * cpu_fa526_proc_fin() + */ +ENTRY(cpu_fa526_proc_fin) + stmfd sp!, {lr} + mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE + msr cpsr_c, ip + bl fa_flush_kern_cache_all + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + nop + nop + ldmfd sp!, {pc} + +/* + * cpu_fa526_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 4 +ENTRY(cpu_fa526_reset) +/* TODO: Use CP8 if possible... */ + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + bic ip, ip, #0x0800 @ BTB off + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + nop + nop + mov pc, r0 + +/* + * cpu_fa526_do_idle() + */ + .align 4 +ENTRY(cpu_fa526_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + + +ENTRY(cpu_fa526_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_fa526_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. 
+ * + * pgd: new page tables + */ + .align 4 +ENTRY(cpu_fa526_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else + mcr p15, 0, ip, c7, c14, 0 @ clean and invalidate whole D cache +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c5, 6 @ invalidate BTB since mm changed + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB +#endif + mov pc, lr + +/* + * cpu_fa526_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 4 +ENTRY(cpu_fa526_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + mov pc, lr + + __INIT + + .type __fa526_setup, #function +__fa526_setup: + /* On return of this routine, r0 must carry correct flags for CFG register */ + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + mcr p15, 0, r0, c7, c5, 5 @ invalidate IScratchpad RAM + + mov r0, #1 + mcr p15, 0, r0, c1, c1, 0 @ turn-on ECR + + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB All + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + + mov r0, #0x1f @ Domains 0, 1 = manager, 2 = client + mcr p15, 0, r0, c3, c0 @ load domain access register + + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, fa526_cr1_clear + bic r0, r0, r5 + ldr r5, fa526_cr1_set + orr r0, r0, r5 + mov pc, lr + .size __fa526_setup, . 
- __fa526_setup + + /* + * .RVI ZFRS BLDP WCAM + * ..11 1001 .111 1101 + * + */ + .type fa526_cr1_clear, #object + .type fa526_cr1_set, #object +fa526_cr1_clear: + .word 0x3f3f +fa526_cr1_set: + .word 0x397D + + __INITDATA + +/* + * Purpose : Function pointers used to access above functions - all calls + * come through these + */ + .type fa526_processor_functions, #object +fa526_processor_functions: + .word v4_early_abort + .word pabort_noifar + .word cpu_fa526_proc_init + .word cpu_fa526_proc_fin + .word cpu_fa526_reset + .word cpu_fa526_do_idle + .word cpu_fa526_dcache_clean_area + .word cpu_fa526_switch_mm + .word cpu_fa526_set_pte_ext + .size fa526_processor_functions, . - fa526_processor_functions + + .section ".rodata" + + .type cpu_arch_name, #object +cpu_arch_name: + .asciz "armv4" + .size cpu_arch_name, . - cpu_arch_name + + .type cpu_elf_name, #object +cpu_elf_name: + .asciz "v4" + .size cpu_elf_name, . - cpu_elf_name + + .type cpu_fa526_name, #object +cpu_fa526_name: + .asciz "FA526" + .size cpu_fa526_name, . - cpu_fa526_name + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __fa526_proc_info,#object +__fa526_proc_info: + .long 0x66015261 + .long 0xff01fff1 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __fa526_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF + .long cpu_fa526_name + .long fa526_processor_functions + .long fa_tlb_fns + .long fa_user_fns + .long fa_cache_fns + .size __fa526_proc_info, . - __fa526_proc_info diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S new file mode 100644 index 000000000000..9694f1f6f485 --- /dev/null +++ b/arch/arm/mm/tlb-fa.S @@ -0,0 +1,75 @@ +/* + * linux/arch/arm/mm/tlb-fa.S + * + * Copyright (C) 2005 Faraday Corp. 
+ * Copyright (C) 2008-2009 Paulius Zaleckas + * + * Based on tlb-v4wbi.S: + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 4, Faraday variation. + * This assumes a unified TLB, with a write buffer and a branch target buffer (BTB) + * + * Processors: FA520 FA526 FA626 + */ +#include +#include +#include +#include +#include "proc-macros.S" + + +/* + * flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 4 +ENTRY(fa_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + movne pc, lr @ no, we don't do anything + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r3, c7, c10, 4 @ data write barrier + mov pc, lr + + +ENTRY(fa_flush_kern_tlb_range) + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r3, c7, c10, 4 @ data write barrier + mcr p15, 0, r3, c7, c5, 4 @ prefetch flush + mov pc, lr + + __INITDATA + + .type fa_tlb_fns, #object +ENTRY(fa_tlb_fns) + .long fa_flush_user_tlb_range + .long fa_flush_kern_tlb_range + .long fa_tlb_flags + .size fa_tlb_fns, .
- fa_tlb_fns -- cgit v1.2.3 From f0bba9f934517533acbda7329be93f55d5a01c03 Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Sat, 28 Mar 2009 19:18:05 +0100 Subject: [ARM] 5435/1: fix compile warning in sanity_check_meminfo() Compiling recent 2.6.29-rc kernels for ARM gives me the following warning: arch/arm/mm/mmu.c: In function 'sanity_check_meminfo': arch/arm/mm/mmu.c:697: warning: comparison between pointer and integer This is because commit 3fd9825c42c784a59b3b90bdf073f49d4bb42a8d "[ARM] 5402/1: fix a case of wrap-around in sanity_check_meminfo()" in 2.6.29-rc5-git4 added a comparison of a pointer with PAGE_OFFSET, which is an integer. Fixed by casting PAGE_OFFSET to void *. Signed-off-by: Mikael Pettersson Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index d4d082c5c2d4..5a89e57e342d 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -694,7 +694,7 @@ static void __init sanity_check_meminfo(void) * the vmalloc area. */ if (__va(bank->start) >= VMALLOC_MIN || - __va(bank->start) < PAGE_OFFSET) { + __va(bank->start) < (void *)PAGE_OFFSET) { printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx " "(vmalloc region overlap).\n", bank->start, bank->start + bank->size - 1); -- cgit v1.2.3