summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile19
-rw-r--r--arch/powerpc/kernel/asm-offsets.c46
-rw-r--r--arch/powerpc/kernel/cpu_setup_power4.S233
-rw-r--r--arch/powerpc/kernel/cputable.c19
-rw-r--r--arch/powerpc/kernel/firmware.c45
-rw-r--r--arch/powerpc/kernel/fpu.S24
-rw-r--r--arch/powerpc/kernel/head_32.S1
-rw-r--r--arch/powerpc/kernel/head_64.S91
-rw-r--r--arch/powerpc/kernel/ioctl32.c49
-rw-r--r--arch/powerpc/kernel/irq.c478
-rw-r--r--arch/powerpc/kernel/lparcfg.c609
-rw-r--r--arch/powerpc/kernel/misc_32.S23
-rw-r--r--arch/powerpc/kernel/misc_64.S8
-rw-r--r--arch/powerpc/kernel/paca.c135
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c5
-rw-r--r--arch/powerpc/kernel/proc_ppc64.c126
-rw-r--r--arch/powerpc/kernel/prom.c35
-rw-r--r--arch/powerpc/kernel/prom_init.c187
-rw-r--r--arch/powerpc/kernel/rtas-proc.c3
-rw-r--r--arch/powerpc/kernel/rtas.c5
-rw-r--r--arch/powerpc/kernel/rtas_pci.c513
-rw-r--r--arch/powerpc/kernel/setup-common.c41
-rw-r--r--arch/powerpc/kernel/setup.h6
-rw-r--r--arch/powerpc/kernel/setup_32.c18
-rw-r--r--arch/powerpc/kernel/setup_64.c123
-rw-r--r--arch/powerpc/kernel/signal_32.c13
-rw-r--r--arch/powerpc/kernel/smp.c10
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c1
-rw-r--r--arch/powerpc/kernel/sysfs.c383
-rw-r--r--arch/powerpc/kernel/time.c41
-rw-r--r--arch/powerpc/kernel/traps.c7
-rw-r--r--arch/powerpc/kernel/udbg.c125
-rw-r--r--arch/powerpc/kernel/udbg_16550.c123
-rw-r--r--arch/powerpc/kernel/udbg_scc.c135
-rw-r--r--arch/powerpc/kernel/vdso.c746
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile40
-rw-r--r--arch/powerpc/kernel/vdso32/cacheflush.S67
-rw-r--r--arch/powerpc/kernel/vdso32/datapage.S84
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S315
-rw-r--r--arch/powerpc/kernel/vdso32/note.S25
-rw-r--r--arch/powerpc/kernel/vdso32/sigtramp.S300
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32.lds.S117
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32_wrapper.S13
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile35
-rw-r--r--arch/powerpc/kernel/vdso64/cacheflush.S66
-rw-r--r--arch/powerpc/kernel/vdso64/datapage.S84
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S242
-rw-r--r--arch/powerpc/kernel/vdso64/note.S1
-rw-r--r--arch/powerpc/kernel/vdso64/sigtramp.S295
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64.lds.S116
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64_wrapper.S13
-rw-r--r--arch/powerpc/kernel/vio.c27
52 files changed, 5898 insertions, 368 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b3ae2993efb8..9a74b7ab03a4 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -4,6 +4,7 @@
ifeq ($(CONFIG_PPC64),y)
EXTRA_CFLAGS += -mno-minimal-toc
+CFLAGS_ioctl32.o += -Ifs/
endif
ifeq ($(CONFIG_PPC32),y)
CFLAGS_prom_init.o += -fPIC
@@ -11,17 +12,29 @@ CFLAGS_btext.o += -fPIC
endif
obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
- signal_32.o pmc.o
+ irq.o signal_32.o pmc.o vdso.o
+obj-y += vdso32/
obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \
- signal_64.o ptrace32.o systbl.o
+ signal_64.o ptrace32.o systbl.o \
+ paca.o ioctl32.o cpu_setup_power4.o \
+ firmware.o sysfs.o udbg.o
+obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
obj-$(CONFIG_POWER4) += idle_power4.o
obj-$(CONFIG_PPC_OF) += of_device.o
-obj-$(CONFIG_PPC_RTAS) += rtas.o
+procfs-$(CONFIG_PPC64) := proc_ppc64.o
+obj-$(CONFIG_PROC_FS) += $(procfs-y)
+rtaspci-$(CONFIG_PPC64) := rtas_pci.o
+obj-$(CONFIG_PPC_RTAS) += rtas.o $(rtaspci-y)
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
+obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
+obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o
+obj-$(CONFIG_PPC_MAPLE) += udbg_16550.o
+udbgscc-$(CONFIG_PPC64) := udbg_scc.o
+obj-$(CONFIG_PPC_PMAC) += $(udbgscc-y)
ifeq ($(CONFIG_PPC_MERGE),y)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index b75757251994..4550eb4f4fbd 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -37,12 +37,12 @@
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/rtas.h>
+#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#include <asm/lppaca.h>
#include <asm/iseries/hv_lp_event.h>
#include <asm/cache.h>
-#include <asm/systemcfg.h>
#include <asm/compat.h>
#endif
@@ -106,7 +106,6 @@ int main(void)
DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
- DEFINE(PLATFORM, offsetof(struct systemcfg, platform));
DEFINE(PLATFORM_LPAR, PLATFORM_LPAR);
/* paca */
@@ -252,25 +251,42 @@ int main(void)
DEFINE(TASK_SIZE, TASK_SIZE);
DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28);
-#else /* CONFIG_PPC64 */
- /* systemcfg offsets for use by vdso */
- DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp));
- DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec));
- DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs));
- DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec));
- DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count));
- DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest));
- DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime));
- DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32));
- DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64));
+#endif /* ! CONFIG_PPC64 */
- /* timeval/timezone offsets for use by vdso */
+ /* datapage offsets for use by vdso */
+ DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp));
+ DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec));
+ DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs));
+ DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec));
+ DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count));
+ DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest));
+ DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
+ DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
+ DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
+ DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
+#ifdef CONFIG_PPC64
+ DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64));
DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec));
DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
+ DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec));
+ DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec));
+#else
+ DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec));
+ DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec));
+ DEFINE(TSPEC32_TV_SEC, offsetof(struct timespec, tv_sec));
+ DEFINE(TSPEC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
+#endif
+ /* timeval/timezone offsets for use by vdso */
DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
-#endif /* CONFIG_PPC64 */
+
+ /* Other bits used by the vdso */
+ DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
+ DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
+ DEFINE(CLOCK_REALTIME_RES, TICK_NSEC);
+
return 0;
}
diff --git a/arch/powerpc/kernel/cpu_setup_power4.S b/arch/powerpc/kernel/cpu_setup_power4.S
new file mode 100644
index 000000000000..cca942fe6115
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power4.S
@@ -0,0 +1,233 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+_GLOBAL(__970_cpu_preinit)
+ /*
+ * Do nothing if not running in HV mode
+ */
+ mfmsr r0
+ rldicl. r0,r0,4,63
+ beqlr
+
+ /*
+ * Deal only with PPC970 and PPC970FX.
+ */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39
+ beq 1f
+ cmpwi r0,0x3c
+ beq 1f
+ cmpwi r0,0x44
+ bnelr
+1:
+
+ /* Make sure HID4:rm_ci is off before MMU is turned off, that large
+ * pages are enabled with HID4:61 and clear HID5:DCBZ_size and
+ * HID5:DCBZ32_ill
+ */
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ rldimi r3,r0,2,61 /* clear bit 61 (lg_pg_en) */
+ sync
+ mtspr SPRN_HID4,r3
+ isync
+ sync
+ mfspr r3,SPRN_HID5
+ rldimi r3,r0,6,56 /* clear bits 56 & 57 (DCBZ*) */
+ sync
+ mtspr SPRN_HID5,r3
+ isync
+ sync
+
+ /* Setup some basic HID1 features */
+ mfspr r0,SPRN_HID1
+ li r3,0x1200 /* enable i-fetch cacheability */
+ sldi r3,r3,44 /* and prefetch */
+ or r0,r0,r3
+ mtspr SPRN_HID1,r0
+ mtspr SPRN_HID1,r0
+ isync
+
+ /* Clear HIOR */
+ li r0,0
+ sync
+ mtspr SPRN_HIOR,0 /* Clear interrupt prefix */
+ isync
+ blr
+
+_GLOBAL(__setup_cpu_power4)
+ blr
+
+_GLOBAL(__setup_cpu_be)
+ /* Set large page sizes LP=0: 16MB, LP=1: 64KB */
+ addi r3, 0, 0
+ ori r3, r3, HID6_LB
+ sldi r3, r3, 32
+ nor r3, r3, r3
+ mfspr r4, SPRN_HID6
+ and r4, r4, r3
+ addi r3, 0, 0x02000
+ sldi r3, r3, 32
+ or r4, r4, r3
+ mtspr SPRN_HID6, r4
+ blr
+
+_GLOBAL(__setup_cpu_ppc970)
+ mfspr r0,SPRN_HID0
+ li r11,5 /* clear DOZE and SLEEP */
+ rldimi r0,r11,52,8 /* set NAP and DPM */
+ mtspr SPRN_HID0,r0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ mfspr r0,SPRN_HID0
+ sync
+ isync
+ blr
+
+/* Definitions for the table use to save CPU states */
+#define CS_HID0 0
+#define CS_HID1 8
+#define CS_HID4 16
+#define CS_HID5 24
+#define CS_SIZE 32
+
+ .data
+ .balign L1_CACHE_BYTES,0
+cpu_state_storage:
+ .space CS_SIZE
+ .balign L1_CACHE_BYTES,0
+ .text
+
+/* Called in normal context to backup CPU 0 state. This
+ * does not include cache settings. This function is also
+ * called for machine sleep. This does not include the MMU
+ * setup, BATs, etc... but rather the "special" registers
+ * like HID0, HID1, HID4, etc...
+ */
+_GLOBAL(__save_cpu_setup)
+ /* Some CR fields are volatile, we back it up all */
+ mfcr r7
+
+ /* Get storage ptr */
+ LOADADDR(r5,cpu_state_storage)
+
+ /* We only deal with 970 for now */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39
+ beq 1f
+ cmpwi r0,0x3c
+ beq 1f
+ cmpwi r0,0x44
+ bne 2f
+
+1: /* Save HID0,1,4 and 5 */
+ mfspr r3,SPRN_HID0
+ std r3,CS_HID0(r5)
+ mfspr r3,SPRN_HID1
+ std r3,CS_HID1(r5)
+ mfspr r3,SPRN_HID4
+ std r3,CS_HID4(r5)
+ mfspr r3,SPRN_HID5
+ std r3,CS_HID5(r5)
+
+2:
+ mtcr r7
+ blr
+
+/* Called with no MMU context (typically MSR:IR/DR off) to
+ * restore CPU state as backed up by the previous
+ * function. This does not include cache setting
+ */
+_GLOBAL(__restore_cpu_setup)
+ /* Get storage ptr (FIXME when using anton reloc as we
+ * are running with translation disabled here
+ */
+ LOADADDR(r5,cpu_state_storage)
+
+ /* We only deal with 970 for now */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39
+ beq 1f
+ cmpwi r0,0x3c
+ beq 1f
+ cmpwi r0,0x44
+ bnelr
+
+1: /* Before accessing memory, we make sure rm_ci is clear */
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ sync
+ mtspr SPRN_HID4,r3
+ isync
+ sync
+
+ /* Clear interrupt prefix */
+ li r0,0
+ sync
+ mtspr SPRN_HIOR,0
+ isync
+
+ /* Restore HID0 */
+ ld r3,CS_HID0(r5)
+ sync
+ isync
+ mtspr SPRN_HID0,r3
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ mfspr r3,SPRN_HID0
+ sync
+ isync
+
+ /* Restore HID1 */
+ ld r3,CS_HID1(r5)
+ sync
+ isync
+ mtspr SPRN_HID1,r3
+ mtspr SPRN_HID1,r3
+ sync
+ isync
+
+ /* Restore HID4 */
+ ld r3,CS_HID4(r5)
+ sync
+ isync
+ mtspr SPRN_HID4,r3
+ sync
+ isync
+
+ /* Restore HID5 */
+ ld r3,CS_HID5(r5)
+ sync
+ isync
+ mtspr SPRN_HID5,r3
+ sync
+ isync
+ blr
+
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index cc4e9eb1c13f..1d85cedbbb7b 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -52,6 +52,9 @@ extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
PPC_FEATURE_HAS_MMU)
#define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64)
+#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4)
+#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5)
+#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS)
/* We only set the spe features if the kernel was compiled with
@@ -160,7 +163,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00350000,
.cpu_name = "POWER4 (gp)",
.cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER4,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -175,7 +178,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00380000,
.cpu_name = "POWER4+ (gq)",
.cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER4,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -190,7 +193,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00390000,
.cpu_name = "PPC970",
.cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_PPC64 |
+ .cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -212,7 +215,7 @@ struct cpu_spec cpu_specs[] = {
#else
.cpu_features = CPU_FTRS_PPC970,
#endif
- .cpu_user_features = COMMON_USER_PPC64 |
+ .cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -230,7 +233,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x00440000,
.cpu_name = "PPC970MP",
.cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_PPC64 |
+ .cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
@@ -245,7 +248,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x003a0000,
.cpu_name = "POWER5 (gr)",
.cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER5,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -260,7 +263,7 @@ struct cpu_spec cpu_specs[] = {
.pvr_value = 0x003b0000,
.cpu_name = "POWER5 (gs)",
.cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_PPC64,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
@@ -276,7 +279,7 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "Cell Broadband Engine",
.cpu_features = CPU_FTRS_CELL,
.cpu_user_features = COMMON_USER_PPC64 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
+ PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP,
.icache_bsize = 128,
.dcache_bsize = 128,
.cpu_setup = __setup_cpu_be,
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
new file mode 100644
index 000000000000..65eae752a527
--- /dev/null
+++ b/arch/powerpc/kernel/firmware.c
@@ -0,0 +1,45 @@
+/*
+ * Extracted from cputable.c
+ *
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ * Copyright (C) 2005 Stephen Rothwell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+
+#include <asm/firmware.h>
+
+unsigned long ppc64_firmware_features;
+
+#ifdef CONFIG_PPC_PSERIES
+firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
+ {FW_FEATURE_PFT, "hcall-pft"},
+ {FW_FEATURE_TCE, "hcall-tce"},
+ {FW_FEATURE_SPRG0, "hcall-sprg0"},
+ {FW_FEATURE_DABR, "hcall-dabr"},
+ {FW_FEATURE_COPY, "hcall-copy"},
+ {FW_FEATURE_ASR, "hcall-asr"},
+ {FW_FEATURE_DEBUG, "hcall-debug"},
+ {FW_FEATURE_PERF, "hcall-perf"},
+ {FW_FEATURE_DUMP, "hcall-dump"},
+ {FW_FEATURE_INTERRUPT, "hcall-interrupt"},
+ {FW_FEATURE_MIGRATE, "hcall-migrate"},
+ {FW_FEATURE_PERFMON, "hcall-perfmon"},
+ {FW_FEATURE_CRQ, "hcall-crq"},
+ {FW_FEATURE_VIO, "hcall-vio"},
+ {FW_FEATURE_RDMA, "hcall-rdma"},
+ {FW_FEATURE_LLAN, "hcall-lLAN"},
+ {FW_FEATURE_BULK, "hcall-bulk"},
+ {FW_FEATURE_XDABR, "hcall-xdabr"},
+ {FW_FEATURE_MULTITCE, "hcall-multi-tce"},
+ {FW_FEATURE_SPLPAR, "hcall-splpar"},
+};
+#endif
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 4d6001fa1cf2..b780b42c95fc 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -41,20 +41,20 @@ _GLOBAL(load_up_fpu)
#ifndef CONFIG_SMP
LOADBASE(r3, last_task_used_math)
toreal(r3)
- LDL r4,OFF(last_task_used_math)(r3)
- CMPI 0,r4,0
+ PPC_LL r4,OFF(last_task_used_math)(r3)
+ PPC_LCMPI 0,r4,0
beq 1f
toreal(r4)
addi r4,r4,THREAD /* want last_task_used_math->thread */
SAVE_32FPRS(0, r4)
mffs fr0
stfd fr0,THREAD_FPSCR(r4)
- LDL r5,PT_REGS(r4)
+ PPC_LL r5,PT_REGS(r4)
toreal(r5)
- LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
li r10,MSR_FP|MSR_FE0|MSR_FE1
andc r4,r4,r10 /* disable FP for previous task */
- STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
/* enable use of FP after return */
@@ -77,7 +77,7 @@ _GLOBAL(load_up_fpu)
#ifndef CONFIG_SMP
subi r4,r5,THREAD
fromreal(r4)
- STL r4,OFF(last_task_used_math)(r3)
+ PPC_STL r4,OFF(last_task_used_math)(r3)
#endif /* CONFIG_SMP */
/* restore registers and return */
/* we haven't used ctr or xer or lr */
@@ -97,24 +97,24 @@ _GLOBAL(giveup_fpu)
MTMSRD(r5) /* enable use of fpu now */
SYNC_601
isync
- CMPI 0,r3,0
+ PPC_LCMPI 0,r3,0
beqlr- /* if no previous owner, done */
addi r3,r3,THREAD /* want THREAD of task */
- LDL r5,PT_REGS(r3)
- CMPI 0,r5,0
+ PPC_LL r5,PT_REGS(r3)
+ PPC_LCMPI 0,r5,0
SAVE_32FPRS(0, r3)
mffs fr0
stfd fr0,THREAD_FPSCR(r3)
beq 1f
- LDL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
li r3,MSR_FP|MSR_FE0|MSR_FE1
andc r4,r4,r3 /* disable FP for previous task */
- STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
li r5,0
LOADBASE(r4,last_task_used_math)
- STL r5,OFF(last_task_used_math)(r4)
+ PPC_STL r5,OFF(last_task_used_math)(r4)
#endif /* CONFIG_SMP */
blr
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index b102e3a2415e..ccdf94731e30 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -1100,6 +1100,7 @@ start_here:
mr r3,r31
mr r4,r30
bl machine_init
+ bl __save_cpu_setup
bl MMU_init
#ifdef CONFIG_APUS
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 16ab40daa738..8a8bf79ef044 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -28,7 +28,6 @@
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/systemcfg.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/bug.h>
@@ -1697,25 +1696,14 @@ _GLOBAL(pmac_secondary_start)
* SPRG3 = paca virtual address
*/
_GLOBAL(__secondary_start)
+ /* Set thread priority to MEDIUM */
+ HMT_MEDIUM
- HMT_MEDIUM /* Set thread priority to MEDIUM */
-
+ /* Load TOC */
ld r2,PACATOC(r13)
- li r6,0
- stb r6,PACAPROCENABLED(r13)
-
-#ifndef CONFIG_PPC_ISERIES
- /* Initialize the page table pointer register. */
- LOADADDR(r6,_SDR1)
- ld r6,0(r6) /* get the value of _SDR1 */
- mtspr SPRN_SDR1,r6 /* set the htab location */
-#endif
- /* Initialize the first segment table (or SLB) entry */
- ld r3,PACASTABVIRT(r13) /* get addr of segment table */
-BEGIN_FTR_SECTION
- bl .stab_initialize
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
- bl .slb_initialize
+
+ /* Do early setup for that CPU (stab, slb, hash table pointer) */
+ bl .early_setup_secondary
/* Initialize the kernel stack. Just a repeat for iSeries. */
LOADADDR(r3,current_set)
@@ -1724,37 +1712,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
std r1,PACAKSAVE(r13)
- ld r3,PACASTABREAL(r13) /* get raddr of segment table */
- ori r4,r3,1 /* turn on valid bit */
-
-#ifdef CONFIG_PPC_ISERIES
- li r0,-1 /* hypervisor call */
- li r3,1
- sldi r3,r3,63 /* 0x8000000000000000 */
- ori r3,r3,4 /* 0x8000000000000004 */
- sc /* HvCall_setASR */
-#else
- /* set the ASR */
- ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
- ld r3,0(r3)
- lwz r3,PLATFORM(r3) /* r3 = platform flags */
- andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */
- beq 98f /* branch if result is 0 */
- mfspr r3,SPRN_PVR
- srwi r3,r3,16
- cmpwi r3,0x37 /* SStar */
- beq 97f
- cmpwi r3,0x36 /* IStar */
- beq 97f
- cmpwi r3,0x34 /* Pulsar */
- bne 98f
-97: li r3,H_SET_ASR /* hcall = H_SET_ASR */
- HVSC /* Invoking hcall */
- b 99f
-98: /* !(rpa hypervisor) || !(star) */
- mtasr r4 /* set the stab location */
-99:
-#endif
+ /* Clear backchain so we get nice backtraces */
li r7,0
mtlr r7
@@ -1777,6 +1735,7 @@ _GLOBAL(start_secondary_prolog)
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
bl .start_secondary
+ b .
#endif
/*
@@ -1896,40 +1855,6 @@ _STATIC(start_here_multiplatform)
mr r3,r31
bl .early_setup
- /* set the ASR */
- ld r3,PACASTABREAL(r13)
- ori r4,r3,1 /* turn on valid bit */
- ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
- ld r3,0(r3)
- lwz r3,PLATFORM(r3) /* r3 = platform flags */
- andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */
- beq 98f /* branch if result is 0 */
- mfspr r3,SPRN_PVR
- srwi r3,r3,16
- cmpwi r3,0x37 /* SStar */
- beq 97f
- cmpwi r3,0x36 /* IStar */
- beq 97f
- cmpwi r3,0x34 /* Pulsar */
- bne 98f
-97: li r3,H_SET_ASR /* hcall = H_SET_ASR */
- HVSC /* Invoking hcall */
- b 99f
-98: /* !(rpa hypervisor) || !(star) */
- mtasr r4 /* set the stab location */
-99:
- /* Set SDR1 (hash table pointer) */
- ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
- ld r3,0(r3)
- lwz r3,PLATFORM(r3) /* r3 = platform flags */
- /* Test if bit 0 is set (LPAR bit) */
- andi. r3,r3,PLATFORM_LPAR
- bne 98f /* branch if result is !0 */
- LOADADDR(r6,_SDR1) /* Only if NOT LPAR */
- add r6,r6,r26
- ld r6,0(r6) /* get the value of _SDR1 */
- mtspr SPRN_SDR1,r6 /* set the htab location */
-98:
LOADADDR(r3,.start_here_common)
SET_REG_TO_CONST(r4, MSR_KERNEL)
mtspr SPRN_SRR0,r3
diff --git a/arch/powerpc/kernel/ioctl32.c b/arch/powerpc/kernel/ioctl32.c
new file mode 100644
index 000000000000..3fa6a93adbd0
--- /dev/null
+++ b/arch/powerpc/kernel/ioctl32.c
@@ -0,0 +1,49 @@
+/*
+ * ioctl32.c: Conversion between 32bit and 64bit native ioctls.
+ *
+ * Based on sparc64 ioctl32.c by:
+ *
+ * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
+ * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
+ *
+ * ppc64 changes:
+ *
+ * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com)
+ * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com)
+ *
+ * These routines maintain argument size conversion between 32bit and 64bit
+ * ioctls.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define INCLUDES
+#include "compat_ioctl.c"
+#include <linux/syscalls.h>
+
+#define CODE
+#include "compat_ioctl.c"
+
+#define HANDLE_IOCTL(cmd,handler) { cmd, (ioctl_trans_handler_t)handler, NULL },
+#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl)
+
+#define IOCTL_TABLE_START \
+ struct ioctl_trans ioctl_start[] = {
+#define IOCTL_TABLE_END \
+ };
+
+IOCTL_TABLE_START
+#include <linux/compat_ioctl.h>
+#define DECLARES
+#include "compat_ioctl.c"
+
+/* Little p (/dev/rtc, /dev/envctrl, etc.) */
+COMPATIBLE_IOCTL(_IOR('p', 20, int[7])) /* RTCGET */
+COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */
+
+IOCTL_TABLE_END
+
+int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
new file mode 100644
index 000000000000..4b7940693f3d
--- /dev/null
+++ b/arch/powerpc/kernel/irq.c
@@ -0,0 +1,478 @@
+/*
+ * arch/ppc/kernel/irq.c
+ *
+ * Derived from arch/i386/kernel/irq.c
+ * Copyright (C) 1992 Linus Torvalds
+ * Adapted from arch/i386 by Gary Thomas
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Updated and modified by Cort Dougan <cort@fsmlabs.com>
+ * Copyright (C) 1996-2001 Cort Dougan
+ * Adapted for Power Macintosh by Paul Mackerras
+ * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * The MPC8xx has an interrupt mask in the SIU. If a bit is set, the
+ * interrupt is _enabled_. As expected, IRQ0 is bit 0 in the 32-bit
+ * mask register (of which only 16 are defined), hence the weird shifting
+ * and complement of the cached_irq_mask. I want to be able to stuff
+ * this right into the SIU SMASK register.
+ * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx
+ * to reduce code space and undefined function references.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/threads.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/proc_fs.h>
+#include <linux/random.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <linux/profile.h>
+#include <linux/bitops.h>
+#ifdef CONFIG_PPC64
+#include <linux/kallsyms.h>
+#endif
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/cache.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#ifdef CONFIG_PPC64
+#include <asm/iseries/it_lp_queue.h>
+#include <asm/paca.h>
+#endif
+
+static int ppc_spurious_interrupts;
+
+#if defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP)
+extern void iSeries_smp_message_recv(struct pt_regs *);
+#endif
+
+#ifdef CONFIG_PPC32
+#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
+
+unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
+atomic_t ppc_n_lost_interrupts;
+
+#ifdef CONFIG_TAU_INT
+extern int tau_initialized;
+extern int tau_interrupts(int);
+#endif
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE)
+extern atomic_t ipi_recv;
+extern atomic_t ipi_sent;
+#endif
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC64
+EXPORT_SYMBOL(irq_desc);
+
+int distribute_irqs = 1;
+int __irq_offset_value;
+u64 ppc64_interrupt_controller;
+#endif /* CONFIG_PPC64 */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int i = *(loff_t *)v, j;
+ struct irqaction *action;
+ irq_desc_t *desc;
+ unsigned long flags;
+
+ if (i == 0) {
+ seq_puts(p, " ");
+ for_each_online_cpu(j)
+ seq_printf(p, "CPU%d ", j);
+ seq_putc(p, '\n');
+ }
+
+ if (i < NR_IRQS) {
+ desc = get_irq_desc(i);
+ spin_lock_irqsave(&desc->lock, flags);
+ action = desc->action;
+ if (!action || !action->handler)
+ goto skip;
+ seq_printf(p, "%3d: ", i);
+#ifdef CONFIG_SMP
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#else
+ seq_printf(p, "%10u ", kstat_irqs(i));
+#endif /* CONFIG_SMP */
+ if (desc->handler)
+ seq_printf(p, " %s ", desc->handler->typename);
+ else
+ seq_puts(p, " None ");
+ seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge ");
+ seq_printf(p, " %s", action->name);
+ for (action = action->next; action; action = action->next)
+ seq_printf(p, ", %s", action->name);
+ seq_putc(p, '\n');
+skip:
+ spin_unlock_irqrestore(&desc->lock, flags);
+ } else if (i == NR_IRQS) {
+#ifdef CONFIG_PPC32
+#ifdef CONFIG_TAU_INT
+ if (tau_initialized){
+ seq_puts(p, "TAU: ");
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ", tau_interrupts(j));
+ seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n");
+ }
+#endif
+#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE)
+ /* should this be per processor send/receive? */
+ seq_printf(p, "IPI (recv/sent): %10u/%u\n",
+ atomic_read(&ipi_recv), atomic_read(&ipi_sent));
+#endif
+#endif /* CONFIG_PPC32 */
+ seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts);
+ }
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void fixup_irqs(cpumask_t map)
+{
+ unsigned int irq;
+ static int warned;
+
+ for_each_irq(irq) {
+ cpumask_t mask;
+
+ if (irq_desc[irq].status & IRQ_PER_CPU)
+ continue;
+
+ cpus_and(mask, irq_affinity[irq], map);
+ if (any_online_cpu(mask) == NR_CPUS) {
+ printk("Breaking affinity for irq %i\n", irq);
+ mask = map;
+ }
+ if (irq_desc[irq].handler->set_affinity)
+ irq_desc[irq].handler->set_affinity(irq, mask);
+ else if (irq_desc[irq].action && !(warned++))
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+}
+#endif
+
+#ifdef CONFIG_PPC_ISERIES
+void do_IRQ(struct pt_regs *regs)
+{
+ struct paca_struct *lpaca;
+
+ irq_enter();
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 2KB free? */
+ {
+ long sp;
+
+ sp = __get_SP() & (THREAD_SIZE-1);
+
+ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+
+ lpaca = get_paca();
+#ifdef CONFIG_SMP
+ if (lpaca->lppaca.int_dword.fields.ipi_cnt) {
+ lpaca->lppaca.int_dword.fields.ipi_cnt = 0;
+ iSeries_smp_message_recv(regs);
+ }
+#endif /* CONFIG_SMP */
+ if (hvlpevent_is_pending())
+ process_hvlpevents(regs);
+
+ irq_exit();
+
+ if (lpaca->lppaca.int_dword.fields.decr_int) {
+ lpaca->lppaca.int_dword.fields.decr_int = 0;
+ /* Signal a fake decrementer interrupt */
+ timer_interrupt(regs);
+ }
+}
+
+#else /* CONFIG_PPC_ISERIES */
+
+void do_IRQ(struct pt_regs *regs)
+{
+ int irq;
+#ifdef CONFIG_IRQSTACKS
+ struct thread_info *curtp, *irqtp;
+#endif
+
+ irq_enter();
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 2KB free? */
+ {
+ long sp;
+
+ sp = __get_SP() & (THREAD_SIZE-1);
+
+ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+
+ /*
+ * Every platform is required to implement ppc_md.get_irq.
+ * This function will either return an irq number or -1 to
+ * indicate there are no more pending.
+ * The value -2 is for buggy hardware and means that this IRQ
+ * has already been handled. -- Tom
+ */
+ irq = ppc_md.get_irq(regs);
+
+ if (irq >= 0) {
+#ifdef CONFIG_IRQSTACKS
+ /* Switch to the irq stack to handle this */
+ curtp = current_thread_info();
+ irqtp = hardirq_ctx[smp_processor_id()];
+ if (curtp != irqtp) {
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
+ call___do_IRQ(irq, regs, irqtp);
+ irqtp->task = NULL;
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
+ } else
+#endif
+ __do_IRQ(irq, regs);
+ } else
+#ifdef CONFIG_PPC32
+ if (irq != -2)
+#endif
+ /* That's not SMP safe ... but who cares ? */
+ ppc_spurious_interrupts++;
+ irq_exit();
+}
+
+#endif /* CONFIG_PPC_ISERIES */
+
+void __init init_IRQ(void)
+{
+#ifdef CONFIG_PPC64
+ static int once = 0;
+
+ if (once)
+ return;
+
+ once++;
+
+#endif
+ ppc_md.init_IRQ();
+#ifdef CONFIG_PPC64
+ irq_ctx_init();
+#endif
+}
+
+#ifdef CONFIG_PPC64
+#ifndef CONFIG_PPC_ISERIES
+/*
+ * Virtual IRQ mapping code, used on systems with XICS interrupt controllers.
+ */
+
+#define UNDEFINED_IRQ 0xffffffff
+unsigned int virt_irq_to_real_map[NR_IRQS];
+
+/*
+ * Don't use virtual irqs 0, 1, 2 for devices.
+ * The pcnet32 driver considers interrupt numbers < 2 to be invalid,
+ * and 2 is the XICS IPI interrupt.
+ * We limit virtual irqs to 17 less than NR_IRQS so that when we
+ * offset them by 16 (to reserve the first 16 for ISA interrupts)
+ * we don't end up with an interrupt number >= NR_IRQS.
+ */
+#define MIN_VIRT_IRQ 3
+#define MAX_VIRT_IRQ (NR_IRQS - NUM_ISA_INTERRUPTS - 1)
+#define NR_VIRT_IRQS (MAX_VIRT_IRQ - MIN_VIRT_IRQ + 1)
+
+void
+virt_irq_init(void)
+{
+ int i;
+ for (i = 0; i < NR_IRQS; i++)
+ virt_irq_to_real_map[i] = UNDEFINED_IRQ;
+}
+
+/* Create a mapping for a real_irq if it doesn't already exist.
+ * Return the virtual irq as a convenience.
+ */
+int virt_irq_create_mapping(unsigned int real_irq)
+{
+ unsigned int virq, first_virq;
+ static int warned;
+
+ if (ppc64_interrupt_controller == IC_OPEN_PIC)
+ return real_irq; /* no mapping for openpic (for now) */
+
+ if (ppc64_interrupt_controller == IC_CELL_PIC)
+ return real_irq; /* no mapping for iic either */
+
+ /* don't map interrupts < MIN_VIRT_IRQ */
+ if (real_irq < MIN_VIRT_IRQ) {
+ virt_irq_to_real_map[real_irq] = real_irq;
+ return real_irq;
+ }
+
+ /* map to a number between MIN_VIRT_IRQ and MAX_VIRT_IRQ */
+ virq = real_irq;
+ if (virq > MAX_VIRT_IRQ)
+ virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ;
+
+ /* search for this number or a free slot */
+ first_virq = virq;
+ while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) {
+ if (virt_irq_to_real_map[virq] == real_irq)
+ return virq;
+ if (++virq > MAX_VIRT_IRQ)
+ virq = MIN_VIRT_IRQ;
+ if (virq == first_virq)
+ goto nospace; /* oops, no free slots */
+ }
+
+ virt_irq_to_real_map[virq] = real_irq;
+ return virq;
+
+ nospace:
+ if (!warned) {
+ printk(KERN_CRIT "Interrupt table is full\n");
+ printk(KERN_CRIT "Increase NR_IRQS (currently %d) "
+ "in your kernel sources and rebuild.\n", NR_IRQS);
+ warned = 1;
+ }
+ return NO_IRQ;
+}
+
+/*
+ * In most cases will get a hit on the very first slot checked in the
+ * virt_irq_to_real_map. Only when there are a large number of
+ * IRQs will this be expensive.
+ */
+unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
+{
+ unsigned int virq;
+ unsigned int first_virq;
+
+ virq = real_irq;
+
+ if (virq > MAX_VIRT_IRQ)
+ virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ;
+
+ first_virq = virq;
+
+ do {
+ if (virt_irq_to_real_map[virq] == real_irq)
+ return virq;
+
+ virq++;
+
+ if (virq >= MAX_VIRT_IRQ)
+ virq = 0;
+
+ } while (first_virq != virq);
+
+ return NO_IRQ;
+
+}
+
+#endif /* CONFIG_PPC_ISERIES */
+
+#ifdef CONFIG_IRQSTACKS
+struct thread_info *softirq_ctx[NR_CPUS];
+struct thread_info *hardirq_ctx[NR_CPUS];
+
+void irq_ctx_init(void)
+{
+ struct thread_info *tp;
+ int i;
+
+ for_each_cpu(i) {
+ memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
+ tp = softirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = SOFTIRQ_OFFSET;
+
+ memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
+ tp = hardirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = HARDIRQ_OFFSET;
+ }
+}
+
+void do_softirq(void)
+{
+ unsigned long flags;
+ struct thread_info *curtp, *irqtp;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ if (local_softirq_pending()) {
+ curtp = current_thread_info();
+ irqtp = softirq_ctx[smp_processor_id()];
+ irqtp->task = curtp->task;
+ call_do_softirq(irqtp);
+ irqtp->task = NULL;
+ }
+
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(do_softirq);
+
+#endif /* CONFIG_IRQSTACKS */
+
+static int __init setup_noirqdistrib(char *str)
+{
+ distribute_irqs = 0;
+ return 1;
+}
+
+__setup("noirqdistrib", setup_noirqdistrib);
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
new file mode 100644
index 000000000000..1b3ba8a440a6
--- /dev/null
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -0,0 +1,609 @@
+/*
+ * PowerPC64 LPAR Configuration Information Driver
+ *
+ * Dave Engebretsen engebret@us.ibm.com
+ * Copyright (c) 2003 Dave Engebretsen
+ * Will Schmidt willschm@us.ibm.com
+ * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
+ * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
+ * Nathan Lynch nathanl@austin.ibm.com
+ * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This driver creates a proc file at /proc/ppc64/lparcfg which contains
+ * keyword - value pairs that specify the configuration of the partition.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <asm/iseries/hv_lp_config.h>
+#include <asm/lppaca.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/system.h>
+#include <asm/time.h>
+#include <asm/iseries/it_exp_vpd_panel.h>
+#include <asm/prom.h>
+#include <asm/vdso_datapage.h>
+
+#define MODULE_VERS "1.6"
+#define MODULE_NAME "lparcfg"
+
+/* #define LPARCFG_DEBUG */
+
+/* find a better place for this function... */
+static void log_plpar_hcall_return(unsigned long rc, char *tag)
+{
+ if (rc == 0) /* success, return */
+ return;
+/* check for null tag ? */
+ if (rc == H_Hardware)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed with hardware fault\n", tag);
+ else if (rc == H_Function)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed; function not allowed\n", tag);
+ else if (rc == H_Authority)
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed; not authorized to this function\n",
+ tag);
+ else if (rc == H_Parameter)
+ printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
+ tag);
+ else
+ printk(KERN_INFO
+ "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
+ tag, rc);
+
+}
+
+static struct proc_dir_entry *proc_ppc64_lparcfg;
+#define LPARCFG_BUFF_SIZE 4096
+
+#ifdef CONFIG_PPC_ISERIES
+
+/*
+ * For iSeries legacy systems, the PPA purr function is available from the
+ * emulated_time_base field in the paca.
+ */
+static unsigned long get_purr(void)
+{
+ unsigned long sum_purr = 0;
+ int cpu;
+ struct paca_struct *lpaca;
+
+ for_each_cpu(cpu) {
+ lpaca = paca + cpu;
+ sum_purr += lpaca->lppaca.emulated_time_base;
+
+#ifdef PURR_DEBUG
+ printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n",
+ cpu, lpaca->lppaca.emulated_time_base);
+#endif
+ }
+ return sum_purr;
+}
+
+#define lparcfg_write NULL
+
+/*
+ * Methods used to fetch LPAR data when running on an iSeries platform.
+ */
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+ unsigned long pool_id, lp_index;
+ int shared, entitled_capacity, max_entitled_capacity;
+ int processors, max_processors;
+ struct paca_struct *lpaca = get_paca();
+ unsigned long purr = get_purr();
+
+ seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
+
+ shared = (int)(lpaca->lppaca_ptr->shared_proc);
+ seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n",
+ e2a(xItExtVpdPanel.mfgID[2]),
+ e2a(xItExtVpdPanel.mfgID[3]),
+ e2a(xItExtVpdPanel.systemSerial[1]),
+ e2a(xItExtVpdPanel.systemSerial[2]),
+ e2a(xItExtVpdPanel.systemSerial[3]),
+ e2a(xItExtVpdPanel.systemSerial[4]),
+ e2a(xItExtVpdPanel.systemSerial[5]));
+
+ seq_printf(m, "system_type=%c%c%c%c\n",
+ e2a(xItExtVpdPanel.machineType[0]),
+ e2a(xItExtVpdPanel.machineType[1]),
+ e2a(xItExtVpdPanel.machineType[2]),
+ e2a(xItExtVpdPanel.machineType[3]));
+
+ lp_index = HvLpConfig_getLpIndex();
+ seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+ seq_printf(m, "system_active_processors=%d\n",
+ (int)HvLpConfig_getSystemPhysicalProcessors());
+
+ seq_printf(m, "system_potential_processors=%d\n",
+ (int)HvLpConfig_getSystemPhysicalProcessors());
+
+ processors = (int)HvLpConfig_getPhysicalProcessors();
+ seq_printf(m, "partition_active_processors=%d\n", processors);
+
+ max_processors = (int)HvLpConfig_getMaxPhysicalProcessors();
+ seq_printf(m, "partition_potential_processors=%d\n", max_processors);
+
+ if (shared) {
+ entitled_capacity = HvLpConfig_getSharedProcUnits();
+ max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits();
+ } else {
+ entitled_capacity = processors * 100;
+ max_entitled_capacity = max_processors * 100;
+ }
+ seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity);
+
+ seq_printf(m, "partition_max_entitled_capacity=%d\n",
+ max_entitled_capacity);
+
+ if (shared) {
+ pool_id = HvLpConfig_getSharedPoolIndex();
+ seq_printf(m, "pool=%d\n", (int)pool_id);
+ seq_printf(m, "pool_capacity=%d\n",
+ (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) *
+ 100));
+ seq_printf(m, "purr=%ld\n", purr);
+ }
+
+ seq_printf(m, "shared_processor_mode=%d\n", shared);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_ISERIES */
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Methods used to fetch LPAR data when running on a pSeries platform.
+ */
+
+/*
+ * H_GET_PPP hcall returns info in 4 parms.
+ * entitled_capacity,unallocated_capacity,
+ * aggregation, resource_capability).
+ *
+ * R4 = Entitled Processor Capacity Percentage.
+ * R5 = Unallocated Processor Capacity Percentage.
+ * R6 (AABBCCDDEEFFGGHH).
+ * XXXX - reserved (0)
+ * XXXX - reserved (0)
+ * XXXX - Group Number
+ * XXXX - Pool Number.
+ * R7 (IIJJKKLLMMNNOOPP).
+ * XX - reserved. (0)
+ * XX - bit 0-6 reserved (0). bit 7 is Capped indicator.
+ * XX - variable processor Capacity Weight
+ * XX - Unallocated Variable Processor Capacity Weight.
+ * XXXX - Active processors in Physical Processor Pool.
+ * XXXX - Processors active on platform.
+ */
+static unsigned int h_get_ppp(unsigned long *entitled,
+ unsigned long *unallocated,
+ unsigned long *aggregation,
+ unsigned long *resource)
+{
+ unsigned long rc;
+ rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated,
+ aggregation, resource);
+
+ log_plpar_hcall_return(rc, "H_GET_PPP");
+
+ return rc;
+}
+
+static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
+{
+ unsigned long rc;
+ unsigned long dummy;
+ rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy);
+
+ if (rc != H_Authority)
+ log_plpar_hcall_return(rc, "H_PIC");
+}
+
+/* Track sum of all purrs across all processors. This is used to further */
+/* calculate usage values by different applications */
+
+static unsigned long get_purr(void)
+{
+ unsigned long sum_purr = 0;
+ int cpu;
+ struct cpu_usage *cu;
+
+ for_each_cpu(cpu) {
+ cu = &per_cpu(cpu_usage_array, cpu);
+ sum_purr += cu->current_tb;
+ }
+ return sum_purr;
+}
+
+#define SPLPAR_CHARACTERISTICS_TOKEN 20
+#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
+
+/*
+ * parse_system_parameter_string()
+ * Retrieve the potential_processors, max_entitled_capacity and friends
+ * through the get-system-parameter rtas call. Replace keyword strings as
+ * necessary.
+ */
+static void parse_system_parameter_string(struct seq_file *m)
+{
+ int call_status;
+
+ char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+ if (!local_buffer) {
+ printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
+ __FILE__, __FUNCTION__, __LINE__);
+ return;
+ }
+
+ spin_lock(&rtas_data_buf_lock);
+ memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH);
+ call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+ NULL,
+ SPLPAR_CHARACTERISTICS_TOKEN,
+ __pa(rtas_data_buf));
+ memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH);
+ spin_unlock(&rtas_data_buf_lock);
+
+ if (call_status != 0) {
+ printk(KERN_INFO
+ "%s %s Error calling get-system-parameter (0x%x)\n",
+ __FILE__, __FUNCTION__, call_status);
+ } else {
+ int splpar_strlen;
+ int idx, w_idx;
+ char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+ if (!workbuffer) {
+ printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
+ __FILE__, __FUNCTION__, __LINE__);
+ kfree(local_buffer);
+ return;
+ }
+#ifdef LPARCFG_DEBUG
+ printk(KERN_INFO "success calling get-system-parameter \n");
+#endif
+ splpar_strlen = local_buffer[0] * 16 + local_buffer[1];
+ local_buffer += 2; /* step over strlen value */
+
+ memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+ w_idx = 0;
+ idx = 0;
+ while ((*local_buffer) && (idx < splpar_strlen)) {
+ workbuffer[w_idx++] = local_buffer[idx++];
+ if ((local_buffer[idx] == ',')
+ || (local_buffer[idx] == '\0')) {
+ workbuffer[w_idx] = '\0';
+ if (w_idx) {
+ /* avoid the empty string */
+ seq_printf(m, "%s\n", workbuffer);
+ }
+ memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+ idx++; /* skip the comma */
+ w_idx = 0;
+ } else if (local_buffer[idx] == '=') {
+ /* code here to replace workbuffer contents
+ with different keyword strings */
+ if (0 == strcmp(workbuffer, "MaxEntCap")) {
+ strcpy(workbuffer,
+ "partition_max_entitled_capacity");
+ w_idx = strlen(workbuffer);
+ }
+ if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
+ strcpy(workbuffer,
+ "system_potential_processors");
+ w_idx = strlen(workbuffer);
+ }
+ }
+ }
+ kfree(workbuffer);
+ local_buffer -= 2; /* back up over strlen value */
+ }
+ kfree(local_buffer);
+}
+
+/* Return the number of processors in the system.
+ * This function reads through the device tree and counts
+ * the virtual processors, this does not include threads.
+ */
+static int lparcfg_count_active_processors(void)
+{
+ struct device_node *cpus_dn = NULL;
+ int count = 0;
+
+ while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
+#ifdef LPARCFG_DEBUG
+ printk(KERN_ERR "cpus_dn %p \n", cpus_dn);
+#endif
+ count++;
+ }
+ return count;
+}
+
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+ int partition_potential_processors;
+ int partition_active_processors;
+ struct device_node *rootdn;
+ const char *model = "";
+ const char *system_id = "";
+ unsigned int *lp_index_ptr, lp_index = 0;
+ struct device_node *rtas_node;
+ int *lrdrp;
+
+ rootdn = find_path_device("/");
+ if (rootdn) {
+ model = get_property(rootdn, "model", NULL);
+ system_id = get_property(rootdn, "system-id", NULL);
+ lp_index_ptr = (unsigned int *)
+ get_property(rootdn, "ibm,partition-no", NULL);
+ if (lp_index_ptr)
+ lp_index = *lp_index_ptr;
+ }
+
+ seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
+
+ seq_printf(m, "serial_number=%s\n", system_id);
+
+ seq_printf(m, "system_type=%s\n", model);
+
+ seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+ rtas_node = find_path_device("/rtas");
+ lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL);
+
+ if (lrdrp == NULL) {
+ partition_potential_processors = vdso_data->processorCount;
+ } else {
+ partition_potential_processors = *(lrdrp + 4);
+ }
+
+ partition_active_processors = lparcfg_count_active_processors();
+
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ unsigned long h_entitled, h_unallocated;
+ unsigned long h_aggregation, h_resource;
+ unsigned long pool_idle_time, pool_procs;
+ unsigned long purr;
+
+ h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
+ &h_resource);
+
+ seq_printf(m, "R4=0x%lx\n", h_entitled);
+ seq_printf(m, "R5=0x%lx\n", h_unallocated);
+ seq_printf(m, "R6=0x%lx\n", h_aggregation);
+ seq_printf(m, "R7=0x%lx\n", h_resource);
+
+ purr = get_purr();
+
+ /* this call handles the ibm,get-system-parameter contents */
+ parse_system_parameter_string(m);
+
+ seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
+
+ seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
+
+ seq_printf(m, "system_active_processors=%ld\n",
+ (h_resource >> 0 * 8) & 0xffff);
+
+ /* pool related entries are apropriate for shared configs */
+ if (paca[0].lppaca.shared_proc) {
+
+ h_pic(&pool_idle_time, &pool_procs);
+
+ seq_printf(m, "pool=%ld\n",
+ (h_aggregation >> 0 * 8) & 0xffff);
+
+ /* report pool_capacity in percentage */
+ seq_printf(m, "pool_capacity=%ld\n",
+ ((h_resource >> 2 * 8) & 0xffff) * 100);
+
+ seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+
+ seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+ }
+
+ seq_printf(m, "unallocated_capacity_weight=%ld\n",
+ (h_resource >> 4 * 8) & 0xFF);
+
+ seq_printf(m, "capacity_weight=%ld\n",
+ (h_resource >> 5 * 8) & 0xFF);
+
+ seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
+
+ seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
+
+ seq_printf(m, "purr=%ld\n", purr);
+
+ } else { /* non SPLPAR case */
+
+ seq_printf(m, "system_active_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "system_potential_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "partition_max_entitled_capacity=%d\n",
+ partition_potential_processors * 100);
+
+ seq_printf(m, "partition_entitled_capacity=%d\n",
+ partition_active_processors * 100);
+ }
+
+ seq_printf(m, "partition_active_processors=%d\n",
+ partition_active_processors);
+
+ seq_printf(m, "partition_potential_processors=%d\n",
+ partition_potential_processors);
+
+ seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc);
+
+ return 0;
+}
+
+/*
+ * Interface for changing system parameters (variable capacity weight
+ * and entitled capacity). Format of input is "param_name=value";
+ * anything after value is ignored. Valid parameters at this time are
+ * "partition_entitled_capacity" and "capacity_weight". We use
+ * H_SET_PPP to alter parameters.
+ *
+ * This function should be invoked only on systems with
+ * FW_FEATURE_SPLPAR.
+ */
+static ssize_t lparcfg_write(struct file *file, const char __user * buf,
+ size_t count, loff_t * off)
+{
+ char *kbuf;
+ char *tmp;
+ u64 new_entitled, *new_entitled_ptr = &new_entitled;
+ u8 new_weight, *new_weight_ptr = &new_weight;
+
+ unsigned long current_entitled; /* parameters for h_get_ppp */
+ unsigned long dummy;
+ unsigned long resource;
+ u8 current_weight;
+
+ ssize_t retval = -ENOMEM;
+
+ kbuf = kmalloc(count, GFP_KERNEL);
+ if (!kbuf)
+ goto out;
+
+ retval = -EFAULT;
+ if (copy_from_user(kbuf, buf, count))
+ goto out;
+
+ retval = -EINVAL;
+ kbuf[count - 1] = '\0';
+ tmp = strchr(kbuf, '=');
+ if (!tmp)
+ goto out;
+
+ *tmp++ = '\0';
+
+ if (!strcmp(kbuf, "partition_entitled_capacity")) {
+ char *endp;
+ *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ goto out;
+ new_weight_ptr = &current_weight;
+ } else if (!strcmp(kbuf, "capacity_weight")) {
+ char *endp;
+ *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ goto out;
+ new_entitled_ptr = &current_entitled;
+ } else
+ goto out;
+
+ /* Get our current parameters */
+ retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
+ if (retval) {
+ retval = -EIO;
+ goto out;
+ }
+
+ current_weight = (resource >> 5 * 8) & 0xFF;
+
+ pr_debug("%s: current_entitled = %lu, current_weight = %lu\n",
+ __FUNCTION__, current_entitled, current_weight);
+
+ pr_debug("%s: new_entitled = %lu, new_weight = %lu\n",
+ __FUNCTION__, *new_entitled_ptr, *new_weight_ptr);
+
+ retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
+ *new_weight_ptr);
+
+ if (retval == H_Success || retval == H_Constrained) {
+ retval = count;
+ } else if (retval == H_Busy) {
+ retval = -EBUSY;
+ } else if (retval == H_Hardware) {
+ retval = -EIO;
+ } else if (retval == H_Parameter) {
+ retval = -EINVAL;
+ } else {
+ printk(KERN_WARNING "%s: received unknown hv return code %ld",
+ __FUNCTION__, retval);
+ retval = -EIO;
+ }
+
+out:
+ kfree(kbuf);
+ return retval;
+}
+
+#endif /* CONFIG_PPC_PSERIES */
+
+static int lparcfg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, lparcfg_data, NULL);
+}
+
+struct file_operations lparcfg_fops = {
+ .owner = THIS_MODULE,
+ .read = seq_read,
+ .open = lparcfg_open,
+ .release = single_release,
+};
+
+int __init lparcfg_init(void)
+{
+ struct proc_dir_entry *ent;
+ mode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+
+ /* Allow writing if we have FW_FEATURE_SPLPAR */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ lparcfg_fops.write = lparcfg_write;
+ mode |= S_IWUSR;
+ }
+
+ ent = create_proc_entry("ppc64/lparcfg", mode, NULL);
+ if (ent) {
+ ent->proc_fops = &lparcfg_fops;
+ ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL);
+ if (!ent->data) {
+ printk(KERN_ERR
+ "Failed to allocate buffer for lparcfg\n");
+ remove_proc_entry("lparcfg", ent->parent);
+ return -ENOMEM;
+ }
+ } else {
+ printk(KERN_ERR "Failed to create ppc64/lparcfg\n");
+ return -EIO;
+ }
+
+ proc_ppc64_lparcfg = ent;
+ return 0;
+}
+
+void __exit lparcfg_cleanup(void)
+{
+ if (proc_ppc64_lparcfg) {
+ kfree(proc_ppc64_lparcfg->data);
+ remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent);
+ }
+}
+
+module_init(lparcfg_init);
+module_exit(lparcfg_cleanup);
+MODULE_DESCRIPTION("Interface for LPAR configuration data");
+MODULE_AUTHOR("Dave Engebretsen");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 3bedb532aed9..f6d84a75ed26 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -519,7 +519,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
*
* flush_icache_range(unsigned long start, unsigned long stop)
*/
-_GLOBAL(flush_icache_range)
+_GLOBAL(__flush_icache_range)
BEGIN_FTR_SECTION
blr /* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
@@ -607,27 +607,6 @@ _GLOBAL(invalidate_dcache_range)
sync /* wait for dcbi's to get to ram */
blr
-#ifdef CONFIG_NOT_COHERENT_CACHE
-/*
- * 40x cores have 8K or 16K dcache and 32 byte line size.
- * 44x has a 32K dcache and 32 byte line size.
- * 8xx has 1, 2, 4, 8K variants.
- * For now, cover the worst case of the 44x.
- * Must be called with external interrupts disabled.
- */
-#define CACHE_NWAYS 64
-#define CACHE_NLINES 16
-
-_GLOBAL(flush_dcache_all)
- li r4, (2 * CACHE_NWAYS * CACHE_NLINES)
- mtctr r4
- lis r5, KERNELBASE@h
-1: lwz r3, 0(r5) /* Load one word from every line */
- addi r5, r5, L1_CACHE_BYTES
- bdnz 1b
- blr
-#endif /* CONFIG_NOT_COHERENT_CACHE */
-
/*
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index ae1433da09b2..ae48a002f81a 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -89,12 +89,12 @@ _GLOBAL(call_do_softirq)
mtlr r0
blr
-_GLOBAL(call_handle_IRQ_event)
+_GLOBAL(call___do_IRQ)
mflr r0
std r0,16(r1)
- stdu r1,THREAD_SIZE-112(r6)
- mr r1,r6
- bl .handle_IRQ_event
+ stdu r1,THREAD_SIZE-112(r5)
+ mr r1,r5
+ bl .__do_IRQ
ld r1,0(r1)
ld r0,16(r1)
mtlr r0
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
new file mode 100644
index 000000000000..a7b68f911eb1
--- /dev/null
+++ b/arch/powerpc/kernel/paca.c
@@ -0,0 +1,135 @@
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/module.h>
+
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/page.h>
+#include <asm/lppaca.h>
+#include <asm/iseries/it_lp_queue.h>
+#include <asm/paca.h>
+
+
+/* This symbol is provided by the linker - let it fill in the paca
+ * field correctly */
+extern unsigned long __toc_start;
+
+/* The Paca is an array with one entry per processor. Each contains an
+ * lppaca, which contains the information shared between the
+ * hypervisor and Linux. Each also contains an ItLpRegSave area which
+ * is used by the hypervisor to save registers.
+ * On systems with hardware multi-threading, there are two threads
+ * per processor. The Paca array must contain an entry for each thread.
+ * The VPD Areas will give a max logical processors = 2 * max physical
+ * processors. The processor VPD array needs one entry per physical
+ * processor (not thread).
+ */
+#define PACA_INIT_COMMON(number, start, asrr, asrv) \
+ .lock_token = 0x8000, \
+ .paca_index = (number), /* Paca Index */ \
+ .default_decr = 0x00ff0000, /* Initial Decr */ \
+ .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \
+ .stab_real = (asrr), /* Real pointer to segment table */ \
+ .stab_addr = (asrv), /* Virt pointer to segment table */ \
+ .cpu_start = (start), /* Processor start */ \
+ .hw_cpu_id = 0xffff, \
+ .lppaca = { \
+ .desc = 0xd397d781, /* "LpPa" */ \
+ .size = sizeof(struct lppaca), \
+ .dyn_proc_status = 2, \
+ .decr_val = 0x00ff0000, \
+ .fpregs_in_use = 1, \
+ .end_of_quantum = 0xfffffffffffffffful, \
+ .slb_count = 64, \
+ .vmxregs_in_use = 0, \
+ }, \
+
+#ifdef CONFIG_PPC_ISERIES
+#define PACA_INIT_ISERIES(number) \
+ .lppaca_ptr = &paca[number].lppaca, \
+ .reg_save_ptr = &paca[number].reg_save, \
+ .reg_save = { \
+ .xDesc = 0xd397d9e2, /* "LpRS" */ \
+ .xSize = sizeof(struct ItLpRegSave) \
+ }
+
+#define PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 0, 0, 0) \
+ PACA_INIT_ISERIES(number) \
+}
+
+#define BOOTCPU_PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \
+ PACA_INIT_ISERIES(number) \
+}
+
+#else
+#define PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 0, 0, 0) \
+}
+
+#define BOOTCPU_PACA_INIT(number) \
+{ \
+ PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \
+}
+#endif
+
+struct paca_struct paca[] = {
+ BOOTCPU_PACA_INIT(0),
+#if NR_CPUS > 1
+ PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3),
+#if NR_CPUS > 4
+ PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7),
+#if NR_CPUS > 8
+ PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11),
+ PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15),
+ PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19),
+ PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23),
+ PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27),
+ PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31),
+#if NR_CPUS > 32
+ PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35),
+ PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39),
+ PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43),
+ PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47),
+ PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51),
+ PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55),
+ PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59),
+ PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63),
+#if NR_CPUS > 64
+ PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67),
+ PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71),
+ PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75),
+ PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79),
+ PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83),
+ PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87),
+ PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91),
+ PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95),
+ PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99),
+ PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103),
+ PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107),
+ PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111),
+ PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115),
+ PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119),
+ PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123),
+ PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127),
+#endif
+#endif
+#endif
+#endif
+#endif
+};
+EXPORT_SYMBOL(paca);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 47d6f7e2ea9f..5dcf4ba05ee8 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -44,6 +44,7 @@
#include <asm/cputable.h>
#include <asm/btext.h>
#include <asm/div64.h>
+#include <asm/signal.h>
#ifdef CONFIG_8xx
#include <asm/commproc.h>
@@ -56,7 +57,6 @@ extern void machine_check_exception(struct pt_regs *regs);
extern void alignment_exception(struct pt_regs *regs);
extern void program_check_exception(struct pt_regs *regs);
extern void single_step_exception(struct pt_regs *regs);
-extern int do_signal(sigset_t *, struct pt_regs *);
extern int pmac_newworld;
extern int sys_sigreturn(struct pt_regs *regs);
@@ -188,9 +188,6 @@ EXPORT_SYMBOL(adb_try_handler_change);
EXPORT_SYMBOL(cuda_request);
EXPORT_SYMBOL(cuda_poll);
#endif /* CONFIG_ADB_CUDA */
-#if defined(CONFIG_PPC_MULTIPLATFORM) && defined(CONFIG_PPC32)
-EXPORT_SYMBOL(_machine);
-#endif
#ifdef CONFIG_PPC_PMAC
EXPORT_SYMBOL(sys_ctrler);
#endif
diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c
new file mode 100644
index 000000000000..7ba42a405f41
--- /dev/null
+++ b/arch/powerpc/kernel/proc_ppc64.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+#include <asm/vdso_datapage.h>
+#include <asm/rtas.h>
+#include <asm/uaccess.h>
+#include <asm/prom.h>
+
+static loff_t page_map_seek( struct file *file, loff_t off, int whence);
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos);
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma );
+
+static struct file_operations page_map_fops = {
+ .llseek = page_map_seek,
+ .read = page_map_read,
+ .mmap = page_map_mmap
+};
+
+/*
+ * Create the ppc64 and ppc64/rtas directories early. This allows us to
+ * assume that they have been previously created in drivers.
+ */
+static int __init proc_ppc64_create(void)
+{
+ struct proc_dir_entry *root;
+
+ root = proc_mkdir("ppc64", NULL);
+ if (!root)
+ return 1;
+
+ if (!(platform_is_pseries() || _machine == PLATFORM_CELL))
+ return 0;
+
+ if (!proc_mkdir("rtas", root))
+ return 1;
+
+ if (!proc_symlink("rtas", NULL, "ppc64/rtas"))
+ return 1;
+
+ return 0;
+}
+core_initcall(proc_ppc64_create);
+
+static int __init proc_ppc64_init(void)
+{
+ struct proc_dir_entry *pde;
+
+ pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL);
+ if (!pde)
+ return 1;
+ pde->nlink = 1;
+ pde->data = vdso_data;
+ pde->size = PAGE_SIZE;
+ pde->proc_fops = &page_map_fops;
+
+ return 0;
+}
+__initcall(proc_ppc64_init);
+
+static loff_t page_map_seek( struct file *file, loff_t off, int whence)
+{
+ loff_t new;
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+
+ switch(whence) {
+ case 0:
+ new = off;
+ break;
+ case 1:
+ new = file->f_pos + off;
+ break;
+ case 2:
+ new = dp->size + off;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if ( new < 0 || new > dp->size )
+ return -EINVAL;
+ return (file->f_pos = new);
+}
+
+static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
+ loff_t *ppos)
+{
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+ return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size);
+}
+
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
+{
+ struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+
+ vma->vm_flags |= VM_SHM | VM_LOCKED;
+
+ if ((vma->vm_end - vma->vm_start) > dp->size)
+ return -EINVAL;
+
+ remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT,
+ dp->size, vma->vm_page_prot);
+ return 0;
+}
+
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f645adb57534..6a5b468edb4d 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -48,9 +48,6 @@
#include <asm/machdep.h>
#include <asm/pSeries_reconfig.h>
#include <asm/pci-bridge.h>
-#ifdef CONFIG_PPC64
-#include <asm/systemcfg.h>
-#endif
#ifdef DEBUG
#define DBG(fmt...) printk(KERN_ERR fmt)
@@ -74,10 +71,6 @@ struct isa_reg_property {
typedef int interpret_func(struct device_node *, unsigned long *,
int, int, int);
-extern struct rtas_t rtas;
-extern struct lmb lmb;
-extern unsigned long klimit;
-
static int __initdata dt_root_addr_cells;
static int __initdata dt_root_size_cells;
@@ -391,7 +384,7 @@ static int __devinit finish_node_interrupts(struct device_node *np,
#ifdef CONFIG_PPC64
/* We offset irq numbers for the u3 MPIC by 128 in PowerMac */
- if (systemcfg->platform == PLATFORM_POWERMAC && ic && ic->parent) {
+ if (_machine == PLATFORM_POWERMAC && ic && ic->parent) {
char *name = get_property(ic->parent, "name", NULL);
if (name && !strcmp(name, "u3"))
np->intrs[intrcount].line += 128;
@@ -1087,9 +1080,9 @@ void __init unflatten_device_tree(void)
static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth, void *data)
{
- char *type = of_get_flat_dt_prop(node, "device_type", NULL);
u32 *prop;
- unsigned long size = 0;
+ unsigned long size;
+ char *type = of_get_flat_dt_prop(node, "device_type", &size);
/* We are scanning "cpu" nodes only */
if (type == NULL || strcmp(type, "cpu") != 0)
@@ -1115,7 +1108,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
#ifdef CONFIG_ALTIVEC
/* Check if we have a VMX and eventually update CPU features */
- prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", &size);
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL);
if (prop && (*prop) > 0) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
@@ -1161,13 +1154,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
if (prop == NULL)
return 0;
-#ifdef CONFIG_PPC64
- systemcfg->platform = *prop;
-#else
#ifdef CONFIG_PPC_MULTIPLATFORM
_machine = *prop;
#endif
-#endif
#ifdef CONFIG_PPC64
/* check if iommu is forced on or off */
@@ -1264,7 +1253,14 @@ static int __init early_init_dt_scan_memory(unsigned long node,
unsigned long l;
/* We are scanning "memory" nodes only */
- if (type == NULL || strcmp(type, "memory") != 0)
+ if (type == NULL) {
+ /*
+ * The longtrail doesn't have a device_type on the
+ * /memory node, so look for the node called /memory@0.
+ */
+ if (depth != 1 || strcmp(uname, "memory@0") != 0)
+ return 0;
+ } else if (strcmp(type, "memory") != 0)
return 0;
reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
@@ -1339,9 +1335,6 @@ void __init early_init_devtree(void *params)
of_scan_flat_dt(early_init_dt_scan_memory, NULL);
lmb_enforce_memory_limit(memory_limit);
lmb_analyze();
-#ifdef CONFIG_PPC64
- systemcfg->physicalMemorySize = lmb_phys_mem_size();
-#endif
lmb_reserve(0, __pa(klimit));
DBG("Phys. mem: %lx\n", lmb_phys_mem_size());
@@ -1908,7 +1901,7 @@ static int of_finish_dynamic_node(struct device_node *node,
/* We don't support that function on PowerMac, at least
* not yet
*/
- if (systemcfg->platform == PLATFORM_POWERMAC)
+ if (_machine == PLATFORM_POWERMAC)
return -ENODEV;
/* fix up new node's linux_phandle field */
@@ -1992,9 +1985,11 @@ int prom_add_property(struct device_node* np, struct property* prop)
*next = prop;
write_unlock(&devtree_lock);
+#ifdef CONFIG_PROC_DEVICETREE
/* try to add to proc as well if it was initialized */
if (np->pde)
proc_device_tree_add_prop(np->pde, prop);
+#endif /* CONFIG_PROC_DEVICETREE */
return 0;
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 6dc33d19fc2a..4ce0105c308e 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -94,11 +94,17 @@ extern const struct linux_logo logo_linux_clut224;
#ifdef CONFIG_PPC64
#define RELOC(x) (*PTRRELOC(&(x)))
#define ADDR(x) (u32) add_reloc_offset((unsigned long)(x))
+#define OF_WORKAROUNDS 0
#else
#define RELOC(x) (x)
#define ADDR(x) (u32) (x)
+#define OF_WORKAROUNDS of_workarounds
+int of_workarounds;
#endif
+#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */
+#define OF_WA_LONGTRAIL 2 /* work around longtrail bugs */
+
#define PROM_BUG() do { \
prom_printf("kernel BUG at %s line 0x%x!\n", \
RELOC(__FILE__), __LINE__); \
@@ -111,11 +117,6 @@ extern const struct linux_logo logo_linux_clut224;
#define prom_debug(x...)
#endif
-#ifdef CONFIG_PPC32
-#define PLATFORM_POWERMAC _MACH_Pmac
-#define PLATFORM_CHRP _MACH_chrp
-#endif
-
typedef u32 prom_arg_t;
@@ -128,10 +129,11 @@ struct prom_args {
struct prom_t {
ihandle root;
- ihandle chosen;
+ phandle chosen;
int cpu;
ihandle stdout;
ihandle mmumap;
+ ihandle memory;
};
struct mem_map_entry {
@@ -360,16 +362,36 @@ static void __init prom_printf(const char *format, ...)
static unsigned int __init prom_claim(unsigned long virt, unsigned long size,
unsigned long align)
{
- int ret;
struct prom_t *_prom = &RELOC(prom);
- ret = call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size,
- (prom_arg_t)align);
- if (ret != -1 && _prom->mmumap != 0)
- /* old pmacs need us to map as well */
+ if (align == 0 && (OF_WORKAROUNDS & OF_WA_CLAIM)) {
+ /*
+ * Old OF requires we claim physical and virtual separately
+ * and then map explicitly (assuming virtual mode)
+ */
+ int ret;
+ prom_arg_t result;
+
+ ret = call_prom_ret("call-method", 5, 2, &result,
+ ADDR("claim"), _prom->memory,
+ align, size, virt);
+ if (ret != 0 || result == -1)
+ return -1;
+ ret = call_prom_ret("call-method", 5, 2, &result,
+ ADDR("claim"), _prom->mmumap,
+ align, size, virt);
+ if (ret != 0) {
+ call_prom("call-method", 4, 1, ADDR("release"),
+ _prom->memory, size, virt);
+ return -1;
+ }
+ /* the 0x12 is M (coherence) + PP == read/write */
call_prom("call-method", 6, 1,
- ADDR("map"), _prom->mmumap, 0, size, virt, virt);
- return ret;
+ ADDR("map"), _prom->mmumap, 0x12, size, virt, virt);
+ return virt;
+ }
+ return call_prom("claim", 3, 1, (prom_arg_t)virt, (prom_arg_t)size,
+ (prom_arg_t)align);
}
static void __init __attribute__((noreturn)) prom_panic(const char *reason)
@@ -415,11 +437,52 @@ static int inline prom_getproplen(phandle node, const char *pname)
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
-static int inline prom_setprop(phandle node, const char *pname,
- void *value, size_t valuelen)
+static void add_string(char **str, const char *q)
{
- return call_prom("setprop", 4, 1, node, ADDR(pname),
- (u32)(unsigned long) value, (u32) valuelen);
+ char *p = *str;
+
+ while (*q)
+ *p++ = *q++;
+ *p++ = ' ';
+ *str = p;
+}
+
+static char *tohex(unsigned int x)
+{
+ static char digits[] = "0123456789abcdef";
+ static char result[9];
+ int i;
+
+ result[8] = 0;
+ i = 8;
+ do {
+ --i;
+ result[i] = digits[x & 0xf];
+ x >>= 4;
+ } while (x != 0 && i > 0);
+ return &result[i];
+}
+
+static int __init prom_setprop(phandle node, const char *nodename,
+ const char *pname, void *value, size_t valuelen)
+{
+ char cmd[256], *p;
+
+ if (!(OF_WORKAROUNDS & OF_WA_LONGTRAIL))
+ return call_prom("setprop", 4, 1, node, ADDR(pname),
+ (u32)(unsigned long) value, (u32) valuelen);
+
+ /* gah... setprop doesn't work on longtrail, have to use interpret */
+ p = cmd;
+ add_string(&p, "dev");
+ add_string(&p, nodename);
+ add_string(&p, tohex((u32)(unsigned long) value));
+ add_string(&p, tohex(valuelen));
+ add_string(&p, tohex(ADDR(pname)));
+ add_string(&p, tohex(strlen(RELOC(pname))));
+ add_string(&p, "property");
+ *p = 0;
+ return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);
}
/* We can't use the standard versions because of RELOC headaches. */
@@ -980,7 +1043,7 @@ static void __init prom_instantiate_rtas(void)
rtas_inst = call_prom("open", 1, 1, ADDR("/rtas"));
if (!IHANDLE_VALID(rtas_inst)) {
- prom_printf("opening rtas package failed");
+ prom_printf("opening rtas package failed (%x)\n", rtas_inst);
return;
}
@@ -988,7 +1051,7 @@ static void __init prom_instantiate_rtas(void)
if (call_prom_ret("call-method", 3, 2, &entry,
ADDR("instantiate-rtas"),
- rtas_inst, base) == PROM_ERROR
+ rtas_inst, base) != 0
|| entry == 0) {
prom_printf(" failed\n");
return;
@@ -997,8 +1060,10 @@ static void __init prom_instantiate_rtas(void)
reserve_mem(base, size);
- prom_setprop(rtas_node, "linux,rtas-base", &base, sizeof(base));
- prom_setprop(rtas_node, "linux,rtas-entry", &entry, sizeof(entry));
+ prom_setprop(rtas_node, "/rtas", "linux,rtas-base",
+ &base, sizeof(base));
+ prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
+ &entry, sizeof(entry));
prom_debug("rtas base = 0x%x\n", base);
prom_debug("rtas entry = 0x%x\n", entry);
@@ -1089,10 +1154,6 @@ static void __init prom_initialize_tce_table(void)
if (base < local_alloc_bottom)
local_alloc_bottom = base;
- /* Save away the TCE table attributes for later use. */
- prom_setprop(node, "linux,tce-base", &base, sizeof(base));
- prom_setprop(node, "linux,tce-size", &minsize, sizeof(minsize));
-
/* It seems OF doesn't null-terminate the path :-( */
memset(path, 0, sizeof(path));
/* Call OF to setup the TCE hardware */
@@ -1101,6 +1162,10 @@ static void __init prom_initialize_tce_table(void)
prom_printf("package-to-path failed\n");
}
+ /* Save away the TCE table attributes for later use. */
+ prom_setprop(node, path, "linux,tce-base", &base, sizeof(base));
+ prom_setprop(node, path, "linux,tce-size", &minsize, sizeof(minsize));
+
prom_debug("TCE table: %s\n", path);
prom_debug("\tnode = 0x%x\n", node);
prom_debug("\tbase = 0x%x\n", base);
@@ -1342,6 +1407,7 @@ static void __init prom_init_client_services(unsigned long pp)
/*
* For really old powermacs, we need to map things we claim.
* For that, we need the ihandle of the mmu.
+ * Also, on the longtrail, we need to work around other bugs.
*/
static void __init prom_find_mmu(void)
{
@@ -1355,12 +1421,19 @@ static void __init prom_find_mmu(void)
if (prom_getprop(oprom, "model", version, sizeof(version)) <= 0)
return;
version[sizeof(version) - 1] = 0;
- prom_printf("OF version is '%s'\n", version);
/* XXX might need to add other versions here */
- if (strcmp(version, "Open Firmware, 1.0.5") != 0)
+ if (strcmp(version, "Open Firmware, 1.0.5") == 0)
+ of_workarounds = OF_WA_CLAIM;
+ else if (strncmp(version, "FirmWorks,3.", 12) == 0) {
+ of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL;
+ call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim");
+ } else
return;
+ _prom->memory = call_prom("open", 1, 1, ADDR("/memory"));
prom_getprop(_prom->chosen, "mmu", &_prom->mmumap,
sizeof(_prom->mmumap));
+ if (!IHANDLE_VALID(_prom->memory) || !IHANDLE_VALID(_prom->mmumap))
+ of_workarounds &= ~OF_WA_CLAIM; /* hmmm */
}
#else
#define prom_find_mmu()
@@ -1382,16 +1455,17 @@ static void __init prom_init_stdout(void)
memset(path, 0, 256);
call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255);
val = call_prom("instance-to-package", 1, 1, _prom->stdout);
- prom_setprop(_prom->chosen, "linux,stdout-package", &val, sizeof(val));
+ prom_setprop(_prom->chosen, "/chosen", "linux,stdout-package",
+ &val, sizeof(val));
prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device));
- prom_setprop(_prom->chosen, "linux,stdout-path",
- RELOC(of_stdout_device), strlen(RELOC(of_stdout_device))+1);
+ prom_setprop(_prom->chosen, "/chosen", "linux,stdout-path",
+ path, strlen(path) + 1);
/* If it's a display, note it */
memset(type, 0, sizeof(type));
prom_getprop(val, "device_type", type, sizeof(type));
if (strcmp(type, RELOC("display")) == 0)
- prom_setprop(val, "linux,boot-display", NULL, 0);
+ prom_setprop(val, path, "linux,boot-display", NULL, 0);
}
static void __init prom_close_stdin(void)
@@ -1514,7 +1588,7 @@ static void __init prom_check_displays(void)
/* Success */
prom_printf("done\n");
- prom_setprop(node, "linux,opened", NULL, 0);
+ prom_setprop(node, path, "linux,opened", NULL, 0);
/* Setup a usable color table when the appropriate
* method is available. Should update this to set-colors */
@@ -1884,9 +1958,11 @@ static void __init fixup_device_tree(void)
/* interrupt on this revision of u3 is number 0 and level */
interrupts[0] = 0;
interrupts[1] = 1;
- prom_setprop(i2c, "interrupts", &interrupts, sizeof(interrupts));
+ prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupts",
+ &interrupts, sizeof(interrupts));
parent = (u32)mpic;
- prom_setprop(i2c, "interrupt-parent", &parent, sizeof(parent));
+ prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent",
+ &parent, sizeof(parent));
#endif
}
@@ -1922,11 +1998,11 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4;
val = RELOC(prom_initrd_start);
- prom_setprop(_prom->chosen, "linux,initrd-start", &val,
- sizeof(val));
+ prom_setprop(_prom->chosen, "/chosen", "linux,initrd-start",
+ &val, sizeof(val));
val = RELOC(prom_initrd_end);
- prom_setprop(_prom->chosen, "linux,initrd-end", &val,
- sizeof(val));
+ prom_setprop(_prom->chosen, "/chosen", "linux,initrd-end",
+ &val, sizeof(val));
reserve_mem(RELOC(prom_initrd_start),
RELOC(prom_initrd_end) - RELOC(prom_initrd_start));
@@ -1969,14 +2045,15 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
prom_init_client_services(pp);
/*
- * Init prom stdout device
+ * See if this OF is old enough that we need to do explicit maps
+ * and other workarounds
*/
- prom_init_stdout();
+ prom_find_mmu();
/*
- * See if this OF is old enough that we need to do explicit maps
+ * Init prom stdout device
*/
- prom_find_mmu();
+ prom_init_stdout();
/*
* Check for an initrd
@@ -1989,14 +2066,15 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*/
RELOC(of_platform) = prom_find_machine_type();
getprop_rval = RELOC(of_platform);
- prom_setprop(_prom->chosen, "linux,platform",
+ prom_setprop(_prom->chosen, "/chosen", "linux,platform",
&getprop_rval, sizeof(getprop_rval));
#ifdef CONFIG_PPC_PSERIES
/*
* On pSeries, inform the firmware about our capabilities
*/
- if (RELOC(of_platform) & PLATFORM_PSERIES)
+ if (RELOC(of_platform) == PLATFORM_PSERIES ||
+ RELOC(of_platform) == PLATFORM_PSERIES_LPAR)
prom_send_capabilities();
#endif
@@ -2050,21 +2128,23 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
* Fill in some infos for use by the kernel later on
*/
if (RELOC(prom_memory_limit))
- prom_setprop(_prom->chosen, "linux,memory-limit",
+ prom_setprop(_prom->chosen, "/chosen", "linux,memory-limit",
&RELOC(prom_memory_limit),
sizeof(prom_memory_limit));
#ifdef CONFIG_PPC64
if (RELOC(ppc64_iommu_off))
- prom_setprop(_prom->chosen, "linux,iommu-off", NULL, 0);
+ prom_setprop(_prom->chosen, "/chosen", "linux,iommu-off",
+ NULL, 0);
if (RELOC(iommu_force_on))
- prom_setprop(_prom->chosen, "linux,iommu-force-on", NULL, 0);
+ prom_setprop(_prom->chosen, "/chosen", "linux,iommu-force-on",
+ NULL, 0);
if (RELOC(prom_tce_alloc_start)) {
- prom_setprop(_prom->chosen, "linux,tce-alloc-start",
+ prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-start",
&RELOC(prom_tce_alloc_start),
sizeof(prom_tce_alloc_start));
- prom_setprop(_prom->chosen, "linux,tce-alloc-end",
+ prom_setprop(_prom->chosen, "/chosen", "linux,tce-alloc-end",
&RELOC(prom_tce_alloc_end),
sizeof(prom_tce_alloc_end));
}
@@ -2081,8 +2161,13 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
prom_printf("copying OF device tree ...\n");
flatten_device_tree();
- /* in case stdin is USB and still active on IBM machines... */
- prom_close_stdin();
+ /*
+ * in case stdin is USB and still active on IBM machines...
+ * Unfortunately quiesce crashes on some powermacs if we have
+ * closed stdin already (in particular the powerbook 101).
+ */
+ if (RELOC(of_platform) != PLATFORM_POWERMAC)
+ prom_close_stdin();
/*
* Call OF "quiesce" method to shut down pending DMA's from
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 5bdd5b079d96..7a95b8a28354 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -32,7 +32,6 @@
#include <asm/rtas.h>
#include <asm/machdep.h> /* for ppc_md */
#include <asm/time.h>
-#include <asm/systemcfg.h>
/* Token for Sensors */
#define KEY_SWITCH 0x0001
@@ -259,7 +258,7 @@ static int __init proc_rtas_init(void)
{
struct proc_dir_entry *entry;
- if (!(systemcfg->platform & PLATFORM_PSERIES))
+ if (_machine != PLATFORM_PSERIES && _machine != PLATFORM_PSERIES_LPAR)
return 1;
rtas_node = of_find_node_by_name(NULL, "rtas");
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 9d4e07f6f1ec..4283fa33f784 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -29,9 +29,6 @@
#include <asm/delay.h>
#include <asm/uaccess.h>
#include <asm/lmb.h>
-#ifdef CONFIG_PPC64
-#include <asm/systemcfg.h>
-#endif
struct rtas_t rtas = {
.lock = SPIN_LOCK_UNLOCKED
@@ -671,7 +668,7 @@ void __init rtas_initialize(void)
* the stop-self token if any
*/
#ifdef CONFIG_PPC64
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
+ if (_machine == PLATFORM_PSERIES_LPAR)
rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX);
#endif
rtas_rmo_buf = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
new file mode 100644
index 000000000000..0e5a8e116653
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -0,0 +1,513 @@
+/*
+ * arch/ppc64/kernel/rtas_pci.c
+ *
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * RTAS specific routines for PCI.
+ *
+ * Based on code from pci.c, chrp_pci.c and pSeries_pci.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/rtas.h>
+#include <asm/mpic.h>
+#include <asm/ppc-pci.h>
+
+/* RTAS tokens */
+static int read_pci_config;
+static int write_pci_config;
+static int ibm_read_pci_config;
+static int ibm_write_pci_config;
+
+static inline int config_access_valid(struct pci_dn *dn, int where)
+{
+ if (where < 256)
+ return 1;
+ if (where < 4096 && dn->pci_ext_config_space)
+ return 1;
+
+ return 0;
+}
+
+static int of_device_available(struct device_node * dn)
+{
+ char * status;
+
+ status = get_property(dn, "status", NULL);
+
+ if (!status)
+ return 1;
+
+ if (!strcmp(status, "okay"))
+ return 1;
+
+ return 0;
+}
+
+static int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+{
+ int returnval = -1;
+ unsigned long buid, addr;
+ int ret;
+
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!config_access_valid(pdn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
+ (pdn->devfn << 8) | (where & 0xff);
+ buid = pdn->phb->buid;
+ if (buid) {
+ ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
+ addr, BUID_HI(buid), BUID_LO(buid), size);
+ } else {
+ ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
+ }
+ *val = returnval;
+
+ if (ret)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (returnval == EEH_IO_ERROR_VALUE(size) &&
+ eeh_dn_check_failure (pdn->node, NULL))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_read_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 *val)
+{
+ struct device_node *busdn, *dn;
+
+ if (bus->self)
+ busdn = pci_device_to_OF_node(bus->self);
+ else
+ busdn = bus->sysdata; /* must be a phb */
+
+ /* Search only direct children of the bus */
+ for (dn = busdn->child; dn; dn = dn->sibling) {
+ struct pci_dn *pdn = PCI_DN(dn);
+ if (pdn && pdn->devfn == devfn
+ && of_device_available(dn))
+ return rtas_read_config(pdn, where, size, val);
+ }
+
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+{
+ unsigned long buid, addr;
+ int ret;
+
+ if (!pdn)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!config_access_valid(pdn, where))
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
+ (pdn->devfn << 8) | (where & 0xff);
+ buid = pdn->phb->buid;
+ if (buid) {
+ ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
+ BUID_HI(buid), BUID_LO(buid), size, (ulong) val);
+ } else {
+ ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val);
+ }
+
+ if (ret)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_pci_write_config(struct pci_bus *bus,
+ unsigned int devfn,
+ int where, int size, u32 val)
+{
+ struct device_node *busdn, *dn;
+
+ if (bus->self)
+ busdn = pci_device_to_OF_node(bus->self);
+ else
+ busdn = bus->sysdata; /* must be a phb */
+
+ /* Search only direct children of the bus */
+ for (dn = busdn->child; dn; dn = dn->sibling) {
+ struct pci_dn *pdn = PCI_DN(dn);
+ if (pdn && pdn->devfn == devfn
+ && of_device_available(dn))
+ return rtas_write_config(pdn, where, size, val);
+ }
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+struct pci_ops rtas_pci_ops = {
+ rtas_pci_read_config,
+ rtas_pci_write_config
+};
+
+int is_python(struct device_node *dev)
+{
+ char *model = (char *)get_property(dev, "model", NULL);
+
+ if (model && strstr(model, "Python"))
+ return 1;
+
+ return 0;
+}
+
+static int get_phb_reg_prop(struct device_node *dev,
+ unsigned int addr_size_words,
+ struct reg_property64 *reg)
+{
+ unsigned int *ui_ptr = NULL, len;
+
+ /* Found a PHB, now figure out where his registers are mapped. */
+ ui_ptr = (unsigned int *)get_property(dev, "reg", &len);
+ if (ui_ptr == NULL)
+ return 1;
+
+ if (addr_size_words == 1) {
+ reg->address = ((struct reg_property32 *)ui_ptr)->address;
+ reg->size = ((struct reg_property32 *)ui_ptr)->size;
+ } else {
+ *reg = *((struct reg_property64 *)ui_ptr);
+ }
+
+ return 0;
+}
+
+static void python_countermeasures(struct device_node *dev,
+ unsigned int addr_size_words)
+{
+ struct reg_property64 reg_struct;
+ void __iomem *chip_regs;
+ volatile u32 val;
+
+ if (get_phb_reg_prop(dev, addr_size_words, &reg_struct))
+ return;
+
+ /* Python's register file is 1 MB in size. */
+ chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000);
+
+ /*
+ * Firmware doesn't always clear this bit which is critical
+ * for good performance - Anton
+ */
+
+#define PRG_CL_RESET_VALID 0x00010000
+
+ val = in_be32(chip_regs + 0xf6030);
+ if (val & PRG_CL_RESET_VALID) {
+ printk(KERN_INFO "Python workaround: ");
+ val &= ~PRG_CL_RESET_VALID;
+ out_be32(chip_regs + 0xf6030, val);
+ /*
+ * We must read it back for changes to
+ * take effect
+ */
+ val = in_be32(chip_regs + 0xf6030);
+ printk("reg0: %x\n", val);
+ }
+
+ iounmap(chip_regs);
+}
+
+void __init init_pci_config_tokens (void)
+{
+ read_pci_config = rtas_token("read-pci-config");
+ write_pci_config = rtas_token("write-pci-config");
+ ibm_read_pci_config = rtas_token("ibm,read-pci-config");
+ ibm_write_pci_config = rtas_token("ibm,write-pci-config");
+}
+
+unsigned long __devinit get_phb_buid (struct device_node *phb)
+{
+ int addr_cells;
+ unsigned int *buid_vals;
+ unsigned int len;
+ unsigned long buid;
+
+ if (ibm_read_pci_config == -1) return 0;
+
+ /* PHB's will always be children of the root node,
+ * or so it is promised by the current firmware. */
+ if (phb->parent == NULL)
+ return 0;
+ if (phb->parent->parent)
+ return 0;
+
+ buid_vals = (unsigned int *) get_property(phb, "reg", &len);
+ if (buid_vals == NULL)
+ return 0;
+
+ addr_cells = prom_n_addr_cells(phb);
+ if (addr_cells == 1) {
+ buid = (unsigned long) buid_vals[0];
+ } else {
+ buid = (((unsigned long)buid_vals[0]) << 32UL) |
+ (((unsigned long)buid_vals[1]) & 0xffffffff);
+ }
+ return buid;
+}
+
+static int phb_set_bus_ranges(struct device_node *dev,
+ struct pci_controller *phb)
+{
+ int *bus_range;
+ unsigned int len;
+
+ bus_range = (int *) get_property(dev, "bus-range", &len);
+ if (bus_range == NULL || len < 2 * sizeof(int)) {
+ return 1;
+ }
+
+ phb->first_busno = bus_range[0];
+ phb->last_busno = bus_range[1];
+
+ return 0;
+}
+
+static int __devinit setup_phb(struct device_node *dev,
+ struct pci_controller *phb,
+ unsigned int addr_size_words)
+{
+ pci_setup_pci_controller(phb);
+
+ if (is_python(dev))
+ python_countermeasures(dev, addr_size_words);
+
+ if (phb_set_bus_ranges(dev, phb))
+ return 1;
+
+ phb->arch_data = dev;
+ phb->ops = &rtas_pci_ops;
+ phb->buid = get_phb_buid(dev);
+
+ return 0;
+}
+
+static void __devinit add_linux_pci_domain(struct device_node *dev,
+ struct pci_controller *phb,
+ struct property *of_prop)
+{
+ memset(of_prop, 0, sizeof(struct property));
+ of_prop->name = "linux,pci-domain";
+ of_prop->length = sizeof(phb->global_number);
+ of_prop->value = (unsigned char *)&of_prop[1];
+ memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number));
+ prom_add_property(dev, of_prop);
+}
+
+static struct pci_controller * __init alloc_phb(struct device_node *dev,
+ unsigned int addr_size_words)
+{
+ struct pci_controller *phb;
+ struct property *of_prop;
+
+ phb = alloc_bootmem(sizeof(struct pci_controller));
+ if (phb == NULL)
+ return NULL;
+
+ of_prop = alloc_bootmem(sizeof(struct property) +
+ sizeof(phb->global_number));
+ if (!of_prop)
+ return NULL;
+
+ if (setup_phb(dev, phb, addr_size_words))
+ return NULL;
+
+ add_linux_pci_domain(dev, phb, of_prop);
+
+ return phb;
+}
+
+static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words)
+{
+ struct pci_controller *phb;
+
+ phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller),
+ GFP_KERNEL);
+ if (phb == NULL)
+ return NULL;
+
+ if (setup_phb(dev, phb, addr_size_words))
+ return NULL;
+
+ phb->is_dynamic = 1;
+
+ /* TODO: linux,pci-domain? */
+
+ return phb;
+}
+
+unsigned long __init find_and_init_phbs(void)
+{
+ struct device_node *node;
+ struct pci_controller *phb;
+ unsigned int root_size_cells = 0;
+ unsigned int index;
+ unsigned int *opprop = NULL;
+ struct device_node *root = of_find_node_by_path("/");
+
+ if (ppc64_interrupt_controller == IC_OPEN_PIC) {
+ opprop = (unsigned int *)get_property(root,
+ "platform-open-pic", NULL);
+ }
+
+ root_size_cells = prom_n_size_cells(root);
+
+ index = 0;
+
+ for (node = of_get_next_child(root, NULL);
+ node != NULL;
+ node = of_get_next_child(root, node)) {
+ if (node->type == NULL || strcmp(node->type, "pci") != 0)
+ continue;
+
+ phb = alloc_phb(node, root_size_cells);
+ if (!phb)
+ continue;
+
+ pci_process_bridge_OF_ranges(phb, node, 0);
+ pci_setup_phb_io(phb, index == 0);
+#ifdef CONFIG_PPC_PSERIES
+ if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) {
+ int addr = root_size_cells * (index + 2) - 1;
+ mpic_assign_isu(pSeries_mpic, index, opprop[addr]);
+ }
+#endif
+ index++;
+ }
+
+ of_node_put(root);
+ pci_devs_phb_init();
+
+ /*
+ * pci_probe_only and pci_assign_all_buses can be set via properties
+ * in chosen.
+ */
+ if (of_chosen) {
+ int *prop;
+
+ prop = (int *)get_property(of_chosen, "linux,pci-probe-only",
+ NULL);
+ if (prop)
+ pci_probe_only = *prop;
+
+ prop = (int *)get_property(of_chosen,
+ "linux,pci-assign-all-buses", NULL);
+ if (prop)
+ pci_assign_all_buses = *prop;
+ }
+
+ return 0;
+}
+
+struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
+{
+ struct device_node *root = of_find_node_by_path("/");
+ unsigned int root_size_cells = 0;
+ struct pci_controller *phb;
+ int primary;
+
+ root_size_cells = prom_n_size_cells(root);
+
+ primary = list_empty(&hose_list);
+ phb = alloc_phb_dynamic(dn, root_size_cells);
+ if (!phb)
+ return NULL;
+
+ pci_process_bridge_OF_ranges(phb, dn, primary);
+
+ pci_setup_phb_io_dynamic(phb, primary);
+ of_node_put(root);
+
+ pci_devs_phb_init_dynamic(phb);
+ scan_phb(phb);
+
+ return phb;
+}
+EXPORT_SYMBOL(init_phb_dynamic);
+
+/* RPA-specific bits for removing PHBs */
+int pcibios_remove_root_bus(struct pci_controller *phb)
+{
+ struct pci_bus *b = phb->bus;
+ struct resource *res;
+ int rc, i;
+
+ res = b->resource[0];
+ if (!res->flags) {
+ printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__,
+ b->name);
+ return 1;
+ }
+
+ rc = unmap_bus_range(b);
+ if (rc) {
+ printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+
+ if (release_resource(res)) {
+ printk(KERN_ERR "%s: failed to release IO on bus %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+
+ for (i = 1; i < 3; ++i) {
+ res = b->resource[i];
+ if (!res->flags && i == 0) {
+ printk(KERN_ERR "%s: no MEM resource for PHB %s\n",
+ __FUNCTION__, b->name);
+ return 1;
+ }
+ if (res->flags && release_resource(res)) {
+ printk(KERN_ERR
+ "%s: failed to release IO %d on bus %s\n",
+ __FUNCTION__, i, b->name);
+ return 1;
+ }
+ }
+
+ list_del(&phb->list_node);
+ if (phb->is_dynamic)
+ kfree(phb);
+
+ return 0;
+}
+EXPORT_SYMBOL(pcibios_remove_root_bus);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index e22856ecb5a0..33e7f2c7f194 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -33,6 +33,7 @@
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/processor.h>
+#include <asm/vdso_datapage.h>
#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
@@ -51,6 +52,9 @@
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/lmb.h>
+#include <asm/xmon.h>
+
+#include "setup.h"
#undef DEBUG
@@ -60,6 +64,13 @@
#define DBG(fmt...)
#endif
+#ifdef CONFIG_PPC_MULTIPLATFORM
+int _machine = 0;
+EXPORT_SYMBOL(_machine);
+#endif
+
+unsigned long klimit = (unsigned long) _end;
+
/*
* This still seems to be needed... -- paulus
*/
@@ -433,10 +444,8 @@ void __init check_for_initrd(void)
if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
initrd_end > initrd_start)
ROOT_DEV = Root_RAM0;
- else {
- printk("Bogus initrd %08lx %08lx\n", initrd_start, initrd_end);
+ else
initrd_start = initrd_end = 0;
- }
if (initrd_start)
printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
@@ -510,8 +519,8 @@ void __init smp_setup_cpu_maps(void)
* On pSeries LPAR, we need to know how many cpus
* could possibly be added to this partition.
*/
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR &&
- (dn = of_find_node_by_path("/rtas"))) {
+ if (_machine == PLATFORM_PSERIES_LPAR &&
+ (dn = of_find_node_by_path("/rtas"))) {
int num_addr_cell, num_size_cell, maxcpus;
unsigned int *ireg;
@@ -555,7 +564,27 @@ void __init smp_setup_cpu_maps(void)
cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]);
}
- systemcfg->processorCount = num_present_cpus();
+ vdso_data->processorCount = num_present_cpus();
#endif /* CONFIG_PPC64 */
}
#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_XMON
+static int __init early_xmon(char *p)
+{
+ /* ensure xmon is enabled */
+ if (p) {
+ if (strncmp(p, "on", 2) == 0)
+ xmon_init(1);
+ if (strncmp(p, "off", 3) == 0)
+ xmon_init(0);
+ if (strncmp(p, "early", 5) != 0)
+ return 0;
+ }
+ xmon_init(1);
+ debugger(NULL);
+
+ return 0;
+}
+early_param("xmon", early_xmon);
+#endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
new file mode 100644
index 000000000000..2ebba755272e
--- /dev/null
+++ b/arch/powerpc/kernel/setup.h
@@ -0,0 +1,6 @@
+#ifndef _POWERPC_KERNEL_SETUP_H
+#define _POWERPC_KERNEL_SETUP_H
+
+void check_for_initrd(void);
+
+#endif /* _POWERPC_KERNEL_SETUP_H */
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 3af2631e3fab..c98cfcc9cd9a 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -40,6 +40,8 @@
#include <asm/xmon.h>
#include <asm/time.h>
+#include "setup.h"
+
#define DBG(fmt...)
#if defined CONFIG_KGDB
@@ -70,8 +72,6 @@ unsigned int DMA_MODE_WRITE;
int have_of = 1;
#ifdef CONFIG_PPC_MULTIPLATFORM
-int _machine = 0;
-
extern void prep_init(void);
extern void pmac_init(void);
extern void chrp_init(void);
@@ -279,7 +279,6 @@ arch_initcall(ppc_init);
/* Warning, IO base is not yet inited */
void __init setup_arch(char **cmdline_p)
{
- extern char *klimit;
extern void do_init_bootmem(void);
/* so udelay does something sensible, assume <= 1000 bogomips */
@@ -303,14 +302,9 @@ void __init setup_arch(char **cmdline_p)
pmac_feature_init(); /* New cool way */
#endif
-#ifdef CONFIG_XMON
- xmon_map_scc();
- if (strstr(cmd_line, "xmon")) {
- xmon_init(1);
- debugger(NULL);
- }
-#endif /* CONFIG_XMON */
- if ( ppc_md.progress ) ppc_md.progress("setup_arch: enter", 0x3eab);
+#ifdef CONFIG_XMON_DEFAULT
+ xmon_init(1);
+#endif
#if defined(CONFIG_KGDB)
if (ppc_md.kgdb_map_scc)
@@ -343,7 +337,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.start_code = PAGE_OFFSET;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) klimit;
+ init_mm.brk = klimit;
/* Save unparsed command line copy for /proc/cmdline */
strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 0471e843b6c5..fdbd9f9122f2 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -57,10 +57,11 @@
#include <asm/lmb.h>
#include <asm/iseries/it_lp_naca.h>
#include <asm/firmware.h>
-#include <asm/systemcfg.h>
#include <asm/xmon.h>
#include <asm/udbg.h>
+#include "setup.h"
+
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
@@ -94,15 +95,6 @@ extern void udbg_init_maple_realmode(void);
do { udbg_putc = call_rtas_display_status_delay; } while(0)
#endif
-/* extern void *stab; */
-extern unsigned long klimit;
-
-extern void mm_init_ppc64(void);
-extern void stab_initialize(unsigned long stab);
-extern void htab_initialize(void);
-extern void early_init_devtree(void *flat_dt);
-extern void unflatten_device_tree(void);
-
int have_of = 1;
int boot_cpuid = 0;
int boot_cpuid_phys = 0;
@@ -254,11 +246,10 @@ void __init early_setup(unsigned long dt_ptr)
* Iterate all ppc_md structures until we find the proper
* one for the current machine type
*/
- DBG("Probing machine type for platform %x...\n",
- systemcfg->platform);
+ DBG("Probing machine type for platform %x...\n", _machine);
for (mach = machines; *mach; mach++) {
- if ((*mach)->probe(systemcfg->platform))
+ if ((*mach)->probe(_machine))
break;
}
/* What can we do if we didn't find ? */
@@ -290,6 +281,28 @@ void __init early_setup(unsigned long dt_ptr)
DBG(" <- early_setup()\n");
}
+#ifdef CONFIG_SMP
+void early_setup_secondary(void)
+{
+ struct paca_struct *lpaca = get_paca();
+
+ /* Mark enabled in PACA */
+ lpaca->proc_enabled = 0;
+
+ /* Initialize hash table for that CPU */
+ htab_initialize_secondary();
+
+ /* Initialize STAB/SLB. We use a virtual address as it works
+ * in real mode on pSeries and we want a virutal address on
+ * iSeries anyway
+ */
+ if (cpu_has_feature(CPU_FTR_SLB))
+ slb_initialize();
+ else
+ stab_initialize(lpaca->stab_addr);
+}
+
+#endif /* CONFIG_SMP */
#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
void smp_release_cpus(void)
@@ -315,7 +328,8 @@ void smp_release_cpus(void)
#endif /* CONFIG_SMP || CONFIG_KEXEC */
/*
- * Initialize some remaining members of the ppc64_caches and systemcfg structures
+ * Initialize some remaining members of the ppc64_caches and systemcfg
+ * structures
* (at least until we get rid of them completely). This is mostly some
* cache informations about the CPU that will be used by cache flush
* routines and/or provided to userland
@@ -340,7 +354,7 @@ static void __init initialize_cache_info(void)
const char *dc, *ic;
/* Then read cache informations */
- if (systemcfg->platform == PLATFORM_POWERMAC) {
+ if (_machine == PLATFORM_POWERMAC) {
dc = "d-cache-block-size";
ic = "i-cache-block-size";
} else {
@@ -360,9 +374,8 @@ static void __init initialize_cache_info(void)
DBG("Argh, can't find dcache properties ! "
"sizep: %p, lsizep: %p\n", sizep, lsizep);
- systemcfg->dcache_size = ppc64_caches.dsize = size;
- systemcfg->dcache_line_size =
- ppc64_caches.dline_size = lsize;
+ ppc64_caches.dsize = size;
+ ppc64_caches.dline_size = lsize;
ppc64_caches.log_dline_size = __ilog2(lsize);
ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
@@ -378,20 +391,13 @@ static void __init initialize_cache_info(void)
DBG("Argh, can't find icache properties ! "
"sizep: %p, lsizep: %p\n", sizep, lsizep);
- systemcfg->icache_size = ppc64_caches.isize = size;
- systemcfg->icache_line_size =
- ppc64_caches.iline_size = lsize;
+ ppc64_caches.isize = size;
+ ppc64_caches.iline_size = lsize;
ppc64_caches.log_iline_size = __ilog2(lsize);
ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
}
}
- /* Add an eye catcher and the systemcfg layout version number */
- strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
- systemcfg->version.major = SYSTEMCFG_MAJOR;
- systemcfg->version.minor = SYSTEMCFG_MINOR;
- systemcfg->processor = mfspr(SPRN_PVR);
-
DBG(" <- initialize_cache_info()\n");
}
@@ -478,15 +484,14 @@ void __init setup_system(void)
printk("-----------------------------------------------------\n");
printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size);
- printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller);
- printk("systemcfg = 0x%p\n", systemcfg);
- printk("systemcfg->platform = 0x%x\n", systemcfg->platform);
- printk("systemcfg->processorCount = 0x%lx\n", systemcfg->processorCount);
- printk("systemcfg->physicalMemorySize = 0x%lx\n", systemcfg->physicalMemorySize);
+ printk("ppc64_interrupt_controller = 0x%ld\n",
+ ppc64_interrupt_controller);
+ printk("platform = 0x%x\n", _machine);
+ printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size());
printk("ppc64_caches.dcache_line_size = 0x%x\n",
- ppc64_caches.dline_size);
+ ppc64_caches.dline_size);
printk("ppc64_caches.icache_line_size = 0x%x\n",
- ppc64_caches.iline_size);
+ ppc64_caches.iline_size);
printk("htab_address = 0x%p\n", htab_address);
printk("htab_hash_mask = 0x%lx\n", htab_hash_mask);
printk("-----------------------------------------------------\n");
@@ -551,33 +556,6 @@ static void __init emergency_stack_init(void)
}
/*
- * Called from setup_arch to initialize the bitmap of available
- * syscalls in the systemcfg page
- */
-void __init setup_syscall_map(void)
-{
- unsigned int i, count64 = 0, count32 = 0;
- extern unsigned long *sys_call_table;
- extern unsigned long sys_ni_syscall;
-
-
- for (i = 0; i < __NR_syscalls; i++) {
- if (sys_call_table[i*2] != sys_ni_syscall) {
- count64++;
- systemcfg->syscall_map_64[i >> 5] |=
- 0x80000000UL >> (i & 0x1f);
- }
- if (sys_call_table[i*2+1] != sys_ni_syscall) {
- count32++;
- systemcfg->syscall_map_32[i >> 5] |=
- 0x80000000UL >> (i & 0x1f);
- }
- }
- printk(KERN_INFO "Syscall map setup, %d 32-bit and %d 64-bit syscalls\n",
- count32, count64);
-}
-
-/*
* Called into from start_kernel, after lock_kernel has been called.
* Initializes bootmem, which is unsed to manage page allocation until
* mem_init is called.
@@ -618,9 +596,6 @@ void __init setup_arch(char **cmdline_p)
do_init_bootmem();
sparse_init();
- /* initialize the syscall map in systemcfg */
- setup_syscall_map();
-
#ifdef CONFIG_DUMMY_CONSOLE
conswitchp = &dummy_con;
#endif
@@ -858,26 +833,6 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);
-#ifdef CONFIG_XMON
-static int __init early_xmon(char *p)
-{
- /* ensure xmon is enabled */
- if (p) {
- if (strncmp(p, "on", 2) == 0)
- xmon_init(1);
- if (strncmp(p, "off", 3) == 0)
- xmon_init(0);
- if (strncmp(p, "early", 5) != 0)
- return 0;
- }
- xmon_init(1);
- debugger(NULL);
-
- return 0;
-}
-early_param("xmon", early_xmon);
-#endif
-
void cpu_die(void)
{
if (ppc_md.cpu_die)
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 081d931eae48..8bdf95b7e420 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -42,10 +42,11 @@
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
+#include <asm/sigcontext.h>
+#include <asm/vdso.h>
#ifdef CONFIG_PPC64
#include "ppc32.h"
#include <asm/unistd.h>
-#include <asm/vdso.h>
#else
#include <asm/ucontext.h>
#include <asm/pgtable.h>
@@ -808,14 +809,11 @@ static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka,
/* Save user registers on the stack */
frame = &rt_sf->uc.uc_mcontext;
-#ifdef CONFIG_PPC64
if (vdso32_rt_sigtramp && current->thread.vdso_base) {
if (save_user_regs(regs, frame, 0))
goto badframe;
regs->link = current->thread.vdso_base + vdso32_rt_sigtramp;
- } else
-#endif
- {
+ } else {
if (save_user_regs(regs, frame, __NR_rt_sigreturn))
goto badframe;
regs->link = (unsigned long) frame->tramp;
@@ -1089,14 +1087,11 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
|| __put_user(sig, &sc->signal))
goto badframe;
-#ifdef CONFIG_PPC64
if (vdso32_sigtramp && current->thread.vdso_base) {
if (save_user_regs(regs, &frame->mctx, 0))
goto badframe;
regs->link = current->thread.vdso_base + vdso32_sigtramp;
- } else
-#endif
- {
+ } else {
if (save_user_regs(regs, &frame->mctx, __NR_sigreturn))
goto badframe;
regs->link = (unsigned long) frame->mctx.tramp;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5c330c3366e4..62dfc5b8d765 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -44,6 +44,7 @@
#include <asm/cputable.h>
#include <asm/system.h>
#include <asm/mpic.h>
+#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
@@ -368,9 +369,11 @@ int generic_cpu_disable(void)
if (cpu == boot_cpuid)
return -EBUSY;
- systemcfg->processorCount--;
cpu_clear(cpu, cpu_online_map);
+#ifdef CONFIG_PPC64
+ vdso_data->processorCount--;
fixup_irqs(cpu_online_map);
+#endif
return 0;
}
@@ -388,9 +391,11 @@ int generic_cpu_enable(unsigned int cpu)
while (!cpu_online(cpu))
cpu_relax();
+#ifdef CONFIG_PPC64
fixup_irqs(cpu_online_map);
/* counter the irq disable in fixup_irqs */
local_irq_enable();
+#endif
return 0;
}
@@ -419,7 +424,9 @@ void generic_mach_cpu_die(void)
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
cpu_relax();
+#ifdef CONFIG_PPC64
flush_tlb_pending();
+#endif
cpu_set(cpu, cpu_online_map);
local_irq_enable();
}
@@ -510,6 +517,7 @@ int __devinit start_secondary(void *unused)
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
+ preempt_disable();
cpu_callin_map[cpu] = 1;
smp_ops->setup_cpu(cpu);
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index a8210ed5c686..9c921d1c4084 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -52,7 +52,6 @@
#include <asm/semaphore.h>
#include <asm/time.h>
#include <asm/mmu_context.h>
-#include <asm/systemcfg.h>
#include <asm/ppc-pci.h>
/* readdir & getdents */
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
new file mode 100644
index 000000000000..0f0c3a9ae2e5
--- /dev/null
+++ b/arch/powerpc/kernel/sysfs.c
@@ -0,0 +1,383 @@
+#include <linux/config.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/nodemask.h>
+#include <linux/cpumask.h>
+#include <linux/notifier.h>
+
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/prom.h>
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+/* SMT stuff */
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+/* default to snooze disabled */
+DEFINE_PER_CPU(unsigned long, smt_snooze_delay);
+
+static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+ ssize_t ret;
+ unsigned long snooze;
+
+ ret = sscanf(buf, "%lu", &snooze);
+ if (ret != 1)
+ return -EINVAL;
+
+ per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze;
+
+ return count;
+}
+
+static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+ return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id));
+}
+
+static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
+ store_smt_snooze_delay);
+
+/* Only parse OF options if the matching cmdline option was not specified */
+static int smt_snooze_cmdline;
+
+static int __init smt_setup(void)
+{
+ struct device_node *options;
+ unsigned int *val;
+ unsigned int cpu;
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return 1;
+
+ options = find_path_device("/options");
+ if (!options)
+ return 1;
+
+ val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay",
+ NULL);
+ if (!smt_snooze_cmdline && val) {
+ for_each_cpu(cpu)
+ per_cpu(smt_snooze_delay, cpu) = *val;
+ }
+
+ return 1;
+}
+__initcall(smt_setup);
+
+static int __init setup_smt_snooze_delay(char *str)
+{
+ unsigned int cpu;
+ int snooze;
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return 1;
+
+ smt_snooze_cmdline = 1;
+
+ if (get_option(&str, &snooze)) {
+ for_each_cpu(cpu)
+ per_cpu(smt_snooze_delay, cpu) = snooze;
+ }
+
+ return 1;
+}
+__setup("smt-snooze-delay=", setup_smt_snooze_delay);
+
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+/*
+ * Enabling PMCs will slow partition context switch times so we only do
+ * it the first time we write to the PMCs.
+ */
+
+static DEFINE_PER_CPU(char, pmcs_enabled);
+
+void ppc64_enable_pmcs(void)
+{
+ /* Only need to enable them once */
+ if (__get_cpu_var(pmcs_enabled))
+ return;
+
+ __get_cpu_var(pmcs_enabled) = 1;
+
+ if (ppc_md.enable_pmcs)
+ ppc_md.enable_pmcs();
+}
+EXPORT_SYMBOL(ppc64_enable_pmcs);
+
+/* XXX convert to rusty's on_one_cpu */
+static unsigned long run_on_cpu(unsigned long cpu,
+ unsigned long (*func)(unsigned long),
+ unsigned long arg)
+{
+ cpumask_t old_affinity = current->cpus_allowed;
+ unsigned long ret;
+
+ /* should return -EINVAL to userspace */
+ if (set_cpus_allowed(current, cpumask_of_cpu(cpu)))
+ return 0;
+
+ ret = func(arg);
+
+ set_cpus_allowed(current, old_affinity);
+
+ return ret;
+}
+
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+static unsigned long read_##NAME(unsigned long junk) \
+{ \
+ return mfspr(ADDRESS); \
+} \
+static unsigned long write_##NAME(unsigned long val) \
+{ \
+ ppc64_enable_pmcs(); \
+ mtspr(ADDRESS, val); \
+ return 0; \
+} \
+static ssize_t show_##NAME(struct sys_device *dev, char *buf) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+ unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \
+ return sprintf(buf, "%lx\n", val); \
+} \
+static ssize_t __attribute_used__ \
+ store_##NAME(struct sys_device *dev, const char *buf, size_t count) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
+ unsigned long val; \
+ int ret = sscanf(buf, "%lx", &val); \
+ if (ret != 1) \
+ return -EINVAL; \
+ run_on_cpu(cpu->sysdev.id, write_##NAME, val); \
+ return count; \
+}
+
+SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
+SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
+SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
+SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
+SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
+SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
+SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
+SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
+SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
+SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
+SYSFS_PMCSETUP(purr, SPRN_PURR);
+
+static SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0);
+static SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1);
+static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1);
+static SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2);
+static SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3);
+static SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4);
+static SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5);
+static SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6);
+static SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7);
+static SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8);
+static SYSDEV_ATTR(purr, 0600, show_purr, NULL);
+
+static void register_cpu_online(unsigned int cpu)
+{
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct sys_device *s = &c->sysdev;
+
+#ifndef CONFIG_PPC_ISERIES
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_create_file(s, &attr_smt_snooze_delay);
+#endif
+
+ /* PMC stuff */
+
+ sysdev_create_file(s, &attr_mmcr0);
+ sysdev_create_file(s, &attr_mmcr1);
+
+ if (cpu_has_feature(CPU_FTR_MMCRA))
+ sysdev_create_file(s, &attr_mmcra);
+
+ if (cur_cpu_spec->num_pmcs >= 1)
+ sysdev_create_file(s, &attr_pmc1);
+ if (cur_cpu_spec->num_pmcs >= 2)
+ sysdev_create_file(s, &attr_pmc2);
+ if (cur_cpu_spec->num_pmcs >= 3)
+ sysdev_create_file(s, &attr_pmc3);
+ if (cur_cpu_spec->num_pmcs >= 4)
+ sysdev_create_file(s, &attr_pmc4);
+ if (cur_cpu_spec->num_pmcs >= 5)
+ sysdev_create_file(s, &attr_pmc5);
+ if (cur_cpu_spec->num_pmcs >= 6)
+ sysdev_create_file(s, &attr_pmc6);
+ if (cur_cpu_spec->num_pmcs >= 7)
+ sysdev_create_file(s, &attr_pmc7);
+ if (cur_cpu_spec->num_pmcs >= 8)
+ sysdev_create_file(s, &attr_pmc8);
+
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_create_file(s, &attr_purr);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void unregister_cpu_online(unsigned int cpu)
+{
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct sys_device *s = &c->sysdev;
+
+ BUG_ON(c->no_control);
+
+#ifndef CONFIG_PPC_ISERIES
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_remove_file(s, &attr_smt_snooze_delay);
+#endif
+
+ /* PMC stuff */
+
+ sysdev_remove_file(s, &attr_mmcr0);
+ sysdev_remove_file(s, &attr_mmcr1);
+
+ if (cpu_has_feature(CPU_FTR_MMCRA))
+ sysdev_remove_file(s, &attr_mmcra);
+
+ if (cur_cpu_spec->num_pmcs >= 1)
+ sysdev_remove_file(s, &attr_pmc1);
+ if (cur_cpu_spec->num_pmcs >= 2)
+ sysdev_remove_file(s, &attr_pmc2);
+ if (cur_cpu_spec->num_pmcs >= 3)
+ sysdev_remove_file(s, &attr_pmc3);
+ if (cur_cpu_spec->num_pmcs >= 4)
+ sysdev_remove_file(s, &attr_pmc4);
+ if (cur_cpu_spec->num_pmcs >= 5)
+ sysdev_remove_file(s, &attr_pmc5);
+ if (cur_cpu_spec->num_pmcs >= 6)
+ sysdev_remove_file(s, &attr_pmc6);
+ if (cur_cpu_spec->num_pmcs >= 7)
+ sysdev_remove_file(s, &attr_pmc7);
+ if (cur_cpu_spec->num_pmcs >= 8)
+ sysdev_remove_file(s, &attr_pmc8);
+
+ if (cpu_has_feature(CPU_FTR_SMT))
+ sysdev_remove_file(s, &attr_purr);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __devinit sysfs_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned int)(long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ register_cpu_online(cpu);
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ unregister_cpu_online(cpu);
+ break;
+#endif
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __devinitdata sysfs_cpu_nb = {
+ .notifier_call = sysfs_cpu_notify,
+};
+
+/* NUMA stuff */
+
+#ifdef CONFIG_NUMA
+static struct node node_devices[MAX_NUMNODES];
+
+static void register_nodes(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ if (node_online(i)) {
+ int p_node = parent_node(i);
+ struct node *parent = NULL;
+
+ if (p_node != i)
+ parent = &node_devices[p_node];
+
+ register_node(&node_devices[i], i, parent);
+ }
+ }
+}
+#else
+static void register_nodes(void)
+{
+ return;
+}
+#endif
+
+/* Only valid if CPU is present. */
+static ssize_t show_physical_id(struct sys_device *dev, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, sysdev);
+
+ return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id));
+}
+static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL);
+
+static int __init topology_init(void)
+{
+ int cpu;
+ struct node *parent = NULL;
+
+ register_nodes();
+
+ register_cpu_notifier(&sysfs_cpu_nb);
+
+ for_each_cpu(cpu) {
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+#ifdef CONFIG_NUMA
+ /* The node to which a cpu belongs can't be known
+ * until the cpu is made present.
+ */
+ parent = NULL;
+ if (cpu_present(cpu))
+ parent = &node_devices[cpu_to_node(cpu)];
+#endif
+ /*
+ * For now, we just see if the system supports making
+ * the RTAS calls for CPU hotplug. But, there may be a
+ * more comprehensive way to do this for an individual
+ * CPU. For instance, the boot cpu might never be valid
+ * for hotplugging.
+ */
+ if (!ppc_md.cpu_die)
+ c->no_control = 1;
+
+ if (cpu_online(cpu) || (c->no_control == 0)) {
+ register_cpu(c, cpu, parent);
+
+ sysdev_create_file(&c->sysdev, &attr_physical_id);
+ }
+
+ if (cpu_online(cpu))
+ register_cpu_online(cpu);
+ }
+
+ return 0;
+}
+__initcall(topology_init);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a6282b625b44..070b4b458aaf 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -62,8 +62,8 @@
#include <asm/irq.h>
#include <asm/div64.h>
#include <asm/smp.h>
+#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
-#include <asm/systemcfg.h>
#include <asm/firmware.h>
#endif
#ifdef CONFIG_PPC_ISERIES
@@ -261,7 +261,6 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
do_gtod.varp = temp_varp;
do_gtod.var_idx = temp_idx;
-#ifdef CONFIG_PPC64
/*
* tb_update_count is used to allow the userspace gettimeofday code
* to assure itself that it sees a consistent view of the tb_to_xs and
@@ -271,14 +270,15 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
* tb_to_xs and stamp_xsec values are consistent. If not, then it
* loops back and reads them again until this criteria is met.
*/
- ++(systemcfg->tb_update_count);
+ ++(vdso_data->tb_update_count);
smp_wmb();
- systemcfg->tb_orig_stamp = new_tb_stamp;
- systemcfg->stamp_xsec = new_stamp_xsec;
- systemcfg->tb_to_xs = new_tb_to_xs;
+ vdso_data->tb_orig_stamp = new_tb_stamp;
+ vdso_data->stamp_xsec = new_stamp_xsec;
+ vdso_data->tb_to_xs = new_tb_to_xs;
+ vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
+ vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
smp_wmb();
- ++(systemcfg->tb_update_count);
-#endif
+ ++(vdso_data->tb_update_count);
}
/*
@@ -357,8 +357,8 @@ static void iSeries_tb_recal(void)
do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
tb_to_xs = divres.result_low;
do_gtod.varp->tb_to_xs = tb_to_xs;
- systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
- systemcfg->tb_to_xs = tb_to_xs;
+ vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
+ vdso_data->tb_to_xs = tb_to_xs;
}
else {
printk( "Titan recalibrate: FAILED (difference > 4 percent)\n"
@@ -483,6 +483,8 @@ void __init smp_space_timers(unsigned int max_cpus)
unsigned long offset = tb_ticks_per_jiffy / max_cpus;
unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid);
+ /* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */
+ previous_tb -= tb_ticks_per_jiffy;
for_each_cpu(i) {
if (i != boot_cpuid) {
previous_tb += offset;
@@ -558,10 +560,8 @@ int do_settimeofday(struct timespec *tv)
new_xsec += (u64)new_sec * XSEC_PER_SEC - tb_delta_xs;
update_gtod(tb_last_jiffy, new_xsec, do_gtod.varp->tb_to_xs);
-#ifdef CONFIG_PPC64
- systemcfg->tz_minuteswest = sys_tz.tz_minuteswest;
- systemcfg->tz_dsttime = sys_tz.tz_dsttime;
-#endif
+ vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ vdso_data->tz_dsttime = sys_tz.tz_dsttime;
write_sequnlock_irqrestore(&xtime_lock, flags);
clock_was_set();
@@ -710,13 +710,12 @@ void __init time_init(void)
do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
do_gtod.varp->tb_to_xs = tb_to_xs;
do_gtod.tb_to_us = tb_to_us;
-#ifdef CONFIG_PPC64
- systemcfg->tb_orig_stamp = tb_last_jiffy;
- systemcfg->tb_update_count = 0;
- systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
- systemcfg->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
- systemcfg->tb_to_xs = tb_to_xs;
-#endif
+
+ vdso_data->tb_orig_stamp = tb_last_jiffy;
+ vdso_data->tb_update_count = 0;
+ vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
+ vdso_data->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
+ vdso_data->tb_to_xs = tb_to_xs;
time_freq = 0;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 32f215825e8d..1511454c4690 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -49,7 +49,6 @@
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#include <asm/processor.h>
-#include <asm/systemcfg.h>
#endif
#ifdef CONFIG_PPC64 /* XXX */
@@ -129,7 +128,7 @@ int die(const char *str, struct pt_regs *regs, long err)
nl = 1;
#endif
#ifdef CONFIG_PPC64
- switch (systemcfg->platform) {
+ switch (_machine) {
case PLATFORM_PSERIES:
printk("PSERIES ");
nl = 1;
@@ -887,10 +886,6 @@ void altivec_unavailable_exception(struct pt_regs *regs)
die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
}
-#ifdef CONFIG_PPC64
-extern perf_irq_t perf_irq;
-#endif
-
#if defined(CONFIG_PPC64) || defined(CONFIG_E500)
void performance_monitor_exception(struct pt_regs *regs)
{
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
new file mode 100644
index 000000000000..0d878e72fc44
--- /dev/null
+++ b/arch/powerpc/kernel/udbg.c
@@ -0,0 +1,125 @@
+/*
+ * polling mode stateless debugging stuff, originally for NS16550 Serial Ports
+ *
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stdarg.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/console.h>
+#include <asm/processor.h>
+
+void (*udbg_putc)(unsigned char c);
+unsigned char (*udbg_getc)(void);
+int (*udbg_getc_poll)(void);
+
+/* udbg library, used by xmon et al */
+void udbg_puts(const char *s)
+{
+ if (udbg_putc) {
+ char c;
+
+ if (s && *s != '\0') {
+ while ((c = *s++) != '\0')
+ udbg_putc(c);
+ }
+ }
+#if 0
+ else {
+ printk("%s", s);
+ }
+#endif
+}
+
+int udbg_write(const char *s, int n)
+{
+ int remain = n;
+ char c;
+
+ if (!udbg_putc)
+ return 0;
+
+ if (s && *s != '\0') {
+ while (((c = *s++) != '\0') && (remain-- > 0)) {
+ udbg_putc(c);
+ }
+ }
+
+ return n - remain;
+}
+
+int udbg_read(char *buf, int buflen)
+{
+ char c, *p = buf;
+ int i;
+
+ if (!udbg_getc)
+ return 0;
+
+ for (i = 0; i < buflen; ++i) {
+ do {
+ c = udbg_getc();
+ } while (c == 0x11 || c == 0x13);
+ if (c == 0)
+ break;
+ *p++ = c;
+ }
+
+ return i;
+}
+
+#define UDBG_BUFSIZE 256
+void udbg_printf(const char *fmt, ...)
+{
+ unsigned char buf[UDBG_BUFSIZE];
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
+ udbg_puts(buf);
+ va_end(args);
+}
+
+/*
+ * Early boot console based on udbg
+ */
+static void udbg_console_write(struct console *con, const char *s,
+ unsigned int n)
+{
+ udbg_write(s, n);
+}
+
+static struct console udbg_console = {
+ .name = "udbg",
+ .write = udbg_console_write,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+};
+
+static int early_console_initialized;
+
+void __init disable_early_printk(void)
+{
+ if (!early_console_initialized)
+ return;
+ unregister_console(&udbg_console);
+ early_console_initialized = 0;
+}
+
+/* called by setup_system */
+void register_early_udbg_console(void)
+{
+ early_console_initialized = 1;
+ register_console(&udbg_console);
+}
+
+#if 0 /* if you want to use this as a regular output console */
+console_initcall(register_udbg_console);
+#endif
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
new file mode 100644
index 000000000000..9313574ab935
--- /dev/null
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -0,0 +1,123 @@
+/*
+ * udbg for for NS16550 compatable serial ports
+ *
+ * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <asm/udbg.h>
+#include <asm/io.h>
+
+extern u8 real_readb(volatile u8 __iomem *addr);
+extern void real_writeb(u8 data, volatile u8 __iomem *addr);
+
+struct NS16550 {
+ /* this struct must be packed */
+ unsigned char rbr; /* 0 */
+ unsigned char ier; /* 1 */
+ unsigned char fcr; /* 2 */
+ unsigned char lcr; /* 3 */
+ unsigned char mcr; /* 4 */
+ unsigned char lsr; /* 5 */
+ unsigned char msr; /* 6 */
+ unsigned char scr; /* 7 */
+};
+
+#define thr rbr
+#define iir fcr
+#define dll rbr
+#define dlm ier
+#define dlab lcr
+
+#define LSR_DR 0x01 /* Data ready */
+#define LSR_OE 0x02 /* Overrun */
+#define LSR_PE 0x04 /* Parity error */
+#define LSR_FE 0x08 /* Framing error */
+#define LSR_BI 0x10 /* Break */
+#define LSR_THRE 0x20 /* Xmit holding register empty */
+#define LSR_TEMT 0x40 /* Xmitter empty */
+#define LSR_ERR 0x80 /* Error */
+
+static volatile struct NS16550 __iomem *udbg_comport;
+
+static void udbg_550_putc(unsigned char c)
+{
+ if (udbg_comport) {
+ while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ out_8(&udbg_comport->thr, c);
+ if (c == '\n')
+ udbg_550_putc('\r');
+ }
+}
+
+static int udbg_550_getc_poll(void)
+{
+ if (udbg_comport) {
+ if ((in_8(&udbg_comport->lsr) & LSR_DR) != 0)
+ return in_8(&udbg_comport->rbr);
+ else
+ return -1;
+ }
+ return -1;
+}
+
+static unsigned char udbg_550_getc(void)
+{
+ if (udbg_comport) {
+ while ((in_8(&udbg_comport->lsr) & LSR_DR) == 0)
+ /* wait for char */;
+ return in_8(&udbg_comport->rbr);
+ }
+ return 0;
+}
+
+void udbg_init_uart(void __iomem *comport, unsigned int speed)
+{
+ u16 dll = speed ? (115200 / speed) : 12;
+
+ if (comport) {
+ udbg_comport = (struct NS16550 __iomem *)comport;
+ out_8(&udbg_comport->lcr, 0x00);
+ out_8(&udbg_comport->ier, 0xff);
+ out_8(&udbg_comport->ier, 0x00);
+ out_8(&udbg_comport->lcr, 0x80); /* Access baud rate */
+ out_8(&udbg_comport->dll, dll & 0xff); /* 1 = 115200, 2 = 57600,
+ 3 = 38400, 12 = 9600 baud */
+ out_8(&udbg_comport->dlm, dll >> 8); /* dll >> 8 which should be zero
+ for fast rates; */
+ out_8(&udbg_comport->lcr, 0x03); /* 8 data, 1 stop, no parity */
+ out_8(&udbg_comport->mcr, 0x03); /* RTS/DTR */
+ out_8(&udbg_comport->fcr ,0x07); /* Clear & enable FIFOs */
+ udbg_putc = udbg_550_putc;
+ udbg_getc = udbg_550_getc;
+ udbg_getc_poll = udbg_550_getc_poll;
+ }
+}
+
+#ifdef CONFIG_PPC_MAPLE
+void udbg_maple_real_putc(unsigned char c)
+{
+ if (udbg_comport) {
+ while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+ /* wait for idle */;
+ real_writeb(c, &udbg_comport->thr); eieio();
+ if (c == '\n')
+ udbg_maple_real_putc('\r');
+ }
+}
+
+void udbg_init_maple_realmode(void)
+{
+ udbg_comport = (volatile struct NS16550 __iomem *)0xf40003f8;
+
+ udbg_putc = udbg_maple_real_putc;
+ udbg_getc = NULL;
+ udbg_getc_poll = NULL;
+}
+#endif /* CONFIG_PPC_MAPLE */
diff --git a/arch/powerpc/kernel/udbg_scc.c b/arch/powerpc/kernel/udbg_scc.c
new file mode 100644
index 000000000000..820c53551507
--- /dev/null
+++ b/arch/powerpc/kernel/udbg_scc.c
@@ -0,0 +1,135 @@
+/*
+ * udbg for for zilog scc ports as found on Apple PowerMacs
+ *
+ * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <asm/udbg.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pmac_feature.h>
+
+extern u8 real_readb(volatile u8 __iomem *addr);
+extern void real_writeb(u8 data, volatile u8 __iomem *addr);
+
+#define SCC_TXRDY 4
+#define SCC_RXRDY 1
+
+static volatile u8 __iomem *sccc;
+static volatile u8 __iomem *sccd;
+
+static void udbg_scc_putc(unsigned char c)
+{
+ if (sccc) {
+ while ((in_8(sccc) & SCC_TXRDY) == 0)
+ ;
+ out_8(sccd, c);
+ if (c == '\n')
+ udbg_scc_putc('\r');
+ }
+}
+
+static int udbg_scc_getc_poll(void)
+{
+ if (sccc) {
+ if ((in_8(sccc) & SCC_RXRDY) != 0)
+ return in_8(sccd);
+ else
+ return -1;
+ }
+ return -1;
+}
+
+static unsigned char udbg_scc_getc(void)
+{
+ if (sccc) {
+ while ((in_8(sccc) & SCC_RXRDY) == 0)
+ ;
+ return in_8(sccd);
+ }
+ return 0;
+}
+
+static unsigned char scc_inittab[] = {
+ 13, 0, /* set baud rate divisor */
+ 12, 0,
+ 14, 1, /* baud rate gen enable, src=rtxc */
+ 11, 0x50, /* clocks = br gen */
+ 5, 0xea, /* tx 8 bits, assert DTR & RTS */
+ 4, 0x46, /* x16 clock, 1 stop */
+ 3, 0xc1, /* rx enable, 8 bits */
+};
+
+void udbg_init_scc(struct device_node *np)
+{
+ u32 *reg;
+ unsigned long addr;
+ int i, x;
+
+ if (np == NULL)
+ np = of_find_node_by_name(NULL, "escc");
+ if (np == NULL || np->parent == NULL)
+ return;
+
+ udbg_printf("found SCC...\n");
+ /* Get address within mac-io ASIC */
+ reg = (u32 *)get_property(np, "reg", NULL);
+ if (reg == NULL)
+ return;
+ addr = reg[0];
+ udbg_printf("local addr: %lx\n", addr);
+ /* Get address of mac-io PCI itself */
+ reg = (u32 *)get_property(np->parent, "assigned-addresses", NULL);
+ if (reg == NULL)
+ return;
+ addr += reg[2];
+ udbg_printf("final addr: %lx\n", addr);
+
+ /* Setup for 57600 8N1 */
+ addr += 0x20;
+ sccc = (volatile u8 * __iomem) ioremap(addr & PAGE_MASK, PAGE_SIZE) ;
+ sccc += addr & ~PAGE_MASK;
+ sccd = sccc + 0x10;
+
+ udbg_printf("ioremap result sccc: %p\n", sccc);
+ mb();
+
+ for (i = 20000; i != 0; --i)
+ x = in_8(sccc);
+ out_8(sccc, 0x09); /* reset A or B side */
+ out_8(sccc, 0xc0);
+ for (i = 0; i < sizeof(scc_inittab); ++i)
+ out_8(sccc, scc_inittab[i]);
+
+ udbg_putc = udbg_scc_putc;
+ udbg_getc = udbg_scc_getc;
+ udbg_getc_poll = udbg_scc_getc_poll;
+
+ udbg_puts("Hello World !\n");
+}
+
+static void udbg_real_scc_putc(unsigned char c)
+{
+ while ((real_readb(sccc) & SCC_TXRDY) == 0)
+ ;
+ real_writeb(c, sccd);
+ if (c == '\n')
+ udbg_real_scc_putc('\r');
+}
+
+void udbg_init_pmac_realmode(void)
+{
+ sccc = (volatile u8 __iomem *)0x80013020ul;
+ sccd = (volatile u8 __iomem *)0x80013030ul;
+
+ udbg_putc = udbg_real_scc_putc;
+ udbg_getc = NULL;
+ udbg_getc_poll = NULL;
+}
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
new file mode 100644
index 000000000000..0d4d8bec0df4
--- /dev/null
+++ b/arch/powerpc/kernel/vdso.c
@@ -0,0 +1,746 @@
+/*
+ * linux/arch/ppc64/kernel/vdso.c
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ * <benh@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/security.h>
+#include <linux/bootmem.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/lmb.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* Max supported size for symbol names */
+#define MAX_SYMNAME 64
+
+extern char vdso32_start, vdso32_end;
+static void *vdso32_kbase = &vdso32_start;
+unsigned int vdso32_pages;
+unsigned long vdso32_sigtramp;
+unsigned long vdso32_rt_sigtramp;
+
+#ifdef CONFIG_PPC64
+extern char vdso64_start, vdso64_end;
+static void *vdso64_kbase = &vdso64_start;
+unsigned int vdso64_pages;
+unsigned long vdso64_rt_sigtramp;
+#endif /* CONFIG_PPC64 */
+
+/*
+ * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
+ * Once the early boot kernel code no longer needs to muck around
+ * with it, it will become dynamically allocated
+ */
+static union {
+ struct vdso_data data;
+ u8 page[PAGE_SIZE];
+} vdso_data_store __attribute__((__section__(".data.page_aligned")));
+struct vdso_data *vdso_data = &vdso_data_store.data;
+
+/* Format of the patch table */
+struct vdso_patch_def
+{
+ unsigned long ftr_mask, ftr_value;
+ const char *gen_name;
+ const char *fix_name;
+};
+
+/* Table of functions to patch based on the CPU type/revision
+ *
+ * Currently, we only change sync_dicache to do nothing on processors
+ * with a coherent icache
+ */
+static struct vdso_patch_def vdso_patches[] = {
+ {
+ CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
+ "__kernel_sync_dicache", "__kernel_sync_dicache_p5"
+ },
+ {
+ CPU_FTR_USE_TB, 0,
+ "__kernel_gettimeofday", NULL
+ },
+};
+
+/*
+ * Some infos carried around for each of them during parsing at
+ * boot time.
+ */
+struct lib32_elfinfo
+{
+ Elf32_Ehdr *hdr; /* ptr to ELF */
+ Elf32_Sym *dynsym; /* ptr to .dynsym section */
+ unsigned long dynsymsize; /* size of .dynsym section */
+ char *dynstr; /* ptr to .dynstr section */
+ unsigned long text; /* offset of .text section in .so */
+};
+
+struct lib64_elfinfo
+{
+ Elf64_Ehdr *hdr;
+ Elf64_Sym *dynsym;
+ unsigned long dynsymsize;
+ char *dynstr;
+ unsigned long text;
+};
+
+
+#ifdef __DEBUG
+static void dump_one_vdso_page(struct page *pg, struct page *upg)
+{
+ printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
+ page_count(pg),
+ pg->flags);
+ if (upg/* && pg != upg*/) {
+ printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg)
+ << PAGE_SHIFT),
+ page_count(upg),
+ upg->flags);
+ }
+ printk("\n");
+}
+
+static void dump_vdso_pages(struct vm_area_struct * vma)
+{
+ int i;
+
+ if (!vma || test_thread_flag(TIF_32BIT)) {
+ printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
+ for (i=0; i<vdso32_pages; i++) {
+ struct page *pg = virt_to_page(vdso32_kbase +
+ i*PAGE_SIZE);
+ struct page *upg = (vma && vma->vm_mm) ?
+ follow_page(vma->vm_mm, vma->vm_start +
+ i*PAGE_SIZE, 0)
+ : NULL;
+ dump_one_vdso_page(pg, upg);
+ }
+ }
+ if (!vma || !test_thread_flag(TIF_32BIT)) {
+ printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
+ for (i=0; i<vdso64_pages; i++) {
+ struct page *pg = virt_to_page(vdso64_kbase +
+ i*PAGE_SIZE);
+ struct page *upg = (vma && vma->vm_mm) ?
+ follow_page(vma->vm_mm, vma->vm_start +
+ i*PAGE_SIZE, 0)
+ : NULL;
+ dump_one_vdso_page(pg, upg);
+ }
+ }
+}
+#endif /* DEBUG */
+
+/*
+ * Keep a dummy vma_close for now, it will prevent VMA merging.
+ */
+static void vdso_vma_close(struct vm_area_struct * vma)
+{
+}
+
+/*
+ * Our nopage() function, maps in the actual vDSO kernel pages, they will
+ * be mapped read-only by do_no_page(), and eventually COW'ed, either
+ * right away for an initial write access, or by do_wp_page().
+ */
+static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
+ unsigned long address, int *type)
+{
+ unsigned long offset = address - vma->vm_start;
+ struct page *pg;
+#ifdef CONFIG_PPC64
+ void *vbase = test_thread_flag(TIF_32BIT) ?
+ vdso32_kbase : vdso64_kbase;
+#else
+ void *vbase = vdso32_kbase;
+#endif
+
+ DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n",
+ current->comm, address, offset);
+
+ if (address < vma->vm_start || address > vma->vm_end)
+ return NOPAGE_SIGBUS;
+
+ /*
+ * Last page is systemcfg.
+ */
+ if ((vma->vm_end - address) <= PAGE_SIZE)
+ pg = virt_to_page(vdso_data);
+ else
+ pg = virt_to_page(vbase + offset);
+
+ get_page(pg);
+ DBG(" ->page count: %d\n", page_count(pg));
+
+ return pg;
+}
+
+static struct vm_operations_struct vdso_vmops = {
+ .close = vdso_vma_close,
+ .nopage = vdso_vma_nopage,
+};
+
+/*
+ * This is called from binfmt_elf, we create the special vma for the
+ * vDSO and insert it into the mm struct tree
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm,
+ int executable_stack)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long vdso_pages;
+ unsigned long vdso_base;
+
+#ifdef CONFIG_PPC64
+ if (test_thread_flag(TIF_32BIT)) {
+ vdso_pages = vdso32_pages;
+ vdso_base = VDSO32_MBASE;
+ } else {
+ vdso_pages = vdso64_pages;
+ vdso_base = VDSO64_MBASE;
+ }
+#else
+ vdso_pages = vdso32_pages;
+ vdso_base = VDSO32_MBASE;
+#endif
+
+ current->thread.vdso_base = 0;
+
+ /* vDSO has a problem and was disabled, just don't "enable" it for the
+ * process
+ */
+ if (vdso_pages == 0)
+ return 0;
+
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (vma == NULL)
+ return -ENOMEM;
+
+ memset(vma, 0, sizeof(*vma));
+
+ /* Add a page to the vdso size for the data page */
+ vdso_pages ++;
+
+ /*
+ * pick a base address for the vDSO in process space. We try to put it
+ * at vdso_base which is the "natural" base for it, but we might fail
+ * and end up putting it elsewhere.
+ */
+ vdso_base = get_unmapped_area(NULL, vdso_base,
+ vdso_pages << PAGE_SHIFT, 0, 0);
+ if (vdso_base & ~PAGE_MASK) {
+ kmem_cache_free(vm_area_cachep, vma);
+ return (int)vdso_base;
+ }
+
+ current->thread.vdso_base = vdso_base;
+
+ vma->vm_mm = mm;
+ vma->vm_start = current->thread.vdso_base;
+ vma->vm_end = vma->vm_start + (vdso_pages << PAGE_SHIFT);
+
+ /*
+ * our vma flags don't have VM_WRITE so by default, the process isn't
+ * allowed to write those pages.
+ * gdb can break that with ptrace interface, and thus trigger COW on
+ * those pages but it's then your responsibility to never do that on
+ * the "data" page of the vDSO or you'll stop getting kernel updates
+ * and your nice userland gettimeofday will be totally dead.
+ * It's fine to use that for setting breakpoints in the vDSO code
+ * pages though
+ */
+ vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE |
+ VM_MAYEXEC | VM_RESERVED;
+ vma->vm_flags |= mm->def_flags;
+ vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
+ vma->vm_ops = &vdso_vmops;
+
+ down_write(&mm->mmap_sem);
+ if (insert_vm_struct(mm, vma)) {
+ up_write(&mm->mmap_sem);
+ kmem_cache_free(vm_area_cachep, vma);
+ return -ENOMEM;
+ }
+ mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ up_write(&mm->mmap_sem);
+
+ return 0;
+}
+
+static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
+ unsigned long *size)
+{
+ Elf32_Shdr *sechdrs;
+ unsigned int i;
+ char *secnames;
+
+ /* Grab section headers and strings so we can tell who is who */
+ sechdrs = (void *)ehdr + ehdr->e_shoff;
+ secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+ /* Find the section they want */
+ for (i = 1; i < ehdr->e_shnum; i++) {
+ if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
+ if (size)
+ *size = sechdrs[i].sh_size;
+ return (void *)ehdr + sechdrs[i].sh_offset;
+ }
+ }
+ *size = 0;
+ return NULL;
+}
+
+static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib,
+ const char *symname)
+{
+ unsigned int i;
+ char name[MAX_SYMNAME], *c;
+
+ for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
+ if (lib->dynsym[i].st_name == 0)
+ continue;
+ strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+ MAX_SYMNAME);
+ c = strchr(name, '@');
+ if (c)
+ *c = 0;
+ if (strcmp(symname, name) == 0)
+ return &lib->dynsym[i];
+ }
+ return NULL;
+}
+
+/* Note that we assume the section is .text and the symbol is relative to
+ * the library base
+ */
+static unsigned long __init find_function32(struct lib32_elfinfo *lib,
+ const char *symname)
+{
+ Elf32_Sym *sym = find_symbol32(lib, symname);
+
+ if (sym == NULL) {
+ printk(KERN_WARNING "vDSO32: function %s not found !\n",
+ symname);
+ return 0;
+ }
+ return sym->st_value - VDSO32_LBASE;
+}
+
+static int vdso_do_func_patch32(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64,
+ const char *orig, const char *fix)
+{
+ Elf32_Sym *sym32_gen, *sym32_fix;
+
+ sym32_gen = find_symbol32(v32, orig);
+ if (sym32_gen == NULL) {
+ printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig);
+ return -1;
+ }
+ if (fix == NULL) {
+ sym32_gen->st_name = 0;
+ return 0;
+ }
+ sym32_fix = find_symbol32(v32, fix);
+ if (sym32_fix == NULL) {
+ printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix);
+ return -1;
+ }
+ sym32_gen->st_value = sym32_fix->st_value;
+ sym32_gen->st_size = sym32_fix->st_size;
+ sym32_gen->st_info = sym32_fix->st_info;
+ sym32_gen->st_other = sym32_fix->st_other;
+ sym32_gen->st_shndx = sym32_fix->st_shndx;
+
+ return 0;
+}
+
+
+#ifdef CONFIG_PPC64
+
+static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
+ unsigned long *size)
+{
+ Elf64_Shdr *sechdrs;
+ unsigned int i;
+ char *secnames;
+
+ /* Grab section headers and strings so we can tell who is who */
+ sechdrs = (void *)ehdr + ehdr->e_shoff;
+ secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
+
+ /* Find the section they want */
+ for (i = 1; i < ehdr->e_shnum; i++) {
+ if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
+ if (size)
+ *size = sechdrs[i].sh_size;
+ return (void *)ehdr + sechdrs[i].sh_offset;
+ }
+ }
+ if (size)
+ *size = 0;
+ return NULL;
+}
+
+static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib,
+ const char *symname)
+{
+ unsigned int i;
+ char name[MAX_SYMNAME], *c;
+
+ for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
+ if (lib->dynsym[i].st_name == 0)
+ continue;
+ strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+ MAX_SYMNAME);
+ c = strchr(name, '@');
+ if (c)
+ *c = 0;
+ if (strcmp(symname, name) == 0)
+ return &lib->dynsym[i];
+ }
+ return NULL;
+}
+
+/* Note that we assume the section is .text and the symbol is relative to
+ * the library base
+ */
+static unsigned long __init find_function64(struct lib64_elfinfo *lib,
+ const char *symname)
+{
+ Elf64_Sym *sym = find_symbol64(lib, symname);
+
+ if (sym == NULL) {
+ printk(KERN_WARNING "vDSO64: function %s not found !\n",
+ symname);
+ return 0;
+ }
+#ifdef VDS64_HAS_DESCRIPTORS
+ return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) -
+ VDSO64_LBASE;
+#else
+ return sym->st_value - VDSO64_LBASE;
+#endif
+}
+
+static int vdso_do_func_patch64(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64,
+ const char *orig, const char *fix)
+{
+ Elf64_Sym *sym64_gen, *sym64_fix;
+
+ sym64_gen = find_symbol64(v64, orig);
+ if (sym64_gen == NULL) {
+ printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig);
+ return -1;
+ }
+ if (fix == NULL) {
+ sym64_gen->st_name = 0;
+ return 0;
+ }
+ sym64_fix = find_symbol64(v64, fix);
+ if (sym64_fix == NULL) {
+ printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix);
+ return -1;
+ }
+ sym64_gen->st_value = sym64_fix->st_value;
+ sym64_gen->st_size = sym64_fix->st_size;
+ sym64_gen->st_info = sym64_fix->st_info;
+ sym64_gen->st_other = sym64_fix->st_other;
+ sym64_gen->st_shndx = sym64_fix->st_shndx;
+
+ return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
+
+static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ void *sect;
+
+ /*
+ * Locate symbol tables & text section
+ */
+
+ v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
+ v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
+ if (v32->dynsym == NULL || v32->dynstr == NULL) {
+ printk(KERN_ERR "vDSO32: required symbol section not found\n");
+ return -1;
+ }
+ sect = find_section32(v32->hdr, ".text", NULL);
+ if (sect == NULL) {
+ printk(KERN_ERR "vDSO32: the .text section was not found\n");
+ return -1;
+ }
+ v32->text = sect - vdso32_kbase;
+
+#ifdef CONFIG_PPC64
+ v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
+ v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL);
+ if (v64->dynsym == NULL || v64->dynstr == NULL) {
+ printk(KERN_ERR "vDSO64: required symbol section not found\n");
+ return -1;
+ }
+ sect = find_section64(v64->hdr, ".text", NULL);
+ if (sect == NULL) {
+ printk(KERN_ERR "vDSO64: the .text section was not found\n");
+ return -1;
+ }
+ v64->text = sect - vdso64_kbase;
+#endif /* CONFIG_PPC64 */
+
+ return 0;
+}
+
+static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ /*
+ * Find signal trampolines
+ */
+
+#ifdef CONFIG_PPC64
+ vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64");
+#endif
+ vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32");
+ vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32");
+}
+
+static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ Elf32_Sym *sym32;
+#ifdef CONFIG_PPC64
+ Elf64_Sym *sym64;
+
+ sym64 = find_symbol64(v64, "__kernel_datapage_offset");
+ if (sym64 == NULL) {
+ printk(KERN_ERR "vDSO64: Can't find symbol "
+ "__kernel_datapage_offset !\n");
+ return -1;
+ }
+ *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) =
+ (vdso64_pages << PAGE_SHIFT) -
+ (sym64->st_value - VDSO64_LBASE);
+#endif /* CONFIG_PPC64 */
+
+ sym32 = find_symbol32(v32, "__kernel_datapage_offset");
+ if (sym32 == NULL) {
+ printk(KERN_ERR "vDSO32: Can't find symbol "
+ "__kernel_datapage_offset !\n");
+ return -1;
+ }
+ *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
+ (vdso32_pages << PAGE_SHIFT) -
+ (sym32->st_value - VDSO32_LBASE);
+
+ return 0;
+}
+
+static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32,
+ struct lib64_elfinfo *v64)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) {
+ struct vdso_patch_def *patch = &vdso_patches[i];
+ int match = (cur_cpu_spec->cpu_features & patch->ftr_mask)
+ == patch->ftr_value;
+ if (!match)
+ continue;
+
+ DBG("replacing %s with %s...\n", patch->gen_name,
+ patch->fix_name ? "NONE" : patch->fix_name);
+
+ /*
+ * Patch the 32 bits and 64 bits symbols. Note that we do not
+ * patch the "." symbol on 64 bits.
+ * It would be easy to do, but doesn't seem to be necessary,
+ * patching the OPD symbol is enough.
+ */
+ vdso_do_func_patch32(v32, v64, patch->gen_name,
+ patch->fix_name);
+#ifdef CONFIG_PPC64
+ vdso_do_func_patch64(v32, v64, patch->gen_name,
+ patch->fix_name);
+#endif /* CONFIG_PPC64 */
+ }
+
+ return 0;
+}
+
+
+static __init int vdso_setup(void)
+{
+ struct lib32_elfinfo v32;
+ struct lib64_elfinfo v64;
+
+ v32.hdr = vdso32_kbase;
+#ifdef CONFIG_PPC64
+ v64.hdr = vdso64_kbase;
+#endif
+ if (vdso_do_find_sections(&v32, &v64))
+ return -1;
+
+ if (vdso_fixup_datapage(&v32, &v64))
+ return -1;
+
+ if (vdso_fixup_alt_funcs(&v32, &v64))
+ return -1;
+
+ vdso_setup_trampolines(&v32, &v64);
+
+ return 0;
+}
+
+/*
+ * Called from setup_arch to initialize the bitmap of available
+ * syscalls in the systemcfg page
+ */
+static void __init vdso_setup_syscall_map(void)
+{
+ unsigned int i;
+ extern unsigned long *sys_call_table;
+ extern unsigned long sys_ni_syscall;
+
+
+ for (i = 0; i < __NR_syscalls; i++) {
+#ifdef CONFIG_PPC64
+ if (sys_call_table[i*2] != sys_ni_syscall)
+ vdso_data->syscall_map_64[i >> 5] |=
+ 0x80000000UL >> (i & 0x1f);
+ if (sys_call_table[i*2+1] != sys_ni_syscall)
+ vdso_data->syscall_map_32[i >> 5] |=
+ 0x80000000UL >> (i & 0x1f);
+#else /* CONFIG_PPC64 */
+ if (sys_call_table[i] != sys_ni_syscall)
+ vdso_data->syscall_map_32[i >> 5] |=
+ 0x80000000UL >> (i & 0x1f);
+#endif /* CONFIG_PPC64 */
+ }
+}
+
+
+void __init vdso_init(void)
+{
+ int i;
+
+#ifdef CONFIG_PPC64
+ /*
+ * Fill up the "systemcfg" stuff for backward compatiblity
+ */
+ strcpy(vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
+ vdso_data->version.major = SYSTEMCFG_MAJOR;
+ vdso_data->version.minor = SYSTEMCFG_MINOR;
+ vdso_data->processor = mfspr(SPRN_PVR);
+ vdso_data->platform = _machine;
+ vdso_data->physicalMemorySize = lmb_phys_mem_size();
+ vdso_data->dcache_size = ppc64_caches.dsize;
+ vdso_data->dcache_line_size = ppc64_caches.dline_size;
+ vdso_data->icache_size = ppc64_caches.isize;
+ vdso_data->icache_line_size = ppc64_caches.iline_size;
+
+ /*
+ * Calculate the size of the 64 bits vDSO
+ */
+ vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
+ DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
+#endif /* CONFIG_PPC64 */
+
+
+ /*
+ * Calculate the size of the 32 bits vDSO
+ */
+ vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
+ DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages);
+
+
+ /*
+ * Setup the syscall map in the vDOS
+ */
+ vdso_setup_syscall_map();
+ /*
+ * Initialize the vDSO images in memory, that is do necessary
+ * fixups of vDSO symbols, locate trampolines, etc...
+ */
+ if (vdso_setup()) {
+ printk(KERN_ERR "vDSO setup failure, not enabled !\n");
+ vdso32_pages = 0;
+#ifdef CONFIG_PPC64
+ vdso64_pages = 0;
+#endif
+ return;
+ }
+
+ /* Make sure pages are in the correct state */
+ for (i = 0; i < vdso32_pages; i++) {
+ struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+
+ }
+#ifdef CONFIG_PPC64
+ for (i = 0; i < vdso64_pages; i++) {
+ struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+ }
+#endif /* CONFIG_PPC64 */
+
+ get_page(virt_to_page(vdso_data));
+}
+
+int in_gate_area_no_task(unsigned long addr)
+{
+ return 0;
+}
+
+int in_gate_area(struct task_struct *task, unsigned long addr)
+{
+ return 0;
+}
+
+struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+{
+ return NULL;
+}
+
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
new file mode 100644
index 000000000000..8a3bed5f143a
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -0,0 +1,40 @@
+
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+
+# Build rules
+
+ifeq ($(CONFIG_PPC32),y)
+CROSS32CC := $(CC)
+endif
+
+targets := $(obj-vdso32) vdso32.so
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+
+EXTRA_CFLAGS := -shared -s -fno-common -fno-builtin
+EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1
+EXTRA_AFLAGS := -D__VDSO32__ -s
+
+obj-y += vdso32_wrapper.o
+extra-y += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -Upowerpc
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32)
+ $(call if_changed,vdso32ld)
+
+# assembly rules for the .S files
+$(obj-vdso32): %.o: %.S
+ $(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+ cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+ cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
+
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
new file mode 100644
index 000000000000..c8db993574ee
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -0,0 +1,67 @@
+/*
+ * vDSO provided cache flush routines
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+/*
+ * Default "generic" version of __kernel_sync_dicache.
+ *
+ * void __kernel_sync_dicache(unsigned long start, unsigned long end)
+ *
+ * Flushes the data cache & invalidate the instruction cache for the
+ * provided range [start, end[
+ *
+ * Note: all CPUs supported by this kernel have a 128 bytes cache
+ * line size so we don't have to peek that info from the datapage
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache)
+ .cfi_startproc
+ li r5,127
+ andc r6,r3,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5 /* ensure we get enough */
+ srwi. r8,r8,7 /* compute line count */
+ beqlr /* nothing to do? */
+ mtctr r8
+ mr r3,r6
+1: dcbst 0,r3
+ addi r3,r3,128
+ bdnz 1b
+ sync
+ mtctr r8
+1: icbi 0,r6
+ addi r6,r6,128
+ bdnz 1b
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache)
+
+
+/*
+ * POWER5 version of __kernel_sync_dicache
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
+ .cfi_startproc
+ sync
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache_p5)
+
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
new file mode 100644
index 000000000000..a08c26e87835
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -0,0 +1,84 @@
+/*
+ * Access to the shared data page by the vDSO & syscall map
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+ .text
+V_FUNCTION_BEGIN(__get_datapage)
+ .cfi_startproc
+ /* We don't want that exposed or overridable as we want other objects
+ * to be able to bl directly to here
+ */
+ .protected __get_datapage
+ .hidden __get_datapage
+
+ mflr r0
+ .cfi_register lr,r0
+
+ bcl 20,31,1f
+ .global __kernel_datapage_offset;
+__kernel_datapage_offset:
+ .long 0
+1:
+ mflr r3
+ mtlr r0
+ lwz r0,0(r3)
+ add r3,r0,r3
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__get_datapage)
+
+/*
+ * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
+ *
+ * returns a pointer to the syscall map. the map is agnostic to the
+ * size of "long", unlike kernel bitops, it stores bits from top to
+ * bottom so that memory actually contains a linear bitmap
+ * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
+ * 32 bits int at N >> 5.
+ */
+V_FUNCTION_BEGIN(__kernel_get_syscall_map)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+
+ mr r4,r3
+ bl __get_datapage@local
+ mtlr r12
+ addi r3,r3,CFG_SYSCALL_MAP32
+ cmpli cr0,r4,0
+ beqlr
+ li r0,__NR_syscalls
+ stw r0,0(r4)
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_syscall_map)
+
+/*
+ * void unsigned long long __kernel_get_tbfreq(void);
+ *
+ * returns the timebase frequency in HZ
+ */
+V_FUNCTION_BEGIN(__kernel_get_tbfreq)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ bl __get_datapage@local
+ lwz r3,CFG_TB_TICKS_PER_SEC(r3)
+ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
+ mtlr r12
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
new file mode 100644
index 000000000000..aeb5fc9b87b3
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -0,0 +1,315 @@
+/*
+ * Userland implementation of gettimeofday() for 32 bits processes in a
+ * ppc64 kernel for use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org,
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+/*
+ * Exact prototype of gettimeofday
+ *
+ * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_gettimeofday)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+
+ mr r10,r3 /* r10 saves tv */
+ mr r11,r4 /* r11 saves tz */
+ bl __get_datapage@local /* get data page */
+ mr r9, r3 /* datapage ptr in r9 */
+ bl __do_get_xsec@local /* get xsec from tb & kernel */
+ bne- 2f /* out of line -> do syscall */
+
+ /* seconds are xsec >> 20 */
+ rlwinm r5,r4,12,20,31
+ rlwimi r5,r3,12,0,19
+ stw r5,TVAL32_TV_SEC(r10)
+
+ /* get remaining xsec and convert to usec. we scale
+ * up remaining xsec by 12 bits and get the top 32 bits
+ * of the multiplication
+ */
+ rlwinm r5,r4,12,0,19
+ lis r6,1000000@h
+ ori r6,r6,1000000@l
+ mulhwu r5,r5,r6
+ stw r5,TVAL32_TV_USEC(r10)
+
+ cmpli cr0,r11,0 /* check if tz is NULL */
+ beq 1f
+ lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
+ lwz r5,CFG_TZ_DSTTIME(r9)
+ stw r4,TZONE_TZ_MINWEST(r11)
+ stw r5,TZONE_TZ_DSTTIME(r11)
+
+1: mtlr r12
+ li r3,0
+ blr
+
+2:
+ mtlr r12
+ mr r3,r10
+ mr r4,r11
+ li r0,__NR_gettimeofday
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_gettimeofday)
+
+/*
+ * Exact prototype of clock_gettime()
+ *
+ * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpli cr0,r3,CLOCK_REALTIME
+ cmpli cr1,r3,CLOCK_MONOTONIC
+ cror cr0,cr0,cr1
+ bne cr0,99f
+
+ mflr r12 /* r12 saves lr */
+ .cfi_register lr,r12
+ mr r10,r3 /* r10 saves id */
+ mr r11,r4 /* r11 saves tp */
+ bl __get_datapage@local /* get data page */
+ mr r9, r3 /* datapage ptr in r9 */
+ beq cr1,50f /* if monotonic -> jump there */
+
+ /*
+ * CLOCK_REALTIME
+ */
+
+ bl __do_get_xsec@local /* get xsec from tb & kernel */
+ bne- 98f /* out of line -> do syscall */
+
+ /* seconds are xsec >> 20 */
+ rlwinm r5,r4,12,20,31
+ rlwimi r5,r3,12,0,19
+ stw r5,TSPC32_TV_SEC(r11)
+
+ /* get remaining xsec and convert to nsec. we scale
+ * up remaining xsec by 12 bits and get the top 32 bits
+ * of the multiplication, then we multiply by 1000
+ */
+ rlwinm r5,r4,12,0,19
+ lis r6,1000000@h
+ ori r6,r6,1000000@l
+ mulhwu r5,r5,r6
+ mulli r5,r5,1000
+ stw r5,TSPC32_TV_NSEC(r11)
+ mtlr r12
+ li r3,0
+ blr
+
+ /*
+ * CLOCK_MONOTONIC
+ */
+
+50: bl __do_get_xsec@local /* get xsec from tb & kernel */
+ bne- 98f /* out of line -> do syscall */
+
+ /* seconds are xsec >> 20 */
+ rlwinm r6,r4,12,20,31
+ rlwimi r6,r3,12,0,19
+
+ /* get remaining xsec and convert to nsec. we scale
+ * up remaining xsec by 12 bits and get the top 32 bits
+ * of the multiplication, then we multiply by 1000
+ */
+ rlwinm r7,r4,12,0,19
+ lis r5,1000000@h
+ ori r5,r5,1000000@l
+ mulhwu r7,r7,r5
+ mulli r7,r7,1000
+
+ /* now we must fixup using wall to monotonic. We need to snapshot
+ * that value and do the counter trick again. Fortunately, we still
+ * have the counter value in r8 that was returned by __do_get_xsec.
+ * At this point, r6,r7 contain our sec/nsec values, r3,r4 and r5
+ * can be used
+ */
+
+ lwz r3,WTOM_CLOCK_SEC(r9)
+ lwz r4,WTOM_CLOCK_NSEC(r9)
+
+ /* We now have our result in r3,r4. We create a fake dependency
+ * on that result and re-check the counter
+ */
+ or r5,r4,r3
+ xor r0,r5,r5
+ add r9,r9,r0
+#ifdef CONFIG_PPC64
+ lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
+#else
+ lwz r0,(CFG_TB_UPDATE_COUNT)(r9)
+#endif
+ cmpl cr0,r8,r0 /* check if updated */
+ bne- 50b
+
+ /* Calculate and store result. Note that this mimmics the C code,
+ * which may cause funny results if nsec goes negative... is that
+ * possible at all ?
+ */
+ add r3,r3,r6
+ add r4,r4,r7
+ lis r5,NSEC_PER_SEC@h
+ ori r5,r5,NSEC_PER_SEC@l
+ cmpli cr0,r4,r5
+ blt 1f
+ subf r4,r5,r4
+ addi r3,r3,1
+1: stw r3,TSPC32_TV_SEC(r11)
+ stw r4,TSPC32_TV_NSEC(r11)
+
+ mtlr r12
+ li r3,0
+ blr
+
+ /*
+ * syscall fallback
+ */
+98:
+ mtlr r12
+ mr r3,r10
+ mr r4,r11
+99:
+ li r0,__NR_clock_gettime
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_gettime)
+
+
+/*
+ * Exact prototype of clock_getres()
+ *
+ * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_getres)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpwi cr0,r3,CLOCK_REALTIME
+ cmpwi cr1,r3,CLOCK_MONOTONIC
+ cror cr0,cr0,cr1
+ bne cr0,99f
+
+ li r3,0
+ cmpli cr0,r4,0
+ beqlr
+ lis r5,CLOCK_REALTIME_RES@h
+ ori r5,r5,CLOCK_REALTIME_RES@l
+ stw r3,TSPC32_TV_SEC(r4)
+ stw r5,TSPC32_TV_NSEC(r4)
+ blr
+
+ /*
+ * syscall fallback
+ */
+99:
+ li r0,__NR_clock_getres
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_getres)
+
+
+/*
+ * This is the core of gettimeofday() & friends, it returns the xsec
+ * value in r3 & r4 and expects the datapage ptr (non clobbered)
+ * in r9. clobbers r0,r4,r5,r6,r7,r8.
+ * When returning, r8 contains the counter value that can be reused
+ * by the monotonic clock implementation
+ */
+__do_get_xsec:
+ .cfi_startproc
+ /* Check for update count & load values. We use the low
+ * order 32 bits of the update count
+ */
+#ifdef CONFIG_PPC64
+1: lwz r8,(CFG_TB_UPDATE_COUNT+4)(r9)
+#else
+1: lwz r8,(CFG_TB_UPDATE_COUNT)(r9)
+#endif
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- 1b
+ xor r0,r8,r8 /* create dependency */
+ add r9,r9,r0
+
+ /* Load orig stamp (offset to TB) */
+ lwz r5,CFG_TB_ORIG_STAMP(r9)
+ lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
+
+ /* Get a stable TB value */
+2: mftbu r3
+ mftbl r4
+ mftbu r0
+ cmpl cr0,r3,r0
+ bne- 2b
+
+ /* Substract tb orig stamp. If the high part is non-zero, we jump to
+ * the slow path which call the syscall.
+ * If it's ok, then we have our 32 bits tb_ticks value in r7
+ */
+ subfc r7,r6,r4
+ subfe. r0,r5,r3
+ bne- 3f
+
+ /* Load scale factor & do multiplication */
+ lwz r5,CFG_TB_TO_XS(r9) /* load values */
+ lwz r6,(CFG_TB_TO_XS+4)(r9)
+ mulhwu r4,r7,r5
+ mulhwu r6,r7,r6
+ mullw r0,r7,r5
+ addc r6,r6,r0
+
+ /* At this point, we have the scaled xsec value in r4 + XER:CA
+ * we load & add the stamp since epoch
+ */
+ lwz r5,CFG_STAMP_XSEC(r9)
+ lwz r6,(CFG_STAMP_XSEC+4)(r9)
+ adde r4,r4,r6
+ addze r3,r5
+
+ /* We now have our result in r3,r4. We create a fake dependency
+ * on that result and re-check the counter
+ */
+ or r6,r4,r3
+ xor r0,r6,r6
+ add r9,r9,r0
+#ifdef CONFIG_PPC64
+ lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
+#else
+ lwz r0,(CFG_TB_UPDATE_COUNT)(r9)
+#endif
+ cmpl cr0,r8,r0 /* check if updated */
+ bne- 1b
+
+ /* Warning ! The caller expects CR:EQ to be set to indicate a
+ * successful calculation (so it won't fallback to the syscall
+ * method). We have overriden that CR bit in the counter check,
+ * but fortunately, the loop exit condition _is_ CR:EQ set, so
+ * we can exit safely here. If you change this code, be careful
+ * of that side effect.
+ */
+3: blr
+ .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso32/note.S
new file mode 100644
index 000000000000..d4b5be4f3d5f
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/note.S
@@ -0,0 +1,25 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+
+#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \
+ .section name, flags; \
+ .balign 4; \
+ .long 1f - 0f; /* name length */ \
+ .long 3f - 2f; /* data length */ \
+ .long type; /* note type */ \
+0: .asciz vendor; /* vendor name */ \
+1: .balign 4; \
+2:
+
+#define ASM_ELF_NOTE_END \
+3: .balign 4; /* pad out section */ \
+ .previous
+
+ ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
+ .long LINUX_VERSION_CODE
+ ASM_ELF_NOTE_END
diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso32/sigtramp.S
new file mode 100644
index 000000000000..e04642781917
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/sigtramp.S
@@ -0,0 +1,300 @@
+/*
+ * Signal trampolines for 32 bits processes in a ppc64 kernel for
+ * use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+ .text
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+ the return address to get an address in the middle of the presumed
+ call instruction. Since we don't have a call here, we artifically
+ extend the range covered by the unwind info by adding a nop before
+ the real start. */
+ nop
+V_FUNCTION_BEGIN(__kernel_sigtramp32)
+.Lsig_start = . - 4
+ li r0,__NR_sigreturn
+ sc
+.Lsig_end:
+V_FUNCTION_END(__kernel_sigtramp32)
+
+.Lsigrt_start:
+ nop
+V_FUNCTION_BEGIN(__kernel_sigtramp_rt32)
+ li r0,__NR_rt_sigreturn
+ sc
+.Lsigrt_end:
+V_FUNCTION_END(__kernel_sigtramp_rt32)
+
+ .section .eh_frame,"a",@progbits
+
+/* Register r1 can be found at offset 4 of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define cfa_save \
+ .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+9:
+
+/* Register REGNO can be found at offset OFS of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define rsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .ifne ofs; \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+ .endif; \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. The VMX reg struct is at offset VREGS of
+ the pt_regs struct. This macro is for REGNO == 0, and contains
+ 'subroutines' that the other macros jump to. */
+#define vsave_msr0(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit0 */ \
+2: \
+ .byte 0x40; /* DW_OP_lit16 */ \
+ .byte 0x1e; /* DW_OP_mul */ \
+3: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x12; /* DW_OP_dup */ \
+ .byte 0x23; /* DW_OP_plus_uconst */ \
+ .uleb128 33*RSIZE; /* msr offset */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \
+ .byte 0x1a; /* DW_OP_and */ \
+ .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \
+ .byte 0x30; /* DW_OP_lit0 */ \
+ .byte 0x29; /* DW_OP_eq */ \
+ .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \
+ .byte 0x13; /* DW_OP_drop, pop the 0 */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x22; /* DW_OP_plus */ \
+ .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. REGNO is 1 thru 31. */
+#define vsave_msr1(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit n */ \
+ .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
+ the VMX save block. */
+#define vsave_msr2(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x0a; .short ofs; /* DW_OP_const2u */ \
+ .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \
+9:
+
+/* VMX register REGNO is at offset OFS of the VMX save area. */
+#define vsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+9:
+
+/* This is where the pt_regs pointer can be found on the stack. */
+#define PTREGS 64+28
+
+/* Size of regs. */
+#define RSIZE 4
+
+/* This is the offset of the VMX regs. */
+#define VREGS 48*RSIZE+34*8
+
+/* Describe where general purpose regs are saved. */
+#define EH_FRAME_GEN \
+ cfa_save; \
+ rsave ( 0, 0*RSIZE); \
+ rsave ( 2, 2*RSIZE); \
+ rsave ( 3, 3*RSIZE); \
+ rsave ( 4, 4*RSIZE); \
+ rsave ( 5, 5*RSIZE); \
+ rsave ( 6, 6*RSIZE); \
+ rsave ( 7, 7*RSIZE); \
+ rsave ( 8, 8*RSIZE); \
+ rsave ( 9, 9*RSIZE); \
+ rsave (10, 10*RSIZE); \
+ rsave (11, 11*RSIZE); \
+ rsave (12, 12*RSIZE); \
+ rsave (13, 13*RSIZE); \
+ rsave (14, 14*RSIZE); \
+ rsave (15, 15*RSIZE); \
+ rsave (16, 16*RSIZE); \
+ rsave (17, 17*RSIZE); \
+ rsave (18, 18*RSIZE); \
+ rsave (19, 19*RSIZE); \
+ rsave (20, 20*RSIZE); \
+ rsave (21, 21*RSIZE); \
+ rsave (22, 22*RSIZE); \
+ rsave (23, 23*RSIZE); \
+ rsave (24, 24*RSIZE); \
+ rsave (25, 25*RSIZE); \
+ rsave (26, 26*RSIZE); \
+ rsave (27, 27*RSIZE); \
+ rsave (28, 28*RSIZE); \
+ rsave (29, 29*RSIZE); \
+ rsave (30, 30*RSIZE); \
+ rsave (31, 31*RSIZE); \
+ rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \
+ rsave (65, 36*RSIZE); /* lr */ \
+ rsave (70, 38*RSIZE) /* cr */
+
+/* Describe where the FP regs are saved. */
+#define EH_FRAME_FP \
+ rsave (32, 48*RSIZE + 0*8); \
+ rsave (33, 48*RSIZE + 1*8); \
+ rsave (34, 48*RSIZE + 2*8); \
+ rsave (35, 48*RSIZE + 3*8); \
+ rsave (36, 48*RSIZE + 4*8); \
+ rsave (37, 48*RSIZE + 5*8); \
+ rsave (38, 48*RSIZE + 6*8); \
+ rsave (39, 48*RSIZE + 7*8); \
+ rsave (40, 48*RSIZE + 8*8); \
+ rsave (41, 48*RSIZE + 9*8); \
+ rsave (42, 48*RSIZE + 10*8); \
+ rsave (43, 48*RSIZE + 11*8); \
+ rsave (44, 48*RSIZE + 12*8); \
+ rsave (45, 48*RSIZE + 13*8); \
+ rsave (46, 48*RSIZE + 14*8); \
+ rsave (47, 48*RSIZE + 15*8); \
+ rsave (48, 48*RSIZE + 16*8); \
+ rsave (49, 48*RSIZE + 17*8); \
+ rsave (50, 48*RSIZE + 18*8); \
+ rsave (51, 48*RSIZE + 19*8); \
+ rsave (52, 48*RSIZE + 20*8); \
+ rsave (53, 48*RSIZE + 21*8); \
+ rsave (54, 48*RSIZE + 22*8); \
+ rsave (55, 48*RSIZE + 23*8); \
+ rsave (56, 48*RSIZE + 24*8); \
+ rsave (57, 48*RSIZE + 25*8); \
+ rsave (58, 48*RSIZE + 26*8); \
+ rsave (59, 48*RSIZE + 27*8); \
+ rsave (60, 48*RSIZE + 28*8); \
+ rsave (61, 48*RSIZE + 29*8); \
+ rsave (62, 48*RSIZE + 30*8); \
+ rsave (63, 48*RSIZE + 31*8)
+
+/* Describe where the VMX regs are saved. */
+#ifdef CONFIG_ALTIVEC
+#define EH_FRAME_VMX \
+ vsave_msr0 ( 0); \
+ vsave_msr1 ( 1); \
+ vsave_msr1 ( 2); \
+ vsave_msr1 ( 3); \
+ vsave_msr1 ( 4); \
+ vsave_msr1 ( 5); \
+ vsave_msr1 ( 6); \
+ vsave_msr1 ( 7); \
+ vsave_msr1 ( 8); \
+ vsave_msr1 ( 9); \
+ vsave_msr1 (10); \
+ vsave_msr1 (11); \
+ vsave_msr1 (12); \
+ vsave_msr1 (13); \
+ vsave_msr1 (14); \
+ vsave_msr1 (15); \
+ vsave_msr1 (16); \
+ vsave_msr1 (17); \
+ vsave_msr1 (18); \
+ vsave_msr1 (19); \
+ vsave_msr1 (20); \
+ vsave_msr1 (21); \
+ vsave_msr1 (22); \
+ vsave_msr1 (23); \
+ vsave_msr1 (24); \
+ vsave_msr1 (25); \
+ vsave_msr1 (26); \
+ vsave_msr1 (27); \
+ vsave_msr1 (28); \
+ vsave_msr1 (29); \
+ vsave_msr1 (30); \
+ vsave_msr1 (31); \
+ vsave_msr2 (33, 32*16+12); \
+ vsave (32, 32*16)
+#else
+#define EH_FRAME_VMX
+#endif
+
+.Lcie:
+ .long .Lcie_end - .Lcie_start
+.Lcie_start:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zR" /* NUL-terminated augmentation string */
+ .uleb128 4 /* Code alignment factor */
+ .sleb128 -4 /* Data alignment factor */
+ .byte 67 /* Return address register column, ap */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */
+ .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */
+ .balign 4
+.Lcie_end:
+
+ .long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+ .long .Lfde0_start - .Lcie /* CIE pointer. */
+ .long .Lsig_start - . /* PC start, length */
+ .long .Lsig_end - .Lsig_start
+ .uleb128 0 /* Augmentation */
+ EH_FRAME_GEN
+ EH_FRAME_FP
+ EH_FRAME_VMX
+ .balign 4
+.Lfde0_end:
+
+/* We have a different stack layout for rt_sigreturn. */
+#undef PTREGS
+#define PTREGS 64+16+128+20+28
+
+ .long .Lfde1_end - .Lfde1_start
+.Lfde1_start:
+ .long .Lfde1_start - .Lcie /* CIE pointer. */
+ .long .Lsigrt_start - . /* PC start, length */
+ .long .Lsigrt_end - .Lsigrt_start
+ .uleb128 0 /* Augmentation */
+ EH_FRAME_GEN
+ EH_FRAME_FP
+ EH_FRAME_VMX
+ .balign 4
+.Lfde1_end:
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 000000000000..f4bad720cb0a
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,117 @@
+
+/*
+ * This is the infamous ld script for the 32 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+/* Default link addresses for the vDSOs */
+OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc")
+OUTPUT_ARCH(powerpc:common)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO32_LBASE + SIZEOF_HEADERS;
+ .hash : { *(.hash) } :text
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN (16);
+ .text :
+ {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ }
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+
+ /* Other stuff is appended to the text segment: */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table) }
+ .fixup : { *(.fixup) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+ .got : { *(.got) }
+ .plt : { *(.plt) }
+
+ _end = .;
+ __end = .;
+ PROVIDE (end = .);
+
+
+ /* Stabs debugging sections are here too
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+
+ /DISCARD/ : { *(.note.GNU-stack) }
+ /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) }
+ /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
+
+
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ __kernel_datapage_offset; /* Has to be there for the kernel to find */
+ __kernel_get_syscall_map;
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+ __kernel_get_tbfreq;
+ __kernel_sync_dicache;
+ __kernel_sync_dicache_p5;
+ __kernel_sigtramp32;
+ __kernel_sigtramp_rt32;
+ local: *;
+ };
+}
diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 000000000000..556f0caa5d84
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,13 @@
+#include <linux/init.h>
+#include <asm/page.h>
+
+ .section ".data.page_aligned"
+
+ .globl vdso32_start, vdso32_end
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/powerpc/kernel/vdso32/vdso32.so"
+ .balign PAGE_SIZE
+vdso32_end:
+
+ .previous
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
new file mode 100644
index 000000000000..ab39988452cc
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -0,0 +1,35 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+
+# Build rules
+
+targets := $(obj-vdso64) vdso64.so
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+EXTRA_CFLAGS := -shared -s -fno-common -fno-builtin
+EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1
+EXTRA_AFLAGS := -D__VDSO64__ -s
+
+obj-y += vdso64_wrapper.o
+extra-y += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+# Force dependency (incbin is bad)
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64)
+ $(call if_changed,vdso64ld)
+
+# assembly rules for the .S files
+$(obj-vdso64): %.o: %.S
+ $(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+ cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+
+
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
new file mode 100644
index 000000000000..d4a0ad28d534
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -0,0 +1,66 @@
+/*
+ * vDSO provided cache flush routines
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+/*
+ * Default "generic" version of __kernel_sync_dicache.
+ *
+ * void __kernel_sync_dicache(unsigned long start, unsigned long end)
+ *
+ * Flushes the data cache & invalidate the instruction cache for the
+ * provided range [start, end[
+ *
+ * Note: all CPUs supported by this kernel have a 128 bytes cache
+ * line size so we don't have to peek that info from the datapage
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache)
+ .cfi_startproc
+ li r5,127
+ andc r6,r3,r5 /* round low to line bdy */
+ subf r8,r6,r4 /* compute length */
+ add r8,r8,r5 /* ensure we get enough */
+ srwi. r8,r8,7 /* compute line count */
+ beqlr /* nothing to do? */
+ mtctr r8
+ mr r3,r6
+1: dcbst 0,r3
+ addi r3,r3,128
+ bdnz 1b
+ sync
+ mtctr r8
+1: icbi 0,r6
+ addi r6,r6,128
+ bdnz 1b
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache)
+
+
+/*
+ * POWER5 version of __kernel_sync_dicache
+ */
+V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
+ .cfi_startproc
+ sync
+ isync
+ li r3,0
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_sync_dicache_p5)
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
new file mode 100644
index 000000000000..e67eda0f8cda
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -0,0 +1,84 @@
+/*
+ * Access to the shared data page by the vDSO & syscall map
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+
+ .text
+V_FUNCTION_BEGIN(__get_datapage)
+ .cfi_startproc
+ /* We don't want that exposed or overridable as we want other objects
+ * to be able to bl directly to here
+ */
+ .protected __get_datapage
+ .hidden __get_datapage
+
+ mflr r0
+ .cfi_register lr,r0
+
+ bcl 20,31,1f
+ .global __kernel_datapage_offset;
+__kernel_datapage_offset:
+ .long 0
+1:
+ mflr r3
+ mtlr r0
+ lwz r0,0(r3)
+ add r3,r0,r3
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__get_datapage)
+
+/*
+ * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
+ *
+ * returns a pointer to the syscall map. the map is agnostic to the
+ * size of "long", unlike kernel bitops, it stores bits from top to
+ * bottom so that memory actually contains a linear bitmap
+ * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
+ * 32 bits int at N >> 5.
+ */
+V_FUNCTION_BEGIN(__kernel_get_syscall_map)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+
+ mr r4,r3
+ bl V_LOCAL_FUNC(__get_datapage)
+ mtlr r12
+ addi r3,r3,CFG_SYSCALL_MAP64
+ cmpli cr0,r4,0
+ beqlr
+ li r0,__NR_syscalls
+ stw r0,0(r4)
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_syscall_map)
+
+
+/*
+ * void unsigned long __kernel_get_tbfreq(void);
+ *
+ * returns the timebase frequency in HZ
+ */
+V_FUNCTION_BEGIN(__kernel_get_tbfreq)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+ bl V_LOCAL_FUNC(__get_datapage)
+ ld r3,CFG_TB_TICKS_PER_SEC(r3)
+ mtlr r12
+ .cfi_endproc
+V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
new file mode 100644
index 000000000000..d371c02a8c0e
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -0,0 +1,242 @@
+/*
+ * Userland implementation of gettimeofday() for 64 bits processes in a
+ * ppc64 kernel for use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+/*
+ * Exact prototype of gettimeofday
+ *
+ * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_gettimeofday)
+ .cfi_startproc
+ mflr r12
+ .cfi_register lr,r12
+
+ mr r11,r3 /* r11 holds tv */
+ mr r10,r4 /* r10 holds tz */
+ bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+ bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
+ lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
+ ori r7,r7,16960
+ rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
+ rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
+ std r5,TVAL64_TV_SEC(r11) /* store sec in tv */
+ subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
+ mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
+ * XSEC_PER_SEC
+ */
+ rldicl r0,r0,44,20
+ cmpldi cr0,r10,0 /* check if tz is NULL */
+ std r0,TVAL64_TV_USEC(r11) /* store usec in tv */
+ beq 1f
+ lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */
+ lwz r5,CFG_TZ_DSTTIME(r3)
+ stw r4,TZONE_TZ_MINWEST(r10)
+ stw r5,TZONE_TZ_DSTTIME(r10)
+1: mtlr r12
+ li r3,0 /* always success */
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_gettimeofday)
+
+
+/*
+ * Exact prototype of clock_gettime()
+ *
+ * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpwi cr0,r3,CLOCK_REALTIME
+ cmpwi cr1,r3,CLOCK_MONOTONIC
+ cror cr0,cr0,cr1
+ bne cr0,99f
+
+ mflr r12 /* r12 saves lr */
+ .cfi_register lr,r12
+ mr r10,r3 /* r10 saves id */
+ mr r11,r4 /* r11 saves tp */
+ bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+ beq cr1,50f /* if monotonic -> jump there */
+
+ /*
+ * CLOCK_REALTIME
+ */
+
+ bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
+
+ lis r7,0x3b9a /* r7 = 1000000000 = NSEC_PER_SEC */
+ ori r7,r7,0xca00
+ rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
+ rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
+ std r5,TSPC64_TV_SEC(r11) /* store sec in tv */
+ subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
+ mulld r0,r0,r7 /* nsec = (xsec * NSEC_PER_SEC) /
+ * XSEC_PER_SEC
+ */
+ rldicl r0,r0,44,20
+ std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */
+
+ mtlr r12
+ li r3,0
+ blr
+
+ /*
+ * CLOCK_MONOTONIC
+ */
+
+50: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
+
+ lis r7,0x3b9a /* r7 = 1000000000 = NSEC_PER_SEC */
+ ori r7,r7,0xca00
+ rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
+ rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
+ subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
+ mulld r0,r0,r7 /* nsec = (xsec * NSEC_PER_SEC) /
+ * XSEC_PER_SEC
+ */
+ rldicl r6,r0,44,20
+
+ /* now we must fixup using wall to monotonic. We need to snapshot
+ * that value and do the counter trick again. Fortunately, we still
+ * have the counter value in r8 that was returned by __do_get_xsec.
+ * At this point, r5,r6 contain our sec/nsec values.
+ * can be used
+ */
+
+ lwz r4,WTOM_CLOCK_SEC(r9)
+ lwz r7,WTOM_CLOCK_NSEC(r9)
+
+ /* We now have our result in r4,r7. We create a fake dependency
+ * on that result and re-check the counter
+ */
+ or r9,r4,r7
+ xor r0,r9,r9
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld cr0,r0,r8 /* check if updated */
+ bne- 50b
+
+ /* Calculate and store result. Note that this mimmics the C code,
+ * which may cause funny results if nsec goes negative... is that
+ * possible at all ?
+ */
+ add r4,r4,r5
+ add r7,r7,r6
+ lis r9,NSEC_PER_SEC@h
+ ori r9,r9,NSEC_PER_SEC@l
+ cmpli cr0,r7,r9
+ blt 1f
+ subf r7,r9,r7
+ addi r4,r4,1
+1: std r4,TSPC64_TV_SEC(r11)
+ std r7,TSPC64_TV_NSEC(r11)
+
+ mtlr r12
+ li r3,0
+ blr
+
+ /*
+ * syscall fallback
+ */
+98:
+ mtlr r12
+ mr r3,r10
+ mr r4,r11
+99:
+ li r0,__NR_clock_gettime
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_gettime)
+
+
+/*
+ * Exact prototype of clock_getres()
+ *
+ * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_getres)
+ .cfi_startproc
+ /* Check for supported clock IDs */
+ cmpwi cr0,r3,CLOCK_REALTIME
+ cmpwi cr1,r3,CLOCK_MONOTONIC
+ cror cr0,cr0,cr1
+ bne cr0,99f
+
+ li r3,0
+ cmpli cr0,r4,0
+ beqlr
+ lis r5,CLOCK_REALTIME_RES@h
+ ori r5,r5,CLOCK_REALTIME_RES@l
+ std r3,TSPC64_TV_SEC(r4)
+ std r5,TSPC64_TV_NSEC(r4)
+ blr
+
+ /*
+ * syscall fallback
+ */
+99:
+ li r0,__NR_clock_getres
+ sc
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_clock_getres)
+
+
+/*
+ * This is the core of gettimeofday(), it returns the xsec
+ * value in r4 and expects the datapage ptr (non clobbered)
+ * in r3. clobbers r0,r4,r5,r6,r7,r8
+ * When returning, r8 contains the counter value that can be reused
+ */
+V_FUNCTION_BEGIN(__do_get_xsec)
+ .cfi_startproc
+ /* check for update count & load values */
+1: ld r8,CFG_TB_UPDATE_COUNT(r3)
+ andi. r0,r4,1 /* pending update ? loop */
+ bne- 1b
+ xor r0,r4,r4 /* create dependency */
+ add r3,r3,r0
+
+ /* Get TB & offset it */
+ mftb r7
+ ld r9,CFG_TB_ORIG_STAMP(r3)
+ subf r7,r9,r7
+
+ /* Scale result */
+ ld r5,CFG_TB_TO_XS(r3)
+ mulhdu r7,r7,r5
+
+ /* Add stamp since epoch */
+ ld r6,CFG_STAMP_XSEC(r3)
+ add r4,r6,r7
+
+ xor r0,r4,r4
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld cr0,r0,r8 /* check if updated */
+ bne- 1b
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__do_get_xsec)
diff --git a/arch/powerpc/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S
new file mode 100644
index 000000000000..dc2a509f7e8a
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/note.S
@@ -0,0 +1 @@
+#include "../vdso32/note.S"
diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S
new file mode 100644
index 000000000000..31b604ab56de
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/sigtramp.S
@@ -0,0 +1,295 @@
+/*
+ * Signal trampoline for 64 bits processes in a ppc64 kernel for
+ * use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
+ * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+#include <asm/ptrace.h> /* XXX for __SIGNAL_FRAMESIZE */
+
+ .text
+
+/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
+ the return address to get an address in the middle of the presumed
+ call instruction. Since we don't have a call here, we artifically
+ extend the range covered by the unwind info by padding before the
+ real start. */
+ nop
+ .balign 8
+V_FUNCTION_BEGIN(__kernel_sigtramp_rt64)
+.Lsigrt_start = . - 4
+ addi r1, r1, __SIGNAL_FRAMESIZE
+ li r0,__NR_rt_sigreturn
+ sc
+.Lsigrt_end:
+V_FUNCTION_END(__kernel_sigtramp_rt64)
+/* The ".balign 8" above and the following zeros mimic the old stack
+ trampoline layout. The last magic value is the ucontext pointer,
+ chosen in such a way that older libgcc unwind code returns a zero
+ for a sigcontext pointer. */
+ .long 0,0,0
+ .quad 0,-21*8
+
+/* Register r1 can be found at offset 8 of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define cfa_save \
+ .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+9:
+
+/* Register REGNO can be found at offset OFS of a pt_regs structure.
+ A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
+#define rsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .ifne ofs; \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+ .endif; \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. A pointer to the VMX reg struct is at VREGS in
+ the pt_regs struct. This macro is for REGNO == 0, and contains
+ 'subroutines' that the other macros jump to. */
+#define vsave_msr0(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit0 */ \
+2: \
+ .byte 0x40; /* DW_OP_lit16 */ \
+ .byte 0x1e; /* DW_OP_mul */ \
+3: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x12; /* DW_OP_dup */ \
+ .byte 0x23; /* DW_OP_plus_uconst */ \
+ .uleb128 33*RSIZE; /* msr offset */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \
+ .byte 0x1a; /* DW_OP_and */ \
+ .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \
+ .byte 0x30; /* DW_OP_lit0 */ \
+ .byte 0x29; /* DW_OP_eq */ \
+ .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \
+ .byte 0x13; /* DW_OP_drop, pop the 0 */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x22; /* DW_OP_plus */ \
+ .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
+ of the VMX reg struct. REGNO is 1 thru 31. */
+#define vsave_msr1(regno) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x30 + regno; /* DW_OP_lit n */ \
+ .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \
+9:
+
+/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
+ the VMX save block. */
+#define vsave_msr2(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x0a; .short ofs; /* DW_OP_const2u */ \
+ .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \
+9:
+
+/* VMX register REGNO is at offset OFS of the VMX save area. */
+#define vsave(regno, ofs) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno + 77; /* regno */ \
+ .uleb128 9f - 1f; /* length */ \
+1: \
+ .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
+ .byte 0x06; /* DW_OP_deref */ \
+ .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
+9:
+
+/* This is where the pt_regs pointer can be found on the stack. */
+#define PTREGS 128+168+56
+
+/* Size of regs. */
+#define RSIZE 8
+
+/* This is the offset of the VMX reg pointer. */
+#define VREGS 48*RSIZE+33*8
+
+/* Describe where general purpose regs are saved. */
+#define EH_FRAME_GEN \
+ cfa_save; \
+ rsave ( 0, 0*RSIZE); \
+ rsave ( 2, 2*RSIZE); \
+ rsave ( 3, 3*RSIZE); \
+ rsave ( 4, 4*RSIZE); \
+ rsave ( 5, 5*RSIZE); \
+ rsave ( 6, 6*RSIZE); \
+ rsave ( 7, 7*RSIZE); \
+ rsave ( 8, 8*RSIZE); \
+ rsave ( 9, 9*RSIZE); \
+ rsave (10, 10*RSIZE); \
+ rsave (11, 11*RSIZE); \
+ rsave (12, 12*RSIZE); \
+ rsave (13, 13*RSIZE); \
+ rsave (14, 14*RSIZE); \
+ rsave (15, 15*RSIZE); \
+ rsave (16, 16*RSIZE); \
+ rsave (17, 17*RSIZE); \
+ rsave (18, 18*RSIZE); \
+ rsave (19, 19*RSIZE); \
+ rsave (20, 20*RSIZE); \
+ rsave (21, 21*RSIZE); \
+ rsave (22, 22*RSIZE); \
+ rsave (23, 23*RSIZE); \
+ rsave (24, 24*RSIZE); \
+ rsave (25, 25*RSIZE); \
+ rsave (26, 26*RSIZE); \
+ rsave (27, 27*RSIZE); \
+ rsave (28, 28*RSIZE); \
+ rsave (29, 29*RSIZE); \
+ rsave (30, 30*RSIZE); \
+ rsave (31, 31*RSIZE); \
+ rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \
+ rsave (65, 36*RSIZE); /* lr */ \
+ rsave (70, 38*RSIZE) /* cr */
+
+/* Describe where the FP regs are saved. */
+#define EH_FRAME_FP \
+ rsave (32, 48*RSIZE + 0*8); \
+ rsave (33, 48*RSIZE + 1*8); \
+ rsave (34, 48*RSIZE + 2*8); \
+ rsave (35, 48*RSIZE + 3*8); \
+ rsave (36, 48*RSIZE + 4*8); \
+ rsave (37, 48*RSIZE + 5*8); \
+ rsave (38, 48*RSIZE + 6*8); \
+ rsave (39, 48*RSIZE + 7*8); \
+ rsave (40, 48*RSIZE + 8*8); \
+ rsave (41, 48*RSIZE + 9*8); \
+ rsave (42, 48*RSIZE + 10*8); \
+ rsave (43, 48*RSIZE + 11*8); \
+ rsave (44, 48*RSIZE + 12*8); \
+ rsave (45, 48*RSIZE + 13*8); \
+ rsave (46, 48*RSIZE + 14*8); \
+ rsave (47, 48*RSIZE + 15*8); \
+ rsave (48, 48*RSIZE + 16*8); \
+ rsave (49, 48*RSIZE + 17*8); \
+ rsave (50, 48*RSIZE + 18*8); \
+ rsave (51, 48*RSIZE + 19*8); \
+ rsave (52, 48*RSIZE + 20*8); \
+ rsave (53, 48*RSIZE + 21*8); \
+ rsave (54, 48*RSIZE + 22*8); \
+ rsave (55, 48*RSIZE + 23*8); \
+ rsave (56, 48*RSIZE + 24*8); \
+ rsave (57, 48*RSIZE + 25*8); \
+ rsave (58, 48*RSIZE + 26*8); \
+ rsave (59, 48*RSIZE + 27*8); \
+ rsave (60, 48*RSIZE + 28*8); \
+ rsave (61, 48*RSIZE + 29*8); \
+ rsave (62, 48*RSIZE + 30*8); \
+ rsave (63, 48*RSIZE + 31*8)
+
+/* Describe where the VMX regs are saved. */
+#ifdef CONFIG_ALTIVEC
+#define EH_FRAME_VMX \
+ vsave_msr0 ( 0); \
+ vsave_msr1 ( 1); \
+ vsave_msr1 ( 2); \
+ vsave_msr1 ( 3); \
+ vsave_msr1 ( 4); \
+ vsave_msr1 ( 5); \
+ vsave_msr1 ( 6); \
+ vsave_msr1 ( 7); \
+ vsave_msr1 ( 8); \
+ vsave_msr1 ( 9); \
+ vsave_msr1 (10); \
+ vsave_msr1 (11); \
+ vsave_msr1 (12); \
+ vsave_msr1 (13); \
+ vsave_msr1 (14); \
+ vsave_msr1 (15); \
+ vsave_msr1 (16); \
+ vsave_msr1 (17); \
+ vsave_msr1 (18); \
+ vsave_msr1 (19); \
+ vsave_msr1 (20); \
+ vsave_msr1 (21); \
+ vsave_msr1 (22); \
+ vsave_msr1 (23); \
+ vsave_msr1 (24); \
+ vsave_msr1 (25); \
+ vsave_msr1 (26); \
+ vsave_msr1 (27); \
+ vsave_msr1 (28); \
+ vsave_msr1 (29); \
+ vsave_msr1 (30); \
+ vsave_msr1 (31); \
+ vsave_msr2 (33, 32*16+12); \
+ vsave (32, 33*16)
+#else
+#define EH_FRAME_VMX
+#endif
+
+ .section .eh_frame,"a",@progbits
+.Lcie:
+ .long .Lcie_end - .Lcie_start
+.Lcie_start:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zR" /* NUL-terminated augmentation string */
+ .uleb128 4 /* Code alignment factor */
+ .sleb128 -8 /* Data alignment factor */
+ .byte 67 /* Return address register column, ap */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x14 /* DW_EH_PE_pcrel | DW_EH_PE_udata8. */
+ .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */
+ .balign 8
+.Lcie_end:
+
+ .long .Lfde0_end - .Lfde0_start
+.Lfde0_start:
+ .long .Lfde0_start - .Lcie /* CIE pointer. */
+ .quad .Lsigrt_start - . /* PC start, length */
+ .quad .Lsigrt_end - .Lsigrt_start
+ .uleb128 0 /* Augmentation */
+ EH_FRAME_GEN
+ EH_FRAME_FP
+ EH_FRAME_VMX
+# Do we really need to describe the frame at this point? ie. will
+# we ever have some call chain that returns somewhere past the addi?
+# I don't think so, since gcc doesn't support async signals.
+# .byte 0x41 /* DW_CFA_advance_loc 1*4 */
+#undef PTREGS
+#define PTREGS 168+56
+# EH_FRAME_GEN
+# EH_FRAME_FP
+# EH_FRAME_VMX
+ .balign 8
+.Lfde0_end:
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 000000000000..4bdf224464ab
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,116 @@
+/*
+ * This is the infamous ld script for the 64 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc")
+OUTPUT_ARCH(powerpc:common64)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO64_LBASE + SIZEOF_HEADERS;
+ .hash : { *(.hash) } :text
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN (16);
+ .text :
+ {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ *(.sfpr .glink)
+ } :text
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+
+ /* Other stuff is appended to the text segment: */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table) }
+
+ .opd ALIGN(8) : { KEEP (*(.opd)) }
+ .got ALIGN(8) : { *(.got .toc) }
+ .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ _end = .;
+ PROVIDE (end = .);
+
+ /* Stabs debugging sections are here too
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sectio/ns.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+
+ /DISCARD/ : { *(.note.GNU-stack) }
+ /DISCARD/ : { *(.branch_lt) }
+ /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.*) }
+ /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+}
+
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ __kernel_datapage_offset; /* Has to be there for the kernel to find */
+ __kernel_get_syscall_map;
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+ __kernel_get_tbfreq;
+ __kernel_sync_dicache;
+ __kernel_sync_dicache_p5;
+ __kernel_sigtramp_rt64;
+ local: *;
+ };
+}
diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 000000000000..0529cb9e3b97
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,13 @@
+#include <linux/init.h>
+#include <asm/page.h>
+
+ .section ".data.page_aligned"
+
+ .globl vdso64_start, vdso64_end
+ .balign PAGE_SIZE
+vdso64_start:
+ .incbin "arch/powerpc/kernel/vdso64/vdso64.so"
+ .balign PAGE_SIZE
+vdso64_end:
+
+ .previous
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 97082a4203ad..71a6addf9f7f 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -21,6 +21,7 @@
#include <asm/iommu.h>
#include <asm/dma.h>
#include <asm/vio.h>
+#include <asm/prom.h>
static const struct vio_device_id *vio_match_device(
const struct vio_device_id *, const struct vio_dev *);
@@ -265,7 +266,33 @@ static int vio_bus_match(struct device *dev, struct device_driver *drv)
return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
}
+static int vio_hotplug(struct device *dev, char **envp, int num_envp,
+ char *buffer, int buffer_size)
+{
+ const struct vio_dev *vio_dev = to_vio_dev(dev);
+ char *cp;
+ int length;
+
+ if (!num_envp)
+ return -ENOMEM;
+
+ if (!vio_dev->dev.platform_data)
+ return -ENODEV;
+ cp = (char *)get_property(vio_dev->dev.platform_data, "compatible", &length);
+ if (!cp)
+ return -ENODEV;
+
+ envp[0] = buffer;
+ length = scnprintf(buffer, buffer_size, "MODALIAS=vio:T%sS%s",
+ vio_dev->type, cp);
+ if (buffer_size - length <= 0)
+ return -ENOMEM;
+ envp[1] = NULL;
+ return 0;
+}
+
struct bus_type vio_bus_type = {
.name = "vio",
+ .hotplug = vio_hotplug,
.match = vio_bus_match,
};