diff options
Diffstat (limited to 'drivers/edac')
46 files changed, 2338 insertions, 904 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 409b92b8d346..e443f2c1dfd1 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -4,10 +4,13 @@ # Licensed and distributed under the GPL # +config EDAC_SUPPORT + bool + menuconfig EDAC bool "EDAC (Error Detection And Correction) reporting" depends on HAS_IOMEM - depends on X86 || PPC || TILE || ARM + depends on X86 || PPC || TILE || ARM || EDAC_SUPPORT help EDAC is designed to report errors in the core system. These are low-level errors that are reported in the CPU or @@ -29,8 +32,6 @@ menuconfig EDAC if EDAC -comment "Reporting subsystems" - config EDAC_LEGACY_SYSFS bool "EDAC legacy sysfs" default y @@ -42,10 +43,10 @@ config EDAC_LEGACY_SYSFS config EDAC_DEBUG bool "Debugging" help - This turns on debugging information for the entire EDAC - sub-system. You can insert module with "debug_level=x", current - there're four debug levels (x=0,1,2,3 from low to high). - Usually you should select 'N'. + This turns on debugging information for the entire EDAC subsystem. + You do so by inserting edac_module with "edac_debug_level=x." Valid + levels are 0-4 (from low to high) and by default it is set to 2. + Usually you should select 'N' here. config EDAC_DECODE_MCE tristate "Decode MCEs in human-readable form (only on AMD for now)" @@ -79,6 +80,29 @@ config EDAC_MM_EDAC occurred so that a particular failing memory module can be replaced. If unsure, select 'Y'. +config EDAC_GHES + bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" + depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) + default y + help + Not all machines support hardware-driven error report. Some of those + provide a BIOS-driven error report mechanism via ACPI, using the + APEI/GHES driver. By enabling this option, the error reports provided + by GHES are sent to userspace via the EDAC API. + + When this option is enabled, it will disable the hardware-driven + mechanisms, if a GHES BIOS is detected, entering into the + "Firmware First" mode. + + It should be noticed that keeping both GHES and a hardware-driven + error mechanism won't work well, as BIOS will race with OS, while + reading the error registers. So, if you want to not use "Firmware + first" GHES error mechanism, you should disable GHES either at + compilation time or by passing "ghes.disable=1" Kernel parameter + at boot time. + + In doubt, say 'Y'. + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64) K8, F10h" depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE @@ -157,7 +181,7 @@ config EDAC_I3000 config EDAC_I3200 tristate "Intel 3200" - depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL + depends on EDAC_MM_EDAC && PCI && X86 help Support for error detection and correction on the Intel 3200 and 3210 server chipsets. @@ -223,7 +247,7 @@ config EDAC_I7300 config EDAC_SBRIDGE tristate "Intel Sandy-Bridge Integrated MC" depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL - depends on PCI_MMCONFIG && EXPERIMENTAL + depends on PCI_MMCONFIG help Support for error detection and correction the Intel Sandy Bridge Integrated Memory Controller. @@ -316,4 +340,32 @@ config EDAC_HIGHBANK_L2 Support for error detection and correction on the Calxeda Highbank memory controller. +config EDAC_OCTEON_PC + tristate "Cavium Octeon Primary Caches" + depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + help + Support for error detection and correction on the primary caches of + the cnMIPS cores of Cavium Octeon family SOCs. + +config EDAC_OCTEON_L2C + tristate "Cavium Octeon Secondary Caches (L2C)" + depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + help + Support for error detection and correction on the + Cavium Octeon family of SOCs. + +config EDAC_OCTEON_LMC + tristate "Cavium Octeon DRAM Memory Controller (LMC)" + depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + help + Support for error detection and correction on the + Cavium Octeon family of SOCs. + +config EDAC_OCTEON_PCI + tristate "Cavium Octeon PCI Controller" + depends on EDAC_MM_EDAC && PCI && CPU_CAVIUM_OCTEON + help + Support for error detection and correction on the + Cavium Octeon family of SOCs. + endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 7e5129a733f8..4154ed6a02c6 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -16,6 +16,7 @@ ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o endif +obj-$(CONFIG_EDAC_GHES) += ghes_edac.o obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o edac_mce_amd-y := mce_amd.o @@ -58,3 +59,8 @@ obj-$(CONFIG_EDAC_TILE) += tile_edac.o obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o + +obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o +obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o +obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o +obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index cc8e7c78a23c..910b0116c128 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -31,7 +31,7 @@ static struct ecc_settings **ecc_stngs; * *FIXME: Produce a better mapping/linearisation. */ -struct scrubrate { +static const struct scrubrate { u32 scrubval; /* bit pattern for scrub rate */ u32 bandwidth; /* bandwidth consumed (bytes/sec) */ } scrubrates[] = { @@ -60,8 +60,8 @@ struct scrubrate { { 0x00, 0UL}, /* scrubbing off */ }; -static int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, - u32 *val, const char *func) +int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, + u32 *val, const char *func) { int err = 0; @@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci) * DRAM base/limit associated with node_id */ static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, - unsigned nid) + u8 nid) { u64 addr; @@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, u64 sys_addr) { struct amd64_pvt *pvt; - unsigned node_id; + u8 node_id; u32 intlv_en, bits; /* @@ -423,7 +423,6 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, u64 *hole_offset, u64 *hole_size) { struct amd64_pvt *pvt = mci->pvt_info; - u64 base; /* only revE and later have the DRAM Hole Address Register */ if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) { @@ -462,10 +461,8 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, * addresses in the hole so that they start at 0x100000000. */ - base = dhar_base(pvt); - - *hole_base = base; - *hole_size = (0x1ull << 32) - base; + *hole_base = dhar_base(pvt); + *hole_size = (1ULL << 32) - *hole_base; if (boot_cpu_data.x86 > 0xf) *hole_offset = f10_dhar_offset(pvt); @@ -513,15 +510,15 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr) { struct amd64_pvt *pvt = mci->pvt_info; u64 dram_base, hole_base, hole_offset, hole_size, dram_addr; - int ret = 0; + int ret; dram_base = get_dram_base(pvt, pvt->mc_node_id); ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size); if (!ret) { - if ((sys_addr >= (1ull << 32)) && - (sys_addr < ((1ull << 32) + hole_size))) { + if ((sys_addr >= (1ULL << 32)) && + (sys_addr < ((1ULL << 32) + hole_size))) { /* use DHAR to translate SysAddr to DramAddr */ dram_addr = sys_addr - hole_offset; @@ -605,117 +602,12 @@ static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr) return input_addr; } - -/* - * @input_addr is an InputAddr associated with the node represented by mci. - * Translate @input_addr to a DramAddr and return the result. - */ -static u64 input_addr_to_dram_addr(struct mem_ctl_info *mci, u64 input_addr) -{ - struct amd64_pvt *pvt; - unsigned node_id, intlv_shift; - u64 bits, dram_addr; - u32 intlv_sel; - - /* - * Near the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E) - * shows how to translate a DramAddr to an InputAddr. Here we reverse - * this procedure. When translating from a DramAddr to an InputAddr, the - * bits used for node interleaving are discarded. Here we recover these - * bits from the IntlvSel field of the DRAM Limit register (section - * 3.4.4.2) for the node that input_addr is associated with. - */ - pvt = mci->pvt_info; - node_id = pvt->mc_node_id; - - BUG_ON(node_id > 7); - - intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0)); - if (intlv_shift == 0) { - edac_dbg(1, " InputAddr 0x%lx translates to DramAddr of same value\n", - (unsigned long)input_addr); - - return input_addr; - } - - bits = ((input_addr & GENMASK(12, 35)) << intlv_shift) + - (input_addr & 0xfff); - - intlv_sel = dram_intlv_sel(pvt, node_id) & ((1 << intlv_shift) - 1); - dram_addr = bits + (intlv_sel << 12); - - edac_dbg(1, "InputAddr 0x%lx translates to DramAddr 0x%lx (%d node interleave bits)\n", - (unsigned long)input_addr, - (unsigned long)dram_addr, intlv_shift); - - return dram_addr; -} - -/* - * @dram_addr is a DramAddr that maps to the node represented by mci. Convert - * @dram_addr to a SysAddr. - */ -static u64 dram_addr_to_sys_addr(struct mem_ctl_info *mci, u64 dram_addr) -{ - struct amd64_pvt *pvt = mci->pvt_info; - u64 hole_base, hole_offset, hole_size, base, sys_addr; - int ret = 0; - - ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, - &hole_size); - if (!ret) { - if ((dram_addr >= hole_base) && - (dram_addr < (hole_base + hole_size))) { - sys_addr = dram_addr + hole_offset; - - edac_dbg(1, "using DHAR to translate DramAddr 0x%lx to SysAddr 0x%lx\n", - (unsigned long)dram_addr, - (unsigned long)sys_addr); - - return sys_addr; - } - } - - base = get_dram_base(pvt, pvt->mc_node_id); - sys_addr = dram_addr + base; - - /* - * The sys_addr we have computed up to this point is a 40-bit value - * because the k8 deals with 40-bit values. However, the value we are - * supposed to return is a full 64-bit physical address. The AMD - * x86-64 architecture specifies that the most significant implemented - * address bit through bit 63 of a physical address must be either all - * 0s or all 1s. Therefore we sign-extend the 40-bit sys_addr to a - * 64-bit value below. See section 3.4.2 of AMD publication 24592: - * AMD x86-64 Architecture Programmer's Manual Volume 1 Application - * Programming. - */ - sys_addr |= ~((sys_addr & (1ull << 39)) - 1); - - edac_dbg(1, " Node %d, DramAddr 0x%lx to SysAddr 0x%lx\n", - pvt->mc_node_id, (unsigned long)dram_addr, - (unsigned long)sys_addr); - - return sys_addr; -} - -/* - * @input_addr is an InputAddr associated with the node given by mci. Translate - * @input_addr to a SysAddr. - */ -static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci, - u64 input_addr) -{ - return dram_addr_to_sys_addr(mci, - input_addr_to_dram_addr(mci, input_addr)); -} - /* Map the Error address to a PAGE and PAGE OFFSET. */ static inline void error_address_to_page_and_offset(u64 error_address, - u32 *page, u32 *offset) + struct err_info *err) { - *page = (u32) (error_address >> PAGE_SHIFT); - *offset = ((u32) error_address) & ~PAGE_MASK; + err->page = (u32) (error_address >> PAGE_SHIFT); + err->offset = ((u32) error_address) & ~PAGE_MASK; } /* @@ -942,7 +834,8 @@ static u64 get_error_address(struct mce *m) struct amd64_pvt *pvt; u64 cc6_base, tmp_addr; u32 tmp; - u8 mce_nid, intlv_en; + u16 mce_nid; + u8 intlv_en; if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7) return addr; @@ -982,10 +875,29 @@ static u64 get_error_address(struct mce *m) return addr; } +static struct pci_dev *pci_get_related_function(unsigned int vendor, + unsigned int device, + struct pci_dev *related) +{ + struct pci_dev *dev = NULL; + + while ((dev = pci_get_device(vendor, device, dev))) { + if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) && + (dev->bus->number == related->bus->number) && + (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) + break; + } + + return dev; +} + static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) { + struct amd_northbridge *nb; + struct pci_dev *misc, *f1 = NULL; struct cpuinfo_x86 *c = &boot_cpu_data; int off = range << 3; + u32 llim; amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off, &pvt->ranges[range].base.lo); amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo); @@ -999,86 +911,73 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off, &pvt->ranges[range].base.hi); amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi); - /* Factor in CC6 save area by reading dst node's limit reg */ - if (c->x86 == 0x15) { - struct pci_dev *f1 = NULL; - u8 nid = dram_dst_node(pvt, range); - u32 llim; + /* F15h: factor in CC6 save area by reading dst node's limit reg */ + if (c->x86 != 0x15) + return; - f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1)); - if (WARN_ON(!f1)) - return; + nb = node_to_amd_nb(dram_dst_node(pvt, range)); + if (WARN_ON(!nb)) + return; - amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim); + misc = nb->misc; + f1 = pci_get_related_function(misc->vendor, PCI_DEVICE_ID_AMD_15H_NB_F1, misc); + if (WARN_ON(!f1)) + return; - pvt->ranges[range].lim.lo &= GENMASK(0, 15); + amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim); - /* {[39:27],111b} */ - pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16; + pvt->ranges[range].lim.lo &= GENMASK(0, 15); - pvt->ranges[range].lim.hi &= GENMASK(0, 7); + /* {[39:27],111b} */ + pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16; - /* [47:40] */ - pvt->ranges[range].lim.hi |= llim >> 13; + pvt->ranges[range].lim.hi &= GENMASK(0, 7); - pci_dev_put(f1); - } + /* [47:40] */ + pvt->ranges[range].lim.hi |= llim >> 13; + + pci_dev_put(f1); } static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, - u16 syndrome) + struct err_info *err) { - struct mem_ctl_info *src_mci; struct amd64_pvt *pvt = mci->pvt_info; - int channel, csrow; - u32 page, offset; - error_address_to_page_and_offset(sys_addr, &page, &offset); + error_address_to_page_and_offset(sys_addr, err); /* * Find out which node the error address belongs to. This may be * different from the node that detected the error. */ - src_mci = find_mc_by_sys_addr(mci, sys_addr); - if (!src_mci) { + err->src_mci = find_mc_by_sys_addr(mci, sys_addr); + if (!err->src_mci) { amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n", (unsigned long)sys_addr); - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - page, offset, syndrome, - -1, -1, -1, - "failed to map error addr to a node", - ""); + err->err_code = ERR_NODE; return; } /* Now map the sys_addr to a CSROW */ - csrow = sys_addr_to_csrow(src_mci, sys_addr); - if (csrow < 0) { - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - page, offset, syndrome, - -1, -1, -1, - "failed to map error addr to a csrow", - ""); + err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr); + if (err->csrow < 0) { + err->err_code = ERR_CSROW; return; } /* CHIPKILL enabled */ if (pvt->nbcfg & NBCFG_CHIPKILL) { - channel = get_channel_from_ecc_syndrome(mci, syndrome); - if (channel < 0) { + err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome); + if (err->channel < 0) { /* * Syndrome didn't map, so we don't know which of the * 2 DIMMs is in error. So we need to ID 'both' of them * as suspect. */ - amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - " + amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - " "possible error reporting race\n", - syndrome); - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - page, offset, syndrome, - csrow, -1, -1, - "unknown syndrome - possible error reporting race", - ""); + err->syndrome); + err->err_code = ERR_CHANNEL; return; } } else { @@ -1090,13 +989,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, * was obtained from email communication with someone at AMD. * (Wish the email was placed in this comment - norsk) */ - channel = ((sys_addr & BIT(3)) != 0); + err->channel = ((sys_addr & BIT(3)) != 0); } - - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci, 1, - page, offset, syndrome, - csrow, channel, -1, - "", ""); } static int ddr2_cs_size(unsigned i, bool dct_width) @@ -1328,7 +1222,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, } /* Convert the sys_addr to the normalized DCT address */ -static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, +static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, u64 sys_addr, bool hi_rng, u32 dct_sel_base_addr) { @@ -1404,7 +1298,7 @@ static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) * -EINVAL: NOT FOUND * 0..csrow = Chip-Select Row */ -static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) +static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; @@ -1482,7 +1376,7 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr) /* For a given @dram_range, check if @sys_addr falls within it. */ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, - u64 sys_addr, int *nid, int *chan_sel) + u64 sys_addr, int *chan_sel) { int cs_found = -EINVAL; u64 chan_addr; @@ -1555,15 +1449,14 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range, cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel); - if (cs_found >= 0) { - *nid = node_id; + if (cs_found >= 0) *chan_sel = channel; - } + return cs_found; } static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, - int *node, int *chan_sel) + int *chan_sel) { int cs_found = -EINVAL; unsigned range; @@ -1577,8 +1470,7 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, (get_dram_limit(pvt, range) >= sys_addr)) { cs_found = f1x_match_to_this_node(pvt, range, - sys_addr, node, - chan_sel); + sys_addr, chan_sel); if (cs_found >= 0) break; } @@ -1594,22 +1486,15 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, * (MCX_ADDR). */ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, - u16 syndrome) + struct err_info *err) { struct amd64_pvt *pvt = mci->pvt_info; - u32 page, offset; - int nid, csrow, chan = 0; - error_address_to_page_and_offset(sys_addr, &page, &offset); + error_address_to_page_and_offset(sys_addr, err); - csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); - - if (csrow < 0) { - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - page, offset, syndrome, - -1, -1, -1, - "failed to map error addr to a csrow", - ""); + err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel); + if (err->csrow < 0) { + err->err_code = ERR_CSROW; return; } @@ -1619,12 +1504,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, * this point. */ if (dct_ganging_enabled(pvt)) - chan = get_channel_from_ecc_syndrome(mci, syndrome); - - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - page, offset, syndrome, - csrow, chan, -1, - "", ""); + err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome); } /* @@ -1633,14 +1513,11 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, */ static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) { - int dimm, size0, size1, factor = 0; + int dimm, size0, size1; u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0; if (boot_cpu_data.x86 == 0xf) { - if (pvt->dclr0 & WIDTH_128) - factor = 1; - /* K8 families < revF not supported yet */ if (pvt->ext_model < K8_REV_F) return; @@ -1671,8 +1548,8 @@ static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) DBAM_DIMM(dimm, dbam)); amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", - dimm * 2, size0 << factor, - dimm * 2 + 1, size1 << factor); + dimm * 2, size0, + dimm * 2 + 1, size1); } } @@ -1712,23 +1589,6 @@ static struct amd64_family_type amd64_family_types[] = { }, }; -static struct pci_dev *pci_get_related_function(unsigned int vendor, - unsigned int device, - struct pci_dev *related) -{ - struct pci_dev *dev = NULL; - - dev = pci_get_device(vendor, device, dev); - while (dev) { - if ((dev->bus->number == related->bus->number) && - (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) - break; - dev = pci_get_device(vendor, device, dev); - } - - return dev; -} - /* * These are tables of eigenvectors (one per line) which can be used for the * construction of the syndrome tables. The modified syndrome search algorithm @@ -1736,7 +1596,7 @@ static struct pci_dev *pci_get_related_function(unsigned int vendor, * * Algorithm courtesy of Ross LaFetra from AMD. */ -static u16 x4_vectors[] = { +static const u16 x4_vectors[] = { 0x2f57, 0x1afe, 0x66cc, 0xdd88, 0x11eb, 0x3396, 0x7f4c, 0xeac8, 0x0001, 0x0002, 0x0004, 0x0008, @@ -1775,7 +1635,7 @@ static u16 x4_vectors[] = { 0x19a9, 0x2efe, 0xb5cc, 0x6f88, }; -static u16 x8_vectors[] = { +static const u16 x8_vectors[] = { 0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480, 0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80, 0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80, @@ -1797,7 +1657,7 @@ static u16 x8_vectors[] = { 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000, }; -static int decode_syndrome(u16 syndrome, u16 *vectors, unsigned num_vecs, +static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs, unsigned v_dim) { unsigned int i, err_sym; @@ -1893,101 +1753,56 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz); } -/* - * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR - * ADDRESS and process. - */ -static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m) -{ - struct amd64_pvt *pvt = mci->pvt_info; - u64 sys_addr; - u16 syndrome; - - /* Ensure that the Error Address is VALID */ - if (!(m->status & MCI_STATUS_ADDRV)) { - amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - 0, 0, 0, - -1, -1, -1, - "HW has no ERROR_ADDRESS available", - ""); - return; - } - - sys_addr = get_error_address(m); - syndrome = extract_syndrome(m->status); - - amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr); - - pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, syndrome); -} - -/* Handle any Un-correctable Errors (UEs) */ -static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) +static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err, + u8 ecc_type) { - struct mem_ctl_info *log_mci, *src_mci = NULL; - int csrow; - u64 sys_addr; - u32 page, offset; - - log_mci = mci; + enum hw_event_mc_err_type err_type; + const char *string; - if (!(m->status & MCI_STATUS_ADDRV)) { - amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - 0, 0, 0, - -1, -1, -1, - "HW has no ERROR_ADDRESS available", - ""); + if (ecc_type == 2) + err_type = HW_EVENT_ERR_CORRECTED; + else if (ecc_type == 1) + err_type = HW_EVENT_ERR_UNCORRECTED; + else { + WARN(1, "Something is rotten in the state of Denmark.\n"); return; } - sys_addr = get_error_address(m); - error_address_to_page_and_offset(sys_addr, &page, &offset); - - /* - * Find out which node the error address belongs to. This may be - * different from the node that detected the error. - */ - src_mci = find_mc_by_sys_addr(mci, sys_addr); - if (!src_mci) { - amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n", - (unsigned long)sys_addr); - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - page, offset, 0, - -1, -1, -1, - "ERROR ADDRESS NOT mapped to a MC", - ""); - return; + switch (err->err_code) { + case DECODE_OK: + string = ""; + break; + case ERR_NODE: + string = "Failed to map error addr to a node"; + break; + case ERR_CSROW: + string = "Failed to map error addr to a csrow"; + break; + case ERR_CHANNEL: + string = "unknown syndrome - possible error reporting race"; + break; + default: + string = "WTF error"; + break; } - log_mci = src_mci; - - csrow = sys_addr_to_csrow(log_mci, sys_addr); - if (csrow < 0) { - amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n", - (unsigned long)sys_addr); - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - page, offset, 0, - -1, -1, -1, - "ERROR ADDRESS NOT mapped to CS", - ""); - } else { - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - page, offset, 0, - csrow, -1, -1, - "", ""); - } + edac_mc_handle_error(err_type, mci, 1, + err->page, err->offset, err->syndrome, + err->csrow, err->channel, -1, + string, ""); } static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, struct mce *m) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, 0x1f); + struct amd64_pvt *pvt = mci->pvt_info; u8 ecc_type = (m->status >> 45) & 0x3; + u8 xec = XEC(m->status, 0x1f); + u16 ec = EC(m->status); + u64 sys_addr; + struct err_info err; - /* Bail early out if this was an 'observed' error */ + /* Bail out early if this was an 'observed' error */ if (PP(ec) == NBSL_PP_OBS) return; @@ -1995,10 +1810,16 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, if (xec && xec != F10_NBSL_EXT_ERR_ECC) return; + memset(&err, 0, sizeof(err)); + + sys_addr = get_error_address(m); + if (ecc_type == 2) - amd64_handle_ce(mci, m); - else if (ecc_type == 1) - amd64_handle_ue(mci, m); + err.syndrome = extract_syndrome(m->status); + + pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err); + + __log_bus_error(mci, &err, ecc_type); } void amd64_decode_bus_error(int node_id, struct mce *m) @@ -2166,6 +1987,7 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) u32 cs_mode, nr_pages; u32 dbam = dct ? pvt->dbam1 : pvt->dbam0; + /* * The math on this doesn't look right on the surface because x/2*4 can * be simplified to x*2 but this expression makes use of the fact that @@ -2173,13 +1995,13 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) * number of bits to shift the DBAM register to extract the proper CSROW * field. */ - cs_mode = (dbam >> ((csrow_nr / 2) * 4)) & 0xF; + cs_mode = DBAM_DIMM(csrow_nr / 2, dbam); nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT); - edac_dbg(0, " (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode); - edac_dbg(0, " nr_pages/channel= %u channel-count = %d\n", - nr_pages, pvt->channel_count); + edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n", + csrow_nr, dct, cs_mode); + edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); return nr_pages; } @@ -2190,15 +2012,14 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) */ static int init_csrows(struct mem_ctl_info *mci) { + struct amd64_pvt *pvt = mci->pvt_info; struct csrow_info *csrow; struct dimm_info *dimm; - struct amd64_pvt *pvt = mci->pvt_info; - u64 base, mask; - u32 val; - int i, j, empty = 1; - enum mem_type mtype; enum edac_type edac_mode; + enum mem_type mtype; + int i, j, empty = 1; int nr_pages = 0; + u32 val; amd64_read_pci_cfg(pvt->F3, NBCFG, &val); @@ -2208,29 +2029,35 @@ static int init_csrows(struct mem_ctl_info *mci) pvt->mc_node_id, val, !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); + /* + * We iterate over DCT0 here but we look at DCT1 in parallel, if needed. + */ for_each_chip_select(i, 0, pvt) { - csrow = mci->csrows[i]; + bool row_dct0 = !!csrow_enabled(i, 0, pvt); + bool row_dct1 = false; + + if (boot_cpu_data.x86 != 0xf) + row_dct1 = !!csrow_enabled(i, 1, pvt); - if (!csrow_enabled(i, 0, pvt) && !csrow_enabled(i, 1, pvt)) { - edac_dbg(1, "----CSROW %d VALID for MC node %d\n", - i, pvt->mc_node_id); + if (!row_dct0 && !row_dct1) continue; - } + csrow = mci->csrows[i]; empty = 0; - if (csrow_enabled(i, 0, pvt)) + + edac_dbg(1, "MC node: %d, csrow: %d\n", + pvt->mc_node_id, i); + + if (row_dct0) nr_pages = amd64_csrow_nr_pages(pvt, 0, i); - if (csrow_enabled(i, 1, pvt)) - nr_pages += amd64_csrow_nr_pages(pvt, 1, i); - get_cs_base_and_mask(pvt, i, 0, &base, &mask); - /* 8 bytes of resolution */ + /* K8 has only one DCT */ + if (boot_cpu_data.x86 != 0xf && row_dct1) + nr_pages += amd64_csrow_nr_pages(pvt, 1, i); mtype = amd64_determine_memory_type(pvt, i); - edac_dbg(1, " for MC node %d csrow %d:\n", pvt->mc_node_id, i); - edac_dbg(1, " nr_pages: %u\n", - nr_pages * pvt->channel_count); + edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages); /* * determine whether CHIPKILL or JUST ECC or NO ECC is operating @@ -2247,13 +2074,14 @@ static int init_csrows(struct mem_ctl_info *mci) dimm->edac_mode = edac_mode; dimm->nr_pages = nr_pages; } + csrow->nr_pages = nr_pages; } return empty; } /* get all cores on this DCT */ -static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) +static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) { int cpu; @@ -2263,7 +2091,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) } /* check MCG_CTL on all the cpus on this node */ -static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid) +static bool amd64_nb_mce_bank_enabled_on_node(u16 nid) { cpumask_var_t mask; int cpu, nbe; @@ -2296,7 +2124,7 @@ out: return ret; } -static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) +static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) { cpumask_var_t cmask; int cpu; @@ -2334,7 +2162,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) return 0; } -static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { bool ret = true; @@ -2386,7 +2214,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, return ret; } -static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { u32 value, mask = 0x3; /* UECC/CECC enable */ @@ -2425,7 +2253,7 @@ static const char *ecc_msg = "'ecc_enable_override'.\n" " (Note that use of the override may cause unknown side effects.)\n"; -static bool ecc_enabled(struct pci_dev *F3, u8 nid) +static bool ecc_enabled(struct pci_dev *F3, u16 nid) { u32 value; u8 ecc_en = 0; @@ -2546,7 +2374,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = get_node_id(F2); + u16 nid = amd_get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2591,6 +2419,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) mci->pvt_info = pvt; mci->pdev = &pvt->F2->dev; + mci->csbased = 1; setup_mci_misc_attrs(mci, fam_type); @@ -2634,10 +2463,10 @@ err_ret: return ret; } -static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, - const struct pci_device_id *mc_type) +static int amd64_probe_one_instance(struct pci_dev *pdev, + const struct pci_device_id *mc_type) { - u8 nid = get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; @@ -2683,11 +2512,11 @@ err_out: return ret; } -static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) +static void amd64_remove_one_instance(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; - u8 nid = get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; @@ -2757,7 +2586,7 @@ MODULE_DEVICE_TABLE(pci, amd64_pci_table); static struct pci_driver amd64_pci_driver = { .name = EDAC_MOD_STR, .probe = amd64_probe_one_instance, - .remove = __devexit_p(amd64_remove_one_instance), + .remove = amd64_remove_one_instance, .id_table = amd64_pci_table, }; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 8d4804732bac..35637d83f235 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -33,7 +33,7 @@ * detection. The mods to Rev F required more family * information detection. * - * Changes/Fixes by Borislav Petkov <borislav.petkov@amd.com>: + * Changes/Fixes by Borislav Petkov <bp@alien8.de>: * - misc fixes and code cleanups * * This module is based on the following documents @@ -219,7 +219,7 @@ #define DBAM1 0x180 /* Extract the DIMM 'type' on the i'th DIMM from the DBAM reg value passed */ -#define DBAM_DIMM(i, reg) ((((reg) >> (4*i))) & 0xF) +#define DBAM_DIMM(i, reg) ((((reg) >> (4*(i)))) & 0xF) #define DBAM_MAX_VALUE 11 @@ -267,18 +267,20 @@ #define online_spare_bad_dramcs(pvt, c) (((pvt)->online_spare >> (4 + 4 * (c))) & 0x7) #define F10_NB_ARRAY_ADDR 0xB8 -#define F10_NB_ARRAY_DRAM_ECC BIT(31) +#define F10_NB_ARRAY_DRAM BIT(31) /* Bits [2:1] are used to select 16-byte section within a 64-byte cacheline */ -#define SET_NB_ARRAY_ADDRESS(section) (((section) & 0x3) << 1) +#define SET_NB_ARRAY_ADDR(section) (((section) & 0x3) << 1) #define F10_NB_ARRAY_DATA 0xBC -#define SET_NB_DRAM_INJECTION_WRITE(word, bits) \ - (BIT(((word) & 0xF) + 20) | \ - BIT(17) | bits) -#define SET_NB_DRAM_INJECTION_READ(word, bits) \ - (BIT(((word) & 0xF) + 20) | \ - BIT(16) | bits) +#define F10_NB_ARR_ECC_WR_REQ BIT(17) +#define SET_NB_DRAM_INJECTION_WRITE(inj) \ + (BIT(((inj.word) & 0xF) + 20) | \ + F10_NB_ARR_ECC_WR_REQ | inj.bit_map) +#define SET_NB_DRAM_INJECTION_READ(inj) \ + (BIT(((inj.word) & 0xF) + 20) | \ + BIT(16) | inj.bit_map) + #define NBCAP 0xE8 #define NBCAP_CHIPKILL BIT(4) @@ -290,12 +292,6 @@ /* MSRs */ #define MSR_MCGCTL_NBE BIT(4) -/* AMD sets the first MC device at device ID 0x18. */ -static inline u8 get_node_id(struct pci_dev *pdev) -{ - return PCI_SLOT(pdev->devfn) - 0x18; -} - enum amd_families { K8_CPUS = 0, F10_CPUS, @@ -305,9 +301,9 @@ enum amd_families { /* Error injection control structure */ struct error_injection { - u32 section; - u32 word; - u32 bit_map; + u32 section; + u32 word; + u32 bit_map; }; /* low and high part of PCI config space regs */ @@ -338,7 +334,7 @@ struct amd64_pvt { /* pci_device handles which we utilize */ struct pci_dev *F1, *F2, *F3; - unsigned mc_node_id; /* MC index of this MC node */ + u16 mc_node_id; /* MC index of this MC node */ int ext_model; /* extended model value of this node */ int channel_count; @@ -374,7 +370,24 @@ struct amd64_pvt { struct error_injection injection; }; -static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i) +enum err_codes { + DECODE_OK = 0, + ERR_NODE = -1, + ERR_CSROW = -2, + ERR_CHANNEL = -3, +}; + +struct err_info { + int err_code; + struct mem_ctl_info *src_mci; + int csrow; + int channel; + u16 syndrome; + u32 page; + u32 offset; +}; + +static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i) { u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8; @@ -384,7 +397,7 @@ static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i) return (((u64)pvt->ranges[i].base.hi & 0x000000ff) << 40) | addr; } -static inline u64 get_dram_limit(struct amd64_pvt *pvt, unsigned i) +static inline u64 get_dram_limit(struct amd64_pvt *pvt, u8 i) { u64 lim = (((u64)pvt->ranges[i].lim.lo & 0xffff0000) << 8) | 0x00ffffff; @@ -447,7 +460,7 @@ static inline void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci) struct low_ops { int (*early_channel_count) (struct amd64_pvt *pvt); void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr, - u16 syndrome); + struct err_info *); int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode); int (*read_dct_pci_cfg) (struct amd64_pvt *pvt, int offset, u32 *val, const char *func); @@ -459,6 +472,8 @@ struct amd64_family_type { struct low_ops ops; }; +int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, + u32 *val, const char *func); int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, u32 val, const char *func); @@ -475,3 +490,15 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, u64 *hole_offset, u64 *hole_size); #define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +/* Injection helpers */ +static inline void disable_caches(void *dummy) +{ + write_cr0(read_cr0() | X86_CR0_CD); + wbinvd(); +} + +static inline void enable_caches(void *dummy) +{ + write_cr0(read_cr0() & ~X86_CR0_CD); +} diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c index 53d972e00dfb..8c171fa1cb9b 100644 --- a/drivers/edac/amd64_edac_inj.c +++ b/drivers/edac/amd64_edac_inj.c @@ -22,20 +22,19 @@ static ssize_t amd64_inject_section_store(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; unsigned long value; - int ret = 0; + int ret; ret = strict_strtoul(data, 10, &value); - if (ret != -EINVAL) { + if (ret < 0) + return ret; - if (value > 3) { - amd64_warn("%s: invalid section 0x%lx\n", __func__, value); - return -EINVAL; - } - - pvt->injection.section = (u32) value; - return count; + if (value > 3) { + amd64_warn("%s: invalid section 0x%lx\n", __func__, value); + return -EINVAL; } - return ret; + + pvt->injection.section = (u32) value; + return count; } static ssize_t amd64_inject_word_show(struct device *dev, @@ -60,20 +59,19 @@ static ssize_t amd64_inject_word_store(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; unsigned long value; - int ret = 0; + int ret; ret = strict_strtoul(data, 10, &value); - if (ret != -EINVAL) { + if (ret < 0) + return ret; - if (value > 8) { - amd64_warn("%s: invalid word 0x%lx\n", __func__, value); - return -EINVAL; - } - - pvt->injection.word = (u32) value; - return count; + if (value > 8) { + amd64_warn("%s: invalid word 0x%lx\n", __func__, value); + return -EINVAL; } - return ret; + + pvt->injection.word = (u32) value; + return count; } static ssize_t amd64_inject_ecc_vector_show(struct device *dev, @@ -97,21 +95,19 @@ static ssize_t amd64_inject_ecc_vector_store(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; unsigned long value; - int ret = 0; + int ret; ret = strict_strtoul(data, 16, &value); - if (ret != -EINVAL) { + if (ret < 0) + return ret; - if (value & 0xFFFF0000) { - amd64_warn("%s: invalid EccVector: 0x%lx\n", - __func__, value); - return -EINVAL; - } - - pvt->injection.bit_map = (u32) value; - return count; + if (value & 0xFFFF0000) { + amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value); + return -EINVAL; } - return ret; + + pvt->injection.bit_map = (u32) value; + return count; } /* @@ -126,28 +122,25 @@ static ssize_t amd64_inject_read_store(struct device *dev, struct amd64_pvt *pvt = mci->pvt_info; unsigned long value; u32 section, word_bits; - int ret = 0; + int ret; ret = strict_strtoul(data, 10, &value); - if (ret != -EINVAL) { + if (ret < 0) + return ret; - /* Form value to choose 16-byte section of cacheline */ - section = F10_NB_ARRAY_DRAM_ECC | - SET_NB_ARRAY_ADDRESS(pvt->injection.section); - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); + /* Form value to choose 16-byte section of cacheline */ + section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section); - word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection.word, - pvt->injection.bit_map); + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); - /* Issue 'word' and 'bit' along with the READ request */ - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); + word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection); - edac_dbg(0, "section=0x%x word_bits=0x%x\n", - section, word_bits); + /* Issue 'word' and 'bit' along with the READ request */ + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); - return count; - } - return ret; + edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); + + return count; } /* @@ -160,30 +153,43 @@ static ssize_t amd64_inject_write_store(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; + u32 section, word_bits, tmp; unsigned long value; - u32 section, word_bits; - int ret = 0; + int ret; ret = strict_strtoul(data, 10, &value); - if (ret != -EINVAL) { + if (ret < 0) + return ret; + + /* Form value to choose 16-byte section of cacheline */ + section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section); + + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); - /* Form value to choose 16-byte section of cacheline */ - section = F10_NB_ARRAY_DRAM_ECC | - SET_NB_ARRAY_ADDRESS(pvt->injection.section); - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); + word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection); - word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection.word, - pvt->injection.bit_map); + pr_notice_once("Don't forget to decrease MCE polling interval in\n" + "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n" + "so that you can get the error report faster.\n"); - /* Issue 'word' and 'bit' along with the READ request */ - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); + on_each_cpu(disable_caches, NULL, 1); - edac_dbg(0, "section=0x%x word_bits=0x%x\n", - section, word_bits); + /* Issue 'word' and 'bit' along with the READ request */ + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); - return count; + retry: + /* wait until injection happens */ + amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp); + if (tmp & F10_NB_ARR_ECC_WR_REQ) { + cpu_relax(); + goto retry; } - return ret; + + on_each_cpu(enable_caches, NULL, 1); + + edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); + + return count; } /* diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index 29eeb68a200c..96e3ee3460a5 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -301,8 +301,8 @@ fail: } /* returns count (>= 0), or negative on error */ -static int __devinit amd76x_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int amd76x_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) { edac_dbg(0, "\n"); @@ -318,7 +318,7 @@ static int __devinit amd76x_init_one(struct pci_dev *pdev, * structure for the device then delete the mci and free the * resources. */ -static void __devexit amd76x_remove_one(struct pci_dev *pdev) +static void amd76x_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; @@ -350,7 +350,7 @@ MODULE_DEVICE_TABLE(pci, amd76x_pci_tbl); static struct pci_driver amd76x_driver = { .name = EDAC_MOD_STR, .probe = amd76x_init_one, - .remove = __devexit_p(amd76x_remove_one), + .remove = amd76x_remove_one, .id_table = amd76x_pci_tbl, }; diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c index a1bbd8edd257..c2eaf334b90b 100644 --- a/drivers/edac/cell_edac.c +++ b/drivers/edac/cell_edac.c @@ -124,7 +124,7 @@ static void cell_edac_check(struct mem_ctl_info *mci) } } -static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci) +static void cell_edac_init_csrows(struct mem_ctl_info *mci) { struct csrow_info *csrow = mci->csrows[0]; struct dimm_info *dimm; @@ -164,7 +164,7 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci) } } -static int __devinit cell_edac_probe(struct platform_device *pdev) +static int cell_edac_probe(struct platform_device *pdev) { struct cbe_mic_tm_regs __iomem *regs; struct mem_ctl_info *mci; @@ -233,7 +233,7 @@ static int __devinit cell_edac_probe(struct platform_device *pdev) return 0; } -static int __devexit cell_edac_remove(struct platform_device *pdev) +static int cell_edac_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); if (mci) @@ -247,7 +247,7 @@ static struct platform_driver cell_edac_driver = { .owner = THIS_MODULE, }, .probe = cell_edac_probe, - .remove = __devexit_p(cell_edac_remove), + .remove = cell_edac_remove, }; static int __init cell_edac_init(void) diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index c2ef13495873..7f3c57113ba1 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c @@ -932,7 +932,7 @@ static int cpc925_mc_get_channels(void __iomem *vbase) return dual; } -static int __devinit cpc925_probe(struct platform_device *pdev) +static int cpc925_probe(struct platform_device *pdev) { static int edac_mc_idx; struct mem_ctl_info *mci; diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index a5ed6b795fd4..644fec54681f 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -1390,8 +1390,7 @@ fail: } /* returns count (>= 0), or negative on error */ -static int __devinit e752x_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int e752x_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { edac_dbg(0, "\n"); @@ -1402,7 +1401,7 @@ static int __devinit e752x_init_one(struct pci_dev *pdev, return e752x_probe1(pdev, ent->driver_data); } -static void __devexit e752x_remove_one(struct pci_dev *pdev) +static void e752x_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct e752x_pvt *pvt; @@ -1445,7 +1444,7 @@ MODULE_DEVICE_TABLE(pci, e752x_pci_tbl); static struct pci_driver e752x_driver = { .name = EDAC_MOD_STR, .probe = e752x_init_one, - .remove = __devexit_p(e752x_remove_one), + .remove = e752x_remove_one, .id_table = e752x_pci_tbl, }; diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 9ff57f361a43..1c4056a50383 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -528,8 +528,7 @@ fail0: } /* returns count (>= 0), or negative on error */ -static int __devinit e7xxx_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int e7xxx_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { edac_dbg(0, "\n"); @@ -538,7 +537,7 @@ static int __devinit e7xxx_init_one(struct pci_dev *pdev, -EIO : e7xxx_probe1(pdev, ent->driver_data); } -static void __devexit e7xxx_remove_one(struct pci_dev *pdev) +static void e7xxx_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct e7xxx_pvt *pvt; @@ -579,7 +578,7 @@ MODULE_DEVICE_TABLE(pci, e7xxx_pci_tbl); static struct pci_driver e7xxx_driver = { .name = EDAC_MOD_STR, .probe = e7xxx_init_one, - .remove = __devexit_p(e7xxx_remove_one), + .remove = e7xxx_remove_one, .id_table = e7xxx_pci_tbl, }; diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 23bb99fa44f1..3c2625e7980d 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -453,6 +453,11 @@ extern struct mem_ctl_info *find_mci_by_dev(struct device *dev); extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page); + +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e); + void edac_mc_handle_error(const enum hw_event_mc_err_type type, struct mem_ctl_info *mci, const u16 error_count, diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 90f0b730e9bb..cdb81aa73ab7 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -42,6 +42,12 @@ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); +/* + * Used to lock EDAC MC to just one module, avoiding two drivers e. g. + * apei/ghes and i7core_edac to be used at the same time. + */ +static void const *edac_mc_owner; + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -340,7 +346,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, /* * Alocate and fill the csrow/channels structs */ - mci->csrows = kcalloc(sizeof(*mci->csrows), tot_csrows, GFP_KERNEL); + mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL); if (!mci->csrows) goto error; for (row = 0; row < tot_csrows; row++) { @@ -351,7 +357,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, csr->csrow_idx = row; csr->mci = mci; csr->nr_channels = tot_channels; - csr->channels = kcalloc(sizeof(*csr->channels), tot_channels, + csr->channels = kcalloc(tot_channels, sizeof(*csr->channels), GFP_KERNEL); if (!csr->channels) goto error; @@ -369,7 +375,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, /* * Allocate and fill the dimm structs */ - mci->dimms = kcalloc(sizeof(*mci->dimms), tot_dimms, GFP_KERNEL); + mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL); if (!mci->dimms) goto error; @@ -416,10 +422,18 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, dimm->cschannel = chn; /* Increment csrow location */ - row++; - if (row == tot_csrows) { - row = 0; + if (layers[0].is_virt_csrow) { chn++; + if (chn == tot_channels) { + chn = 0; + row++; + } + } else { + row++; + if (row == tot_csrows) { + row = 0; + chn++; + } } /* Increment dimm location */ @@ -433,13 +447,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, mci->op_state = OP_ALLOC; - /* at this point, the root kobj is valid, and in order to - * 'free' the object, then the function: - * edac_mc_unregister_sysfs_main_kobj() must be called - * which will perform kobj unregistration and the actual free - * will occur during the kobject callback operation - */ - return mci; error: @@ -658,9 +665,9 @@ fail1: return 1; } -static void del_mc_from_global_list(struct mem_ctl_info *mci) +static int del_mc_from_global_list(struct mem_ctl_info *mci) { - atomic_dec(&edac_handlers); + int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -668,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) */ synchronize_rcu(); INIT_LIST_HEAD(&mci->link); + + return handlers; } /** @@ -711,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find); /* FIXME - should a warning be printed if no error detection? correction? */ int edac_mc_add_mc(struct mem_ctl_info *mci) { + int ret = -EINVAL; edac_dbg(0, "\n"); #ifdef CONFIG_EDAC_DEBUG @@ -741,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) #endif mutex_lock(&mem_ctls_mutex); + if (edac_mc_owner && edac_mc_owner != mci->mod_name) { + ret = -EPERM; + goto fail0; + } + if (add_mc_to_global_list(mci)) goto fail0; @@ -767,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); + edac_mc_owner = mci->mod_name; + mutex_unlock(&mem_ctls_mutex); return 0; @@ -775,7 +792,7 @@ fail1: fail0: mutex_unlock(&mem_ctls_mutex); - return 1; + return ret; } EXPORT_SYMBOL_GPL(edac_mc_add_mc); @@ -801,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) return NULL; } - del_mc_from_global_list(mci); + if (!del_mc_from_global_list(mci)) + edac_mc_owner = NULL; mutex_unlock(&mem_ctls_mutex); /* flush workq processes */ @@ -899,6 +917,7 @@ const char *edac_layer_name[] = { [EDAC_MC_LAYER_CHANNEL] = "channel", [EDAC_MC_LAYER_SLOT] = "slot", [EDAC_MC_LAYER_CHIP_SELECT] = "csrow", + [EDAC_MC_LAYER_ALL_MEM] = "memory", }; EXPORT_SYMBOL_GPL(edac_layer_name); @@ -966,20 +985,22 @@ static void edac_ce_error(struct mem_ctl_info *mci, long grain) { unsigned long remapped_page; + char *msg_aux = ""; + + if (*msg) + msg_aux = " "; if (edac_mc_get_log_ce()) { if (other_detail && *other_detail) edac_mc_printk(mci, KERN_WARNING, - "%d CE %s on %s (%s %s - %s)\n", - error_count, - msg, label, location, - detail, other_detail); + "%d CE %s%son %s (%s %s - %s)\n", + error_count, msg, msg_aux, label, + location, detail, other_detail); else edac_mc_printk(mci, KERN_WARNING, - "%d CE %s on %s (%s %s)\n", - error_count, - msg, label, location, - detail); + "%d CE %s%son %s (%s %s)\n", + error_count, msg, msg_aux, label, + location, detail); } edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count); @@ -1014,33 +1035,76 @@ static void edac_ue_error(struct mem_ctl_info *mci, const char *other_detail, const bool enable_per_layer_report) { + char *msg_aux = ""; + + if (*msg) + msg_aux = " "; + if (edac_mc_get_log_ue()) { if (other_detail && *other_detail) edac_mc_printk(mci, KERN_WARNING, - "%d UE %s on %s (%s %s - %s)\n", - error_count, - msg, label, location, detail, - other_detail); + "%d UE %s%son %s (%s %s - %s)\n", + error_count, msg, msg_aux, label, + location, detail, other_detail); else edac_mc_printk(mci, KERN_WARNING, - "%d UE %s on %s (%s %s)\n", - error_count, - msg, label, location, detail); + "%d UE %s%son %s (%s %s)\n", + error_count, msg, msg_aux, label, + location, detail); } if (edac_mc_get_panic_on_ue()) { if (other_detail && *other_detail) - panic("UE %s on %s (%s%s - %s)\n", - msg, label, location, detail, other_detail); + panic("UE %s%son %s (%s%s - %s)\n", + msg, msg_aux, label, location, detail, other_detail); else - panic("UE %s on %s (%s%s)\n", - msg, label, location, detail); + panic("UE %s%son %s (%s%s)\n", + msg, msg_aux, label, location, detail); } edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); } -#define OTHER_LABEL " or " +/** + * edac_raw_mc_handle_error - reports a memory event to userspace without doing + * anything to discover the error location + * + * @type: severity of the error (CE/UE/Fatal) + * @mci: a struct mem_ctl_info pointer + * @e: error description + * + * This raw function is used internally by edac_mc_handle_error(). It should + * only be called directly when the hardware error come directly from BIOS, + * like in the case of APEI GHES driver. + */ +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e) +{ + char detail[80]; + int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; + + /* Memory type dependent details about the error */ + if (type == HW_EVENT_ERR_CORRECTED) { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", + e->page_frame_number, e->offset_in_page, + e->grain, e->syndrome); + edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report, + e->page_frame_number, e->offset_in_page, e->grain); + } else { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld", + e->page_frame_number, e->offset_in_page, e->grain); + + edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report); + } + + +} +EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error); /** * edac_mc_handle_error - reports a memory event to userspace @@ -1072,19 +1136,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, const char *msg, const char *other_detail) { - /* FIXME: too much for stack: move it to some pre-alocated area */ - char detail[80], location[80]; - char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; char *p; int row = -1, chan = -1; int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; - int i; - long grain; - bool enable_per_layer_report = false; + int i, n_labels = 0; u8 grain_bits; + struct edac_raw_error_desc *e = &mci->error_desc; edac_dbg(3, "MC%d\n", mci->mc_idx); + /* Fills the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = error_count; + e->top_layer = top_layer; + e->mid_layer = mid_layer; + e->low_layer = low_layer; + e->page_frame_number = page_frame_number; + e->offset_in_page = offset_in_page; + e->syndrome = syndrome; + e->msg = msg; + e->other_detail = other_detail; + /* * Check if the event report is consistent and if the memory * location is known. If it is known, enable_per_layer_report will be @@ -1093,10 +1165,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, */ for (i = 0; i < mci->n_layers; i++) { if (pos[i] >= (int)mci->layers[i].size) { - if (type == HW_EVENT_ERR_CORRECTED) - p = "CE"; - else - p = "UE"; edac_mc_printk(mci, KERN_ERR, "INTERNAL ERROR: %s value is out of range (%d >= %d)\n", @@ -1111,7 +1179,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, pos[i] = -1; } if (pos[i] >= 0) - enable_per_layer_report = true; + e->enable_per_layer_report = true; } /* @@ -1125,9 +1193,9 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * where each memory belongs to a separate channel within the same * branch. */ - grain = 0; - p = label; + p = e->label; *p = '\0'; + for (i = 0; i < mci->tot_dimms; i++) { struct dimm_info *dimm = mci->dimms[i]; @@ -1139,8 +1207,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, continue; /* get the max grain, over the error match range */ - if (dimm->grain > grain) - grain = dimm->grain; + if (dimm->grain > e->grain) + e->grain = dimm->grain; /* * If the error is memory-controller wide, there's no need to @@ -1148,8 +1216,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * channel/memory controller/... may be affected. * Also, don't show errors for empty DIMM slots. */ - if (enable_per_layer_report && dimm->nr_pages) { - if (p != label) { + if (e->enable_per_layer_report && dimm->nr_pages) { + if (n_labels >= EDAC_MAX_LABELS) { + e->enable_per_layer_report = false; + break; + } + n_labels++; + if (p != e->label) { strcpy(p, OTHER_LABEL); p += strlen(OTHER_LABEL); } @@ -1176,12 +1249,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } } - if (!enable_per_layer_report) { - strcpy(label, "any memory"); + if (!e->enable_per_layer_report) { + strcpy(e->label, "any memory"); } else { edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); - if (p == label) - strcpy(label, "unknown memory"); + if (p == e->label) + strcpy(e->label, "unknown memory"); if (type == HW_EVENT_ERR_CORRECTED) { if (row >= 0) { mci->csrows[row]->ce_count += error_count; @@ -1194,7 +1267,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } /* Fill the RAM location data */ - p = location; + p = e->location; + for (i = 0; i < mci->n_layers; i++) { if (pos[i] < 0) continue; @@ -1203,33 +1277,16 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, edac_layer_name[mci->layers[i].type], pos[i]); } - if (p > location) + if (p > e->location) *(p - 1) = '\0'; /* Report the error via the trace interface */ + grain_bits = fls_long(e->grain) + 1; + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); - grain_bits = fls_long(grain) + 1; - trace_mc_event(type, msg, label, error_count, - mci->mc_idx, top_layer, mid_layer, low_layer, - PAGES_TO_MiB(page_frame_number) | offset_in_page, - grain_bits, syndrome, other_detail); - - /* Memory type dependent details about the error */ - if (type == HW_EVENT_ERR_CORRECTED) { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", - page_frame_number, offset_in_page, - grain, syndrome); - edac_ce_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report, - page_frame_number, offset_in_page, grain); - } else { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld", - page_frame_number, offset_in_page, grain); - - edac_ue_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report); - } + edac_raw_mc_handle_error(type, mci, e); } EXPORT_SYMBOL_GPL(edac_mc_handle_error); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index ed0bc07b8503..4f4b6137d74e 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -7,7 +7,7 @@ * * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com * - * (c) 2012 - Mauro Carvalho Chehab <mchehab@redhat.com> + * (c) 2012-2013 - Mauro Carvalho Chehab <mchehab@redhat.com> * The entire API were re-written, and ported to use struct device * */ @@ -180,6 +180,9 @@ static ssize_t csrow_size_show(struct device *dev, int i; u32 nr_pages = 0; + if (csrow->mci->csbased) + return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages)); + for (i = 0; i < csrow->nr_channels; i++) nr_pages += csrow->channels[i]->dimm->nr_pages; return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages)); @@ -373,6 +376,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci, csrow->dev.bus = &mci->bus; device_initialize(&csrow->dev); csrow->dev.parent = &mci->dev; + csrow->mci = mci; dev_set_name(&csrow->dev, "csrow%d", index); dev_set_drvdata(&csrow->dev, csrow); @@ -425,8 +429,12 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci) if (!nr_pages_per_csrow(csrow)) continue; err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) + if (err < 0) { + edac_dbg(1, + "failure: create csrow objects for csrow %d\n", + i); goto error; + } } return 0; @@ -468,8 +476,7 @@ static void edac_delete_csrow_objects(struct mem_ctl_info *mci) device_remove_file(&csrow->dev, dynamic_csrow_ce_count_attr[chan]); } - put_device(&mci->csrows[i]->dev); - device_del(&mci->csrows[i]->dev); + device_unregister(&mci->csrows[i]->dev); } } #endif @@ -674,9 +681,6 @@ static ssize_t mci_sdram_scrub_rate_store(struct device *dev, unsigned long bandwidth = 0; int new_bw = 0; - if (!mci->set_sdram_scrub_rate) - return -ENODEV; - if (strict_strtoul(data, 10, &bandwidth) < 0) return -EINVAL; @@ -700,9 +704,6 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); int bandwidth = 0; - if (!mci->get_sdram_scrub_rate) - return -ENODEV; - bandwidth = mci->get_sdram_scrub_rate(mci); if (bandwidth < 0) { edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); @@ -777,10 +778,14 @@ static ssize_t mci_size_mb_show(struct device *dev, for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) { struct csrow_info *csrow = mci->csrows[csrow_idx]; - for (j = 0; j < csrow->nr_channels; j++) { - struct dimm_info *dimm = csrow->channels[j]->dimm; + if (csrow->mci->csbased) { + total_pages += csrow->nr_pages; + } else { + for (j = 0; j < csrow->nr_channels; j++) { + struct dimm_info *dimm = csrow->channels[j]->dimm; - total_pages += dimm->nr_pages; + total_pages += dimm->nr_pages; + } } } @@ -838,14 +843,8 @@ static ssize_t edac_fake_inject_write(struct file *file, return count; } -static int debugfs_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static const struct file_operations debug_fake_inject_fops = { - .open = debugfs_open, + .open = simple_open, .write = edac_fake_inject_write, .llseek = generic_file_llseek, }; @@ -865,8 +864,7 @@ DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL); /* memory scrubber attribute file */ -DEVICE_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show, - mci_sdram_scrub_rate_store); +DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL); static struct attribute *mci_attrs[] = { &dev_attr_reset_counters.attr, @@ -877,7 +875,6 @@ static struct attribute *mci_attrs[] = { &dev_attr_ce_noinfo_count.attr, &dev_attr_ue_count.attr, &dev_attr_ce_count.attr, - &dev_attr_sdram_scrub_rate.attr, &dev_attr_max_location.attr, NULL }; @@ -1006,11 +1003,28 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); err = device_add(&mci->dev); if (err < 0) { + edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); bus_unregister(&mci->bus); kfree(mci->bus.name); return err; } + if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) { + if (mci->get_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO; + dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show; + } + if (mci->set_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR; + dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store; + } + err = device_create_file(&mci->dev, + &dev_attr_sdram_scrub_rate); + if (err) { + edac_dbg(1, "failure: create sdram_scrub_rate\n"); + goto fail2; + } + } /* * Create the dimm/rank devices */ @@ -1053,11 +1067,10 @@ fail: struct dimm_info *dimm = mci->dimms[i]; if (dimm->nr_pages == 0) continue; - put_device(&dimm->dev); - device_del(&dimm->dev); + device_unregister(&dimm->dev); } - put_device(&mci->dev); - device_del(&mci->dev); +fail2: + device_unregister(&mci->dev); bus_unregister(&mci->bus); kfree(mci->bus.name); return err; @@ -1084,16 +1097,14 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) if (dimm->nr_pages == 0) continue; edac_dbg(0, "removing device %s\n", dev_name(&dimm->dev)); - put_device(&dimm->dev); - device_del(&dimm->dev); + device_unregister(&dimm->dev); } } void edac_unregister_sysfs(struct mem_ctl_info *mci) { edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); - put_device(&mci->dev); - device_del(&mci->dev); + device_unregister(&mci->dev); bus_unregister(&mci->bus); kfree(mci->bus.name); } @@ -1124,10 +1135,15 @@ int __init edac_mc_sysfs_init(void) edac_subsys = edac_get_sysfs_subsys(); if (edac_subsys == NULL) { edac_dbg(1, "no edac_subsys\n"); - return -EINVAL; + err = -EINVAL; + goto out; } mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL); + if (!mci_pdev) { + err = -ENOMEM; + goto out_put_sysfs; + } mci_pdev->bus = edac_subsys; mci_pdev->type = &mc_attr_type; @@ -1136,16 +1152,22 @@ int __init edac_mc_sysfs_init(void) err = device_add(mci_pdev); if (err < 0) - return err; + goto out_dev_free; edac_dbg(0, "device %s created\n", dev_name(mci_pdev)); return 0; + + out_dev_free: + kfree(mci_pdev); + out_put_sysfs: + edac_put_sysfs_subsys(); + out: + return err; } void __exit edac_mc_sysfs_exit(void) { - put_device(mci_pdev); - device_del(mci_pdev); + device_unregister(mci_pdev); edac_put_sysfs_subsys(); } diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index 58a28d838f37..a66941fea5a4 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -18,9 +18,29 @@ #define EDAC_VERSION "Ver: 3.0.0" #ifdef CONFIG_EDAC_DEBUG + +static int edac_set_debug_level(const char *buf, struct kernel_param *kp) +{ + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + if (val < 0 || val > 4) + return -EINVAL; + + return param_set_int(buf, kp); +} + /* Values of 0 to 4 will generate output */ int edac_debug_level = 2; EXPORT_SYMBOL_GPL(edac_debug_level); + +module_param_call(edac_debug_level, edac_set_debug_level, param_get_int, + &edac_debug_level, 0644); +MODULE_PARM_DESC(edac_debug_level, "EDAC debug level: [0-4], default: 2"); #endif /* scope is to module level only */ @@ -126,16 +146,9 @@ static void __exit edac_exit(void) /* * Inform the kernel of our entry and exit points */ -module_init(edac_init); +subsys_initcall(edac_init); module_exit(edac_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Doug Thompson www.softwarebitmaker.com, et al"); MODULE_DESCRIPTION("Core library routines for EDAC reporting"); - -/* refer to *_sysfs.c files for parameters that are exported via sysfs */ - -#ifdef CONFIG_EDAC_DEBUG -module_param(edac_debug_level, int, 0644); -MODULE_PARM_DESC(edac_debug_level, "Debug level"); -#endif diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index ee87ef972ead..dd370f92ace3 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -470,7 +470,8 @@ struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev, pci->mod_name = mod_name; pci->ctl_name = EDAC_PCI_GENCTL_NAME; - pci->edac_check = edac_pci_generic_check; + if (edac_op_state == EDAC_OPSTATE_POLL) + pci->edac_check = edac_pci_generic_check; pdata->edac_idx = edac_pci_idx++; diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index e164c555a337..e8658e451762 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -256,7 +256,7 @@ static ssize_t edac_pci_dev_store(struct kobject *kobj, struct edac_pci_dev_attribute *edac_pci_dev; edac_pci_dev = (struct edac_pci_dev_attribute *)attr; - if (edac_pci_dev->show) + if (edac_pci_dev->store) return edac_pci_dev->store(edac_pci_dev->value, buffer, count); return -EIO; } @@ -429,8 +429,8 @@ static void edac_pci_main_kobj_teardown(void) if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { edac_dbg(0, "called kobject_put on main kobj\n"); kobject_put(edac_pci_top_main_kobj); + edac_put_sysfs_subsys(); } - edac_put_sysfs_subsys(); } /* @@ -645,20 +645,16 @@ typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev); /* * pci_dev parity list iterator - * Scan the PCI device list for one pass, looking for SERRORs - * Master Parity ERRORS or Parity ERRORs on primary or secondary devices + * + * Scan the PCI device list looking for SERRORs, Master Parity ERRORS or + * Parity ERRORs on primary or secondary devices. */ static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) { struct pci_dev *dev = NULL; - /* request for kernel access to the next PCI device, if any, - * and while we are looking at it have its reference count - * bumped until we are done with it - */ - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) fn(dev); - } } /* diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index 6c86f6e54558..351945fa2ecd 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -5,7 +5,7 @@ * * 2007 (c) MontaVista Software, Inc. * 2010 (c) Advanced Micro Devices Inc. - * Borislav Petkov <borislav.petkov@amd.com> + * Borislav Petkov <bp@alien8.de> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c new file mode 100644 index 000000000000..bb534670ec02 --- /dev/null +++ b/drivers/edac/ghes_edac.c @@ -0,0 +1,537 @@ +/* + * GHES/EDAC Linux driver + * + * This file may be distributed under the terms of the GNU General Public + * License version 2. + * + * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com> + * + * Red Hat Inc. http://www.redhat.com + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <acpi/ghes.h> +#include <linux/edac.h> +#include <linux/dmi.h> +#include "edac_core.h" +#include <ras/ras_event.h> + +#define GHES_EDAC_REVISION " Ver: 1.0.0" + +struct ghes_edac_pvt { + struct list_head list; + struct ghes *ghes; + struct mem_ctl_info *mci; + + /* Buffers for the error handling routine */ + char detail_location[240]; + char other_detail[160]; + char msg[80]; +}; + +static LIST_HEAD(ghes_reglist); +static DEFINE_MUTEX(ghes_edac_lock); +static int ghes_edac_mc_num; + + +/* Memory Device - Type 17 of SMBIOS spec */ +struct memdev_dmi_entry { + u8 type; + u8 length; + u16 handle; + u16 phys_mem_array_handle; + u16 mem_err_info_handle; + u16 total_width; + u16 data_width; + u16 size; + u8 form_factor; + u8 device_set; + u8 device_locator; + u8 bank_locator; + u8 memory_type; + u16 type_detail; + u16 speed; + u8 manufacturer; + u8 serial_number; + u8 asset_tag; + u8 part_number; + u8 attributes; + u32 extended_size; + u16 conf_mem_clk_speed; +} __attribute__((__packed__)); + +struct ghes_edac_dimm_fill { + struct mem_ctl_info *mci; + unsigned count; +}; + +char *memory_type[] = { + [MEM_EMPTY] = "EMPTY", + [MEM_RESERVED] = "RESERVED", + [MEM_UNKNOWN] = "UNKNOWN", + [MEM_FPM] = "FPM", + [MEM_EDO] = "EDO", + [MEM_BEDO] = "BEDO", + [MEM_SDR] = "SDR", + [MEM_RDR] = "RDR", + [MEM_DDR] = "DDR", + [MEM_RDDR] = "RDDR", + [MEM_RMBS] = "RMBS", + [MEM_DDR2] = "DDR2", + [MEM_FB_DDR2] = "FB_DDR2", + [MEM_RDDR2] = "RDDR2", + [MEM_XDR] = "XDR", + [MEM_DDR3] = "DDR3", + [MEM_RDDR3] = "RDDR3", +}; + +static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) +{ + int *num_dimm = arg; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) + (*num_dimm)++; +} + +static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) +{ + struct ghes_edac_dimm_fill *dimm_fill = arg; + struct mem_ctl_info *mci = dimm_fill->mci; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) { + struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, + dimm_fill->count, 0, 0); + + if (entry->size == 0xffff) { + pr_info("Can't get DIMM%i size\n", + dimm_fill->count); + dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ + } else if (entry->size == 0x7fff) { + dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); + } else { + if (entry->size & 1 << 15) + dimm->nr_pages = MiB_TO_PAGES((entry->size & + 0x7fff) << 10); + else + dimm->nr_pages = MiB_TO_PAGES(entry->size); + } + + switch (entry->memory_type) { + case 0x12: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR; + else + dimm->mtype = MEM_DDR; + break; + case 0x13: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR2; + else + dimm->mtype = MEM_DDR2; + break; + case 0x14: + dimm->mtype = MEM_FB_DDR2; + break; + case 0x18: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR3; + else + dimm->mtype = MEM_DDR3; + break; + default: + if (entry->type_detail & 1 << 6) + dimm->mtype = MEM_RMBS; + else if ((entry->type_detail & ((1 << 7) | (1 << 13))) + == ((1 << 7) | (1 << 13))) + dimm->mtype = MEM_RDR; + else if (entry->type_detail & 1 << 7) + dimm->mtype = MEM_SDR; + else if (entry->type_detail & 1 << 9) + dimm->mtype = MEM_EDO; + else + dimm->mtype = MEM_UNKNOWN; + } + + /* + * Actually, we can only detect if the memory has bits for + * checksum or not + */ + if (entry->total_width == entry->data_width) + dimm->edac_mode = EDAC_NONE; + else + dimm->edac_mode = EDAC_SECDED; + + dimm->dtype = DEV_UNKNOWN; + dimm->grain = 128; /* Likely, worse case */ + + /* + * FIXME: It shouldn't be hard to also fill the DIMM labels + */ + + if (dimm->nr_pages) { + edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", + dimm_fill->count, memory_type[dimm->mtype], + PAGES_TO_MiB(dimm->nr_pages), + (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); + edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", + entry->memory_type, entry->type_detail, + entry->total_width, entry->data_width); + } + + dimm_fill->count++; + } +} + +void ghes_edac_report_mem_error(struct ghes *ghes, int sev, + struct cper_sec_mem_err *mem_err) +{ + enum hw_event_mc_err_type type; + struct edac_raw_error_desc *e; + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt = NULL; + char *p; + u8 grain_bits; + + list_for_each_entry(pvt, &ghes_reglist, list) { + if (ghes == pvt->ghes) + break; + } + if (!pvt) { + pr_err("Internal error: Can't find EDAC structure\n"); + return; + } + mci = pvt->mci; + e = &mci->error_desc; + + /* Cleans the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = 1; + strcpy(e->label, "unknown label"); + e->msg = pvt->msg; + e->other_detail = pvt->other_detail; + e->top_layer = -1; + e->mid_layer = -1; + e->low_layer = -1; + *pvt->other_detail = '\0'; + *pvt->msg = '\0'; + + switch (sev) { + case GHES_SEV_CORRECTED: + type = HW_EVENT_ERR_CORRECTED; + break; + case GHES_SEV_RECOVERABLE: + type = HW_EVENT_ERR_UNCORRECTED; + break; + case GHES_SEV_PANIC: + type = HW_EVENT_ERR_FATAL; + break; + default: + case GHES_SEV_NO: + type = HW_EVENT_ERR_INFO; + } + + edac_dbg(1, "error validation_bits: 0x%08llx\n", + (long long)mem_err->validation_bits); + + /* Error type, mapped on e->msg */ + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { + p = pvt->msg; + switch (mem_err->error_type) { + case 0: + p += sprintf(p, "Unknown"); + break; + case 1: + p += sprintf(p, "No error"); + break; + case 2: + p += sprintf(p, "Single-bit ECC"); + break; + case 3: + p += sprintf(p, "Multi-bit ECC"); + break; + case 4: + p += sprintf(p, "Single-symbol ChipKill ECC"); + break; + case 5: + p += sprintf(p, "Multi-symbol ChipKill ECC"); + break; + case 6: + p += sprintf(p, "Master abort"); + break; + case 7: + p += sprintf(p, "Target abort"); + break; + case 8: + p += sprintf(p, "Parity Error"); + break; + case 9: + p += sprintf(p, "Watchdog timeout"); + break; + case 10: + p += sprintf(p, "Invalid address"); + break; + case 11: + p += sprintf(p, "Mirror Broken"); + break; + case 12: + p += sprintf(p, "Memory Sparing"); + break; + case 13: + p += sprintf(p, "Scrub corrected error"); + break; + case 14: + p += sprintf(p, "Scrub uncorrected error"); + break; + case 15: + p += sprintf(p, "Physical Memory Map-out event"); + break; + default: + p += sprintf(p, "reserved error (%d)", + mem_err->error_type); + } + } else { + strcpy(pvt->msg, "unknown error"); + } + + /* Error address */ + if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; + e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; + } + + /* Error grain */ + if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) { + e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); + } + + /* Memory error location, mapped on e->location */ + p = e->location; + if (mem_err->validation_bits & CPER_MEM_VALID_NODE) + p += sprintf(p, "node:%d ", mem_err->node); + if (mem_err->validation_bits & CPER_MEM_VALID_CARD) + p += sprintf(p, "card:%d ", mem_err->card); + if (mem_err->validation_bits & CPER_MEM_VALID_MODULE) + p += sprintf(p, "module:%d ", mem_err->module); + if (mem_err->validation_bits & CPER_MEM_VALID_BANK) + p += sprintf(p, "bank:%d ", mem_err->bank); + if (mem_err->validation_bits & CPER_MEM_VALID_ROW) + p += sprintf(p, "row:%d ", mem_err->row); + if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN) + p += sprintf(p, "col:%d ", mem_err->column); + if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION) + p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); + if (p > e->location) + *(p - 1) = '\0'; + + /* All other fields are mapped on e->other_detail */ + p = pvt->other_detail; + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) { + u64 status = mem_err->error_status; + + p += sprintf(p, "status(0x%016llx): ", (long long)status); + switch ((status >> 8) & 0xff) { + case 1: + p += sprintf(p, "Error detected internal to the component "); + break; + case 16: + p += sprintf(p, "Error detected in the bus "); + break; + case 4: + p += sprintf(p, "Storage error in DRAM memory "); + break; + case 5: + p += sprintf(p, "Storage error in TLB "); + break; + case 6: + p += sprintf(p, "Storage error in cache "); + break; + case 7: + p += sprintf(p, "Error in one or more functional units "); + break; + case 8: + p += sprintf(p, "component failed self test "); + break; + case 9: + p += sprintf(p, "Overflow or undervalue of internal queue "); + break; + case 17: + p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR "); + break; + case 18: + p += sprintf(p, "Improper access error "); + break; + case 19: + p += sprintf(p, "Access to a memory address which is not mapped to any component "); + break; + case 20: + p += sprintf(p, "Loss of Lockstep "); + break; + case 21: + p += sprintf(p, "Response not associated with a request "); + break; + case 22: + p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits "); + break; + case 23: + p += sprintf(p, "Detection of a PATH_ERROR "); + break; + case 25: + p += sprintf(p, "Bus operation timeout "); + break; + case 26: + p += sprintf(p, "A read was issued to data that has been poisoned "); + break; + default: + p += sprintf(p, "reserved "); + break; + } + } + if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) + p += sprintf(p, "requestorID: 0x%016llx ", + (long long)mem_err->requestor_id); + if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID) + p += sprintf(p, "responderID: 0x%016llx ", + (long long)mem_err->responder_id); + if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID) + p += sprintf(p, "targetID: 0x%016llx ", + (long long)mem_err->responder_id); + if (p > pvt->other_detail) + *(p - 1) = '\0'; + + /* Generate the trace event */ + grain_bits = fls_long(e->grain); + sprintf(pvt->detail_location, "APEI location: %s %s", + e->location, e->other_detail); + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, pvt->detail_location); + + /* Report the error via EDAC API */ + edac_raw_mc_handle_error(type, mci, e); +} +EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error); + +int ghes_edac_register(struct ghes *ghes, struct device *dev) +{ + bool fake = false; + int rc, num_dimm = 0; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[1]; + struct ghes_edac_pvt *pvt; + struct ghes_edac_dimm_fill dimm_fill; + + /* Get the number of DIMMs */ + dmi_walk(ghes_edac_count_dimms, &num_dimm); + + /* Check if we've got a bogus BIOS */ + if (num_dimm == 0) { + fake = true; + num_dimm = 1; + } + + layers[0].type = EDAC_MC_LAYER_ALL_MEM; + layers[0].size = num_dimm; + layers[0].is_virt_csrow = true; + + /* + * We need to serialize edac_mc_alloc() and edac_mc_add_mc(), + * to avoid duplicated memory controller numbers + */ + mutex_lock(&ghes_edac_lock); + mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers, + sizeof(*pvt)); + if (!mci) { + pr_info("Can't allocate memory for EDAC data\n"); + mutex_unlock(&ghes_edac_lock); + return -ENOMEM; + } + + pvt = mci->pvt_info; + memset(pvt, 0, sizeof(*pvt)); + list_add_tail(&pvt->list, &ghes_reglist); + pvt->ghes = ghes; + pvt->mci = mci; + mci->pdev = dev; + + mci->mtype_cap = MEM_FLAG_EMPTY; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "ghes_edac.c"; + mci->mod_ver = GHES_EDAC_REVISION; + mci->ctl_name = "ghes_edac"; + mci->dev_name = "ghes"; + + if (!ghes_edac_mc_num) { + if (!fake) { + pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n"); + pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n"); + pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); + pr_info("If you find incorrect reports, please contact your hardware vendor\n"); + pr_info("to correct its BIOS.\n"); + pr_info("This system has %d DIMM sockets.\n", + num_dimm); + } else { + pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n"); + pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n"); + pr_info("work on such system. Use this driver with caution\n"); + } + } + + if (!fake) { + /* + * Fill DIMM info from DMI for the memory controller #0 + * + * Keep it in blank for the other memory controllers, as + * there's no reliable way to properly credit each DIMM to + * the memory controller, as different BIOSes fill the + * DMI bank location fields on different ways + */ + if (!ghes_edac_mc_num) { + dimm_fill.count = 0; + dimm_fill.mci = mci; + dmi_walk(ghes_edac_dmidecode, &dimm_fill); + } + } else { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, 0, 0, 0); + + dimm->nr_pages = 1; + dimm->grain = 128; + dimm->mtype = MEM_UNKNOWN; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = EDAC_SECDED; + } + + rc = edac_mc_add_mc(mci); + if (rc < 0) { + pr_info("Can't register at EDAC core\n"); + edac_mc_free(mci); + mutex_unlock(&ghes_edac_lock); + return -ENODEV; + } + + ghes_edac_mc_num++; + mutex_unlock(&ghes_edac_lock); + return 0; +} +EXPORT_SYMBOL_GPL(ghes_edac_register); + +void ghes_edac_unregister(struct ghes *ghes) +{ + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt, *tmp; + + list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) { + if (ghes == pvt->ghes) { + mci = pvt->mci; + edac_mc_del_mc(mci->pdev); + edac_mc_free(mci); + list_del(&pvt->list); + } + } +} +EXPORT_SYMBOL_GPL(ghes_edac_unregister); diff --git a/drivers/edac/highbank_l2_edac.c b/drivers/edac/highbank_l2_edac.c index e599b00c05a8..c2bd8c6a4349 100644 --- a/drivers/edac/highbank_l2_edac.c +++ b/drivers/edac/highbank_l2_edac.c @@ -50,7 +50,7 @@ static irqreturn_t highbank_l2_err_handler(int irq, void *dev_id) return IRQ_HANDLED; } -static int __devinit highbank_l2_err_probe(struct platform_device *pdev) +static int highbank_l2_err_probe(struct platform_device *pdev) { struct edac_device_ctl_info *dci; struct hb_l2_drvdata *drvdata; diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c index c769f477fd22..4695dd2d71fd 100644 --- a/drivers/edac/highbank_mc_edac.c +++ b/drivers/edac/highbank_mc_edac.c @@ -113,19 +113,13 @@ static ssize_t highbank_mc_err_inject_write(struct file *file, return count; } -static int debugfs_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static const struct file_operations highbank_mc_debug_inject_fops = { - .open = debugfs_open, + .open = simple_open, .write = highbank_mc_err_inject_write, .llseek = generic_file_llseek, }; -static void __devinit highbank_mc_create_debugfs_nodes(struct mem_ctl_info *mci) +static void highbank_mc_create_debugfs_nodes(struct mem_ctl_info *mci) { if (mci->debugfs) debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci, @@ -133,11 +127,11 @@ static void __devinit highbank_mc_create_debugfs_nodes(struct mem_ctl_info *mci) ; } #else -static void __devinit highbank_mc_create_debugfs_nodes(struct mem_ctl_info *mci) +static void highbank_mc_create_debugfs_nodes(struct mem_ctl_info *mci) {} #endif -static int __devinit highbank_mc_probe(struct platform_device *pdev) +static int highbank_mc_probe(struct platform_device *pdev) { struct edac_mc_layer layers[2]; struct mem_ctl_info *mci; diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c index d3d19cc4e9a1..694efcbf19c0 100644 --- a/drivers/edac/i3000_edac.c +++ b/drivers/edac/i3000_edac.c @@ -455,8 +455,7 @@ fail: } /* returns count (>= 0), or negative on error */ -static int __devinit i3000_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int i3000_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int rc; @@ -472,7 +471,7 @@ static int __devinit i3000_init_one(struct pci_dev *pdev, return rc; } -static void __devexit i3000_remove_one(struct pci_dev *pdev) +static void i3000_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; @@ -502,7 +501,7 @@ MODULE_DEVICE_TABLE(pci, i3000_pci_tbl); static struct pci_driver i3000_driver = { .name = EDAC_MOD_STR, .probe = i3000_init_one, - .remove = __devexit_p(i3000_remove_one), + .remove = i3000_remove_one, .id_table = i3000_pci_tbl, }; diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index b6653a6fc5d5..aa44c1718f50 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -106,16 +106,26 @@ static int nr_channels; static int how_many_channels(struct pci_dev *pdev) { + int n_channels; + unsigned char capid0_8b; /* 8th byte of CAPID0 */ pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b); + if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ edac_dbg(0, "In single channel mode\n"); - return 1; + n_channels = 1; } else { edac_dbg(0, "In dual channel mode\n"); - return 2; + n_channels = 2; } + + if (capid0_8b & 0x10) /* check if both channels are filled */ + edac_dbg(0, "2 DIMMS per channel disabled\n"); + else + edac_dbg(0, "2 DIMMS per channel enabled\n"); + + return n_channels; } static unsigned long eccerrlog_syndrome(u64 log) @@ -290,6 +300,8 @@ static void i3200_get_drbs(void __iomem *window, for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; + + edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]); } } @@ -311,6 +323,9 @@ static unsigned long drb_to_nr_pages( int n; n = drbs[channel][rank]; + if (!n) + return 0; + if (rank > 0) n -= drbs[channel][rank - 1]; if (stacked && (channel == 1) && @@ -377,19 +392,19 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) * cumulative; the last one will contain the total memory * contained in all ranks. */ - for (i = 0; i < mci->nr_csrows; i++) { + for (i = 0; i < I3200_DIMMS; i++) { unsigned long nr_pages; - struct csrow_info *csrow = mci->csrows[i]; - nr_pages = drb_to_nr_pages(drbs, stacked, - i / I3200_RANKS_PER_CHANNEL, - i % I3200_RANKS_PER_CHANNEL); + for (j = 0; j < nr_channels; j++) { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, i, j, 0); - if (nr_pages == 0) - continue; + nr_pages = drb_to_nr_pages(drbs, stacked, j, i); + if (nr_pages == 0) + continue; - for (j = 0; j < nr_channels; j++) { - struct dimm_info *dimm = csrow->channels[j]->dimm; + edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j, + stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages)); dimm->nr_pages = nr_pages; dimm->grain = nr_pages << PAGE_SHIFT; @@ -419,8 +434,7 @@ fail: return rc; } -static int __devinit i3200_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int i3200_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int rc; @@ -436,7 +450,7 @@ static int __devinit i3200_init_one(struct pci_dev *pdev, return rc; } -static void __devexit i3200_remove_one(struct pci_dev *pdev) +static void i3200_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct i3200_priv *priv; @@ -467,7 +481,7 @@ MODULE_DEVICE_TABLE(pci, i3200_pci_tbl); static struct pci_driver i3200_driver = { .name = EDAC_MOD_STR, .probe = i3200_init_one, - .remove = __devexit_p(i3200_remove_one), + .remove = i3200_remove_one, .id_table = i3200_pci_tbl, }; diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 6a49dd00b81b..63b2194e8c20 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1489,8 +1489,7 @@ fail0: * negative on error * count (>= 0) */ -static int __devinit i5000_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +static int i5000_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; @@ -1509,7 +1508,7 @@ static int __devinit i5000_init_one(struct pci_dev *pdev, * i5000_remove_one destructor for one instance of device * */ -static void __devexit i5000_remove_one(struct pci_dev *pdev) +static void i5000_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; @@ -1547,7 +1546,7 @@ MODULE_DEVICE_TABLE(pci, i5000_pci_tbl); static struct pci_driver i5000_driver = { .name = KBUILD_BASENAME, .probe = i5000_init_one, - .remove = __devexit_p(i5000_remove_one), + .remove = i5000_remove_one, .id_table = i5000_pci_tbl, }; diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index c4b5e5f868e8..1b635178cc44 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -27,6 +27,7 @@ #include <linux/edac.h> #include <linux/delay.h> #include <linux/mmzone.h> +#include <linux/debugfs.h> #include "edac_core.h" @@ -68,6 +69,14 @@ I5100_FERR_NF_MEM_M1ERR_MASK) #define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */ #define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */ +#define I5100_MEM0EINJMSK0 0x200 /* Injection Mask0 Register Channel 0 */ +#define I5100_MEM1EINJMSK0 0x208 /* Injection Mask0 Register Channel 1 */ +#define I5100_MEMXEINJMSK0_EINJEN (1 << 27) +#define I5100_MEM0EINJMSK1 0x204 /* Injection Mask1 Register Channel 0 */ +#define I5100_MEM1EINJMSK1 0x206 /* Injection Mask1 Register Channel 1 */ + +/* Device 19, Function 0 */ +#define I5100_DINJ0 0x9a /* device 21 and 22, func 0 */ #define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */ @@ -338,13 +347,26 @@ struct i5100_priv { unsigned ranksperchan; /* number of ranks per channel */ struct pci_dev *mc; /* device 16 func 1 */ + struct pci_dev *einj; /* device 19 func 0 */ struct pci_dev *ch0mm; /* device 21 func 0 */ struct pci_dev *ch1mm; /* device 22 func 0 */ struct delayed_work i5100_scrubbing; int scrub_enable; + + /* Error injection */ + u8 inject_channel; + u8 inject_hlinesel; + u8 inject_deviceptr1; + u8 inject_deviceptr2; + u16 inject_eccmask1; + u16 inject_eccmask2; + + struct dentry *debugfs; }; +static struct dentry *i5100_debugfs; + /* map a rank/chan to a slot number on the mainboard */ static int i5100_rank_to_slot(const struct mem_ctl_info *mci, int chan, int rank) @@ -638,8 +660,7 @@ static struct pci_dev *pci_get_device_func(unsigned vendor, return ret; } -static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci, - int csrow) +static unsigned long i5100_npages(struct mem_ctl_info *mci, int csrow) { struct i5100_priv *priv = mci->pvt_info; const unsigned chan_rank = i5100_csrow_to_rank(mci, csrow); @@ -660,7 +681,7 @@ static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci, ((unsigned long long) (1ULL << addr_lines) / PAGE_SIZE); } -static void __devinit i5100_init_mtr(struct mem_ctl_info *mci) +static void i5100_init_mtr(struct mem_ctl_info *mci) { struct i5100_priv *priv = mci->pvt_info; struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm }; @@ -732,7 +753,7 @@ static int i5100_read_spd_byte(const struct mem_ctl_info *mci, * o not the only way to may chip selects to dimm slots * o investigate if there is some way to obtain this map from the bios */ -static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci) +static void i5100_init_dimm_csmap(struct mem_ctl_info *mci) { struct i5100_priv *priv = mci->pvt_info; int i; @@ -762,8 +783,8 @@ static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci) } } -static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev, - struct mem_ctl_info *mci) +static void i5100_init_dimm_layout(struct pci_dev *pdev, + struct mem_ctl_info *mci) { struct i5100_priv *priv = mci->pvt_info; int i; @@ -784,8 +805,8 @@ static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev, i5100_init_dimm_csmap(mci); } -static void __devinit i5100_init_interleaving(struct pci_dev *pdev, - struct mem_ctl_info *mci) +static void i5100_init_interleaving(struct pci_dev *pdev, + struct mem_ctl_info *mci) { u16 w; u32 dw; @@ -830,7 +851,7 @@ static void __devinit i5100_init_interleaving(struct pci_dev *pdev, i5100_init_mtr(mci); } -static void __devinit i5100_init_csrows(struct mem_ctl_info *mci) +static void i5100_init_csrows(struct mem_ctl_info *mci) { int i; struct i5100_priv *priv = mci->pvt_info; @@ -864,14 +885,126 @@ static void __devinit i5100_init_csrows(struct mem_ctl_info *mci) } } -static int __devinit i5100_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +/**************************************************************************** + * Error injection routines + ****************************************************************************/ + +static void i5100_do_inject(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + u32 mask0; + u16 mask1; + + /* MEM[1:0]EINJMSK0 + * 31 - ADDRMATCHEN + * 29:28 - HLINESEL + * 00 Reserved + * 01 Lower half of cache line + * 10 Upper half of cache line + * 11 Both upper and lower parts of cache line + * 27 - EINJEN + * 25:19 - XORMASK1 for deviceptr1 + * 9:5 - SEC2RAM for deviceptr2 + * 4:0 - FIR2RAM for deviceptr1 + */ + mask0 = ((priv->inject_hlinesel & 0x3) << 28) | + I5100_MEMXEINJMSK0_EINJEN | + ((priv->inject_eccmask1 & 0xffff) << 10) | + ((priv->inject_deviceptr2 & 0x1f) << 5) | + (priv->inject_deviceptr1 & 0x1f); + + /* MEM[1:0]EINJMSK1 + * 15:0 - XORMASK2 for deviceptr2 + */ + mask1 = priv->inject_eccmask2; + + if (priv->inject_channel == 0) { + pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1); + } else { + pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1); + } + + /* Error Injection Response Function + * Intel 5100 Memory Controller Hub Chipset (318378) datasheet + * hints about this register but carry no data about them. All + * data regarding device 19 is based on experimentation and the + * Intel 7300 Chipset Memory Controller Hub (318082) datasheet + * which appears to be accurate for the i5100 in this area. + * + * The injection code don't work without setting this register. + * The register needs to be flipped off then on else the hardware + * will only preform the first injection. + * + * Stop condition bits 7:4 + * 1010 - Stop after one injection + * 1011 - Never stop injecting faults + * + * Start condition bits 3:0 + * 1010 - Never start + * 1011 - Start immediately + */ + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xaa); + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) +static ssize_t inject_enable_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + + i5100_do_inject(mci); + + return count; +} + +static const struct file_operations i5100_inject_enable_fops = { + .open = simple_open, + .write = inject_enable_write, + .llseek = generic_file_llseek, +}; + +static int i5100_setup_debugfs(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + + if (!i5100_debugfs) + return -ENODEV; + + priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs); + + if (!priv->debugfs) + return -ENOMEM; + + debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_channel); + debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_hlinesel); + debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr1); + debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr2); + debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask1); + debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask2); + debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs, + &mci->dev, &i5100_inject_enable_fops); + + return 0; + +} + +static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; struct i5100_priv *priv; - struct pci_dev *ch0mm, *ch1mm; + struct pci_dev *ch0mm, *ch1mm, *einj; int ret = 0; u32 dw; int ranksperch; @@ -943,6 +1076,22 @@ static int __devinit i5100_init_one(struct pci_dev *pdev, goto bail_disable_ch1; } + + /* device 19, func 0, Error injection */ + einj = pci_get_device_func(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5100_19, 0); + if (!einj) { + ret = -ENODEV; + goto bail_einj; + } + + rc = pci_enable_device(einj); + if (rc < 0) { + ret = rc; + goto bail_disable_einj; + } + + mci->pdev = &pdev->dev; priv = mci->pvt_info; @@ -950,6 +1099,7 @@ static int __devinit i5100_init_one(struct pci_dev *pdev, priv->mc = pdev; priv->ch0mm = ch0mm; priv->ch1mm = ch1mm; + priv->einj = einj; INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing); @@ -977,6 +1127,13 @@ static int __devinit i5100_init_one(struct pci_dev *pdev, mci->set_sdram_scrub_rate = i5100_set_scrub_rate; mci->get_sdram_scrub_rate = i5100_get_scrub_rate; + priv->inject_channel = 0; + priv->inject_hlinesel = 0; + priv->inject_deviceptr1 = 0; + priv->inject_deviceptr2 = 0; + priv->inject_eccmask1 = 0; + priv->inject_eccmask2 = 0; + i5100_init_csrows(mci); /* this strange construction seems to be in every driver, dunno why */ @@ -994,6 +1151,8 @@ static int __devinit i5100_init_one(struct pci_dev *pdev, goto bail_scrub; } + i5100_setup_debugfs(mci); + return ret; bail_scrub: @@ -1001,6 +1160,12 @@ bail_scrub: cancel_delayed_work_sync(&(priv->i5100_scrubbing)); edac_mc_free(mci); +bail_disable_einj: + pci_disable_device(einj); + +bail_einj: + pci_dev_put(einj); + bail_disable_ch1: pci_disable_device(ch1mm); @@ -1020,7 +1185,7 @@ bail: return ret; } -static void __devexit i5100_remove_one(struct pci_dev *pdev) +static void i5100_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct i5100_priv *priv; @@ -1032,14 +1197,18 @@ static void __devexit i5100_remove_one(struct pci_dev *pdev) priv = mci->pvt_info; + debugfs_remove_recursive(priv->debugfs); + priv->scrub_enable = 0; cancel_delayed_work_sync(&(priv->i5100_scrubbing)); pci_disable_device(pdev); pci_disable_device(priv->ch0mm); pci_disable_device(priv->ch1mm); + pci_disable_device(priv->einj); pci_dev_put(priv->ch0mm); pci_dev_put(priv->ch1mm); + pci_dev_put(priv->einj); edac_mc_free(mci); } @@ -1054,7 +1223,7 @@ MODULE_DEVICE_TABLE(pci, i5100_pci_tbl); static struct pci_driver i5100_driver = { .name = KBUILD_BASENAME, .probe = i5100_init_one, - .remove = __devexit_p(i5100_remove_one), + .remove = i5100_remove_one, .id_table = i5100_pci_tbl, }; @@ -1062,13 +1231,16 @@ static int __init i5100_init(void) { int pci_rc; - pci_rc = pci_register_driver(&i5100_driver); + i5100_debugfs = debugfs_create_dir("i5100_edac", NULL); + pci_rc = pci_register_driver(&i5100_driver); return (pci_rc < 0) ? pci_rc : 0; } static void __exit i5100_exit(void) { + debugfs_remove(i5100_debugfs); + pci_unregister_driver(&i5100_driver); } diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 277246998b80..0a05bbceb08f 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -1373,8 +1373,7 @@ fail0: * negative on error * count (>= 0) */ -static int __devinit i5400_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +static int i5400_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; @@ -1393,7 +1392,7 @@ static int __devinit i5400_init_one(struct pci_dev *pdev, * i5400_remove_one destructor for one instance of device * */ -static void __devexit i5400_remove_one(struct pci_dev *pdev) +static void i5400_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; @@ -1431,7 +1430,7 @@ MODULE_DEVICE_TABLE(pci, i5400_pci_tbl); static struct pci_driver i5400_driver = { .name = "i5400_edac", .probe = i5400_init_one, - .remove = __devexit_p(i5400_remove_one), + .remove = i5400_remove_one, .id_table = i5400_pci_tbl, }; diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index a09d0667f72a..087c27bc5d42 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -197,8 +197,8 @@ static const char *ferr_fat_fbd_name[] = { [0] = "Memory Write error on non-redundant retry or " "FBD configuration Write error on retry", }; -#define GET_FBD_FAT_IDX(fbderr) (fbderr & (3 << 28)) -#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)) +#define GET_FBD_FAT_IDX(fbderr) (((fbderr) >> 28) & 3) +#define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 22)) #define FERR_NF_FBD 0xa0 static const char *ferr_nf_fbd_name[] = { @@ -225,7 +225,7 @@ static const char *ferr_nf_fbd_name[] = { [1] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", [0] = "Uncorrectable Data ECC on Replay", }; -#define GET_FBD_NF_IDX(fbderr) (fbderr & (3 << 28)) +#define GET_FBD_NF_IDX(fbderr) (((fbderr) >> 28) & 3) #define FERR_NF_FBD_ERR_MASK ((1 << 24) | (1 << 23) | (1 << 22) | (1 << 21) |\ (1 << 18) | (1 << 17) | (1 << 16) | (1 << 15) |\ (1 << 14) | (1 << 13) | (1 << 11) | (1 << 10) |\ @@ -464,7 +464,7 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci) errnum = find_first_bit(&errors, ARRAY_SIZE(ferr_nf_fbd_name)); specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum); - branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0; + branch = (GET_FBD_NF_IDX(error_reg) == 2) ? 1 : 0; pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, REDMEMA, &syndrome); @@ -923,7 +923,7 @@ static void i7300_put_devices(struct mem_ctl_info *mci) * Device 21 function 0: PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 * Device 22 function 0: PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 */ -static int __devinit i7300_get_devices(struct mem_ctl_info *mci) +static int i7300_get_devices(struct mem_ctl_info *mci) { struct i7300_pvt *pvt; struct pci_dev *pdev; @@ -1008,8 +1008,7 @@ error: * @pdev: struct pci_dev pointer * @id: struct pci_device_id pointer - currently unused */ -static int __devinit i7300_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +static int i7300_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mem_ctl_info *mci; struct edac_mc_layer layers[3]; @@ -1122,7 +1121,7 @@ fail0: * i7300_remove_one() - Remove the driver * @pdev: struct pci_dev pointer */ -static void __devexit i7300_remove_one(struct pci_dev *pdev) +static void i7300_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; char *tmp; @@ -1163,7 +1162,7 @@ MODULE_DEVICE_TABLE(pci, i7300_pci_tbl); static struct pci_driver i7300_driver = { .name = "i7300_edac", .probe = i7300_init_one, - .remove = __devexit_p(i7300_remove_one), + .remove = i7300_remove_one, .id_table = i7300_pci_tbl, }; diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 3672101023bd..0ec3e95a12cd 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -420,21 +420,21 @@ static inline int numdimms(u32 dimms) static inline int numrank(u32 rank) { - static int ranks[4] = { 1, 2, 4, -EINVAL }; + static const int ranks[] = { 1, 2, 4, -EINVAL }; return ranks[rank & 0x3]; } static inline int numbank(u32 bank) { - static int banks[4] = { 4, 8, 16, -EINVAL }; + static const int banks[] = { 4, 8, 16, -EINVAL }; return banks[bank & 0x3]; } static inline int numrow(u32 row) { - static int rows[8] = { + static const int rows[] = { 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, -EINVAL, -EINVAL, -EINVAL, }; @@ -444,7 +444,7 @@ static inline int numrow(u32 row) static inline int numcol(u32 col) { - static int cols[8] = { + static const int cols[] = { 1 << 10, 1 << 11, 1 << 12, -EINVAL, }; return cols[col & 0x3]; @@ -816,7 +816,7 @@ static ssize_t i7core_inject_store_##param( \ struct device_attribute *mattr, \ const char *data, size_t count) \ { \ - struct mem_ctl_info *mci = to_mci(dev); \ + struct mem_ctl_info *mci = dev_get_drvdata(dev); \ struct i7core_pvt *pvt; \ long value; \ int rc; \ @@ -845,7 +845,7 @@ static ssize_t i7core_inject_show_##param( \ struct device_attribute *mattr, \ char *data) \ { \ - struct mem_ctl_info *mci = to_mci(dev); \ + struct mem_ctl_info *mci = dev_get_drvdata(dev); \ struct i7core_pvt *pvt; \ \ pvt = mci->pvt_info; \ @@ -1052,7 +1052,7 @@ static ssize_t i7core_show_counter_##param( \ struct device_attribute *mattr, \ char *data) \ { \ - struct mem_ctl_info *mci = to_mci(dev); \ + struct mem_ctl_info *mci = dev_get_drvdata(dev); \ struct i7core_pvt *pvt = mci->pvt_info; \ \ edac_dbg(1, "\n"); \ @@ -2305,8 +2305,7 @@ fail0: * < 0 for error code */ -static int __devinit i7core_probe(struct pci_dev *pdev, - const struct pci_device_id *id) +static int i7core_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int rc, count = 0; struct i7core_dev *i7core_dev; @@ -2368,7 +2367,7 @@ fail0: * i7core_remove destructor for one instance of device * */ -static void __devexit i7core_remove(struct pci_dev *pdev) +static void i7core_remove(struct pci_dev *pdev) { struct i7core_dev *i7core_dev; @@ -2409,7 +2408,7 @@ MODULE_DEVICE_TABLE(pci, i7core_pci_tbl); static struct pci_driver i7core_driver = { .name = "i7core_edac", .probe = i7core_probe, - .remove = __devexit_p(i7core_remove), + .remove = i7core_remove, .id_table = i7core_pci_tbl, }; diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c index 90f303db5d1d..57fdb77903ba 100644 --- a/drivers/edac/i82443bxgx_edac.c +++ b/drivers/edac/i82443bxgx_edac.c @@ -353,8 +353,8 @@ fail: EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_probe1); /* returns count (>= 0), or negative on error */ -static int __devinit i82443bxgx_edacmc_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int i82443bxgx_edacmc_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) { int rc; @@ -369,7 +369,7 @@ static int __devinit i82443bxgx_edacmc_init_one(struct pci_dev *pdev, return rc; } -static void __devexit i82443bxgx_edacmc_remove_one(struct pci_dev *pdev) +static void i82443bxgx_edacmc_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; @@ -399,7 +399,7 @@ MODULE_DEVICE_TABLE(pci, i82443bxgx_pci_tbl); static struct pci_driver i82443bxgx_edacmc_driver = { .name = EDAC_MOD_STR, .probe = i82443bxgx_edacmc_init_one, - .remove = __devexit_p(i82443bxgx_edacmc_remove_one), + .remove = i82443bxgx_edacmc_remove_one, .id_table = i82443bxgx_pci_tbl, }; diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index 1faa74971513..3e3e431c8301 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -254,8 +254,8 @@ fail: } /* returns count (>= 0), or negative on error */ -static int __devinit i82860_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int i82860_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) { int rc; @@ -273,7 +273,7 @@ static int __devinit i82860_init_one(struct pci_dev *pdev, return rc; } -static void __devexit i82860_remove_one(struct pci_dev *pdev) +static void i82860_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; @@ -302,7 +302,7 @@ MODULE_DEVICE_TABLE(pci, i82860_pci_tbl); static struct pci_driver i82860_driver = { .name = EDAC_MOD_STR, .probe = i82860_init_one, - .remove = __devexit_p(i82860_remove_one), + .remove = i82860_remove_one, .id_table = i82860_pci_tbl, }; diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index 3e416b1a6b53..2f8535fc451e 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -479,8 +479,8 @@ fail0: } /* returns count (>= 0), or negative on error */ -static int __devinit i82875p_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int i82875p_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) { int rc; @@ -498,7 +498,7 @@ static int __devinit i82875p_init_one(struct pci_dev *pdev, return rc; } -static void __devexit i82875p_remove_one(struct pci_dev *pdev) +static void i82875p_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct i82875p_pvt *pvt = NULL; @@ -541,7 +541,7 @@ MODULE_DEVICE_TABLE(pci, i82875p_pci_tbl); static struct pci_driver i82875p_driver = { .name = EDAC_MOD_STR, .probe = i82875p_init_one, - .remove = __devexit_p(i82875p_remove_one), + .remove = i82875p_remove_one, .id_table = i82875p_pci_tbl, }; diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c index 069e26c11c4f..0c8d4b0eaa32 100644 --- a/drivers/edac/i82975x_edac.c +++ b/drivers/edac/i82975x_edac.c @@ -370,10 +370,6 @@ static enum dev_type i82975x_dram_type(void __iomem *mch_window, int rank) static void i82975x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, void __iomem *mch_window) { - static const char *labels[4] = { - "DIMM A1", "DIMM A2", - "DIMM B1", "DIMM B2" - }; struct csrow_info *csrow; unsigned long last_cumul_size; u8 value; @@ -423,9 +419,10 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci, dimm = mci->csrows[index]->channels[chan]->dimm; dimm->nr_pages = nr_pages / csrow->nr_channels; - strncpy(csrow->channels[chan]->dimm->label, - labels[(index >> 1) + (chan * 2)], - EDAC_MC_LABEL_LEN); + + snprintf(csrow->channels[chan]->dimm->label, EDAC_MC_LABEL_LEN, "DIMM %c%d", + (chan == 0) ? 'A' : 'B', + index); dimm->grain = 1 << 7; /* 128Byte cache-line resolution */ dimm->dtype = i82975x_dram_type(mch_window, index); dimm->mtype = MEM_DDR2; /* I82975x supports only DDR2 */ @@ -595,8 +592,8 @@ fail0: } /* returns count (>= 0), or negative on error */ -static int __devinit i82975x_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int i82975x_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) { int rc; @@ -613,7 +610,7 @@ static int __devinit i82975x_init_one(struct pci_dev *pdev, return rc; } -static void __devexit i82975x_remove_one(struct pci_dev *pdev) +static void i82975x_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct i82975x_pvt *pvt; @@ -646,7 +643,7 @@ MODULE_DEVICE_TABLE(pci, i82975x_pci_tbl); static struct pci_driver i82975x_driver = { .name = EDAC_MOD_STR, .probe = i82975x_init_one, - .remove = __devexit_p(i82975x_remove_one), + .remove = i82975x_remove_one, .id_table = i82975x_pci_tbl, }; diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index d0c372e30de4..f3f0c930d550 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -39,32 +39,30 @@ EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); */ /* transaction type */ -const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" }; -EXPORT_SYMBOL_GPL(tt_msgs); +static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" }; /* cache level */ -const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" }; -EXPORT_SYMBOL_GPL(ll_msgs); +static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" }; /* memory transaction type */ -const char * const rrrr_msgs[] = { +static const char * const rrrr_msgs[] = { "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP" }; -EXPORT_SYMBOL_GPL(rrrr_msgs); /* participating processor */ const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" }; EXPORT_SYMBOL_GPL(pp_msgs); /* request timeout */ -const char * const to_msgs[] = { "no timeout", "timed out" }; -EXPORT_SYMBOL_GPL(to_msgs); +static const char * const to_msgs[] = { "no timeout", "timed out" }; /* memory or i/o */ -const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; -EXPORT_SYMBOL_GPL(ii_msgs); +static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; -static const char * const f15h_ic_mce_desc[] = { +/* internal error type */ +static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" }; + +static const char * const f15h_mc1_mce_desc[] = { "UC during a demand linefill from L2", "Parity error during data load from IC", "Parity error for IC valid bit", @@ -84,7 +82,7 @@ static const char * const f15h_ic_mce_desc[] = { "fetch address FIFO" }; -static const char * const f15h_cu_mce_desc[] = { +static const char * const f15h_mc2_mce_desc[] = { "Fill ECC error on data fills", /* xec = 0x4 */ "Fill parity error on insn fills", "Prefetcher request FIFO parity error", @@ -101,7 +99,7 @@ static const char * const f15h_cu_mce_desc[] = { "PRB address parity error" }; -static const char * const nb_mce_desc[] = { +static const char * const mc4_mce_desc[] = { "DRAM ECC error detected on the NB", "CRC error detected on HT link", "Link-defined sync error packets detected on HT link", @@ -123,7 +121,7 @@ static const char * const nb_mce_desc[] = { "ECC Error in the Probe Filter directory" }; -static const char * const fr_ex_mce_desc[] = { +static const char * const mc5_mce_desc[] = { "CPU Watchdog timer expire", "Wakeup array dest tag", "AG payload array", @@ -139,7 +137,7 @@ static const char * const fr_ex_mce_desc[] = { "DE error occurred" }; -static bool f12h_dc_mce(u16 ec, u8 xec) +static bool f12h_mc0_mce(u16 ec, u8 xec) { bool ret = false; @@ -157,26 +155,26 @@ static bool f12h_dc_mce(u16 ec, u8 xec) return ret; } -static bool f10h_dc_mce(u16 ec, u8 xec) +static bool f10h_mc0_mce(u16 ec, u8 xec) { if (R4(ec) == R4_GEN && LL(ec) == LL_L1) { pr_cont("during data scrub.\n"); return true; } - return f12h_dc_mce(ec, xec); + return f12h_mc0_mce(ec, xec); } -static bool k8_dc_mce(u16 ec, u8 xec) +static bool k8_mc0_mce(u16 ec, u8 xec) { if (BUS_ERROR(ec)) { pr_cont("during system linefill.\n"); return true; } - return f10h_dc_mce(ec, xec); + return f10h_mc0_mce(ec, xec); } -static bool f14h_dc_mce(u16 ec, u8 xec) +static bool cat_mc0_mce(u16 ec, u8 xec) { u8 r4 = R4(ec); bool ret = true; @@ -228,7 +226,7 @@ static bool f14h_dc_mce(u16 ec, u8 xec) return ret; } -static bool f15h_dc_mce(u16 ec, u8 xec) +static bool f15h_mc0_mce(u16 ec, u8 xec) { bool ret = true; @@ -275,12 +273,12 @@ static bool f15h_dc_mce(u16 ec, u8 xec) return ret; } -static void amd_decode_dc_mce(struct mce *m) +static void decode_mc0_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Data Cache Error: "); + pr_emerg(HW_ERR "MC0 Error: "); /* TLB error signatures are the same across families */ if (TLB_ERROR(ec)) { @@ -290,13 +288,13 @@ static void amd_decode_dc_mce(struct mce *m) : (xec ? "multimatch" : "parity"))); return; } - } else if (fam_ops->dc_mce(ec, xec)) + } else if (fam_ops->mc0_mce(ec, xec)) ; else - pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); + pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n"); } -static bool k8_ic_mce(u16 ec, u8 xec) +static bool k8_mc1_mce(u16 ec, u8 xec) { u8 ll = LL(ec); bool ret = true; @@ -330,26 +328,32 @@ static bool k8_ic_mce(u16 ec, u8 xec) return ret; } -static bool f14h_ic_mce(u16 ec, u8 xec) +static bool cat_mc1_mce(u16 ec, u8 xec) { u8 r4 = R4(ec); bool ret = true; - if (MEM_ERROR(ec)) { - if (TT(ec) != 0 || LL(ec) != 1) - ret = false; + if (!MEM_ERROR(ec)) + return false; + + if (TT(ec) != TT_INSTR) + return false; + + if (r4 == R4_IRD) + pr_cont("Data/tag array parity error for a tag hit.\n"); + else if (r4 == R4_SNOOP) + pr_cont("Tag error during snoop/victimization.\n"); + else if (xec == 0x0) + pr_cont("Tag parity error from victim castout.\n"); + else if (xec == 0x2) + pr_cont("Microcode patch RAM parity error.\n"); + else + ret = false; - if (r4 == R4_IRD) - pr_cont("Data/tag array parity error for a tag hit.\n"); - else if (r4 == R4_SNOOP) - pr_cont("Tag error during snoop/victimization.\n"); - else - ret = false; - } return ret; } -static bool f15h_ic_mce(u16 ec, u8 xec) +static bool f15h_mc1_mce(u16 ec, u8 xec) { bool ret = true; @@ -358,19 +362,19 @@ static bool f15h_ic_mce(u16 ec, u8 xec) switch (xec) { case 0x0 ... 0xa: - pr_cont("%s.\n", f15h_ic_mce_desc[xec]); + pr_cont("%s.\n", f15h_mc1_mce_desc[xec]); break; case 0xd: - pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]); + pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]); break; case 0x10: - pr_cont("%s.\n", f15h_ic_mce_desc[xec-4]); + pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]); break; case 0x11 ... 0x14: - pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]); + pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]); break; default: @@ -379,12 +383,12 @@ static bool f15h_ic_mce(u16 ec, u8 xec) return ret; } -static void amd_decode_ic_mce(struct mce *m) +static void decode_mc1_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Instruction Cache Error: "); + pr_emerg(HW_ERR "MC1 Error: "); if (TLB_ERROR(ec)) pr_cont("%s TLB %s.\n", LL_MSG(ec), @@ -393,18 +397,15 @@ static void amd_decode_ic_mce(struct mce *m) bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read")); - } else if (fam_ops->ic_mce(ec, xec)) + } else if (fam_ops->mc1_mce(ec, xec)) ; else - pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); + pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n"); } -static void amd_decode_bu_mce(struct mce *m) +static bool k8_mc2_mce(u16 ec, u8 xec) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, xec_mask); - - pr_emerg(HW_ERR "Bus Unit Error"); + bool ret = true; if (xec == 0x1) pr_cont(" in the write data buffers.\n"); @@ -429,24 +430,18 @@ static void amd_decode_bu_mce(struct mce *m) pr_cont(": %s parity/ECC error during data " "access from L2.\n", R4_MSG(ec)); else - goto wrong_bu_mce; + ret = false; } else - goto wrong_bu_mce; + ret = false; } else - goto wrong_bu_mce; - - return; + ret = false; -wrong_bu_mce: - pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); + return ret; } -static void amd_decode_cu_mce(struct mce *m) +static bool f15h_mc2_mce(u16 ec, u8 xec) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, xec_mask); - - pr_emerg(HW_ERR "Combined Unit Error: "); + bool ret = true; if (TLB_ERROR(ec)) { if (xec == 0x0) @@ -454,63 +449,112 @@ static void amd_decode_cu_mce(struct mce *m) else if (xec == 0x1) pr_cont("Poison data provided for TLB fill.\n"); else - goto wrong_cu_mce; + ret = false; } else if (BUS_ERROR(ec)) { if (xec > 2) - goto wrong_cu_mce; + ret = false; pr_cont("Error during attempted NB data read.\n"); } else if (MEM_ERROR(ec)) { switch (xec) { case 0x4 ... 0xc: - pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]); + pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]); break; case 0x10 ... 0x14: - pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]); + pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]); break; default: - goto wrong_cu_mce; + ret = false; } } - return; + return ret; +} + +static bool f16h_mc2_mce(u16 ec, u8 xec) +{ + u8 r4 = R4(ec); + + if (!MEM_ERROR(ec)) + return false; + + switch (xec) { + case 0x04 ... 0x05: + pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O'); + break; -wrong_cu_mce: - pr_emerg(HW_ERR "Corrupted CU MCE info?\n"); + case 0x09 ... 0x0b: + case 0x0d ... 0x0f: + pr_cont("ECC error in L2 tag (%s).\n", + ((r4 == R4_GEN) ? "BankReq" : + ((r4 == R4_SNOOP) ? "Prb" : "Fill"))); + break; + + case 0x10 ... 0x19: + case 0x1b: + pr_cont("ECC error in L2 data array (%s).\n", + (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" : + ((r4 == R4_GEN) ? "Attr" : + ((r4 == R4_EVICT) ? "Vict" : "Fill")))); + break; + + case 0x1c ... 0x1d: + case 0x1f: + pr_cont("Parity error in L2 attribute bits (%s).\n", + ((r4 == R4_RD) ? "Hit" : + ((r4 == R4_GEN) ? "Attr" : "Fill"))); + break; + + default: + return false; + } + + return true; } -static void amd_decode_ls_mce(struct mce *m) +static void decode_mc2_mce(struct mce *m) +{ + u16 ec = EC(m->status); + u8 xec = XEC(m->status, xec_mask); + + pr_emerg(HW_ERR "MC2 Error: "); + + if (!fam_ops->mc2_mce(ec, xec)) + pr_cont(HW_ERR "Corrupted MC2 MCE info?\n"); +} + +static void decode_mc3_mce(struct mce *m) { u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); if (boot_cpu_data.x86 >= 0x14) { - pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," + pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family," " please report on LKML.\n"); return; } - pr_emerg(HW_ERR "Load Store Error"); + pr_emerg(HW_ERR "MC3 Error"); if (xec == 0x0) { u8 r4 = R4(ec); if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) - goto wrong_ls_mce; + goto wrong_mc3_mce; pr_cont(" during %s.\n", R4_MSG(ec)); } else - goto wrong_ls_mce; + goto wrong_mc3_mce; return; -wrong_ls_mce: - pr_emerg(HW_ERR "Corrupted LS MCE info?\n"); + wrong_mc3_mce: + pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n"); } -void amd_decode_nb_mce(struct mce *m) +static void decode_mc4_mce(struct mce *m) { struct cpuinfo_x86 *c = &boot_cpu_data; int node_id = amd_get_nb_id(m->extcpu); @@ -518,7 +562,7 @@ void amd_decode_nb_mce(struct mce *m) u8 xec = XEC(m->status, 0x1f); u8 offset = 0; - pr_emerg(HW_ERR "Northbridge Error (node %d): ", node_id); + pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id); switch (xec) { case 0x0 ... 0xe: @@ -527,9 +571,9 @@ void amd_decode_nb_mce(struct mce *m) if (xec == 0x0 || xec == 0x8) { /* no ECCs on F11h */ if (c->x86 == 0x11) - goto wrong_nb_mce; + goto wrong_mc4_mce; - pr_cont("%s.\n", nb_mce_desc[xec]); + pr_cont("%s.\n", mc4_mce_desc[xec]); if (nb_bus_decoder) nb_bus_decoder(node_id, m); @@ -543,14 +587,14 @@ void amd_decode_nb_mce(struct mce *m) else if (BUS_ERROR(ec)) pr_cont("DMA Exclusion Vector Table Walk error.\n"); else - goto wrong_nb_mce; + goto wrong_mc4_mce; return; case 0x19: - if (boot_cpu_data.x86 == 0x15) + if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16) pr_cont("Compute Unit Data Error.\n"); else - goto wrong_nb_mce; + goto wrong_mc4_mce; return; case 0x1c ... 0x1f: @@ -558,46 +602,44 @@ void amd_decode_nb_mce(struct mce *m) break; default: - goto wrong_nb_mce; + goto wrong_mc4_mce; } - pr_cont("%s.\n", nb_mce_desc[xec - offset]); + pr_cont("%s.\n", mc4_mce_desc[xec - offset]); return; -wrong_nb_mce: - pr_emerg(HW_ERR "Corrupted NB MCE info?\n"); + wrong_mc4_mce: + pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n"); } -EXPORT_SYMBOL_GPL(amd_decode_nb_mce); -static void amd_decode_fr_mce(struct mce *m) +static void decode_mc5_mce(struct mce *m) { struct cpuinfo_x86 *c = &boot_cpu_data; u8 xec = XEC(m->status, xec_mask); if (c->x86 == 0xf || c->x86 == 0x11) - goto wrong_fr_mce; + goto wrong_mc5_mce; - pr_emerg(HW_ERR "%s Error: ", - (c->x86 == 0x15 ? "Execution Unit" : "FIROB")); + pr_emerg(HW_ERR "MC5 Error: "); if (xec == 0x0 || xec == 0xc) - pr_cont("%s.\n", fr_ex_mce_desc[xec]); + pr_cont("%s.\n", mc5_mce_desc[xec]); else if (xec < 0xd) - pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]); + pr_cont("%s parity error.\n", mc5_mce_desc[xec]); else - goto wrong_fr_mce; + goto wrong_mc5_mce; return; -wrong_fr_mce: - pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); + wrong_mc5_mce: + pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n"); } -static void amd_decode_fp_mce(struct mce *m) +static void decode_mc6_mce(struct mce *m) { u8 xec = XEC(m->status, xec_mask); - pr_emerg(HW_ERR "Floating Point Unit Error: "); + pr_emerg(HW_ERR "MC6 Error: "); switch (xec) { case 0x1: @@ -621,7 +663,7 @@ static void amd_decode_fp_mce(struct mce *m) break; default: - goto wrong_fp_mce; + goto wrong_mc6_mce; break; } @@ -629,12 +671,16 @@ static void amd_decode_fp_mce(struct mce *m) return; -wrong_fp_mce: - pr_emerg(HW_ERR "Corrupted FP MCE info?\n"); + wrong_mc6_mce: + pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n"); } static inline void amd_decode_err_code(u16 ec) { + if (INT_ERROR(ec)) { + pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec)); + return; + } pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec)); @@ -669,74 +715,91 @@ static bool amd_filter_mce(struct mce *m) return false; } +static const char *decode_error_status(struct mce *m) +{ + if (m->status & MCI_STATUS_UC) { + if (m->status & MCI_STATUS_PCC) + return "System Fatal error."; + if (m->mcgstatus & MCG_STATUS_RIPV) + return "Uncorrected, software restartable error."; + return "Uncorrected, software containable error."; + } + + if (m->status & MCI_STATUS_DEFERRED) + return "Deferred error."; + + return "Corrected error, no action required."; +} + int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) { struct mce *m = (struct mce *)data; - struct cpuinfo_x86 *c = &boot_cpu_data; + struct cpuinfo_x86 *c = &cpu_data(m->extcpu); int ecc; if (amd_filter_mce(m)) return NOTIFY_STOP; - pr_emerg(HW_ERR "CPU:%d\tMC%d_STATUS[%s|%s|%s|%s|%s", - m->extcpu, m->bank, - ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), - ((m->status & MCI_STATUS_UC) ? "UE" : "CE"), - ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), - ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"), - ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); - - if (c->x86 == 0x15) - pr_cont("|%s|%s", - ((m->status & BIT_64(44)) ? "Deferred" : "-"), - ((m->status & BIT_64(43)) ? "Poison" : "-")); - - /* do the two bits[14:13] together */ - ecc = (m->status >> 45) & 0x3; - if (ecc) - pr_cont("|%sECC", ((ecc == 2) ? "C" : "U")); - - pr_cont("]: 0x%016llx\n", m->status); - - if (m->status & MCI_STATUS_ADDRV) - pr_emerg(HW_ERR "\tMC%d_ADDR: 0x%016llx\n", m->bank, m->addr); - switch (m->bank) { case 0: - amd_decode_dc_mce(m); + decode_mc0_mce(m); break; case 1: - amd_decode_ic_mce(m); + decode_mc1_mce(m); break; case 2: - if (c->x86 == 0x15) - amd_decode_cu_mce(m); - else - amd_decode_bu_mce(m); + decode_mc2_mce(m); break; case 3: - amd_decode_ls_mce(m); + decode_mc3_mce(m); break; case 4: - amd_decode_nb_mce(m); + decode_mc4_mce(m); break; case 5: - amd_decode_fr_mce(m); + decode_mc5_mce(m); break; case 6: - amd_decode_fp_mce(m); + decode_mc6_mce(m); break; default: break; } + pr_emerg(HW_ERR "Error Status: %s\n", decode_error_status(m)); + + pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", + m->extcpu, + c->x86, c->x86_model, c->x86_mask, + m->bank, + ((m->status & MCI_STATUS_OVER) ? "Over" : "-"), + ((m->status & MCI_STATUS_UC) ? "UE" : "CE"), + ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), + ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"), + ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); + + if (c->x86 == 0x15 || c->x86 == 0x16) + pr_cont("|%s|%s", + ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"), + ((m->status & MCI_STATUS_POISON) ? "Poison" : "-")); + + /* do the two bits[14:13] together */ + ecc = (m->status >> 45) & 0x3; + if (ecc) + pr_cont("|%sECC", ((ecc == 2) ? "C" : "U")); + + pr_cont("]: 0x%016llx\n", m->status); + + if (m->status & MCI_STATUS_ADDRV) + pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr); + amd_decode_err_code(m->status & 0xffff); return NOTIFY_STOP; @@ -754,7 +817,7 @@ static int __init mce_amd_init(void) if (c->x86_vendor != X86_VENDOR_AMD) return 0; - if (c->x86 < 0xf || c->x86 > 0x15) + if (c->x86 < 0xf || c->x86 > 0x16) return 0; fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); @@ -763,35 +826,48 @@ static int __init mce_amd_init(void) switch (c->x86) { case 0xf: - fam_ops->dc_mce = k8_dc_mce; - fam_ops->ic_mce = k8_ic_mce; + fam_ops->mc0_mce = k8_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x10: - fam_ops->dc_mce = f10h_dc_mce; - fam_ops->ic_mce = k8_ic_mce; + fam_ops->mc0_mce = f10h_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x11: - fam_ops->dc_mce = k8_dc_mce; - fam_ops->ic_mce = k8_ic_mce; + fam_ops->mc0_mce = k8_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x12: - fam_ops->dc_mce = f12h_dc_mce; - fam_ops->ic_mce = k8_ic_mce; + fam_ops->mc0_mce = f12h_mc0_mce; + fam_ops->mc1_mce = k8_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x14: nb_err_cpumask = 0x3; - fam_ops->dc_mce = f14h_dc_mce; - fam_ops->ic_mce = f14h_ic_mce; + fam_ops->mc0_mce = cat_mc0_mce; + fam_ops->mc1_mce = cat_mc1_mce; + fam_ops->mc2_mce = k8_mc2_mce; break; case 0x15: xec_mask = 0x1f; - fam_ops->dc_mce = f15h_dc_mce; - fam_ops->ic_mce = f15h_ic_mce; + fam_ops->mc0_mce = f15h_mc0_mce; + fam_ops->mc1_mce = f15h_mc1_mce; + fam_ops->mc2_mce = f15h_mc2_mce; + break; + + case 0x16: + xec_mask = 0x1f; + fam_ops->mc0_mce = cat_mc0_mce; + fam_ops->mc1_mce = cat_mc1_mce; + fam_ops->mc2_mce = f16h_mc2_mce; break; default: diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 8c87a5e87057..51b7e3a36e37 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -14,6 +14,7 @@ #define TLB_ERROR(x) (((x) & 0xFFF0) == 0x0010) #define MEM_ERROR(x) (((x) & 0xFF00) == 0x0100) #define BUS_ERROR(x) (((x) & 0xF800) == 0x0800) +#define INT_ERROR(x) (((x) & 0xF4FF) == 0x0400) #define TT(x) (((x) >> 2) & 0x3) #define TT_MSG(x) tt_msgs[TT(x)] @@ -25,14 +26,16 @@ #define TO_MSG(x) to_msgs[TO(x)] #define PP(x) (((x) >> 9) & 0x3) #define PP_MSG(x) pp_msgs[PP(x)] +#define UU(x) (((x) >> 8) & 0x3) +#define UU_MSG(x) uu_msgs[UU(x)] #define R4(x) (((x) >> 4) & 0xf) #define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") -/* - * F3x4C bits (MCi_STATUS' high half) - */ -#define NBSH_ERR_CPU_VAL BIT(24) +#define MCI_STATUS_DEFERRED BIT_64(44) +#define MCI_STATUS_POISON BIT_64(43) + +extern const char * const pp_msgs[]; enum tt_ids { TT_INSTR = 0, @@ -67,25 +70,18 @@ enum rrrr_ids { R4_SNOOP, }; -extern const char * const tt_msgs[]; -extern const char * const ll_msgs[]; -extern const char * const rrrr_msgs[]; -extern const char * const pp_msgs[]; -extern const char * const to_msgs[]; -extern const char * const ii_msgs[]; - /* * per-family decoder ops */ struct amd_decoder_ops { - bool (*dc_mce)(u16, u8); - bool (*ic_mce)(u16, u8); + bool (*mc0_mce)(u16, u8); + bool (*mc1_mce)(u16, u8); + bool (*mc2_mce)(u16, u8); }; void amd_report_gart_errors(bool); void amd_register_ecc_decoder(void (*f)(int, struct mce *)); void amd_unregister_ecc_decoder(void (*f)(int, struct mce *)); -void amd_decode_nb_mce(struct mce *); int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data); #endif /* _EDAC_MCE_AMD_H */ diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 66b5151c1080..2ae78f20cc28 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -6,7 +6,7 @@ * This file may be distributed under the terms of the GNU General Public * License version 2. * - * Copyright (c) 2010: Borislav Petkov <borislav.petkov@amd.com> + * Copyright (c) 2010: Borislav Petkov <bp@alien8.de> * Advanced Micro Devices Inc. */ @@ -168,6 +168,6 @@ module_init(edac_init_mce_inject); module_exit(edac_exit_mce_inject); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Borislav Petkov <borislav.petkov@amd.com>"); +MODULE_AUTHOR("Borislav Petkov <bp@alien8.de>"); MODULE_AUTHOR("AMD Inc."); MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding"); diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 4fe66fa183ec..3eb32f62d72a 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -212,7 +212,7 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id) return IRQ_HANDLED; } -int __devinit mpc85xx_pci_err_probe(struct platform_device *op) +int mpc85xx_pci_err_probe(struct platform_device *op) { struct edac_pci_ctl_info *pci; struct mpc85xx_pci_pdata *pdata; @@ -301,7 +301,7 @@ int __devinit mpc85xx_pci_err_probe(struct platform_device *op) "[EDAC] PCI err", pci); if (res < 0) { printk(KERN_ERR - "%s: Unable to requiest irq %d for " + "%s: Unable to request irq %d for " "MPC85xx PCI err\n", __func__, pdata->irq); irq_dispose_mapping(pdata->irq); res = -ENODEV; @@ -504,7 +504,7 @@ static irqreturn_t mpc85xx_l2_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static int __devinit mpc85xx_l2_err_probe(struct platform_device *op) +static int mpc85xx_l2_err_probe(struct platform_device *op) { struct edac_device_ctl_info *edac_dev; struct mpc85xx_l2_pdata *pdata; @@ -583,7 +583,7 @@ static int __devinit mpc85xx_l2_err_probe(struct platform_device *op) "[EDAC] L2 err", edac_dev); if (res < 0) { printk(KERN_ERR - "%s: Unable to requiest irq %d for " + "%s: Unable to request irq %d for " "MPC85xx L2 err\n", __func__, pdata->irq); irq_dispose_mapping(pdata->irq); res = -ENODEV; @@ -885,7 +885,7 @@ static irqreturn_t mpc85xx_mc_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci) +static void mpc85xx_init_csrows(struct mem_ctl_info *mci) { struct mpc85xx_mc_pdata *pdata = mci->pvt_info; struct csrow_info *csrow; @@ -964,7 +964,7 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci) } } -static int __devinit mpc85xx_mc_err_probe(struct platform_device *op) +static int mpc85xx_mc_err_probe(struct platform_device *op) { struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index 2b315c2edc3c..542fad70e360 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c @@ -100,7 +100,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev) return 0; } -static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) +static int mv64x60_pci_err_probe(struct platform_device *pdev) { struct edac_pci_ctl_info *pci; struct mv64x60_pci_pdata *pdata; @@ -221,7 +221,7 @@ static int mv64x60_pci_err_remove(struct platform_device *pdev) static struct platform_driver mv64x60_pci_err_driver = { .probe = mv64x60_pci_err_probe, - .remove = __devexit_p(mv64x60_pci_err_remove), + .remove = mv64x60_pci_err_remove, .driver = { .name = "mv64x60_pci_err", } @@ -271,7 +271,7 @@ static irqreturn_t mv64x60_sram_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev) +static int mv64x60_sram_err_probe(struct platform_device *pdev) { struct edac_device_ctl_info *edac_dev; struct mv64x60_sram_pdata *pdata; @@ -439,7 +439,7 @@ static irqreturn_t mv64x60_cpu_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) +static int mv64x60_cpu_err_probe(struct platform_device *pdev) { struct edac_device_ctl_info *edac_dev; struct resource *r; @@ -697,7 +697,7 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci, dimm->edac_mode = EDAC_SECDED; } -static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) +static int mv64x60_mc_err_probe(struct platform_device *pdev) { struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c new file mode 100644 index 000000000000..7e98084d3645 --- /dev/null +++ b/drivers/edac/octeon_edac-l2c.c @@ -0,0 +1,208 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 Cavium, Inc. + * + * Copyright (C) 2009 Wind River Systems, + * written by Ralf Baechle <ralf@linux-mips.org> + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/edac.h> + +#include <asm/octeon/cvmx.h> + +#include "edac_core.h" +#include "edac_module.h" + +#define EDAC_MOD_STR "octeon-l2c" + +static void octeon_l2c_poll_oct1(struct edac_device_ctl_info *l2c) +{ + union cvmx_l2t_err l2t_err, l2t_err_reset; + union cvmx_l2d_err l2d_err, l2d_err_reset; + + l2t_err_reset.u64 = 0; + l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR); + if (l2t_err.s.sec_err) { + edac_device_handle_ce(l2c, 0, 0, + "Tag Single bit error (corrected)"); + l2t_err_reset.s.sec_err = 1; + } + if (l2t_err.s.ded_err) { + edac_device_handle_ue(l2c, 0, 0, + "Tag Double bit error (detected)"); + l2t_err_reset.s.ded_err = 1; + } + if (l2t_err_reset.u64) + cvmx_write_csr(CVMX_L2T_ERR, l2t_err_reset.u64); + + l2d_err_reset.u64 = 0; + l2d_err.u64 = cvmx_read_csr(CVMX_L2D_ERR); + if (l2d_err.s.sec_err) { + edac_device_handle_ce(l2c, 0, 1, + "Data Single bit error (corrected)"); + l2d_err_reset.s.sec_err = 1; + } + if (l2d_err.s.ded_err) { + edac_device_handle_ue(l2c, 0, 1, + "Data Double bit error (detected)"); + l2d_err_reset.s.ded_err = 1; + } + if (l2d_err_reset.u64) + cvmx_write_csr(CVMX_L2D_ERR, l2d_err_reset.u64); + +} + +static void _octeon_l2c_poll_oct2(struct edac_device_ctl_info *l2c, int tad) +{ + union cvmx_l2c_err_tdtx err_tdtx, err_tdtx_reset; + union cvmx_l2c_err_ttgx err_ttgx, err_ttgx_reset; + char buf1[64]; + char buf2[80]; + + err_tdtx_reset.u64 = 0; + err_tdtx.u64 = cvmx_read_csr(CVMX_L2C_ERR_TDTX(tad)); + if (err_tdtx.s.dbe || err_tdtx.s.sbe || + err_tdtx.s.vdbe || err_tdtx.s.vsbe) + snprintf(buf1, sizeof(buf1), + "type:%d, syn:0x%x, way:%d", + err_tdtx.s.type, err_tdtx.s.syn, err_tdtx.s.wayidx); + + if (err_tdtx.s.dbe) { + snprintf(buf2, sizeof(buf2), + "L2D Double bit error (detected):%s", buf1); + err_tdtx_reset.s.dbe = 1; + edac_device_handle_ue(l2c, tad, 1, buf2); + } + if (err_tdtx.s.sbe) { + snprintf(buf2, sizeof(buf2), + "L2D Single bit error (corrected):%s", buf1); + err_tdtx_reset.s.sbe = 1; + edac_device_handle_ce(l2c, tad, 1, buf2); + } + if (err_tdtx.s.vdbe) { + snprintf(buf2, sizeof(buf2), + "VBF Double bit error (detected):%s", buf1); + err_tdtx_reset.s.vdbe = 1; + edac_device_handle_ue(l2c, tad, 1, buf2); + } + if (err_tdtx.s.vsbe) { + snprintf(buf2, sizeof(buf2), + "VBF Single bit error (corrected):%s", buf1); + err_tdtx_reset.s.vsbe = 1; + edac_device_handle_ce(l2c, tad, 1, buf2); + } + if (err_tdtx_reset.u64) + cvmx_write_csr(CVMX_L2C_ERR_TDTX(tad), err_tdtx_reset.u64); + + err_ttgx_reset.u64 = 0; + err_ttgx.u64 = cvmx_read_csr(CVMX_L2C_ERR_TTGX(tad)); + + if (err_ttgx.s.dbe || err_ttgx.s.sbe) + snprintf(buf1, sizeof(buf1), + "type:%d, syn:0x%x, way:%d", + err_ttgx.s.type, err_ttgx.s.syn, err_ttgx.s.wayidx); + + if (err_ttgx.s.dbe) { + snprintf(buf2, sizeof(buf2), + "Tag Double bit error (detected):%s", buf1); + err_ttgx_reset.s.dbe = 1; + edac_device_handle_ue(l2c, tad, 0, buf2); + } + if (err_ttgx.s.sbe) { + snprintf(buf2, sizeof(buf2), + "Tag Single bit error (corrected):%s", buf1); + err_ttgx_reset.s.sbe = 1; + edac_device_handle_ce(l2c, tad, 0, buf2); + } + if (err_ttgx_reset.u64) + cvmx_write_csr(CVMX_L2C_ERR_TTGX(tad), err_ttgx_reset.u64); +} + +static void octeon_l2c_poll_oct2(struct edac_device_ctl_info *l2c) +{ + int i; + for (i = 0; i < l2c->nr_instances; i++) + _octeon_l2c_poll_oct2(l2c, i); +} + +static int octeon_l2c_probe(struct platform_device *pdev) +{ + struct edac_device_ctl_info *l2c; + + int num_tads = OCTEON_IS_MODEL(OCTEON_CN68XX) ? 4 : 1; + + /* 'Tags' are block 0, 'Data' is block 1*/ + l2c = edac_device_alloc_ctl_info(0, "l2c", num_tads, "l2c", 2, 0, + NULL, 0, edac_device_alloc_index()); + if (!l2c) + return -ENOMEM; + + l2c->dev = &pdev->dev; + platform_set_drvdata(pdev, l2c); + l2c->dev_name = dev_name(&pdev->dev); + + l2c->mod_name = "octeon-l2c"; + l2c->ctl_name = "octeon_l2c_err"; + + + if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + union cvmx_l2t_err l2t_err; + union cvmx_l2d_err l2d_err; + + l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR); + l2t_err.s.sec_intena = 0; /* We poll */ + l2t_err.s.ded_intena = 0; + cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64); + + l2d_err.u64 = cvmx_read_csr(CVMX_L2D_ERR); + l2d_err.s.sec_intena = 0; /* We poll */ + l2d_err.s.ded_intena = 0; + cvmx_write_csr(CVMX_L2T_ERR, l2d_err.u64); + + l2c->edac_check = octeon_l2c_poll_oct1; + } else { + /* OCTEON II */ + l2c->edac_check = octeon_l2c_poll_oct2; + } + + if (edac_device_add_device(l2c) > 0) { + pr_err("%s: edac_device_add_device() failed\n", __func__); + goto err; + } + + + return 0; + +err: + edac_device_free_ctl_info(l2c); + + return -ENXIO; +} + +static int octeon_l2c_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *l2c = platform_get_drvdata(pdev); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(l2c); + + return 0; +} + +static struct platform_driver octeon_l2c_driver = { + .probe = octeon_l2c_probe, + .remove = octeon_l2c_remove, + .driver = { + .name = "octeon_l2c_edac", + } +}; +module_platform_driver(octeon_l2c_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c new file mode 100644 index 000000000000..93412d6b3af1 --- /dev/null +++ b/drivers/edac/octeon_edac-lmc.c @@ -0,0 +1,186 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2009 Wind River Systems, + * written by Ralf Baechle <ralf@linux-mips.org> + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/edac.h> + +#include <asm/octeon/octeon.h> +#include <asm/octeon/cvmx-lmcx-defs.h> + +#include "edac_core.h" +#include "edac_module.h" + +#define OCTEON_MAX_MC 4 + +static void octeon_lmc_edac_poll(struct mem_ctl_info *mci) +{ + union cvmx_lmcx_mem_cfg0 cfg0; + bool do_clear = false; + char msg[64]; + + cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mci->mc_idx)); + if (cfg0.s.sec_err || cfg0.s.ded_err) { + union cvmx_lmcx_fadr fadr; + fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx)); + snprintf(msg, sizeof(msg), + "DIMM %d rank %d bank %d row %d col %d", + fadr.cn30xx.fdimm, fadr.cn30xx.fbunk, + fadr.cn30xx.fbank, fadr.cn30xx.frow, fadr.cn30xx.fcol); + } + + if (cfg0.s.sec_err) { + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, msg, ""); + cfg0.s.sec_err = -1; /* Done, re-arm */ + do_clear = true; + } + + if (cfg0.s.ded_err) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, msg, ""); + cfg0.s.ded_err = -1; /* Done, re-arm */ + do_clear = true; + } + if (do_clear) + cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mci->mc_idx), cfg0.u64); +} + +static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci) +{ + union cvmx_lmcx_int int_reg; + bool do_clear = false; + char msg[64]; + + int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx)); + if (int_reg.s.sec_err || int_reg.s.ded_err) { + union cvmx_lmcx_fadr fadr; + fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx)); + snprintf(msg, sizeof(msg), + "DIMM %d rank %d bank %d row %d col %d", + fadr.cn61xx.fdimm, fadr.cn61xx.fbunk, + fadr.cn61xx.fbank, fadr.cn61xx.frow, fadr.cn61xx.fcol); + } + + if (int_reg.s.sec_err) { + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, msg, ""); + int_reg.s.sec_err = -1; /* Done, re-arm */ + do_clear = true; + } + + if (int_reg.s.ded_err) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, msg, ""); + int_reg.s.ded_err = -1; /* Done, re-arm */ + do_clear = true; + } + if (do_clear) + cvmx_write_csr(CVMX_LMCX_INT(mci->mc_idx), int_reg.u64); +} + +static int octeon_lmc_edac_probe(struct platform_device *pdev) +{ + struct mem_ctl_info *mci; + struct edac_mc_layer layers[1]; + int mc = pdev->id; + + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = 1; + layers[0].is_virt_csrow = false; + + if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + union cvmx_lmcx_mem_cfg0 cfg0; + + cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0)); + if (!cfg0.s.ecc_ena) { + dev_info(&pdev->dev, "Disabled (ECC not enabled)\n"); + return 0; + } + + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); + if (!mci) + return -ENXIO; + + mci->pdev = &pdev->dev; + mci->dev_name = dev_name(&pdev->dev); + + mci->mod_name = "octeon-lmc"; + mci->ctl_name = "octeon-lmc-err"; + mci->edac_check = octeon_lmc_edac_poll; + + if (edac_mc_add_mc(mci)) { + dev_err(&pdev->dev, "edac_mc_add_mc() failed\n"); + edac_mc_free(mci); + return -ENXIO; + } + + cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc)); + cfg0.s.intr_ded_ena = 0; /* We poll */ + cfg0.s.intr_sec_ena = 0; + cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mc), cfg0.u64); + } else { + /* OCTEON II */ + union cvmx_lmcx_int_en en; + union cvmx_lmcx_config config; + + config.u64 = cvmx_read_csr(CVMX_LMCX_CONFIG(0)); + if (!config.s.ecc_ena) { + dev_info(&pdev->dev, "Disabled (ECC not enabled)\n"); + return 0; + } + + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); + if (!mci) + return -ENXIO; + + mci->pdev = &pdev->dev; + mci->dev_name = dev_name(&pdev->dev); + + mci->mod_name = "octeon-lmc"; + mci->ctl_name = "co_lmc_err"; + mci->edac_check = octeon_lmc_edac_poll_o2; + + if (edac_mc_add_mc(mci)) { + dev_err(&pdev->dev, "edac_mc_add_mc() failed\n"); + edac_mc_free(mci); + return -ENXIO; + } + + en.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc)); + en.s.intr_ded_ena = 0; /* We poll */ + en.s.intr_sec_ena = 0; + cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mc), en.u64); + } + platform_set_drvdata(pdev, mci); + + return 0; +} + +static int octeon_lmc_edac_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + return 0; +} + +static struct platform_driver octeon_lmc_edac_driver = { + .probe = octeon_lmc_edac_probe, + .remove = octeon_lmc_edac_remove, + .driver = { + .name = "octeon_lmc_edac", + } +}; +module_platform_driver(octeon_lmc_edac_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c new file mode 100644 index 000000000000..0f83c33a7d1f --- /dev/null +++ b/drivers/edac/octeon_edac-pc.c @@ -0,0 +1,143 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 Cavium, Inc. + * + * Copyright (C) 2009 Wind River Systems, + * written by Ralf Baechle <ralf@linux-mips.org> + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/edac.h> + +#include "edac_core.h" +#include "edac_module.h" + +#include <asm/octeon/cvmx.h> +#include <asm/mipsregs.h> + +extern int register_co_cache_error_notifier(struct notifier_block *nb); +extern int unregister_co_cache_error_notifier(struct notifier_block *nb); + +extern unsigned long long cache_err_dcache[NR_CPUS]; + +struct co_cache_error { + struct notifier_block notifier; + struct edac_device_ctl_info *ed; +}; + +/** + * EDAC CPU cache error callback + * + * @event: non-zero if unrecoverable. + */ +static int co_cache_error_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct co_cache_error *p = container_of(this, struct co_cache_error, + notifier); + + unsigned int core = cvmx_get_core_num(); + unsigned int cpu = smp_processor_id(); + u64 icache_err = read_octeon_c0_icacheerr(); + u64 dcache_err; + + if (event) { + dcache_err = cache_err_dcache[core]; + cache_err_dcache[core] = 0; + } else { + dcache_err = read_octeon_c0_dcacheerr(); + } + + if (icache_err & 1) { + edac_device_printk(p->ed, KERN_ERR, + "CacheErr (Icache):%llx, core %d/cpu %d, cp0_errorepc == %lx\n", + (unsigned long long)icache_err, core, cpu, + read_c0_errorepc()); + write_octeon_c0_icacheerr(0); + edac_device_handle_ce(p->ed, cpu, 1, "icache"); + } + if (dcache_err & 1) { + edac_device_printk(p->ed, KERN_ERR, + "CacheErr (Dcache):%llx, core %d/cpu %d, cp0_errorepc == %lx\n", + (unsigned long long)dcache_err, core, cpu, + read_c0_errorepc()); + if (event) + edac_device_handle_ue(p->ed, cpu, 0, "dcache"); + else + edac_device_handle_ce(p->ed, cpu, 0, "dcache"); + + /* Clear the error indication */ + if (OCTEON_IS_MODEL(OCTEON_FAM_2)) + write_octeon_c0_dcacheerr(1); + else + write_octeon_c0_dcacheerr(0); + } + + return NOTIFY_STOP; +} + +static int co_cache_error_probe(struct platform_device *pdev) +{ + struct co_cache_error *p = devm_kzalloc(&pdev->dev, sizeof(*p), + GFP_KERNEL); + if (!p) + return -ENOMEM; + + p->notifier.notifier_call = co_cache_error_event; + platform_set_drvdata(pdev, p); + + p->ed = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(), + "cache", 2, 0, NULL, 0, + edac_device_alloc_index()); + if (!p->ed) + goto err; + + p->ed->dev = &pdev->dev; + + p->ed->dev_name = dev_name(&pdev->dev); + + p->ed->mod_name = "octeon-cpu"; + p->ed->ctl_name = "cache"; + + if (edac_device_add_device(p->ed)) { + pr_err("%s: edac_device_add_device() failed\n", __func__); + goto err1; + } + + register_co_cache_error_notifier(&p->notifier); + + return 0; + +err1: + edac_device_free_ctl_info(p->ed); +err: + return -ENXIO; +} + +static int co_cache_error_remove(struct platform_device *pdev) +{ + struct co_cache_error *p = platform_get_drvdata(pdev); + + unregister_co_cache_error_notifier(&p->notifier); + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(p->ed); + return 0; +} + +static struct platform_driver co_cache_error_driver = { + .probe = co_cache_error_probe, + .remove = co_cache_error_remove, + .driver = { + .name = "octeon_pc_edac", + } +}; +module_platform_driver(co_cache_error_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/octeon_edac-pci.c b/drivers/edac/octeon_edac-pci.c new file mode 100644 index 000000000000..9ca73cec74e7 --- /dev/null +++ b/drivers/edac/octeon_edac-pci.c @@ -0,0 +1,111 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2012 Cavium, Inc. + * Copyright (C) 2009 Wind River Systems, + * written by Ralf Baechle <ralf@linux-mips.org> + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/edac.h> + +#include <asm/octeon/cvmx.h> +#include <asm/octeon/cvmx-npi-defs.h> +#include <asm/octeon/cvmx-pci-defs.h> +#include <asm/octeon/octeon.h> + +#include "edac_core.h" +#include "edac_module.h" + +static void octeon_pci_poll(struct edac_pci_ctl_info *pci) +{ + union cvmx_pci_cfg01 cfg01; + + cfg01.u32 = octeon_npi_read32(CVMX_NPI_PCI_CFG01); + if (cfg01.s.dpe) { /* Detected parity error */ + edac_pci_handle_pe(pci, pci->ctl_name); + cfg01.s.dpe = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.sse) { + edac_pci_handle_npe(pci, "Signaled System Error"); + cfg01.s.sse = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.rma) { + edac_pci_handle_npe(pci, "Received Master Abort"); + cfg01.s.rma = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.rta) { + edac_pci_handle_npe(pci, "Received Target Abort"); + cfg01.s.rta = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.sta) { + edac_pci_handle_npe(pci, "Signaled Target Abort"); + cfg01.s.sta = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.mdpe) { + edac_pci_handle_npe(pci, "Master Data Parity Error"); + cfg01.s.mdpe = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } +} + +static int octeon_pci_probe(struct platform_device *pdev) +{ + struct edac_pci_ctl_info *pci; + int res = 0; + + pci = edac_pci_alloc_ctl_info(0, "octeon_pci_err"); + if (!pci) + return -ENOMEM; + + pci->dev = &pdev->dev; + platform_set_drvdata(pdev, pci); + pci->dev_name = dev_name(&pdev->dev); + + pci->mod_name = "octeon-pci"; + pci->ctl_name = "octeon_pci_err"; + pci->edac_check = octeon_pci_poll; + + if (edac_pci_add_device(pci, 0) > 0) { + pr_err("%s: edac_pci_add_device() failed\n", __func__); + goto err; + } + + return 0; + +err: + edac_pci_free_ctl_info(pci); + + return res; +} + +static int octeon_pci_remove(struct platform_device *pdev) +{ + struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev); + + edac_pci_del_device(&pdev->dev); + edac_pci_free_ctl_info(pci); + + return 0; +} + +static struct platform_driver octeon_pci_driver = { + .probe = octeon_pci_probe, + .remove = octeon_pci_remove, + .driver = { + .name = "octeon_pci_edac", + } +}; +module_platform_driver(octeon_pci_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c index 2d35b78ada3c..9c971b575530 100644 --- a/drivers/edac/pasemi_edac.c +++ b/drivers/edac/pasemi_edac.c @@ -188,8 +188,8 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci, return 0; } -static int __devinit pasemi_edac_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int pasemi_edac_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) { struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; @@ -266,7 +266,7 @@ fail: return -ENODEV; } -static void __devexit pasemi_edac_remove(struct pci_dev *pdev) +static void pasemi_edac_remove(struct pci_dev *pdev) { struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); @@ -287,7 +287,7 @@ MODULE_DEVICE_TABLE(pci, pasemi_edac_pci_tbl); static struct pci_driver pasemi_edac_driver = { .name = MODULE_NAME, .probe = pasemi_edac_probe, - .remove = __devexit_p(pasemi_edac_remove), + .remove = pasemi_edac_remove, .id_table = pasemi_edac_pci_tbl, }; diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index bf0957635991..ef6b7e08f485 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -838,8 +838,7 @@ ppc4xx_edac_isr(int irq, void *dev_id) * * Returns a device type width enumeration. */ -static enum dev_type __devinit -ppc4xx_edac_get_dtype(u32 mcopt1) +static enum dev_type ppc4xx_edac_get_dtype(u32 mcopt1) { switch (mcopt1 & SDRAM_MCOPT1_WDTH_MASK) { case SDRAM_MCOPT1_WDTH_16: @@ -862,8 +861,7 @@ ppc4xx_edac_get_dtype(u32 mcopt1) * * Returns a memory type enumeration. */ -static enum mem_type __devinit -ppc4xx_edac_get_mtype(u32 mcopt1) +static enum mem_type ppc4xx_edac_get_mtype(u32 mcopt1) { bool rden = ((mcopt1 & SDRAM_MCOPT1_RDEN_MASK) == SDRAM_MCOPT1_RDEN); @@ -893,8 +891,7 @@ ppc4xx_edac_get_mtype(u32 mcopt1) * Returns 0 if OK; otherwise, -EINVAL if the memory bank size * configuration cannot be determined. */ -static int __devinit -ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) +static int ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) { const struct ppc4xx_edac_pdata *pdata = mci->pvt_info; int status = 0; @@ -1011,11 +1008,9 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1) * * Returns 0 if OK; otherwise, < 0 on error. */ -static int __devinit -ppc4xx_edac_mc_init(struct mem_ctl_info *mci, - struct platform_device *op, - const dcr_host_t *dcr_host, - u32 mcopt1) +static int ppc4xx_edac_mc_init(struct mem_ctl_info *mci, + struct platform_device *op, + const dcr_host_t *dcr_host, u32 mcopt1) { int status = 0; const u32 memcheck = (mcopt1 & SDRAM_MCOPT1_MCHK_MASK); @@ -1105,8 +1100,8 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci, * Returns 0 if OK; otherwise, -ENODEV if the interrupts could not be * mapped and assigned. */ -static int __devinit -ppc4xx_edac_register_irq(struct platform_device *op, struct mem_ctl_info *mci) +static int ppc4xx_edac_register_irq(struct platform_device *op, + struct mem_ctl_info *mci) { int status = 0; int ded_irq, sec_irq; @@ -1183,8 +1178,8 @@ ppc4xx_edac_register_irq(struct platform_device *op, struct mem_ctl_info *mci) * Returns 0 if the DCRs were successfully mapped; otherwise, < 0 on * error. */ -static int __devinit -ppc4xx_edac_map_dcrs(const struct device_node *np, dcr_host_t *dcr_host) +static int ppc4xx_edac_map_dcrs(const struct device_node *np, + dcr_host_t *dcr_host) { unsigned int dcr_base, dcr_len; @@ -1232,7 +1227,7 @@ ppc4xx_edac_map_dcrs(const struct device_node *np, dcr_host_t *dcr_host) * Returns 0 if the controller instance was successfully bound to the * driver; otherwise, < 0 on error. */ -static int __devinit ppc4xx_edac_probe(struct platform_device *op) +static int ppc4xx_edac_probe(struct platform_device *op) { int status = 0; u32 mcopt1, memcheck; diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index f854debd5533..2fd6a5490905 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -359,8 +359,8 @@ fail: } /* returns count (>= 0), or negative on error */ -static int __devinit r82600_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int r82600_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) { edac_dbg(0, "\n"); @@ -368,7 +368,7 @@ static int __devinit r82600_init_one(struct pci_dev *pdev, return r82600_probe1(pdev, ent->driver_data); } -static void __devexit r82600_remove_one(struct pci_dev *pdev) +static void r82600_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; @@ -397,7 +397,7 @@ MODULE_DEVICE_TABLE(pci, r82600_pci_tbl); static struct pci_driver r82600_driver = { .name = EDAC_MOD_STR, .probe = r82600_init_one, - .remove = __devexit_p(r82600_remove_one), + .remove = r82600_remove_one, .id_table = r82600_pci_tbl, }; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 5715b7c2c517..57244f995614 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -639,7 +639,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = (1 + pvt->tohm) >> 20; mb = div_u64_rem(tmp_mb, 1000, &kb); - edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)", mb, kb, (u64)pvt->tohm); + edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm); /* * Step 2) Get SAD range and SAD Interleave list @@ -1692,8 +1692,7 @@ fail0: * < 0 for error code */ -static int __devinit sbridge_probe(struct pci_dev *pdev, - const struct pci_device_id *id) +static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; u8 mc, num_mc = 0; @@ -1744,7 +1743,7 @@ fail0: * sbridge_remove destructor for one instance of device * */ -static void __devexit sbridge_remove(struct pci_dev *pdev) +static void sbridge_remove(struct pci_dev *pdev) { struct sbridge_dev *sbridge_dev; @@ -1785,7 +1784,7 @@ MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl); static struct pci_driver sbridge_driver = { .name = "sbridge_edac", .probe = sbridge_probe, - .remove = __devexit_p(sbridge_remove), + .remove = sbridge_remove, .id_table = sbridge_pci_tbl, }; diff --git a/drivers/edac/tile_edac.c b/drivers/edac/tile_edac.c index 1e904b7b79a0..a0820536b7d9 100644 --- a/drivers/edac/tile_edac.c +++ b/drivers/edac/tile_edac.c @@ -82,7 +82,7 @@ static void tile_edac_check(struct mem_ctl_info *mci) * Initialize the 'csrows' table within the mci control structure with the * addressing of memory. */ -static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci) +static int tile_edac_init_csrows(struct mem_ctl_info *mci) { struct csrow_info *csrow = mci->csrows[0]; struct tile_edac_priv *priv = mci->pvt_info; @@ -120,7 +120,7 @@ static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci) return 0; } -static int __devinit tile_edac_mc_probe(struct platform_device *pdev) +static int tile_edac_mc_probe(struct platform_device *pdev) { char hv_file[32]; int hv_devhdl; @@ -186,7 +186,7 @@ static int __devinit tile_edac_mc_probe(struct platform_device *pdev) return 0; } -static int __devexit tile_edac_mc_remove(struct platform_device *pdev) +static int tile_edac_mc_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); @@ -202,7 +202,7 @@ static struct platform_driver tile_edac_mc_driver = { .owner = THIS_MODULE, }, .probe = tile_edac_mc_probe, - .remove = __devexit_p(tile_edac_mc_remove), + .remove = tile_edac_mc_remove, }; /* diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c index 08a992693e62..c9db24d95caa 100644 --- a/drivers/edac/x38_edac.c +++ b/drivers/edac/x38_edac.c @@ -418,8 +418,7 @@ fail: return rc; } -static int __devinit x38_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int x38_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int rc; @@ -435,7 +434,7 @@ static int __devinit x38_init_one(struct pci_dev *pdev, return rc; } -static void __devexit x38_remove_one(struct pci_dev *pdev) +static void x38_remove_one(struct pci_dev *pdev) { struct mem_ctl_info *mci; @@ -464,7 +463,7 @@ MODULE_DEVICE_TABLE(pci, x38_pci_tbl); static struct pci_driver x38_driver = { .name = EDAC_MOD_STR, .probe = x38_init_one, - .remove = __devexit_p(x38_remove_one), + .remove = x38_remove_one, .id_table = x38_pci_tbl, }; |