From 75568f8094eb0333e9c2109b23cbc8b82d318a3c Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 8 Mar 2010 10:24:29 -0700 Subject: PCI: create function symlinks in /sys/bus/pci/slots/N/ Create convenience symlinks in sysfs, linking slots to device functions, and vice versa. These links make it easier for users to figure out which devices actually live in what slots. For example: sapphire:/sys/bus/pci/slots # ls 1 10 2 3 4 5 6 7 8 9 sapphire:/sys/bus/pci/slots # ls -l 3 total 0 -r--r--r-- 1 root root 65536 Aug 18 14:10 address lrwxrwxrwx 1 root root 0 Aug 18 14:10 function0 -> ../../../../devices/pci0000:23/0000:23:01.0 lrwxrwxrwx 1 root root 0 Aug 18 14:10 function1 -> ../../../../devices/pci0000:23/0000:23:01.1 sapphire:/sys/bus/pci/slots # ls -l 3/function0/slot lrwxrwxrwx 1 root root 0 Aug 18 14:13 3/function0/slot -> ../../../bus/pci/slots/3 The original form of this patch was written by Matthew Wilcox, and was enhanced to include links from the sysfs slots/ directory pointing back at the device functions. Cc: willy@linux.intel.com Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- Documentation/ABI/testing/sysfs-bus-pci | 40 +++++++++++++++++++++++++++ drivers/pci/pci-sysfs.c | 37 +++++++++++++++++++++++++ drivers/pci/slot.c | 48 +++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index 25be3250f7d6..428676cfa61e 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -133,6 +133,46 @@ Description: The symbolic link points to the PCI device sysfs entry of the Physical Function this device associates with. + +What: /sys/bus/pci/slots/... +Date: April 2005 (possibly older) +KernelVersion: 2.6.12 (possibly older) +Contact: linux-pci@vger.kernel.org +Description: + When the appropriate driver is loaded, it will create a + directory per claimed physical PCI slot in + /sys/bus/pci/slots/. The names of these directories are + specific to the driver, which in turn, are specific to the + platform, but in general, should match the label on the + machine's physical chassis. + + The drivers that can create slot directories include the + PCI hotplug drivers, and as of 2.6.27, the pci_slot driver. + + The slot directories contain, at a minimum, a file named + 'address' which contains the PCI bus:device:function tuple. + Other files may appear as well, but are specific to the + driver. + +What: /sys/bus/pci/slots/.../function[0-7] +Date: March 2010 +KernelVersion: 2.6.35 +Contact: linux-pci@vger.kernel.org +Description: + If PCI slot directories (as described above) are created, + and the physical slot is actually populated with a device, + symbolic links in the slot directory pointing to the + device's PCI functions are created as well. + +What: /sys/bus/pci/devices/.../slot +Date: March 2010 +KernelVersion: 2.6.35 +Contact: linux-pci@vger.kernel.org +Description: + If PCI slot directories (as described above) are created, + a symbolic link pointing to the slot directory will be + created as well. + What: /sys/bus/pci/slots/.../module Date: June 2009 Contact: linux-pci@vger.kernel.org diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index fad93983bfed..941e939d1da9 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -1011,6 +1011,39 @@ error: return retval; } +static void pci_remove_slot_links(struct pci_dev *dev) +{ + char func[10]; + struct pci_slot *slot; + + sysfs_remove_link(&dev->dev.kobj, "slot"); + list_for_each_entry(slot, &dev->bus->slots, list) { + if (slot->number != PCI_SLOT(dev->devfn)) + continue; + snprintf(func, 10, "function%d", PCI_FUNC(dev->devfn)); + sysfs_remove_link(&slot->kobj, func); + } +} + +static int pci_create_slot_links(struct pci_dev *dev) +{ + int result = 0; + char func[10]; + struct pci_slot *slot; + + list_for_each_entry(slot, &dev->bus->slots, list) { + if (slot->number != PCI_SLOT(dev->devfn)) + continue; + result = sysfs_create_link(&dev->dev.kobj, &slot->kobj, "slot"); + if (result) + goto out; + snprintf(func, 10, "function%d", PCI_FUNC(dev->devfn)); + result = sysfs_create_link(&slot->kobj, &dev->dev.kobj, func); + } +out: + return result; +} + int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev) { int retval; @@ -1073,6 +1106,8 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev) if (retval) goto err_vga_file; + pci_create_slot_links(pdev); + return 0; err_vga_file: @@ -1122,6 +1157,8 @@ void pci_remove_sysfs_dev_files(struct pci_dev *pdev) if (!sysfs_initialized) return; + pci_remove_slot_links(pdev); + pci_remove_capabilities_sysfs(pdev); if (pdev->cfg_size < PCI_CFG_SPACE_EXP_SIZE) diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 659eaa0fc48f..e0189cf7c558 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -97,6 +97,50 @@ static ssize_t cur_speed_read_file(struct pci_slot *slot, char *buf) return bus_speed_read(slot->bus->cur_bus_speed, buf); } +static void remove_sysfs_files(struct pci_slot *slot) +{ + char func[10]; + struct list_head *tmp; + + list_for_each(tmp, &slot->bus->devices) { + struct pci_dev *dev = pci_dev_b(tmp); + if (PCI_SLOT(dev->devfn) != slot->number) + continue; + sysfs_remove_link(&dev->dev.kobj, "slot"); + + snprintf(func, 10, "function%d", PCI_FUNC(dev->devfn)); + sysfs_remove_link(&slot->kobj, func); + } +} + +static int create_sysfs_files(struct pci_slot *slot) +{ + int result; + char func[10]; + struct list_head *tmp; + + list_for_each(tmp, &slot->bus->devices) { + struct pci_dev *dev = pci_dev_b(tmp); + if (PCI_SLOT(dev->devfn) != slot->number) + continue; + + result = sysfs_create_link(&dev->dev.kobj, &slot->kobj, "slot"); + if (result) + goto fail; + + snprintf(func, 10, "function%d", PCI_FUNC(dev->devfn)); + result = sysfs_create_link(&slot->kobj, &dev->dev.kobj, func); + if (result) + goto fail; + } + + return 0; + +fail: + remove_sysfs_files(slot); + return result; +} + static void pci_slot_release(struct kobject *kobj) { struct pci_dev *dev; @@ -109,6 +153,8 @@ static void pci_slot_release(struct kobject *kobj) if (PCI_SLOT(dev->devfn) == slot->number) dev->slot = NULL; + remove_sysfs_files(slot); + list_del(&slot->list); kfree(slot); @@ -300,6 +346,8 @@ placeholder: INIT_LIST_HEAD(&slot->list); list_add(&slot->list, &parent->slots); + create_sysfs_files(slot); + list_for_each_entry(dev, &parent->devices, bus_list) if (PCI_SLOT(dev->devfn) == slot_nr) dev->slot = slot; -- cgit v1.2.3 From 52b265a12768b9a72679bec825eb82c784116464 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 8 Mar 2010 16:48:49 -0500 Subject: PCI: clearing wakeup flags not needed This patch (as1353) removes a couple of unnecessary assignments from the PCI core. The should_wakeup flag is naturally initialized to 0; there's no need to clear it. Acked-by: Rafael J. Wysocki Signed-off-by: Alan Stern Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 37499127c801..60fcb6f02c91 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1631,7 +1631,6 @@ void pci_pm_init(struct pci_dev *dev) * let the user space enable it to wake up the system as needed. */ device_set_wakeup_capable(&dev->dev, true); - device_set_wakeup_enable(&dev->dev, false); /* Disable the PME# generation functionality */ pci_pme_active(dev, false); } else { @@ -1655,7 +1654,6 @@ void platform_pci_wakeup_init(struct pci_dev *dev) return; device_set_wakeup_capable(&dev->dev, true); - device_set_wakeup_enable(&dev->dev, false); platform_pci_sleep_wake(dev, false); } -- cgit v1.2.3 From 511dd98ce8cf6dc4f8f2cb32a8af31ce9f4ba4a1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 17 Feb 2010 14:35:19 +0000 Subject: PCI: Convert pci_lock to raw_spinlock pci_lock must be a real spinlock in preempt-rt. Convert it to raw_spinlock. No change for !RT kernels. Signed-off-by: Thomas Gleixner Signed-off-by: Jesse Barnes --- drivers/pci/access.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 2f646fe1260f..affb83b42ebb 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -13,7 +13,7 @@ * configuration space. */ -static DEFINE_SPINLOCK(pci_lock); +static DEFINE_RAW_SPINLOCK(pci_lock); /* * Wrappers for all PCI configuration access functions. They just check @@ -33,10 +33,10 @@ int pci_bus_read_config_##size \ unsigned long flags; \ u32 data = 0; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - spin_lock_irqsave(&pci_lock, flags); \ + raw_spin_lock_irqsave(&pci_lock, flags); \ res = bus->ops->read(bus, devfn, pos, len, &data); \ *value = (type)data; \ - spin_unlock_irqrestore(&pci_lock, flags); \ + raw_spin_unlock_irqrestore(&pci_lock, flags); \ return res; \ } @@ -47,9 +47,9 @@ int pci_bus_write_config_##size \ int res; \ unsigned long flags; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - spin_lock_irqsave(&pci_lock, flags); \ + raw_spin_lock_irqsave(&pci_lock, flags); \ res = bus->ops->write(bus, devfn, pos, len, value); \ - spin_unlock_irqrestore(&pci_lock, flags); \ + raw_spin_unlock_irqrestore(&pci_lock, flags); \ return res; \ } @@ -79,10 +79,10 @@ struct pci_ops *pci_bus_set_ops(struct pci_bus *bus, struct pci_ops *ops) struct pci_ops *old_ops; unsigned long flags; - spin_lock_irqsave(&pci_lock, flags); + raw_spin_lock_irqsave(&pci_lock, flags); old_ops = bus->ops; bus->ops = ops; - spin_unlock_irqrestore(&pci_lock, flags); + raw_spin_unlock_irqrestore(&pci_lock, flags); return old_ops; } EXPORT_SYMBOL(pci_bus_set_ops); @@ -136,9 +136,9 @@ static noinline void pci_wait_ucfg(struct pci_dev *dev) __add_wait_queue(&pci_ucfg_wait, &wait); do { set_current_state(TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&pci_lock); + raw_spin_unlock_irq(&pci_lock); schedule(); - spin_lock_irq(&pci_lock); + raw_spin_lock_irq(&pci_lock); } while (dev->block_ucfg_access); __remove_wait_queue(&pci_ucfg_wait, &wait); } @@ -150,11 +150,11 @@ int pci_user_read_config_##size \ int ret = 0; \ u32 data = -1; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - spin_lock_irq(&pci_lock); \ + raw_spin_lock_irq(&pci_lock); \ if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \ ret = dev->bus->ops->read(dev->bus, dev->devfn, \ pos, sizeof(type), &data); \ - spin_unlock_irq(&pci_lock); \ + raw_spin_unlock_irq(&pci_lock); \ *val = (type)data; \ return ret; \ } @@ -165,11 +165,11 @@ int pci_user_write_config_##size \ { \ int ret = -EIO; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - spin_lock_irq(&pci_lock); \ + raw_spin_lock_irq(&pci_lock); \ if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \ ret = dev->bus->ops->write(dev->bus, dev->devfn, \ pos, sizeof(type), val); \ - spin_unlock_irq(&pci_lock); \ + raw_spin_unlock_irq(&pci_lock); \ return ret; \ } @@ -396,10 +396,10 @@ void pci_block_user_cfg_access(struct pci_dev *dev) unsigned long flags; int was_blocked; - spin_lock_irqsave(&pci_lock, flags); + raw_spin_lock_irqsave(&pci_lock, flags); was_blocked = dev->block_ucfg_access; dev->block_ucfg_access = 1; - spin_unlock_irqrestore(&pci_lock, flags); + raw_spin_unlock_irqrestore(&pci_lock, flags); /* If we BUG() inside the pci_lock, we're guaranteed to hose * the machine */ @@ -417,7 +417,7 @@ void pci_unblock_user_cfg_access(struct pci_dev *dev) { unsigned long flags; - spin_lock_irqsave(&pci_lock, flags); + raw_spin_lock_irqsave(&pci_lock, flags); /* This indicates a problem in the caller, but we don't need * to kill them, unlike a double-block above. */ @@ -425,6 +425,6 @@ void pci_unblock_user_cfg_access(struct pci_dev *dev) dev->block_ucfg_access = 0; wake_up_all(&pci_ucfg_wait); - spin_unlock_irqrestore(&pci_lock, flags); + raw_spin_unlock_irqrestore(&pci_lock, flags); } EXPORT_SYMBOL_GPL(pci_unblock_user_cfg_access); -- cgit v1.2.3 From d19f61f098ae9315b76a97962007f687683916d4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 17 Feb 2010 14:35:25 +0000 Subject: x86/PCI: Convert pci_config_lock to raw_spinlock pci_config_lock must be a real spinlock in preempt-rt. Convert it to raw_spinlock. No change for !RT kernels. Signed-off-by: Thomas Gleixner Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 2 +- arch/x86/pci/common.c | 2 +- arch/x86/pci/direct.c | 16 ++++++++-------- arch/x86/pci/mmconfig_32.c | 8 ++++---- arch/x86/pci/numaq_32.c | 8 ++++---- arch/x86/pci/pcbios.c | 8 ++++---- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 1a0422348d6d..8d8797eae5d7 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -83,7 +83,7 @@ struct irq_routing_table { extern unsigned int pcibios_irq_mask; -extern spinlock_t pci_config_lock; +extern raw_spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index cf2e93869c48..215a27ae050d 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -76,7 +76,7 @@ struct pci_ops pci_root_ops = { * This interrupt-safe spinlock protects all accesses to PCI * configuration space. */ -DEFINE_SPINLOCK(pci_config_lock); +DEFINE_RAW_SPINLOCK(pci_config_lock); static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) { diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 347d882b3bb3..bd33620b0071 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -27,7 +27,7 @@ static int pci_conf1_read(unsigned int seg, unsigned int bus, return -EINVAL; } - spin_lock_irqsave(&pci_config_lock, flags); + raw_spin_lock_irqsave(&pci_config_lock, flags); outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); @@ -43,7 +43,7 @@ static int pci_conf1_read(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -56,7 +56,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, if ((bus > 255) || (devfn > 255) || (reg > 4095)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + raw_spin_lock_irqsave(&pci_config_lock, flags); outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); @@ -72,7 +72,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -108,7 +108,7 @@ static int pci_conf2_read(unsigned int seg, unsigned int bus, if (dev & 0x10) return PCIBIOS_DEVICE_NOT_FOUND; - spin_lock_irqsave(&pci_config_lock, flags); + raw_spin_lock_irqsave(&pci_config_lock, flags); outb((u8)(0xF0 | (fn << 1)), 0xCF8); outb((u8)bus, 0xCFA); @@ -127,7 +127,7 @@ static int pci_conf2_read(unsigned int seg, unsigned int bus, outb(0, 0xCF8); - spin_unlock_irqrestore(&pci_config_lock, flags); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -147,7 +147,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, if (dev & 0x10) return PCIBIOS_DEVICE_NOT_FOUND; - spin_lock_irqsave(&pci_config_lock, flags); + raw_spin_lock_irqsave(&pci_config_lock, flags); outb((u8)(0xF0 | (fn << 1)), 0xCF8); outb((u8)bus, 0xCFA); @@ -166,7 +166,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, outb(0, 0xCF8); - spin_unlock_irqrestore(&pci_config_lock, flags); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 90d5fd476ed4..a3d9c54792ae 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -64,7 +64,7 @@ err: *value = -1; if (!base) goto err; - spin_lock_irqsave(&pci_config_lock, flags); + raw_spin_lock_irqsave(&pci_config_lock, flags); pci_exp_set_dev_base(base, bus, devfn); @@ -79,7 +79,7 @@ err: *value = -1; *value = mmio_config_readl(mmcfg_virt_addr + reg); break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -97,7 +97,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, if (!base) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + raw_spin_lock_irqsave(&pci_config_lock, flags); pci_exp_set_dev_base(base, bus, devfn); @@ -112,7 +112,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, mmio_config_writel(mmcfg_virt_addr + reg, value); break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 8223738ad806..5c9e2458df4e 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -37,7 +37,7 @@ static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + raw_spin_lock_irqsave(&pci_config_lock, flags); write_cf8(bus, devfn, reg); @@ -62,7 +62,7 @@ static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -76,7 +76,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + raw_spin_lock_irqsave(&pci_config_lock, flags); write_cf8(bus, devfn, reg); @@ -101,7 +101,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 59a225c17b84..2492d165096a 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -162,7 +162,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, if (!value || (bus > 255) || (devfn > 255) || (reg > 255)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + raw_spin_lock_irqsave(&pci_config_lock, flags); switch (len) { case 1: @@ -213,7 +213,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); return (int)((result & 0xff00) >> 8); } @@ -228,7 +228,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, if ((bus > 255) || (devfn > 255) || (reg > 255)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + raw_spin_lock_irqsave(&pci_config_lock, flags); switch (len) { case 1: @@ -269,7 +269,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); return (int)((result & 0xff00) >> 8); } -- cgit v1.2.3 From 8b6d043b7ee2d1b819dc833d677ea2aead71a0c0 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 29 Mar 2010 19:38:00 +0200 Subject: resource: shared I/O region support SuperIO devices share regions and use lock/unlock operations to chip select. We therefore need to be able to request a resource and wait for it to be freed by whichever other SuperIO device currently hogs it. Right now you have to poll which is horrible. Add a MUXED field to IO port resources. If the MUXED field is set on the resource and on the request (via request_muxed_region) then we block until the previous owner of the muxed resource releases their region. This allows us to implement proper resource sharing and locking for superio chips using code of the form enable_my_superio_dev() { request_muxed_region(0x44, 0x02, "superio:watchdog"); outb() ..sequence to enable chip } disable_my_superio_dev() { outb() .. sequence of disable chip release_region(0x44, 0x02); } Signed-off-by: Giel van Schijndel Signed-off-by: Alan Cox Signed-off-by: Jesse Barnes --- include/linux/ioport.h | 4 +++- kernel/resource.c | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 26fad187d661..b22790268b64 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -52,6 +52,7 @@ struct resource_list { #define IORESOURCE_MEM_64 0x00100000 #define IORESOURCE_WINDOW 0x00200000 /* forwarded by bridge */ +#define IORESOURCE_MUXED 0x00400000 /* Resource is software muxed */ #define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 @@ -143,7 +144,8 @@ static inline unsigned long resource_type(const struct resource *res) } /* Convenience shorthand with allocation */ -#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) +#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) +#define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED) #define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) #define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) #define request_mem_region_exclusive(start,n,name) \ diff --git a/kernel/resource.c b/kernel/resource.c index 9c358e263534..7b36976e5dea 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -681,6 +682,8 @@ resource_size_t resource_alignment(struct resource *res) * release_region releases a matching busy region. */ +static DECLARE_WAIT_QUEUE_HEAD(muxed_resource_wait); + /** * __request_region - create a new busy resource region * @parent: parent resource descriptor @@ -693,6 +696,7 @@ struct resource * __request_region(struct resource *parent, resource_size_t start, resource_size_t n, const char *name, int flags) { + DECLARE_WAITQUEUE(wait, current); struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) @@ -717,7 +721,15 @@ struct resource * __request_region(struct resource *parent, if (!(conflict->flags & IORESOURCE_BUSY)) continue; } - + if (conflict->flags & flags & IORESOURCE_MUXED) { + add_wait_queue(&muxed_resource_wait, &wait); + write_unlock(&resource_lock); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + remove_wait_queue(&muxed_resource_wait, &wait); + write_lock(&resource_lock); + continue; + } /* Uhhuh, that didn't work out.. */ kfree(res); res = NULL; @@ -791,6 +803,8 @@ void __release_region(struct resource *parent, resource_size_t start, break; *p = res->sibling; write_unlock(&resource_lock); + if (res->flags & IORESOURCE_MUXED) + wake_up(&muxed_resource_wait); kfree(res); return; } -- cgit v1.2.3 From 3196180a54b593838c0b6496e5b524a2f69bb190 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 8 Apr 2010 09:38:47 -0700 Subject: PCI: change PCI_MSI help text to recommend enabling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most current machines have no problem with this, and in fact many devices and features work best (or only!) with MSI. Reported-by: Petteri Räty Signed-off-by: Jesse Barnes --- drivers/pci/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 7858a117e80b..34ef70d562b2 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -19,7 +19,7 @@ config PCI_MSI by using the 'pci=nomsi' option. This disables MSI for the entire system. - If you don't know what to do here, say N. + If you don't know what to do here, say Y. config PCI_DEBUG bool "PCI Debugging" -- cgit v1.2.3 From 447c5dd7338638f526e9bcf7dcf69b4da5835c7d Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Tue, 11 May 2010 11:44:54 +0200 Subject: PCI: return correct value when writing to the "reset" attribute A successful write() to the "reset" sysfs attribute should return the number of bytes written, not 0. Otherwise userspace (bash) retries the write over and over again. Acked-by: Michael S. Tsirkin Acked-by: Greg Kroah-Hartman Signed-off-by: Michal Schmidt Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 941e939d1da9..89a08ed39c94 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -960,7 +960,12 @@ static ssize_t reset_store(struct device *dev, if (val != 1) return -EINVAL; - return pci_reset_function(pdev); + + result = pci_reset_function(pdev); + if (result < 0) + return result; + + return count; } static struct device_attribute reset_attr = __ATTR(reset, 0200, NULL, reset_store); -- cgit v1.2.3 From d4dfd7278eade24c4aa4b36b8df981fab04f2f26 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:08:24 +0900 Subject: PCI: aerdrv, doc: update example output in pcieaer-howto.txt Follow new format. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- Documentation/PCI/pcieaer-howto.txt | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt index be21001ab144..8c406a496799 100644 --- a/Documentation/PCI/pcieaer-howto.txt +++ b/Documentation/PCI/pcieaer-howto.txt @@ -71,15 +71,11 @@ console. If it's a correctable error, it is outputed as a warning. Otherwise, it is printed as an error. So users could choose different log level to filter out correctable error messages. -Below shows an example. -+------ PCI-Express Device Error -----+ -Error Severity : Uncorrected (Fatal) -PCIE Bus Error type : Transaction Layer -Unsupported Request : First -Requester ID : 0500 -VendorID=8086h, DeviceID=0329h, Bus=05h, Device=00h, Function=00h -TLB Header: -04000001 00200a03 05010000 00050100 +Below shows an example: +0000:50:00.0: PCIe Bus Error: severity=Uncorrected (Fatal), type=Transaction Layer, id=0500(Requester ID) +0000:50:00.0: device [8086:0329] error status/mask=00100000/00000000 +0000:50:00.0: [20] Unsupported Request (First) +0000:50:00.0: TLP Header: 04000001 00200a03 05010000 00050100 In the example, 'Requester ID' means the ID of the device who sends the error message to root port. Pls. refer to pci express specs for -- cgit v1.2.3 From c6d34eddecb34fd84f9fb2ea26a63cfde5662f49 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:09:13 +0900 Subject: PCI: aerdrv: RsvdP of PCI_ERR_ROOT_COMMAND Handle preserved bits properly. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.c | 18 +++++++++++------- drivers/pci/pcie/aer/aerdrv_core.c | 10 ++++++---- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 7a711ee314b7..4e845ab18643 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -234,13 +234,15 @@ static int __devinit aer_probe(struct pcie_device *dev) static pci_ers_result_t aer_root_reset(struct pci_dev *dev) { u16 p2p_ctrl; - u32 status; + u32 reg32; int pos; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); /* Disable Root's interrupt in response to error messages */ - pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, 0); + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); /* Assert Secondary Bus Reset */ pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &p2p_ctrl); @@ -265,12 +267,14 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) msleep(200); dev_printk(KERN_DEBUG, &dev->dev, "Root Port link has been reset\n"); + /* Clear Root Error Status */ + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, reg32); + /* Enable Root Port's interrupt in response to error messages */ - pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); - pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status); - pci_write_config_dword(dev, - pos + PCI_ERR_ROOT_COMMAND, - ROOT_PORT_INTR_ON_MESG_MASK); + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); return PCI_ERS_RESULT_RECOVERED; } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index aceb04b67b60..9754a09bf20e 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -623,9 +623,9 @@ void aer_enable_rootport(struct aer_rpc *rpc) set_downstream_devices_error_reporting(pdev, true); /* Enable Root Port's interrupt in response to error messages */ - pci_write_config_dword(pdev, - aer_pos + PCI_ERR_ROOT_COMMAND, - ROOT_PORT_INTR_ON_MESG_MASK); + pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32); } /** @@ -648,7 +648,9 @@ static void disable_root_aer(struct aer_rpc *rpc) pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); /* Disable Root's interrupt in response to error messages */ - pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0); + pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, reg32); /* Clear Root's error status reg */ pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, ®32); -- cgit v1.2.3 From 460d298d521910483dcdc09920ca4c4a63b16730 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:10:03 +0900 Subject: PCI: aerdrv: cleanup inconsistent functions This cleanup solves some minor naming issues by removing unuseful function aer_delete_rootport() and by renaming disable_root_aer() to aer_disable_rootport(). - Inconsistent location of alloc & free: The struct rpc is allocated in aer_alloc_rpc() at aerdrv.c while it is implicitly freed in aer_delete_rootport() at aerdrv_core.c. - Inconsistent function name: It makes a bit confusion that aer_delete_rootport() is seemed to be paired with aer_enable_rootport(), i.e. there is neither "add" against "delete" nor "disable" against "enable". Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.c | 3 ++- drivers/pci/pcie/aer/aerdrv.h | 2 +- drivers/pci/pcie/aer/aerdrv_core.c | 18 ++---------------- 3 files changed, 5 insertions(+), 18 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 4e845ab18643..14081f807e50 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -179,7 +179,8 @@ static void aer_remove(struct pcie_device *dev) wait_event(rpc->wait_release, rpc->prod_idx == rpc->cons_idx); - aer_delete_rootport(rpc); + aer_disable_rootport(rpc); + kfree(rpc); set_service_data(dev, NULL); } } diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index bd833ea3ba49..b6fc5389dd09 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -118,7 +118,7 @@ static inline pci_ers_result_t merge_result(enum pci_ers_result orig, extern struct bus_type pcie_port_bus_type; extern void aer_enable_rootport(struct aer_rpc *rpc); -extern void aer_delete_rootport(struct aer_rpc *rpc); +extern void aer_disable_rootport(struct aer_rpc *rpc); extern int aer_init(struct pcie_device *dev); extern void aer_isr(struct work_struct *work); extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 9754a09bf20e..0dcbae126834 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -629,12 +629,12 @@ void aer_enable_rootport(struct aer_rpc *rpc) } /** - * disable_root_aer - disable Root Port's interrupts when receiving messages + * aer_disable_rootport - disable Root Port's interrupts when receiving messages * @rpc: pointer to a Root Port data structure * * Invoked when PCIe bus unloads AER service driver. */ -static void disable_root_aer(struct aer_rpc *rpc) +void aer_disable_rootport(struct aer_rpc *rpc) { struct pci_dev *pdev = rpc->rpd->port; u32 reg32; @@ -839,20 +839,6 @@ void aer_isr(struct work_struct *work) wake_up(&rpc->wait_release); } -/** - * aer_delete_rootport - disable root port aer and delete service data - * @rpc: pointer to a root port device being deleted - * - * Invoked when AER service unloaded on a specific Root Port - */ -void aer_delete_rootport(struct aer_rpc *rpc) -{ - /* Disable root port AER itself */ - disable_root_aer(rpc); - - kfree(rpc); -} - /** * aer_init - provide AER initialization * @dev: pointer to AER pcie device -- cgit v1.2.3 From 843f4697eea576c24f057bbdb199115bbb6b10bc Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:10:53 +0900 Subject: PCI: aerdrv: make aer_{en,dis}able_rootport static These functions are only called from init/remove path of aerdrv, so move them from aerdrv_core.c to aerdrv.c, to make them static. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.c | 107 +++++++++++++++++++++++++++++++++++++ drivers/pci/pcie/aer/aerdrv.h | 2 - drivers/pci/pcie/aer/aerdrv_core.c | 107 ------------------------------------- 3 files changed, 107 insertions(+), 109 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 14081f807e50..b69dbdc36817 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -72,6 +72,113 @@ void pci_no_aer(void) pcie_aer_disable = 1; /* has priority over 'forceload' */ } +static int set_device_error_reporting(struct pci_dev *dev, void *data) +{ + bool enable = *((bool *)data); + + if ((dev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) || + (dev->pcie_type == PCI_EXP_TYPE_UPSTREAM) || + (dev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)) { + if (enable) + pci_enable_pcie_error_reporting(dev); + else + pci_disable_pcie_error_reporting(dev); + } + + if (enable) + pcie_set_ecrc_checking(dev); + + return 0; +} + +/** + * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports. + * @dev: pointer to root port's pci_dev data structure + * @enable: true = enable error reporting, false = disable error reporting. + */ +static void set_downstream_devices_error_reporting(struct pci_dev *dev, + bool enable) +{ + set_device_error_reporting(dev, &enable); + + if (!dev->subordinate) + return; + pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable); +} + +/** + * aer_enable_rootport - enable Root Port's interrupts when receiving messages + * @rpc: pointer to a Root Port data structure + * + * Invoked when PCIe bus loads AER service driver. + */ +static void aer_enable_rootport(struct aer_rpc *rpc) +{ + struct pci_dev *pdev = rpc->rpd->port; + int pos, aer_pos; + u16 reg16; + u32 reg32; + + pos = pci_pcie_cap(pdev); + /* Clear PCIe Capability's Device Status */ + pci_read_config_word(pdev, pos+PCI_EXP_DEVSTA, ®16); + pci_write_config_word(pdev, pos+PCI_EXP_DEVSTA, reg16); + + /* Disable system error generation in response to error messages */ + pci_read_config_word(pdev, pos + PCI_EXP_RTCTL, ®16); + reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK); + pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16); + + aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); + /* Clear error status */ + pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32); + pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, ®32); + pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32); + pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, ®32); + pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32); + + /* + * Enable error reporting for the root port device and downstream port + * devices. + */ + set_downstream_devices_error_reporting(pdev, true); + + /* Enable Root Port's interrupt in response to error messages */ + pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32); +} + +/** + * aer_disable_rootport - disable Root Port's interrupts when receiving messages + * @rpc: pointer to a Root Port data structure + * + * Invoked when PCIe bus unloads AER service driver. + */ +static void aer_disable_rootport(struct aer_rpc *rpc) +{ + struct pci_dev *pdev = rpc->rpd->port; + u32 reg32; + int pos; + + /* + * Disable error reporting for the root port device and downstream port + * devices. + */ + set_downstream_devices_error_reporting(pdev, false); + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); + /* Disable Root's interrupt in response to error messages */ + pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, ®32); + reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; + pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, reg32); + + /* Clear Root's error status reg */ + pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, ®32); + pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32); +} + /** * aer_irq - Root Port's ISR * @irq: IRQ assigned to Root Port diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index b6fc5389dd09..2f345405e823 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -117,8 +117,6 @@ static inline pci_ers_result_t merge_result(enum pci_ers_result orig, } extern struct bus_type pcie_port_bus_type; -extern void aer_enable_rootport(struct aer_rpc *rpc); -extern void aer_disable_rootport(struct aer_rpc *rpc); extern int aer_init(struct pcie_device *dev); extern void aer_isr(struct work_struct *work); extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 0dcbae126834..ad15eea54fd0 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -99,40 +99,6 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); -static int set_device_error_reporting(struct pci_dev *dev, void *data) -{ - bool enable = *((bool *)data); - - if ((dev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) || - (dev->pcie_type == PCI_EXP_TYPE_UPSTREAM) || - (dev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)) { - if (enable) - pci_enable_pcie_error_reporting(dev); - else - pci_disable_pcie_error_reporting(dev); - } - - if (enable) - pcie_set_ecrc_checking(dev); - - return 0; -} - -/** - * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports. - * @dev: pointer to root port's pci_dev data structure - * @enable: true = enable error reporting, false = disable error reporting. - */ -static void set_downstream_devices_error_reporting(struct pci_dev *dev, - bool enable) -{ - set_device_error_reporting(dev, &enable); - - if (!dev->subordinate) - return; - pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable); -} - static inline int compare_device_id(struct pci_dev *dev, struct aer_err_info *e_info) { @@ -584,79 +550,6 @@ static void handle_error_source(struct pcie_device *aerdev, } } -/** - * aer_enable_rootport - enable Root Port's interrupts when receiving messages - * @rpc: pointer to a Root Port data structure - * - * Invoked when PCIe bus loads AER service driver. - */ -void aer_enable_rootport(struct aer_rpc *rpc) -{ - struct pci_dev *pdev = rpc->rpd->port; - int pos, aer_pos; - u16 reg16; - u32 reg32; - - pos = pci_pcie_cap(pdev); - /* Clear PCIe Capability's Device Status */ - pci_read_config_word(pdev, pos+PCI_EXP_DEVSTA, ®16); - pci_write_config_word(pdev, pos+PCI_EXP_DEVSTA, reg16); - - /* Disable system error generation in response to error messages */ - pci_read_config_word(pdev, pos + PCI_EXP_RTCTL, ®16); - reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK); - pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16); - - aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); - /* Clear error status */ - pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, ®32); - pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32); - pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, ®32); - pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32); - pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, ®32); - pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32); - - /* - * Enable error reporting for the root port device and downstream port - * devices. - */ - set_downstream_devices_error_reporting(pdev, true); - - /* Enable Root Port's interrupt in response to error messages */ - pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, ®32); - reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32); -} - -/** - * aer_disable_rootport - disable Root Port's interrupts when receiving messages - * @rpc: pointer to a Root Port data structure - * - * Invoked when PCIe bus unloads AER service driver. - */ -void aer_disable_rootport(struct aer_rpc *rpc) -{ - struct pci_dev *pdev = rpc->rpd->port; - u32 reg32; - int pos; - - /* - * Disable error reporting for the root port device and downstream port - * devices. - */ - set_downstream_devices_error_reporting(pdev, false); - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); - /* Disable Root's interrupt in response to error messages */ - pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, ®32); - reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; - pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, reg32); - - /* Clear Root's error status reg */ - pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, ®32); - pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32); -} - /** * get_e_source - retrieve an error source * @rpc: pointer to the root port which holds an error -- cgit v1.2.3 From 98ca3964fe8da0d742331af80952443af5cff464 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:11:42 +0900 Subject: PCI: aerdrv: rework find_source_device Return bool to indicate that the source device is found or not. This allows us to skip calling aer_process_err_devices() if we can. And move dev_printk for debug into this function. v2: return bool instead of int Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_core.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index ad15eea54fd0..5fa5c76719b0 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -215,11 +215,13 @@ added: /** * find_source_device - search through device hierarchy for source device * @parent: pointer to Root Port pci_dev data structure - * @err_info: including detailed error information such like id + * @e_info: including detailed error information such like id * - * Invoked when error is detected at the Root Port. + * Return true if found. + * + * Invoked by DPC when error is detected at the Root Port. */ -static void find_source_device(struct pci_dev *parent, +static bool find_source_device(struct pci_dev *parent, struct aer_err_info *e_info) { struct pci_dev *dev = parent; @@ -228,9 +230,17 @@ static void find_source_device(struct pci_dev *parent, /* Is Root Port an agent that sends error message? */ result = find_device_iter(dev, e_info); if (result) - return; + return true; pci_walk_bus(parent->subordinate, find_device_iter, e_info); + + if (!e_info->error_dev_num) { + dev_printk(KERN_DEBUG, &parent->dev, + "can't find device of ID%04x\n", + e_info->id); + return false; + } + return true; } static int report_error_detected(struct pci_dev *dev, void *data) @@ -639,12 +649,6 @@ static inline void aer_process_err_devices(struct pcie_device *p_device, { int i; - if (!e_info->dev[0]) { - dev_printk(KERN_DEBUG, &p_device->port->dev, - "can't find device of ID%04x\n", - e_info->id); - } - /* Report all before handle them, not to lost records by reset etc. */ for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { if (get_device_error_info(e_info->dev[i], e_info)) @@ -702,8 +706,8 @@ static void aer_isr_one_error(struct pcie_device *p_device, aer_print_port_info(p_device->port, e_info); - find_source_device(p_device->port, e_info); - aer_process_err_devices(p_device, e_info); + if (find_source_device(p_device->port, e_info)) + aer_process_err_devices(p_device, e_info); } kfree(e_info); -- cgit v1.2.3 From c887275e6a5b857b72c798e4a6019160a860e2ef Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:12:21 +0900 Subject: PCI: aerdrv: introduce is_error_source Take core part of find_device_iter() to make a new function is_error_source() that checks given device has report an error or not. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_core.c | 74 ++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 5fa5c76719b0..5b02f62cdc47 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -126,14 +126,17 @@ static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) #define PCI_BUS(x) (((x) >> 8) & 0xff) -static int find_device_iter(struct pci_dev *dev, void *data) +/** + * is_error_source - check whether the device is source of reported error + * @dev: pointer to pci_dev to be checked + * @e_info: pointer to reported error info + */ +static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) { int pos; - u32 status; - u32 mask; + u32 status, mask; u16 reg16; int result; - struct aer_err_info *e_info = (struct aer_err_info *)data; /* * When bus id is equal to 0, it might be a bad id @@ -142,22 +145,11 @@ static int find_device_iter(struct pci_dev *dev, void *data) if (!nosourceid && (PCI_BUS(e_info->id) != 0)) { result = compare_device_id(dev, e_info); if (result) - add_error_device(e_info, dev); + return true; - /* - * If there is no multiple error, we stop - * or continue based on the id comparing. - */ + /* Continue id comparing if there is no multiple error */ if (!e_info->multi_error_valid) - return result; - - /* - * If there are multiple errors and id does match, - * We need continue to search other devices under - * the root port. Return 0 means that. - */ - if (result) - return 0; + return false; } /* @@ -166,50 +158,52 @@ static int find_device_iter(struct pci_dev *dev, void *data) * 2) bus id is equal to 0. Some ports might lose the bus * id of error source id; * 3) There are multiple errors and prior id comparing fails; - * We check AER status registers to find the initial reporter. + * We check AER status registers to find possible reporter. */ if (atomic_read(&dev->enable_cnt) == 0) - return 0; + return false; pos = pci_pcie_cap(dev); if (!pos) - return 0; + return false; + /* Check if AER is enabled */ - pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, ®16); + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, ®16); if (!(reg16 & ( PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE))) - return 0; + return false; pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); if (!pos) - return 0; + return false; - status = 0; - mask = 0; + /* Check if error is recorded */ if (e_info->severity == AER_CORRECTABLE) { pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status); pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask); - if (status & ~mask) { - add_error_device(e_info, dev); - goto added; - } } else { pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask); - if (status & ~mask) { - add_error_device(e_info, dev); - goto added; - } } + if (status & ~mask) + return true; - return 0; + return false; +} -added: - if (e_info->multi_error_valid) - return 0; - else - return 1; +static int find_device_iter(struct pci_dev *dev, void *data) +{ + struct aer_err_info *e_info = (struct aer_err_info *)data; + + if (is_error_source(dev, e_info)) { + add_error_device(e_info, dev); + + /* If there is only a single error, stop iteration */ + if (!e_info->multi_error_valid) + return 1; + } + return 0; } /** -- cgit v1.2.3 From bd17d4742d5a8cbedd41a1d44c0cdee84a532363 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:13:41 +0900 Subject: PCI: aerdrv: remove compare_device_id Inline too-simple subroutine only used here. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_core.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 5b02f62cdc47..f8ffa47503b7 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -99,19 +99,6 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); -static inline int compare_device_id(struct pci_dev *dev, - struct aer_err_info *e_info) -{ - if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) { - /* - * Device ID match - */ - return 1; - } - - return 0; -} - static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) { if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { @@ -136,15 +123,14 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) int pos; u32 status, mask; u16 reg16; - int result; /* * When bus id is equal to 0, it might be a bad id * reported by root port. */ if (!nosourceid && (PCI_BUS(e_info->id) != 0)) { - result = compare_device_id(dev, e_info); - if (result) + /* Device ID match? */ + if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) return true; /* Continue id comparing if there is no multiple error */ -- cgit v1.2.3 From 4a0c096efd4383fc98aa40e195363f600ba814f8 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:14:17 +0900 Subject: PCI: aerdrv: rework add_error_device Stop iteration if we cannot register any more. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_core.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index f8ffa47503b7..f5eb69f532e7 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -99,18 +99,21 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); +/** + * add_error_device - list device to be handled + * @e_info: pointer to error info + * @dev: pointer to pci_dev to be added + */ static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) { if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { e_info->dev[e_info->error_dev_num] = dev; e_info->error_dev_num++; - return 1; + return 0; } - - return 0; + return -ENOSPC; } - #define PCI_BUS(x) (((x) >> 8) & 0xff) /** @@ -183,7 +186,12 @@ static int find_device_iter(struct pci_dev *dev, void *data) struct aer_err_info *e_info = (struct aer_err_info *)data; if (is_error_source(dev, e_info)) { - add_error_device(e_info, dev); + /* List this device */ + if (add_error_device(e_info, dev)) { + /* We cannot handle more... Stop iteration */ + /* TODO: Should print error message here? */ + return 1; + } /* If there is only a single error, stop iteration */ if (!e_info->multi_error_valid) -- cgit v1.2.3 From 7c4ec94f72cefec1c1b42219469794a34864a1ee Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:15:08 +0900 Subject: PCI: aerdrv: rework aer_isr_one_error() Divide tricky for-loop into readable if-blocks. The logic to set multi_error_valid (to force walking pci bus hierarchy to find 2nd~ error devices) is changed too, to check MULTI_{,_UN}COR_RCV bit individually and to force walk only when it is required. And rework setting e_info->severity for uncorrectable, not to use magic numbers. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_core.c | 57 ++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index f5eb69f532e7..ca140580199c 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -208,6 +208,9 @@ static int find_device_iter(struct pci_dev *dev, void *data) * Return true if found. * * Invoked by DPC when error is detected at the Root Port. + * Caller of this function must set id, severity, and multi_error_valid of + * struct aer_err_info pointed by @e_info properly. This function must fill + * e_info->error_dev_num and e_info->dev[], based on the given information. */ static bool find_source_device(struct pci_dev *parent, struct aer_err_info *e_info) @@ -215,6 +218,9 @@ static bool find_source_device(struct pci_dev *parent, struct pci_dev *dev = parent; int result; + /* Must reset in this function */ + e_info->error_dev_num = 0; + /* Is Root Port an agent that sends error message? */ result = find_device_iter(dev, e_info); if (result) @@ -580,11 +586,14 @@ static struct aer_err_source *get_e_source(struct aer_rpc *rpc) * @info: pointer to structure to store the error record * * Return 1 on success, 0 on error. + * + * Note that @info is reused among all error devices. Clear fields properly. */ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) { int pos, temp; + /* Must reset in this function */ info->status = 0; info->tlp_header_valid = 0; @@ -657,11 +666,10 @@ static void aer_isr_one_error(struct pcie_device *p_device, struct aer_err_source *e_src) { struct aer_err_info *e_info; - int i; /* struct aer_err_info might be big, so we allocate it with slab */ e_info = kmalloc(sizeof(struct aer_err_info), GFP_KERNEL); - if (e_info == NULL) { + if (!e_info) { dev_printk(KERN_DEBUG, &p_device->port->dev, "Can't allocate mem when processing AER errors\n"); return; @@ -671,26 +679,33 @@ static void aer_isr_one_error(struct pcie_device *p_device, * There is a possibility that both correctable error and * uncorrectable error being logged. Report correctable error first. */ - for (i = 1; i & ROOT_ERR_STATUS_MASKS ; i <<= 2) { - if (i > 4) - break; - if (!(e_src->status & i)) - continue; - - memset(e_info, 0, sizeof(struct aer_err_info)); - - /* Init comprehensive error information */ - if (i & PCI_ERR_ROOT_COR_RCV) { - e_info->id = ERR_COR_ID(e_src->id); - e_info->severity = AER_CORRECTABLE; - } else { - e_info->id = ERR_UNCOR_ID(e_src->id); - e_info->severity = ((e_src->status >> 6) & 1); - } - if (e_src->status & - (PCI_ERR_ROOT_MULTI_COR_RCV | - PCI_ERR_ROOT_MULTI_UNCOR_RCV)) + if (e_src->status & PCI_ERR_ROOT_COR_RCV) { + e_info->id = ERR_COR_ID(e_src->id); + e_info->severity = AER_CORRECTABLE; + + if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV) + e_info->multi_error_valid = 1; + else + e_info->multi_error_valid = 0; + + aer_print_port_info(p_device->port, e_info); + + if (find_source_device(p_device->port, e_info)) + aer_process_err_devices(p_device, e_info); + } + + if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { + e_info->id = ERR_UNCOR_ID(e_src->id); + + if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) + e_info->severity = AER_FATAL; + else + e_info->severity = AER_NONFATAL; + + if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV) e_info->multi_error_valid = 1; + else + e_info->multi_error_valid = 0; aer_print_port_info(p_device->port, e_info); -- cgit v1.2.3 From 88da13bfabbffb8f89574eb168b9da9a0abc693f Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:16:16 +0900 Subject: PCI: aerdrv: rework get_e_source() Current get_e_source() returns pointer to an element of array. However since it also progress consume counter, it is possible that the element is overwritten by newly produced data before the element is really consumed. This patch changes get_e_source() to copy contents of the element to address pointed by its caller. Once copied the element in array can be consumed. And relocate this function to more innocuous place. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_core.c | 63 +++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index ca140580199c..210e53c2fdc1 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -554,32 +554,6 @@ static void handle_error_source(struct pcie_device *aerdev, } } -/** - * get_e_source - retrieve an error source - * @rpc: pointer to the root port which holds an error - * - * Invoked by DPC handler to consume an error. - */ -static struct aer_err_source *get_e_source(struct aer_rpc *rpc) -{ - struct aer_err_source *e_source; - unsigned long flags; - - /* Lock access to Root error producer/consumer index */ - spin_lock_irqsave(&rpc->e_lock, flags); - if (rpc->prod_idx == rpc->cons_idx) { - spin_unlock_irqrestore(&rpc->e_lock, flags); - return NULL; - } - e_source = &rpc->e_sources[rpc->cons_idx]; - rpc->cons_idx++; - if (rpc->cons_idx == AER_ERROR_SOURCES_MAX) - rpc->cons_idx = 0; - spin_unlock_irqrestore(&rpc->e_lock, flags); - - return e_source; -} - /** * get_device_error_info - read error status from dev and store it to info * @dev: pointer to the device expected to have a error record @@ -716,6 +690,34 @@ static void aer_isr_one_error(struct pcie_device *p_device, kfree(e_info); } +/** + * get_e_source - retrieve an error source + * @rpc: pointer to the root port which holds an error + * @e_src: pointer to store retrieved error source + * + * Return 1 if an error source is retrieved, otherwise 0. + * + * Invoked by DPC handler to consume an error. + */ +static int get_e_source(struct aer_rpc *rpc, struct aer_err_source *e_src) +{ + unsigned long flags; + int ret = 0; + + /* Lock access to Root error producer/consumer index */ + spin_lock_irqsave(&rpc->e_lock, flags); + if (rpc->prod_idx != rpc->cons_idx) { + *e_src = rpc->e_sources[rpc->cons_idx]; + rpc->cons_idx++; + if (rpc->cons_idx == AER_ERROR_SOURCES_MAX) + rpc->cons_idx = 0; + ret = 1; + } + spin_unlock_irqrestore(&rpc->e_lock, flags); + + return ret; +} + /** * aer_isr - consume errors detected by root port * @work: definition of this work item @@ -726,14 +728,11 @@ void aer_isr(struct work_struct *work) { struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler); struct pcie_device *p_device = rpc->rpd; - struct aer_err_source *e_src; + struct aer_err_source e_src; mutex_lock(&rpc->rpc_mutex); - e_src = get_e_source(rpc); - while (e_src) { - aer_isr_one_error(p_device, e_src); - e_src = get_e_source(rpc); - } + while (get_e_source(rpc, &e_src)) + aer_isr_one_error(p_device, &e_src); mutex_unlock(&rpc->rpc_mutex); wake_up(&rpc->wait_release); -- cgit v1.2.3 From 17e21854bd59862f4ee47d1c7e828549f782711b Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:16:52 +0900 Subject: PCI: aerdrv: rework do_recovery Move dev_printks for debug into do_recovery(). This allows do_recovery() to return void. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_core.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 210e53c2fdc1..9dcd3aeeafb6 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -465,8 +465,7 @@ static pci_ers_result_t reset_link(struct pcie_device *aerdev, * error detected message to all downstream drivers within a hierarchy in * question and return the returned code. */ -static pci_ers_result_t do_recovery(struct pcie_device *aerdev, - struct pci_dev *dev, +static void do_recovery(struct pcie_device *aerdev, struct pci_dev *dev, int severity) { pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED; @@ -484,10 +483,8 @@ static pci_ers_result_t do_recovery(struct pcie_device *aerdev, if (severity == AER_FATAL) { result = reset_link(aerdev, dev); - if (result != PCI_ERS_RESULT_RECOVERED) { - /* TODO: Should panic here? */ - return result; - } + if (result != PCI_ERS_RESULT_RECOVERED) + goto failed; } if (status == PCI_ERS_RESULT_CAN_RECOVER) @@ -508,13 +505,22 @@ static pci_ers_result_t do_recovery(struct pcie_device *aerdev, report_slot_reset); } - if (status == PCI_ERS_RESULT_RECOVERED) - broadcast_error_message(dev, + if (status != PCI_ERS_RESULT_RECOVERED) + goto failed; + + broadcast_error_message(dev, state, "resume", report_resume); - return status; + dev_printk(KERN_DEBUG, &dev->dev, + "AER driver successfully recovered\n"); + return; + +failed: + /* TODO: Should kernel panic here? */ + dev_printk(KERN_DEBUG, &dev->dev, + "AER driver didn't recover\n"); } /** @@ -529,7 +535,6 @@ static void handle_error_source(struct pcie_device *aerdev, struct pci_dev *dev, struct aer_err_info *info) { - pci_ers_result_t status = 0; int pos; if (info->severity == AER_CORRECTABLE) { @@ -541,17 +546,8 @@ static void handle_error_source(struct pcie_device *aerdev, if (pos) pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, info->status); - } else { - status = do_recovery(aerdev, dev, info->severity); - if (status == PCI_ERS_RESULT_RECOVERED) { - dev_printk(KERN_DEBUG, &dev->dev, "AER driver " - "successfully recovered\n"); - } else { - /* TODO: Should kernel panic here? */ - dev_printk(KERN_DEBUG, &dev->dev, "AER driver didn't " - "recover\n"); - } - } + } else + do_recovery(aerdev, dev, info->severity); } /** -- cgit v1.2.3 From f647a44f5725b0e6c8211096f4b49900164123ee Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:17:33 +0900 Subject: PCI: aerdrv: redefine PCI_ERR_ROOT_*_SRC The Error Source Identification Register (Offset 34h) is 4 byte which contains a couple of 2 byte field, "[15:0] ERR_COR Source Identification" and "[31:16] ERR_FATAL/NONFATAL Source Identification." This patch defines PCI_ERR_ROOT_ERR_SRC to make dword access sensible. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aer_inject.c | 2 +- drivers/pci/pcie/aer/aerdrv.c | 2 +- include/linux/pci_regs.h | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer/aer_inject.c index f8f425b8731d..909924692b8a 100644 --- a/drivers/pci/pcie/aer/aer_inject.c +++ b/drivers/pci/pcie/aer/aer_inject.c @@ -168,7 +168,7 @@ static u32 *find_pci_config_dword(struct aer_error *err, int where, target = &err->root_status; rw1cs = 1; break; - case PCI_ERR_ROOT_COR_SRC: + case PCI_ERR_ROOT_ERR_SRC: target = &err->source_id; break; } diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index b69dbdc36817..1a55c16e2f3f 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -210,7 +210,7 @@ irqreturn_t aer_irq(int irq, void *context) } /* Read error source and clear error status */ - pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_COR_SRC, &id); + pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_ERR_SRC, &id); pci_write_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, status); /* Store error source for later DPC handler */ diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index c8f302991b66..dd0dd873f637 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -563,8 +563,7 @@ #define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First Fatal */ #define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ #define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ -#define PCI_ERR_ROOT_COR_SRC 52 -#define PCI_ERR_ROOT_SRC 54 +#define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */ /* Virtual Channel */ #define PCI_VC_PORT_REG1 4 -- cgit v1.2.3 From e167bfcaa4cd44b4c66206a3c06b2aafb3f1260e Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:18:26 +0900 Subject: PCI: aerdrv: remove magical ROOT_ERR_STATUS_MASKS Make it clear that we only interest in 2 *_RCV bits. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.c | 2 +- drivers/pci/pcie/aer/aerdrv.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 1a55c16e2f3f..cbc7cc77b2c3 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -204,7 +204,7 @@ irqreturn_t aer_irq(int irq, void *context) /* Read error status */ pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, &status); - if (!(status & ROOT_ERR_STATUS_MASKS)) { + if (!(status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) { spin_unlock_irqrestore(&rpc->e_lock, flags); return IRQ_NONE; } diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 2f345405e823..d0f8291c5ca0 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -17,9 +17,6 @@ #define AER_FATAL 1 #define AER_CORRECTABLE 2 -/* Root Error Status Register Bits */ -#define ROOT_ERR_STATUS_MASKS 0x0f - #define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \ PCI_EXP_RTCTL_SENFEE| \ PCI_EXP_RTCTL_SEFEE) -- cgit v1.2.3 From 4f7ccf6a6085eefd2517b8c7090608c64b01ab67 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:19:48 +0900 Subject: PCI: aerdrv: remove is_downstream The pcie->port of port service device points the port associated the service with. The find_aer_service iterates over children of given port udev. So it is clear that the pcie->port of port service of given port udev must always point the udev. Therefore we can know the type of udev without checking its children. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_core.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 9dcd3aeeafb6..8d458a03afc9 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -375,30 +375,20 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, struct find_aer_service_data { struct pcie_port_service_driver *aer_driver; - int is_downstream; }; static int find_aer_service_iter(struct device *device, void *data) { - struct device_driver *driver; struct pcie_port_service_driver *service_driver; struct find_aer_service_data *result; result = (struct find_aer_service_data *) data; - if (device->bus == &pcie_port_bus_type) { - struct pcie_device *pcie = to_pcie_device(device); - - if (pcie->port->pcie_type == PCI_EXP_TYPE_DOWNSTREAM) - result->is_downstream = 1; - - driver = device->driver; - if (driver) { - service_driver = to_service_driver(driver); - if (service_driver->service == PCIE_PORT_SERVICE_AER) { - result->aer_driver = service_driver; - return 1; - } + if (device->bus == &pcie_port_bus_type && device->driver) { + service_driver = to_service_driver(device->driver); + if (service_driver->service == PCIE_PORT_SERVICE_AER) { + result->aer_driver = service_driver; + return 1; } } @@ -424,7 +414,6 @@ static pci_ers_result_t reset_link(struct pcie_device *aerdev, else udev = dev->bus->self; - data.is_downstream = 0; data.aer_driver = NULL; find_aer_service(udev, &data); @@ -433,22 +422,24 @@ static pci_ers_result_t reset_link(struct pcie_device *aerdev, * If it hasn't the aer driver, use the root port's */ if (!data.aer_driver || !data.aer_driver->reset_link) { - if (data.is_downstream && + if (udev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM && aerdev->device.driver && to_service_driver(aerdev->device.driver)->reset_link) { data.aer_driver = to_service_driver(aerdev->device.driver); } else { - dev_printk(KERN_DEBUG, &dev->dev, "no link-reset " - "support\n"); + dev_printk(KERN_DEBUG, &dev->dev, + "no link-reset support at upstream device %s\n", + pci_name(udev)); return PCI_ERS_RESULT_DISCONNECT; } } status = data.aer_driver->reset_link(udev); if (status != PCI_ERS_RESULT_RECOVERED) { - dev_printk(KERN_DEBUG, &dev->dev, "link reset at upstream " - "device %s failed\n", pci_name(udev)); + dev_printk(KERN_DEBUG, &dev->dev, + "link reset at upstream device %s failed\n", + pci_name(udev)); return PCI_ERS_RESULT_DISCONNECT; } -- cgit v1.2.3 From 517cae3829ae8cc3033c24f60e64eb251b2f0d14 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:20:43 +0900 Subject: PCI: aerdrv: rework find_aer_service The structure find_aer_service_data is no longer useful. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Reviewed-by: Jin Dongming Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_core.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 8d458a03afc9..8fb14aeb74dd 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -373,21 +373,16 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, return result_data.result; } -struct find_aer_service_data { - struct pcie_port_service_driver *aer_driver; -}; - static int find_aer_service_iter(struct device *device, void *data) { - struct pcie_port_service_driver *service_driver; - struct find_aer_service_data *result; + struct pcie_port_service_driver *service_driver, **drv; - result = (struct find_aer_service_data *) data; + drv = (struct pcie_port_service_driver **) data; if (device->bus == &pcie_port_bus_type && device->driver) { service_driver = to_service_driver(device->driver); if (service_driver->service == PCIE_PORT_SERVICE_AER) { - result->aer_driver = service_driver; + *drv = service_driver; return 1; } } @@ -395,11 +390,13 @@ static int find_aer_service_iter(struct device *device, void *data) return 0; } -static void find_aer_service(struct pci_dev *dev, - struct find_aer_service_data *data) +static struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev) { - int retval; - retval = device_for_each_child(&dev->dev, data, find_aer_service_iter); + struct pcie_port_service_driver *drv = NULL; + + device_for_each_child(&dev->dev, &drv, find_aer_service_iter); + + return drv; } static pci_ers_result_t reset_link(struct pcie_device *aerdev, @@ -407,26 +404,24 @@ static pci_ers_result_t reset_link(struct pcie_device *aerdev, { struct pci_dev *udev; pci_ers_result_t status; - struct find_aer_service_data data; + struct pcie_port_service_driver *driver; if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) udev = dev; else udev = dev->bus->self; - data.aer_driver = NULL; - find_aer_service(udev, &data); + /* Use the aer driver of the component firstly */ + driver = find_aer_service(udev); /* - * Use the aer driver of the error agent firstly. - * If it hasn't the aer driver, use the root port's + * If it hasn't the driver and is downstream port, use the root port's */ - if (!data.aer_driver || !data.aer_driver->reset_link) { + if (!driver || !driver->reset_link) { if (udev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM && aerdev->device.driver && to_service_driver(aerdev->device.driver)->reset_link) { - data.aer_driver = - to_service_driver(aerdev->device.driver); + driver = to_service_driver(aerdev->device.driver); } else { dev_printk(KERN_DEBUG, &dev->dev, "no link-reset support at upstream device %s\n", @@ -435,7 +430,7 @@ static pci_ers_result_t reset_link(struct pcie_device *aerdev, } } - status = data.aer_driver->reset_link(udev); + status = driver->reset_link(udev); if (status != PCI_ERS_RESULT_RECOVERED) { dev_printk(KERN_DEBUG, &dev->dev, "link reset at upstream device %s failed\n", -- cgit v1.2.3 From 89713422a768458a0d375f0c2f3586cd5ccde6a1 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:21:27 +0900 Subject: PCI: aerdrv: introduce default_downstream_reset_link I noticed that when I inject a fatal error to an endpoint via aer-inject, aer_root_reset() is called as reset_link for a downstream port at upstream of the endpoint: pcieport 0000:00:06.0: AER: Uncorrected (Fatal) error received: id=5401 : pcieport 0000:52:02.0: Root Port link has been reset It externally appears to be working, but internally issues some accesses to PCI_ERR_ROOT_COMMAND/STATUS registers that is for root port so not available on downstream port. This patch introduces default_downstream_reset_link that is a version of aer_root_reset() with no accesses to root port's register. It is used for downstream ports that has no reset_link function its specific. This patch also updates related description in pcieaer-howto.txt. Some minor fixes are included. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- Documentation/PCI/pcieaer-howto.txt | 15 +++---- drivers/pci/pcie/aer/aerdrv.c | 23 +---------- drivers/pci/pcie/aer/aerdrv.h | 1 + drivers/pci/pcie/aer/aerdrv_core.c | 78 +++++++++++++++++++++++++++++-------- 4 files changed, 71 insertions(+), 46 deletions(-) diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt index 8c406a496799..26d3d945c3c2 100644 --- a/Documentation/PCI/pcieaer-howto.txt +++ b/Documentation/PCI/pcieaer-howto.txt @@ -13,7 +13,7 @@ Reporting (AER) driver and provides information on how to use it, as well as how to enable the drivers of endpoint devices to conform with PCI Express AER driver. -1.2 Copyright © Intel Corporation 2006. +1.2 Copyright (C) Intel Corporation 2006. 1.3 What is the PCI Express AER Driver? @@ -108,7 +108,7 @@ but the PCI Express link itself is fully functional. Fatal errors, on the other hand, cause the link to be unreliable. When AER is enabled, a PCI Express device will automatically send an -error message to the PCIE root port above it when the device captures +error message to the PCIe root port above it when the device captures an error. The Root Port, upon receiving an error reporting message, internally processes and logs the error message in its PCI Express capability structure. Error information being logged includes storing @@ -194,8 +194,9 @@ to reset link, AER port service driver is required to provide the function to reset link. Firstly, kernel looks for if the upstream component has an aer driver. If it has, kernel uses the reset_link callback of the aer driver. If the upstream component has no aer driver -and the port is downstream port, we will use the aer driver of the -root port who reports the AER error. As for upstream ports, +and the port is downstream port, we will perform a hot reset as the +default by setting the Secondary Bus Reset bit of the Bridge Control +register associated with the downstream port. As for upstream ports, they should provide their own aer service drivers with reset_link function. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER and reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes @@ -249,11 +250,11 @@ cleanup uncorrectable status register. Pls. refer to section 3.3. 4. Software error injection -Debugging PCIE AER error recovery code is quite difficult because it +Debugging PCIe AER error recovery code is quite difficult because it is hard to trigger real hardware errors. Software based error -injection can be used to fake various kinds of PCIE errors. +injection can be used to fake various kinds of PCIe errors. -First you should enable PCIE AER software error injection in kernel +First you should enable PCIe AER software error injection in kernel configuration, that is, following item should be in your .config. CONFIG_PCIEAER_INJECT=y or CONFIG_PCIEAER_INJECT=m diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index cbc7cc77b2c3..a225d58c1ac8 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -341,7 +341,6 @@ static int __devinit aer_probe(struct pcie_device *dev) **/ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) { - u16 p2p_ctrl; u32 reg32; int pos; @@ -352,27 +351,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); - /* Assert Secondary Bus Reset */ - pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &p2p_ctrl); - p2p_ctrl |= PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(dev, PCI_BRIDGE_CONTROL, p2p_ctrl); - - /* - * we should send hot reset message for 2ms to allow it time to - * propogate to all downstream ports - */ - msleep(2); - - /* De-assert Secondary Bus Reset */ - p2p_ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(dev, PCI_BRIDGE_CONTROL, p2p_ctrl); - - /* - * System software must wait for at least 100ms from the end - * of a reset of one or more device before it is permitted - * to issue Configuration Requests to those devices. - */ - msleep(200); + aer_do_secondary_bus_reset(dev); dev_printk(KERN_DEBUG, &dev->dev, "Root Port link has been reset\n"); /* Clear Root Error Status */ diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index d0f8291c5ca0..7aaae2d2bd67 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -114,6 +114,7 @@ static inline pci_ers_result_t merge_result(enum pci_ers_result orig, } extern struct bus_type pcie_port_bus_type; +extern void aer_do_secondary_bus_reset(struct pci_dev *dev); extern int aer_init(struct pcie_device *dev); extern void aer_isr(struct work_struct *work); extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 8fb14aeb74dd..ce42cac99dd3 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -373,6 +373,53 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, return result_data.result; } +/** + * aer_do_secondary_bus_reset - perform secondary bus reset + * @dev: pointer to bridge's pci_dev data structure + * + * Invoked when performing link reset at Root Port or Downstream Port. + */ +void aer_do_secondary_bus_reset(struct pci_dev *dev) +{ + u16 p2p_ctrl; + + /* Assert Secondary Bus Reset */ + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &p2p_ctrl); + p2p_ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, p2p_ctrl); + + /* + * we should send hot reset message for 2ms to allow it time to + * propagate to all downstream ports + */ + msleep(2); + + /* De-assert Secondary Bus Reset */ + p2p_ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, p2p_ctrl); + + /* + * System software must wait for at least 100ms from the end + * of a reset of one or more device before it is permitted + * to issue Configuration Requests to those devices. + */ + msleep(200); +} + +/** + * default_downstream_reset_link - default reset function for Downstream Port + * @dev: pointer to downstream port's pci_dev data structure + * + * Invoked when performing link reset at Downstream Port w/ no aer driver. + */ +static pci_ers_result_t default_downstream_reset_link(struct pci_dev *dev) +{ + aer_do_secondary_bus_reset(dev); + dev_printk(KERN_DEBUG, &dev->dev, + "Downstream Port link has been reset\n"); + return PCI_ERS_RESULT_RECOVERED; +} + static int find_aer_service_iter(struct device *device, void *data) { struct pcie_port_service_driver *service_driver, **drv; @@ -406,31 +453,28 @@ static pci_ers_result_t reset_link(struct pcie_device *aerdev, pci_ers_result_t status; struct pcie_port_service_driver *driver; - if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) + if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) { + /* Reset this port for all subordinates */ udev = dev; - else + } else { + /* Reset the upstream component (likely downstream port) */ udev = dev->bus->self; + } /* Use the aer driver of the component firstly */ driver = find_aer_service(udev); - /* - * If it hasn't the driver and is downstream port, use the root port's - */ - if (!driver || !driver->reset_link) { - if (udev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM && - aerdev->device.driver && - to_service_driver(aerdev->device.driver)->reset_link) { - driver = to_service_driver(aerdev->device.driver); - } else { - dev_printk(KERN_DEBUG, &dev->dev, - "no link-reset support at upstream device %s\n", - pci_name(udev)); - return PCI_ERS_RESULT_DISCONNECT; - } + if (driver && driver->reset_link) { + status = driver->reset_link(udev); + } else if (udev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM) { + status = default_downstream_reset_link(udev); + } else { + dev_printk(KERN_DEBUG, &dev->dev, + "no link-reset support at upstream device %s\n", + pci_name(udev)); + return PCI_ERS_RESULT_DISCONNECT; } - status = driver->reset_link(udev); if (status != PCI_ERS_RESULT_RECOVERED) { dev_printk(KERN_DEBUG, &dev->dev, "link reset at upstream device %s failed\n", -- cgit v1.2.3 From f6d3780061283039de33b402c35c3bf9322afe14 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:22:11 +0900 Subject: PCI: aerdrv: trivial cleanup for aerdrv.c Skip zero-ing in aer_alloc_rpc() since it is allocated by kzalloc(). The closing comment marker "*/" is recommended for kernel-doc comments. Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index a225d58c1ac8..484cc55194b8 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -185,7 +185,7 @@ static void aer_disable_rootport(struct aer_rpc *rpc) * @context: pointer to Root Port data structure * * Invoked when Root Port detects AER messages. - **/ + */ irqreturn_t aer_irq(int irq, void *context) { unsigned int status, id; @@ -242,7 +242,7 @@ EXPORT_SYMBOL_GPL(aer_irq); * @dev: pointer to the pcie_dev data structure * * Invoked when Root Port's AER service is loaded. - **/ + */ static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev) { struct aer_rpc *rpc; @@ -251,15 +251,11 @@ static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev) if (!rpc) return NULL; - /* - * Initialize Root lock access, e_lock, to Root Error Status Reg, - * Root Error ID Reg, and Root error producer/consumer index. - */ + /* Initialize Root lock access, e_lock, to Root Error Status Reg */ spin_lock_init(&rpc->e_lock); rpc->rpd = dev; INIT_WORK(&rpc->dpc_handler, aer_isr); - rpc->prod_idx = rpc->cons_idx = 0; mutex_init(&rpc->rpc_mutex); init_waitqueue_head(&rpc->wait_release); @@ -274,7 +270,7 @@ static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev) * @dev: pointer to the pcie_dev data structure * * Invoked when PCI Express bus unloads or AER probe fails. - **/ + */ static void aer_remove(struct pcie_device *dev) { struct aer_rpc *rpc = get_service_data(dev); @@ -298,7 +294,7 @@ static void aer_remove(struct pcie_device *dev) * @id: pointer to the service id data structure * * Invoked when PCI Express bus loads AER service driver. - **/ + */ static int __devinit aer_probe(struct pcie_device *dev) { int status; @@ -338,7 +334,7 @@ static int __devinit aer_probe(struct pcie_device *dev) * @dev: pointer to Root Port's pci_dev data structure * * Invoked by Port Bus driver when performing link reset at Root Port. - **/ + */ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) { u32 reg32; @@ -372,7 +368,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) * @error: error severity being notified by port bus * * Invoked by Port Bus driver during error recovery. - **/ + */ static pci_ers_result_t aer_error_detected(struct pci_dev *dev, enum pci_channel_state error) { @@ -385,7 +381,7 @@ static pci_ers_result_t aer_error_detected(struct pci_dev *dev, * @dev: pointer to Root Port's pci_dev data structure * * Invoked by Port Bus driver during nonfatal recovery. - **/ + */ static void aer_error_resume(struct pci_dev *dev) { int pos; @@ -412,7 +408,7 @@ static void aer_error_resume(struct pci_dev *dev) * aer_service_init - register AER root service driver * * Invoked when AER root service driver is loaded. - **/ + */ static int __init aer_service_init(void) { if (pcie_aer_disable) @@ -426,7 +422,7 @@ static int __init aer_service_init(void) * aer_service_exit - unregister AER root service driver * * Invoked when AER root service driver is unloaded. - **/ + */ static void __exit aer_service_exit(void) { pcie_port_service_unregister(&aerdriver); -- cgit v1.2.3 From caa5afbd4831c649b951ae1227a7985f47547e31 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 15 Apr 2010 13:23:17 +0900 Subject: PCI: aerdrv: trivial cleanup for aerdrv_core.c Style cleanup for pci_{en,dis}able_pcie_error_reporting(). Signed-off-by: Hidetoshi Seto Reviewed-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_core.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index ce42cac99dd3..df2d686fe3dd 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -47,13 +47,12 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev) if (!pos) return -EIO; - pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, ®16); - reg16 = reg16 | - PCI_EXP_DEVCTL_CERE | + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, ®16); + reg16 |= (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | PCI_EXP_DEVCTL_FERE | - PCI_EXP_DEVCTL_URRE; - pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16); + PCI_EXP_DEVCTL_URRE); + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, reg16); return 0; } @@ -71,12 +70,12 @@ int pci_disable_pcie_error_reporting(struct pci_dev *dev) if (!pos) return -EIO; - pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, ®16); - reg16 = reg16 & ~(PCI_EXP_DEVCTL_CERE | - PCI_EXP_DEVCTL_NFERE | - PCI_EXP_DEVCTL_FERE | - PCI_EXP_DEVCTL_URRE); - pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16); + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, ®16); + reg16 &= ~(PCI_EXP_DEVCTL_CERE | + PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, reg16); return 0; } -- cgit v1.2.3 From 33852cb03ee4cdb05dc6e3a21ec19a4ee63511a4 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Thu, 25 Mar 2010 16:11:37 -0700 Subject: x86/PCI: irq and pci_ids patch for additional Intel Cougar Point DeviceIDs This patch adds additional LPC Controller DeviceIDs for the Intel Cougar Point PCH. The DeviceIDs are defined and referenced as a range of values, the same way Ibex Peak was implemented. Signed-off-by: Seth Heasley Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 9 +++++++-- include/linux/pci_ids.h | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 5d362b5ba06f..9810a0f76c91 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -589,8 +589,6 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH10_1: case PCI_DEVICE_ID_INTEL_ICH10_2: case PCI_DEVICE_ID_INTEL_ICH10_3: - case PCI_DEVICE_ID_INTEL_CPT_LPC1: - case PCI_DEVICE_ID_INTEL_CPT_LPC2: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; @@ -605,6 +603,13 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route return 1; } + if ((device >= PCI_DEVICE_ID_INTEL_CPT_LPC_MIN) && + (device <= PCI_DEVICE_ID_INTEL_CPT_LPC_MAX)) { + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; + } return 0; } diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 9f688d243b86..ae66851870be 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2419,8 +2419,8 @@ #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 #define PCI_DEVICE_ID_INTEL_IOAT 0x1a38 #define PCI_DEVICE_ID_INTEL_CPT_SMBUS 0x1c22 -#define PCI_DEVICE_ID_INTEL_CPT_LPC1 0x1c42 -#define PCI_DEVICE_ID_INTEL_CPT_LPC2 0x1c43 +#define PCI_DEVICE_ID_INTEL_CPT_LPC_MIN 0x1c41 +#define PCI_DEVICE_ID_INTEL_CPT_LPC_MAX 0x1c5f #define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410 #define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411 #define PCI_DEVICE_ID_INTEL_82801AA_3 0x2413 -- cgit v1.2.3 From e4146bb9088c01c8b6e82be11f0c371f8aff023c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 16 May 2010 02:28:49 +0100 Subject: PCI: Disable MSI for MCP55 on P5N32-E SLI As reported in , MSI appears to be broken for this on-board device. We already have a quirk for the P5N32-SLI Premium; extend it to cover both variants of the board. Reported-by: Romain DEGEZ Signed-off-by: Ben Hutchings Cc: stable@kernel.org Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 27c0e6eb7136..480782530218 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2218,15 +2218,16 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SERVERWORKS, DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE, ht_enable_msi_mapping); -/* The P5N32-SLI Premium motherboard from Asus has a problem with msi +/* The P5N32-SLI motherboards from Asus have a problem with msi * for the MCP55 NIC. It is not yet determined whether the msi problem * also affects other devices. As for now, turn off msi for this device. */ static void __devinit nvenet_msi_disable(struct pci_dev *dev) { - if (dmi_name_in_vendors("P5N32-SLI PREMIUM")) { + if (dmi_name_in_vendors("P5N32-SLI PREMIUM") || + dmi_name_in_vendors("P5N32-E SLI")) { dev_info(&dev->dev, - "Disabling msi for MCP55 NIC on P5N32-SLI Premium\n"); + "Disabling msi for MCP55 NIC on P5N32-SLI\n"); dev->no_msi = 1; } } -- cgit v1.2.3 From 9313ff450400e6a2ab10fe6b9bdb12a828329410 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 May 2010 10:42:53 -0400 Subject: PCI quirks: disable msi on AMD rs4xx internal gfx bridges Doesn't work reliably for internal gfx. Fixes kernel bug https://bugzilla.kernel.org/show_bug.cgi?id=15626. Signed-off-by: Alex Deucher Cc: Stable Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 480782530218..e5861d5a2e4d 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2127,6 +2127,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x9602, quirk_disable_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ASUSTEK, 0x9602, quirk_disable_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AI, 0x9602, quirk_disable_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0xa238, quirk_disable_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x5a3f, quirk_disable_msi); /* Go through the list of Hypertransport capabilities and * return 1 if a HT MSI capability is found and enabled */ -- cgit v1.2.3 From ee6583f6e8f8dad4a53985dbabcd7c242d66a6b6 Mon Sep 17 00:00:00 2001 From: Roman Fietze Date: Tue, 18 May 2010 14:45:47 +0200 Subject: PCI: fix typos pci_device_dis/enable to pci_dis/enable_device in comments This fixes all occurrences of pci_enable_device and pci_disable_device in all comments. There are no code changes involved. Signed-off-by: Roman Fietze Signed-off-by: Jesse Barnes --- drivers/edac/amd76x_edac.c | 2 +- drivers/edac/i82443bxgx_edac.c | 2 +- drivers/edac/r82600_edac.c | 2 +- drivers/pci/pci.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index f2330f81cb5e..cace0a7b707a 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -294,7 +294,7 @@ static int __devinit amd76x_init_one(struct pci_dev *pdev, { debugf0("%s()\n", __func__); - /* don't need to call pci_device_enable() */ + /* don't need to call pci_enable_device() */ return amd76x_probe1(pdev, ent->driver_data); } diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c index 7f3884fcbd46..2bf2c5051bfe 100644 --- a/drivers/edac/i82443bxgx_edac.c +++ b/drivers/edac/i82443bxgx_edac.c @@ -354,7 +354,7 @@ static int __devinit i82443bxgx_edacmc_init_one(struct pci_dev *pdev, debugf0("MC: " __FILE__ ": %s()\n", __func__); - /* don't need to call pci_device_enable() */ + /* don't need to call pci_enable_device() */ rc = i82443bxgx_edacmc_probe1(pdev, ent->driver_data); if (mci_pdev == NULL) diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index d55f8e9de788..6a822c631ef5 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -354,7 +354,7 @@ static int __devinit r82600_init_one(struct pci_dev *pdev, { debugf0("%s()\n", __func__); - /* don't need to call pci_device_enable() */ + /* don't need to call pci_enable_device() */ return r82600_probe1(pdev, ent->driver_data); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 60fcb6f02c91..264c3f6b8476 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1193,7 +1193,7 @@ void pci_disable_enabled_device(struct pci_dev *dev) * anymore. This only involves disabling PCI bus-mastering, if active. * * Note we don't actually disable the device until all callers of - * pci_device_enable() have called pci_device_disable(). + * pci_enable_device() have called pci_disable_device(). */ void pci_disable_device(struct pci_dev *dev) -- cgit v1.2.3 From 5030718ee465c759c2a851851e79039f58b9efb3 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 17 May 2010 14:25:14 -0400 Subject: PCI: output FW warning in pci_read/write_vpd pci_read/write_vpd() can fail due to a timeout. Usually the command times out because of firmware issues (incorrect vpd length, etc.) on the PCI card. Currently, the timeout occurs silently. Output a message to the user indicating that they should check with their vendor for new firmware. Reviewed-by: Randy Dunlap Signed-off-by: Prarit Bhargava Signed-off-by: Jesse Barnes --- drivers/pci/access.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index affb83b42ebb..531bc697d800 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -220,8 +220,13 @@ static int pci_vpd_pci22_wait(struct pci_dev *dev) return 0; } - if (time_after(jiffies, timeout)) + if (time_after(jiffies, timeout)) { + dev_printk(KERN_DEBUG, &dev->dev, + "vpd r/w failed. This is likely a firmware " + "bug on this device. Contact the card " + "vendor for a firmware update."); return -ETIMEDOUT; + } if (fatal_signal_pending(current)) return -EINTR; if (!cond_resched()) -- cgit v1.2.3 From b79995700e25dd6b0b0aff7edd0c102d1b6281f7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 18 May 2010 00:23:24 +0200 Subject: PM/PCI: Update PCI power management documentation I power management document, Documentation/power/pci.txt, is outdated and partially inaccurate. It also is missing some important information about the power management of PCI device. Rewrite it to make it more up to date and more complete. Reviewed-by: Randy Dunlap Signed-off-by: Rafael J. Wysocki --- Documentation/power/pci.txt | 1258 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 992 insertions(+), 266 deletions(-) diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt index dd8fe43888d3..62328d76b55b 100644 --- a/Documentation/power/pci.txt +++ b/Documentation/power/pci.txt @@ -1,299 +1,1025 @@ - PCI Power Management -~~~~~~~~~~~~~~~~~~~~ -An overview of the concepts and the related functions in the Linux kernel +Copyright (c) 2010 Rafael J. Wysocki , Novell Inc. + +An overview of concepts and the Linux kernel's interfaces related to PCI power +management. Based on previous work by Patrick Mochel +(and others). -Patrick Mochel -(and others) +This document only covers the aspects of power management specific to PCI +devices. For general description of the kernel's interfaces related to device +power management refer to Documentation/power/devices.txt and +Documentation/power/runtime_pm.txt. --------------------------------------------------------------------------- -1. Overview -2. How the PCI Subsystem Does Power Management -3. PCI Utility Functions -4. PCI Device Drivers -5. Resources - -1. Overview -~~~~~~~~~~~ - -The PCI Power Management Specification was introduced between the PCI 2.1 and -PCI 2.2 Specifications. It a standard interface for controlling various -power management operations. - -Implementation of the PCI PM Spec is optional, as are several sub-components of -it. If a device supports the PCI PM Spec, the device will have an 8 byte -capability field in its PCI configuration space. This field is used to describe -and control the standard PCI power management features. - -The PCI PM spec defines 4 operating states for devices (D0 - D3) and for buses -(B0 - B3). The higher the number, the less power the device consumes. However, -the higher the number, the longer the latency is for the device to return to -an operational state (D0). - -There are actually two D3 states. When someone talks about D3, they usually -mean D3hot, which corresponds to an ACPI D2 state (power is reduced, the -device may lose some context). But they may also mean D3cold, which is an -ACPI D3 state (power is fully off, all state was discarded); or both. - -Bus power management is not covered in this version of this document. - -Note that all PCI devices support D0 and D3cold by default, regardless of -whether or not they implement any of the PCI PM spec. - -The possible state transitions that a device can undergo are: - -+---------------------------+ -| Current State | New State | -+---------------------------+ -| D0 | D1, D2, D3| -+---------------------------+ -| D1 | D2, D3 | -+---------------------------+ -| D2 | D3 | -+---------------------------+ -| D1, D2, D3 | D0 | -+---------------------------+ - -Note that when the system is entering a global suspend state, all devices will -be placed into D3 and when resuming, all devices will be placed into D0. -However, when the system is running, other state transitions are possible. - -2. How The PCI Subsystem Handles Power Management -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The PCI suspend/resume functionality is accessed indirectly via the Power -Management subsystem. At boot, the PCI driver registers a power management -callback with that layer. Upon entering a suspend state, the PM layer iterates -through all of its registered callbacks. This currently takes place only during -APM state transitions. - -Upon going to sleep, the PCI subsystem walks its device tree twice. Both times, -it does a depth first walk of the device tree. The first walk saves each of the -device's state and checks for devices that will prevent the system from entering -a global power state. The next walk then places the devices in a low power +1. Hardware and Platform Support for PCI Power Management +2. PCI Subsystem and Device Power Management +3. PCI Device Drivers and Power Management +4. Resources + + +1. Hardware and Platform Support for PCI Power Management +========================================================= + +1.1. Native and Platform-Based Power Management +----------------------------------------------- +In general, power management is a feature allowing one to save energy by putting +devices into states in which they draw less power (low-power states) at the +price of reduced functionality or performance. + +Usually, a device is put into a low-power state when it is underutilized or +completely inactive. However, when it is necessary to use the device once +again, it has to be put back into the "fully functional" state (full-power +state). This may happen when there are some data for the device to handle or +as a result of an external event requiring the device to be active, which may +be signaled by the device itself. + +PCI devices may be put into low-power states in two ways, by using the device +capabilities introduced by the PCI Bus Power Management Interface Specification, +or with the help of platform firmware, such as an ACPI BIOS. In the first +approach, that is referred to as the native PCI power management (native PCI PM) +in what follows, the device power state is changed as a result of writing a +specific value into one of its standard configuration registers. The second +approach requires the platform firmware to provide special methods that may be +used by the kernel to change the device's power state. + +Devices supporting the native PCI PM usually can generate wakeup signals called +Power Management Events (PMEs) to let the kernel know about external events +requiring the device to be active. After receiving a PME the kernel is supposed +to put the device that sent it into the full-power state. However, the PCI Bus +Power Management Interface Specification doesn't define any standard method of +delivering the PME from the device to the CPU and the operating system kernel. +It is assumed that the platform firmware will perform this task and therefore, +even though a PCI device is set up to generate PMEs, it also may be necessary to +prepare the platform firmware for notifying the CPU of the PMEs coming from the +device (e.g. by generating interrupts). + +In turn, if the methods provided by the platform firmware are used for changing +the power state of a device, usually the platform also provides a method for +preparing the device to generate wakeup signals. In that case, however, it +often also is necessary to prepare the device for generating PMEs using the +native PCI PM mechanism, because the method provided by the platform depends on +that. + +Thus in many situations both the native and the platform-based power management +mechanisms have to be used simultaneously to obtain the desired result. + +1.2. Native PCI Power Management +-------------------------------- +The PCI Bus Power Management Interface Specification (PCI PM Spec) was +introduced between the PCI 2.1 and PCI 2.2 Specifications. It defined a +standard interface for performing various operations related to power +management. + +The implementation of the PCI PM Spec is optional for conventional PCI devices, +but it is mandatory for PCI Express devices. If a device supports the PCI PM +Spec, it has an 8 byte power management capability field in its PCI +configuration space. This field is used to describe and control the standard +features related to the native PCI power management. + +The PCI PM Spec defines 4 operating states for devices (D0-D3) and for buses +(B0-B3). The higher the number, the less power is drawn by the device or bus +in that state. However, the higher the number, the longer the latency for +the device or bus to return to the full-power state (D0 or B0, respectively). + +There are two variants of the D3 state defined by the specification. The first +one is D3hot, referred to as the software accessible D3, because devices can be +programmed to go into it. The second one, D3cold, is the state that PCI devices +are in when the supply voltage (Vcc) is removed from them. It is not possible +to program a PCI device to go into D3cold, although there may be a programmable +interface for putting the bus the device is on into a state in which Vcc is +removed from all devices on the bus. + +PCI bus power management, however, is not supported by the Linux kernel at the +time of this writing and therefore it is not covered by this document. + +Note that every PCI device can be in the full-power state (D0) or in D3cold, +regardless of whether or not it implements the PCI PM Spec. In addition to +that, if the PCI PM Spec is implemented by the device, it must support D3hot +as well as D0. The support for the D1 and D2 power states is optional. + +PCI devices supporting the PCI PM Spec can be programmed to go to any of the +supported low-power states (except for D3cold). While in D1-D3hot the +standard configuration registers of the device must be accessible to software +(i.e. the device is required to respond to PCI configuration accesses), although +its I/O and memory spaces are then disabled. This allows the device to be +programmatically put into D0. Thus the kernel can switch the device back and +forth between D0 and the supported low-power states (except for D3cold) and the +possible power state transitions the device can undergo are the following: + ++----------------------------+ +| Current State | New State | ++----------------------------+ +| D0 | D1, D2, D3 | ++----------------------------+ +| D1 | D2, D3 | ++----------------------------+ +| D2 | D3 | ++----------------------------+ +| D1, D2, D3 | D0 | ++----------------------------+ + +The transition from D3cold to D0 occurs when the supply voltage is provided to +the device (i.e. power is restored). In that case the device returns to D0 with +a full power-on reset sequence and the power-on defaults are restored to the +device by hardware just as at initial power up. + +PCI devices supporting the PCI PM Spec can be programmed to generate PMEs +while in a low-power state (D1-D3), but they are not required to be capable +of generating PMEs from all supported low-power states. In particular, the +capability of generating PMEs from D3cold is optional and depends on the +presence of additional voltage (3.3Vaux) allowing the device to remain +sufficiently active to generate a wakeup signal. + +1.3. ACPI Device Power Management +--------------------------------- +The platform firmware support for the power management of PCI devices is +system-specific. However, if the system in question is compliant with the +Advanced Configuration and Power Interface (ACPI) Specification, like the +majority of x86-based systems, it is supposed to implement device power +management interfaces defined by the ACPI standard. + +For this purpose the ACPI BIOS provides special functions called "control +methods" that may be executed by the kernel to perform specific tasks, such as +putting a device into a low-power state. These control methods are encoded +using special byte-code language called the ACPI Machine Language (AML) and +stored in the machine's BIOS. The kernel loads them from the BIOS and executes +them as needed using an AML interpreter that translates the AML byte code into +computations and memory or I/O space accesses. This way, in theory, a BIOS +writer can provide the kernel with a means to perform actions depending +on the system design in a system-specific fashion. + +ACPI control methods may be divided into global control methods, that are not +associated with any particular devices, and device control methods, that have +to be defined separately for each device supposed to be handled with the help of +the platform. This means, in particular, that ACPI device control methods can +only be used to handle devices that the BIOS writer knew about in advance. The +ACPI methods used for device power management fall into that category. + +The ACPI specification assumes that devices can be in one of four power states +labeled as D0, D1, D2, and D3 that roughly correspond to the native PCI PM +D0-D3 states (although the difference between D3hot and D3cold is not taken +into account by ACPI). Moreover, for each power state of a device there is a +set of power resources that have to be enabled for the device to be put into +that state. These power resources are controlled (i.e. enabled or disabled) +with the help of their own control methods, _ON and _OFF, that have to be +defined individually for each of them. + +To put a device into the ACPI power state Dx (where x is a number between 0 and +3 inclusive) the kernel is supposed to (1) enable the power resources required +by the device in this state using their _ON control methods and (2) execute the +_PSx control method defined for the device. In addition to that, if the device +is going to be put into a low-power state (D1-D3) and is supposed to generate +wakeup signals from that state, the _DSW (or _PSW, replaced with _DSW by ACPI +3.0) control method defined for it has to be executed before _PSx. Power +resources that are not required by the device in the target power state and are +not required any more by any other device should be disabled (by executing their +_OFF control methods). If the current power state of the device is D3, it can +only be put into D0 this way. + +However, quite often the power states of devices are changed during a +system-wide transition into a sleep state or back into the working state. ACPI +defines four system sleep states, S1, S2, S3, and S4, and denotes the system +working state as S0. In general, the target system sleep (or working) state +determines the highest power (lowest number) state the device can be put +into and the kernel is supposed to obtain this information by executing the +device's _SxD control method (where x is a number between 0 and 4 inclusive). +If the device is required to wake up the system from the target sleep state, the +lowest power (highest number) state it can be put into is also determined by the +target state of the system. The kernel is then supposed to use the device's +_SxW control method to obtain the number of that state. It also is supposed to +use the device's _PRW control method to learn which power resources need to be +enabled for the device to be able to generate wakeup signals. + +1.4. Wakeup Signaling +--------------------- +Wakeup signals generated by PCI devices, either as native PCI PMEs, or as +a result of the execution of the _DSW (or _PSW) ACPI control method before +putting the device into a low-power state, have to be caught and handled as +appropriate. If they are sent while the system is in the working state +(ACPI S0), they should be translated into interrupts so that the kernel can +put the devices generating them into the full-power state and take care of the +events that triggered them. In turn, if they are sent while the system is +sleeping, they should cause the system's core logic to trigger wakeup. + +On ACPI-based systems wakeup signals sent by conventional PCI devices are +converted into ACPI General-Purpose Events (GPEs) which are hardware signals +from the system core logic generated in response to various events that need to +be acted upon. Every GPE is associated with one or more sources of potentially +interesting events. In particular, a GPE may be associated with a PCI device +capable of signaling wakeup. The information on the connections between GPEs +and event sources is recorded in the system's ACPI BIOS from where it can be +read by the kernel. + +If a PCI device known to the system's ACPI BIOS signals wakeup, the GPE +associated with it (if there is one) is triggered. The GPEs associated with PCI +bridges may also be triggered in response to a wakeup signal from one of the +devices below the bridge (this also is the case for root bridges) and, for +example, native PCI PMEs from devices unknown to the system's ACPI BIOS may be +handled this way. + +A GPE may be triggered when the system is sleeping (i.e. when it is in one of +the ACPI S1-S4 states), in which case system wakeup is started by its core logic +(the device that was the source of the signal causing the system wakeup to occur +may be identified later). The GPEs used in such situations are referred to as +wakeup GPEs. + +Usually, however, GPEs are also triggered when the system is in the working +state (ACPI S0) and in that case the system's core logic generates a System +Control Interrupt (SCI) to notify the kernel of the event. Then, the SCI +handler identifies the GPE that caused the interrupt to be generated which, +in turn, allows the kernel to identify the source of the event (that may be +a PCI device signaling wakeup). The GPEs used for notifying the kernel of +events occurring while the system is in the working state are referred to as +runtime GPEs. + +Unfortunately, there is no standard way of handling wakeup signals sent by +conventional PCI devices on systems that are not ACPI-based, but there is one +for PCI Express devices. Namely, the PCI Express Base Specification introduced +a native mechanism for converting native PCI PMEs into interrupts generated by +root ports. For conventional PCI devices native PMEs are out-of-band, so they +are routed separately and they need not pass through bridges (in principle they +may be routed directly to the system's core logic), but for PCI Express devices +they are in-band messages that have to pass through the PCI Express hierarchy, +including the root port on the path from the device to the Root Complex. Thus +it was possible to introduce a mechanism by which a root port generates an +interrupt whenever it receives a PME message from one of the devices below it. +The PCI Express Requester ID of the device that sent the PME message is then +recorded in one of the root port's configuration registers from where it may be +read by the interrupt handler allowing the device to be identified. [PME +messages sent by PCI Express endpoints integrated with the Root Complex don't +pass through root ports, but instead they cause a Root Complex Event Collector +(if there is one) to generate interrupts.] + +In principle the native PCI Express PME signaling may also be used on ACPI-based +systems along with the GPEs, but to use it the kernel has to ask the system's +ACPI BIOS to release control of root port configuration registers. The ACPI +BIOS, however, is not required to allow the kernel to control these registers +and if it doesn't do that, the kernel must not modify their contents. Of course +the native PCI Express PME signaling cannot be used by the kernel in that case. + + +2. PCI Subsystem and Device Power Management +============================================ + +2.1. Device Power Management Callbacks +-------------------------------------- +The PCI Subsystem participates in the power management of PCI devices in a +number of ways. First of all, it provides an intermediate code layer between +the device power management core (PM core) and PCI device drivers. +Specifically, the pm field of the PCI subsystem's struct bus_type object, +pci_bus_type, points to a struct dev_pm_ops object, pci_dev_pm_ops, containing +pointers to several device power management callbacks: + +const struct dev_pm_ops pci_dev_pm_ops = { + .prepare = pci_pm_prepare, + .complete = pci_pm_complete, + .suspend = pci_pm_suspend, + .resume = pci_pm_resume, + .freeze = pci_pm_freeze, + .thaw = pci_pm_thaw, + .poweroff = pci_pm_poweroff, + .restore = pci_pm_restore, + .suspend_noirq = pci_pm_suspend_noirq, + .resume_noirq = pci_pm_resume_noirq, + .freeze_noirq = pci_pm_freeze_noirq, + .thaw_noirq = pci_pm_thaw_noirq, + .poweroff_noirq = pci_pm_poweroff_noirq, + .restore_noirq = pci_pm_restore_noirq, + .runtime_suspend = pci_pm_runtime_suspend, + .runtime_resume = pci_pm_runtime_resume, + .runtime_idle = pci_pm_runtime_idle, +}; + +These callbacks are executed by the PM core in various situations related to +device power management and they, in turn, execute power management callbacks +provided by PCI device drivers. They also perform power management operations +involving some standard configuration registers of PCI devices that device +drivers need not know or care about. + +The structure representing a PCI device, struct pci_dev, contains several fields +that these callbacks operate on: + +struct pci_dev { + ... + pci_power_t current_state; /* Current operating state. */ + int pm_cap; /* PM capability offset in the + configuration space */ + unsigned int pme_support:5; /* Bitmask of states from which PME# + can be generated */ + unsigned int pme_interrupt:1;/* Is native PCIe PME signaling used? */ + unsigned int d1_support:1; /* Low power state D1 is supported */ + unsigned int d2_support:1; /* Low power state D2 is supported */ + unsigned int no_d1d2:1; /* D1 and D2 are forbidden */ + unsigned int wakeup_prepared:1; /* Device prepared for wake up */ + unsigned int d3_delay; /* D3->D0 transition time in ms */ + ... +}; + +They also indirectly use some fields of the struct device that is embedded in +struct pci_dev. + +2.2. Device Initialization +-------------------------- +The PCI subsystem's first task related to device power management is to +prepare the device for power management and initialize the fields of struct +pci_dev used for this purpose. This happens in two functions defined in +drivers/pci/pci.c, pci_pm_init() and platform_pci_wakeup_init(). + +The first of these functions checks if the device supports native PCI PM +and if that's the case the offset of its power management capability structure +in the configuration space is stored in the pm_cap field of the device's struct +pci_dev object. Next, the function checks which PCI low-power states are +supported by the device and from which low-power states the device can generate +native PCI PMEs. The power management fields of the device's struct pci_dev and +the struct device embedded in it are updated accordingly and the generation of +PMEs by the device is disabled. + +The second function checks if the device can be prepared to signal wakeup with +the help of the platform firmware, such as the ACPI BIOS. If that is the case, +the function updates the wakeup fields in struct device embedded in the +device's struct pci_dev and uses the firmware-provided method to prevent the +device from signaling wakeup. + +At this point the device is ready for power management. For driverless devices, +however, this functionality is limited to a few basic operations carried out +during system-wide transitions to a sleep state and back to the working state. + +2.3. Runtime Device Power Management +------------------------------------ +The PCI subsystem plays a vital role in the runtime power management of PCI +devices. For this purpose it uses the general runtime power management +(runtime PM) framework described in Documentation/power/runtime_pm.txt. +Namely, it provides subsystem-level callbacks: + + pci_pm_runtime_suspend() + pci_pm_runtime_resume() + pci_pm_runtime_idle() + +that are executed by the core runtime PM routines. It also implements the +entire mechanics necessary for handling runtime wakeup signals from PCI devices +in low-power states, which at the time of this writing works for both the native +PCI Express PME signaling and the ACPI GPE-based wakeup signaling described in +Section 1. + +First, a PCI device is put into a low-power state, or suspended, with the help +of pm_schedule_suspend() or pm_runtime_suspend() which for PCI devices call +pci_pm_runtime_suspend() to do the actual job. For this to work, the device's +driver has to provide a pm->runtime_suspend() callback (see below), which is +run by pci_pm_runtime_suspend() as the first action. If the driver's callback +returns successfully, the device's standard configuration registers are saved, +the device is prepared to generate wakeup signals and, finally, it is put into +the target low-power state. + +The low-power state to put the device into is the lowest-power (highest number) +state from which it can signal wakeup. The exact method of signaling wakeup is +system-dependent and is determined by the PCI subsystem on the basis of the +reported capabilities of the device and the platform firmware. To prepare the +device for signaling wakeup and put it into the selected low-power state, the +PCI subsystem can use the platform firmware as well as the device's native PCI +PM capabilities, if supported. + +It is expected that the device driver's pm->runtime_suspend() callback will +not attempt to prepare the device for signaling wakeup or to put it into a +low-power state. The driver ought to leave these tasks to the PCI subsystem +that has all of the information necessary to perform them. + +A suspended device is brought back into the "active" state, or resumed, +with the help of pm_request_resume() or pm_runtime_resume() which both call +pci_pm_runtime_resume() for PCI devices. Again, this only works if the device's +driver provides a pm->runtime_resume() callback (see below). However, before +the driver's callback is executed, pci_pm_runtime_resume() brings the device +back into the full-power state, prevents it from signaling wakeup while in that +state and restores its standard configuration registers. Thus the driver's +callback need not worry about the PCI-specific aspects of the device resume. + +Note that generally pci_pm_runtime_resume() may be called in two different +situations. First, it may be called at the request of the device's driver, for +example if there are some data for it to process. Second, it may be called +as a result of a wakeup signal from the device itself (this sometimes is +referred to as "remote wakeup"). Of course, for this purpose the wakeup signal +is handled in one of the ways described in Section 1 and finally converted into +a notification for the PCI subsystem after the source device has been +identified. + +The pci_pm_runtime_idle() function, called for PCI devices by pm_runtime_idle() +and pm_request_idle(), executes the device driver's pm->runtime_idle() +callback, if defined, and if that callback doesn't return error code (or is not +present at all), suspends the device with the help of pm_runtime_suspend(). +Sometimes pci_pm_runtime_idle() is called automatically by the PM core (for +example, it is called right after the device has just been resumed), in which +cases it is expected to suspend the device if that makes sense. Usually, +however, the PCI subsystem doesn't really know if the device really can be +suspended, so it lets the device's driver decide by running its +pm->runtime_idle() callback. + +2.4. System-Wide Power Transitions +---------------------------------- +There are a few different types of system-wide power transitions, described in +Documentation/power/devices.txt. Each of them requires devices to be handled +in a specific way and the PM core executes subsystem-level power management +callbacks for this purpose. They are executed in phases such that each phase +involves executing the same subsystem-level callback for every device belonging +to the given subsystem before the next phase begins. These phases always run +after tasks have been frozen. + +2.4.1. System Suspend + +When the system is going into a sleep state in which the contents of memory will +be preserved, such as one of the ACPI sleep states S1-S3, the phases are: + + prepare, suspend, suspend_noirq. + +The following PCI bus type's callbacks, respectively, are used in these phases: + + pci_pm_prepare() + pci_pm_suspend() + pci_pm_suspend_noirq() + +The pci_pm_prepare() routine first puts the device into the "fully functional" +state with the help of pm_runtime_resume(). Then, it executes the device +driver's pm->prepare() callback if defined (i.e. if the driver's struct +dev_pm_ops object is present and the prepare pointer in that object is valid). + +The pci_pm_suspend() routine first checks if the device's driver implements +legacy PCI suspend routines (see Section 3), in which case the driver's legacy +suspend callback is executed, if present, and its result is returned. Next, if +the device's driver doesn't provide a struct dev_pm_ops object (containing +pointers to the driver's callbacks), pci_pm_default_suspend() is called, which +simply turns off the device's bus master capability and runs +pcibios_disable_device() to disable it, unless the device is a bridge (PCI +bridges are ignored by this routine). Next, the device driver's pm->suspend() +callback is executed, if defined, and its result is returned if it fails. +Finally, pci_fixup_device() is called to apply hardware suspend quirks related +to the device if necessary. + +Note that the suspend phase is carried out asynchronously for PCI devices, so +the pci_pm_suspend() callback may be executed in parallel for any pair of PCI +devices that don't depend on each other in a known way (i.e. none of the paths +in the device tree from the root bridge to a leaf device contains both of them). + +The pci_pm_suspend_noirq() routine is executed after suspend_device_irqs() has +been called, which means that the device driver's interrupt handler won't be +invoked while this routine is running. It first checks if the device's driver +implements legacy PCI suspends routines (Section 3), in which case the legacy +late suspend routine is called and its result is returned (the standard +configuration registers of the device are saved if the driver's callback hasn't +done that). Second, if the device driver's struct dev_pm_ops object is not +present, the device's standard configuration registers are saved and the routine +returns success. Otherwise the device driver's pm->suspend_noirq() callback is +executed, if present, and its result is returned if it fails. Next, if the +device's standard configuration registers haven't been saved yet (one of the +device driver's callbacks executed before might do that), pci_pm_suspend_noirq() +saves them, prepares the device to signal wakeup (if necessary) and puts it into +a low-power state. + +The low-power state to put the device into is the lowest-power (highest number) +state from which it can signal wakeup while the system is in the target sleep +state. Just like in the runtime PM case described above, the mechanism of +signaling wakeup is system-dependent and determined by the PCI subsystem, which +is also responsible for preparing the device to signal wakeup from the system's +target sleep state as appropriate. + +PCI device drivers (that don't implement legacy power management callbacks) are +generally not expected to prepare devices for signaling wakeup or to put them +into low-power states. However, if one of the driver's suspend callbacks +(pm->suspend() or pm->suspend_noirq()) saves the device's standard configuration +registers, pci_pm_suspend_noirq() will assume that the device has been prepared +to signal wakeup and put into a low-power state by the driver (the driver is +then assumed to have used the helper functions provided by the PCI subsystem for +this purpose). PCI device drivers are not encouraged to do that, but in some +rare cases doing that in the driver may be the optimum approach. + +2.4.2. System Resume + +When the system is undergoing a transition from a sleep state in which the +contents of memory have been preserved, such as one of the ACPI sleep states +S1-S3, into the working state (ACPI S0), the phases are: + + resume_noirq, resume, complete. + +The following PCI bus type's callbacks, respectively, are executed in these +phases: + + pci_pm_resume_noirq() + pci_pm_resume() + pci_pm_complete() + +The pci_pm_resume_noirq() routine first puts the device into the full-power +state, restores its standard configuration registers and applies early resume +hardware quirks related to the device, if necessary. This is done +unconditionally, regardless of whether or not the device's driver implements +legacy PCI power management callbacks (this way all PCI devices are in the +full-power state and their standard configuration registers have been restored +when their interrupt handlers are invoked for the first time during resume, +which allows the kernel to avoid problems with the handling of shared interrupts +by drivers whose devices are still suspended). If legacy PCI power management +callbacks (see Section 3) are implemented by the device's driver, the legacy +early resume callback is executed and its result is returned. Otherwise, the +device driver's pm->resume_noirq() callback is executed, if defined, and its +result is returned. + +The pci_pm_resume() routine first checks if the device's standard configuration +registers have been restored and restores them if that's not the case (this +only is necessary in the error path during a failing suspend). Next, resume +hardware quirks related to the device are applied, if necessary, and if the +device's driver implements legacy PCI power management callbacks (see +Section 3), the driver's legacy resume callback is executed and its result is +returned. Otherwise, the device's wakeup signaling mechanisms are blocked and +its driver's pm->resume() callback is executed, if defined (the callback's +result is then returned). + +The resume phase is carried out asynchronously for PCI devices, like the +suspend phase described above, which means that if two PCI devices don't depend +on each other in a known way, the pci_pm_resume() routine may be executed for +the both of them in parallel. + +The pci_pm_complete() routine only executes the device driver's pm->complete() +callback, if defined. + +2.4.3. System Hibernation + +System hibernation is more complicated than system suspend, because it requires +a system image to be created and written into a persistent storage medium. The +image is created atomically and all devices are quiesced, or frozen, before that +happens. + +The freezing of devices is carried out after enough memory has been freed (at +the time of this writing the image creation requires at least 50% of system RAM +to be free) in the following three phases: + + prepare, freeze, freeze_noirq + +that correspond to the PCI bus type's callbacks: + + pci_pm_prepare() + pci_pm_freeze() + pci_pm_freeze_noirq() + +This means that the prepare phase is exactly the same as for system suspend. +The other two phases, however, are different. + +The pci_pm_freeze() routine is quite similar to pci_pm_suspend(), but it runs +the device driver's pm->freeze() callback, if defined, instead of pm->suspend(), +and it doesn't apply the suspend-related hardware quirks. It is executed +asynchronously for different PCI devices that don't depend on each other in a +known way. + +The pci_pm_freeze_noirq() routine, in turn, is similar to +pci_pm_suspend_noirq(), but it calls the device driver's pm->freeze_noirq() +routine instead of pm->suspend_noirq(). It also doesn't attempt to prepare the +device for signaling wakeup and put it into a low-power state. Still, it saves +the device's standard configuration registers if they haven't been saved by one +of the driver's callbacks. + +Once the image has been created, it has to be saved. However, at this point all +devices are frozen and they cannot handle I/O, while their ability to handle +I/O is obviously necessary for the image saving. Thus they have to be brought +back to the fully functional state and this is done in the following phases: + + thaw_noirq, thaw, complete + +using the following PCI bus type's callbacks: + + pci_pm_thaw_noirq() + pci_pm_thaw() + pci_pm_complete() + +respectively. + +The first of them, pci_pm_thaw_noirq(), is analogous to pci_pm_resume_noirq(), +but it doesn't put the device into the full power state and doesn't attempt to +restore its standard configuration registers. It also executes the device +driver's pm->thaw_noirq() callback, if defined, instead of pm->resume_noirq(). + +The pci_pm_thaw() routine is similar to pci_pm_resume(), but it runs the device +driver's pm->thaw() callback instead of pm->resume(). It is executed +asynchronously for different PCI devices that don't depend on each other in a +known way. + +The complete phase it the same as for system resume. + +After saving the image, devices need to be powered down before the system can +enter the target sleep state (ACPI S4 for ACPI-based systems). This is done in +three phases: + + prepare, poweroff, poweroff_noirq + +where the prepare phase is exactly the same as for system suspend. The other +two phases are analogous to the suspend and suspend_noirq phases, respectively. +The PCI subsystem-level callbacks they correspond to + + pci_pm_poweroff() + pci_pm_poweroff_noirq() + +work in analogy with pci_pm_suspend() and pci_pm_poweroff_noirq(), respectively, +although they don't attempt to save the device's standard configuration +registers. + +2.4.4. System Restore + +System restore requires a hibernation image to be loaded into memory and the +pre-hibernation memory contents to be restored before the pre-hibernation system +activity can be resumed. + +As described in Documentation/power/devices.txt, the hibernation image is loaded +into memory by a fresh instance of the kernel, called the boot kernel, which in +turn is loaded and run by a boot loader in the usual way. After the boot kernel +has loaded the image, it needs to replace its own code and data with the code +and data of the "hibernated" kernel stored within the image, called the image +kernel. For this purpose all devices are frozen just like before creating +the image during hibernation, in the + + prepare, freeze, freeze_noirq + +phases described above. However, the devices affected by these phases are only +those having drivers in the boot kernel; other devices will still be in whatever +state the boot loader left them. + +Should the restoration of the pre-hibernation memory contents fail, the boot +kernel would go through the "thawing" procedure described above, using the +thaw_noirq, thaw, and complete phases (that will only affect the devices having +drivers in the boot kernel), and then continue running normally. + +If the pre-hibernation memory contents are restored successfully, which is the +usual situation, control is passed to the image kernel, which then becomes +responsible for bringing the system back to the working state. To achieve this, +it must restore the devices' pre-hibernation functionality, which is done much +like waking up from the memory sleep state, although it involves different +phases: + + restore_noirq, restore, complete + +The first two of these are analogous to the resume_noirq and resume phases +described above, respectively, and correspond to the following PCI subsystem +callbacks: + + pci_pm_restore_noirq() + pci_pm_restore() + +These callbacks work in analogy with pci_pm_resume_noirq() and pci_pm_resume(), +respectively, but they execute the device driver's pm->restore_noirq() and +pm->restore() callbacks, if available. + +The complete phase is carried out in exactly the same way as during system +resume. + + +3. PCI Device Drivers and Power Management +========================================== + +3.1. Power Management Callbacks +------------------------------- +PCI device drivers participate in power management by providing callbacks to be +executed by the PCI subsystem's power management routines described above and by +controlling the runtime power management of their devices. + +At the time of this writing there are two ways to define power management +callbacks for a PCI device driver, the recommended one, based on using a +dev_pm_ops structure described in Documentation/power/devices.txt, and the +"legacy" one, in which the .suspend(), .suspend_late(), .resume_early(), and +.resume() callbacks from struct pci_driver are used. The legacy approach, +however, doesn't allow one to define runtime power management callbacks and is +not really suitable for any new drivers. Therefore it is not covered by this +document (refer to the source code to learn more about it). + +It is recommended that all PCI device drivers define a struct dev_pm_ops object +containing pointers to power management (PM) callbacks that will be executed by +the PCI subsystem's PM routines in various circumstances. A pointer to the +driver's struct dev_pm_ops object has to be assigned to the driver.pm field in +its struct pci_driver object. Once that has happened, the "legacy" PM callbacks +in struct pci_driver are ignored (even if they are not NULL). + +The PM callbacks in struct dev_pm_ops are not mandatory and if they are not +defined (i.e. the respective fields of struct dev_pm_ops are unset) the PCI +subsystem will handle the device in a simplified default manner. If they are +defined, though, they are expected to behave as described in the following +subsections. + +3.1.1. prepare() + +The prepare() callback is executed during system suspend, during hibernation +(when a hibernation image is about to be created), during power-off after +saving a hibernation image and during system restore, when a hibernation image +has just been loaded into memory. + +This callback is only necessary if the driver's device has children that in +general may be registered at any time. In that case the role of the prepare() +callback is to prevent new children of the device from being registered until +one of the resume_noirq(), thaw_noirq(), or restore_noirq() callbacks is run. + +In addition to that the prepare() callback may carry out some operations +preparing the device to be suspended, although it should not allocate memory +(if additional memory is required to suspend the device, it has to be +preallocated earlier, for example in a suspend/hibernate notifier as described +in Documentation/power/notifiers.txt). + +3.1.2. suspend() + +The suspend() callback is only executed during system suspend, after prepare() +callbacks have been executed for all devices in the system. + +This callback is expected to quiesce the device and prepare it to be put into a +low-power state by the PCI subsystem. It is not required (in fact it even is +not recommended) that a PCI driver's suspend() callback save the standard +configuration registers of the device, prepare it for waking up the system, or +put it into a low-power state. All of these operations can very well be taken +care of by the PCI subsystem, without the driver's participation. + +However, in some rare case it is convenient to carry out these operations in +a PCI driver. Then, pci_save_state(), pci_prepare_to_sleep(), and +pci_set_power_state() should be used to save the device's standard configuration +registers, to prepare it for system wakeup (if necessary), and to put it into a +low-power state, respectively. Moreover, if the driver calls pci_save_state(), +the PCI subsystem will not execute either pci_prepare_to_sleep(), or +pci_set_power_state() for its device, so the driver is then responsible for +handling the device as appropriate. + +While the suspend() callback is being executed, the driver's interrupt handler +can be invoked to handle an interrupt from the device, so all suspend-related +operations relying on the driver's ability to handle interrupts should be +carried out in this callback. + +3.1.3. suspend_noirq() + +The suspend_noirq() callback is only executed during system suspend, after +suspend() callbacks have been executed for all devices in the system and +after device interrupts have been disabled by the PM core. + +The difference between suspend_noirq() and suspend() is that the driver's +interrupt handler will not be invoked while suspend_noirq() is running. Thus +suspend_noirq() can carry out operations that would cause race conditions to +arise if they were performed in suspend(). + +3.1.4. freeze() + +The freeze() callback is hibernation-specific and is executed in two situations, +during hibernation, after prepare() callbacks have been executed for all devices +in preparation for the creation of a system image, and during restore, +after a system image has been loaded into memory from persistent storage and the +prepare() callbacks have been executed for all devices. + +The role of this callback is analogous to the role of the suspend() callback +described above. In fact, they only need to be different in the rare cases when +the driver takes the responsibility for putting the device into a low-power state. -The first walk allows a graceful recovery in the event of a failure, since none -of the devices have actually been powered down. - -In both walks, in particular the second, all children of a bridge are touched -before the actual bridge itself. This allows the bridge to retain power while -its children are being accessed. - -Upon resuming from sleep, just the opposite must be true: all bridges must be -powered on and restored before their children are powered on. This is easily -accomplished with a breadth-first walk of the PCI device tree. - - -3. PCI Utility Functions -~~~~~~~~~~~~~~~~~~~~~~~~ - -These are helper functions designed to be called by individual device drivers. -Assuming that a device behaves as advertised, these should be applicable in most -cases. However, results may vary. - -Note that these functions are never implicitly called for the driver. The driver -is always responsible for deciding when and if to call these. - - -pci_save_state --------------- - -Usage: - pci_save_state(struct pci_dev *dev); - -Description: - Save first 64 bytes of PCI config space, along with any additional - PCI-Express or PCI-X information. - - -pci_restore_state ------------------ - -Usage: - pci_restore_state(struct pci_dev *dev); - -Description: - Restore previously saved config space. - - -pci_set_power_state -------------------- - -Usage: - pci_set_power_state(struct pci_dev *dev, pci_power_t state); - -Description: - Transition device to low power state using PCI PM Capabilities - registers. - - Will fail under one of the following conditions: - - If state is less than current state, but not D0 (illegal transition) - - Device doesn't support PM Capabilities - - Device does not support requested state - - -pci_enable_wake ---------------- - -Usage: - pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable); - -Description: - Enable device to generate PME# during low power state using PCI PM - Capabilities. - - Checks whether if device supports generating PME# from requested state - and fail if it does not, unless enable == 0 (request is to disable wake - events, which is implicit if it doesn't even support it in the first - place). - - Note that the PMC Register in the device's PM Capabilities has a bitmask - of the states it supports generating PME# from. D3hot is bit 3 and - D3cold is bit 4. So, while a value of 4 as the state may not seem - semantically correct, it is. - - -4. PCI Device Drivers -~~~~~~~~~~~~~~~~~~~~~ - -These functions are intended for use by individual drivers, and are defined in -struct pci_driver: - - int (*suspend) (struct pci_dev *dev, pm_message_t state); - int (*resume) (struct pci_dev *dev); - - -suspend -------- - -Usage: - -if (dev->driver && dev->driver->suspend) - dev->driver->suspend(dev,state); - -A driver uses this function to actually transition the device into a low power -state. This should include disabling I/O, IRQs, and bus-mastering, as well as -physically transitioning the device to a lower power state; it may also include -calls to pci_enable_wake(). - -Bus mastering may be disabled by doing: - -pci_disable_device(dev); - -For devices that support the PCI PM Spec, this may be used to set the device's -power state to match the suspend() parameter: - -pci_set_power_state(dev,state); - -The driver is also responsible for disabling any other device-specific features -(e.g blanking screen, turning off on-card memory, etc). - -The driver should be sure to track the current state of the device, as it may -obviate the need for some operations. - -The driver should update the current_state field in its pci_dev structure in -this function, except for PM-capable devices when pci_set_power_state is used. - -resume ------- - -Usage: - -if (dev->driver && dev->driver->resume) - dev->driver->resume(dev) +In that cases the freeze() callback should not prepare the device system wakeup +or put it into a low-power state. Still, either it or freeze_noirq() should +save the device's standard configuration registers using pci_save_state(). -The resume callback may be called from any power state, and is always meant to -transition the device to the D0 state. +3.1.5. freeze_noirq() -The driver is responsible for reenabling any features of the device that had -been disabled during previous suspend calls, such as IRQs and bus mastering, -as well as calling pci_restore_state(). +The freeze_noirq() callback is hibernation-specific. It is executed during +hibernation, after prepare() and freeze() callbacks have been executed for all +devices in preparation for the creation of a system image, and during restore, +after a system image has been loaded into memory and after prepare() and +freeze() callbacks have been executed for all devices. It is always executed +after device interrupts have been disabled by the PM core. -If the device is currently in D3, it may need to be reinitialized in resume(). +The role of this callback is analogous to the role of the suspend_noirq() +callback described above and it very rarely is necessary to define +freeze_noirq(). - * Some types of devices, like bus controllers, will preserve context in D3hot - (using Vcc power). Their drivers will often want to avoid re-initializing - them after re-entering D0 (perhaps to avoid resetting downstream devices). +The difference between freeze_noirq() and freeze() is analogous to the +difference between suspend_noirq() and suspend(). - * Other kinds of devices in D3hot will discard device context as part of a - soft reset when re-entering the D0 state. - - * Devices resuming from D3cold always go through a power-on reset. Some - device context can also be preserved using Vaux power. +3.1.6. poweroff() - * Some systems hide D3cold resume paths from drivers. For example, on PCs - the resume path for suspend-to-disk often runs BIOS powerup code, which - will sometimes re-initialize the device. +The poweroff() callback is hibernation-specific. It is executed when the system +is about to be powered off after saving a hibernation image to a persistent +storage. prepare() callbacks are executed for all devices before poweroff() is +called. -To handle resets during D3 to D0 transitions, it may be convenient to share -device initialization code between probe() and resume(). Device parameters -can also be saved before the driver suspends into D3, avoiding re-probe. +The role of this callback is analogous to the role of the suspend() and freeze() +callbacks described above, although it does not need to save the contents of +the device's registers. In particular, if the driver wants to put the device +into a low-power state itself instead of allowing the PCI subsystem to do that, +the poweroff() callback should use pci_prepare_to_sleep() and +pci_set_power_state() to prepare the device for system wakeup and to put it +into a low-power state, respectively, but it need not save the device's standard +configuration registers. -If the device supports the PCI PM Spec, it can use this to physically transition -the device to D0: +3.1.7. poweroff_noirq() -pci_set_power_state(dev,0); +The poweroff_noirq() callback is hibernation-specific. It is executed after +poweroff() callbacks have been executed for all devices in the system. -Note that if the entire system is transitioning out of a global sleep state, all -devices will be placed in the D0 state, so this is not necessary. However, in -the event that the device is placed in the D3 state during normal operation, -this call is necessary. It is impossible to determine which of the two events is -taking place in the driver, so it is always a good idea to make that call. +The role of this callback is analogous to the role of the suspend_noirq() and +freeze_noirq() callbacks described above, but it does not need to save the +contents of the device's registers. -The driver should take note of the state that it is resuming from in order to -ensure correct (and speedy) operation. +The difference between poweroff_noirq() and poweroff() is analogous to the +difference between suspend_noirq() and suspend(). -The driver should update the current_state field in its pci_dev structure in -this function, except for PM-capable devices when pci_set_power_state is used. +3.1.8. resume_noirq() +The resume_noirq() callback is only executed during system resume, after the +PM core has enabled the non-boot CPUs. The driver's interrupt handler will not +be invoked while resume_noirq() is running, so this callback can carry out +operations that might race with the interrupt handler. +Since the PCI subsystem unconditionally puts all devices into the full power +state in the resume_noirq phase of system resume and restores their standard +configuration registers, resume_noirq() is usually not necessary. In general +it should only be used for performing operations that would lead to race +conditions if carried out by resume(). -A reference implementation -------------------------- -.suspend() -{ - /* driver specific operations */ +3.1.9. resume() - /* Disable IRQ */ - free_irq(); - /* If using MSI */ - pci_disable_msi(); +The resume() callback is only executed during system resume, after +resume_noirq() callbacks have been executed for all devices in the system and +device interrupts have been enabled by the PM core. - pci_save_state(); - pci_enable_wake(); - /* Disable IO/bus master/irq router */ - pci_disable_device(); - pci_set_power_state(pci_choose_state()); -} +This callback is responsible for restoring the pre-suspend configuration of the +device and bringing it back to the fully functional state. The device should be +able to process I/O in a usual way after resume() has returned. -.resume() -{ - pci_set_power_state(PCI_D0); - pci_restore_state(); - /* device's irq possibly is changed, driver should take care */ - pci_enable_device(); - pci_set_master(); +3.1.10. thaw_noirq() - /* if using MSI, device's vector possibly is changed */ - pci_enable_msi(); +The thaw_noirq() callback is hibernation-specific. It is executed after a +system image has been created and the non-boot CPUs have been enabled by the PM +core, in the thaw_noirq phase of hibernation. It also may be executed if the +loading of a hibernation image fails during system restore (it is then executed +after enabling the non-boot CPUs). The driver's interrupt handler will not be +invoked while thaw_noirq() is running. - request_irq(); - /* driver specific operations; */ -} +The role of this callback is analogous to the role of resume_noirq(). The +difference between these two callbacks is that thaw_noirq() is executed after +freeze() and freeze_noirq(), so in general it does not need to modify the +contents of the device's registers. -This is a typical implementation. Drivers can slightly change the order -of the operations in the implementation, ignore some operations or add -more driver specific operations in it, but drivers should do something like -this on the whole. +3.1.11. thaw() -5. Resources -~~~~~~~~~~~~ +The thaw() callback is hibernation-specific. It is executed after thaw_noirq() +callbacks have been executed for all devices in the system and after device +interrupts have been enabled by the PM core. -PCI Local Bus Specification -PCI Bus Power Management Interface Specification +This callback is responsible for restoring the pre-freeze configuration of +the device, so that it will work in a usual way after thaw() has returned. - http://www.pcisig.com +3.1.12. restore_noirq() +The restore_noirq() callback is hibernation-specific. It is executed in the +restore_noirq phase of hibernation, when the boot kernel has passed control to +the image kernel and the non-boot CPUs have been enabled by the image kernel's +PM core. + +This callback is analogous to resume_noirq() with the exception that it cannot +make any assumption on the previous state of the device, even if the BIOS (or +generally the platform firmware) is known to preserve that state over a +suspend-resume cycle. + +For the vast majority of PCI device drivers there is no difference between +resume_noirq() and restore_noirq(). + +3.1.13. restore() + +The restore() callback is hibernation-specific. It is executed after +restore_noirq() callbacks have been executed for all devices in the system and +after the PM core has enabled device drivers' interrupt handlers to be invoked. + +This callback is analogous to resume(), just like restore_noirq() is analogous +to resume_noirq(). Consequently, the difference between restore_noirq() and +restore() is analogous to the difference between resume_noirq() and resume(). + +For the vast majority of PCI device drivers there is no difference between +resume() and restore(). + +3.1.14. complete() + +The complete() callback is executed in the following situations: + - during system resume, after resume() callbacks have been executed for all + devices, + - during hibernation, before saving the system image, after thaw() callbacks + have been executed for all devices, + - during system restore, when the system is going back to its pre-hibernation + state, after restore() callbacks have been executed for all devices. +It also may be executed if the loading of a hibernation image into memory fails +(in that case it is run after thaw() callbacks have been executed for all +devices that have drivers in the boot kernel). + +This callback is entirely optional, although it may be necessary if the +prepare() callback performs operations that need to be reversed. + +3.1.15. runtime_suspend() + +The runtime_suspend() callback is specific to device runtime power management +(runtime PM). It is executed by the PM core's runtime PM framework when the +device is about to be suspended (i.e. quiesced and put into a low-power state) +at run time. + +This callback is responsible for freezing the device and preparing it to be +put into a low-power state, but it must allow the PCI subsystem to perform all +of the PCI-specific actions necessary for suspending the device. + +3.1.16. runtime_resume() + +The runtime_resume() callback is specific to device runtime PM. It is executed +by the PM core's runtime PM framework when the device is about to be resumed +(i.e. put into the full-power state and programmed to process I/O normally) at +run time. + +This callback is responsible for restoring the normal functionality of the +device after it has been put into the full-power state by the PCI subsystem. +The device is expected to be able to process I/O in the usual way after +runtime_resume() has returned. + +3.1.17. runtime_idle() + +The runtime_idle() callback is specific to device runtime PM. It is executed +by the PM core's runtime PM framework whenever it may be desirable to suspend +the device according to the PM core's information. In particular, it is +automatically executed right after runtime_resume() has returned in case the +resume of the device has happened as a result of a spurious event. + +This callback is optional, but if it is not implemented or if it returns 0, the +PCI subsystem will call pm_runtime_suspend() for the device, which in turn will +cause the driver's runtime_suspend() callback to be executed. + +3.1.18. Pointing Multiple Callback Pointers to One Routine + +Although in principle each of the callbacks described in the previous +subsections can be defined as a separate function, it often is convenient to +point two or more members of struct dev_pm_ops to the same routine. There are +a few convenience macros that can be used for this purpose. + +The SIMPLE_DEV_PM_OPS macro declares a struct dev_pm_ops object with one +suspend routine pointed to by the .suspend(), .freeze(), and .poweroff() +members and one resume routine pointed to by the .resume(), .thaw(), and +.restore() members. The other function pointers in this struct dev_pm_ops are +unset. + +The UNIVERSAL_DEV_PM_OPS macro is similar to SIMPLE_DEV_PM_OPS, but it +additionally sets the .runtime_resume() pointer to the same value as +.resume() (and .thaw(), and .restore()) and the .runtime_suspend() pointer to +the same value as .suspend() (and .freeze() and .poweroff()). + +The SET_SYSTEM_SLEEP_PM_OPS can be used inside of a declaration of struct +dev_pm_ops to indicate that one suspend routine is to be pointed to by the +.suspend(), .freeze(), and .poweroff() members and one resume routine is to +be pointed to by the .resume(), .thaw(), and .restore() members. + +3.2. Device Runtime Power Management +------------------------------------ +In addition to providing device power management callbacks PCI device drivers +are responsible for controlling the runtime power management (runtime PM) of +their devices. + +The PCI device runtime PM is optional, but it is recommended that PCI device +drivers implement it at least in the cases where there is a reliable way of +verifying that the device is not used (like when the network cable is detached +from an Ethernet adapter or there are no devices attached to a USB controller). + +To support the PCI runtime PM the driver first needs to implement the +runtime_suspend() and runtime_resume() callbacks. It also may need to implement +the runtime_idle() callback to prevent the device from being suspended again +every time right after the runtime_resume() callback has returned +(alternatively, the runtime_suspend() callback will have to check if the +device should really be suspended and return -EAGAIN if that is not the case). + +The runtime PM of PCI devices is disabled by default. It is also blocked by +pci_pm_init() that runs the pm_runtime_forbid() helper function. If a PCI +driver implements the runtime PM callbacks and intends to use the runtime PM +framework provided by the PM core and the PCI subsystem, it should enable this +feature by executing the pm_runtime_enable() helper function. However, the +driver should not call the pm_runtime_allow() helper function unblocking +the runtime PM of the device. Instead, it should allow user space or some +platform-specific code to do that (user space can do it via sysfs), although +once it has called pm_runtime_enable(), it must be prepared to handle the +runtime PM of the device correctly as soon as pm_runtime_allow() is called +(which may happen at any time). [It also is possible that user space causes +pm_runtime_allow() to be called via sysfs before the driver is loaded, so in +fact the driver has to be prepared to handle the runtime PM of the device as +soon as it calls pm_runtime_enable().] + +The runtime PM framework works by processing requests to suspend or resume +devices, or to check if they are idle (in which cases it is reasonable to +subsequently request that they be suspended). These requests are represented +by work items put into the power management workqueue, pm_wq. Although there +are a few situations in which power management requests are automatically +queued by the PM core (for example, after processing a request to resume a +device the PM core automatically queues a request to check if the device is +idle), device drivers are generally responsible for queuing power management +requests for their devices. For this purpose they should use the runtime PM +helper functions provided by the PM core, discussed in +Documentation/power/runtime_pm.txt. + +Devices can also be suspended and resumed synchronously, without placing a +request into pm_wq. In the majority of cases this also is done by their +drivers that use helper functions provided by the PM core for this purpose. + +For more information on the runtime PM of devices refer to +Documentation/power/runtime_pm.txt. + + +4. Resources +============ + +PCI Local Bus Specification, Rev. 3.0 +PCI Bus Power Management Interface Specification, Rev. 1.2 +Advanced Configuration and Power Interface (ACPI) Specification, Rev. 3.0b +PCI Express Base Specification, Rev. 2.0 +Documentation/power/devices.txt +Documentation/power/runtime_pm.txt -- cgit v1.2.3 From 8f6bce3c4f48bd79b57d6ac9f337f5aabee43ea7 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 15 May 2010 23:18:16 +0200 Subject: PCI hotplug: Use kmemdup Use kmemdup when some other buffer is immediately copied into the allocated region. A simplified version of the semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression from,to,size,flag; statement S; @@ - to = \(kmalloc\|kzalloc\)(size,flag); + to = kmemdup(from,size,flag); if (to==NULL || ...) S - memcpy(to, from, size); // Signed-off-by: Julia Lawall Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/cpqphp_core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index f184d1d2ecbe..cb7818ffd5cf 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -1075,13 +1075,12 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* make our own copy of the pci bus structure, * as we like tweaking it a lot */ - ctrl->pci_bus = kmalloc(sizeof(*ctrl->pci_bus), GFP_KERNEL); + ctrl->pci_bus = kmemdup(pdev->bus, sizeof(*ctrl->pci_bus), GFP_KERNEL); if (!ctrl->pci_bus) { err("out of memory\n"); rc = -ENOMEM; goto err_free_ctrl; } - memcpy(ctrl->pci_bus, pdev->bus, sizeof(*ctrl->pci_bus)); ctrl->bus = pdev->bus->number; ctrl->rev = pdev->revision; -- cgit v1.2.3 From a02ce953a14d6a8aab721b129b3c8ff4981a76e6 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 5 May 2010 17:08:49 +0800 Subject: x86/PCI: make ACPI MCFG reserved error messages ACPI specific Both ACPI and SFI firmwares will have MCFG space, but the error message isn't valid on SFI, so don't print the message in that case. Signed-off-by: Feng Tang Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 39b9ebe8f886..a918553ebc75 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -483,16 +483,17 @@ static void __init pci_mmcfg_reject_broken(int early) list_for_each_entry(cfg, &pci_mmcfg_list, list) { int valid = 0; - if (!early && !acpi_disabled) + if (!early && !acpi_disabled) { valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); - if (valid) - continue; - - if (!early) - printk(KERN_ERR FW_BUG PREFIX - "MMCONFIG at %pR not reserved in " - "ACPI motherboard resources\n", &cfg->res); + if (valid) + continue; + else + printk(KERN_ERR FW_BUG PREFIX + "MMCONFIG at %pR not reserved in " + "ACPI motherboard resources\n", + &cfg->res); + } /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ -- cgit v1.2.3 From 3322340a9db2251ac9d09bc7b8d49e872298ae95 Mon Sep 17 00:00:00 2001 From: Felix Radensky Date: Sun, 28 Mar 2010 16:02:02 +0300 Subject: PCI: Allow manual resource allocation for PCI hotplug bridges At the moment only PCI-E briges can be flagged as hotplug, thus allowing manual resource preallocation via pci=hpmemsize=nnM and pci=hpiosize=nnM kernel parameters. Some PCI hotplug bridges, e.g. PLX 6254 can also benefit from this functionalily, as kernel fails to properly allocate their resources when hotplug device is added and PCI bus is rescanned. This patch adds header quirk for PLX 6254 that marks this bridge as hotplug. Other PCI bridges with similar problems can use it as well. Signed-off-by: Felix Radensky Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e5861d5a2e4d..b7512cf08c58 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2554,6 +2554,19 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1518, quirk_i82576_sriov); #endif /* CONFIG_PCI_IOV */ +/* Allow manual resource allocation for PCI hotplug bridges + * via pci=hpmemsize=nnM and pci=hpiosize=nnM parameters. For + * some PCI-PCI hotplug bridges, like PLX 6254 (former HINT HB6), + * kernel fails to allocate resources when hotplug device is + * inserted and PCI bus is rescanned. + */ +static void __devinit quirk_hotplug_bridge(struct pci_dev *dev) +{ + dev->is_hotplug_bridge = 1; +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HINT, 0x0020, quirk_hotplug_bridge); + /* * This is a quirk for the Ricoh MMC controller found as a part of * some mulifunction chips. -- cgit v1.2.3 From 3f6ea84a3035cc0ef7488f8e93bc76766799e082 Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Thu, 1 Apr 2010 11:43:30 -0700 Subject: PCI: read memory ranges out of Broadcom CNB20LE host bridge Read the memory ranges behind the Broadcom CNB20LE host bridge out of the hardware. This allows PCI hotplugging to work, since we know which memory range to allocate PCI BAR's from. The x86 PCI code automatically prefers the ACPI _CRS information when it is available. In that case, this information is not used. Signed-off-by: Ira W. Snyder Signed-off-by: Jesse Barnes --- arch/x86/Kconfig | 8 ++++ arch/x86/pci/Makefile | 2 + arch/x86/pci/broadcom_bus.c | 101 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 arch/x86/pci/broadcom_bus.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9458685902bd..677b87d60a36 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1931,6 +1931,14 @@ config PCI_MMCONFIG bool "Support mmconfig PCI config space access" depends on X86_64 && PCI && ACPI +config PCI_CNB20LE_QUIRK + bool "Read CNB20LE Host Bridge Windows" + depends on PCI + help + Read the PCI windows out of the CNB20LE host bridge. This allows + PCI hotplug to work on systems with the CNB20LE chipset which do + not have ACPI. + config DMAR bool "Support for DMA Remapping Devices (EXPERIMENTAL)" depends on PCI_MSI && ACPI && EXPERIMENTAL diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index b110d97fb925..a0207a7fdf39 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -18,6 +18,8 @@ obj-$(CONFIG_X86_MRST) += mrst.o obj-y += common.o early.o obj-y += amd_bus.o bus_numa.o +obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o + ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c new file mode 100644 index 000000000000..0846a5bbbfbd --- /dev/null +++ b/arch/x86/pci/broadcom_bus.c @@ -0,0 +1,101 @@ +/* + * Read address ranges from a Broadcom CNB20LE Host Bridge + * + * Copyright (c) 2010 Ira W. Snyder + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include + +#include "bus_numa.h" + +static void __devinit cnb20le_res(struct pci_dev *dev) +{ + struct pci_root_info *info; + struct resource res; + u16 word1, word2; + u8 fbus, lbus; + int i; + + /* + * The x86_pci_root_bus_res_quirks() function already refuses to use + * this information if ACPI _CRS was used. Therefore, we don't bother + * checking if ACPI is enabled, and just generate the information + * for both the ACPI _CRS and no ACPI cases. + */ + + info = &pci_root_info[pci_root_num]; + pci_root_num++; + + /* read the PCI bus numbers */ + pci_read_config_byte(dev, 0x44, &fbus); + pci_read_config_byte(dev, 0x45, &lbus); + info->bus_min = fbus; + info->bus_max = lbus; + + /* + * Add the legacy IDE ports on bus 0 + * + * These do not exist anywhere in the bridge registers, AFAICT. I do + * not have the datasheet, so this is the best I can do. + */ + if (fbus == 0) { + update_res(info, 0x01f0, 0x01f7, IORESOURCE_IO, 0); + update_res(info, 0x03f6, 0x03f6, IORESOURCE_IO, 0); + update_res(info, 0x0170, 0x0177, IORESOURCE_IO, 0); + update_res(info, 0x0376, 0x0376, IORESOURCE_IO, 0); + update_res(info, 0xffa0, 0xffaf, IORESOURCE_IO, 0); + } + + /* read the non-prefetchable memory window */ + pci_read_config_word(dev, 0xc0, &word1); + pci_read_config_word(dev, 0xc2, &word2); + if (word1 != word2) { + res.start = (word1 << 16) | 0x0000; + res.end = (word2 << 16) | 0xffff; + res.flags = IORESOURCE_MEM; + update_res(info, res.start, res.end, res.flags, 0); + } + + /* read the prefetchable memory window */ + pci_read_config_word(dev, 0xc4, &word1); + pci_read_config_word(dev, 0xc6, &word2); + if (word1 != word2) { + res.start = (word1 << 16) | 0x0000; + res.end = (word2 << 16) | 0xffff; + res.flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; + update_res(info, res.start, res.end, res.flags, 0); + } + + /* read the IO port window */ + pci_read_config_word(dev, 0xd0, &word1); + pci_read_config_word(dev, 0xd2, &word2); + if (word1 != word2) { + res.start = word1; + res.end = word2; + res.flags = IORESOURCE_IO; + update_res(info, res.start, res.end, res.flags, 0); + } + + /* print information about this host bridge */ + res.start = fbus; + res.end = lbus; + res.flags = IORESOURCE_BUS; + dev_info(&dev->dev, "CNB20LE PCI Host Bridge (domain %04x %pR)\n", + pci_domain_nr(dev->bus), &res); + + for (i = 0; i < info->res_num; i++) + dev_info(&dev->dev, "host bridge window %pR\n", &info->res[i]); +} + +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_LE, + cnb20le_res); + -- cgit v1.2.3 From ac81860ea073daed50246af54db706c6e491f240 Mon Sep 17 00:00:00 2001 From: Praveen Kalamegham Date: Wed, 19 May 2010 17:03:12 -0500 Subject: PCI: hotplug: pciehp: Removed check for hotplug of display devices Removed check to prevent hotplug of display devices within pciehp. Originally this was thought to have been required within the PCI Hotplug specification for some legacy devices. However there is no such requirement in the most recent revision. The check prevents hotplug of not only display devices but also computational GPUs which require serviceability. Signed-off-by: Praveen Kalamegham Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_pci.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c index 0a16444c14c9..2fce726758d2 100644 --- a/drivers/pci/hotplug/pciehp_pci.c +++ b/drivers/pci/hotplug/pciehp_pci.c @@ -84,12 +84,6 @@ int pciehp_configure_device(struct slot *p_slot) dev = pci_get_slot(parent, PCI_DEVFN(0, fn)); if (!dev) continue; - if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { - ctrl_err(ctrl, "Cannot hot-add display device %s\n", - pci_name(dev)); - pci_dev_put(dev); - continue; - } if ((dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) || (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) { pciehp_add_bridge(dev); @@ -133,15 +127,9 @@ int pciehp_unconfigure_device(struct slot *p_slot) presence = 0; for (j = 0; j < 8; j++) { - struct pci_dev* temp = pci_get_slot(parent, PCI_DEVFN(0, j)); + struct pci_dev *temp = pci_get_slot(parent, PCI_DEVFN(0, j)); if (!temp) continue; - if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) { - ctrl_err(ctrl, "Cannot remove display device %s\n", - pci_name(temp)); - pci_dev_put(temp); - continue; - } if (temp->hdr_type == PCI_HEADER_TYPE_BRIDGE && presence) { pci_read_config_byte(temp, PCI_BRIDGE_CONTROL, &bctl); if (bctl & PCI_BRIDGE_CTL_VGA) { @@ -149,7 +137,8 @@ int pciehp_unconfigure_device(struct slot *p_slot) "Cannot remove display device %s\n", pci_name(temp)); pci_dev_put(temp); - continue; + rc = EINVAL; + break; } } pci_remove_bus_device(temp); -- cgit v1.2.3