Diffstat (limited to 'drivers/pci')
-rw-r--r--  drivers/pci/dmar.c                    |  46
-rw-r--r--  drivers/pci/hotplug/acpiphp.h         |   2
-rw-r--r--  drivers/pci/hotplug/acpiphp_core.c    |   2
-rw-r--r--  drivers/pci/hotplug/acpiphp_glue.c    |   4
-rw-r--r--  drivers/pci/hotplug/cpqphp_core.c     |   2
-rw-r--r--  drivers/pci/hotplug/cpqphp_pci.c      |   2
-rw-r--r--  drivers/pci/hotplug/ibmphp_core.c     |   7
-rw-r--r--  drivers/pci/hotplug/pciehp_core.c     |  23
-rw-r--r--  drivers/pci/hotplug/rpadlpar_core.c   |  69
-rw-r--r--  drivers/pci/intel-iommu.c             | 944
-rw-r--r--  drivers/pci/intr_remapping.c          |  77
-rw-r--r--  drivers/pci/msi.c                     |  55
-rw-r--r--  drivers/pci/pci-sysfs.c               |   4
-rw-r--r--  drivers/pci/pci.c                     |   2
-rw-r--r--  drivers/pci/pcie/aer/aerdrv_core.c    |   2
-rw-r--r--  drivers/pci/pcie/aspm.c               |  29
-rw-r--r--  drivers/pci/probe.c                   |   4
-rw-r--r--  drivers/pci/quirks.c                  | 187
-rw-r--r--  drivers/pci/slot.c                    |   1
19 files changed, 1207 insertions(+), 255 deletions(-)
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 691b3adeb870..f5a662a50acb 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -191,26 +191,17 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
{
struct acpi_dmar_hardware_unit *drhd;
- static int include_all;
int ret = 0;
drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
- if (!dmaru->include_all)
- ret = dmar_parse_dev_scope((void *)(drhd + 1),
+ if (dmaru->include_all)
+ return 0;
+
+ ret = dmar_parse_dev_scope((void *)(drhd + 1),
((void *)drhd) + drhd->header.length,
&dmaru->devices_cnt, &dmaru->devices,
drhd->segment);
- else {
- /* Only allow one INCLUDE_ALL */
- if (include_all) {
- printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL "
- "device scope is allowed\n");
- ret = -EINVAL;
- }
- include_all = 1;
- }
-
if (ret) {
list_del(&dmaru->list);
kfree(dmaru);
@@ -384,12 +375,21 @@ int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
struct dmar_drhd_unit *
dmar_find_matched_drhd_unit(struct pci_dev *dev)
{
- struct dmar_drhd_unit *drhd = NULL;
+ struct dmar_drhd_unit *dmaru = NULL;
+ struct acpi_dmar_hardware_unit *drhd;
- list_for_each_entry(drhd, &dmar_drhd_units, list) {
- if (drhd->include_all || dmar_pci_device_match(drhd->devices,
- drhd->devices_cnt, dev))
- return drhd;
+ list_for_each_entry(dmaru, &dmar_drhd_units, list) {
+ drhd = container_of(dmaru->hdr,
+ struct acpi_dmar_hardware_unit,
+ header);
+
+ if (dmaru->include_all &&
+ drhd->segment == pci_domain_nr(dev->bus))
+ return dmaru;
+
+ if (dmar_pci_device_match(dmaru->devices,
+ dmaru->devices_cnt, dev))
+ return dmaru;
}
return NULL;
@@ -491,6 +491,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
int map_size;
u32 ver;
static int iommu_allocated = 0;
+ int agaw;
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
if (!iommu)
@@ -506,6 +507,15 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+ agaw = iommu_calculate_agaw(iommu);
+ if (agaw < 0) {
+ printk(KERN_ERR
+ "Cannot get a valid agaw for iommu (seq_id = %d)\n",
+ iommu->seq_id);
+ goto error;
+ }
+ iommu->agaw = agaw;
+
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
cap_max_fault_reg_offset(iommu->cap));
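Note: the new check caches the adjusted guest address width (agaw) each unit actually supports, via iommu_calculate_agaw() (added to intel-iommu.c below). For orientation, intel-iommu.c relates agaw to address width and page-table depth with a 9-bit stride per level; a minimal sketch of those helpers (names follow intel-iommu.c conventions, shown here for reference only):

    #define LEVEL_STRIDE (9)

    static inline int agaw_to_level(int agaw)  /* 0 -> 2-level, 1 -> 3, 2 -> 4 */
    {
            return agaw + 2;
    }
    static inline int agaw_to_width(int agaw)  /* 0 -> 30-bit, 1 -> 39, 2 -> 48 */
    {
            return 30 + agaw * LEVEL_STRIDE;
    }
    static inline int width_to_agaw(int width)
    {
            return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
    }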
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index f9e244da30ae..9bcb6cbd5aa9 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -113,7 +113,7 @@ struct acpiphp_slot {
u8 device; /* pci device# */
- u32 sun; /* ACPI _SUN (slot unique number) */
+ unsigned long long sun; /* ACPI _SUN (slot unique number) */
u32 flags; /* see below */
};
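Note: this widening tracks the ACPI core, where acpi_evaluate_integer() returns its result in an unsigned long long; keeping _SUN in a u32 could truncate large slot numbers, and the %u format specifiers fixed in the next two files would be mismatched. A sketch of the evaluation (illustrative; the real call site is register_slot() in acpiphp_glue.c, and the fallback value here is made up):

    unsigned long long sun;
    acpi_status status;

    status = acpi_evaluate_integer(handle, "_SUN", NULL, &sun);
    if (ACPI_FAILURE(status))
            sun = bridge->nr_slots;  /* illustrative fallback */
    slot->sun = sun;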
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index 95b536a23d25..43c10bd261b4 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -337,7 +337,7 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
slot->hotplug_slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN;
acpiphp_slot->slot = slot;
- snprintf(name, SLOT_NAME_SIZE, "%u", slot->acpi_slot->sun);
+ snprintf(name, SLOT_NAME_SIZE, "%llu", slot->acpi_slot->sun);
retval = pci_hp_register(slot->hotplug_slot,
acpiphp_slot->bridge->pci_bus,
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 955aae4071f7..3affc6472e65 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -255,13 +255,13 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
bridge->nr_slots++;
- dbg("found ACPI PCI Hotplug slot %d at PCI %04x:%02x:%02x\n",
+ dbg("found ACPI PCI Hotplug slot %llu at PCI %04x:%02x:%02x\n",
slot->sun, pci_domain_nr(bridge->pci_bus),
bridge->pci_bus->number, slot->device);
retval = acpiphp_register_hotplug_slot(slot);
if (retval) {
if (retval == -EBUSY)
- warn("Slot %d already registered by another "
+ warn("Slot %llu already registered by another "
"hotplug driver\n", slot->sun);
else
warn("acpiphp_register_hotplug_slot failed "
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index 8514c3a1746a..c2e1bcbb28a7 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -45,7 +45,7 @@
#include "cpqphp.h"
#include "cpqphp_nvram.h"
-#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... */
+#include <asm/pci_x86.h>
/* Global variables */
diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c
index 09021930589f..df146be9d2e9 100644
--- a/drivers/pci/hotplug/cpqphp_pci.c
+++ b/drivers/pci/hotplug/cpqphp_pci.c
@@ -37,7 +37,7 @@
#include "../pci.h"
#include "cpqphp.h"
#include "cpqphp_nvram.h"
-#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... */
+#include <asm/pci_x86.h>
u8 cpqhp_nic_irq;
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index c892daae74d6..dd18f857dfb0 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -35,7 +35,7 @@
#include <linux/delay.h>
#include <linux/wait.h>
#include "../pci.h"
-#include "../../../arch/x86/pci/pci.h" /* for struct irq_routing_table */
+#include <asm/pci_x86.h> /* for struct irq_routing_table */
#include "ibmphp.h"
#define attn_on(sl) ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON)
@@ -1402,10 +1402,6 @@ static int __init ibmphp_init(void)
goto error;
}
- /* lock ourselves into memory with a module
- * count of -1 so that no one can unload us. */
- module_put(THIS_MODULE);
-
exit:
return rc;
@@ -1423,4 +1419,3 @@ static void __exit ibmphp_exit(void)
}
module_init(ibmphp_init);
-module_exit(ibmphp_exit);
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 4b23bc39b11e..39cf248d24e3 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -432,18 +432,19 @@ static int pciehp_probe(struct pcie_device *dev, const struct pcie_port_service_
goto err_out_release_ctlr;
}
+ /* Check if slot is occupied */
t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
-
- t_slot->hpc_ops->get_adapter_status(t_slot, &value); /* Check if slot is occupied */
- if (value && pciehp_force) {
- rc = pciehp_enable_slot(t_slot);
- if (rc) /* -ENODEV: shouldn't happen, but deal with it */
- value = 0;
- }
- if ((POWER_CTRL(ctrl)) && !value) {
- rc = t_slot->hpc_ops->power_off_slot(t_slot); /* Power off slot if not occupied*/
- if (rc)
- goto err_out_free_ctrl_slot;
+ t_slot->hpc_ops->get_adapter_status(t_slot, &value);
+ if (value) {
+ if (pciehp_force)
+ pciehp_enable_slot(t_slot);
+ } else {
+ /* Power off slot if not occupied */
+ if (POWER_CTRL(ctrl)) {
+ rc = t_slot->hpc_ops->power_off_slot(t_slot);
+ if (rc)
+ goto err_out_free_ctrl_slot;
+ }
}
return 0;
diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c
index 9c2a22fed18b..4e3e0382c16e 100644
--- a/drivers/pci/hotplug/rpadlpar_core.c
+++ b/drivers/pci/hotplug/rpadlpar_core.c
@@ -14,6 +14,9 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
+#undef DEBUG
+
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/string.h>
@@ -151,20 +154,20 @@ static void dlpar_pci_add_bus(struct device_node *dn)
return;
}
+ /* Scan below the new bridge */
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
of_scan_pci_bridge(dn, dev);
- pcibios_fixup_new_pci_devices(dev->subordinate);
-
- /* Claim new bus resources */
- pcibios_claim_one_bus(dev->bus);
-
/* Map IO space for child bus, which may or may not succeed */
pcibios_map_io_space(dev->subordinate);
- /* Add new devices to global lists. Register in proc, sysfs. */
- pci_bus_add_devices(phb->bus);
+ /* Finish adding it : resource allocation, adding devices, etc...
+ * Note that we need to perform the finish pass on the -parent-
+ * bus of the EADS bridge so the bridge device itself gets
+ * properly added
+ */
+ pcibios_finish_adding_to_bus(phb->bus);
}
static int dlpar_add_pci_slot(char *drc_name, struct device_node *dn)
@@ -203,27 +206,6 @@ static int dlpar_add_pci_slot(char *drc_name, struct device_node *dn)
return 0;
}
-static int dlpar_remove_root_bus(struct pci_controller *phb)
-{
- struct pci_bus *phb_bus;
- int rc;
-
- phb_bus = phb->bus;
- if (!(list_empty(&phb_bus->children) &&
- list_empty(&phb_bus->devices))) {
- return -EBUSY;
- }
-
- rc = pcibios_remove_root_bus(phb);
- if (rc)
- return -EIO;
-
- device_unregister(phb_bus->bridge);
- pci_remove_bus(phb_bus);
-
- return 0;
-}
-
static int dlpar_remove_phb(char *drc_name, struct device_node *dn)
{
struct slot *slot;
@@ -235,18 +217,15 @@ static int dlpar_remove_phb(char *drc_name, struct device_node *dn)
/* If pci slot is hotplugable, use hotplug to remove it */
slot = find_php_slot(dn);
- if (slot) {
- if (rpaphp_deregister_slot(slot)) {
- printk(KERN_ERR
- "%s: unable to remove hotplug slot %s\n",
- __func__, drc_name);
- return -EIO;
- }
+ if (slot && rpaphp_deregister_slot(slot)) {
+ printk(KERN_ERR "%s: unable to remove hotplug slot %s\n",
+ __func__, drc_name);
+ return -EIO;
}
pdn = dn->data;
BUG_ON(!pdn || !pdn->phb);
- rc = dlpar_remove_root_bus(pdn->phb);
+ rc = remove_phb_dynamic(pdn->phb);
if (rc < 0)
return rc;
@@ -378,26 +357,38 @@ int dlpar_remove_pci_slot(char *drc_name, struct device_node *dn)
if (!bus)
return -EINVAL;
- /* If pci slot is hotplugable, use hotplug to remove it */
+ pr_debug("PCI: Removing PCI slot below EADS bridge %s\n",
+ bus->self ? pci_name(bus->self) : "<!PHB!>");
+
slot = find_php_slot(dn);
if (slot) {
+ pr_debug("PCI: Removing hotplug slot for %04x:%02x...\n",
+ pci_domain_nr(bus), bus->number);
+
if (rpaphp_deregister_slot(slot)) {
printk(KERN_ERR
"%s: unable to remove hotplug slot %s\n",
__func__, drc_name);
return -EIO;
}
- } else
- pcibios_remove_pci_devices(bus);
+ }
+
+ /* Remove all devices below slot */
+ pcibios_remove_pci_devices(bus);
+ /* Unmap PCI IO space */
if (pcibios_unmap_io_space(bus)) {
printk(KERN_ERR "%s: failed to unmap bus range\n",
__func__);
return -ERANGE;
}
+ /* Remove the EADS bridge device itself */
BUG_ON(!bus->self);
+ pr_debug("PCI: Now removing bridge device %s\n", pci_name(bus->self));
+ eeh_remove_bus_device(bus->self);
pci_remove_bus_device(bus->self);
+
return 0;
}
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5c8baa43ac9c..235fb7a5a8a5 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -27,7 +27,6 @@
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
-#include <linux/sysdev.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
@@ -35,6 +34,7 @@
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
+#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
@@ -54,6 +54,195 @@
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
+#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
+
+/* global iommu list, set NULL for ignored DMAR units */
+static struct intel_iommu **g_iommus;
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+ u64 val;
+ u64 rsvd1;
+};
+#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+ return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+ root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+ root->val |= value & VTD_PAGE_MASK;
+}
+
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+ return (struct context_entry *)
+ (root_present(root)?phys_to_virt(
+ root->val & VTD_PAGE_MASK) :
+ NULL);
+}
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: avail
+ * 8-23: domain id
+ */
+struct context_entry {
+ u64 lo;
+ u64 hi;
+};
+
+static inline bool context_present(struct context_entry *context)
+{
+ return (context->lo & 1);
+}
+static inline void context_set_present(struct context_entry *context)
+{
+ context->lo |= 1;
+}
+
+static inline void context_set_fault_enable(struct context_entry *context)
+{
+ context->lo &= (((u64)-1) << 2) | 1;
+}
+
+#define CONTEXT_TT_MULTI_LEVEL 0
+
+static inline void context_set_translation_type(struct context_entry *context,
+ unsigned long value)
+{
+ context->lo &= (((u64)-1) << 4) | 3;
+ context->lo |= (value & 3) << 2;
+}
+
+static inline void context_set_address_root(struct context_entry *context,
+ unsigned long value)
+{
+ context->lo |= value & VTD_PAGE_MASK;
+}
+
+static inline void context_set_address_width(struct context_entry *context,
+ unsigned long value)
+{
+ context->hi |= value & 7;
+}
+
+static inline void context_set_domain_id(struct context_entry *context,
+ unsigned long value)
+{
+ context->hi |= (value & ((1 << 16) - 1)) << 8;
+}
+
+static inline void context_clear_entry(struct context_entry *context)
+{
+ context->lo = 0;
+ context->hi = 0;
+}
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physical address
+ */
+struct dma_pte {
+ u64 val;
+};
+
+static inline void dma_clear_pte(struct dma_pte *pte)
+{
+ pte->val = 0;
+}
+
+static inline void dma_set_pte_readable(struct dma_pte *pte)
+{
+ pte->val |= DMA_PTE_READ;
+}
+
+static inline void dma_set_pte_writable(struct dma_pte *pte)
+{
+ pte->val |= DMA_PTE_WRITE;
+}
+
+static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
+{
+ pte->val = (pte->val & ~3) | (prot & 3);
+}
+
+static inline u64 dma_pte_addr(struct dma_pte *pte)
+{
+ return (pte->val & VTD_PAGE_MASK);
+}
+
+static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
+{
+ pte->val |= (addr & VTD_PAGE_MASK);
+}
+
+static inline bool dma_pte_present(struct dma_pte *pte)
+{
+ return (pte->val & 3) != 0;
+}
+
+/* devices under the same p2p bridge are owned in one domain */
+#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
+
+/* domain represents a virtual machine; more than one device
+ * across iommus may be owned by one domain, e.g. a kvm guest.
+ */
+#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
+
+struct dmar_domain {
+ int id; /* domain id */
+ unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
+
+ struct list_head devices; /* all devices' list */
+ struct iova_domain iovad; /* iova's that belong to this domain */
+
+ struct dma_pte *pgd; /* virtual address */
+ spinlock_t mapping_lock; /* page table lock */
+ int gaw; /* max guest address width */
+
+ /* adjusted guest address width, 0 is level 2 30-bit */
+ int agaw;
+
+ int flags; /* flags to find out type of domain */
+
+ int iommu_coherency;/* indicate coherency of iommu access */
+ int iommu_count; /* reference count of iommu */
+ spinlock_t iommu_lock; /* protect iommu set in domain */
+ u64 max_addr; /* maximum mapped address */
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+ struct list_head link; /* link to domain siblings */
+ struct list_head global; /* link to global list */
+ u8 bus; /* PCI bus number */
+ u8 devfn; /* PCI devfn number */
+ struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+ struct dmar_domain *domain; /* pointer to domain */
+};
static void flush_unmaps_timeout(unsigned long data);
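Note: taken together, the accessors above assemble a 128-bit context entry field by field; domain_context_mapping_one() below uses them in exactly this order. A worked sketch (values illustrative):

    struct context_entry ce;

    context_clear_entry(&ce);
    context_set_domain_id(&ce, 42);                    /* bits 72-87 */
    context_set_address_width(&ce, iommu->agaw);       /* bits 64-66 */
    context_set_address_root(&ce, virt_to_phys(pgd));  /* bits 12-63 */
    context_set_translation_type(&ce, CONTEXT_TT_MULTI_LEVEL);
    context_set_fault_enable(&ce);
    context_set_present(&ce);                          /* bit 0, set last */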
@@ -88,6 +277,8 @@ static int intel_iommu_strict;
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
+static struct iommu_ops intel_iommu_ops;
+
static int __init intel_iommu_setup(char *str)
{
if (!str)
@@ -184,6 +375,87 @@ void free_iova_mem(struct iova *iova)
kmem_cache_free(iommu_iova_cache, iova);
}
+
+static inline int width_to_agaw(int width);
+
+/* calculate agaw for each iommu.
+ * "SAGAW" may be different across iommus, use a default agaw, and
+ * get a supported less agaw for iommus that don't support the default agaw.
+ */
+int iommu_calculate_agaw(struct intel_iommu *iommu)
+{
+ unsigned long sagaw;
+ int agaw = -1;
+
+ sagaw = cap_sagaw(iommu->cap);
+ for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+ agaw >= 0; agaw--) {
+ if (test_bit(agaw, &sagaw))
+ break;
+ }
+
+ return agaw;
+}
+
+/* in native case, each domain is related to only one iommu */
+static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
+{
+ int iommu_id;
+
+ BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
+
+ iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
+ if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
+ return NULL;
+
+ return g_iommus[iommu_id];
+}
+
+/* "Coherency" capability may be different across iommus */
+static void domain_update_iommu_coherency(struct dmar_domain *domain)
+{
+ int i;
+
+ domain->iommu_coherency = 1;
+
+ i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
+ for (; i < g_num_of_iommus; ) {
+ if (!ecap_coherent(g_iommus[i]->ecap)) {
+ domain->iommu_coherency = 0;
+ break;
+ }
+ i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
+ }
+}
+
+static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
+{
+ struct dmar_drhd_unit *drhd = NULL;
+ int i;
+
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+
+ for (i = 0; i < drhd->devices_cnt; i++)
+ if (drhd->devices[i]->bus->number == bus &&
+ drhd->devices[i]->devfn == devfn)
+ return drhd->iommu;
+
+ if (drhd->include_all)
+ return drhd->iommu;
+ }
+
+ return NULL;
+}
+
+static void domain_flush_cache(struct dmar_domain *domain,
+ void *addr, int size)
+{
+ if (!domain->iommu_coherency)
+ clflush_cache_range(addr, size);
+}
+
/* Gets context entry for a given bus and devfn */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
u8 bus, u8 devfn)
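Note: domain_update_iommu_coherency() above downgrades the whole domain to non-coherent as soon as any attached unit lacks snoop coherency, and domain_flush_cache() then inserts clflushes only in that case. The open-coded find_first_bit/find_next_bit walk over iommu_bmp is equivalent to the following (a sketch, assuming the for_each_set_bit() helper of later kernels):

    domain->iommu_coherency = 1;
    for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
            if (!ecap_coherent(g_iommus[i]->ecap)) {
                    domain->iommu_coherency = 0;
                    break;
            }
    }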
@@ -226,7 +498,7 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
ret = 0;
goto out;
}
- ret = context_present(context[devfn]);
+ ret = context_present(&context[devfn]);
out:
spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
@@ -242,7 +514,7 @@ static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
root = &iommu->root_entry[bus];
context = get_context_addr_from_root(root);
if (context) {
- context_clear_entry(context[devfn]);
+ context_clear_entry(&context[devfn]);
__iommu_flush_cache(iommu, &context[devfn], \
sizeof(*context));
}
@@ -339,7 +611,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
if (level == 1)
break;
- if (!dma_pte_present(*pte)) {
+ if (!dma_pte_present(pte)) {
tmp_page = alloc_pgtable_page();
if (!tmp_page) {
@@ -347,18 +619,17 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
flags);
return NULL;
}
- __iommu_flush_cache(domain->iommu, tmp_page,
- PAGE_SIZE);
- dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
+ domain_flush_cache(domain, tmp_page, PAGE_SIZE);
+ dma_set_pte_addr(pte, virt_to_phys(tmp_page));
/*
* high level table always sets r/w, last level page
* table control read/write
*/
- dma_set_pte_readable(*pte);
- dma_set_pte_writable(*pte);
- __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ dma_set_pte_readable(pte);
+ dma_set_pte_writable(pte);
+ domain_flush_cache(domain, pte, sizeof(*pte));
}
- parent = phys_to_virt(dma_pte_addr(*pte));
+ parent = phys_to_virt(dma_pte_addr(pte));
level--;
}
@@ -381,9 +652,9 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
if (level == total)
return pte;
- if (!dma_pte_present(*pte))
+ if (!dma_pte_present(pte))
break;
- parent = phys_to_virt(dma_pte_addr(*pte));
+ parent = phys_to_virt(dma_pte_addr(pte));
total--;
}
return NULL;
@@ -398,8 +669,8 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
pte = dma_addr_level_pte(domain, addr, 1);
if (pte) {
- dma_clear_pte(*pte);
- __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ dma_clear_pte(pte);
+ domain_flush_cache(domain, pte, sizeof(*pte));
}
}
@@ -445,10 +716,9 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
pte = dma_addr_level_pte(domain, tmp, level);
if (pte) {
free_pgtable_page(
- phys_to_virt(dma_pte_addr(*pte)));
- dma_clear_pte(*pte);
- __iommu_flush_cache(domain->iommu,
- pte, sizeof(*pte));
+ phys_to_virt(dma_pte_addr(pte)));
+ dma_clear_pte(pte);
+ domain_flush_cache(domain, pte, sizeof(*pte));
}
tmp += level_size(level);
}
@@ -950,17 +1220,28 @@ static int iommu_init_domains(struct intel_iommu *iommu)
static void domain_exit(struct dmar_domain *domain);
+static void vm_domain_exit(struct dmar_domain *domain);
void free_dmar_iommu(struct intel_iommu *iommu)
{
struct dmar_domain *domain;
int i;
+ unsigned long flags;
i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
for (; i < cap_ndoms(iommu->cap); ) {
domain = iommu->domains[i];
clear_bit(i, iommu->domain_ids);
- domain_exit(domain);
+
+ spin_lock_irqsave(&domain->iommu_lock, flags);
+ if (--domain->iommu_count == 0) {
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+ vm_domain_exit(domain);
+ else
+ domain_exit(domain);
+ }
+ spin_unlock_irqrestore(&domain->iommu_lock, flags);
+
i = find_next_bit(iommu->domain_ids,
cap_ndoms(iommu->cap), i+1);
}
@@ -978,6 +1259,17 @@ void free_dmar_iommu(struct intel_iommu *iommu)
kfree(iommu->domains);
kfree(iommu->domain_ids);
+ g_iommus[iommu->seq_id] = NULL;
+
+ /* if all iommus are freed, free g_iommus */
+ for (i = 0; i < g_num_of_iommus; i++) {
+ if (g_iommus[i])
+ break;
+ }
+
+ if (i == g_num_of_iommus)
+ kfree(g_iommus);
+
/* free context mapping */
free_context_table(iommu);
}
@@ -1006,7 +1298,9 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
set_bit(num, iommu->domain_ids);
domain->id = num;
- domain->iommu = iommu;
+ memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+ set_bit(iommu->seq_id, &domain->iommu_bmp);
+ domain->flags = 0;
iommu->domains[num] = domain;
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -1016,10 +1310,13 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
static void iommu_free_domain(struct dmar_domain *domain)
{
unsigned long flags;
+ struct intel_iommu *iommu;
+
+ iommu = domain_get_iommu(domain);
- spin_lock_irqsave(&domain->iommu->lock, flags);
- clear_bit(domain->id, domain->iommu->domain_ids);
- spin_unlock_irqrestore(&domain->iommu->lock, flags);
+ spin_lock_irqsave(&iommu->lock, flags);
+ clear_bit(domain->id, iommu->domain_ids);
+ spin_unlock_irqrestore(&iommu->lock, flags);
}
static struct iova_domain reserved_iova_list;
@@ -1094,11 +1391,12 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
spin_lock_init(&domain->mapping_lock);
+ spin_lock_init(&domain->iommu_lock);
domain_reserve_special_ranges(domain);
/* calculate AGAW */
- iommu = domain->iommu;
+ iommu = domain_get_iommu(domain);
if (guest_width > cap_mgaw(iommu->cap))
guest_width = cap_mgaw(iommu->cap);
domain->gaw = guest_width;
@@ -1115,6 +1413,13 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
domain->agaw = agaw;
INIT_LIST_HEAD(&domain->devices);
+ if (ecap_coherent(iommu->ecap))
+ domain->iommu_coherency = 1;
+ else
+ domain->iommu_coherency = 0;
+
+ domain->iommu_count = 1;
+
/* always allocate the top pgd */
domain->pgd = (struct dma_pte *)alloc_pgtable_page();
if (!domain->pgd)
@@ -1151,28 +1456,82 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
u8 bus, u8 devfn)
{
struct context_entry *context;
- struct intel_iommu *iommu = domain->iommu;
unsigned long flags;
+ struct intel_iommu *iommu;
+ struct dma_pte *pgd;
+ unsigned long num;
+ unsigned long ndomains;
+ int id;
+ int agaw;
pr_debug("Set context mapping for %02x:%02x.%d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
BUG_ON(!domain->pgd);
+
+ iommu = device_to_iommu(bus, devfn);
+ if (!iommu)
+ return -ENODEV;
+
context = device_to_context_entry(iommu, bus, devfn);
if (!context)
return -ENOMEM;
spin_lock_irqsave(&iommu->lock, flags);
- if (context_present(*context)) {
+ if (context_present(context)) {
spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
}
- context_set_domain_id(*context, domain->id);
- context_set_address_width(*context, domain->agaw);
- context_set_address_root(*context, virt_to_phys(domain->pgd));
- context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
- context_set_fault_enable(*context);
- context_set_present(*context);
- __iommu_flush_cache(iommu, context, sizeof(*context));
+ id = domain->id;
+ pgd = domain->pgd;
+
+ if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+ int found = 0;
+
+ /* find an available domain id for this device in iommu */
+ ndomains = cap_ndoms(iommu->cap);
+ num = find_first_bit(iommu->domain_ids, ndomains);
+ for (; num < ndomains; ) {
+ if (iommu->domains[num] == domain) {
+ id = num;
+ found = 1;
+ break;
+ }
+ num = find_next_bit(iommu->domain_ids,
+ cap_ndoms(iommu->cap), num+1);
+ }
+
+ if (found == 0) {
+ num = find_first_zero_bit(iommu->domain_ids, ndomains);
+ if (num >= ndomains) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ printk(KERN_ERR "IOMMU: no free domain ids\n");
+ return -EFAULT;
+ }
+
+ set_bit(num, iommu->domain_ids);
+ iommu->domains[num] = domain;
+ id = num;
+ }
+
+ /* Skip top levels of page tables for
+ * iommu which has less agaw than default.
+ */
+ for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd)) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return -ENOMEM;
+ }
+ }
+ }
+
+ context_set_domain_id(context, id);
+ context_set_address_width(context, iommu->agaw);
+ context_set_address_root(context, virt_to_phys(pgd));
+ context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
+ context_set_fault_enable(context);
+ context_set_present(context);
+ domain_flush_cache(domain, context, sizeof(*context));
/* it's a non-present to present mapping */
if (iommu->flush.flush_context(iommu, domain->id,
@@ -1183,6 +1542,13 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
spin_unlock_irqrestore(&iommu->lock, flags);
+
+ spin_lock_irqsave(&domain->iommu_lock, flags);
+ if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
+ domain->iommu_count++;
+ domain_update_iommu_coherency(domain);
+ }
+ spin_unlock_irqrestore(&domain->iommu_lock, flags);
return 0;
}
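Note: a worked example of the "skip top levels" loop above (numbers illustrative): a virtual-machine domain built with agaw 2 (48-bit width, 4-level table) attaches a device behind a unit that only supports agaw 1 (39-bit, 3-level). One loop iteration descends from the top-level pgd into its single populated entry, and that level-3 table becomes the address-space root written into the context entry:

    /* illustrative: domain->agaw == 2, iommu->agaw == 1 */
    pgd = domain->pgd;                      /* 4-level root, 48-bit width */
    pgd = phys_to_virt(dma_pte_addr(pgd));  /* one level down: 3-level root */
    context_set_address_root(context, virt_to_phys(pgd));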
@@ -1218,13 +1584,17 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
tmp->bus->number, tmp->devfn);
}
-static int domain_context_mapped(struct dmar_domain *domain,
- struct pci_dev *pdev)
+static int domain_context_mapped(struct pci_dev *pdev)
{
int ret;
struct pci_dev *tmp, *parent;
+ struct intel_iommu *iommu;
+
+ iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+ if (!iommu)
+ return -ENODEV;
- ret = device_context_mapped(domain->iommu,
+ ret = device_context_mapped(iommu,
pdev->bus->number, pdev->devfn);
if (!ret)
return ret;
@@ -1235,17 +1605,17 @@ static int domain_context_mapped(struct dmar_domain *domain,
/* Secondary interface's bus number and devfn 0 */
parent = pdev->bus->self;
while (parent != tmp) {
- ret = device_context_mapped(domain->iommu, parent->bus->number,
+ ret = device_context_mapped(iommu, parent->bus->number,
parent->devfn);
if (!ret)
return ret;
parent = parent->bus->self;
}
if (tmp->is_pcie)
- return device_context_mapped(domain->iommu,
+ return device_context_mapped(iommu,
tmp->subordinate->number, 0);
else
- return device_context_mapped(domain->iommu,
+ return device_context_mapped(iommu,
tmp->bus->number, tmp->devfn);
}
@@ -1273,22 +1643,25 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
/* We don't need lock here, nobody else
* touches the iova range
*/
- BUG_ON(dma_pte_addr(*pte));
- dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
- dma_set_pte_prot(*pte, prot);
- __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
+ BUG_ON(dma_pte_addr(pte));
+ dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
+ dma_set_pte_prot(pte, prot);
+ domain_flush_cache(domain, pte, sizeof(*pte));
start_pfn++;
index++;
}
return 0;
}
-static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
- clear_context_table(domain->iommu, bus, devfn);
- domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
+ if (!iommu)
+ return;
+
+ clear_context_table(iommu, bus, devfn);
+ iommu->flush.flush_context(iommu, 0, 0, 0,
DMA_CCMD_GLOBAL_INVL, 0);
- domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0,
DMA_TLB_GLOBAL_FLUSH, 0);
}
@@ -1296,6 +1669,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
{
struct device_domain_info *info;
unsigned long flags;
+ struct intel_iommu *iommu;
spin_lock_irqsave(&device_domain_lock, flags);
while (!list_empty(&domain->devices)) {
@@ -1307,7 +1681,8 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
info->dev->dev.archdata.iommu = NULL;
spin_unlock_irqrestore(&device_domain_lock, flags);
- detach_domain_for_dev(info->domain, info->bus, info->devfn);
+ iommu = device_to_iommu(info->bus, info->devfn);
+ iommu_detach_dev(iommu, info->bus, info->devfn);
free_devinfo_mem(info);
spin_lock_irqsave(&device_domain_lock, flags);
@@ -1400,7 +1775,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
info->dev = NULL;
info->domain = domain;
/* This domain is shared by devices under p2p bridge */
- domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
+ domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
/* pcie-to-pci bridge already has a domain, uses it */
found = NULL;
@@ -1563,6 +1938,11 @@ static void __init iommu_prepare_gfx_mapping(void)
printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
}
}
+#else /* !CONFIG_DMAR_GFX_WA */
+static inline void iommu_prepare_gfx_mapping(void)
+{
+ return;
+}
#endif
#ifdef CONFIG_DMAR_FLOPPY_WA
@@ -1590,7 +1970,7 @@ static inline void iommu_prepare_isa(void)
}
#endif /* !CONFIG_DMAR_FLPY_WA */
-int __init init_dmars(void)
+static int __init init_dmars(void)
{
struct dmar_drhd_unit *drhd;
struct dmar_rmrr_unit *rmrr;
@@ -1613,9 +1993,18 @@ int __init init_dmars(void)
*/
}
+ g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
+ GFP_KERNEL);
+ if (!g_iommus) {
+ printk(KERN_ERR "Allocating global iommu array failed\n");
+ ret = -ENOMEM;
+ goto error;
+ }
+
deferred_flush = kzalloc(g_num_of_iommus *
sizeof(struct deferred_flush_tables), GFP_KERNEL);
if (!deferred_flush) {
+ kfree(g_iommus);
ret = -ENOMEM;
goto error;
}
@@ -1625,6 +2014,7 @@ int __init init_dmars(void)
continue;
iommu = drhd->iommu;
+ g_iommus[iommu->seq_id] = iommu;
ret = iommu_init_domains(iommu);
if (ret)
@@ -1737,6 +2127,7 @@ error:
iommu = drhd->iommu;
free_iommu(iommu);
}
+ kfree(g_iommus);
return ret;
}
@@ -1805,7 +2196,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
}
/* make sure context mapping is ok */
- if (unlikely(!domain_context_mapped(domain, pdev))) {
+ if (unlikely(!domain_context_mapped(pdev))) {
ret = domain_context_mapping(domain, pdev);
if (ret) {
printk(KERN_ERR
@@ -1827,6 +2218,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
struct iova *iova;
int prot = 0;
int ret;
+ struct intel_iommu *iommu;
BUG_ON(dir == DMA_NONE);
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
@@ -1836,6 +2228,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
if (!domain)
return 0;
+ iommu = domain_get_iommu(domain);
size = aligned_size((u64)paddr, size);
iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
@@ -1849,7 +2242,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
* mappings..
*/
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
- !cap_zlr(domain->iommu->cap))
+ !cap_zlr(iommu->cap))
prot |= DMA_PTE_READ;
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;
@@ -1865,10 +2258,10 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
goto error;
/* it's a non-present to present mapping */
- ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
+ ret = iommu_flush_iotlb_psi(iommu, domain->id,
start_paddr, size >> VTD_PAGE_SHIFT, 1);
if (ret)
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
return start_paddr + ((u64)paddr & (~PAGE_MASK));
@@ -1895,10 +2288,11 @@ static void flush_unmaps(void)
/* just flush them all */
for (i = 0; i < g_num_of_iommus; i++) {
- if (deferred_flush[i].next) {
- struct intel_iommu *iommu =
- deferred_flush[i].domain[0]->iommu;
+ struct intel_iommu *iommu = g_iommus[i];
+ if (!iommu)
+ continue;
+ if (deferred_flush[i].next) {
iommu->flush.flush_iotlb(iommu, 0, 0, 0,
DMA_TLB_GLOBAL_FLUSH, 0);
for (j = 0; j < deferred_flush[i].next; j++) {
@@ -1925,12 +2319,14 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
{
unsigned long flags;
int next, iommu_id;
+ struct intel_iommu *iommu;
spin_lock_irqsave(&async_umap_flush_lock, flags);
if (list_size == HIGH_WATER_MARK)
flush_unmaps();
- iommu_id = dom->iommu->seq_id;
+ iommu = domain_get_iommu(dom);
+ iommu_id = iommu->seq_id;
next = deferred_flush[iommu_id].next;
deferred_flush[iommu_id].domain[next] = dom;
@@ -1952,12 +2348,15 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
struct dmar_domain *domain;
unsigned long start_addr;
struct iova *iova;
+ struct intel_iommu *iommu;
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
BUG_ON(!domain);
+ iommu = domain_get_iommu(domain);
+
iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
if (!iova)
return;
@@ -1973,9 +2372,9 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
if (intel_iommu_strict) {
- if (iommu_flush_iotlb_psi(domain->iommu,
+ if (iommu_flush_iotlb_psi(iommu,
domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
/* free iova */
__free_iova(&domain->iovad, iova);
} else {
@@ -2036,11 +2435,15 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
size_t size = 0;
void *addr;
struct scatterlist *sg;
+ struct intel_iommu *iommu;
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return;
domain = find_domain(pdev);
+ BUG_ON(!domain);
+
+ iommu = domain_get_iommu(domain);
iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
if (!iova)
@@ -2057,9 +2460,9 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
/* free page tables */
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
- if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
+ if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
size >> VTD_PAGE_SHIFT, 0))
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
/* free iova */
__free_iova(&domain->iovad, iova);
@@ -2093,6 +2496,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
int ret;
struct scatterlist *sg;
unsigned long start_addr;
+ struct intel_iommu *iommu;
BUG_ON(dir == DMA_NONE);
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
@@ -2102,6 +2506,8 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
if (!domain)
return 0;
+ iommu = domain_get_iommu(domain);
+
for_each_sg(sglist, sg, nelems, i) {
addr = SG_ENT_VIRT_ADDRESS(sg);
addr = (void *)virt_to_phys(addr);
@@ -2119,7 +2525,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
* mappings..
*/
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
- !cap_zlr(domain->iommu->cap))
+ !cap_zlr(iommu->cap))
prot |= DMA_PTE_READ;
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
prot |= DMA_PTE_WRITE;
@@ -2151,9 +2557,9 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
}
/* it's a non-present to present mapping */
- if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
+ if (iommu_flush_iotlb_psi(iommu, domain->id,
start_addr, offset >> VTD_PAGE_SHIFT, 1))
- iommu_flush_write_buffer(domain->iommu);
+ iommu_flush_write_buffer(iommu);
return nelems;
}
@@ -2325,10 +2731,220 @@ int __init intel_iommu_init(void)
init_timer(&unmap_timer);
force_iommu = 1;
dma_ops = &intel_dma_ops;
+
+ register_iommu(&intel_iommu_ops);
+
+ return 0;
+}
+
+static int vm_domain_add_dev_info(struct dmar_domain *domain,
+ struct pci_dev *pdev)
+{
+ struct device_domain_info *info;
+ unsigned long flags;
+
+ info = alloc_devinfo_mem();
+ if (!info)
+ return -ENOMEM;
+
+ info->bus = pdev->bus->number;
+ info->devfn = pdev->devfn;
+ info->dev = pdev;
+ info->domain = domain;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ list_add(&info->link, &domain->devices);
+ list_add(&info->global, &device_domain_list);
+ pdev->dev.archdata.iommu = info;
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ return 0;
+}
+
+static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
+ struct pci_dev *pdev)
+{
+ struct device_domain_info *info;
+ struct intel_iommu *iommu;
+ unsigned long flags;
+ int found = 0;
+ struct list_head *entry, *tmp;
+
+ iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+ if (!iommu)
+ return;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ list_for_each_safe(entry, tmp, &domain->devices) {
+ info = list_entry(entry, struct device_domain_info, link);
+ if (info->bus == pdev->bus->number &&
+ info->devfn == pdev->devfn) {
+ list_del(&info->link);
+ list_del(&info->global);
+ if (info->dev)
+ info->dev->dev.archdata.iommu = NULL;
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ iommu_detach_dev(iommu, info->bus, info->devfn);
+ free_devinfo_mem(info);
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+
+ if (found)
+ break;
+ else
+ continue;
+ }
+
+ /* if there are no other devices under the same iommu
+ * owned by this domain, clear this iommu in iommu_bmp,
+ * and update the iommu count and coherency
+ */
+ if (device_to_iommu(info->bus, info->devfn) == iommu)
+ found = 1;
+ }
+
+ if (found == 0) {
+ unsigned long tmp_flags;
+ spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
+ clear_bit(iommu->seq_id, &domain->iommu_bmp);
+ domain->iommu_count--;
+ domain_update_iommu_coherency(domain);
+ spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
+ }
+
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+}
+
+static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
+{
+ struct device_domain_info *info;
+ struct intel_iommu *iommu;
+ unsigned long flags1, flags2;
+
+ spin_lock_irqsave(&device_domain_lock, flags1);
+ while (!list_empty(&domain->devices)) {
+ info = list_entry(domain->devices.next,
+ struct device_domain_info, link);
+ list_del(&info->link);
+ list_del(&info->global);
+ if (info->dev)
+ info->dev->dev.archdata.iommu = NULL;
+
+ spin_unlock_irqrestore(&device_domain_lock, flags1);
+
+ iommu = device_to_iommu(info->bus, info->devfn);
+ iommu_detach_dev(iommu, info->bus, info->devfn);
+
+ /* clear this iommu in iommu_bmp, update iommu count
+ * and coherency
+ */
+ spin_lock_irqsave(&domain->iommu_lock, flags2);
+ if (test_and_clear_bit(iommu->seq_id,
+ &domain->iommu_bmp)) {
+ domain->iommu_count--;
+ domain_update_iommu_coherency(domain);
+ }
+ spin_unlock_irqrestore(&domain->iommu_lock, flags2);
+
+ free_devinfo_mem(info);
+ spin_lock_irqsave(&device_domain_lock, flags1);
+ }
+ spin_unlock_irqrestore(&device_domain_lock, flags1);
+}
+
+/* domain id for virtual machine; it won't be set in context */
+static unsigned long vm_domid;
+
+static int vm_domain_min_agaw(struct dmar_domain *domain)
+{
+ int i;
+ int min_agaw = domain->agaw;
+
+ i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
+ for (; i < g_num_of_iommus; ) {
+ if (min_agaw > g_iommus[i]->agaw)
+ min_agaw = g_iommus[i]->agaw;
+
+ i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
+ }
+
+ return min_agaw;
+}
+
+static struct dmar_domain *iommu_alloc_vm_domain(void)
+{
+ struct dmar_domain *domain;
+
+ domain = alloc_domain_mem();
+ if (!domain)
+ return NULL;
+
+ domain->id = vm_domid++;
+ memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
+ domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
+
+ return domain;
+}
+
+static int vm_domain_init(struct dmar_domain *domain, int guest_width)
+{
+ int adjust_width;
+
+ init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
+ spin_lock_init(&domain->mapping_lock);
+ spin_lock_init(&domain->iommu_lock);
+
+ domain_reserve_special_ranges(domain);
+
+ /* calculate AGAW */
+ domain->gaw = guest_width;
+ adjust_width = guestwidth_to_adjustwidth(guest_width);
+ domain->agaw = width_to_agaw(adjust_width);
+
+ INIT_LIST_HEAD(&domain->devices);
+
+ domain->iommu_count = 0;
+ domain->iommu_coherency = 0;
+ domain->max_addr = 0;
+
+ /* always allocate the top pgd */
+ domain->pgd = (struct dma_pte *)alloc_pgtable_page();
+ if (!domain->pgd)
+ return -ENOMEM;
+ domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
return 0;
}
-void intel_iommu_domain_exit(struct dmar_domain *domain)
+static void iommu_free_vm_domain(struct dmar_domain *domain)
+{
+ unsigned long flags;
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ unsigned long i;
+ unsigned long ndomains;
+
+ for_each_drhd_unit(drhd) {
+ if (drhd->ignored)
+ continue;
+ iommu = drhd->iommu;
+
+ ndomains = cap_ndoms(iommu->cap);
+ i = find_first_bit(iommu->domain_ids, ndomains);
+ for (; i < ndomains; ) {
+ if (iommu->domains[i] == domain) {
+ spin_lock_irqsave(&iommu->lock, flags);
+ clear_bit(i, iommu->domain_ids);
+ iommu->domains[i] = NULL;
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ break;
+ }
+ i = find_next_bit(iommu->domain_ids, ndomains, i+1);
+ }
+ }
+}
+
+static void vm_domain_exit(struct dmar_domain *domain)
{
u64 end;
@@ -2336,6 +2952,9 @@ void intel_iommu_domain_exit(struct dmar_domain *domain)
if (!domain)
return;
+ vm_domain_remove_all_dev_info(domain);
+ /* destroy iovas */
+ put_iova_domain(&domain->iovad);
end = DOMAIN_MAX_ADDR(domain->gaw);
end = end & (~VTD_PAGE_MASK);
@@ -2345,94 +2964,167 @@ void intel_iommu_domain_exit(struct dmar_domain *domain)
/* free page tables */
dma_pte_free_pagetable(domain, 0, end);
- iommu_free_domain(domain);
+ iommu_free_vm_domain(domain);
free_domain_mem(domain);
}
-EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
-struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
+static int intel_iommu_domain_init(struct iommu_domain *domain)
{
- struct dmar_drhd_unit *drhd;
- struct dmar_domain *domain;
- struct intel_iommu *iommu;
-
- drhd = dmar_find_matched_drhd_unit(pdev);
- if (!drhd) {
- printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
- return NULL;
- }
+ struct dmar_domain *dmar_domain;
- iommu = drhd->iommu;
- if (!iommu) {
- printk(KERN_ERR
- "intel_iommu_domain_alloc: iommu == NULL\n");
- return NULL;
- }
- domain = iommu_alloc_domain(iommu);
- if (!domain) {
+ dmar_domain = iommu_alloc_vm_domain();
+ if (!dmar_domain) {
printk(KERN_ERR
- "intel_iommu_domain_alloc: domain == NULL\n");
- return NULL;
+ "intel_iommu_domain_init: dmar_domain == NULL\n");
+ return -ENOMEM;
}
- if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+ if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
printk(KERN_ERR
- "intel_iommu_domain_alloc: domain_init() failed\n");
- intel_iommu_domain_exit(domain);
- return NULL;
+ "intel_iommu_domain_init() failed\n");
+ vm_domain_exit(dmar_domain);
+ return -ENOMEM;
}
- return domain;
+ domain->priv = dmar_domain;
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
-int intel_iommu_context_mapping(
- struct dmar_domain *domain, struct pci_dev *pdev)
+static void intel_iommu_domain_destroy(struct iommu_domain *domain)
{
- int rc;
- rc = domain_context_mapping(domain, pdev);
- return rc;
+ struct dmar_domain *dmar_domain = domain->priv;
+
+ domain->priv = NULL;
+ vm_domain_exit(dmar_domain);
}
-EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
-int intel_iommu_page_mapping(
- struct dmar_domain *domain, dma_addr_t iova,
- u64 hpa, size_t size, int prot)
+static int intel_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
{
- int rc;
- rc = domain_page_mapping(domain, iova, hpa, size, prot);
- return rc;
+ struct dmar_domain *dmar_domain = domain->priv;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct intel_iommu *iommu;
+ int addr_width;
+ u64 end;
+ int ret;
+
+ /* normally pdev is not mapped */
+ if (unlikely(domain_context_mapped(pdev))) {
+ struct dmar_domain *old_domain;
+
+ old_domain = find_domain(pdev);
+ if (old_domain) {
+ if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+ vm_domain_remove_one_dev_info(old_domain, pdev);
+ else
+ domain_remove_dev_info(old_domain);
+ }
+ }
+
+ iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ /* check if this iommu agaw is sufficient for max mapped address */
+ addr_width = agaw_to_width(iommu->agaw);
+ end = DOMAIN_MAX_ADDR(addr_width);
+ end = end & VTD_PAGE_MASK;
+ if (end < dmar_domain->max_addr) {
+ printk(KERN_ERR "%s: iommu agaw (%d) is not "
+ "sufficient for the mapped address (%llx)\n",
+ __func__, iommu->agaw, dmar_domain->max_addr);
+ return -EFAULT;
+ }
+
+ ret = domain_context_mapping(dmar_domain, pdev);
+ if (ret)
+ return ret;
+
+ ret = vm_domain_add_dev_info(dmar_domain, pdev);
+ return ret;
}
-EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
-void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+static void intel_iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
{
- detach_domain_for_dev(domain, bus, devfn);
+ struct dmar_domain *dmar_domain = domain->priv;
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ vm_domain_remove_one_dev_info(dmar_domain, pdev);
}
-EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
-struct dmar_domain *
-intel_iommu_find_domain(struct pci_dev *pdev)
+static int intel_iommu_map_range(struct iommu_domain *domain,
+ unsigned long iova, phys_addr_t hpa,
+ size_t size, int iommu_prot)
{
- return find_domain(pdev);
+ struct dmar_domain *dmar_domain = domain->priv;
+ u64 max_addr;
+ int addr_width;
+ int prot = 0;
+ int ret;
+
+ if (iommu_prot & IOMMU_READ)
+ prot |= DMA_PTE_READ;
+ if (iommu_prot & IOMMU_WRITE)
+ prot |= DMA_PTE_WRITE;
+
+ max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
+ if (dmar_domain->max_addr < max_addr) {
+ int min_agaw;
+ u64 end;
+
+ /* check if minimum agaw is sufficient for mapped address */
+ min_agaw = vm_domain_min_agaw(dmar_domain);
+ addr_width = agaw_to_width(min_agaw);
+ end = DOMAIN_MAX_ADDR(addr_width);
+ end = end & VTD_PAGE_MASK;
+ if (end < max_addr) {
+ printk(KERN_ERR "%s: iommu agaw (%d) is not "
+ "sufficient for the mapped address (%llx)\n",
+ __func__, min_agaw, max_addr);
+ return -EFAULT;
+ }
+ dmar_domain->max_addr = max_addr;
+ }
+
+ ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
+ return ret;
}
-EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
-int intel_iommu_found(void)
+static void intel_iommu_unmap_range(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
- return g_num_of_iommus;
+ struct dmar_domain *dmar_domain = domain->priv;
+ dma_addr_t base;
+
+ /* The address might not be aligned */
+ base = iova & VTD_PAGE_MASK;
+ size = VTD_PAGE_ALIGN(size);
+ dma_pte_clear_range(dmar_domain, base, base + size);
+
+ if (dmar_domain->max_addr == base + size)
+ dmar_domain->max_addr = base;
}
-EXPORT_SYMBOL_GPL(intel_iommu_found);
-u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
+static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
+ unsigned long iova)
{
+ struct dmar_domain *dmar_domain = domain->priv;
struct dma_pte *pte;
- u64 pfn;
-
- pfn = 0;
- pte = addr_to_dma_pte(domain, iova);
+ u64 phys = 0;
+ pte = addr_to_dma_pte(dmar_domain, iova);
if (pte)
- pfn = dma_pte_addr(*pte);
+ phys = dma_pte_addr(pte);
- return pfn >> VTD_PAGE_SHIFT;
+ return phys;
}
-EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
+
+static struct iommu_ops intel_iommu_ops = {
+ .domain_init = intel_iommu_domain_init,
+ .domain_destroy = intel_iommu_domain_destroy,
+ .attach_dev = intel_iommu_attach_device,
+ .detach_dev = intel_iommu_detach_device,
+ .map = intel_iommu_map_range,
+ .unmap = intel_iommu_unmap_range,
+ .iova_to_phys = intel_iommu_iova_to_phys,
+};
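Note: the net effect of this file's changes is that the exported intel_iommu_* entry points are gone and VT-d instead registers intel_iommu_ops with the generic IOMMU layer. A consumer such as KVM device assignment would then drive it through linux/iommu.h roughly as follows (a minimal sketch against the map_range-era interface; function and parameter names in the caller are hypothetical, error handling trimmed):

    #include <linux/iommu.h>
    #include <linux/pci.h>

    static int assign_one_page(struct pci_dev *pdev, unsigned long iova,
                               phys_addr_t paddr)
    {
            struct iommu_domain *dom;
            int ret;

            dom = iommu_domain_alloc();        /* -> intel_iommu_domain_init() */
            if (!dom)
                    return -ENOMEM;

            ret = iommu_attach_device(dom, &pdev->dev); /* context mapping */
            if (ret)
                    goto out_free;

            ret = iommu_map_range(dom, iova, paddr, PAGE_SIZE,
                                  IOMMU_READ | IOMMU_WRITE);
            if (ret)
                    iommu_detach_device(dom, &pdev->dev);
    out_free:
            if (ret)
                    iommu_domain_free(dom);  /* on success the caller keeps dom */
            return ret;
    }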
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 2de5a3238c94..f78371b22529 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -5,6 +5,7 @@
#include <linux/pci.h>
#include <linux/irq.h>
#include <asm/io_apic.h>
+#include <asm/smp.h>
#include <linux/intel-iommu.h>
#include "intr_remapping.h"
@@ -19,17 +20,75 @@ struct irq_2_iommu {
u8 irte_mask;
};
-static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu)
+{
+ struct irq_2_iommu *iommu;
+ int node;
+
+ node = cpu_to_node(cpu);
+
+ iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node);
+ printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node);
+
+ return iommu;
+}
static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
{
- return (irq < nr_irqs) ? irq_2_iommuX + irq : NULL;
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+
+ if (WARN_ON_ONCE(!desc))
+ return NULL;
+
+ return desc->irq_2_iommu;
+}
+
+static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
+{
+ struct irq_desc *desc;
+ struct irq_2_iommu *irq_iommu;
+
+ /*
+ * alloc irq desc if not allocated already.
+ */
+ desc = irq_to_desc_alloc_cpu(irq, cpu);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ return NULL;
+ }
+
+ irq_iommu = desc->irq_2_iommu;
+
+ if (!irq_iommu)
+ desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu);
+
+ return desc->irq_2_iommu;
}
static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
{
+ return irq_2_iommu_alloc_cpu(irq, boot_cpu_id);
+}
+
+#else /* !CONFIG_SPARSE_IRQ */
+
+static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
+
+static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
+{
+ if (irq < nr_irqs)
+ return &irq_2_iommuX[irq];
+
+ return NULL;
+}
+static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
+{
return irq_2_iommu(irq);
}
+#endif
static DEFINE_SPINLOCK(irq_2_ir_lock);
@@ -86,9 +145,11 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
if (!count)
return -1;
+#ifndef CONFIG_SPARSE_IRQ
/* protect irq_2_iommu_alloc later */
if (irq >= nr_irqs)
return -1;
+#endif
/*
* start the IRTE search from index 0.
@@ -130,6 +191,12 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
table->base[i].present = 1;
irq_iommu = irq_2_iommu_alloc(irq);
+ if (!irq_iommu) {
+ spin_unlock(&irq_2_ir_lock);
+ printk(KERN_ERR "can't allocate irq_2_iommu\n");
+ return -1;
+ }
+
irq_iommu->iommu = iommu;
irq_iommu->irte_index = index;
irq_iommu->sub_handle = 0;
@@ -177,6 +244,12 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
irq_iommu = irq_2_iommu_alloc(irq);
+ if (!irq_iommu) {
+ spin_unlock(&irq_2_ir_lock);
+ printk(KERN_ERR "can't allocate irq_2_iommu\n");
+ return -1;
+ }
+
irq_iommu->iommu = iommu;
irq_iommu->irte_index = index;
irq_iommu->sub_handle = subhandle;
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 74801f7df9c9..11a51f8ed3b3 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -103,11 +103,11 @@ static void msix_set_enable(struct pci_dev *dev, int enable)
}
}
-static void msix_flush_writes(unsigned int irq)
+static void msix_flush_writes(struct irq_desc *desc)
{
struct msi_desc *entry;
- entry = get_irq_msi(irq);
+ entry = get_irq_desc_msi(desc);
BUG_ON(!entry || !entry->dev);
switch (entry->msi_attrib.type) {
case PCI_CAP_ID_MSI:
@@ -135,11 +135,11 @@ static void msix_flush_writes(unsigned int irq)
* Returns 1 if it succeeded in masking the interrupt and 0 if the device
* doesn't support MSI masking.
*/
-static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
+static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag)
{
struct msi_desc *entry;
- entry = get_irq_msi(irq);
+ entry = get_irq_desc_msi(desc);
BUG_ON(!entry || !entry->dev);
switch (entry->msi_attrib.type) {
case PCI_CAP_ID_MSI:
@@ -172,9 +172,9 @@ static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
return 1;
}
-void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
{
- struct msi_desc *entry = get_irq_msi(irq);
+ struct msi_desc *entry = get_irq_desc_msi(desc);
switch(entry->msi_attrib.type) {
case PCI_CAP_ID_MSI:
{
@@ -211,9 +211,16 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg)
}
}
-void write_msi_msg(unsigned int irq, struct msi_msg *msg)
+void read_msi_msg(unsigned int irq, struct msi_msg *msg)
{
- struct msi_desc *entry = get_irq_msi(irq);
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ read_msi_msg_desc(desc, msg);
+}
+
+void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+{
+ struct msi_desc *entry = get_irq_desc_msi(desc);
switch (entry->msi_attrib.type) {
case PCI_CAP_ID_MSI:
{
@@ -252,21 +259,31 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg)
entry->msg = *msg;
}
+void write_msi_msg(unsigned int irq, struct msi_msg *msg)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ write_msi_msg_desc(desc, msg);
+}
+
void mask_msi_irq(unsigned int irq)
{
- msi_set_mask_bits(irq, 1, 1);
- msix_flush_writes(irq);
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ msi_set_mask_bits(desc, 1, 1);
+ msix_flush_writes(desc);
}
void unmask_msi_irq(unsigned int irq)
{
- msi_set_mask_bits(irq, 1, 0);
- msix_flush_writes(irq);
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ msi_set_mask_bits(desc, 1, 0);
+ msix_flush_writes(desc);
}
static int msi_free_irqs(struct pci_dev* dev);
-
static struct msi_desc* alloc_msi_entry(void)
{
struct msi_desc *entry;
@@ -303,9 +320,11 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
pci_intx_for_msi(dev, 0);
msi_set_enable(dev, 0);
write_msi_msg(dev->irq, &entry->msg);
- if (entry->msi_attrib.maskbit)
- msi_set_mask_bits(dev->irq, entry->msi_attrib.maskbits_mask,
+ if (entry->msi_attrib.maskbit) {
+ struct irq_desc *desc = irq_to_desc(dev->irq);
+ msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask,
entry->msi_attrib.masked);
+ }
pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
control &= ~PCI_MSI_FLAGS_QSIZE;
@@ -327,8 +346,9 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
msix_set_enable(dev, 0);
list_for_each_entry(entry, &dev->msi_list, list) {
+ struct irq_desc *desc = irq_to_desc(entry->irq);
write_msi_msg(entry->irq, &entry->msg);
- msi_set_mask_bits(entry->irq, 1, entry->msi_attrib.masked);
+ msi_set_mask_bits(desc, 1, entry->msi_attrib.masked);
}
BUG_ON(list_empty(&dev->msi_list));
@@ -596,7 +616,8 @@ void pci_msi_shutdown(struct pci_dev* dev)
/* Return the the pci reset with msi irqs unmasked */
if (entry->msi_attrib.maskbit) {
u32 mask = entry->msi_attrib.maskbits_mask;
- msi_set_mask_bits(dev->irq, mask, ~mask);
+ struct irq_desc *desc = irq_to_desc(dev->irq);
+ msi_set_mask_bits(desc, mask, ~mask);
}
if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
return;
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 5d72866897a8..c88485860a0a 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -74,7 +74,7 @@ static ssize_t local_cpus_show(struct device *dev,
int len;
mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
- len = cpumask_scnprintf(buf, PAGE_SIZE-2, mask);
+ len = cpumask_scnprintf(buf, PAGE_SIZE-2, &mask);
buf[len++] = '\n';
buf[len] = '\0';
return len;
@@ -88,7 +88,7 @@ static ssize_t local_cpulist_show(struct device *dev,
int len;
mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
- len = cpulist_scnprintf(buf, PAGE_SIZE-2, mask);
+ len = cpulist_scnprintf(buf, PAGE_SIZE-2, &mask);
buf[len++] = '\n';
buf[len] = '\0';
return len;
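The two pci-sysfs.c hunks (and the matching probe.c hunk further down) track the cpumask API moving from pass-by-value to pass-by-pointer; a hedged sketch of the adjusted call, where show_mask() is a placeholder helper and not part of this patch:

/* cpumask_scnprintf()/cpulist_scnprintf() now take a const pointer, so
 * an on-stack cpumask_t must be passed with '&'.
 */
static ssize_t show_mask(char *buf, const cpumask_t *mask)
{
	int len = cpumask_scnprintf(buf, PAGE_SIZE - 2, mask);

	buf[len++] = '\n';
	buf[len] = '\0';
	return len;
}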
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 28af496b441e..061d1ee0046a 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2042,7 +2042,7 @@ static int __devinit pci_init(void)
return 0;
}
-static int __devinit pci_setup(char *str)
+static int __init pci_setup(char *str)
{
while (str) {
char *k = strchr(str, ',');
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index dfc63d01f20a..aac7006949f1 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -252,7 +252,7 @@ static void report_resume(struct pci_dev *dev, void *data)
if (!dev->driver ||
!dev->driver->err_handler ||
- !dev->driver->err_handler->slot_reset)
+ !dev->driver->err_handler->resume)
return;
err_handler = dev->driver->err_handler;
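The one-line aerdrv_core.c fix makes report_resume() test the callback it actually invokes: previously a driver providing slot_reset but no resume handler would have been called through a NULL ->resume pointer. A driver-side sketch of the structure involved, with example_resume() as a placeholder:

static void example_resume(struct pci_dev *dev)
{
	/* resume normal device operation after error recovery */
}

static struct pci_error_handlers example_err_handler = {
	.resume = example_resume,
};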
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 8f63f4c6b85f..9aad608bcf3f 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -16,6 +16,7 @@
#include <linux/pm.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/jiffies.h>
#include <linux/pci-aspm.h>
#include "../pci.h"
@@ -161,11 +162,12 @@ static void pcie_check_clock_pm(struct pci_dev *pdev)
*/
static void pcie_aspm_configure_common_clock(struct pci_dev *pdev)
{
- int pos, child_pos;
+ int pos, child_pos, i = 0;
u16 reg16 = 0;
struct pci_dev *child_dev;
int same_clock = 1;
-
+ unsigned long start_jiffies;
+ u16 child_regs[8], parent_reg;
/*
* all functions of a slot should have the same Slot Clock
* Configuration, so just check one function
@@ -191,16 +193,19 @@ static void pcie_aspm_configure_common_clock(struct pci_dev *pdev)
child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
pci_read_config_word(child_dev, child_pos + PCI_EXP_LNKCTL,
&reg16);
+ child_regs[i] = reg16;
if (same_clock)
reg16 |= PCI_EXP_LNKCTL_CCC;
else
reg16 &= ~PCI_EXP_LNKCTL_CCC;
pci_write_config_word(child_dev, child_pos + PCI_EXP_LNKCTL,
reg16);
+ i++;
}
/* Configure upstream component */
pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
+ parent_reg = reg16;
if (same_clock)
reg16 |= PCI_EXP_LNKCTL_CCC;
else
@@ -212,12 +217,30 @@ static void pcie_aspm_configure_common_clock(struct pci_dev *pdev)
pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16);
/* Wait for link training end */
- while (1) {
+ /* break out after waiting for 1 second */
+ start_jiffies = jiffies;
+ while ((jiffies - start_jiffies) < HZ) {
pci_read_config_word(pdev, pos + PCI_EXP_LNKSTA, &reg16);
if (!(reg16 & PCI_EXP_LNKSTA_LT))
break;
cpu_relax();
}
+ /* training failed -> recover */
+ if ((jiffies - start_jiffies) >= HZ) {
+ dev_printk(KERN_ERR, &pdev->dev, "ASPM: Could not configure"
+ " common clock\n");
+ i = 0;
+ list_for_each_entry(child_dev, &pdev->subordinate->devices,
+ bus_list) {
+ child_pos = pci_find_capability(child_dev,
+ PCI_CAP_ID_EXP);
+ pci_write_config_word(child_dev,
+ child_pos + PCI_EXP_LNKCTL,
+ child_regs[i]);
+ i++;
+ }
+ pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, parent_reg);
+ }
}
/*
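The aspm.c hunk bounds the link-training wait at one second of jiffies and, on timeout, restores the LNKCTL values saved before the common-clock writes (child_regs[] holds 8 entries, matching the at most 8 functions of the single slot being walked). A sketch of the same bound expressed with the wrap-safe time_after() helper, assuming pdev and pos as in the function above:

/* Sketch only: one-second bound via time_after(), which stays correct
 * across jiffies wraparound.
 */
unsigned long timeout = jiffies + HZ;
u16 reg16;

do {
	pci_read_config_word(pdev, pos + PCI_EXP_LNKSTA, &reg16);
	if (!(reg16 & PCI_EXP_LNKSTA_LT))
		break;
	cpu_relax();
} while (!time_after(jiffies, timeout));

if (reg16 & PCI_EXP_LNKSTA_LT) {
	/* timed out: roll back the saved LNKCTL values */
}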
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 003a9b3c293f..5b3f5937ecf5 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -55,8 +55,8 @@ static ssize_t pci_bus_show_cpuaffinity(struct device *dev,
cpumask = pcibus_to_cpumask(to_pci_bus(dev));
ret = type?
- cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask):
- cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask);
+ cpulist_scnprintf(buf, PAGE_SIZE-2, &cpumask) :
+ cpumask_scnprintf(buf, PAGE_SIZE-2, &cpumask);
buf[ret++] = '\n';
buf[ret] = '\0';
return ret;
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 5049a47030ac..ce0985615133 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -22,6 +22,7 @@
#include <linux/delay.h>
#include <linux/acpi.h>
#include <linux/kallsyms.h>
+#include <linux/dmi.h>
#include "pci.h"
int isa_dma_bridge_buggy;
@@ -605,27 +606,6 @@ static void __init quirk_ioapic_rmw(struct pci_dev *dev)
sis_apic_bug = 1;
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI, PCI_ANY_ID, quirk_ioapic_rmw);
-
-#define AMD8131_revA0 0x01
-#define AMD8131_revB0 0x11
-#define AMD8131_MISC 0x40
-#define AMD8131_NIOAMODE_BIT 0
-static void quirk_amd_8131_ioapic(struct pci_dev *dev)
-{
- unsigned char tmp;
-
- if (nr_ioapics == 0)
- return;
-
- if (dev->revision == AMD8131_revA0 || dev->revision == AMD8131_revB0) {
- dev_info(&dev->dev, "Fixing up AMD8131 IOAPIC mode\n");
- pci_read_config_byte( dev, AMD8131_MISC, &tmp);
- tmp &= ~(1 << AMD8131_NIOAMODE_BIT);
- pci_write_config_byte( dev, AMD8131_MISC, tmp);
- }
-}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic);
-DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic);
#endif /* CONFIG_X86_IO_APIC */
/*
@@ -1422,6 +1402,155 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x2609, quirk_intel_pcie_pm);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260a, quirk_intel_pcie_pm);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260b, quirk_intel_pcie_pm);
+#ifdef CONFIG_X86_IO_APIC
+/*
+ * Boot interrupts on some chipsets cannot be turned off. For these chipsets,
+ * remap the original interrupt in the Linux kernel to the boot interrupt, so
+ * that a PCI device's interrupt handler is installed on the boot interrupt
+ * line instead.
+ */
+static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev)
+{
+ if (noioapicquirk || noioapicreroute)
+ return;
+
+ dev->irq_reroute_variant = INTEL_IRQ_REROUTE_VARIANT;
+
+ printk(KERN_INFO "PCI quirk: reroute interrupts for 0x%04x:0x%04x\n",
+ dev->vendor, dev->device);
+ return;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_0, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_1, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_0, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_1, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXHV, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_0, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_1, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_0, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_1, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_0, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_1, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXHV, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_0, quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_1, quirk_reroute_to_boot_interrupts_intel);
+
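Each affected device ID above is deliberately registered twice. A sketch of the pattern, with quirk_example() and the 0x1234 ID as placeholders: the FINAL hook applies the quirk at enumeration, and the RESUME hook reapplies it after suspend, when the chipset state may have been lost.

static void quirk_example(struct pci_dev *dev)
{
	dev->irq_reroute_variant = INTEL_IRQ_REROUTE_VARIANT;
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1234, quirk_example);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, 0x1234, quirk_example);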
+/*
+ * On some chipsets we can disable the generation of legacy INTx boot
+ * interrupts.
+ */
+
+/*
+ * IO-APIC1 on 6300ESB generates boot interrupts, see Intel order no.
+ * 300641-004US, section 5.7.3.
+ */
+#define INTEL_6300_IOAPIC_ABAR 0x40
+#define INTEL_6300_DISABLE_BOOT_IRQ (1<<14)
+
+static void quirk_disable_intel_boot_interrupt(struct pci_dev *dev)
+{
+ u16 pci_config_word;
+
+ if (noioapicquirk)
+ return;
+
+ pci_read_config_word(dev, INTEL_6300_IOAPIC_ABAR, &pci_config_word);
+ pci_config_word |= INTEL_6300_DISABLE_BOOT_IRQ;
+ pci_write_config_word(dev, INTEL_6300_IOAPIC_ABAR, pci_config_word);
+
+ printk(KERN_INFO "disabled boot interrupt on device 0x%04x:0x%04x\n",
+ dev->vendor, dev->device);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt);
+
+/*
+ * disable boot interrupts on HT-1000
+ */
+#define BC_HT1000_FEATURE_REG 0x64
+#define BC_HT1000_PIC_REGS_ENABLE (1<<0)
+#define BC_HT1000_MAP_IDX 0xC00
+#define BC_HT1000_MAP_DATA 0xC01
+
+static void quirk_disable_broadcom_boot_interrupt(struct pci_dev *dev)
+{
+ u32 pci_config_dword;
+ u8 irq;
+
+ if (noioapicquirk)
+ return;
+
+ pci_read_config_dword(dev, BC_HT1000_FEATURE_REG, &pci_config_dword);
+ pci_write_config_dword(dev, BC_HT1000_FEATURE_REG, pci_config_dword |
+ BC_HT1000_PIC_REGS_ENABLE);
+
+ for (irq = 0x10; irq < 0x10 + 32; irq++) {
+ outb(irq, BC_HT1000_MAP_IDX);
+ outb(0x00, BC_HT1000_MAP_DATA);
+ }
+
+ pci_write_config_dword(dev, BC_HT1000_FEATURE_REG, pci_config_dword);
+
+ printk(KERN_INFO "disabled boot interrupts on PCI device"
+ "0x%04x:0x%04x\n", dev->vendor, dev->device);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt);
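The loop in the HT-1000 quirk programs the interrupt map through an index/data register pair, the same idiom used for CMOS RAM access: write the entry number to the index port, then the entry's value to the companion data port. Sketched in isolation with the constants from this quirk:

u8 entry;

for (entry = 0x10; entry < 0x10 + 32; entry++) {
	outb(entry, BC_HT1000_MAP_IDX);	/* select map entry */
	outb(0x00, BC_HT1000_MAP_DATA);	/* clear its routing */
}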
+
+/*
+ * disable boot interrupts on AMD and ATI chipsets
+ */
+/*
+ * NOIOAMODE needs to be disabled to disable "boot interrupts". For AMD 8131
+ * rev. A0 and B0, NOIOAMODE needs to be disabled anyway to fix IO-APIC mode
+ * (due to an erratum).
+ */
+#define AMD_813X_MISC 0x40
+#define AMD_813X_NOIOAMODE (1<<0)
+
+static void quirk_disable_amd_813x_boot_interrupt(struct pci_dev *dev)
+{
+ u32 pci_config_dword;
+
+ if (noioapicquirk)
+ return;
+
+ pci_read_config_dword(dev, AMD_813X_MISC, &pci_config_dword);
+ pci_config_dword &= ~AMD_813X_NOIOAMODE;
+ pci_write_config_dword(dev, AMD_813X_MISC, pci_config_dword);
+
+ printk(KERN_INFO "disabled boot interrupts on PCI device "
+ "0x%04x:0x%04x\n", dev->vendor, dev->device);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_disable_amd_813x_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE, quirk_disable_amd_813x_boot_interrupt);
+
+#define AMD_8111_PCI_IRQ_ROUTING 0x56
+
+static void quirk_disable_amd_8111_boot_interrupt(struct pci_dev *dev)
+{
+ u16 pci_config_word;
+
+ if (noioapicquirk)
+ return;
+
+ pci_read_config_word(dev, AMD_8111_PCI_IRQ_ROUTING, &pci_config_word);
+ if (!pci_config_word) {
+ printk(KERN_INFO "boot interrupts on PCI device 0x%04x:0x%04x "
+ "already disabled\n",
+ dev->vendor, dev->device);
+ return;
+ }
+ pci_write_config_word(dev, AMD_8111_PCI_IRQ_ROUTING, 0);
+ printk(KERN_INFO "disabled boot interrupts on PCI device "
+ "0x%04x:0x%04x\n", dev->vendor, dev->device);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt);
+#endif /* CONFIG_X86_IO_APIC */
+
/*
* Toshiba TC86C001 IDE controller reports the standard 8-byte BAR0 size
* but the PIO transfers won't work if BAR0 falls at the odd 8 bytes.
@@ -1828,6 +1957,22 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SERVERWORKS,
PCI_DEVICE_ID_SERVERWORKS_HT1000_PXB,
ht_enable_msi_mapping);
+/* The P5N32-SLI Premium motherboard from Asus has a problem with msi
+ * for the MCP55 NIC. It is not yet determined whether the msi problem
+ * also affects other devices. For now, turn off msi for this device.
+ */
+static void __devinit nvenet_msi_disable(struct pci_dev *dev)
+{
+ if (dmi_name_in_vendors("P5N32-SLI PREMIUM")) {
+ dev_info(&dev->dev,
+ "Disabling msi for MCP55 NIC on P5N32-SLI Premium\n");
+ dev->no_msi = 1;
+ }
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA,
+ PCI_DEVICE_ID_NVIDIA_NVENET_15,
+ nvenet_msi_disable);
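A sketch of the DMI-gated quirk pattern used above, matching on board strings so the fixup stays board-specific; "EXAMPLE BOARD" and board_quirk() are placeholders:

static void __devinit board_quirk(struct pci_dev *dev)
{
	if (!dmi_name_in_vendors("EXAMPLE BOARD"))
		return;

	dev_info(&dev->dev, "applying board-specific workaround\n");
	dev->no_msi = 1;
}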
+
static void __devinit nv_msi_ht_cap_quirk(struct pci_dev *dev)
{
struct pci_dev *host_bridge;
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 4dd1c3e157ae..5a8ccb4f604d 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -253,6 +253,7 @@ placeholder:
__func__, pci_domain_nr(parent), parent->number, slot_nr);
out:
+ kfree(slot_name);
up_write(&pci_bus_sem);
return slot;
err: