Diffstat (limited to 'drivers/perf/fsl_imx8_ddr_perf.c')
 drivers/perf/fsl_imx8_ddr_perf.c | 322 ++++++++++++++++++++++++++++++++------
 1 file changed, 286 insertions(+), 36 deletions(-)
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 726ed8f59868..5d3e7a495385 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -5,6 +5,7 @@
*/
#include <linux/bitfield.h>
+#include <linux/clk.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
@@ -14,12 +15,15 @@
#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/perf_event.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#define COUNTER_CNTL 0x0
#define COUNTER_READ 0x20
#define COUNTER_DPCR1 0x30
+#define COUNTER_MUX_CNTL 0x50
+#define COUNTER_MASK_COMP 0x54
#define CNTL_OVER 0x1
#define CNTL_CLEAR 0x2
@@ -28,9 +32,18 @@
#define CNTL_CLEAR_MASK 0xFFFFFFFD
#define CNTL_OVER_MASK 0xFFFFFFFE
+#define CNTL_CP_SHIFT 16
+#define CNTL_CP_MASK (0xFF << CNTL_CP_SHIFT)
#define CNTL_CSV_SHIFT 24
#define CNTL_CSV_MASK (0xFF << CNTL_CSV_SHIFT)
+#define READ_PORT_SHIFT 0
+#define READ_PORT_MASK (0x7 << READ_PORT_SHIFT)
+#define READ_CHANNEL_REVERT 0x00000008 /* bit 3 for read channel select */
+#define WRITE_PORT_SHIFT 8
+#define WRITE_PORT_MASK (0x7 << WRITE_PORT_SHIFT)
+#define WRITE_CHANNEL_REVERT 0x00000800 /* bit 11 for write channel select */
+
#define EVENT_CYCLES_ID 0
#define EVENT_CYCLES_COUNTER 0
#define NUM_COUNTERS 4
@@ -40,27 +53,56 @@
#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu)
#define DDR_PERF_DEV_NAME "imx8_ddr"
+#define DB_PERF_DEV_NAME "imx8_db"
#define DDR_CPUHP_CB_NAME DDR_PERF_DEV_NAME "_perf_pmu"
static DEFINE_IDA(ddr_ida);
+static DEFINE_IDA(db_ida);
/* DDR Perf hardware feature */
#define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */
#define DDR_CAP_AXI_ID_FILTER_ENHANCED 0x3 /* support enhanced AXI ID filter */
+#define DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER 0x4 /* support AXI ID PORT CHANNEL filter */
+
+/* Perf type */
+#define DDR_PERF_TYPE 0x1 /* DDR Perf */
+#define DB_PERF_TYPE 0x2 /* DB Perf */
struct fsl_ddr_devtype_data {
unsigned int quirks; /* quirks needed for different DDR Perf core */
+ unsigned int type; /* type of Perf, indicates which block (DDR or DB) this PMU belongs to */
};
-static const struct fsl_ddr_devtype_data imx8_devtype_data;
+static const struct fsl_ddr_devtype_data imx8_devtype_data = {
+ .type = DDR_PERF_TYPE,
+};
static const struct fsl_ddr_devtype_data imx8m_devtype_data = {
.quirks = DDR_CAP_AXI_ID_FILTER,
+ .type = DDR_PERF_TYPE,
+};
+
+static const struct fsl_ddr_devtype_data imx8mp_devtype_data = {
+ .quirks = DDR_CAP_AXI_ID_FILTER_ENHANCED,
+ .type = DDR_PERF_TYPE,
+};
+
+static const struct fsl_ddr_devtype_data imx8dxl_devtype_data = {
+ .quirks = DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER,
+ .type = DDR_PERF_TYPE,
+};
+
+static const struct fsl_ddr_devtype_data imx8dxl_db_devtype_data = {
+ .quirks = DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER,
+ .type = DB_PERF_TYPE,
};
static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
{ .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data},
{ .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data},
+ { .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data},
+ { .compatible = "fsl,imx8dxl-ddr-pmu", .data = &imx8dxl_devtype_data},
+ { .compatible = "fsl,imx8dxl-db-pmu", .data = &imx8dxl_db_devtype_data},
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);
@@ -77,6 +119,68 @@ struct ddr_pmu {
const struct fsl_ddr_devtype_data *devtype_data;
int irq;
int id;
+ spinlock_t lock;
+ struct clk *clk_ipg;
+ struct clk *clk_cnt;
+};
+
+enum ddr_perf_filter_capabilities {
+ PERF_CAP_AXI_ID_FILTER = 0,
+ PERF_CAP_AXI_ID_FILTER_ENHANCED,
+ PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER,
+ PERF_CAP_AXI_ID_FEAT_MAX,
+};
+
+static u32 ddr_perf_filter_cap_get(struct ddr_pmu *pmu, int cap)
+{
+ u32 quirks = pmu->devtype_data->quirks;
+
+ switch (cap) {
+ case PERF_CAP_AXI_ID_FILTER:
+ return !!(quirks & DDR_CAP_AXI_ID_FILTER);
+ case PERF_CAP_AXI_ID_FILTER_ENHANCED:
+ quirks &= DDR_CAP_AXI_ID_FILTER_ENHANCED;
+ return quirks == DDR_CAP_AXI_ID_FILTER_ENHANCED;
+ case PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER:
+ return !!(quirks & DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER);
+ default:
+ WARN(1, "unknown filter cap %d\n", cap);
+ }
+
+ return 0;
+}
+
+static ssize_t ddr_perf_filter_cap_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ddr_pmu *pmu = dev_get_drvdata(dev);
+ struct dev_ext_attribute *ea =
+ container_of(attr, struct dev_ext_attribute, attr);
+ int cap = (long)ea->var;
+
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ ddr_perf_filter_cap_get(pmu, cap));
+}
+
+#define PERF_EXT_ATTR_ENTRY(_name, _func, _var) \
+ (&((struct dev_ext_attribute) { \
+ __ATTR(_name, 0444, _func, NULL), (void *)_var \
+ }).attr.attr)
+
+#define PERF_FILTER_EXT_ATTR_ENTRY(_name, _var) \
+ PERF_EXT_ATTR_ENTRY(_name, ddr_perf_filter_cap_show, _var)
+
+static struct attribute *ddr_perf_filter_cap_attr[] = {
+ PERF_FILTER_EXT_ATTR_ENTRY(filter, PERF_CAP_AXI_ID_FILTER),
+ PERF_FILTER_EXT_ATTR_ENTRY(enhanced_filter, PERF_CAP_AXI_ID_FILTER_ENHANCED),
+ PERF_FILTER_EXT_ATTR_ENTRY(super_filter, PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER),
+ NULL,
+};
+
+static struct attribute_group ddr_perf_filter_cap_attr_group = {
+ .name = "caps",
+ .attrs = ddr_perf_filter_cap_attr,
};
static ssize_t ddr_perf_cpumask_show(struct device *dev,
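
The new "caps" group surfaces these quirks through sysfs. On an i.MX8M part (quirks = DDR_CAP_AXI_ID_FILTER), for example, ddr_perf_filter_cap_get() would yield the following (standard perf event_source paths; the instance suffix is assumed):

    # cat /sys/bus/event_source/devices/imx8_ddr0/caps/filter
    1
    # cat /sys/bus/event_source/devices/imx8_ddr0/caps/enhanced_filter
    0
    # cat /sys/bus/event_source/devices/imx8_ddr0/caps/super_filter
    0
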
@@ -156,14 +260,30 @@ static struct attribute_group ddr_perf_events_attr_group = {
.attrs = ddr_perf_events_attrs,
};
+static struct attribute *db_perf_events_attrs[] = {
+ IMX8_DDR_PMU_EVENT_ATTR(cycles, EVENT_CYCLES_ID),
+ IMX8_DDR_PMU_EVENT_ATTR(axid-read, 0x41),
+ IMX8_DDR_PMU_EVENT_ATTR(axid-write, 0x42),
+ NULL,
+};
+
+static struct attribute_group db_perf_events_attr_group = {
+ .name = "events",
+ .attrs = db_perf_events_attrs,
+};
+
PMU_FORMAT_ATTR(event, "config:0-7");
PMU_FORMAT_ATTR(axi_id, "config1:0-15");
PMU_FORMAT_ATTR(axi_mask, "config1:16-31");
+PMU_FORMAT_ATTR(axi_port, "config2:0-2");
+PMU_FORMAT_ATTR(axi_channel, "config2:3-3");
static struct attribute *ddr_perf_format_attrs[] = {
&format_attr_event.attr,
&format_attr_axi_id.attr,
&format_attr_axi_mask.attr,
+ &format_attr_axi_port.attr,
+ &format_attr_axi_channel.attr,
NULL,
};
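
The two new format fields route the port/channel filter through config2: axi_port occupies bits 0-2 and axi_channel bit 3, matching the PMU_FORMAT_ATTR() lines above. A hypothetical perf invocation on a part with the port/channel filter (instance number and filter values are illustrative only):

    perf stat -a -e imx8_ddr0/axid-read,axi_mask=0xffff,axi_id=0x0,axi_port=0x2,axi_channel=0x1/ sleep 1
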
@@ -172,13 +292,44 @@ static struct attribute_group ddr_perf_format_attr_group = {
.attrs = ddr_perf_format_attrs,
};
-static const struct attribute_group *attr_groups[] = {
+static const struct attribute_group *ddr_attr_groups[] = {
&ddr_perf_events_attr_group,
&ddr_perf_format_attr_group,
&ddr_perf_cpumask_attr_group,
+ &ddr_perf_filter_cap_attr_group,
NULL,
};
+static const struct attribute_group *db_attr_groups[] = {
+ &db_perf_events_attr_group,
+ &ddr_perf_format_attr_group,
+ &ddr_perf_cpumask_attr_group,
+ &ddr_perf_filter_cap_attr_group,
+ NULL,
+};
+
+static int ddr_perf_clks_enable(struct ddr_pmu *pmu)
+{
+ int err;
+
+ err = clk_prepare_enable(pmu->clk_ipg);
+ if (err)
+ return err;
+
+ err = clk_prepare_enable(pmu->clk_cnt);
+ if (err)
+ clk_disable_unprepare(pmu->clk_ipg);
+
+ return err;
+}
+
+static void ddr_perf_clks_disable(struct ddr_pmu *pmu)
+{
+ clk_disable_unprepare(pmu->clk_cnt);
+ clk_disable_unprepare(pmu->clk_ipg);
+}
+
static bool ddr_perf_is_filtered(struct perf_event *event)
{
return event->attr.config == 0x41 || event->attr.config == 0x42;
@@ -307,16 +458,20 @@ static void ddr_perf_event_update(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
u64 delta, prev_raw_count, new_raw_count;
int counter = hwc->idx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pmu->lock, flags);
- do {
- prev_raw_count = local64_read(&hwc->prev_count);
- new_raw_count = ddr_perf_read_counter(pmu, counter);
- } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count) != prev_raw_count);
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = ddr_perf_read_counter(pmu, counter);
delta = (new_raw_count - prev_raw_count) & 0xFFFFFFFF;
local64_add(delta, &event->count);
+ local64_set(&hwc->prev_count, new_raw_count);
+
+ spin_unlock_irqrestore(&pmu->lock, flags);
}
static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
@@ -335,14 +490,37 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
writel(0, pmu->base + reg);
val = CNTL_EN | CNTL_CLEAR;
val |= FIELD_PREP(CNTL_CSV_MASK, config);
+
+ /*
+ * Workaround for i.MX8MP:
+ * Common counters and byte counters share the same COUNTER_CNTL,
+ * and byte counters could overflow before the cycle counter. Set the
+ * counter parameter (CP) of the cycle counter to a non-zero initial
+ * value so that the cycle counter overflows first.
+ */
+ if ((pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) ==
+ DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+ if (counter == EVENT_CYCLES_COUNTER)
+ val |= FIELD_PREP(CNTL_CP_MASK, 0xe8);
+ }
+
writel(val, pmu->base + reg);
} else {
/* Disable counter */
- val = readl_relaxed(pmu->base + reg) & CNTL_EN_MASK;
+ val = readl(pmu->base + reg) & CNTL_EN_MASK;
writel(val, pmu->base + reg);
}
}
+static bool ddr_perf_counter_overflow(struct ddr_pmu *pmu, int counter)
+{
+ int val;
+
+ val = readl_relaxed(pmu->base + counter * 4 + COUNTER_CNTL);
+
+ return val & CNTL_OVER ? true : false;
+}
+
static void ddr_perf_event_start(struct perf_event *event, int flags)
{
struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
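
A note on the 0xe8 preset above: assuming CP preloads the top byte of the 32-bit cycle counter (the "initial value" the comment refers to; the reference manual is not quoted here), the counter starts at 0xe8000000 and overflows after

    0x100000000 - 0xe8000000 = 0x18000000 cycles (about 403 million)

instead of the full 2^32, i.e. roughly ten times as often, so the overflow IRQ samples the byte counters before they can wrap.
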
@@ -363,6 +541,7 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
int counter;
int cfg = event->attr.config;
int cfg1 = event->attr.config1;
+ int cfg2 = event->attr.config2;
if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
int i;
@@ -386,6 +565,28 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
return -EOPNOTSUPP;
}
+ if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER) {
+ if (ddr_perf_is_filtered(event)) {
+ /* revert the AXI ID masking (axi_mask) value */
+ cfg1 ^= AXI_MASKING_REVERT;
+ writel(cfg1, pmu->base + COUNTER_MASK_COMP + ((counter - 1) << 4));
+
+ if (cfg == 0x41) {
+ /* revert the AXI read channel (axi_channel) value */
+ cfg2 ^= READ_CHANNEL_REVERT;
+
+ cfg2 |= FIELD_PREP(READ_PORT_MASK, cfg2);
+ } else {
+ /* revert the AXI write channel (axi_channel) value */
+ cfg2 ^= WRITE_CHANNEL_REVERT;
+
+ cfg2 |= FIELD_PREP(WRITE_PORT_MASK, cfg2);
+ }
+
+ writel(cfg2, pmu->base + COUNTER_MUX_CNTL + ((counter - 1) << 4));
+ }
+ }
+
pmu->events[counter] = event;
pmu->active_events++;
hwc->idx = counter;
@@ -446,7 +647,7 @@ static void ddr_perf_pmu_disable(struct pmu *pmu)
false);
}
-static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
+static void ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
struct device *dev)
{
*pmu = (struct ddr_pmu) {
@@ -454,7 +655,6 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
.module = THIS_MODULE,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
- .attr_groups = attr_groups,
.event_init = ddr_perf_event_init,
.add = ddr_perf_event_add,
.del = ddr_perf_event_del,
@@ -467,16 +667,13 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
.base = base,
.dev = dev,
};
-
- pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
- return pmu->id;
}
static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
{
- int i;
+ int i, ret;
struct ddr_pmu *pmu = (struct ddr_pmu *) p;
- struct perf_event *event, *cycle_event = NULL;
+ struct perf_event *event;
/* all counters will stop if the cycle counter is disabled */
ddr_perf_counter_enable(pmu,
@@ -486,12 +683,7 @@ static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
/*
* When the cycle counter overflows, all counters are stopped,
* and an IRQ is raised. If any other counter overflows, it
- * continues counting, and no IRQ is raised.
- *
- * Cycles occur at least 4 times as often as other events, so we
- * can update all events on a cycle counter overflow and not
- * lose events.
- *
+ * will stop and no IRQ is raised.
*/
for (i = 0; i < NUM_COUNTERS; i++) {
@@ -501,17 +693,41 @@ static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
event = pmu->events[i];
ddr_perf_event_update(event);
+ }
+
+ spin_lock(&pmu->lock);
+
+ for (i = 0; i < NUM_COUNTERS; i++) {
+ if (!pmu->events[i])
+ continue;
+
+ if (i == EVENT_CYCLES_COUNTER)
+ continue;
+
+ event = pmu->events[i];
+
+ /* check whether this non-cycle counter overflowed */
+ ret = ddr_perf_counter_overflow(pmu, event->hw.idx);
+ if (ret)
+ dev_warn(pmu->dev, "Counter%d (not cycle counter) overflow happened, data incorrect!\n", i);
+
+ /* clear and re-enable the non-cycle counter */
+ ddr_perf_counter_enable(pmu, event->attr.config, event->hw.idx, true);
- if (event->hw.idx == EVENT_CYCLES_COUNTER)
- cycle_event = event;
+ /* reset prev_count to match the cleared counter */
+ local64_set(&event->hw.prev_count, 0);
}
+ if (pmu->events[EVENT_CYCLES_COUNTER])
+ local64_set(&pmu->events[EVENT_CYCLES_COUNTER]->hw.prev_count, 0);
+
+ /* enable cycle counter to start all counters */
ddr_perf_counter_enable(pmu,
EVENT_CYCLES_ID,
EVENT_CYCLES_COUNTER,
true);
- if (cycle_event)
- ddr_perf_event_update(cycle_event);
+
+ spin_unlock(&pmu->lock);
return IRQ_HANDLED;
}
@@ -542,7 +758,6 @@ static int ddr_perf_probe(struct platform_device *pdev)
struct device_node *np;
void __iomem *base;
char *name;
- int num;
int ret;
int irq;
@@ -556,18 +771,42 @@ static int ddr_perf_probe(struct platform_device *pdev)
if (!pmu)
return -ENOMEM;
- num = ddr_perf_init(pmu, base, &pdev->dev);
+ ddr_perf_init(pmu, base, &pdev->dev);
platform_set_drvdata(pdev, pmu);
-
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d",
- num);
- if (!name) {
- ret = -ENOMEM;
- goto cpuhp_state_err;
- }
+ spin_lock_init(&pmu->lock);
pmu->devtype_data = of_device_get_match_data(&pdev->dev);
+ if (pmu->devtype_data->type & DDR_PERF_TYPE) {
+ pmu->pmu.attr_groups = ddr_attr_groups;
+ pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", pmu->id);
+ } else if (pmu->devtype_data->type & DB_PERF_TYPE) {
+ pmu->pmu.attr_groups = db_attr_groups;
+ pmu->id = ida_simple_get(&db_ida, 0, 0, GFP_KERNEL);
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DB_PERF_DEV_NAME "%d", pmu->id);
+
+ pmu->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
+ if (IS_ERR(pmu->clk_ipg)) {
+ dev_err(&pdev->dev, "no ipg clock defined\n");
+ return PTR_ERR(pmu->clk_ipg);
+ }
+
+ pmu->clk_cnt = devm_clk_get(&pdev->dev, "cnt");
+ if (IS_ERR(pmu->clk_cnt)) {
+ dev_err(&pdev->dev, "no cnt clock defined\n");
+ return PTR_ERR(pmu->clk_cnt);
+ }
+
+ ret = ddr_perf_clks_enable(pmu);
+ if (ret)
+ return ret;
+ } else {
+ return -EINVAL;
+ }
+ if (!name) {
+ ret = -ENOMEM;
+ goto cpuhp_state_err;
+ }
pmu->cpu = raw_smp_processor_id();
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
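
Since the db branch looks up its clocks by name, a matching devicetree node needs "ipg" and "cnt" entries in clock-names. A hypothetical fragment (compatible string taken from the of_device_id table above; reg, interrupt and clock specifiers are placeholders, not from any binding document):

    pmu@... {
            compatible = "fsl,imx8dxl-db-pmu";
            reg = <...>;
            interrupts = <...>;
            clocks = <&clk_ipg>, <&clk_cnt>;
            clock-names = "ipg", "cnt";
    };
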
@@ -625,7 +864,12 @@ ddr_perf_err:
cpuhp_instance_err:
cpuhp_remove_multi_state(pmu->cpuhp_state);
cpuhp_state_err:
- ida_simple_remove(&ddr_ida, pmu->id);
+ if (pmu->devtype_data->type & DDR_PERF_TYPE) {
+ ida_simple_remove(&ddr_ida, pmu->id);
+ } else {
+ ddr_perf_clks_disable(pmu);
+ ida_simple_remove(&db_ida, pmu->id);
+ }
dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret);
return ret;
}
@@ -640,7 +884,13 @@ static int ddr_perf_remove(struct platform_device *pdev)
perf_pmu_unregister(&pmu->pmu);
- ida_simple_remove(&ddr_ida, pmu->id);
+ if (pmu->devtype_data->type & DDR_PERF_TYPE) {
+ ida_simple_remove(&ddr_ida, pmu->id);
+ } else {
+ ddr_perf_clks_disable(pmu);
+ ida_simple_remove(&db_ida, pmu->id);
+ }
+
return 0;
}