Diffstat (limited to 'drivers/perf/fsl_imx8_ddr_perf.c')
-rw-r--r-- | drivers/perf/fsl_imx8_ddr_perf.c | 322
1 file changed, 286 insertions(+), 36 deletions(-)
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 726ed8f59868..5d3e7a495385 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/bitfield.h>
+#include <linux/clk.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -14,12 +15,15 @@
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/perf_event.h>
+#include <linux/spinlock.h>
 #include <linux/slab.h>
 
 #define COUNTER_CNTL		0x0
 #define COUNTER_READ		0x20
 #define COUNTER_DPCR1		0x30
+#define COUNTER_MUX_CNTL	0x50
+#define COUNTER_MASK_COMP	0x54
 
 #define CNTL_OVER		0x1
 #define CNTL_CLEAR		0x2
@@ -28,9 +32,18 @@
 #define CNTL_CLEAR_MASK		0xFFFFFFFD
 #define CNTL_OVER_MASK		0xFFFFFFFE
+#define CNTL_CP_SHIFT		16
+#define CNTL_CP_MASK		(0xFF << CNTL_CP_SHIFT)
 #define CNTL_CSV_SHIFT		24
 #define CNTL_CSV_MASK		(0xFF << CNTL_CSV_SHIFT)
 
+#define READ_PORT_SHIFT		0
+#define READ_PORT_MASK		(0x7 << READ_PORT_SHIFT)
+#define READ_CHANNEL_REVERT	0x00000008	/* bit 3 for read channel select */
+#define WRITE_PORT_SHIFT	8
+#define WRITE_PORT_MASK		(0x7 << WRITE_PORT_SHIFT)
+#define WRITE_CHANNEL_REVERT	0x00000800	/* bit 11 for write channel select */
+
 #define EVENT_CYCLES_ID		0
 #define EVENT_CYCLES_COUNTER	0
 #define NUM_COUNTERS		4
@@ -40,27 +53,56 @@
 #define to_ddr_pmu(p)		container_of(p, struct ddr_pmu, pmu)
 
 #define DDR_PERF_DEV_NAME	"imx8_ddr"
+#define DB_PERF_DEV_NAME	"imx8_db"
 #define DDR_CPUHP_CB_NAME	DDR_PERF_DEV_NAME "_perf_pmu"
 
 static DEFINE_IDA(ddr_ida);
+static DEFINE_IDA(db_ida);
 
 /* DDR Perf hardware feature */
 #define DDR_CAP_AXI_ID_FILTER			0x1	/* support AXI ID filter */
 #define DDR_CAP_AXI_ID_FILTER_ENHANCED		0x3	/* support enhanced AXI ID filter */
+#define DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER	0x4	/* support AXI ID PORT CHANNEL filter */
+
+/* Perf type */
+#define DDR_PERF_TYPE	0x1	/* ddr Perf */
+#define DB_PERF_TYPE	0x2	/* db Perf */
 
 struct fsl_ddr_devtype_data {
 	unsigned int quirks;	/* quirks needed for different DDR Perf core */
+	unsigned int type;	/* types of Perf, point the location of Perf */
 };
 
-static const struct fsl_ddr_devtype_data imx8_devtype_data;
+static const struct fsl_ddr_devtype_data imx8_devtype_data = {
+	.type = DDR_PERF_TYPE,
+};
 
 static const struct fsl_ddr_devtype_data imx8m_devtype_data = {
 	.quirks = DDR_CAP_AXI_ID_FILTER,
+	.type = DDR_PERF_TYPE,
+};
+
+static const struct fsl_ddr_devtype_data imx8mp_devtype_data = {
+	.quirks = DDR_CAP_AXI_ID_FILTER_ENHANCED,
+	.type = DDR_PERF_TYPE,
+};
+
+static const struct fsl_ddr_devtype_data imx8dxl_devtype_data = {
+	.quirks = DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER,
+	.type = DDR_PERF_TYPE,
+};
+
+static const struct fsl_ddr_devtype_data imx8dxl_db_devtype_data = {
+	.quirks = DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER,
+	.type = DB_PERF_TYPE,
 };
 
 static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
 	{ .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data},
 	{ .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data},
+	{ .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data},
+	{ .compatible = "fsl,imx8dxl-ddr-pmu", .data = &imx8dxl_devtype_data},
+	{ .compatible = "fsl,imx8dxl-db-pmu", .data = &imx8dxl_db_devtype_data},
	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);
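A note on the capability encoding above: DDR_CAP_AXI_ID_FILTER_ENHANCED is 0x3, deliberately a superset of the plain-filter bit 0x1, which is why the driver tests it with a mask-and-compare rather than a simple AND; the port/channel filter gets its own bit, 0x4. A minimal standalone sketch of that check (not driver code; the values are copied from the #defines above, the helper name is illustrative):

/* Standalone sketch of the quirks encoding; builds with any C compiler. */
#include <assert.h>
#include <stdio.h>

#define DDR_CAP_AXI_ID_FILTER			0x1
#define DDR_CAP_AXI_ID_FILTER_ENHANCED		0x3	/* 0x2 | 0x1 */
#define DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER	0x4

static int has_enhanced_filter(unsigned int quirks)
{
	/* Mask-and-compare: a plain-filter-only part (0x1) must not match. */
	return (quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) ==
		DDR_CAP_AXI_ID_FILTER_ENHANCED;
}

int main(void)
{
	assert(has_enhanced_filter(DDR_CAP_AXI_ID_FILTER_ENHANCED));	  /* i.MX8MP */
	assert(!has_enhanced_filter(DDR_CAP_AXI_ID_FILTER));		  /* i.MX8M */
	assert(!has_enhanced_filter(DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER)); /* i.MX8DXL */
	printf("enhanced filter implies plain filter\n");
	return 0;
}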
@@ -77,6 +119,68 @@ struct ddr_pmu {
 	const struct fsl_ddr_devtype_data *devtype_data;
 	int irq;
 	int id;
+	spinlock_t lock;
+	struct clk *clk_ipg;
+	struct clk *clk_cnt;
+};
+
+enum ddr_perf_filter_capabilities {
+	PERF_CAP_AXI_ID_FILTER = 0,
+	PERF_CAP_AXI_ID_FILTER_ENHANCED,
+	PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER,
+	PERF_CAP_AXI_ID_FEAT_MAX,
+};
+
+static u32 ddr_perf_filter_cap_get(struct ddr_pmu *pmu, int cap)
+{
+	u32 quirks = pmu->devtype_data->quirks;
+
+	switch (cap) {
+	case PERF_CAP_AXI_ID_FILTER:
+		return !!(quirks & DDR_CAP_AXI_ID_FILTER);
+	case PERF_CAP_AXI_ID_FILTER_ENHANCED:
+		quirks &= DDR_CAP_AXI_ID_FILTER_ENHANCED;
+		return quirks == DDR_CAP_AXI_ID_FILTER_ENHANCED;
+	case PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER:
+		return !!(quirks & DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER);
+	default:
+		WARN(1, "unknown filter cap %d\n", cap);
+	}
+
+	return 0;
+}
+
+static ssize_t ddr_perf_filter_cap_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct ddr_pmu *pmu = dev_get_drvdata(dev);
+	struct dev_ext_attribute *ea =
+		container_of(attr, struct dev_ext_attribute, attr);
+	int cap = (long)ea->var;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+			ddr_perf_filter_cap_get(pmu, cap));
+}
+
+#define PERF_EXT_ATTR_ENTRY(_name, _func, _var)				\
+	(&((struct dev_ext_attribute) {					\
+		__ATTR(_name, 0444, _func, NULL), (void *)_var		\
+	}).attr.attr)
+
+#define PERF_FILTER_EXT_ATTR_ENTRY(_name, _var)				\
+	PERF_EXT_ATTR_ENTRY(_name, ddr_perf_filter_cap_show, _var)
+
+static struct attribute *ddr_perf_filter_cap_attr[] = {
+	PERF_FILTER_EXT_ATTR_ENTRY(filter, PERF_CAP_AXI_ID_FILTER),
+	PERF_FILTER_EXT_ATTR_ENTRY(enhanced_filter, PERF_CAP_AXI_ID_FILTER_ENHANCED),
+	PERF_FILTER_EXT_ATTR_ENTRY(super_filter, PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER),
+	NULL,
+};
+
+static struct attribute_group ddr_perf_filter_cap_attr_group = {
+	.name = "caps",
+	.attrs = ddr_perf_filter_cap_attr,
 };
 
 static ssize_t ddr_perf_cpumask_show(struct device *dev,
@@ -156,14 +260,30 @@ static struct attribute_group ddr_perf_events_attr_group = {
 	.attrs = ddr_perf_events_attrs,
 };
 
+static struct attribute *db_perf_events_attrs[] = {
+	IMX8_DDR_PMU_EVENT_ATTR(cycles, EVENT_CYCLES_ID),
+	IMX8_DDR_PMU_EVENT_ATTR(axid-read, 0x41),
+	IMX8_DDR_PMU_EVENT_ATTR(axid-write, 0x42),
+	NULL,
+};
+
+static struct attribute_group db_perf_events_attr_group = {
+	.name = "events",
+	.attrs = db_perf_events_attrs,
+};
+
 PMU_FORMAT_ATTR(event, "config:0-7");
 PMU_FORMAT_ATTR(axi_id, "config1:0-15");
 PMU_FORMAT_ATTR(axi_mask, "config1:16-31");
+PMU_FORMAT_ATTR(axi_port, "config2:0-2");
+PMU_FORMAT_ATTR(axi_channel, "config2:3-3");
 
 static struct attribute *ddr_perf_format_attrs[] = {
 	&format_attr_event.attr,
 	&format_attr_axi_id.attr,
 	&format_attr_axi_mask.attr,
+	&format_attr_axi_port.attr,
+	&format_attr_axi_channel.attr,
 	NULL,
 };
 
@@ -172,13 +292,44 @@ static struct attribute_group ddr_perf_format_attr_group = {
 	.attrs = ddr_perf_format_attrs,
 };
 
-static const struct attribute_group *attr_groups[] = {
+static const struct attribute_group *ddr_attr_groups[] = {
+	&ddr_perf_events_attr_group,
 	&ddr_perf_format_attr_group,
 	&ddr_perf_cpumask_attr_group,
+	&ddr_perf_filter_cap_attr_group,
 	NULL,
 };
 
+static const struct attribute_group *db_attr_groups[] = {
+	&db_perf_events_attr_group,
+	&ddr_perf_format_attr_group,
+	&ddr_perf_cpumask_attr_group,
+	&ddr_perf_filter_cap_attr_group,
+	NULL,
+};
+
+static int ddr_perf_clks_enable(struct ddr_pmu *pmu)
+{
+	int err;
+
+	err = clk_prepare_enable(pmu->clk_ipg);
+	if (err)
+		return err;
+
+	err = clk_prepare_enable(pmu->clk_cnt);
+	if (err)
+		clk_disable_unprepare(pmu->clk_ipg);
+
+	return err;
+}
+
+static void ddr_perf_clks_disable(struct ddr_pmu *pmu)
+{
+	clk_disable_unprepare(pmu->clk_cnt);
+	clk_disable_unprepare(pmu->clk_ipg);
+}
+
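The two new PMU_FORMAT_ATTR() lines above publish the filter's perf ABI: axi_port lives in config2 bits 0-2 and axi_channel in config2 bit 3, alongside the existing axi_id/axi_mask fields in config1. A small userspace-side packing sketch, assuming exactly those format strings (the helper names are hypothetical):

/* Illustrative only: packs perf_event_attr config fields per the
 * format attributes exported above. */
#include <stdint.h>
#include <stdio.h>

static uint64_t pack_config1(unsigned int axi_id, unsigned int axi_mask)
{
	return (axi_id & 0xffffULL) | ((axi_mask & 0xffffULL) << 16);
}

static uint64_t pack_config2(unsigned int axi_port, unsigned int axi_channel)
{
	return (axi_port & 0x7ULL) | ((axi_channel & 0x1ULL) << 3);
}

int main(void)
{
	/* Match AXI ID 0x12 exactly, on port 5, channel 1. */
	printf("config1 = 0x%llx\n", (unsigned long long)pack_config1(0x12, 0xffff));
	printf("config2 = 0x%llx\n", (unsigned long long)pack_config2(5, 1));
	return 0;
}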
 static bool ddr_perf_is_filtered(struct perf_event *event)
 {
 	return event->attr.config == 0x41 || event->attr.config == 0x42;
@@ -307,16 +458,20 @@ static void ddr_perf_event_update(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	u64 delta, prev_raw_count, new_raw_count;
 	int counter = hwc->idx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pmu->lock, flags);
 
-	do {
-		prev_raw_count = local64_read(&hwc->prev_count);
-		new_raw_count = ddr_perf_read_counter(pmu, counter);
-	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-			new_raw_count) != prev_raw_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = ddr_perf_read_counter(pmu, counter);
 
 	delta = (new_raw_count - prev_raw_count) & 0xFFFFFFFF;
 
 	local64_add(delta, &event->count);
+	local64_set(&hwc->prev_count, new_raw_count);
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
 }
 
 static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
@@ -335,14 +490,37 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
 		writel(0, pmu->base + reg);
 		val = CNTL_EN | CNTL_CLEAR;
 		val |= FIELD_PREP(CNTL_CSV_MASK, config);
+
+		/*
+		 * Workaround for i.MX8MP:
+		 * Common counters and byte counters share the same COUNTER_CNTL,
+		 * and byte counters could overflow before cycle counter. Need set
+		 * counter parameter(CP) of cycle counter to give it initial value
+		 * which can speed up cycle counter overflow frequency.
+		 */
+		if ((pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) ==
+		    DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+			if (counter == EVENT_CYCLES_COUNTER)
+				val |= FIELD_PREP(CNTL_CP_MASK, 0xe8);
+		}
+
 		writel(val, pmu->base + reg);
 	} else {
 		/* Disable counter */
-		val = readl_relaxed(pmu->base + reg) & CNTL_EN_MASK;
+		val = readl(pmu->base + reg) & CNTL_EN_MASK;
 		writel(val, pmu->base + reg);
 	}
 }
 
+static bool ddr_perf_counter_overflow(struct ddr_pmu *pmu, int counter)
+{
+	int val;
+
+	val = readl_relaxed(pmu->base + counter * 4 + COUNTER_CNTL);
+
+	return val & CNTL_OVER ? true : false;
+}
+
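To put a number on the workaround above: assuming the CP field pre-loads the top byte of the 32-bit cycle counter, as the comment describes, CP = 0xe8 starts the counter at 0xe8000000, so it overflows after 0x18000000 cycles rather than a full 2^32 — roughly 10.7 times sooner, keeping the cycle counter ahead of the byte counters. A standalone sketch of the arithmetic:

/* Standalone arithmetic sketch; assumes the CP byte seeds the counter's
 * top byte, per the workaround comment in ddr_perf_counter_enable(). */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t cp = 0xe8;
	uint32_t initial = cp << 24;			/* 0xe8000000 */
	uint64_t to_overflow = (1ULL << 32) - initial;	/* 0x18000000 */

	printf("initial value     : 0x%08x\n", initial);
	printf("cycles to overflow: %llu (~%.1fx sooner than 2^32)\n",
	       (unsigned long long)to_overflow,
	       (double)(1ULL << 32) / (double)to_overflow);
	return 0;
}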
 static void ddr_perf_event_start(struct perf_event *event, int flags)
 {
 	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
@@ -363,6 +541,7 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
 	int counter;
 	int cfg = event->attr.config;
 	int cfg1 = event->attr.config1;
+	int cfg2 = event->attr.config2;
 
 	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
 		int i;
@@ -386,6 +565,28 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
 		return -EOPNOTSUPP;
 	}
 
+	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER) {
+		if (ddr_perf_is_filtered(event)) {
+			/* revert axi id masking(axi_mask) value */
+			cfg1 ^= AXI_MASKING_REVERT;
+			writel(cfg1, pmu->base + COUNTER_MASK_COMP + ((counter - 1) << 4));
+
+			if (cfg == 0x41) {
+				/* revert axi read channel(axi_channel) value */
+				cfg2 ^= READ_CHANNEL_REVERT;
+
+				cfg2 |= FIELD_PREP(READ_PORT_MASK, cfg2);
+			} else {
+				/* revert axi write channel(axi_channel) value */
+				cfg2 ^= WRITE_CHANNEL_REVERT;
+
+				cfg2 |= FIELD_PREP(WRITE_PORT_MASK, cfg2);
+			}
+
+			writel(cfg2, pmu->base + COUNTER_MUX_CNTL + ((counter - 1) << 4));
+		}
+	}
+
 	pmu->events[counter] = event;
 	pmu->active_events++;
 	hwc->idx = counter;
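The "revert" comments in the hunk above mean the user-supplied filter values are XOR-inverted before they reach the hardware, whose mask and channel-select bits use the opposite sense of the sysfs ABI. AXI_MASKING_REVERT is defined earlier in this file, outside this diff; the 0xffff0000 value below matches the mainline driver and is an assumption here. A standalone sketch of the inversion:

/* Illustrative only: shows what the XORs in ddr_perf_event_add() do to
 * the user-visible config1/config2 values. Not driver code. */
#include <stdint.h>
#include <stdio.h>

#define AXI_MASKING_REVERT	0xffff0000	/* assumed value; flips only the axi_mask half */
#define READ_CHANNEL_REVERT	0x00000008	/* flips config2 bit 3 */

int main(void)
{
	uint32_t cfg1 = 0xff001200;	/* axi_mask = 0xff00, axi_id = 0x1200 */
	uint32_t cfg2 = 0x00000008;	/* axi_channel = 1 */

	/* Mask bits are inverted; the AXI ID half is untouched. */
	printf("COUNTER_MASK_COMP <= 0x%08x\n", cfg1 ^ AXI_MASKING_REVERT);
	/* Channel-select bit is likewise written with inverted polarity. */
	printf("channel bit       <= 0x%08x\n", cfg2 ^ READ_CHANNEL_REVERT);
	return 0;
}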
@@ -446,7 +647,7 @@ static void ddr_perf_pmu_disable(struct pmu *pmu)
 			      false);
 }
 
-static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
+static void ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
 			 struct device *dev)
 {
 	*pmu = (struct ddr_pmu) {
@@ -454,7 +655,6 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
 		.module	      = THIS_MODULE,
 		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
 		.task_ctx_nr = perf_invalid_context,
-		.attr_groups = attr_groups,
 		.event_init  = ddr_perf_event_init,
 		.add	      = ddr_perf_event_add,
 		.del	      = ddr_perf_event_del,
@@ -467,16 +667,13 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
 		.base         = base,
 		.dev          = dev,
 	};
-
-	pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
-	return pmu->id;
 }
 
 static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
 {
-	int i;
+	int i, ret;
 	struct ddr_pmu *pmu = (struct ddr_pmu *) p;
-	struct perf_event *event, *cycle_event = NULL;
+	struct perf_event *event;
 
 	/* all counter will stop if cycle counter disabled */
 	ddr_perf_counter_enable(pmu,
@@ -486,12 +683,7 @@ static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
 	/*
 	 * When the cycle counter overflows, all counters are stopped,
 	 * and an IRQ is raised. If any other counter overflows, it
-	 * continues counting, and no IRQ is raised.
-	 *
-	 * Cycles occur at least 4 times as often as other events, so we
-	 * can update all events on a cycle counter overflow and not
-	 * lose events.
-	 *
+	 * will stop and no IRQ is raised.
 	 */
 
 	for (i = 0; i < NUM_COUNTERS; i++) {
@@ -501,17 +693,41 @@ static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
 		event = pmu->events[i];
 
 		ddr_perf_event_update(event);
+	}
+
+	spin_lock(&pmu->lock);
+
+	for (i = 0; i < NUM_COUNTERS; i++) {
+		if (!pmu->events[i])
+			continue;
+
+		if (i == EVENT_CYCLES_COUNTER)
+			continue;
+
+		event = pmu->events[i];
+
+		/* check non-cycle counters overflow */
+		ret = ddr_perf_counter_overflow(pmu, event->hw.idx);
+		if (ret)
+			dev_warn(pmu->dev, "Counter%d (not cycle counter) overflow happened, data incorrect!\n", i);
+
+		/* clear non-cycle counters */
+		ddr_perf_counter_enable(pmu, event->attr.config, event->hw.idx, true);
 
-		if (event->hw.idx == EVENT_CYCLES_COUNTER)
-			cycle_event = event;
+		/* update the prev_count */
+		local64_set(&event->hw.prev_count, 0);
 	}
 
+	if (pmu->events[EVENT_CYCLES_ID])
+		local64_set(&pmu->events[EVENT_CYCLES_ID]->hw.prev_count, 0);
+
+	/* enable cycle counter to start all counters */
 	ddr_perf_counter_enable(pmu,
 			      EVENT_CYCLES_ID,
 			      EVENT_CYCLES_COUNTER,
 			      true);
-	if (cycle_event)
-		ddr_perf_event_update(cycle_event);
+
+	spin_unlock(&pmu->lock);
 
 	return IRQ_HANDLED;
 }
@@ -542,7 +758,6 @@ static int ddr_perf_probe(struct platform_device *pdev)
 	struct device_node *np;
 	void __iomem *base;
 	char *name;
-	int num;
 	int ret;
 	int irq;
 
@@ -556,18 +771,42 @@ static int ddr_perf_probe(struct platform_device *pdev)
 	if (!pmu)
 		return -ENOMEM;
 
-	num = ddr_perf_init(pmu, base, &pdev->dev);
+	ddr_perf_init(pmu, base, &pdev->dev);
 
 	platform_set_drvdata(pdev, pmu);
-
-	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d",
-			      num);
-	if (!name) {
-		ret = -ENOMEM;
-		goto cpuhp_state_err;
-	}
+	spin_lock_init(&pmu->lock);
 
 	pmu->devtype_data = of_device_get_match_data(&pdev->dev);
 
+	if (pmu->devtype_data->type & DDR_PERF_TYPE) {
+		pmu->pmu.attr_groups = ddr_attr_groups;
+		pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
+		name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", pmu->id);
+	} else if (pmu->devtype_data->type & DB_PERF_TYPE) {
+		pmu->pmu.attr_groups = db_attr_groups;
+		pmu->id = ida_simple_get(&db_ida, 0, 0, GFP_KERNEL);
+		name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DB_PERF_DEV_NAME "%d", pmu->id);
+
+		pmu->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
+		if (IS_ERR(pmu->clk_ipg)) {
+			dev_err(&pdev->dev, "no ipg clock defined\n");
+			return PTR_ERR(pmu->clk_ipg);
+		}
+
+		pmu->clk_cnt = devm_clk_get(&pdev->dev, "cnt");
+		if (IS_ERR(pmu->clk_cnt)) {
+			dev_err(&pdev->dev, "no cnt clock defined\n");
+			return PTR_ERR(pmu->clk_cnt);
+		}
+
+		ret = ddr_perf_clks_enable(pmu);
+		if (ret)
+			return ret;
+	} else
+		return -EINVAL;
+
+	if (!name) {
+		ret = -ENOMEM;
+		goto cpuhp_state_err;
+	}
 
 	pmu->cpu = raw_smp_processor_id();
 
 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
@@ -625,7 +864,12 @@ ddr_perf_err:
 cpuhp_instance_err:
 	cpuhp_remove_multi_state(pmu->cpuhp_state);
 cpuhp_state_err:
-	ida_simple_remove(&ddr_ida, pmu->id);
+	if (pmu->devtype_data->type & DDR_PERF_TYPE)
+		ida_simple_remove(&ddr_ida, pmu->id);
+	else {
+		ddr_perf_clks_disable(pmu);
+		ida_simple_remove(&db_ida, pmu->id);
+	}
 	dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret);
 	return ret;
 }
@@ -640,7 +884,13 @@ static int ddr_perf_remove(struct platform_device *pdev)
 
 	perf_pmu_unregister(&pmu->pmu);
 
-	ida_simple_remove(&ddr_ida, pmu->id);
+	if (pmu->devtype_data->type & DDR_PERF_TYPE)
+		ida_simple_remove(&ddr_ida, pmu->id);
+	else {
+		ddr_perf_clks_disable(pmu);
+		ida_simple_remove(&db_ida, pmu->id);
+	}
+
 	return 0;
 }
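For completeness, one way to exercise the new filter from userspace is a raw perf_event_open() call with the fields packed per the format attributes above. A hypothetical sketch: it assumes a booted system where the first DDR PMU instance registered as imx8_ddr0, and the axi_port/axi_channel values are arbitrary examples (run as root or with CAP_PERFMON):

/* Hypothetical usage sketch for the port/channel filter; not from the
 * patch. Counts axid-read (0x41) on port 5, channel 1 for one second. */
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int type, fd;
	FILE *f = fopen("/sys/bus/event_source/devices/imx8_ddr0/type", "r");

	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x41;			/* axid-read */
	attr.config1 = 0xffffULL << 16;		/* axi_id = 0, axi_mask = 0xffff */
	attr.config2 = 5 | (1ULL << 3);		/* axi_port = 5, axi_channel = 1 */

	/* Uncore PMU: open system-wide (pid = -1) on a single CPU. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("axid-read: %lld\n", count);
	close(fd);
	return 0;
}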