summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/bpf/bpftool/gen.c2
-rw-r--r--tools/bpf/bpftool/map.c6
-rw-r--r--tools/include/uapi/linux/bpf.h4
-rw-r--r--tools/lib/bpf/btf_dump.c11
-rw-r--r--tools/lib/bpf/libbpf.c3
-rw-r--r--tools/lib/bpf/netlink.c63
-rw-r--r--tools/lib/bpf/xsk.c11
-rw-r--r--tools/perf/builtin-stat.c2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/cache.json111
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/floating-point.json24
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/frontend.json18
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/memory.json96
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/pipeline.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json461
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bind_perm.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_in_map.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_link.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_loop.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c4
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_dummy.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c4
-rw-r--r--tools/testing/selftests/bpf/progs/xdping_kern.c4
-rwxr-xr-xtools/testing/selftests/bpf/test_lirc_mode2.sh5
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh10
-rwxr-xr-xtools/testing/selftests/bpf/test_tcp_check_syncookie.sh2
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect.sh4
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect_multi.sh62
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_veth.sh4
-rw-r--r--tools/testing/selftests/bpf/xdping.c5
-rw-r--r--tools/testing/selftests/lkdtm/config1
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c6
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh19
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_under_vrf.sh8
-rw-r--r--tools/testing/selftests/net/timestamping.c4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh4
-rw-r--r--tools/testing/selftests/sgx/Makefile2
-rw-r--r--tools/testing/selftests/vm/Makefile12
-rw-r--r--tools/testing/selftests/x86/Makefile6
-rwxr-xr-xtools/testing/selftests/x86/check_cc.sh2
-rw-r--r--tools/virtio/virtio_test.c1
49 files changed, 807 insertions, 258 deletions
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index d40d92bbf0e4..07fa502a4ac1 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -870,7 +870,6 @@ static int do_skeleton(int argc, char **argv)
s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
if (!s) \n\
goto err; \n\
- obj->skeleton = s; \n\
\n\
s->sz = sizeof(*s); \n\
s->name = \"%1$s\"; \n\
@@ -955,6 +954,7 @@ static int do_skeleton(int argc, char **argv)
\n\
\"; \n\
\n\
+ obj->skeleton = s; \n\
return 0; \n\
err: \n\
bpf_object__destroy_skeleton(s); \n\
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 407071d54ab1..72ef9ddae260 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1042,11 +1042,9 @@ static void print_key_value(struct bpf_map_info *info, void *key,
json_writer_t *btf_wtr;
struct btf *btf;
- btf = btf__load_from_kernel_by_id(info->btf_id);
- if (libbpf_get_error(btf)) {
- p_err("failed to get btf");
+ btf = get_map_kv_btf(info);
+ if (libbpf_get_error(btf))
return;
- }
if (json_output) {
print_entry_json(info, key, value, btf);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 791f31dd0abe..e2c8f946c541 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2276,8 +2276,8 @@ union bpf_attr {
* Return
* The return value depends on the result of the test, and can be:
*
- * * 0, if current task belongs to the cgroup2.
- * * 1, if current task does not belong to the cgroup2.
+ * * 1, if current task belongs to the cgroup2.
+ * * 0, if current task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
* long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 8c9325802793..841cc68e3f42 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -1481,6 +1481,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
if (s->name_resolved)
return *cached_name ? *cached_name : orig_name;
+ if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) {
+ s->name_resolved = 1;
+ return orig_name;
+ }
+
dup_cnt = btf_dump_name_dups(d, name_map, orig_name);
if (dup_cnt > 1) {
const size_t max_len = 256;
@@ -1829,14 +1834,16 @@ static int btf_dump_array_data(struct btf_dump *d,
{
const struct btf_array *array = btf_array(t);
const struct btf_type *elem_type;
- __u32 i, elem_size = 0, elem_type_id;
+ __u32 i, elem_type_id;
+ __s64 elem_size;
bool is_array_member;
elem_type_id = array->type;
elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
elem_size = btf__resolve_size(d->btf, elem_type_id);
if (elem_size <= 0) {
- pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id);
+ pr_warn("unexpected elem size %zd for array type [%u]\n",
+ (ssize_t)elem_size, id);
return -EINVAL;
}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0ad29203cbfb..693e14799fb9 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -10809,6 +10809,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
+ if (!s)
+ return;
+
if (s->progs)
bpf_object__detach_skeleton(s);
if (s->obj)
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 39f25e09b51e..fadde7d80a51 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -87,29 +87,75 @@ enum {
NL_DONE,
};
+static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
+{
+ int len;
+
+ do {
+ len = recvmsg(sock, mhdr, flags);
+ } while (len < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (len < 0)
+ return -errno;
+ return len;
+}
+
+static int alloc_iov(struct iovec *iov, int len)
+{
+ void *nbuf;
+
+ nbuf = realloc(iov->iov_base, len);
+ if (!nbuf)
+ return -ENOMEM;
+
+ iov->iov_base = nbuf;
+ iov->iov_len = len;
+ return 0;
+}
+
static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
__dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
void *cookie)
{
+ struct iovec iov = {};
+ struct msghdr mhdr = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
bool multipart = true;
struct nlmsgerr *err;
struct nlmsghdr *nh;
- char buf[4096];
int len, ret;
+ ret = alloc_iov(&iov, 4096);
+ if (ret)
+ goto done;
+
while (multipart) {
start:
multipart = false;
- len = recv(sock, buf, sizeof(buf), 0);
+ len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
+ if (len < 0) {
+ ret = len;
+ goto done;
+ }
+
+ if (len > iov.iov_len) {
+ ret = alloc_iov(&iov, len);
+ if (ret)
+ goto done;
+ }
+
+ len = netlink_recvmsg(sock, &mhdr, 0);
if (len < 0) {
- ret = -errno;
+ ret = len;
goto done;
}
if (len == 0)
break;
- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
nh = NLMSG_NEXT(nh, len)) {
if (nh->nlmsg_pid != nl_pid) {
ret = -LIBBPF_ERRNO__WRNGPID;
@@ -130,7 +176,8 @@ start:
libbpf_nla_dump_errormsg(nh);
goto done;
case NLMSG_DONE:
- return 0;
+ ret = 0;
+ goto done;
default:
break;
}
@@ -142,15 +189,17 @@ start:
case NL_NEXT:
goto start;
case NL_DONE:
- return 0;
+ ret = 0;
+ goto done;
default:
- return ret;
+ goto done;
}
}
}
}
ret = 0;
done:
+ free(iov.iov_base);
return ret;
}
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index e9b619aa0cdf..a27b3141463a 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -1210,12 +1210,23 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
int xsk_umem__delete(struct xsk_umem *umem)
{
+ struct xdp_mmap_offsets off;
+ int err;
+
if (!umem)
return 0;
if (umem->refcount)
return -EBUSY;
+ err = xsk_get_mmap_offsets(umem->fd, &off);
+ if (!err && umem->fill_save && umem->comp_save) {
+ munmap(umem->fill_save->ring - off.fr.desc,
+ off.fr.desc + umem->config.fill_size * sizeof(__u64));
+ munmap(umem->comp_save->ring - off.cr.desc,
+ off.cr.desc + umem->config.comp_size * sizeof(__u64));
+ }
+
close(umem->fd);
free(umem);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f0ecfda34ece..1a194edb5452 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -956,10 +956,10 @@ try_again_reset:
* Enable counters and exec the command:
*/
if (forks) {
- evlist__start_workload(evsel_list);
err = enable_counters();
if (err)
return -1;
+ evlist__start_workload(evsel_list);
t0 = rdclock();
clock_gettime(CLOCK_MONOTONIC, &ref_time);
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
index 9ff67206ade4..821d2f2a8f25 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
@@ -315,6 +315,19 @@
"UMask": "0x82"
},
{
+ "BriefDescription": "All retired memory instructions.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.ANY",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
+ "PublicDescription": "Counts all retired memory instructions - loads and stores.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x83"
+ },
+ {
"BriefDescription": "Retired load instructions with locked access.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
@@ -358,6 +371,7 @@
"EventCode": "0xD0",
"EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
"PEBS": "1",
+ "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).",
"SampleAfterValue": "100003",
"UMask": "0x11"
},
@@ -370,6 +384,7 @@
"EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
"L1_Hit_Indication": "1",
"PEBS": "1",
+ "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).",
"SampleAfterValue": "100003",
"UMask": "0x12"
},
@@ -733,7 +748,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010491",
+ "MSRValue": "0x10491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -772,7 +787,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0491",
+ "MSRValue": "0x4003C0491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -785,7 +800,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0491",
+ "MSRValue": "0x1003C0491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -798,7 +813,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0491",
+ "MSRValue": "0x8003C0491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -811,7 +826,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010490",
+ "MSRValue": "0x10490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -850,7 +865,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0490",
+ "MSRValue": "0x4003C0490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -863,7 +878,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0490",
+ "MSRValue": "0x1003C0490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -876,7 +891,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0490",
+ "MSRValue": "0x8003C0490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -889,7 +904,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010120",
+ "MSRValue": "0x10120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -928,7 +943,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0120",
+ "MSRValue": "0x4003C0120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -941,7 +956,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0120",
+ "MSRValue": "0x1003C0120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -954,7 +969,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0120",
+ "MSRValue": "0x8003C0120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -967,7 +982,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010122",
+ "MSRValue": "0x10122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1006,7 +1021,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0122",
+ "MSRValue": "0x4003C0122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1019,7 +1034,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0122",
+ "MSRValue": "0x1003C0122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1032,7 +1047,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0122",
+ "MSRValue": "0x8003C0122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1045,7 +1060,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010004",
+ "MSRValue": "0x10004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1084,7 +1099,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0004",
+ "MSRValue": "0x4003C0004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1097,7 +1112,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0004",
+ "MSRValue": "0x1003C0004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1110,7 +1125,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0004",
+ "MSRValue": "0x8003C0004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1123,7 +1138,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010001",
+ "MSRValue": "0x10001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1162,7 +1177,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0001",
+ "MSRValue": "0x4003C0001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1175,7 +1190,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0001",
+ "MSRValue": "0x1003C0001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1188,7 +1203,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0001",
+ "MSRValue": "0x8003C0001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1201,7 +1216,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010002",
+ "MSRValue": "0x10002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1240,7 +1255,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0002",
+ "MSRValue": "0x4003C0002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1253,7 +1268,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0002",
+ "MSRValue": "0x1003C0002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1266,7 +1281,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0002",
+ "MSRValue": "0x8003C0002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1279,7 +1294,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010400",
+ "MSRValue": "0x10400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1318,7 +1333,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0400",
+ "MSRValue": "0x4003C0400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1331,7 +1346,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0400",
+ "MSRValue": "0x1003C0400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1344,7 +1359,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0400",
+ "MSRValue": "0x8003C0400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1357,7 +1372,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010010",
+ "MSRValue": "0x10010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1396,7 +1411,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0010",
+ "MSRValue": "0x4003C0010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1409,7 +1424,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0010",
+ "MSRValue": "0x1003C0010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1422,7 +1437,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0010",
+ "MSRValue": "0x8003C0010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1435,7 +1450,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010020",
+ "MSRValue": "0x10020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1474,7 +1489,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0020",
+ "MSRValue": "0x4003C0020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1487,7 +1502,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0020",
+ "MSRValue": "0x1003C0020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1500,7 +1515,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0020",
+ "MSRValue": "0x8003C0020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1513,7 +1528,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010080",
+ "MSRValue": "0x10080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1552,7 +1567,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0080",
+ "MSRValue": "0x4003C0080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1565,7 +1580,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0080",
+ "MSRValue": "0x1003C0080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1578,7 +1593,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0080",
+ "MSRValue": "0x8003C0080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1591,7 +1606,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010100",
+ "MSRValue": "0x10100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1630,7 +1645,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0100",
+ "MSRValue": "0x4003C0100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1643,7 +1658,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0100",
+ "MSRValue": "0x1003C0100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1656,7 +1671,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0100",
+ "MSRValue": "0x8003C0100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
index 503737ed3a83..9e873ab22450 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
@@ -1,73 +1,81 @@
[
{
- "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "BriefDescription": "Counts once for most SIMD 128-bit packed computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+ "PublicDescription": "Counts once for most SIMD 128-bit packed computational double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
{
- "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "BriefDescription": "Counts once for most SIMD 128-bit packed computational single precision floating-point instruction retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "PublicDescription": "Counts once for most SIMD 128-bit packed computational single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
{
- "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "BriefDescription": "Counts once for most SIMD 256-bit packed double computational precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+ "PublicDescription": "Counts once for most SIMD 256-bit packed double computational precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
{
- "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "BriefDescription": "Counts once for most SIMD 256-bit packed single computational precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+ "PublicDescription": "Counts once for most SIMD 256-bit packed single computational precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
{
- "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.",
+ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
{
- "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.",
+ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x80"
},
{
- "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "BriefDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "PublicDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
{
- "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "BriefDescription": "Counts once for most SIMD scalar computational single precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "PublicDescription": "Counts once for most SIMD scalar computational single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
index 078706a50091..ecce4273ae52 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
@@ -30,7 +30,21 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.",
+ "BriefDescription": "Retired Instructions who experienced DSB miss.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x1",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced a critical DSB miss.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xC6",
@@ -38,7 +52,7 @@
"MSRIndex": "0x3F7",
"MSRValue": "0x11",
"PEBS": "1",
- "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+ "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.",
"SampleAfterValue": "100007",
"TakenAlone": "1",
"UMask": "0x1"
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
index 6f29b02fa320..60c286b4fe54 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
@@ -299,7 +299,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00491",
+ "MSRValue": "0x83FC00491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -312,7 +312,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00491",
+ "MSRValue": "0x63FC00491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -325,7 +325,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000491",
+ "MSRValue": "0x604000491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -338,7 +338,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800491",
+ "MSRValue": "0x63B800491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -377,7 +377,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00490",
+ "MSRValue": "0x83FC00490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -390,7 +390,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00490",
+ "MSRValue": "0x63FC00490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -403,7 +403,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000490",
+ "MSRValue": "0x604000490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -416,7 +416,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800490",
+ "MSRValue": "0x63B800490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -455,7 +455,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00120",
+ "MSRValue": "0x83FC00120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -468,7 +468,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00120",
+ "MSRValue": "0x63FC00120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -481,7 +481,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000120",
+ "MSRValue": "0x604000120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -494,7 +494,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800120",
+ "MSRValue": "0x63B800120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -533,7 +533,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00122",
+ "MSRValue": "0x83FC00122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -546,7 +546,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00122",
+ "MSRValue": "0x63FC00122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -559,7 +559,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000122",
+ "MSRValue": "0x604000122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -572,7 +572,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800122",
+ "MSRValue": "0x63B800122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -611,7 +611,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00004",
+ "MSRValue": "0x83FC00004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -624,7 +624,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00004",
+ "MSRValue": "0x63FC00004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -637,7 +637,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000004",
+ "MSRValue": "0x604000004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -650,7 +650,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800004",
+ "MSRValue": "0x63B800004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -689,7 +689,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00001",
+ "MSRValue": "0x83FC00001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -702,7 +702,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00001",
+ "MSRValue": "0x63FC00001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -715,7 +715,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000001",
+ "MSRValue": "0x604000001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -728,7 +728,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800001",
+ "MSRValue": "0x63B800001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -767,7 +767,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00002",
+ "MSRValue": "0x83FC00002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -780,7 +780,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00002",
+ "MSRValue": "0x63FC00002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -793,7 +793,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000002",
+ "MSRValue": "0x604000002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -806,7 +806,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800002",
+ "MSRValue": "0x63B800002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -845,7 +845,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00400",
+ "MSRValue": "0x83FC00400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -858,7 +858,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00400",
+ "MSRValue": "0x63FC00400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -871,7 +871,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000400",
+ "MSRValue": "0x604000400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -884,7 +884,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800400",
+ "MSRValue": "0x63B800400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -923,7 +923,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00010",
+ "MSRValue": "0x83FC00010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -936,7 +936,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00010",
+ "MSRValue": "0x63FC00010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -949,7 +949,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000010",
+ "MSRValue": "0x604000010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -962,7 +962,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800010",
+ "MSRValue": "0x63B800010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1001,7 +1001,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00020",
+ "MSRValue": "0x83FC00020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1014,7 +1014,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00020",
+ "MSRValue": "0x63FC00020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1027,7 +1027,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000020",
+ "MSRValue": "0x604000020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1040,7 +1040,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800020",
+ "MSRValue": "0x63B800020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1079,7 +1079,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00080",
+ "MSRValue": "0x83FC00080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1092,7 +1092,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00080",
+ "MSRValue": "0x63FC00080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1105,7 +1105,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000080",
+ "MSRValue": "0x604000080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1118,7 +1118,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800080",
+ "MSRValue": "0x63B800080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1157,7 +1157,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00100",
+ "MSRValue": "0x83FC00100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1170,7 +1170,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00100",
+ "MSRValue": "0x63FC00100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1183,7 +1183,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000100",
+ "MSRValue": "0x604000100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1196,7 +1196,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800100",
+ "MSRValue": "0x63B800100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
index ca5748120666..12eabae3e224 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
@@ -436,6 +436,17 @@
"SampleAfterValue": "2000003"
},
{
+ "BriefDescription": "Number of all retired NOP instructions.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091, SKL044",
+ "EventCode": "0xC0",
+ "EventName": "INST_RETIRED.NOP",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
"BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
"Counter": "1",
"CounterHTOff": "1",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 863c9e103969..b016f7d1ff3d 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -1,26 +1,167 @@
[
{
+ "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "Frontend_Bound",
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "MetricGroup": "TopdownL1_SMT",
+ "MetricName": "Frontend_Bound_SMT",
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "Bad_Speculation",
+ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example."
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "MetricGroup": "TopdownL1_SMT",
+ "MetricName": "Bad_Speculation_SMT",
+ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
+ "MetricConstraint": "NO_NMI_WATCHDOG",
+ "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "Backend_Bound",
+ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound."
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "MetricGroup": "TopdownL1_SMT",
+ "MetricName": "Backend_Bound_SMT",
+ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "Retiring",
+ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. "
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "MetricGroup": "TopdownL1_SMT",
+ "MetricName": "Retiring_SMT",
+ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) )",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "Mispredictions"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts_SMT",
+ "MetricName": "Mispredictions_SMT"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (OFFCORE_REQUESTS_BUFFER.SQ_FULL / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "Memory_Bandwidth"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ",
+ "MetricGroup": "Mem;MemoryBW;Offcore_SMT",
+ "MetricName": "Memory_Bandwidth_SMT"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) )",
+ "MetricGroup": "Mem;MemoryLat;Offcore",
+ "MetricName": "Memory_Latency"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) )",
+ "MetricGroup": "Mem;MemoryLat;Offcore_SMT",
+ "MetricName": "Memory_Latency_SMT"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( 9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / CPU_CLK_UNHALTED.THREAD) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "Memory_Data_TLBs"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( 9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ",
+ "MetricGroup": "Mem;MemoryTLB;_SMT",
+ "MetricName": "Memory_Data_TLBs_SMT"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * CPU_CLK_UNHALTED.THREAD))",
+ "MetricGroup": "Ret",
+ "MetricName": "Branching_Overhead"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricGroup": "Ret_SMT",
+ "MetricName": "Branching_Overhead_SMT"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))",
+ "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB",
+ "MetricName": "Big_Code"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB_SMT",
+ "MetricName": "Big_Code_SMT"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))",
+ "MetricGroup": "Fed;FetchBW;Frontend",
+ "MetricName": "Instruction_Fetch_BW"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))",
+ "MetricGroup": "Fed;FetchBW;Frontend_SMT",
+ "MetricName": "Instruction_Fetch_BW_SMT"
+ },
+ {
"BriefDescription": "Instructions Per Cycle (per Logical Processor)",
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Summary",
+ "MetricGroup": "Ret;Summary",
"MetricName": "IPC"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
- "MetricGroup": "Pipeline;Retire",
+ "MetricGroup": "Pipeline;Ret;Retire",
"MetricName": "UPI"
},
{
"BriefDescription": "Instruction per taken branch",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;FetchBW;PGO",
- "MetricName": "IpTB"
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;FetchBW",
+ "MetricName": "UpTB"
},
{
"BriefDescription": "Cycles Per Instruction (per Logical Processor)",
"MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)",
- "MetricGroup": "Pipeline",
+ "MetricGroup": "Pipeline;Mem",
"MetricName": "CPI"
},
{
@@ -30,39 +171,84 @@
"MetricName": "CLKS"
},
{
- "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TmaL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+ "MetricGroup": "TmaL1_SMT",
+ "MetricName": "SLOTS_SMT"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "Execute_per_Issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "SMT;TmaL1",
+ "MetricGroup": "Ret;SMT;TmaL1",
"MetricName": "CoreIPC"
},
{
- "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
- "MetricGroup": "SMT;TmaL1",
+ "MetricGroup": "Ret;SMT;TmaL1_SMT",
"MetricName": "CoreIPC_SMT"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
"MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Flops",
+ "MetricGroup": "Ret;Flops",
"MetricName": "FLOPc"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
"MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
- "MetricGroup": "Flops_SMT",
+ "MetricGroup": "Ret;Flops_SMT",
"MetricName": "FLOPc_SMT"
},
{
+ "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width)",
+ "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * CPU_CLK_UNHALTED.THREAD )",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "FP_Arith_Utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting."
+ },
+ {
+ "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
+ "MetricGroup": "Cor;Flops;HPC_SMT",
+ "MetricName": "FP_Arith_Utilization_SMT",
+ "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Pipeline;PortsUtil",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
"MetricName": "ILP"
},
{
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) * (4 * CPU_CLK_UNHALTED.THREAD) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "Branch_Misprediction_Cost"
+ },
+ {
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts_SMT",
+ "MetricName": "Branch_Misprediction_Cost_SMT"
+ },
+ {
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "BrMispredicts",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
"MetricName": "IpMispredict"
},
{
@@ -86,122 +272,249 @@
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Branches;InsType",
+ "MetricGroup": "Branches;Fed;InsType",
"MetricName": "IpBranch"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
- "MetricGroup": "Branches",
+ "MetricGroup": "Branches;Fed;PGO",
"MetricName": "IpCall"
},
{
+ "BriefDescription": "Instruction per taken branch",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO",
+ "MetricName": "IpTB"
+ },
+ {
"BriefDescription": "Branch instructions per taken branch. ",
"MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;PGO",
+ "MetricGroup": "Branches;Fed;PGO",
"MetricName": "BpTkBranch"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
- "MetricGroup": "Flops;FpArith;InsType",
+ "MetricGroup": "Flops;InsType",
"MetricName": "IpFLOP"
},
{
+ "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) )",
+ "MetricGroup": "Flops;InsType",
+ "MetricName": "IpArith",
+ "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "MetricGroup": "Flops;FpScalar;InsType",
+ "MetricName": "IpArith_Scalar_SP",
+ "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "MetricGroup": "Flops;FpScalar;InsType",
+ "MetricName": "IpArith_Scalar_DP",
+ "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE )",
+ "MetricGroup": "Flops;FpVector;InsType",
+ "MetricName": "IpArith_AVX128",
+ "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )",
+ "MetricGroup": "Flops;FpVector;InsType",
+ "MetricName": "IpArith_AVX256",
+ "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
+ "MetricGroup": "Flops;FpVector;InsType",
+ "MetricName": "IpArith_AVX512",
+ "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
"BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1",
"MetricName": "Instructions"
},
{
+ "BriefDescription": "Average number of Uops issued by front-end when it issued something",
+ "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchBW",
+ "MetricName": "Fetch_UpC"
+ },
+ {
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
- "MetricGroup": "DSB;FetchBW",
+ "MetricGroup": "DSB;Fed;FetchBW",
"MetricName": "DSB_Coverage"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
+ "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.",
+ "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / CPU_CLK_UNHALTED.THREAD / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "DSB_Misses_Cost"
+ },
+ {
+ "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.",
+ "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))",
+ "MetricGroup": "DSBmiss;Fed_SMT",
+ "MetricName": "DSB_Misses_Cost_SMT"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative DSB miss",
+ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "IpDSB_Miss_Ret"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are non-taken conditionals",
+ "MetricExpr": "BR_INST_RETIRED.NOT_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches;CodeGen;PGO",
+ "MetricName": "Cond_NT"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are taken conditionals",
+ "MetricExpr": "( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches;CodeGen;PGO",
+ "MetricName": "Cond_TK"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are CALL or RET",
+ "MetricExpr": "( BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN ) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "CallRet"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "Jump"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
- "MetricGroup": "MemoryBound;MemoryLat",
- "MetricName": "Load_Miss_Real_Latency"
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "Load_Miss_Real_Latency",
+ "PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings."
},
{
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
"MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "MemoryBound;MemoryBW",
+ "MetricGroup": "Mem;MemoryBound;MemoryBW",
"MetricName": "MLP"
},
{
- "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricConstraint": "NO_NMI_WATCHDOG",
- "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CORE_CLKS )",
- "MetricGroup": "MemoryTLB",
- "MetricName": "Page_Walks_Utilization"
- },
- {
"BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
- "MetricGroup": "MemoryBW",
+ "MetricGroup": "Mem;MemoryBW",
"MetricName": "L1D_Cache_Fill_BW"
},
{
"BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
- "MetricGroup": "MemoryBW",
+ "MetricGroup": "Mem;MemoryBW",
"MetricName": "L2_Cache_Fill_BW"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
- "MetricGroup": "MemoryBW",
+ "MetricGroup": "Mem;MemoryBW",
"MetricName": "L3_Cache_Fill_BW"
},
{
"BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
- "MetricGroup": "MemoryBW;Offcore",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
"MetricName": "L3_Cache_Access_BW"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses",
+ "MetricGroup": "Mem;CacheMisses",
"MetricName": "L1MPKI"
},
{
+ "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
+ "MetricExpr": "1000 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;CacheMisses",
+ "MetricName": "L1MPKI_Load"
+ },
+ {
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses",
+ "MetricGroup": "Mem;Backend;CacheMisses",
"MetricName": "L2MPKI"
},
{
"BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Offcore",
+ "MetricGroup": "Mem;CacheMisses;Offcore",
"MetricName": "L2MPKI_All"
},
{
+ "BriefDescription": "L2 cache misses per kilo instruction for all demand loads (including speculative)",
+ "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;CacheMisses",
+ "MetricName": "L2MPKI_Load"
+ },
+ {
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
"MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses",
+ "MetricGroup": "Mem;CacheMisses",
"MetricName": "L2HPKI_All"
},
{
+ "BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
+ "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;CacheMisses",
+ "MetricName": "L2HPKI_Load"
+ },
+ {
"BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses",
+ "MetricGroup": "Mem;CacheMisses",
"MetricName": "L3MPKI"
},
{
+ "BriefDescription": "Fill Buffer (FB) true hits per kilo instructions for retired demand loads",
+ "MetricExpr": "1000 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;CacheMisses",
+ "MetricName": "FB_HPKI"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricConstraint": "NO_NMI_WATCHDOG",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CPU_CLK_UNHALTED.THREAD )",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
+ "MetricGroup": "Mem;MemoryTLB_SMT",
+ "MetricName": "Page_Walks_Utilization_SMT"
+ },
+ {
"BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
"MetricExpr": "1000 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
- "MetricGroup": "L2Evicts;Server",
+ "MetricGroup": "L2Evicts;Mem;Server",
"MetricName": "L2_Evictions_Silent_PKI"
},
{
"BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
"MetricExpr": "1000 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
- "MetricGroup": "L2Evicts;Server",
+ "MetricGroup": "L2Evicts;Mem;Server",
"MetricName": "L2_Evictions_NonSilent_PKI"
},
{
@@ -219,7 +532,7 @@
{
"BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
- "MetricGroup": "Flops;HPC",
+ "MetricGroup": "Cor;Flops;HPC",
"MetricName": "GFLOPs"
},
{
@@ -229,6 +542,48 @@
"MetricName": "Turbo_Utilization"
},
{
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0",
+ "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Power",
+ "MetricName": "Power_License0_Utilization",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes."
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+ "MetricGroup": "Power_SMT",
+ "MetricName": "Power_License0_Utilization_SMT",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1",
+ "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Power",
+ "MetricName": "Power_License1_Utilization",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions."
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+ "MetricGroup": "Power_SMT",
+ "MetricName": "Power_License1_Utilization_SMT",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)",
+ "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Power",
+ "MetricName": "Power_License2_Utilization",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions."
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+ "MetricGroup": "Power_SMT",
+ "MetricName": "Power_License2_Utilization_SMT",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
"MetricGroup": "SMT",
@@ -241,33 +596,45 @@
"MetricName": "Kernel_Utilization"
},
{
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "Kernel_CPI"
+ },
+ {
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
- "MetricGroup": "HPC;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC",
"MetricName": "DRAM_BW_Use"
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
"MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )",
- "MetricGroup": "MemoryLat;SoC",
+ "MetricGroup": "Mem;MemoryLat;SoC",
"MetricName": "MEM_Read_Latency"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
"MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@",
- "MetricGroup": "MemoryBW;SoC",
+ "MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "MEM_Parallel_Reads"
},
{
+ "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches",
+ "MetricExpr": "1000000000 * ( UNC_M_RPQ_OCCUPANCY / UNC_M_RPQ_INSERTS ) / imc_0@event\\=0x0@",
+ "MetricGroup": "Mem;MemoryLat;SoC;Server",
+ "MetricName": "MEM_DRAM_Read_Latency"
+ },
+ {
"BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
"MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time",
- "MetricGroup": "IoBW;SoC;Server",
+ "MetricGroup": "IoBW;Mem;SoC;Server",
"MetricName": "IO_Write_BW"
},
{
"BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
"MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time",
- "MetricGroup": "IoBW;SoC;Server",
+ "MetricGroup": "IoBW;Mem;SoC;Server",
"MetricName": "IO_Read_BW"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
index 6ed92bc5c129..06c5ca26ca3f 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
@@ -538,6 +538,18 @@
"Unit": "IIO"
},
{
+ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0-3",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC2",
+ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.ALL_PARTS",
+ "FCMask": "0x4",
+ "PerPkg": "1",
+ "PortMask": "0x0f",
+ "PublicDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0-3",
+ "UMask": "0x03",
+ "Unit": "IIO"
+ },
+ {
"BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0",
"Counter": "0,1,2,3",
"EventCode": "0xC2",
@@ -586,6 +598,17 @@
"Unit": "IIO"
},
{
+ "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0-3",
+ "Counter": "2,3",
+ "EventCode": "0xD5",
+ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.ALL_PARTS",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0-3",
+ "UMask": "0x0f",
+ "Unit": "IIO"
+ },
+ {
"BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0",
"Counter": "2,3",
"EventCode": "0xD5",
diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
index d0f06e40c16d..eac71fbb24ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
+++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
@@ -1,13 +1,24 @@
// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include "bind_perm.skel.h"
-
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/capability.h>
+#include "test_progs.h"
+#include "bind_perm.skel.h"
+
static int duration;
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ return 0;
+}
+
void try_bind(int family, int port, int expected_errno)
{
struct sockaddr_storage addr = {};
@@ -75,6 +86,9 @@ void test_bind_perm(void)
struct bind_perm *skel;
int cgroup_fd;
+ if (create_netns())
+ return;
+
cgroup_fd = test__join_cgroup("/bind_perm");
if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
return;
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c
index 1cfeb940cf9f..5f0e0bfc151e 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c
@@ -23,7 +23,7 @@ struct {
__uint(value_size, sizeof(__u32));
} mim_hash SEC(".maps");
-SEC("xdp_mimtest")
+SEC("xdp")
int xdp_mimtest0(struct xdp_md *ctx)
{
int value = 123;
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 81b57b9aaaea..7967348b11af 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -113,7 +113,7 @@ static void tpcpy(struct bpf_tcp_sock *dst,
#define RET_LOG() ({ \
linum = __LINE__; \
- bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \
+ bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY); \
return CG_OK; \
})
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
index 47cbe2eeae43..fac7ef99f9a6 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
@@ -156,7 +156,7 @@ int check_syncookie_clsact(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("xdp/check_syncookie")
+SEC("xdp")
int check_syncookie_xdp(struct xdp_md *ctx)
{
check_syncookie(ctx, (void *)(long)ctx->data,
diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
index 31f9bce37491..e6aa2fc6ce6b 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp.c
@@ -210,7 +210,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
return XDP_TX;
}
-SEC("xdp_tx_iptunnel")
+SEC("xdp")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index 3d66599eee2e..199c61b7d062 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -2,7 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("xdp_adjust_tail_grow")
+SEC("xdp")
int _xdp_adjust_tail_grow(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
index 22065a9cfb25..b7448253d135 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
@@ -9,9 +9,7 @@
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
-SEC("xdp_adjust_tail_shrink")
+SEC("xdp")
int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
index b360ba2bd441..807bf895f42c 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
@@ -5,7 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("xdp_dm_log")
+SEC("xdp")
int xdpdm_devlog(struct xdp_md *ctx)
{
char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
index eb93ea95d1d8..ee7d6ac0f615 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_link.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
@@ -5,7 +5,7 @@
char LICENSE[] SEC("license") = "GPL";
-SEC("xdp/handler")
+SEC("xdp")
int xdp_handler(struct xdp_md *xdp)
{
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
index fcabcda30ba3..27eb52dda92c 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -206,7 +206,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
return XDP_TX;
}
-SEC("xdp_tx_iptunnel")
+SEC("xdp")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 3a67921f62b5..596c4e71bf3a 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -797,7 +797,7 @@ out:
return XDP_DROP;
}
-SEC("xdp-test-v4")
+SEC("xdp")
int balancer_ingress_v4(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
@@ -816,7 +816,7 @@ int balancer_ingress_v4(struct xdp_md *ctx)
return XDP_DROP;
}
-SEC("xdp-test-v6")
+SEC("xdp")
int balancer_ingress_v6(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
index 59ee4f182ff8..532025057711 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
@@ -12,13 +12,13 @@ struct {
__uint(max_entries, 4);
} cpu_map SEC(".maps");
-SEC("xdp_redir")
+SEC("xdp")
int xdp_redir_prog(struct xdp_md *ctx)
{
return bpf_redirect_map(&cpu_map, 1, 0);
}
-SEC("xdp_dummy")
+SEC("xdp")
int xdp_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
index 0ac086497722..1e6b9c38ea6d 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -9,7 +9,7 @@ struct {
__uint(max_entries, 4);
} dm_ports SEC(".maps");
-SEC("xdp_redir")
+SEC("xdp")
int xdp_redir_prog(struct xdp_md *ctx)
{
return bpf_redirect_map(&dm_ports, 1, 0);
@@ -18,7 +18,7 @@ int xdp_redir_prog(struct xdp_md *ctx)
/* invalid program on DEVMAP entry;
* SEC name means expected attach type not set
*/
-SEC("xdp_dummy")
+SEC("xdp")
int xdp_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c
index ea25e8881992..d988b2e0cee8 100644
--- a/tools/testing/selftests/bpf/progs/xdp_dummy.c
+++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c
@@ -4,7 +4,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("xdp_dummy")
+SEC("xdp")
int xdp_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
index 880debcbcd65..8395782b6e0a 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -34,7 +34,7 @@ struct {
__uint(max_entries, 128);
} mac_map SEC(".maps");
-SEC("xdp_redirect_map_multi")
+SEC("xdp")
int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
@@ -63,7 +63,7 @@ int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
}
/* The following 2 progs are for 2nd devmap prog testing */
-SEC("xdp_redirect_map_ingress")
+SEC("xdp")
int xdp_redirect_map_all_prog(struct xdp_md *ctx)
{
return bpf_redirect_map(&map_egress, 0,
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
index 6b9ca40bd1f4..4ad73847b8a5 100644
--- a/tools/testing/selftests/bpf/progs/xdping_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -86,7 +86,7 @@ static __always_inline int icmp_check(struct xdp_md *ctx, int type)
return XDP_TX;
}
-SEC("xdpclient")
+SEC("xdp")
int xdping_client(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
@@ -150,7 +150,7 @@ int xdping_client(struct xdp_md *ctx)
return XDP_TX;
}
-SEC("xdpserver")
+SEC("xdp")
int xdping_server(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
index ec4e15948e40..5252b91f48a1 100755
--- a/tools/testing/selftests/bpf/test_lirc_mode2.sh
+++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
@@ -3,6 +3,7 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+ret=$ksft_skip
msg="skip all tests:"
if [ $UID != 0 ]; then
@@ -25,7 +26,7 @@ do
fi
done
-if [ -n $LIRCDEV ];
+if [ -n "$LIRCDEV" ];
then
TYPE=lirc_mode2
./test_lirc_mode2_user $LIRCDEV $INPUTDEV
@@ -36,3 +37,5 @@ then
echo -e ${GREEN}"PASS: $TYPE"${NC}
fi
fi
+
+exit $ret
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index b497bb85b667..6c69c42b1d60 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -120,6 +120,14 @@ setup()
ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ # disable IPv6 DAD because it sometimes takes too long and fails tests
+ ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
ip link add veth1 type veth peer name veth2
ip link add veth3 type veth peer name veth4
ip link add veth5 type veth peer name veth6
@@ -289,7 +297,7 @@ test_ping()
ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null
RET=$?
elif [ "${PROTO}" == "IPv6" ] ; then
- ip netns exec ${NS1} ping6 -c 1 -W 6 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
+ ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
RET=$?
else
echo " test_ping: unknown PROTO: ${PROTO}"
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
index 9b3617d770a5..fed765157c53 100755
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
@@ -77,7 +77,7 @@ TEST_IF=lo
MAX_PING_TRIES=5
BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o"
CLSACT_SECTION="clsact/check_syncookie"
-XDP_SECTION="xdp/check_syncookie"
+XDP_SECTION="xdp"
BPF_PROG_ID=0
PROG="${DIR}/test_tcp_check_syncookie_user"
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
index c033850886f4..57c8db9972a6 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -52,8 +52,8 @@ test_xdp_redirect()
return 0
fi
- ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
- ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
+ ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
+ ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
index bedff7aa7023..cc57cb87e65f 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
@@ -32,6 +32,11 @@ DRV_MODE="xdpgeneric xdpdrv xdpegress"
PASS=0
FAIL=0
LOG_DIR=$(mktemp -d)
+declare -a NS
+NS[0]="ns0-$(mktemp -u XXXXXX)"
+NS[1]="ns1-$(mktemp -u XXXXXX)"
+NS[2]="ns2-$(mktemp -u XXXXXX)"
+NS[3]="ns3-$(mktemp -u XXXXXX)"
test_pass()
{
@@ -47,11 +52,9 @@ test_fail()
clean_up()
{
- for i in $(seq $NUM); do
- ip link del veth$i 2> /dev/null
- ip netns del ns$i 2> /dev/null
+ for i in $(seq 0 $NUM); do
+ ip netns del ${NS[$i]} 2> /dev/null
done
- ip netns del ns0 2> /dev/null
}
# Kselftest framework requirement - SKIP code is 4.
@@ -79,23 +82,22 @@ setup_ns()
mode="xdpdrv"
fi
- ip netns add ns0
+ ip netns add ${NS[0]}
for i in $(seq $NUM); do
- ip netns add ns$i
- ip -n ns$i link add veth0 index 2 type veth \
- peer name veth$i netns ns0 index $((1 + $i))
- ip -n ns0 link set veth$i up
- ip -n ns$i link set veth0 up
-
- ip -n ns$i addr add 192.0.2.$i/24 dev veth0
- ip -n ns$i addr add 2001:db8::$i/64 dev veth0
+ ip netns add ${NS[$i]}
+ ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]}
+ ip -n ${NS[$i]} link set veth0 up
+ ip -n ${NS[0]} link set veth$i up
+
+ ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0
+ ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0
# Add a neigh entry for IPv4 ping test
- ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
- ip -n ns$i link set veth0 $mode obj \
- xdp_dummy.o sec xdp_dummy &> /dev/null || \
+ ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
+ ip -n ${NS[$i]} link set veth0 $mode obj \
+ xdp_dummy.o sec xdp &> /dev/null || \
{ test_fail "Unable to load dummy xdp" && exit 1; }
IFACES="$IFACES veth$i"
- veth_mac[$i]=$(ip -n ns0 link show veth$i | awk '/link\/ether/ {print $2}')
+ veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}')
done
}
@@ -104,10 +106,10 @@ do_egress_tests()
local mode=$1
# mac test
- ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
- ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
+ ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
+ ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
sleep 0.5
- ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+ ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
sleep 0.5
pkill tcpdump
@@ -123,18 +125,18 @@ do_ping_tests()
local mode=$1
# ping6 test: echo request should be redirect back to itself, not others
- ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
+ ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
- ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
- ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
- ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
+ ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
+ ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
+ ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
sleep 0.5
# ARP test
- ip netns exec ns1 arping -q -c 2 -I veth0 192.0.2.254
+ ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254
# IPv4 test
- ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
+ ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
# IPv6 test
- ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
+ ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
sleep 0.5
pkill tcpdump
@@ -180,7 +182,7 @@ do_tests()
xdpgeneric) drv_p="-S";;
esac
- ip netns exec ns0 ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
+ ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
xdp_pid=$!
sleep 1
if ! ps -p $xdp_pid > /dev/null; then
@@ -197,10 +199,10 @@ do_tests()
kill $xdp_pid
}
-trap clean_up EXIT
-
check_env
+trap clean_up EXIT
+
for mode in ${DRV_MODE}; do
setup_ns $mode
do_tests $mode
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
index 995278e684b6..a3a1eaee26ea 100755
--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -107,9 +107,9 @@ ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0
ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
-ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
+ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp
ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
-ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
+ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp
trap cleanup EXIT
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index 842d9155d36c..79a3453dab25 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -178,9 +178,8 @@ int main(int argc, char **argv)
return 1;
}
- main_prog = bpf_object__find_program_by_title(obj,
- server ? "xdpserver" :
- "xdpclient");
+ main_prog = bpf_object__find_program_by_name(obj,
+ server ? "xdping_server" : "xdping_client");
if (main_prog)
prog_fd = bpf_program__fd(main_prog);
if (!main_prog || prog_fd < 0) {
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index 38edea25631b..b642411ceb6c 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -6,5 +6,6 @@ CONFIG_HARDENED_USERCOPY=y
# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+CONFIG_UBSAN=y
CONFIG_UBSAN_BOUNDS=y
CONFIG_UBSAN_TRAP=y
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
index 3dece8b29253..b57e91e1c3f2 100644
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -218,10 +218,10 @@ main(int argc, char **argv)
/* Test 1:
* veriyf that SIGURG is
- * delivered and 63 bytes are
- * read and oob is '@'
+ * delivered, 63 bytes are
+ * read, oob is '@', and POLLPRI works.
*/
- wait_for_data(pfd, POLLIN | POLLPRI);
+ wait_for_data(pfd, POLLPRI);
read_oob(pfd, &oob);
len = read_data(pfd, buf, 1024);
if (!signal_recvd || len != 63 || oob != '@') {
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 559173a8e387..d75fa97609c1 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -445,6 +445,8 @@ do_transfer()
local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
@@ -537,6 +539,23 @@ do_transfer()
fi
fi
+ if $checksum; then
+ local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+
+ local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
+ if [ $csum_err_s_nr -gt 0 ]; then
+ printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]"
+ rets=1
+ fi
+
+ local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
+ if [ $csum_err_c_nr -gt 0 ]; then
+ printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]"
+ retc=1
+ fi
+ fi
+
if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
printf "[ OK ]"
fi
diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
index 534c8b7699ab..6fadc8e2f116 100755
--- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh
+++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
@@ -118,11 +118,11 @@ echo "[ OK ]"
# Move the underlay to a non-default VRF
ip -netns hv-1 link set veth0 vrf vrf-underlay
-ip -netns hv-1 link set veth0 down
-ip -netns hv-1 link set veth0 up
+ip -netns hv-1 link set vxlan0 down
+ip -netns hv-1 link set vxlan0 up
ip -netns hv-2 link set veth0 vrf vrf-underlay
-ip -netns hv-2 link set veth0 down
-ip -netns hv-2 link set veth0 up
+ip -netns hv-2 link set vxlan0 down
+ip -netns hv-2 link set vxlan0 up
echo -n "Check VM connectivity through VXLAN (underlay in a VRF) "
ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c
index aee631c5284e..044bc0e9ed81 100644
--- a/tools/testing/selftests/net/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
@@ -325,8 +325,8 @@ int main(int argc, char **argv)
struct ifreq device;
struct ifreq hwtstamp;
struct hwtstamp_config hwconfig, hwconfig_requested;
- struct so_timestamping so_timestamping_get = { 0, -1 };
- struct so_timestamping so_timestamping = { 0, -1 };
+ struct so_timestamping so_timestamping_get = { 0, 0 };
+ struct so_timestamping so_timestamping = { 0, 0 };
struct sockaddr_in addr;
struct ip_mreq imr;
struct in_addr iaddr;
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 363f56081eff..66f0f724a1a6 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -71,8 +71,8 @@ usage () {
echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\""
echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\""
echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
- echo " --doall"
- echo " --doallmodconfig / --do-no-allmodconfig"
+ echo " --do-all"
+ echo " --do-allmodconfig / --do-no-allmodconfig"
echo " --do-clocksourcewd / --do-no-clocksourcewd"
echo " --do-kasan / --do-no-kasan"
echo " --do-kcsan / --do-no-kcsan"
diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
index 7f12d55b97f8..472b27ccd7dc 100644
--- a/tools/testing/selftests/sgx/Makefile
+++ b/tools/testing/selftests/sgx/Makefile
@@ -4,7 +4,7 @@ include ../lib.mk
.PHONY: all clean
-CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \
+CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \
../x86/trivial_64bit_program.c)
ifndef OBJCOPY
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index d9605bd10f2d..a7fde142e814 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for vm selftests
+LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h
+
include local_config.mk
uname_M := $(shell uname -m 2>/dev/null || echo not)
@@ -48,9 +50,9 @@ TEST_GEN_FILES += split_huge_page_test
TEST_GEN_FILES += ksm_tests
ifeq ($(MACHINE),x86_64)
-CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c)
-CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie)
+CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
TARGETS := protection_keys
BINARIES_32 := $(TARGETS:%=%_32)
@@ -139,10 +141,6 @@ endif
$(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
-$(OUTPUT)/gup_test: ../../../../mm/gup_test.h
-
-$(OUTPUT)/hmm-tests: local_config.h
-
# HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index b4142cd1c5c2..02a77056bca3 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -6,9 +6,9 @@ include ../lib.mk
.PHONY: all all_32 all_64 warn_32bit_failure clean
UNAME_M := $(shell uname -m)
-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
+CAN_BUILD_I386 := $(shell ./check_cc.sh "$(CC)" trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh "$(CC)" trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
index 3e2089c8cf54..8c669c0d662e 100755
--- a/tools/testing/selftests/x86/check_cc.sh
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -7,7 +7,7 @@ CC="$1"
TESTPROG="$2"
shift 2
-if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
echo 1
else
echo 0
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index cb3f29c09aff..23f142af544a 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -130,6 +130,7 @@ static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
memset(dev, 0, sizeof *dev);
dev->vdev.features = features;
INIT_LIST_HEAD(&dev->vdev.vqs);
+ spin_lock_init(&dev->vdev.vqs_list_lock);
dev->buf_size = 1024;
dev->buf = malloc(dev->buf_size);
assert(dev->buf);