1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-12-05 09:14:03 +00:00

- Support for uncore counting events: one fixed PMC with the uncore

domain clock, 8 programmable PMCs.
- Westmere based CPU (Xeon 5600, Corei7 980X) support.
- New man pages with events list for core and uncore.
- Updated Corei7 events with Intel 253669-033US December 2009 doc.
  Some events were removed from the documentation; they have been
  kept in the code but are documented in the man page as obsolete.
- Offcore response events can be setup with rsp token.

Sponsored by: NETASQ
This commit is contained in:
Fabien Thomas 2010-04-02 13:23:49 +00:00
parent 93ce19df3a
commit 1fa7f10bac
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=206089
20 changed files with 8651 additions and 977 deletions

View File

@ -27,11 +27,16 @@ MAN+= pmc.atom.3
MAN+= pmc.core.3
MAN+= pmc.core2.3
MAN+= pmc.iaf.3
MAN+= pmc.ucf.3
MAN+= pmc.k7.3
MAN+= pmc.k8.3
MAN+= pmc.p4.3
MAN+= pmc.p5.3
MAN+= pmc.p6.3
MAN+= pmc.corei7.3
MAN+= pmc.corei7uc.3
MAN+= pmc.westmere.3
MAN+= pmc.westmereuc.3
MAN+= pmc.tsc.3
.elif ${MACHINE_ARCH} == "arm" && ${CPUTYPE} == "xscale"
MAN+= pmc.xscale.3

View File

@ -54,6 +54,10 @@ static int iaf_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
struct pmc_op_pmcallocate *_pmc_config);
static int iap_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
struct pmc_op_pmcallocate *_pmc_config);
static int ucf_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
struct pmc_op_pmcallocate *_pmc_config);
static int ucp_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
struct pmc_op_pmcallocate *_pmc_config);
static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
struct pmc_op_pmcallocate *_pmc_config);
static int p4_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
@ -144,6 +148,7 @@ PMC_CLASSDEP_TABLE(p5, P5);
PMC_CLASSDEP_TABLE(p6, P6);
PMC_CLASSDEP_TABLE(xscale, XSCALE);
PMC_CLASSDEP_TABLE(mips24k, MIPS24K);
PMC_CLASSDEP_TABLE(ucf, UCF);
#undef __PMC_EV_ALIAS
#define __PMC_EV_ALIAS(N,CODE) { N, PMC_EV_##CODE },
@ -169,6 +174,21 @@ static const struct pmc_event_descr corei7_event_table[] =
__PMC_EV_ALIAS_COREI7()
};
static const struct pmc_event_descr westmere_event_table[] =
{
__PMC_EV_ALIAS_WESTMERE()
};
static const struct pmc_event_descr corei7uc_event_table[] =
{
__PMC_EV_ALIAS_COREI7UC()
};
static const struct pmc_event_descr westmereuc_event_table[] =
{
__PMC_EV_ALIAS_WESTMEREUC()
};
/*
* PMC_MDEP_TABLE(NAME, PRIMARYCLASS, ADDITIONAL_CLASSES...)
*
@ -182,7 +202,8 @@ static const struct pmc_event_descr corei7_event_table[] =
PMC_MDEP_TABLE(atom, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC);
PMC_MDEP_TABLE(core, IAP, PMC_CLASS_TSC);
PMC_MDEP_TABLE(core2, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC);
PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC);
PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
PMC_MDEP_TABLE(westmere, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
PMC_MDEP_TABLE(k7, K7, PMC_CLASS_TSC);
PMC_MDEP_TABLE(k8, K8, PMC_CLASS_TSC);
PMC_MDEP_TABLE(p4, P4, PMC_CLASS_TSC);
@ -215,6 +236,10 @@ PMC_CLASS_TABLE_DESC(atom, IAP, atom, iap);
PMC_CLASS_TABLE_DESC(core, IAP, core, iap);
PMC_CLASS_TABLE_DESC(core2, IAP, core2, iap);
PMC_CLASS_TABLE_DESC(corei7, IAP, corei7, iap);
PMC_CLASS_TABLE_DESC(westmere, IAP, westmere, iap);
PMC_CLASS_TABLE_DESC(ucf, UCF, ucf, ucf);
PMC_CLASS_TABLE_DESC(corei7uc, UCP, corei7uc, ucp);
PMC_CLASS_TABLE_DESC(westmereuc, UCP, westmereuc, ucp);
#endif
#if defined(__i386__)
PMC_CLASS_TABLE_DESC(k7, K7, k7, k7);
@ -302,7 +327,7 @@ struct pmc_masks {
const uint32_t pm_value;
};
#define PMCMASK(N,V) { .pm_name = #N, .pm_value = (V) }
#define NULLMASK PMCMASK(NULL,0)
#define NULLMASK { .pm_name = NULL }
#if defined(__amd64__) || defined(__i386__)
static int
@ -495,6 +520,8 @@ static struct pmc_event_alias core2_aliases_without_iaf[] = {
#define atom_aliases_without_iaf core2_aliases_without_iaf
#define corei7_aliases core2_aliases
#define corei7_aliases_without_iaf core2_aliases_without_iaf
#define westmere_aliases core2_aliases
#define westmere_aliases_without_iaf core2_aliases_without_iaf
#define IAF_KW_OS "os"
#define IAF_KW_USR "usr"
@ -545,6 +572,7 @@ iaf_allocate_pmc(enum pmc_event pe, char *ctrspec,
#define IAP_KW_SNOOPTYPE "snooptype"
#define IAP_KW_TRANSITION "trans"
#define IAP_KW_USR "usr"
#define IAP_KW_RSP "rsp"
static struct pmc_masks iap_core_mask[] = {
PMCMASK(all, (0x3 << 14)),
@ -592,19 +620,38 @@ static struct pmc_masks iap_transition_mask[] = {
NULLMASK
};
/*
 * Bit masks accepted by the "rsp=" event qualifier (IAP_KW_RSP) on
 * Core i7 and Westmere CPUs; the parsed value is stored in
 * pm_iap_rsp by iap_allocate_pmc().  Bit 11 is intentionally
 * absent from the table.
 * NOTE(review): names/bit positions follow the Intel offcore
 * response encoding -- confirm against SDM 253669-033US.
 */
static struct pmc_masks iap_rsp_mask[] = {
PMCMASK(DMND_DATA_RD, (1 << 0)),
PMCMASK(DMND_RFO, (1 << 1)),
PMCMASK(DMND_IFETCH, (1 << 2)),
PMCMASK(WB, (1 << 3)),
PMCMASK(PF_DATA_RD, (1 << 4)),
PMCMASK(PF_RFO, (1 << 5)),
PMCMASK(PF_IFETCH, (1 << 6)),
PMCMASK(OTHER, (1 << 7)),
PMCMASK(UNCORE_HIT, (1 << 8)),
PMCMASK(OTHER_CORE_HIT_SNP, (1 << 9)),
PMCMASK(OTHER_CORE_HITM, (1 << 10)),
PMCMASK(REMOTE_CACHE_FWD, (1 << 12)),
PMCMASK(REMOTE_DRAM, (1 << 13)),
PMCMASK(LOCAL_DRAM, (1 << 14)),
PMCMASK(NON_DRAM, (1 << 15)),
NULLMASK
};
static int
iap_allocate_pmc(enum pmc_event pe, char *ctrspec,
struct pmc_op_pmcallocate *pmc_config)
{
char *e, *p, *q;
uint32_t cachestate, evmask;
uint32_t cachestate, evmask, rsp;
int count, n;
pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE |
PMC_CAP_QUALIFIER);
pmc_config->pm_md.pm_iap.pm_iap_config = 0;
cachestate = evmask = 0;
cachestate = evmask = rsp = 0;
/* Parse additional modifiers if present */
while ((p = strsep(&ctrspec, ",")) != NULL) {
@ -651,8 +698,7 @@ iap_allocate_pmc(enum pmc_event pe, char *ctrspec,
return (-1);
} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_ATOM ||
cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2 ||
cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2EXTREME ||
cpu_info.pm_cputype == PMC_CPU_INTEL_COREI7) {
cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2EXTREME) {
if (KWPREFIXMATCH(p, IAP_KW_SNOOPRESPONSE "=")) {
n = pmc_parse_mask(iap_snoopresponse_mask, p,
&evmask);
@ -661,6 +707,12 @@ iap_allocate_pmc(enum pmc_event pe, char *ctrspec,
&evmask);
} else
return (-1);
} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_COREI7 ||
cpu_info.pm_cputype == PMC_CPU_INTEL_WESTMERE) {
if (KWPREFIXMATCH(p, IAP_KW_RSP "=")) {
n = pmc_parse_mask(iap_rsp_mask, p, &rsp);
} else
return (-1);
} else
return (-1);
@ -693,6 +745,69 @@ iap_allocate_pmc(enum pmc_event pe, char *ctrspec,
}
pmc_config->pm_md.pm_iap.pm_iap_config |= cachestate;
pmc_config->pm_md.pm_iap.pm_iap_rsp = rsp;
return (0);
}
/*
* Intel Uncore.
*/
/*
 * Allocate an uncore fixed-function counter (PMC_CLASS_UCF).
 *
 * Fixed-function counters take no event qualifiers, so both the
 * event code and the counter specification string are ignored;
 * only the read/write capabilities are recorded.  Always
 * succeeds and returns 0.
 */
static int
ucf_allocate_pmc(enum pmc_event pe, char *ctrspec,
struct pmc_op_pmcallocate *pmc_config)
{
	(void) pe;		/* single hard-wired event per counter */
	(void) ctrspec;		/* no qualifiers to parse */

	pmc_config->pm_md.pm_ucf.pm_ucf_flags = 0;
	pmc_config->pm_caps |= PMC_CAP_READ | PMC_CAP_WRITE;

	return (0);
}
#define UCP_KW_CMASK "cmask"
#define UCP_KW_EDGE "edge"
#define UCP_KW_INV "inv"
/*
 * Allocate an uncore programmable counter (PMC_CLASS_UCP).
 *
 * Parses the comma-separated qualifier list in 'ctrspec':
 *   cmask=N -- count only cycles in which at least N events occur
 *              (sets PMC_CAP_THRESHOLD and the UCP_CMASK field);
 *   edge    -- count deasserted-to-asserted transitions
 *              (sets PMC_CAP_EDGE);
 *   inv     -- invert the cmask comparison (sets PMC_CAP_INVERT).
 *
 * Returns 0 on success or -1 on a malformed qualifier.
 *
 * Fix: dropped the dead 'n' variable and the unreachable
 * "if (n < 0)" check -- 'n' was set to 0 and never modified (a
 * leftover from the iap_allocate_pmc() parsing loop, where 'n'
 * receives pmc_parse_mask() results).
 */
static int
ucp_allocate_pmc(enum pmc_event pe, char *ctrspec,
struct pmc_op_pmcallocate *pmc_config)
{
	char *e, *p, *q;
	int count;

	(void) pe;	/* event code already encoded by the caller */

	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE |
	    PMC_CAP_QUALIFIER);
	pmc_config->pm_md.pm_ucp.pm_ucp_config = 0;

	/* Parse additional modifiers if present */
	while ((p = strsep(&ctrspec, ",")) != NULL) {
		if (KWPREFIXMATCH(p, UCP_KW_CMASK "=")) {
			q = strchr(p, '=');
			if (*++q == '\0') /* skip '=' */
				return (-1);
			count = strtol(q, &e, 0);
			if (e == q || *e != '\0')
				return (-1);
			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
			pmc_config->pm_md.pm_ucp.pm_ucp_config |=
			    UCP_CMASK(count);
		} else if (KWMATCH(p, UCP_KW_EDGE)) {
			pmc_config->pm_caps |= PMC_CAP_EDGE;
		} else if (KWMATCH(p, UCP_KW_INV)) {
			pmc_config->pm_caps |= PMC_CAP_INVERT;
		} else
			return (-1);
	}
	return (0);
}
@ -2392,6 +2507,31 @@ pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames,
ev = corei7_event_table;
count = PMC_EVENT_TABLE_SIZE(corei7);
break;
case PMC_CPU_INTEL_WESTMERE:
ev = westmere_event_table;
count = PMC_EVENT_TABLE_SIZE(westmere);
break;
}
break;
case PMC_CLASS_UCF:
ev = ucf_event_table;
count = PMC_EVENT_TABLE_SIZE(ucf);
break;
case PMC_CLASS_UCP:
/*
* Return the most appropriate set of event name
* spellings for the current CPU.
*/
switch (cpu_info.pm_cputype) {
default:
case PMC_CPU_INTEL_COREI7:
ev = corei7uc_event_table;
count = PMC_EVENT_TABLE_SIZE(corei7uc);
break;
case PMC_CPU_INTEL_WESTMERE:
ev = westmereuc_event_table;
count = PMC_EVENT_TABLE_SIZE(westmereuc);
break;
}
break;
case PMC_CLASS_TSC:
@ -2605,8 +2745,15 @@ pmc_init(void)
PMC_MDEP_INIT_INTEL_V2(core2);
break;
case PMC_CPU_INTEL_COREI7:
pmc_class_table[n++] = &ucf_class_table_descr;
pmc_class_table[n++] = &corei7uc_class_table_descr;
PMC_MDEP_INIT_INTEL_V2(corei7);
break;
case PMC_CPU_INTEL_WESTMERE:
pmc_class_table[n++] = &ucf_class_table_descr;
pmc_class_table[n++] = &westmereuc_class_table_descr;
PMC_MDEP_INIT_INTEL_V2(westmere);
break;
case PMC_CPU_INTEL_PIV:
PMC_MDEP_INIT(p4);
pmc_class_table[n] = &p4_class_table_descr;
@ -2719,10 +2866,30 @@ _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu)
ev = corei7_event_table;
evfence = corei7_event_table + PMC_EVENT_TABLE_SIZE(corei7);
break;
case PMC_CPU_INTEL_WESTMERE:
ev = westmere_event_table;
evfence = westmere_event_table + PMC_EVENT_TABLE_SIZE(westmere);
break;
default: /* Unknown CPU type. */
break;
}
} if (pe >= PMC_EV_K7_FIRST && pe <= PMC_EV_K7_LAST) {
} else if (pe >= PMC_EV_UCF_FIRST && pe <= PMC_EV_UCF_LAST) {
ev = ucf_event_table;
evfence = ucf_event_table + PMC_EVENT_TABLE_SIZE(ucf);
} else if (pe >= PMC_EV_UCP_FIRST && pe <= PMC_EV_UCP_LAST) {
switch (cpu) {
case PMC_CPU_INTEL_COREI7:
ev = corei7uc_event_table;
evfence = corei7uc_event_table + PMC_EVENT_TABLE_SIZE(corei7uc);
break;
case PMC_CPU_INTEL_WESTMERE:
ev = westmereuc_event_table;
evfence = westmereuc_event_table + PMC_EVENT_TABLE_SIZE(westmereuc);
break;
default: /* Unknown CPU type. */
break;
}
} else if (pe >= PMC_EV_K7_FIRST && pe <= PMC_EV_K7_LAST) {
ev = k7_event_table;
evfence = k7_event_table + PMC_EVENT_TABLE_SIZE(k7);
} else if (pe >= PMC_EV_K8_FIRST && pe <= PMC_EV_K8_LAST) {

1581
lib/libpmc/pmc.corei7.3 Normal file

File diff suppressed because it is too large Load Diff

880
lib/libpmc/pmc.corei7uc.3 Normal file
View File

@ -0,0 +1,880 @@
.\" Copyright (c) 2010 Fabien Thomas. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" This software is provided by Joseph Koshy ``as is'' and
.\" any express or implied warranties, including, but not limited to, the
.\" implied warranties of merchantability and fitness for a particular purpose
.\" are disclaimed. in no event shall Joseph Koshy be liable
.\" for any direct, indirect, incidental, special, exemplary, or consequential
.\" damages (including, but not limited to, procurement of substitute goods
.\" or services; loss of use, data, or profits; or business interruption)
.\" however caused and on any theory of liability, whether in contract, strict
.\" liability, or tort (including negligence or otherwise) arising in any way
.\" out of the use of this software, even if advised of the possibility of
.\" such damage.
.\"
.\" $FreeBSD$
.\"
.Dd March 24, 2010
.Os
.Dt PMC.COREI7UC 3
.Sh NAME
.Nm pmc.corei7uc
.Nd uncore measurement events for
.Tn Intel
.Tn Core i7 and Xeon 5500
family CPUs
.Sh LIBRARY
.Lb libpmc
.Sh SYNOPSIS
.In pmc.h
.Sh DESCRIPTION
.Tn Intel
.Tn "Core i7"
CPUs contain PMCs conforming to version 2 of the
.Tn Intel
performance measurement architecture.
These CPUs contain 2 classes of PMCs:
.Bl -tag -width "Li PMC_CLASS_UCP"
.It Li PMC_CLASS_UCF
Fixed-function counters that count only one hardware event per counter.
.It Li PMC_CLASS_UCP
Programmable counters that may be configured to count one of a defined
set of hardware events.
.El
.Pp
The number of PMCs available in each class and their widths need to be
determined at run time by calling
.Xr pmc_cpuinfo 3 .
.Pp
Intel Core i7 and Xeon 5500 PMCs are documented in
.Rs
.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
.%T "Volume 3B: System Programming Guide, Part 2"
.%N "Order Number: 253669-033US"
.%D December 2009
.%Q "Intel Corporation"
.Re
.Ss COREI7 AND XEON 5500 UNCORE FIXED FUNCTION PMCS
These PMCs and their supported events are documented in
.Xr pmc.ucf 3 .
.Ss COREI7 AND XEON 5500 UNCORE PROGRAMMABLE PMCS
The programmable PMCs support the following capabilities:
.Bl -column "PMC_CAP_INTERRUPT" "Support"
.It Em Capability Ta Em Support
.It PMC_CAP_CASCADE Ta \&No
.It PMC_CAP_EDGE Ta Yes
.It PMC_CAP_INTERRUPT Ta \&No
.It PMC_CAP_INVERT Ta Yes
.It PMC_CAP_READ Ta Yes
.It PMC_CAP_PRECISE Ta \&No
.It PMC_CAP_SYSTEM Ta \&No
.It PMC_CAP_TAGGING Ta \&No
.It PMC_CAP_THRESHOLD Ta Yes
.It PMC_CAP_USER Ta \&No
.It PMC_CAP_WRITE Ta Yes
.El
.Ss Event Qualifiers
Event specifiers for these PMCs support the following common
qualifiers:
.Bl -tag -width indent
.It Li cmask= Ns Ar value
Configure the PMC to increment only if the number of configured
events measured in a cycle is greater than or equal to
.Ar value .
.It Li edge
Configure the PMC to count the number of de-asserted to asserted
transitions of the conditions expressed by the other qualifiers.
If specified, the counter will increment only once whenever a
condition becomes true, irrespective of the number of clocks during
which the condition remains true.
.It Li inv
Invert the sense of comparison when the
.Dq Li cmask
qualifier is present, making the counter increment when the number of
events per cycle is less than the value specified by the
.Dq Li cmask
qualifier.
.El
.Ss Event Specifiers (Programmable PMCs)
Core i7 and Xeon 5500 uncore programmable PMCs support the following events:
.Bl -tag -width indent
.It Li GQ_CYCLES_FULL.READ_TRACKER
.Pq Event 00H , Umask 01H
Uncore cycles Global Queue read tracker is full.
.It Li GQ_CYCLES_FULL.WRITE_TRACKER
.Pq Event 00H , Umask 02H
Uncore cycles Global Queue write tracker is full.
.It Li GQ_CYCLES_FULL.PEER_PROBE_TRACKER
.Pq Event 00H , Umask 04H
Uncore cycles Global Queue peer probe tracker is full. The peer probe
tracker queue tracks snoops from the IOH and remote sockets.
.It Li GQ_CYCLES_NOT_EMPTY.READ_TRACKER
.Pq Event 01H , Umask 01H
Uncore cycles where Global Queue read tracker has at least one valid entry.
.It Li GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER
.Pq Event 01H , Umask 02H
Uncore cycles where Global Queue write tracker has at least one valid entry.
.It Li GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER
.Pq Event 01H , Umask 04H
Uncore cycles where Global Queue peer probe tracker has at least one valid
entry. The peer probe tracker queue tracks IOH and remote socket snoops.
.It Li GQ_ALLOC.READ_TRACKER
.Pq Event 03H , Umask 01H
Counts the number of read tracker allocate to deallocate entries. The GQ
read tracker allocate to deallocate occupancy count is divided by the count
to obtain the average read tracker latency.
.It Li GQ_ALLOC.RT_L3_MISS
.Pq Event 03H , Umask 02H
Counts the number of GQ read tracker entries for which a full cache line read
has missed the L3. The GQ read tracker L3 miss to fill occupancy count is
divided by this count to obtain the average cache line read L3 miss latency.
The latency represents the time after which the L3 has determined that the
cache line has missed. The time between a GQ read tracker allocation and the
L3 determining that the cache line has missed is the average L3 hit latency.
The total L3 cache line read miss latency is the hit latency + L3 miss
latency.
.It Li GQ_ALLOC.RT_TO_L3_RESP
.Pq Event 03H , Umask 04H
Counts the number of GQ read tracker entries that are allocated in the read
tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy
count is divided by this count to obtain the average L3 hit latency.
.It Li GQ_ALLOC.RT_TO_RTID_ACQUIRED
.Pq Event 03H , Umask 08H
Counts the number of GQ read tracker entries that are allocated in the read
tracker, have missed in the L3 and have not acquired a Request Transaction
ID. The GQ read tracker L3 miss to RTID acquired occupancy count is
divided by this count to obtain the average latency for a read L3 miss to
acquire an RTID.
.It Li GQ_ALLOC.WT_TO_RTID_ACQUIRED
.Pq Event 03H , Umask 10H
Counts the number of GQ write tracker entries that are allocated in the
write tracker, have missed in the L3 and have not acquired a Request
Transaction ID. The GQ write tracker L3 miss to RTID occupancy count is
divided by this count to obtain the average latency for a write L3 miss to
acquire an RTID.
.It Li GQ_ALLOC.WRITE_TRACKER
.Pq Event 03H , Umask 20H
Counts the number of GQ write tracker entries that are allocated in the
write tracker queue that miss the L3. The GQ write tracker occupancy count
is divided by the this count to obtain the average L3 write miss latency.
.It Li GQ_ALLOC.PEER_PROBE_TRACKER
.Pq Event 03H , Umask 40H
Counts the number of GQ peer probe tracker (snoop) entries that are
allocated in the peer probe tracker queue that miss the L3. The GQ peer
probe occupancy count is divided by this count to obtain the average L3 peer
probe miss latency.
.It Li GQ_DATA.FROM_QPI
.Pq Event 04H , Umask 01H
Cycles Global Queue Quickpath Interface input data port is busy importing
data from the Quickpath Interface. Each cycle the input port can transfer 8
or 16 bytes of data.
.It Li GQ_DATA.FROM_QMC
.Pq Event 04H , Umask 02H
Cycles Global Queue Quickpath Memory Interface input data port is busy
importing data from the Quickpath Memory Interface. Each cycle the input
port can transfer 8 or 16 bytes of data.
.It Li GQ_DATA.FROM_L3
.Pq Event 04H , Umask 04H
Cycles GQ L3 input data port is busy importing data from the Last Level
Cache. Each cycle the input port can transfer 32 bytes of data.
.It Li GQ_DATA.FROM_CORES_02
.Pq Event 04H , Umask 08H
Cycles GQ Core 0 and 2 input data port is busy importing data from processor
cores 0 and 2. Each cycle the input port can transfer 32 bytes of data.
.It Li GQ_DATA.FROM_CORES_13
.Pq Event 04H , Umask 10H
Cycles GQ Core 1 and 3 input data port is busy importing data from processor
cores 1 and 3. Each cycle the input port can transfer 32 bytes of data.
.It Li GQ_DATA.TO_QPI_QMC
.Pq Event 05H , Umask 01H
Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath
Interface or Quickpath Memory Interface. Each cycle the output port can
transfer 32 bytes of data.
.It Li GQ_DATA.TO_L3
.Pq Event 05H , Umask 02H
Cycles GQ L3 output data port is busy sending data to the Last Level Cache.
Each cycle the output port can transfer 32 bytes of data.
.It Li GQ_DATA.TO_CORES
.Pq Event 05H , Umask 04H
Cycles GQ Core output data port is busy sending data to the Cores. Each
cycle the output port can transfer 32 bytes of data.
.It Li SNP_RESP_TO_LOCAL_HOME.I_STATE
.Pq Event 06H , Umask 01H
Number of snoop responses to the local home that L3 does not have the
referenced cache line.
.It Li SNP_RESP_TO_LOCAL_HOME.S_STATE
.Pq Event 06H , Umask 02H
Number of snoop responses to the local home that L3 has the referenced line
cached in the S state.
.It Li SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE
.Pq Event 06H , Umask 04H
Number of responses to code or data read snoops to the local home that the
L3 has the referenced cache line in the E state. The L3 cache line state is
changed to the S state and the line is forwarded to the local home in the S
state.
.It Li SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE
.Pq Event 06H , Umask 08H
Number of responses to read invalidate snoops to the local home that the L3
has the referenced cache line in the M state. The L3 cache line state is
invalidated and the line is forwarded to the local home in the M state.
.It Li SNP_RESP_TO_LOCAL_HOME.CONFLICT
.Pq Event 06H , Umask 10H
Number of conflict snoop responses sent to the local home.
.It Li SNP_RESP_TO_LOCAL_HOME.WB
.Pq Event 06H , Umask 20H
Number of responses to code or data read snoops to the local home that the
L3 has the referenced line cached in the M state.
.It Li SNP_RESP_TO_REMOTE_HOME.I_STATE
.Pq Event 07H , Umask 01H
Number of snoop responses to a remote home that L3 does not have the
referenced cache line.
.It Li SNP_RESP_TO_REMOTE_HOME.S_STATE
.Pq Event 07H , Umask 02H
Number of snoop responses to a remote home that L3 has the referenced line
cached in the S state.
.It Li SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE
.Pq Event 07H , Umask 04H
Number of responses to code or data read snoops to a remote home that the L3
has the referenced cache line in the E state. The L3 cache line state is
changed to the S state and the line is forwarded to the remote home in the S
state.
.It Li SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE
.Pq Event 07H , Umask 08H
Number of responses to read invalidate snoops to a remote home that the L3
has the referenced cache line in the M state. The L3 cache line state is
invalidated and the line is forwarded to the remote home in the M state.
.It Li SNP_RESP_TO_REMOTE_HOME.CONFLICT
.Pq Event 07H , Umask 10H
Number of conflict snoop responses sent to the local home.
.It Li SNP_RESP_TO_REMOTE_HOME.WB
.Pq Event 07H , Umask 20H
Number of responses to code or data read snoops to a remote home that the L3
has the referenced line cached in the M state.
.It Li SNP_RESP_TO_REMOTE_HOME.HITM
.Pq Event 07H , Umask 24H
Number of HITM snoop responses to a remote home.
.It Li L3_HITS.READ
.Pq Event 08H , Umask 01H
Number of code read, data read and RFO requests that hit in the L3
.It Li L3_HITS.WRITE
.Pq Event 08H , Umask 02H
Number of writeback requests that hit in the L3. Writebacks from the cores
will always result in L3 hits due to the inclusive property of the L3.
.It Li L3_HITS.PROBE
.Pq Event 08H , Umask 04H
Number of snoops from IOH or remote sockets that hit in the L3.
.It Li L3_HITS.ANY
.Pq Event 08H , Umask 03H
Number of reads and writes that hit the L3.
.It Li L3_MISS.READ
.Pq Event 09H , Umask 01H
Number of code read, data read and RFO requests that miss the L3.
.It Li L3_MISS.WRITE
.Pq Event 09H , Umask 02H
Number of writeback requests that miss the L3. Should always be zero as
writebacks from the cores will always result in L3 hits due to the inclusive
property of the L3.
.It Li L3_MISS.PROBE
.Pq Event 09H , Umask 04H
Number of snoops from IOH or remote sockets that miss the L3.
.It Li L3_MISS.ANY
.Pq Event 09H , Umask 03H
Number of reads and writes that miss the L3.
.It Li L3_LINES_IN.M_STATE
.Pq Event 0AH , Umask 01H
Counts the number of L3 lines allocated in M state. The only time a cache
line is allocated in the M state is when the line was forwarded in M state
is forwarded due to a Snoop Read Invalidate Own request.
.It Li L3_LINES_IN.E_STATE
.Pq Event 0AH , Umask 02H
Counts the number of L3 lines allocated in E state.
.It Li L3_LINES_IN.S_STATE
.Pq Event 0AH , Umask 04H
Counts the number of L3 lines allocated in S state.
.It Li L3_LINES_IN.F_STATE
.Pq Event 0AH , Umask 08H
Counts the number of L3 lines allocated in F state.
.It Li L3_LINES_IN.ANY
.Pq Event 0AH , Umask 0FH
Counts the number of L3 lines allocated in any state.
.It Li L3_LINES_OUT.M_STATE
.Pq Event 0BH , Umask 01H
Counts the number of L3 lines victimized that were in the M state. When the
victim cache line is in M state, the line is written to its home cache agent
which can be either local or remote.
.It Li L3_LINES_OUT.E_STATE
.Pq Event 0BH , Umask 02H
Counts the number of L3 lines victimized that were in the E state.
.It Li L3_LINES_OUT.S_STATE
.Pq Event 0BH , Umask 04H
Counts the number of L3 lines victimized that were in the S state.
.It Li L3_LINES_OUT.I_STATE
.Pq Event 0BH , Umask 08H
Counts the number of L3 lines victimized that were in the I state.
.It Li L3_LINES_OUT.F_STATE
.Pq Event 0BH , Umask 10H
Counts the number of L3 lines victimized that were in the F state.
.It Li L3_LINES_OUT.ANY
.Pq Event 0BH , Umask 1FH
Counts the number of L3 lines victimized in any state.
.It Li QHL_REQUESTS.IOH_READS
.Pq Event 20H , Umask 01H
Counts number of Quickpath Home Logic read requests from the IOH.
.It Li QHL_REQUESTS.IOH_WRITES
.Pq Event 20H , Umask 02H
Counts number of Quickpath Home Logic write requests from the IOH.
.It Li QHL_REQUESTS.REMOTE_READS
.Pq Event 20H , Umask 04H
Counts number of Quickpath Home Logic read requests from a remote socket.
.It Li QHL_REQUESTS.REMOTE_WRITES
.Pq Event 20H , Umask 08H
Counts number of Quickpath Home Logic write requests from a remote socket.
.It Li QHL_REQUESTS.LOCAL_READS
.Pq Event 20H , Umask 10H
Counts number of Quickpath Home Logic read requests from the local socket.
.It Li QHL_REQUESTS.LOCAL_WRITES
.Pq Event 20H , Umask 20H
Counts number of Quickpath Home Logic write requests from the local socket.
.It Li QHL_CYCLES_FULL.IOH
.Pq Event 21H , Umask 01H
Counts uclk cycles all entries in the Quickpath Home Logic IOH are full.
.It Li QHL_CYCLES_FULL.REMOTE
.Pq Event 21H , Umask 02H
Counts uclk cycles all entries in the Quickpath Home Logic remote tracker
are full.
.It Li QHL_CYCLES_FULL.LOCAL
.Pq Event 21H , Umask 04H
Counts uclk cycles all entries in the Quickpath Home Logic local tracker are
full.
.It Li QHL_CYCLES_NOT_EMPTY.IOH
.Pq Event 22H , Umask 01H
Counts uclk cycles all entries in the Quickpath Home Logic IOH is busy.
.It Li QHL_CYCLES_NOT_EMPTY.REMOTE
.Pq Event 22H , Umask 02H
Counts uclk cycles all entries in the Quickpath Home Logic remote tracker is
busy.
.It Li QHL_CYCLES_NOT_EMPTY.LOCAL
.Pq Event 22H , Umask 04H
Counts uclk cycles all entries in the Quickpath Home Logic local tracker is
busy.
.It Li QHL_OCCUPANCY.IOH
.Pq Event 23H , Umask 01H
QHL IOH tracker allocate to deallocate read occupancy.
.It Li QHL_OCCUPANCY.REMOTE
.Pq Event 23H , Umask 02H
QHL remote tracker allocate to deallocate read occupancy.
.It Li QHL_OCCUPANCY.LOCAL
.Pq Event 23H , Umask 04H
QHL local tracker allocate to deallocate read occupancy.
.It Li QHL_ADDRESS_CONFLICTS.2WAY
.Pq Event 24H , Umask 02H
Counts number of QHL Active Address Table (AAT) entries that saw a max of 2
conflicts. The AAT is a structure that tracks requests that are in conflict.
The requests themselves are in the home tracker entries. The count is
reported when an AAT entry deallocates.
.It Li QHL_ADDRESS_CONFLICTS.3WAY
.Pq Event 24H , Umask 04H
Counts number of QHL Active Address Table (AAT) entries that saw a max of 3
conflicts. The AAT is a structure that tracks requests that are in conflict.
The requests themselves are in the home tracker entries. The count is
reported when an AAT entry deallocates.
.It Li QHL_CONFLICT_CYCLES.IOH
.Pq Event 25H , Umask 01H
Counts cycles the Quickpath Home Logic IOH Tracker contains two or more
requests with an address conflict. A max of 3 requests can be in conflict.
.It Li QHL_CONFLICT_CYCLES.REMOTE
.Pq Event 25H , Umask 02H
Counts cycles the Quickpath Home Logic Remote Tracker contains two or more
requests with an address conflict. A max of 3 requests can be in conflict.
.It Li QHL_CONFLICT_CYCLES.LOCAL
.Pq Event 25H , Umask 04H
Counts cycles the Quickpath Home Logic Local Tracker contains two or more
requests with an address conflict. A max of 3 requests can be in conflict.
.It Li QHL_TO_QMC_BYPASS
.Pq Event 26H , Umask 01H
Counts number or requests to the Quickpath Memory Controller that bypass the
Quickpath Home Logic. All local accesses can be bypassed. For remote
requests, only read requests can be bypassed.
.It Li QMC_NORMAL_FULL.READ.CH0
.Pq Event 27H , Umask 01H
Uncore cycles all the entries in the DRAM channel 0 medium or low priority
queue are occupied with read requests.
.It Li QMC_NORMAL_FULL.READ.CH1
.Pq Event 27H , Umask 02H
Uncore cycles all the entries in the DRAM channel 1 medium or low priority
queue are occupied with read requests.
.It Li QMC_NORMAL_FULL.READ.CH2
.Pq Event 27H , Umask 04H
Uncore cycles all the entries in the DRAM channel 2 medium or low priority
queue are occupied with read requests.
.It Li QMC_NORMAL_FULL.WRITE.CH0
.Pq Event 27H , Umask 08H
Uncore cycles all the entries in the DRAM channel 0 medium or low priority
queue are occupied with write requests.
.It Li QMC_NORMAL_FULL.WRITE.CH1
.Pq Event 27H , Umask 10H
Counts cycles all the entries in the DRAM channel 1 medium or low priority
queue are occupied with write requests.
.It Li QMC_NORMAL_FULL.WRITE.CH2
.Pq Event 27H , Umask 20H
Uncore cycles all the entries in the DRAM channel 2 medium or low priority
queue are occupied with write requests.
.It Li QMC_ISOC_FULL.READ.CH0
.Pq Event 28H , Umask 01H
Counts cycles all the entries in the DRAM channel 0 high priority queue are
occupied with isochronous read requests.
.It Li QMC_ISOC_FULL.READ.CH1
.Pq Event 28H , Umask 02H
Counts cycles all the entries in the DRAM channel 1 high priority queue are
occupied with isochronous read requests.
.It Li QMC_ISOC_FULL.READ.CH2
.Pq Event 28H , Umask 04H
Counts cycles all the entries in the DRAM channel 2 high priority queue are
occupied with isochronous read requests.
.It Li QMC_ISOC_FULL.WRITE.CH0
.Pq Event 28H , Umask 08H
Counts cycles all the entries in the DRAM channel 0 high priority queue are
occupied with isochronous write requests.
.It Li QMC_ISOC_FULL.WRITE.CH1
.Pq Event 28H , Umask 10H
Counts cycles all the entries in the DRAM channel 1 high priority queue are
occupied with isochronous write requests.
.It Li QMC_ISOC_FULL.WRITE.CH2
.Pq Event 28H , Umask 20H
Counts cycles all the entries in the DRAM channel 2 high priority queue are
occupied with isochronous write requests.
.It Li QMC_BUSY.READ.CH0
.Pq Event 29H , Umask 01H
Counts cycles where Quickpath Memory Controller has at least 1 outstanding
read request to DRAM channel 0.
.It Li QMC_BUSY.READ.CH1
.Pq Event 29H , Umask 02H
Counts cycles where Quickpath Memory Controller has at least 1 outstanding
read request to DRAM channel 1.
.It Li QMC_BUSY.READ.CH2
.Pq Event 29H , Umask 04H
Counts cycles where Quickpath Memory Controller has at least 1 outstanding
read request to DRAM channel 2.
.It Li QMC_BUSY.WRITE.CH0
.Pq Event 29H , Umask 08H
Counts cycles where Quickpath Memory Controller has at least 1 outstanding
write request to DRAM channel 0.
.It Li QMC_BUSY.WRITE.CH1
.Pq Event 29H , Umask 10H
Counts cycles where Quickpath Memory Controller has at least 1 outstanding
write request to DRAM channel 1.
.It Li QMC_BUSY.WRITE.CH2
.Pq Event 29H , Umask 20H
Counts cycles where Quickpath Memory Controller has at least 1 outstanding
write request to DRAM channel 2.
.It Li QMC_OCCUPANCY.CH0
.Pq Event 2AH , Umask 01H
IMC channel 0 normal read request occupancy.
.It Li QMC_OCCUPANCY.CH1
.Pq Event 2AH , Umask 02H
IMC channel 1 normal read request occupancy.
.It Li QMC_OCCUPANCY.CH2
.Pq Event 2AH , Umask 04H
IMC channel 2 normal read request occupancy.
.It Li QMC_ISSOC_OCCUPANCY.CH0
.Pq Event 2BH , Umask 01H
IMC channel 0 issoc read request occupancy.
.It Li QMC_ISSOC_OCCUPANCY.CH1
.Pq Event 2BH , Umask 02H
IMC channel 1 issoc read request occupancy.
.It Li QMC_ISSOC_OCCUPANCY.CH2
.Pq Event 2BH , Umask 04H
IMC channel 2 issoc read request occupancy.
.It Li QMC_ISSOC_READS.ANY
.Pq Event 2BH , Umask 07H
IMC issoc read request occupancy.
.It Li QMC_NORMAL_READS.CH0
.Pq Event 2CH , Umask 01H
Counts the number of Quickpath Memory Controller channel 0 medium and low
priority read requests. The QMC channel 0 normal read occupancy divided by
this count provides the average QMC channel 0 read latency.
.It Li QMC_NORMAL_READS.CH1
.Pq Event 2CH , Umask 02H
Counts the number of Quickpath Memory Controller channel 1 medium and low
priority read requests. The QMC channel 1 normal read occupancy divided by
this count provides the average QMC channel 1 read latency.
.It Li QMC_NORMAL_READS.CH2
.Pq Event 2CH , Umask 04H
Counts the number of Quickpath Memory Controller channel 2 medium and low
priority read requests. The QMC channel 2 normal read occupancy divided by
this count provides the average QMC channel 2 read latency.
.It Li QMC_NORMAL_READS.ANY
.Pq Event 2CH , Umask 07H
Counts the number of Quickpath Memory Controller medium and low priority
read requests. The QMC normal read occupancy divided by this count provides
the average QMC read latency.
.It Li QMC_HIGH_PRIORITY_READS.CH0
.Pq Event 2DH , Umask 01H
Counts the number of Quickpath Memory Controller channel 0 high priority
isochronous read requests.
.It Li QMC_HIGH_PRIORITY_READS.CH1
.Pq Event 2DH , Umask 02H
Counts the number of Quickpath Memory Controller channel 1 high priority
isochronous read requests.
.It Li QMC_HIGH_PRIORITY_READS.CH2
.Pq Event 2DH , Umask 04H
Counts the number of Quickpath Memory Controller channel 2 high priority
isochronous read requests.
.It Li QMC_HIGH_PRIORITY_READS.ANY
.Pq Event 2DH , Umask 07H
Counts the number of Quickpath Memory Controller high priority isochronous
read requests.
.It Li QMC_CRITICAL_PRIORITY_READS.CH0
.Pq Event 2EH , Umask 01H
Counts the number of Quickpath Memory Controller channel 0 critical priority
isochronous read requests.
.It Li QMC_CRITICAL_PRIORITY_READS.CH1
.Pq Event 2EH , Umask 02H
Counts the number of Quickpath Memory Controller channel 1 critical priority
isochronous read requests.
.It Li QMC_CRITICAL_PRIORITY_READS.CH2
.Pq Event 2EH , Umask 04H
Counts the number of Quickpath Memory Controller channel 2 critical priority
isochronous read requests.
.It Li QMC_CRITICAL_PRIORITY_READS.ANY
.Pq Event 2EH , Umask 07H
Counts the number of Quickpath Memory Controller critical priority
isochronous read requests.
.It Li QMC_WRITES.FULL.CH0
.Pq Event 2FH , Umask 01H
Counts number of full cache line writes to DRAM channel 0.
.It Li QMC_WRITES.FULL.CH1
.Pq Event 2FH , Umask 02H
Counts number of full cache line writes to DRAM channel 1.
.It Li QMC_WRITES.FULL.CH2
.Pq Event 2FH , Umask 04H
Counts number of full cache line writes to DRAM channel 2.
.It Li QMC_WRITES.FULL.ANY
.Pq Event 2FH , Umask 07H
Counts number of full cache line writes to DRAM.
.It Li QMC_WRITES.PARTIAL.CH0
.Pq Event 2FH , Umask 08H
Counts number of partial cache line writes to DRAM channel 0.
.It Li QMC_WRITES.PARTIAL.CH1
.Pq Event 2FH , Umask 10H
Counts number of partial cache line writes to DRAM channel 1.
.It Li QMC_WRITES.PARTIAL.CH2
.Pq Event 2FH , Umask 20H
Counts number of partial cache line writes to DRAM channel 2.
.It Li QMC_WRITES.PARTIAL.ANY
.Pq Event 2FH , Umask 38H
Counts number of partial cache line writes to DRAM.
.It Li QMC_CANCEL.CH0
.Pq Event 30H , Umask 01H
Counts number of DRAM channel 0 cancel requests.
.It Li QMC_CANCEL.CH1
.Pq Event 30H , Umask 02H
Counts number of DRAM channel 1 cancel requests.
.It Li QMC_CANCEL.CH2
.Pq Event 30H , Umask 04H
Counts number of DRAM channel 2 cancel requests.
.It Li QMC_CANCEL.ANY
.Pq Event 30H , Umask 07H
Counts number of DRAM cancel requests.
.It Li QMC_PRIORITY_UPDATES.CH0
.Pq Event 31H , Umask 01H
Counts number of DRAM channel 0 priority updates. A priority update occurs
when an ISOC high or critical request is received by the QHL and there is a
matching request with normal priority that has already been issued to the
QMC. In this instance, the QHL will send a priority update to QMC to
expedite the request.
.It Li QMC_PRIORITY_UPDATES.CH1
.Pq Event 31H , Umask 02H
Counts number of DRAM channel 1 priority updates. A priority update occurs
when an ISOC high or critical request is received by the QHL and there is a
matching request with normal priority that has already been issued to the
QMC. In this instance, the QHL will send a priority update to QMC to
expedite the request.
.It Li QMC_PRIORITY_UPDATES.CH2
.Pq Event 31H , Umask 04H
Counts number of DRAM channel 2 priority updates. A priority update occurs
when an ISOC high or critical request is received by the QHL and there is a
matching request with normal priority that has already been issued to the
QMC. In this instance, the QHL will send a priority update to QMC to
expedite the request.
.It Li QMC_PRIORITY_UPDATES.ANY
.Pq Event 31H , Umask 07H
Counts number of DRAM priority updates. A priority update occurs when an
ISOC high or critical request is received by the QHL and there is a matching
request with normal priority that has already been issued to the QMC. In
this instance, the QHL will send a priority update to QMC to expedite the
request.
.It Li QHL_FRC_ACK_CNFLTS.LOCAL
.Pq Event 33H , Umask 04H
Counts number of Force Acknowledge Conflict messages sent by the Quickpath
Home Logic to the local home.
.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0
.Pq Event 40H , Umask 01H
Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled
due to lack of a VNA and VN0 credit. Note that this event does not filter
out when a flit would not have been selected for arbitration because another
virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0
.Pq Event 40H , Umask 02H
Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled
due to lack of a VNA and VN0 credit. Note that this event does not filter
out when a flit would not have been selected for arbitration because another
virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0
.Pq Event 40H , Umask 04H
Counts cycles the Quickpath outbound link 0 non-data response virtual
channel is stalled due to lack of a VNA and VN0 credit. Note that this event
does not filter out when a flit would not have been selected for arbitration
because another virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1
.Pq Event 40H , Umask 08H
Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled
due to lack of a VNA and VN0 credit. Note that this event does not filter
out when a flit would not have been selected for arbitration because another
virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1
.Pq Event 40H , Umask 10H
Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled
due to lack of a VNA and VN0 credit. Note that this event does not filter
out when a flit would not have been selected for arbitration because another
virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1
.Pq Event 40H , Umask 20H
Counts cycles the Quickpath outbound link 1 non-data response virtual
channel is stalled due to lack of a VNA and VN0 credit. Note that this event
does not filter out when a flit would not have been selected for arbitration
because another virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_0
.Pq Event 40H , Umask 07H
Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
to lack of a VNA and VN0 credit. Note that this event does not filter out
when a flit would not have been selected for arbitration because another
virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_1
.Pq Event 40H , Umask 38H
Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
to lack of a VNA and VN0 credit. Note that this event does not filter out
when a flit would not have been selected for arbitration because another
virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0
.Pq Event 41H , Umask 01H
Counts cycles the Quickpath outbound link 0 Data ResponSe virtual channel is
stalled due to lack of VNA and VN0 credits. Note that this event does not
filter out when a flit would not have been selected for arbitration because
another virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0
.Pq Event 41H , Umask 02H
Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual
channel is stalled due to lack of VNA and VN0 credits. Note that this event
does not filter out when a flit would not have been selected for arbitration
because another virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0
.Pq Event 41H , Umask 04H
Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual
channel is stalled due to lack of VNA and VN0 credits. Note that this event
does not filter out when a flit would not have been selected for arbitration
because another virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1
.Pq Event 41H , Umask 08H
Counts cycles the Quickpath outbound link 1 Data ResponSe virtual channel is
stalled due to lack of VNA and VN0 credits. Note that this event does not
filter out when a flit would not have been selected for arbitration because
another virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1
.Pq Event 41H , Umask 10H
Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual
channel is stalled due to lack of VNA and VN0 credits. Note that this event
does not filter out when a flit would not have been selected for arbitration
because another virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1
.Pq Event 41H , Umask 20H
Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual
channel is stalled due to lack of VNA and VN0 credits. Note that this event
does not filter out when a flit would not have been selected for arbitration
because another virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_0
.Pq Event 41H , Umask 07H
Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
to lack of VNA and VN0 credits. Note that this event does not filter out
when a flit would not have been selected for arbitration because another
virtual channel is getting arbitrated.
.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_1
.Pq Event 41H , Umask 38H
Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
to lack of VNA and VN0 credits. Note that this event does not filter out
when a flit would not have been selected for arbitration because another
virtual channel is getting arbitrated.
.It Li QPI_TX_HEADER.BUSY.LINK_0
.Pq Event 42H , Umask 02H
Number of cycles that the header buffer in the Quickpath Interface outbound
link 0 is busy.
.It Li QPI_TX_HEADER.BUSY.LINK_1
.Pq Event 42H , Umask 08H
Number of cycles that the header buffer in the Quickpath Interface outbound
link 1 is busy.
.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0
.Pq Event 43H , Umask 01H
Number of cycles that snoop packets incoming to the Quickpath Interface link
0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
does not have any available entries.
.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1
.Pq Event 43H , Umask 02H
Number of cycles that snoop packets incoming to the Quickpath Interface link
1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
does not have any available entries.
.It Li DRAM_OPEN.CH0
.Pq Event 60H , Umask 01H
Counts number of DRAM Channel 0 open commands issued either for read or
write. To read or write data, the referenced DRAM page must first be opened.
.It Li DRAM_OPEN.CH1
.Pq Event 60H , Umask 02H
Counts number of DRAM Channel 1 open commands issued either for read or
write. To read or write data, the referenced DRAM page must first be opened.
.It Li DRAM_OPEN.CH2
.Pq Event 60H , Umask 04H
Counts number of DRAM Channel 2 open commands issued either for read or
write. To read or write data, the referenced DRAM page must first be opened.
.It Li DRAM_PAGE_CLOSE.CH0
.Pq Event 61H , Umask 01H
DRAM channel 0 command issued to CLOSE a page due to page idle timer
expiration. Closing a page is done by issuing a precharge.
.It Li DRAM_PAGE_CLOSE.CH1
.Pq Event 61H , Umask 02H
DRAM channel 1 command issued to CLOSE a page due to page idle timer
expiration. Closing a page is done by issuing a precharge.
.It Li DRAM_PAGE_CLOSE.CH2
.Pq Event 61H , Umask 04H
DRAM channel 2 command issued to CLOSE a page due to page idle timer
expiration. Closing a page is done by issuing a precharge.
.It Li DRAM_PAGE_MISS.CH0
.Pq Event 62H , Umask 01H
Counts the number of precharges (PRE) that were issued to DRAM channel 0
because there was a page miss. A page miss refers to a situation in which a
page is currently open and another page from the same bank needs to be
opened. The new page experiences a page miss. Closing of the old page is
done by issuing a precharge.
.It Li DRAM_PAGE_MISS.CH1
.Pq Event 62H , Umask 02H
Counts the number of precharges (PRE) that were issued to DRAM channel 1
because there was a page miss. A page miss refers to a situation in which a
page is currently open and another page from the same bank needs to be
opened. The new page experiences a page miss. Closing of the old page is
done by issuing a precharge.
.It Li DRAM_PAGE_MISS.CH2
.Pq Event 62H , Umask 04H
Counts the number of precharges (PRE) that were issued to DRAM channel 2
because there was a page miss. A page miss refers to a situation in which a
page is currently open and another page from the same bank needs to be
opened. The new page experiences a page miss. Closing of the old page is
done by issuing a precharge.
.It Li DRAM_READ_CAS.CH0
.Pq Event 63H , Umask 01H
Counts the number of times a read CAS command was issued on DRAM channel 0.
.It Li DRAM_READ_CAS.AUTOPRE_CH0
.Pq Event 63H , Umask 02H
Counts the number of times a read CAS command was issued on DRAM channel 0
where the command issued used the auto-precharge (auto page close) mode.
.It Li DRAM_READ_CAS.CH1
.Pq Event 63H , Umask 04H
Counts the number of times a read CAS command was issued on DRAM channel 1.
.It Li DRAM_READ_CAS.AUTOPRE_CH1
.Pq Event 63H , Umask 08H
Counts the number of times a read CAS command was issued on DRAM channel 1
where the command issued used the auto-precharge (auto page close) mode.
.It Li DRAM_READ_CAS.CH2
.Pq Event 63H , Umask 10H
Counts the number of times a read CAS command was issued on DRAM channel 2.
.It Li DRAM_READ_CAS.AUTOPRE_CH2
.Pq Event 63H , Umask 20H
Counts the number of times a read CAS command was issued on DRAM channel 2
where the command issued used the auto-precharge (auto page close) mode.
.It Li DRAM_WRITE_CAS.CH0
.Pq Event 64H , Umask 01H
Counts the number of times a write CAS command was issued on DRAM channel 0.
.It Li DRAM_WRITE_CAS.AUTOPRE_CH0
.Pq Event 64H , Umask 02H
Counts the number of times a write CAS command was issued on DRAM channel 0
where the command issued used the auto-precharge (auto page close) mode.
.It Li DRAM_WRITE_CAS.CH1
.Pq Event 64H , Umask 04H
Counts the number of times a write CAS command was issued on DRAM channel 1.
.It Li DRAM_WRITE_CAS.AUTOPRE_CH1
.Pq Event 64H , Umask 08H
Counts the number of times a write CAS command was issued on DRAM channel 1
where the command issued used the auto-precharge (auto page close) mode.
.It Li DRAM_WRITE_CAS.CH2
.Pq Event 64H , Umask 10H
Counts the number of times a write CAS command was issued on DRAM channel 2.
.It Li DRAM_WRITE_CAS.AUTOPRE_CH2
.Pq Event 64H , Umask 20H
Counts the number of times a write CAS command was issued on DRAM channel 2
where the command issued used the auto-precharge (auto page close) mode.
.It Li DRAM_REFRESH.CH0
.Pq Event 65H , Umask 01H
Counts number of DRAM channel 0 refresh commands. DRAM loses data content
over time. In order to keep correct data content, the data values have to be
refreshed periodically.
.It Li DRAM_REFRESH.CH1
.Pq Event 65H , Umask 02H
Counts number of DRAM channel 1 refresh commands. DRAM loses data content
over time. In order to keep correct data content, the data values have to be
refreshed periodically.
.It Li DRAM_REFRESH.CH2
.Pq Event 65H , Umask 04H
Counts number of DRAM channel 2 refresh commands. DRAM loses data content
over time. In order to keep correct data content, the data values have to be
refreshed periodically.
.It Li DRAM_PRE_ALL.CH0
.Pq Event 66H , Umask 01H
Counts number of DRAM Channel 0 precharge-all (PREALL) commands that close
all open pages in a rank. PREALL is issued when the DRAM needs to be
refreshed or needs to go into a power down mode.
.It Li DRAM_PRE_ALL.CH1
.Pq Event 66H , Umask 02H
Counts number of DRAM Channel 1 precharge-all (PREALL) commands that close
all open pages in a rank. PREALL is issued when the DRAM needs to be
refreshed or needs to go into a power down mode.
.It Li DRAM_PRE_ALL.CH2
.Pq Event 66H , Umask 04H
Counts number of DRAM Channel 2 precharge-all (PREALL) commands that close
all open pages in a rank. PREALL is issued when the DRAM needs to be
refreshed or needs to go into a power down mode.
.El
.Sh SEE ALSO
.Xr pmc 3 ,
.Xr pmc.atom 3 ,
.Xr pmc.core 3 ,
.Xr pmc.iaf 3 ,
.Xr pmc.ucf 3 ,
.Xr pmc.k7 3 ,
.Xr pmc.k8 3 ,
.Xr pmc.p4 3 ,
.Xr pmc.p5 3 ,
.Xr pmc.p6 3 ,
.Xr pmc.corei7 3 ,
.Xr pmc.westmere 3 ,
.Xr pmc.westmereuc 3 ,
.Xr pmc.tsc 3 ,
.Xr pmc_cpuinfo 3 ,
.Xr pmclog 3 ,
.Xr hwpmc 4
.Sh HISTORY
The
.Nm pmc
library first appeared in
.Fx 6.0 .
.Sh AUTHORS
The
.Lb libpmc
library was written by
.An "Joseph Koshy"
.Aq jkoshy@FreeBSD.org .

115
lib/libpmc/pmc.ucf.3 Normal file
View File

@ -0,0 +1,115 @@
.\" Copyright (c) 2010 Fabien Thomas. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" This software is provided by Joseph Koshy ``as is'' and
.\" any express or implied warranties, including, but not limited to, the
.\" implied warranties of merchantability and fitness for a particular purpose
.\" are disclaimed. in no event shall Joseph Koshy be liable
.\" for any direct, indirect, incidental, special, exemplary, or consequential
.\" damages (including, but not limited to, procurement of substitute goods
.\" or services; loss of use, data, or profits; or business interruption)
.\" however caused and on any theory of liability, whether in contract, strict
.\" liability, or tort (including negligence or otherwise) arising in any way
.\" out of the use of this software, even if advised of the possibility of
.\" such damage.
.\"
.\" $FreeBSD$
.\"
.Dd March 30, 2010
.Os
.Dt PMC.UCF 3
.Sh NAME
.Nm pmc.ucf
.Nd measurement events for
.Tn Intel
uncore fixed function performance counters.
.Sh LIBRARY
.Lb libpmc
.Sh SYNOPSIS
.In pmc.h
.Sh DESCRIPTION
Each fixed-function PMC measures a specific hardware event.
The number of fixed-function PMCs implemented in a CPU can vary.
The number of fixed-function PMCs present can be determined at runtime
by using function
.Xr pmc_cpuinfo 3 .
.Pp
Intel uncore fixed-function PMCs are documented in
.Rs
.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
.%T "Volume 3B: System Programming Guide, Part 2"
.%N "Order Number: 253669-033US"
.%D December 2009
.%Q "Intel Corporation"
.Re
.Pp
.Ss PMC Capabilities
Fixed-function PMCs support the following capabilities:
.Bl -column "PMC_CAP_INTERRUPT" "Support"
.It Em Capability Ta Em Support
.It PMC_CAP_CASCADE Ta \&No
.It PMC_CAP_EDGE Ta \&No
.It PMC_CAP_INTERRUPT Ta \&No
.It PMC_CAP_INVERT Ta \&No
.It PMC_CAP_READ Ta Yes
.It PMC_CAP_PRECISE Ta \&No
.It PMC_CAP_SYSTEM Ta \&No
.It PMC_CAP_TAGGING Ta \&No
.It PMC_CAP_THRESHOLD Ta \&No
.It PMC_CAP_USER Ta \&No
.It PMC_CAP_WRITE Ta Yes
.El
.Ss Class Name Prefix
These PMCs are named using a class name prefix of
.Dq Li ucf- .
.Ss Event Specifiers (Fixed Function PMCs)
The fixed function PMCs are selectable using the following
event names:
.Bl -tag -width indent
.It Li UCLOCK
.Pq Fixed Function Counter 0
The fixed-function uncore counter increments at the rate of the U-clock.
The frequency of the uncore clock domain can be determined from the uncore
clock ratio which is available in the PCI configuration space register at
offset C0H under device number 0 and Function 0.
.El
.Sh SEE ALSO
.Xr pmc 3 ,
.Xr pmc.atom 3 ,
.Xr pmc.core 3 ,
.Xr pmc.core2 3 ,
.Xr pmc.iaf 3 ,
.Xr pmc.k7 3 ,
.Xr pmc.k8 3 ,
.Xr pmc.p4 3 ,
.Xr pmc.p5 3 ,
.Xr pmc.p6 3 ,
.Xr pmc.corei7 3 ,
.Xr pmc.corei7uc 3 ,
.Xr pmc.westmere 3 ,
.Xr pmc.westmereuc 3 ,
.Xr pmc.tsc 3 ,
.Xr pmc_cpuinfo 3 ,
.Xr pmclog 3 ,
.Xr hwpmc 4
.Sh HISTORY
The
.Nm pmc
library first appeared in
.Fx 6.0 .
.Sh AUTHORS
The
.Lb libpmc
library was written by
.An "Joseph Koshy"
.Aq jkoshy@FreeBSD.org .

1329
lib/libpmc/pmc.westmere.3 Normal file

File diff suppressed because it is too large Load Diff

1083
lib/libpmc/pmc.westmereuc.3 Normal file

File diff suppressed because it is too large Load Diff

View File

@ -43,17 +43,20 @@ struct pmc_mdep;
#include <dev/hwpmc/hwpmc_core.h>
#include <dev/hwpmc/hwpmc_piv.h>
#include <dev/hwpmc/hwpmc_tsc.h>
#include <dev/hwpmc/hwpmc_uncore.h>
/*
* Intel processors implementing V2 and later of the Intel performance
* measurement architecture have PMCs of the following classes: TSC,
* IAF and IAP.
* IAF, IAP, UCF and UCP.
*/
#define PMC_MDEP_CLASS_INDEX_TSC 0
#define PMC_MDEP_CLASS_INDEX_K8 1
#define PMC_MDEP_CLASS_INDEX_P4 1
#define PMC_MDEP_CLASS_INDEX_IAP 1
#define PMC_MDEP_CLASS_INDEX_IAF 2
#define PMC_MDEP_CLASS_INDEX_UCP 3
#define PMC_MDEP_CLASS_INDEX_UCF 4
/*
* On the amd64 platform we support the following PMCs.
@ -63,12 +66,16 @@ struct pmc_mdep;
* PIV Intel P4/HTT and P4/EMT64
* IAP Intel Core/Core2/Atom CPUs in 64 bits mode.
* IAF Intel fixed-function PMCs in Core2 and later CPUs.
* UCP Intel Uncore programmable PMCs.
* UCF Intel Uncore fixed-function PMCs.
*/
union pmc_md_op_pmcallocate {
struct pmc_md_amd_op_pmcallocate pm_amd;
struct pmc_md_iaf_op_pmcallocate pm_iaf;
struct pmc_md_iap_op_pmcallocate pm_iap;
struct pmc_md_ucf_op_pmcallocate pm_ucf;
struct pmc_md_ucp_op_pmcallocate pm_ucp;
struct pmc_md_p4_op_pmcallocate pm_p4;
uint64_t __pad[4];
};
@ -83,6 +90,8 @@ union pmc_md_pmc {
struct pmc_md_amd_pmc pm_amd;
struct pmc_md_iaf_pmc pm_iaf;
struct pmc_md_iap_pmc pm_iap;
struct pmc_md_ucf_pmc pm_ucf;
struct pmc_md_ucp_pmc pm_ucp;
struct pmc_md_p4_pmc pm_p4;
};

View File

@ -198,6 +198,7 @@ dev/hptrr/hptrr_config.c optional hptrr
dev/hwpmc/hwpmc_amd.c optional hwpmc
dev/hwpmc/hwpmc_intel.c optional hwpmc
dev/hwpmc/hwpmc_core.c optional hwpmc
dev/hwpmc/hwpmc_uncore.c optional hwpmc
dev/hwpmc/hwpmc_piv.c optional hwpmc
dev/hwpmc/hwpmc_tsc.c optional hwpmc
dev/hwpmc/hwpmc_x86.c optional hwpmc

View File

@ -179,6 +179,7 @@ dev/hptrr/hptrr_config.c optional hptrr
dev/hwpmc/hwpmc_amd.c optional hwpmc
dev/hwpmc/hwpmc_intel.c optional hwpmc
dev/hwpmc/hwpmc_core.c optional hwpmc
dev/hwpmc/hwpmc_uncore.c optional hwpmc
dev/hwpmc/hwpmc_pentium.c optional hwpmc
dev/hwpmc/hwpmc_piv.c optional hwpmc
dev/hwpmc/hwpmc_ppro.c optional hwpmc

View File

@ -99,6 +99,7 @@ dev/fe/if_fe_cbus.c optional fe isa
dev/hwpmc/hwpmc_amd.c optional hwpmc
dev/hwpmc/hwpmc_intel.c optional hwpmc
dev/hwpmc/hwpmc_core.c optional hwpmc
dev/hwpmc/hwpmc_uncore.c optional hwpmc
dev/hwpmc/hwpmc_pentium.c optional hwpmc
dev/hwpmc/hwpmc_piv.c optional hwpmc
dev/hwpmc/hwpmc_ppro.c optional hwpmc

File diff suppressed because it is too large Load Diff

View File

@ -46,6 +46,7 @@ struct pmc_md_iaf_op_pmcallocate {
*/
struct pmc_md_iap_op_pmcallocate {
uint32_t pm_iap_config;
uint32_t pm_iap_rsp;
};
#define IAP_EVSEL(C) ((C) & 0xFF)
@ -59,6 +60,8 @@ struct pmc_md_iap_op_pmcallocate {
#define IAP_INV (1 << 23)
#define IAP_CMASK(C) (((C) & 0xFF) << 24)
#define IA_OFFCORE_RSP_MASK 0xF7FF
#ifdef _KERNEL
/*
@ -76,16 +79,15 @@ struct pmc_md_iap_op_pmcallocate {
/*
* Programmable counters.
*/
#define IAP_PMC0 0x0C1
#define IAP_PMC1 0x0C2
#define IAP_PMC0 0x0C1
#define IAP_EVSEL0 0x186
#define IAP_EVSEL1 0x187
/*
* Simplified programming interface in Intel Performance Architecture
* v2 and later.
*/
#define IA_GLOBAL_STATUS 0x38E
#define IA_GLOBAL_CTRL 0x38F
#define IA_GLOBAL_OVF_CTRL 0x390
@ -93,12 +95,19 @@ struct pmc_md_iap_op_pmcallocate {
#define IA_GLOBAL_STATUS_FLAG_CONDCHG (1ULL << 63)
#define IA_GLOBAL_STATUS_FLAG_OVFBUF (1ULL << 62)
/*
* Offcore response configuration.
*/
#define IA_OFFCORE_RSP0 0x1A6
#define IA_OFFCORE_RSP1 0x1A7
struct pmc_md_iaf_pmc {
uint64_t pm_iaf_ctrl;
};
struct pmc_md_iap_pmc {
uint32_t pm_iap_evsel;
uint32_t pm_iap_rsp;
};
/*

View File

@ -133,8 +133,14 @@ pmc_intel_initialize(void)
case 0x1A:
case 0x1E: /* Per Intel document 253669-032 9/2009, pages A-2 and A-57 */
case 0x1F: /* Per Intel document 253669-032 9/2009, pages A-2 and A-57 */
case 0x2E:
cputype = PMC_CPU_INTEL_COREI7;
nclasses = 3;
nclasses = 5;
break;
case 0x25: /* Per Intel document 253669-033US 12/2009. */
case 0x2C: /* Per Intel document 253669-033US 12/2009. */
cputype = PMC_CPU_INTEL_WESTMERE;
nclasses = 5;
break;
}
break;
@ -176,6 +182,7 @@ pmc_intel_initialize(void)
case PMC_CPU_INTEL_CORE2:
case PMC_CPU_INTEL_CORE2EXTREME:
case PMC_CPU_INTEL_COREI7:
case PMC_CPU_INTEL_WESTMERE:
error = pmc_core_initialize(pmc_mdep, ncpus);
break;
@ -226,6 +233,22 @@ pmc_intel_initialize(void)
KASSERT(0, ("[intel,%d] Unknown CPU type", __LINE__));
}
/*
* Init the uncore class.
*/
#if defined(__i386__) || defined(__amd64__)
switch (cputype) {
/*
* Intel Corei7 and Westmere processors.
*/
case PMC_CPU_INTEL_COREI7:
case PMC_CPU_INTEL_WESTMERE:
error = pmc_uncore_initialize(pmc_mdep, ncpus);
break;
default:
break;
}
#endif
error:
if (error) {
@ -247,6 +270,8 @@ pmc_intel_finalize(struct pmc_mdep *md)
case PMC_CPU_INTEL_CORE:
case PMC_CPU_INTEL_CORE2:
case PMC_CPU_INTEL_CORE2EXTREME:
case PMC_CPU_INTEL_COREI7:
case PMC_CPU_INTEL_WESTMERE:
pmc_core_finalize(md);
break;
@ -269,4 +294,18 @@ pmc_intel_finalize(struct pmc_mdep *md)
default:
KASSERT(0, ("[intel,%d] unknown CPU type", __LINE__));
}
/*
* Uncore.
*/
#if defined(__i386__) || defined(__amd64__)
switch (md->pmd_cputype) {
case PMC_CPU_INTEL_COREI7:
case PMC_CPU_INTEL_WESTMERE:
pmc_uncore_finalize(md);
break;
default:
break;
}
#endif
}

1121
sys/dev/hwpmc/hwpmc_uncore.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,120 @@
/*-
* Copyright (c) 2010 Fabien Thomas
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _DEV_HWPMC_UNCORE_H_
#define _DEV_HWPMC_UNCORE_H_ 1
/*
* Fixed-function PMCs.
*/
/*
 * Machine-dependent allocation arguments for an Intel uncore
 * fixed-function PMC, passed in from userland at allocation time.
 */
struct pmc_md_ucf_op_pmcallocate {
uint16_t pm_ucf_flags; /* additional control flags (UCF_EN, UCF_PMI) */
};
#define UCF_EN 0x1
#define UCF_PMI 0x4
/*
* Programmable PMCs.
*/
/*
 * Machine-dependent allocation arguments for an Intel uncore
 * programmable PMC, passed in from userland at allocation time.
 */
struct pmc_md_ucp_op_pmcallocate {
uint32_t pm_ucp_config; /* event-select register value, built from the UCP_* field macros below */
};
#define UCP_EVSEL(C) ((C) & 0xFF)
#define UCP_UMASK(C) ((C) & 0xFF00)
#define UCP_CTRR (1 << 17)
#define UCP_EDGE (1 << 18)
#define UCP_INT (1 << 20)
#define UCP_EN (1 << 22)
#define UCP_INV (1 << 23)
#define UCP_CMASK(C) (((C) & 0xFF) << 24)
#ifdef _KERNEL
#define DCTL_FLAG_UNC_PMI (1ULL << 13)
/*
* Fixed-function counters.
*/
#define UCF_MASK 0xF
#define UCF_CTR0 0x394
#define UCF_OFFSET 32
#define UCF_CTRL 0x395
/*
* Programmable counters.
*/
#define UCP_PMC0 0x3B0
#define UCP_EVSEL0 0x3C0
#define UCP_OPCODE_MATCH 0x396
/*
* Simplified programming interface in Intel Performance Architecture
* v2 and later.
*/
#define UC_GLOBAL_STATUS 0x392
#define UC_GLOBAL_CTRL 0x391
#define UC_GLOBAL_OVF_CTRL 0x393
#define UC_GLOBAL_STATUS_FLAG_CLRCHG (1ULL << 63)
#define UC_GLOBAL_STATUS_FLAG_OVFPMI (1ULL << 61)
#define UC_GLOBAL_CTRL_FLAG_FRZ (1ULL << 63)
#define UC_GLOBAL_CTRL_FLAG_ENPMICORE0 (1ULL << 48)
/* Kernel-private per-PMC state for an uncore fixed-function counter. */
struct pmc_md_ucf_pmc {
uint64_t pm_ucf_ctrl; /* control value for this counter's slot in the UCF_CTRL MSR */
};
/* Kernel-private per-PMC state for an uncore programmable counter. */
struct pmc_md_ucp_pmc {
uint32_t pm_ucp_evsel; /* event-select value programmed into the counter's UCP_EVSEL0-based MSR */
};
/*
* Prototypes.
*/
int pmc_uncore_initialize(struct pmc_mdep *_md, int _maxcpu);
void pmc_uncore_finalize(struct pmc_mdep *_md);
void pmc_uncore_mark_started(int _cpu, int _pmc);
int pmc_ucf_initialize(struct pmc_mdep *_md, int _maxcpu, int _npmc, int _width);
void pmc_ucf_finalize(struct pmc_mdep *_md);
int pmc_ucp_initialize(struct pmc_mdep *_md, int _maxcpu, int _npmc, int _width,
int _flags);
void pmc_ucp_finalize(struct pmc_mdep *_md);
#endif /* _KERNEL */
#endif /* _DEV_HWPMC_UNCORE_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -49,6 +49,8 @@ struct pmc_mdep;
* PENTIUM Intel Pentium MMX.
* IAP Intel Core/Core2/Atom programmable PMCs.
* IAF Intel fixed-function PMCs.
* UCP Intel Uncore programmable PMCs.
* UCF Intel Uncore fixed-function PMCs.
*/
#include <dev/hwpmc/hwpmc_amd.h> /* K7 and K8 */
@ -57,11 +59,12 @@ struct pmc_mdep;
#include <dev/hwpmc/hwpmc_ppro.h>
#include <dev/hwpmc/hwpmc_pentium.h>
#include <dev/hwpmc/hwpmc_tsc.h>
#include <dev/hwpmc/hwpmc_uncore.h>
/*
* Intel processors implementing V2 and later of the Intel performance
* measurement architecture have PMCs of the following classes: TSC,
* IAF and IAP.
* IAF, IAP, UCF and UCP.
*/
#define PMC_MDEP_CLASS_INDEX_TSC 0
#define PMC_MDEP_CLASS_INDEX_K7 1
@ -71,6 +74,8 @@ struct pmc_mdep;
#define PMC_MDEP_CLASS_INDEX_P6 1
#define PMC_MDEP_CLASS_INDEX_IAP 1
#define PMC_MDEP_CLASS_INDEX_IAF 2
#define PMC_MDEP_CLASS_INDEX_UCP 3
#define PMC_MDEP_CLASS_INDEX_UCF 4
/*
* Architecture specific extensions to <sys/pmc.h> structures.
@ -80,6 +85,8 @@ union pmc_md_op_pmcallocate {
struct pmc_md_amd_op_pmcallocate pm_amd;
struct pmc_md_iaf_op_pmcallocate pm_iaf;
struct pmc_md_iap_op_pmcallocate pm_iap;
struct pmc_md_ucf_op_pmcallocate pm_ucf;
struct pmc_md_ucp_op_pmcallocate pm_ucp;
struct pmc_md_p4_op_pmcallocate pm_p4;
struct pmc_md_pentium_op_pmcallocate pm_pentium;
struct pmc_md_ppro_op_pmcallocate pm_ppro;
@ -97,6 +104,8 @@ union pmc_md_pmc {
struct pmc_md_amd_pmc pm_amd;
struct pmc_md_iaf_pmc pm_iaf;
struct pmc_md_iap_pmc pm_iap;
struct pmc_md_ucf_pmc pm_ucf;
struct pmc_md_ucp_pmc pm_ucp;
struct pmc_md_p4_pmc pm_p4;
struct pmc_md_pentium_pmc pm_pentium;
struct pmc_md_ppro_pmc pm_ppro;

View File

@ -10,7 +10,7 @@ SRCS= hwpmc_mod.c hwpmc_logging.c vnode_if.h
.if ${MACHINE_ARCH} == "amd64"
SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_intel.c hwpmc_piv.c hwpmc_tsc.c
SRCS+= hwpmc_x86.c
SRCS+= hwpmc_x86.c hwpmc_uncore.c
SRCS+= device_if.h bus_if.h
.endif
@ -20,7 +20,7 @@ SRCS+= hwpmc_arm.c
.if ${MACHINE_ARCH} == "i386"
SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_intel.c hwpmc_piv.c hwpmc_ppro.c
SRCS+= hwpmc_pentium.c hwpmc_tsc.c hwpmc_x86.c
SRCS+= hwpmc_pentium.c hwpmc_tsc.c hwpmc_x86.c hwpmc_uncore.c
SRCS+= device_if.h bus_if.h
.endif

View File

@ -40,7 +40,7 @@
#define PMC_MODULE_NAME "hwpmc"
#define PMC_NAME_MAX 16 /* HW counter name size */
#define PMC_CLASS_MAX 4 /* max #classes of PMCs per-system */
#define PMC_CLASS_MAX 6 /* max #classes of PMCs per-system */
/*
* Kernel<->userland API version number [MMmmpppp]
@ -85,6 +85,7 @@
__PMC_CPU(INTEL_CORE2EXTREME, 0x89, "Intel Core2 Extreme") \
__PMC_CPU(INTEL_ATOM, 0x8A, "Intel Atom") \
__PMC_CPU(INTEL_COREI7, 0x8B, "Intel Core i7") \
__PMC_CPU(INTEL_WESTMERE, 0x8C, "Intel Westmere") \
__PMC_CPU(INTEL_XSCALE, 0x100, "Intel XScale") \
__PMC_CPU(MIPS_24K, 0x200, "MIPS 24K")
@ -110,6 +111,8 @@ enum pmc_cputype {
__PMC_CLASS(P4) /* Intel Pentium-IV counters */ \
__PMC_CLASS(IAF) /* Intel Core2/Atom, fixed function */ \
__PMC_CLASS(IAP) /* Intel Core...Atom, programmable */ \
__PMC_CLASS(UCF) /* Intel Uncore fixed function */ \
__PMC_CLASS(UCP) /* Intel Uncore programmable */ \
__PMC_CLASS(XSCALE) /* Intel XScale counters */ \
__PMC_CLASS(MIPS24K) /* MIPS 24K */