Processor Counter Monitor
|
CPU Performance Monitor. More...
#include <cpucounters.h>
Classes | |
struct | CustomCoreEventDescription |
Custom Core event description. More... | |
struct | CustomIIOEventDescription |
struct | ExtendedCustomCoreEventDescription |
Extended custom core event description. More... | |
struct | RawPMUConfig |
struct | SimplePCIeDevInfo |
Public Types | |
enum | { MAX_C_STATE = 10 } |
enum | ProgramMode { DEFAULT_EVENTS = 0, CUSTOM_CORE_EVENTS = 1, EXT_CUSTOM_CORE_EVENTS = 2, INVALID_MODE } |
Mode of programming (parameter in the program() method) More... | |
enum | ErrorCode { Success = 0, MSRAccessDenied = 1, PMUBusy = 2, UnknownError } |
Return codes (e.g. for program(..) method) | |
enum | PerfmonField { INVALID, OPCODE, EVENT_SELECT, UMASK, RESET, EDGE_DET, IGNORED, OVERFLOW_ENABLE, ENABLE, INVERT, THRESH, CH_MASK, FC_MASK, H_EVENT_NAME, V_EVENT_NAME, MULTIPLIER, DIVIDER, COUNTER_INDEX } |
enum | PCIeWidthMode { X1, X4, X8, X16, XFF } |
enum | { IIO_CBDMA = 0, IIO_PCIe0 = 1, IIO_PCIe1 = 2, IIO_PCIe2 = 3, IIO_MCP0 = 4, IIO_MCP1 = 5, IIO_STACK_COUNT = 6 } |
enum | SkylakeIIOStacks { SKX_IIO_CBDMA_DMI = 0, SKX_IIO_PCIe0 = 1, SKX_IIO_PCIe1 = 2, SKX_IIO_PCIe2 = 3, SKX_IIO_MCP0 = 4, SKX_IIO_MCP1 = 5, SKX_IIO_STACK_COUNT = 6 } |
enum | IcelakeIIOStacks { ICX_IIO_PCIe0 = 0, ICX_IIO_PCIe1 = 1, ICX_IIO_MCP0 = 2, ICX_IIO_PCIe2 = 3, ICX_IIO_PCIe3 = 4, ICX_IIO_CBDMA_DMI = 5, ICX_IIO_STACK_COUNT = 6 } |
enum | SnowridgeIIOStacks { SNR_IIO_QAT = 0, SNR_IIO_CBDMA_DMI = 1, SNR_IIO_NIS = 2, SNR_IIO_HQM = 3, SNR_IIO_PCIe0 = 4, SNR_IIO_STACK_COUNT = 5 } |
enum | MSREventPosition { index = 0, type = 1 } |
enum | MSRType { Static = 0, Freerun = 1 } |
enum | EventPosition { TOR_OCCUPANCY = 0, TOR_INSERTS = 1, REQUESTS_ALL = 2, REQUESTS_LOCAL = 3 } |
enum | { OCR0Pos = 1, OCR1Pos = 2, LoadLatencyPos = 3, FrontendPos = 4 } |
enum | SupportedCPUModels { NEHALEM_EP = 26, NEHALEM = 30, ATOM = 28, ATOM_2 = 53, CENTERTON = 54, BAYTRAIL = 55, AVOTON = 77, CHERRYTRAIL = 76, APOLLO_LAKE = 92, DENVERTON = 95, SNOWRIDGE = 134, CLARKDALE = 37, WESTMERE_EP = 44, NEHALEM_EX = 46, WESTMERE_EX = 47, SANDY_BRIDGE = 42, JAKETOWN = 45, IVY_BRIDGE = 58, HASWELL = 60, HASWELL_ULT = 69, HASWELL_2 = 70, IVYTOWN = 62, HASWELLX = 63, BROADWELL = 61, BROADWELL_XEON_E3 = 71, BDX_DE = 86, SKL_UY = 78, KBL = 158, KBL_1 = 142, CML = 166, CML_1 = 165, ICL = 126, ICL_1 = 125, RKL = 167, TGL = 140, TGL_1 = 141, ADL = 151, ADL_1 = 154, BDX = 79, KNL = 87, SKL = 94, SKX = 85, ICX_D = 108, ICX = 106, END_OF_MODEL_LIST = 0x0ffff } |
Identifiers of supported CPU models. | |
enum | PCIeEventCode { PCIeRdCur = 0x19E, PCIeNSRd = 0x1E4, PCIeWiLF = 0x194, PCIeItoM = 0x19C, PCIeNSWr = 0x1E5, PCIeNSWrF = 0x1E6, RFO = 0x180, CRd = 0x181, DRd = 0x182, PRd = 0x187, WiL = 0x18F, ItoM = 0x1C8, SKX_RFO = 0x200, SKX_CRd = 0x201, SKX_DRd = 0x202, SKX_PRd = 0x207, SKX_WiL = 0x20F, SKX_RdCur = 0x21E, SKX_ItoM = 0x248 } |
enum | ChaPipelineQueue { None, IRQ, PRQ } |
enum | CBoEventTid { RFOtid = 0x3E, ItoMtid = 0x3E } |
typedef std::pair< std::array< uint64, 5 >, std::string > | RawEventConfig |
typedef std::map< std::string, RawPMUConfig > | RawPMUConfigs |
Public Member Functions | |
bool | isCoreCStateResidencySupported (int state) |
Returns true if the specified core C-state residency metric is supported. | |
bool | isPackageCStateResidencySupported (int state) |
Returns true if the specified package C-state residency metric is supported. | |
void | restoreOutput () |
Restores output, closes output file if opened. | |
void | setRunState (int new_state) |
Set Run State. | |
int | getRunState (void) |
Returns program's Run State. | |
bool | isBlocked (void) |
void | setBlocked (const bool new_blocked) |
bool | isHWTMAL1Supported () const |
check if TMA level 1 metrics are supported | |
bool | isSecureBoot () const |
check if in secure boot mode | |
bool | useLinuxPerfForUncore () const |
true if Linux perf for uncore PMU programming should AND can be used internally | |
SystemRoot const & | getSystemTopology () const |
The system, sockets, uncores, cores and threads are structured like a tree. More... | |
void | printDetailedSystemTopology () |
prints detailed system topology | |
bool | QOSMetricAvailable () const |
checks if QOS monitoring support present More... | |
bool | L3QOSMetricAvailable () const |
checks L3 cache support for QOS present More... | |
bool | L3CacheOccupancyMetricAvailable () const |
checks if L3 cache monitoring present More... | |
bool | CoreLocalMemoryBWMetricAvailable () const |
checks if local memory bandwidth monitoring present More... | |
bool | CoreRemoteMemoryBWMetricAvailable () const |
checks if total memory bandwidth monitoring present More... | |
unsigned | getMaxRMID () const |
returns the max number of RMID supported by socket More... | |
uint32 | getMaxNumOfCBoxes () const |
Returns the number of CBO or CHA units per socket. | |
uint32 | getMaxNumOfIIOStacks () const |
Returns the number of IIO stacks per socket. | |
bool | good () |
Checks the status of PCM object. More... | |
const std::string & | getErrorMessage () const |
Returns the error message. More... | |
ErrorCode | program (const ProgramMode mode_=DEFAULT_EVENTS, const void *parameter_=NULL, const bool silent=false, const int pid=-1) |
Programs performance counters. More... | |
void | checkError (const ErrorCode code) |
checks the error and suggests solution and/or exits the process More... | |
ErrorCode | programServerUncoreLatencyMetrics (bool enable_pmm) |
Programs uncore latency counters on microarchitectures codename SandyBridge-EP and later Xeon uarch. More... | |
ErrorCode | programServerUncorePowerMetrics (int mc_profile, int pcu_profile, int *freq_bands=NULL) |
Programs uncore power/energy counters on microarchitectures codename SandyBridge-EP and later Xeon uarch. More... | |
ErrorCode | programServerUncoreMemoryMetrics (const ServerUncoreMemoryMetrics &metrics, int rankA=-1, int rankB=-1) |
ErrorCode | program (const RawPMUConfigs &curPMUConfigs, const bool silent=false, const int pid=-1) |
std::pair< unsigned, unsigned > | getOCREventNr (const int event, const unsigned coreID) const |
void | freezeServerUncoreCounters () |
Freezes uncore event counting (works only on microarchitecture codename SandyBridge-EP and IvyTown) | |
void | unfreezeServerUncoreCounters () |
Unfreezes uncore event counting (works only on microarchitecture codename SandyBridge-EP and IvyTown) | |
ServerUncoreCounterState | getServerUncoreCounterState (uint32 socket) |
Reads the power/energy counter state of a socket (works only on microarchitecture codename SandyBridge-EP) More... | |
void | cleanup (const bool silent=false) |
Cleans up resources and stops performance counting. More... | |
void | resetPMU () |
Forces PMU reset. More... | |
void | getAllCounterStates (SystemCounterState &systemState, std::vector< SocketCounterState > &socketStates, std::vector< CoreCounterState > &coreStates, const bool readAndAggregateSocketUncoreCounters=true) |
Reads all counter states (including system, sockets and cores) More... | |
void | getUncoreCounterStates (SystemCounterState &systemState, std::vector< SocketCounterState > &socketStates) |
Reads uncore counter states (including system and sockets) but no core counters. More... | |
bool | isCoreOnline (int32 os_core_id) const |
Returns true if the core is online. More... | |
bool | isSocketOnline (int32 socket_id) const |
Returns true if the socket is online. More... | |
SystemCounterState | getSystemCounterState () |
Reads the counter state of the system. More... | |
SocketCounterState | getSocketCounterState (uint32 socket) |
Reads the counter state of a socket. More... | |
CoreCounterState | getCoreCounterState (uint32 core) |
Reads the counter state of a (logical) core. More... | |
uint32 | getNumCores () const |
Reads number of logical cores in the system. More... | |
uint32 | getNumOnlineCores () const |
Reads number of online logical cores in the system. More... | |
uint32 | getNumSockets () const |
Reads number of sockets (CPUs) in the system. More... | |
uint32 | getNumOnlineSockets () const |
Reads number of online sockets (CPUs) in the system. More... | |
uint32 | getThreadsPerCore () const |
Reads how many hardware threads a physical core has. "Hardware thread" is a logical core in a different terminology. If Intel(r) Hyperthreading(tm) is enabled then this function returns 2. More... | |
bool | getSMT () const |
Checks if SMT (HyperThreading) is enabled. More... | |
uint64 | getNominalFrequency () const |
Reads the nominal core frequency. More... | |
uint32 | getL3ScalingFactor () const |
Runs CPUID.0xF.0x01 to get the L3 upscaling factor used to calculate L3 occupancy. The scaling factor is returned in the EBX register after running the CPUID instruction. More... | |
bool | isSomeCoreOfflined () |
runs CPUID.0xB.0x01 to get maximum logical cores (including SMT) per socket. max_lcores_per_socket is returned in EBX[15:0]. Compare this value with number of cores per socket detected in the system to see if some cores are offlined More... | |
int32 | getMaxCustomCoreEvents () |
Returns the maximum number of custom (general-purpose) core events supported by CPU. | |
uint32 | getCPUModel () const |
Reads CPU model id. More... | |
uint32 | getCPUStepping () const |
Reads CPU stepping id. More... | |
int32 | getThreadId (uint32 os_id) const |
Determines physical thread of given processor ID within a core. More... | |
int32 | getCoreId (uint32 os_id) const |
Determines physical core of given processor ID within a socket. More... | |
int32 | getTileId (uint32 os_id) const |
Determines physical tile (cores sharing L2 cache) of given processor ID. More... | |
int32 | getSocketId (uint32 core_id) const |
Determines socket of given core. More... | |
uint64 | getQPILinksPerSocket () const |
Returns the number of Intel(r) Quick Path Interconnect(tm) links per socket. More... | |
uint32 | getMCPerSocket () const |
Returns the number of detected integrated memory controllers per socket. | |
size_t | getMCChannelsPerSocket () const |
Returns the total number of detected memory channels on all integrated memory controllers per socket. | |
size_t | getMCChannels (uint32 socket, uint32 controller) const |
Returns the number of detected memory channels on the given integrated memory controller. More... | |
size_t | getEDCChannelsPerSocket () const |
Returns the total number of detected EDC memory channels per socket. | |
uint32 | getMaxIPC () const |
Returns the max number of instructions per cycle. More... | |
uint64 | getPCUFrequency () const |
Returns the frequency of Power Control Unit. | |
bool | isServerCPU () const |
Returns whether it is a server part. | |
bool | isClientCPU () const |
Returns whether it is a client part. | |
uint64 | getTickCount (uint64 multiplier=1000, uint32 core=0) |
Return TSC timer value in time units. More... | |
uint64 | getInvariantTSC_Fast (uint32 core=0) |
uint64 | getUncoreClocks (const uint32 socket_) |
Returns uncore clock ticks on specified socket. | |
uint64 | getQPILinkSpeed (uint32 socketNr, uint32 linkNr) const |
Return QPI Link Speed in GBytes/second. More... | |
double | getJoulesPerEnergyUnit () const |
Returns how many joules are in an internal processor energy unit. | |
int32 | getPackageThermalSpecPower () const |
Returns thermal specification power of the package domain in Watt. | |
int32 | getPackageMinimumPower () const |
Returns minimum power derived from electrical spec of the package domain in Watt. | |
int32 | getPackageMaximumPower () const |
Returns maximum power derived from electrical spec of the package domain in Watt. | |
void | disableJKTWorkaround () |
void | programPCIeEventGroup (eventGroup_t &eventGroup) |
Program uncore PCIe monitoring event(s) More... | |
uint64 | getPCIeCounterData (const uint32 socket_, const uint32 ctr_) |
void | programCbo (const uint64 *events, const uint32 opCode=0, const uint32 nc_=0, const uint32 llc_lookup_tid_filter=0, const uint32 loc=1, const uint32 rem=1) |
Program CBO (or CHA on SKX+) counters. More... | |
void | programCboRaw (const uint64 *events, const uint64 filter0, const uint64 filter1) |
Program CBO (or CHA on SKX+) counters. More... | |
PCIeCounterState | getPCIeCounterState (const uint32 socket_, const uint32 ctr_=0) |
Get the state of PCIe counter(s) More... | |
void | programIIOCounters (uint64 rawEvents[4], int IIOStack=-1) |
Program uncore IIO events. More... | |
void | programIRPCounters (uint64 rawEvents[4], int IIOStack=-1) |
Program uncore IRP events. More... | |
IIOCounterState | getIIOCounterState (int socket, int IIOStack, int counter) |
Get the state of IIO counter. More... | |
void | getIIOCounterStates (int socket, int IIOStack, IIOCounterState *result) |
Get the states of the four IIO counters in bulk (faster than four single reads) More... | |
uint64 | extractCoreGenCounterValue (uint64 val) |
uint64 | extractCoreFixedCounterValue (uint64 val) |
uint64 | extractUncoreGenCounterValue (uint64 val) |
uint64 | extractUncoreFixedCounterValue (uint64 val) |
uint64 | extractQOSMonitoring (uint64 val) |
const char * | getUArchCodename (const int32 cpu_model_=-1) const |
Get a string describing the codename of the processor microarchitecture. More... | |
std::string | getCPUFamilyModelString () |
void | enableForceRTMAbortMode (const bool silent=false) |
Enables "force all RTM transaction abort" mode also enabling 4+ programmable counters on Skylake generation processors. | |
bool | isForceRTMAbortModeEnabled () const |
queries status of "force all RTM transaction abort" mode | |
void | disableForceRTMAbortMode (const bool silent=false) |
Disables "force all RTM transaction abort" mode restricting the number of programmable counters on Skylake generation processors to 3. | |
bool | isForceRTMAbortModeAvailable () const |
queries availability of "force all RTM transaction abort" mode | |
int64 | getCPUMicrocodeLevel () const |
Get microcode level (returns -1 if retrieval not supported due to some restrictions) | |
bool | isAtom () const |
returns true if CPU is Atom-based | |
bool | packageEnergyMetricsAvailable () const |
bool | dramEnergyMetricsAvailable () const |
bool | packageThermalMetricsAvailable () const |
bool | outgoingQPITrafficMetricsAvailable () const |
bool | incomingQPITrafficMetricsAvailable () const |
bool | localMemoryRequestRatioMetricAvailable () const |
bool | qpiUtilizationMetricsAvailable () const |
bool | memoryTrafficMetricsAvailable () const |
bool | MCDRAMmemoryTrafficMetricsAvailable () const |
bool | memoryIOTrafficMetricAvailable () const |
bool | IIOEventsAvailable () const |
bool | uncoreFrequencyMetricAvailable () const |
bool | LatencyMetricsAvailable () const |
bool | DDRLatencyMetricsAvailable () const |
bool | PMMTrafficMetricsAvailable () const |
bool | LLCReadMissLatencyMetricsAvailable () const |
bool | hasBecktonUncore () const |
bool | hasPCICFGUncore () const |
bool | isSkxCompatible () const |
bool | hasUPI () const |
const char * | xPI () const |
bool | hasCHA () const |
bool | supportsHLE () const |
bool | supportsRTM () const |
bool | supportsRDTSCP () const |
bool | useSkylakeEvents () const |
bool | hasClientMCCounters () const |
double | getBytesPerFlit () const |
double | getDataBytesPerFlit () const |
double | getBytesPerLinkCycle () const |
double | getBytesPerLinkTransfer () const |
void | setupCustomCoreEventsForNuma (PCM::ExtendedCustomCoreEventDescription &conf) const |
Setup ExtendedCustomCoreEventDescription object to read offcore (numa) counters for each processor type. More... | |
bool | isActiveRelativeFrequencyAvailable () const |
Static Public Member Functions | |
static void | setOutput (const std::string filename, const bool cerrToo=false) |
Redirects output destination to provided file, instead of std::cout and std::cerr (optional) | |
static PCM * | getInstance () |
Returns PCM object. More... | |
static int | getCPUModelFromCPUID () |
Returns cpu model id number from cpuid instruction. | |
static bool | initWinRing0Lib () |
Loads and initializes Winring0 third party library for access to processor model specific and PCI configuration registers. More... | |
static std::string | getCPUBrandString () |
Get Brand string of processor. | |
static bool | isAtom (const int32 cpu_model_) |
returns true if CPU model is Atom-based | |
static bool | hasUPI (const int32 cpu_model_) |
static double | getBytesPerFlit (int32 cpu_model_) |
static double | getDataBytesPerFlit (int32 cpu_model_) |
static double | getFlitsPerLinkCycle (int32 cpu_model_) |
static double | getBytesPerLinkCycle (int32 cpu_model_) |
static double | getLinkTransfersPerLinkCycle () |
Friends | |
class | BasicCounterState |
class | UncoreCounterState |
class | Socket |
class | ServerUncore |
class | PerfVirtualControlRegister |
class | Aggregator |
class | ServerPCICFGUncore |
CPU Performance Monitor.
This singleton object needs to be instantiated for each process before accessing counting and measuring routines
Mode of programming (parameter in the program() method)
Enumerator | |
---|---|
DEFAULT_EVENTS | Default choice of events, the additional parameter is not needed and ignored |
CUSTOM_CORE_EVENTS | Custom set of core events specified in the parameter to the program method. The parameter must be a pointer to an array of four CustomCoreEventDescription values |
EXT_CUSTOM_CORE_EVENTS | Custom set of core events specified in the parameter to the program method. The parameter must be a pointer to an ExtendedCustomCoreEventDescription data structure |
INVALID_MODE | Non-programmed mode |
void pcm::PCM::checkError | ( | const ErrorCode | code | ) |
checks the error and suggests solution and/or exits the process
code | error code from the 'program' call |
References resetPMU().
void pcm::PCM::cleanup | ( | const bool | silent = false | ) |
Cleans up resources and stops performance counting.
One needs to call this method when your program finishes and/or you are not going to use the performance counting routines anymore.
References disableForceRTMAbortMode().
bool pcm::PCM::CoreLocalMemoryBWMetricAvailable | ( | ) | const |
checks if local memory bandwidth monitoring present
References L3QOSMetricAvailable(), and QOSMetricAvailable().
bool pcm::PCM::CoreRemoteMemoryBWMetricAvailable | ( | ) | const |
checks if total memory bandwidth monitoring present
References L3QOSMetricAvailable(), and QOSMetricAvailable().
void pcm::PCM::getAllCounterStates | ( | SystemCounterState & | systemState, |
std::vector< SocketCounterState > & | socketStates, | ||
std::vector< CoreCounterState > & | coreStates, | ||
const bool | readAndAggregateSocketUncoreCounters = true |
||
) |
Reads all counter states (including system, sockets and cores)
systemState | system counter state (return parameter) |
socketStates | socket counter states (return parameter) |
coreStates | core counter states (return parameter) |
readAndAggregateSocketUncoreCounters | read and aggregate socket uncore counters |
References isCoreOnline().
CoreCounterState pcm::PCM::getCoreCounterState | ( | uint32 | core | ) |
Reads the counter state of a (logical) core.
Be aware that during the measurement other threads may be scheduled on the same core by the operating system (this is called context-switching). The performance events caused by these threads will be counted as well.
core | core id |
Returns the state of counters in the core.
Referenced by pcm::getCoreCounterState().
|
inline |
Determines physical core of given processor ID within a socket.
os_id | processor identifier |
|
inline |
Reads CPU model id.
Referenced by pcm::getDRAMClocks(), and pcm::getDRAMConsumedJoules().
|
inline |
Reads CPU stepping id.
|
inline |
Returns the error message.
Call this when good() returns false; otherwise an empty string is returned.
IIOCounterState pcm::PCM::getIIOCounterState | ( | int | socket, |
int | IIOStack, | ||
int | counter | ||
) |
Get the state of IIO counter.
socket | socket of the IIO stack |
IIOStack | id of the IIO stack |
void pcm::PCM::getIIOCounterStates | ( | int | socket, |
int | IIOStack, | ||
IIOCounterState * | result | ||
) |
Get the states of the four IIO counters in bulk (faster than four single reads)
socket | socket of the IIO stack |
IIOStack | id of the IIO stack |
result | states of IIO counters (array of four IIOCounterState elements) |
|
static |
Returns PCM object.
Returns PCM object. If the PCM has not been created before, then an instance is created. PCM is a singleton.
Referenced by pcm::ServerPCICFGUncore::computeQPISpeed(), pcm::getActiveAverageFrequency(), pcm::getActiveRelativeFrequency(), pcm::getAllIncomingQPILinkBytes(), pcm::getAllOutgoingQPILinkBytes(), pcm::getAverageUncoreFrequency(), pcm::getBackendBound(), pcm::getBadSpeculation(), pcm::getBytesReadFromEDC(), pcm::getBytesReadFromMC(), pcm::getBytesReadFromPMM(), pcm::getBytesWrittenToEDC(), pcm::getBytesWrittenToMC(), pcm::getBytesWrittenToPMM(), pcm::getConsumedJoules(), pcm::getCoreCounterState(), pcm::getCoreCStateResidency(), pcm::getCoreIPC(), pcm::getDRAMClocks(), pcm::getDRAMConsumedJoules(), pcm::getEDCCounter(), pcm::getFrontendBound(), pcm::getGTRequestBytesFromMC(), pcm::getIARequestBytesFromMC(), pcm::getIncomingQPILinkBytes(), pcm::getIncomingQPILinkUtilization(), pcm::getIORequestBytesFromMC(), pcm::getL2CacheHitRatio(), pcm::getL2CacheHits(), pcm::getL2CacheMisses(), pcm::getL3CacheHitRatio(), pcm::getL3CacheHits(), pcm::getL3CacheHitsNoSnoop(), pcm::getL3CacheHitsSnoop(), pcm::getL3CacheMisses(), pcm::getL3CacheOccupancy(), pcm::getLLCReadMissLatency(), pcm::getLocalMemoryBW(), pcm::getLocalMemoryRequestRatio(), pcm::getOutgoingQPILinkBytes(), pcm::getOutgoingQPILinkUtilization(), pcm::getPackageCStateResidency(), pcm::getQPItoMCTrafficRatio(), pcm::getRemoteMemoryBW(), pcm::getRetiring(), pcm::getSocketCounterState(), pcm::getSocketIncomingQPILinkBytes(), pcm::getSystemCounterState(), pcm::getTotalExecUsage(), PCMServiceNS::PCMService::OnStart(), pcm::ServerPCICFGUncore::program(), pcm::ServerPCICFGUncore::programServerUncoreMemoryMetrics(), and pcm::ServerPCICFGUncore::reportQPISpeed().
uint32 pcm::PCM::getL3ScalingFactor | ( | ) | const |
Runs CPUID.0xF.0x01 to get the L3 upscaling factor used to calculate L3 occupancy. The scaling factor is returned in the EBX register after running the CPUID instruction.
|
inline |
Returns the max number of instructions per cycle.
unsigned pcm::PCM::getMaxRMID | ( | ) | const |
returns the max number of RMID supported by socket
|
inline |
Returns the number of detected memory channels on the given integrated memory controller.
socket | socket |
controller | controller |
uint64 pcm::PCM::getNominalFrequency | ( | ) | const |
Reads the nominal core frequency.
Referenced by pcm::getActiveAverageFrequency(), pcm::getIncomingQPILinkUtilization(), pcm::getOutgoingQPILinkBytes(), pcm::getOutgoingQPILinkUtilization(), and getTickCount().
uint32 pcm::PCM::getNumCores | ( | ) | const |
Reads number of logical cores in the system.
Referenced by pcm::getCoreIPC(), and pcm::getTotalExecUsage().
uint32 pcm::PCM::getNumOnlineCores | ( | ) | const |
Reads number of online logical cores in the system.
Referenced by pcm::getCoreIPC(), pcm::getIncomingQPILinkUtilization(), pcm::getOutgoingQPILinkBytes(), pcm::getOutgoingQPILinkUtilization(), and pcm::getTotalExecUsage().
uint32 pcm::PCM::getNumOnlineSockets | ( | ) | const |
Reads number of online sockets (CPUs) in the system.
uint32 pcm::PCM::getNumSockets | ( | ) | const |
Reads number of sockets (CPUs) in the system.
Referenced by pcm::getAllIncomingQPILinkBytes(), and pcm::getAllOutgoingQPILinkBytes().
PCIeCounterState pcm::PCM::getPCIeCounterState | ( | const uint32 | socket_, |
const uint32 | ctr_ = 0 |
||
) |
Get the state of PCIe counter(s)
socket_ | socket of the PCIe controller |
|
inline |
Return QPI Link Speed in GBytes/second.
Referenced by pcm::getIncomingQPILinkUtilization(), pcm::getOutgoingQPILinkBytes(), and pcm::getOutgoingQPILinkUtilization().
|
inline |
Returns the number of Intel(r) Quick Path Interconnect(tm) links per socket.
ServerUncoreCounterState pcm::PCM::getServerUncoreCounterState | ( | uint32 | socket | ) |
Reads the power/energy counter state of a socket (works only on microarchitecture codename SandyBridge-EP)
socket | socket id |
bool pcm::PCM::getSMT | ( | ) | const |
Checks if SMT (HyperThreading) is enabled.
SocketCounterState pcm::PCM::getSocketCounterState | ( | uint32 | socket | ) |
Reads the counter state of a socket.
socket | socket id |
References isCoreOnline().
Referenced by pcm::getSocketCounterState().
|
inline |
Determines socket of given core.
core_id | core identifier |
SystemCounterState pcm::PCM::getSystemCounterState | ( | ) |
Reads the counter state of the system.
System consists of several sockets (CPUs). Socket has a CPU in it. Socket (CPU) consists of several (logical) cores.
References isCoreOnline().
Referenced by pcm::getSystemCounterState().
|
inline |
The system, sockets, uncores, cores and threads are structured like a tree.
|
inline |
Determines physical thread of given processor ID within a core.
os_id | processor identifier |
uint32 pcm::PCM::getThreadsPerCore | ( | ) | const |
Reads how many hardware threads a physical core has. "Hardware thread" is a logical core in a different terminology. If Intel(r) Hyperthreading(tm) is enabled then this function returns 2.
Referenced by pcm::getCoreIPC(), and pcm::getTotalExecUsage().
uint64 pcm::PCM::getTickCount | ( | uint64 | multiplier = 1000 , |
uint32 | core = 0 |
||
) |
Return TSC timer value in time units.
multiplier | use 1 for seconds, 1000 for milliseconds, 1000000 for microseconds, etc. (default is 1000: milliseconds) |
core | core to read on-chip TSC value (default is 0) |
References getNominalFrequency().
|
inline |
Determines physical tile (cores sharing L2 cache) of given processor ID.
os_id | processor identifier |
const char * pcm::PCM::getUArchCodename | ( | const int32 | cpu_model_ = -1 | ) | const |
Get a string describing the codename of the processor microarchitecture.
cpu_model_ | cpu model (if no parameter provided the codename of the detected CPU is returned) |
void pcm::PCM::getUncoreCounterStates | ( | SystemCounterState & | systemState, |
std::vector< SocketCounterState > & | socketStates | ||
) |
Reads uncore counter states (including system and sockets) but no core counters.
systemState | system counter state (return parameter) |
socketStates | socket counter states (return parameter) |
bool pcm::PCM::good | ( | ) |
Checks the status of PCM object.
|
static |
Loads and initializes Winring0 third party library for access to processor model specific and PCI configuration registers.
Referenced by pcm::Driver::start().
bool pcm::PCM::isCoreOnline | ( | int32 | os_core_id | ) | const |
Returns true if the core is online.
os_core_id | OS core id |
Referenced by getAllCounterStates(), getSocketCounterState(), and getSystemCounterState().
bool pcm::PCM::isSocketOnline | ( | int32 | socket_id | ) | const |
Returns true if the socket is online.
socket_id | OS socket id |
References getCPUBrandString(), and isAtom().
bool pcm::PCM::isSomeCoreOfflined | ( | ) |
runs CPUID.0xB.0x01 to get maximum logical cores (including SMT) per socket. max_lcores_per_socket is returned in EBX[15:0]. Compare this value with number of cores per socket detected in the system to see if some cores are offlined
bool pcm::PCM::L3CacheOccupancyMetricAvailable | ( | ) | const |
checks if L3 cache monitoring present
References L3QOSMetricAvailable(), and QOSMetricAvailable().
bool pcm::PCM::L3QOSMetricAvailable | ( | ) | const |
checks L3 cache support for QOS present
References isSecureBoot().
Referenced by CoreLocalMemoryBWMetricAvailable(), CoreRemoteMemoryBWMetricAvailable(), and L3CacheOccupancyMetricAvailable().
PCM::ErrorCode pcm::PCM::program | ( | const ProgramMode | mode_ = DEFAULT_EVENTS , |
const void * | parameter_ = NULL , |
||
const bool | silent = false , |
||
const int | pid = -1 |
||
) |
Programs performance counters.
mode_ | mode of programming, see ProgramMode definition |
parameter_ | optional parameter for some of programming modes |
silent | set to true to silence diagnostic messages |
pid | restrict core metrics only to specified pid (process id) |
Call this method before you start using the performance counting routines.
References CUSTOM_CORE_EVENTS, EXT_CUSTOM_CORE_EVENTS, isAtom(), and isHWTMAL1Supported().
void pcm::PCM::programCbo | ( | const uint64 * | events, |
const uint32 | opCode = 0 , |
||
const uint32 | nc_ = 0 , |
||
const uint32 | llc_lookup_tid_filter = 0 , |
||
const uint32 | loc = 1 , |
||
const uint32 | rem = 1 |
||
) |
Program CBO (or CHA on SKX+) counters.
events | array with four raw event values |
opCode | opcode match filter |
nc_ | match non-coherent requests |
llc_lookup_tid_filter | filter for LLC lookup event filter and TID filter (core and thread ID) |
loc | match on local node target |
rem | match on remote node target |
void pcm::PCM::programCboRaw | ( | const uint64 * | events, |
const uint64 | filter0, | ||
const uint64 | filter1 | ||
) |
Program CBO (or CHA on SKX+) counters.
events | array with four raw event values |
filter0 | raw filter value |
filter1 | raw filter1 value |
void pcm::PCM::programIIOCounters | ( | uint64 | rawEvents[4], |
int | IIOStack = -1 |
||
) |
Program uncore IIO events.
rawEvents | events to program (raw format) |
IIOStack | id of the IIO stack to program (-1 for all, if parameter omitted) |
void pcm::PCM::programIRPCounters | ( | uint64 | rawEvents[4], |
int | IIOStack = -1 |
||
) |
Program uncore IRP events.
rawEvents | events to program (raw format) |
IIOStack | id of the IIO stack to program (-1 for all, if parameter omitted) |
void pcm::PCM::programPCIeEventGroup | ( | eventGroup_t & | eventGroup | ) |
Program uncore PCIe monitoring event(s)
eventGroup | - events to program for the same run |
PCM::ErrorCode pcm::PCM::programServerUncoreLatencyMetrics | ( | bool | enable_pmm | ) |
Programs uncore latency counters on microarchitectures codename SandyBridge-EP and later Xeon uarch.
enable_pmm | enables DDR/PMM. See possible profile values in pcm-latency.cpp example |
Call this method before you start using the latency counter routines on microarchitecture codename SandyBridge-EP and later Xeon uarch
PCM::ErrorCode pcm::PCM::programServerUncorePowerMetrics | ( | int | mc_profile, |
int | pcu_profile, | ||
int * | freq_bands = NULL |
||
) |
Programs uncore power/energy counters on microarchitectures codename SandyBridge-EP and later Xeon uarch.
mc_profile | profile for integrated memory controller PMU. See possible profile values in pcm-power.cpp example |
pcu_profile | profile for power control unit PMU. See possible profile values in pcm-power.cpp example |
freq_bands | array of three integer values for core frequency band monitoring. See usage in pcm-power.cpp example |
Call this method before you start using the power counter routines on microarchitecture codename SandyBridge-EP and later Xeon uarch
bool pcm::PCM::QOSMetricAvailable | ( | ) | const |
checks if QOS monitoring support present
References isSecureBoot().
Referenced by CoreLocalMemoryBWMetricAvailable(), CoreRemoteMemoryBWMetricAvailable(), and L3CacheOccupancyMetricAvailable().
void pcm::PCM::resetPMU | ( | ) |
Forces PMU reset.
If there is no chance to free up PMU from other applications you might try to call this method at your own risk.
Referenced by checkError().
void pcm::PCM::setupCustomCoreEventsForNuma | ( | PCM::ExtendedCustomCoreEventDescription & | conf | ) | const |
Setup ExtendedCustomCoreEventDescription object to read offcore (numa) counters for each processor type.
conf | conf object to setup offcore MSR values |