Processor Counter Monitor
Classes | Public Types | Public Member Functions | Static Public Member Functions | Friends | List of all members
pcm::PCM Class Reference

CPU Performance Monitor. More...

#include <cpucounters.h>

Classes

struct  CustomCoreEventDescription
 Custom Core event description. More...
 
struct  CustomIIOEventDescription
 
struct  ExtendedCustomCoreEventDescription
 Extended custom core event description. More...
 
struct  RawPMUConfig
 
struct  SimplePCIeDevInfo
 

Public Types

enum  { MAX_C_STATE = 10 }
 
enum  ProgramMode { DEFAULT_EVENTS = 0, CUSTOM_CORE_EVENTS = 1, EXT_CUSTOM_CORE_EVENTS = 2, INVALID_MODE }
 Mode of programming (parameter in the program() method) More...
 
enum  ErrorCode { Success = 0, MSRAccessDenied = 1, PMUBusy = 2, UnknownError }
 Return codes (e.g. for program(..) method)
 
enum  PerfmonField {
  INVALID, OPCODE, EVENT_SELECT, UMASK,
  RESET, EDGE_DET, IGNORED, OVERFLOW_ENABLE,
  ENABLE, INVERT, THRESH, CH_MASK,
  FC_MASK, H_EVENT_NAME, V_EVENT_NAME, MULTIPLIER,
  DIVIDER, COUNTER_INDEX
}
 
enum  PCIeWidthMode {
  X1, X4, X8, X16,
  XFF
}
 
enum  {
  IIO_CBDMA = 0, IIO_PCIe0 = 1, IIO_PCIe1 = 2, IIO_PCIe2 = 3,
  IIO_MCP0 = 4, IIO_MCP1 = 5, IIO_STACK_COUNT = 6
}
 
enum  SkylakeIIOStacks {
  SKX_IIO_CBDMA_DMI = 0, SKX_IIO_PCIe0 = 1, SKX_IIO_PCIe1 = 2, SKX_IIO_PCIe2 = 3,
  SKX_IIO_MCP0 = 4, SKX_IIO_MCP1 = 5, SKX_IIO_STACK_COUNT = 6
}
 
enum  IcelakeIIOStacks {
  ICX_IIO_PCIe0 = 0, ICX_IIO_PCIe1 = 1, ICX_IIO_MCP0 = 2, ICX_IIO_PCIe2 = 3,
  ICX_IIO_PCIe3 = 4, ICX_IIO_CBDMA_DMI = 5, ICX_IIO_STACK_COUNT = 6
}
 
enum  SnowridgeIIOStacks {
  SNR_IIO_QAT = 0, SNR_IIO_CBDMA_DMI = 1, SNR_IIO_NIS = 2, SNR_IIO_HQM = 3,
  SNR_IIO_PCIe0 = 4, SNR_IIO_STACK_COUNT = 5
}
 
enum  MSREventPosition { index = 0, type = 1 }
 
enum  MSRType { Static = 0, Freerun = 1 }
 
enum  EventPosition { TOR_OCCUPANCY = 0, TOR_INSERTS = 1, REQUESTS_ALL = 2, REQUESTS_LOCAL = 3 }
 
enum  { OCR0Pos = 1, OCR1Pos = 2, LoadLatencyPos = 3, FrontendPos = 4 }
 
enum  SupportedCPUModels {
  NEHALEM_EP = 26, NEHALEM = 30, ATOM = 28, ATOM_2 = 53,
  CENTERTON = 54, BAYTRAIL = 55, AVOTON = 77, CHERRYTRAIL = 76,
  APOLLO_LAKE = 92, DENVERTON = 95, SNOWRIDGE = 134, CLARKDALE = 37,
  WESTMERE_EP = 44, NEHALEM_EX = 46, WESTMERE_EX = 47, SANDY_BRIDGE = 42,
  JAKETOWN = 45, IVY_BRIDGE = 58, HASWELL = 60, HASWELL_ULT = 69,
  HASWELL_2 = 70, IVYTOWN = 62, HASWELLX = 63, BROADWELL = 61,
  BROADWELL_XEON_E3 = 71, BDX_DE = 86, SKL_UY = 78, KBL = 158,
  KBL_1 = 142, CML = 166, CML_1 = 165, ICL = 126,
  ICL_1 = 125, RKL = 167, TGL = 140, TGL_1 = 141,
  ADL = 151, ADL_1 = 154, BDX = 79, KNL = 87,
  SKL = 94, SKX = 85, ICX_D = 108, ICX = 106,
  END_OF_MODEL_LIST = 0x0ffff
}
 Identifiers of supported CPU models.
 
enum  PCIeEventCode {
  PCIeRdCur = 0x19E, PCIeNSRd = 0x1E4, PCIeWiLF = 0x194, PCIeItoM = 0x19C,
  PCIeNSWr = 0x1E5, PCIeNSWrF = 0x1E6, RFO = 0x180, CRd = 0x181,
  DRd = 0x182, PRd = 0x187, WiL = 0x18F, ItoM = 0x1C8,
  SKX_RFO = 0x200, SKX_CRd = 0x201, SKX_DRd = 0x202, SKX_PRd = 0x207,
  SKX_WiL = 0x20F, SKX_RdCur = 0x21E, SKX_ItoM = 0x248
}
 
enum  ChaPipelineQueue { None, IRQ, PRQ }
 
enum  CBoEventTid { RFOtid = 0x3E, ItoMtid = 0x3E }
 
typedef std::pair< std::array< uint64, 5 >, std::string > RawEventConfig
 
typedef std::map< std::string, RawPMUConfig > RawPMUConfigs
 

Public Member Functions

bool isCoreCStateResidencySupported (int state)
 Returns true if the specified core C-state residency metric is supported.
 
bool isPackageCStateResidencySupported (int state)
 Returns true if the specified package C-state residency metric is supported.
 
void restoreOutput ()
 Restores output, closes output file if opened.
 
void setRunState (int new_state)
 Set Run State.
 
int getRunState (void)
 Returns program's Run State.
 
bool isBlocked (void)
 
void setBlocked (const bool new_blocked)
 
bool isHWTMAL1Supported () const
 check if TMA level 1 metrics are supported
 
bool isSecureBoot () const
 check if in secure boot mode
 
bool useLinuxPerfForUncore () const
 true if Linux perf for uncore PMU programming should AND can be used internally
 
SystemRoot const & getSystemTopology () const
 The system, sockets, uncores, cores and threads are structured like a tree. More...
 
void printDetailedSystemTopology ()
 prints detailed system topology
 
bool QOSMetricAvailable () const
 checks if QOS monitoring support present More...
 
bool L3QOSMetricAvailable () const
 checks L3 cache support for QOS present More...
 
bool L3CacheOccupancyMetricAvailable () const
 checks if L3 cache monitoring present More...
 
bool CoreLocalMemoryBWMetricAvailable () const
 checks if local memory bandwidth monitoring present More...
 
bool CoreRemoteMemoryBWMetricAvailable () const
 checks if total memory bandwidth monitoring present More...
 
unsigned getMaxRMID () const
 returns the max number of RMID supported by socket More...
 
uint32 getMaxNumOfCBoxes () const
 Returns the number of CBO or CHA units per socket.
 
uint32 getMaxNumOfIIOStacks () const
 Returns the number of IIO stacks per socket.
 
bool good ()
 Checks the status of PCM object. More...
 
const std::string & getErrorMessage () const
 Returns the error message. More...
 
ErrorCode program (const ProgramMode mode_=DEFAULT_EVENTS, const void *parameter_=NULL, const bool silent=false, const int pid=-1)
 Programs performance counters. More...
 
void checkError (const ErrorCode code)
 checks the error and suggests solution and/or exits the process More...
 
ErrorCode programServerUncoreLatencyMetrics (bool enable_pmm)
 Programs uncore latency counters on microarchitectures codename SandyBridge-EP and later Xeon uarch. More...
 
ErrorCode programServerUncorePowerMetrics (int mc_profile, int pcu_profile, int *freq_bands=NULL)
 Programs uncore power/energy counters on microarchitectures codename SandyBridge-EP and later Xeon uarch. More...
 
ErrorCode programServerUncoreMemoryMetrics (const ServerUncoreMemoryMetrics &metrics, int rankA=-1, int rankB=-1)
 
ErrorCode program (const RawPMUConfigs &curPMUConfigs, const bool silent=false, const int pid=-1)
 
std::pair< unsigned, unsigned > getOCREventNr (const int event, const unsigned coreID) const
 
void freezeServerUncoreCounters ()
 Freezes uncore event counting (works only on microarchitecture codename SandyBridge-EP and IvyTown)
 
void unfreezeServerUncoreCounters ()
 Unfreezes uncore event counting (works only on microarchitecture codename SandyBridge-EP and IvyTown)
 
ServerUncoreCounterState getServerUncoreCounterState (uint32 socket)
 Reads the power/energy counter state of a socket (works only on microarchitecture codename SandyBridge-EP) More...
 
void cleanup (const bool silent=false)
 Cleans up resources and stops performance counting. More...
 
void resetPMU ()
 Forces PMU reset. More...
 
void getAllCounterStates (SystemCounterState &systemState, std::vector< SocketCounterState > &socketStates, std::vector< CoreCounterState > &coreStates, const bool readAndAggregateSocketUncoreCounters=true)
 Reads all counter states (including system, sockets and cores) More...
 
void getUncoreCounterStates (SystemCounterState &systemState, std::vector< SocketCounterState > &socketStates)
 Reads uncore counter states (including system and sockets) but no core counters. More...
 
bool isCoreOnline (int32 os_core_id) const
 Return true if the core is online. More...
 
bool isSocketOnline (int32 socket_id) const
 Return true if the socket is online. More...
 
SystemCounterState getSystemCounterState ()
 Reads the counter state of the system. More...
 
SocketCounterState getSocketCounterState (uint32 socket)
 Reads the counter state of a socket. More...
 
CoreCounterState getCoreCounterState (uint32 core)
 Reads the counter state of a (logical) core. More...
 
uint32 getNumCores () const
 Reads number of logical cores in the system. More...
 
uint32 getNumOnlineCores () const
 Reads number of online logical cores in the system. More...
 
uint32 getNumSockets () const
 Reads number of sockets (CPUs) in the system. More...
 
uint32 getNumOnlineSockets () const
 Reads number of online sockets (CPUs) in the system. More...
 
uint32 getThreadsPerCore () const
 Reads how many hardware threads a physical core has. "Hardware thread" is a logical core in a different terminology. If Intel(r) Hyperthreading(tm) is enabled then this function returns 2. More...
 
bool getSMT () const
 Checks if SMT (HyperThreading) is enabled. More...
 
uint64 getNominalFrequency () const
 Reads the nominal core frequency. More...
 
uint32 getL3ScalingFactor () const
 runs CPUID.0xF.0x01 to get the L3 up scaling factor to calculate L3 Occupancy. Scaling factor is returned in EBX register after running the CPU instruction. More...
 
bool isSomeCoreOfflined ()
 runs CPUID.0xB.0x01 to get maximum logical cores (including SMT) per socket. max_lcores_per_socket is returned in EBX[15:0]. Compare this value with number of cores per socket detected in the system to see if some cores are offlined More...
 
int32 getMaxCustomCoreEvents ()
 Returns the maximum number of custom (general-purpose) core events supported by CPU.
 
uint32 getCPUModel () const
 Reads CPU model id. More...
 
uint32 getCPUStepping () const
 Reads CPU stepping id. More...
 
int32 getThreadId (uint32 os_id) const
 Determines physical thread of given processor ID within a core. More...
 
int32 getCoreId (uint32 os_id) const
 Determines physical core of given processor ID within a socket. More...
 
int32 getTileId (uint32 os_id) const
 Determines physical tile (cores sharing L2 cache) of given processor ID. More...
 
int32 getSocketId (uint32 core_id) const
 Determines socket of given core. More...
 
uint64 getQPILinksPerSocket () const
 Returns the number of Intel(r) Quick Path Interconnect(tm) links per socket. More...
 
uint32 getMCPerSocket () const
 Returns the number of detected integrated memory controllers per socket.
 
size_t getMCChannelsPerSocket () const
 Returns the total number of detected memory channels on all integrated memory controllers per socket.
 
size_t getMCChannels (uint32 socket, uint32 controller) const
 Returns the number of detected memory channels on given integrated memory controllers. More...
 
size_t getEDCChannelsPerSocket () const
 Returns the total number of detected memory channels on all integrated memory controllers per socket.
 
uint32 getMaxIPC () const
 Returns the max number of instructions per cycle. More...
 
uint64 getPCUFrequency () const
 Returns the frequency of Power Control Unit.
 
bool isServerCPU () const
 Returns whether it is a server part.
 
bool isClientCPU () const
 Returns whether it is a client part.
 
uint64 getTickCount (uint64 multiplier=1000, uint32 core=0)
 Return TSC timer value in time units. More...
 
uint64 getInvariantTSC_Fast (uint32 core=0)
 
uint64 getUncoreClocks (const uint32 socket_)
 Returns uncore clock ticks on specified socket.
 
uint64 getQPILinkSpeed (uint32 socketNr, uint32 linkNr) const
 Return QPI Link Speed in GBytes/second. More...
 
double getJoulesPerEnergyUnit () const
 Returns how many joules are in an internal processor energy unit.
 
int32 getPackageThermalSpecPower () const
 Returns thermal specification power of the package domain in Watt.
 
int32 getPackageMinimumPower () const
 Returns minimum power derived from electrical spec of the package domain in Watt.
 
int32 getPackageMaximumPower () const
 Returns maximum power derived from electrical spec of the package domain in Watt.
 
void disableJKTWorkaround ()
 
void programPCIeEventGroup (eventGroup_t &eventGroup)
 Program uncore PCIe monitoring event(s) More...
 
uint64 getPCIeCounterData (const uint32 socket_, const uint32 ctr_)
 
void programCbo (const uint64 *events, const uint32 opCode=0, const uint32 nc_=0, const uint32 llc_lookup_tid_filter=0, const uint32 loc=1, const uint32 rem=1)
 Program CBO (or CHA on SKX+) counters. More...
 
void programCboRaw (const uint64 *events, const uint64 filter0, const uint64 filter1)
 Program CBO (or CHA on SKX+) counters. More...
 
PCIeCounterState getPCIeCounterState (const uint32 socket_, const uint32 ctr_=0)
 Get the state of PCIe counter(s) More...
 
void programIIOCounters (uint64 rawEvents[4], int IIOStack=-1)
 Program uncore IIO events. More...
 
void programIRPCounters (uint64 rawEvents[4], int IIOStack=-1)
 Program uncore IRP events. More...
 
IIOCounterState getIIOCounterState (int socket, int IIOStack, int counter)
 Get the state of IIO counter. More...
 
void getIIOCounterStates (int socket, int IIOStack, IIOCounterState *result)
 Get the states of the four IIO counters in bulk (faster than four single reads) More...
 
uint64 extractCoreGenCounterValue (uint64 val)
 
uint64 extractCoreFixedCounterValue (uint64 val)
 
uint64 extractUncoreGenCounterValue (uint64 val)
 
uint64 extractUncoreFixedCounterValue (uint64 val)
 
uint64 extractQOSMonitoring (uint64 val)
 
const char * getUArchCodename (const int32 cpu_model_=-1) const
 Get a string describing the codename of the processor microarchitecture. More...
 
std::string getCPUFamilyModelString ()
 
void enableForceRTMAbortMode (const bool silent=false)
 Enables "force all RTM transaction abort" mode also enabling 4+ programmable counters on Skylake generation processors.
 
bool isForceRTMAbortModeEnabled () const
 queries status of "force all RTM transaction abort" mode
 
void disableForceRTMAbortMode (const bool silent=false)
 Disables "force all RTM transaction abort" mode restricting the number of programmable counters on Skylake generation processors to 3.
 
bool isForceRTMAbortModeAvailable () const
 queries availability of "force all RTM transaction abort" mode
 
int64 getCPUMicrocodeLevel () const
 Get microcode level (returns -1 if retrieval not supported due to some restrictions)
 
bool isAtom () const
 returns true if CPU is Atom-based
 
bool packageEnergyMetricsAvailable () const
 
bool dramEnergyMetricsAvailable () const
 
bool packageThermalMetricsAvailable () const
 
bool outgoingQPITrafficMetricsAvailable () const
 
bool incomingQPITrafficMetricsAvailable () const
 
bool localMemoryRequestRatioMetricAvailable () const
 
bool qpiUtilizationMetricsAvailable () const
 
bool memoryTrafficMetricsAvailable () const
 
bool MCDRAMmemoryTrafficMetricsAvailable () const
 
bool memoryIOTrafficMetricAvailable () const
 
bool IIOEventsAvailable () const
 
bool uncoreFrequencyMetricAvailable () const
 
bool LatencyMetricsAvailable () const
 
bool DDRLatencyMetricsAvailable () const
 
bool PMMTrafficMetricsAvailable () const
 
bool LLCReadMissLatencyMetricsAvailable () const
 
bool hasBecktonUncore () const
 
bool hasPCICFGUncore () const
 
bool isSkxCompatible () const
 
bool hasUPI () const
 
const char * xPI () const
 
bool hasCHA () const
 
bool supportsHLE () const
 
bool supportsRTM () const
 
bool supportsRDTSCP () const
 
bool useSkylakeEvents () const
 
bool hasClientMCCounters () const
 
double getBytesPerFlit () const
 
double getDataBytesPerFlit () const
 
double getBytesPerLinkCycle () const
 
double getBytesPerLinkTransfer () const
 
void setupCustomCoreEventsForNuma (PCM::ExtendedCustomCoreEventDescription &conf) const
 Setup ExtendedCustomCoreEventDescription object to read offcore (numa) counters for each processor type. More...
 
bool isActiveRelativeFrequencyAvailable () const
 

Static Public Member Functions

static void setOutput (const std::string filename, const bool cerrToo=false)
 Redirects output destination to provided file, instead of std::cout and std::cerr (optional)
 
static PCM * getInstance ()
 Returns PCM object. More...
 
static int getCPUModelFromCPUID ()
 Returns cpu model id number from cpuid instruction.
 
static bool initWinRing0Lib ()
 Loads and initializes Winring0 third party library for access to processor model specific and PCI configuration registers. More...
 
static std::string getCPUBrandString ()
 Get Brand string of processor.
 
static bool isAtom (const int32 cpu_model_)
 returns true if CPU model is Atom-based
 
static bool hasUPI (const int32 cpu_model_)
 
static double getBytesPerFlit (int32 cpu_model_)
 
static double getDataBytesPerFlit (int32 cpu_model_)
 
static double getFlitsPerLinkCycle (int32 cpu_model_)
 
static double getBytesPerLinkCycle (int32 cpu_model_)
 
static double getLinkTransfersPerLinkCycle ()
 

Friends

class BasicCounterState
 
class UncoreCounterState
 
class Socket
 
class ServerUncore
 
class PerfVirtualControlRegister
 
class Aggregator
 
class ServerPCICFGUncore
 

Detailed Description

CPU Performance Monitor.

This singleton object needs to be instantiated for each process before accessing counting and measuring routines.

Member Enumeration Documentation

◆ ProgramMode

Mode of programming (parameter in the program() method)

Enumerator
DEFAULT_EVENTS 

Default choice of events, the additional parameter is not needed and ignored

CUSTOM_CORE_EVENTS 

Custom set of core events specified in the parameter to the program method. The parameter must be a pointer to array of four CustomCoreEventDescription values

EXT_CUSTOM_CORE_EVENTS 

Custom set of core events specified in the parameter to the program method. The parameter must be a pointer to a ExtendedCustomCoreEventDescription data structure

INVALID_MODE 

Non-programmed mode

Member Function Documentation

◆ checkError()

void pcm::PCM::checkError ( const ErrorCode  code)

checks the error and suggests solution and/or exits the process

Parameters
code: error code from the 'program' call

References resetPMU().

◆ cleanup()

void pcm::PCM::cleanup ( const bool  silent = false)

Cleans up resources and stops performance counting.

One needs to call this method when the program finishes and/or the performance counting routines are not going to be used anymore.

References disableForceRTMAbortMode().

◆ CoreLocalMemoryBWMetricAvailable()

bool pcm::PCM::CoreLocalMemoryBWMetricAvailable ( ) const

checks if local memory bandwidth monitoring present

Returns
true or false

References L3QOSMetricAvailable(), and QOSMetricAvailable().

◆ CoreRemoteMemoryBWMetricAvailable()

bool pcm::PCM::CoreRemoteMemoryBWMetricAvailable ( ) const

checks if total memory bandwidth monitoring present

Returns
true or false

References L3QOSMetricAvailable(), and QOSMetricAvailable().

◆ getAllCounterStates()

void pcm::PCM::getAllCounterStates ( SystemCounterState &  systemState,
std::vector< SocketCounterState > &  socketStates,
std::vector< CoreCounterState > &  coreStates,
const bool  readAndAggregateSocketUncoreCounters = true 
)

Reads all counter states (including system, sockets and cores)

Parameters
systemState: system counter state (return parameter)
socketStates: socket counter states (return parameter)
coreStates: core counter states (return parameter)
readAndAggregateSocketUncoreCounters: read and aggregate socket uncore counters

References isCoreOnline().

◆ getCoreCounterState()

CoreCounterState pcm::PCM::getCoreCounterState ( uint32  core)

Reads the counter state of a (logical) core.

Be aware that during the measurement other threads may be scheduled on the same core by the operating system (this is called context-switching). The performance events caused by these threads will be counted as well.

Parameters
core: core id
Returns
State of counters in the core

Referenced by pcm::getCoreCounterState().

◆ getCoreId()

int32 pcm::PCM::getCoreId ( uint32  os_id) const
inline

Determines physical core of given processor ID within a socket.

Parameters
os_id: processor identifier
Returns
physical core identifier

◆ getCPUModel()

uint32 pcm::PCM::getCPUModel ( ) const
inline

Reads CPU model id.

Returns
CPU model ID

Referenced by pcm::getDRAMClocks(), and pcm::getDRAMConsumedJoules().

◆ getCPUStepping()

uint32 pcm::PCM::getCPUStepping ( ) const
inline

Reads CPU stepping id.

Returns
CPU stepping ID

◆ getErrorMessage()

const std::string& pcm::PCM::getErrorMessage ( ) const
inline

Returns the error message.

Call this when good() returns false; otherwise an empty string is returned.

◆ getIIOCounterState()

IIOCounterState pcm::PCM::getIIOCounterState ( int  socket,
int  IIOStack,
int  counter 
)

Get the state of IIO counter.

Parameters
socket: socket of the IIO stack
IIOStack: id of the IIO stack
Returns
State of IIO counter

◆ getIIOCounterStates()

void pcm::PCM::getIIOCounterStates ( int  socket,
int  IIOStack,
IIOCounterState *  result 
)

Get the states of the four IIO counters in bulk (faster than four single reads)

Parameters
socket: socket of the IIO stack
IIOStack: id of the IIO stack
result: states of IIO counters (array of four IIOCounterState elements)

◆ getInstance()

PCM * pcm::PCM::getInstance ( )
static

Returns PCM object.

Returns PCM object. If the PCM has not been created before then an instance is created. PCM is a singleton.

Returns
Pointer to PCM object

Referenced by pcm::ServerPCICFGUncore::computeQPISpeed(), pcm::getActiveAverageFrequency(), pcm::getActiveRelativeFrequency(), pcm::getAllIncomingQPILinkBytes(), pcm::getAllOutgoingQPILinkBytes(), pcm::getAverageUncoreFrequency(), pcm::getBackendBound(), pcm::getBadSpeculation(), pcm::getBytesReadFromEDC(), pcm::getBytesReadFromMC(), pcm::getBytesReadFromPMM(), pcm::getBytesWrittenToEDC(), pcm::getBytesWrittenToMC(), pcm::getBytesWrittenToPMM(), pcm::getConsumedJoules(), pcm::getCoreCounterState(), pcm::getCoreCStateResidency(), pcm::getCoreIPC(), pcm::getDRAMClocks(), pcm::getDRAMConsumedJoules(), pcm::getEDCCounter(), pcm::getFrontendBound(), pcm::getGTRequestBytesFromMC(), pcm::getIARequestBytesFromMC(), pcm::getIncomingQPILinkBytes(), pcm::getIncomingQPILinkUtilization(), pcm::getIORequestBytesFromMC(), pcm::getL2CacheHitRatio(), pcm::getL2CacheHits(), pcm::getL2CacheMisses(), pcm::getL3CacheHitRatio(), pcm::getL3CacheHits(), pcm::getL3CacheHitsNoSnoop(), pcm::getL3CacheHitsSnoop(), pcm::getL3CacheMisses(), pcm::getL3CacheOccupancy(), pcm::getLLCReadMissLatency(), pcm::getLocalMemoryBW(), pcm::getLocalMemoryRequestRatio(), pcm::getOutgoingQPILinkBytes(), pcm::getOutgoingQPILinkUtilization(), pcm::getPackageCStateResidency(), pcm::getQPItoMCTrafficRatio(), pcm::getRemoteMemoryBW(), pcm::getRetiring(), pcm::getSocketCounterState(), pcm::getSocketIncomingQPILinkBytes(), pcm::getSystemCounterState(), pcm::getTotalExecUsage(), PCMServiceNS::PCMService::OnStart(), pcm::ServerPCICFGUncore::program(), pcm::ServerPCICFGUncore::programServerUncoreMemoryMetrics(), and pcm::ServerPCICFGUncore::reportQPISpeed().

◆ getL3ScalingFactor()

uint32 pcm::PCM::getL3ScalingFactor ( ) const

runs CPUID.0xF.0x01 to get the L3 up scaling factor to calculate L3 Occupancy. Scaling factor is returned in EBX register after running the CPU instruction.

Returns
L3 up scaling factor

◆ getMaxIPC()

uint32 pcm::PCM::getMaxIPC ( ) const
inline

Returns the max number of instructions per cycle.

Returns
max number of instructions per cycle

◆ getMaxRMID()

unsigned pcm::PCM::getMaxRMID ( ) const

returns the max number of RMID supported by socket

Returns
maximum number of RMID supported by socket

◆ getMCChannels()

size_t pcm::PCM::getMCChannels ( uint32  socket,
uint32  controller 
) const
inline

Returns the number of detected memory channels on given integrated memory controllers.

Parameters
socket: socket
controller: controller

◆ getNominalFrequency()

uint64 pcm::PCM::getNominalFrequency ( ) const

◆ getNumCores()

uint32 pcm::PCM::getNumCores ( ) const

Reads number of logical cores in the system.

Returns
Number of logical cores in the system

Referenced by pcm::getCoreIPC(), and pcm::getTotalExecUsage().

◆ getNumOnlineCores()

uint32 pcm::PCM::getNumOnlineCores ( ) const

Reads number of online logical cores in the system.

Returns
Number of online logical cores in the system

Referenced by pcm::getCoreIPC(), pcm::getIncomingQPILinkUtilization(), pcm::getOutgoingQPILinkBytes(), pcm::getOutgoingQPILinkUtilization(), and pcm::getTotalExecUsage().

◆ getNumOnlineSockets()

uint32 pcm::PCM::getNumOnlineSockets ( ) const

Reads number of online sockets (CPUs) in the system.

Returns
Number of online sockets in the system

◆ getNumSockets()

uint32 pcm::PCM::getNumSockets ( ) const

Reads number of sockets (CPUs) in the system.

Returns
Number of sockets in the system

Referenced by pcm::getAllIncomingQPILinkBytes(), and pcm::getAllOutgoingQPILinkBytes().

◆ getPCIeCounterState()

PCIeCounterState pcm::PCM::getPCIeCounterState ( const uint32  socket_,
const uint32  ctr_ = 0 
)

Get the state of PCIe counter(s)

Parameters
socket_: socket of the PCIe controller
Returns
State of PCIe counter(s)

◆ getQPILinkSpeed()

uint64 pcm::PCM::getQPILinkSpeed ( uint32  socketNr,
uint32  linkNr 
) const
inline

Return QPI Link Speed in GBytes/second.

Warning
Works only for Nehalem-EX (Xeon 7500) and Xeon E7 and E5 processors
Returns
QPI Link Speed in GBytes/second

Referenced by pcm::getIncomingQPILinkUtilization(), pcm::getOutgoingQPILinkBytes(), and pcm::getOutgoingQPILinkUtilization().

◆ getQPILinksPerSocket()

uint64 pcm::PCM::getQPILinksPerSocket ( ) const
inline

Returns the number of Intel(r) Quick Path Interconnect(tm) links per socket.

Returns
number of QPI links per socket

◆ getServerUncoreCounterState()

ServerUncoreCounterState pcm::PCM::getServerUncoreCounterState ( uint32  socket)

Reads the power/energy counter state of a socket (works only on microarchitecture codename SandyBridge-EP)

Parameters
socket: socket id
Returns
State of power counters in the socket

◆ getSMT()

bool pcm::PCM::getSMT ( ) const

Checks if SMT (HyperThreading) is enabled.

Returns
true iff SMT (HyperThreading) is enabled.

◆ getSocketCounterState()

SocketCounterState pcm::PCM::getSocketCounterState ( uint32  socket)

Reads the counter state of a socket.

Parameters
socket: socket id
Returns
State of counters in the socket

References isCoreOnline().

Referenced by pcm::getSocketCounterState().

◆ getSocketId()

int32 pcm::PCM::getSocketId ( uint32  core_id) const
inline

Determines socket of given core.

Parameters
core_id: core identifier
Returns
socket identifier

◆ getSystemCounterState()

SystemCounterState pcm::PCM::getSystemCounterState ( )

Reads the counter state of the system.

System consists of several sockets (CPUs). Socket has a CPU in it. Socket (CPU) consists of several (logical) cores.

Returns
State of counters in the entire system

References isCoreOnline().

Referenced by pcm::getSystemCounterState().

◆ getSystemTopology()

SystemRoot const& pcm::PCM::getSystemTopology ( ) const
inline

The system, sockets, uncores, cores and threads are structured like a tree.

Returns
a reference to a const System object representing the root of the tree

◆ getThreadId()

int32 pcm::PCM::getThreadId ( uint32  os_id) const
inline

Determines physical thread of given processor ID within a core.

Parameters
os_id: processor identifier
Returns
physical thread identifier

◆ getThreadsPerCore()

uint32 pcm::PCM::getThreadsPerCore ( ) const

Reads how many hardware threads a physical core has. "Hardware thread" is a logical core in a different terminology. If Intel(r) Hyperthreading(tm) is enabled then this function returns 2.

Returns
Number of hardware threads per physical core

Referenced by pcm::getCoreIPC(), and pcm::getTotalExecUsage().

◆ getTickCount()

uint64 pcm::PCM::getTickCount ( uint64  multiplier = 1000,
uint32  core = 0 
)

Return TSC timer value in time units.

Parameters
multiplier: use 1 for seconds, 1000 for ms, 1000000 for µs (microseconds), etc. (default is 1000: ms)
core: core to read on-chip TSC value (default is 0)
Returns
time counter value

References getNominalFrequency().

◆ getTileId()

int32 pcm::PCM::getTileId ( uint32  os_id) const
inline

Determines physical tile (cores sharing L2 cache) of given processor ID.

Parameters
os_id: processor identifier
Returns
physical tile identifier

◆ getUArchCodename()

const char * pcm::PCM::getUArchCodename ( const int32  cpu_model_ = -1) const

Get a string describing the codename of the processor microarchitecture.

Parameters
cpu_model_: cpu model (if no parameter provided the codename of the detected CPU is returned)

◆ getUncoreCounterStates()

void pcm::PCM::getUncoreCounterStates ( SystemCounterState &  systemState,
std::vector< SocketCounterState > &  socketStates 
)

Reads uncore counter states (including system and sockets) but no core counters.

Parameters
systemState: system counter state (return parameter)
socketStates: socket counter states (return parameter)

◆ good()

bool pcm::PCM::good ( )

Checks the status of PCM object.

Call this method to check if PCM gained access to model specific registers. The method is deprecated, see program error code instead.

Returns
true iff access to model specific registers works without problems

◆ initWinRing0Lib()

static bool pcm::PCM::initWinRing0Lib ( )
static

Loads and initializes Winring0 third party library for access to processor model specific and PCI configuration registers.

Returns
returns true in case of success

Referenced by pcm::Driver::start().

◆ isCoreOnline()

bool pcm::PCM::isCoreOnline ( int32  os_core_id) const

Return true if the core is online.

Parameters
os_core_id: OS core id

Referenced by getAllCounterStates(), getSocketCounterState(), and getSystemCounterState().

◆ isSocketOnline()

bool pcm::PCM::isSocketOnline ( int32  socket_id) const

Return true if the socket is online.

Parameters
socket_id: OS socket id

References getCPUBrandString(), and isAtom().

◆ isSomeCoreOfflined()

bool pcm::PCM::isSomeCoreOfflined ( )

runs CPUID.0xB.0x01 to get maximum logical cores (including SMT) per socket. max_lcores_per_socket is returned in EBX[15:0]. Compare this value with number of cores per socket detected in the system to see if some cores are offlined

Returns
true iff max_lcores_per_socket == number of cores per socket detected

◆ L3CacheOccupancyMetricAvailable()

bool pcm::PCM::L3CacheOccupancyMetricAvailable ( ) const

checks if L3 cache monitoring present

Returns
true or false

References L3QOSMetricAvailable(), and QOSMetricAvailable().

◆ L3QOSMetricAvailable()

bool pcm::PCM::L3QOSMetricAvailable ( ) const

checks L3 cache support for QOS present

Returns
true or false

References isSecureBoot().

Referenced by CoreLocalMemoryBWMetricAvailable(), CoreRemoteMemoryBWMetricAvailable(), and L3CacheOccupancyMetricAvailable().

◆ program()

PCM::ErrorCode pcm::PCM::program ( const ProgramMode  mode_ = DEFAULT_EVENTS,
const void *  parameter_ = NULL,
const bool  silent = false,
const int  pid = -1 
)

Programs performance counters.

Parameters
mode_: mode of programming, see ProgramMode definition
parameter_: optional parameter for some of the programming modes
silent: set to true to silence diagnostic messages
pid: restrict core metrics only to specified pid (process id)

Call this method before you start using the performance counting routines.
Warning
Using these routines with other tools that program Performance Monitoring Units (PMUs) on CPUs is not recommended because PMU cannot be shared. Tools that are known to program PMUs: Intel(r) VTune(tm), Intel(r) Performance Tuning Utility (PTU). This code may make VTune or PTU measurements invalid. VTune or PTU measurement may make measurement with this code invalid. Please enable either usage of these routines or VTune/PTU/etc.

References CUSTOM_CORE_EVENTS, EXT_CUSTOM_CORE_EVENTS, isAtom(), and isHWTMAL1Supported().

◆ programCbo()

void pcm::PCM::programCbo ( const uint64 *  events,
const uint32  opCode = 0,
const uint32  nc_ = 0,
const uint32  llc_lookup_tid_filter = 0,
const uint32  loc = 1,
const uint32  rem = 1 
)

Program CBO (or CHA on SKX+) counters.

Parameters
events: array with four raw event values
opCode: opcode match filter
nc_: match non-coherent requests
llc_lookup_tid_filter: filter for LLC lookup event filter and TID filter (core and thread ID)
loc: match on local node target
rem: match on remote node target

◆ programCboRaw()

void pcm::PCM::programCboRaw ( const uint64 *  events,
const uint64  filter0,
const uint64  filter1 
)

Program CBO (or CHA on SKX+) counters.

Parameters
events: array with four raw event values
filter0: raw filter0 value
filter1: raw filter1 value

◆ programIIOCounters()

void pcm::PCM::programIIOCounters ( uint64  rawEvents[4],
int  IIOStack = -1 
)

Program uncore IIO events.

Parameters
rawEvents: events to program (raw format)
IIOStack: id of the IIO stack to program (-1 for all, if parameter omitted)

◆ programIRPCounters()

void pcm::PCM::programIRPCounters ( uint64  rawEvents[4],
int  IIOStack = -1 
)

Program uncore IRP events.

Parameters
rawEvents: events to program (raw format)
IIOStack: id of the IIO stack to program (-1 for all, if parameter omitted)

◆ programPCIeEventGroup()

void pcm::PCM::programPCIeEventGroup ( eventGroup_t &  eventGroup)

Program uncore PCIe monitoring event(s)

Parameters
eventGroup: events to program for the same run

◆ programServerUncoreLatencyMetrics()

PCM::ErrorCode pcm::PCM::programServerUncoreLatencyMetrics ( bool  enable_pmm)

Programs uncore latency counters on microarchitectures codename SandyBridge-EP and later Xeon uarch.

Parameters
enable_pmm: enables DDR/PMM. See possible profile values in pcm-latency.cpp example

Call this method before you start using the latency counter routines on microarchitecture codename SandyBridge-EP and later Xeon uarch

Warning
After this call the memory and QPI bandwidth counters on microarchitecture codename SandyBridge-EP and later Xeon uarch will not work.
Using these routines together with other tools that program Performance Monitoring Units (PMUs) on CPUs is not recommended because a PMU cannot be shared. Tools that are known to program PMUs: Intel(r) VTune(tm), Intel(r) Performance Tuning Utility (PTU). This code may make VTune or PTU measurements invalid, and VTune or PTU measurements may make measurements with this code invalid. Please enable either usage of these routines or VTune/PTU/etc.

◆ programServerUncorePowerMetrics()

PCM::ErrorCode pcm::PCM::programServerUncorePowerMetrics ( int  mc_profile,
int  pcu_profile,
int *  freq_bands = NULL 
)

Programs uncore power/energy counters on microarchitectures codename SandyBridge-EP and later Xeon uarch.

Parameters
mc_profile: profile for integrated memory controller PMU. See possible profile values in pcm-power.cpp example
pcu_profile: profile for power control unit PMU. See possible profile values in pcm-power.cpp example
freq_bands: array of three integer values for core frequency band monitoring. See usage in pcm-power.cpp example

Call this method before you start using the power counter routines on microarchitecture codename SandyBridge-EP and later Xeon uarch

Warning
After this call the memory and QPI bandwidth counters on microarchitecture codename SandyBridge-EP and later Xeon uarch will not work.
Using these routines together with other tools that program Performance Monitoring Units (PMUs) on CPUs is not recommended because a PMU cannot be shared. Tools that are known to program PMUs: Intel(r) VTune(tm), Intel(r) Performance Tuning Utility (PTU). This code may make VTune or PTU measurements invalid, and VTune or PTU measurements may make measurements with this code invalid. Please enable either usage of these routines or VTune/PTU/etc.

◆ QOSMetricAvailable()

bool pcm::PCM::QOSMetricAvailable ( ) const

Checks whether QOS monitoring support is present.

Returns
true or false

References isSecureBoot().

Referenced by CoreLocalMemoryBWMetricAvailable(), CoreRemoteMemoryBWMetricAvailable(), and L3CacheOccupancyMetricAvailable().

◆ resetPMU()

void pcm::PCM::resetPMU ( )

Forces PMU reset.

If there is no chance to free up the PMU from other applications, you might try to call this method at your own risk.

Referenced by checkError().

◆ setupCustomCoreEventsForNuma()

void pcm::PCM::setupCustomCoreEventsForNuma ( PCM::ExtendedCustomCoreEventDescription conf) const

Sets up an ExtendedCustomCoreEventDescription object to read offcore (NUMA) counters for each processor type.

Parameters
conf: conf object to set up offcore MSR values

The documentation for this class was generated from the following files: