BRE12
PIXEventsCommon.h
1 /*==========================================================================;
2 *
3 * Copyright (C) Microsoft Corporation. All Rights Reserved.
4 *
5 * File: PIXEventsCommon.h
6 * Content: PIX include file
7 * Don't include this file directly - use pix3.h
8 *
9 ****************************************************************************/
10 #pragma once
11 
12 #ifndef _PIXEventsCommon_H_
13 #define _PIXEventsCommon_H_
14 
15 #if defined(_AMD64_) || defined(_X86_)
16 #include <emmintrin.h>
17 #endif // _AMD64_ || _X86_
18 
19 extern "C" UINT64 WINAPI PIXEventsReplaceBlock(bool getEarliestTime);
20 
//Event type identifiers. PIXEncodeEventInfo stores the value in the 10-bit type field of an
//encoded event header. Every *_OnContext value is the matching plain value with 0x010 set.
enum PIXEventType
{
    PIXEvent_EndEvent           = 0x000,
    PIXEvent_BeginEvent_VarArgs = 0x001,
    PIXEvent_BeginEvent_NoArgs  = 0x002,
    PIXEvent_SetMarker_VarArgs  = 0x007,
    PIXEvent_SetMarker_NoArgs   = 0x008,

    PIXEvent_EndEvent_OnContext           = PIXEvent_EndEvent           | 0x010, //0x010
    PIXEvent_BeginEvent_OnContext_VarArgs = PIXEvent_BeginEvent_VarArgs | 0x010, //0x011
    PIXEvent_BeginEvent_OnContext_NoArgs  = PIXEvent_BeginEvent_NoArgs  | 0x010, //0x012
    PIXEvent_SetMarker_OnContext_VarArgs  = PIXEvent_SetMarker_VarArgs  | 0x010, //0x017
    PIXEvent_SetMarker_OnContext_NoArgs   = PIXEvent_SetMarker_NoArgs   | 0x010, //0x018
};
35 
36 static const UINT64 PIXEventsReservedRecordSpaceQwords = 64;
37 //this is used to make sure SSE string copy always will end 16-byte write in the current block
38 //this way only a check if destination < limit can be performed, instead of destination < limit - 1
39 //since both these are UINT64* and SSE writes in 16 byte chunks, 8 bytes are kept in reserve
40 //so even if SSE overwrites 8 extra bytes, those will still belong to the correct block
41 //on next iteration check destination will be greater than limit
42 //this is used as well for fixed size UMD events and PIXEndEvent since these require less space
43 //than other variable length user events and do not need big reserved space
44 static const UINT64 PIXEventsReservedTailSpaceQwords = 2;
45 static const UINT64 PIXEventsSafeFastCopySpaceQwords = PIXEventsReservedRecordSpaceQwords - PIXEventsReservedTailSpaceQwords;
46 static const UINT64 PIXEventsGraphicsRecordSpaceQwords = 64;
47 
48 //Bits 7-19 (13 bits)
49 static const UINT64 PIXEventsBlockEndMarker = 0x00000000000FFF80;
50 
51 //Bits 10-19 (10 bits)
52 static const UINT64 PIXEventsTypeReadMask = 0x00000000000FFC00;
53 static const UINT64 PIXEventsTypeWriteMask = 0x00000000000003FF;
54 static const UINT64 PIXEventsTypeBitShift = 10;
55 
56 //Bits 20-63 (44 bits)
57 static const UINT64 PIXEventsTimestampReadMask = 0xFFFFFFFFFFF00000;
58 static const UINT64 PIXEventsTimestampWriteMask = 0x00000FFFFFFFFFFF;
59 static const UINT64 PIXEventsTimestampBitShift = 20;
60 
61 inline UINT64 PIXEncodeEventInfo(UINT64 timestamp, PIXEventType eventType)
62 {
63  return ((timestamp & PIXEventsTimestampWriteMask) << PIXEventsTimestampBitShift) |
64  (((UINT64)eventType & PIXEventsTypeWriteMask) << PIXEventsTypeBitShift);
65 }
66 
67 //Bits 60-63 (4)
68 static const UINT64 PIXEventsStringAlignmentWriteMask = 0x000000000000000F;
69 static const UINT64 PIXEventsStringAlignmentReadMask = 0xF000000000000000;
70 static const UINT64 PIXEventsStringAlignmentBitShift = 60;
71 
72 //Bits 55-59 (5)
73 static const UINT64 PIXEventsStringCopyChunkSizeWriteMask = 0x000000000000001F;
74 static const UINT64 PIXEventsStringCopyChunkSizeReadMask = 0x0F80000000000000;
75 static const UINT64 PIXEventsStringCopyChunkSizeBitShift = 55;
76 
77 //Bit 54
78 static const UINT64 PIXEventsStringIsANSIWriteMask = 0x0000000000000001;
79 static const UINT64 PIXEventsStringIsANSIReadMask = 0x0040000000000000;
80 static const UINT64 PIXEventsStringIsANSIBitShift = 54;
81 
82 //Bit 53
83 static const UINT64 PIXEventsStringIsShortcutWriteMask = 0x0000000000000001;
84 static const UINT64 PIXEventsStringIsShortcutReadMask = 0x0020000000000000;
85 static const UINT64 PIXEventsStringIsShortcutBitShift = 53;
86 
87 inline UINT64 PIXEncodeStringInfo(UINT64 alignment, UINT64 copyChunkSize, BOOL isANSI, BOOL isShortcut)
88 {
89  return ((alignment & PIXEventsStringAlignmentWriteMask) << PIXEventsStringAlignmentBitShift) |
90  ((copyChunkSize & PIXEventsStringCopyChunkSizeWriteMask) << PIXEventsStringCopyChunkSizeBitShift) |
91  (((UINT64)isANSI & PIXEventsStringIsANSIWriteMask) << PIXEventsStringIsANSIBitShift) |
92  (((UINT64)isShortcut & PIXEventsStringIsShortcutWriteMask) << PIXEventsStringIsShortcutBitShift);
93 }
94 
95 template<UINT alignment, class T>
96 inline bool PIXIsPointerAligned(T* pointer)
97 {
98  return !(((UINT64)pointer) & (alignment - 1));
99 }
100 
101 template<class T>
102 inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, T argument)
103 {
104  if (destination < limit)
105  {
106  *((T*)destination) = argument;
107  ++destination;
108  }
109 }
110 
111 //floats must be cast to double during writing the data to be properly printed later when reading the data
112 //this is needed because when float is passed to varargs function it's cast to double
113 template<>
114 inline void PIXCopyEventArgument<float>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, float argument)
115 {
116  if (destination < limit)
117  {
118  *((double*)destination) = (double)(argument);
119  ++destination;
120  }
121 }
122 
123 //char has to be cast to a longer signed integer type
124 //this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier
125 template<>
126 inline void PIXCopyEventArgument<char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, char argument)
127 {
128  if (destination < limit)
129  {
130  *((INT64*)destination) = (INT64)(argument);
131  ++destination;
132  }
133 }
134 
135 //unsigned char has to be cast to a longer unsigned integer type
136 //this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier
137 template<>
138 inline void PIXCopyEventArgument<unsigned char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, unsigned char argument)
139 {
140  if (destination < limit)
141  {
142  *destination = (UINT64)(argument);
143  ++destination;
144  }
145 }
146 
147 //bool has to be cast to an integer since it's not explicitly supported by string format routines
148 //there's no format specifier for bool type, but it should work with integer format specifiers
149 template<>
150 inline void PIXCopyEventArgument<bool>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, bool argument)
151 {
152  if (destination < limit)
153  {
154  *destination = (UINT64)(argument);
155  ++destination;
156  }
157 }
158 
159 inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
160 {
161  *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);
162  while (destination < limit)
163  {
164  UINT64 c = argument[0];
165  if (!c)
166  {
167  *destination++ = 0;
168  return;
169  }
170  UINT64 x = c;
171  c = argument[1];
172  if (!c)
173  {
174  *destination++ = x;
175  return;
176  }
177  x |= c << 8;
178  c = argument[2];
179  if (!c)
180  {
181  *destination++ = x;
182  return;
183  }
184  x |= c << 16;
185  c = argument[3];
186  if (!c)
187  {
188  *destination++ = x;
189  return;
190  }
191  x |= c << 24;
192  c = argument[4];
193  if (!c)
194  {
195  *destination++ = x;
196  return;
197  }
198  x |= c << 32;
199  c = argument[5];
200  if (!c)
201  {
202  *destination++ = x;
203  return;
204  }
205  x |= c << 40;
206  c = argument[6];
207  if (!c)
208  {
209  *destination++ = x;
210  return;
211  }
212  x |= c << 48;
213  c = argument[7];
214  if (!c)
215  {
216  *destination++ = x;
217  return;
218  }
219  x |= c << 56;
220  *destination++ = x;
221  argument += 8;
222  }
223 }
224 
225 inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
226 {
227  if (PIXIsPointerAligned<8>(argument))
228  {
229  *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE);
230  UINT64* source = (UINT64*)argument;
231  while (destination < limit)
232  {
233  UINT64 qword = *source++;
234  *destination++ = qword;
235  //check if any of the characters is a terminating zero
236  if (!((qword & 0xFF00000000000000) &&
237  (qword & 0xFF000000000000) &&
238  (qword & 0xFF0000000000) &&
239  (qword & 0xFF00000000) &&
240  (qword & 0xFF000000) &&
241  (qword & 0xFF0000) &&
242  (qword & 0xFF00) &&
243  (qword & 0xFF)))
244  {
245  break;
246  }
247  }
248  }
249  else
250  {
251  PIXCopyEventArgumentSlowest(destination, limit, argument);
252  }
253 }
254 
255 template<>
256 inline void PIXCopyEventArgument<PCSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument)
257 {
258  if (destination < limit)
259  {
260  if (argument != nullptr)
261  {
262 #if defined(_AMD64_) || defined(_X86_)
263  if (PIXIsPointerAligned<16>(argument))
264  {
265  *destination++ = PIXEncodeStringInfo(0, 16, TRUE, FALSE);
266  __m128i zero = _mm_setzero_si128();
267  if (PIXIsPointerAligned<16>(destination))
268  {
269  while (destination < limit)
270  {
271  __m128i mem = _mm_load_si128((__m128i*)argument);
272  _mm_store_si128((__m128i*)destination, mem);
273  //check if any of the characters is a terminating zero
274  __m128i res = _mm_cmpeq_epi8(mem, zero);
275  destination += 2;
276  if (_mm_movemask_epi8(res))
277  break;
278  argument += 16;
279  }
280  }
281  else
282  {
283  while (destination < limit)
284  {
285  __m128i mem = _mm_load_si128((__m128i*)argument);
286  _mm_storeu_si128((__m128i*)destination, mem);
287  //check if any of the characters is a terminating zero
288  __m128i res = _mm_cmpeq_epi8(mem, zero);
289  destination += 2;
290  if (_mm_movemask_epi8(res))
291  break;
292  argument += 16;
293  }
294  }
295  }
296  else
297 #endif // _AMD64_ || _X86_
298  {
299  PIXCopyEventArgumentSlow(destination, limit, argument);
300  }
301  }
302  else
303  {
304  *destination++ = 0ull;
305  }
306  }
307 }
308 
309 template<>
310 inline void PIXCopyEventArgument<PSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PSTR argument)
311 {
312  PIXCopyEventArgument(destination, limit, (PCSTR)argument);
313 }
314 
315 inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
316 {
317  *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);
318  while (destination < limit)
319  {
320  UINT64 c = argument[0];
321  if (!c)
322  {
323  *destination++ = 0;
324  return;
325  }
326  UINT64 x = c;
327  c = argument[1];
328  if (!c)
329  {
330  *destination++ = x;
331  return;
332  }
333  x |= c << 16;
334  c = argument[2];
335  if (!c)
336  {
337  *destination++ = x;
338  return;
339  }
340  x |= c << 32;
341  c = argument[3];
342  if (!c)
343  {
344  *destination++ = x;
345  return;
346  }
347  x |= c << 48;
348  *destination++ = x;
349  argument += 4;
350  }
351 }
352 
353 inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
354 {
355  if (PIXIsPointerAligned<8>(argument))
356  {
357  *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE);
358  UINT64* source = (UINT64*)argument;
359  while (destination < limit)
360  {
361  UINT64 qword = *source++;
362  *destination++ = qword;
363  //check if any of the characters is a terminating zero
364  //TODO: check if reversed condition is faster
365  if (!((qword & 0xFFFF000000000000) &&
366  (qword & 0xFFFF00000000) &&
367  (qword & 0xFFFF0000) &&
368  (qword & 0xFFFF)))
369  {
370  break;
371  }
372  }
373  }
374  else
375  {
376  PIXCopyEventArgumentSlowest(destination, limit, argument);
377  }
378 }
379 
380 template<>
381 inline void PIXCopyEventArgument<PCWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument)
382 {
383  if (destination < limit)
384  {
385  if (argument != nullptr)
386  {
387 #if defined(_AMD64_) || defined(_X86_)
388  if (PIXIsPointerAligned<16>(argument))
389  {
390  *destination++ = PIXEncodeStringInfo(0, 16, FALSE, FALSE);
391  __m128i zero = _mm_setzero_si128();
392  if (PIXIsPointerAligned<16>(destination))
393  {
394  while (destination < limit)
395  {
396  __m128i mem = _mm_load_si128((__m128i*)argument);
397  _mm_store_si128((__m128i*)destination, mem);
398  //check if any of the characters is a terminating zero
399  __m128i res = _mm_cmpeq_epi16(mem, zero);
400  destination += 2;
401  if (_mm_movemask_epi8(res))
402  break;
403  argument += 8;
404  }
405  }
406  else
407  {
408  while (destination < limit)
409  {
410  __m128i mem = _mm_load_si128((__m128i*)argument);
411  _mm_storeu_si128((__m128i*)destination, mem);
412  //check if any of the characters is a terminating zero
413  __m128i res = _mm_cmpeq_epi16(mem, zero);
414  destination += 2;
415  if (_mm_movemask_epi8(res))
416  break;
417  argument += 8;
418  }
419  }
420  }
421  else
422 #endif // _AMD64_ || _X86_
423  {
424  PIXCopyEventArgumentSlow(destination, limit, argument);
425  }
426  }
427  else
428  {
429  *destination++ = 0ull;
430  }
431  }
432 }
433 
434 template<>
435 inline void PIXCopyEventArgument<PWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PWSTR argument)
436 {
437  PIXCopyEventArgument(destination, limit, (PCWSTR)argument);
438 };
439 
#if defined(__d3d12_x_h__) || defined(__d3d12_h__)

//Thin wrappers, compiled only when one of the D3D12 header guards above is defined, that forward
//an encoded event to a D3D12 command list or command queue. Each takes the event data as a byte
//buffer of length size and passes D3D12_EVENT_METADATA as the first argument of the D3D12 call.

//Calls SetMarker on the command list.
inline void PIXSetMarkerOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
{
    commandList->SetMarker(D3D12_EVENT_METADATA, data, size);
}

//Calls SetMarker on the command queue.
inline void PIXSetMarkerOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
{
    commandQueue->SetMarker(D3D12_EVENT_METADATA, data, size);
}

//Calls BeginEvent on the command list.
inline void PIXBeginEventOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size)
{
    commandList->BeginEvent(D3D12_EVENT_METADATA, data, size);
}

//Calls BeginEvent on the command queue.
inline void PIXBeginEventOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size)
{
    commandQueue->BeginEvent(D3D12_EVENT_METADATA, data, size);
}

//Calls EndEvent on the command list.
inline void PIXEndEventOnContext(_In_ ID3D12GraphicsCommandList* commandList)
{
    commandList->EndEvent();
}

//Calls EndEvent on the command queue.
inline void PIXEndEventOnContext(_In_ ID3D12CommandQueue* commandQueue)
{
    commandQueue->EndEvent();
}

#endif // __d3d12_x_h__ || __d3d12_h__
472 
473 template<class T> struct PIXInferScopedEventType { typedef T Type; };
474 template<class T> struct PIXInferScopedEventType<const T> { typedef T Type; };
475 template<class T> struct PIXInferScopedEventType<T*> { typedef T Type; };
476 template<class T> struct PIXInferScopedEventType<T* const> { typedef T Type; };
477 template<> struct PIXInferScopedEventType<UINT64> { typedef void Type; };
478 template<> struct PIXInferScopedEventType<const UINT64> { typedef void Type; };
479 template<> struct PIXInferScopedEventType<INT64> { typedef void Type; };
480 template<> struct PIXInferScopedEventType<const INT64> { typedef void Type; };
481 template<> struct PIXInferScopedEventType<UINT> { typedef void Type; };
482 template<> struct PIXInferScopedEventType<const UINT> { typedef void Type; };
483 template<> struct PIXInferScopedEventType<INT> { typedef void Type; };
484 template<> struct PIXInferScopedEventType<const INT> { typedef void Type; };
485 #endif //_PIXEventsCommon_H_
Definition: PIXEventsCommon.h:473