Skip to content

Commit 48bfe76

Browse files
committed
Use NVTX payload API
1 parent f513309 commit 48bfe76

File tree

10 files changed

+236
-18
lines changed

10 files changed

+236
-18
lines changed

Compiler/src/timing.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ if ccall(:jl_timing_enabled, Cint, ()) != 0
66
file = QuoteNode(file)
77

88
# XXX: This buffer must be large enough to store any jl_timing_block_t (runtime-checked)
9-
buffer = (0, 0, 0, 0, 0, 0, 0)
9+
buffer = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
1010
buffer_size = Core.sizeof(buffer)
1111
return quote
1212
if $event[] === C_NULL
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
2de9e71cd90dd01f6041d3f292daceb7
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
671afacadbddd58b090e12f2c56852b0b46d07db935fbf4f85de8cbdaef8ade4ef36ea0096d84dcf2be78dd6bdd68a7a328e1e9d67e9e51d83309e4f75c56003

deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5

Lines changed: 0 additions & 1 deletion
This file was deleted.

deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512

Lines changed: 0 additions & 1 deletion
This file was deleted.

deps/nvtx.version

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# -*- makefile -*-
22
## source build
3-
NVTX_BRANCH=dev
4-
NVTX_SHA1=733fb419540bc1d152bc682d2ca066c7bb79da29
3+
NVTX_BRANCH=v3.4.0
4+
NVTX_SHA1=60147cb506070b6af9ac8f2ca6a09ae034fb8d5d

src/julia_threads.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@
1919
#ifndef _OS_WINDOWS_
2020
#include "pthread.h"
2121
#endif
22+
23+
#ifdef USE_NVTX
24+
#pragma GCC visibility push(default)
25+
#include <nvtx3/nvToolsExt.h>
26+
#pragma GCC visibility pop
27+
#include <nvtx3/nvToolsExtPayload.h>
28+
#endif
2229
// threading ------------------------------------------------------------------
2330

2431
#ifdef __cplusplus
@@ -273,6 +280,9 @@ typedef struct _jl_task_t {
273280
jl_ptls_t ptls; // == jl_all_tls_states[tid]
274281
#ifdef USE_TRACY
275282
const char *name;
283+
#endif
284+
#ifdef USE_NVTX
285+
nvtxEventAttributes_t nvtx_attrs;
276286
#endif
277287
// saved exception stack
278288
jl_excstack_t *excstack;

src/task.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1566,10 +1566,25 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
15661566
ct->ctx.bufsz = ssize;
15671567
}
15681568

1569-
#ifdef USE_TRACY
1569+
#if defined(USE_TRACY) || defined(USE_NVTX)
15701570
char *unique_string = (char *)malloc(strlen("Root") + 1);
15711571
strcpy(unique_string, "Root");
1572+
#ifdef USE_TRACY
15721573
ct->name = unique_string;
1574+
#endif
1575+
#ifdef USE_NVTX
1576+
nvtxEventAttributes_t nvtx_attrs = {0};
1577+
nvtx_attrs.version = NVTX_VERSION;
1578+
nvtx_attrs.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
1579+
1580+
nvtx_attrs.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
1581+
nvtx_attrs.message.registered = nvtxDomainRegisterStringA(jl_timing_nvtx_task_domain, unique_string);
1582+
1583+
nvtx_attrs.colorType = NVTX_COLOR_ARGB;
1584+
nvtx_attrs.color = 0;
1585+
1586+
ct->nvtx_attrs = nvtx_attrs;
1587+
#endif
15731588
#endif
15741589
ct->ctx.started = 1;
15751590
ct->next = jl_nothing;

src/timing.c

Lines changed: 168 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
#define DISABLE_FREQUENT_EVENTS
1111
#endif
1212

13+
#ifdef USE_NVTX
14+
#include <nvtx3/nvToolsExtPayload.h>
15+
#endif
16+
1317
#ifdef __cplusplus
1418
extern "C" {
1519
#endif
@@ -57,7 +61,14 @@ static jl_mutex_t jl_timing_ittapi_events_lock;
5761
#endif //USE_ITTAPI
5862

5963
#ifdef USE_NVTX
60-
static nvtxDomainHandle_t jl_timing_nvtx_domain;
64+
65+
nvtxDomainHandle_t jl_timing_nvtx_domain;
66+
nvtxDomainHandle_t jl_timing_nvtx_task_domain;
67+
68+
static uint64_t jl_timing_nvtx_signature_schemaid;
69+
static uint64_t jl_timing_nvtx_module_schemaid;
70+
static uint64_t jl_timing_nvtx_location_schemaid;
71+
6172
#endif
6273

6374
#ifdef USE_TIMING_COUNTS
@@ -155,6 +166,82 @@ void jl_init_timing(void)
155166
for (int i = 0; i < JL_TIMING_SUBSYSTEM_LAST; i++) {
156167
nvtxDomainNameCategoryA(jl_timing_nvtx_domain, i + 1, jl_timing_subsystems[i]);
157168
}
169+
170+
nvtxPayloadSchemaEntry_t jl_timing_nvtx_signature_schema_entries[] = {
171+
{
172+
.flags = NVTX_PAYLOAD_ENTRY_FLAG_UNUSED,
173+
.type = NVTX_PAYLOAD_ENTRY_TYPE_NVTX_REGISTERED_STRING_HANDLE,
174+
.name = "signature",
175+
.offset = 0,
176+
}
177+
};
178+
179+
nvtxPayloadSchemaAttr_t jl_timing_nvtx_signature_schema = {
180+
.fieldMask = NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_TYPE | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_ENTRIES | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NUM_ENTRIES,
181+
.type = NVTX_PAYLOAD_SCHEMA_TYPE_STATIC,
182+
.flags = NVTX_PAYLOAD_SCHEMA_FLAG_NONE,
183+
.entries = jl_timing_nvtx_signature_schema_entries,
184+
.numEntries = 1,
185+
.payloadStaticSize = sizeof(nvtxStringHandle_t),
186+
};
187+
jl_timing_nvtx_signature_schemaid = nvtxPayloadSchemaRegister(jl_timing_nvtx_domain, &jl_timing_nvtx_signature_schema);
188+
189+
190+
nvtxPayloadSchemaEntry_t jl_timing_nvtx_module_schema_entries[] = {
191+
{
192+
.flags = NVTX_PAYLOAD_ENTRY_FLAG_UNUSED,
193+
.type = NVTX_PAYLOAD_ENTRY_TYPE_NVTX_REGISTERED_STRING_HANDLE,
194+
.name = "name",
195+
.offset = offsetof(jl_timing_nvtx_module_t, name),
196+
},
197+
{
198+
.flags = NVTX_PAYLOAD_ENTRY_FLAG_UNUSED,
199+
.type = NVTX_PAYLOAD_ENTRY_TYPE_NVTX_REGISTERED_STRING_HANDLE,
200+
.name = "root",
201+
.offset = offsetof(jl_timing_nvtx_module_t, root),
202+
},
203+
};
204+
205+
nvtxPayloadSchemaAttr_t jl_timing_nvtx_module_schema = {
206+
.fieldMask = NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_TYPE | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_ENTRIES | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NUM_ENTRIES,
207+
.name = "module",
208+
.type = NVTX_PAYLOAD_SCHEMA_TYPE_STATIC,
209+
.flags = NVTX_PAYLOAD_SCHEMA_FLAG_NONE,
210+
.entries = jl_timing_nvtx_module_schema_entries,
211+
.numEntries = sizeof(jl_timing_nvtx_module_schema_entries) / sizeof(jl_timing_nvtx_module_schema_entries[0]),
212+
.payloadStaticSize = sizeof(jl_timing_nvtx_module_t),
213+
};
214+
jl_timing_nvtx_module_schemaid = nvtxPayloadSchemaRegister(jl_timing_nvtx_domain, &jl_timing_nvtx_module_schema);
215+
216+
nvtxPayloadSchemaEntry_t jl_timing_nvtx_location_schema_entries[] = {
217+
{
218+
.flags = NVTX_PAYLOAD_ENTRY_FLAG_UNUSED,
219+
.type = NVTX_PAYLOAD_ENTRY_TYPE_NVTX_REGISTERED_STRING_HANDLE,
220+
.name = "file",
221+
.offset = offsetof(jl_timing_nvtx_location_t, file),
222+
},
223+
{
224+
.flags = NVTX_PAYLOAD_ENTRY_FLAG_UNUSED,
225+
.type = NVTX_PAYLOAD_ENTRY_TYPE_INT,
226+
.name = "line",
227+
.offset = offsetof(jl_timing_nvtx_location_t, line),
228+
}
229+
};
230+
231+
nvtxPayloadSchemaAttr_t jl_timing_nvtx_location_schema = {
232+
.fieldMask = NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NAME | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_TYPE | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_FLAGS | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_ENTRIES | NVTX_PAYLOAD_SCHEMA_ATTR_FIELD_NUM_ENTRIES,
233+
.name = "location",
234+
.type = NVTX_PAYLOAD_SCHEMA_TYPE_STATIC,
235+
.flags = NVTX_PAYLOAD_SCHEMA_FLAG_NONE,
236+
.entries = jl_timing_nvtx_location_schema_entries,
237+
.numEntries = sizeof(jl_timing_nvtx_location_schema_entries) / sizeof(jl_timing_nvtx_location_schema_entries[0]),
238+
.payloadStaticSize = sizeof(jl_timing_nvtx_location_t),
239+
};
240+
jl_timing_nvtx_location_schemaid = nvtxPayloadSchemaRegister(jl_timing_nvtx_domain, &jl_timing_nvtx_location_schema);
241+
242+
243+
244+
jl_timing_nvtx_task_domain = nvtxDomainCreateA("julia tasks");
158245
#endif
159246

160247
int i __attribute__((unused)) = 0;
@@ -401,7 +488,18 @@ JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *block) {
401488
if (block->is_running) {
402489
uint64_t t = cycleclock(); (void)t;
403490
_ITTAPI_STOP(block);
404-
_NVTX_STOP(block);
491+
#ifdef USE_NVTX
492+
size_t nvtx_payload_count = 0;
493+
nvtxPayloadData_t nvtx_payloads[JL_NVTX_MAX_PAYLOADS];
494+
495+
if (block->nvtx_payload.flags & JL_NVTX_PAYLOAD_FLAG_SIGNATURE)
496+
nvtx_payloads[nvtx_payload_count++] = (nvtxPayloadData_t){jl_timing_nvtx_signature_schemaid, sizeof(nvtxStringHandle_t), &block->nvtx_payload.signature};
497+
if (block->nvtx_payload.flags & JL_NVTX_PAYLOAD_FLAG_LOCATION)
498+
nvtx_payloads[nvtx_payload_count++] = (nvtxPayloadData_t){jl_timing_nvtx_location_schemaid, sizeof(jl_timing_nvtx_location_t), &block->nvtx_payload.location};
499+
if (block->nvtx_payload.flags & JL_NVTX_PAYLOAD_FLAG_MODULE)
500+
nvtx_payloads[nvtx_payload_count++] = (nvtxPayloadData_t){jl_timing_nvtx_module_schemaid, sizeof(jl_timing_nvtx_module_t), &block->nvtx_payload.module};
501+
nvtxRangePopPayload(jl_timing_nvtx_domain, nvtx_payloads, nvtx_payload_count);
502+
#endif
405503
_TRACY_STOP(block->tracy_ctx);
406504
_COUNTS_STOP(block, t);
407505

@@ -432,6 +530,9 @@ void jl_timing_block_task_enter(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t
432530
}
433531
}
434532

533+
#ifdef USE_NVTX
534+
nvtxDomainRangePushEx(jl_timing_nvtx_task_domain, &ct->nvtx_attrs);
535+
#endif
435536
#ifdef USE_TRACY
436537
TracyCFiberEnter(ct->name);
437538
#else
@@ -441,6 +542,9 @@ void jl_timing_block_task_enter(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t
441542

442543
jl_timing_block_t *jl_timing_block_task_exit(jl_task_t *ct, jl_ptls_t ptls)
443544
{
545+
#ifdef USE_NVTX
546+
nvtxDomainRangePop(jl_timing_nvtx_task_domain);
547+
#endif
444548
#ifdef USE_TRACY
445549
// Tracy is fairly strict about not leaving a fiber that hasn't
446550
// been entered, which happens often when connecting to a running
@@ -483,15 +587,22 @@ JL_DLLEXPORT void jl_timing_show(jl_value_t *v, jl_timing_block_t *cur_block)
483587

484588
/*
 * Annotate the timing block `cur_block` with the module `m`.
 *
 * Tracy builds: the module is attached to the block's Tracy zone as text,
 * either the bare module name or "root.name" (see the branch below).
 * NVTX builds: the module's name and its root's name are registered as NVTX
 * strings and the resulting handles are stored in the block's NVTX payload.
 * With neither backend compiled in, the function body is empty.
 */
JL_DLLEXPORT void jl_timing_show_module(jl_module_t *m, jl_timing_block_t *cur_block)
485589
{
486-
#ifdef USE_TRACY
590+
#if defined(USE_TRACY) || defined(USE_NVTX)
487591
// `root` is needed by both the Tracy and the NVTX sections below.
jl_module_t *root = jl_module_root(m);
592+
#endif
593+
#ifdef USE_TRACY
488594
// When `m` is its own root, or its root is jl_main_module, only the
// module's own name is shown; otherwise the name is prefixed with the root's.
if (root == m || root == jl_main_module) {
489595
const char *module_name = jl_symbol_name(m->name);
490596
TracyCZoneText(cur_block->tracy_ctx, module_name, strlen(module_name));
491597
} else {
492598
jl_timing_printf(cur_block, "%s.%s", jl_symbol_name(root->name), jl_symbol_name(m->name));
493599
}
494600
#endif
601+
#ifdef USE_NVTX
602+
// Unlike the Tracy section, both name and root are always recorded here,
// even when `root == m`.
cur_block->nvtx_payload.module.name = nvtxDomainRegisterStringA(jl_timing_nvtx_domain, jl_symbol_name(m->name));
603+
cur_block->nvtx_payload.module.root = nvtxDomainRegisterStringA(jl_timing_nvtx_domain, jl_symbol_name(root->name));
604+
// _jl_timing_block_end only attaches the module payload when this flag is set.
cur_block->nvtx_payload.flags |= JL_NVTX_PAYLOAD_FLAG_MODULE;
605+
#endif
495606
}
496607

497608
JL_DLLEXPORT void jl_timing_show_filename(const char *path, jl_timing_block_t *cur_block)
@@ -500,12 +611,19 @@ JL_DLLEXPORT void jl_timing_show_filename(const char *path, jl_timing_block_t *c
500611
const char *filename = gnu_basename(path);
501612
TracyCZoneText(cur_block->tracy_ctx, filename, strlen(filename));
502613
#endif
614+
#ifdef USE_NVTX
615+
cur_block->nvtx_payload.location.file = nvtxDomainRegisterStringA(jl_timing_nvtx_domain, path);
616+
cur_block->nvtx_payload.location.line = 0;
617+
cur_block->nvtx_payload.flags |= JL_NVTX_PAYLOAD_FLAG_LOCATION;
618+
#endif
503619
}
504620

505621
JL_DLLEXPORT void jl_timing_show_location(const char *file, int line, jl_module_t* mod, jl_timing_block_t *cur_block)
506622
{
507-
#ifdef USE_TRACY
623+
#if defined(USE_TRACY) || defined(USE_NVTX)
508624
jl_module_t *root = jl_module_root(mod);
625+
#endif
626+
#ifdef USE_TRACY
509627
if (root == mod || root == jl_main_module) {
510628
jl_timing_printf(cur_block, "%s:%d in %s",
511629
gnu_basename(file),
@@ -520,6 +638,14 @@ JL_DLLEXPORT void jl_timing_show_location(const char *file, int line, jl_module_
520638
jl_symbol_name(mod->name));
521639
}
522640
#endif
641+
#ifdef USE_NVTX
642+
cur_block->nvtx_payload.location.file = nvtxDomainRegisterStringA(jl_timing_nvtx_domain, file);
643+
cur_block->nvtx_payload.location.line = line;
644+
cur_block->nvtx_payload.module.name = nvtxDomainRegisterStringA(jl_timing_nvtx_domain, jl_symbol_name(mod->name));
645+
cur_block->nvtx_payload.module.root = nvtxDomainRegisterStringA(jl_timing_nvtx_domain, jl_symbol_name(root->name));
646+
cur_block->nvtx_payload.flags |= JL_NVTX_PAYLOAD_FLAG_LOCATION | JL_NVTX_PAYLOAD_FLAG_MODULE;
647+
#endif
648+
#endif
523649
}
524650

525651
JL_DLLEXPORT void jl_timing_show_method_instance(jl_method_instance_t *mi, jl_timing_block_t *cur_block)
@@ -537,28 +663,45 @@ JL_DLLEXPORT void jl_timing_show_method_instance(jl_method_instance_t *mi, jl_ti
537663
/*
 * Annotate the timing block `cur_block` with a method.
 *
 * Two pieces of information are forwarded: the method object itself, through
 * jl_timing_show, and its source location (`method->file`, `method->line`,
 * `method->module`), through jl_timing_show_location.
 */
JL_DLLEXPORT void jl_timing_show_method(jl_method_t *method, jl_timing_block_t *cur_block)
538664
{
539665
jl_timing_show((jl_value_t *)method, cur_block);
666+
// NOTE(review): this USE_NVTX section is empty in this revision; no
// NVTX-specific work is done directly in this function.
#ifdef USE_NVTX
667+
668+
#endif
540669
// jl_timing_show_location fills in the NVTX location and module payload
// fields (when USE_NVTX is defined) in addition to the Tracy zone text.
jl_timing_show_location(jl_symbol_name(method->file), method->line, method->module, cur_block);
541670
}
542671

543672
/*
 * Annotate the timing block `cur_block` with the printed function signature `v`.
 *
 * The signature is rendered into a fixed-size, non-growable in-memory stream.
 * Tracy builds: the rendered bytes are attached to the Tracy zone as text.
 * NVTX builds: the rendered text is NUL-terminated, registered as an NVTX
 * string, and the handle is stored in the block's NVTX payload.
 * A completely full buffer gets its tail replaced by "..." in both cases.
 */
JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_block)
544673
{
545-
#ifdef USE_TRACY
674+
#if defined(USE_TRACY) || defined(USE_NVTX)
546675
ios_t buf;
547676
ios_mem(&buf, IOS_INLSIZE);
548677
buf.growable = 0; // Restrict to inline buffer to avoid allocation
549678

550679
jl_static_show_config_t config = { /* quiet */ 1 };
551680
jl_static_show_func_sig_((JL_STREAM*)&buf, v, config);
681+
#endif
682+
#ifdef USE_TRACY
552683
// A full buffer is marked with a trailing "..." (presumably because the
// output was truncated — a signature that fits exactly is marked too).
if (buf.size == buf.maxsize)
553684
memset(&buf.buf[IOS_INLSIZE - 3], '.', 3);
554-
555685
// Tracy receives an explicit length, so no NUL terminator is needed here.
TracyCZoneText(cur_block->tracy_ctx, buf.buf, buf.size);
556686
#endif
687+
#ifdef USE_NVTX
688+
// nvtxDomainRegisterStringA is given only a pointer, so the text must be
// NUL-terminated in place before it is registered.
// NOTE(review): the "..." offset uses IOS_INLSIZE while the terminator uses
// buf.size; these agree only if buf.maxsize == IOS_INLSIZE — TODO confirm.
// NOTE(review): when USE_TRACY is also defined, the Tracy section above has
// already written dots into the last three bytes before this runs.
if (buf.size == buf.maxsize) {
689+
// Full buffer: "..." goes one byte earlier than in the Tracy section so
// that the final byte can hold the terminator.
memset(&buf.buf[IOS_INLSIZE - 4], '.', 3);
690+
memset(&buf.buf[buf.size-1], 0, 1);
691+
} else {
692+
// Room left: terminate right after the rendered text.
memset(&buf.buf[buf.size], 0, 1);
693+
}
694+
cur_block->nvtx_payload.signature = nvtxDomainRegisterStringA(jl_timing_nvtx_domain, buf.buf);
695+
// _jl_timing_block_end only attaches the signature payload when this flag is set.
cur_block->nvtx_payload.flags |= JL_NVTX_PAYLOAD_FLAG_SIGNATURE;
696+
#endif
557697
}
558698

559699
JL_DLLEXPORT void jl_timing_show_macro(jl_method_instance_t *macro, jl_value_t* lno, jl_module_t* mod, jl_timing_block_t *cur_block)
560700
{
561701
jl_timing_printf(cur_block, "%s", jl_symbol_name(macro->def.method->name));
702+
#ifdef USE_NVTX
703+
704+
#endif
562705
assert(jl_typetagis(lno, jl_linenumbernode_type));
563706
jl_timing_show_location(jl_symbol_name((jl_sym_t*)jl_fieldref(lno, 1)),
564707
jl_unbox_int64(jl_fieldref(lno, 0)),
@@ -593,7 +736,7 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str)
593736

594737
void jl_timing_task_init(jl_task_t *t)
595738
{
596-
#ifdef USE_TRACY
739+
#if defined(USE_TRACY) || defined(USE_NVTX)
597740
jl_value_t *start_type = jl_typeof(t->start);
598741
const char *start_name = "";
599742
if (jl_is_datatype(start_type))
@@ -622,9 +765,26 @@ void jl_timing_task_init(jl_task_t *t)
622765
snprintf(fiber_name, fiber_name_len, "Task %d (\"%s\")",
623766
task_id++, start_name);
624767
}
625-
768+
#ifdef USE_TRACY
626769
t->name = fiber_name;
627770
#endif
771+
#ifdef USE_NVTX
772+
nvtxEventAttributes_t nvtx_attrs = {0};
773+
nvtx_attrs.version = NVTX_VERSION;
774+
nvtx_attrs.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
775+
776+
nvtx_attrs.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
777+
nvtx_attrs.message.registered = nvtxDomainRegisterStringA(jl_timing_nvtx_task_domain, fiber_name);
778+
779+
// 0 is the default (unnamed) category
780+
nvtx_attrs.payloadType = NVTX_PAYLOAD_TYPE_UNSIGNED_INT64;
781+
nvtx_attrs.payload.ullValue = (uint64_t)t; // cast pointer to uint64_t for identification
782+
// simple Knuth hash to get nice colors
783+
nvtx_attrs.colorType = NVTX_COLOR_ARGB;
784+
nvtx_attrs.color = (((nvtx_attrs.payload.ullValue >> 32) + (nvtx_attrs.payload.ullValue & 0xffffffff)) * 2654435769) >> 8;
785+
786+
t->nvtx_attrs = nvtx_attrs;
787+
#endif
628788
}
629789

630790
JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled)

0 commit comments

Comments
 (0)