From 057a8975362ea679e4314c3ceda9a250057a753a Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Thu, 3 Mar 2022 13:15:00 -0800 Subject: [PATCH 1/5] only flush to adios2 trace from thread 0 Former-commit-id: fe37764876931214de00d06026e6111896f1a57d --- plugins/examples/Tau_plugin_adios2_trace.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/examples/Tau_plugin_adios2_trace.cpp b/plugins/examples/Tau_plugin_adios2_trace.cpp index 147a2588e..0bcbe9c6e 100644 --- a/plugins/examples/Tau_plugin_adios2_trace.cpp +++ b/plugins/examples/Tau_plugin_adios2_trace.cpp @@ -1311,8 +1311,9 @@ int Tau_plugin_adios2_function_exit(Tau_plugin_event_function_exit_data_t* data) static std::mutex timer_lock; if (tau_plugin::thePluginOptions().env_periodic && !tau_plugin::thePluginOptions().env_one_file) { - // is it time to write? (and not in an MPI or ADIOS call) + // is it time to write? (and thread 0, and not in an MPI or ADIOS call) if (steady_clock::now() > next_write && !plugin_done && + data->tid == 0 && strstr(data->timer_name, "MPI_") == NULL) { bool mine = false; // only let one thread do this From 55d1f5861c3cf0f2eff61b4ed56e1c3ae3a55a1c Mon Sep 17 00:00:00 2001 From: Sameer Shende Date: Fri, 4 Mar 2022 05:48:15 -0800 Subject: [PATCH 2/5] We should not instrument (with Tau_start and Tau_stop) functions that are being compiled for the AMD GPU. We determine this by looking at the module that the function resides in. If the target triple for the module matches amdgcn-amd-amdhsa, we suppress instrumentation in this module as there is no implementation of Tau_start/Tau_stop in the AMD GPU code. We get the events for entry/exit for routines that execute on the GPU using rocprofiler/roctracer APIs. To prevent Tau_start/Tau_stop calls from being added to the IR for this module, we check by comparing the triple to this string (amdgcn-amd-amdhsa). If it matches (is_host_func = 0), we set the instrumentation to false. 
This compiles the code bound for the GPU with tau_cxx.sh properly using compiler-based instrumentation. Former-commit-id: da3ce2de197b892a016d37ad8e0d4bb396b19fda --- plugins/llvm/src/Instrument.cpp | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/plugins/llvm/src/Instrument.cpp b/plugins/llvm/src/Instrument.cpp index 4e862d265..bdccce1f6 100644 --- a/plugins/llvm/src/Instrument.cpp +++ b/plugins/llvm/src/Instrument.cpp @@ -34,6 +34,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/IR/InstIterator.h" +#include "llvm/ADT/Triple.h" + #include #include #include @@ -224,6 +226,7 @@ static FunctionCallee getVoidFunc(StringRef funcname, LLVMContext &context, Modu #else Instrument() : PassInfoMixin() { #endif + errs() <<"TauInputFile: "<getTargetTriple(); + bool is_host_func = triple.compare(std::string("amdgcn-amd-amdhsa")); // returns 0 if it matches + // Compare similarly for other GPUs. If it matches, do not instrument it. + /* This big test was explanded for readability */ bool instrumentHere = false; - //errs() << "Name " << prettycallName << " full " << callName << "\n"; + + if (is_host_func == false) { + errs() << "Name " << prettycallName << " GPU bound, instrument = "< X("tau-prof", "TAU Profiling", false, false); +static RegisterPass X("TAU", "TAU Profiling", false, false); // Automatically enable the pass. 
// http://adriansampson.net/blog/clangpass.html @@ -629,14 +642,16 @@ RegisterMyPass(PassManagerBuilder::EP_EarlyAsPossible, registerInstrumentPass); class PluginInstrument : public clang::PluginASTAction { protected: std::unique_ptr CreateASTConsumer(clang::CompilerInstance &CI, StringRef file) { + errs() <<"INSIDE PluginInstrument::CreateASTConsumer\n"; // VERBOSE return std::make_unique(); } bool ParseArgs(const clang::CompilerInstance &CI, const std::vector &args) { + errs() <<"INSIDE PluginInstrument::ParseArgs "< X("tau-prof", "TAU profiling"); +static clang::FrontendPluginRegistry::Add X("TAU", "TAU profiling"); #endif From 88ffe3b5ba9fc5839e8784c84f6eb07569b2d068 Mon Sep 17 00:00:00 2001 From: Sameer Shende Date: Fri, 4 Mar 2022 09:24:29 -0800 Subject: [PATCH 3/5] Added a -no_pthread_create configure option (enabled by default with hipcc) that uses a SUPPRESS_PTHREAD_CREATE_WRAPPER flag to disable the pthread_create wrapper in the src/wrappers/pthread directory. Required for hipcc. Former-commit-id: 789ffca822dff25010c2e00d89b0a736f9bbd1a3 --- configure | 19 +++++++++++++++++-- include/Makefile.skel | 3 ++- src/wrappers/pthread/pthread_wrap.c | 11 +++++++++++ utils/FixMakefile | 4 ++++ 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/configure b/configure index 662a017ec..c44e2aa96 100755 --- a/configure +++ b/configure @@ -686,6 +686,7 @@ usage() { echo "Other Options:" echo "-iowrapper .................................... Build POSIX IO Wrapper." echo "-pthread .................................. Use pthread thread package." + echo "-no_pthread_create ................... Suppress pthread_create wrapper." echo "-papi= ............... Specify location of PAPI (Performance API)." echo "-likwid= ............................. Specify location of LIKWID." echo "-otf= ....... Specify location of Open Trace Format (OTF) Package." @@ -751,8 +752,8 @@ for arg in "$@"; do echo " this is used for cross-compilation" echo "-pdt= ........ 
Specify location of PDT (Program Database Toolkit)." echo "-pdt_c++= ............ specify a different PDT C++ compiler." - echo " options [CC|KCC|g++|*xlC*|cxx|pgc++|pgcpp|FCC|guidec++|aCC|c++|ecpc|" - echo " clang++|bgclang++|g++4|icpc|icpx|scgcc|pathCC|orCC]." + echo " options [CC|KCC|g++|*xlC*|cxx|pgc++|pgcpp|FCC|guidec++|aCC|c++|ecpc|" + echo " clang++|bgclang++|g++4|icpc|icpx|scgcc|pathCC|orCC]." echo "-pdtcompdir= . specify a different PDT compiler directory." echo "-pdtarchdir= . specify a different PDT architecture directory." echo "-useropt='' .......... arguments to compilers (defaults to -O2)." @@ -765,6 +766,7 @@ for arg in "$@"; do echo " arm_linux|arm_android]" echo "-iowrapper .................................... Build POSIX IO Wrapper." echo "-pthread .................................. Use pthread thread package." + echo "-no_pthread_create ................... Suppress pthread_create wrapper." echo "-papithread .................................. Use PAPI thread package." echo "-charm= .............................. Use charm++ thread package." echo "-sproc .................................. Use SGI sproc thread package." 
@@ -1033,6 +1035,7 @@ sicortex=no extradir= extrashlibopts= pthread=no +suppress_pthread_create_wrapper=no tbb=no papithread=no papipfm=no @@ -1408,6 +1411,11 @@ for arg in "$@"; do shift ;; + -no_pthread_create) + suppress_pthread_create_wrapper=yes + shift + ;; + -pthread) pthread=yes shift @@ -3557,6 +3565,10 @@ if [ "$c_compiler" != "" -a "$c_compiler" != "default" ]; then export TAU_CCOM fi +if [ "$cxx_compiler" = "hipcc" ]; then + suppress_pthread_create_wrapper=yes +fi + echo "-------------------- TAU configure script ------------------" @@ -10477,6 +10489,9 @@ fi ###################################################################### +if [ $suppress_pthread_create_wrapper = yes ]; then + fixmakeargs="$fixmakeargs SUPPRESS_PTHREAD_CREATE_WRAPPER" +fi # If TBB support is requested, then make that the tag, not pthread if [ $tbb = yes ] ; then fixmakeargs="$fixmakeargs PTHREAD_AVAILABLE ptdir=$ptdir" diff --git a/include/Makefile.skel b/include/Makefile.skel index 486fa382b..f4aeabbc4 100644 --- a/include/Makefile.skel +++ b/include/Makefile.skel @@ -745,6 +745,7 @@ JDKBINDIR = $(JDKDIR)/bin #ROCTRACER#PROFILEOPT115 = -DTAU_ENABLE_ROCTRACER -DTAU_GPU -I$(TAU_ROCM_DIR)/include -I$(TAU_ROCM_DIR)/include/hsa -I$(TAU_ROCTRACER)/include/roctracer -I$(TAU_ROCTRACER)/include -DHIP_VDI=1 -DHSA_DEPRECATED="" -DHSA_LARGE_MODEL="" -DAMD_INTERNAL_BUILD -D__HIP_PLATFORM_HCC__ #ENDIF# #ROCTRACER_HSA#PROFILEOPT115 = -DTAU_ENABLE_ROCTRACER -DTAU_GPU -I$(TAU_ROCTRACER)/include/roctracer -I$(TAU_ROCTRACER)/include -I$(TAU_ROCTRACER)/inc -I$(TAU_ROCTRACER) -I$(TAU_ROCM_DIR)/hsa/include/hsa -DAMD_INTERNAL_BUILD -DLOCAL_BUILD=1 -DHIP_VDI=1 #ENDIF# #HIP#PROFILEOPT116 = -DTAU_ENABLE_HIP -I$(TAU_HIP)/include -D__HIP_PLATFORM_HCC__ #ENDIF# +#SUPPRESS_PTHREAD_CREATE_WRAPPER#PROFILEOPT117 = -DTAU_SUPPRESS_PTHREAD_CREATE_WRAPPER #ENDIF# MRNET_ROOT= MRNET_LW_OPTS= @@ -1384,7 +1385,7 @@ PROFILEOPTS = $(PROFILEOPT1) $(PROFILEOPT2) $(PROFILEOPT3) $(PROFILEOPT4) \ $(PROFILEOPT104) 
$(PROFILEOPT106) $(PROFILEOPT107) \ $(PROFILEOPT108) $(PROFILEOPT109) $(PROFILEOPT110) \ $(PROFILEOPT111) $(PROFILEOPT112) $(PROFILEOPT113) $(PROFILEOPT114) \ - $(PROFILEOPT115) $(PROFILEOPT116) $(TRACEOPT) \ + $(PROFILEOPT115) $(PROFILEOPT116) $(PROFILEOPT117) $(TRACEOPT) \ $(TAU_SOS_INCLUDE_OPTS) $(TAU_ADIOS_INCLUDE_OPTS) \ $(TAU_OTF2_INCLUDE_OPTS) $(TAU_CALIPER_INCLUDE_OPTS) \ $(TAU_CORESYMBOLICATION_INCLUDE_OPTS) $(TAU_ELF_BFD_PROFILEOPT) \ diff --git a/src/wrappers/pthread/pthread_wrap.c b/src/wrappers/pthread/pthread_wrap.c index 36b5b0923..f97615a29 100644 --- a/src/wrappers/pthread/pthread_wrap.c +++ b/src/wrappers/pthread/pthread_wrap.c @@ -141,11 +141,22 @@ int pthread_barrier_wait(pthread_barrier_t * barrier) #else // Wrap via the the link line. +#ifndef TAU_SUPPRESS_PTHREAD_CREATE_WRAPPER int __real_pthread_create(pthread_t *, const pthread_attr_t *, start_routine_p, void *); int __wrap_pthread_create(pthread_t * thread, const pthread_attr_t * attr, start_routine_p start_routine, void * arg) { return tau_pthread_create_wrapper(__real_pthread_create, thread, attr, start_routine, arg); } +/* +#else +int __real___wrap_pthread_create(pthread_t *, const pthread_attr_t *, start_routine_p, void *); +int __wrap___wrap_pthread_create(pthread_t * thread, const pthread_attr_t * attr, start_routine_p start_routine, void * arg) +{ + printf("Inside __wrap___wrap_pthread_create\n"); + return tau_pthread_create_wrapper(__real___wrap_pthread_create, thread, attr, start_routine, arg); +} +*/ +#endif /* TAU_WRAP_PTHREAD_CREATE */ int __real_pthread_join(pthread_t, void **); int __wrap_pthread_join(pthread_t thread, void **retval) diff --git a/utils/FixMakefile b/utils/FixMakefile index 453bda5cc..07681b8f3 100755 --- a/utils/FixMakefile +++ b/utils/FixMakefile @@ -1688,6 +1688,10 @@ case $1 in echo "NOTE: Using pthreads as the thread package. 
***" echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout ;; + SUPPRESS_PTHREAD_CREATE_WRAPPER) + echo "NOTE: Not generating the pthread_create wrapper" + echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout + ;; TBB_AVAILABLE) echo "NOTE: Using pthreads as the thread package for TBB support. ***" echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout From 9753323e63810248dd529de2dfd0f5ded96eed7a Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Fri, 4 Mar 2022 11:03:27 -0800 Subject: [PATCH 4/5] Don't force disable of throttling when using the generic API Former-commit-id: 0b4a8d3c8174d44991e91d90e805a9cc34a1abcf --- src/Profile/TauGenericAPI.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Profile/TauGenericAPI.cpp b/src/Profile/TauGenericAPI.cpp index a7fc1d07a..8a95f07b3 100644 --- a/src/Profile/TauGenericAPI.cpp +++ b/src/Profile/TauGenericAPI.cpp @@ -87,7 +87,8 @@ void ps_tool_initialize(void) { /* Disable throttling, because if users use ps_tool_stop_current(), * throttling will cause Tau_start() to do nothing for throttled events, * but Tau_global_stop() will stop the timer on the stop of the stack */ - TauEnv_set_throttle(0); + /* Then again, if someone calls ps_stop_current(), they get what they deserve */ + // TauEnv_set_throttle(0); Tau_create_top_level_timer_if_necessary(); } From baa77dd213ceb9ef4ab00552fc0af0e2106fa275 Mon Sep 17 00:00:00 2001 From: Camille Coti Date: Sat, 5 Mar 2022 07:23:36 -0800 Subject: [PATCH 5/5] LLVM plugin: add Tau_init and Tau_set_node at the beginning of the main function Former-commit-id: 6efd540d2c7bc117f02a8f046a1739c484de972f --- plugins/llvm/src/Instrument.cpp | 59 +++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/plugins/llvm/src/Instrument.cpp b/plugins/llvm/src/Instrument.cpp index bdccce1f6..02adc524a 100644 --- a/plugins/llvm/src/Instrument.cpp +++ b/plugins/llvm/src/Instrument.cpp @@ -34,8 +34,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/IR/InstIterator.h" 
-#include "llvm/ADT/Triple.h" - #include #include #include @@ -124,6 +122,9 @@ TauDryRun("tau-dry-run", +auto TauInitFunc = "Tau_init"; // arguments to pass: argc, argv +auto TauSetNodeFunc = "Tau_set_node"; // argument to pass: 0 + // Demangling technique borrowed/modified from // https://github.com/eklitzke/demangle/blob/master/src/demangle.cc static StringRef normalize_name(StringRef mangled_name) { @@ -226,7 +227,6 @@ static FunctionCallee getVoidFunc(StringRef funcname, LLVMContext &context, Modu #else Instrument() : PassInfoMixin() { #endif - errs() <<"TauInputFile: "<getTargetTriple(); - bool is_host_func = triple.compare(std::string("amdgcn-amd-amdhsa")); // returns 0 if it matches - // Compare similarly for other GPUs. If it matches, do not instrument it. - /* This big test was explanded for readability */ bool instrumentHere = false; - - if (is_host_func == false) { - errs() << "Name " << prettycallName << " GPU bound, instrument = "< b4( b ); + + /* TauInitFunc takes two arguments: argc and argv */ + + SmallVector mainArgsVect; + for( Argument &arg : func.args() ){ + mainArgsVect.push_back( &arg ); + } + b4.CreateCall( initfun, mainArgsVect ); + + /* TauSetNodeFunc takes one argument: 0 */ + + Value* z = ConstantInt::get( context, llvm::APInt( 32, 0, false ) ); + SmallVector zero{ z }; + b4.CreateCall( setnodefun, zero ); + + mutated = true; + } + + /* Add regular TAU calls */ + std::string filename = getFilename( func ); std::string location( "[{" + getFilename( func ) + "} {" + getLineAndCol( func ) + "}]" ); @@ -597,8 +618,6 @@ static FunctionCallee getVoidFunc(StringRef funcname, LLVMContext &context, Modu Instruction* i = &*pi; IRBuilder<> before( i ); - bool mutated = false; // TODO - // This is the recommended way of creating a string constant (to be used // as an argument to runtime functions) Value *strArg = before.CreateGlobalStringPtr( ( prettyname + " " + location ).str() ); @@ -626,7 +645,7 @@ static FunctionCallee getVoidFunc(StringRef 
funcname, LLVMContext &context, Modu char Instrument::ID = 0; -static RegisterPass X("TAU", "TAU Profiling", false, false); +static RegisterPass X("tau-prof", "TAU Profiling", false, false); // Automatically enable the pass. // http://adriansampson.net/blog/clangpass.html @@ -642,16 +661,14 @@ RegisterMyPass(PassManagerBuilder::EP_EarlyAsPossible, registerInstrumentPass); class PluginInstrument : public clang::PluginASTAction { protected: std::unique_ptr CreateASTConsumer(clang::CompilerInstance &CI, StringRef file) { - errs() <<"INSIDE PluginInstrument::CreateASTConsumer\n"; // VERBOSE return std::make_unique(); } bool ParseArgs(const clang::CompilerInstance &CI, const std::vector &args) { - errs() <<"INSIDE PluginInstrument::ParseArgs "< X("TAU", "TAU profiling"); +static clang::FrontendPluginRegistry::Add X("tau-prof", "TAU profiling"); #endif