diff --git a/configure b/configure index d10cafccb..49dfff599 100755 --- a/configure +++ b/configure @@ -686,6 +686,7 @@ usage() { echo "Other Options:" echo "-iowrapper .................................... Build POSIX IO Wrapper." echo "-pthread .................................. Use pthread thread package." + echo "-no_pthread_create ................... Suppress pthread_create wrapper." echo "-papi= ............... Specify location of PAPI (Performance API)." echo "-likwid= ............................. Specify location of LIKWID." echo "-otf= ....... Specify location of Open Trace Format (OTF) Package." @@ -751,8 +752,8 @@ for arg in "$@"; do echo " this is used for cross-compilation" echo "-pdt= ........ Specify location of PDT (Program Database Toolkit)." echo "-pdt_c++= ............ specify a different PDT C++ compiler." - echo " options [CC|KCC|g++|*xlC*|cxx|pgc++|pgcpp|FCC|guidec++|aCC|c++|ecpc|" - echo " clang++|bgclang++|g++4|icpc|icpx|scgcc|pathCC|orCC]." + echo " options [CC|KCC|g++|*xlC*|cxx|pgc++|pgcpp|FCC|guidec++|aCC|c++|ecpc|" + echo " clang++|bgclang++|g++4|icpc|icpx|scgcc|pathCC|orCC]." echo "-pdtcompdir= . specify a different PDT compiler directory." echo "-pdtarchdir= . specify a different PDT architecture directory." echo "-useropt='' .......... arguments to compilers (defaults to -O2)." @@ -765,6 +766,7 @@ for arg in "$@"; do echo " arm_linux|arm_android]" echo "-iowrapper .................................... Build POSIX IO Wrapper." echo "-pthread .................................. Use pthread thread package." + echo "-no_pthread_create ................... Suppress pthread_create wrapper." echo "-papithread .................................. Use PAPI thread package." echo "-charm= .............................. Use charm++ thread package." echo "-sproc .................................. Use SGI sproc thread package." 
@@ -1033,6 +1035,7 @@ sicortex=no extradir= extrashlibopts= pthread=no +suppress_pthread_create_wrapper=no tbb=no papithread=no papipfm=no @@ -1408,6 +1411,11 @@ for arg in "$@"; do shift ;; + -no_pthread_create) + suppress_pthread_create_wrapper=yes + shift + ;; + -pthread) pthread=yes shift @@ -3557,6 +3565,10 @@ if [ "$c_compiler" != "" -a "$c_compiler" != "default" ]; then export TAU_CCOM fi +if [ "$cxx_compiler" = "hipcc" ]; then + suppress_pthread_create_wrapper=yes +fi + echo "-------------------- TAU configure script ------------------" @@ -10477,6 +10489,9 @@ fi ###################################################################### +if [ $suppress_pthread_create_wrapper = yes ]; then + fixmakeargs="$fixmakeargs SUPPRESS_PTHREAD_CREATE_WRAPPER" +fi # If TBB support is requested, then make that the tag, not pthread if [ $tbb = yes ] ; then fixmakeargs="$fixmakeargs PTHREAD_AVAILABLE ptdir=$ptdir" diff --git a/include/Makefile.skel b/include/Makefile.skel index 486fa382b..f4aeabbc4 100644 --- a/include/Makefile.skel +++ b/include/Makefile.skel @@ -745,6 +745,7 @@ JDKBINDIR = $(JDKDIR)/bin #ROCTRACER#PROFILEOPT115 = -DTAU_ENABLE_ROCTRACER -DTAU_GPU -I$(TAU_ROCM_DIR)/include -I$(TAU_ROCM_DIR)/include/hsa -I$(TAU_ROCTRACER)/include/roctracer -I$(TAU_ROCTRACER)/include -DHIP_VDI=1 -DHSA_DEPRECATED="" -DHSA_LARGE_MODEL="" -DAMD_INTERNAL_BUILD -D__HIP_PLATFORM_HCC__ #ENDIF# #ROCTRACER_HSA#PROFILEOPT115 = -DTAU_ENABLE_ROCTRACER -DTAU_GPU -I$(TAU_ROCTRACER)/include/roctracer -I$(TAU_ROCTRACER)/include -I$(TAU_ROCTRACER)/inc -I$(TAU_ROCTRACER) -I$(TAU_ROCM_DIR)/hsa/include/hsa -DAMD_INTERNAL_BUILD -DLOCAL_BUILD=1 -DHIP_VDI=1 #ENDIF# #HIP#PROFILEOPT116 = -DTAU_ENABLE_HIP -I$(TAU_HIP)/include -D__HIP_PLATFORM_HCC__ #ENDIF# +#SUPPRESS_PTHREAD_CREATE_WRAPPER#PROFILEOPT117 = -DTAU_SUPPRESS_PTHREAD_CREATE_WRAPPER #ENDIF# MRNET_ROOT= MRNET_LW_OPTS= @@ -1384,7 +1385,7 @@ PROFILEOPTS = $(PROFILEOPT1) $(PROFILEOPT2) $(PROFILEOPT3) $(PROFILEOPT4) \ $(PROFILEOPT104) 
$(PROFILEOPT106) $(PROFILEOPT107) \ $(PROFILEOPT108) $(PROFILEOPT109) $(PROFILEOPT110) \ $(PROFILEOPT111) $(PROFILEOPT112) $(PROFILEOPT113) $(PROFILEOPT114) \ - $(PROFILEOPT115) $(PROFILEOPT116) $(TRACEOPT) \ + $(PROFILEOPT115) $(PROFILEOPT116) $(PROFILEOPT117) $(TRACEOPT) \ $(TAU_SOS_INCLUDE_OPTS) $(TAU_ADIOS_INCLUDE_OPTS) \ $(TAU_OTF2_INCLUDE_OPTS) $(TAU_CALIPER_INCLUDE_OPTS) \ $(TAU_CORESYMBOLICATION_INCLUDE_OPTS) $(TAU_ELF_BFD_PROFILEOPT) \ diff --git a/plugins/examples/Tau_plugin_adios2_trace.cpp b/plugins/examples/Tau_plugin_adios2_trace.cpp index 147a2588e..0bcbe9c6e 100644 --- a/plugins/examples/Tau_plugin_adios2_trace.cpp +++ b/plugins/examples/Tau_plugin_adios2_trace.cpp @@ -1311,8 +1311,9 @@ int Tau_plugin_adios2_function_exit(Tau_plugin_event_function_exit_data_t* data) static std::mutex timer_lock; if (tau_plugin::thePluginOptions().env_periodic && !tau_plugin::thePluginOptions().env_one_file) { - // is it time to write? (and not in an MPI or ADIOS call) + // is it time to write? 
(and thread 0, and not in an MPI or ADIOS call) if (steady_clock::now() > next_write && !plugin_done && + data->tid == 0 && strstr(data->timer_name, "MPI_") == NULL) { bool mine = false; // only let one thread do this diff --git a/plugins/llvm/src/Instrument.cpp b/plugins/llvm/src/Instrument.cpp index 4e862d265..02adc524a 100644 --- a/plugins/llvm/src/Instrument.cpp +++ b/plugins/llvm/src/Instrument.cpp @@ -122,6 +122,9 @@ TauDryRun("tau-dry-run", +auto TauInitFunc = "Tau_init"; // arguments to pass: argc, argv +auto TauSetNodeFunc = "Tau_set_node"; // argument to pass: 0 + // Demangling technique borrowed/modified from // https://github.com/eklitzke/demangle/blob/master/src/demangle.cc static StringRef normalize_name(StringRef mangled_name) { @@ -575,7 +578,38 @@ static FunctionCallee getVoidFunc(StringRef funcname, LLVMContext &context, Modu #endif // LLVM_VERSION_MAJOR <= 8 errs() << "Adding instrumentation in " << prettyname << '\n'; + bool mutated = false; // TODO + + /* Add TAU init in main */ + + if( 0 == prettyname.compare( "main" ) ){ + errs() << "\tmain function: adding init\n"; + auto initfun = getVoidFunc( TauInitFunc, context, module ); + auto setnodefun = getVoidFunc( TauSetNodeFunc, context, module ); + + auto beg = inst_begin( &func ); + Instruction* b = &*beg; + IRBuilder<> b4( b ); + + /* TauInitFunc takes two arguments: argc and argv */ + + SmallVector mainArgsVect; + for( Argument &arg : func.args() ){ + mainArgsVect.push_back( &arg ); + } + b4.CreateCall( initfun, mainArgsVect ); + + /* TauSetNodeFunc takes one argument: 0 */ + + Value* z = ConstantInt::get( context, llvm::APInt( 32, 0, false ) ); + SmallVector zero{ z }; + b4.CreateCall( setnodefun, zero ); + + mutated = true; + } + /* Add regular TAU calls */ + std::string filename = getFilename( func ); std::string location( "[{" + getFilename( func ) + "} {" + getLineAndCol( func ) + "}]" ); @@ -584,8 +618,6 @@ static FunctionCallee getVoidFunc(StringRef funcname, LLVMContext &context, Modu 
Instruction* i = &*pi; IRBuilder<> before( i ); - bool mutated = false; // TODO - // This is the recommended way of creating a string constant (to be used // as an argument to runtime functions) Value *strArg = before.CreateGlobalStringPtr( ( prettyname + " " + location ).str() ); diff --git a/src/Profile/TauGenericAPI.cpp b/src/Profile/TauGenericAPI.cpp index a7fc1d07a..8a95f07b3 100644 --- a/src/Profile/TauGenericAPI.cpp +++ b/src/Profile/TauGenericAPI.cpp @@ -87,7 +87,8 @@ void ps_tool_initialize(void) { /* Disable throttling, because if users use ps_tool_stop_current(), * throttling will cause Tau_start() to do nothing for throttled events, * but Tau_global_stop() will stop the timer on the stop of the stack */ - TauEnv_set_throttle(0); + /* Then again, if someone calls ps_tool_stop_current(), they get what they deserve */ + // TauEnv_set_throttle(0); Tau_create_top_level_timer_if_necessary(); } diff --git a/src/wrappers/pthread/pthread_wrap.c b/src/wrappers/pthread/pthread_wrap.c index 36b5b0923..f97615a29 100644 --- a/src/wrappers/pthread/pthread_wrap.c +++ b/src/wrappers/pthread/pthread_wrap.c @@ -141,11 +141,22 @@ int pthread_barrier_wait(pthread_barrier_t * barrier) #else // Wrap via the the link line. 
+#ifndef TAU_SUPPRESS_PTHREAD_CREATE_WRAPPER int __real_pthread_create(pthread_t *, const pthread_attr_t *, start_routine_p, void *); int __wrap_pthread_create(pthread_t * thread, const pthread_attr_t * attr, start_routine_p start_routine, void * arg) { return tau_pthread_create_wrapper(__real_pthread_create, thread, attr, start_routine, arg); } +/* +#else +int __real___wrap_pthread_create(pthread_t *, const pthread_attr_t *, start_routine_p, void *); +int __wrap___wrap_pthread_create(pthread_t * thread, const pthread_attr_t * attr, start_routine_p start_routine, void * arg) +{ + printf("Inside __wrap___wrap_pthread_create\n"); + return tau_pthread_create_wrapper(__real___wrap_pthread_create, thread, attr, start_routine, arg); +} +*/ +#endif /* TAU_SUPPRESS_PTHREAD_CREATE_WRAPPER */ int __real_pthread_join(pthread_t, void **); int __wrap_pthread_join(pthread_t thread, void **retval) diff --git a/utils/FixMakefile b/utils/FixMakefile index 453bda5cc..07681b8f3 100755 --- a/utils/FixMakefile +++ b/utils/FixMakefile @@ -1688,6 +1688,10 @@ case $1 in echo "NOTE: Using pthreads as the thread package. ***" echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout ;; + SUPPRESS_PTHREAD_CREATE_WRAPPER) + echo "NOTE: Not generating the pthread_create wrapper" + echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout + ;; TBB_AVAILABLE) echo "NOTE: Using pthreads as the thread package for TBB support. ***" echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout