diff --git a/configure b/configure
index d10cafccb..49dfff599 100755
--- a/configure
+++ b/configure
@@ -686,6 +686,7 @@ usage() {
echo "Other Options:"
echo "-iowrapper .................................... Build POSIX IO Wrapper."
echo "-pthread .................................. Use pthread thread package."
+ echo "-no_pthread_create ................... Suppress pthread_create wrapper."
echo "-papi= ............... Specify location of PAPI (Performance API)."
echo "-likwid= ............................. Specify location of LIKWID."
echo "-otf= ....... Specify location of Open Trace Format (OTF) Package."
@@ -751,8 +752,8 @@ for arg in "$@"; do
echo " this is used for cross-compilation"
echo "-pdt= ........ Specify location of PDT (Program Database Toolkit)."
echo "-pdt_c++= ............ specify a different PDT C++ compiler."
- echo " options [CC|KCC|g++|*xlC*|cxx|pgc++|pgcpp|FCC|guidec++|aCC|c++|ecpc|"
- echo " clang++|bgclang++|g++4|icpc|icpx|scgcc|pathCC|orCC]."
+ echo " options [CC|KCC|g++|*xlC*|cxx|pgc++|pgcpp|FCC|guidec++|aCC|c++|ecpc|"
+ echo " clang++|bgclang++|g++4|icpc|icpx|scgcc|pathCC|orCC]."
echo "-pdtcompdir= . specify a different PDT compiler directory."
echo "-pdtarchdir= . specify a different PDT architecture directory."
echo "-useropt='' .......... arguments to compilers (defaults to -O2)."
@@ -765,6 +766,7 @@ for arg in "$@"; do
echo " arm_linux|arm_android]"
echo "-iowrapper .................................... Build POSIX IO Wrapper."
echo "-pthread .................................. Use pthread thread package."
+ echo "-no_pthread_create ................... Suppress pthread_create wrapper."
echo "-papithread .................................. Use PAPI thread package."
echo "-charm= .............................. Use charm++ thread package."
echo "-sproc .................................. Use SGI sproc thread package."
@@ -1033,6 +1035,7 @@ sicortex=no
extradir=
extrashlibopts=
pthread=no
+suppress_pthread_create_wrapper=no
tbb=no
papithread=no
papipfm=no
@@ -1408,6 +1411,11 @@ for arg in "$@"; do
shift
;;
+ -no_pthread_create)
+ suppress_pthread_create_wrapper=yes
+ shift
+ ;;
+
-pthread)
pthread=yes
shift
@@ -3557,6 +3565,10 @@ if [ "$c_compiler" != "" -a "$c_compiler" != "default" ]; then
export TAU_CCOM
fi
+if [ "$cxx_compiler" = "hipcc" ]; then
+ suppress_pthread_create_wrapper=yes
+fi
+
echo "-------------------- TAU configure script ------------------"
@@ -10477,6 +10489,9 @@ fi
######################################################################
+if [ $suppress_pthread_create_wrapper = yes ]; then
+ fixmakeargs="$fixmakeargs SUPPRESS_PTHREAD_CREATE_WRAPPER"
+fi
# If TBB support is requested, then make that the tag, not pthread
if [ $tbb = yes ] ; then
fixmakeargs="$fixmakeargs PTHREAD_AVAILABLE ptdir=$ptdir"
diff --git a/include/Makefile.skel b/include/Makefile.skel
index 486fa382b..f4aeabbc4 100644
--- a/include/Makefile.skel
+++ b/include/Makefile.skel
@@ -745,6 +745,7 @@ JDKBINDIR = $(JDKDIR)/bin
#ROCTRACER#PROFILEOPT115 = -DTAU_ENABLE_ROCTRACER -DTAU_GPU -I$(TAU_ROCM_DIR)/include -I$(TAU_ROCM_DIR)/include/hsa -I$(TAU_ROCTRACER)/include/roctracer -I$(TAU_ROCTRACER)/include -DHIP_VDI=1 -DHSA_DEPRECATED="" -DHSA_LARGE_MODEL="" -DAMD_INTERNAL_BUILD -D__HIP_PLATFORM_HCC__ #ENDIF#
#ROCTRACER_HSA#PROFILEOPT115 = -DTAU_ENABLE_ROCTRACER -DTAU_GPU -I$(TAU_ROCTRACER)/include/roctracer -I$(TAU_ROCTRACER)/include -I$(TAU_ROCTRACER)/inc -I$(TAU_ROCTRACER) -I$(TAU_ROCM_DIR)/hsa/include/hsa -DAMD_INTERNAL_BUILD -DLOCAL_BUILD=1 -DHIP_VDI=1 #ENDIF#
#HIP#PROFILEOPT116 = -DTAU_ENABLE_HIP -I$(TAU_HIP)/include -D__HIP_PLATFORM_HCC__ #ENDIF#
+#SUPPRESS_PTHREAD_CREATE_WRAPPER#PROFILEOPT117 = -DTAU_SUPPRESS_PTHREAD_CREATE_WRAPPER #ENDIF#
MRNET_ROOT=
MRNET_LW_OPTS=
@@ -1384,7 +1385,7 @@ PROFILEOPTS = $(PROFILEOPT1) $(PROFILEOPT2) $(PROFILEOPT3) $(PROFILEOPT4) \
$(PROFILEOPT104) $(PROFILEOPT106) $(PROFILEOPT107) \
$(PROFILEOPT108) $(PROFILEOPT109) $(PROFILEOPT110) \
$(PROFILEOPT111) $(PROFILEOPT112) $(PROFILEOPT113) $(PROFILEOPT114) \
- $(PROFILEOPT115) $(PROFILEOPT116) $(TRACEOPT) \
+ $(PROFILEOPT115) $(PROFILEOPT116) $(PROFILEOPT117) $(TRACEOPT) \
$(TAU_SOS_INCLUDE_OPTS) $(TAU_ADIOS_INCLUDE_OPTS) \
$(TAU_OTF2_INCLUDE_OPTS) $(TAU_CALIPER_INCLUDE_OPTS) \
$(TAU_CORESYMBOLICATION_INCLUDE_OPTS) $(TAU_ELF_BFD_PROFILEOPT) \
diff --git a/plugins/examples/Tau_plugin_adios2_trace.cpp b/plugins/examples/Tau_plugin_adios2_trace.cpp
index 147a2588e..0bcbe9c6e 100644
--- a/plugins/examples/Tau_plugin_adios2_trace.cpp
+++ b/plugins/examples/Tau_plugin_adios2_trace.cpp
@@ -1311,8 +1311,9 @@ int Tau_plugin_adios2_function_exit(Tau_plugin_event_function_exit_data_t* data)
static std::mutex timer_lock;
if (tau_plugin::thePluginOptions().env_periodic &&
!tau_plugin::thePluginOptions().env_one_file) {
- // is it time to write? (and not in an MPI or ADIOS call)
+ // is it time to write? (and thread 0, and not in an MPI or ADIOS call)
if (steady_clock::now() > next_write && !plugin_done &&
+ data->tid == 0 &&
strstr(data->timer_name, "MPI_") == NULL) {
bool mine = false;
// only let one thread do this
diff --git a/plugins/llvm/src/Instrument.cpp b/plugins/llvm/src/Instrument.cpp
index 4e862d265..02adc524a 100644
--- a/plugins/llvm/src/Instrument.cpp
+++ b/plugins/llvm/src/Instrument.cpp
@@ -122,6 +122,9 @@ TauDryRun("tau-dry-run",
+auto TauInitFunc = "Tau_init"; // arguments to pass: argc, argv
+auto TauSetNodeFunc = "Tau_set_node"; // argument to pass: 0
+
// Demangling technique borrowed/modified from
// https://github.com/eklitzke/demangle/blob/master/src/demangle.cc
static StringRef normalize_name(StringRef mangled_name) {
@@ -575,7 +578,38 @@ static FunctionCallee getVoidFunc(StringRef funcname, LLVMContext &context, Modu
#endif // LLVM_VERSION_MAJOR <= 8
errs() << "Adding instrumentation in " << prettyname << '\n';
+ bool mutated = false; // TODO
+
+ /* Add TAU init in main */
+
+ if( 0 == prettyname.compare( "main" ) ){ // only the program entry point gets the init calls
+ errs() << "\tmain function: adding init\n";
+ auto initfun = getVoidFunc( TauInitFunc, context, module );
+ auto setnodefun = getVoidFunc( TauSetNodeFunc, context, module );
+
+ auto beg = inst_begin( &func );
+ Instruction* b = &*beg;
+ IRBuilder<> b4( b ); // builder positioned before main's first instruction
+
+ /* TauInitFunc takes two arguments: argc and argv */
+
+ SmallVector<Value *, 2> mainArgsVect; // element type must be spelled out: SmallVector<T, N>
+ for( Argument &arg : func.args() ){ // forward every formal parameter of main as-is
+ mainArgsVect.push_back( &arg );
+ }
+ b4.CreateCall( initfun, mainArgsVect );
+
+ /* TauSetNodeFunc takes one argument: 0 */
+
+ Value* z = ConstantInt::get( context, llvm::APInt( 32, 0, false ) ); // 32-bit constant 0
+ SmallVector<Value *, 1> zero{ z };
+ b4.CreateCall( setnodefun, zero );
+
+ mutated = true; // the function body was modified
+ }
+ /* Add regular TAU calls */
+
std::string filename = getFilename( func );
std::string location( "[{" + getFilename( func ) + "} {" + getLineAndCol( func ) + "}]" );
@@ -584,8 +618,6 @@ static FunctionCallee getVoidFunc(StringRef funcname, LLVMContext &context, Modu
Instruction* i = &*pi;
IRBuilder<> before( i );
- bool mutated = false; // TODO
-
// This is the recommended way of creating a string constant (to be used
// as an argument to runtime functions)
Value *strArg = before.CreateGlobalStringPtr( ( prettyname + " " + location ).str() );
diff --git a/src/Profile/TauGenericAPI.cpp b/src/Profile/TauGenericAPI.cpp
index a7fc1d07a..8a95f07b3 100644
--- a/src/Profile/TauGenericAPI.cpp
+++ b/src/Profile/TauGenericAPI.cpp
@@ -87,7 +87,8 @@ void ps_tool_initialize(void) {
/* Disable throttling, because if users use ps_tool_stop_current(),
* throttling will cause Tau_start() to do nothing for throttled events,
* but Tau_global_stop() will stop the timer on the stop of the stack */
- TauEnv_set_throttle(0);
+ /* Throttling is no longer forced off here; callers of ps_tool_stop_current() must tolerate the mismatch described above. */
+ // TauEnv_set_throttle(0);
Tau_create_top_level_timer_if_necessary();
}
diff --git a/src/wrappers/pthread/pthread_wrap.c b/src/wrappers/pthread/pthread_wrap.c
index 36b5b0923..f97615a29 100644
--- a/src/wrappers/pthread/pthread_wrap.c
+++ b/src/wrappers/pthread/pthread_wrap.c
@@ -141,11 +141,22 @@ int pthread_barrier_wait(pthread_barrier_t * barrier)
#else // Wrap via the the link line.
+#ifndef TAU_SUPPRESS_PTHREAD_CREATE_WRAPPER
int __real_pthread_create(pthread_t *, const pthread_attr_t *, start_routine_p, void *);
int __wrap_pthread_create(pthread_t * thread, const pthread_attr_t * attr, start_routine_p start_routine, void * arg)
{
return tau_pthread_create_wrapper(__real_pthread_create, thread, attr, start_routine, arg);
}
+/* Disabled alternative (inside this comment, so not compiled):
+#else
+int __real___wrap_pthread_create(pthread_t *, const pthread_attr_t *, start_routine_p, void *);
+int __wrap___wrap_pthread_create(pthread_t * thread, const pthread_attr_t * attr, start_routine_p start_routine, void * arg)
+{
+ printf("Inside __wrap___wrap_pthread_create\n");
+ return tau_pthread_create_wrapper(__real___wrap_pthread_create, thread, attr, start_routine, arg);
+}
+*/
+#endif /* TAU_SUPPRESS_PTHREAD_CREATE_WRAPPER */
int __real_pthread_join(pthread_t, void **);
int __wrap_pthread_join(pthread_t thread, void **retval)
diff --git a/utils/FixMakefile b/utils/FixMakefile
index 453bda5cc..07681b8f3 100755
--- a/utils/FixMakefile
+++ b/utils/FixMakefile
@@ -1688,6 +1688,10 @@ case $1 in
echo "NOTE: Using pthreads as the thread package. ***"
echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout
;;
+ SUPPRESS_PTHREAD_CREATE_WRAPPER)
+ echo "NOTE: Not generating the pthread_create wrapper"
+ echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout
+ ;;
TBB_AVAILABLE)
echo "NOTE: Using pthreads as the thread package for TBB support. ***"
echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout