diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..f432a2169 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.o +*.a +*.so +a.out diff --git a/Changes b/Changes index 8a52bf15f..135159344 100644 --- a/Changes +++ b/Changes @@ -49,6 +49,12 @@ Version 2.21 changes (from 2.20): 35. Reduced MPI overhead using a hash table, fixed BGP & BGQTIMERS. 36. Added support for ARM (arm_linux) and Intel MIC. 37. Added support for OTF2 (tau2otf2 -> tau2otf). +38. Added support for UPC wrapper generator with communication tracking. +39. Improved ParaProf 3D displays with wraparound torus configurations. +40. Added support for Fujitsu FX10. +41. Added support for MPC. +42. Added support for LLVM. +43. Added support for TAU_LITE=1 for reduced overhead profiling. diff --git a/Makefile b/Makefile index d7260cb85..da6fcaa14 100644 --- a/Makefile +++ b/Makefile @@ -82,8 +82,13 @@ TAUCUDA=src/wrappers/cuda #CUPTI#TAU_CUPTI_AVAIL=utils/tau_cupti_avail #ENDIF# TAUOPENCL=src/wrappers/opencl MEMORYWRAPPER=src/wrappers/memory -#GASNET#TAU_UPCC_WRAPPER=src/wrappers/upc/bupc/upcr #ENDIF# -#UPCC#UPCWRAPPER=$(TAU_UPCC_WRAPPER) #ENDIF# + +#BUPC#TAU_UPCC_WRAPPER=src/wrappers/upc/bupc/upcr #ENDIF# +#BUPC#UPCWRAPPER=$(TAU_UPCC_WRAPPER) #ENDIF# + +#GUPC#TAU_UPCC_WRAPPER=src/wrappers/upc/gupc #ENDIF# +#GUPC#UPCWRAPPER=$(TAU_UPCC_WRAPPER) #ENDIF# + #COMPINST_CRAYCC#UPCWRAPPER=src/wrappers/upc/crayupc #ENDIF# #TAU_PTHREAD_WRAP#PTHREAD_WRAPPER=src/wrappers/pthread #ENDIF# diff --git a/configure b/configure index 24cf3ce16..259dca2ec 100755 --- a/configure +++ b/configure @@ -650,7 +650,7 @@ extrashlibopts= pthread=no papithread=no papipfm=no -gnu_upc=no +gupc=no papiperfctr=no papisubdir= charm=no @@ -807,7 +807,7 @@ for arg in "$@"; do -cc=*) myarg=`echo $arg | sed 's/-cc=//'` - if [ $myarg = cc -o $myarg = gcc -o $myarg = gcc4 -o $myarg = KCC -o $myarg = pgcc -o $myarg = guidec -o $myarg = xlc -o $myarg = ecc -o $myarg = icc -o $myarg = powerpc64-linux-gcc -o $myarg = pathcc -o 
$myarg = fcc -o $myarg = orcc -o $myarg = qk-pgcc -o $myarg = scgcc -o $myarg = scpathcc -o $myarg = mips64el-gentoo-linux-gnu-gcc -o $myarg = powerpc64-bgq-linux-gcc -o $myarg = powerpc-bgp-linux-gcc -o $myarg = x86_64-w64-mingw32-gcc -o $myarg = mpiicc -o $myarg = mpifcc -o $myarg = mpifccpx -o $myarg = mpc_cc ] + if [ $myarg = cc -o $myarg = gcc -o $myarg = gcc4 -o $myarg = KCC -o $myarg = pgcc -o $myarg = guidec -o $myarg = xlc -o $myarg = ecc -o $myarg = icc -o $myarg = powerpc64-linux-gcc -o $myarg = pathcc -o $myarg = fcc -o $myarg = orcc -o $myarg = opencc -o $myarg = qk-pgcc -o $myarg = scgcc -o $myarg = scpathcc -o $myarg = mips64el-gentoo-linux-gnu-gcc -o $myarg = powerpc64-bgq-linux-gcc -o $myarg = powerpc-bgp-linux-gcc -o $myarg = x86_64-w64-mingw32-gcc -o $myarg = mpiicc -o $myarg = mpifcc -o $myarg = mpifccpx -o $myarg = clang -o $myarg = bgclang -o $myarg = mpc_cc -o $myarg = mpicc ] then c_compiler=$myarg else @@ -815,7 +815,7 @@ for arg in "$@"; do testmp=`echo $myarg | sed -e 's/^mp//'` if [ "y$testxlc" = "y$myarg" -a "y$testmp" = "y$myarg" ] then - echo "WARNING: valid options for cc are 'cc', 'gcc', 'gcc4', 'KCC', 'guidec', '*xlc*', 'ecc', 'icc', 'pathcc', 'fcc', 'orcc', 'qk-pgcc', 'powerpc64-bgq-linux-gcc', 'powerpc-bgp-linux-gcc', 'x86_64-w64-mingw32-gcc', 'mpiicc', and 'pgcc' " + echo "WARNING: valid options for cc are 'cc', 'gcc', 'gcc4', 'KCC', 'guidec', '*xlc*', 'ecc', 'icc', 'pathcc', 'fcc', 'orcc', 'opencc', 'qk-pgcc', 'powerpc64-bgq-linux-gcc', 'powerpc-bgp-linux-gcc', 'x86_64-w64-mingw32-gcc', 'mpiicc', 'pgcc' , 'mpc_cc', 'mpifcc', 'mpifccpx', 'clang', 'bgclang', and 'mpicc'" # exit 1 else if [ "y$testxlc" != "y$myarg" ] @@ -831,7 +831,7 @@ for arg in "$@"; do -c++=*) myarg=`echo $arg | sed 's/-c++=//'` - if [ $myarg = CC -o $myarg = KCC -o $myarg = g++ -o $myarg = g++4 -o $myarg = cxx -o $myarg = NCC -o $myarg = pgCC -o $myarg = pgcpp -o $myarg = egcs -o $myarg = FCC -o $myarg = guidec++ -o $myarg = aCC -o $myarg = c++ -o $myarg = 
ecpc -o $myarg = icpc -o $myarg = powerpc64-linux-g++ -o $myarg = pathCC -o $myarg = orCC -o $myarg = qk-pgCC -o $myarg = scg++ -o $myarg = scpathCC -o $myarg = mips64el-gentoo-linux-gnu-g++ -o $myarg = powerpc-bgp-linux-g++ -o $myarg = powerpc64-bgq-linux-g++ -o $myarg = mpicxx -o $myarg = x86_64-w64-mingw32-g++ -o $myarg = mpiicpc -o $myarg = mpiFCC -o $myarg = mpiFCCpx -o $myarg = mpc_cxx ] + if [ $myarg = CC -o $myarg = KCC -o $myarg = g++ -o $myarg = g++4 -o $myarg = cxx -o $myarg = NCC -o $myarg = pgCC -o $myarg = pgcpp -o $myarg = egcs -o $myarg = FCC -o $myarg = guidec++ -o $myarg = aCC -o $myarg = c++ -o $myarg = ecpc -o $myarg = icpc -o $myarg = powerpc64-linux-g++ -o $myarg = pathCC -o $myarg = orCC -o $myarg = openCC -o $myarg = qk-pgCC -o $myarg = scg++ -o $myarg = scpathCC -o $myarg = mips64el-gentoo-linux-gnu-g++ -o $myarg = powerpc-bgp-linux-g++ -o $myarg = powerpc64-bgq-linux-g++ -o $myarg = mpicxx -o $myarg = x86_64-w64-mingw32-g++ -o $myarg = mpiicpc -o $myarg = mpiFCC -o $myarg = mpiFCCpx -o $myarg = mpc_cxx -o $myarg = clang++ -o $myarg = bgclang++ -o $myarg = mpicxx ] then cxx_compiler=$myarg else @@ -839,7 +839,7 @@ for arg in "$@"; do testmp=`echo $myarg | sed -e 's/^mp//'` if [ "y$testxlc" = "y$myarg" -a "y$testmp" = "y$myarg" ] then - echo "WARNING: valid options for c++ are 'CC', 'KCC', 'g++', 'g++4', '*xlC*', 'cxx' , 'NCC', 'egcs', 'pgCC', 'pgcpp', 'FCC', 'guidec++', 'aCC', 'ecpc', 'icpc', 'pathCC', 'c++', 'qk-pgCC', 'x86_64-w64-mingw32-g++', 'mpiicpc', 'mpiFCC', 'mpiFCCpx', and 'orCC'" + echo "WARNING: valid options for c++ are 'CC', 'KCC', 'g++', 'g++4', '*xlC*', 'cxx' , 'NCC', 'egcs', 'pgCC', 'pgcpp', 'FCC', 'guidec++', 'aCC', 'ecpc', 'icpc', 'pathCC', 'c++', 'qk-pgCC', 'x86_64-w64-mingw32-g++', 'mpiicpc', 'mpiFCC', 'mpiFCCpx', 'orCC', 'openCC', 'mpc_cxx', 'clang++', 'bgclang++', and 'mpicxx' " # exit 1 else if [ "y$testxlc" != "y$myarg" ] @@ -863,14 +863,14 @@ for arg in "$@"; do -fortran=*) myarg=`echo $arg | sed 's/-fortran=//'` - 
if [ $myarg = gnu -o $myarg = sgi -o $myarg = ibm -o $myarg = ibm64 -o $myarg = hp -o $myarg = cray -o $myarg = pgi -o $myarg = absoft -o $myarg = fujitsu -o $myarg = sun -o $myarg = compaq -o $myarg = kai -o $myarg = hitachi -o $myarg = intel -o $myarg = nec -o $myarg = absoft -o $myarg = lahey -o $myarg = nagware -o $myarg = nag -o $myarg = pathscale -o $myarg = gfortran -o $myarg = gfortran4 -o $myarg = g95 -o $myarg = open64 -o $myarg = mpiifort -o $myarg = mpifrtpx -o $myarg = mpifrt -o $myarg = frt ] ; then + if [ $myarg = gnu -o $myarg = sgi -o $myarg = ibm -o $myarg = ibm64 -o $myarg = hp -o $myarg = cray -o $myarg = pgi -o $myarg = absoft -o $myarg = fujitsu -o $myarg = sun -o $myarg = compaq -o $myarg = kai -o $myarg = hitachi -o $myarg = intel -o $myarg = nec -o $myarg = absoft -o $myarg = lahey -o $myarg = nagware -o $myarg = nag -o $myarg = pathscale -o $myarg = gfortran -o $myarg = gfortran4 -o $myarg = g95 -o $myarg = open64 -o $myarg = openf90 -o $myarg = mpiifort -o $myarg = mpifrtpx -o $myarg = mpifrt -o $myarg = frt -o $myarg = mpif90 ] ; then if [ $myarg = gnu ] ; then fortran_compiler=gfortran else fortran_compiler=$myarg fi else - echo "WARNING: valid options for fortran are 'gnu', 'sgi', 'ibm', 'ibm64', 'hp', 'cray', 'pgi', 'absoft', 'fujitsu', 'sun', 'compaq', 'kai', 'hitachi', 'intel', 'nec' , 'absoft', 'lahey', 'nag/nagware', 'pathscale', 'gfortran', 'g95', 'open64', 'mpiifort', 'mpifrtpx', 'frtpx', 'frt', and 'mpifrt' " + echo "WARNING: valid options for fortran are 'gnu', 'sgi', 'ibm', 'ibm64', 'hp', 'cray', 'pgi', 'absoft', 'fujitsu', 'sun', 'compaq', 'kai', 'hitachi', 'intel', 'nec' , 'absoft', 'lahey', 'nag/nagware', 'pathscale', 'gfortran', 'g95', 'open64', 'openf90', 'mpiifort', 'mpifrtpx', 'frtpx', 'frt', 'mpifrt', or 'mpif90' " exit 1 fi shift @@ -1826,7 +1826,7 @@ for arg in "$@"; do echo "Error: Cannot access GPU include directory $cudainclude" exit 1 fi - useropt="$useropt#-DTAU_MAX_THREADS=32" +# 
useropt="$useropt#-DTAU_MAX_THREADS=32" cuda=yes shift ;; @@ -1848,7 +1848,7 @@ for arg in "$@"; do exit 1 fi if [ -f "$openclinclude/CL/cl.h" ] ; then - useropt="$useropt#-DTAU_MAX_THREADS=32" +# useropt="$useropt#-DTAU_MAX_THREADS=32" fixmakeargs="$fixmakeargs OPENCL TAU_USE_GPU openclinclude=$openclinclude" else echo "Error: Cannot find GPU headers, TAU currently supports either CUDA or OpenCL" @@ -1899,9 +1899,20 @@ for arg in "$@"; do esac done - - - +if [ "x$upcnetwork" = "x" ] ; then + if [ "$mpi" = "no" ] ; then + echo "NOTE: UPC network and MPI not specified. Assuming -upcnetwork=smp." + upcnetwork=smp + else + #echo "NOTE: UPC network not specified. Assuming -upcnetwork=mpi." + upcnetwork=mpi + fi +else + if [ "$upcnetwork" = mpi ] && [ "$mpi" = "no" ] ; then + echo "NOTE: Assuming -mpi because -upcnetwork=mpi." + mpi=yes + fi +fi # -- some functions @@ -2547,13 +2558,13 @@ case $machine in elif [ $cxx_compiler = mpicxx ] ; then cxx_compiler=mpicxx - c_compiler=mpicc - gnu=yes - if [ $machine = bgp ]; then - fixmakeargs="$fixmakeargs GNU BGP_GFORTRAN COMPINST_GNU" - else - fixmakeargs="$fixmakeargs GNU BGQ_GFORTRAN COMPINST_GNU GNU46PLUS" - fi +# c_compiler=mpicc +# gnu=yes +# if [ $machine = bgp ]; then +# fixmakeargs="$fixmakeargs GNU BGP_GFORTRAN COMPINST_GNU" +# else +# fixmakeargs="$fixmakeargs GNU BGQ_GFORTRAN COMPINST_GNU GNU46PLUS" +# fi fi @@ -2745,6 +2756,10 @@ case $machine in esac +if [ $cxx_compiler = clang++ -o $cxx_compiler = bgclang++ ]; then + fixmakeargs="$fixmakeargs CLANG" +fi + if [ "$machine" = "apple" ] ; then mkdir -p apple/lib cp tools/src/common/resources/tau-medium.png apple/lib/ @@ -3064,6 +3079,88 @@ if [ $debugprof = yes ] ; then fi +if [ $cxx_compiler = mpicxx ] +then + cxxloaded=`which mpicxx 2>/dev/null | wc -l` + mpi=yes + if [ $cxxloaded = 1 ] + then + c_compiler=`mpicc -show | awk '{ print $1;}' | sed -e 's@/.*/@@g'` + cxx_compiler=`mpicxx -show | awk '{ print $1;}' | sed -e 's@/.*/@@g'` + f90loaded=`which mpif90 
2>/dev/null | wc -l` + if [ $f90loaded = 1 ] + then + fortran_compiler=`mpif90 -show | awk '{ print $1;}' | sed -e 's@/.*/@@g'` + if [ $fortran_compiler = ifort ]; then + fortran_compiler=intel + fi + else + fortran_compiler=no + fi + + echo "NOTE: Found mpicxx in the path" + if [ $machine = bgq -o $machine = bgp ]; then + c_compiler=mpicc + cxx_compiler=mpicxx + else +# All other machines besides IBM BG* + is_internal_compiler_loaded=`which $cxx_compiler 2>/dev/null | wc -l` + if [ $is_internal_compiler_loaded = 0 ]; then + # Compiler is not loaded, use full path to it + c_compiler=`mpicc -show | awk '{ print $1;}' ` + cxx_compiler=`mpicxx -show | awk '{ print $1;}' ` + fortran_compiler=`mpif90 -show | awk '{ print $1;}' ` + fi + if [ $mpilibrary = no ]; then + mpilibrary=`mpicc -show | cut -d' ' -f2- | sed -e 's@ @#@g'` + fi + full_cxx_compiler=`mpicxx -show | awk '{ print $1; }'` + full_cc_compiler=`mpicc -show | awk '{ print $1; }'` + fixmakeargs="$fixmakeargs fullcxx=$full_cxx_compiler fullcc=$full_cc_compiler" + mpi=yes + fi +# Do not use the internal compiler from mpicxx when BGP/BGQ is used. + else + echo "NOTE: Didn't find mpicxx in the path, using default values" + gnu=yes + tauoptions="${tauoptions}-gnu" + if [ $machine = bgq ] ; then + if [ -r /bgsys/drivers/ppcfloor/comm/gcc/bin/mpicxx -a -r /bgsys/drivers/ppcfloor/comm/gcc/bin/mpicc ] ; then + echo "NOTE: Using /bgsys/drivers/ppcfloor/comm/gcc/bin/mpicxx" + cxx_compiler=/bgsys/drivers/ppcfloor/comm/gcc/bin/mpicxx + c_compiler=/bgsys/drivers/ppcfloor/comm/gcc/bin/mpicc + else + echo "ERROR: Please set your path to include mpicxx and reconfigure TAU." 
+ exit 1 + fi + elif [ $machine = bgp ]; then + if [ -r /soft/apps/gcc-4.3.2/comm/default/bin/mpicxx -a -r /soft/apps/gcc-4.3.2/comm/default/bin/mpicc ]; then + echo "NOTE: Using /soft/apps/gcc-4.3.2/comm/default/bin/mpicxx" + cxx_compiler=/soft/apps/gcc-4.3.2/comm/default/bin/mpicxx + c_compiler=/soft/apps/gcc-4.3.2/comm/default/bin/mpicc + else + echo "ERROR: Please set your path to include mpicxx and reconfigure TAU." + exit 1 + fi + fi + + fi +# Check for BG/P or BG/Q for GNU +# is_gnu_compiler=`$cxx_compiler -show 2>/dev/null | awk ' { print $1;}' | xargs grep g++ | wc -l' +# if [ "x$is_gnu_compiler" = "x1" ]; then +# gnu=yes +# if [ $machine = bgp ]; then +# fixmakeargs="$fixmakeargs GNU BGP_GFORTRAN COMPINST_GNU" +# tauoptions="${tauoptions}-gnu" +# elif [ $machine = bgq ]; then +# fixmakeargs="$fixmakeargs GNU BGQ_GFORTRAN COMPINST_GNU GNU46PLUS" +# NOTE: FOUND GNU configuration" +# tauoptions="${tauoptions}-gnu" +# fi +# fi + +fi + ###################################################################### # Set default C++ compiler in all Makefiles case $cxx_compiler in @@ -3071,6 +3168,14 @@ case $cxx_compiler in echo "Default C++ compiler will be CC" fixmakeargs="$fixmakeargs USE_CFRONT" ;; + *llvm*|*clang*) + echo "Using CLANG Compilers" + fixmakeargs="$fixmakeargs CLANG" + if [ $machine = apple ]; then +# MPI on Apple OS X has issues with shared objects + fixmakeargs="$fixmakeargs NOSHARED APPLECXX" + fi + ;; *g++) echo "Default C++ compiler will be " \ @@ -3211,6 +3316,9 @@ case $cxx_compiler in FCC|mpiFCCpx|mpiFCC) echo "Default C++ compiler will be Fujitsu C++ Compiler" fujitsu=yes + orig_useropt="-O2" + useropt="-O2" +# -g interferes with optimizations for Fujitsu. 
fixmakeargs="$fixmakeargs FX" #mpiinc=`which mpiFCCpx | sed -e 's/\/bin\/mpiFCCpx$//'`/include mpiinc=`which mpiFCCpx | sed -e 's/\/bin\/mpiFCCpx$//'`/include/mpi/fujitsu @@ -3265,10 +3373,10 @@ case $cxx_compiler in fi ;; - orCC) + openCC|orCC) echo "Default C++ compiler will ORC Open64 orCC compiler" fixmakeargs="$fixmakeargs OPEN64ORC" - tauoptions="${tauoptions}-orcc" + tauoptions="${tauoptions}-open64" if [ $c_compiler = default ] then c_compiler=orcc @@ -3407,25 +3515,41 @@ case $cxx_compiler in fi ;; - mpicxx) - if [ $machine = bgq ] ; then - echo "Using GNU Compilers for BGQ" - cxx_compiler=/bgsys/drivers/ppcfloor/comm/gcc/bin/mpicxx - c_compiler=/bgsys/drivers/ppcfloor/comm/gcc/bin/mpicc - tauoptions="${tauoptions}-gnu" - elif [ $machine = bgp ] ; then - echo "Using GNU Compilers for BGP" -# CWL - 4.3.2 won't quite work out of the box. Requires LD_LIBRARY_PATH to -# be set appropriately to account for libgmp.so.10 -# cxx_compiler=/soft/apps/gcc-4.3.2/comm/default/bin/mpicxx -# c_compiler=/soft/apps/gcc-4.3.2/comm/default/bin/mpicc - cxx_compiler=/bgsys/drivers/ppcfloor/comm/xl/bin/mpicxx - c_compiler=/bgsys/drivers/ppcfloor/comm/xl/bin/mpicc - tauoptions="${tauoptions}-gnu" - else - echo "ERROR Unknown C++ compiler: $cxx_compiler" - exit 1 - fi + *mpicxx) + internal_compiler=`$cxx_compiler -show | awk '{ print $1;}'` + case $internal_compiler in + *g++) + if [ $machine = bgq ] ; then + echo "Using GNU Compilers for BGQ" + fixmakeargs="$fixmakeargs GNU BGQ_GFORTRAN COMPINST_GNU GNU46PLUS" + elif [ $machine = bgp ] ; then + echo "Using GNU Compilers for BGP" + fixmakeargs="$fixmakeargs GNU BGP_GFORTRAN COMPINST_GNU" + else + echo "ERROR Unknown C++ compiler: $cxx_compiler" + fi + ;; + *xlC*) + echo "Using IBM Compilers" + ibmxlc=yes + fixmakeargs="$fixmakeargs USE_IBMXLC COMPINST_XL" + extradir=`which xlf90 | sed s/xlf90/../` + fixmakeargs="$fixmakeargs IBM_FORTRAN" + + ;; + *clang*) + echo "Using CLANG Compilers" + fixmakeargs="$fixmakeargs CLANG" + if [ 
$machine = apple ]; then +# MPI on Apple OS X has issues with shared objects + fixmakeargs="$fixmakeargs NOSHARED" + echo "NOTE: Adding NOSHARED for clang mpicxx on Apple OS X" + fi + ;; + *) + echo "ERROR Unknown C++ compiler in mpicxx: $cxx_compiler" + ;; + esac ;; *) echo "ERROR Unknown C++ compiler: $cxx_compiler" @@ -3588,7 +3712,7 @@ case $fortran_compiler in guidef90=yes ;; - open64) + open64|openf90) echo "Default Fortran compiler will be ORC Open64 orf90" fixmakeargs="$fixmakeargs OPEN64ORC_FORTRAN" ;; @@ -3621,6 +3745,13 @@ if [ "x$full_cxx_compiler" = "x" ] then full_cxx_compiler=`which $cxx_compiler` full_cc_compiler=`which $c_compiler` + if [ $fujitsu = yes ] + then +# Fujitsu does not want full path to point to the absolute paths due to +# frequent updates to the compiler on that system. + full_cxx_compiler=$cxx_compiler + full_cc_compiler=$c_compiler + fi fi # This takes care of mpc_cxx setting this above if [ $shmem = yes ] @@ -3746,20 +3877,26 @@ then *) if [ $mpc = yes ] then - mpiinc=`which mpcrun | sed -e 's/\/bin\/mpcrun$//'`/include + mpiinc=`which mpcrun | sed -e 's/\/bin\/mpcrun$//' -e 's/\/bin64\/mpcrun$//'`/include else - mpiinc=`which mpirun | sed -e 's/\/bin\/mpirun$//'`/include - if [ -d $mpiinc ] + mpiinc=`which mpirun | sed -e 's/\/bin\/mpirun$//' -e 's/\/bin64\/mpirun$//'`/include + if [ -d $mpiinc -a -r $mpiinc/mpi.h ] then echo "Using MPI include directory $mpiinc" echo "machine is $machine" else - mpiinc=`which mpiexec | sed -e 's/\/bin\/mpiexec$//'`/include - if [ -d $mpiinc ] + mpiinc=`which mpiexec | sed -e 's/\/bin\/mpiexec$//' -e 's/\/bin64\/mpiexec$//'`/include + if [ -d $mpiinc -a -r $mpiinc/mpi.h ] then echo "Using MPI include directory $mpiinc" + else + mpiinc="" fi fi + if [ ! 
-r $mpiinc/mpi.h -a -r $mpiinc/mpich2/mpi.h ] ; then + mpiinc=$mpiinc/mpich2 + echo "Correction: Using MPI include directory $mpiinc instead" + fi fi ;; esac @@ -3883,13 +4020,15 @@ then then mpilib=`which mpcrun | sed -e 's/\/bin\/mpcrun$//'`/lib else - mpilib=`which mpirun | sed -e 's/\/bin\/mpirun$//'`/lib + mpilib=`which mpirun | sed -e 's/\/bin\/mpirun$//' -e 's/\/bin64\/mpirun$//'`/lib if [ ! -d $mpilib ] ; then # Try the ".../lib64" directory instead of ".../lib" - mpilib=`which mpirun | sed -e 's/\/bin\/mpirun$//'`/lib64 + mpilib=`which mpirun | sed -e 's/\/bin\/mpirun$//' -e 's/\/bin64\/mpirun$//'`/lib64 fi if [ -d $mpilib ] ; then echo "Using MPI lib directory $mpilib" + else + mpilib="" fi fi ;; @@ -4327,7 +4466,7 @@ fi if [ $upc_compiler = gcc -o $upc_compiler = upc ] then - gnu_upc=yes + gupc=yes fi if [ $c_compiler != gcc4 ] then @@ -5189,6 +5328,9 @@ EOF echo "yes" if [ -r $mpilib/libmpi.so -a -r $mpilib/libmpi++.so -a -r $mpilib/libsma.so -a -r $mpilib/libxmpi.so ] ; then # Don't enable this for SGI Altix. Their MPI2 is not complete + # SGI MPI2 is ok now. We can enable it. Perhaps... + useropt="$useropt#-DTAU_MPI_F_STATUSES_IGNORE_ABSENT" + fixmakeargs="$fixmakeargs MPI2 useropt=$useropt" echo "SGI MPI detected. Disabling MPI2 wrappers for SGI MPI" elif [ -r $mpilib/libfmpi.so -a -r $mpilib/libmpio.so -a -r $mpilib/libtvscampi.so ] ; then # Scali does not use MPI2 PMPI interface properly. 
@@ -6667,31 +6809,60 @@ if [ $vampirtrace = yes ] ; then fi fi +#if [ $scorep = yes ] ; then +# fixmakeargs="$fixmakeargs SCOREP scorepdir=$scorepdir" +# tauoptions="${tauoptions}-scorep" +# scorepCommonLibs="-lbfd#-liberty#-lotf2#-lscorep_utilities" +# if [ $mpi = yes ] ; then +# if [ $openmp = yes ] ; then +# fixmakeargs="$fixmakeargs SCOREPOMPI" +# scorepLibs="-L${scorepdir}/lib#-lscorep_mpi#$scorepCommonLibs" +# else +# fixmakeargs="$fixmakeargs SCOREPMPI" +# scorepLibs="-L${scorepdir}/lib#-lscorep_mpi#$scorepCommonLibs" +# fi +# else +# if [ $openmp = yes ] ; then +# fixmakeargs="$fixmakeargs SCOREPOMP" +# scorepLibs="-L${scorepdir}/lib#-lscorep_serial#$scorepCommonLibs" +# else +# scorepLibs="-L${scorepdir}/lib#-lscorep_serial#$scorepCommonLibs" +# fi +# fi + +# fixmakeargs="$fixmakeargs scorepmpilibs=$scorepLibs" +# fixmakeargs="$fixmakeargs scoreplibs=$scorepLibs" +# fixmakeargs="$fixmakeargs scorepincs=-I$scorepdir/include#-I$scorepdir/include/scorep" + + if [ $scorep = yes ] ; then fixmakeargs="$fixmakeargs SCOREP scorepdir=$scorepdir" tauoptions="${tauoptions}-scorep" - scorepCommonLibs="-lbfd#-liberty#-lotf2#-lscorep_utilities" if [ $mpi = yes ] ; then if [ $openmp = yes ] ; then fixmakeargs="$fixmakeargs SCOREPOMPI" - scorepLibs="-L${scorepdir}/lib#-lscorep_mpi#$scorepCommonLibs" + scorepLibs=`${scorepdir}/bin/scorep-config --hyb --libs` else fixmakeargs="$fixmakeargs SCOREPMPI" - scorepLibs="-L${scorepdir}/lib#-lscorep_mpi#$scorepCommonLibs" + scorepLibs=`${scorepdir}/bin/scorep-config --mpi --libs` fi else if [ $openmp = yes ] ; then fixmakeargs="$fixmakeargs SCOREPOMP" - scorepLibs="-L${scorepdir}/lib#-lscorep_serial#$scorepCommonLibs" + scorepLibs=`${scorepdir}/bin/scorep-config --omp --libs` else - scorepLibs="-L${scorepdir}/lib#-lscorep_serial#$scorepCommonLibs" + scorepLibs=`${scorepdir}/bin/scorep-config --seq --libs` fi fi - + scorepLibs=`echo $scorepLibs | sed "s/ /#/g"` + scorepLibs=`echo $scorepLibs | sed "s/,//g"` +echo $scorepLibs 
fixmakeargs="$fixmakeargs scorepmpilibs=$scorepLibs" fixmakeargs="$fixmakeargs scoreplibs=$scorepLibs" fixmakeargs="$fixmakeargs scorepincs=-I$scorepdir/include#-I$scorepdir/include/scorep" + + if [ $phase = yes ] ; then echo "********scorep and phase are on" fi @@ -6804,19 +6975,20 @@ if [ $upc_compiler = cc -o $upc_compiler = default ]; then fi if [ $upc = yes ]; then - tauoptions="${tauoptions}-upc" # If MPI is used -mpi already appears in the name. If it is not, add it - if [ "x$upcnetwork" != "xmpi" -a $upc_compiler = upcc ]; then + if [ "x$upcnetwork" != "xmpi" ]; then tauoptions="${tauoptions}-$upcnetwork" fi - if [ $upc_compiler = upcc ] - then - fixmakeargs="$fixmakeargs UPCC upcnetwork=$upcnetwork" - fi - if [ $upc_compiler = gcc -o $upc_compiler = upc ]; then - tauoptions="${tauoptions}-gnuupc" - fixmakeargs="$fixmakeargs GNU_UPC" + if [ "$upc_compiler" = "upcc" ] ; then + tauoptions="${tauoptions}-bupc" + fixmakeargs="$fixmakeargs BUPC upcnetwork=$upcnetwork" + elif [ $upc_compiler = gcc -o $upc_compiler = upc ]; then + tauoptions="${tauoptions}-gupc" + fixmakeargs="$fixmakeargs GUPC upcnetwork=$upcnetwork" + else + # default case: probably Cray upc + tauoptions="${tauoptions}-upc" fi fi diff --git a/examples/upc/Makefile b/examples/upc/Makefile index e92e39ef1..7cccb4329 100644 --- a/examples/upc/Makefile +++ b/examples/upc/Makefile @@ -1,4 +1,5 @@ -UPC=tau_upc.sh -tau_makefile=../../include/Makefile +UPC=tau_upc.sh -tau_makefile=../../include/Makefile -optTrackUPCR -optTauSelectFile=select.tau # +#-g -optCompInst APP=CAMEL # APP=upctest diff --git a/examples/upc/key b/examples/upc/key new file mode 100644 index 000000000..5b1c9a7ce --- /dev/null +++ b/examples/upc/key @@ -0,0 +1 @@ +0x4925e232 diff --git a/include/Makefile b/include/Makefile index c4c013c7d..048138295 100644 --- a/include/Makefile +++ b/include/Makefile @@ -180,21 +180,25 @@ F90_ABI = $(ABI) ############################################## # UPC Flags 
############################################## -#UPCC#UPCC_C_PREFIX=-Wc,#ENDIF# -#UPCC#UPCC_LINK_PREFIX=-Wl,#ENDIF# -#UPCC#TAU_UPCC= upcc $(UPCC_C_PREFIX)-fPIC "-inst-toolname=gasp-tau" -inst -network=$(UPCNETWORK) #ENDIF# -#GNU_UPC#TAU_UPCC= upc $(UPCC_C_PREFIX)-fPIC -inst #ENDIF# - -#UPCC#TAU_GASP_O=TauGASP.o #ENDIF# -#UPCC#TAU_GASPU_O=TauGASPU.o #ENDIF# -#GNU_UPC#TAU_GASP_O=TauGASP.o #ENDIF# -#GNU_UPC#TAU_GASPU_O=TauGASPU.o #ENDIF# + +#BUPC#UPCC_C_PREFIX=-Wc,#ENDIF# +#BUPC#UPCC_LINK_PREFIX=-Wl,#ENDIF# +#BUPC#TAU_UPCC=upcc -inst-toolname=gasp-tau -inst -network=$(UPCNETWORK) $(UPCC_C_PREFIX)-fPIC #ENDIF# +#BUPC#TAU_GASP_O=TauGASP.o #ENDIF# +#BUPC#TAU_GASPU_UPC=$(TAU_INC_DIR)/Profile/TauGASPU.upc #ENDIF# + +#GUPC#UPCC_C_PREFIX= #ENDIF# +#GUPC#UPCC_LINK_PREFIX=-Wl,#ENDIF# +#GUPC#TAU_UPCC=upc -fupc-instrument -fupc-debug -fPIC #ENDIF# +#GUPC#TAU_GASP_O=TauGASP.o #ENDIF# +#####GUPC#TAU_GASPU_UPC=$(TAU_INC_DIR)/Profile/TauGASPU.upc #ENDIF# + ############# Standard Defines ############## TAU_CC = $(CONFIG_CC) $(ABI) $(ISA) TAU_CXX = $(CONFIG_CXX) $(ABI) $(ISA) TAU_RUN_CC = $(FULL_CC) $(ABI) $(ISA) -#UPCC#TAU_RUN_CC = $(TAU_UPCC) #ENDIF# -#GNU_UPC#TAU_RUN_CC = $(TAU_UPCC) #ENDIF# +#BUPC#TAU_RUN_CC = $(TAU_UPCC) #ENDIF# +#GUPC#TAU_RUN_CC = $(TAU_UPCC) #ENDIF# TAU_RUN_CXX = $(FULL_CXX) $(ABI) $(ISA) TAU_INSTALL = /bin/cp TAU_SHELL = /bin/sh @@ -252,6 +256,7 @@ TAU_OPENMP_OPTION = #COMPAQCXX_OPENMP#TAU_OPENMP_OPTION = -omp #ENDIF# #IBMXLC_OPENMP#TAU_OPENMP_OPTION = -qsmp=omp -qthreaded #ENDIF# #OPEN64_OPENMP#TAU_OPENMP_OPTION = -mp #ENDIF# +#OPEN64_OPENMP#TAU_OPARI2_OPTION = --nodecl #ENDIF# #GUIDE#TAU_OPENMP_OPTION = #ENDIF# #PGIOPENMP#TAU_OPENMP_OPTION = -mp #ENDIF# #INTELOPENMP#TAU_OPENMP_OPTION = -openmp #ENDIF# @@ -307,6 +312,7 @@ TAU_OPENMP_OPTION = #PATHSCALE_FORTRAN#TAU_F90 = pathf90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #SC_PATHSCALE#TAU_F90 = scpathf95 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #OPEN64ORC_FORTRAN#TAU_F90 = orf90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# 
+#OPEN64ORC_FORTRAN#TAU_F90 = openf90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #NEC_FORTRAN#TAU_F90 = f90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #CRAYCNL#TAU_F90 = ftn $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #ZEPTO_GFORTRAN#TAU_F90 = /bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc-bgp-linux-gfortran $(F90_ABI) $(TAU_F90_OPT) #ENDIF# @@ -341,10 +347,11 @@ TAU_F77 = $(TAU_F90) #NORTTI#PROFILEOPT9 = -DNO_RTTI #ENDIF# #RTTI#PROFILEOPT9 = -DRTTI #ENDIF# #USE_CC#PROFILEOPT10 = -DTAU_DOT_H_LESS_HEADERS #ENDIF# +#CLANG#PROFILEOPT10 = -DTAU_CLANG -DTAU_DOT_H_LESS_HEADERS -fPIC #ENDIF# #CRAYCNL_SHARED#PROFILEOPT10 = -DTAU_DOT_H_LESS_HEADERS -fPIC #ENDIF# #GNU#PROFILEOPT10 = -DTAU_GNU -DTAU_DOT_H_LESS_HEADERS #ENDIF# #MPC#PROFILEOPT10 = -DTAU_MPC -DTAU_DOT_H_LESS_HEADERS -DTAU_MPI_F_STATUSES_IGNORE_ABSENT -fPIC #ENDIF# -#INTEL12FIX#PROFILEOPT10 = -DTAU_INTEL12 #ENDIF# +#INTEL12FIX#PROFILEOPT10 = -DTAU_INTEL12 -DTAU_DOT_H_LESS_HEADERS #ENDIF# #CRAYXMT#PROFILEOPT10 = -DTAU_CRAYXMT -DTAU_DOT_H_LESS_HEADERS #ENDIF# #APPLECXX#PROFILEOPT10 = -DTAU_GNU -DTAU_DOT_H_LESS_HEADERS -fPIC #ENDIF# #SOL2CC#PROFILEOPT10 = -DTAU_SOL2CC -DTAU_DOT_H_LESS_HEADERS #ENDIF# @@ -517,6 +524,7 @@ SDDF_DIR= #For F90 support for all platforms FWRAPPER = TauFMpi.o #MPI2#MPI2EXTENSIONS = TauMpiExtensions.o #ENDIF# +#APPLECXX#MPI2EXTENSIONS = #ENDIF# #CRAYX1CC#MPI2EXTENSIONS = #ENDIF# #SGICOUNTERS#LEXTRA = -lperfex #ENDIF# @@ -669,6 +677,7 @@ TAU_LINKER_OPT2 = $(LEXTRA) #PATHSCALE_FORTRAN#TAU_CXXLIBS = -lstdc++ #ENDIF# #LAHEY_FORTRAN#TAU_CXXLIBS = $(TAU_GNUCXXLIBS) -lc #ENDIF# #NAGWARE_FORTRAN#TAU_CXXLIBS = $(TAUGCCSTDCXXLIBDIR)/libstdc++.a #ENDIF# +#CLANG#TAU_CXXLIBS = -lstdc++ -ldl #ENDIF# #PGI#TAU_CXXLIBS = -lstd -lC #ENDIF# #APPLEPGI#TAU_CXXLIBS = -lstd -lcpp #ENDIF# #PGI_FORTRAN#TAU_CXXLIBS_PGI_CNL = -L$(EXTRADIR)/lib -lstd -lC -lstdc++ #ENDIF# @@ -709,6 +718,11 @@ TAU_SGI_INIT = /usr/lib32/c++init.o #FUJITSU_SOLARIS#TAU_CXXLIBS = -lstd -lstdm #ENDIF# #FX#TAU_CXXLIBS = -lmpi_cxx -lstdc++ -lstd_mt -lm #ENDIF# 
#FX#TAU_CXXLIBS = -lmpi_cxx -Ntl_notrt -lmpi_f90 -lmpi_f77 --linkfortran #ENDIF# + +#FX#TAU_CXXLIBS = -Ntl_notrt -lmpi_f90 -lmpi_f77 #ENDIF# +# NOTE: No -lmpi_cxx and --linkfortran for Fujitsu by default. +# Please add manually if needed + #PPC64XLC#TAU_PPC64_LIBS = -L$(EXTRADIRCXX)/$(TAU_FORLIBDIR) -libmc++ -lstdc++ #ENDIF# #IBM64LINUX_XLC#TAU_IBM64LINUX_LIBS = -L$(EXTRADIRCXX)/$(TAU_FORLIBDIR) -libmc++ -lstdc++ #ENDIF# #USE_IBMXLC#TAU_XLCLIBS = $(TAU_IBM64LINUX_LIBS) $(TAU_PPC64_LIBS) #ENDIF# @@ -728,6 +742,7 @@ TAU_SGI_INIT = /usr/lib32/c++init.o #INTELCXXLIBICC#TAU_CXXLIBS_INTEL = -lcprts #ENDIF# #USE_INTELCXX#TAU_CXXLIBS = $(TAU_CXXLIBS_INTEL) #ENDIF# #APPLECXX#TAU_CXXLIBS = -lstdc++ -L$(TAUGCCLIBDIR) $(TAUGCCLIBOPTS) -lgcc_s.1 -lSystemStubs #ENDIF# +#APPLECXX#TAU_CXXLIBS = -lstdc++ -L$(TAUGCCLIBDIR) $(TAUGCCLIBOPTS) -lgcc_s.1 #ENDIF# # EXTERNAL PACKAGES: EPILOG #SCALASCA#TAU_ELG_SERIAL_SUFFIX =.ser #ENDIF# @@ -808,6 +823,7 @@ TAU_LINKER_OPT4 = $(LEXTRA1) ############################################## # Flags to build a shared object: TAU_SHFLAGS #GNU#AR_SHFLAGS = -shared #ENDIF# +#CLANG#AR_SHFLAGS = -shared #ENDIF# #USE_PATHCC#AR_SHFLAGS = -shared -fPIC #ENDIF# #PGI#AR_SHFLAGS = -shared -fPIC #ENDIF# #APPLEPGI#AR_SHFLAGS = -shared #ENDIF# @@ -899,9 +915,11 @@ BFDLIBS= #PTHREAD_AVAILABLE#TAU_COMPINST_PTHREAD_OPTIONS= -Dpthread_create=tau_pthread_create -Dpthread_exit=tau_pthread_exit#ENDIF# #COMPINST_INTEL9#TAU_COMPINST_OPTION=$(UPCC_C_PREFIX)-tcollect #ENDIF# #COMPINST_INTEL9#TAU_COMPINST_LINKING= #ENDIF# -#COMPINST_GNU#TAU_COMPINST_OPTION=$(UPCC_PREFIX)-g $(UPCC_C_PREFIX)-finstrument-functions #ENDIF# -#UPCC#TAU_COMPINST_OPTION=$(UPCC_PREFIX) $(UPCC_C_PREFIX)-g $(UPCC_C_PREFIX)-finstrument-functions #ENDIF# -#MINGW#TAU_COMPINST_OPTION=$(UPCC_PREFIX)-gstabs3 $(UPCC_C_PREFIX)-finstrument-functions #ENDIF# +#COMPINST_GNU#TAU_COMPINST_OPTION=$(UPCC_C_PREFIX)-g $(UPCC_C_PREFIX)-finstrument-functions #ENDIF# +#OPEN64ORC#TAU_COMPINST_OPTION=$(UPCC_C_PREFIX)-g 
$(UPCC_C_PREFIX)-finstrument-functions #ENDIF# +#CLANG#TAU_COMPINST_OPTION=$(UPCC_C_PREFIX)-g $(UPCC_C_PREFIX)-finstrument-functions #ENDIF# +#BUPC#TAU_COMPINST_OPTION=$(UPCC_C_PREFIX)-g $(UPCC_C_PREFIX)-finstrument-functions #ENDIF# +#MINGW#TAU_COMPINST_OPTION=-gstabs3 -finstrument-functions #ENDIF# #TAU_BFD#TAU_LINKER_OPT13=$(TAU_BFDLIB) #ENDIF# #COMPINST_PGI#TAU_COMPINST_OPTION=$(UPCC_C_PREFIX)-Mprof=func #ENDIF# #COMPINST_PGI#TAU_COMPINST_LINKING=$(UPCC_LINK_PREFIX)-Mprof=func #ENDIF# @@ -951,6 +969,14 @@ EBS_CLOCK_RES= #LD_AUDITOR_AVAILABLE#PROFILEOPT88 = -DTAU_TRACK_LD_LOADER #ENDIF# +############################################## +# UPC +############################################## +#BUPC#PROFILEOPT89= -DTAU_UPC -DTAU_BUPC #ENDIF# +#GUPC#PROFILEOPT89= -DTAU_UPC -DTAU_GUPC -DTAU_GPUC #ENDIF# +#CRAY_UPC#PROFILEOPT89= -DTAU_UPC -DTAU_CRAY_UPC #ENDIF# + + ############################################## PROFILEOPTS = $(PROFILEOPT1) $(PROFILEOPT2) $(PROFILEOPT3) $(PROFILEOPT4) \ @@ -975,7 +1001,7 @@ PROFILEOPTS = $(PROFILEOPT1) $(PROFILEOPT2) $(PROFILEOPT3) $(PROFILEOPT4) \ $(PROFILEOPT77) $(PROFILEOPT78) $(PROFILEOPT79) $(PROFILEOPT80) \ $(PROFILEOPT81) $(PROFILEOPT82) $(PROFILEOPT83) $(PROFILEOPT84) \ $(PROFILEOPT85) $(PROFILEOPT86) $(PROFILEOPT87) $(PROFILEOPT88)\ - $(TRACEOPT) + $(PROFILEOPT89) $(TRACEOPT) ############################################## @@ -1099,8 +1125,8 @@ TAU_TRACE_INPUT_LIB = -L$(TAU_LIB_DIR) -lTAU_traceinput$(TAU_CONFIG) #GPSHMEM#SHMEM_OBJS = TauShmemGpshmem.o #ENDIF# TAU_CCOMPILER = $(TAU_CC) -#UPCC##TAU_CCOMPILER = $(TAU_UPCC) #ENDIF# -#GNU_UPC#TAU_CCOMPILER = $(TAU_UPCC) #ENDIF# +#BUPC##TAU_CCOMPILER = $(TAU_UPCC) #ENDIF# +#GUPC#TAU_CCOMPILER = $(TAU_UPCC) #ENDIF# #CRAY_UPC#TAU_UPC_COMPILER_OPTIONS = -h upc #ENDIF# @@ -1131,6 +1157,10 @@ MIC_OFFLOAD_LINKING=$(TAUHELPER) $(MIC_OFFLOAD_FLAGS) $(MIC_OFFLOAD_LINKING_PREF MIC_OFFLOAD_SHARED_LINKING=$(TAUHELPER) $(MIC_OFFLOAD_FLAGS) $(MIC_OFFLOAD_LINKING_PREFIX)='-L$(TAU_PREFIX_INSTALL_DIR)/mic_linux/lib 
-lTAUsh$(TAU_CONFIG)' -L$(TAU_PREFIX_INSTALL_DIR)/x86_64/lib -lTAUsh$(TAU_CONFIG) $(TAU_LINKER_OPTS) $(TAU_LINKER_SHOPTS) $(TAU_MPI_LIB) $(TAU_LIBDL) +TAU_MPI_COMPILE_INCLUDE=$(TAU_MPI_INCLUDE) +#FX#TAU_MPI_COMPILE_INCLUDE= #ENDIF# +# Fujitsu doesn't want TAU_MPI_INC to propagate to the command line + ############################################# # TAU COMPILER SHELL SCRIPT OPTIONS TAUCOMPILEROPTS= -optPdtDir="$(PDTDIR)/${PDTARCHDIR}"\ @@ -1139,11 +1169,12 @@ TAUCOMPILEROPTS= -optPdtDir="$(PDTDIR)/${PDTARCHDIR}"\ -optTauInstr="$(TAU_BIN_DIR)/tau_instrumentor" \ -optNoMpi \ -optOpariDir="$(OPARIDIR)" -optOpari2Tool="$(TAU_OPARI2_TOOL)" -optOpari2ConfigTool="$(TAU_OPARI_CONFIG_TOOL)" \ + -optOpari2Opts="$(TAU_OPARI2_OPTION)" \ -optOpariTool="$(TAU_OPARI_TOOL)" \ -optTauCC="$(TAU_CCOMPILER)" \ -optTauIncludes="$(TAU_INCLUDE) $(TAU_MPI_INCLUDE) $(TAU_SHMEM_INCLUDE) $(OPARIINCDIR)" \ -optTauDefs="$(TAU_DEFS)" $(TAU_IBM_NOCOMPINST) \ - -optTauCompile="$(TAU_INCLUDE) $(TAU_MPI_INCLUDE) $(TAU_DEFS) "\ + -optTauCompile="$(TAU_INCLUDE) $(TAU_MPI_COMPILE_INCLUDE) $(TAU_DEFS) "\ -optLinking="$(TAU_MPI_FLIBS) $(TAU_LIBS) $(TAU_LDFLAGS) $(TAU_CXXLIBS) $(TAU_SHMEM_LIBS) "\ -optSharedLinking="-L$(TAU_LIB_DIR)/shared$(TAU_CONFIG) $(TAU_MPI_FLIBS) $(TAU_SHMEM_LIBS) $(TAU_EXLIBS) $(TAU_LDFLAGS) $(TAU_CXXLIBS)"\ $(TAU_COMPILER_EXTRA_OPTIONS) \ diff --git a/include/Makefile.orig b/include/Makefile.orig index 2c3148464..21c683280 100644 --- a/include/Makefile.orig +++ b/include/Makefile.orig @@ -35,11 +35,18 @@ CONFIG_CXX=g++ TAU_CC_FE=$(CONFIG_CC) TAU_CXX_FE=$(CONFIG_CXX) +TAU_CUDA_CXX=g++ + # Front end C/C++ Compilers #BGL#TAU_CC_FE=gcc #ENDIF# #BGL#TAU_CXX_FE=g++ #ENDIF# #BGP#TAU_CC_FE=gcc #ENDIF# #BGP#TAU_CXX_FE=g++ #ENDIF# +#BGQ#TAU_CC_FE=gcc #ENDIF# +#BGQ#TAU_CXX_FE=g++ #ENDIF# +#MMIC#TAU_CXX_FE=g++ #ENDIF# +#MMIC#TAU_CC_FE=gcc #ENDIF# +#CATAMOUNT#TAU_CC_FE=gcc #ENDIF# #CATAMOUNT#TAU_CC_FE=gcc #ENDIF# #CATAMOUNT#TAU_CXX_FE=g++ #ENDIF# #CRAYCNL#TAU_CC_FE=gcc #ENDIF# @@ -72,7 +79,9 
@@ PDTCOMPDIR= DYNINSTDIR= JDKDIR= OPARIDIR= +OPARIINCDIR= ARMCIDIR= +DARSHANDIR= EPILOGDIR= EPILOGBINDIR= EPILOGINCDIR= @@ -104,8 +113,10 @@ TAU_MPI_INC= TAU_MPI_LIB= TAU_MPI_FLIB= TAU_MPILIB_DIR= +TAU_MPILIB_DIRLIB= TAU_MPI_NOWRAP_LIB= TAU_MPI_NOWRAP_FLIB= +TAU_GASNET_DIR= FULL_CXX= FULL_CC= TAU_PREFIX_INSTALL_DIR= @@ -114,9 +125,18 @@ TAU_BIN_DIR=$(TAU_PREFIX_INSTALL_DIR)/$(CONFIG_ARCH)/bin TAU_INC_DIR=$(TAU_PREFIX_INSTALL_DIR)/include TAU_LIB_DIR=$(TAU_PREFIX_INSTALL_DIR)/$(CONFIG_ARCH)/lib +UNWIND_FLAG= +UNWIND_INC= +UNWIND_LIB= +UNWIND_LIB_FLAG= +UNWIND_EXTRAS= + ####################################################################### #OPARI#TAU_OPARI_TOOL=$(TAU_BIN_DIR)/opari #ENDIF# +#OPARI2#TAU_OPARI2_TOOL=$(TAU_BIN_DIR)/opari2 #ENDIF# +#OPARI2#TAU_OPARI_CONFIG_TOOL=$(TAU_BIN_DIR)/opari2-config #ENDIF# +#OPARI2#OPARIINCDIR=-I$(TAUROOT)/utils/opari2/include #ENDIF# #ENABLE64BIT#ABI = -mabi=64 #ENDIF# #ENABLEN32BIT#ABI = -mabi=n32 #ENDIF# #ENABLE32BIT#ABI = -mabi=32 #ENDIF# @@ -154,22 +174,31 @@ TAU_LIB_DIR=$(TAU_PREFIX_INSTALL_DIR)/$(CONFIG_ARCH)/lib ####################################################################### F90_ABI = $(ABI) #IBM64_FORTRAN#F90_ABI = -q64 #ENDIF# +#MMIC#F90_ABI = -mmic #ENDIF# ####################################################################### ############################################## # UPC Flags ############################################## -#UPCC#UPCC_C_PREFIX=-Wc,#ENDIF# -#UPCC#UPCC_LINK_PREFIX=-Wl,#ENDIF# -#UPCC#TAU_UPCC= upcc "-inst-toolname=gasp-tau" -inst -network=$(UPCNETWORK) #ENDIF# -#UPCC#TAU_GASP_O=TauGASP.o #ENDIF# -#UPCC#TAU_GASPU_O=TauGASPU.o #ENDIF# +#BUPC#UPCC_C_PREFIX=-Wc,#ENDIF# +#BUPC#UPCC_LINK_PREFIX=-Wl,#ENDIF# +#BUPC#TAU_UPCC=upcc -inst-toolname=gasp-tau -inst -network=$(UPCNETWORK) $(UPCC_C_PREFIX)-fPIC #ENDIF# +#BUPC#TAU_GASP_O=TauGASP.o #ENDIF# +#BUPC#TAU_GASPU_UPC=$(TAU_INC_DIR)/Profile/TauGASPU.upc #ENDIF# + +#GUPC#UPCC_C_PREFIX= #ENDIF# +#GUPC#UPCC_LINK_PREFIX=-Wl,#ENDIF# +#GUPC#TAU_UPCC=upc 
-fupc-instrument -fPIC #ENDIF# +#GUPC#TAU_GASP_O=TauGASP.o #ENDIF# +#GUPC#TAU_GASPU_UPC=$(TAU_INC_DIR)/Profile/TauGASPU.upc #ENDIF# + ############# Standard Defines ############## TAU_CC = $(CONFIG_CC) $(ABI) $(ISA) TAU_CXX = $(CONFIG_CXX) $(ABI) $(ISA) TAU_RUN_CC = $(FULL_CC) $(ABI) $(ISA) -#UPCC#TAU_RUN_CC = $(TAU_UPCC) #ENDIF# +#BUPC#TAU_RUN_CC = $(TAU_UPCC) #ENDIF# +#GUPC#TAU_RUN_CC = $(TAU_UPCC) #ENDIF# TAU_RUN_CXX = $(FULL_CXX) $(ABI) $(ISA) TAU_INSTALL = /bin/cp TAU_SHELL = /bin/sh @@ -201,7 +230,8 @@ JDKARCH = linux ############################################# OPARI_O = TauKojakOpariDisable.o #ENDIF# #OPARI#OPARI_O = TauOpari.o #ENDIF# -#KOJAKOPARI#OPARI_O = TauKojakOpari.o #ENDIF# +#KOJAKOPARI#OPARI_O = TauKojakOpari.o#ENDIF# +#OPARI2#OPARI_O = pomp2_lib.o pomp2_region_info.o pomp2_fwrapper.o pomp2_fwrapper_base.o#ENDIF# #EPILOG#OPARI_O = #ENDIF# #VAMPIRTRACE#OPARI_O = #ENDIF# @@ -218,7 +248,7 @@ DYNINST_PLATFORM = $(PLATFORM) ############# OpenMP Fortran Option ######## -TAU_OPENMP_OPTION = -mp +TAU_OPENMP_OPTION = #COMPINST_CRAYCC#TAU_OPENMP_OPTION= -h omp #ENDIF# #SOL2CC_OPENMP#TAU_OPENMP_OPTION = -xopenmp #ENDIF# #SUNCC_OPENMP#TAU_OPENMP_OPTION = -xopenmp=parallel #ENDIF# @@ -237,6 +267,8 @@ TAU_OPENMP_OPTION = -mp #ZEPTO_GFORTRAN#TAU_F90_OPT = -fno-underscoring #ENDIF# #THREADSAFE_COMPILERS#TAU_R =_r #ENDIF# +#BGP#TAU_R =_r #ENDIF# +#BGQ#TAU_R =_r #ENDIF# ############# Fortran Compiler ############# #GNU_FORTRAN#TAU_F90 = g77 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# @@ -246,13 +278,16 @@ TAU_OPENMP_OPTION = -mp #SC_GFORTRAN#TAU_F90 = scgfortran $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #SGI_FORTRAN#TAU_F90 = f90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #IBM_FORTRAN#TAU_F90 = xlf90$(TAU_R) $(F90_ABI) $(TAU_F90_OPT) #ENDIF# -#BGP#TAU_F90 = /bgsys/drivers/ppcfloor/comm/bin/mpixlf90$(TAU_R) $(F90_ABI) $(TAU_F90_OPT) #ENDIF# -#BGP_GFORTRAN#TAU_F90 = /bgsys/drivers/ppcfloor/comm/bin/mpif90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# +#BGP#TAU_F90 = 
/bgsys/drivers/ppcfloor/comm/xl/bin/mpixlf90$(TAU_R) $(F90_ABI) $(TAU_F90_OPT) #ENDIF# +#BGP_GFORTRAN#TAU_F90 = mpif90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# +#BGQ#TAU_F90 = /bgsys/drivers/ppcfloor/comm/xl/bin/mpixlf90$(TAU_R) $(F90_ABI) $(TAU_F90_OPT) #ENDIF# +#BGQ_GFORTRAN#TAU_F90 = /bgsys/drivers/ppcfloor/comm/gcc/bin/mpif90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #BGL#TAU_F90 = blrts_xlf90$(TAU_R) $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #IBM64_FORTRAN#TAU_F90 = xlf90$(TAU_R) $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #IBMXLFAPPLE#TAU_F90 = xlf90$(TAU_R) $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #CRAY_FORTRAN#TAU_F90 = f90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #CRAY_X1_FORTRAN#TAU_F90 = ftn $(F90_ABI) $(TAU_F90_OPT) #ENDIF# +#OPENSHMEM#TAU_F90 = oshfort $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #PGI_FORTRAN#TAU_F90 = pgf90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #PGI_CATAMOUNT#TAU_F90 = qk-pgf90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #CATAMOUNT#TAU_F90 = ftn $(F90_ABI) $(TAU_F90_OPT) #ENDIF# @@ -271,16 +306,20 @@ TAU_OPENMP_OPTION = -mp #INTEL_FORTRAN#TAU_F90 = efc $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #INTEL32_FORTRAN#TAU_F90 = ifc $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #INTELIFORT#TAU_F90 = ifort $(F90_ABI) $(TAU_F90_OPT) #ENDIF# +#MPIIFORT#TAU_F90 = mpiifort $(F90_ABI) $(TAU_F90_OPT) #ENDIF# +#FX#TAU_F90 = mpifrtpx $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #PATHSCALE_FORTRAN#TAU_F90 = pathf90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #SC_PATHSCALE#TAU_F90 = scpathf95 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #OPEN64ORC_FORTRAN#TAU_F90 = orf90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #NEC_FORTRAN#TAU_F90 = f90 $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #CRAYCNL#TAU_F90 = ftn $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #ZEPTO_GFORTRAN#TAU_F90 = /bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc-bgp-linux-gfortran $(F90_ABI) $(TAU_F90_OPT) #ENDIF# +#ZEPTO_GFORTRAN#TAU_F90 = /bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc64-bgq-linux-gfortran $(F90_ABI) $(TAU_F90_OPT) #ENDIF# TAU_F77 = $(TAU_F90) #IBM_FORTRAN#TAU_F77 = xlf$(TAU_R) $(F90_ABI) $(TAU_F90_OPT) #ENDIF# #BGP#TAU_F77 = 
mpixlf77_r $(F90_ABI) $(TAU_F90_OPT) #ENDIF# +#BGQ#TAU_F77 = mpixlf77_r $(F90_ABI) $(TAU_F90_OPT) #ENDIF# @@ -308,18 +347,21 @@ TAU_F77 = $(TAU_F90) #USE_CC#PROFILEOPT10 = -DTAU_DOT_H_LESS_HEADERS #ENDIF# #CRAYCNL_SHARED#PROFILEOPT10 = -DTAU_DOT_H_LESS_HEADERS -fPIC #ENDIF# #GNU#PROFILEOPT10 = -DTAU_GNU -DTAU_DOT_H_LESS_HEADERS #ENDIF# +#MPC#PROFILEOPT10 = -DTAU_MPC -DTAU_DOT_H_LESS_HEADERS -DTAU_MPI_F_STATUSES_IGNORE_ABSENT -fPIC #ENDIF# +#INTEL12FIX#PROFILEOPT10 = -DTAU_INTEL12 -DTAU_DOT_H_LESS_HEADERS #ENDIF# #CRAYXMT#PROFILEOPT10 = -DTAU_CRAYXMT -DTAU_DOT_H_LESS_HEADERS #ENDIF# #APPLECXX#PROFILEOPT10 = -DTAU_GNU -DTAU_DOT_H_LESS_HEADERS -fPIC #ENDIF# #SOL2CC#PROFILEOPT10 = -DTAU_SOL2CC -DTAU_DOT_H_LESS_HEADERS #ENDIF# #SUNCC#PROFILEOPT10 = -DTAU_SOL2CC -DTAU_DOT_H_LESS_HEADERS #ENDIF# #USE_PATHCC#PROFILEOPT10 = -DTAU_DOT_H_LESS_HEADERS -DTAU_PATHSCALE #ENDIF# #OPEN64ORC#PROFILEOPT10 = -DTAU_DOT_H_LESS_HEADERS -DTAU_OPEN64ORC -fpic #ENDIF# -#PROFILEOPT11 +#DMAPP#PROFILEOPT11=-DTAU_DMAPP #ENDIF# #PGI1.7#PROFILEOPT12 = -DPGI #ENDIF# #CRAYKAI#PROFILEOPT12 = -DCRAYKAI #ENDIF# #HP_FORTRAN#PROFILEOPT12 = -DHP_FORTRAN #ENDIF# #CRAYCC#PROFILEOPT13 = -h instantiate=used -DCRAYCC -DTAU_DOT_H_LESS_HEADERS #ENDIF# -#COMPINST_CRAYCC#PROFILEOPT13 = -DCRAYCC -DTAU_DOT_H_LESS_HEADERS #ENDIF# +#MINGW#PROFILEOPT13 = -DTAU_WINDOWS -DTAU_DISABLE_SIGUSR -DTAU_MINGW -DTAU_ENABLE -DTAU_GNU #ENDIF# +#COMPINST_CRAYCC#PROFILEOPT13 = -DCRAYCC -DTAU_DOT_H_LESS_HEADERS #ENDIF# #CRAYX1CC#PROFILEOPT13 = -DTAU_DOT_H_LESS_HEADERS #ENDIF# #SGICC#TAU_CXX = $(CONFIG_CXX) $(ABI) $(ISA) $(USER_OPT) -LANG:std #ENDIF# #INTELCXXLIBICC#TAU_CXX = $(CONFIG_CXX) $(ABI) $(ISA) $(USER_OPT) -cxxlib-icc #ENDIF# @@ -333,7 +375,7 @@ TAU_F77 = $(TAU_F90) #KTAU_MERGE#KTAU_MERGE_O = KtauFuncInfo.o KtauMergeInfo.o ktau_syscall.o #ENDIF# #KTAU_SHCTR#KTAU_SHCTR_O = KtauCounters.o #ENDIF# #MPITRACE#TRACEOPT = -DTAU_MPITRACE -DTRACING_ON #ENDIF# -#PTHREAD_AVAILABLE#THR_O = PthreadLayer.o #ENDIF# 
+#PTHREAD_AVAILABLE#THR_O = PthreadLayer.o TauWrapSyscalls.o #ENDIF# #TAU_PAPI_THREADS#THR_O = PapiThreadLayer.o #ENDIF# #TAU_SPROC#THR_O = SprocLayer.o #ENDIF# #JAVA#THR_O = JavaThreadLayer.o #ENDIF# @@ -351,7 +393,8 @@ TAU_F77 = $(TAU_F90) #PGI#PROFILEOPT19 = -DTAU_DOT_H_LESS_HEADERS -fPIC #ENDIF# #APPLEPGI#PROFILEOPT19 = -DTAU_DOT_H_LESS_HEADERS -DTAU_APPLE_PGI #ENDIF# #ACC#PROFILEOPT19 = -AA +z -DTAU_DOT_H_LESS_HEADERS -DTAU_HPUX #ENDIF# -#FUJITSU#PROFILEOPT19 = -DFUJITSU -DTAU_DOT_H_LESS_HEADERS #ENDIF# +#FUJITSU#PROFILEOPT19 = -DTAU_FUJITSU -DTAU_DOT_H_LESS_HEADERS #ENDIF# +#FX#PROFILEOPT19 = -Xg -DTAU_FUJITSU -DTAU_DOT_H_LESS_HEADERS -fPIC #ENDIF# #KAINOEX#PROFILEOPT20 = --no_exceptions #ENDIF# #SGICCNOEX#PROFILEOPT20 = -LANG:exceptions=off #ENDIF# #HPGNU#PROFILEOPT21 = -fPIC #ENDIF# @@ -367,9 +410,11 @@ TAU_F77 = $(TAU_F90) #JAVA#PROFILEOPT23 = -DJAVA #ENDIF# #JVMTI#PROFILEOPT23 = -DJAVA -DTAU_JVMTI #ENDIF# PROFILEOPT24 = -#JAVA#PROFILEOPT25 = -I$(JDKDIR)/include -I$(JDKDIR)/include/$(JDKARCH) #ENDIF# +#JAVA#PROFILEOPT25 = -I$(JDKDIR)/include -I$(JDKDIR)/include/$(JDKARCH) -I$(JDKDIR)/Headers#ENDIF# #MPI#PROFILEOPT26 = -DTAU_MPI -DTAU_UNIFY #ENDIF# #MPI_THREADED#PROFILEOPT26 = -DTAU_MPI -DTAU_UNIFY -DTAU_MPI_THREADED #ENDIF# +JDKBINDIR = $(JDKDIR)/bin +#APPLECXX#JDKBINDIR = $(JDKDIR)/Commands#ENDIF# #PGIOPENMP#TAU_OPENMP_EXTRA_DEFS = -U_RWSTD_MULTI_THREAD -U_REENTRANT #ENDIF# #OPENMP#PROFILEOPT27 = $(TAU_OPENMP_OPTION) $(TAU_OPENMP_EXTRA_DEFS) -DTAU_OPENMP#ENDIF# @@ -379,6 +424,7 @@ PROFILEOPT24 = #SGITIMERS#PROFILEOPT30 = -DSGI_TIMERS #ENDIF# #BGLTIMERS#PROFILEOPT30 = -DBGL_TIMERS -I/bgl/BlueLight/ppcfloor/bglsys/include #ENDIF# #BGPTIMERS#PROFILEOPT30 = -DBGP_TIMERS -I/bgsys/drivers/ppcfloor/arch/include/common -I/bgsys/drivers/ppcfloor/arch/include -I/bgsys/drivers/ppcfloor/arch/include/spi #ENDIF# +#BGQTIMERS#PROFILEOPT30 = -DBGQ_TIMERS #ENDIF# #CRAYTIMERS#PROFILEOPT30 = -DCRAY_TIMERS #ENDIF# #LINUXTIMERS#PROFILEOPT31 = -DTAU_LINUX_TIMERS #ENDIF# 
#ALPHATIMERS#PROFILEOPT31 = -DTAU_ALPHA_TIMERS #ENDIF# @@ -392,7 +438,8 @@ PROFILEOPT24 = #VAMPIRTRACE#PROFILEOPT36 = -DTAU_VAMPIRTRACE $(VAMPIRTRACEINCS) $(TAU_VAMPIRTRACEOPTS)#ENDIF# #PROFILECALLPATH#PROFILEOPT36 = -DTAU_CALLPATH #ENDIF# #PROFILEPHASE#PROFILEOPT36 = -DTAU_CALLPATH -DTAU_PROFILEPHASE#ENDIF# -#SCOREP#PROFILEOPT36 = -DTAU_SILC -DTAU_SCOREP $(SCOREPINCS) #ENDIF# +#PROFILEPHASE#SCOREPPROFILE= -DTAU_CALLPATH -DTAU_PROFILEPHASE#ENDIF# +#SCOREP#PROFILEOPT36 = -DTAU_SILC -DTAU_SCOREP $(SCOREPINCS) $(SCOREPPROFILE)#ENDIF# #PYTHON#PROFILEOPT37 = -I$(PYTHON_INCDIR) #ENDIF# #NOCOMM#PROFILEOPT38 = -DTAU_NOCOMM #ENDIF# #SETNODE0#PROFILEOPT40 = -DTAU_SETNODE0 #ENDIF# @@ -417,8 +464,11 @@ PROFILEOPT24 = #PROFILEHEADROOM#PROFILEOPT58 = -DTAU_PROFILEHEADROOM #ENDIF# #TAU_LARGEFILE#PROFILEOPT60 = -DTAU_LARGEFILE -D_LARGEFILE64_SOURCE #ENDIF# #BGP#PROFILEOPT60 = -DTAU_LARGEFILE -D_LARGEFILE64_SOURCE #ENDIF# +#BGQ#PROFILEOPT60 = -DTAU_LARGEFILE -D_LARGEFILE64_SOURCE #ENDIF# # Omit the -D_LARGETFILE64_SOURCE till we can check the IBM crash #SHMEM#PROFILEOPT61 = -DTAU_SHMEM #ENDIF# +#OPENSHMEM#PROFILEOPT61 = $(TAU_SHMEM_INC) -DTAU_OPENSHMEM -DTAU_SHMEM #ENDIF# +#SGI_MPT_SHMEM#PROFILEOPT61 = -DTAU_SHMEM -DTAU_SGI_MPT_SHMEM #ENDIF# #KTAU#PROFILEOPT62 = -DTAUKTAU -DKTAU_USER_SRC_COMPILE -I$(KTAU_INCDIR) -I$(KTAU_INCUSERDIR) -DKTAU_INCUSERDIR=\"$(KTAU_INCUSERDIR)\" -DKTAU_KALLSYMS_PATH=\"$(KTAU_KALLSYMS_PATH)\" #ENDIF# #KTAU_MERGE#PROFILEOPT63 = -DTAUKTAU_MERGE -DKTAU_USER_SRC_COMPILE -I$(KTAU_INCDIR) -DKTAU_INCUSERDIR=\"$(KTAU_INCUSERDIR)\" -DKTAU_KALLSYMS_PATH=\"$(KTAU_KALLSYMS_PATH)\" #ENDIF# #FREEBSD#PROFILEOPT64 = -DTAU_FREEBSD #ENDIF# @@ -439,8 +489,10 @@ PROFILEOPT24 = #TAU_DEMANGLE#PROFILEOPT77 = -DHAVE_GNU_DEMANGLE #ENDIF# #VAMPIRTRACE_5_7_API#PROFILEOPT78 = -DTAU_VAMPIRTRACE_5_7_API #ENDIF# +#VAMPIRTRACE_5_12_API#PROFILEOPT78 = -DTAU_VAMPIRTRACE_5_12_API #ENDIF# #TAU_TR1_HASH_MAP#PROFILEOPT79 = -DHAVE_TR1_HASH_MAP #ENDIF# +#DARSHAN#PROFILEOPT80 = -DTAU_DARSHAN 
#ENDIF# MRNET_ROOT= MRNET_LW_OPTS= @@ -464,6 +516,7 @@ SDDF_DIR= #BGL#PROFILEOPT72 = -DTAU_BGL -I/bgl/BlueLight/ppcfloor/bglsys/include #ENDIF# #BGP#PROFILEOPT72 = -DTAU_BGP -I/bgsys/drivers/ppcfloor/arch/include/common -I/bgsys/drivers/ppcfloor/arch/include -I/bgsys/drivers/ppcfloor/arch/include/spi #ENDIF# +#BGQ#PROFILEOPT72 = -DTAU_BGQ #ENDIF# #For F90 support for all platforms FWRAPPER = TauFMpi.o @@ -481,6 +534,7 @@ FWRAPPER = TauFMpi.o #PAPI#TAU_CNL_PAPI_FLAGS = $(PAPIDIR)/$(PAPISUBDIR)/libpapi.a #ENDIF# #PAPI#TAU_CNL_PAPI_CRAYCC_FLAGS = -L$(PAPIDIR)/$(PAPISUBDIR) -lpapi #ENDIF# #COMPINST_CRAYCC#TAU_CNL_PAPI_FLAGS = $(TAU_CNL_PAPI_CRAYCC_FLAGS) #ENDIF# +#CRAYCNL_PAPI_SHARED#TAU_CNL_PAPI_FLAGS = $(TAU_CNL_PAPI_CRAYCC_FLAGS) #ENDIF# #CRAYCNL#LEXTRA = $(TAU_CNL_PAPI_FLAGS) #ENDIF# #SOL2PAPI#LEXTRA = $(PAPIDIR)/$(PAPISUBDIR)/libpapi.a -lcpc #ENDIF# #IBMPAPI#LEXTRA = $(PAPIDIR)/$(PAPISUBDIR)/libpapi.a -L/usr/lpp/pmtoolkit/lib -L/usr/pmapi/lib -lpmapi#ENDIF# @@ -488,6 +542,7 @@ FWRAPPER = TauFMpi.o #BGLPAPI_RTS#LEXTRA = $(PAPIDIR)/$(PAPISUBDIR)/libpapi.rts.a -L/bgl/BlueLight/ppcfloor/bglsys/lib -lbgl_perfctr.rts -lrts.rts -ldevices.rts #ENDIF# #BGLPAPI#LEXTRA = $(PAPIDIR)/$(PAPISUBDIR)/libpapi.a -L/bgl/BlueLight/ppcfloor/bglsys/lib -lbgl_perfctr.rts -lrts.rts -ldevices.rts #ENDIF# #BGPPAPI#LEXTRA = $(PAPIDIR)/$(PAPISUBDIR)/libpapi.a -L/bgsys/drivers/ppcfloor/runtime/SPI -lSPI.cna #ENDIF# +#BGQPAPI#LEXTRA = $(PAPIDIR)/$(PAPISUBDIR)/libpapi.a -L/bgsys/drivers/ppcfloor/spi/lib -lSPI #ENDIF# #IBM64PAPI#LEXTRA = $(PAPIDIR)/$(PAPISUBDIR)/libpapi64.a -L/usr/lpp/pmtoolkit/lib -L/usr/pmapi/lib -lpmapi #ENDIF# #IBM64PAPILINUX#LEXTRA = $(PAPIDIR)/$(PAPISUBDIR)/libpapi.a #ENDIF# #SGI64PAPI#LEXTRA = -L$(PAPIDIR)/$(PAPISUBDIR) -lpapi64 #ENDIF# @@ -504,6 +559,7 @@ TAU_PAPI_EXTRA_FLAGS = $(LEXTRA) #PPC64PAPI#TAU_PAPI_RPATH = #ENDIF# #BGLPAPI#TAU_PAPI_RPATH = #ENDIF# #BGPPAPI#TAU_PAPI_RPATH = #ENDIF# +#BGQPAPI#TAU_PAPI_RPATH = #ENDIF# #USE_INTELCXX#TAU_PAPI_RPATH = #ENDIF# 
#CRAYX1CC#TAU_PAPI_RPATH = #ENDIF# #PGI#TAU_PAPI_RPATH = -R$(PAPIDIR)/$(PAPISUBDIR) #ENDIF# @@ -527,6 +583,7 @@ TAU_GCCLIB = -lgcc_s #INTEL32_ON_64#TAU_GCCLIB = -lgcc #ENDIF# #FREEBSD#TAU_GCCLIB = -lgcc #ENDIF# #BGL#TAU_GCCLIB = -lgcc #ENDIF# +#BGQ#TAU_GCCLIB = -lgcc #ENDIF# #GNU#TAU_FORTRANLIBS = -L$(TAUGCCLIBDIR) $(TAUGCCLIBOPTS) -lstdc++ $(TAU_GCCLIB) #ENDIF# #OPEN64ORC_FORTRAN#TAU_FORTRANLIBS = -lfortran -lffio #ENDIF# #PATHSCALE_FORTRAN#TAU_FORTRANLIBS = -lpathfstart -lpathfortran #ENDIF# @@ -537,27 +594,32 @@ TAU_GCCLIB = -lgcc_s #GNU_GFORTRAN#TAU_FORTRANLIBS = -L$(TAUGFORTRANLIBDIR) -lgfortran -lgfortranbegin #ENDIF# #SC_GFORTRAN#TAU_FORTRANLIBS = -lgfortran -lgfortranbegin #ENDIF# #SGI_FORTRAN#TAU_FORTRANLIBS = -lfortran -lftn #ENDIF# -#USE_IBMXLC#TAU_IBM_FORTRANLIBS = -bh:4 -bpT:0x10000000 -bpD:0x20000000 /lib/crt0.o -lxlf90 -lm -lc #ENDIF# -#GNU#TAU_IBM_FORTRANLIBS = -Wl,-bh:4 -Wl,-bpT:0x10000000 -Wl,-bpD:0x20000000 /lib/crt0.o -lxlf90 -lm -lc #ENDIF# -#KAI#TAU_IBM_FORTRANLIBS = --backend -bh:4 --backend -bpT:0x10000000 --backend -bpD:0x20000000 /lib/crt0.o -lxlf90 -lm -lc #ENDIF# +#USE_IBMXLC#TAU_IBM_FORTRANLIBS = -bh:4 -bpT:0x10000000 -bpD:0x20000000 /lib/crt0.o -lxlf90$(TAU_R) -lm -lc #ENDIF# +#GNU#TAU_IBM_FORTRANLIBS = -Wl,-bh:4 -Wl,-bpT:0x10000000 -Wl,-bpD:0x20000000 /lib/crt0.o -lxlf90$(TAU_R) -lm -lc #ENDIF# +#KAI#TAU_IBM_FORTRANLIBS = --backend -bh:4 --backend -bpT:0x10000000 --backend -bpD:0x20000000 /lib/crt0.o -lxlf90$(TAU_R) -lm -lc #ENDIF# +#FX#TAU_FORTRANLIBS = -Ntl_notrt -lmpi_f90 -lmpi_f77 --linkfortran #ENDIF# #IBM_FORTRAN#TAU_FORTRANLIBS = $(TAU_IBM_FORTRANLIBS) #ENDIF# -#USE_IBMXLC#TAU_IBM64_FORTRANLIBS = -bh:4 -bpT:0x10000000 -bpD:0x20000000 -b64 /lib/crt0_64.o -lxlf90 -lm -lc #ENDIF# -#GNU#TAU_IBM64_FORTRANLIBS = -Wl,-bh:4 -Wl,-bpT:0x10000000 -Wl,-bpD:0x20000000 -Wl,-b64 /lib/crt0_64.o -lxlf90 -lm -lc #ENDIF# -#KAI#TAU_IBM64_FORTRANLIBS = --backend -bh:4 --backend -bpT:0x10000000 --backend -bpD:0x20000000 --backend -b64 /lib/crt0_64.o 
-lxlf90 -lm -lc #ENDIF# +#USE_IBMXLC#TAU_IBM64_FORTRANLIBS = -bh:4 -bpT:0x10000000 -bpD:0x20000000 -b64 /lib/crt0_64.o -lxlf90$(TAU_R) -lm -lc #ENDIF# +#GNU#TAU_IBM64_FORTRANLIBS = -Wl,-bh:4 -Wl,-bpT:0x10000000 -Wl,-bpD:0x20000000 -Wl,-b64 /lib/crt0_64.o -lxlf90$(TAU_R) -lm -lc #ENDIF# +#KAI#TAU_IBM64_FORTRANLIBS = --backend -bh:4 --backend -bpT:0x10000000 --backend -bpD:0x20000000 --backend -b64 /lib/crt0_64.o -lxlf90$(TAU_R) -lm -lc #ENDIF# #IBM64_FORTRAN#TAU_FORTRANLIBS = $(TAU_IBM64_FORTRANLIBS) #ENDIF# #IBM64_FORTRAN#TAU_FORLIBDIR=lib64 #ENDIF# #IBM_FORTRAN#TAU_FORLIBDIR=lib #ENDIF# #BGL#TAU_FORLIBDIR=blrts_dev_lib #ENDIF# #BGP#TAU_FORLIBDIR=bglib #ENDIF# -#PPC64XLF#TAU_FORTRANLIBS = -L$(EXTRADIR)/$(TAU_FORLIBDIR) -lxlf90 -lxlfmath -lxl -lxlomp_ser #ENDIF# -#IBM64LINUX_XLF#TAU_FORTRANLIBS = -L$(EXTRADIR)/$(TAU_FORLIBDIR) -lxlf90 -lxlfmath -lxl -lxlomp_ser #ENDIF# -#BGL#TAU_FORTRANLIBS = -L$(EXTRADIR)/$(TAU_FORLIBDIR) -L$(EXTRADIR)/blrts_lib -lxlf90 -lxlfmath -lxl #ENDIF# +#BGQ#TAU_FORLIBDIR=bglib64 #ENDIF# +#PPC64XLF#TAU_FORTRANLIBS = -L$(EXTRADIR)/$(TAU_FORLIBDIR) -lxlf90$(TAU_R) -lxlfmath -lxl -lxlomp_ser #ENDIF# +#IBM64LINUX_XLF#TAU_FORTRANLIBS = -L$(EXTRADIR)/$(TAU_FORLIBDIR) -lxlf90$(TAU_R) -lxlfmath -lxl -lxlomp_ser #ENDIF# +#BGL#TAU_FORTRANLIBS = -L$(EXTRADIR)/$(TAU_FORLIBDIR) -L$(EXTRADIR)/blrts_lib -lxlf90$(TAU_R) -lxlfmath -lxl #ENDIF# +#BGP#TAU_IBM_FORTRANLIBS = -L/bgsys/drivers/ppcfloor/comm/xl/lib -lxlf90$(TAU_R) -lm -lc #ENDIF# +#BGQ#TAU_IBM_FORTRANLIBS = -L/bgsys/drivers/ppcfloor/comm/xl/lib -lxlf90$(TAU_R) -lm -lc #ENDIF# + #BGP#TAU_BGL_OMP_SERIAL= -lxlomp_ser #ENDIF# #OPENMP#TAU_BGL_OMP_SERIAL= #ENDIF# #BGP#TAU_OMP_SERIAL=$(TAU_BGL_OMP_SERIAL) #ENDIF# -#BGP#TAU_FORTRANLIBS = -L$(EXTRADIR)/$(TAU_FORLIBDIR) -lxlf90 -lxlfmath $(TAU_OMP_SERIAL) #ENDIF# +#BGP#TAU_FORTRANLIBS = -L$(EXTRADIR)/$(TAU_FORLIBDIR) -lxlf90$(TAU_R) -lxlfmath $(TAU_OMP_SERIAL) #ENDIF# #BGP_GFORTRAN#TAU_FORTRANLIBS = -lgfortran -lgfortranbegin -lm #ENDIF# 
#IBMXLFAPPLE#TAU_FORTRANLIBS = -L$(EXTRADIR)/lib -lxlf90 -lxlfmath -lxl #ENDIF# @@ -571,6 +633,7 @@ TAU_GCCLIB = -lgcc_s #INTEL_FORTRAN#TAU_FORTRANLIBS = -lcprts -lPEPCF90 #ENDIF# #INTEL32_FORTRAN#TAU_FORTRANLIBS = -lcprts -lCEPCF90 -lF90 #ENDIF# #INTELIFORT#TAU_FORTRANLIBS = -lcprts #ENDIF# +#CRAYCNL#TAU_FORTRANLIBS = #ENDIF# #INTEL81FIX#TAU_FORTRANLIBS = -L$(TAUGCCLIBDIR) $(TAUGCCLIBOPTS) -lstdc++ $(TAU_GCCLIB) -lcxa -lunwind -L$(EXTRADIR)/lib -lifcore $(EXTRADIR)/lib/for_main.o #ENDIF# #INTEL10FIX#TAU_FORTRANLIBS = -L$(TAUGCCLIBDIR) $(TAUGCCLIBOPTS) -lstdc++ $(TAU_GCCLIB) -L$(IFORTLIBDIR) -lifcore $(IFORTLIBDIR)/for_main.o #ENDIF# #INTELCXXLIBICC#TAU_FORTRANLIBS = -lcprts -L$(EXTRADIR)/lib -lifcore $(EXTRADIR)/lib/for_main.o #ENDIF# @@ -617,16 +680,19 @@ TAU_LINKER_OPT2 = $(LEXTRA) #PATHSCALE_FORTRAN#TAU_CXXLIBS_PATHSCALE_CNL = -L$(EXTRADIR)/lib -lstdc++ #ENDIF# #GNU_GFORTRAN#TAU_CXXLIBS_GNU_CNL = -L$(EXTRADIR)/lib -lstdc++ -lgcc_eh #ENDIF# #INTELIFORT#TAU_CXXLIBS_INTEL_CNL = -lstdc++ #ENDIF# -#MPI#TAU_MPICHCXX_CRAY_CNL_LIBS = -lmpichcxx #ENDIF# +TAU_MPICHCXX_CNL_SHARED_LIB = -lmpichcxx +#CRAYCNL_CCE#TAU_MPICHCXX_CNL_SHARED_LIB = -lmpichcxx_cray #ENDIF# +#MPI#TAU_MPICHCXX_CRAY_CNL_LIBS = $(TAU_MPILIB_DIRLIB) $(TAU_MPICHCXX_CNL_SHARED_LIB) #ENDIF# #CRAY_FORTRAN#TAU_CXXLIBS_CRAY_CNL = -L$(EXTRADIR) -lcray-c++-rts -lstdc++ -lgcc_eh $(TAU_MPICHCXX_CRAY_CNL_LIBS) #ENDIF# #CRAYCNL#TAU_CXXLIBS = $(TAU_CXXLIBS_PGI_CNL) $(TAU_CXXLIBS_PATHSCALE_CNL) $(TAU_CXXLIBS_GNU_CNL) $(TAU_CXXLIBS_INTEL_CNL) $(TAU_CXXLIBS_CRAY_CNL) #ENDIF# #CATAMOUNT#TAU_CXXLIBS = $(TAU_CXXLIBS_PGI_CNL) $(TAU_CXXLIBS_PATHSCALE_CNL) $(TAU_CXXLIBS_GNU_CNL) #ENDIF# #CRAYX1CC#TAU_CXXLIBS = -L/opt/ctl/CC/CC/lib -lC #ENDIF# +#MPC#TAU_CXXLIBS = -lstdc++ #ENDIF# ## Set the gfortran options on BGP -#BGP_GFORTRAN#TAU_CXXLIBS = -lstdc++ #ENDIF# -#BGP_GFORTRAN#TAU_GNULIBS = -lstdc++ #ENDIF# -#BGP_GFORTRAN#TAU_CXXLIBS_GNU_CNL = -lstdc++ #ENDIF# +#BGP_GFORTRAN#TAU_CXXLIBS = -lstdc++ -lm #ENDIF# 
+#BGP_GFORTRAN#TAU_GNULIBS = -lstdc++ -lm #ENDIF# +#BGP_GFORTRAN#TAU_CXXLIBS_GNU_CNL = -lstdc++ -lm #ENDIF# ## Set the gfortran options on Zepto BGP #ZEPTO_GFORTRAN#TAU_CXXLIBS = -lstdc++ #ENDIF# @@ -645,13 +711,16 @@ TAU_SGI_INIT = /usr/lib32/c++init.o #SUNCC#TAU_CXXLIBS = $(TAU_CXXLIBS_SUNCC) #ENDIF# #SUNX86_64#TAU_CXXLIBS = $(TAU_CXXLIBS_SUN_OPTERON) #ENDIF# #FUJITSU_SOLARIS#TAU_CXXLIBS = -lstd -lstdm #ENDIF# +#FX#TAU_CXXLIBS = -lmpi_cxx -lstdc++ -lstd_mt -lm #ENDIF# +#FX#TAU_CXXLIBS = -lmpi_cxx -Ntl_notrt -lmpi_f90 -lmpi_f77 --linkfortran #ENDIF# #PPC64XLC#TAU_PPC64_LIBS = -L$(EXTRADIRCXX)/$(TAU_FORLIBDIR) -libmc++ -lstdc++ #ENDIF# #IBM64LINUX_XLC#TAU_IBM64LINUX_LIBS = -L$(EXTRADIRCXX)/$(TAU_FORLIBDIR) -libmc++ -lstdc++ #ENDIF# #USE_IBMXLC#TAU_XLCLIBS = $(TAU_IBM64LINUX_LIBS) $(TAU_PPC64_LIBS) #ENDIF# #IBMXLCAPPLE#TAU_FORLIBDIR =lib #ENDIF# #IBMXLCAPPLE#TAU_XLCLIBS = -L$(EXTRADIRCXX)/$(TAU_FORLIBDIR) -libmc++ -lstdc++ #ENDIF# #BGL#TAU_XLCLIBS = -L$(EXTRADIRCXX)/blrts_dev_lib -L$(EXTRADIRCXX)/blrts_lib -libmc++ -L/bgl/BlueLight/ppcfloor/blrts-gnu/powerpc-bgl-blrts-gnu/lib -lstdc++ #ENDIF# -#BGP#TAU_XLCLIBS = -L$(EXTRADIRCXX)/bglib -libmc++ -lstdc++ #ENDIF# +#BGP#TAU_XLCLIBS = -L$(EXTRADIRCXX)/bglib -libmc++ -lstdc++ -L$(EXTRADIR)/bglib -lxlf90$(TAU_R) -lxlfmath $(TAU_OMP_SERIAL) #ENDIF# +#BGQ#TAU_XLCLIBS = -L$(EXTRADIRCXX)/bglib64 -libmc++ -lstdc++ -L$(EXTRADIR)/bglib64 -lxlf90_r -lxlfmath #ENDIF# #SP1#TAU_XLCLIBS = -lC -lm #ENDIF# #USE_IBMXLC#TAU_CXXLIBS = $(TAU_XLCLIBS) #ENDIF# #USE_DECCXX#TAU_CXXLIBS = -lcxxstd -lcxx #ENDIF# @@ -673,6 +742,7 @@ TAU_SGI_INIT = /usr/lib32/c++init.o #EPILOGOMP#TAU_LINKER_OPT3 = -L$(EPILOGLIBDIR) -lelg.omp $(EPILOGEXTRALINKCMD) $(TAU_HITACHI_EXTRA) #ENDIF# #USE_IBMXLC#TAU_IBM_NOCOMPINST = -optNoCompInst #ENDIF# +#COMPINST_CRAYCC#TAU_SHLIB_EXTRA_CXXLIBS = $(TAU_CXXLIBS) #ENDIF# @@ -747,18 +817,24 @@ TAU_LINKER_OPT4 = $(LEXTRA1) #APPLEPGI#AR_SHFLAGS = -shared #ENDIF# #SGICC#AR_SHFLAGS = -shared #ENDIF# #OPEN64ORC#AR_SHFLAGS = 
-shared -fpic #ENDIF# +#MPC#AR_SHFLAGS = -shared -fPIC #ENDIF# #APPLECXX#TAU_APPLE_SHFLAGS = -dynamiclib -flat_namespace -undefined suppress #ENDIF# #PYTHON#TAU_APPLE_SHFLAGS = -bundle -flat_namespace -undefined suppress #ENDIF# #USE_INTELCXX#AR_SHFLAGS = -shared #ENDIF# +#MMIC#AR_SHFLAGS = -shared -mmic #ENDIF# #APPLECXX#AR_SHFLAGS = $(TAU_APPLE_SHFLAGS) #ENDIF# #SOL2#AR_SHFLAGS = -G #ENDIF# #SUNCC#AR_SHFLAGS = -G #ENDIF# #SOL2CC#AR_SHFLAGS = -G #ENDIF# #CRAYCNL#AR_SHFLAGS = -shared #ENDIF# +#FX#AR_SHFLAGS = -shared #ENDIF# TAU_BGP_AR_SHFLAGS = -shared #USE_IBMXLC#AR_SHFLAGS = -G #ENDIF# #USE_IBMXLC#TAU_BGP_AR_SHFLAGS = -qmkshrobj #ENDIF# #BGP#AR_SHFLAGS = $(TAU_BGP_AR_SHFLAGS) #ENDIF# +TAU_BGQ_AR_SHFLAGS = -shared +#USE_IBMXLC#TAU_BGQ_AR_SHFLAGS = -qmkshrobj #ENDIF# +#BGQ#AR_SHFLAGS = $(TAU_BGQ_AR_SHFLAGS) #ENDIF# #USE_DECCXX#AR_SHFLAGS = -shared #ENDIF# #ACC#AR_SHFLAGS = -b #ENDIF# TAU_ARFLAGS = $(AR_SHFLAGS) -o @@ -775,9 +851,11 @@ TAU_RANLIB_FE = echo "Built" #CRAYXMT#TAU_RANLIB_FE = /usr/bin/ranlib #ENDIF# ############################################## -TAU_AR = ar #ENDIF# +TAU_AR = ar +#MINGW#TAU_AR = x86_64-w64-mingw32-ar #ENDIF# #SP1#TAU_AR = ar -X32 #ENDIF# #IBM64#TAU_AR = ar -X64 #ENDIF# +#FX#TAU_AR = sparc64-unknown-linux-gnu-ar #ENDIF# #PPC64#TAU_AR = ar #ENDIF# #IBM64LINUX#TAU_AR = ar #ENDIF# @@ -800,9 +878,18 @@ HPCTOOLKIT_LINK= ############################################## # Extra linker args (e.g. -Wl,-export-dynamic) ############################################## +# *CWL* NOTE: The extra space between EXTRA_LINKER_ARGS and "=" in the +# case of APPLECXX is necessary so FixMakefile.sed.default will not +# blow that entry away. It does so currently and I believe it may +# be because the order of the sed commands are not being respected +# by Mac OS X. Given the order, all ENDIF replacements should have +# been taken care of before any restoration attempts are taken for +# other variables like EXTRA_LINKER_ARGS. 
We are going to have to +# continually keep an eye on similar situations for Mac OS X until +# we can find a permanent fix. EXTRA_LINKER_ARGS= -#APPLECXX#EXTRA_LINKER_ARGS= -rdynamic #ENDIF# -TAU_LINKER_OPT14=$(UPCC_LINK_PREFIX)$(EXTRA_LINKER_ARGS) +#APPLECXX#EXTRA_LINKER_ARGS = -rdynamic#ENDIF# +TAU_LINKER_OPT14=$(EXTRA_LINKER_ARGS) ############################################## @@ -817,6 +904,8 @@ BFDLIBS= #COMPINST_INTEL9#TAU_COMPINST_OPTION=$(UPCC_C_PREFIX)-tcollect #ENDIF# #COMPINST_INTEL9#TAU_COMPINST_LINKING= #ENDIF# #COMPINST_GNU#TAU_COMPINST_OPTION=$(UPCC_PREFIX)-g $(UPCC_C_PREFIX)-finstrument-functions #ENDIF# +#BUPC#TAU_COMPINST_OPTION=$(UPCC_PREFIX) $(UPCC_C_PREFIX)-g $(UPCC_C_PREFIX)-finstrument-functions #ENDIF# +#MINGW#TAU_COMPINST_OPTION=$(UPCC_PREFIX)-gstabs3 $(UPCC_C_PREFIX)-finstrument-functions #ENDIF# #TAU_BFD#TAU_LINKER_OPT13=$(TAU_BFDLIB) #ENDIF# #COMPINST_PGI#TAU_COMPINST_OPTION=$(UPCC_C_PREFIX)-Mprof=func #ENDIF# #COMPINST_PGI#TAU_COMPINST_LINKING=$(UPCC_LINK_PREFIX)-Mprof=func #ENDIF# @@ -830,8 +919,40 @@ BFDLIBS= #MRNET#TAU_LINKER_OPT15=$(MRNET_LIBS) #ENDIF# TAU_FORTRAN_COMPINST_OPTION=$(TAU_COMPINST_OPTION) #NAGWARE_FORTRAN#TAU_FORTRAN_COMPINST_OPTION=-Wc,-finstrument-functions#ENDIF# + +#TAU_LINKS_RT#TAU_LINKER_OPT16= -lrt#ENDIF# + +############################################## +# Various Sampling Support Options +############################################## +#TAU_SS_ALLOC_SUPPORT#PROFILEOPT82 = -DTAU_SS_ALLOC_SUPPORT #ENDIF# +# *CWL* - Watch out for potential snafus for this. Right now, if EBS_HAS_RT +# is set, EBS_CLOCK_RES must have a valid value. 
+EBS_CLOCK_RES= +#EBS_HAS_RT#PROFILEOPT83 = -DEBS_CLOCK_RES=$(EBS_CLOCK_RES) #ENDIF# + +############################################## +# Support for non-portable features ############################################## +#TAU_STRSIGNAL_OK#PROFILEOPT84 = -DTAU_STRSIGNAL_OK#ENDIF# +############################################## +# TAU Unwinder flags +############################################## +#TAU_UNWIND#PROFILEOPT85 = -DTAU_UNWIND $(UNWIND_FLAG) -I$(UNWIND_INC) #ENDIF# +#TAU_UNWIND#TAU_LINKER_OPT17 = -L$(UNWIND_LIB) $(UNWIND_LIB_FLAG) $(UNWIND_EXTRAS)#ENDIF# +#MMIC#TAU_LINKER_OPT18=-mmic #ENDIF# + +############################################## +# KTAU-NG!! +############################################## +#KTAU_NG#PROFILEOPT86 = -DKTAU_NG #ENDIF# +#MMIC#PROFILEOPT87 = -mmic #ENDIF# + +############################################## +# DL_AUDITOR +############################################## +#LD_AUDITOR_AVAILABLE#PROFILEOPT88 = -DTAU_TRACK_LD_LOADER #ENDIF# ############################################## @@ -856,7 +977,9 @@ PROFILEOPTS = $(PROFILEOPT1) $(PROFILEOPT2) $(PROFILEOPT3) $(PROFILEOPT4) \ $(PROFILEOPT69) $(PROFILEOPT70) $(PROFILEOPT71) $(PROFILEOPT72) \ $(PROFILEOPT73) $(PROFILEOPT74) $(PROFILEOPT75) $(PROFILEOPT76) \ $(PROFILEOPT77) $(PROFILEOPT78) $(PROFILEOPT79) $(PROFILEOPT80) \ - $(PROFILEOPT81) $(TRACEOPT) + $(PROFILEOPT81) $(PROFILEOPT82) $(PROFILEOPT83) $(PROFILEOPT84) \ + $(PROFILEOPT85) $(PROFILEOPT86) $(PROFILEOPT87) $(PROFILEOPT88)\ + $(TRACEOPT) ############################################## @@ -867,6 +990,11 @@ PROFILEOPTS = $(PROFILEOPT1) $(PROFILEOPT2) $(PROFILEOPT3) $(PROFILEOPT4) \ #USE_IBMXLC#TAU_BGP_INTERNAL_FLAG2 = -DTAU_IBM_XLC_BGP #ENDIF# #BGP#TAU_INTERNAL_FLAG1 = $(TAU_BGP_INTERNAL_FLAG1) #ENDIF# #BGPTIMERS#TAU_INTERNAL_FLAG1 = $(TAU_BGP_INTERNAL_FLAG2) #ENDIF# +#USE_IBMXLC#TAU_BGQ_INTERNAL_FLAG1 = -qpic -DTAU_IBM_XLC_BGQ #ENDIF# +#USE_IBMXLC#TAU_BGQ_INTERNAL_FLAG2 = -DTAU_IBM_XLC_BGQ #ENDIF# +#BGQ#TAU_INTERNAL_FLAG1 = 
$(TAU_BGQ_INTERNAL_FLAG1) #ENDIF# +#MMIC#TAU_INTERNAL_FLAG1 = -mmic #ENDIF# +#BGQTIMERS#TAU_INTERNAL_FLAG1 = $(TAU_BGQ_INTERNAL_FLAG2) #ENDIF# #GNU#TAU_INTERNAL_FLAG1 = -fPIC #ENDIF# #CRAYXMT#TAU_INTERNAL_FLAG1 = #ENDIF# @@ -881,7 +1009,8 @@ TAU_LINKER_OPTS = $(TAU_LINKER_OPT1) $(TAU_LINKER_OPT2) $(TAU_LINKER_OPT3) \ $(TAU_LINKER_OPT7) $(TAU_LINKER_OPT8) $(TAU_LINKER_OPT9) \ $(TAU_LINKER_OPT10) $(TAU_LINKER_OPT11) $(TAU_LINKER_OPT12) \ $(TAU_LINKER_OPT13) $(PERFSUITELINK) $(VAMPIRTRACELIBS) $(SCOREPLIBS) $(HPCTOOLKIT_LINK) \ - $(TAU_LINKER_OPT14) $(TAU_LINKER_OPT15) + $(TAU_LINKER_OPT14) $(TAU_LINKER_OPT15) $(TAU_LINKER_OPT16) \ + $(TAU_LINKER_OPT17) $(TAU_LINKER_OPT18) ############################################## @@ -909,7 +1038,14 @@ TAU_DEFS = $(TAUDEFS) TAU_INCLUDE = -I$(TAU_INC_DIR) #PERFLIB#TAU_INCLUDE = -I$(PERFINCDIR) #ENDIF# #PERFLIB#TAU_DEFS = #ENDIF# -#PERFLIB#TAU_COMPILER_EXTRA_OPTIONS=-optTau=-p #ENDIF# +#PERFLIB#TAU_COMPILER_EXTRA_OPTIONS1=-optTau=-p #ENDIF# + +#FX#TAU_COMPILER_EXTRA_OPTIONS1=-optFujitsu #ENDIF# + +#PTHREAD_AVAILABLE#TAU_COMPILER_EXTRA_OPTIONS2=-optTrackPthread #ENDIF# +#MPC#TAU_COMPILER_EXTRA_OPTIONS2=-optTrackMPCThread #ENDIF# + +TAU_COMPILER_EXTRA_OPTIONS=$(TAU_COMPILER_EXTRA_OPTIONS1) $(TAU_COMPILER_EXTRA_OPTIONS2) TAU_INCLUDE_MEMORY = -I$(TAU_INC_DIR)/Memory #IBMXLCAPPLE#TAU_INCLUDE_MEMORY = -I$(TAU_INC_DIR)/MemoryWrapper #ENDIF# @@ -917,11 +1053,15 @@ TAU_INCLUDE_MEMORY = -I$(TAU_INC_DIR)/Memory #TAU_LIBS = $(TAUHELPER) -L$(TAU_LIB_DIR) -ltau$(TAU_CONFIG) $(TAU_LINKER_OPTS) -L/usr/local/packages/sw1.2beta1/x86_64-unknown-linux2.4/lib -lstackwalk -lsymtabAPI -lcommon -L/usr/local/packages/libdwarf-20091230/lib -ldwarf -lelf -liberty -ldlmalloc -TAU_LIBS = $(TAUHELPER) -L$(TAU_LIB_DIR) -ltau$(TAU_CONFIG) $(TAU_LINKER_OPTS) +TAU_LIBDL=-ldl +#TAU_WINDOWS#TAU_LIBDL= #ENDIF# + +#GNU46PLUS#TAU_EXTRA_MPI_LIBRARIES_FOR_GCC=$(TAU_MPI_NOWRAP_LIB) $(TAU_LIBDL) -lm #ENDIF# +TAU_LIBS = $(TAUHELPER) -L$(TAU_LIB_DIR) -ltau$(TAU_CONFIG) 
$(TAU_LINKER_OPTS) $(TAU_EXTRA_MPI_LIBRARIES_FOR_GCC) #PERFLIB#TAU_LIBS = #ENDIF# -TAU_SHLIBS = $(TAUHELPER) -L$(TAU_LIB_DIR) -lTAUsh$(TAU_CONFIG) $(TAU_LINKER_OPTS) $(TAU_LINKER_SHOPTS) $(TAU_MPI_LIB) -ldl +TAU_SHLIBS = $(TAUHELPER) -L$(TAU_LIB_DIR) -lTAUsh$(TAU_CONFIG) $(TAU_LINKER_OPTS) $(TAU_LINKER_SHOPTS) $(TAU_MPI_LIB) $(TAU_LIBDL) #PERFLIB#TAU_SHLIBS = #ENDIF# TAU_EXLIBS = $(TAUHELPER) -lTAU $(TAU_LINKER_OPTS) $(TAU_LINKER_SHOPTS) $(TAU_MPI_LIB) @@ -955,17 +1095,24 @@ TAU_TRACE_INPUT_LIB = -L$(TAU_LIB_DIR) -lTAU_traceinput$(TAU_CONFIG) #SP1#TAUMPILIBS = $(TAU_MPI_LIBS) #ENDIF# #SP1#TAUMPIFLIBS = $(TAU_MPI_FLIBS) #ENDIF# ############################################# -#SHMEM#TAU_SHMEM_OBJS = TauShmemCray.o #ENDIF# -#SP1#TAU_SHMEM_OBJS = TauShmemTurbo.o #ENDIF# -#GPSHMEM#TAU_SHMEM_OBJS = TauShmemGpshmem.o #ENDIF# +#SHMEM#SHMEM_OBJS = TauShmemCray.o #ENDIF# +#OPENSHMEM#SHMEM_OBJS = TauShmemOpenShmemC.o TauShmemOpenShmemF.o #ENDIF# +#CRAYCNL#SHMEM_OBJS = TauShmemCrayC.o TauShmemCrayF.o #ENDIF# +#SGI_MPT_SHMEM#SHMEM_OBJS = TauShmemSgiC.o TauShmemSgiF.o #ENDIF# +#SP1#SHMEM_OBJS = TauShmemTurbo.o #ENDIF# +#GPSHMEM#SHMEM_OBJS = TauShmemGpshmem.o #ENDIF# TAU_CCOMPILER = $(TAU_CC) -#UPCC#TAU_CCOMPILER = $(TAU_UPCC) #ENDIF# +#BUPC##TAU_CCOMPILER = $(TAU_UPCC) #ENDIF# +#GUPC#TAU_CCOMPILER = $(TAU_UPCC) #ENDIF# +#CRAY_UPC#TAU_UPC_COMPILER_OPTIONS = -h upc #ENDIF# #SHMEM#TAU_SHMEM_INCLUDE = $(TAU_SHMEM_INC) #ENDIF# #SHMEM#TAU_SHMEM_LIBS = -L$(TAU_PREFIX_INSTALL_DIR)/$(CONFIG_ARCH)/ -lTauShmem$(TAU_CONFIG) $(TAU_SHMEM_LIB) #ENDIF# +#CRAYCNL#TAU_SHMEM_LIBS = $(TAU_SHMEM_LIB) #ENDIF# +#SGI_MPT_SHMEM#TAU_SHMEM_LIBS = $(TAU_SHMEM_LIB) #ENDIF# ############################################# # TAU GPU LIBRARIES @@ -975,6 +1122,8 @@ TAU_OPENCL_INC= TAU_CUPTI_INC= #CUDA#TAU_CUDA_INCLUDE=-I$(TAU_CUDA_INC)#ENDIF# #CUPTI#TAU_CUPTI_INCLUDE=-I$(TAU_CUPTI_INC)#ENDIF# +#CUPTI#LEXTRA=-L$(TAU_CUPTI_INC)/../lib -L$(TAU_CUPTI_INC)/../lib64 -lcupti#ENDIF# 
+TAU_CUDA_LIBRARY=-L/opt/nvidia/cudatoolkit/4.1.28/lib64 -L/opt/nvidia/cudatoolkit/4.1.28/extras/CUPTI/lib64 -lcupti -L/opt/cray/nvidia/default/lib64 -lcuda ############################################# #CUPTI#TAU_INTERNAL_FLAGS+=-DCUPTI #ENDIF# @@ -984,23 +1133,24 @@ TAU_CUPTI_INC= ############################################# # TAU COMPILER SHELL SCRIPT OPTIONS TAUCOMPILEROPTS= -optPdtDir="$(PDTDIR)/${PDTARCHDIR}"\ - -optPdtCOpts="$(TAU_INCLUDE) $(TAU_DEFS) $(TAU_MPI_INCLUDE) $(TAU_SHMEM_INCLUDE)"\ - -optPdtCxxOpts="$(TAU_INCLUDE) $(TAU_DEFS) $(TAU_MPI_INCLUDE) $(TAU_SHMEM_INCLUDE)"\ + -optPdtCOpts="$(TAU_INCLUDE) $(TAU_DEFS) $(TAU_MPI_INCLUDE) $(TAU_SHMEM_INCLUDE) $(OPARIINCDIR)"\ + -optPdtCxxOpts="$(TAU_INCLUDE) $(TAU_DEFS) $(TAU_MPI_INCLUDE) $(TAU_SHMEM_INCLUDE) $(OPARIINCDIR)"\ -optTauInstr="$(TAU_BIN_DIR)/tau_instrumentor" \ -optNoMpi \ - -optOpariDir="$(OPARIDIR)" -optOpariTool="$(TAU_OPARI_TOOL)" \ + -optOpariDir="$(OPARIDIR)" -optOpari2Tool="$(TAU_OPARI2_TOOL)" -optOpari2ConfigTool="$(TAU_OPARI_CONFIG_TOOL)" \ + -optOpariTool="$(TAU_OPARI_TOOL)" \ -optTauCC="$(TAU_CCOMPILER)" \ - -optTauIncludes="$(TAU_INCLUDE) $(TAU_MPI_INCLUDE) $(TAU_SHMEM_INCLUDE)" \ + -optTauIncludes="$(TAU_INCLUDE) $(TAU_MPI_INCLUDE) $(TAU_SHMEM_INCLUDE) $(OPARIINCDIR)" \ -optTauDefs="$(TAU_DEFS)" $(TAU_IBM_NOCOMPINST) \ -optTauCompile="$(TAU_INCLUDE) $(TAU_MPI_INCLUDE) $(TAU_DEFS) "\ - -optLinking="$(TAU_MPI_FLIBS) $(TAU_SHMEM_LIBS) $(TAU_LIBS) $(TAU_LDFLAGS) $(TAU_CXXLIBS)"\ + -optLinking="$(TAU_MPI_FLIBS) $(TAU_LIBS) $(TAU_LDFLAGS) $(TAU_CXXLIBS) $(TAU_SHMEM_LIBS) "\ -optSharedLinking="-L$(TAU_LIB_DIR)/shared$(TAU_CONFIG) $(TAU_MPI_FLIBS) $(TAU_SHMEM_LIBS) $(TAU_EXLIBS) $(TAU_LDFLAGS) $(TAU_CXXLIBS)"\ $(TAU_COMPILER_EXTRA_OPTIONS) \ -optIncludeMemory="$(TAU_INCLUDE_MEMORY)"\ -optCompInstOption="$(TAU_COMPINST_OPTION) $(TAU_COMPINST_PTHREAD_OPTIONS)"\ -optCompInstFortranOption="$(TAU_FORTRAN_COMPINST_OPTION) $(TAU_COMPINST_PTHREAD_OPTIONS)"\ 
-optCompInstLinking="$(TAU_COMPINST_LINKING)"\ - -optWrappersDir="$(TAU_LIB_DIR)/wrappers" + -optWrappersDir="$(TAU_LIB_DIR)/wrappers" $(TAU_EXTRA_WRAPPER_OPTIONS) ############################################# TAU_SHAREDLIBS=$(TAUHELPER) -L$(TAU_LIB_DIR) -lTAU $(TAU_LINKER_OPTS) $(TAU_LINKER_SHOPTS) diff --git a/include/Profile/CuptiActivity.h b/include/Profile/CuptiActivity.h index 096548262..b0d41ff6e 100644 --- a/include/Profile/CuptiActivity.h +++ b/include/Profile/CuptiActivity.h @@ -2,7 +2,6 @@ #include #include #include -#include #if CUPTI_API_VERSION >= 2 @@ -111,10 +110,6 @@ bool registered_sync = false; bool cupti_api_runtime(); bool cupti_api_driver(); -void record_gpu_occupancy(CUpti_ActivityKernel *k, const char *name, GpuEventAttributes *m); - -std::map deviceMap; - #define CAST_TO_RUNTIME_MEMCPY_TYPE_AND_CALL(name, id, info, kind, count) \ if ((id) == CUPTI_RUNTIME_TRACE_CBID_##name##_v3020) \ { \ diff --git a/include/Profile/CuptiLayer.h b/include/Profile/CuptiLayer.h index 44a6e5829..1d3a94979 100644 --- a/include/Profile/CuptiLayer.h +++ b/include/Profile/CuptiLayer.h @@ -82,8 +82,9 @@ extern void Tau_CuptiLayer_disable() {} extern void Tau_CuptiLayer_register_counter(CuptiCounterEvent* ev) {} +extern void Tau_CuptiLayer_Initialize_Map(); + counter_map_t Tau_CuptiLayer_Counter_Map; -extern counter_map_t Tau_CuptiLayer_map() {return Tau_CuptiLayer_Counter_Map;} /* mapping the metric number to the cupti metric number */ counter_id_map_t internal_id_map; @@ -102,7 +103,9 @@ extern void Tau_CuptiLayer_finalize(); extern void Tau_CuptiLayer_register_counter(CuptiCounterEvent* ev); -extern counter_map_t Tau_CuptiLayer_map(); +extern void Tau_CuptiLayer_Initialize_Map(); + +extern counter_map_t Tau_CuptiLayer_Counter_Map; extern counter_id_map_t interal_id_map(); #endif diff --git a/include/Profile/FunctionInfo.h b/include/Profile/FunctionInfo.h index 6893ab208..48058232a 100644 --- a/include/Profile/FunctionInfo.h +++ b/include/Profile/FunctionInfo.h @@ 
-157,8 +157,8 @@ class FunctionInfo char *Type; char *GroupName; char *AllGroups; + char const * FullName; x_uint64 FunctionId; - std::string *FullName; /* For EBS Sampling Profiles */ // *CWL* - these need to be per-thread structures, just like the @@ -207,8 +207,7 @@ class FunctionInfo AllGroups = strdup(newname.c_str()); /* to make it to the profile */ } - std::string *GetFullName(); /* created on demand, cached */ - + char const * GetFullName(); /* created on demand, cached */ x_uint64 GetFunctionId() ; long GetCalls(int tid) { return NumCalls[tid]; } diff --git a/include/Profile/Profiler.h b/include/Profile/Profiler.h index 3e8f93c0c..c5f618ff7 100644 --- a/include/Profile/Profiler.h +++ b/include/Profile/Profiler.h @@ -104,7 +104,16 @@ int tau_track_pthread_barrier_wait(pthread_barrier_t *barrier); #else #ifndef TAU_MAX_THREADS +/* *CWL* - If useropt is not specified, then GPUs need to override the non-threaded default of 1. + - If thread packages are used, their defaults (> 32) are used. + Ultimately, we would like some way of setting TAU_MAX_THREADS as a cumulative value of + each component value (e.g., PTHREADS + GPU = 128 + 32 = 160). 
+*/ +#ifdef TAU_GPU +#define TAU_MAX_THREADS 32 +#else /* TAU_GPU */ #define TAU_MAX_THREADS 1 +#endif /* TAU_GPU */ #endif /* TAU_MAX_THREADS */ #endif /* PTHREADS || TULIPTHREADS || JAVA || TAU_WINDOWS || OPENMP || SPROC */ diff --git a/include/Profile/TauAPI.h b/include/Profile/TauAPI.h index fadf7ae43..35c47b3d4 100644 --- a/include/Profile/TauAPI.h +++ b/include/Profile/TauAPI.h @@ -49,8 +49,8 @@ extern "C" { #define TAU_PROFILE_CREATE_TIMER(var,name,type,group) Tau_profile_c_timer(&var, name, type, group, #group); -#define TAU_PROFILE_START(var) Tau_start_timer(var, 0, Tau_get_tid()); -#define TAU_PROFILE_STOP(var) Tau_stop_timer(var, Tau_get_tid()); +#define TAU_PROFILE_START(var) Tau_lite_start_timer(var, 0, Tau_get_tid()); +#define TAU_PROFILE_STOP(var) Tau_lite_stop_timer(var, Tau_get_tid()); #define TAU_PROFILE_STMT(stmt) stmt; @@ -408,7 +408,9 @@ void TAUDECL Tau_reducescatter_data(int data); void TAUDECL Tau_scan_data(int data); void TAUDECL Tau_set_node(int node); void TAUDECL Tau_start_timer(void *profiler, int phase, int tid); +void TAUDECL Tau_lite_start_timer(void *profiler, int phase, int tid); int TAUDECL Tau_stop_timer(void *profiler, int tid); +int TAUDECL Tau_lite_stop_timer(void *profiler, int tid); void TAUDECL Tau_trace_sendmsg(int type, int destination, int length); void TAUDECL Tau_trace_recvmsg(int type, int source, int length); void TAUDECL Tau_trace_recvmsg_remote(int type, int source, int length, int remoteid); diff --git a/include/Profile/TauEnv.h b/include/Profile/TauEnv.h index 2d1d6860f..8246f8aaf 100644 --- a/include/Profile/TauEnv.h +++ b/include/Profile/TauEnv.h @@ -43,6 +43,7 @@ extern "C" { void TAUDECL TauEnv_set_depth_limit(int value); int TAUDECL TauEnv_get_comm_matrix(); int TAUDECL TauEnv_get_track_message(); + int TAUDECL TauEnv_get_lite(); int TAUDECL TauEnv_get_compensate(); int TAUDECL TauEnv_get_track_memory_heap(); int TAUDECL TauEnv_get_track_memory_leaks(); diff --git a/src/Profile/TauGASPU.upc 
b/include/Profile/TauGASPU.upc similarity index 66% rename from src/Profile/TauGASPU.upc rename to include/Profile/TauGASPU.upc index c4270b6c1..ce1820754 100644 --- a/src/Profile/TauGASPU.upc +++ b/include/Profile/TauGASPU.upc @@ -1,6 +1,6 @@ -/* $Source: /mnt/fast/tau2git/cvsroot/tau2/src/Profile/TauGASPU.upc,v $ */ -/* $Date: 2009/11/07 09:38:24 $ */ -/* $Revision: 1.1 $ */ +/* $Source: /var/local/cvs/upcr/profile/dump/gaspu.upc,v $ */ +/* $Date: 2007/01/25 18:50:17 $ */ +/* $Revision: 1.8 $ */ /* Description: upcalls from GASP instrumentation tool into UPC code */ /* Copyright 2005, Dan Bonachea */ @@ -12,9 +12,6 @@ #include #include #include -#include - -#include /* NOTE: this file is currently compiled with every instrumented application, therefore its contents should be kept to a minimum */ @@ -22,32 +19,50 @@ /* disable instrumentation in this file, if possible */ #pragma pupc off -int gaspu_zero = 0; +#ifdef __BERKELEY_UPC__ + /* ensure code in this file does not disturb line numbering */ + #pragma UPCR NO_SRCPOS +#endif +typedef uint64_t gasp_tick_t; + +int gaspu_zero = 0; void gaspu_init(int *pmythread, int *pthreads) { *pmythread = MYTHREAD; *pthreads = THREADS; if (gaspu_zero) gasp_init(0,0,0); /* this is never called - just used to force libgasp linkage */ - #ifdef __BERKELEY_UPC__ - /* ensure code in this file does not disturb line numbering */ - #pragma UPCR NO_SRCPOS - #endif } - void gaspu_barrier() { upc_barrier; } - -void gaspu_ticks_now(gasp_tick_t *pval) { - struct timeval st; - gettimeofday(&st, NULL); - *pval = st.tv_sec * 1e6 + st.tv_usec; -} - -void gaspu_ticks_to_sec(gasp_tick_t ticks, double *pval) { - *pval = ticks * 1e-6; -} - +#ifdef __BERKELEY_UPC_RUNTIME__ + void gaspu_ticks_now(gasp_tick_t *pval) { + *pval = (gasp_tick_t)bupc_ticks_now(); + } + void gaspu_ticks_to_sec(gasp_tick_t ticks, double *pval) { + *pval = bupc_ticks_to_ns((bupc_tick_t)ticks)/1E9; + } +#else /* assume Berkeley timers unavailable */ + #include + #include + 
void gaspu_ticks_now(gasp_tick_t *pval) { + struct timeval tv; + #if defined(_CRAY) || defined(_UNICOSMP) + retry: + #endif + if (gettimeofday(&tv, NULL)) { perror("gettimeofday"); {return ;} } + *pval = ((int64_t)tv.tv_sec) * 1000000 + tv.tv_usec; + #if defined(_CRAY) || defined(_UNICOSMP) + /* fix an empirically observed bug in UNICOS gettimeofday(), + which occasionally returns ridiculously incorrect values + */ + if(*pval < (((int64_t)3) << 48)) goto retry; + #endif + } + void gaspu_ticks_to_sec(gasp_tick_t ticks, double *pval) { + *pval = ticks/1E6; + } +#endif void gaspu_dump_shared(void *ptr_to_ptr_to_shared, char *outputbuf, int bufsz) { shared void *pts = *(shared void **)ptr_to_ptr_to_shared; #ifdef __BERKELEY_UPC__ @@ -59,7 +74,6 @@ void gaspu_dump_shared(void *ptr_to_ptr_to_shared, char *outputbuf, int bufsz) { (int)upc_phaseof(pts)); #endif } - void gaspu_getenv(const char *key, const char **val) { *val = getenv(key); } @@ -71,15 +85,18 @@ typedef struct { #define FLAG(name) { name, #name } void gaspu_flags_to_string(int flags, char *str, int sz) { -#ifdef __UPC_COLLECTIVE__ char *p = str; static flaginfo_t known_flags[] = { + #ifdef UPC_IN_ALLSYNC FLAG(UPC_IN_ALLSYNC), FLAG(UPC_IN_MYSYNC), FLAG(UPC_IN_NOSYNC), FLAG(UPC_OUT_ALLSYNC), FLAG(UPC_OUT_MYSYNC), FLAG(UPC_OUT_NOSYNC), + #else + { -1, "UNKNOWN" }, + #endif { 0, 0 } }; for (int i=0; known_flags[i].name && sz > 1; i++) { @@ -93,7 +110,6 @@ void gaspu_flags_to_string(int flags, char *str, int sz) { p += v; } } -#endif } void gaspu_collop_to_string(int op, char *str, int sz) { @@ -116,7 +132,7 @@ void gaspu_collop_to_string(int op, char *str, int sz) { if (known_flags[i].flag == op) { strncpy(str, known_flags[i].name, sz); str[sz-1] = '\0'; - return; + {return ;} } } #endif diff --git a/include/Profile/TauMetaData.h b/include/Profile/TauMetaData.h index 494deb3f8..bfd50108d 100644 --- a/include/Profile/TauMetaData.h +++ b/include/Profile/TauMetaData.h @@ -22,18 +22,19 @@ #include -#include #include 
-// Putting "using namespace" statements in header files can create ambiguity -// between user-defined symbols and std symbols, creating unparsable code -// or even changing the behavior of user codes. This is also widely considered -// to be bad practice. Here's a code PDT can't parse because of this line: -// EX: #include -// EX: typedef double real; -// -//using namespace std; - -std::map &Tau_metadata_getMetaData(); + +// Note: using std::string in a std::map is dangerous +// for some libstdc++ implementations. The risky code is: +// std::map mymap; +// mymap["hello"] = "world"; +// You need to remember to use: +// mymap["hello"] = string("world"); +// It's faster, safer, and easier to use a char*, especially +// since TAU (over)uses strdup on most function arguments. +typedef std::map metadata_map_t; + +metadata_map_t & Tau_metadata_getMetaData(); int Tau_metadata_writeMetaData(Tau_util_outputDevice *out, int counter, int tid); int Tau_metadata_writeMetaData(FILE *fp, int counter, int tid); int Tau_metadata_writeMetaData(Tau_util_outputDevice *out, int tid); diff --git a/include/Profile/TauUnify.h b/include/Profile/TauUnify.h index 2e44aca2e..e147d2f03 100644 --- a/include/Profile/TauUnify.h +++ b/include/Profile/TauUnify.h @@ -97,7 +97,7 @@ class FunctionEventLister : public EventLister { return TheFunctionDB().size(); } const char *getEvent(int id) { - return TheFunctionDB()[id]->GetFullName()->c_str(); + return TheFunctionDB()[id]->GetFullName(); } }; diff --git a/include/Profile/TauUtil.h b/include/Profile/TauUtil.h index 637c8a08a..8275cef77 100644 --- a/include/Profile/TauUtil.h +++ b/include/Profile/TauUtil.h @@ -41,7 +41,7 @@ void Tau_util_destroyOutputDevice(Tau_util_outputDevice *out); int Tau_util_getOutputBufferLength(Tau_util_outputDevice *out); int Tau_util_output(Tau_util_outputDevice *out, const char *format, ...); int Tau_util_readFullLine(char *line, FILE *fp); -char *Tau_util_removeRuns(char *str); +char const * Tau_util_removeRuns(char 
const * str); void TAU_ABORT(const char *format, ...); diff --git a/src/Profile/Comp_gnu.cpp b/src/Profile/Comp_gnu.cpp index 7c09dbe38..f2f6c886c 100644 --- a/src/Profile/Comp_gnu.cpp +++ b/src/Profile/Comp_gnu.cpp @@ -155,6 +155,11 @@ void ___cyg_profile_func_exit(void*, void*); __attribute__((no_instrument_function)) void __pat_tp_func_return(const void *ea, const void *ra); +__attribute__((no_instrument_function)) +void profile_func_enter(void*, void*); + +__attribute__((no_instrument_function)) +void profile_func_exit(void*, void*); #if (defined(TAU_SICORTEX) || defined(TAU_SCOREP)) #pragma weak __cyg_profile_func_enter @@ -296,6 +301,9 @@ void __pat_tp_func_entry(const void *ea, const void *ra) { } +void profile_func_enter(void* func, void* callsite) { + __cyg_profile_func_enter(func, callsite); +} void ___cyg_profile_func_enter(void* func, void* callsite) { __cyg_profile_func_enter(func, callsite); @@ -348,6 +356,10 @@ void ___cyg_profile_func_exit(void* func, void* callsite) { __cyg_profile_func_exit(func, callsite); } +void profile_func_exit(void* func, void* callsite) { + __cyg_profile_func_exit(func, callsite); +} + void __pat_tp_func_return(const void *ea, const void *ra) { __cyg_profile_func_exit((void *)ea, (void *)ra); } diff --git a/src/Profile/CuptiActivity.cpp b/src/Profile/CuptiActivity.cpp index 3841600d4..be88c6cd1 100644 --- a/src/Profile/CuptiActivity.cpp +++ b/src/Profile/CuptiActivity.cpp @@ -287,7 +287,7 @@ void Tau_cupti_record_activity(CUpti_Activity *record) //cerr << "recording kernel: " << kernel->name << ", " << kernel->end - kernel->start << "ns.\n" << endl; GpuEventAttributes *map; - int map_size = 9; + int map_size = 5; map = (GpuEventAttributes *) malloc(sizeof(GpuEventAttributes) * map_size); static TauContextUserEvent* bs; static TauContextUserEvent* dm; @@ -310,7 +310,6 @@ void Tau_cupti_record_activity(CUpti_Activity *record) map[4].userEvent = lr; map[4].data = kernel->registersPerThread; - const char* name; uint32_t id; if 
(cupti_api_runtime()) @@ -325,8 +324,6 @@ void Tau_cupti_record_activity(CUpti_Activity *record) name = demangleName(kernel->name); //cerr << "recording kernel (device/stream/context/correlation): " << //kernel->deviceId << "/" << kernel->streamId << "/" << kernel->contextId << "/" << id << endl; - record_gpu_occupancy(kernel, name, map); - Tau_cupti_register_gpu_event(name, kernel->deviceId, kernel->streamId, kernel->contextId, id, map, map_size, kernel->start / 1e3, kernel->end / 1e3); @@ -338,16 +335,13 @@ void Tau_cupti_record_activity(CUpti_Activity *record) kernel->start / 1e3, kernel->end / 1e3); */ - - break; + break; } case CUPTI_ACTIVITY_KIND_DEVICE: { CUpti_ActivityDevice *device = (CUpti_ActivityDevice *)record; - - int nMeta = 17; - GpuMetadata *metadata = (GpuMetadata *) malloc(sizeof(GpuMetadata) * nMeta); + GpuMetadata *metadata = (GpuMetadata *) malloc(sizeof(GpuMetadata) * 16); int id = 0; //first the name. metadata[id].name = "GPU Name"; @@ -367,141 +361,17 @@ void Tau_cupti_record_activity(CUpti_Activity *record) RECORD_DEVICE_METADATA(maxSharedMemoryPerBlock, device); RECORD_DEVICE_METADATA(maxThreadsPerBlock, device); RECORD_DEVICE_METADATA(maxWarpsPerMultiprocessor, device); - RECORD_DEVICE_METADATA(maxBlocksPerMultiprocessor, device); RECORD_DEVICE_METADATA(numMemcpyEngines, device); RECORD_DEVICE_METADATA(numMultiprocessors, device); RECORD_DEVICE_METADATA(numThreadsPerWarp, device); //cerr << "recording metadata (device): " << device->id << endl; - deviceMap[device->id] = *device; - Tau_cupti_register_metadata(device->id, metadata, nMeta); + Tau_cupti_register_metadata(device->id, metadata, 16); break; } } } -//Helper function givens ceiling with given significance. 
-int ceil(float value, int significance) -{ - return ceil(value/significance)*significance; -} - -void record_gpu_occupancy(CUpti_ActivityKernel *kernel, const char *name, GpuEventAttributes *map) -{ - CUpti_ActivityDevice device = deviceMap[kernel->deviceId]; - - if ((device.computeCapabilityMajor > 3) || - device.computeCapabilityMajor == 3 && - device.computeCapabilityMinor > 5) - { - TAU_VERBOSE("Warning: GPU occupancy calculator is not implemented for devices of compute capability > 3.5."); - return; - } - - int myWarpsPerBlock = ceil( - (kernel->blockX * kernel->blockY * kernel->blockZ)/ - device.numThreadsPerWarp - ); - - int allocatable_warps = min( - (int)device.maxBlocksPerMultiprocessor, - (int)floor( - (float) device.maxWarpsPerMultiprocessor/ - myWarpsPerBlock - ) - ); - - - static TauContextUserEvent* alW; - Tau_get_context_userevent((void **) &alW, "Allocatable Blocks per SM given Thread count (Blocks)"); - map[5].userEvent = alW; - map[5].data = allocatable_warps; - - int myRegistersPerBlock = device.computeCapabilityMajor < 2 ? - ceil( - ceil( - (float)myWarpsPerBlock, 2 - )* - kernel->registersPerThread* - device.numThreadsPerWarp, - device.computeCapabilityMinor < 2 ? 256 : 512 - ) : - ceil( - kernel->registersPerThread* - device.numThreadsPerWarp, - device.computeCapabilityMajor < 3 ? 128 : 256 - )* - ceil( - myWarpsPerBlock, device.computeCapabilityMajor < 3 ? 
2 : 4 - ); - - int allocatable_registers = (int)floor( - device.maxRegistersPerBlock/ - max( - myRegistersPerBlock, 1 - ) - ); - - if (allocatable_registers == 0) - allocatable_registers = device.maxBlocksPerMultiprocessor; - - - static TauContextUserEvent* alR; - Tau_get_context_userevent((void **) &alR, "Allocatable Blocks Per SM given Registers used (Blocks)"); - map[6].userEvent = alR; - map[6].data = allocatable_registers; - - int sharedMemoryUnit; - switch(device.computeCapabilityMajor) - { - case 1: sharedMemoryUnit = 512; break; - case 2: sharedMemoryUnit = 128; break; - case 3: sharedMemoryUnit = 256; break; - } - int mySharedMemoryPerBlock = ceil( - kernel->staticSharedMemory, - sharedMemoryUnit - ); - - int allocatable_shared_memory = mySharedMemoryPerBlock > 0 ? - floor( - device.maxSharedMemoryPerBlock/ - mySharedMemoryPerBlock - ) : - device.maxThreadsPerBlock - ; - - static TauContextUserEvent* alS; - Tau_get_context_userevent((void **) &alS, "Allocatable Blocks Per SM given Shared Memory usage (Blocks)"); - map[7].userEvent = alS; - map[7].data = allocatable_shared_memory; - - int allocatable_blocks = min(allocatable_warps, min(allocatable_registers, allocatable_shared_memory)); - - int occupancy = myWarpsPerBlock * allocatable_blocks; - -//#define RESULTS_TO_STDOUT 1 -#ifdef RESULTS_TO_STDOUT - printf("[%s] occupancy calculator:\n", name); - - printf("myWarpsPerBlock = %d.\n", myWarpsPerBlock); - printf("allocatable warps = %d.\n", allocatable_warps); - printf("myRegistersPerBlock = %d.\n", myRegistersPerBlock); - printf("allocatable registers = %d.\n", allocatable_registers); - printf("mySharedMemoryPerBlock = %d.\n", mySharedMemoryPerBlock); - printf("allocatable shared memory = %d.\n", allocatable_shared_memory); - - printf(" >> occupancy = %d (%2.0f%% of %d).\n", - occupancy, ((float)occupancy/device.maxWarpsPerMultiprocessor)*100, device.maxWarpsPerMultiprocessor); -#endif - - static TauContextUserEvent* occ; - Tau_get_context_userevent((void 
**) &occ, "GPU Occupancy (Warps)"); - map[8].userEvent = occ; - map[8].data = occupancy; - -} - bool function_is_sync(CUpti_CallbackId id) { return ( diff --git a/src/Profile/CuptiLayer.cpp b/src/Profile/CuptiLayer.cpp index aa535efbd..06fb5f585 100644 --- a/src/Profile/CuptiLayer.cpp +++ b/src/Profile/CuptiLayer.cpp @@ -22,7 +22,6 @@ void Tau_CuptiLayer_register_counter(CuptiCounterEvent* ev) {} void Tau_CuptiLayer_read_counter(uint64_t * cBuffer, int id) {} counter_map_t Tau_CuptiLayer_Counter_Map; -counter_map_t Tau_CuptiLayer_map() {return Tau_CuptiLayer_Counter_Map;} counter_id_map_t internal_id_map; counter_id_map_t internal_id_map() {return internal_id_map;} @@ -344,7 +343,7 @@ uint64_t Tau_CuptiLayer_read_counter(int id) free(counterDataBuffer); return cb; } -void retrieve_available_counters() +void Tau_CuptiLayer_Initialize_Map() { CUdevice currDevice = -1; uint32_t num_domains = -1; @@ -411,25 +410,17 @@ void retrieve_available_counters() //ev->print(); } -/* Fills in the Tau_CuptiLayer_Counter_Map */ -counter_map_t Tau_CuptiLayer_map() -{ - if (Tau_CuptiLayer_Counter_Map.empty()) - { - //printf("create...\n"); - retrieve_available_counters(); - } - return Tau_CuptiLayer_Counter_Map; -} - bool Tau_CuptiLayer_is_cupti_counter(char* str) { - return Tau_CuptiLayer_map().count(string(str)) > 0; + if (Tau_CuptiLayer_Counter_Map.empty()) { + Tau_CuptiLayer_Initialize_Map(); + } + return Tau_CuptiLayer_Counter_Map.count(string(str)) > 0; } void Tau_CuptiLayer_register_string(char *str, int metric_n) { - Tau_CuptiLayer_register_counter(Tau_CuptiLayer_map()[str]); + Tau_CuptiLayer_register_counter(Tau_CuptiLayer_Counter_Map[str]); internal_id_map[metric_n] = Tau_CuptiLayer_Added_counters.size() - 1; //printf("adding counter with id: %d.\n", metric_n); } diff --git a/src/Profile/FunctionInfo.cpp b/src/Profile/FunctionInfo.cpp index c91bbbd3c..d10ed528b 100644 --- a/src/Profile/FunctionInfo.cpp +++ b/src/Profile/FunctionInfo.cpp @@ -508,7 +508,7 @@ void 
tauCreateFI(void **ptr, const string& name, const string& type, } -string *FunctionInfo::GetFullName() { +char const * FunctionInfo::GetFullName() { if (FullName == NULL) { ostringstream ostr; @@ -517,13 +517,8 @@ string *FunctionInfo::GetFullName() { } else { ostr << GetName() << ":GROUP:" << GetAllGroups(); } - FullName = new string; - - string tmpstr = ostr.str(); - char *tmp = strdup(tmpstr.c_str()); - tmp = Tau_util_removeRuns(tmp); - *FullName = tmp; + FullName = Tau_util_removeRuns(ostr.str().c_str()); } return FullName; } diff --git a/src/Profile/Makefile b/src/Profile/Makefile index 1ca321f1b..954ff9f49 100644 --- a/src/Profile/Makefile +++ b/src/Profile/Makefile @@ -135,7 +135,7 @@ OBJS = Profiler.o UserEvent.o FunctionInfo.o TauMemoryWrap.o \ $(PAPI_O) $(TAU_JAVA_O) $(OPENMP_O) $(OPARI_O) \ $(MULT_O) $(PLATFORM_O) $(PYTHON_O) $(TAU_SAMPLING_O) \ TauIoWrap.o TauCompensate.o $(KTAU_O) $(KTAU_MERGE_O) $(KTAU_SHCTR_O) \ - $(RENCI_STFF_O) $(TAU_GASP_O) $(TAU_GASPU_O) $(TAU_CUPTI_COUNTER_OBJS) \ + $(RENCI_STFF_O) $(TAU_GASP_O) $(TAU_CUPTI_COUNTER_OBJS) \ $(TAU_WINDOWS_OBJ) $(TAU_SHMEM_OBJS) $(TAU_IBM_BG_HWP_OBJS) # CWL - BFD Functionality for libTAU.so is faked if no shared BFD object can @@ -207,6 +207,7 @@ TAU_DISABLE_SHARED = libTauDisable$(TAU_SHLIBX) # For making libTAU.so Dynamic Shared Object ############################################## #GNU#AR_SHFLAGS = -shared #ENDIF# +#CLANG#AR_SHFLAGS = -shared #ENDIF# #SUNX86_64#AR_SHFLAGS = -shared #ENDIF# #SOL2CC#AR_SHFLAGS = -G #ENDIF# #USE_PATHCC#AR_SHFLAGS = -shared -fPIC #ENDIF# @@ -227,6 +228,8 @@ TAU_DISABLE_SHARED = libTauDisable$(TAU_SHLIBX) TAU_BGP_AR_SHFLAGS = -shared #USE_IBMXLC#AR_SHFLAGS = -G #ENDIF# #USE_IBMXLC#TAU_BGP_AR_SHFLAGS = -qmkshrobj #ENDIF# +#USE_IBMXLC#TAU_BGQ_AR_SHFLAGS = -qmkshrobj #ENDIF# +#BGQ# AR_SHFLAGS = $(TAU_BGQ_AR_SHFLAGS) #ENDIF# #BGP#AR_SHFLAGS = $(TAU_BGP_AR_SHFLAGS) #ENDIF# #USE_DECCXX#AR_SHFLAGS = -shared #ENDIF# #ACC#AR_SHFLAGS = -b #ENDIF# @@ -839,9 +842,6 @@ 
TauMemory.o : TauMemory.cpp $(HDRS) TauGASP.o : TauGASP.c $(HDRS) $(TAU_UPCC) $(UPCFLAGS) -c $(@:.o=.c) -TauGASPU.o : TauGASPU.upc $(HDRS) - $(TAU_UPCC) $(UPCFLAGS) -c $(@:.o=.upc) - TauBgHwpCounters.o : TauBgHwpCounters.c $(HDRS) $(CC) $(CFLAGS) -c $(@:.o=.c) diff --git a/src/Profile/Profiler.cpp b/src/Profile/Profiler.cpp index 833641092..041249d1b 100644 --- a/src/Profile/Profiler.cpp +++ b/src/Profile/Profiler.cpp @@ -56,7 +56,6 @@ using namespace std; #include #include - #include #include #include @@ -693,7 +692,7 @@ void Profiler::Stop(int tid, bool useLastTimeStamp) { //ThisKtauProfiler->KernProf.DumpKProfile(); ThisKtauProfiler->KernProf.DumpKProfileOut(); #endif /*TAUKTAU */ - + #ifdef TAU_TRACK_IDLE_THREADS /* Check if we need to shut off .TAU applications on other tids */ if (tid == 0) { int i; @@ -1286,7 +1285,11 @@ int TauProfiler_StoreData(int tid) { RtsLayer::LockDB(); if (profileWriteWarningPrinted == 0) { profileWriteWarningPrinted = 1; - fprintf (stderr, "TAU: Warning: Profile data for at least one thread has been written out more than 10 times!\nTAU: This could cause extreme overhead and be due to an error\nTAU: in instrumentation (lack of top level timer).\nTAU: If using OpenMP, make sure -opari is enabled.\n"); + fprintf (stderr, + "TAU: Warning: Profile data for at least one thread has been written out more than 10 times!\n" + "TAU: This could cause extreme overhead and be due to an error\n" + "TAU: in instrumentation (lack of top level timer).\n" + "TAU: If using OpenMP, make sure -opari is enabled.\n"); } RtsLayer::UnLockDB(); } @@ -1304,9 +1307,7 @@ int TauProfiler_StoreData(int tid) { } #endif if (TauEnv_get_profiling()) { - Tau_snapshot_writeFinal("final"); - if (TauEnv_get_profile_format() == TAU_FORMAT_PROFILE) { TauProfiler_DumpData(false, tid, "profile"); } @@ -1321,6 +1322,20 @@ int TauProfiler_StoreData(int tid) { } } #endif /* PTHREADS */ + +#if defined(TAU_OPENMP) + //fprintf(stderr, "Total Threads: %d\n", 
RtsLayer::getTotalThreads()); + if (RtsLayer::getTotalThreads() == 1) { + // issue a warning, because this is a multithreaded config, + // and we saw no threads other than 0! + fprintf(stderr, + "\nTAU: WARNING! TAU did not detect more than one thread.\n" + "If running an OpenMP application with tau_exec and you expected\n" + "more than one thread, try using the '-T pthread' configuration,\n" + "or instrument your code with TAU.\n\n"); + } +#endif /* OPENMP */ + return 1; } @@ -1344,9 +1359,6 @@ else profiledir = new char[profile_dir_len]; written_bytes = sprintf(profiledir, "%s.", KTAU_NG_PREFIX); gethostname(profiledir + written_bytes, profile_dir_len - written_bytes); - - // profiledir = new char[KTAU_NG_PREFIX_LEN + (Tau_metadata_getMetaData()["Hostname"]).length() + 1]; //This will remain in memory until TAU closes since their is no corresponding delete. - // sprintf(profiledir, "%s.%s", KTAU_NG_PREFIX, Tau_metadata_getMetaData()["Hostname"].c_str()); } #else profiledir = TauEnv_get_profiledir(); diff --git a/src/Profile/TauCAPI.cpp b/src/Profile/TauCAPI.cpp index b98ec0a30..3c4b2ed38 100644 --- a/src/Profile/TauCAPI.cpp +++ b/src/Profile/TauCAPI.cpp @@ -60,6 +60,7 @@ void esd_exit (elg_ui4 rid); #include #endif +extern int tau_env_lite; extern "C" void * Tau_get_profiler(const char *fname, const char *type, TauGroup_t group, const char *gr_name) { FunctionInfo *f; @@ -353,6 +354,50 @@ extern "C" void Tau_start_timer(void *functionInfo, int phase, int tid) { Tau_global_insideTAU[tid]--; } +/////////////////////////////////////////////////////////////////////////// +extern "C" void Tau_lite_start_timer(void *functionInfo, int phase, int tid) { + if (tau_env_lite){ + // move the stack pointer + Tau_global_stackpos[tid]++; /* push */ + FunctionInfo *fi = (FunctionInfo *) functionInfo; + Profiler *pp = TauInternal_ParentProfiler(tid); + if (fi) { + fi->IncrNumCalls(tid); // increment number of calls + } + if (pp && pp->ThisFunction) { + 
pp->ThisFunction->IncrNumSubrs(tid); // increment parent's child calls + } + + + if (Tau_global_stackpos[tid] >= Tau_global_stackdepth[tid]) { + int oldDepth = Tau_global_stackdepth[tid]; + int newDepth = oldDepth + STACK_DEPTH_INCREMENT; + Profiler *newStack = (Profiler *) malloc(sizeof(Profiler)*newDepth); + memcpy(newStack, Tau_global_stack[tid], oldDepth*sizeof(Profiler)); + Tau_global_stack[tid] = newStack; + Tau_global_stackdepth[tid] = newDepth; + } + Profiler *p = &(Tau_global_stack[tid][Tau_global_stackpos[tid]]); + RtsLayer::getUSecD(tid, p->StartTime); + + p->MyProfileGroup_ = fi->GetProfileGroup(); + p->ThisFunction = fi; + p->ParentProfiler = pp; + + // if this function is not already on the callstack, put it + if (fi->GetAlreadyOnStack(tid) == false) { + p->AddInclFlag = true; + fi->SetAlreadyOnStack(true,tid); + } else { + p->AddInclFlag = false; + } + + } else { // not lite - default + Tau_start_timer(functionInfo, phase, tid); + } +} + + @@ -488,6 +533,48 @@ extern "C" int Tau_stop_timer(void *function_info, int tid ) { return 0; } +/////////////////////////////////////////////////////////////////////////// +extern "C" int Tau_lite_stop_timer(void *function_info, int tid ) { + if (tau_env_lite) { + double timeStamp[TAU_MAX_COUNTERS] = {0}; + double delta [TAU_MAX_COUNTERS] = {0}; + RtsLayer::getUSecD(tid, timeStamp); + + FunctionInfo *fi = (FunctionInfo *) function_info; + Profiler *profiler; + profiler = (Profiler *) &(Tau_global_stack[tid][Tau_global_stackpos[tid]]); + + for (int k=0; kStartTime[k]; + } + + if (profiler && profiler->ThisFunction != fi) { /* Check for overlapping timers */ + reportOverlap (profiler->ThisFunction, fi); + } + if (profiler && profiler->AddInclFlag == true) { + fi->SetAlreadyOnStack(false, tid); // while exiting + fi->AddInclTime(delta, tid); // ok to add both excl and incl times + } + else { + //printf("Couldn't add incl time: profiler= %p, profiler->AddInclFlag=%d\n", profiler, profiler->AddInclFlag); + } + 
fi->AddExclTime(delta, tid); + Profiler *pp = TauInternal_ParentProfiler(tid); + + if (pp) { + pp->ThisFunction->ExcludeTime(delta, tid); + } + else { + //printf("Tau_lite_stop: parent profiler = 0x0: Function name = %s, StoreData?\n", fi->GetName()); + TauProfiler_StoreData(tid); + } + Tau_global_stackpos[tid]--; /* pop */ + + } else { + Tau_stop_timer(function_info, tid); + } +} + /////////////////////////////////////////////////////////////////////////// extern "C" int Tau_stop_current_timer() { @@ -820,15 +907,15 @@ extern "C" TauGroup_t Tau_disable_all_groups(void) { extern "C" int& tau_totalnodes(int set_or_get, int value) { static int nodes = 1; - if (set_or_get == 1) - { - nodes = value; - } + if (set_or_get == 1) { + nodes = value; + } return nodes; } -#if (defined(TAU_MPI) || defined(TAU_SHMEM) || defined(TAU_DMAPP) ) + +#if (defined(TAU_MPI) || defined(TAU_SHMEM) || defined(TAU_DMAPP) || defined(TAU_UPC)) @@ -891,11 +978,10 @@ extern "C" int shmem_n_pes(void); #endif /* TAU_SHMEM */ /////////////////////////////////////////////////////////////////////////// -extern "C" void Tau_trace_sendmsg(int type, int destination, int length) { +extern "C" void Tau_trace_sendmsg(int type, int destination, int length) +{ if (!RtsLayer::TheEnableInstrumentation()) return; static int initialize = register_events(); -#ifdef DEBUG_PROF -#endif /* DEBUG_PROF */ #ifdef TAU_PROFILEPARAM #ifndef TAU_DISABLE_PROFILEPARAM_IN_MPI @@ -907,11 +993,15 @@ extern "C" void Tau_trace_sendmsg(int type, int destination, int length) { if (TauEnv_get_comm_matrix()) { if (destination >= tau_totalnodes(0,0)) { -#if (defined (TAU_SHMEM)) +#ifdef TAU_SHMEM tau_totalnodes(1,shmem_n_pes()); register_events(); #else /* TAU_SHMEM */ - fprintf(stderr, "TAU Error: Comm Matrix destination %d exceeds node count %d. Was MPI_Init/shmem_init wrapper never called? 
Please disable TAU_COMM_MATRIX or add calls to the init function in your source code.\n", destination, tau_totalnodes(0,0)); + fprintf(stderr, + "TAU Error: Comm Matrix destination %d exceeds node count %d. " + "Was MPI_Init/shmem_init wrapper never called? " + "Please disable TAU_COMM_MATRIX or add calls to the init function in your source code.\n", + destination, tau_totalnodes(0,0)); exit(-1); #endif /* TAU_SHMEM */ } @@ -944,8 +1034,8 @@ extern "C" void Tau_trace_recvmsg(int type, int source, int length) { } /////////////////////////////////////////////////////////////////////////// -extern "C" void Tau_trace_recvmsg_remote(int type, int source, int length, int remoteid) { - +extern "C" void Tau_trace_recvmsg_remote(int type, int source, int length, int remoteid) +{ if (!RtsLayer::TheEnableInstrumentation()) return; if (TauEnv_get_tracing()) { if (source >= 0) { @@ -955,7 +1045,8 @@ extern "C" void Tau_trace_recvmsg_remote(int type, int source, int length, int r } /////////////////////////////////////////////////////////////////////////// -extern "C" void Tau_trace_sendmsg_remote(int type, int destination, int length, int remoteid) { +extern "C" void Tau_trace_sendmsg_remote(int type, int destination, int length, int remoteid) +{ if (!RtsLayer::TheEnableInstrumentation()) return; if (TauEnv_get_tracing()) { @@ -963,32 +1054,32 @@ extern "C" void Tau_trace_sendmsg_remote(int type, int destination, int length, TauTraceSendMsgRemote(type, destination, length, remoteid); } } + if (TauEnv_get_comm_matrix()) { - static int initialize = register_events(); -#ifdef DEBUG_PROF -#endif /* DEBUG_PROF */ + static int initialize = register_events(); #ifdef TAU_PROFILEPARAM #ifndef TAU_DISABLE_PROFILEPARAM_IN_MPI - TAU_PROFILE_PARAM1L(length, "message size"); + TAU_PROFILE_PARAM1L(length, "message size"); #endif /* TAU_DISABLE_PROFILEPARAM_IN_MPI */ #endif /* TAU_PROFILEPARAM */ - //TAU_EVENT(TheSendEvent(), length); - - if (TauEnv_get_comm_matrix()) { - if (destination >= 
tau_totalnodes(0,0)) { - -#if (defined (TAU_SHMEM)) - tau_totalnodes(1,shmem_n_pes()); - register_events(); + if (TauEnv_get_comm_matrix()) { + if (destination >= tau_totalnodes(0,0)) { +#ifdef TAU_SHMEM + tau_totalnodes(1,shmem_n_pes()); + register_events(); #else /* TAU_SHMEM */ - fprintf(stderr, "TAU Error: Comm Matrix destination %d exceeds node count %d. Was MPI_Init/shmem_init wrapper never called? Please disable TAU_COMM_MATRIX or add calls to the init function in your source code.\n", destination, tau_totalnodes(0,0)); - exit(-1); + fprintf(stderr, + "TAU Error: Comm Matrix destination %d exceeds node count %d. " + "Was MPI_Init/shmem_init wrapper never called? " + "Please disable TAU_COMM_MATRIX or add calls to the init function in your source code.\n", + destination, tau_totalnodes(0,0)); + exit(-1); #endif /* TAU_SHMEM */ + } + TheMsgVolContextEvent()[remoteid]->TriggerEvent(length, RtsLayer::myThread()); } - TheMsgVolContextEvent()[remoteid]->TriggerEvent(length, RtsLayer::myThread()); - } } } @@ -1033,7 +1124,9 @@ extern "C" void Tau_scan_data(int data) { extern "C" void Tau_reducescatter_data(int data) { TAU_EVENT(TheReduceScatterEvent(), data); } + #else /* !(TAU_MPI || TAU_SHMEM || TAU_DMAPP)*/ + /////////////////////////////////////////////////////////////////////////// extern "C" void Tau_trace_sendmsg(int type, int destination, int length) { TauTraceSendMsg(type, destination, length); @@ -1050,6 +1143,7 @@ extern "C" void Tau_trace_recvmsg(int type, int source, int length) { extern "C" void Tau_trace_recvmsg_remote(int type, int source, int length, int remoteid) { TauTraceRecvMsgRemote(type, source, length, remoteid); } + #endif /* TAU_MPI || TAU_SHMEM*/ /////////////////////////////////////////////////////////////////////////// @@ -1143,27 +1237,30 @@ extern "C" void Tau_event_disable_stddev(void *ue) { /////////////////////////////////////////////////////////////////////////// -extern "C" void Tau_profile_c_timer(void **ptr, const char *name, 
const char *type, TauGroup_t group, - const char *group_name) { +extern "C" void Tau_profile_c_timer(void **ptr, const char *name, const char *type, + TauGroup_t group, const char *group_name) +{ if (*ptr == 0) { RtsLayer::LockEnv(); if (*ptr == 0) { Tau_global_incr_insideTAU(); // remove garbage characters from the end of name - char *fixedname = strdup(name); - for (unsigned int i=0; i::iterator it = ThePureMap().find(n); @@ -1683,7 +1780,6 @@ extern "C" int Tau_create_tid(void) { // ensuring that the profiles are written out while the objects are still valid void Tau_destructor_trigger() { Tau_stop_top_level_timer_if_necessary(); - //printf ("FIvector destructor\n"); Tau_global_setLightsOut(); if ((TheUsingDyninst() || TheUsingCompInst()) && TheSafeToDumpData()) { #ifndef TAU_VAMPIRTRACE diff --git a/src/Profile/TauEnv.cpp b/src/Profile/TauEnv.cpp index 7898c506b..435be84f7 100644 --- a/src/Profile/TauEnv.cpp +++ b/src/Profile/TauEnv.cpp @@ -115,6 +115,8 @@ #define TAU_TRACK_MESSAGE_DEFAULT 0 +#define TAU_LITE_DEFAULT 0 + #define TAU_TRACK_IO_PARAMS_DEFAULT 0 #define TAU_TRACK_SIGNALS_DEFAULT 0 @@ -379,6 +381,7 @@ char * Tau_check_dirname(const char * dir) { /****************************************************************************/ extern "C" { /* C linkage */ + static int env_synchronize_clocks = 0; static int env_verbose = 0; static int env_throttle = 0; @@ -395,6 +398,7 @@ static int env_depth_limit = 0; static int env_track_message = 0; static int env_comm_matrix = 0; static int env_track_memory_heap = 0; +int tau_env_lite = 0; static int env_track_memory_leaks = 0; static int env_track_memory_headroom = 0; static int env_track_io_params = 0; @@ -526,6 +530,10 @@ int TauEnv_get_track_message() { return env_track_message; } +int TauEnv_get_lite() { + return tau_env_lite; +} + int TauEnv_get_track_memory_heap() { return env_track_memory_heap; } @@ -647,7 +655,8 @@ int TauEnv_get_mic_offload(){ 
/********************************************************************* * Initialize the TauEnv module, get configuration values ********************************************************************/ -void TauEnv_initialize() { +void TauEnv_initialize() +{ char tmpstr[512]; /* unset LD_PRELOAD so that vt_unify and elg_unify work */ @@ -668,9 +677,15 @@ void TauEnv_initialize() { /* Read the configuration file */ TauConf_read(); - TAU_VERBOSE("TAU: Initialized TAU (TAU_VERBOSE=1)\n"); /*** Options that can be used with Scalasca and VampirTrace ***/ + tmp = getconf("TAU_LITE"); + if (parse_bool(tmp,tau_env_lite)) { + TAU_VERBOSE("TAU: LITE measurement enabled\n"); + TAU_METADATA("TAU_LITE", "on"); + tau_env_lite = 1; + } + tmp = getconf("TAU_TRACK_HEAP"); if (parse_bool(tmp, env_track_memory_heap)) { TAU_VERBOSE("TAU: Entry/Exit Memory tracking Enabled\n"); @@ -712,12 +727,12 @@ void TauEnv_initialize() { env_extras = 1; tmp = getconf("TAU_SIGNALS_GDB"); if (parse_bool(tmp, env_signals_gdb)) { - TAU_VERBOSE("TAU: SIGNALS GDB output enabled\n"); - TAU_METADATA("TAU_SIGNALS_GDB", "on"); - env_signals_gdb = 1; + TAU_VERBOSE("TAU: SIGNALS GDB output enabled\n"); + TAU_METADATA("TAU_SIGNALS_GDB", "on"); + env_signals_gdb = 1; } else { - TAU_METADATA("TAU_SIGNALS_GDB", "off"); - env_signals_gdb = 0; + TAU_METADATA("TAU_SIGNALS_GDB", "off"); + env_signals_gdb = 0; } } else { TAU_METADATA("TAU_TRACK_SIGNALS", "off"); @@ -730,7 +745,6 @@ void TauEnv_initialize() { TAU_VERBOSE("TAU: Generating only summary data: TAU_SUMMARY enabled\n"); TAU_METADATA("TAU_SUMMARY", "on"); env_summary_only = 1; - env_extras = 1; } else { TAU_METADATA("TAU_SUMMARY", "off"); env_summary_only = 0; @@ -741,7 +755,6 @@ void TauEnv_initialize() { TAU_VERBOSE("TAU: IBM UPC HWP counter data collection enabled\n"); TAU_METADATA("TAU_IBM_BG_HWP_COUNTERS", "on"); env_ibm_bg_hwp_counters = 1; - env_extras = 1; } else { TAU_METADATA("TAU_IBM_BG_HWP_COUNTERS", "off"); env_ibm_bg_hwp_counters = 0; @@ -779,13 
+792,11 @@ void TauEnv_initialize() { if ((env_profiledir = getconf("PROFILEDIR")) == NULL) { env_profiledir = "."; /* current directory */ } - /* env_profiledir=Tau_check_dirname(env_profiledir); */ TAU_VERBOSE("TAU: PROFILEDIR is \"%s\"\n", env_profiledir); if ((env_tracedir = getconf("TRACEDIR")) == NULL) { env_tracedir = "."; /* current directory */ } - /* env_tracedir=Tau_check_dirname(env_tracedir); */ TAU_VERBOSE("TAU: TRACEDIR is \"%s\"\n", env_tracedir); int profiling_default = TAU_PROFILING_DEFAULT; @@ -862,7 +873,7 @@ void TauEnv_initialize() { sprintf(tmpstr, "%d", env_callsite_limit); TAU_METADATA("TAU_CALLSITE_LIMIT", tmpstr); -#if (defined(TAU_MPI) || defined(TAU_SHMEM) || defined(TAU_DMAPP)) +#if (defined(TAU_MPI) || defined(TAU_SHMEM) || defined(TAU_DMAPP) || defined(TAU_UPC)) /* track comm (opposite of old -nocomm option) */ tmp = getconf("TAU_TRACK_MESSAGE"); if (parse_bool(tmp, env_track_message)) { @@ -891,27 +902,25 @@ void TauEnv_initialize() { TAU_VERBOSE("TAU: Message Tracking Disabled\n"); TAU_METADATA("TAU_TRACK_MESSAGE", "off"); } -#endif /* TAU_MPI || TAU_SHMEM || TAU_DMAPP */ +#endif /* TAU_MPI || TAU_SHMEM || TAU_DMAPP || TAU_UPC */ /* clock synchronization */ if (env_tracing == 0) { env_synchronize_clocks = 0; } else { - tmp = getconf("TAU_SYNCHRONIZE_CLOCKS"); - if (parse_bool(tmp, TAU_SYNCHRONIZE_CLOCKS_DEFAULT)) { - env_synchronize_clocks = 1; - } else { - env_synchronize_clocks = 0; - } #ifndef TAU_MPI /* If there is no MPI, there can't be any sync, so forget it */ env_synchronize_clocks = 0; TAU_VERBOSE("TAU: Clock Synchronization Disabled (MPI not available)\n"); + TAU_METADATA("TAU_SYNCHRONIZE_CLOCKS", "off"); #else - if (env_synchronize_clocks) { + tmp = getconf("TAU_SYNCHRONIZE_CLOCKS"); + if (parse_bool(tmp, TAU_SYNCHRONIZE_CLOCKS_DEFAULT)) { + env_synchronize_clocks = 1; TAU_VERBOSE("TAU: Clock Synchronization Enabled\n"); TAU_METADATA("TAU_SYNCHRONIZE_CLOCKS", "on"); } else { + env_synchronize_clocks = 0; 
TAU_VERBOSE("TAU: Clock Synchronization Disabled\n"); TAU_METADATA("TAU_SYNCHRONIZE_CLOCKS", "off"); } @@ -923,9 +932,6 @@ void TauEnv_initialize() { env_callpath_depth = TAU_CALLPATH_DEPTH_DEFAULT; if (depth) { env_callpath_depth = atoi(depth); -/* if (env_callpath_depth <= 1) { */ -/* env_callpath_depth = TAU_CALLPATH_DEPTH_DEFAULT; */ -/* } */ if (env_callpath_depth < 0) { env_callpath_depth = TAU_CALLPATH_DEPTH_DEFAULT; } @@ -1050,7 +1056,7 @@ void TauEnv_initialize() { env_ebs_keep_unresolved_addr = 0; TAU_METADATA("TAU_EBS_KEEP_UNRESOLVED_ADDR", "off"); } - + if (TauEnv_get_ebs_enabled()) { // *CWL* Acquire the sampling source. This has to be done first @@ -1062,7 +1068,7 @@ void TauEnv_initialize() { // sensitive (ie. 1000 microseconds is fine for timer // interrutps, but 1000 PAPI_TOT_CYC is way too small). if ((env_ebs_source = getconf("TAU_EBS_SOURCE")) == NULL) { - env_ebs_source = "itimer"; + env_ebs_source = "itimer"; } sprintf(tmpstr, "%s", env_ebs_source); TAU_METADATA("TAU_EBS_PERIOD", tmpstr); @@ -1083,18 +1089,18 @@ void TauEnv_initialize() { // The check for PAPI sources will be extremely naive for // now. if (strncmp(env_ebs_source, "PAPI", 4) == 0) { - default_ebs_period = 133337; + default_ebs_period = 133337; } env_ebs_period = default_ebs_period; if (ebs_period) { - // Try setting it to the user value. - env_ebs_period = atoi(ebs_period); - // *CWL* - 0 is not a valid ebs_period. Plus atoi() returns 0 - // if the string is not a number. - if (env_ebs_period <= 0) { - // go back to default on failure or bad value. - env_ebs_period = default_ebs_period; - } + // Try setting it to the user value. + env_ebs_period = atoi(ebs_period); + // *CWL* - 0 is not a valid ebs_period. Plus atoi() returns 0 + // if the string is not a number. + if (env_ebs_period <= 0) { + // go back to default on failure or bad value. 
+ env_ebs_period = default_ebs_period; + } } TAU_VERBOSE("TAU: EBS period = %d \n", env_ebs_period); sprintf(tmpstr, "%d", env_ebs_period); @@ -1103,59 +1109,59 @@ void TauEnv_initialize() { bool ebs_period_forced = false; #ifdef EBS_CLOCK_RES if (strcmp(env_ebs_source, "itimer") != 0) { - // *CWL* - force the clock period to be of a sane value - // if the desired (or default) value is not - // supported by the machine. ONLY valid for "itimer" - // EBS_SOURCE. - if (env_ebs_period < EBS_CLOCK_RES) { - env_ebs_period = EBS_CLOCK_RES; - ebs_period_forced = true; - } + // *CWL* - force the clock period to be of a sane value + // if the desired (or default) value is not + // supported by the machine. ONLY valid for "itimer" + // EBS_SOURCE. + if (env_ebs_period < EBS_CLOCK_RES) { + env_ebs_period = EBS_CLOCK_RES; + ebs_period_forced = true; + } } #endif if (ebs_period_forced) { - sprintf(tmpstr, "%d", env_ebs_period); - TAU_METADATA("TAU_EBS_PERIOD (FORCED)", tmpstr); + sprintf(tmpstr, "%d", env_ebs_period); + TAU_METADATA("TAU_EBS_PERIOD (FORCED)", tmpstr); } - + const char *ebs_inclusive = getconf("TAU_EBS_INCLUSIVE"); env_ebs_inclusive = TAU_EBS_INCLUSIVE_DEFAULT; if (ebs_inclusive) { - env_ebs_inclusive = atoi(ebs_inclusive); - if (env_ebs_inclusive < 0) { - env_ebs_inclusive = TAU_EBS_INCLUSIVE_DEFAULT; - } + env_ebs_inclusive = atoi(ebs_inclusive); + if (env_ebs_inclusive < 0) { + env_ebs_inclusive = TAU_EBS_INCLUSIVE_DEFAULT; + } } TAU_VERBOSE("TAU: EBS inclusive = %d usec\n", env_ebs_inclusive); sprintf(tmpstr, "%d usec", env_ebs_inclusive); TAU_METADATA("TAU_EBS_INCLUSIVE", tmpstr); - + #ifdef TAU_UNWIND tmp = getconf("TAU_EBS_UNWIND"); if (parse_bool(tmp, TAU_EBS_UNWIND_DEFAULT)) { - env_ebs_unwind_enabled = 1; - TAU_METADATA("TAU_EBS_UNWIND", "on"); + env_ebs_unwind_enabled = 1; + TAU_METADATA("TAU_EBS_UNWIND", "on"); } else { - env_ebs_unwind_enabled = 0; - TAU_METADATA("TAU_EBS_UNWIND", "off"); + env_ebs_unwind_enabled = 0; + TAU_METADATA("TAU_EBS_UNWIND", 
"off"); } if (env_ebs_unwind_enabled == 1) { - const char *depth = getconf("TAU_EBS_UNWIND_DEPTH"); - env_ebs_unwind_depth = TAU_EBS_UNWIND_DEPTH_DEFAULT; - if (depth) { - env_ebs_unwind_depth = atoi(depth); - if (env_ebs_unwind_depth < 0) { - env_ebs_unwind_depth = TAU_CALLPATH_DEPTH_DEFAULT; - } - } + const char *depth = getconf("TAU_EBS_UNWIND_DEPTH"); + env_ebs_unwind_depth = TAU_EBS_UNWIND_DEPTH_DEFAULT; + if (depth) { + env_ebs_unwind_depth = atoi(depth); + if (env_ebs_unwind_depth < 0) { + env_ebs_unwind_depth = TAU_CALLPATH_DEPTH_DEFAULT; + } + } } #endif /* TAU_UNWIND */ - + if (TauEnv_get_tracing()) { - env_callpath = 1; - env_callpath_depth = 300; - TAU_VERBOSE("TAU: EBS Overriding callpath settings, callpath enabled, depth = 300\n"); + env_callpath = 1; + env_callpath_depth = 300; + TAU_VERBOSE("TAU: EBS Overriding callpath settings, callpath enabled, depth = 300\n"); } } @@ -1182,7 +1188,7 @@ void TauEnv_initialize() { } else { env_child_forkdirs = 0; /*TAU_VERBOSE("TAU: Profiling Disabled\n"); - TAU_METADATA("TAU_PROFILE", "off");*/ + TAU_METADATA("TAU_PROFILE", "off");*/ } env_cupti_api = getconf("TAU_CUPTI_API"); @@ -1190,8 +1196,8 @@ void TauEnv_initialize() { env_cupti_api = TAU_CUPTI_API_DEFAULT; TAU_VERBOSE("TAU: CUPTI API tracking: %s\n", env_cupti_api); TAU_METADATA("TAU_CUPTI_API", env_cupti_api); - } - else { + } + else { TAU_VERBOSE("TAU: CUPTI API tracking: %s\n", env_cupti_api); TAU_METADATA("TAU_CUPTI_API", env_cupti_api); } @@ -1201,6 +1207,11 @@ void TauEnv_initialize() { TAU_VERBOSE("TAU: MIC offloading Enabled\n"); TAU_METADATA("TAU_MIC_OFFLOAD", "on"); } + + initialized = 1; + TAU_VERBOSE("TAU: Initialized TAU (TAU_VERBOSE=1)\n"); } + } + } /* C linkage */ diff --git a/src/Profile/TauGASP.c b/src/Profile/TauGASP.c index 5d063eb30..cf8db2a32 100644 --- a/src/Profile/TauGASP.c +++ b/src/Profile/TauGASP.c @@ -22,6 +22,14 @@ #include +/* disable instrumentation in this file, if possible */ +#pragma pupc off + +#ifdef __BERKELEY_UPC__ + 
/* ensure code in this file does not disturb line numbering */ + #pragma UPCR NO_SRCPOS +#endif + /* internal tool events, placed at end of user event range */ #define GASPI_EVT_BASE (GASP_UPC_USEREVT_END - GASPI_RESERVEDEVTS) #define GASPI_INIT GASPI_EVT_BASE+0 @@ -108,9 +116,10 @@ unsigned int gasp_create_event(gasp_context_t context, const char *name, const c return retval; } -gasp_context_t gasp_init(gasp_model_t srcmodel, int *argc, char ***argv) { - +gasp_context_t gasp_init(gasp_model_t srcmodel, int *argc, char ***argv) +{ int nodeid; + /* allocate a local context */ gasp_context_t context = (gasp_context_t)calloc(1,sizeof(struct _gasp_context_S)); assert(context->srcmodel == GASP_MODEL_UPC); /* for now */ @@ -119,22 +128,42 @@ gasp_context_t gasp_init(gasp_model_t srcmodel, int *argc, char ***argv) { context->enabled = 1; /* query system parameters */ - context->forceflush = gaspi_getenvYN(context,"GASP_FLUSH",0); - //gasp_event_notify(context, GASPI_INIT, GASP_ATOMIC, NULL, 0, 0, argstr); - Tau_create_top_level_timer_if_necessary(); -//TAU_REGISTER_THREAD(); + + if (TauEnv_get_ebs_enabled()) { + Tau_sampling_init_if_necessary(); + } + +#ifndef TAU_DISABLE_SIGUSR + Tau_signal_initialization(); +#endif + +#ifdef TAU_MONITORING + Tau_mon_connect(); +#endif /* TAU_MONITORING */ + +#ifdef TAU_BGP + if (TauEnv_get_ibm_bg_hwp_counters()) { + int upcErr; + Tau_Bg_hwp_counters_start(&upcErr); + if (upcErr != 0) { + printf("TAU ERROR: ** Error starting IBM BGP UPC hardware performance counters\n"); + } + } +#endif /* TAU_BGP */ + nodeid = TAU_PROFILE_GET_NODE(); if (nodeid == -1) { TAU_PROFILE_SET_NODE(context->mythread); } -//Tau_set_usesMPI(1); -// printf("Thread is %i\n",context->mythread); - //TAU_START("main"); - //TAU_PROFILE_TIMER(context->tautimer, "otherstuff", " ", TAU_MESSAGE); +#ifdef TAU_MPI + if (TauEnv_get_synchronize_clocks()) { + TauSyncClocks(); + } +#endif return context; } @@ -322,13 +351,23 @@ void gasp_event_notifyVA(gasp_context_t context, 
unsigned int evttag, gasp_evtty #endif default: - if (evttag >= GASP_UPC_USEREVT_START && - evttag <= GASP_UPC_USEREVT_END) { /* it's a user event */ + if (evttag >= GASP_UPC_USEREVT_START && + evttag <= GASP_UPC_USEREVT_END) + { + /* it's a user event */ int id = evttag - GASP_UPC_USEREVT_START; - assert(id < context->userevt_cnt); - tagstr = context->userevt[id].name; - argstr = context->userevt[id].desc; - is_user_evt = 1; + if(id < context->userevt_cnt) { + tagstr = context->userevt[id].name; + argstr = context->userevt[id].desc; + is_user_evt = 1; + } + #if 1 + else { + printf("ERROR: id=%d < userevt_cnt=%d. " + "Check that %s was compiled with UPC compiler.\n", + __FILE__, id, context->userevt_cnt); + } + #endif } } @@ -336,12 +375,11 @@ void gasp_event_notifyVA(gasp_context_t context, unsigned int evttag, gasp_evtty if(evttype==GASP_START) { - - TAU_START(tagstr); + TAU_START(tagstr); } else if(evttype==GASP_END) { - TAU_STOP(tagstr); + TAU_STOP(tagstr); } else if(evttype=GASP_ATOMIC) { @@ -350,10 +388,10 @@ void gasp_event_notifyVA(gasp_context_t context, unsigned int evttag, gasp_evtty #ifdef GASP_BUPC_STATIC_SHARED if (evttag == GASP_BUPC_STATIC_SHARED) { int nblocks, nbytes; - nblocks = (int)va_arg(argptr, int); - nbytes = (int)va_arg(argptr, int); - TAU_REGISTER_EVENT(variable, tagstr); - TAU_EVENT(variable, nbytes); + nblocks = (int)va_arg(argptr, int); + nbytes = (int)va_arg(argptr, int); + TAU_REGISTER_EVENT(variable, tagstr); + TAU_EVENT(variable, nbytes); } #endif /* GASP_BUPC_STATIC_SHARED */ } diff --git a/src/Profile/TauHooks.cpp b/src/Profile/TauHooks.cpp index 7ad23fcd3..901424824 100644 --- a/src/Profile/TauHooks.cpp +++ b/src/Profile/TauHooks.cpp @@ -544,6 +544,19 @@ void tau_trace_exit(int* id) { traceExit(*id); } +#ifndef TAU_PEBIL_DISABLE +#include +void* tool_thread_init(pthread_t args) { + dprintf("TAU: initializing thread %#lx\n", args); + Tau_create_top_level_timer_if_necessary(); +} + +void* tool_thread_fini(pthread_t args) { + 
dprintf("TAU: finalizing thread %#lx\n", args); + Tau_stop_top_level_timer_if_necessary(); +} +#endif /* TAU_PEBIL_DISABLE */ + } /* extern "C" */ diff --git a/src/Profile/TauInit.cpp b/src/Profile/TauInit.cpp index fe00fa0b5..f366f4802 100644 --- a/src/Profile/TauInit.cpp +++ b/src/Profile/TauInit.cpp @@ -132,7 +132,9 @@ static void TauInitialize_kill_handlers() { sighdlr[SIGSEGV] = signal (SIGSEGV, wrap_up); # endif # ifdef SIGCHLD +#ifndef TAU_UPC sighdlr[SIGCHLD] = signal (SIGCHLD, wrap_up); +#endif # endif } @@ -147,7 +149,6 @@ extern int Tau_Backtrace_writeMetadata(int i, char *token1, unsigned long addr); #ifndef TAU_DISABLE_SIGUSR -//static void tauBacktraceHandler(int sig) { extern "C" void finalizeCallSites_if_necessary(); void tauBacktraceHandler(int sig, siginfo_t *si, void *context) { char str[100+4096]; @@ -423,7 +424,6 @@ extern "C" int Tau_init_initializeTAU() { /* we need the timestamp of the "start" */ Tau_snapshot_initialization(); - #ifndef TAU_DISABLE_SIGUSR /* register SIGUSR1 handler */ if (signal(SIGUSR1, tauSignalHandler) == SIG_ERR) { @@ -464,8 +464,6 @@ extern "C" int Tau_init_initializeTAU() { TauInitialize_kill_handlers(); } - //TauInitialize_kill_handlers(); - /* initialize sampling if requested */ if (TauEnv_get_ebs_enabled()) { /* Work-around for MVAPHICH 2 to move sampling initialization to diff --git a/src/Profile/TauMetaData.cpp b/src/Profile/TauMetaData.cpp index a7c1dd527..9b98b7982 100644 --- a/src/Profile/TauMetaData.cpp +++ b/src/Profile/TauMetaData.cpp @@ -125,38 +125,40 @@ int tau_bgq_init(void) { #include #include - - -class MetaDataRepo : public map { +// STL containers are not designed for this. +// They do not have virtual destructors, so overriding the destructor +// in this way is unsafe. Is there some reason atexit() isn't enough? 
+class MetaDataRepo : public metadata_map_t { public : - ~MetaDataRepo() { + virtual ~MetaDataRepo() { Tau_destructor_trigger(); } }; -// Static holder for metadata name/value pairs + // These come from Tau_metadata_register calls -map &Tau_metadata_getMetaData_task(int tid) { +metadata_map_t & Tau_metadata_getMetaData_task(int tid) { static MetaDataRepo metadata[TAU_MAX_THREADS]; return metadata[tid]; } -map &Tau_metadata_getMetaData(void) { +metadata_map_t & Tau_metadata_getMetaData(void) { return Tau_metadata_getMetaData_task(0); } -extern "C" void Tau_metadata_task(char *name, const char *value, int tid) { +extern "C" void Tau_metadata_task(char const * name, char const * value, int tid) { #ifdef TAU_DISABLE_METADATA return; -#endif - +#else // make copies - char *myName = strdup(name); - char *myValue = strdup(value); + char * myName = strdup(name); + char * myValue = strdup(value); + //TAU_VERBOSE("Metadata: %s = %s\n", name, value); RtsLayer::LockDB(); Tau_metadata_getMetaData_task(tid)[myName] = myValue; RtsLayer::UnLockDB(); +#endif } extern "C" void Tau_metadata(char *name, const char *value) { Tau_metadata_task(name, value, 0); @@ -176,16 +178,13 @@ void Tau_metadata_register(char *name, const char *value) { } -int Tau_metadata_fillMetaData() { - - +int Tau_metadata_fillMetaData() +{ #ifdef TAU_DISABLE_METADATA return 0; #else - static int filled = 0; - if (filled) { return 0; } @@ -197,24 +196,23 @@ int Tau_metadata_fillMetaData() { #else const char *timeFormat = "%lld"; #endif - + char tmpstr[4096]; sprintf (tmpstr, timeFormat, TauMetrics_getInitialTimeStamp()); Tau_metadata_register("Starting Timestamp", tmpstr); - time_t theTime = time(NULL); struct tm *thisTime = gmtime(&theTime); - strftime (tmpstr,4096,"%Y-%m-%dT%H:%M:%SZ", thisTime); + strftime(tmpstr,4096,"%Y-%m-%dT%H:%M:%SZ", thisTime); Tau_metadata_register("UTC Time", tmpstr); thisTime = localtime(&theTime); char buf[4096]; strftime (buf,4096,"%Y-%m-%dT%H:%M:%S", thisTime); - + char tzone[7]; 
strftime (tzone, 7, "%z", thisTime); if (strlen(tzone) == 5) { @@ -227,7 +225,7 @@ int Tau_metadata_fillMetaData() { Tau_metadata_register("Local Time", tmpstr); - // write out the timestamp (number of microseconds since epoch (unsigned long long) + // write out the timestamp (number of microseconds since epoch (unsigned long long) sprintf (tmpstr, timeFormat, TauMetrics_getTimeOfDay()); Tau_metadata_register("Timestamp", tmpstr); @@ -264,15 +262,15 @@ int Tau_metadata_fillMetaData() { BGLPersonality_getLocationString(&personality, location); sprintf (bglbuffer, "(%d,%d,%d)", BGLPersonality_xCoord(&personality), - BGLPersonality_yCoord(&personality), - BGLPersonality_zCoord(&personality)); + BGLPersonality_yCoord(&personality), + BGLPersonality_zCoord(&personality)); Tau_metadata_register("BGL Coords", bglbuffer); Tau_metadata_register("BGL Processor ID", rts_get_processor_id()); sprintf (bglbuffer, "(%d,%d,%d)", BGLPersonality_xSize(&personality), - BGLPersonality_ySize(&personality), - BGLPersonality_zSize(&personality)); + BGLPersonality_ySize(&personality), + BGLPersonality_zSize(&personality)); Tau_metadata_register("BGL Size", bglbuffer); @@ -283,8 +281,8 @@ int Tau_metadata_fillMetaData() { } sprintf (bglbuffer, "(%d,%d,%d)", BGLPersonality_isTorusX(&personality), - BGLPersonality_isTorusY(&personality), - BGLPersonality_isTorusZ(&personality)); + BGLPersonality_isTorusY(&personality), + BGLPersonality_isTorusZ(&personality)); Tau_metadata_register("BGL isTorus", bglbuffer); Tau_metadata_register("BGL DDRSize", BGLPersonality_DDRSize(&personality)); @@ -297,18 +295,18 @@ int Tau_metadata_fillMetaData() { Tau_metadata_register("BGL numPsets", BGLPersonality_numPsets(&personality)); sprintf (bglbuffer, "(%d,%d,%d)", BGLPersonality_xPsetSize(&personality), - BGLPersonality_yPsetSize(&personality), - BGLPersonality_zPsetSize(&personality)); + BGLPersonality_yPsetSize(&personality), + BGLPersonality_zPsetSize(&personality)); Tau_metadata_register("BGL PsetSize", 
bglbuffer); sprintf (bglbuffer, "(%d,%d,%d)", BGLPersonality_xPsetOrigin(&personality), - BGLPersonality_yPsetOrigin(&personality), - BGLPersonality_zPsetOrigin(&personality)); + BGLPersonality_yPsetOrigin(&personality), + BGLPersonality_zPsetOrigin(&personality)); Tau_metadata_register("BGL PsetOrigin", bglbuffer); sprintf (bglbuffer, "(%d,%d,%d)", BGLPersonality_xPsetCoord(&personality), - BGLPersonality_yPsetCoord(&personality), - BGLPersonality_zPsetCoord(&personality)); + BGLPersonality_yPsetCoord(&personality), + BGLPersonality_zPsetCoord(&personality)); Tau_metadata_register("BGL PsetCoord", bglbuffer); #endif /* TAU_BGL */ @@ -321,15 +319,15 @@ int Tau_metadata_fillMetaData() { BGP_Personality_getLocationString(&personality, location); sprintf (bgpbuffer, "(%d,%d,%d)", BGP_Personality_xCoord(&personality), - BGP_Personality_yCoord(&personality), - BGP_Personality_zCoord(&personality)); + BGP_Personality_yCoord(&personality), + BGP_Personality_zCoord(&personality)); Tau_metadata_register("BGP Coords", bgpbuffer); Tau_metadata_register("BGP Processor ID", Kernel_PhysicalProcessorID()); sprintf (bgpbuffer, "(%d,%d,%d)", BGP_Personality_xSize(&personality), - BGP_Personality_ySize(&personality), - BGP_Personality_zSize(&personality)); + BGP_Personality_ySize(&personality), + BGP_Personality_zSize(&personality)); Tau_metadata_register("BGP Size", bgpbuffer); @@ -341,128 +339,128 @@ int Tau_metadata_fillMetaData() { } sprintf (bgpbuffer, "(%d,%d,%d)", BGP_Personality_isTorusX(&personality), - BGP_Personality_isTorusY(&personality), - BGP_Personality_isTorusZ(&personality)); + BGP_Personality_isTorusY(&personality), + BGP_Personality_isTorusZ(&personality)); Tau_metadata_register("BGP isTorus", bgpbuffer); Tau_metadata_register("BGP DDRSize (MB)", BGP_Personality_DDRSizeMB(&personality)); -/* CHECK: - Tau_metadata_register("BGP DDRModuleType", personality.DDRModuleType); -*/ + /* CHECK: + Tau_metadata_register("BGP DDRModuleType", personality.DDRModuleType); + */ 
Tau_metadata_register("BGP Location", location); Tau_metadata_register("BGP rankInPset", BGP_Personality_rankInPset(&personality)); -/* - Tau_metadata_register("BGP numNodesInPset", Kernel_ProcessCount()); -*/ + /* + Tau_metadata_register("BGP numNodesInPset", Kernel_ProcessCount()); + */ Tau_metadata_register("BGP psetSize", BGP_Personality_psetSize(&personality)); Tau_metadata_register("BGP psetNum", BGP_Personality_psetNum(&personality)); Tau_metadata_register("BGP numPsets", BGP_Personality_numComputeNodes(&personality)); -/* CHECK: - sprintf (bgpbuffer, "(%d,%d,%d)", BGP_Personality_xPsetSize(&personality), - BGP_Personality_yPsetSize(&personality), - BGP_Personality_zPsetSize(&personality)); - Tau_metadata_register("BGP PsetSize", bgpbuffer); - - sprintf (bgpbuffer, "(%d,%d,%d)", BGP_Personality_xPsetOrigin(&personality), - BGP_Personality_yPsetOrigin(&personality), - BGP_Personality_zPsetOrigin(&personality)); - Tau_metadata_register("BGP PsetOrigin", bgpbuffer); - - sprintf (bgpbuffer, "(%d,%d,%d)", BGP_Personality_xPsetCoord(&personality), - BGP_Personality_yPsetCoord(&personality), - BGP_Personality_zPsetCoord(&personality)); - Tau_metadata_register("BGP PsetCoord", bgpbuffer); -*/ + /* CHECK: + sprintf (bgpbuffer, "(%d,%d,%d)", BGP_Personality_xPsetSize(&personality), + BGP_Personality_yPsetSize(&personality), + BGP_Personality_zPsetSize(&personality)); + Tau_metadata_register("BGP PsetSize", bgpbuffer); + + sprintf (bgpbuffer, "(%d,%d,%d)", BGP_Personality_xPsetOrigin(&personality), + BGP_Personality_yPsetOrigin(&personality), + BGP_Personality_zPsetOrigin(&personality)); + Tau_metadata_register("BGP PsetOrigin", bgpbuffer); + + sprintf (bgpbuffer, "(%d,%d,%d)", BGP_Personality_xPsetCoord(&personality), + BGP_Personality_yPsetCoord(&personality), + BGP_Personality_zPsetCoord(&personality)); + Tau_metadata_register("BGP PsetCoord", bgpbuffer); + */ #endif /* TAU_BGP */ #ifdef TAU_BGQ -/* NOTE: Please refer to Scalasca's elg_pform_bgq.c [www.scalasca.org] 
for - details on IBM BGQ Axis mapping. */ - static int bgq_init = tau_bgq_init(); - char bgqbuffer[4096]; - static char tau_axis_map[] = "EFABCD"; - /* EF -> x, AB -> y, CD -> z */ + /* NOTE: Please refer to Scalasca's elg_pform_bgq.c [www.scalasca.org] for + details on IBM BGQ Axis mapping. */ + static int bgq_init = tau_bgq_init(); + char bgqbuffer[4096]; + static char tau_axis_map[] = "EFABCD"; + /* EF -> x, AB -> y, CD -> z */ + +#define TAU_BGQ_IDX(i) tau_axis_map[i] - 'A' - #define TAU_BGQ_IDX(i) tau_axis_map[i] - 'A' + int x = tau_torus_coord[TAU_BGQ_IDX(0)] * tau_torus_size[TAU_BGQ_IDX(1)] + + tau_torus_coord[TAU_BGQ_IDX(1)]; + int y = tau_torus_coord[TAU_BGQ_IDX(2)] * tau_torus_size[TAU_BGQ_IDX(3)] + + tau_torus_coord[TAU_BGQ_IDX(3)]; + int z = tau_torus_coord[TAU_BGQ_IDX(4)] * tau_torus_size[TAU_BGQ_IDX(5)] + + tau_torus_coord[TAU_BGQ_IDX(5)]; - int x = tau_torus_coord[TAU_BGQ_IDX(0)] * tau_torus_size[TAU_BGQ_IDX(1)] - + tau_torus_coord[TAU_BGQ_IDX(1)]; - int y = tau_torus_coord[TAU_BGQ_IDX(2)] * tau_torus_size[TAU_BGQ_IDX(3)] - + tau_torus_coord[TAU_BGQ_IDX(3)]; - int z = tau_torus_coord[TAU_BGQ_IDX(4)] * tau_torus_size[TAU_BGQ_IDX(5)] - + tau_torus_coord[TAU_BGQ_IDX(5)]; + sprintf(bgqbuffer, "(%d,%d,%d)", x,y,z); + Tau_metadata_register("BGQ Coords", bgqbuffer); - sprintf(bgqbuffer, "(%d,%d,%d)", x,y,z); - Tau_metadata_register("BGQ Coords", bgqbuffer); + int size_x = tau_torus_size[TAU_BGQ_IDX(0)] * tau_torus_size[TAU_BGQ_IDX(1)]; + int size_y = tau_torus_size[TAU_BGQ_IDX(2)] * tau_torus_size[TAU_BGQ_IDX(3)]; + int size_z = tau_torus_size[TAU_BGQ_IDX(4)] * tau_torus_size[TAU_BGQ_IDX(5)]; - int size_x = tau_torus_size[TAU_BGQ_IDX(0)] * tau_torus_size[TAU_BGQ_IDX(1)]; - int size_y = tau_torus_size[TAU_BGQ_IDX(2)] * tau_torus_size[TAU_BGQ_IDX(3)]; - int size_z = tau_torus_size[TAU_BGQ_IDX(4)] * tau_torus_size[TAU_BGQ_IDX(5)]; + sprintf(bgqbuffer, "(%d,%d,%d,%d,%d,%d)", tau_torus_size[0], tau_torus_size[1], tau_torus_size[2], + tau_torus_size[3], 
tau_torus_size[4], tau_torus_size[5]); + Tau_metadata_register("BGQ Size", bgqbuffer); - sprintf(bgqbuffer, "(%d,%d,%d,%d,%d,%d)", tau_torus_size[0], tau_torus_size[1], tau_torus_size[2], - tau_torus_size[3], tau_torus_size[4], tau_torus_size[5]); - Tau_metadata_register("BGQ Size", bgqbuffer); - - int wrap_x = tau_torus_wraparound[TAU_BGQ_IDX(0)] && tau_torus_wraparound[TAU_BGQ_IDX(1)]; - int wrap_y = tau_torus_wraparound[TAU_BGQ_IDX(2)] && tau_torus_wraparound[TAU_BGQ_IDX(3)]; - int wrap_z = tau_torus_wraparound[TAU_BGQ_IDX(4)] && tau_torus_wraparound[TAU_BGQ_IDX(5)]; + int wrap_x = tau_torus_wraparound[TAU_BGQ_IDX(0)] && tau_torus_wraparound[TAU_BGQ_IDX(1)]; + int wrap_y = tau_torus_wraparound[TAU_BGQ_IDX(2)] && tau_torus_wraparound[TAU_BGQ_IDX(3)]; + int wrap_z = tau_torus_wraparound[TAU_BGQ_IDX(4)] && tau_torus_wraparound[TAU_BGQ_IDX(5)]; - sprintf(bgqbuffer, "(%d,%d,%d)", wrap_x,wrap_y,wrap_z); - Tau_metadata_register("BGQ Period", bgqbuffer); + sprintf(bgqbuffer, "(%d,%d,%d)", wrap_x,wrap_y,wrap_z); + Tau_metadata_register("BGQ Period", bgqbuffer); - BG_UniversalComponentIdentifier uci = tau_bgq_personality.Kernel_Config.UCI; - unsigned int row, col, mp, nb, cc; - bg_decodeComputeCardOnNodeBoardUCI(uci, &row, &col, &mp, &nb, &cc); - sprintf(bgqbuffer, "R%x%x-M%d-N%02x-J%02x <%d,%d,%d,%d,%d>", row, col, mp, nb, cc, - tau_torus_coord[0], tau_torus_coord[1], tau_torus_coord[2], - tau_torus_coord[3], tau_torus_coord[4]); - Tau_metadata_register("BGQ Node Name", bgqbuffer); + BG_UniversalComponentIdentifier uci = tau_bgq_personality.Kernel_Config.UCI; + unsigned int row, col, mp, nb, cc; + bg_decodeComputeCardOnNodeBoardUCI(uci, &row, &col, &mp, &nb, &cc); + sprintf(bgqbuffer, "R%x%x-M%d-N%02x-J%02x <%d,%d,%d,%d,%d>", row, col, mp, nb, cc, + tau_torus_coord[0], tau_torus_coord[1], tau_torus_coord[2], + tau_torus_coord[3], tau_torus_coord[4]); + Tau_metadata_register("BGQ Node Name", bgqbuffer); - sprintf(bgqbuffer, "%ld", ((uci>>38)&0xFFFFF)); /* encode 
row,col,mp,nb,cc*/ - Tau_metadata_register("BGQ Node ID", bgqbuffer); + sprintf(bgqbuffer, "%ld", ((uci>>38)&0xFFFFF)); /* encode row,col,mp,nb,cc*/ + Tau_metadata_register("BGQ Node ID", bgqbuffer); - sprintf(bgqbuffer, "%ld", Kernel_PhysicalProcessorID()); - Tau_metadata_register("BGQ Physical Processor ID", bgqbuffer); + sprintf(bgqbuffer, "%ld", Kernel_PhysicalProcessorID()); + Tau_metadata_register("BGQ Physical Processor ID", bgqbuffer); - sprintf(bgqbuffer, "%d", tau_bgq_personality.Kernel_Config.FreqMHz); - Tau_metadata_register("CPU MHz", bgqbuffer); + sprintf(bgqbuffer, "%d", tau_bgq_personality.Kernel_Config.FreqMHz); + Tau_metadata_register("CPU MHz", bgqbuffer); - sprintf(bgqbuffer, "%d", Kernel_GetJobID()); - Tau_metadata_register("BGQ Job ID", bgqbuffer); + sprintf(bgqbuffer, "%d", Kernel_GetJobID()); + Tau_metadata_register("BGQ Job ID", bgqbuffer); - sprintf(bgqbuffer, "%d", Kernel_ProcessorID()); - Tau_metadata_register("BGQ Processor ID", bgqbuffer); + sprintf(bgqbuffer, "%d", Kernel_ProcessorID()); + Tau_metadata_register("BGQ Processor ID", bgqbuffer); - sprintf(bgqbuffer, "%d", Kernel_PhysicalHWThreadID()); - Tau_metadata_register("BGQ Physical HW Thread ID", bgqbuffer); + sprintf(bgqbuffer, "%d", Kernel_PhysicalHWThreadID()); + Tau_metadata_register("BGQ Physical HW Thread ID", bgqbuffer); - sprintf(bgqbuffer, "%d", Kernel_ProcessCount()); - Tau_metadata_register("BGQ Process Count", bgqbuffer); + sprintf(bgqbuffer, "%d", Kernel_ProcessCount()); + Tau_metadata_register("BGQ Process Count", bgqbuffer); - sprintf(bgqbuffer, "%d", Kernel_ProcessorCount()); - Tau_metadata_register("BGQ Processor Count", bgqbuffer); + sprintf(bgqbuffer, "%d", Kernel_ProcessorCount()); + Tau_metadata_register("BGQ Processor Count", bgqbuffer); - sprintf(bgqbuffer, "%d", Kernel_MyTcoord()); - Tau_metadata_register("BGQ tCoord", bgqbuffer); + sprintf(bgqbuffer, "%d", Kernel_MyTcoord()); + Tau_metadata_register("BGQ tCoord", bgqbuffer); - sprintf(bgqbuffer, "%d", 
Kernel_ProcessorCoreID()); - Tau_metadata_register("BGQ Processor Core ID", bgqbuffer); + sprintf(bgqbuffer, "%d", Kernel_ProcessorCoreID()); + Tau_metadata_register("BGQ Processor Core ID", bgqbuffer); - sprintf(bgqbuffer, "%d", Kernel_ProcessorThreadID()); - Tau_metadata_register("BGQ Processor Thread ID", bgqbuffer); + sprintf(bgqbuffer, "%d", Kernel_ProcessorThreadID()); + Tau_metadata_register("BGQ Processor Thread ID", bgqbuffer); - sprintf(bgqbuffer, "%d", Kernel_BlockThreadId()); - Tau_metadata_register("BGQ Block Thread ID", bgqbuffer); + sprintf(bgqbuffer, "%d", Kernel_BlockThreadId()); + Tau_metadata_register("BGQ Block Thread ID", bgqbuffer); - // Returns the Rank associated with the current process - sprintf(bgqbuffer, "%d", Kernel_GetRank()); - Tau_metadata_register("BGQ Rank", bgqbuffer); + // Returns the Rank associated with the current process + sprintf(bgqbuffer, "%d", Kernel_GetRank()); + Tau_metadata_register("BGQ Rank", bgqbuffer); - sprintf(bgqbuffer, "%d", tau_bgq_personality.DDR_Config.DDRSizeMB); - Tau_metadata_register("BGQ DDR Size (MB)", bgqbuffer); + sprintf(bgqbuffer, "%d", tau_bgq_personality.DDR_Config.DDRSizeMB); + Tau_metadata_register("BGQ DDR Size (MB)", bgqbuffer); #endif /* TAU_BGQ */ @@ -476,43 +474,47 @@ int Tau_metadata_fillMetaData() { if (f) { char line[4096]; while (Tau_util_readFullLine(line, f)) { - char *value = strstr(line,":"); + char const * value = strstr(line,":"); if (!value) { - break; + break; } else { - /* skip over colon */ - value += 2; + /* skip over colon */ + value += 2; } + // Allocates a string value = Tau_util_removeRuns(value); if (strncmp(line, "vendor_id", 9) == 0) { - Tau_metadata_register("CPU Vendor", value); + Tau_metadata_register("CPU Vendor", value); } if (strncmp(line, "vendor", 6) == 0) { - Tau_metadata_register("CPU Vendor", value); + Tau_metadata_register("CPU Vendor", value); } if (strncmp(line, "cpu MHz", 7) == 0) { - Tau_metadata_register("CPU MHz", value); + Tau_metadata_register("CPU 
MHz", value); } if (strncmp(line, "clock", 5) == 0) { - Tau_metadata_register("CPU MHz", value); + Tau_metadata_register("CPU MHz", value); } if (strncmp(line, "model name", 10) == 0) { - Tau_metadata_register("CPU Type", value); + Tau_metadata_register("CPU Type", value); } if (strncmp(line, "family", 6) == 0) { - Tau_metadata_register("CPU Type", value); + Tau_metadata_register("CPU Type", value); } if (strncmp(line, "cpu\t", 4) == 0) { - Tau_metadata_register("CPU Type", value); + Tau_metadata_register("CPU Type", value); } if (strncmp(line, "cache size", 10) == 0) { - Tau_metadata_register("Cache Size", value); + Tau_metadata_register("Cache Size", value); } if (strncmp(line, "cpu cores", 9) == 0) { - Tau_metadata_register("CPU Cores", value); + Tau_metadata_register("CPU Cores", value); } + + // Deallocates the string + free((void*)value); } fclose(f); } @@ -521,19 +523,22 @@ int Tau_metadata_fillMetaData() { if (f) { char line[4096]; while (Tau_util_readFullLine(line, f)) { - char *value = strstr(line,":"); + char const * value = strstr(line,":"); if (!value) { - break; + break; } else { - value += 2; + value += 2; } + // Allocates a string value = Tau_util_removeRuns(value); if (strncmp(line, "MemTotal", 8) == 0) { - Tau_metadata_register("Memory Size", value); + Tau_metadata_register("Memory Size", value); } + + free((void*)value); } fclose(f); } @@ -556,21 +561,21 @@ int Tau_metadata_fillMetaData() { char line[4096]; /* *CWL* - STL cannot be used in PGI init sections??? 
- std::ostringstream os; - - while (Tau_util_readFullLine(line, f)) { - if (os.str().length() != 0) { - os << " "; - } - os << line; - } - Tau_metadata_register("Command Line", os.str().c_str()); - */ + std::ostringstream os; + + while (Tau_util_readFullLine(line, f)) { + if (os.str().length() != 0) { + os << " "; + } + os << line; + } + Tau_metadata_register("Command Line", os.str().c_str()); + */ string os; // *CWL* - The following loop performs newline to space conversions while (Tau_util_readFullLine(line, f)) { if (os.length() != 0) { - os.append(" "); + os.append(" "); } os.append(string(line)); } @@ -595,14 +600,12 @@ static int writeMetaData(Tau_util_outputDevice *out, bool newline, int counter, if (newline) { endl = "\n"; } - Tau_util_output (out, "%s", endl); if (counter != -1) { Tau_XML_writeAttribute(out, "Metric Name", RtsLayer::getCounterName(counter), newline); } - // Write data from the Tau_metadata_register environment variable // char *tauMetaDataEnvVar = getenv("Tau_metadata_register"); // if (tauMetaDataEnvVar != NULL) { @@ -613,17 +616,16 @@ static int writeMetaData(Tau_util_outputDevice *out, bool newline, int counter, // } // } - // write out the user-specified (some from TAU) attributes - for (map::iterator it = Tau_metadata_getMetaData_task(tid).begin(); it != Tau_metadata_getMetaData_task(tid).end(); ++it) { - const char *name = it->first.c_str(); - const char *value = it->second.c_str(); + metadata_map_t const & metadata = Tau_metadata_getMetaData_task(tid); + for (metadata_map_t::const_iterator it=metadata.begin(); it != metadata.end(); it++) { + const char *name = it->first; + const char *value = it->second; Tau_XML_writeAttribute(out, name, value, newline); } Tau_util_output (out, "%s", endl); return 0; - } @@ -631,31 +633,28 @@ static int writeMetaData(Tau_util_outputDevice *out, bool newline, int counter, extern "C" void Tau_context_metadata(char *name, char *value) { - #ifdef TAU_DISABLE_METADATA return; -#endif - +#else // get the 
current calling context Profiler *current = TauInternal_CurrentProfiler(RtsLayer::getTid()); FunctionInfo *fi = current->ThisFunction; const char *fname = fi->GetName(); char *myName = (char*) malloc (strlen(name) + strlen(fname) + 10); - sprintf (myName, "%s => %s", fname, name); + sprintf(myName, "%s => %s", fname, name); char *myValue = strdup(value); RtsLayer::LockDB(); Tau_metadata_getMetaData()[myName] = myValue; RtsLayer::UnLockDB(); +#endif } extern "C" void Tau_phase_metadata(char *name, char *value) { - #ifdef TAU_DISABLE_METADATA return; -#endif - - #ifdef TAU_PROFILEPHASE +#else +#ifdef TAU_PROFILEPHASE // get the current calling context Profiler *current = TauInternal_CurrentProfiler(RtsLayer::getTid()); std::string myString = ""; @@ -675,9 +674,10 @@ extern "C" void Tau_phase_metadata(char *name, char *value) { RtsLayer::LockDB(); Tau_metadata_getMetaData()[myName] = myValue; RtsLayer::UnLockDB(); - #else +#else Tau_context_metadata(name, value); - #endif +#endif +#endif } @@ -715,16 +715,15 @@ int Tau_metadata_writeMetaData(FILE *fp, int counter, int tid) { } - - Tau_util_outputDevice *Tau_metadata_generateMergeBuffer() { Tau_util_outputDevice *out = Tau_util_createBufferOutputDevice(); Tau_util_output(out,"%d%c", Tau_metadata_getMetaData().size(), '\0'); - for (map::iterator it = Tau_metadata_getMetaData().begin(); it != Tau_metadata_getMetaData().end(); ++it) { - const char *name = it->first.c_str(); - const char *value = it->second.c_str(); + metadata_map_t const & metadata = Tau_metadata_getMetaData(); + for (metadata_map_t::const_iterator it=metadata.begin(); it != metadata.end(); it++) { + const char *name = it->first; + const char *value = it->second; Tau_util_output(out,"%s%c", name, '\0'); Tau_util_output(out,"%s%c", value, '\0'); } @@ -748,11 +747,12 @@ void Tau_metadata_removeDuplicates(char *buffer, int buflen) { const char *value = buffer; buffer = strchr(buffer, '\0')+1; - map::iterator iter = Tau_metadata_getMetaData().find(attribute); 
- if (iter != Tau_metadata_getMetaData().end()) { - const char *my_value = iter->second.c_str(); + metadata_map_t const & metadata = Tau_metadata_getMetaData(); + metadata_map_t::const_iterator it = metadata.find(attribute); + if (it != metadata.end()) { + const char *my_value = it->second; if (0 == strcmp(value, my_value)) { - Tau_metadata_getMetaData().erase(attribute); + Tau_metadata_getMetaData().erase(attribute); } } } diff --git a/src/Profile/TauMpi.c b/src/Profile/TauMpi.c index adc2dfb3d..9115f1465 100644 --- a/src/Profile/TauMpi.c +++ b/src/Profile/TauMpi.c @@ -1520,7 +1520,9 @@ int MPI_Finalize( ) Note that we do not want to shut down the timers as yet. There is still potentially life after MPI_Finalize where TAU is concerned. */ - TauProfiler_updateAllIntermediateStatistics(); + /* KAH - NO! this is the wrong time to do this. THis is also done in the + * snapshot writer. If you do it twice, you get double values for main... */ + //TauProfiler_updateAllIntermediateStatistics(); Tau_mergeProfiles(); } @@ -1658,7 +1660,9 @@ int *provided; TAU_METADATA("MPI Processor Name", procname); if (TauEnv_get_synchronize_clocks()) { - TauSyncClocks(procid_0, size); + TauSyncClocks(); + //TauSyncClocks takes no arguments. 
+ //TauSyncClocks(procid_0, size); } return returnVal; @@ -1666,7 +1670,7 @@ int *provided; #endif /* TAU_MPI_THREADED */ - +#if 0 int MPI_Initialized( flag ) int * flag; { @@ -1682,6 +1686,7 @@ int * flag; return returnVal; } +#endif #ifdef TAU_ENABLE_MPI_WTIME diff --git a/src/Profile/TauMpiExtensions.c b/src/Profile/TauMpiExtensions.c index c7ec8e159..e45d513a9 100644 --- a/src/Profile/TauMpiExtensions.c +++ b/src/Profile/TauMpiExtensions.c @@ -2879,15 +2879,19 @@ int MPI_File_open( MPI_Comm comm, char * filename, int amode, MPI_Info info, MPI /****************************************************** *** MPI_File_open wrapper function ******************************************************/ -void MPI_FILE_OPEN( MPI_Fint * comm, char * filename, MPI_Fint * amode, MPI_Fint * info, MPI_Fint * fh, MPI_Fint * ierr) +void MPI_FILE_OPEN( MPI_Fint * comm, char * filename, MPI_Fint * amode, MPI_Fint * info, MPI_Fint * fh, MPI_Fint * ierr, int file_len) { MPI_Comm local_comm; MPI_Info local_info; MPI_File local_fh; + char *newfilename = (char *) malloc ((file_len +1) * sizeof(char)); + strncpy(newfilename, filename, file_len); + newfilename[file_len] = '\0'; local_comm = MPI_Comm_f2c(*comm); local_info = MPI_Info_f2c(*info); - *ierr = MPI_File_open( local_comm, filename, *amode, local_info, &local_fh) ; + *ierr = MPI_File_open( local_comm, newfilename, *amode, local_info, &local_fh) ; + free(newfilename); *fh = MPI_File_c2f(local_fh); return ; } @@ -2895,27 +2899,27 @@ void MPI_FILE_OPEN( MPI_Fint * comm, char * filename, MPI_Fint * amode, MPI_Fi /****************************************************** *** MPI_File_open wrapper function ******************************************************/ -void mpi_file_open( MPI_Fint * comm, char * filename, MPI_Fint * amode, MPI_Fint * info, MPI_Fint * fh, MPI_Fint * ierr) +void mpi_file_open( MPI_Fint * comm, char * filename, MPI_Fint * amode, MPI_Fint * info, MPI_Fint * fh, MPI_Fint * ierr, int filename_len) { - MPI_FILE_OPEN( comm, 
filename, amode, info, fh, ierr) ; + MPI_FILE_OPEN( comm, filename, amode, info, fh, ierr, filename_len) ; return ; } /****************************************************** *** MPI_File_open wrapper function ******************************************************/ -void mpi_file_open_( MPI_Fint * comm, char * filename, MPI_Fint * amode, MPI_Fint * info, MPI_Fint * fh, MPI_Fint * ierr) +void mpi_file_open_( MPI_Fint * comm, char * filename, MPI_Fint * amode, MPI_Fint * info, MPI_Fint * fh, MPI_Fint * ierr, int filename_len) { - MPI_FILE_OPEN( comm, filename, amode, info, fh, ierr) ; + MPI_FILE_OPEN( comm, filename, amode, info, fh, ierr, filename_len) ; return ; } /****************************************************** *** MPI_File_open wrapper function ******************************************************/ -void mpi_file_open__( MPI_Fint * comm, char * filename, MPI_Fint * amode, MPI_Fint * info, MPI_Fint * fh, MPI_Fint * ierr) +void mpi_file_open__( MPI_Fint * comm, char * filename, MPI_Fint * amode, MPI_Fint * info, MPI_Fint * fh, MPI_Fint * ierr, int filename_len) { - MPI_FILE_OPEN( comm, filename, amode, info, fh, ierr) ; + MPI_FILE_OPEN( comm, filename, amode, info, fh, ierr, filename_len) ; return ; } @@ -2994,37 +2998,41 @@ int MPI_File_delete( char * filename, MPI_Info info) /****************************************************** *** MPI_File_delete wrapper function ******************************************************/ -void MPI_FILE_DELETE( char * filename, MPI_Fint * info, MPI_Fint * ierr) +void MPI_FILE_DELETE( char * filename, MPI_Fint * info, MPI_Fint * ierr, int filename_length) { - MPI_Info local_info = MPI_Info_f2c(*info); - *ierr = MPI_File_delete( filename, local_info) ; + MPI_Info local_info = PMPI_Info_f2c(*info); + char *newfilename = (char *) malloc((filename_length + 1) * sizeof(char)); + strncpy (newfilename, filename, filename_length); + newfilename[filename_length] = '\0'; + *ierr = MPI_File_delete( newfilename, local_info) ; + free 
(newfilename); return ; } /****************************************************** *** MPI_File_delete wrapper function ******************************************************/ -void mpi_file_delete( char * filename, MPI_Fint * info, MPI_Fint * ierr) +void mpi_file_delete( char * filename, MPI_Fint * info, MPI_Fint * ierr, int filename_length) { - MPI_FILE_DELETE( filename, info, ierr) ; + MPI_FILE_DELETE( filename, info, ierr, filename_length) ; return ; } /****************************************************** *** MPI_File_delete wrapper function ******************************************************/ -void mpi_file_delete_( char * filename, MPI_Fint * info, MPI_Fint * ierr) +void mpi_file_delete_( char * filename, MPI_Fint * info, MPI_Fint * ierr, int filename_length) { - MPI_FILE_DELETE( filename, info, ierr) ; + MPI_FILE_DELETE( filename, info, ierr, filename_length) ; return ; } /****************************************************** *** MPI_File_delete wrapper function ******************************************************/ -void mpi_file_delete__( char * filename, MPI_Fint * info, MPI_Fint * ierr) +void mpi_file_delete__( char * filename, MPI_Fint * info, MPI_Fint * ierr, int filename_length) { - MPI_FILE_DELETE( filename, info, ierr) ; + MPI_FILE_DELETE( filename, info, ierr, filename_length) ; return ; } diff --git a/src/Profile/TauProfileMerge.cpp b/src/Profile/TauProfileMerge.cpp index 0fa64da7c..067ed435f 100644 --- a/src/Profile/TauProfileMerge.cpp +++ b/src/Profile/TauProfileMerge.cpp @@ -123,7 +123,7 @@ int Tau_mergeProfiles() { PMPI_Comm_rank(MPI_COMM_WORLD, &rank); PMPI_Comm_size(MPI_COMM_WORLD, &size); - buflen = Tau_snapshot_getBufferLength(); + buflen = Tau_snapshot_getBufferLength()+1; buf = (char *) malloc(buflen); Tau_snapshot_getBuffer(buf); @@ -293,7 +293,7 @@ int Tau_mergeProfiles() { } Tau_snapshot_writeMetaDataBlock(); - buflen = Tau_snapshot_getBufferLength(); + buflen = Tau_snapshot_getBufferLength()+1; buf = (char *) malloc(buflen); 
Tau_snapshot_getBuffer(buf); fwrite (buf, buflen, 1, f); @@ -389,8 +389,11 @@ int Tau_mergeProfiles() { } #endif /* TAU_UNIFY */ + fflush(f); + +#ifdef TAU_FCLOSE_MERGE fclose(f); - +#endif } else { /* recieve ok to go */ diff --git a/src/Profile/TauSampling.cpp b/src/Profile/TauSampling.cpp index 00028d588..7cedcfc47 100644 --- a/src/Profile/TauSampling.cpp +++ b/src/Profile/TauSampling.cpp @@ -1091,9 +1091,36 @@ void Tau_sampling_handle_sampleProfile(void *pc, ucontext_t *context) { double deltaValues[TAU_MAX_COUNTERS]; TauMetrics_getMetrics(tid, values); int localIndex = tid*TAU_MAX_COUNTERS; + + int ebsSourceMetricIndex = + TauMetrics_getMetricIndexFromName(TauEnv_get_ebs_source()); + // printf("%s\n", TauMetrics_getMetricName(ebsSourceMetricIndex)); + int ebsPeriod = TauEnv_get_ebs_period(); for (int i = 0; i < Tau_Global_numCounters; i++) { - deltaValues[i] = values[i] - previousTimestamp[localIndex + i]; - previousTimestamp[localIndex + i] = values[i]; + /* + if (previousTimestamp[localIndex + i] == 0) { + // "We don't believe you!". Should only happen for non EBS_SOURCE + // metrics. Hypothesis - the first sample would find the + // previousTimestamp for events unset. + previousTimestamp[localIndex + i] == profiler->StartTime[i]; + } + */ + if ((ebsSourceMetricIndex == i) && (values[i] < ebsPeriod)) { + // "We don't believe you either!". Should only happen for EBS_SOURCE. + // Hypothesis: Triggering PAPI overflows resets the values to 0. + // (or close to 0). 
+ deltaValues[i] = ebsPeriod; + previousTimestamp[localIndex + i] += ebsPeriod; + } else { + deltaValues[i] = values[i] - previousTimestamp[localIndex + i]; + /* + printf("[%s] tid=%d ctr=%d, Delta computed as %f minus %lld = %f\n", + samplingContext->GetName(), + tid, i, + values[i], previousTimestamp[localIndex + i], deltaValues[i]); + */ + previousTimestamp[localIndex + i] = values[i]; + } } samplingContext->addPcSample(pcStack, tid, deltaValues); @@ -1123,7 +1150,40 @@ void Tau_sampling_event_start(int tid, void **addresses) { #endif /* TAU_UNWIND */ if (TauEnv_get_profiling()) { - // nothing for now + // *CWL* - 8/18/2012. The new way of measuring a sample's contribution + // (in light of the uneven distribution of samples in threads) + // necessitates the use of a measured event's time stamp to + // serve as a bounding value for subsequent deductions. + // + // Note that this is still a fudge. In the face of limited + // measured events, this can end up accounting metric + // contributions to samples that can sometimes seem bizarre. + // (e.g., Source=PAPI_TOT_CYC, Metric=PAPI_FP_OPS can result + // in strange attribution of values to samples depending on + // the interplay of high FLOPS/s events and low FLOPS/s + // events). + // + // Without handling the event boundaries, another observed + // (bad) effect is in cases where PAPI_FP_OPS is used as + // TAU_EBS_SOURCE. A good chunk of the events leading up + // to a reasonable period of say 1,000,000 FP_OPS as a + // sample are likely to do very little FP_OPS. However, + // at the first sample, the deltas computed for the sample's + // TIME metric are likely to stretch all the way back to + // main() if event boundary limits are not established. + // + // Statistical sampling, being what it is, can never avoid + // this fudging. The previous approach of counting had the + // advantage of limiting the fudge factor to some factor of + // TAU_EBS_PERIOD. 
+ + double values[TAU_MAX_COUNTERS]; + TauMetrics_getMetrics(tid, values); + int localIndex = tid*TAU_MAX_COUNTERS; + for (int i = 0; i < Tau_Global_numCounters; i++) { + previousTimestamp[localIndex + i] = values[i]; + } + } Tau_global_decr_insideTAU_tid(tid); } @@ -1257,6 +1317,21 @@ int Tau_sampling_init(int tid) { static struct itimerval itval; +#ifdef TAU_BGQ + static bool warningPrinted = false; + // *CWL* - Vesta is having issues translating PC addresses now. + // This warning is issued as a part of a punt for EBS + // support on the BGQ for the August 2012 release. + // + // Please remove this check after the problem is fixed. + int myNode = RtsLayer::TheNode(); + if ((myNode <= 0) && (tid == 0)) { + // Only one process will print this warning exactly once on thread 0. (Node 0 or -1). + printf("Warning: No current EBS support for the BlueGene/Q. No Samples will be recorded.\n"); + warningPrinted = true; + return -1; + } +#endif /* TAU_BGQ */ Tau_global_incr_insideTAU_tid(tid); @@ -1430,6 +1505,9 @@ int Tau_sampling_init(int tid) { } TAU_VERBOSE("Tau_sampling_init: pid = %d, tid = %d setitimer called.\n", getpid(), tid); + + /* + *CWL* - 8/18/2012. I think this is an unnecessarily strict check. 
if (ovalue.it_interval.tv_sec != pvalue.it_interval.tv_sec || ovalue.it_interval.tv_usec != pvalue.it_interval.tv_usec || ovalue.it_value.tv_sec != pvalue.it_value.tv_sec || @@ -1438,6 +1516,7 @@ int Tau_sampling_init(int tid) { fprintf(stderr,"[tid = %d]: %d %d %d %d, %d %d %d %d.\n", tid, ovalue.it_interval.tv_sec, ovalue.it_interval.tv_usec, ovalue.it_value.tv_sec, ovalue.it_value.tv_usec, pvalue.it_interval.tv_sec, pvalue.it_interval.tv_usec, pvalue.it_value.tv_sec, pvalue.it_value.tv_usec); return -1; } + */ TAU_VERBOSE("Tau_sampling_init: pid = %d, tid = %d Signals set up.\n", getpid(), tid); // set up the base timers diff --git a/src/Profile/TauShmemOpenShmemC.c b/src/Profile/TauShmemOpenShmemC.c index a0931b6c5..de3593f4d 100644 --- a/src/Profile/TauShmemOpenShmemC.c +++ b/src/Profile/TauShmemOpenShmemC.c @@ -2634,6 +2634,7 @@ int shmem_version(int * a1, int * a2) { } +#ifdef TAU_OPENSHMEM_EXTENSION_1 /********************************************************** shmem_malloc **********************************************************/ @@ -2694,6 +2695,7 @@ void * shmem_memalign(size_t a1, size_t a2) { return retval; } +#endif /* TAU_OPENSHMEM_EXTENSION_1 */ /********************************************************** @@ -2714,6 +2716,7 @@ char * sherror() { +#ifdef TAU_OPENSHMEM_EXTENSION_1 /********************************************************** shmem_error **********************************************************/ @@ -2728,6 +2731,7 @@ char * shmem_error() { return retval; } +#endif /* TAU_OPENSHMEM_EXTENSION_1 */ /********************************************************** diff --git a/src/Profile/TauTranslateRank.cpp b/src/Profile/TauTranslateRank.cpp index 48a254ad6..518a47472 100644 --- a/src/Profile/TauTranslateRank.cpp +++ b/src/Profile/TauTranslateRank.cpp @@ -7,7 +7,9 @@ using namespace std; typedef std::map rank_map; typedef std::map comm_map; -comm_map comms; +// this is STATIC, because otherwise it can get freed by std::map more than once, +// 
when using tau_exec on an instrumented program. +static comm_map comms; extern "C" diff --git a/src/Profile/TauUtil.cpp b/src/Profile/TauUtil.cpp index 6f666a8e1..50d673c65 100644 --- a/src/Profile/TauUtil.cpp +++ b/src/Profile/TauUtil.cpp @@ -37,7 +37,8 @@ void TAU_ABORT(const char *format, ...) { /********************************************************************* * Create an buffer output device ********************************************************************/ -Tau_util_outputDevice *Tau_util_createBufferOutputDevice() { +Tau_util_outputDevice *Tau_util_createBufferOutputDevice() +{ Tau_util_outputDevice *out = (Tau_util_outputDevice*) TAU_UTIL_MALLOC (sizeof(Tau_util_outputDevice)); if (out == NULL) { return NULL; @@ -45,7 +46,7 @@ Tau_util_outputDevice *Tau_util_createBufferOutputDevice() { out->type = TAU_UTIL_OUTPUT_BUFFER; out->bufidx = 0; out->buflen = TAU_UTIL_INITIAL_BUFFER; - out->buffer = (char *) malloc (out->buflen); + out->buffer = (char *)malloc(out->buflen + 1); return out; } @@ -107,41 +108,46 @@ int Tau_util_readFullLine(char *line, FILE *fp) { while ( (ch = fgetc(fp)) && ch != EOF && ch != (int) '\n') { line[i++] = (unsigned char) ch; } + // Be careful to check that line is large enough: + // sizeof(line) == strlen(str) + 1 line[i] = '\0'; return i; } /********************************************************************* - * Replaces all the runs of spaces with a single space in a string. - * This modifies the string, but the user should use the return string - * because the pointer may change while removing leading whitespace. + * Duplicates a string and replaces all the runs of spaces with a + * single space. 
********************************************************************/ -char *Tau_util_removeRuns(char *str) { - int i, idx; - int len; - - if (!str) { - return str; /* do nothing with a null string */ +char const * Tau_util_removeRuns(char const * spaced_str) +{ + if (!spaced_str) { + return spaced_str; /* do nothing with a null string */ } - // also remove leading whitespace - while (*str && *str == ' ') { - str++; + // Skip over spaces at start of string + while (*spaced_str && *spaced_str == ' ') { + ++spaced_str; } - len = strlen(str); - for (i=0; ienabled == 1) { + if (offsetInfo->enabled) { timestamp = timestamp - offsetInfo->beginOffset + offsetInfo->syncOffset; - return timestamp; - } else { - // return 0 until sync'd - return 0.0; - } + } + return timestamp; } @@ -96,12 +94,12 @@ x_uint64 TauTraceGetTimeStamp(int tid) { // RtsLayer::getUSecD(tid, tracerValues); // double value = tracerValues[0]; - x_uint64 value = (x_uint64) TauMetrics_getTraceMetricValue(tid); + x_uint64 value = (x_uint64)TauMetrics_getTraceMetricValue(tid); if (TauEnv_get_synchronize_clocks()) { - return (x_uint64) TauSyncAdjustTimeStamp(value); + return (x_uint64)TauSyncAdjustTimeStamp(value); } else { - return (x_uint64) value; + return value; } } @@ -177,7 +175,10 @@ void TauTraceFlushBuffer(int tid) { if (TauTraceFd[tid] == -1) { printf("Error: TauTraceFlush(%d): Fd is -1. Trace file not initialized \n", tid); if (RtsLayer::myNode() == -1) { - fprintf (stderr, "ERROR in configuration. Trace file not initialized. If this is an MPI application, please ensure that TAU MPI wrapper library is linked. If not, please ensure that TAU_PROFILE_SET_NODE(id); is called in the program (0 for sequential).\n"); + fprintf (stderr, + "TAU: ERROR in configuration. 
Trace file not initialized.\n" + "TAU: If this is an MPI application, please ensure that TAU MPI wrapper library is linked.\n" + "TAU: If not, please ensure that TAU_PROFILE_SET_NODE(id); is called in the program (0 for sequential).\n"); exit(1); } } @@ -223,10 +224,11 @@ int TauTraceInit(int tid) { if (!TauBufferAllocated()[tid]) { TauMaxTraceRecords = (unsigned long long) TauEnv_get_max_records(); TauBufferSize = sizeof(TAU_EV)*TauMaxTraceRecords; - //TraceBuffer[tid] = (TAU_EV*) malloc(TAU_BUFFER_SIZE); TraceBuffer[tid] = (TAU_EV*) malloc(TauBufferSize); if (TraceBuffer[tid] == (TAU_EV *) NULL) { - fprintf(stderr, "TAU: FATAL Error: Trace buffer malloc failed. Please rerun the application with the TAU_MAX_RECORDS environment variable set to a smaller value\n"); + fprintf(stderr, + "TAU: FATAL Error: Trace buffer malloc failed.\n" + "TAU: Please rerun the application with the TAU_MAX_RECORDS environment variable set to a smaller value\n"); exit(1); } TauBufferAllocated()[tid] = true; @@ -404,9 +406,7 @@ void TauTraceClose(int tid) { // to tracing the current profiler ////////////////////////////////////////////////////////////////////// void TraceCallStack(int tid, Profiler *current) { - if (current == 0) { - return; - } else { + if (current) { // Trace all the previous records before tracing self TraceCallStack(tid, current->ParentProfiler); TauTraceEventSimple(current->ThisFunction->GetFunctionId(), 1, tid); @@ -610,62 +610,7 @@ void TauTraceOneSidedMsg(bool type, GpuEvent *gpu, int length, int threadId) #endif -////////////////////////////////////////////////////////////////////// -// TraceSendMsg traces the message send -////////////////////////////////////////////////////////////////////// -void TauTraceSendMsgOld(int type, int destination, int length) { - x_int64 parameter; - x_uint64 xother, xtype, xlength, xcomm; - - if (RtsLayer::isEnabled(TAU_MESSAGE)) { - parameter = 0; - /* for send, othernode is receiver or destination */ - xtype = type; - xlength = 
length; - xother = destination; - xcomm = 0; - - /* Format for parameter is - 63 ..... 56 55 ..... 48 47............. 32 - other type length - - These are the high order bits, below are the low order bits - - 31 ..... 24 23 ..... 16 15..............0 - other type length - e.g. - - xtype = 0xAABB; - xother = 0xCCDD; - xlength = 0xDEADBEEF; - result = 0xccaaDEADdddbbBEEF - - parameter = ((xlength >> 16) << 32) | - ((xtype >> 8 & 0xFF) << 48) | - ((xother >> 8 & 0xFF) << 56) | - (xlength & 0xFFFF) | - ((xtype & 0xFF) << 16) | - ((xother & 0xFF) << 24); - - */ - - parameter = (xlength >> 16 << 54 >> 22) | - ((xtype >> 8 & 0xFF) << 48) | - ((xother >> 8 & 0xFF) << 56) | - (xlength & 0xFFFF) | - ((xtype & 0xFF) << 16) | - ((xother & 0xFF) << 24) | - (xcomm << 58 >> 16); - - TauTraceEventSimple(TAU_MESSAGE_SEND, parameter, RtsLayer::myThread()); - } -} - - -////////////////////////////////////////////////////////////////////// -// TraceRecvMsg traces the message recv -////////////////////////////////////////////////////////////////////// extern "C" void TauTraceMsg(int send_or_recv, int type, int other_id, int length, x_uint64 ts, int use_ts, int node_id) { x_int64 parameter; x_uint64 xother, xtype, xlength, xcomm; @@ -733,7 +678,7 @@ void TauTraceSendMsg(int type, int destination, int length) { ////////////////////////////////////////////////////////////////////// -// TauTraceRecvMsgRemote traces the message recv for a remote RMA operation +// TauTraceRecvMsgRemote traces the message recv for an RMA operation ////////////////////////////////////////////////////////////////////// void TauTraceRecvMsgRemote(int type, int source, int length, int remote_id) { TauTraceMsg(TAU_MESSAGE_RECV, type, source, length, 0, 0, remote_id); @@ -741,7 +686,7 @@ void TauTraceRecvMsgRemote(int type, int source, int length, int remote_id) { } ////////////////////////////////////////////////////////////////////// -// TraceSendMsgRemote traces the message send for a remote RMA operation +// 
TraceSendMsgRemote traces the message send for an RMA operation ////////////////////////////////////////////////////////////////////// void TauTraceSendMsgRemote(int type, int destination, int length, int remote_id) { TauTraceMsg(TAU_MESSAGE_SEND, type, destination, length, 0, 0, remote_id); diff --git a/src/wrappers/dmapp/dmapp_wrap.c b/src/wrappers/dmapp/dmapp_wrap.c index 65bcdfcaf..730053534 100644 --- a/src/wrappers/dmapp/dmapp_wrap.c +++ b/src/wrappers/dmapp/dmapp_wrap.c @@ -8,8 +8,8 @@ int tau_dmapp_tagid=0 ; extern int TAUDECL tau_totalnodes(int set_or_get, int value); #define dprintf if (0) printf -#define TAU_DMAPP_TAGID tau_dmapp_tagid=tau_dmapp_tagid%250 -#define TAU_DMAPP_TAGID_NEXT (++tau_dmapp_tagid) % 250 +#define TAU_DMAPP_TAGID (tau_dmapp_tagid = (tau_dmapp_tagid & 255)) +#define TAU_DMAPP_TAGID_NEXT ((++tau_dmapp_tagid) & 255) int Tau_get_dmapp_size(dmapp_type_t data) { int size; @@ -505,11 +505,11 @@ dmapp_return_t __wrap_dmapp_put_ixpe_nb(void * a1, dmapp_seg_desc_t * a2, dmapp TAU_PROFILE_START(t); for (i = 0; i < a4; i ++) { /* a3 contains target_pe_list and a4 is num_target_pes */ - TAU_TRACE_SENDMSG((TAU_DMAPP_TAGID+1+i)%250, a3[i], len); + TAU_TRACE_SENDMSG((TAU_DMAPP_TAGID+1+i)&255, a3[i], len); } retval = __real_dmapp_put_ixpe_nb(a1, a2, a3, a4, a5, a6, a7, a8); for (i = 0; i < a4; i ++) { - TAU_TRACE_RECVMSG_REMOTE((TAU_DMAPP_TAGID+1+i)%250, Tau_get_node(), len, a3[i]); + TAU_TRACE_RECVMSG_REMOTE((TAU_DMAPP_TAGID+1+i)&255, Tau_get_node(), len, a3[i]); } TAU_PROFILE_STOP(t); return retval; @@ -533,12 +533,12 @@ dmapp_return_t __wrap_dmapp_put_ixpe_nbi(void * a1, dmapp_seg_desc_t * a2, dmap for (i = 0; i < a4; i ++) { /* a3 contains target_pe_list and a4 is num_target_pes */ dprintf("__wrap_dmapp_put_ixpe_nbi:<%d>: Before sendmsg: i = %d, a3[i] = %d\n", Tau_get_node(), i, a3[i]); - TAU_TRACE_SENDMSG((TAU_DMAPP_TAGID+1+i)%250, a3[i], len); + TAU_TRACE_SENDMSG((TAU_DMAPP_TAGID+1+i)&255, a3[i], len); } retval = __real_dmapp_put_ixpe_nbi(a1, 
a2, a3, a4, a5, a6, a7); for (i = 0; i < a4; i ++) { dprintf("__wrap_dmapp_put_ixpe_nbi:<%d>: i = %d, a3[i] = %d\n", Tau_get_node(), i, a3[i]); - TAU_TRACE_RECVMSG_REMOTE((TAU_DMAPP_TAGID+1+i)%250, Tau_get_node(), len, a3[i]); + TAU_TRACE_RECVMSG_REMOTE((TAU_DMAPP_TAGID+1+i)&255, Tau_get_node(), len, a3[i]); } TAU_PROFILE_STOP(t); return retval; @@ -560,11 +560,11 @@ dmapp_return_t __wrap_dmapp_put_ixpe(void * a1, dmapp_seg_desc_t * a2, dmapp_pe TAU_PROFILE_START(t); for (i = 0; i < a4; i ++) { /* a3 contains target_pe_list and a4 is num_target_pes */ - TAU_TRACE_SENDMSG((TAU_DMAPP_TAGID+1+i)%250, a3[i], len); + TAU_TRACE_SENDMSG((TAU_DMAPP_TAGID+1+i)&255, a3[i], len); } retval = __real_dmapp_put_ixpe(a1, a2, a3, a4, a5, a6, a7); for (i = 0; i < a4; i ++) { - TAU_TRACE_RECVMSG_REMOTE((TAU_DMAPP_TAGID+1+i)%250, Tau_get_node(), len, a3[i]); + TAU_TRACE_RECVMSG_REMOTE((TAU_DMAPP_TAGID+1+i)&255, Tau_get_node(), len, a3[i]); } TAU_PROFILE_STOP(t); return retval; diff --git a/src/wrappers/taupreload/Makefile b/src/wrappers/taupreload/Makefile index 1d8ae0e38..444db9870 100644 --- a/src/wrappers/taupreload/Makefile +++ b/src/wrappers/taupreload/Makefile @@ -87,7 +87,7 @@ dl_auditor.o: dl_auditor.c $(CC) $(CFLAGS) -c dl_auditor.c libTAU-dl-auditor$(TAU_SHLIBX): dl_auditor.o - $(CC) -lc -ldl $(TAU_SHFLAGS) libTAU-dl-auditor$(TAU_SHLIBX) dl_auditor.o + $(CC) -lc -ldl $(TAU_SHFLAGS) libTAU-dl-auditor$(TAU_SHLIBX) dl_auditor.o -ldl clean: /bin/rm -f taupreload.o libTAU-preload$(TAU_SHLIBX) dl_auditor.o libTAU-dl-auditor$(TAU_SHLIBX) diff --git a/src/wrappers/upc/bupc/upcr/Makefile b/src/wrappers/upc/bupc/upcr/Makefile index d17042fc4..8c1746ed7 100644 --- a/src/wrappers/upc/bupc/upcr/Makefile +++ b/src/wrappers/upc/bupc/upcr/Makefile @@ -3,7 +3,7 @@ include ../../../../../include/Makefile upcdir=`which upcc | sed -e 's@bin/upcc@@g'` install: - TAU_MAKEFILE=$(TAU_PREFIX_INSTALL_DIR)/$(CONFIG_ARCH)/lib/Makefile.tau$(TAU_CONFIG) 
PATH=$(TAU_PREFIX_INSTALL_DIR)/$(CONFIG_ARCH)/bin:${PATH} ./tau_upc_runtime_wrapper tau_upcr.h -lm -gasnet=${TAU_GASNET_DIR} -gasnetconduit=${UPCNETWORK}-conduit -upc=${upcdir}/opt_inst + TAU_MAKEFILE=$(TAU_PREFIX_INSTALL_DIR)/$(CONFIG_ARCH)/lib/Makefile.tau$(TAU_CONFIG) PATH=$(TAU_PREFIX_INSTALL_DIR)/$(CONFIG_ARCH)/bin:${PATH} ./tau_upc_runtime_wrapper tau_upcr.h -lm -gasnet=${TAU_GASNET_DIR} -gasnetconduit=${UPCNETWORK} -upc=${upcdir}/opt_inst @echo "DONE......!" clean: diff --git a/src/wrappers/upc/bupc/upcr/README b/src/wrappers/upc/bupc/upcr/README deleted file mode 100644 index cda29bb51..000000000 --- a/src/wrappers/upc/bupc/upcr/README +++ /dev/null @@ -1,5 +0,0 @@ -Sample usage for this directory: -./configure -gasnet=/usr/local/packages/gasnet-1.18.0-mpi-smp -mpi -pdt=/usr/local/packages/pdt -bfd=download -iowrapper -upcnetwork=mpi -upc=upcc -make install -tau_upc_runtime_wrapper tau_upcr.h -lm -gasnet=/usr/local/packages/gasnet-1.18.0-mpi-smp -gasnetconduit=mpi-conduit -upc=/usr/local/packages/bupc-2.14.2/opt_inst - diff --git a/src/wrappers/upc/bupc/upcr/tau_upc_runtime_wrapper b/src/wrappers/upc/bupc/upcr/tau_upc_runtime_wrapper index 25ebf1457..fe4135d97 100755 --- a/src/wrappers/upc/bupc/upcr/tau_upc_runtime_wrapper +++ b/src/wrappers/upc/bupc/upcr/tau_upc_runtime_wrapper @@ -14,15 +14,15 @@ usage() echo " -upc= : specifies the upc directory" echo "NOTE: $cmd reads the TAU_MAKEFILE environment variable to get PDT settings" echo " Example: " - echo " % $cmd tau_upcr.h -lm -gasnet=/usr/local/packages/gasnet-1.18.0-mpi-smp -gasnetconduit=mpi-conduit -upc=/usr/local/packages/bupc-2.14.2/opt_inst" + echo " % $cmd tau_upcr.h -lm -gasnet=/usr/local/packages/gasnet-1.18.0-mpi-smp -gasnetconduit=mpi -upc=/usr/local/packages/bupc-2.14.2/opt_inst" echo " generates a wrapper library that may be linked in using TAU_OPTIONS -optTauWrapFile=/link_options.tau" echo "" exit 1 } -upcdir=/usr/local/packages/bupc-2.14.2/opt_inst 
-gasnetdir=/usr/local/packages/gasnet-1.18.0-mpi-smp -gasnetconduit=mpi-conduit +upcdir=/usr/local/packages/bupc/opt_inst +gasnetdir=/usr/local/packages/gasnet +gasnetconduit=mpi runtime_specified=0; header_redirection_specified=0; headerfile="" @@ -35,31 +35,31 @@ fi if [ $# -gt 1 ]; then headerfile=$1 library=$2 - shift; - shift; + shift + shift fi for arg in "$@"; do case $arg in -r) runtime_specified=1; - shift; + shift ;; -d) header_redirection_specified=1; - shift; + shift ;; -gasnet=*) gasnetdir=`echo $arg | sed -e 's/-gasnet=//g'` - shift; + shift ;; -gasnetconduit=*) - gasnetconduit=`echo $arg | sed -e 's/-gasnetconduit=//g'` - shift; + gasnetconduit="`echo $arg | sed -e 's/-gasnetconduit=//g'`-conduit" + shift ;; -upc=*) upcdir=`echo $arg | sed -e 's/-upc=//g'` - shift; + shift ;; esac done @@ -74,10 +74,24 @@ if [ ! -r $TAU_MAKEFILE ] ;then exit 1 fi +if [ ! -d "$upcdir" ] ; then + echo "ERROR: invalid UPC directory: $upcdir" + exit 1 +fi -pdtdir=`grep PDTDIR= $TAU_MAKEFILE | sed -e 's/PDTDIR=//g' ` -tauarch=`tau-config | grep TAUARCH | sed -e 's@TAUARCH=@@g' ` -basedir=`tau-config | grep BASEDIR | sed -e 's@BASEDIR=@@g' ` +if [ ! -d "$gasnetdir" ] ; then + echo "ERROR: invalid GASNet directory: $gasnetdir" + exit 1 +fi + +if [ ! -d "$gasnetdir/include/$gasnetconduit" ] ; then + echo "ERROR: GASNet conduit $gasnetconduit not found in $gasnetdir" + exit 1 +fi + +pdtdir="`grep PDTDIR= $TAU_MAKEFILE | sed -e 's/PDTDIR=//g' `" +tauarch="`tau-config | grep TAUARCH | sed -e 's@TAUARCH=@@g' `" +basedir="`tau-config | grep BASEDIR | sed -e 's@BASEDIR=@@g' `" parser=$pdtdir/$tauarch/bin/upcparse if [ ! -x $parser ]; then @@ -85,9 +99,10 @@ if [ ! 
-x $parser ]; then exit 1 fi baseheader=`basename $headerfile .h` + +# Parse the header file to create pdb file echo $parser $headerfile -DGASNET_SEQ -I$upcdir/include -DUPCRI_BUILDING_LIBUPCR=1 -I$gasnetdir/include -I$gasnetdir/include/$gasnetconduit -DGASNETT_USE_GCC_ATTRIBUTE_MAYALIAS $parser $headerfile -DGASNET_SEQ -I$upcdir/include -DUPCRI_BUILDING_LIBUPCR=1 -I$gasnetdir/include -I$gasnetdir/include/$gasnetconduit -DGASNETT_USE_GCC_ATTRIBUTE_MAYALIAS - mv $baseheader.pdb $headerfile.pdb if [ $runtime_specified = 1 ] ; then @@ -100,38 +115,37 @@ else fi fi -#echo "tau_wrap $headerfile.pdb $headerfile -o wr.c $runtimeopt $library $*" # Generate select.tau - -rm -f select.tau echo "BEGIN_INCLUDE_LIST" > select.tau -nm -A $upcdir/lib/*.a | grep " _upcr" | awk '{ print $3;}' | sed -e 's/_upcr/"# _upcr/g' -e 's/$/(#"/g' >> select.tau -nm -A $upcdir/lib/*.a | grep " UPCRL" | awk '{ print $3;}' | sed -e 's/UPCRL/"# UPCRL/g' -e 's/$/(#"/g' >> select.tau -nm -A $upcdir/lib/*.a | grep " upcr" | awk '{ print $3;}' | sed -e 's/upcr/"# upcr/g' -e 's/$/(#"/g' >> select.tau -nm -A $upcdir/lib/*.a | grep " _bupc_" | awk '{ print $3;}' | sed -e 's/_bupc_/"# _bupc_/g' -e 's/$/(#"/g' >> select.tau -#nm -A $upcdir/lib/*.a | grep " bupc_" | awk '{ print $3;}' | sed -e 's/bupc_/"# bupc_/g' -e 's/$/(#"/g' >> select.tau +for sym in _upcr UPCRL upcr _bupc_ ; do + nm -A $upcdir/lib/*.a | grep " $sym" | awk '{ print $3;}' | sed -e "s/$sym/\"# $sym/g" -e 's/$/(#"/g' >> select.tau +done echo "END_INCLUDE_LIST" >> select.tau -$basedir/bin/tau_wrap $headerfile.pdb $headerfile -o wr.c $runtimeopt $library $* -f select.tau -dirname=`basename ${headerfile} .h`_wrapper + +# Generate wr.c +echo $basedir/bin/tau_wrap $headerfile.pdb $headerfile -o wr.c $runtimeopt $library $* -f select.tau --upc berkeley +$basedir/bin/tau_wrap $headerfile.pdb $headerfile -o wr.c $runtimeopt $library $* -f select.tau --upc berkeley +headerbase=`basename $headerfile .h` +dirname="${headerbase}_wrapper" 
+libname="lib${headerbase}_wrap.a" # replace bupc_smemvec_t [] a with bupc_smemvec_t * a sed -e 's/\[\]/\*/g' ${dirname}/wr.c > ${dirname}/wr.c~ mv ${dirname}/wr.c~ ${dirname}/wr.c -# Fix the Makefile with the correct smp args. -sed -e "s@\$(TAU_MPI_INCLUDE)@\$(TAU_MPI_INCLUDE) -DGASNET_SEQ -I$upcdir/include -DUPCRI_BUILDING_LIBUPCR=1 -I$gasnetdir/include -I$gasnetdir/include/$gasnetconduit -DGASNETT_USE_GCC_ATTRIBUTE_MAYALIAS @g" ${dirname}/Makefile > ${dirname}/Makefile~ -mv ${dirname}/Makefile~ ${dirname}/Makefile +# Add correct paths and arguments to Makefile +sed -e "s@^EXTRA_FLAGS=@EXTRA_FLAGS=-DGASNET_SEQ -I$upcdir/include -DUPCRI_BUILDING_LIBUPCR=1 -I$gasnetdir/include -I$gasnetdir/include/$gasnetconduit -DGASNETT_USE_GCC_ATTRIBUTE_MAYALIAS@" $dirname/Makefile > $dirname/Makefile~ +mv $dirname/Makefile~ $dirname/Makefile -sed -e 's/-Wl,-wrap/-Wl,-Wl,-wrap/g' -e 's@-L.*$@-ltau_upcr_wrap@g' ${dirname}/link_options.tau > ${dirname}/link_options.tau~ +# Add library to link options +sed -e "s/@@WRAPPER_LIBRARY@@/-l${headerbase}_wrap/" $dirname/link_options.tau.master > $dirname/link_options.tau -mv ${dirname}/link_options.tau~ ${dirname}/link_options.tau -cd ${dirname} +cd $dirname make cd .. 
rm -f ${headerfile}.pdb -mkdir -p $basedir/lib/wrappers/upc mkdir -p $basedir/lib/wrappers/upc/bupc -echo cp ${dirname}/link_options.tau $basedir/lib/wrappers/upc/bupc -cp ${dirname}/link_options.tau $basedir/lib/wrappers/upc/bupc -echo cp ${dirname}/libtau_upcr_wrap.a $basedir/lib/ -cp ${dirname}/libtau_upcr_wrap.a $basedir/lib/ +echo cp $dirname/link_options.tau $basedir/lib/wrappers/upc/bupc +cp $dirname/link_options.tau $basedir/lib/wrappers/upc/bupc +echo cp $dirname/$libname $basedir/lib/ +cp $dirname/$libname $basedir/lib/ diff --git a/src/wrappers/upc/bupc/upcr/tau_upcr_wrapper/Makefile b/src/wrappers/upc/bupc/upcr/tau_upcr_wrapper/Makefile index 19b0d2860..d8f52146a 100644 --- a/src/wrappers/upc/bupc/upcr/tau_upcr_wrapper/Makefile +++ b/src/wrappers/upc/bupc/upcr/tau_upcr_wrapper/Makefile @@ -1,13 +1,16 @@ include ${TAU_MAKEFILE} CC=$(TAU_CC) +CFLAGS=$(TAU_DEFS) $(TAU_INCLUDE) $(TAU_MPI_INCLUDE) -I.. +EXTRA_FLAGS= + +AR=$(TAU_AR) ARFLAGS=rcv -CFLAGS=$(TAU_DEFS) $(TAU_INTERNAL_FLAG1) $(TAU_INCLUDE) $(TAU_MPI_INCLUDE) -DGASNET_SEQ -I/usr/local/packages/bupc//opt_inst/include -DUPCRI_BUILDING_LIBUPCR=1 -I/usr/local/packages/gasnet-1.18.0-mpi-smp/include -I/usr/local/packages/gasnet-1.18.0-mpi-smp/include/mpi-conduit -DGASNETT_USE_GCC_ATTRIBUTE_MAYALIAS -I.. 
libtau_upcr_wrap.a: tau_upcr_wrap.o - $(TAU_AR) $(ARFLAGS) $@ $< + $(AR) $(ARFLAGS) $@ $< tau_upcr_wrap.o: wr.c - $(CC) $(CFLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(EXTRA_FLAGS) -c $< -o $@ clean: /bin/rm -f tau_upcr_wrap.o libtau_upcr_wrap.a diff --git a/src/wrappers/upc/bupc/upcr/tau_upcr_wrapper/link_options.tau.master b/src/wrappers/upc/bupc/upcr/tau_upcr_wrapper/link_options.tau.master new file mode 100644 index 000000000..104a7c2fb --- /dev/null +++ b/src/wrappers/upc/bupc/upcr/tau_upcr_wrapper/link_options.tau.master @@ -0,0 +1 @@ +-Wl,-Wl,-wrap,_bupc_thread_distance -Wl,-Wl,-wrap,_upcri_rand -Wl,-Wl,-wrap,_upcri_srand -Wl,-Wl,-wrap,_upcri_rand_init -Wl,-Wl,-wrap,upcri_clock_init -Wl,-Wl,-wrap,upcri_clock -Wl,-Wl,-wrap,_upcri_isvalid_shared -Wl,-Wl,-wrap,_upcri_isvalid_pshared -Wl,-Wl,-wrap,upcri_print_shared -Wl,-Wl,-wrap,upcri_print_pshared -Wl,-Wl,-wrap,_bupc_dump_shared -Wl,-Wl,-wrap,_bupc_local_to_shared -Wl,-Wl,-wrap,_bupc_inverse_cast -Wl,-Wl,-wrap,_upcri_locksystem_init -Wl,-Wl,-wrap,_upcr_global_lock_alloc -Wl,-Wl,-wrap,_upcr_all_lock_alloc -Wl,-Wl,-wrap,_upcr_lock -Wl,-Wl,-wrap,_upcr_lock_attempt -Wl,-Wl,-wrap,_upcr_unlock -Wl,-Wl,-wrap,_upcr_lock_free -Wl,-Wl,-wrap,_upcr_all_lock_free -Wl,-Wl,-wrap,_bupc_sem_alloc -Wl,-Wl,-wrap,_bupc_sem_free -Wl,-Wl,-wrap,_bupc_sem_post -Wl,-Wl,-wrap,_bupc_sem_postN -Wl,-Wl,-wrap,_bupc_sem_wait -Wl,-Wl,-wrap,_bupc_sem_waitN -Wl,-Wl,-wrap,_bupc_sem_try -Wl,-Wl,-wrap,_bupc_sem_tryN -Wl,-Wl,-wrap,_bupc_memput_signal -Wl,-Wl,-wrap,_bupc_memput_signal_async -Wl,-Wl,-wrap,_upcr_notify -Wl,-Wl,-wrap,_upcr_wait -Wl,-Wl,-wrap,_upcr_try_wait -Wl,-Wl,-wrap,upcri_coll_init -Wl,-Wl,-wrap,_upcri_coll_init_thread -Wl,-Wl,-wrap,_upcr_all_broadcast -Wl,-Wl,-wrap,_upcr_team_broadcast -Wl,-Wl,-wrap,_upcr_all_scatter -Wl,-Wl,-wrap,_upcr_all_gather -Wl,-Wl,-wrap,_upcr_all_gather_all -Wl,-Wl,-wrap,_upcr_all_exchange -Wl,-Wl,-wrap,_upcr_team_exchange -Wl,-Wl,-wrap,_upcr_all_permute -Wl,-Wl,-wrap,_upcr_team_split 
-Wl,-Wl,-wrap,_upcr_all_reduceC -Wl,-Wl,-wrap,_upcr_all_prefix_reduceC -Wl,-Wl,-wrap,_upcr_all_reduceUC -Wl,-Wl,-wrap,_upcr_all_prefix_reduceUC -Wl,-Wl,-wrap,_upcr_all_reduceS -Wl,-Wl,-wrap,_upcr_all_prefix_reduceS -Wl,-Wl,-wrap,_upcr_all_reduceUS -Wl,-Wl,-wrap,_upcr_all_prefix_reduceUS -Wl,-Wl,-wrap,_upcr_all_reduceI -Wl,-Wl,-wrap,_upcr_all_prefix_reduceI -Wl,-Wl,-wrap,_upcr_all_reduceUI -Wl,-Wl,-wrap,_upcr_all_prefix_reduceUI -Wl,-Wl,-wrap,_upcr_all_reduceL -Wl,-Wl,-wrap,_upcr_all_prefix_reduceL -Wl,-Wl,-wrap,_upcr_all_reduceUL -Wl,-Wl,-wrap,_upcr_all_prefix_reduceUL -Wl,-Wl,-wrap,_upcr_all_reduceF -Wl,-Wl,-wrap,_upcr_all_prefix_reduceF -Wl,-Wl,-wrap,_upcr_all_reduceD -Wl,-Wl,-wrap,_upcr_all_prefix_reduceD -Wl,-Wl,-wrap,_upcr_all_reduceLD -Wl,-Wl,-wrap,_upcr_all_prefix_reduceLD -Wl,-Wl,-wrap,_upcr_all_fopen -Wl,-Wl,-wrap,_upcr_all_fclose -Wl,-Wl,-wrap,_upcr_all_fsync -Wl,-Wl,-wrap,_upcr_all_fseek -Wl,-Wl,-wrap,_upcr_all_fset_size -Wl,-Wl,-wrap,_upcr_all_fget_size -Wl,-Wl,-wrap,_upcr_all_fpreallocate -Wl,-Wl,-wrap,_upcr_all_fcntl -Wl,-Wl,-wrap,_upcr_all_fread_local -Wl,-Wl,-wrap,_upcr_all_fwrite_local -Wl,-Wl,-wrap,_upcr_all_fread_shared -Wl,-Wl,-wrap,_upcr_all_fwrite_shared -Wl,-Wl,-wrap,_upcr_all_fread_list_local -Wl,-Wl,-wrap,_upcr_all_fread_list_shared -Wl,-Wl,-wrap,_upcr_all_fwrite_list_local -Wl,-Wl,-wrap,_upcr_all_fwrite_list_shared -Wl,-Wl,-wrap,_upcr_all_fread_local_async -Wl,-Wl,-wrap,_upcr_all_fwrite_local_async -Wl,-Wl,-wrap,_upcr_all_fread_shared_async -Wl,-Wl,-wrap,_upcr_all_fwrite_shared_async -Wl,-Wl,-wrap,_upcr_all_fread_list_local_async -Wl,-Wl,-wrap,_upcr_all_fread_list_shared_async -Wl,-Wl,-wrap,_upcr_all_fwrite_list_local_async -Wl,-Wl,-wrap,_upcr_all_fwrite_list_shared_async -Wl,-Wl,-wrap,_upcr_all_fwait_async -Wl,-Wl,-wrap,_upcr_all_ftest_async -Wl,-Wl,-wrap,_upcr_local_alloc -Wl,-Wl,-wrap,_upcr_global_alloc -Wl,-Wl,-wrap,_upcr_all_alloc -Wl,-Wl,-wrap,_upcr_free -Wl,-Wl,-wrap,_upcr_all_free -Wl,-Wl,-wrap,upcri_getheapstats 
-Wl,-Wl,-wrap,upcri_get_handlertable -Wl,-Wl,-wrap,upcri_get_handlertable_count -Wl,-Wl,-wrap,_upcr_do_memcpy -Wl,-Wl,-wrap,_bupc_waitsync -Wl,-Wl,-wrap,_bupc_trysync -Wl,-Wl,-wrap,_bupc_waitsync_all -Wl,-Wl,-wrap,_bupc_trysync_all -Wl,-Wl,-wrap,_bupc_waitsync_some -Wl,-Wl,-wrap,_bupc_trysync_some -Wl,-Wl,-wrap,_bupc_memcpy_async -Wl,-Wl,-wrap,_bupc_memget_async -Wl,-Wl,-wrap,_bupc_memput_async -Wl,-Wl,-wrap,_bupc_memset_async -Wl,-Wl,-wrap,_bupc_end_accessregion -Wl,-Wl,-wrap,_bupc_memcpy_vlist_async -Wl,-Wl,-wrap,_bupc_memput_vlist_async -Wl,-Wl,-wrap,_bupc_memget_vlist_async -Wl,-Wl,-wrap,_bupc_memcpy_ilist_async -Wl,-Wl,-wrap,_bupc_memput_ilist_async -Wl,-Wl,-wrap,_bupc_memget_ilist_async -Wl,-Wl,-wrap,_bupc_memcpy_fstrided_async -Wl,-Wl,-wrap,_bupc_memput_fstrided_async -Wl,-Wl,-wrap,_bupc_memget_fstrided_async -Wl,-Wl,-wrap,_bupc_memcpy_strided_async -Wl,-Wl,-wrap,_bupc_memput_strided_async -Wl,-Wl,-wrap,_bupc_memget_strided_async -Wl,-Wl,-wrap,upcr_startup_init -Wl,-Wl,-wrap,upcr_startup_attach -Wl,-Wl,-wrap,upcr_startup_spawn -Wl,-Wl,-wrap,upcr_exit -Wl,-Wl,-wrap,upcr_global_exit -Wl,-Wl,-wrap,upcri_do_exit -Wl,-Wl,-wrap,_upcr_startup_shalloc -Wl,-Wl,-wrap,_upcr_startup_pshalloc -Wl,-Wl,-wrap,_upcr_startup_initarray -Wl,-Wl,-wrap,_upcr_startup_initparray @@WRAPPER_LIBRARY@@ diff --git a/src/wrappers/upc/bupc/upcr/tau_upcr_wrapper/wr.c b/src/wrappers/upc/bupc/upcr/tau_upcr_wrapper/wr.c index 508ec8b42..1781fa383 100644 --- a/src/wrappers/upc/bupc/upcr/tau_upcr_wrapper/wr.c +++ b/src/wrappers/upc/bupc/upcr/tau_upcr_wrapper/wr.c @@ -1,6 +1,20 @@ #include #include #include + +#pragma pupc off + +#ifdef __BERKELEY_UPC__ +#pragma UPCR NO_SRCPOS +#endif + +static int tau_upc_node = -1; +static int tau_upc_tagid_f = 0; +#define TAU_UPC_TAGID (tau_upc_tagid_f = (tau_upc_tagid_f & 255)) +#define TAU_UPC_TAGID_NEXT ((++tau_upc_tagid_f) & 255) + +void tau_totalnodes(int, int); + #warning "TAU: Not generating wrapper for vararg function upcri_err" #warning "TAU: Not 
generating wrapper for vararg function upcri_errno" #warning "TAU: Not generating wrapper for vararg function upcri_gaserr" @@ -17,6 +31,15 @@ unsigned int __real__bupc_thread_distance(int a1, int a2) ; unsigned int __wrap__bupc_thread_distance(int a1, int a2) { unsigned int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_thread_distance(a1, a2); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"unsigned int _bupc_thread_distance(int, int) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_thread_distance(a1, a2); @@ -34,6 +57,15 @@ int __real__upcri_rand() ; int __wrap__upcri_rand() { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcri_rand(); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _upcri_rand() C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcri_rand(); @@ -50,6 +82,16 @@ int __wrap__upcri_rand() { void __real__upcri_srand(unsigned int a1) ; void __wrap__upcri_srand(unsigned int a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcri_srand(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcri_srand(unsigned int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcri_srand(a1); @@ -65,6 +107,16 @@ void __wrap__upcri_srand(unsigned int a1) { void __real__upcri_rand_init() ; void __wrap__upcri_rand_init() { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcri_rand_init(); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcri_rand_init() C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcri_rand_init(); @@ -80,6 +132,16 @@ void __wrap__upcri_rand_init() { void __real_upcri_clock_init() ; void 
__wrap_upcri_clock_init() { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upcri_clock_init(); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void upcri_clock_init() C", "", TAU_USER); TAU_PROFILE_START(t); __real_upcri_clock_init(); @@ -96,6 +158,15 @@ clock_t __real_upcri_clock() ; clock_t __wrap_upcri_clock() { clock_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upcri_clock(); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"clock_t upcri_clock() C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upcri_clock(); @@ -113,6 +184,15 @@ int __real__upcri_isvalid_shared(upcr_shared_ptr_t a1) ; int __wrap__upcri_isvalid_shared(upcr_shared_ptr_t a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcri_isvalid_shared(a1); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _upcri_isvalid_shared(upcr_shared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcri_isvalid_shared(a1); @@ -130,6 +210,15 @@ int __real__upcri_isvalid_pshared(upcr_pshared_ptr_t a1) ; int __wrap__upcri_isvalid_pshared(upcr_pshared_ptr_t a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcri_isvalid_pshared(a1); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _upcri_isvalid_pshared(upcr_pshared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcri_isvalid_pshared(a1); @@ -146,6 +235,16 @@ int __wrap__upcri_isvalid_pshared(upcr_pshared_ptr_t a1) { void __real_upcri_print_shared(upcr_shared_ptr_t a1) ; void __wrap_upcri_print_shared(upcr_shared_ptr_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); 
+ if (tau_upc_node == -1) { + __real_upcri_print_shared(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void upcri_print_shared(upcr_shared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upcri_print_shared(a1); @@ -161,6 +260,16 @@ void __wrap_upcri_print_shared(upcr_shared_ptr_t a1) { void __real_upcri_print_pshared(upcr_pshared_ptr_t a1) ; void __wrap_upcri_print_pshared(upcr_pshared_ptr_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upcri_print_pshared(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void upcri_print_pshared(upcr_pshared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upcri_print_pshared(a1); @@ -177,6 +286,15 @@ int __real__bupc_dump_shared(upcr_shared_ptr_t a1, char * a2, int a3) ; int __wrap__bupc_dump_shared(upcr_shared_ptr_t a1, char * a2, int a3) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_dump_shared(a1, a2, a3); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _bupc_dump_shared(upcr_shared_ptr_t, char *, int) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_dump_shared(a1, a2, a3); @@ -194,6 +312,15 @@ upcr_shared_ptr_t __real__bupc_local_to_shared(void * a1, int a2, int a3) ; upcr_shared_ptr_t __wrap__bupc_local_to_shared(void * a1, int a2, int a3) { upcr_shared_ptr_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_local_to_shared(a1, a2, a3); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upcr_shared_ptr_t _bupc_local_to_shared(void *, int, int) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_local_to_shared(a1, a2, a3); @@ -211,6 +338,15 @@ upcr_shared_ptr_t __real__bupc_inverse_cast(void * a1) ; 
upcr_shared_ptr_t __wrap__bupc_inverse_cast(void * a1) { upcr_shared_ptr_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_inverse_cast(a1); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upcr_shared_ptr_t _bupc_inverse_cast(void *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_inverse_cast(a1); @@ -227,6 +363,16 @@ upcr_shared_ptr_t __wrap__bupc_inverse_cast(void * a1) { void __real__upcri_locksystem_init() ; void __wrap__upcri_locksystem_init() { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcri_locksystem_init(); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcri_locksystem_init() C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcri_locksystem_init(); @@ -243,6 +389,15 @@ upcr_shared_ptr_t __real__upcr_global_lock_alloc() ; upcr_shared_ptr_t __wrap__upcr_global_lock_alloc() { upcr_shared_ptr_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_global_lock_alloc(); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upcr_shared_ptr_t _upcr_global_lock_alloc() C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_global_lock_alloc(); @@ -260,6 +415,15 @@ upcr_shared_ptr_t __real__upcr_all_lock_alloc() ; upcr_shared_ptr_t __wrap__upcr_all_lock_alloc() { upcr_shared_ptr_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_lock_alloc(); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upcr_shared_ptr_t _upcr_all_lock_alloc() C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_lock_alloc(); @@ -276,6 +440,16 @@ upcr_shared_ptr_t __wrap__upcr_all_lock_alloc() { void 
__real__upcr_lock(upcr_shared_ptr_t a1) ; void __wrap__upcr_lock(upcr_shared_ptr_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_lock(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_lock(upcr_shared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_lock(a1); @@ -292,6 +466,15 @@ int __real__upcr_lock_attempt(upcr_shared_ptr_t a1) ; int __wrap__upcr_lock_attempt(upcr_shared_ptr_t a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_lock_attempt(a1); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _upcr_lock_attempt(upcr_shared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_lock_attempt(a1); @@ -308,6 +491,16 @@ int __wrap__upcr_lock_attempt(upcr_shared_ptr_t a1) { void __real__upcr_unlock(upcr_shared_ptr_t a1) ; void __wrap__upcr_unlock(upcr_shared_ptr_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_unlock(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_unlock(upcr_shared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_unlock(a1); @@ -323,6 +516,16 @@ void __wrap__upcr_unlock(upcr_shared_ptr_t a1) { void __real__upcr_lock_free(upcr_shared_ptr_t a1) ; void __wrap__upcr_lock_free(upcr_shared_ptr_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_lock_free(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_lock_free(upcr_shared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_lock_free(a1); @@ -338,6 +541,16 @@ void __wrap__upcr_lock_free(upcr_shared_ptr_t a1) { void __real__upcr_all_lock_free(upcr_shared_ptr_t a1) ; 
void __wrap__upcr_all_lock_free(upcr_shared_ptr_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_lock_free(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_lock_free(upcr_shared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_lock_free(a1); @@ -354,6 +567,15 @@ upcr_pshared_ptr_t __real__bupc_sem_alloc(int a1) ; upcr_pshared_ptr_t __wrap__bupc_sem_alloc(int a1) { upcr_pshared_ptr_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_sem_alloc(a1); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upcr_pshared_ptr_t _bupc_sem_alloc(int) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_sem_alloc(a1); @@ -370,6 +592,16 @@ upcr_pshared_ptr_t __wrap__bupc_sem_alloc(int a1) { void __real__bupc_sem_free(upcr_pshared_ptr_t a1) ; void __wrap__bupc_sem_free(upcr_pshared_ptr_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__bupc_sem_free(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _bupc_sem_free(upcr_pshared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__bupc_sem_free(a1); @@ -385,6 +617,16 @@ void __wrap__bupc_sem_free(upcr_pshared_ptr_t a1) { void __real__bupc_sem_post(upcr_pshared_ptr_t a1) ; void __wrap__bupc_sem_post(upcr_pshared_ptr_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__bupc_sem_post(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _bupc_sem_post(upcr_pshared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__bupc_sem_post(a1); @@ -400,6 +642,16 @@ void __wrap__bupc_sem_post(upcr_pshared_ptr_t a1) { void __real__bupc_sem_postN(upcr_pshared_ptr_t a1, 
size_t a2) ; void __wrap__bupc_sem_postN(upcr_pshared_ptr_t a1, size_t a2) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__bupc_sem_postN(a1, a2); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _bupc_sem_postN(upcr_pshared_ptr_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__bupc_sem_postN(a1, a2); @@ -415,6 +667,16 @@ void __wrap__bupc_sem_postN(upcr_pshared_ptr_t a1, size_t a2) { void __real__bupc_sem_wait(upcr_pshared_ptr_t a1) ; void __wrap__bupc_sem_wait(upcr_pshared_ptr_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__bupc_sem_wait(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _bupc_sem_wait(upcr_pshared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__bupc_sem_wait(a1); @@ -430,6 +692,16 @@ void __wrap__bupc_sem_wait(upcr_pshared_ptr_t a1) { void __real__bupc_sem_waitN(upcr_pshared_ptr_t a1, size_t a2) ; void __wrap__bupc_sem_waitN(upcr_pshared_ptr_t a1, size_t a2) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__bupc_sem_waitN(a1, a2); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _bupc_sem_waitN(upcr_pshared_ptr_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__bupc_sem_waitN(a1, a2); @@ -446,6 +718,15 @@ int __real__bupc_sem_try(upcr_pshared_ptr_t a1) ; int __wrap__bupc_sem_try(upcr_pshared_ptr_t a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_sem_try(a1); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _bupc_sem_try(upcr_pshared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_sem_try(a1); @@ -463,6 +744,15 @@ int __real__bupc_sem_tryN(upcr_pshared_ptr_t 
a1, size_t a2) ; int __wrap__bupc_sem_tryN(upcr_pshared_ptr_t a1, size_t a2) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_sem_tryN(a1, a2); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _bupc_sem_tryN(upcr_pshared_ptr_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_sem_tryN(a1, a2); @@ -479,6 +769,16 @@ int __wrap__bupc_sem_tryN(upcr_pshared_ptr_t a1, size_t a2) { void __real__bupc_memput_signal(upcr_shared_ptr_t a1, const void * a2, size_t a3, upcr_pshared_ptr_t a4, size_t a5) ; void __wrap__bupc_memput_signal(upcr_shared_ptr_t a1, const void * a2, size_t a3, upcr_pshared_ptr_t a4, size_t a5) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__bupc_memput_signal(a1, a2, a3, a4, a5); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _bupc_memput_signal(upcr_shared_ptr_t, const void *, size_t, upcr_pshared_ptr_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__bupc_memput_signal(a1, a2, a3, a4, a5); @@ -494,6 +794,16 @@ void __wrap__bupc_memput_signal(upcr_shared_ptr_t a1, const void * a2, size_ void __real__bupc_memput_signal_async(upcr_shared_ptr_t a1, const void * a2, size_t a3, upcr_pshared_ptr_t a4, size_t a5) ; void __wrap__bupc_memput_signal_async(upcr_shared_ptr_t a1, const void * a2, size_t a3, upcr_pshared_ptr_t a4, size_t a5) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__bupc_memput_signal_async(a1, a2, a3, a4, a5); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _bupc_memput_signal_async(upcr_shared_ptr_t, const void *, size_t, upcr_pshared_ptr_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__bupc_memput_signal_async(a1, a2, a3, a4, a5); @@ -509,6 +819,16 @@ void 
__wrap__bupc_memput_signal_async(upcr_shared_ptr_t a1, const void * a2, void __real__upcr_notify(int a1, int a2) ; void __wrap__upcr_notify(int a1, int a2) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_notify(a1, a2); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_notify(int, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_notify(a1, a2); @@ -524,6 +844,16 @@ void __wrap__upcr_notify(int a1, int a2) { void __real__upcr_wait(int a1, int a2) ; void __wrap__upcr_wait(int a1, int a2) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_wait(a1, a2); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_wait(int, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_wait(a1, a2); @@ -540,6 +870,15 @@ int __real__upcr_try_wait(int a1, int a2) ; int __wrap__upcr_try_wait(int a1, int a2) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_try_wait(a1, a2); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _upcr_try_wait(int, int) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_try_wait(a1, a2); @@ -556,6 +895,16 @@ int __wrap__upcr_try_wait(int a1, int a2) { void __real_upcri_coll_init() ; void __wrap_upcri_coll_init() { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upcri_coll_init(); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void upcri_coll_init() C", "", TAU_USER); TAU_PROFILE_START(t); __real_upcri_coll_init(); @@ -571,6 +920,16 @@ void __wrap_upcri_coll_init() { void __real__upcri_coll_init_thread() ; void __wrap__upcri_coll_init_thread() { + if (tau_upc_node == -1) { + tau_upc_node = 
TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcri_coll_init_thread(); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcri_coll_init_thread() C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcri_coll_init_thread(); @@ -586,6 +945,16 @@ void __wrap__upcri_coll_init_thread() { void __real__upcr_all_broadcast(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, size_t a3, upc_flag_t a4) ; void __wrap__upcr_all_broadcast(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, size_t a3, upc_flag_t a4) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_broadcast(a1, a2, a3, a4); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_broadcast(upcr_shared_ptr_t, upcr_shared_ptr_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_broadcast(a1, a2, a3, a4); @@ -602,6 +971,15 @@ bupc_coll_handle_t __real__upcr_team_broadcast(bupc_team_t a1, upcr_shared_pt bupc_coll_handle_t __wrap__upcr_team_broadcast(bupc_team_t a1, upcr_shared_ptr_t a2, upcr_shared_ptr_t a3, size_t a4, upc_flag_t a5) { bupc_coll_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_team_broadcast(a1, a2, a3, a4, a5); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_coll_handle_t _upcr_team_broadcast(bupc_team_t, upcr_shared_ptr_t, upcr_shared_ptr_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_team_broadcast(a1, a2, a3, a4, a5); @@ -618,6 +996,16 @@ bupc_coll_handle_t __wrap__upcr_team_broadcast(bupc_team_t a1, upcr_shared_pt void __real__upcr_all_scatter(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, size_t a3, upc_flag_t a4) ; void __wrap__upcr_all_scatter(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, size_t a3, upc_flag_t a4) { + if (tau_upc_node == -1) { + 
tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_scatter(a1, a2, a3, a4); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_scatter(upcr_shared_ptr_t, upcr_shared_ptr_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_scatter(a1, a2, a3, a4); @@ -633,6 +1021,16 @@ void __wrap__upcr_all_scatter(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, si void __real__upcr_all_gather(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, size_t a3, upc_flag_t a4) ; void __wrap__upcr_all_gather(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, size_t a3, upc_flag_t a4) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_gather(a1, a2, a3, a4); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_gather(upcr_shared_ptr_t, upcr_shared_ptr_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_gather(a1, a2, a3, a4); @@ -648,6 +1046,16 @@ void __wrap__upcr_all_gather(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, siz void __real__upcr_all_gather_all(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, size_t a3, upc_flag_t a4) ; void __wrap__upcr_all_gather_all(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, size_t a3, upc_flag_t a4) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_gather_all(a1, a2, a3, a4); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_gather_all(upcr_shared_ptr_t, upcr_shared_ptr_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_gather_all(a1, a2, a3, a4); @@ -663,6 +1071,16 @@ void __wrap__upcr_all_gather_all(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, void __real__upcr_all_exchange(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, size_t a3, upc_flag_t a4) ; void 
__wrap__upcr_all_exchange(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, size_t a3, upc_flag_t a4) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_exchange(a1, a2, a3, a4); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_exchange(upcr_shared_ptr_t, upcr_shared_ptr_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_exchange(a1, a2, a3, a4); @@ -679,6 +1097,15 @@ bupc_coll_handle_t __real__upcr_team_exchange(bupc_team_t a1, upcr_shared_ptr bupc_coll_handle_t __wrap__upcr_team_exchange(bupc_team_t a1, upcr_shared_ptr_t a2, upcr_shared_ptr_t a3, size_t a4, upc_flag_t a5) { bupc_coll_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_team_exchange(a1, a2, a3, a4, a5); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_coll_handle_t _upcr_team_exchange(bupc_team_t, upcr_shared_ptr_t, upcr_shared_ptr_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_team_exchange(a1, a2, a3, a4, a5); @@ -695,6 +1122,16 @@ bupc_coll_handle_t __wrap__upcr_team_exchange(bupc_team_t a1, upcr_shared_ptr void __real__upcr_all_permute(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upcr_pshared_ptr_t a3, size_t a4, upc_flag_t a5) ; void __wrap__upcr_all_permute(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upcr_pshared_ptr_t a3, size_t a4, upc_flag_t a5) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_permute(a1, a2, a3, a4, a5); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_permute(upcr_shared_ptr_t, upcr_shared_ptr_t, upcr_pshared_ptr_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_permute(a1, a2, a3, a4, a5); @@ -711,6 +1148,15 @@ 
bupc_team_t __real__upcr_team_split(bupc_team_t a1, int a2, int a3) ; bupc_team_t __wrap__upcr_team_split(bupc_team_t a1, int a2, int a3) { bupc_team_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_team_split(a1, a2, a3); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_team_t _upcr_team_split(bupc_team_t, int, int) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_team_split(a1, a2, a3); @@ -727,6 +1173,16 @@ bupc_team_t __wrap__upcr_team_split(bupc_team_t a1, int a2, int a3) { void __real__upcr_all_reduceC(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, signed char (*a6) (signed char, signed char) , upc_flag_t a7, int a8) ; void __wrap__upcr_all_reduceC(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, signed char (*a6) (signed char, signed char) , upc_flag_t a7, int a8) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_reduceC(a1, a2, a3, a4, a5, a6, a7, a8); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_reduceC(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, signed char (*) (signed char, signed char), upc_flag_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_reduceC(a1, a2, a3, a4, a5, a6, a7, a8); @@ -742,6 +1198,16 @@ void __wrap__upcr_all_reduceC(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, up void __real__upcr_all_prefix_reduceC(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, signed char (*a6) (signed char, signed char) , upc_flag_t a7) ; void __wrap__upcr_all_prefix_reduceC(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, signed char (*a6) (signed char, signed char) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + 
if (tau_upc_node == -1) { + __real__upcr_all_prefix_reduceC(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_prefix_reduceC(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, signed char (*) (signed char, signed char), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_prefix_reduceC(a1, a2, a3, a4, a5, a6, a7); @@ -757,6 +1223,16 @@ void __wrap__upcr_all_prefix_reduceC(upcr_shared_ptr_t a1, upcr_shared_ptr_t void __real__upcr_all_reduceUC(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned char (*a6) (unsigned char, unsigned char) , upc_flag_t a7, int a8) ; void __wrap__upcr_all_reduceUC(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned char (*a6) (unsigned char, unsigned char) , upc_flag_t a7, int a8) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_reduceUC(a1, a2, a3, a4, a5, a6, a7, a8); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_reduceUC(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, unsigned char (*) (unsigned char, unsigned char), upc_flag_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_reduceUC(a1, a2, a3, a4, a5, a6, a7, a8); @@ -772,6 +1248,16 @@ void __wrap__upcr_all_reduceUC(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, u void __real__upcr_all_prefix_reduceUC(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned char (*a6) (unsigned char, unsigned char) , upc_flag_t a7) ; void __wrap__upcr_all_prefix_reduceUC(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned char (*a6) (unsigned char, unsigned char) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + 
__real__upcr_all_prefix_reduceUC(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_prefix_reduceUC(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, unsigned char (*) (unsigned char, unsigned char), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_prefix_reduceUC(a1, a2, a3, a4, a5, a6, a7); @@ -787,6 +1273,16 @@ void __wrap__upcr_all_prefix_reduceUC(upcr_shared_ptr_t a1, upcr_shared_ptr_t void __real__upcr_all_reduceS(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, short (*a6) (short, short) , upc_flag_t a7, int a8) ; void __wrap__upcr_all_reduceS(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, short (*a6) (short, short) , upc_flag_t a7, int a8) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_reduceS(a1, a2, a3, a4, a5, a6, a7, a8); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_reduceS(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, short (*) (short, short), upc_flag_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_reduceS(a1, a2, a3, a4, a5, a6, a7, a8); @@ -802,6 +1298,16 @@ void __wrap__upcr_all_reduceS(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, up void __real__upcr_all_prefix_reduceS(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, short (*a6) (short, short) , upc_flag_t a7) ; void __wrap__upcr_all_prefix_reduceS(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, short (*a6) (short, short) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_prefix_reduceS(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void 
_upcr_all_prefix_reduceS(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, short (*) (short, short), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_prefix_reduceS(a1, a2, a3, a4, a5, a6, a7); @@ -817,6 +1323,16 @@ void __wrap__upcr_all_prefix_reduceS(upcr_shared_ptr_t a1, upcr_shared_ptr_t void __real__upcr_all_reduceUS(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned short (*a6) (unsigned short, unsigned short) , upc_flag_t a7, int a8) ; void __wrap__upcr_all_reduceUS(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned short (*a6) (unsigned short, unsigned short) , upc_flag_t a7, int a8) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_reduceUS(a1, a2, a3, a4, a5, a6, a7, a8); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_reduceUS(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, unsigned short (*) (unsigned short, unsigned short), upc_flag_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_reduceUS(a1, a2, a3, a4, a5, a6, a7, a8); @@ -832,6 +1348,16 @@ void __wrap__upcr_all_reduceUS(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, u void __real__upcr_all_prefix_reduceUS(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned short (*a6) (unsigned short, unsigned short) , upc_flag_t a7) ; void __wrap__upcr_all_prefix_reduceUS(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned short (*a6) (unsigned short, unsigned short) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_prefix_reduceUS(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_prefix_reduceUS(upcr_shared_ptr_t, 
upcr_shared_ptr_t, upc_op_t, size_t, size_t, unsigned short (*) (unsigned short, unsigned short), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_prefix_reduceUS(a1, a2, a3, a4, a5, a6, a7); @@ -847,6 +1373,16 @@ void __wrap__upcr_all_prefix_reduceUS(upcr_shared_ptr_t a1, upcr_shared_ptr_t void __real__upcr_all_reduceI(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, int (*a6) (int, int) , upc_flag_t a7, int a8) ; void __wrap__upcr_all_reduceI(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, int (*a6) (int, int) , upc_flag_t a7, int a8) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_reduceI(a1, a2, a3, a4, a5, a6, a7, a8); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_reduceI(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, int (*) (int, int), upc_flag_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_reduceI(a1, a2, a3, a4, a5, a6, a7, a8); @@ -862,6 +1398,16 @@ void __wrap__upcr_all_reduceI(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, up void __real__upcr_all_prefix_reduceI(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, int (*a6) (int, int) , upc_flag_t a7) ; void __wrap__upcr_all_prefix_reduceI(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, int (*a6) (int, int) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_prefix_reduceI(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_prefix_reduceI(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, int (*) (int, int), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_prefix_reduceI(a1, a2, a3, a4, a5, a6, a7); @@ -877,6 
+1423,16 @@ void __wrap__upcr_all_prefix_reduceI(upcr_shared_ptr_t a1, upcr_shared_ptr_t void __real__upcr_all_reduceUI(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned int (*a6) (unsigned int, unsigned int) , upc_flag_t a7, int a8) ; void __wrap__upcr_all_reduceUI(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned int (*a6) (unsigned int, unsigned int) , upc_flag_t a7, int a8) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_reduceUI(a1, a2, a3, a4, a5, a6, a7, a8); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_reduceUI(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, unsigned int (*) (unsigned int, unsigned int), upc_flag_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_reduceUI(a1, a2, a3, a4, a5, a6, a7, a8); @@ -892,6 +1448,16 @@ void __wrap__upcr_all_reduceUI(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, u void __real__upcr_all_prefix_reduceUI(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned int (*a6) (unsigned int, unsigned int) , upc_flag_t a7) ; void __wrap__upcr_all_prefix_reduceUI(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned int (*a6) (unsigned int, unsigned int) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_prefix_reduceUI(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_prefix_reduceUI(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, unsigned int (*) (unsigned int, unsigned int), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_prefix_reduceUI(a1, a2, a3, a4, a5, a6, a7); @@ -907,6 +1473,16 @@ void 
__wrap__upcr_all_prefix_reduceUI(upcr_shared_ptr_t a1, upcr_shared_ptr_t void __real__upcr_all_reduceL(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, long (*a6) (long, long) , upc_flag_t a7, int a8) ; void __wrap__upcr_all_reduceL(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, long (*a6) (long, long) , upc_flag_t a7, int a8) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_reduceL(a1, a2, a3, a4, a5, a6, a7, a8); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_reduceL(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, long (*) (long, long), upc_flag_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_reduceL(a1, a2, a3, a4, a5, a6, a7, a8); @@ -922,6 +1498,16 @@ void __wrap__upcr_all_reduceL(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, up void __real__upcr_all_prefix_reduceL(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, long (*a6) (long, long) , upc_flag_t a7) ; void __wrap__upcr_all_prefix_reduceL(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, long (*a6) (long, long) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_prefix_reduceL(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_prefix_reduceL(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, long (*) (long, long), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_prefix_reduceL(a1, a2, a3, a4, a5, a6, a7); @@ -937,6 +1523,16 @@ void __wrap__upcr_all_prefix_reduceL(upcr_shared_ptr_t a1, upcr_shared_ptr_t void __real__upcr_all_reduceUL(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned long (*a6) (unsigned 
long, unsigned long) , upc_flag_t a7, int a8) ; void __wrap__upcr_all_reduceUL(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned long (*a6) (unsigned long, unsigned long) , upc_flag_t a7, int a8) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_reduceUL(a1, a2, a3, a4, a5, a6, a7, a8); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_reduceUL(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, unsigned long (*) (unsigned long, unsigned long), upc_flag_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_reduceUL(a1, a2, a3, a4, a5, a6, a7, a8); @@ -952,6 +1548,16 @@ void __wrap__upcr_all_reduceUL(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, u void __real__upcr_all_prefix_reduceUL(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned long (*a6) (unsigned long, unsigned long) , upc_flag_t a7) ; void __wrap__upcr_all_prefix_reduceUL(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, unsigned long (*a6) (unsigned long, unsigned long) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_prefix_reduceUL(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_prefix_reduceUL(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, unsigned long (*) (unsigned long, unsigned long), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_prefix_reduceUL(a1, a2, a3, a4, a5, a6, a7); @@ -967,6 +1573,16 @@ void __wrap__upcr_all_prefix_reduceUL(upcr_shared_ptr_t a1, upcr_shared_ptr_t void __real__upcr_all_reduceF(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, float (*a6) (float, float) , upc_flag_t a7, int a8) ; void 
__wrap__upcr_all_reduceF(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, float (*a6) (float, float) , upc_flag_t a7, int a8) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_reduceF(a1, a2, a3, a4, a5, a6, a7, a8); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_reduceF(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, float (*) (float, float), upc_flag_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_reduceF(a1, a2, a3, a4, a5, a6, a7, a8); @@ -982,6 +1598,16 @@ void __wrap__upcr_all_reduceF(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, up void __real__upcr_all_prefix_reduceF(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, float (*a6) (float, float) , upc_flag_t a7) ; void __wrap__upcr_all_prefix_reduceF(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, float (*a6) (float, float) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_prefix_reduceF(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_prefix_reduceF(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, float (*) (float, float), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_prefix_reduceF(a1, a2, a3, a4, a5, a6, a7); @@ -997,6 +1623,16 @@ void __wrap__upcr_all_prefix_reduceF(upcr_shared_ptr_t a1, upcr_shared_ptr_t void __real__upcr_all_reduceD(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, double (*a6) (double, double) , upc_flag_t a7, int a8) ; void __wrap__upcr_all_reduceD(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, double (*a6) (double, double) , upc_flag_t a7, int a8) { + if (tau_upc_node == -1) { + 
tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_reduceD(a1, a2, a3, a4, a5, a6, a7, a8); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_reduceD(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, double (*) (double, double), upc_flag_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_reduceD(a1, a2, a3, a4, a5, a6, a7, a8); @@ -1012,6 +1648,16 @@ void __wrap__upcr_all_reduceD(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, up void __real__upcr_all_prefix_reduceD(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, double (*a6) (double, double) , upc_flag_t a7) ; void __wrap__upcr_all_prefix_reduceD(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, double (*a6) (double, double) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_prefix_reduceD(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_prefix_reduceD(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, double (*) (double, double), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_prefix_reduceD(a1, a2, a3, a4, a5, a6, a7); @@ -1027,6 +1673,16 @@ void __wrap__upcr_all_prefix_reduceD(upcr_shared_ptr_t a1, upcr_shared_ptr_t void __real__upcr_all_reduceLD(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, long double (*a6) (long double, long double) , upc_flag_t a7, int a8) ; void __wrap__upcr_all_reduceLD(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, long double (*a6) (long double, long double) , upc_flag_t a7, int a8) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_reduceLD(a1, a2, a3, a4, a5, a6, a7, a8); + return; + } 
else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_reduceLD(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, long double (*) (long double, long double), upc_flag_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_reduceLD(a1, a2, a3, a4, a5, a6, a7, a8); @@ -1042,6 +1698,16 @@ void __wrap__upcr_all_reduceLD(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, u void __real__upcr_all_prefix_reduceLD(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, long double (*a6) (long double, long double) , upc_flag_t a7) ; void __wrap__upcr_all_prefix_reduceLD(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, upc_op_t a3, size_t a4, size_t a5, long double (*a6) (long double, long double) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_prefix_reduceLD(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_prefix_reduceLD(upcr_shared_ptr_t, upcr_shared_ptr_t, upc_op_t, size_t, size_t, long double (*) (long double, long double), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_prefix_reduceLD(a1, a2, a3, a4, a5, a6, a7); @@ -1058,6 +1724,15 @@ upcr_pshared_ptr_t __real__upcr_all_fopen(const char * a1, int a2, size_t a upcr_pshared_ptr_t __wrap__upcr_all_fopen(const char * a1, int a2, size_t a3, const upc_hint_t * a4) { upcr_pshared_ptr_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fopen(a1, a2, a3, a4); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upcr_pshared_ptr_t _upcr_all_fopen(const char *, int, size_t, const upc_hint_t *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fopen(a1, a2, a3, a4); @@ -1075,6 +1750,15 @@ int __real__upcr_all_fclose(upcr_pshared_ptr_t a1) ; int 
__wrap__upcr_all_fclose(upcr_pshared_ptr_t a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fclose(a1); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _upcr_all_fclose(upcr_pshared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fclose(a1); @@ -1092,6 +1776,15 @@ int __real__upcr_all_fsync(upcr_pshared_ptr_t a1) ; int __wrap__upcr_all_fsync(upcr_pshared_ptr_t a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fsync(a1); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _upcr_all_fsync(upcr_pshared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fsync(a1); @@ -1109,6 +1802,15 @@ upc_off_t __real__upcr_all_fseek(upcr_pshared_ptr_t a1, upc_off_t a2, int a upc_off_t __wrap__upcr_all_fseek(upcr_pshared_ptr_t a1, upc_off_t a2, int a3) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fseek(a1, a2, a3); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t _upcr_all_fseek(upcr_pshared_ptr_t, upc_off_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fseek(a1, a2, a3); @@ -1126,6 +1828,15 @@ int __real__upcr_all_fset_size(upcr_pshared_ptr_t a1, upc_off_t a2) ; int __wrap__upcr_all_fset_size(upcr_pshared_ptr_t a1, upc_off_t a2) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fset_size(a1, a2); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _upcr_all_fset_size(upcr_pshared_ptr_t, upc_off_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fset_size(a1, a2); @@ -1143,6 +1854,15 
@@ upc_off_t __real__upcr_all_fget_size(upcr_pshared_ptr_t a1) ; upc_off_t __wrap__upcr_all_fget_size(upcr_pshared_ptr_t a1) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fget_size(a1); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t _upcr_all_fget_size(upcr_pshared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fget_size(a1); @@ -1160,6 +1880,15 @@ int __real__upcr_all_fpreallocate(upcr_pshared_ptr_t a1, upc_off_t a2) ; int __wrap__upcr_all_fpreallocate(upcr_pshared_ptr_t a1, upc_off_t a2) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fpreallocate(a1, a2); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _upcr_all_fpreallocate(upcr_pshared_ptr_t, upc_off_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fpreallocate(a1, a2); @@ -1177,6 +1906,15 @@ int __real__upcr_all_fcntl(upcr_pshared_ptr_t a1, int a2, void * a3) ; int __wrap__upcr_all_fcntl(upcr_pshared_ptr_t a1, int a2, void * a3) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fcntl(a1, a2, a3); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _upcr_all_fcntl(upcr_pshared_ptr_t, int, void *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fcntl(a1, a2, a3); @@ -1194,6 +1932,15 @@ upc_off_t __real__upcr_all_fread_local(upcr_pshared_ptr_t a1, void * a2, siz upc_off_t __wrap__upcr_all_fread_local(upcr_pshared_ptr_t a1, void * a2, size_t a3, size_t a4, upc_flag_t a5) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fread_local(a1, a2, a3, a4, a5); + } else { + 
tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t _upcr_all_fread_local(upcr_pshared_ptr_t, void *, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fread_local(a1, a2, a3, a4, a5); @@ -1211,6 +1958,15 @@ upc_off_t __real__upcr_all_fwrite_local(upcr_pshared_ptr_t a1, void * a2, si upc_off_t __wrap__upcr_all_fwrite_local(upcr_pshared_ptr_t a1, void * a2, size_t a3, size_t a4, upc_flag_t a5) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fwrite_local(a1, a2, a3, a4, a5); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t _upcr_all_fwrite_local(upcr_pshared_ptr_t, void *, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fwrite_local(a1, a2, a3, a4, a5); @@ -1228,6 +1984,15 @@ upc_off_t __real__upcr_all_fread_shared(upcr_pshared_ptr_t a1, bupc_sharedptr upc_off_t __wrap__upcr_all_fread_shared(upcr_pshared_ptr_t a1, bupc_sharedptr_t a2, size_t a3, size_t a4, size_t a5, upc_flag_t a6) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fread_shared(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t _upcr_all_fread_shared(upcr_pshared_ptr_t, bupc_sharedptr_t, size_t, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fread_shared(a1, a2, a3, a4, a5, a6); @@ -1245,6 +2010,15 @@ upc_off_t __real__upcr_all_fwrite_shared(upcr_pshared_ptr_t a1, bupc_sharedpt upc_off_t __wrap__upcr_all_fwrite_shared(upcr_pshared_ptr_t a1, bupc_sharedptr_t a2, size_t a3, size_t a4, size_t a5, upc_flag_t a6) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return 
__real__upcr_all_fwrite_shared(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t _upcr_all_fwrite_shared(upcr_pshared_ptr_t, bupc_sharedptr_t, size_t, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fwrite_shared(a1, a2, a3, a4, a5, a6); @@ -1262,6 +2036,15 @@ upc_off_t __real__upcr_all_fread_list_local(upcr_pshared_ptr_t a1, size_t a2 upc_off_t __wrap__upcr_all_fread_list_local(upcr_pshared_ptr_t a1, size_t a2, const upc_local_memvec_t * a3, size_t a4, const upc_filevec_t * a5, upc_flag_t a6) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fread_list_local(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t _upcr_all_fread_list_local(upcr_pshared_ptr_t, size_t, const upc_local_memvec_t *, size_t, const upc_filevec_t *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fread_list_local(a1, a2, a3, a4, a5, a6); @@ -1279,6 +2062,15 @@ upc_off_t __real__upcr_all_fread_list_shared(upcr_pshared_ptr_t a1, size_t a upc_off_t __wrap__upcr_all_fread_list_shared(upcr_pshared_ptr_t a1, size_t a2, const upc_shared_memvec_t * a3, size_t a4, const upc_filevec_t * a5, upc_flag_t a6) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fread_list_shared(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t _upcr_all_fread_list_shared(upcr_pshared_ptr_t, size_t, const upc_shared_memvec_t *, size_t, const upc_filevec_t *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fread_list_shared(a1, a2, a3, a4, a5, a6); @@ -1296,6 +2088,15 @@ upc_off_t __real__upcr_all_fwrite_list_local(upcr_pshared_ptr_t a1, size_t a 
upc_off_t __wrap__upcr_all_fwrite_list_local(upcr_pshared_ptr_t a1, size_t a2, const upc_local_memvec_t * a3, size_t a4, const upc_filevec_t * a5, upc_flag_t a6) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fwrite_list_local(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t _upcr_all_fwrite_list_local(upcr_pshared_ptr_t, size_t, const upc_local_memvec_t *, size_t, const upc_filevec_t *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fwrite_list_local(a1, a2, a3, a4, a5, a6); @@ -1313,6 +2114,15 @@ upc_off_t __real__upcr_all_fwrite_list_shared(upcr_pshared_ptr_t a1, size_t upc_off_t __wrap__upcr_all_fwrite_list_shared(upcr_pshared_ptr_t a1, size_t a2, const upc_shared_memvec_t * a3, size_t a4, const upc_filevec_t * a5, upc_flag_t a6) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fwrite_list_shared(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t _upcr_all_fwrite_list_shared(upcr_pshared_ptr_t, size_t, const upc_shared_memvec_t *, size_t, const upc_filevec_t *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fwrite_list_shared(a1, a2, a3, a4, a5, a6); @@ -1329,6 +2139,16 @@ upc_off_t __wrap__upcr_all_fwrite_list_shared(upcr_pshared_ptr_t a1, size_t void __real__upcr_all_fread_local_async(upcr_pshared_ptr_t a1, void * a2, size_t a3, size_t a4, upc_flag_t a5) ; void __wrap__upcr_all_fread_local_async(upcr_pshared_ptr_t a1, void * a2, size_t a3, size_t a4, upc_flag_t a5) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_fread_local_async(a1, a2, a3, a4, a5); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } 
+ } + TAU_PROFILE_TIMER(t,"void _upcr_all_fread_local_async(upcr_pshared_ptr_t, void *, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_fread_local_async(a1, a2, a3, a4, a5); @@ -1344,6 +2164,16 @@ void __wrap__upcr_all_fread_local_async(upcr_pshared_ptr_t a1, void * a2, si void __real__upcr_all_fwrite_local_async(upcr_pshared_ptr_t a1, void * a2, size_t a3, size_t a4, upc_flag_t a5) ; void __wrap__upcr_all_fwrite_local_async(upcr_pshared_ptr_t a1, void * a2, size_t a3, size_t a4, upc_flag_t a5) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_fwrite_local_async(a1, a2, a3, a4, a5); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_fwrite_local_async(upcr_pshared_ptr_t, void *, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_fwrite_local_async(a1, a2, a3, a4, a5); @@ -1359,6 +2189,16 @@ void __wrap__upcr_all_fwrite_local_async(upcr_pshared_ptr_t a1, void * a2, s void __real__upcr_all_fread_shared_async(upcr_pshared_ptr_t a1, bupc_sharedptr_t a2, size_t a3, size_t a4, size_t a5, upc_flag_t a6) ; void __wrap__upcr_all_fread_shared_async(upcr_pshared_ptr_t a1, bupc_sharedptr_t a2, size_t a3, size_t a4, size_t a5, upc_flag_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_fread_shared_async(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_fread_shared_async(upcr_pshared_ptr_t, bupc_sharedptr_t, size_t, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_fread_shared_async(a1, a2, a3, a4, a5, a6); @@ -1374,6 +2214,16 @@ void __wrap__upcr_all_fread_shared_async(upcr_pshared_ptr_t a1, bupc_sharedpt void __real__upcr_all_fwrite_shared_async(upcr_pshared_ptr_t a1, bupc_sharedptr_t a2, size_t a3, 
size_t a4, size_t a5, upc_flag_t a6) ; void __wrap__upcr_all_fwrite_shared_async(upcr_pshared_ptr_t a1, bupc_sharedptr_t a2, size_t a3, size_t a4, size_t a5, upc_flag_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_fwrite_shared_async(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_fwrite_shared_async(upcr_pshared_ptr_t, bupc_sharedptr_t, size_t, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_fwrite_shared_async(a1, a2, a3, a4, a5, a6); @@ -1389,6 +2239,16 @@ void __wrap__upcr_all_fwrite_shared_async(upcr_pshared_ptr_t a1, bupc_sharedp void __real__upcr_all_fread_list_local_async(upcr_pshared_ptr_t a1, size_t a2, const upc_local_memvec_t * a3, size_t a4, const upc_filevec_t * a5, upc_flag_t a6) ; void __wrap__upcr_all_fread_list_local_async(upcr_pshared_ptr_t a1, size_t a2, const upc_local_memvec_t * a3, size_t a4, const upc_filevec_t * a5, upc_flag_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_fread_list_local_async(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_fread_list_local_async(upcr_pshared_ptr_t, size_t, const upc_local_memvec_t *, size_t, const upc_filevec_t *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_fread_list_local_async(a1, a2, a3, a4, a5, a6); @@ -1404,6 +2264,16 @@ void __wrap__upcr_all_fread_list_local_async(upcr_pshared_ptr_t a1, size_t a void __real__upcr_all_fread_list_shared_async(upcr_pshared_ptr_t a1, size_t a2, const upc_shared_memvec_t * a3, size_t a4, const upc_filevec_t * a5, upc_flag_t a6) ; void __wrap__upcr_all_fread_list_shared_async(upcr_pshared_ptr_t a1, size_t a2, const upc_shared_memvec_t * a3, size_t a4, const upc_filevec_t * a5, upc_flag_t a6) { + if 
(tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_fread_list_shared_async(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_fread_list_shared_async(upcr_pshared_ptr_t, size_t, const upc_shared_memvec_t *, size_t, const upc_filevec_t *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_fread_list_shared_async(a1, a2, a3, a4, a5, a6); @@ -1419,6 +2289,16 @@ void __wrap__upcr_all_fread_list_shared_async(upcr_pshared_ptr_t a1, size_t void __real__upcr_all_fwrite_list_local_async(upcr_pshared_ptr_t a1, size_t a2, const upc_local_memvec_t * a3, size_t a4, const upc_filevec_t * a5, upc_flag_t a6) ; void __wrap__upcr_all_fwrite_list_local_async(upcr_pshared_ptr_t a1, size_t a2, const upc_local_memvec_t * a3, size_t a4, const upc_filevec_t * a5, upc_flag_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_fwrite_list_local_async(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_fwrite_list_local_async(upcr_pshared_ptr_t, size_t, const upc_local_memvec_t *, size_t, const upc_filevec_t *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_fwrite_list_local_async(a1, a2, a3, a4, a5, a6); @@ -1434,6 +2314,16 @@ void __wrap__upcr_all_fwrite_list_local_async(upcr_pshared_ptr_t a1, size_t void __real__upcr_all_fwrite_list_shared_async(upcr_pshared_ptr_t a1, size_t a2, const upc_shared_memvec_t * a3, size_t a4, const upc_filevec_t * a5, upc_flag_t a6) ; void __wrap__upcr_all_fwrite_list_shared_async(upcr_pshared_ptr_t a1, size_t a2, const upc_shared_memvec_t * a3, size_t a4, const upc_filevec_t * a5, upc_flag_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_fwrite_list_shared_async(a1, 
a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_fwrite_list_shared_async(upcr_pshared_ptr_t, size_t, const upc_shared_memvec_t *, size_t, const upc_filevec_t *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_fwrite_list_shared_async(a1, a2, a3, a4, a5, a6); @@ -1450,6 +2340,15 @@ upc_off_t __real__upcr_all_fwait_async(upcr_pshared_ptr_t a1) ; upc_off_t __wrap__upcr_all_fwait_async(upcr_pshared_ptr_t a1) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_fwait_async(a1); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t _upcr_all_fwait_async(upcr_pshared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_fwait_async(a1); @@ -1467,6 +2366,15 @@ upc_off_t __real__upcr_all_ftest_async(upcr_pshared_ptr_t a1, int * a2) ; upc_off_t __wrap__upcr_all_ftest_async(upcr_pshared_ptr_t a1, int * a2) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_ftest_async(a1, a2); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t _upcr_all_ftest_async(upcr_pshared_ptr_t, int *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_ftest_async(a1, a2); @@ -1484,6 +2392,15 @@ upcr_shared_ptr_t __real__upcr_local_alloc(size_t a1, size_t a2) ; upcr_shared_ptr_t __wrap__upcr_local_alloc(size_t a1, size_t a2) { upcr_shared_ptr_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_local_alloc(a1, a2); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upcr_shared_ptr_t _upcr_local_alloc(size_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_local_alloc(a1, a2); @@ -1501,6 
+2418,15 @@ upcr_shared_ptr_t __real__upcr_global_alloc(size_t a1, size_t a2) ; upcr_shared_ptr_t __wrap__upcr_global_alloc(size_t a1, size_t a2) { upcr_shared_ptr_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_global_alloc(a1, a2); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upcr_shared_ptr_t _upcr_global_alloc(size_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_global_alloc(a1, a2); @@ -1518,6 +2444,15 @@ upcr_shared_ptr_t __real__upcr_all_alloc(size_t a1, size_t a2) ; upcr_shared_ptr_t __wrap__upcr_all_alloc(size_t a1, size_t a2) { upcr_shared_ptr_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__upcr_all_alloc(a1, a2); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upcr_shared_ptr_t _upcr_all_alloc(size_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__upcr_all_alloc(a1, a2); @@ -1534,6 +2469,16 @@ upcr_shared_ptr_t __wrap__upcr_all_alloc(size_t a1, size_t a2) { void __real__upcr_free(upcr_shared_ptr_t a1) ; void __wrap__upcr_free(upcr_shared_ptr_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_free(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_free(upcr_shared_ptr_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_free(a1); @@ -1549,6 +2494,16 @@ void __wrap__upcr_free(upcr_shared_ptr_t a1) { void __real__upcr_all_free(upcr_shared_ptr_t a1) ; void __wrap__upcr_all_free(upcr_shared_ptr_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_all_free(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_all_free(upcr_shared_ptr_t) C", "", 
TAU_USER); TAU_PROFILE_START(t); __real__upcr_all_free(a1); @@ -1564,6 +2519,16 @@ void __wrap__upcr_all_free(upcr_shared_ptr_t a1) { void __real_upcri_getheapstats(const char * a1, char * a2, size_t a3) ; void __wrap_upcri_getheapstats(const char * a1, char * a2, size_t a3) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upcri_getheapstats(a1, a2, a3); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void upcri_getheapstats(const char *, char *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upcri_getheapstats(a1, a2, a3); @@ -1580,6 +2545,15 @@ gasnet_handlerentry_t * __real_upcri_get_handlertable() ; gasnet_handlerentry_t * __wrap_upcri_get_handlertable() { gasnet_handlerentry_t * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upcri_get_handlertable(); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"gasnet_handlerentry_t * upcri_get_handlertable() C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upcri_get_handlertable(); @@ -1597,6 +2571,15 @@ size_t __real_upcri_get_handlertable_count() ; size_t __wrap_upcri_get_handlertable_count() { size_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upcri_get_handlertable_count(); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"size_t upcri_get_handlertable_count() C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upcri_get_handlertable_count(); @@ -1614,9 +2597,33 @@ upcr_handle_t __real__upcr_do_memcpy(upcr_shared_ptr_t a1, upcr_shared_ptr_t upcr_handle_t __wrap__upcr_do_memcpy(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, size_t a3, int a4) { upcr_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return 
__real__upcr_do_memcpy(a1, a2, a3, a4); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"upcr_handle_t _upcr_do_memcpy(upcr_shared_ptr_t, upcr_shared_ptr_t, size_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); + size_t dst_thread = upcr_threadof_shared(a1); + size_t src_thread = upcr_threadof_shared(a2); + size_t my_thread = upcr_mythread(); + if (my_thread == src_thread) { + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, dst_thread, a3); + } else { + TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, dst_thread, a3, src_thread); + } + retval = __real__upcr_do_memcpy(a1, a2, a3, a4); + if (my_thread == src_thread) { + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, my_thread, a3, dst_thread); + } else { + TAU_TRACE_RECVMSG(TAU_UPC_TAGID, src_thread, a3); + } + TAU_PROFILE_STOP(t); return retval; @@ -1630,6 +2637,16 @@ upcr_handle_t __wrap__upcr_do_memcpy(upcr_shared_ptr_t a1, upcr_shared_ptr_t void __real__bupc_waitsync(bupc_handle_t a1) ; void __wrap__bupc_waitsync(bupc_handle_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__bupc_waitsync(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _bupc_waitsync(bupc_handle_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__bupc_waitsync(a1); @@ -1646,6 +2663,15 @@ int __real__bupc_trysync(bupc_handle_t a1) ; int __wrap__bupc_trysync(bupc_handle_t a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_trysync(a1); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _bupc_trysync(bupc_handle_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_trysync(a1); @@ -1662,6 +2688,16 @@ int __wrap__bupc_trysync(bupc_handle_t a1) { void __real__bupc_waitsync_all(bupc_handle_t * a1, size_t a2) ; void __wrap__bupc_waitsync_all(bupc_handle_t * a1, size_t a2) { + if (tau_upc_node == -1) { + 
tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__bupc_waitsync_all(a1, a2); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _bupc_waitsync_all(bupc_handle_t *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__bupc_waitsync_all(a1, a2); @@ -1678,6 +2714,15 @@ int __real__bupc_trysync_all(bupc_handle_t * a1, size_t a2) ; int __wrap__bupc_trysync_all(bupc_handle_t * a1, size_t a2) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_trysync_all(a1, a2); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _bupc_trysync_all(bupc_handle_t *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_trysync_all(a1, a2); @@ -1694,6 +2739,16 @@ int __wrap__bupc_trysync_all(bupc_handle_t * a1, size_t a2) { void __real__bupc_waitsync_some(bupc_handle_t * a1, size_t a2) ; void __wrap__bupc_waitsync_some(bupc_handle_t * a1, size_t a2) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__bupc_waitsync_some(a1, a2); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _bupc_waitsync_some(bupc_handle_t *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__bupc_waitsync_some(a1, a2); @@ -1710,6 +2765,15 @@ int __real__bupc_trysync_some(bupc_handle_t * a1, size_t a2) ; int __wrap__bupc_trysync_some(bupc_handle_t * a1, size_t a2) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_trysync_some(a1, a2); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"int _bupc_trysync_some(bupc_handle_t *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_trysync_some(a1, a2); @@ -1727,9 +2791,33 @@ bupc_handle_t __real__bupc_memcpy_async(upcr_shared_ptr_t a1, 
upcr_shared_ptr bupc_handle_t __wrap__bupc_memcpy_async(upcr_shared_ptr_t a1, upcr_shared_ptr_t a2, size_t a3) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memcpy_async(a1, a2, a3); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memcpy_async(upcr_shared_ptr_t, upcr_shared_ptr_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + size_t dst_thread = upcr_threadof_shared(a1); + size_t src_thread = upcr_threadof_shared(a2); + size_t my_thread = upcr_mythread(); + if (my_thread == src_thread) { + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, dst_thread, a3); + } else { + TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, dst_thread, a3, src_thread); + } + retval = __real__bupc_memcpy_async(a1, a2, a3); + if (my_thread == src_thread) { + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, my_thread, a3, dst_thread); + } else { + TAU_TRACE_RECVMSG(TAU_UPC_TAGID, src_thread, a3); + } + TAU_PROFILE_STOP(t); return retval; @@ -1744,9 +2832,20 @@ bupc_handle_t __real__bupc_memget_async(void * a1, upcr_shared_ptr_t a2, siz bupc_handle_t __wrap__bupc_memget_async(void * a1, upcr_shared_ptr_t a2, size_t a3) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memget_async(a1, a2, a3); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memget_async(void *, upcr_shared_ptr_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, upcr_mythread(), a3, upcr_threadof_shared(a2)); retval = __real__bupc_memget_async(a1, a2, a3); + TAU_TRACE_RECVMSG(TAU_UPC_TAGID, upcr_threadof_shared(a2), a3); TAU_PROFILE_STOP(t); return retval; @@ -1761,9 +2860,20 @@ bupc_handle_t __real__bupc_memput_async(upcr_shared_ptr_t a1, const void * a bupc_handle_t 
__wrap__bupc_memput_async(upcr_shared_ptr_t a1, const void * a2, size_t a3) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memput_async(a1, a2, a3); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memput_async(upcr_shared_ptr_t, const void *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, upcr_threadof_shared(a1), a3); retval = __real__bupc_memput_async(a1, a2, a3); + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, upcr_mythread(), a3, upcr_threadof_shared(a1)); TAU_PROFILE_STOP(t); return retval; @@ -1778,9 +2888,20 @@ bupc_handle_t __real__bupc_memset_async(upcr_shared_ptr_t a1, int a2, size_t bupc_handle_t __wrap__bupc_memset_async(upcr_shared_ptr_t a1, int a2, size_t a3) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memset_async(a1, a2, a3); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memset_async(upcr_shared_ptr_t, int, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, upcr_threadof_shared(a1), a3); retval = __real__bupc_memset_async(a1, a2, a3); + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, upcr_mythread(), a3, upcr_threadof_shared(a1)); TAU_PROFILE_STOP(t); return retval; @@ -1795,6 +2916,15 @@ bupc_handle_t __real__bupc_end_accessregion() ; bupc_handle_t __wrap__bupc_end_accessregion() { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_end_accessregion(); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_end_accessregion() C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_end_accessregion(); @@ -1812,6 +2942,15 @@ bupc_handle_t 
__real__bupc_memcpy_vlist_async(size_t a1, const bupc_smemvec_t bupc_handle_t __wrap__bupc_memcpy_vlist_async(size_t a1, const bupc_smemvec_t * a2, size_t a3, const bupc_smemvec_t * a4) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memcpy_vlist_async(a1, a2, a3, a4); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memcpy_vlist_async(size_t, const bupc_smemvec_t *, size_t, const bupc_smemvec_t *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_memcpy_vlist_async(a1, a2, a3, a4); @@ -1829,6 +2968,15 @@ bupc_handle_t __real__bupc_memput_vlist_async(size_t a1, const bupc_smemvec_t bupc_handle_t __wrap__bupc_memput_vlist_async(size_t a1, const bupc_smemvec_t * a2, size_t a3, const bupc_pmemvec_t * a4) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memput_vlist_async(a1, a2, a3, a4); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memput_vlist_async(size_t, const bupc_smemvec_t *, size_t, const bupc_pmemvec_t *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_memput_vlist_async(a1, a2, a3, a4); @@ -1846,6 +2994,15 @@ bupc_handle_t __real__bupc_memget_vlist_async(size_t a1, const bupc_pmemvec_t bupc_handle_t __wrap__bupc_memget_vlist_async(size_t a1, const bupc_pmemvec_t * a2, size_t a3, const bupc_smemvec_t * a4) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memget_vlist_async(a1, a2, a3, a4); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memget_vlist_async(size_t, const bupc_pmemvec_t *, size_t, const bupc_smemvec_t *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = 
__real__bupc_memget_vlist_async(a1, a2, a3, a4); @@ -1863,6 +3020,15 @@ bupc_handle_t __real__bupc_memcpy_ilist_async(size_t a1, const upcr_shared_pt bupc_handle_t __wrap__bupc_memcpy_ilist_async(size_t a1, const upcr_shared_ptr_t * a2, size_t a3, size_t a4, const upcr_shared_ptr_t * a5, size_t a6) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memcpy_ilist_async(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memcpy_ilist_async(size_t, const upcr_shared_ptr_t *, size_t, size_t, const upcr_shared_ptr_t *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_memcpy_ilist_async(a1, a2, a3, a4, a5, a6); @@ -1880,6 +3046,15 @@ bupc_handle_t __real__bupc_memput_ilist_async(size_t a1, const upcr_shared_pt bupc_handle_t __wrap__bupc_memput_ilist_async(size_t a1, const upcr_shared_ptr_t * a2, size_t a3, size_t a4, const void *const * a5, size_t a6) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memput_ilist_async(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memput_ilist_async(size_t, const upcr_shared_ptr_t *, size_t, size_t, const void *const *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_memput_ilist_async(a1, a2, a3, a4, a5, a6); @@ -1897,6 +3072,15 @@ bupc_handle_t __real__bupc_memget_ilist_async(size_t a1, void *const * a2, s bupc_handle_t __wrap__bupc_memget_ilist_async(size_t a1, void *const * a2, size_t a3, size_t a4, const upcr_shared_ptr_t * a5, size_t a6) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memget_ilist_async(a1, a2, a3, a4, a5, a6); + } else { + 
tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memget_ilist_async(size_t, void *const *, size_t, size_t, const upcr_shared_ptr_t *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_memget_ilist_async(a1, a2, a3, a4, a5, a6); @@ -1914,6 +3098,15 @@ bupc_handle_t __real__bupc_memcpy_fstrided_async(upcr_shared_ptr_t a1, size_t bupc_handle_t __wrap__bupc_memcpy_fstrided_async(upcr_shared_ptr_t a1, size_t a2, size_t a3, size_t a4, upcr_shared_ptr_t a5, size_t a6, size_t a7, size_t a8) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memcpy_fstrided_async(a1, a2, a3, a4, a5, a6, a7, a8); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memcpy_fstrided_async(upcr_shared_ptr_t, size_t, size_t, size_t, upcr_shared_ptr_t, size_t, size_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_memcpy_fstrided_async(a1, a2, a3, a4, a5, a6, a7, a8); @@ -1931,6 +3124,15 @@ bupc_handle_t __real__bupc_memput_fstrided_async(upcr_shared_ptr_t a1, size_t bupc_handle_t __wrap__bupc_memput_fstrided_async(upcr_shared_ptr_t a1, size_t a2, size_t a3, size_t a4, void * a5, size_t a6, size_t a7, size_t a8) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memput_fstrided_async(a1, a2, a3, a4, a5, a6, a7, a8); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memput_fstrided_async(upcr_shared_ptr_t, size_t, size_t, size_t, void *, size_t, size_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_memput_fstrided_async(a1, a2, a3, a4, a5, a6, a7, a8); @@ -1948,6 +3150,15 @@ bupc_handle_t __real__bupc_memget_fstrided_async(void * a1, size_t a2, size_ bupc_handle_t __wrap__bupc_memget_fstrided_async(void * a1, 
size_t a2, size_t a3, size_t a4, upcr_shared_ptr_t a5, size_t a6, size_t a7, size_t a8) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memget_fstrided_async(a1, a2, a3, a4, a5, a6, a7, a8); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memget_fstrided_async(void *, size_t, size_t, size_t, upcr_shared_ptr_t, size_t, size_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_memget_fstrided_async(a1, a2, a3, a4, a5, a6, a7, a8); @@ -1965,6 +3176,15 @@ bupc_handle_t __real__bupc_memcpy_strided_async(upcr_shared_ptr_t a1, const s bupc_handle_t __wrap__bupc_memcpy_strided_async(upcr_shared_ptr_t a1, const size_t * a2, upcr_shared_ptr_t a3, const size_t * a4, const size_t * a5, size_t a6) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memcpy_strided_async(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memcpy_strided_async(upcr_shared_ptr_t, const size_t *, upcr_shared_ptr_t, const size_t *, const size_t *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_memcpy_strided_async(a1, a2, a3, a4, a5, a6); @@ -1982,6 +3202,15 @@ bupc_handle_t __real__bupc_memput_strided_async(upcr_shared_ptr_t a1, const s bupc_handle_t __wrap__bupc_memput_strided_async(upcr_shared_ptr_t a1, const size_t * a2, const void * a3, const size_t * a4, const size_t * a5, size_t a6) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memput_strided_async(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memput_strided_async(upcr_shared_ptr_t, const size_t *, const void *, 
const size_t ()*, const size_t ()*, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_memput_strided_async(a1, a2, a3, a4, a5, a6); @@ -1999,6 +3228,15 @@ bupc_handle_t __real__bupc_memget_strided_async(void * a1, const size_t * a2 bupc_handle_t __wrap__bupc_memget_strided_async(void * a1, const size_t * a2, upcr_shared_ptr_t a3, const size_t * a4, const size_t * a5, size_t a6) { bupc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real__bupc_memget_strided_async(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"bupc_handle_t _bupc_memget_strided_async(void *, const size_t *, upcr_shared_ptr_t, const size_t *, const size_t *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real__bupc_memget_strided_async(a1, a2, a3, a4, a5, a6); @@ -2015,6 +3253,16 @@ bupc_handle_t __wrap__bupc_memget_strided_async(void * a1, const size_t * a2 void __real_upcr_startup_init(int * a1, char *** a2, upcr_thread_t a3, upcr_thread_t a4, const char * a5) ; void __wrap_upcr_startup_init(int * a1, char *** a2, upcr_thread_t a3, upcr_thread_t a4, const char * a5) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upcr_startup_init(a1, a2, a3, a4, a5); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void upcr_startup_init(int *, char ***, upcr_thread_t, upcr_thread_t, const char *) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upcr_startup_init(a1, a2, a3, a4, a5); @@ -2030,6 +3278,16 @@ void __wrap_upcr_startup_init(int * a1, char *** a2, upcr_thread_t a3, upcr void __real_upcr_startup_attach(uintptr_t a1, uintptr_t a2, int a3) ; void __wrap_upcr_startup_attach(uintptr_t a1, uintptr_t a2, int a3) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upcr_startup_attach(a1, a2, 
a3); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void upcr_startup_attach(uintptr_t, uintptr_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upcr_startup_attach(a1, a2, a3); @@ -2045,6 +3303,16 @@ void __wrap_upcr_startup_attach(uintptr_t a1, uintptr_t a2, int a3) { void __real_upcr_startup_spawn(int * a1, char *** a2, uintptr_t a3, uintptr_t a4, struct upcr_startup_spawnfuncs * a5) ; void __wrap_upcr_startup_spawn(int * a1, char *** a2, uintptr_t a3, uintptr_t a4, struct upcr_startup_spawnfuncs * a5) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upcr_startup_spawn(a1, a2, a3, a4, a5); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void upcr_startup_spawn(int *, char ***, uintptr_t, uintptr_t, struct upcr_startup_spawnfuncs *) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upcr_startup_spawn(a1, a2, a3, a4, a5); @@ -2060,6 +3328,16 @@ void __wrap_upcr_startup_spawn(int * a1, char *** a2, uintptr_t a3, uintptr void __real_upcr_exit(int a1) ; void __wrap_upcr_exit(int a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upcr_exit(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void upcr_exit(int) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upcr_exit(a1); @@ -2075,6 +3353,16 @@ void __wrap_upcr_exit(int a1) { void __real_upcr_global_exit(int a1) ; void __wrap_upcr_global_exit(int a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upcr_global_exit(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void upcr_global_exit(int) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upcr_global_exit(a1); @@ -2090,6 +3378,16 @@ void __wrap_upcr_global_exit(int a1) { void __real_upcri_do_exit(int a1) ; void 
__wrap_upcri_do_exit(int a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upcri_do_exit(a1); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void upcri_do_exit(int) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upcri_do_exit(a1); @@ -2105,6 +3403,16 @@ void __wrap_upcri_do_exit(int a1) { void __real__upcr_startup_shalloc(upcr_startup_shalloc_t * a1, size_t a2) ; void __wrap__upcr_startup_shalloc(upcr_startup_shalloc_t * a1, size_t a2) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_startup_shalloc(a1, a2); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_startup_shalloc(upcr_startup_shalloc_t *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_startup_shalloc(a1, a2); @@ -2120,6 +3428,16 @@ void __wrap__upcr_startup_shalloc(upcr_startup_shalloc_t * a1, size_t a2) { void __real__upcr_startup_pshalloc(upcr_startup_pshalloc_t * a1, size_t a2) ; void __wrap__upcr_startup_pshalloc(upcr_startup_pshalloc_t * a1, size_t a2) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_startup_pshalloc(a1, a2); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_startup_pshalloc(upcr_startup_pshalloc_t *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_startup_pshalloc(a1, a2); @@ -2135,6 +3453,16 @@ void __wrap__upcr_startup_pshalloc(upcr_startup_pshalloc_t * a1, size_t a2) void __real__upcr_startup_initarray(upcr_shared_ptr_t a1, void * a2, upcr_startup_arrayinit_diminfo_t * a3, size_t a4, size_t a5, size_t a6) ; void __wrap__upcr_startup_initarray(upcr_shared_ptr_t a1, void * a2, upcr_startup_arrayinit_diminfo_t * a3, size_t a4, size_t a5, size_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if 
(tau_upc_node == -1) { + __real__upcr_startup_initarray(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_startup_initarray(upcr_shared_ptr_t, void *, upcr_startup_arrayinit_diminfo_t *, size_t, size_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_startup_initarray(a1, a2, a3, a4, a5, a6); @@ -2150,6 +3478,16 @@ void __wrap__upcr_startup_initarray(upcr_shared_ptr_t a1, void * a2, upcr_st void __real__upcr_startup_initparray(upcr_pshared_ptr_t a1, void * a2, upcr_startup_arrayinit_diminfo_t * a3, size_t a4, size_t a5, size_t a6) ; void __wrap__upcr_startup_initparray(upcr_pshared_ptr_t a1, void * a2, upcr_startup_arrayinit_diminfo_t * a3, size_t a4, size_t a5, size_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real__upcr_startup_initparray(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,upcr_threads()); + } + } + TAU_PROFILE_TIMER(t,"void _upcr_startup_initparray(upcr_pshared_ptr_t, void *, upcr_startup_arrayinit_diminfo_t *, size_t, size_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real__upcr_startup_initparray(a1, a2, a3, a4, a5, a6); diff --git a/src/wrappers/upc/crayupc/cray_upc_runtime_wrapper/Makefile b/src/wrappers/upc/crayupc/cray_upc_runtime_wrapper/Makefile index f9f153b80..7f0c2a833 100644 --- a/src/wrappers/upc/crayupc/cray_upc_runtime_wrapper/Makefile +++ b/src/wrappers/upc/crayupc/cray_upc_runtime_wrapper/Makefile @@ -1,13 +1,16 @@ include ${TAU_MAKEFILE} CC=$(TAU_CC) +CFLAGS=$(TAU_DEFS) $(TAU_UPC_COMPILER_OPTIONS) $(TAU_INCLUDE) $(TAU_MPI_INCLUDE) -I.. +EXTRA_FLAGS= + +AR=$(TAU_AR) ARFLAGS=rcv -CFLAGS=$(TAU_DEFS) $(TAU_UPC_COMPILER_OPTIONS) $(UPCC_C_PREFIX)$(TAU_INTERNAL_FLAG1) $(TAU_INCLUDE) $(TAU_MPI_INCLUDE) -I.. 
libcray_upc_runtime_wrap.a: cray_upc_runtime_wrap.o - $(TAU_AR) $(ARFLAGS) $@ $< + $(AR) $(ARFLAGS) $@ $< cray_upc_runtime_wrap.o: wr.c - $(CC) $(CFLAGS) -c $< -o $@ + $(CC) $(CFLAGS) $(EXTRA_FLAGS) -c $< -o $@ clean: /bin/rm -f cray_upc_runtime_wrap.o libcray_upc_runtime_wrap.a diff --git a/src/wrappers/upc/crayupc/cray_upc_runtime_wrapper/link_options.tau.master b/src/wrappers/upc/crayupc/cray_upc_runtime_wrapper/link_options.tau.master new file mode 100644 index 000000000..bb3723e8d --- /dev/null +++ b/src/wrappers/upc/crayupc/cray_upc_runtime_wrapper/link_options.tau.master @@ -0,0 +1 @@ +-Wl,-wrap,upc_global_exit -Wl,-wrap,upc_global_alloc -Wl,-wrap,upc_all_alloc -Wl,-wrap,upc_alloc -Wl,-wrap,upc_free -Wl,-wrap,upc_threadof -Wl,-wrap,upc_phaseof -Wl,-wrap,upc_resetphase -Wl,-wrap,upc_addrfield -Wl,-wrap,upc_affinitysize -Wl,-wrap,upc_global_lock_alloc -Wl,-wrap,upc_all_lock_alloc -Wl,-wrap,upc_lock_free -Wl,-wrap,upc_lock -Wl,-wrap,upc_lock_attempt -Wl,-wrap,upc_unlock -Wl,-wrap,upc_memcpy -Wl,-wrap,upc_memget -Wl,-wrap,upc_memput -Wl,-wrap,upc_memset -Wl,-wrap,upc_all_fopen -Wl,-wrap,upc_all_fclose -Wl,-wrap,upc_all_fsync -Wl,-wrap,upc_all_fseek -Wl,-wrap,upc_all_fset_size -Wl,-wrap,upc_all_fget_size -Wl,-wrap,upc_all_fpreallocate -Wl,-wrap,upc_all_fcntl -Wl,-wrap,upc_all_fread_local -Wl,-wrap,upc_all_fread_local_async -Wl,-wrap,upc_all_fread_shared -Wl,-wrap,upc_all_fread_shared_async -Wl,-wrap,upc_all_fread_list_local -Wl,-wrap,upc_all_fread_list_local_async -Wl,-wrap,upc_all_fread_list_shared -Wl,-wrap,upc_all_fread_list_shared_async -Wl,-wrap,upc_all_fwrite_local -Wl,-wrap,upc_all_fwrite_local_async -Wl,-wrap,upc_all_fwrite_shared -Wl,-wrap,upc_all_fwrite_shared_async -Wl,-wrap,upc_all_fwrite_list_local -Wl,-wrap,upc_all_fwrite_list_local_async -Wl,-wrap,upc_all_fwrite_list_shared -Wl,-wrap,upc_all_fwrite_list_shared_async -Wl,-wrap,upc_all_fwait_async -Wl,-wrap,upc_all_ftest_async -Wl,-wrap,upc_all_broadcast -Wl,-wrap,upc_all_scatter 
-Wl,-wrap,upc_all_gather -Wl,-wrap,upc_all_gather_all -Wl,-wrap,upc_all_exchange -Wl,-wrap,upc_all_permute -Wl,-wrap,upc_all_reduceC -Wl,-wrap,upc_all_reduceUC -Wl,-wrap,upc_all_reduceS -Wl,-wrap,upc_all_reduceUS -Wl,-wrap,upc_all_reduceI -Wl,-wrap,upc_all_reduceUI -Wl,-wrap,upc_all_reduceL -Wl,-wrap,upc_all_reduceUL -Wl,-wrap,upc_all_reduceF -Wl,-wrap,upc_all_reduceD -Wl,-wrap,upc_all_reduceLD -Wl,-wrap,upc_all_prefix_reduceC -Wl,-wrap,upc_all_prefix_reduceUC -Wl,-wrap,upc_all_prefix_reduceS -Wl,-wrap,upc_all_prefix_reduceUS -Wl,-wrap,upc_all_prefix_reduceI -Wl,-wrap,upc_all_prefix_reduceUI -Wl,-wrap,upc_all_prefix_reduceL -Wl,-wrap,upc_all_prefix_reduceUL -Wl,-wrap,upc_all_prefix_reduceF -Wl,-wrap,upc_all_prefix_reduceD -Wl,-wrap,upc_all_prefix_reduceLD -Wl,-wrap,upc_all_free -Wl,-wrap,upc_all_lock_free -Wl,-wrap,upc_ticks_now -Wl,-wrap,upc_ticks_to_ns -Wl,-wrap,upc_cast -Wl,-wrap,upc_castable -Wl,-wrap,upc_thread_castable -Wl,-wrap,upc_memcpy_nb -Wl,-wrap,upc_memget_nb -Wl,-wrap,upc_memput_nb -Wl,-wrap,upc_sync_nb -Wl,-wrap,upc_test_nb -Wl,-wrap,upc_memcpy_nbi -Wl,-wrap,upc_memget_nbi -Wl,-wrap,upc_memput_nbi -Wl,-wrap,upc_nodeof @@WRAPPER_LIBRARY@@ diff --git a/src/wrappers/upc/crayupc/cray_upc_runtime_wrapper/wr.c b/src/wrappers/upc/crayupc/cray_upc_runtime_wrapper/wr.c index 6befc371f..725959202 100644 --- a/src/wrappers/upc/crayupc/cray_upc_runtime_wrapper/wr.c +++ b/src/wrappers/upc/crayupc/cray_upc_runtime_wrapper/wr.c @@ -2,6 +2,20 @@ #include #include +#pragma pupc off + +#ifdef __BERKELEY_UPC__ +#pragma UPCR NO_SRCPOS +#endif + +static int tau_upc_node = -1; +static int tau_upc_tagid_f = 0; +#define TAU_UPC_TAGID (tau_upc_tagid_f = (tau_upc_tagid_f & 255)) +#define TAU_UPC_TAGID_NEXT ((++tau_upc_tagid_f) & 255) + +void tau_totalnodes(int, int); + + /********************************************************** upc_global_exit **********************************************************/ @@ -9,6 +23,16 @@ void __real_upc_global_exit(int a1) ; void 
__wrap_upc_global_exit(int a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_global_exit(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_global_exit(int) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_global_exit(a1); @@ -25,6 +49,15 @@ shared void * __real_upc_global_alloc(size_t a1, size_t a2) ; shared void * __wrap_upc_global_alloc(size_t a1, size_t a2) { shared void * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_global_alloc(a1, a2); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"shared[1] void * upc_global_alloc(size_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_global_alloc(a1, a2); @@ -42,6 +75,15 @@ shared void * __real_upc_all_alloc(size_t a1, size_t a2) ; shared void * __wrap_upc_all_alloc(size_t a1, size_t a2) { shared void * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_alloc(a1, a2); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"shared[1] void * upc_all_alloc(size_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_alloc(a1, a2); @@ -59,6 +101,15 @@ shared void * __real_upc_alloc(size_t a1) ; shared void * __wrap_upc_alloc(size_t a1) { shared void * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_alloc(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"shared[1] void * upc_alloc(size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_alloc(a1); @@ -75,6 +126,16 @@ shared void * __wrap_upc_alloc(size_t a1) { void __real_upc_free(shared[1] void * a1) ; void __wrap_upc_free(shared[1] void * a1) { + if (tau_upc_node == -1) { + tau_upc_node = 
TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_free(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_free(shared[1] void *) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_free(a1); @@ -91,6 +152,15 @@ size_t __real_upc_threadof(shared[1] const void * a1) ; size_t __wrap_upc_threadof(shared[1] const void * a1) { size_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_threadof(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"size_t upc_threadof(shared[1] const void *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_threadof(a1); @@ -108,6 +178,15 @@ size_t __real_upc_phaseof(shared[1] const void * a1) ; size_t __wrap_upc_phaseof(shared[1] const void * a1) { size_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_phaseof(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"size_t upc_phaseof(shared[1] const void *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_phaseof(a1); @@ -125,6 +204,15 @@ shared void * __real_upc_resetphase(shared[1] const void * a1) ; shared void * __wrap_upc_resetphase(shared[1] const void * a1) { shared void * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_resetphase(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"shared[1] void * upc_resetphase(shared[1] const void *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_resetphase(a1); @@ -142,6 +230,15 @@ size_t __real_upc_addrfield(shared[1] const void * a1) ; size_t __wrap_upc_addrfield(shared[1] const void * a1) { size_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_addrfield(a1); + } else 
{ + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"size_t upc_addrfield(shared[1] const void *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_addrfield(a1); @@ -159,6 +256,15 @@ size_t __real_upc_affinitysize(size_t a1, size_t a2, size_t a3) ; size_t __wrap_upc_affinitysize(size_t a1, size_t a2, size_t a3) { size_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_affinitysize(a1, a2, a3); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"size_t upc_affinitysize(size_t, size_t, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_affinitysize(a1, a2, a3); @@ -176,6 +282,15 @@ upc_lock_t * __real_upc_global_lock_alloc() ; upc_lock_t * __wrap_upc_global_lock_alloc() { upc_lock_t * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_global_lock_alloc(); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_lock_t * upc_global_lock_alloc() C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_global_lock_alloc(); @@ -193,6 +308,15 @@ upc_lock_t * __real_upc_all_lock_alloc() ; upc_lock_t * __wrap_upc_all_lock_alloc() { upc_lock_t * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_lock_alloc(); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_lock_t * upc_all_lock_alloc() C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_lock_alloc(); @@ -209,6 +333,16 @@ upc_lock_t * __wrap_upc_all_lock_alloc() { void __real_upc_lock_free(upc_lock_t * a1) ; void __wrap_upc_lock_free(upc_lock_t * a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_lock_free(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void 
upc_lock_free(upc_lock_t *) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_lock_free(a1); @@ -224,6 +358,16 @@ void __wrap_upc_lock_free(upc_lock_t * a1) { void __real_upc_lock(upc_lock_t * a1) ; void __wrap_upc_lock(upc_lock_t * a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_lock(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_lock(upc_lock_t *) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_lock(a1); @@ -240,6 +384,15 @@ int __real_upc_lock_attempt(upc_lock_t * a1) ; int __wrap_upc_lock_attempt(upc_lock_t * a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_lock_attempt(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"int upc_lock_attempt(upc_lock_t *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_lock_attempt(a1); @@ -256,6 +409,16 @@ int __wrap_upc_lock_attempt(upc_lock_t * a1) { void __real_upc_unlock(upc_lock_t * a1) ; void __wrap_upc_unlock(upc_lock_t * a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_unlock(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_unlock(upc_lock_t *) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_unlock(a1); @@ -271,9 +434,34 @@ void __wrap_upc_unlock(upc_lock_t * a1) { void __real_upc_memcpy(shared[1] void *restrict a1, shared[1] const void *restrict a2, size_t a3) ; void __wrap_upc_memcpy(shared[1] void *restrict a1, shared[1] const void *restrict a2, size_t a3) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_memcpy(a1, a2, a3); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_memcpy(shared[1] void *restrict, shared[1] const void *restrict, size_t) C", "", 
TAU_USER); TAU_PROFILE_START(t); + size_t dst_thread = __real_upc_threadof(a1); + size_t src_thread = __real_upc_threadof(a2); + size_t my_thread = MYTHREAD; + if (my_thread == src_thread) { + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, dst_thread, a3); + } else { + TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, dst_thread, a3, src_thread); + } + __real_upc_memcpy(a1, a2, a3); + if (my_thread == src_thread) { + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, my_thread, a3, dst_thread); + } else { + TAU_TRACE_RECVMSG(TAU_UPC_TAGID, src_thread, a3); + } + TAU_PROFILE_STOP(t); } @@ -286,9 +474,21 @@ void __wrap_upc_memcpy(shared[1] void *restrict a1, shared[1] const void *res void __real_upc_memget(void *restrict a1, shared[1] const void *restrict a2, size_t a3) ; void __wrap_upc_memget(void *restrict a1, shared[1] const void *restrict a2, size_t a3) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_memget(a1, a2, a3); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_memget(void *restrict, shared[1] const void *restrict, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, MYTHREAD, a3, __real_upc_threadof(a2)); __real_upc_memget(a1, a2, a3); + TAU_TRACE_RECVMSG(TAU_UPC_TAGID, __real_upc_threadof(a2), a3); TAU_PROFILE_STOP(t); } @@ -301,9 +501,21 @@ void __wrap_upc_memget(void *restrict a1, shared[1] const void *restrict a2, void __real_upc_memput(shared[1] void *restrict a1, const void *restrict a2, size_t a3) ; void __wrap_upc_memput(shared[1] void *restrict a1, const void *restrict a2, size_t a3) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_memput(a1, a2, a3); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_memput(shared[1] void *restrict, const void *restrict, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + 
TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, __real_upc_threadof(a1), a3); __real_upc_memput(a1, a2, a3); + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, MYTHREAD, a3, __real_upc_threadof(a1)); TAU_PROFILE_STOP(t); } @@ -316,9 +528,21 @@ void __wrap_upc_memput(shared[1] void *restrict a1, const void *restrict a2, void __real_upc_memset(shared[1] void * a1, int a2, size_t a3) ; void __wrap_upc_memset(shared[1] void * a1, int a2, size_t a3) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_memset(a1, a2, a3); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_memset(shared[1] void *, int, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, __real_upc_threadof(a1), a3); __real_upc_memset(a1, a2, a3); + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, MYTHREAD, a3, __real_upc_threadof(a1)); TAU_PROFILE_STOP(t); } @@ -332,6 +556,15 @@ upc_file_t * __real_upc_all_fopen(const char * a1, int a2, size_t a3, const upc_file_t * __wrap_upc_all_fopen(const char * a1, int a2, size_t a3, const struct upc_hint * a4) { upc_file_t * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fopen(a1, a2, a3, a4); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_file_t * upc_all_fopen(const char *, int, size_t, const struct upc_hint *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fopen(a1, a2, a3, a4); @@ -349,6 +582,15 @@ int __real_upc_all_fclose(upc_file_t * a1) ; int __wrap_upc_all_fclose(upc_file_t * a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fclose(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"int upc_all_fclose(upc_file_t *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fclose(a1); @@ -366,6 +608,15 
@@ int __real_upc_all_fsync(upc_file_t * a1) ; int __wrap_upc_all_fsync(upc_file_t * a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fsync(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"int upc_all_fsync(upc_file_t *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fsync(a1); @@ -383,6 +634,15 @@ upc_off_t __real_upc_all_fseek(upc_file_t * a1, upc_off_t a2, int a3) ; upc_off_t __wrap_upc_all_fseek(upc_file_t * a1, upc_off_t a2, int a3) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fseek(a1, a2, a3); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t upc_all_fseek(upc_file_t *, upc_off_t, int) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fseek(a1, a2, a3); @@ -400,6 +660,15 @@ int __real_upc_all_fset_size(upc_file_t * a1, upc_off_t a2) ; int __wrap_upc_all_fset_size(upc_file_t * a1, upc_off_t a2) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fset_size(a1, a2); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"int upc_all_fset_size(upc_file_t *, upc_off_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fset_size(a1, a2); @@ -417,6 +686,15 @@ upc_off_t __real_upc_all_fget_size(upc_file_t * a1) ; upc_off_t __wrap_upc_all_fget_size(upc_file_t * a1) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fget_size(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t upc_all_fget_size(upc_file_t *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fget_size(a1); @@ -434,6 +712,15 @@ int 
__real_upc_all_fpreallocate(upc_file_t * a1, upc_off_t a2) ; int __wrap_upc_all_fpreallocate(upc_file_t * a1, upc_off_t a2) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fpreallocate(a1, a2); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"int upc_all_fpreallocate(upc_file_t *, upc_off_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fpreallocate(a1, a2); @@ -451,6 +738,15 @@ int __real_upc_all_fcntl(upc_file_t * a1, int a2, void * a3) ; int __wrap_upc_all_fcntl(upc_file_t * a1, int a2, void * a3) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fcntl(a1, a2, a3); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"int upc_all_fcntl(upc_file_t *, int, void *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fcntl(a1, a2, a3); @@ -468,6 +764,15 @@ upc_off_t __real_upc_all_fread_local(upc_file_t * a1, void * a2, size_t a3, upc_off_t __wrap_upc_all_fread_local(upc_file_t * a1, void * a2, size_t a3, size_t a4, upc_flag_t a5) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fread_local(a1, a2, a3, a4, a5); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t upc_all_fread_local(upc_file_t *, void *, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fread_local(a1, a2, a3, a4, a5); @@ -484,6 +789,16 @@ upc_off_t __wrap_upc_all_fread_local(upc_file_t * a1, void * a2, size_t a3, void __real_upc_all_fread_local_async(upc_file_t * a1, void * a2, size_t a3, size_t a4, upc_flag_t a5) ; void __wrap_upc_all_fread_local_async(upc_file_t * a1, void * a2, size_t a3, size_t a4, upc_flag_t a5) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + 
if (tau_upc_node == -1) { + __real_upc_all_fread_local_async(a1, a2, a3, a4, a5); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_fread_local_async(upc_file_t *, void *, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_fread_local_async(a1, a2, a3, a4, a5); @@ -500,6 +815,15 @@ upc_off_t __real_upc_all_fread_shared(upc_file_t * a1, shared[1] void * a2, upc_off_t __wrap_upc_all_fread_shared(upc_file_t * a1, shared[1] void * a2, size_t a3, size_t a4, size_t a5, upc_flag_t a6) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fread_shared(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t upc_all_fread_shared(upc_file_t *, shared[1] void *, size_t, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fread_shared(a1, a2, a3, a4, a5, a6); @@ -516,6 +840,16 @@ upc_off_t __wrap_upc_all_fread_shared(upc_file_t * a1, shared[1] void * a2, void __real_upc_all_fread_shared_async(upc_file_t * a1, shared[1] void * a2, size_t a3, size_t a4, size_t a5, upc_flag_t a6) ; void __wrap_upc_all_fread_shared_async(upc_file_t * a1, shared[1] void * a2, size_t a3, size_t a4, size_t a5, upc_flag_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_fread_shared_async(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_fread_shared_async(upc_file_t *, shared[1] void *, size_t, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_fread_shared_async(a1, a2, a3, a4, a5, a6); @@ -532,6 +866,15 @@ upc_off_t __real_upc_all_fread_list_local(upc_file_t * a1, size_t a2, const upc_off_t __wrap_upc_all_fread_list_local(upc_file_t * a1, size_t a2, const struct 
upc_local_memvec * a3, size_t a4, const struct upc_filevec * a5, upc_flag_t a6) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fread_list_local(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t upc_all_fread_list_local(upc_file_t *, size_t, const struct upc_local_memvec *, size_t, const struct upc_filevec *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fread_list_local(a1, a2, a3, a4, a5, a6); @@ -548,6 +891,16 @@ upc_off_t __wrap_upc_all_fread_list_local(upc_file_t * a1, size_t a2, const void __real_upc_all_fread_list_local_async(upc_file_t * a1, size_t a2, const struct upc_local_memvec * a3, size_t a4, const struct upc_filevec * a5, upc_flag_t a6) ; void __wrap_upc_all_fread_list_local_async(upc_file_t * a1, size_t a2, const struct upc_local_memvec * a3, size_t a4, const struct upc_filevec * a5, upc_flag_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_fread_list_local_async(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_fread_list_local_async(upc_file_t *, size_t, const struct upc_local_memvec *, size_t, const struct upc_filevec *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_fread_list_local_async(a1, a2, a3, a4, a5, a6); @@ -564,6 +917,15 @@ upc_off_t __real_upc_all_fread_list_shared(upc_file_t * a1, size_t a2, const upc_off_t __wrap_upc_all_fread_list_shared(upc_file_t * a1, size_t a2, const struct upc_shared_memvec * a3, size_t a4, const struct upc_filevec * a5, upc_flag_t a6) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fread_list_shared(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,THREADS); + } + } + 
TAU_PROFILE_TIMER(t,"upc_off_t upc_all_fread_list_shared(upc_file_t *, size_t, const struct upc_shared_memvec *, size_t, const struct upc_filevec *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fread_list_shared(a1, a2, a3, a4, a5, a6); @@ -580,6 +942,16 @@ upc_off_t __wrap_upc_all_fread_list_shared(upc_file_t * a1, size_t a2, const void __real_upc_all_fread_list_shared_async(upc_file_t * a1, size_t a2, const struct upc_shared_memvec * a3, size_t a4, const struct upc_filevec * a5, upc_flag_t a6) ; void __wrap_upc_all_fread_list_shared_async(upc_file_t * a1, size_t a2, const struct upc_shared_memvec * a3, size_t a4, const struct upc_filevec * a5, upc_flag_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_fread_list_shared_async(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_fread_list_shared_async(upc_file_t *, size_t, const struct upc_shared_memvec *, size_t, const struct upc_filevec *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_fread_list_shared_async(a1, a2, a3, a4, a5, a6); @@ -596,6 +968,15 @@ upc_off_t __real_upc_all_fwrite_local(upc_file_t * a1, void * a2, size_t a3 upc_off_t __wrap_upc_all_fwrite_local(upc_file_t * a1, void * a2, size_t a3, size_t a4, upc_flag_t a5) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fwrite_local(a1, a2, a3, a4, a5); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t upc_all_fwrite_local(upc_file_t *, void *, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fwrite_local(a1, a2, a3, a4, a5); @@ -612,6 +993,16 @@ upc_off_t __wrap_upc_all_fwrite_local(upc_file_t * a1, void * a2, size_t a3 void __real_upc_all_fwrite_local_async(upc_file_t * a1, void * a2, size_t a3, 
size_t a4, upc_flag_t a5) ; void __wrap_upc_all_fwrite_local_async(upc_file_t * a1, void * a2, size_t a3, size_t a4, upc_flag_t a5) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_fwrite_local_async(a1, a2, a3, a4, a5); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_fwrite_local_async(upc_file_t *, void *, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_fwrite_local_async(a1, a2, a3, a4, a5); @@ -628,6 +1019,15 @@ upc_off_t __real_upc_all_fwrite_shared(upc_file_t * a1, shared[1] void * a2, upc_off_t __wrap_upc_all_fwrite_shared(upc_file_t * a1, shared[1] void * a2, size_t a3, size_t a4, size_t a5, upc_flag_t a6) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fwrite_shared(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t upc_all_fwrite_shared(upc_file_t *, shared[1] void *, size_t, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fwrite_shared(a1, a2, a3, a4, a5, a6); @@ -644,6 +1044,16 @@ upc_off_t __wrap_upc_all_fwrite_shared(upc_file_t * a1, shared[1] void * a2, void __real_upc_all_fwrite_shared_async(upc_file_t * a1, shared[1] void * a2, size_t a3, size_t a4, size_t a5, upc_flag_t a6) ; void __wrap_upc_all_fwrite_shared_async(upc_file_t * a1, shared[1] void * a2, size_t a3, size_t a4, size_t a5, upc_flag_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_fwrite_shared_async(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_fwrite_shared_async(upc_file_t *, shared[1] void *, size_t, size_t, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); 
__real_upc_all_fwrite_shared_async(a1, a2, a3, a4, a5, a6); @@ -660,6 +1070,15 @@ upc_off_t __real_upc_all_fwrite_list_local(upc_file_t * a1, size_t a2, const upc_off_t __wrap_upc_all_fwrite_list_local(upc_file_t * a1, size_t a2, const struct upc_local_memvec * a3, size_t a4, const struct upc_filevec * a5, upc_flag_t a6) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fwrite_list_local(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t upc_all_fwrite_list_local(upc_file_t *, size_t, const struct upc_local_memvec *, size_t, const struct upc_filevec *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fwrite_list_local(a1, a2, a3, a4, a5, a6); @@ -676,6 +1095,16 @@ upc_off_t __wrap_upc_all_fwrite_list_local(upc_file_t * a1, size_t a2, const void __real_upc_all_fwrite_list_local_async(upc_file_t * a1, size_t a2, const struct upc_local_memvec * a3, size_t a4, const struct upc_filevec * a5, upc_flag_t a6) ; void __wrap_upc_all_fwrite_list_local_async(upc_file_t * a1, size_t a2, const struct upc_local_memvec * a3, size_t a4, const struct upc_filevec * a5, upc_flag_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_fwrite_list_local_async(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_fwrite_list_local_async(upc_file_t *, size_t, const struct upc_local_memvec *, size_t, const struct upc_filevec *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_fwrite_list_local_async(a1, a2, a3, a4, a5, a6); @@ -692,6 +1121,15 @@ upc_off_t __real_upc_all_fwrite_list_shared(upc_file_t * a1, size_t a2, cons upc_off_t __wrap_upc_all_fwrite_list_shared(upc_file_t * a1, size_t a2, const struct upc_shared_memvec * a3, size_t a4, const struct upc_filevec * 
a5, upc_flag_t a6) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fwrite_list_shared(a1, a2, a3, a4, a5, a6); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t upc_all_fwrite_list_shared(upc_file_t *, size_t, const struct upc_shared_memvec *, size_t, const struct upc_filevec *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fwrite_list_shared(a1, a2, a3, a4, a5, a6); @@ -708,6 +1146,16 @@ upc_off_t __wrap_upc_all_fwrite_list_shared(upc_file_t * a1, size_t a2, cons void __real_upc_all_fwrite_list_shared_async(upc_file_t * a1, size_t a2, const struct upc_shared_memvec * a3, size_t a4, const struct upc_filevec * a5, upc_flag_t a6) ; void __wrap_upc_all_fwrite_list_shared_async(upc_file_t * a1, size_t a2, const struct upc_shared_memvec * a3, size_t a4, const struct upc_filevec * a5, upc_flag_t a6) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_fwrite_list_shared_async(a1, a2, a3, a4, a5, a6); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_fwrite_list_shared_async(upc_file_t *, size_t, const struct upc_shared_memvec *, size_t, const struct upc_filevec *, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_fwrite_list_shared_async(a1, a2, a3, a4, a5, a6); @@ -724,6 +1172,15 @@ upc_off_t __real_upc_all_fwait_async(upc_file_t * a1) ; upc_off_t __wrap_upc_all_fwait_async(upc_file_t * a1) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_fwait_async(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t upc_all_fwait_async(upc_file_t *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_fwait_async(a1); @@ -741,6 +1198,15 @@ upc_off_t 
__real_upc_all_ftest_async(upc_file_t * a1, int * a2) ; upc_off_t __wrap_upc_all_ftest_async(upc_file_t * a1, int * a2) { upc_off_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_ftest_async(a1, a2); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_off_t upc_all_ftest_async(upc_file_t *, int *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_all_ftest_async(a1, a2); @@ -757,6 +1223,16 @@ upc_off_t __wrap_upc_all_ftest_async(upc_file_t * a1, int * a2) { void __real_upc_all_broadcast(shared[1] void * a1, shared[1] const void * a2, size_t a3, upc_flag_t a4) ; void __wrap_upc_all_broadcast(shared[1] void * a1, shared[1] const void * a2, size_t a3, upc_flag_t a4) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_broadcast(a1, a2, a3, a4); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_broadcast(shared[1] void *, shared[1] const void *, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_broadcast(a1, a2, a3, a4); @@ -772,6 +1248,16 @@ void __wrap_upc_all_broadcast(shared[1] void * a1, shared[1] const void * a2 void __real_upc_all_scatter(shared[1] void * a1, shared[1] const void * a2, size_t a3, upc_flag_t a4) ; void __wrap_upc_all_scatter(shared[1] void * a1, shared[1] const void * a2, size_t a3, upc_flag_t a4) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_scatter(a1, a2, a3, a4); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_scatter(shared[1] void *, shared[1] const void *, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_scatter(a1, a2, a3, a4); @@ -787,6 +1273,16 @@ void __wrap_upc_all_scatter(shared[1] void * a1, shared[1] const void * a2, void 
__real_upc_all_gather(shared[1] void * a1, shared[1] const void * a2, size_t a3, upc_flag_t a4) ; void __wrap_upc_all_gather(shared[1] void * a1, shared[1] const void * a2, size_t a3, upc_flag_t a4) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_gather(a1, a2, a3, a4); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_gather(shared[1] void *, shared[1] const void *, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_gather(a1, a2, a3, a4); @@ -802,6 +1298,16 @@ void __wrap_upc_all_gather(shared[1] void * a1, shared[1] const void * a2, s void __real_upc_all_gather_all(shared[1] void * a1, shared[1] const void * a2, size_t a3, upc_flag_t a4) ; void __wrap_upc_all_gather_all(shared[1] void * a1, shared[1] const void * a2, size_t a3, upc_flag_t a4) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_gather_all(a1, a2, a3, a4); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_gather_all(shared[1] void *, shared[1] const void *, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_gather_all(a1, a2, a3, a4); @@ -817,6 +1323,16 @@ void __wrap_upc_all_gather_all(shared[1] void * a1, shared[1] const void * a void __real_upc_all_exchange(shared[1] void * a1, shared[1] const void * a2, size_t a3, upc_flag_t a4) ; void __wrap_upc_all_exchange(shared[1] void * a1, shared[1] const void * a2, size_t a3, upc_flag_t a4) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_exchange(a1, a2, a3, a4); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_exchange(shared[1] void *, shared[1] const void *, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_exchange(a1, a2, a3, a4); @@ -832,6 
+1348,16 @@ void __wrap_upc_all_exchange(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_permute(shared[1] void * a1, shared[1] const void * a2, shared[1] const int * a3, size_t a4, upc_flag_t a5) ; void __wrap_upc_all_permute(shared[1] void * a1, shared[1] const void * a2, shared[1] const int * a3, size_t a4, upc_flag_t a5) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_permute(a1, a2, a3, a4, a5); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_permute(shared[1] void *, shared[1] const void *, shared[1] const int *, size_t, upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_permute(a1, a2, a3, a4, a5); @@ -847,6 +1373,16 @@ void __wrap_upc_all_permute(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_reduceC(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, signed char (*a6) (signed char, signed char) , upc_flag_t a7) ; void __wrap_upc_all_reduceC(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, signed char (*a6) (signed char, signed char) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_reduceC(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_reduceC(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, signed char (*) (signed char, signed char), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_reduceC(a1, a2, a3, a4, a5, a6, a7); @@ -862,6 +1398,16 @@ void __wrap_upc_all_reduceC(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_reduceUC(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned char (*a6) (unsigned char, unsigned char) , upc_flag_t a7) ; void 
__wrap_upc_all_reduceUC(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned char (*a6) (unsigned char, unsigned char) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_reduceUC(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_reduceUC(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, unsigned char (*) (unsigned char, unsigned char), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_reduceUC(a1, a2, a3, a4, a5, a6, a7); @@ -877,6 +1423,16 @@ void __wrap_upc_all_reduceUC(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_reduceS(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, short (*a6) (short, short) , upc_flag_t a7) ; void __wrap_upc_all_reduceS(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, short (*a6) (short, short) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_reduceS(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_reduceS(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, short (*) (short, short), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_reduceS(a1, a2, a3, a4, a5, a6, a7); @@ -892,6 +1448,16 @@ void __wrap_upc_all_reduceS(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_reduceUS(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned short (*a6) (unsigned short, unsigned short) , upc_flag_t a7) ; void __wrap_upc_all_reduceUS(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned short (*a6) (unsigned short, unsigned short) , upc_flag_t a7) { + if 
(tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_reduceUS(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_reduceUS(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, unsigned short (*) (unsigned short, unsigned short), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_reduceUS(a1, a2, a3, a4, a5, a6, a7); @@ -907,6 +1473,16 @@ void __wrap_upc_all_reduceUS(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_reduceI(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, int (*a6) (int, int) , upc_flag_t a7) ; void __wrap_upc_all_reduceI(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, int (*a6) (int, int) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_reduceI(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_reduceI(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, int (*) (int, int), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_reduceI(a1, a2, a3, a4, a5, a6, a7); @@ -922,6 +1498,16 @@ void __wrap_upc_all_reduceI(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_reduceUI(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned int (*a6) (unsigned int, unsigned int) , upc_flag_t a7) ; void __wrap_upc_all_reduceUI(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned int (*a6) (unsigned int, unsigned int) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_reduceUI(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + 
TAU_PROFILE_TIMER(t,"void upc_all_reduceUI(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, unsigned int (*) (unsigned int, unsigned int), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_reduceUI(a1, a2, a3, a4, a5, a6, a7); @@ -937,6 +1523,16 @@ void __wrap_upc_all_reduceUI(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_reduceL(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, long (*a6) (long, long) , upc_flag_t a7) ; void __wrap_upc_all_reduceL(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, long (*a6) (long, long) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_reduceL(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_reduceL(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, long (*) (long, long), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_reduceL(a1, a2, a3, a4, a5, a6, a7); @@ -952,6 +1548,16 @@ void __wrap_upc_all_reduceL(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_reduceUL(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned long (*a6) (unsigned long, unsigned long) , upc_flag_t a7) ; void __wrap_upc_all_reduceUL(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned long (*a6) (unsigned long, unsigned long) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_reduceUL(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_reduceUL(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, unsigned long (*) (unsigned long, unsigned long), upc_flag_t) C", "", TAU_USER); 
TAU_PROFILE_START(t); __real_upc_all_reduceUL(a1, a2, a3, a4, a5, a6, a7); @@ -967,6 +1573,16 @@ void __wrap_upc_all_reduceUL(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_reduceF(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, float (*a6) (float, float) , upc_flag_t a7) ; void __wrap_upc_all_reduceF(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, float (*a6) (float, float) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_reduceF(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_reduceF(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, float (*) (float, float), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_reduceF(a1, a2, a3, a4, a5, a6, a7); @@ -982,6 +1598,16 @@ void __wrap_upc_all_reduceF(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_reduceD(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, double (*a6) (double, double) , upc_flag_t a7) ; void __wrap_upc_all_reduceD(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, double (*a6) (double, double) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_reduceD(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_reduceD(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, double (*) (double, double), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_reduceD(a1, a2, a3, a4, a5, a6, a7); @@ -997,6 +1623,16 @@ void __wrap_upc_all_reduceD(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_reduceLD(shared[1] void * a1, shared[1] const void * a2, upc_op_t 
a3, size_t a4, size_t a5, long double (*a6) (long double, long double) , upc_flag_t a7) ; void __wrap_upc_all_reduceLD(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, long double (*a6) (long double, long double) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_reduceLD(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_reduceLD(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, long double (*) (long double, long double), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_reduceLD(a1, a2, a3, a4, a5, a6, a7); @@ -1012,6 +1648,16 @@ void __wrap_upc_all_reduceLD(shared[1] void * a1, shared[1] const void * a2, void __real_upc_all_prefix_reduceC(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, signed char (*a6) (signed char, signed char) , upc_flag_t a7) ; void __wrap_upc_all_prefix_reduceC(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, signed char (*a6) (signed char, signed char) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_prefix_reduceC(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_prefix_reduceC(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, signed char (*) (signed char, signed char), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_prefix_reduceC(a1, a2, a3, a4, a5, a6, a7); @@ -1027,6 +1673,16 @@ void __wrap_upc_all_prefix_reduceC(shared[1] void * a1, shared[1] const void void __real_upc_all_prefix_reduceUC(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned char (*a6) (unsigned char, unsigned char) , upc_flag_t a7) ; void 
__wrap_upc_all_prefix_reduceUC(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned char (*a6) (unsigned char, unsigned char) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_prefix_reduceUC(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_prefix_reduceUC(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, unsigned char (*) (unsigned char, unsigned char), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_prefix_reduceUC(a1, a2, a3, a4, a5, a6, a7); @@ -1042,6 +1698,16 @@ void __wrap_upc_all_prefix_reduceUC(shared[1] void * a1, shared[1] const void void __real_upc_all_prefix_reduceS(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, short (*a6) (short, short) , upc_flag_t a7) ; void __wrap_upc_all_prefix_reduceS(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, short (*a6) (short, short) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_prefix_reduceS(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_prefix_reduceS(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, short (*) (short, short), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_prefix_reduceS(a1, a2, a3, a4, a5, a6, a7); @@ -1057,6 +1723,16 @@ void __wrap_upc_all_prefix_reduceS(shared[1] void * a1, shared[1] const void void __real_upc_all_prefix_reduceUS(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned short (*a6) (unsigned short, unsigned short) , upc_flag_t a7) ; void __wrap_upc_all_prefix_reduceUS(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, 
unsigned short (*a6) (unsigned short, unsigned short) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_prefix_reduceUS(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_prefix_reduceUS(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, unsigned short (*) (unsigned short, unsigned short), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_prefix_reduceUS(a1, a2, a3, a4, a5, a6, a7); @@ -1072,6 +1748,16 @@ void __wrap_upc_all_prefix_reduceUS(shared[1] void * a1, shared[1] const void void __real_upc_all_prefix_reduceI(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, int (*a6) (int, int) , upc_flag_t a7) ; void __wrap_upc_all_prefix_reduceI(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, int (*a6) (int, int) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_prefix_reduceI(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_prefix_reduceI(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, int (*) (int, int), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_prefix_reduceI(a1, a2, a3, a4, a5, a6, a7); @@ -1087,6 +1773,16 @@ void __wrap_upc_all_prefix_reduceI(shared[1] void * a1, shared[1] const void void __real_upc_all_prefix_reduceUI(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned int (*a6) (unsigned int, unsigned int) , upc_flag_t a7) ; void __wrap_upc_all_prefix_reduceUI(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned int (*a6) (unsigned int, unsigned int) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = 
TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_prefix_reduceUI(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_prefix_reduceUI(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, unsigned int (*) (unsigned int, unsigned int), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_prefix_reduceUI(a1, a2, a3, a4, a5, a6, a7); @@ -1102,6 +1798,16 @@ void __wrap_upc_all_prefix_reduceUI(shared[1] void * a1, shared[1] const void void __real_upc_all_prefix_reduceL(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, long (*a6) (long, long) , upc_flag_t a7) ; void __wrap_upc_all_prefix_reduceL(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, long (*a6) (long, long) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_prefix_reduceL(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_prefix_reduceL(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, long (*) (long, long), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_prefix_reduceL(a1, a2, a3, a4, a5, a6, a7); @@ -1117,6 +1823,16 @@ void __wrap_upc_all_prefix_reduceL(shared[1] void * a1, shared[1] const void void __real_upc_all_prefix_reduceUL(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned long (*a6) (unsigned long, unsigned long) , upc_flag_t a7) ; void __wrap_upc_all_prefix_reduceUL(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, unsigned long (*a6) (unsigned long, unsigned long) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_prefix_reduceUL(a1, a2, a3, a4, a5, a6, a7); + return; + } 
else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_prefix_reduceUL(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, unsigned long (*) (unsigned long, unsigned long), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_prefix_reduceUL(a1, a2, a3, a4, a5, a6, a7); @@ -1132,6 +1848,16 @@ void __wrap_upc_all_prefix_reduceUL(shared[1] void * a1, shared[1] const void void __real_upc_all_prefix_reduceF(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, float (*a6) (float, float) , upc_flag_t a7) ; void __wrap_upc_all_prefix_reduceF(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, float (*a6) (float, float) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_prefix_reduceF(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_prefix_reduceF(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, float (*) (float, float), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_prefix_reduceF(a1, a2, a3, a4, a5, a6, a7); @@ -1147,6 +1873,16 @@ void __wrap_upc_all_prefix_reduceF(shared[1] void * a1, shared[1] const void void __real_upc_all_prefix_reduceD(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, double (*a6) (double, double) , upc_flag_t a7) ; void __wrap_upc_all_prefix_reduceD(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, double (*a6) (double, double) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_prefix_reduceD(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_prefix_reduceD(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, 
double (*) (double, double), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_prefix_reduceD(a1, a2, a3, a4, a5, a6, a7); @@ -1162,6 +1898,16 @@ void __wrap_upc_all_prefix_reduceD(shared[1] void * a1, shared[1] const void void __real_upc_all_prefix_reduceLD(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, long double (*a6) (long double, long double) , upc_flag_t a7) ; void __wrap_upc_all_prefix_reduceLD(shared[1] void * a1, shared[1] const void * a2, upc_op_t a3, size_t a4, size_t a5, long double (*a6) (long double, long double) , upc_flag_t a7) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_prefix_reduceLD(a1, a2, a3, a4, a5, a6, a7); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_prefix_reduceLD(shared[1] void *, shared[1] const void *, upc_op_t, size_t, size_t, long double (*) (long double, long double), upc_flag_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_prefix_reduceLD(a1, a2, a3, a4, a5, a6, a7); @@ -1177,6 +1923,16 @@ void __wrap_upc_all_prefix_reduceLD(shared[1] void * a1, shared[1] const void void __real_upc_all_free(shared[1] void * a1) ; void __wrap_upc_all_free(shared[1] void * a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_free(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_all_free(shared[1] void *) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_free(a1); @@ -1192,6 +1948,16 @@ void __wrap_upc_all_free(shared[1] void * a1) { void __real_upc_all_lock_free(upc_lock_t * a1) ; void __wrap_upc_all_lock_free(upc_lock_t * a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_all_lock_free(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void 
upc_all_lock_free(upc_lock_t *) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_all_lock_free(a1); @@ -1208,6 +1974,15 @@ upc_tick_t __real_upc_ticks_now() ; upc_tick_t __wrap_upc_ticks_now() { upc_tick_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_ticks_now(); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_tick_t upc_ticks_now() C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_ticks_now(); @@ -1225,6 +2000,15 @@ unsigned long __real_upc_ticks_to_ns(upc_tick_t a1) ; unsigned long __wrap_upc_ticks_to_ns(upc_tick_t a1) { unsigned long retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_ticks_to_ns(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"unsigned long upc_ticks_to_ns(upc_tick_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_ticks_to_ns(a1); @@ -1242,6 +2026,15 @@ void * __real_upc_cast(shared[1] void * a1) ; void * __wrap_upc_cast(shared[1] void * a1) { void * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_cast(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void * upc_cast(shared[1] void *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_cast(a1); @@ -1259,6 +2052,15 @@ int __real_upc_castable(shared[1] void * a1) ; int __wrap_upc_castable(shared[1] void * a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_castable(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"int upc_castable(shared[1] void *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_castable(a1); @@ -1276,6 +2078,15 @@ int __real_upc_thread_castable(unsigned int a1) ; int __wrap_upc_thread_castable(unsigned 
int a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_thread_castable(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"int upc_thread_castable(unsigned int) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_thread_castable(a1); @@ -1293,9 +2104,33 @@ upc_handle_t __real_upc_memcpy_nb(shared[1] void * a1, shared[1] const void * upc_handle_t __wrap_upc_memcpy_nb(shared[1] void * a1, shared[1] const void * a2, size_t a3) { upc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_memcpy_nb(a1, a2, a3); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_handle_t upc_memcpy_nb(shared[1] void *, shared[1] const void *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + size_t dst_thread = __real_upc_threadof(a1); + size_t src_thread = __real_upc_threadof(a2); + size_t my_thread = MYTHREAD; + if (my_thread == src_thread) { + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, dst_thread, a3); + } else { + TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, dst_thread, a3, src_thread); + } + retval = __real_upc_memcpy_nb(a1, a2, a3); + if (my_thread == src_thread) { + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, my_thread, a3, dst_thread); + } else { + TAU_TRACE_RECVMSG(TAU_UPC_TAGID, src_thread, a3); + } + TAU_PROFILE_STOP(t); return retval; @@ -1310,9 +2145,20 @@ upc_handle_t __real_upc_memget_nb(void * a1, shared[1] const void * a2, size upc_handle_t __wrap_upc_memget_nb(void * a1, shared[1] const void * a2, size_t a3) { upc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_memget_nb(a1, a2, a3); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_handle_t upc_memget_nb(void *, shared[1] const void *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + 
TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, MYTHREAD, a3, __real_upc_threadof(a2)); retval = __real_upc_memget_nb(a1, a2, a3); + TAU_TRACE_RECVMSG(TAU_UPC_TAGID, __real_upc_threadof(a2), a3); TAU_PROFILE_STOP(t); return retval; @@ -1327,9 +2173,20 @@ upc_handle_t __real_upc_memput_nb(shared[1] void * a1, const void * a2, size upc_handle_t __wrap_upc_memput_nb(shared[1] void * a1, const void * a2, size_t a3) { upc_handle_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_memput_nb(a1, a2, a3); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"upc_handle_t upc_memput_nb(shared[1] void *, const void *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, __real_upc_threadof(a1), a3); retval = __real_upc_memput_nb(a1, a2, a3); + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, MYTHREAD, a3, __real_upc_threadof(a1)); TAU_PROFILE_STOP(t); return retval; @@ -1343,6 +2200,16 @@ upc_handle_t __wrap_upc_memput_nb(shared[1] void * a1, const void * a2, size void __real_upc_sync_nb(upc_handle_t a1) ; void __wrap_upc_sync_nb(upc_handle_t a1) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_sync_nb(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_sync_nb(upc_handle_t) C", "", TAU_USER); TAU_PROFILE_START(t); __real_upc_sync_nb(a1); @@ -1359,6 +2226,15 @@ int __real_upc_test_nb(upc_handle_t a1) ; int __wrap_upc_test_nb(upc_handle_t a1) { int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_test_nb(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"int upc_test_nb(upc_handle_t) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_test_nb(a1); @@ -1375,9 +2251,34 @@ int __wrap_upc_test_nb(upc_handle_t a1) { void 
__real_upc_memcpy_nbi(shared[1] void * a1, shared[1] const void * a2, size_t a3) ; void __wrap_upc_memcpy_nbi(shared[1] void * a1, shared[1] const void * a2, size_t a3) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_memcpy_nbi(a1, a2, a3); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_memcpy_nbi(shared[1] void *, shared[1] const void *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + size_t dst_thread = __real_upc_threadof(a1); + size_t src_thread = __real_upc_threadof(a2); + size_t my_thread = MYTHREAD; + if (my_thread == src_thread) { + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, dst_thread, a3); + } else { + TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, dst_thread, a3, src_thread); + } + __real_upc_memcpy_nbi(a1, a2, a3); + if (my_thread == src_thread) { + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, my_thread, a3, dst_thread); + } else { + TAU_TRACE_RECVMSG(TAU_UPC_TAGID, src_thread, a3); + } + TAU_PROFILE_STOP(t); } @@ -1390,9 +2291,21 @@ void __wrap_upc_memcpy_nbi(shared[1] void * a1, shared[1] const void * a2, s void __real_upc_memget_nbi(void * a1, shared[1] const void * a2, size_t a3) ; void __wrap_upc_memget_nbi(void * a1, shared[1] const void * a2, size_t a3) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_memget_nbi(a1, a2, a3); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_memget_nbi(void *, shared[1] const void *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, MYTHREAD, a3, __real_upc_threadof(a2)); __real_upc_memget_nbi(a1, a2, a3); + TAU_TRACE_RECVMSG(TAU_UPC_TAGID, __real_upc_threadof(a2), a3); TAU_PROFILE_STOP(t); } @@ -1405,9 +2318,21 @@ void __wrap_upc_memget_nbi(void * a1, shared[1] const void * a2, size_t a3) void __real_upc_memput_nbi(shared[1] void * a1, const void * a2, size_t a3) ; void 
__wrap_upc_memput_nbi(shared[1] void * a1, const void * a2, size_t a3) { + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_memput_nbi(a1, a2, a3); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"void upc_memput_nbi(shared[1] void *, const void *, size_t) C", "", TAU_USER); TAU_PROFILE_START(t); + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, __real_upc_threadof(a1), a3); __real_upc_memput_nbi(a1, a2, a3); + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, MYTHREAD, a3, __real_upc_threadof(a1)); TAU_PROFILE_STOP(t); } @@ -1421,6 +2346,15 @@ size_t __real_upc_nodeof(shared[1] void * a1) ; size_t __wrap_upc_nodeof(shared[1] void * a1) { size_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_nodeof(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + TAU_PROFILE_TIMER(t,"size_t upc_nodeof(shared[1] void *) C", "", TAU_USER); TAU_PROFILE_START(t); retval = __real_upc_nodeof(a1); diff --git a/src/wrappers/upc/crayupc/tau_upc_wrapper b/src/wrappers/upc/crayupc/tau_upc_wrapper index 444c498f6..965b2e83f 100755 --- a/src/wrappers/upc/crayupc/tau_upc_wrapper +++ b/src/wrappers/upc/crayupc/tau_upc_wrapper @@ -81,12 +81,10 @@ if [ "x$pdtdir" != "x" ]; then fi fi -#echo "tau_wrap $headerfile.pdb $headerfile -o wr.c $runtimeopt $library $*" - tau_wrap $headerfile.pdb $headerfile -o wr.c --upc $runtimeopt $library $* + tau_wrap $headerfile.pdb $headerfile -o wr.c --upc cray $runtimeopt $library $* dirname=`basename ${headerfile} .h`_wrapper - echo sed -e 's@-L.*\$@-lcray_upc_runtime_wrap@g' ${dirname}/link_options.tau > ${dirname}/link_options.tau~ - sed -e 's@-L.*$@-lcray_upc_runtime_wrap@g' ${dirname}/link_options.tau > ${dirname}/link_options.tau~ - mv ${dirname}/link_options.tau~ ${dirname}/link_options.tau + + sed -e 's/@@WRAPPER_LIBRARY@@/-lcray_upc_runtime_wrap/' $dirname/link_options.tau.master > 
$dirname/link_options.tau else dirname=`basename ${headerfile} .h`_wrapper fi diff --git a/src/wrappers/upc/gupc/Makefile b/src/wrappers/upc/gupc/Makefile new file mode 100644 index 000000000..1901c4e92 --- /dev/null +++ b/src/wrappers/upc/gupc/Makefile @@ -0,0 +1,10 @@ +include ../../../../include/Makefile + +upcdir=`which upc | sed -e 's@bin/upc@@g'` + +install: + TAU_MAKEFILE=$(TAU_PREFIX_INSTALL_DIR)/$(CONFIG_ARCH)/lib/Makefile.tau$(TAU_CONFIG) PATH=$(TAU_PREFIX_INSTALL_DIR)/$(CONFIG_ARCH)/bin:${PATH} ./tau_upc_wrapper tau_gupc.h -lm -gasnet=${TAU_GASNET_DIR} -gasnetconduit=${UPCNETWORK} -upc=${upcdir} + @echo "DONE......!" + +clean: + /bin/rm -f *.o */*.o */*.a diff --git a/src/wrappers/upc/gupc/select.tau b/src/wrappers/upc/gupc/select.tau new file mode 100644 index 000000000..27271709d --- /dev/null +++ b/src/wrappers/upc/gupc/select.tau @@ -0,0 +1,152 @@ +BEGIN_INCLUDE_LIST +"# upc_memcpy(#" +"# upc_memcpyg(#" +"# upc_memget(#" +"# upc_memgetg(#" +"# upc_memput(#" +"# upc_memputg(#" +"# upc_memset(#" +"# upc_memsetg(#" +"# upc_addrfield(#" +"# upc_affinitysize(#" +"# upc_phaseof(#" +"# upc_resetphase(#" +"# upc_threadof(#" +"# upc_all_alloc(#" +"# upc_all_allocg(#" +"# upc_alloc(#" +"# upc_allocg(#" +"# upc_free(#" +"# upc_freeg(#" +"# upc_global_alloc(#" +"# upc_global_allocg(#" +"# upc_addrfield(#" +"# upc_all_alloc(#" +"# upc_alloc(#" +"# upc_free(#" +"# upc_global_alloc(#" +"# upc_local_alloc(#" +"# upc_memset(#" +"# upc_phaseof(#" +"# upc_threadof(#" +"# upc_addrfield(#" +"# upc_addrfieldg(#" +"# upc_affinitysize(#" +"# upc_affinitysizeg(#" +"# upc_all_lock_alloc(#" +"# upc_all_lock_allocg(#" +"# upc_global_exit(#" +"# upc_global_exitg(#" +"# upc_global_lock_alloc(#" +"# upc_global_lock_allocg(#" +"# upc_lock(#" +"# upc_lock_attempt(#" +"# upc_lock_attemptg(#" +"# upc_lock_free(#" +"# upc_lock_freeg(#" +"# upc_lockg(#" +"# upc_phaseof(#" +"# upc_phaseofg(#" +"# upc_resetphase(#" +"# upc_resetphaseg(#" +"# upc_threadof(#" +"# upc_threadofg(#" +"# 
upc_unlock(#" +"# upc_unlockg(#" +"# upc_all_lock_alloc(#" +"# upc_free(#" +"# upc_global_alloc(#" +"# upc_global_lock_alloc(#" +"# upc_lock(#" +"# upc_lock_attempt(#" +"# upc_lock_free(#" +"# upc_unlock(#" +"# upc_global_exit(#" +"# upc_main(#" +"# upc_memcpy(#" +"# upc_memget(#" +"# upc_memput(#" +"# upc_memset(#" +"# upc_all_broadcast(#" +"# upc_coll_init(#" +"# upc_coll_init_flag(#" +"# upc_memcpy(#" +"# upc_all_alloc(#" +"# upc_coll_chk_blk_size(#" +"# upc_coll_chk_dst_affinity(#" +"# upc_coll_chk_nbytes(#" +"# upc_coll_chk_nelems(#" +"# upc_coll_chk_op(#" +"# upc_coll_chk_src_affinity(#" +"# upc_coll_chk_sync_mode(#" +"# upc_coll_err(#" +"# upc_free(#" +"# upc_global_exit(#" +"# upc_phaseof(#" +"# upc_threadof(#" +"# upc_all_exchange(#" +"# upc_coll_init(#" +"# upc_coll_init_flag(#" +"# upc_memcpy(#" +"# upc_all_gather_all(#" +"# upc_coll_init(#" +"# upc_coll_init_flag(#" +"# upc_memcpy(#" +"# upc_all_gather(#" +"# upc_coll_init(#" +"# upc_coll_init_flag(#" +"# upc_memcpy(#" +"# upc_threadof(#" +"# upc_coll_init(#" +"# upc_coll_init_flag(#" +"# upc_all_permute(#" +"# upc_coll_init(#" +"# upc_coll_init_flag(#" +"# upc_memcpy(#" +"# upc_all_alloc(#" +"# upc_all_prefix_reduceC(#" +"# upc_all_prefix_reduceD(#" +"# upc_all_prefix_reduceF(#" +"# upc_all_prefix_reduceI(#" +"# upc_all_prefix_reduceL(#" +"# upc_all_prefix_reduceLD(#" +"# upc_all_prefix_reduceS(#" +"# upc_all_prefix_reduceUC(#" +"# upc_all_prefix_reduceUI(#" +"# upc_all_prefix_reduceUL(#" +"# upc_all_prefix_reduceUS(#" +"# upc_coll_init(#" +"# upc_coll_init_flag(#" +"# upc_free(#" +"# upc_phaseof(#" +"# upc_threadof(#" +"# upc_all_alloc(#" +"# upc_all_reduceC(#" +"# upc_all_reduceD(#" +"# upc_all_reduceF(#" +"# upc_all_reduceI(#" +"# upc_all_reduceL(#" +"# upc_all_reduceLD(#" +"# upc_all_reduceS(#" +"# upc_all_reduceUC(#" +"# upc_all_reduceUI(#" +"# upc_all_reduceUL(#" +"# upc_all_reduceUS(#" +"# upc_coll_init(#" +"# upc_coll_init_flag(#" +"# upc_free(#" +"# upc_phaseof(#" +"# upc_threadof(#" +"# 
upc_all_scatter(#" +"# upc_coll_init(#" +"# upc_coll_init_flag(#" +"# upc_memcpy(#" +"# upc_all_sort(#" +"# upc_alloc(#" +"# upc_coll_init(#" +"# upc_coll_init_flag(#" +"# upc_free(#" +"# upc_memcpy(#" +"# upc_phaseof(#" +"# upc_threadof(#" +END_INCLUDE_LIST diff --git a/src/wrappers/upc/gupc/tau_gupc.h b/src/wrappers/upc/gupc/tau_gupc.h new file mode 100644 index 000000000..2d22c79d9 --- /dev/null +++ b/src/wrappers/upc/gupc/tau_gupc.h @@ -0,0 +1,12 @@ +#ifdef __UPC_PUPC_INST__ +#undef __UPC_PUPC_INST__ +#define NEED__UPC_PUPC_INST__ +#endif + +#pragma pupc off + +#include + +#ifdef NEED__UPC_PUPC_INST__ +#define __UPC_PUPC_INST__ +#endif diff --git a/src/wrappers/upc/gupc/tau_gupc_wrapper/Makefile b/src/wrappers/upc/gupc/tau_gupc_wrapper/Makefile new file mode 100644 index 000000000..455b80d44 --- /dev/null +++ b/src/wrappers/upc/gupc/tau_gupc_wrapper/Makefile @@ -0,0 +1,16 @@ +include ${TAU_MAKEFILE} +CC=$(TAU_UPCC) +CFLAGS=$(TAU_DEFS) $(TAU_INCLUDE) $(TAU_MPI_INCLUDE) -I.. +EXTRA_FLAGS= + +AR=$(TAU_AR) +ARFLAGS=rcv + +libtau_gupc_wrap.a: tau_gupc_wrap.o + $(AR) $(ARFLAGS) $@ $< + +tau_gupc_wrap.o: wr.c + $(CC) $(CFLAGS) $(EXTRA_FLAGS) -c $< -o $@ +clean: + /bin/rm -f tau_gupc_wrap.o libtau_gupc_wrap.a + diff --git a/src/wrappers/upc/crayupc/upc.h b/src/wrappers/upc/gupc/tau_gupc_wrapper/link_options.tau similarity index 100% rename from src/wrappers/upc/crayupc/upc.h rename to src/wrappers/upc/gupc/tau_gupc_wrapper/link_options.tau diff --git a/src/wrappers/upc/gupc/tau_gupc_wrapper/link_options.tau.master b/src/wrappers/upc/gupc/tau_gupc_wrapper/link_options.tau.master new file mode 100644 index 000000000..b4de3cab8 --- /dev/null +++ b/src/wrappers/upc/gupc/tau_gupc_wrapper/link_options.tau.master @@ -0,0 +1 @@ +-Wl,-wrap,upc_global_exit -Wl,-wrap,upc_global_alloc -Wl,-wrap,upc_all_alloc -Wl,-wrap,upc_alloc -Wl,-wrap,upc_local_alloc -Wl,-wrap,upc_free -Wl,-wrap,upc_threadof -Wl,-wrap,upc_phaseof -Wl,-wrap,upc_resetphase -Wl,-wrap,upc_addrfield 
-Wl,-wrap,upc_affinitysize -Wl,-wrap,upc_global_lock_alloc -Wl,-wrap,upc_all_lock_alloc -Wl,-wrap,upc_lock_free -Wl,-wrap,upc_lock -Wl,-wrap,upc_lock_attempt -Wl,-wrap,upc_unlock -Wl,-wrap,upc_memcpy -Wl,-wrap,upc_memget -Wl,-wrap,upc_memput -Wl,-wrap,upc_memset WRAPPER_LIBRARY diff --git a/src/wrappers/upc/gupc/tau_gupc_wrapper/wr.c b/src/wrappers/upc/gupc/tau_gupc_wrapper/wr.c new file mode 100644 index 000000000..1742fce0f --- /dev/null +++ b/src/wrappers/upc/gupc/tau_gupc_wrapper/wr.c @@ -0,0 +1,575 @@ +#include +#include +#include + +#pragma pupc off + +#ifdef __BERKELEY_UPC__ +#pragma UPCR NO_SRCPOS +#endif + +static int tau_upc_node = -1; +static int tau_upc_tagid_f = 0; +#define TAU_UPC_TAGID (tau_upc_tagid_f = (tau_upc_tagid_f & 255)) +#define TAU_UPC_TAGID_NEXT ((++tau_upc_tagid_f) & 255) + +void tau_totalnodes(int, int); + + +/********************************************************** + upc_global_exit + **********************************************************/ + +void __real_upc_global_exit(int a1) ; +void __wrap_upc_global_exit(int a1) { + + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_global_exit(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"void upc_global_exit(int) C", "", TAU_USER); + TAU_PROFILE_START(t); + __real_upc_global_exit(a1); + TAU_PROFILE_STOP(t); + +} + + +/********************************************************** + upc_global_alloc + **********************************************************/ + +shared void * __real_upc_global_alloc(size_t a1, size_t a2) ; +shared void * __wrap_upc_global_alloc(size_t a1, size_t a2) { + + shared void * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_global_alloc(a1, a2); + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"shared[1] void * upc_global_alloc(size_t, size_t) C", "", TAU_USER); + 
TAU_PROFILE_START(t); + retval = __real_upc_global_alloc(a1, a2); + TAU_PROFILE_STOP(t); + return retval; + +} + + +/********************************************************** + upc_all_alloc + **********************************************************/ + +shared void * __real_upc_all_alloc(size_t a1, size_t a2) ; +shared void * __wrap_upc_all_alloc(size_t a1, size_t a2) { + + shared void * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_alloc(a1, a2); + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"shared[1] void * upc_all_alloc(size_t, size_t) C", "", TAU_USER); + TAU_PROFILE_START(t); + retval = __real_upc_all_alloc(a1, a2); + TAU_PROFILE_STOP(t); + return retval; + +} + + +/********************************************************** + upc_alloc + **********************************************************/ + +shared void * __real_upc_alloc(size_t a1) ; +shared void * __wrap_upc_alloc(size_t a1) { + + shared void * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_alloc(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"shared[1] void * upc_alloc(size_t) C", "", TAU_USER); + TAU_PROFILE_START(t); + retval = __real_upc_alloc(a1); + TAU_PROFILE_STOP(t); + return retval; + +} + + +/********************************************************** + upc_local_alloc + **********************************************************/ + +shared void * __real_upc_local_alloc(size_t a1, size_t a2) ; +shared void * __wrap_upc_local_alloc(size_t a1, size_t a2) { + + shared void * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_local_alloc(a1, a2); + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"shared[1] void * upc_local_alloc(size_t, size_t) C", "", TAU_USER); + 
TAU_PROFILE_START(t); + retval = __real_upc_local_alloc(a1, a2); + TAU_PROFILE_STOP(t); + return retval; + +} + + +/********************************************************** + upc_free + **********************************************************/ + +void __real_upc_free(shared void * a1) ; +void __wrap_upc_free(shared void * a1) { + + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_free(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"void upc_free(shared[1] void *) C", "", TAU_USER); + TAU_PROFILE_START(t); + __real_upc_free(a1); + TAU_PROFILE_STOP(t); + +} + + +/********************************************************** + upc_threadof + **********************************************************/ + +size_t __real_upc_threadof(shared void * a1) ; +size_t __wrap_upc_threadof(shared void * a1) { + + size_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_threadof(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"size_t upc_threadof(shared[1] void *) C", "", TAU_USER); + TAU_PROFILE_START(t); + retval = __real_upc_threadof(a1); + TAU_PROFILE_STOP(t); + return retval; + +} + + +/********************************************************** + upc_phaseof + **********************************************************/ + +size_t __real_upc_phaseof(shared void * a1) ; +size_t __wrap_upc_phaseof(shared void * a1) { + + size_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_phaseof(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"size_t upc_phaseof(shared[1] void *) C", "", TAU_USER); + TAU_PROFILE_START(t); + retval = __real_upc_phaseof(a1); + TAU_PROFILE_STOP(t); + return retval; + +} + + +/********************************************************** + upc_resetphase 
+ **********************************************************/ + +shared void * __real_upc_resetphase(shared void * a1) ; +shared void * __wrap_upc_resetphase(shared void * a1) { + + shared void * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_resetphase(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"shared[1] void * upc_resetphase(shared[1] void *) C", "", TAU_USER); + TAU_PROFILE_START(t); + retval = __real_upc_resetphase(a1); + TAU_PROFILE_STOP(t); + return retval; + +} + + +/********************************************************** + upc_addrfield + **********************************************************/ + +size_t __real_upc_addrfield(shared void * a1) ; +size_t __wrap_upc_addrfield(shared void * a1) { + + size_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_addrfield(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"size_t upc_addrfield(shared[1] void *) C", "", TAU_USER); + TAU_PROFILE_START(t); + retval = __real_upc_addrfield(a1); + TAU_PROFILE_STOP(t); + return retval; + +} + + +/********************************************************** + upc_affinitysize + **********************************************************/ + +size_t __real_upc_affinitysize(size_t a1, size_t a2, size_t a3) ; +size_t __wrap_upc_affinitysize(size_t a1, size_t a2, size_t a3) { + + size_t retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_affinitysize(a1, a2, a3); + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"size_t upc_affinitysize(size_t, size_t, size_t) C", "", TAU_USER); + TAU_PROFILE_START(t); + retval = __real_upc_affinitysize(a1, a2, a3); + TAU_PROFILE_STOP(t); + return retval; + +} + + +/********************************************************** + 
upc_global_lock_alloc + **********************************************************/ + +upc_lock_t * __real_upc_global_lock_alloc() ; +upc_lock_t * __wrap_upc_global_lock_alloc() { + + upc_lock_t * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_global_lock_alloc(); + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"upc_lock_t * upc_global_lock_alloc() C", "", TAU_USER); + TAU_PROFILE_START(t); + retval = __real_upc_global_lock_alloc(); + TAU_PROFILE_STOP(t); + return retval; + +} + + +/********************************************************** + upc_all_lock_alloc + **********************************************************/ + +upc_lock_t * __real_upc_all_lock_alloc() ; +upc_lock_t * __wrap_upc_all_lock_alloc() { + + upc_lock_t * retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_all_lock_alloc(); + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"upc_lock_t * upc_all_lock_alloc() C", "", TAU_USER); + TAU_PROFILE_START(t); + retval = __real_upc_all_lock_alloc(); + TAU_PROFILE_STOP(t); + return retval; + +} + + +/********************************************************** + upc_lock_free + **********************************************************/ + +void __real_upc_lock_free(upc_lock_t * a1) ; +void __wrap_upc_lock_free(upc_lock_t * a1) { + + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_lock_free(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"void upc_lock_free(upc_lock_t *) C", "", TAU_USER); + TAU_PROFILE_START(t); + __real_upc_lock_free(a1); + TAU_PROFILE_STOP(t); + +} + + +/********************************************************** + upc_lock + **********************************************************/ + +void __real_upc_lock(upc_lock_t * a1) ; +void 
__wrap_upc_lock(upc_lock_t * a1) { + + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_lock(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"void upc_lock(upc_lock_t *) C", "", TAU_USER); + TAU_PROFILE_START(t); + __real_upc_lock(a1); + TAU_PROFILE_STOP(t); + +} + + +/********************************************************** + upc_lock_attempt + **********************************************************/ + +int __real_upc_lock_attempt(upc_lock_t * a1) ; +int __wrap_upc_lock_attempt(upc_lock_t * a1) { + + int retval = 0; + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + return __real_upc_lock_attempt(a1); + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"int upc_lock_attempt(upc_lock_t *) C", "", TAU_USER); + TAU_PROFILE_START(t); + retval = __real_upc_lock_attempt(a1); + TAU_PROFILE_STOP(t); + return retval; + +} + + +/********************************************************** + upc_unlock + **********************************************************/ + +void __real_upc_unlock(upc_lock_t * a1) ; +void __wrap_upc_unlock(upc_lock_t * a1) { + + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_unlock(a1); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"void upc_unlock(upc_lock_t *) C", "", TAU_USER); + TAU_PROFILE_START(t); + __real_upc_unlock(a1); + TAU_PROFILE_STOP(t); + +} + + +/********************************************************** + upc_memcpy + **********************************************************/ + +void __real_upc_memcpy(shared void *restrict a1, shared const void *restrict a2, size_t a3) ; +void __wrap_upc_memcpy(shared void *restrict a1, shared const void *restrict a2, size_t a3) { + + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + 
__real_upc_memcpy(a1, a2, a3); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"void upc_memcpy(shared[1] void *restrict, shared[1] const void *restrict, size_t) C", "", TAU_USER); + TAU_PROFILE_START(t); + size_t dst_thread = __real_upc_threadof(a1); + size_t src_thread = __real_upc_threadof(a2); + size_t my_thread = MYTHREAD; + if (my_thread == src_thread) { + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, dst_thread, a3); + } else { + TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, dst_thread, a3, src_thread); + } + + __real_upc_memcpy(a1, a2, a3); + if (my_thread == src_thread) { + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, my_thread, a3, dst_thread); + } else { + TAU_TRACE_RECVMSG(TAU_UPC_TAGID, src_thread, a3); + } + + TAU_PROFILE_STOP(t); + +} + + +/********************************************************** + upc_memget + **********************************************************/ + +void __real_upc_memget(void *restrict a1, shared const void *restrict a2, size_t a3) ; +void __wrap_upc_memget(void *restrict a1, shared const void *restrict a2, size_t a3) { + + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_memget(a1, a2, a3); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"void upc_memget(void *restrict, shared[1] const void *restrict, size_t) C", "", TAU_USER); + TAU_PROFILE_START(t); + TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, MYTHREAD, a3, __real_upc_threadof(a2)); + __real_upc_memget(a1, a2, a3); + TAU_TRACE_RECVMSG(TAU_UPC_TAGID, __real_upc_threadof(a2), a3); + TAU_PROFILE_STOP(t); + +} + + +/********************************************************** + upc_memput + **********************************************************/ + +void __real_upc_memput(shared void *restrict a1, const void *restrict a2, size_t a3) ; +void __wrap_upc_memput(shared void *restrict a1, const void *restrict a2, size_t a3) { + + if (tau_upc_node == -1) { + 
tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_memput(a1, a2, a3); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"void upc_memput(shared[1] void *restrict, const void *restrict, size_t) C", "", TAU_USER); + TAU_PROFILE_START(t); + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, __real_upc_threadof(a1), a3); + __real_upc_memput(a1, a2, a3); + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, MYTHREAD, a3, __real_upc_threadof(a1)); + TAU_PROFILE_STOP(t); + +} + + +/********************************************************** + upc_memset + **********************************************************/ + +void __real_upc_memset(shared void * a1, int a2, size_t a3) ; +void __wrap_upc_memset(shared void * a1, int a2, size_t a3) { + + if (tau_upc_node == -1) { + tau_upc_node = TAU_PROFILE_GET_NODE(); + if (tau_upc_node == -1) { + __real_upc_memset(a1, a2, a3); + return; + } else { + tau_totalnodes(1,THREADS); + } + } + + TAU_PROFILE_TIMER(t,"void upc_memset(shared[1] void *, int, size_t) C", "", TAU_USER); + TAU_PROFILE_START(t); + TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, __real_upc_threadof(a1), a3); + __real_upc_memset(a1, a2, a3); + TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, MYTHREAD, a3, __real_upc_threadof(a1)); + TAU_PROFILE_STOP(t); + +} + diff --git a/src/wrappers/upc/gupc/tau_upc_wrapper b/src/wrappers/upc/gupc/tau_upc_wrapper new file mode 100755 index 000000000..9b1dc1a57 --- /dev/null +++ b/src/wrappers/upc/gupc/tau_upc_wrapper @@ -0,0 +1,160 @@ +#!/bin/sh +usage() +{ + cmd=`basename $0` + echo "" + echo " $cmd - Generates a wrapper library that can intercept " + echo " at link time or at runtime UPC routines specified in a header file" + echo " Usage: $cmd [-w (default) | -d | -r] -gasnet= -gasnetconduit= -upc= " + echo " -w (default): generates wrappers for re-linking the application" + echo " -d : generates wrappers by redefining routines during compilation in header files" + echo " -r : generates wrappers that may be 
pre-loaded using tau_exec at runtime" + echo " -gasnet= : specifies the location of the gasnet directory" + echo " -gasnetconduit= : specifies the conduit (smp, mpi, etc.) for gasnet" + echo " -upc= : specifies the upc directory" + echo "NOTE: $cmd reads the TAU_MAKEFILE environment variable to get PDT settings" + echo " Example: " + echo " % $cmd tau_upcr.h -lm -gasnet=/usr/local/packages/gasnet-1.18.0 -gasnetconduit=mpi -upc=/usr/local/packages/gupc-4.7.0.2" + echo " generates a wrapper library that may be linked in using TAU_OPTIONS -optTauWrapFile=/link_options.tau" + echo "" + exit 1 +} + +upcdir=/usr/local/packages/gupc +gasnetdir=/usr/local/packages/gasnet +gasnetconduit=mpi +runtime_specified=0; +header_redirection_specified=0; +headerfile="" +library="" +modarg="" +if [ $# = 0 ] ; then + usage +fi + +if [ $# -gt 1 ]; then + headerfile=$1 + library=$2 + shift; + shift; +fi + +for arg in "$@"; do + case $arg in + -r) + runtime_specified=1; + shift; + ;; + -d) + header_redirection_specified=1; + shift; + ;; + -gasnet=*) + gasnetdir=`echo $arg | sed -e 's/-gasnet=//g'` + shift; + ;; + -gasnetconduit=*) + gasnetconduit="`echo $arg | sed -e 's/-gasnetconduit=//g'`-conduit" + shift; + ;; + -upc=*) + upcdir=`echo $arg | sed -e 's/-upc=//g'` + shift; + ;; + esac +done + +if [ "x$TAU_MAKEFILE" = "x" ] ;then + echo "ERROR: Please set the TAU_MAKEFILE environment variable." + exit 1 +fi + +if [ ! -r $TAU_MAKEFILE ] ;then + echo "ERROR: environment variable TAU_MAKEFILE is set but the file is not readable" + exit 1 +fi + +if [ ! -d "$upcdir" ] ; then + echo "ERROR: invalid UPC directory: $upcdir" + exit 1 +fi + +if [ ! -d "$gasnetdir" ] ; then + echo "ERROR: invalid GASNet directory: $gasnetdir" + exit 1 +fi + +if [ ! 
-d "$gasnetdir/include/$gasnetconduit" ] ; then + echo "ERROR: GASNet conduit $gasnetconduit not found in $gasnetdir" + exit 1 +fi + +pdtdir="`grep PDTDIR= $TAU_MAKEFILE | sed -e 's/PDTDIR=//g' `" +tauarch="`tau-config | grep TAUARCH | sed -e 's@TAUARCH=@@g' `" +basedir="`tau-config | grep BASEDIR | sed -e 's@BASEDIR=@@g' `" + +parser=$pdtdir/$tauarch/bin/upcparse +if [ ! -x $parser ]; then + echo "ERROR: Can't execute $parser: Using PDT = $pdtdir, ARCH = $tauarch" + exit 1 +fi +baseheader=`basename $headerfile .h` + +echo $parser $headerfile -DGASNET_SEQ -I$upcdir/include -I$gasnetdir/include -I$gasnetdir/include/$gasnetconduit -DGASNETT_USE_GCC_ATTRIBUTE_MAYALIAS +$parser $headerfile -DGASNET_SEQ -I$upcdir/include -I$gasnetdir/include -I$gasnetdir/include/$gasnetconduit -DGASNETT_USE_GCC_ATTRIBUTE_MAYALIAS # keep in sync with the echo on the previous line: the log must show exactly what was run +mv $baseheader.pdb $headerfile.pdb + +if [ $runtime_specified = 1 ] ; then + runtimeopt='-r' +else + if [ $header_redirection_specified = 1 ]; then + runtimeopt='' + else + runtimeopt='-w' + fi +fi + + +# Locate libgupc.a +if [ -r "$upcdir/lib64/libgupc.a" ] ; then + libgupc="$upcdir/lib64/libgupc.a" +elif [ -r "$upcdir/lib/libgupc.a" ] ; then + libgupc="$upcdir/lib/libgupc.a" +else + echo "ERROR: Cannot locate libgupc.a in $upcdir" + exit 1 +fi + +# Generate select.tau +echo "BEGIN_INCLUDE_LIST" > select.tau +for sym in _upc upc upcr ; do + nm -A $libgupc | grep " $sym" | awk '{ print $3;}' | sed -e "s/$sym/\"# $sym/g" -e 's/$/(#"/g' >> select.tau +done +echo "END_INCLUDE_LIST" >> select.tau + +# Generate wr.c +echo $basedir/bin/tau_wrap $headerfile.pdb $headerfile -o wr.c $runtimeopt $library $* -f select.tau --upc gnu +$basedir/bin/tau_wrap $headerfile.pdb $headerfile -o wr.c $runtimeopt $library $* -f select.tau --upc gnu +dirname=`basename ${headerfile} .h`_wrapper + +# Fix the Makefile with the correct smp args.
+sed -e "s@^EXTRA_FLAGS=@EXTRA_FLAGS=-DGASNET_SEQ -I$upcdir/include -I$gasnetdir/include -I$gasnetdir/include/$gasnetconduit -DGASNETT_USE_GCC_ATTRIBUTE_MAYALIAS @g" $dirname/Makefile > $dirname/Makefile~ +mv -f $dirname/Makefile~ $dirname/Makefile + +# Add the library to the link options +# GNU UPC can't link static archives of UPC code as of Aug. 2012. +# Link the object file directly. +sed -e "s@WRAPPER_LIBRARY@$basedir/lib/wrappers/upc/gupc/tau_gupc_wrap.o@" $dirname/link_options.tau.master > $dirname/link_options.tau + +# Build and install +cd $dirname +make +cd .. +rm -f ${headerfile}.pdb +mkdir -p $basedir/lib/wrappers/upc/gupc +echo cp $dirname/link_options.tau $basedir/lib/wrappers/upc/gupc +cp $dirname/link_options.tau $basedir/lib/wrappers/upc/gupc +echo cp $dirname/tau_gupc_wrap.o $basedir/lib/wrappers/upc/gupc +cp $dirname/tau_gupc_wrap.o $basedir/lib/wrappers/upc/gupc +echo cp $dirname/libtau_gupc_wrap.a $basedir/lib/ +cp $dirname/libtau_gupc_wrap.a $basedir/lib/ diff --git a/tools/src/paraprof/src/ThreeDeeGeneralPlotUtils.java b/tools/src/paraprof/src/ThreeDeeGeneralPlotUtils.java index 3d8cccd6e..ea215294c 100644 --- a/tools/src/paraprof/src/ThreeDeeGeneralPlotUtils.java +++ b/tools/src/paraprof/src/ThreeDeeGeneralPlotUtils.java @@ -17,17 +17,30 @@ import com.graphbuilder.math.FuncMap; import com.graphbuilder.math.VarMap; +import edu.uoregon.tau.perfdmf.Thread; + public class ThreeDeeGeneralPlotUtils { static final String BEGIN = "BEGIN_VIZ"; static final String END = "END_VIZ"; - public static VarMap getEvaluation(int rank, int maxRank, int node, - int context, int thread, int maxNode, int maxContext, - int maxThread, float[] topoVals, float[] varMins, float varMaxs[], - float varMeans[], int[] axisDim, - Map expressions) {// String[] expressions, int rank, - // int maxRank){ float[] atomValue, + public static VarMap getEvaluation(int rank, + int maxRank, + Thread thread, + ParaProfTrial pptrial, + + // int node, + // int context, int thread, int 
maxNode, int maxContext, + // int maxThread, + float[] topoVals, float[] varMins, float varMaxs[], + float varMeans[], int[] axisDim, Map expressions) {// String[] + // expressions, + // int + // rank, + // int + // maxRank){ + // float[] + // atomValue, // System.out.println(rank); FuncMap fm = new FuncMap(); fm.loadDefaultFunctions(); @@ -35,9 +48,9 @@ public static VarMap getEvaluation(int rank, int maxRank, int node, vm.setValue("maxRank", maxRank); vm.setValue("rank", rank); vm.setValue("color", topoVals[3]); - vm.setValue("node", node); - vm.setValue("context", context); - vm.setValue("thread", thread); + vm.setValue("node", thread.getNodeID()); + vm.setValue("context", thread.getContextID()); + vm.setValue("thread", thread.getThreadID()); vm.setValue("event0.val", topoVals[0]); vm.setValue("event1.val", topoVals[1]); vm.setValue("event2.val", topoVals[2]); @@ -54,10 +67,10 @@ public static VarMap getEvaluation(int rank, int maxRank, int node, vm.setValue("event1.mean", varMeans[1]); vm.setValue("event2.mean", varMeans[2]); vm.setValue("event3.mean", varMeans[3]); -// vm.setValue("atomic0", atomValue[0]); -// vm.setValue("atomic1", atomValue[1]); -// vm.setValue("atomic2", atomValue[2]); -// vm.setValue("atomic3", atomValue[3]); + // vm.setValue("atomic0", atomValue[0]); + // vm.setValue("atomic1", atomValue[1]); + // vm.setValue("atomic2", atomValue[2]); + // vm.setValue("atomic3", atomValue[3]); vm.setValue("axisDimX", axisDim[0]); vm.setValue("axisDimY", axisDim[1]); vm.setValue("axisDimZ", axisDim[2]); @@ -69,7 +82,9 @@ public static VarMap getEvaluation(int rank, int maxRank, int node, while (it.hasNext()) { Entry e = it.next(); - x = ExpressionTree.parse(e.getValue()); + + x = ExpressionTree + .parse(insertMetaDataValues(e.getValue(), thread)); res = x.eval(vm, fm); // System.out.println(e.getKey()+" "+res); vm.setValue(e.getKey(), res); @@ -78,6 +93,45 @@ public static VarMap getEvaluation(int rank, int maxRank, int node, } + private static final 
String metadata = "metadata("; + + private static String insertMetaDataValues(String s, Thread t) { + int dex = 0; + while (dex >= 0) { + int loc = s.indexOf(metadata, dex); + if (loc == -1) + return s; + int cloc = s.indexOf(')', loc); + String key = s.substring(loc + 9, cloc); + + String value = t.getMetaData().get(key); + + if (value == null) { + + value=t.getDataSource().getMetaData().get(key); + if(value==null){ + System.out.println("Metadata key " + key + + " not found at top or in node,thread " + t.getNodeID() + "," + + t.getThreadID() + ". Using 0"); + value = "0"; + }} + + try{ + Double.parseDouble(value); + } + catch(NumberFormatException e){ + System.out.println("Metadata key " + key + + " is non-numeric in node,thread " + t.getNodeID() + "," + + t.getThreadID() + ". Using 0"); + value = "0"; + } + s=s.substring(0,loc)+value+s.substring(cloc+1); + + dex = loc + value.length(); // resume right after the substituted value; cloc indexes the old, longer string + } + return s; + } + private static String[] splitEQ(String s) { String[] tuple = new String[2]; @@ -187,22 +241,21 @@ public static double[][] getRankCoordinate(VarMap vm, int maxpoint, return coords; } - - public static int[] parseMPIProcName(String pname){ - - String s = pname.substring(pname.indexOf('('),pname.indexOf(')')+1); - - return(parseTuple(s)); - + public static int[] parseMPIProcName(String pname) { + + String s = pname.substring(pname.indexOf('('), pname.indexOf(')') + 1); + + return (parseTuple(s)); + } - + public static int[] parseTuple(String tuple) { tuple = tuple.substring(1, tuple.length() - 1); String[] tmp = tuple.split(","); - int tmplen=tmp.length; - if(tmplen<3){ - tmplen=3; + int tmplen = tmp.length; + if (tmplen < 3) { + tmplen = 3; } int[] tres = new int[tmplen]; for (int i = 0; i < tmplen; i++) { @@ -317,41 +370,40 @@ public static CoordMap parseMapFile(String fileLoc) { int core = num.indexOf('1'); // System.out.println("Core: "+ core); coords[rank][3] = core; - } else - { - String[]duo = s.split(":"); + } else { + String[] duo = s.split(":"); int rank = 
Integer.parseInt(duo[0]); - int place=nodes.indexOf(duo[1]); - - if(place==-1){ + int place = nodes.indexOf(duo[1]); + + if (place == -1) { nodes.add(duo[1]); - place=nodes.size()-1; + place = nodes.size() - 1; } - //nodes.insert(rank,duo[1]); - coords[rank][0]=place%10; - coords[rank][1]=(place/10)%8; - coords[rank][2]=(place/10/8); - } - - -// if (s.indexOf(',') > 0) { -// String[] corexyz = s.split(","); -// // System.out.println(corexyz[0]+", "+corexyz[1]+", "+corexyz[2]+", "+corexyz[3]); -// int core = Integer.parseInt(corexyz[0]); -// for (int i = 0; i < 3; i++) { -// coords[core][i] = Integer.parseInt(corexyz[i + 1]); -// } + // nodes.insert(rank,duo[1]); + coords[rank][0] = place % 10; + coords[rank][1] = (place / 10) % 8; + coords[rank][2] = (place / 10 / 8); } - // int x1 = s.indexOf('='); - // //int x2 = s.indexOf('"', x1 + 1); - // - // //String id = s.substring(0,x1); - // String name= s.substring(x1+1); - // names.add(name);//expressions.put(id, exp); + // if (s.indexOf(',') > 0) { + // String[] corexyz = s.split(","); + // // + // System.out.println(corexyz[0]+", "+corexyz[1]+", "+corexyz[2]+", "+corexyz[3]); + // int core = Integer.parseInt(corexyz[0]); + // for (int i = 0; i < 3; i++) { + // coords[core][i] = Integer.parseInt(corexyz[i + 1]); // } + } + + // int x1 = s.indexOf('='); + // //int x2 = s.indexOf('"', x1 + 1); + // + // //String id = s.substring(0,x1); + // String name= s.substring(x1+1); + // names.add(name);//expressions.put(id, exp); + // } - //} + // } } catch (FileNotFoundException e) { // TODO Auto-generated catch block @@ -370,10 +422,12 @@ public static CoordMap parseMapFile(String fileLoc) { for (int i = 0; i < ranks; i++) { int node = coords[i][3]; for (int j = 0; j < 3; j++) { - int sub=1; - if(j==1)sub=2; - if(j==2)sub=6; - int cc = (node/sub) % coremax[j]; + int sub = 1; + if (j == 1) + sub = 2; + if (j == 2) + sub = 6; + int cc = (node / sub) % coremax[j]; // int cy=node%cymax; // int cz=node%czmax; int c = cc + 
coords[i][j] * (coremax[j] + space); @@ -390,7 +444,8 @@ public static CoordMap parseMapFile(String fileLoc) { // done[i][1]=y; // done[i][2]=z; } - System.out.println(i + ": " + done[i][0] + "," + done[i][1] + ","+ done[i][2]); + System.out.println(i + ": " + done[i][0] + "," + done[i][1] + "," + + done[i][2]); } System.out.println("min: " + min[0] + "," + min[1] + "," + min[2]); System.out.println("max: " + max[0] + "," + max[1] + "," + max[2]); diff --git a/tools/src/paraprof/src/ThreeDeeWindow.java b/tools/src/paraprof/src/ThreeDeeWindow.java index 86742ec1a..4fd7cf0f5 100644 --- a/tools/src/paraprof/src/ThreeDeeWindow.java +++ b/tools/src/paraprof/src/ThreeDeeWindow.java @@ -729,14 +729,8 @@ else if (prefix.equals("Map")) { vm = ThreeDeeGeneralPlotUtils.getEvaluation( rankIndex, numThreads, - thread.getNodeID(), - thread.getContextID(), - thread.getThreadID(), - ppTrial.getDataSource().getNumberOfNodes(), - ppTrial.getDataSource().getNumberOfContexts( - thread.getNodeID()), - ppTrial.getDataSource().getNumberOfThreads( - thread.getNodeID(), thread.getContextID()), + thread, + ppTrial, topoVals, varMins, varMaxs, varMeans, settings .getCustomTopoAxes(), expressions); diff --git a/tools/src/perfdmf/bin/perfdmf.jar b/tools/src/perfdmf/bin/perfdmf.jar index acbaa8c97..5990bbef6 100644 Binary files a/tools/src/perfdmf/bin/perfdmf.jar and b/tools/src/perfdmf/bin/perfdmf.jar differ diff --git a/tools/src/perfdmf/src/DatabaseAPI.java b/tools/src/perfdmf/src/DatabaseAPI.java index 88b1324e0..797d35a9f 100644 --- a/tools/src/perfdmf/src/DatabaseAPI.java +++ b/tools/src/perfdmf/src/DatabaseAPI.java @@ -1259,8 +1259,9 @@ private void uploadFunctionProfiles(int trialID, DataSource dataSource, Map it = dataSource.getAllThreads().iterator(); it.hasNext() && summaryOnly == false;) { edu.uoregon.tau.perfdmf.Thread thread = it.next(); diff --git a/tools/src/perfdmf/src/loader/LoadTrial.java b/tools/src/perfdmf/src/loader/LoadTrial.java index 7b9425f90..3177b8ae9 100644 --- 
a/tools/src/perfdmf/src/loader/LoadTrial.java +++ b/tools/src/perfdmf/src/loader/LoadTrial.java @@ -21,6 +21,7 @@ public class LoadTrial { private Experiment exp; private boolean fixNames; private boolean summaryOnly; + private boolean useNulls; private int expID; public int trialID; private DataSource dataSource; @@ -57,6 +58,7 @@ public static void outputHelp() { + " snap, perixml, gptl, paraver, ipm, google\n" + " -t, --trialid Specify trial ID\n" + " -i, --fixnames Use the fixnames option for gprof\n" + + " -z, --usenull Include NULL values as 0 for mean calculation\n" + " -m, --metadata XML metadata for the trial\n\n" + "Notes:\n" + " For the TAU profiles type, you can specify either a specific set of profile\n" + "files on the commandline, or you can specify a directory (by default the current\n" @@ -133,6 +135,7 @@ public void loadTrial(int fileType) { try { dataSource = UtilFncs.initializeDataSource(files, fileType, fixNames); + dataSource.setMeanIncludeNulls(useNulls); } catch (DataSourceException e) { e.printStackTrace(); System.err.println("Error: Unable to initialize datasource!"); @@ -263,6 +266,7 @@ static public void main(String[] args) { CmdLineParser.Option appNameOpt = parser.addStringOption('a', "applicationname"); CmdLineParser.Option expNameOpt = parser.addStringOption('x', "experimentname"); CmdLineParser.Option summaryOpt = parser.addBooleanOption('s', "summaryonly"); + CmdLineParser.Option useNullOpt = parser.addBooleanOption('z', "usenull"); try { parser.parse(args); @@ -287,6 +291,7 @@ static public void main(String[] args) { Boolean fixNames = (Boolean) parser.getOptionValue(fixOpt); String metadataFile = (String) parser.getOptionValue(metadataOpt); Boolean summaryOnly = (Boolean) parser.getOptionValue(summaryOpt); + Boolean useNull = (Boolean) parser.getOptionValue(useNullOpt); if (help != null && help.booleanValue()) { LoadTrial.outputHelp(); @@ -393,6 +398,9 @@ static public void main(String[] args) { if (summaryOnly == null) { 
summaryOnly = new Boolean(false); } + if (useNull == null) { + useNull = new Boolean(false); + } if(multippk){ @@ -431,6 +439,7 @@ static public void main(String[] args) { trans.fixNames = fixNames.booleanValue(); trans.metadataFile = metadataFile; trans.summaryOnly = summaryOnly.booleanValue(); + trans.useNulls = useNull.booleanValue(); trans.loadTrial(fileType); // the trial will be saved when the load is finished (update is called) } diff --git a/tools/src/tau_compiler.sh b/tools/src/tau_compiler.sh index 431408c31..36c446f3b 100755 --- a/tools/src/tau_compiler.sh +++ b/tools/src/tau_compiler.sh @@ -8,7 +8,8 @@ declare -i group_f_F=1 declare -i group_c=2 declare -i group_C=3 declare -i group_upc=4 -declare -i berkeley_upcc=$FALSE +# Replaced with more flexible "upc" variable +#declare -i berkeley_upcc=$FALSE declare -i disablePdtStep=$FALSE declare -i hasAnOutputFile=$FALSE @@ -210,38 +211,50 @@ compilerSpecified="" #constitute the regular command, with the first command (immediately) #after the sequence, being the compiler. In this "for" loops, the #regular command is being read. -for arg in "$@"; do - - case $arg in - -opt*) - ;; - - *) - if [ $tempCounter == 0 ]; then - CMD=$arg - #The first command (immediately) after the -opt sequence is the compiler. - if [ $CMD == upcc ]; then - berkeley_upcc=$TRUE - echoIfDebug "Berkeley UPCC: TRUE!" - fi - fi +for arg in "$@"; do - # Thanks to Bernd Mohr for the following that handles quotes and spaces (see configure for explanation) - modarg=`echo "x$arg" | sed -e 's/^x//' -e 's/"/\\\"/g' -e s,\',%@%\',g -e 's/%@%/\\\/g' -e 's/ /\\\ /g' -e 's#(#\\\(#g' -e 's#)#\\\)#g'` - #modarg=`echo "x$arg" | sed -e 's/^x//' -e 's/"/\\\"/g' -e s,\',%@%\',g -e 's/%@%/\\\/g' -e 's/ /\\\ /g'` - THEARGS="$THEARGS $modarg" + case $arg in + -opt*) + ;; + *) + if [ $tempCounter == 0 ]; then + CMD=$arg + #The first command (immediately) after the -opt sequence is the compiler. 
+ case $CMD in + upcc|*/upcc) + upc="berkeley" + echoIfDebug "Berkeley UPCC: TRUE!" + ;; + upc|*/upc) + upc="gnu" + echoIfDebug "GNU UPC: TRUE!" + ;; + cc|*/cc) + upc="cray" + echoIfDebug "CRAY UPCC: TRUE!" + ;; + *) + upc="unknown" + echoIfDebug "WARNING: UNKNOWN UPC" + ;; + esac + fi - if [ $foundFirstArg == 0 ]; then - foundFirstArg=1 - compilerSpecified="$modarg" - else - regularCmd="$regularCmd $modarg" - fi + # Thanks to Bernd Mohr for the following that handles quotes and spaces (see configure for explanation) + modarg=`echo "x$arg" | sed -e 's/^x//' -e 's/"/\\\"/g' -e s,\',%@%\',g -e 's/%@%/\\\/g' -e 's/ /\\\ /g' -e 's#(#\\\(#g' -e 's#)#\\\)#g'` + #modarg=`echo "x$arg" | sed -e 's/^x//' -e 's/"/\\\"/g' -e s,\',%@%\',g -e 's/%@%/\\\/g' -e 's/ /\\\ /g'` + THEARGS="$THEARGS $modarg" - tempCounter=tempCounter+1 - ;; - esac + if [ $foundFirstArg == 0 ]; then + foundFirstArg=1 + compilerSpecified="$modarg" + else + regularCmd="$regularCmd $modarg" + fi + tempCounter=tempCounter+1 + ;; + esac done echoIfDebug "\nRegular command passed is -- $regularCmd "; echoIfDebug "The compiler being read is $CMD \n" @@ -278,7 +291,7 @@ for arg in "$@" ; do else case $arg in - --help) # Do not use -h as Cray compilers specifie -h upc -h ... + --help) # Do not use -h as Cray compilers specify -h upc -h ... 
printUsage 0 ;; @@ -481,7 +494,12 @@ for arg in "$@" ; do -optTauWrapFile*) tauWrapFile="$tauWrapFile ${arg#"-optTauWrapFile="}" - echoIfDebug "\ttauWrapFile is: "$tauWrapFile + echoIfDebug "\ttauWrapFile is: $tauWrapFile" + ;; + + -optTauGASPU*) + optTauGASPU="${arg#"-optTauGASPU="}" + echoIfDebug "\toptTauGASPU is: $optTauGASPU" ;; -optTauSelectFile*) @@ -729,14 +747,13 @@ for arg in "$@" ; do optCompInst=$TRUE disablePdtStep=$TRUE # force the debug flag so we get symbolic information - if [ $berkeley_upcc == $TRUE ]; - then - optCompile="$optCompile -Wc,-g" - optLinking="$optLinking -Wc,-g" - else - optCompile="$optCompile -g" - optLinking="$optLinking -g" - fi + if [ $upc == "berkeley" ] ; then + optCompile="$optCompile -Wc,-g" + optLinking="$optLinking -Wc,-g" + else + optCompile="$optCompile -g" + optLinking="$optLinking -g" + fi echoIfDebug "\tUsing Compiler-based Instrumentation" ;; -optPDTInst) @@ -756,6 +773,10 @@ for arg in "$@" ; do optFixHashIf=$TRUE echoIfDebug "\tFixing Hash-Ifs" ;; + -opt*) + #Assume any other options should be passed on to the compiler. + argsRemaining="$argsRemaining ${arg%% *}" + ;; -optMICOffloadLinking*) optMICOffloadLinking="${arg#"-optMICOffloadLinking="} $optMICOffloadLinking" @@ -1203,7 +1224,7 @@ if [ $optCompInst == $TRUE ]; then optLinking="$optLinking $optCompInstLinking" fi -if [ $berkeley_upcc == $TRUE ]; then +if [ $upc == "berkeley" ]; then optLinking=`echo $optLinking| sed -e 's@-Wl@-Wl,-Wl@g'` echoIfDebug "optLinking modified to accomodate -Wl,-Wl for upcc. 
optLinking=$optLinking" fi @@ -1336,20 +1357,47 @@ if [ $numFiles == 0 ]; then echoIfDebug "Linking command is $linkCmd " fi - echoIfDebug "trackUPCR = $trackUPCR, wrappers = $optWrappersDir/upc/bupc/link_options.tau " - if [ $trackUPCR == $TRUE -a $berkeley_upcc == $TRUE -a -r $optWrappersDir/upc/bupc/link_options.tau ] ; then - linkCmd="$linkCmd `cat $optWrappersDir/upc/bupc/link_options.tau` $optLinking" - echoIfDebug "Linking command is $linkCmd" - fi - if [ $trackUPCR == $TRUE -a $berkeley_upcc == $FALSE -a -r $optWrappersDir/upc/cray/link_options.tau -a -r $optWrappersDir/../libcray_upc_runtime_wrap.a ] ; then - linkCmd="$linkCmd `cat $optWrappersDir/upc/cray/link_options.tau` $optLinking" - echoIfDebug "Linking command is $linkCmd" + if [ $trackUPCR == $TRUE ] ; then + case $upc in + berkeley) + if [ -r $optWrappersDir/upc/bupc/link_options.tau ] ; then + linkCmd="$linkCmd `cat $optWrappersDir/upc/bupc/link_options.tau` $optLinking" + echoIfDebug "Linking command is $linkCmd" + else + echo "Warning: can't locate link_options.tau for Berkeley UPC runtime tracking" + fi + ;; + gnu) + if [ -r $optWrappersDir/upc/gupc/link_options.tau ] ; then + linkCmd="$linkCmd `cat $optWrappersDir/upc/gupc/link_options.tau` $optLinking" + echoIfDebug "Linking command is $linkCmd" + else + echo "Warning: can't locate link_options.tau for GNU UPC runtime tracking" + fi + ;; + cray) + if [ -r $optWrappersDir/upc/cray/link_options.tau -a -r $optWrappersDir/../libcray_upc_runtime_wrap.a ] ; then + linkCmd="$linkCmd `cat $optWrappersDir/upc/cray/link_options.tau` $optLinking" + echoIfDebug "Linking command is $linkCmd" + else + echo "Warning: can't locate link_options.tau for CRAY UPC runtime tracking" + fi + ;; + *) + echoIfDebug "upc = $upc" + ;; + esac fi - + if [ "x$tauWrapFile" != "x" ]; then echoIfDebug "Linking command is $linkCmd" fi + if [ "x$optTauGASPU" != "x" ]; then + linkCmd="$linkCmd $optTauGASPU" + echoIfDebug "Linking command is $linkCmd" + fi + if [ $optFujitsu == 
$TRUE ]; then linkCmd=`echo $linkCmd | sed -e 's/fccpx/FCCpx/g' -e 's/frtpx/FCCpx/g'` fi @@ -1857,15 +1905,37 @@ cmdCreatePompRegions="`${optOpari2ConfigTool} --nm` ${objectFilesForLinking} ${o echoIfDebug "Linking command is $linkCmd " fi - echoIfDebug "trackUPCR = $trackUPCR, wrappers = $optWrappersDir/upc/bupc/link_options.tau " - if [ $trackUPCR == $TRUE -a $berkeley_upcc == $TRUE -a -r $optWrappersDir/upc/bupc/link_options.tau ] ; then - newCmd="$newCmd `cat $optWrappersDir/upc/bupc/link_options.tau`" - echoIfDebug "Linking command is $newCmd" - fi - if [ $trackUPCR == $TRUE -a $berkeley_upcc == $FALSE -a -r $optWrappersDir/upc/cray/link_options.tau -a -r $optWrappersDir/../libcray_upc_runtime_wrap.a ] ; then - newCmd="$newCmd `cat $optWrappersDir/upc/cray/link_options.tau`" - echoIfDebug "Linking command is $newCmd" + if [ $trackUPCR == $TRUE ] ; then + case $upc in + berkeley) + if [ -r $optWrappersDir/upc/bupc/link_options.tau ] ; then + newCmd="$newCmd `cat $optWrappersDir/upc/bupc/link_options.tau` $optLinking" + echoIfDebug "Linking command is $newCmd" + else + echo "Warning: can't locate link_options.tau for Berkeley UPC runtime tracking" + fi + ;; + gnu) + if [ -r $optWrappersDir/upc/gupc/link_options.tau ] ; then + newCmd="$newCmd `cat $optWrappersDir/upc/gupc/link_options.tau` $optLinking" + echoIfDebug "Linking command is $newCmd" + else + echo "Warning: can't locate link_options.tau for GNU UPC runtime tracking" + fi + ;; + cray) + if [ -r $optWrappersDir/upc/cray/link_options.tau -a -r $optWrappersDir/../libcray_upc_runtime_wrap.a ] ; then + newCmd="$newCmd `cat $optWrappersDir/upc/cray/link_options.tau` $optLinking" + echoIfDebug "Linking command is $newCmd" + else + echo "Warning: can't locate link_options.tau for CRAY UPC runtime tracking" + fi + ;; + *) + echoIfDebug "upc = $upc" + ;; + esac fi if [ "x$tauWrapFile" != "x" ]; then @@ -1873,6 +1943,11 @@ cmdCreatePompRegions="`${optOpari2ConfigTool} --nm` ${objectFilesForLinking} ${o echoIfDebug 
"Linking command is $newCmd" fi + if [ "x$optTauGASPU" != "x" ]; then + newCmd="$newCmd $optTauGASPU" + echoIfDebug "Linking command is $newCmd" + fi + madeToLinkStep=$TRUE evalWithDebugMessage "$newCmd" "Linking (Together) object files" diff --git a/tools/src/tau_exec b/tools/src/tau_exec index 0c6a2dcde..c1dd869c8 100755 --- a/tools/src/tau_exec +++ b/tools/src/tau_exec @@ -244,7 +244,11 @@ else TAU_SHLIBX=.so fi -TAUEX_LD_LIBRARY_PATH=$BASEDIR/lib/$theBinding:$LD_LIBRARY_PATH +if [ "x$LD_LIBRARY_PATH" = "x" ] ; then + TAUEX_LD_LIBRARY_PATH=$BASEDIR/lib/$theBinding:$BASEDIR/lib +else + TAUEX_LD_LIBRARY_PATH=$BASEDIR/lib/$theBinding:$BASEDIR/lib:$LD_LIBRARY_PATH +fi if [ $apple = 1 ]; then TAUEX_LD_PRELOAD=$BASEDIR/lib/$theBinding/libTAU$TAU_SHLIBX else diff --git a/tools/src/tau_gen_wrapper b/tools/src/tau_gen_wrapper index 68f404e58..0d9deee11 100755 --- a/tools/src/tau_gen_wrapper +++ b/tools/src/tau_gen_wrapper @@ -5,10 +5,11 @@ usage() echo "" echo " $cmd - Generates a wrapper library that can intercept " echo " at link time or at runtime routines specified in a header file" - echo " Usage: $cmd [-w (default) | -d | -r] " + echo " Usage: $cmd [-w (default) | -d | -r] [-upc={berkeley,gnu,cray}]" echo " -w (default): generates wrappers for re-linking the application" echo " -d : generates wrappers by redefining routines during compilation in header files" echo " -r : generates wrappers that may be pre-loaded using tau_exec at runtime" + echo " -upc: generates UPC runtime wrapper for use with -optTrackUPCR" echo "NOTE: $cmd reads the TAU_MAKEFILE environment variable to get PDT settings" echo " Example: " echo " % $cmd hdf5.h /usr/lib/libhdf5.a " @@ -22,6 +23,7 @@ header_redirection_specified=0; headerfile="" library="" modarg="" +upc="" if [ $# = 0 ] ; then usage fi @@ -36,12 +38,19 @@ fi for arg in "$@"; do case $arg in -r) - runtime_specified=1; - shift; + runtime_specified=1 + shift ;; -d) - header_redirection_specified=1; - shift; + 
header_redirection_specified=1 + shift + ;; + -upc=*) + upc="`echo $arg | sed 's/-upc=//'`" # parse the loop variable; $1 drifts when an earlier argument was not shifted + shift + ;; + -h|-help|--help) + usage ;; esac done @@ -56,9 +65,71 @@ if [ ! -r $TAU_MAKEFILE ] ;then exit 1 fi +if ! which tau-config > /dev/null 2>&1 ; then + echo "ERROR: tau-config is not in your path." + exit 1 +fi + +pdtdir="`grep PDTDIR= $TAU_MAKEFILE | sed -e 's/PDTDIR=//g' `" +tauarch="`tau-config | grep TAUARCH | sed -e 's@TAUARCH=@@g' `" +tauroot="`tau-config | grep TAUROOT | sed -e 's@TAUROOT=@@g' `" +basedir="`tau-config | grep BASEDIR | sed -e 's@BASEDIR=@@g' `" + +if [ "x$upc" != "x" ] ; then + gasnetdir="`grep TAU_GASNET_DIR= $TAU_MAKEFILE | sed -e 's/TAU_GASNET_DIR=//g' `" + upcnetwork="`grep UPCNETWORK= $TAU_MAKEFILE | sed -e 's/UPCNETWORK=//g' `" + case $upc in + berkeley) + if ! which upcc > /dev/null 2>&1 ; then + echo "ERROR: upcc is not in your path." + exit 1 + fi + upcdir="`which upcc | sed -e 's@bin/upcc@@g'`" + if [ -d $upcdir/opt_inst ] ; then + upcdir="$upcdir/opt_inst" + elif [ -d "$upcdir/inst/opt" ] ; then + upcdir="$upcdir/inst/opt" + else + echo "ERROR: Cannot locate instrumented Berkeley configuration at $upcdir" + exit 1 + fi + echo PATH="$basedir/bin:$PATH" $tauroot/src/wrappers/upc/bupc/upcr/tau_upc_runtime_wrapper "$headerfile" "$library" -gasnet="$gasnetdir" -gasnetconduit=$upcnetwork -upc="$upcdir" + PATH="$basedir/bin:$PATH" $tauroot/src/wrappers/upc/bupc/upcr/tau_upc_runtime_wrapper "$headerfile" "$library" -gasnet="$gasnetdir" -gasnetconduit=$upcnetwork -upc="$upcdir" + retval=$? + mv select.tau `basename "$headerfile" .h`_wrapper + exit $retval + ;; + gnu) + if ! which upc > /dev/null 2>&1 ; then + echo "ERROR: upc is not in your path." 
+ exit 1 + else + upcdir="`which upc | sed -e 's@bin/upc@@g'`" + fi + echo PATH="$basedir/bin:$PATH" $tauroot/src/wrappers/upc/gupc/tau_upc_wrapper "$headerfile" "$library" -gasnet="$gasnetdir" -gasnetconduit=$upcnetwork -upc="$upcdir" + PATH="$basedir/bin:$PATH" $tauroot/src/wrappers/upc/gupc/tau_upc_wrapper "$headerfile" "$library" -gasnet="$gasnetdir" -gasnetconduit=$upcnetwork -upc="$upcdir" + retval=$? + mv select.tau `basename "$headerfile" .h`_wrapper + exit $retval + ;; + cray) + if ! which cc > /dev/null 2>&1 ; then + echo "ERROR: cc is not in your path." + exit 1 + fi + PATH="$basedir/bin:$PATH" $tauroot/src/wrappers/upc/crayupc/tau_upc_wrapper "$headerfile" "$library" + retval=$? + mv select.tau `basename "$headerfile" .h`_wrapper + exit $retval + ;; + *) + echo "ERROR: Unknown UPC family: $upc" + exit 1 + exit $? + ;; + esac +fi -pdtdir=`grep PDTDIR= $TAU_MAKEFILE | sed -e 's/PDTDIR=//g' ` -tauarch=`tau-config | grep TAUARCH | sed -e 's@TAUARCH=@@g' ` parser=$pdtdir/$tauarch/bin/cxxparse if [ ! -x $parser ]; then diff --git a/tools/src/tau_upc.sh b/tools/src/tau_upc.sh index 064dce5b7..8e9b1e776 100755 --- a/tools/src/tau_upc.sh +++ b/tools/src/tau_upc.sh @@ -210,7 +210,7 @@ if [ $invoke_with_tau = yes ] ; then cat < /tmp/makefile.tau.$USER.$$ include $MAKEFILE all: - @\$(TAU_COMPILER) $TAUCOMPILER_OPTIONS \$(TAU_RUN_CC) \$(TAU_UPC_COMPILER_OPTIONS) $TAUARGS + @\$(TAU_COMPILER) $TAUCOMPILER_OPTIONS -optTauGASPU=\$(TAU_GASPU_UPC) \$(TAU_RUN_CC) \$(TAU_UPC_COMPILER_OPTIONS) $TAUARGS EOF make -s -f /tmp/makefile.tau.$USER.$$ retval=$? 
diff --git a/utils/FixMakefile b/utils/FixMakefile index 8dc6f4ad8..b8f6422ba 100755 --- a/utils/FixMakefile +++ b/utils/FixMakefile @@ -931,6 +931,10 @@ case $1 in echo NOTE: Using fixes for GNU 4.6+ compiler echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout ;; + CLANG) + echo NOTE: Using CLANG C++ compiler + echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout + ;; CRAYCC) echo NOTE: Using The Cray T3E CC C++ compiler echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout @@ -1388,11 +1392,11 @@ case $1 in echo "NOTE: Enabled Tracing. Compiling with -DTRACING_ON" echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout ;; - UPCC) + BUPC) echo "NOTE: Enabled Berkeley UPC (upcc) GASP support in TAU." echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout ;; - GNU_UPC) + GUPC) echo "NOTE: Enabled GNU UPC GASP support in TAU" echo "s/#$1#\(.*\)/$bs\1#$1#/g" >> $sedout ;; diff --git a/utils/Makefile b/utils/Makefile index 5de0db052..9aadebc51 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -45,10 +45,12 @@ LINKER = $(CC) LEXTRA = DWARFOPTS= #APPLECXX#CEXTRA = -DAPPLECXX #ENDIF# +#CLANG#CEXTRA = -DTAU_CLANG -DTAU_DOT_H_LESS_HEADERS #ENDIF# #HP_CFLAGS#CEXTRA = -AA -DTAU_DOT_H_LESS_HEADERS #ENDIF# #FX#CEXTRA = -Xg -DTAU_DOT_H_LESS_HEADERS -DTAU_FUJITSU #ENDIF# #CRAYCC#CEXTRA = -DTAU_DOT_H_LESS_HEADERS #ENDIF# #MPC#CEXTRA = -DTAU_MPC -DTAU_DOT_H_LESS_HEADERS #ENDIF# +#OPEN64ORC#CEXTRA = -DTAU_OPEN64 -DTAU_DOT_H_LESS_HEADERS #ENDIF# #MPC#CC = $(CONFIG_CXX) #ENDIF# #CRAYX1CC#CEXTRA = -DTAU_DOT_H_LESS_HEADERS #ENDIF# #MINGW#CEXTRA = -DTAU_WINDOWS -DTAU_DISABLE_SIGUSR -DTAU_MINGW -DTAU_ENABLE -DTAU_GNU #ENDIF# diff --git a/utils/tau_cupti_avail/tau_cupti_avail.cpp b/utils/tau_cupti_avail/tau_cupti_avail.cpp index 2259cc1ac..bfe3fa962 100644 --- a/utils/tau_cupti_avail/tau_cupti_avail.cpp +++ b/utils/tau_cupti_avail/tau_cupti_avail.cpp @@ -252,13 +252,12 @@ int main(int argc, char **argv) if (listCounters) { + Tau_CuptiLayer_Initialize_Map(); CuptiCounterEvent::printHeader(); - for(counter_map_it it = 
Tau_CuptiLayer_map().begin(); it != Tau_CuptiLayer_map().end(); it++) + for(counter_map_it it = Tau_CuptiLayer_Counter_Map.begin(); it != Tau_CuptiLayer_Counter_Map.end(); it++) { it->second->print(); - //ev->print(); } - } if (checkCounters) @@ -286,11 +285,11 @@ int main(int argc, char **argv) for(vector::iterator it = tags.begin(); it != tags.end(); it++) { - //printf("size of available counters: %d.\n", Tau_CuptiLayer_map().size()); + //printf("size of available counters: %d.\n", Tau_CuptiLayer_Counter_Map.size()); - if (Tau_CuptiLayer_map().count(*it) > 0) + if (Tau_CuptiLayer_Counter_Map.count(*it) > 0) { - CuptiCounterEvent* ev = Tau_CuptiLayer_map().find(*it)->second; + CuptiCounterEvent* ev = Tau_CuptiLayer_Counter_Map.find(*it)->second; //ev->print(); tags_added.push_back(*it); counters_added.push_back(ev); diff --git a/utils/tau_datatypes.h b/utils/tau_datatypes.h index 77c8337b9..b72f6e939 100644 --- a/utils/tau_datatypes.h +++ b/utils/tau_datatypes.h @@ -4,7 +4,6 @@ *****************************************************************************/ #include -using std::string; enum instrumentKind_t { TAU_LOOPS, TAU_LINE, TAU_ROUTINE_DECL, TAU_ROUTINE_ENTRY, TAU_ROUTINE_EXIT, TAU_ABORT, TAU_NOT_SPECIFIED, TAU_IO, TAU_MEMORY, TAU_TIMER, TAU_PHASE, TAU_INIT, TAU_FORALL, TAU_BARRIER, TAU_FENCE, TAU_NOTIFY }; @@ -19,8 +18,8 @@ enum tau_language_t { tau_c, tau_cplusplus, tau_fortran, tau_upc }; struct itemRef { itemRef(const pdbItem *i, bool isT); itemRef(const pdbItem *i, itemKind_t k, int l, int c); - itemRef(const pdbItem *i, itemKind_t k, int l, int c, string code, itemAttr_t); - //itemRef(const pdbItem *i, itemKind_t k, itemQualifier_t q, string name); + itemRef(const pdbItem *i, itemKind_t k, int l, int c, std::string code, itemAttr_t); + //itemRef(const pdbItem *i, itemKind_t k, itemQualifier_t q, std::string name); itemRef(const pdbItem *i, bool isT, int l, int c); itemRef(const pdbItem *i, itemKind_t k, pdbLoc start, pdbLoc stop); const pdbItem *item; @@ 
-32,13 +31,11 @@ struct itemRef { int col; pdbLoc begin; pdbLoc end; - string snippet; + std::string snippet; itemAttr_t attribute; }; #endif /* TAU_DYNINST */ -extern bool fuzzyMatch(const string& a, const string& b); - /*************************************************************************** * $RCSfile: tau_datatypes.h,v $ $Author: geimer $ * $Revision: 1.13 $ $Date: 2008/12/11 16:02:22 $ diff --git a/utils/tau_instrument.cpp b/utils/tau_instrument.cpp index ac3fef734..42a3a1f23 100644 --- a/utils/tau_instrument.cpp +++ b/utils/tau_instrument.cpp @@ -36,7 +36,6 @@ using namespace std; //#define DEBUG 1 extern bool wildcardCompare(char *wild, char *string, char kleenestar); extern bool instrumentEntity(const string& function_name); -extern bool fuzzyMatch(const string& a, const string& b); extern bool memory_flag; bool isVoidRoutine(const pdbItem* r); int parseLanguageString(const string& str); @@ -59,6 +58,52 @@ bool use_spec = false; /* by default, do not use code from specification file /////////////////////////////////////////////////////////////////////////// +/* -------------------------------------------------------------------------- */ +/* -- Fuzzy Match. Allows us to match files that don't quite match properly, + * but in fact refer to the same file. E.g., /home/pkg/foo.cpp and ./foo.cpp + * or foo.cpp and ./foo.cpp. This routine allows us to match such files! 
+ * -------------------------------------------------------------------------- */ +static bool fuzzyMatch(const string & a, const string & b) +{ /* This function allows us to match string like ./foo.cpp with + /home/pkg/foo.cpp */ + if (a == b) + { /* the two files do match */ +#ifdef DEBUG + cout <<"fuzzyMatch returns true for "< str && isspace(*end)) end--; + + // Write new null terminator + *(end+1) = 0; + + return str; +} + /////////////////////////////////////////////////////////////////////////// // parseInstrumentationCommand // input: line - character string containing a line of text from the selective @@ -458,6 +524,7 @@ void parseInstrumentationCommand(char *line, int lineno) #endif /* DEBUG */ original = line; + line = trimwhitespace(line); /* check the initial keyword */ if (strncmp(line, "file", 4) == 0) { @@ -2437,7 +2504,6 @@ bool addFileInstrumentationRequests(PDB& p, pdbFile *file, vector& it printf("Instrumenting memory references for Fortran when selective instrumentation file was not specified. 
Using memory file=\"*\" routine = \"#\"\n"); #endif /* DEBUG */ instrumentList.push_back(new tauInstrument(string("*"), string("#"), TAU_MEMORY)); - } for (it = instrumentList.begin(); it != instrumentList.end(); it++) { diff --git a/utils/tau_wrap.cpp b/utils/tau_wrap.cpp index 73435c8be..e9b6c5ca2 100644 --- a/utils/tau_wrap.cpp +++ b/utils/tau_wrap.cpp @@ -1,19 +1,19 @@ /**************************************************************************** -** TAU Portable Profiling Package ** -** http://www.cs.uoregon.edu/research/paracomp/tau ** -***************************************************************************** -** Copyright 2007 ** -** Department of Computer and Information Science, University of Oregon ** -** Advanced Computing Laboratory, Los Alamos National Laboratory ** -****************************************************************************/ + ** TAU Portable Profiling Package ** + ** http://www.cs.uoregon.edu/research/paracomp/tau ** + ***************************************************************************** + ** Copyright 2007 ** + ** Department of Computer and Information Science, University of Oregon ** + ** Advanced Computing Laboratory, Los Alamos National Laboratory ** + ****************************************************************************/ /*************************************************************************** -** File : tau_wrap.cpp ** -** Description : Generates a wrapper library for external pkgs ** -** for instrumentation with TAU. ** -** Author : Sameer Shende ** -** Contact : sameer@cs.uoregon.edu sameer@paratools.com ** -** Documentation : ** -***************************************************************************/ + ** File : tau_wrap.cpp ** + ** Description : Generates a wrapper library for external pkgs ** + ** for instrumentation with TAU. 
** + ** Author : Sameer Shende ** + ** Contact : sameer@cs.uoregon.edu sameer@paratools.com ** + ** Documentation : ** + ***************************************************************************/ /* Headers */ #include @@ -51,77 +51,70 @@ using namespace std; #else #define TAU_DIR_CHARACTER '/' #endif /* TAU_WINDOWS */ - +/* Function call interception types: + * runtime interception: bar remains bar + * preprocessor interception: bar becomes tau_bar + * wrapper library interception: bar becomes __wrap_bar + */ +#define RUNTIME_INTERCEPT 1 +#define PREPROC_INTERCEPT 0 +#define WRAPPER_INTERCEPT -1 +/* Known UPC environments */ +#define BERKELEY 1 +#define GNU 2 +#define CRAY 3 //#define DEBUG 1 + /* For selective instrumentation */ extern int processInstrumentationRequests(char *fname); extern bool instrumentEntity(const string& function_name); extern bool processFileForInstrumentation(const string& file_name); extern bool isInstrumentListEmpty(void); -/* Prototypes for selective instrumentation */ -extern bool addFileInstrumentationRequests(PDB& p, pdbFile *file, vector & itemvec); +/* Prototypes for selective instrumentation */ +extern bool addFileInstrumentationRequests(PDB& p, pdbFile *file, vector & itemvec); /* Globals */ bool memory_flag = false; /* by default, do not insert malloc.h in instrumented C/C++ files */ bool strict_typing = false; /* by default unless --strict option is used. */ bool shmem_wrapper = false; /* by default unless --shmem option is used. */ -bool upc_wrapper = false; /* by default unless --upc option is used. */ bool pshmem_use_underscore_instead_of_p = false; /* by default unless --pshmem_use_underscore_instead_of_p option is used. 
*/ +int upc = 0; /* UPC environment */ -/////////////////////////////////////////////////////////////////////////// +struct FunctionSignatureInfo +{ + FunctionSignatureInfo(pdbRoutine * r) : + shmem_fortran_interface(false), + shmem_len_argcount(0), + shmem_pe_argcount(0), + shmem_cond_argcount(0), + func(r->name()), + proto(r->name()) + { } + + // For shmem wrapping + bool shmem_fortran_interface; + int shmem_len_argcount; + int shmem_pe_argcount; + int shmem_cond_argcount; + + // For upc wrapping + // ... + + string func; + string proto; + string returntypename; + string funchandle; + string rcalledfunc; +}; -/* -------------------------------------------------------------------------- */ -/* -- Fuzzy Match. Allows us to match files that don't quite match properly, - * but infact refer to the same file. For e.g., /home/pkg/foo.cpp and ./foo.cpp - * or foo.cpp and ./foo.cpp. This routine allows us to match such files! - * -------------------------------------------------------------------------- */ -bool fuzzyMatch(const string& a, const string& b) -{ /* This function allows us to match string like ./foo.cpp with - /home/pkg/foo.cpp */ - if (a == b) - { /* the two files do match */ -#ifdef DEBUG - cout <<"fuzzyMatch returns true for "<& itemvec, PDB& pdb, pdbFile *file) { - +static void getCReferencesForWrapper(vector & itemvec, PDB& pdb, pdbFile *file) +{ /* moved selective instrumentation file processing here */ - if (!isInstrumentListEmpty()) - { /* there are finite instrumentation requests, add requests for this file */ + if (!isInstrumentListEmpty()) { + /* there are finite instrumentation requests, add requests for this file */ addFileInstrumentationRequests(pdb, file, itemvec); } } -#ifdef OLD +static bool isReturnTypeVoid(pdbRoutine *r) { - /* we used to keep the selective instrumentation file processing at the - entry. But, when a routine is specified as a phase, we need to annotate - its itemRef accordingly. 
This needs the entry/exit records to be created - prior to processing the selective instrumentation file. N/A for wrappers - as there are no entry/exit records created.*/ - - PDB::croutinevec routines = pdb.getCRoutineVec(); - for (PDB::croutinevec::const_iterator rit=routines.begin(); - rit!=routines.end(); ++rit) - { - pdbRoutine::locvec retlocations = (*rit)->returnLocations(); - if ( (*rit)->location().file() == file && !(*rit)->isCompilerGenerated() - && (instrumentEntity((*rit)->fullName())) ) - { - itemvec.push_back(new itemRef(*rit, BODY_BEGIN, - (*rit)->bodyBegin().line(), (*rit)->bodyBegin().col())); -#ifdef DEBUG - cout <<" Location begin: "<< (*rit)->location().line() << " col " - << (*rit)->location().col() <headBegin().line() << " col " << (*rit)->headBegin().col() <headEnd().line() << " col " - << (*rit)->headEnd().col() <bodyBegin().line() << " col " << (*rit)->bodyBegin().col() <bodyEnd().line() << " col " - << (*rit)->bodyEnd().col() <line() << " col " - << (*rlit)->col() <line(), (*rlit)->col())); - } - itemvec.push_back(new itemRef(*rit, BODY_END, - (*rit)->bodyEnd().line(), (*rit)->bodyEnd().col())); -#ifdef DEBUG - cout <<" Return type: " << (*rit)->signature()->returnType()->name()<name() <<" Signature: " << - (*rit)->signature()->name() <callees(); - } - } - - /* All instrumentation requests are in. 
Sort these now and remove duplicates */ -#ifdef DEBUG - for(vector::iterator iter = itemvec.begin(); iter != itemvec.end(); - iter++) - { - cout <<"Before SORT: Items ("<<(*iter)->line<<", "<<(*iter)->col<<")" - <<"snippet = "<<(*iter)->snippet<::iterator iter = itemvec.begin(); iter != itemvec.end(); - iter++) - { - cout <<"Items ("<<(*iter)->line<<", "<<(*iter)->col<<")" - <<"snippet = "<<(*iter)->snippet<signature()->returnType()->name(); + return ((rname.compare(0, 4, "void") == 0) && + (rname.find("*") == string::npos)); } -#endif /* OLD - delete */ -bool isReturnTypeVoid(pdbRoutine *r) +static bool doesRoutineNameContainGet(string const & rname) { - if ((strcmp(r->signature()->returnType()->name().c_str(), "void") == 0) || -(strcmp(r->signature()->returnType()->name().c_str(), "void ") == 0)) - { -#ifdef DEBUG - cout <<"Return type is void for "<name()<name(); + + bool isPut = false; + bool isGet = false; + bool isCpy = false; + bool isSig = false; + + // FIXME: list functions not supported at this time + if ((rname.find("_vlist") != string::npos) || + (rname.find("_ilist") != string::npos)) { + return; } - if (strstr(rname, "long") !=0) { // long is found - multiplier_string=string("sizeof(long)*"); return; + // FIXME: strided functions not supported at this time + if (rname.find("strided") != string::npos) { + return; } - if (strstr(rname, "double") !=0) { // double is found - multiplier_string=string("sizeof(double)*"); return; + // FIXME: semephore functions not supported at this time + if (rname.find("_sem_") != string::npos) { + return; } - if (strstr(rname, "float") !=0) { // float is found - multiplier_string=string("sizeof(float)*"); return; + + if (rname.find("_memput") != string::npos) { + isPut = true; + if (rname.find("_signal") != string::npos) { + isSig = true; + } + } else if (rname.find("_memget") != string::npos) { + isGet = true; + } else if (rname.find("_memcpy") != string::npos) { + isCpy = true; + } else if (rname.find("_memset") != 
string::npos) { + isPut = true; } - if (strstr(rname, "16") !=0) { - multiplier_string=string("2*"); return; + + if (isGet) { + impl << " TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, " + << upc_mythread() << ", a3, " << upc_threadof("a2") << ");" << endl; + } else if (isPut) { + if (isSig) { + // This is unsafe.... Maybe in future map the semephore to a tag? + // In any case, we need support for _sem_wait for this to work. + //impl << " TAU_TRACE_SENDMSG((int)a4, upcr_threadof_shared(a1), a3);" << endl; + } else { + impl << " TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, " << upc_threadof("a1") << ", a3);" << endl; + } + } else if (isCpy) { + impl << " size_t dst_thread = " << upc_threadof("a1") << ";\n" + << " size_t src_thread = " << upc_threadof("a2") << ";\n" + << " size_t my_thread = " << upc_mythread() << ";\n" + << " if (my_thread == src_thread) {\n" + << " TAU_TRACE_SENDMSG(TAU_UPC_TAGID_NEXT, dst_thread, a3);\n" + << " } else {\n" + << " TAU_TRACE_SENDMSG_REMOTE(TAU_UPC_TAGID_NEXT, dst_thread, a3, src_thread);\n" + << " }\n" + << endl; } - if (strstr(rname, "32") !=0) { - multiplier_string=string("4*"); return; + +} + +void printUPCMessageAfterRoutine(pdbRoutine * r, ofstream & impl, FunctionSignatureInfo sig) +{ + string const & rname = r->name(); + + bool isPut = false; + bool isGet = false; + bool isCpy = false; + bool isSig = false; + + // FIXME: list functions not supported at this time + if ((rname.find("_vlist") != string::npos) || + (rname.find("_ilist") != string::npos)) { + return; } - if (strstr(rname, "64") !=0) { - multiplier_string=string("8*"); return; + // FIXME: strided functions not supported at this time + if (rname.find("strided") != string::npos) { + return; } - if (strstr(rname, "128") !=0) { - multiplier_string=string("16*"); return; + // FIXME: semephore functions not supported at this time + if (rname.find("_sem_") != string::npos) { + return; } - if (strstr(rname, "4") !=0) { // INT4_SWAP uses 4 bytes not 4 bits. 
- multiplier_string=string("4*"); return; + + if (rname.find("_memput") != string::npos) { + isPut = true; + if (rname.find("_signal") != string::npos) { + isSig = true; + } + } else if (rname.find("_memget") != string::npos) { + isGet = true; + } else if (rname.find("_memcpy") != string::npos) { + isCpy = true; + } else if (rname.find("_memset") != string::npos) { + isPut = true; } - if (strstr(rname, "8") !=0) { // INT8_SWAP uses 8 bytes not 8 bits. = 64 - multiplier_string=string("8*"); return; + + if (isGet) { + impl << " TAU_TRACE_RECVMSG(TAU_UPC_TAGID, " << upc_threadof("a2") << ", a3);" << endl; + } else if (isPut && !isSig) { + impl << " TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, " << upc_mythread() << ", a3, " + << upc_threadof("a1") << ");" << endl; + } else if (isCpy) { + impl << " if (my_thread == src_thread) {\n" + << " TAU_TRACE_RECVMSG_REMOTE(TAU_UPC_TAGID, my_thread, a3, dst_thread);\n" + << " } else {\n" + << " TAU_TRACE_RECVMSG(TAU_UPC_TAGID, src_thread, a3);\n" + << " }\n" + << endl; } } -void printShmemMessageBeforeRoutine(pdbRoutine *r, ofstream& impl, int len_argument_no, int pe_argument_no, bool fortran_interface) { - const char *rname = r->name().c_str(); - int routine_len = r->name().size(); - string multiplier_string(""); + +void printShmemMessageBeforeRoutine(pdbRoutine *r, ofstream& impl, FunctionSignatureInfo sig) +{ + int len_argument_no = sig.shmem_len_argcount; + int pe_argument_no = sig.shmem_pe_argcount; + bool fortran_interface = sig.shmem_fortran_interface; + string const & rname = r->name(); char length_string[1024]; char processor_arg[256]; @@ -315,44 +351,41 @@ void printShmemMessageBeforeRoutine(pdbRoutine *r, ofstream& impl, int len_argu sprintf(processor_arg, "a%d", pe_argument_no); } + char const * multiplier_string = getMultiplierString(rname); #ifdef DEBUG - printf("Size = %d, name = %s\n", routine_len, rname); -#endif /* DEBUG */ - getMultiplierString(rname, multiplier_string); -#ifdef DEBUG - printf("Multiplier string = 
%s\n", multiplier_string.c_str()); + printf("Multiplier string = %s\n", multiplier_string); #endif /* DEBUG */ if (len_argument_no != 0) { if (fortran_interface) { - sprintf(length_string, "%s (*a%d)", multiplier_string.c_str(), len_argument_no); + sprintf(length_string, "%s (*a%d)", multiplier_string, len_argument_no); } else { - sprintf(length_string, "%sa%d", multiplier_string.c_str(), len_argument_no); + sprintf(length_string, "%sa%d", multiplier_string, len_argument_no); } } else { - sprintf(length_string, "%s1", multiplier_string.c_str()); + sprintf(length_string, "%s1", multiplier_string); } - - if ((doesRoutineNameContainGet(rname, routine_len) == true) || - (doesRoutineNameContainFetchOp(rname, routine_len) == true)) { /* Get */ + + if (doesRoutineNameContainGet(rname) || doesRoutineNameContainFetchOp(rname)) { #ifdef DEBUG - printf("Routine name %s contains Get variant\n", rname); + cout << "Routine name " << rname << " contains Get variant" << endl; #endif /* DEBUG */ impl <<" TAU_TRACE_SENDMSG_REMOTE(TAU_SHMEM_TAGID_NEXT, Tau_get_node(), "<name().c_str(); - int routine_len = r->name().size(); - string multiplier_string(""); +void printShmemMessageAfterRoutine(pdbRoutine *r, ofstream& impl, FunctionSignatureInfo sig) +{ + int len_argument_no = sig.shmem_len_argcount; + int pe_argument_no = sig.shmem_pe_argcount; + int cond_argument_no = sig.shmem_cond_argcount; + bool fortran_interface = sig.shmem_fortran_interface; + string const & rname = r->name(); char length_string[1024]; char processor_arg[256]; char cond_string[1024]; @@ -367,47 +400,44 @@ void printShmemMessageAfterRoutine(pdbRoutine *r, ofstream& impl, int len_argum sprintf(processor_arg, "a%d", pe_argument_no); } + char const * multiplier_string = getMultiplierString(rname); #ifdef DEBUG - printf("Size = %d, name = %s\n", routine_len, rname); -#endif /* DEBUG */ - getMultiplierString(rname, multiplier_string); -#ifdef DEBUG - printf("Multiplier string = %s\n", multiplier_string.c_str()); + 
printf("Multiplier string = %s\n", multiplier_string); #endif /* DEBUG */ if (len_argument_no != 0) { if (fortran_interface) { - sprintf(length_string, "%s (*a%d)", multiplier_string.c_str(), len_argument_no); + sprintf(length_string, "%s (*a%d)", multiplier_string, len_argument_no); } else { - sprintf(length_string, "%sa%d", multiplier_string.c_str(), len_argument_no); + sprintf(length_string, "%sa%d", multiplier_string, len_argument_no); } } else { - sprintf(length_string, "%s1", multiplier_string.c_str()); + sprintf(length_string, "%s1", multiplier_string); } - is_it_a_get = doesRoutineNameContainGet(rname, routine_len); - is_it_a_fetchop = doesRoutineNameContainFetchOp(rname, routine_len); - is_it_a_cond_fetchop = doesRoutineNameContainCondFetchOp(rname, routine_len); - - if ((strstr(rname, "start_pes") != 0) || (strstr(rname, "shmem_init") != 0)) { /* if it is either of these */ - if (pshmem_use_underscore_instead_of_p) { - impl << " tau_totalnodes(1,_shmem_n_pes());"<name(); - proto = r->name(); - funchandle = string("_h) ("); - rcalledfunc = string("(*"+r->name()+"_h)"); - - func.append("("); - rcalledfunc.append("("); - proto.append("("); +void printFunctionNameInOutputFile(pdbRoutine *r, ofstream& impl, char const * prefix, FunctionSignatureInfo & sig) +{ + sig.func = r->name() + "("; + sig.proto = r->name() + "("; + sig.rcalledfunc = "(*" + r->name() + "_h)"; + sig.funchandle = "_h) ("; + + pdbGroup const * grp = r->signature()->returnType()->isGroup(); + if (grp) { + sig.returntypename = grp->name(); + } else { + sig.returntypename = r->signature()->returnType()->name(); + if (upc && (sig.returntypename.compare(0, 10, "shared[1] ") == 0)) { + sig.returntypename.replace(0, 10, "shared "); + } + } + impl << sig.returntypename << prefix << sig.func; - impl<name()<signature()->arguments(); + int argcount = 1; + pdbType::argvec const & av = r->signature()->arguments(); for(pdbType::argvec::const_iterator argsit = av.begin(); argsit != av.end(); argsit++, 
argcount++) { - char number[256]; #ifdef DEBUG - cout <<"Argument "<<(*argsit).name()<<" Type "<<(*argsit).type()->name()<name() <<" Type " << argsit->type()->name() << endl; #endif /* DEBUG */ if (shmem_wrapper) { -#ifdef DEBUG - cout <<"Argument "<<(*argsit).name()<<" Type "<<(*argsit).type()->name()<name().compare("len") == 0) || + (argsit->name().compare("nelems") == 0)) { #ifdef DEBUG printf("Argcount = %d for len\n", argcount); #endif /* DEBUG */ - shmem_len_argcount = argcount; - if ((*argsit).type()->kind() == pdbItem::TY_PTR) { - fortran_interface = true; + sig.shmem_len_argcount = argcount; + if (argsit->type()->kind() == pdbItem::TY_PTR) { + sig.shmem_fortran_interface = true; } } - if (strcmp((*argsit).name().c_str(), "pe") == 0) { + if (argsit->name().compare("pe") == 0) { #ifdef DEBUG printf("Argcount = %d for pe\n", argcount); #endif /* DEBUG */ - shmem_pe_argcount = argcount; - if ((*argsit).type()->kind() == pdbItem::TY_PTR) { - fortran_interface = true; + sig.shmem_pe_argcount = argcount; + if (argsit->type()->kind() == pdbItem::TY_PTR) { + sig.shmem_fortran_interface = true; } } - if ((strcmp((*argsit).name().c_str(), "match") == 0) || - (strcmp((*argsit).name().c_str(), "cond") == 0)) { + if ((argsit->name().compare("match") == 0) || + (argsit->name().compare("cond") == 0)) { #ifdef DEBUG printf("Argcount = %d for match/cond\n", argcount); #endif /* DEBUG */ - shmem_cond_argcount = argcount; + sig.shmem_cond_argcount = argcount; } } + if (argcount != 1) { /* not a startup */ - func.append(", "); - proto.append(", "); - funchandle.append(", "); - rcalledfunc.append(", "); + sig.func.append(", "); + sig.proto.append(", "); + sig.rcalledfunc.append(", "); + sig.funchandle.append(", "); impl<<", "; } + + char number[256]; sprintf(number, "%d", argcount); const pdbGroup *gr; string argtypename; - if ( (gr=(*argsit).type()->isGroup()) != 0) { - argtypename=gr->name(); + if ((gr = argsit->type()->isGroup()) != 0) { + argtypename = gr->name(); } else { 
- argtypename=(*argsit).type()->name(); + argtypename = argsit->type()->name(); } -/* Originally, we were only checking { } in arg names for upc_wrapper. Now - * we do it for all cases because PDB files from Rose have this artifact.*/ - if (1) { - /* upc headers sometimes have struct members in the argument name: - * const struct upc_filevec {upc_off_t offset;size_t len;}* - * We need to erase everything between the two curly braces */ - int pos1, pos2; + + /* headers sometimes have struct members in the argument name: + * const struct upc_filevec {upc_off_t offset;size_t len;}* + * We need to erase everything between the two curly braces */ + int pos1 = argtypename.find("{"); + int pos2 = argtypename.find("}"); + if (pos1 != string::npos && pos2 != string::npos) { #ifdef DEBUG - cout <<"BEFORE ARG type="< 0 && pos2 > 0) argtypename.erase(pos1,pos2-pos1+1); + argtypename.erase(pos1, pos2-pos1+1); #ifdef DEBUG - cout <<"AFTER ARG type="< 0 && pos2 > 0) argtypename.erase(pos1,pos2-pos1+1); + argtypename.erase(pos1, pos2-pos1+1); #ifdef DEBUG - cout <<"AFTER ARG type="<name() + "_p"; string macro("#define "); - string func(r->name()); - string proto(r->name()); - string protoname(r->name()); - string funchandle("_h) ("); - string rcalledfunc("(*"+r->name()+"_h)"); - string wcalledfunc("__real_"+r->name()); - string dltext; - string returntypename; string retstring(" return;"); - const pdbGroup *grp; - func.append("("); - proto.append("("); - protoname.append("_p"); - bool fortran_interface = false; /* if *len or *pe appears in the arglist */ - int shmem_len_argcount = 0; - int shmem_pe_argcount = 0; - int shmem_cond_argcount = 0; + string dltext; if (r->signature()->hasEllipsis()) { // For a full discussion of why vararg functions are difficult to wrap // please see: http://www.swig.org/Doc1.3/Varargs.html#Varargs - impl <<"#warning \"TAU: Not generating wrapper for vararg function "<name()<<"\""<name()<signature()->returnType()->isGroup()) != 0) { - returntypename = 
grp->name(); - } else { - returntypename = r->signature()->returnType()->name(); - if (upc_wrapper) { -#ifdef DEBUG - cout <<"RETURN Type name = "<< r->signature()->returnType()->name()<name() << "_h = NULL;"<name()<name() << "_h = NULL;"<name()<name() << "_h = NULL;"<name()<name() << "_h = NULL;"<name()<name() + string("_h = (") + protoname + string("_h) dlsym(tau_handle,\"")+r->name() + string("\"); \n"); - } - else - { - dlsym = r->name() + string("_h = dlsym(tau_handle,\"")+r->name() + - string("\"); \n"); - } - dltext = string(" if (tau_handle == NULL) \n") + - string(" tau_handle = (void *) dlopen(tau_orig_libname, RTLD_NOW); \n\n") + - string(" if (tau_handle == NULL) { \n") + - string(" perror(\"Error opening library in dlopen call\"); \n")+ retstring + string("\n") + - string(" } \n") + - string(" else { \n") + - string(" if (") + r->name() + string("_h == NULL)\n\t") + dlsym + - string(" if (") + r->name() + string ("_h == NULL) {\n") + - string(" perror(\"Error obtaining symbol info from dlopen'ed lib\"); \n") + string(" ")+ retstring + string("\n }\n"); - } + ostringstream buff; + buff << " if (tau_handle == NULL) \n" + << " tau_handle = (void *) dlopen(tau_orig_libname, RTLD_NOW); \n\n" + << " if (tau_handle == NULL) { \n" + << " perror(\"Error opening library in dlopen call\"); \n" + << retstring << "\n" + << " } else { \n" + << " if (" << r->name() << "_h == NULL)\n" + << " "; + if (strict_typing) + buff << r->name() << "_h = (" << protoname << "_h) dlsym(tau_handle,\"" << r->name() << "\"); \n"; + else + buff << r->name() << "_h = dlsym(tau_handle,\"" << r->name() << "\"); \n"; + buff << " if (" << r->name() << "_h == NULL) {\n" + << " perror(\"Error obtaining symbol info from dlopen'ed lib\"); \n" + << " " << retstring << "\n" + << " }\n"; + dltext = buff.str(); + } /* if (runtime == RUNTIME_INTERCEPT) */ if (!isVoid) { - impl<<" "<fullName()<<"\", \"\", " - <fullName()<<"\", \"\", "< tau_bar */ - impl<<" "< tau_bar */ + impl<<" "< netcdf */ /* 
-------------------------------------------------------------------------- */ -void extractLibName(const char *filename, string& libname) +string extractLibName(string const & filename) { - char *name = strdup(filename); - int len = strlen(name); /* length */ - int i; - - for (i=0; i < len; i++) - { - if (name[i] == '.') name[i] = '\0'; /* truncate it if . is found */ - } - libname=string(name); + return filename.substr(0, filename.find(".")); } /* -------------------------------------------------------------------------- */ /* -- Instrumentation routine for a C program ------------------------------- */ /* -------------------------------------------------------------------------- */ -bool instrumentCFile(PDB& pdb, pdbFile* f, ofstream& header, ofstream& impl, ofstream& linkoptsfile, string& group_name, string& header_file, int runtime, string& runtime_libname, string& libname) +bool instrumentCFile(PDB& pdb, pdbFile* f, ofstream& header, ofstream& impl, + ofstream& linkoptsfile, string& group_name, string& header_file, + int runtime, string& runtime_libname, string& libname) { - //static int firsttime=0; string file(f->name()); - + // open source file ifstream istr(file.c_str()); if (!istr) { @@ -820,111 +844,127 @@ bool instrumentCFile(PDB& pdb, pdbFile* f, ofstream& header, ofstream& impl, ofs #endif // initialize reference vector - vector itemvec; + vector itemvec; getCReferencesForWrapper(itemvec, pdb, f); PDB::croutinevec routines = pdb.getCRoutineVec(); - for (PDB::croutinevec::const_iterator rit=routines.begin(); - rit!=routines.end(); ++rit) { + for (PDB::croutinevec::const_iterator rit=routines.begin(); rit!=routines.end(); ++rit) { pdbRoutine::locvec retlocations = (*rit)->returnLocations(); - if ( (*rit)->location().file() == f && !(*rit)->isCompilerGenerated() - && (instrumentEntity((*rit)->fullName())) ) + if ( (*rit)->location().file() == f + && !(*rit)->isCompilerGenerated() + && (instrumentEntity((*rit)->fullName())) ) { - 
printRoutineInOutputFile(*rit, header, impl, group_name, runtime, runtime_libname); - if (runtime == -1) { /* -Wl,-wrap,,-wrap, */ - if (!(*rit)->signature()->hasEllipsis()) { /* does not have varargs */ - linkoptsfile <<"-Wl,-wrap,"<<(*rit)->name()<<" "; - } - } - + printRoutineInOutputFile(*rit, header, impl, group_name, runtime, runtime_libname); + if (runtime == WRAPPER_INTERCEPT) { /* -Wl,-wrap,,-wrap, */ + if (!(*rit)->signature()->hasEllipsis()) { /* does not have varargs */ + linkoptsfile <<"-Wl,-wrap,"<<(*rit)->name()<<" "; + } + } } } return true; - } + /* -------------------------------------------------------------------------- */ /* -- Define a TAU group after ------------------------- */ /* -------------------------------------------------------------------------- */ -void defineTauGroup(ofstream& ostr, string& group_name) +void defineTauGroup(ofstream& ostr, string & group_name) { - if (strcmp(group_name.c_str(), "TAU_USER") != 0) - { /* Write the following lines only when -DTAU_GROUP=string is defined */ + if (group_name.compare("TAU_USER") != 0) { + /* Write the following lines only when -DTAU_GROUP=string is defined */ ostr<< "#ifndef "< [-o ] [-w librarytobewrapped] [-r runtimelibname] [-g groupname] [-i headerfile] [-c|-c++|-fortran] [-f ] [--strict]"< must be specified\n"< [-o ] [-w librarytobewrapped] [-r runtimelibname] [-g groupname] [-i headerfile] [-c|-c++|-fortran] [-f ] [--strict]"< must be specified\n"<"<"<"<"<\n" + << "#include <" << header_file << ">\n" + << "#include \n" + << endl; if (shmem_wrapper) { - impl <<"int TAUDECL tau_totalnodes(int set_or_get, int value);"<"<"<name(), string(filename))) && - (instrumentThisFile = processFileForInstrumentation(string(filename)))) -*/ - if (instrumentThisFile = processFileForInstrumentation(string(filename))) - { /* should we instrument this file? 
Yes */ + if (processFileForInstrumentation(filename)) { + for (PDB::filevec::const_iterator it=p.getFileVec().begin(); it!=p.getFileVec().end(); it++) { #ifdef DEBUG - cout <<"Instrument file: "<name()<name()<