Skip to content

Commit

Permalink
Merge branch 'master' of git.nic.uoregon.edu:/gitroot/tau2
Browse files Browse the repository at this point in the history
Former-commit-id: 8435f0f12deb6d956c7e36868792fd9d3a739664
  • Loading branch information
wspear committed Sep 17, 2013
2 parents 23e69b9 + 21fb764 commit a84fc35
Show file tree
Hide file tree
Showing 9 changed files with 358 additions and 4 deletions.
48 changes: 48 additions & 0 deletions examples/power/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#****************************************************************************
#* TAU Portable Profiling Package **
#* http://www.cs.uoregon.edu/research/tau **
#****************************************************************************
#* Copyright 1997 **
#* Department of Computer and Information Science, University of Oregon **
#* Advanced Computing Laboratory, Los Alamos National Laboratory **
#****************************************************************************
#######################################################################
## pC++/Sage++ Copyright (C) 1993,1995 ##
## Indiana University University of Oregon University of Rennes ##
#######################################################################


# TAU makefile to include; override on the command line or in the environment
# to build against a different TAU configuration.
TAU_MAKEFILE ?=../../include/Makefile

include $(TAU_MAKEFILE)

# Compile and link through TAU's tau_cc.sh wrapper script.
CC = TAU_MAKEFILE=$(TAU_MAKEFILE) $(TAU_PREFIX_INSTALL_DIR)/$(CONFIG_ARCH)/bin/tau_cc.sh

PRINT = pr

RM = /bin/rm -rf

TARGET = matmult

OBJS = matmult.o matmult_initialize.o
CFLAGS=-g -O2
LDFLAGS=-g -O2

##############################################

# These targets do not name files; declare them phony so a stray file called
# "all", "install" or "clean" cannot suppress them.
.PHONY: all install clean

all: $(TARGET)

install: $(TARGET)

$(TARGET): $(OBJS)
	$(CC) $(LDFLAGS) $(OBJS) -o $@ $(LIBS)

# matmult.c includes matmult_initialize.h, so rebuild when the header changes.
matmult.o: matmult.c matmult_initialize.h
	$(CC) $(CFLAGS) -c $< -o $@

matmult_initialize.o: matmult_initialize.c matmult_initialize.h
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	$(RM) $(OBJS) $(TARGET) profile.* *.trc *.edf *.z MULT* *.inst.* *.pdb Comp_gnu.o
##############################################
35 changes: 35 additions & 0 deletions examples/power/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
This example illustrates TAU's API for tracking Power. To track Power, simply insert:

TAU_TRACK_POWER();

in the code. An interrupt is generated every 10 seconds. To change the interrupt interval to say 1s, please insert:

TAU_SET_INTERRUPT_INTERVAL(1);

make

To track Power, the following must be done:

1. TAU must be configured to use PAPI configured with --with-component=rapl
2. The executable must not rely on any LD_LIBRARY_PATH settings, so we typically add -Wl,-rpath,/path/to/dso when linking
the executable. We configure TAU using:

./configure -pdt=/usr/local/packages/pdt -c++=mpicxx -cc=mpicc -fortran=mpif90 -mpilibrary=-lmpi_usempif08#-lmpi_usempi_ignore_tkr#-lmpi_mpifh#-lmpi#-Wl,-rpath,/usr/local/packages/openmpi/1.7_intel-14-tm/lib#-Wl,-rpath,/usr/local/packages/intel/14.0/compiler/lib/intel64/ -bfd=download -iowrapper -papi=/usr/local/packages/papi-5.2.0 -bfd=download -iowrapper

make install

3. The system on which you are running the executable must allow you to read the /dev/cpu/*/msr files:

su
# chmod o+r /dev/cpu/*/msr
4. The executable must have special privileges to execute code. These are granted by root using:

# setcap cap_sys_rawio=ep ./matmult

5. The user then executes the code using:
% mpirun -np 4 ./matmult

6. Max and min marker events trigger when the usage is beyond a threshold of the value seen during the execution. To change
the event threshold, simply set:
% export TAU_EVENT_THRESHOLD=0.01
for capturing 1% variation in an atomic event.
232 changes: 232 additions & 0 deletions examples/power/matmult.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
/******************************************************************************
* OpenMp Example - Matrix Multiply - C Version
* Demonstrates a matrix multiply using OpenMP.
*
* Modified from here:
* https://computing.llnl.gov/tutorials/openMP/samples/C/omp_mm.c
*
* For PAPI_FP_INS, the exclusive count for the event:
* for (null) [OpenMP location: file:matmult.c ]
* should be 2E+06 / Number of Threads
******************************************************************************/
#include <stdio.h>
#include <stdlib.h>

#include "matmult_initialize.h"

#ifdef TAU_MPI
int provided;
#include <mpi.h>
/* NOTE: MPI is just used to spawn multiple copies of the kernel to different ranks.
This is not a parallel implementation */
#endif /* TAU_MPI */

#ifdef PTHREADS
#include <pthread.h>
#endif /* PTHREADS */

/* Matrix dimension. The #ifndef lets a build override the default of 1024
 * by defining MATRIX_SIZE before this point (e.g. -DMATRIX_SIZE=512). */
#ifndef MATRIX_SIZE
#define MATRIX_SIZE 1024
#endif

/* Every dimension below is MATRIX_SIZE, so A, B and C are all square. */
#define NRA MATRIX_SIZE /* number of rows in matrix A */
#define NCA MATRIX_SIZE /* number of columns in matrix A */
#define NCB MATRIX_SIZE /* number of columns in matrix B */

/*
 * Allocate a rows x cols matrix of doubles as an array of row pointers, with
 * each row in its own heap block. Element values are left uninitialized.
 *
 * Returns the array of row pointers, or NULL if an allocation fails; on
 * failure everything allocated so far is released first. The caller owns the
 * result and must free() every row and then the row-pointer array.
 */
double** allocateMatrix(int rows, int cols) {
  int i;
  double **matrix = malloc(sizeof *matrix * rows);
  if (matrix == NULL) {
    return NULL;
  }
  for (i=0; i<rows; i++) {
    matrix[i] = malloc(sizeof *matrix[i] * cols);
    /* malloc(0) may legitimately return NULL, so only a non-empty row that
     * comes back NULL counts as a failure. */
    if (matrix[i] == NULL && cols != 0) {
      while (i-- > 0) {
        free(matrix[i]);
      }
      free(matrix);
      return NULL;
    }
  }
  return matrix;
}

#ifdef APP_USE_INLINE_MULTIPLY
/*
 * Return the product a * b.
 *
 * Declared `static inline` rather than plain `__inline`: under C99 and later
 * inline semantics a non-static inline definition does not provide an
 * external definition, so any call the compiler chooses not to inline (for
 * example at -O0) would fail to link. Internal linkage guarantees a
 * definition is always available in this translation unit.
 */
static inline double multiply(double a, double b) {
  return a * b;
}
#endif /* APP_USE_INLINE_MULTIPLY */

/*
 * Add the matrix product a * b into c using two nested OpenMP parallel
 * regions: the outer one shares the rows of c among its threads, the inner
 * one shares the columns of the current row. Each region requests 2 threads.
 *
 * a is rows_a x cols_a, b is cols_a x cols_b (cols_a and rows_b are the same
 * value) and c is rows_a x cols_b. c is not cleared first; every product is
 * added to whatever c already holds.
 */
void compute_nested(double **a, double **b, double **c, int rows_a, int cols_a, int cols_b) {
  int i,j,k;
#pragma omp parallel private(i) shared(a,b,c) num_threads(2)
  {
    /*** Do matrix multiply sharing iterations on outer loop ***/
#pragma omp for nowait
    for (i=0; i<rows_a; i++) {
      /* The inner team must see the row index chosen by the outer loop.
       * Listing i as private here would give every inner thread a fresh,
       * uninitialized i, so it is copied in with firstprivate instead. */
#pragma omp parallel firstprivate(i) private(j,k) shared(a,b,c) num_threads(2)
      {
#pragma omp for nowait
        for(j=0; j<cols_b; j++) {
          for (k=0; k<cols_a; k++) {
#ifdef APP_USE_INLINE_MULTIPLY
            c[i][j] += multiply(a[i][k], b[k][j]);
#else
            c[i][j] += a[i][k] * b[k][j];
#endif
          }
        }
      }
    }
  }   /*** End of parallel region ***/
}

/*
 * Add the matrix product a * b into c with the loops ordered row, column,
 * inner index. Under OpenMP the rows of c are shared among the threads of
 * one parallel region.
 *
 * a is rows_a x cols_a, b is cols_a x cols_b (cols_a and rows_b are the same
 * value) and c is rows_a x cols_b. c is not cleared first; every product is
 * added to whatever c already holds.
 */
void compute(double **a, double **b, double **c, int rows_a, int cols_a, int cols_b) {
  int row, col, inner;
#pragma omp parallel private(row,col,inner) shared(a,b,c)
  {
    /* Iterations of the outermost (row) loop are what gets work-shared. */
#pragma omp for nowait
    for (row = 0; row < rows_a; ++row) {
      double *a_row = a[row];
      double *c_row = c[row];
      for (col = 0; col < cols_b; ++col) {
        for (inner = 0; inner < cols_a; ++inner) {
#ifdef APP_USE_INLINE_MULTIPLY
          c_row[col] = c_row[col] + multiply(a_row[inner], b[inner][col]);
#else /* APP_USE_INLINE_MULTIPLY */
          c_row[col] = c_row[col] + a_row[inner] * b[inner][col];
#endif /* APP_USE_INLINE_MULTIPLY */
        }
      }
    }
  }
}

/*
 * Add the matrix product a * b into c with the two inner loops interchanged
 * relative to compute(): the order is row, inner index, column. Under OpenMP
 * the rows of c are shared among the threads of one parallel region.
 *
 * a is rows_a x cols_a, b is cols_a x cols_b and c is rows_a x cols_b.
 * c is not cleared first; every product is added to whatever c already holds.
 */
void compute_interchange(double **a, double **b, double **c, int rows_a, int cols_a, int cols_b) {
  int row, inner, col;
#pragma omp parallel private(row,col,inner) shared(a,b,c)
  {
    /* Iterations of the outermost (row) loop are what gets work-shared. */
#pragma omp for nowait
    for (row = 0; row < rows_a; ++row) {
      double *a_row = a[row];
      double *c_row = c[row];
      for (inner = 0; inner < cols_a; ++inner) {
        double *b_row = b[inner];
        for (col = 0; col < cols_b; ++col) {
#ifdef APP_USE_INLINE_MULTIPLY
          c_row[col] = c_row[col] + multiply(a_row[inner], b_row[col]);
#else /* APP_USE_INLINE_MULTIPLY */
          c_row[col] = c_row[col] + a_row[inner] * b_row[col];
#endif /* APP_USE_INLINE_MULTIPLY */
        }
      }
    }
  }
}

/* Release a matrix obtained from allocateMatrix(): each row, then the
 * row-pointer array. A NULL matrix is ignored. */
static void freeMatrix(double **matrix, int rows) {
  int i;
  if (matrix == NULL) {
    return;
  }
  for (i = 0; i < rows; i++) {
    free(matrix[i]);
  }
  free(matrix);
}

/*
 * Run one complete workload: allocate and initialize A (NRA x NCA),
 * B (NCA x NCB) and C (NRA x NCB), then accumulate A * B into C with
 * compute(), with compute_nested() when TAU_OPENMP is defined and nested
 * parallelism is enabled, and with compute_interchange().
 *
 * Returns the final value of C[0][1]. All three matrices are released before
 * returning, so repeated calls (one per thread in main) do not leak memory.
 * If a matrix cannot be allocated, an error is printed and the process exits.
 */
double do_work(void) {
  double **a,           /* matrix A to be multiplied */
  **b,                  /* matrix B to be multiplied */
  **c;                  /* result matrix C */
  double result;

  a = allocateMatrix(NRA, NCA);
  b = allocateMatrix(NCA, NCB);
  c = allocateMatrix(NRA, NCB);
  if (a == NULL || b == NULL || c == NULL) {
    fprintf(stderr, "Error: unable to allocate matrices\n");
    freeMatrix(a, NRA);
    freeMatrix(b, NCA);
    freeMatrix(c, NRA);
    exit(1);
  }

  initialize(a, NRA, NCA);
  initialize(b, NCA, NCB);
  initialize(c, NRA, NCB);

  compute(a, b, c, NRA, NCA, NCB);
#if defined(TAU_OPENMP)
  /* NOTE(review): omp_get_nested() is declared in <omp.h>, which this file
   * does not include directly - confirm the TAU build supplies it. */
  if (omp_get_nested()) {
    compute_nested(a, b, c, NRA, NCA, NCB);
  }
#endif
#ifdef TAU_MPI
  if (provided == MPI_THREAD_MULTIPLE)
  {
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    // printf("Rank: %d: provided is MPI_THREAD_MULTIPLE\n", rank);
  }
#endif /* TAU_MPI */
  compute_interchange(a, b, c, NRA, NCA, NCB);

  /* Read the result before the storage behind it is released. */
  result = c[0][1];

  freeMatrix(a, NRA);
  freeMatrix(b, NCA);
  freeMatrix(c, NRA);

  return result;
}

/*
 * Thread start routine with the signature pthread_create() expects: performs
 * one do_work() pass and always returns NULL.
 */
void * threaded_func(void *data)
{
  (void)data;       /* the argument is not used */
  (void)do_work();  /* the computed value is discarded */
  return NULL;
}

/*
 * Program entry point.
 *
 * Initializes MPI when TAU_MPI is defined (asking for MPI_THREAD_MULTIPLE if
 * PTHREADS or TAU_OPENMP is also defined), sets the TAU interrupt interval to
 * 1 and turns on power tracking, then runs do_work() on the main thread.
 * With PTHREADS defined, three additional threads each run do_work() as well
 * and are joined before shutdown.
 *
 * Returns 0 on success; exits with status 1 if a thread cannot be created or
 * joined.
 */
int main (int argc, char *argv[])
{

#ifdef PTHREADS
  enum { NUM_WORKERS = 3 };   /* threads started in addition to the main thread */
  pthread_t tids[NUM_WORKERS];
  int ret;
  int t;
#endif /* PTHREADS */


#ifdef TAU_MPI
#if (defined(PTHREADS) || defined(TAU_OPENMP))
  MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
  printf("MPI_Init_thread: provided = %d, MPI_THREAD_MULTIPLE=%d\n", provided, MPI_THREAD_MULTIPLE);
#else
  MPI_Init(&argc, &argv);
#endif /* THREADS */
#endif /* TAU_MPI */

  TAU_SET_INTERRUPT_INTERVAL(1);
  TAU_TRACK_POWER();
#ifdef PTHREADS
  /* Messages number the threads from 1, hence t + 1. */
  for (t = 0; t < NUM_WORKERS; t++) {
    ret = pthread_create(&tids[t], NULL, threaded_func, NULL);
    if (ret != 0)
    {
      printf("Error: pthread_create (%d) fails ret = %d\n", t + 1, ret);
      exit(1);
    }
  }

#endif /* PTHREADS */

  /* On thread 0: */
  do_work();

#ifdef PTHREADS
  for (t = 0; t < NUM_WORKERS; t++) {
    ret = pthread_join(tids[t], NULL);
    if (ret != 0)
    {
      printf("Error: pthread_join (%d) fails ret = %d\n", t + 1, ret);
      exit(1);
    }
  }

#endif /* PTHREADS */

#ifdef TAU_MPI
  MPI_Finalize();
#endif /* TAU_MPI */
  printf ("Done.\n");

  return 0;
}

17 changes: 17 additions & 0 deletions examples/power/matmult_initialize.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "matmult_initialize.h"

/*
 * Fill a rows x cols matrix (given as an array of row pointers) so that the
 * element at row i, column j holds the value i + j. Under OpenMP the rows
 * are shared among the threads of one parallel region.
 */
void initialize(double **matrix, int rows, int cols) {
  int r, c;
#pragma omp parallel private(r,c) shared(matrix)
  {
    /* Iterations of the row loop are what gets work-shared. */
#pragma omp for nowait
    for (r = 0; r < rows; ++r) {
      double *row = matrix[r];
      for (c = 0; c < cols; ++c) {
        row[c] = (double)(r + c);
      }
    }
  }
}

14 changes: 14 additions & 0 deletions examples/power/matmult_initialize.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#ifndef _INIT_H_
#define _INIT_H_

/* The predefined C++ macro is spelled __cplusplus; with any other spelling
 * the extern "C" wrapper below is never emitted and C++ translation units
 * would look for a name-mangled initialize(). */
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* Fill a rows x cols matrix, given as an array of row pointers, so that the
 * element at row i, column j holds i + j. */
extern void initialize(double **matrix, int rows, int cols);

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* _INIT_H_ */
2 changes: 1 addition & 1 deletion include/Profile/TauEnv.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#define TAU_ACTION_DUMP_BACKTRACES 3

#ifndef TAU_EVENT_THRESHOLD
#define TAU_EVENT_THRESHOLD .5
#define TAU_EVENT_THRESHOLD_DEFAULT .5
#endif /* TAU_EVENT_THRESHOLD */

#ifdef __cplusplus
Expand Down
6 changes: 6 additions & 0 deletions include/TAU.h.default
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@
#define TAU_DISABLE_TRACKING_MEMORY()
#define TAU_TRACK_MEMORY()
#define TAU_TRACK_MEMORY_HERE()
#define TAU_TRACK_POWER()
#define TAU_TRACK_POWER_HERE()
#define TAU_ENABLE_TRACKING_POWER()
#define TAU_DISABLE_TRACKING_POWER()


#define TAU_SET_INTERRUPT_INTERVAL(value)

#define CT(obj)
Expand Down
7 changes: 4 additions & 3 deletions src/Profile/TauEnv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1336,11 +1336,12 @@ void TauEnv_initialize()
}

const char *evt_threshold = getconf("TAU_EVENT_THRESHOLD");
env_evt_threshold = TAU_EVENT_THRESHOLD;
env_evt_threshold = TAU_EVENT_THRESHOLD_DEFAULT;
if (evt_threshold) {
sscanf(evt_threshold,"%g",&env_evt_threshold);
double evt_value = 0.0;
sscanf(evt_threshold,"%g",&evt_value);
env_evt_threshold = evt_value;
TAU_METADATA("TAU_EVENT_THRESHOLD", evt_threshold);
printf("TAU_EVENT_THRESHOLD set to %g\n", env_evt_threshold);
}

const char *numcalls = getconf("TAU_THROTTLE_NUMCALLS");
Expand Down
1 change: 1 addition & 0 deletions src/Profile/TauShmemOpenShmemC.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <pshmem.h>
#include <Profile/Profiler.h>
#include <stdio.h>
#include <stddef.h>
int TAUDECL tau_totalnodes(int set_or_get, int value);
static int tau_shmem_tagid_f=0 ;
#define TAU_SHMEM_TAGID tau_shmem_tagid_f=tau_shmem_tagid_f%250
Expand Down

0 comments on commit a84fc35

Please sign in to comment.