-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of ssh://git/gitroot/tau2
Former-commit-id: dbbd056b5099f7111611eb2f045e7f6df8df7003
- Loading branch information
Showing
21 changed files
with
429 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#**************************************************************************** | ||
#* TAU Portable Profiling Package ** | ||
#* http://www.cs.uoregon.edu/research/tau ** | ||
#**************************************************************************** | ||
#* Copyright 1997 ** | ||
#* Department of Computer and Information Science, University of Oregon ** | ||
#* Advanced Computing Laboratory, Los Alamos National Laboratory ** | ||
#**************************************************************************** | ||
####################################################################### | ||
## pC++/Sage++ Copyright (C) 1993,1995 ## | ||
## Indiana University University of Oregon University of Rennes ## | ||
####################################################################### | ||
|
||
|
||
# Path to the TAU makefile that is included below. It can be overridden from
# the environment or the command line (?= only assigns when unset).
TAU_MAKEFILE ?=../../include/Makefile

include $(TAU_MAKEFILE)

# Compile and link through TAU's compiler wrapper script, passing it the same
# TAU_MAKEFILE via the environment.
# NOTE(review): TAU_PREFIX_INSTALL_DIR and CONFIG_ARCH are not set in this
# file; presumably they come from the included $(TAU_MAKEFILE) - confirm.
CC = TAU_MAKEFILE=$(TAU_MAKEFILE) $(TAU_PREFIX_INSTALL_DIR)/$(CONFIG_ARCH)/bin/tau_cc.sh

PRINT = pr

RM = /bin/rm -rf

TARGET = matmult

OBJS = matmult.o matmult_initialize.o
CFLAGS=-g -O2
LDFLAGS=-g -O2

##############################################

# These targets are commands, not files: declare them phony so that a stray
# file named "all", "install" or "clean" cannot silently disable them.
.PHONY: all install clean

all: $(TARGET)

install: $(TARGET)

# NOTE(review): LIBS is not defined in this file; it expands to nothing unless
# the included makefile or the caller provides it.
$(TARGET): $(OBJS)
	$(CC) $(LDFLAGS) $(OBJS) -o $@ $(LIBS)

# matmult.c includes matmult_initialize.h, so the object must be rebuilt when
# the header changes.
matmult.o: matmult.c matmult_initialize.h
	$(CC) $(CFLAGS) -c $< -o $@

matmult_initialize.o: matmult_initialize.c matmult_initialize.h
	$(CC) $(CFLAGS) -c $< -o $@

# Remove build products together with the profile/trace/instrumentation files
# matched by the patterns below.
clean:
	$(RM) $(OBJS) $(TARGET) profile.* *.trc *.edf *.z MULT* *.inst.* *.pdb Comp_gnu.o
############################################## |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
This example illustrates TAU's API for tracking Power. To track Power, simply insert: | ||
|
||
TAU_TRACK_POWER(); | ||
|
||
in the code. An interrupt is generated every 10 seconds. To change the interrupt interval to say 1s, please insert: | ||
|
||
TAU_SET_INTERRUPT_INTERVAL(1); | ||
|
||
make | ||
|
||
To track Power, the following must be done: | ||
|
||
1. TAU must be configured to use PAPI configured with --with-component=rapl | ||
2. The executable must not rely on any LD_LIBRARY_PATH settings. So, we typically add -Wl,-rpath,/path/to/dso while linking
in the executable. So, we configure TAU using:
|
||
./configure -pdt=/usr/local/packages/pdt -c++=mpicxx -cc=mpicc -fortran=mpif90 -mpilibrary=-lmpi_usempif08#-lmpi_usempi_ignore_tkr#-lmpi_mpifh#-lmpi#-Wl,-rpath,/usr/local/packages/openmpi/1.7_intel-14-tm/lib#-Wl,-rpath,/usr/local/packages/intel/14.0/compiler/lib/intel64/ -bfd=download -iowrapper -papi=/usr/local/packages/papi-5.2.0 -bfd=download -iowrapper | ||
|
||
make install | ||
|
||
3. The system on which you are running the executable must allow you to read the /dev/cpu/*/msr files: | ||
|
||
su | ||
# chmod o+r /dev/cpu/*/msr | ||
4. The executable must have special privileges to execute code. These are granted by root using:
|
||
# setcap cap_sys_rawio=ep ./matmult | ||
|
||
5. The user then executes the code using: | ||
% mpirun -np 4 ./matmult | ||
|
||
6. Max and min marker events trigger when the usage is beyond a threshold of the value seen during the execution. To change | ||
the event threshold, simply set: | ||
% export TAU_EVENT_THRESHOLD=0.01 | ||
for capturing 1% variation in an atomic event. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,232 @@ | ||
/****************************************************************************** | ||
* OpenMp Example - Matrix Multiply - C Version | ||
* Demonstrates a matrix multiply using OpenMP. | ||
* | ||
* Modified from here: | ||
* https://computing.llnl.gov/tutorials/openMP/samples/C/omp_mm.c | ||
* | ||
* For PAPI_FP_INS, the exclusive count for the event: | ||
* for (null) [OpenMP location: file:matmult.c ] | ||
* should be 2E+06 / Number of Threads | ||
******************************************************************************/ | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
|
||
#include "matmult_initialize.h" | ||
|
||
#ifdef TAU_MPI | ||
int provided; | ||
#include <mpi.h> | ||
/* NOTE: MPI is just used to spawn multiple copies of the kernel to different ranks. | ||
This is not a parallel implementation */ | ||
#endif /* TAU_MPI */ | ||
|
||
#ifdef PTHREADS | ||
#include <pthread.h> | ||
#endif /* PTHREADS */ | ||
|
||
/* Edge length of the square matrices; may be overridden at compile time
   (e.g. -DMATRIX_SIZE=512). */
#ifndef MATRIX_SIZE
#define MATRIX_SIZE 1024
#endif

/* The expansions are parenthesized so that an expression-valued MATRIX_SIZE
   (e.g. -DMATRIX_SIZE=512+512) still behaves as a single operand. */
#define NRA (MATRIX_SIZE)                 /* number of rows in matrix A */
#define NCA (MATRIX_SIZE)                 /* number of columns in matrix A */
#define NCB (MATRIX_SIZE)                 /* number of columns in matrix B */
|
||
/*
 * Allocate a rows x cols matrix of doubles as an array of row pointers, each
 * row being a separate heap allocation. The element values are NOT
 * initialized.
 *
 * Returns the row-pointer array, or NULL if a dimension is negative or any
 * allocation fails (rows allocated so far are released first). The caller
 * owns the result and must free() every row and then the array itself.
 */
double** allocateMatrix(int rows, int cols) {
  int i;
  double **matrix;

  /* A negative int would wrap to an enormous size_t in the size computation. */
  if (rows < 0 || cols < 0) {
    return NULL;
  }

  matrix = (double**)malloc(sizeof(double*) * (size_t)rows);
  if (matrix == NULL) {
    return NULL;
  }

  for (i = 0; i < rows; i++) {
    matrix[i] = (double*)malloc(sizeof(double) * (size_t)cols);
    /* malloc(0) may legitimately return NULL, so only treat NULL as a
       failure when a non-empty row was requested. */
    if (matrix[i] == NULL && cols > 0) {
      while (i-- > 0) {
        free(matrix[i]);
      }
      free(matrix);
      return NULL;
    }
  }
  return matrix;
}
|
||
#ifdef APP_USE_INLINE_MULTIPLY | ||
/* Return the product a * b.
   Declared static: a plain (non-static) inline definition provides no
   external definition under C99 inline semantics, so a call the compiler
   chooses not to inline would fail at link time. */
static __inline double multiply(double a, double b) {
  return a * b;
}
#endif /* APP_USE_INLINE_MULTIPLY */ | ||
|
||
/*
 * Accumulate the matrix product a * b into c (c[i][j] += a[i][k] * b[k][j])
 * using two nested OpenMP parallel regions, each requesting 2 threads.
 *
 *   a: rows_a x cols_a,  b: cols_a x cols_b,  c: rows_a x cols_b
 *   (cols_a and rows_b are the same value)
 *
 * c is added to, not overwritten, so its prior contents matter.
 */
void compute_nested(double **a, double **b, double **c, int rows_a, int cols_a, int cols_b) {
  int i,j,k;
#pragma omp parallel private(i) shared(a,b,c) num_threads(2)
  {
    /*** Do matrix multiply sharing iterations on outer loop ***/
#pragma omp for nowait
    for (i=0; i<rows_a; i++) {
      /* The row index must be carried into the nested region by value:
         a plain private(i) would give every inner thread its own
         uninitialized copy of i, and c[i][j] would index with garbage.
         j and k are per-thread scratch and stay private. */
#pragma omp parallel firstprivate(i) private(j,k) shared(a,b,c) num_threads(2)
      {
#pragma omp for nowait
        for(j=0; j<cols_b; j++) {
          for (k=0; k<cols_a; k++) {
#ifdef APP_USE_INLINE_MULTIPLY
            c[i][j] += multiply(a[i][k], b[k][j]);
#else
            c[i][j] += a[i][k] * b[k][j];
#endif
          }
        }
      }
    }
  }   /*** End of parallel region ***/
}
|
||
/*
 * Accumulate the matrix product a * b into c, visiting the loops in
 * row / column / inner-dimension order. The outermost (row) loop is shared
 * among the threads of one OpenMP parallel region.
 *
 *   a: rows_a x cols_a,  b: cols_a x cols_b,  c: rows_a x cols_b
 *   (cols_a doubles as the row count of b)
 *
 * c is added to, not overwritten.
 */
void compute(double **a, double **b, double **c, int rows_a, int cols_a, int cols_b) {
  int row, col, inner;
#pragma omp parallel private(row, col, inner) shared(a, b, c)
  {
#pragma omp for nowait
    for (row = 0; row < rows_a; ++row) {
      for (col = 0; col < cols_b; ++col) {
        for (inner = 0; inner < cols_a; ++inner) {
#ifdef APP_USE_INLINE_MULTIPLY
          c[row][col] += multiply(a[row][inner], b[inner][col]);
#else  /* APP_USE_INLINE_MULTIPLY */
          c[row][col] += a[row][inner] * b[inner][col];
#endif /* APP_USE_INLINE_MULTIPLY */
        }
      }
    }
  } /* end of parallel region */
}
|
||
/*
 * Same accumulation as a plain matrix multiply (c += a * b), but with the two
 * inner loops interchanged: for each row, the inner dimension is walked first
 * and the output column last. The outermost (row) loop is shared among the
 * threads of one OpenMP parallel region.
 *
 *   a: rows_a x cols_a,  b: cols_a x cols_b,  c: rows_a x cols_b
 */
void compute_interchange(double **a, double **b, double **c, int rows_a, int cols_a, int cols_b) {
  int row, inner, col;
#pragma omp parallel private(row, inner, col) shared(a, b, c)
  {
#pragma omp for nowait
    for (row = 0; row < rows_a; ++row) {
      for (inner = 0; inner < cols_a; ++inner) {
        for (col = 0; col < cols_b; ++col) {
#ifdef APP_USE_INLINE_MULTIPLY
          c[row][col] += multiply(a[row][inner], b[inner][col]);
#else  /* APP_USE_INLINE_MULTIPLY */
          c[row][col] += a[row][inner] * b[inner][col];
#endif /* APP_USE_INLINE_MULTIPLY */
        }
      }
    }
  } /* end of parallel region */
}
|
||
double do_work(void) { | ||
double **a, /* matrix A to be multiplied */ | ||
**b, /* matrix B to be multiplied */ | ||
**c; /* result matrix C */ | ||
a = allocateMatrix(NRA, NCA); | ||
b = allocateMatrix(NCA, NCB); | ||
c = allocateMatrix(NRA, NCB); | ||
|
||
/*** Spawn a parallel region explicitly scoping all variables ***/ | ||
|
||
initialize(a, NRA, NCA); | ||
initialize(b, NCA, NCB); | ||
initialize(c, NRA, NCB); | ||
|
||
compute(a, b, c, NRA, NCA, NCB); | ||
#if defined(TAU_OPENMP) | ||
if (omp_get_nested()) { | ||
compute_nested(a, b, c, NRA, NCA, NCB); | ||
} | ||
#endif | ||
#ifdef TAU_MPI | ||
if (provided == MPI_THREAD_MULTIPLE) | ||
{ | ||
int rank; | ||
MPI_Comm_rank(MPI_COMM_WORLD, &rank); | ||
// printf("Rank: %d: provided is MPI_THREAD_MULTIPLE\n", rank); | ||
} | ||
#endif /* TAU_MPI */ | ||
compute_interchange(a, b, c, NRA, NCA, NCB); | ||
|
||
return c[0][1]; | ||
} | ||
|
||
/* pthread start routine: runs one do_work() workload on the calling thread.
   The argument is not used and the thread result is always NULL. */
void * threaded_func(void *data)
{
  (void)data;   /* unused */
  do_work();
  return NULL;
}
|
||
/*
 * Program entry point.
 *
 * With TAU_MPI: initializes MPI (requesting MPI_THREAD_MULTIPLE when PTHREADS
 * or TAU_OPENMP is defined) and finalizes it before exiting.
 * With PTHREADS: starts three extra threads that each run do_work(), runs
 * do_work() on the initial thread as well, then joins the three threads.
 * Without PTHREADS: runs do_work() once.
 *
 * Returns 0 on success; exits with status 1 if a thread cannot be created or
 * joined.
 */
int main (int argc, char *argv[])
{

#ifdef PTHREADS
  enum { NUM_WORKERS = 3 };
  int ret;
  int t;
  pthread_t tids[NUM_WORKERS];
#endif /* PTHREADS */

#ifdef TAU_MPI
#if (defined(PTHREADS) || defined(TAU_OPENMP))
  MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
  printf("MPI_Init_thread: provided = %d, MPI_THREAD_MULTIPLE=%d\n", provided, MPI_THREAD_MULTIPLE);
#else
  MPI_Init(&argc, &argv);
#endif /* THREADS */
#endif /* TAU_MPI */

  /* NOTE(review): these TAU macros are not declared anywhere in this file;
     presumably the TAU compiler wrapper makes them available - confirm. */
  TAU_SET_INTERRUPT_INTERVAL(1);
  TAU_TRACK_POWER();
#ifdef PTHREADS
  /* Start every worker before doing any work on this thread. */
  for (t = 0; t < NUM_WORKERS; t++) {
    ret = pthread_create(&tids[t], NULL, threaded_func, NULL);
    if (ret != 0)
    {
      printf("Error: pthread_create (%d) fails ret = %d\n", t + 1, ret);
      exit(1);
    }
  }
#endif /* PTHREADS */

  /* On thread 0: */
  do_work();

#ifdef PTHREADS
  /* Wait for the workers in the order they were created. */
  for (t = 0; t < NUM_WORKERS; t++) {
    ret = pthread_join(tids[t], NULL);
    if (ret != 0)
    {
      printf("Error: pthread_join (%d) fails ret = %d\n", t + 1, ret);
      exit(1);
    }
  }
#endif /* PTHREADS */

#ifdef TAU_MPI
  MPI_Finalize();
#endif /* TAU_MPI */
  printf ("Done.\n");

  return 0;
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#include "matmult_initialize.h" | ||
|
||
/*
 * Fill a rows x cols matrix so that element [r][c] holds r + c.
 * The row loop is shared among the threads of an OpenMP parallel region.
 *
 *   matrix: array of `rows` row pointers, each addressing `cols` doubles
 */
void initialize(double **matrix, int rows, int cols) {
  int r, c;
#pragma omp parallel private(r, c) shared(matrix)
  {
    /* Initialize the matrix, one row per loop iteration. */
#pragma omp for nowait
    for (r = 0; r < rows; ++r) {
      for (c = 0; c < cols; ++c) {
        matrix[r][c] = r + c;
      }
    }
  }
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#ifndef _INIT_H_
#define _INIT_H_

/* C++ compilers predefine __cplusplus (no underscore between "c" and "plus");
   testing that spelling is what actually gives initialize() C linkage when
   this header is included from C++ code. */
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* Fill a rows x cols matrix (array of row pointers) so that
   matrix[i][j] == i + j. */
extern void initialize(double **matrix, int rows, int cols);

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* _INIT_H_ */
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Oops, something went wrong.