Skip to content

Commit

Permalink
Fixing OpenCL support with the AMD OpenCL (amdocl64) library
Browse files Browse the repository at this point in the history
Former-commit-id: a8979bf6c056fc4f31c80133afb2dfc8544a853a
  • Loading branch information
khuck committed Feb 17, 2025
1 parent f9f9ba7 commit 7a61aec
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 50 deletions.
32 changes: 23 additions & 9 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -2453,7 +2453,7 @@ for arg in "$@"; do
dwarfdir=""
shift
;;

-elfutils=download)
elfutils=yes
download_elfutils=yes
Expand Down Expand Up @@ -3242,7 +3242,7 @@ for arg in "$@"; do
fi
shift
;;

-rocprofv2)
fixmakeargs="$fixmakeargs ROCPROFILERV2"
rocprofv2=yes
Expand All @@ -3258,7 +3258,7 @@ for arg in "$@"; do
pthread=yes
shift
;;

-rocprofsdk=*)
fixmakeargs="$fixmakeargs ROCPROFILERSDK"
rocprofsdk=yes
Expand Down Expand Up @@ -3374,7 +3374,8 @@ for arg in "$@"; do
shift
;;
-opencl=*)
openclinclude=`echo $arg | sed -e 's/-opencl=//' -e 's/ /#/g'`/include
opencldir=`echo $arg | sed -e 's/-opencl=//' -e 's/ /#/g'`
openclinclude=$opencldir/include
if [ ! -d $openclinclude ] ; then
echo "Error: Cannot access GPU include directory $openclinclude"
if [ `uname -s ` = "Darwin" ]; then
Expand All @@ -3396,8 +3397,21 @@ for arg in "$@"; do
use_opencl=yes
else
echo "Error: Cannot find GPU headers, TAU currently supports either CUDA or OpenCL"
exit 0
fi
fi
# Locate an OpenCL runtime library somewhere below the -opencl=<dir> prefix.
# Candidates are tried in order: libOpenCL.so first, then libamdocl64.so.
# The first readable match is stored in opencllib and appended to
# fixmakeargs as "OPENCL opencllib=<path>"; the search stops at that match.
opencllib="libOpenCL.so"
opencllib_found=no
for d in "OpenCL" "amdocl64" ; do
    libname="lib${d}.so"
    echo "Looking for ${libname} in ${opencldir}"
    # Both operands are quoted: an empty prefix must not turn into
    # "find -name ..." (which would scan the current directory), and glob
    # characters in either value must not be expanded by the shell.
    exists=`find "${opencldir}" -name "${libname}" | head -n 1`
    if [ -r "${exists}" ]; then
        echo "found $exists"
        opencllib=${exists}
        fixmakeargs="$fixmakeargs OPENCL opencllib=${opencllib}"
        opencllib_found=yes
        break
    fi
done
if [ "$opencllib_found" = "no" ]; then
    # Nothing is added to fixmakeargs in this case; opencllib keeps its
    # default value. NOTE(review): confirm whether later parts of configure
    # rely on that default, or whether this should be a hard error.
    echo "Warning: neither libOpenCL.so nor libamdocl64.so found in ${opencldir}; keeping default ${opencllib}"
fi
shift
;;

Expand Down Expand Up @@ -3738,9 +3752,8 @@ fi

if [ "x$use_opencl" = "xyes" -a "x$openclinclude" = "x" ]; then
ld_lib_path=`echo $LD_LIBRARY_PATH | sed -e "s@:@ @g" `
for d in $ld_lib_path
do
echo "OPENCL: CHECKING $d"
for d in $ld_lib_path ; do
echo "OPENCL: CHECKING $d"
if [ -r $d/libOpenCL.so ]; then
echo "checking $d"
openclinclude=`echo $d | sed -e "s@/lib64@@g" -e "s@loader@headers@g" `/include
Expand All @@ -3757,6 +3770,7 @@ if [ "x$use_opencl" = "xyes" -a "x$openclinclude" = "x" ]; then
fixmakeargs="$fixmakeargs OPENCL TAU_USE_GPU openclinclude=$openclinclude/sycl"
else
echo "Error: Cannot find GPU headers, TAU currently supports either CUDA or OpenCL"
exit 0
fi
fi
fi
Expand Down Expand Up @@ -11003,7 +11017,7 @@ if [ "x$download_elfutils" = xyes ] ; then

predowndir=`pwd`
elfutilsdir=$libelfutilsdir/elfutils

if [ -r "$elfutilsdir/lib/libdw.so" -a -r "$elfutilsdir/include/elfutils/libdw.h" ]; then
echo "Found elfutils"
echo "elfutils download skipping"
Expand Down Expand Up @@ -11037,7 +11051,7 @@ if [ "$elfutils" = "yes" ]; then
echo "Could not find elfutils $elfutilsdir/lib/libdw.so"
exit 1
fi


fixmakeargs="$fixmakeargs elfutilsincdir=$elfutilsinc"
fixmakeargs="$fixmakeargs elfutilslibdir=$elfutilslib"
Expand Down
8 changes: 5 additions & 3 deletions examples/gpu/python_opencl/Makefile
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
include ../../../include/Makefile
#TAU_OPENCL_INC=/opt/intel/oneapi/compiler/2021.1-beta10/linux/include/sycl
TAU_OPENCL_INC=${ROCM_PATH}/include

TAU_OPENCL_LIB=-L$(TAU_OPENCL_INC)/../lib/x86_64 -L$(TAU_OPENCL_INC)/../lib -lOpenCL
#TAU_OPENCL_LIB=-L$(TAU_OPENCL_INC)/../lib/x86_64 -L$(TAU_OPENCL_INC)/../lib -lOpenCL
#TAU_OPENCL_LIB=-L/opt/intel/oneapi/compiler/2021.1-beta10/linux/lib -lOpenCL
#TAU_CXX=clang++
TAU_OPENCL_LIB=-L$(TAU_OPENCL_INC)/../lib/x86_64 -L$(TAU_OPENCL_INC)/../lib -L${ROCM_PATH}/lib -Wl,-rpath,${ROCM_PATH}/lib -lamdocl64

all: libmatmult.so

libmatmult.so: matmult.o
libmatmult.so: matmult.o Makefile
$(TAU_CXX) -g -o $@ $< $(TAU_OPENCL_LIB) -shared

matmult.o: matmult.cpp
$(TAU_CXX) -I$(TAU_OPENCL_INC) -g -c $< -o $@ -fPIC

clean:
clean:
rm -rf libmatmult.so matmult.o profile.*

run:
Expand Down
35 changes: 17 additions & 18 deletions examples/gpu/python_opencl/matmult.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ extern "C" int entry(int argc, char**argv)
block_mult = ceil(SIZE_OF_MATRIX / ((float) SIZE_OF_BLOCK));
else
block_mult = 1;


number_of_blocks = SIZE_OF_BLOCK * block_mult;

unsigned int matsize = SIZE_OF_MATRIX*SIZE_OF_MATRIX*sizeof(float);
Expand Down Expand Up @@ -143,18 +143,17 @@ extern "C" int entry(int argc, char**argv)

cl_uint nDevices, count;
cl_device_id *cdDevices = NULL;
ci = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &count);
ci = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &count);

cdDevices = (cl_device_id *)malloc(count * sizeof(cl_device_id));
//ci = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_ALL, count, cdDevices, NULL);
ci = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, cdDevices, NULL);
ci = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, count, cdDevices, NULL);
CHECK_CL_ERROR(ci);

cout << count << " devices found." << endl;

string device_list("");
int number_of_iterations = 1;

int opt = getopt(argc, argv, "d:i:");
while(opt != -1) {
stringstream str;
Expand Down Expand Up @@ -206,7 +205,7 @@ extern "C" int entry(int argc, char**argv)
}
//cout << "finnished mapping devices." << endl;

//cl_context GPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_ALL, NULL, NULL, &ci);
//cl_context GPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &ci);
cl_context GPUContext = clCreateContext(0, nDevices, devices, NULL, NULL, &ci);
CHECK_CL_ERROR(ci);

Expand All @@ -217,7 +216,7 @@ extern "C" int entry(int argc, char**argv)
char name[256];
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, sizeof(name), &name, NULL);
cout << "Using device name: " << name << endl;

cqCommandQueue[d] = clCreateCommandQueue(GPUContext, devices[0], CL_QUEUE_PROFILING_ENABLE, &ci);
CHECK_CL_ERROR(ci);

Expand All @@ -236,23 +235,23 @@ extern "C" int entry(int argc, char**argv)
60000, log, NULL);

CHECK_CL_ERROR(ci);

//printf("build log: %s\n", log);
//cout << log << endl;

size_t thread_size[] = {number_of_threads, number_of_threads};
size_t block_size[] = {number_of_blocks, number_of_blocks};
/*
/*
cl_mem sub_a = clCreateBuffer(GPUContext, CL_MEM_ALLOC_HOST_PTR, submatsize,
NULL, NULL);
cl_mem sub_b = clCreateBuffer(GPUContext, CL_MEM_ALLOC_HOST_PTR, submatsize,
NULL, NULL);
cl_kernel OpenCL_multiply_matrices_shared_blocks = clCreateKernel(OpenCLProgram,
"multiply_matrices_shared_blocks", &ci);
CHECK_CL_ERROR(ci);
ci = clSetKernelArg(OpenCL_multiply_matrices_shared_blocks, 0, sizeof(cl_mem), (void *) &d_a);
CHECK_CL_ERROR(ci);
ci = clSetKernelArg(OpenCL_multiply_matrices_shared_blocks, 1, sizeof(cl_mem), (void *) &d_b);
Expand Down Expand Up @@ -305,14 +304,14 @@ extern "C" int entry(int argc, char**argv)
clEnqueueWriteBuffer(cCQ, d_a, CL_TRUE, 0, matsize, a, 0, NULL, &event_mem);
clEnqueueWriteBuffer(cCQ, d_b, CL_TRUE, 0, matsize, b, 0, NULL, &event_mem);
clWaitForEvents(1, &event_mem);

event = clCreateUserEvent(GPUContext, &ci);
CHECK_CL_ERROR(ci);

ci = clEnqueueNDRangeKernel(cCQ, OpenCL_multiply_matrices, 2, NULL,
block_size, thread_size, 0, NULL, &event);
CHECK_CL_ERROR(ci);

//clWaitForEvents(1, &shared_event);
clWaitForEvents(1, &event);
CHECK_CL_ERROR(ci);
Expand All @@ -324,7 +323,7 @@ extern "C" int entry(int argc, char**argv)
//clFinish(cCQ);

}

cout << "Finished " << number_of_iterations << " iterations on " << nDevices << " devices." << endl;
/*
std::cout << " results: " << std::endl;
Expand All @@ -334,7 +333,7 @@ extern "C" int entry(int argc, char**argv)
}
std::cout << std::endl;
}
*/
*/

free(a);
free(b);
Expand Down
4 changes: 3 additions & 1 deletion include/Makefile.skel
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ TAU_LLVM_SRC_DIR=
TAU_LLVM_CXX=
TAU_LLVM_CC=
TAU_STARPU_DIR=
TAU_OPENCL_LIBRARY=
#MPC#TAU_CC_FE=$(FULL_CC)#ENDIF#
#MPC#CONFIG_CC=$(FULL_CC)#ENDIF#
#MPC#TAU_CXX_FE=$(FULL_CXX)#ENDIF#
Expand Down Expand Up @@ -784,6 +785,7 @@ JDKBINDIR = $(JDKDIR)/bin
#ROCMSMI#TAU_ROCM_SMI_INCLUDE_FLAGS = -DTAU_ROCM_SMI -I$(TAU_ROCM_SMI_INC) #ENDIF#
#SUPPRESS_PTHREAD_CREATE_WRAPPER#PROFILEOPT117 = -DTAU_SUPPRESS_PTHREAD_CREATE_WRAPPER #ENDIF#
#STARPU#PROFILEOPT118 = -I$(TAU_STARPU_DIR) -DTAU_STARPU #ENDIF#
#OPENCL#PROFILEOPT120 = -DTAU_OPENCL_LIBRARY=\"$(TAU_OPENCL_LIBRARY)\" #ENDIF#
#GNU_GFORTRAN#TAU_ALLOW_ARG_MISMATCH=-fallow-argument-mismatch#ENDIF#

MRNET_ROOT=
Expand Down Expand Up @@ -1433,7 +1435,7 @@ PROFILEOPTS = $(PROFILEOPT1) $(PROFILEOPT2) $(PROFILEOPT3) $(PROFILEOPT4) \
$(PROFILEOPT108) $(PROFILEOPT109) $(PROFILEOPT110) \
$(PROFILEOPT111) $(PROFILEOPT112) $(PROFILEOPT113) $(PROFILEOPT114) \
$(PROFILEOPT115) $(PROFILEOPT116) $(PROFILEOPT117) $(PROFILEOPT118) \
$(PROFILEOPT119) $(TRACEOPT) \
$(PROFILEOPT119) $(PROFILEOPT120) $(TRACEOPT) \
$(TAU_SOS_INCLUDE_OPTS) $(TAU_ADIOS_INCLUDE_OPTS) \
$(TAU_OTF2_INCLUDE_OPTS) $(TAU_CALIPER_INCLUDE_OPTS) \
$(TAU_CORESYMBOLICATION_INCLUDE_OPTS) $(TAU_ELF_BFD_PROFILEOPT) \
Expand Down
Loading

0 comments on commit 7a61aec

Please sign in to comment.