Skip to content

Commit

Permalink
COMP: force CUDA 8 (required for c++11)
Browse files Browse the repository at this point in the history
  • Loading branch information
Simon Rit committed Apr 13, 2018
1 parent abcaddc commit 8180545
Show file tree
Hide file tree
Showing 8 changed files with 18 additions and 66 deletions.
27 changes: 3 additions & 24 deletions cmake/FindCUDA_wrap.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
if (MINGW)
# Cuda doesn't work with mingw at all
set (CUDA_FOUND FALSE)
elseif (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 2.8)
elseif (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 2.8)
# FindCuda is included with CMake 2.8
set (CUDA_FOUND FALSE)
else ()
Expand Down Expand Up @@ -37,13 +37,6 @@ endif ()
set (CUDA_FOUND ${CUDA_FOUND} CACHE BOOL "Did we find cuda?")
mark_as_advanced(CUDA_FOUND)

if(CUDA_FOUND)
if(${CUDA_VERSION} LESS 3.2)
message("CUDA version ${CUDA_VERSION} found, too old for RTK")
set(CUDA_FOUND FALSE)
endif()
endif()

if (CUDA_FOUND)
cuda_include_directories (${CMAKE_CURRENT_SOURCE_DIR})
endif ()
Expand All @@ -53,28 +46,14 @@ endif ()
# This script will modify CUDA_NVCC_FLAGS if system default is not gcc-4.3
include (nvcc-check)

if("${CUDA_VERSION}" LESS 6.5)
# set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
# -gencode arch=compute_10,code=sm_10
# -gencode arch=compute_11,code=sm_11
# -gencode arch=compute_12,code=sm_12
# -gencode arch=compute_13,code=sm_13
# )
endif ()

if("${CUDA_VERSION}" LESS 5.0)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
-gencode arch=compute_20,code=sm_20
-gencode arch=compute_20,code=compute_20
)
elseif("${CUDA_VERSION}" LESS 8.0)
if("${CUDA_VERSION}" VERSION_LESS 8.0)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
-gencode arch=compute_20,code=sm_20
-gencode arch=compute_30,code=sm_30
-gencode arch=compute_35,code=sm_35
-gencode arch=compute_35,code=compute_35
)
elseif("${CUDA_VERSION}" LESS 9.0)
elseif("${CUDA_VERSION}" VERSION_LESS 9.0)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
-Wno-deprecated-gpu-targets
-gencode arch=compute_20,code=sm_20
Expand Down
33 changes: 7 additions & 26 deletions cmake/nvcc-check.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -49,22 +49,22 @@ if(CUDA_FOUND)
if(${CUDA_VERSION} VERSION_GREATER "8.99")
FIND_GCC(GCC_PATH "6" "4")
endif()
if(NOT GCC_PATH AND ${CUDA_VERSION} VERSION_GREATER "6.99")
if(NOT GCC_PATH )
FIND_GCC(GCC_PATH "4" "9")
endif()
if(NOT GCC_PATH AND ${CUDA_VERSION} VERSION_GREATER "5.4.99")
if(NOT GCC_PATH )
FIND_GCC(GCC_PATH "4" "8")
endif()
if(NOT GCC_PATH AND ${CUDA_VERSION} VERSION_GREATER "5.4.99")
if(NOT GCC_PATH )
FIND_GCC(GCC_PATH "4" "7")
endif()
if(NOT GCC_PATH AND ${CUDA_VERSION} VERSION_GREATER "4.1.99")
if(NOT GCC_PATH )
FIND_GCC(GCC_PATH "4" "6")
endif()
if(NOT GCC_PATH AND ${CUDA_VERSION} VERSION_GREATER "4.0.99")
if(NOT GCC_PATH )
FIND_GCC(GCC_PATH "4" "5")
endif()
if(NOT GCC_PATH AND ${CUDA_VERSION} VERSION_GREATER "3.99")
if(NOT GCC_PATH )
FIND_GCC(GCC_PATH "4" "4")
endif()
if(NOT GCC_PATH)
Expand All @@ -78,7 +78,7 @@ if(CUDA_FOUND)
endif()

if(GCC_PATH)
if(NOT APPLE OR "${CUDA_VERSION}" LESS 7.0)
if(NOT APPLE)
message(STATUS "nvcc-check: Found adequate gcc (${GCC_PATH})... telling nvcc to use it!")
#Only append option if not already done
list (FIND CUDA_NVCC_FLAGS "--compiler-bindir" _index)
Expand All @@ -90,23 +90,4 @@ if(CUDA_FOUND)
message(FATAL_ERROR "nvcc-check: Please install adequate gcc for cuda.\nNote that gcc-4.x can be installed side-by-side with your current version of gcc.\n")
endif()
endif()


if(CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "APPLE")
# For CUDA 3.2: surface_functions.h does some non-compliant things...
# so we tell g++ to ignore them when called via nvcc
# by passing the -fpermissive flag through the nvcc
# build trajectory. Unfortunately, nvcc will also
# blindly pass this flag to gcc, even though it is not
# valid... resulting in TONS of warnings. So, we go
# version checking again, this time nvcc...
# Get the nvcc version number

# This issue seems to be only if cuda is installed in system so test CUDA_INCLUDE_DIRS
# (see http://nvidia.custhelp.com/app/answers/detail/a_id/2869/~/linux-based-cuda-v3.x-compiler-issue-affecting-cuda-surface-apis)
if(CUDA_VERSION_MAJOR MATCHES "3" AND CUDA_VERSION_MINOR MATCHES "2" AND CUDA_INCLUDE_DIRS MATCHES "/usr/include")
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options='-fpermissive')
message(STATUS "nvcc-check: CUDA 3.2 exception: CUDA_NVCC_FLAGS set to \"${CUDA_NVCC_FLAGS}\"")
endif()
endif()
endif()
2 changes: 1 addition & 1 deletion itk-module-init.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ list(APPEND CMAKE_MODULE_PATH ${RTK_SOURCE_DIR}/cmake)

find_package(CUDA_wrap QUIET)
if(CUDA_FOUND)
if(${CUDA_VERSION} LESS 4.1)
if(${CUDA_VERSION} VERSION_LESS 8.0)
message(WARNING "CUDA version ${CUDA_VERSION} is not supported by RTK.")
set(RTK_USE_CUDA_DEFAULT OFF)
else()
Expand Down
4 changes: 2 additions & 2 deletions src/rtkCudaBackProjectionImageFilter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ CUDA_back_project(int projSize[3],
// Allocate array for input projections, in order to bind them to
// either a 2D layered texture (requires GetCudaComputeCapability >= 2.0) or
// a 3D texture
if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1)
if(GetCudaComputeCapability(device).first<=1)
cudaMalloc3DArray((cudaArray**)&array_proj, &channelDesc, projExtent);
else
cudaMalloc3DArray((cudaArray**)&array_proj, &channelDesc, projExtent, cudaArrayLayered);
Expand All @@ -308,7 +308,7 @@ CUDA_back_project(int projSize[3],

// Run kernels. Note: Projection data is passed via texture memory,
// transform matrix is passed via constant memory
if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1)
if(GetCudaComputeCapability(device).first<=1)
{
// Compute block and grid sizes
dim3 dimGrid = dim3(blocksInX, blocksInY*blocksInZ);
Expand Down
4 changes: 2 additions & 2 deletions src/rtkCudaFDKBackProjectionImageFilter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ CUDA_reconstruct_conebeam(
// Allocate array for input projections, in order to bind them to
// either a 2D layered texture (requires GetCudaComputeCapability >= 2.0) or
// a 3D texture
if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1)
if(GetCudaComputeCapability(device).first<=1)
cudaMalloc3DArray((cudaArray**)&array_proj, &channelDesc, projExtent);
else
cudaMalloc3DArray((cudaArray**)&array_proj, &channelDesc, projExtent, cudaArrayLayered);
Expand All @@ -206,7 +206,7 @@ CUDA_reconstruct_conebeam(

// Run kernels. Note: Projection data is passed via texture memory,
// transform matrix is passed via constant memory
if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1)
if(GetCudaComputeCapability(device).first<=1)
{
// Compute block and grid sizes
dim3 dimGrid = dim3(blocksInX, blocksInY*blocksInZ);
Expand Down
8 changes: 0 additions & 8 deletions src/rtkCudaFFTConvolutionImageFilter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,6 @@ CUDA_fft_convolution(const int3 &inputDimension,
else
result = cufftPlan3d(&fftFwd, inputDimension.z, inputDimension.y, inputDimension.x, CUFFT_R2C);
CUFFT_CHECK_ERROR(result);
#if (CUDA_VERSION<8000)
result = cufftSetCompatibilityMode(fftFwd, CUFFT_COMPATIBILITY_FFTW_ALL);
CUFFT_CHECK_ERROR(result);
#endif
result = cufftExecR2C(fftFwd, deviceProjection, deviceProjectionFFT);
CUFFT_CHECK_ERROR(result);
cufftDestroy(fftFwd);
Expand Down Expand Up @@ -131,10 +127,6 @@ CUDA_fft_convolution(const int3 &inputDimension,
else
result = cufftPlan3d(&fftInv, inputDimension.z, inputDimension.y, inputDimension.x, CUFFT_C2R);
CUFFT_CHECK_ERROR(result);
#if (CUDA_VERSION<8000)
result = cufftSetCompatibilityMode(fftInv, CUFFT_COMPATIBILITY_FFTW_ALL);
CUFFT_CHECK_ERROR(result);
#endif
result = cufftExecC2R(fftInv, deviceProjectionFFT, deviceProjection);
CUFFT_CHECK_ERROR(result);

Expand Down
4 changes: 2 additions & 2 deletions src/rtkCudaWarpBackProjectionImageFilter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ CUDA_warp_back_project(int projSize[3],
// Allocate array for input projections, in order to bind them to
// either a 2D layered texture (requires GetCudaComputeCapability >= 2.0) or
// a 3D texture
if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1)
if(GetCudaComputeCapability(device).first<=1)
cudaMalloc3DArray((cudaArray**)&array_proj, &channelDesc, projExtent);
else
cudaMalloc3DArray((cudaArray**)&array_proj, &channelDesc, projExtent, cudaArrayLayered);
Expand Down Expand Up @@ -452,7 +452,7 @@ CUDA_warp_back_project(int projSize[3],

// Run kernels. Note: Projection data is passed via texture memory,
// transform matrix is passed via constant memory
if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1)
if(GetCudaComputeCapability(device).first<=1)
{
// Compute block and grid sizes
dim3 dimGrid = dim3(blocksInX, blocksInY*blocksInZ);
Expand Down
2 changes: 1 addition & 1 deletion src/rtkCudaWarpImageFilter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ CUDA_warp(int input_vol_dim[3],
unsigned int blocksInY = (output_vol_dim[1]-1)/tBlock_y + 1;
unsigned int blocksInZ = (output_vol_dim[2]-1)/tBlock_z + 1;

if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1)
if(GetCudaComputeCapability(device).first<=1)
{
dim3 dimGrid = dim3(blocksInX, blocksInY*blocksInZ);
dim3 dimBlock = dim3(tBlock_x, tBlock_y, tBlock_z);
Expand Down

0 comments on commit 8180545

Please sign in to comment.