From 8180545fba5f2aca8537ccfd547248355b6bbc5d Mon Sep 17 00:00:00 2001 From: Simon Rit Date: Fri, 13 Apr 2018 13:43:07 +0200 Subject: [PATCH] COMP: force CUDA 8 (required for c++11) --- cmake/FindCUDA_wrap.cmake | 27 ++--------------- cmake/nvcc-check.cmake | 33 +++++---------------- itk-module-init.cmake | 2 +- src/rtkCudaBackProjectionImageFilter.cu | 4 +-- src/rtkCudaFDKBackProjectionImageFilter.cu | 4 +-- src/rtkCudaFFTConvolutionImageFilter.cu | 8 ----- src/rtkCudaWarpBackProjectionImageFilter.cu | 4 +-- src/rtkCudaWarpImageFilter.cu | 2 +- 8 files changed, 18 insertions(+), 66 deletions(-) diff --git a/cmake/FindCUDA_wrap.cmake b/cmake/FindCUDA_wrap.cmake index ffb8a5264..d0bb0a398 100644 --- a/cmake/FindCUDA_wrap.cmake +++ b/cmake/FindCUDA_wrap.cmake @@ -3,7 +3,7 @@ if (MINGW) # Cuda doesn't work with mingw at all set (CUDA_FOUND FALSE) -elseif (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 2.8) +elseif (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 2.8) # FindCuda is included with CMake 2.8 set (CUDA_FOUND FALSE) else () @@ -37,13 +37,6 @@ endif () set (CUDA_FOUND ${CUDA_FOUND} CACHE BOOL "Did we find cuda?") mark_as_advanced(CUDA_FOUND) -if(CUDA_FOUND) - if(${CUDA_VERSION} LESS 3.2) - message("CUDA version ${CUDA_VERSION} found, too old for RTK") - set(CUDA_FOUND FALSE) - endif() -endif() - if (CUDA_FOUND) cuda_include_directories (${CMAKE_CURRENT_SOURCE_DIR}) endif () @@ -53,28 +46,14 @@ endif () # This script will modify CUDA_NVCC_FLAGS if system default is not gcc-4.3 include (nvcc-check) -if("${CUDA_VERSION}" LESS 6.5) - # set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} - # -gencode arch=compute_10,code=sm_10 - # -gencode arch=compute_11,code=sm_11 - # -gencode arch=compute_12,code=sm_12 - # -gencode arch=compute_13,code=sm_13 - # ) -endif () - -if("${CUDA_VERSION}" LESS 5.0) - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} - -gencode arch=compute_20,code=sm_20 - -gencode arch=compute_20,code=compute_20 - ) -elseif("${CUDA_VERSION}" LESS 8.0) +if("${CUDA_VERSION}" VERSION_LESS 8.0) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_20,code=sm_20 -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_35,code=compute_35 ) -elseif("${CUDA_VERSION}" LESS 9.0) +elseif("${CUDA_VERSION}" VERSION_LESS 9.0) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Wno-deprecated-gpu-targets -gencode arch=compute_20,code=sm_20 diff --git a/cmake/nvcc-check.cmake b/cmake/nvcc-check.cmake index f1c238fdd..53bc13adc 100644 --- a/cmake/nvcc-check.cmake +++ b/cmake/nvcc-check.cmake @@ -49,22 +49,22 @@ if(CUDA_FOUND) if(${CUDA_VERSION} VERSION_GREATER "8.99") FIND_GCC(GCC_PATH "6" "4") endif() - if(NOT GCC_PATH AND ${CUDA_VERSION} VERSION_GREATER "6.99") + if(NOT GCC_PATH ) FIND_GCC(GCC_PATH "4" "9") endif() - if(NOT GCC_PATH AND ${CUDA_VERSION} VERSION_GREATER "5.4.99") + if(NOT GCC_PATH ) FIND_GCC(GCC_PATH "4" "8") endif() - if(NOT GCC_PATH AND ${CUDA_VERSION} VERSION_GREATER "5.4.99") + if(NOT GCC_PATH ) FIND_GCC(GCC_PATH "4" "7") endif() - if(NOT GCC_PATH AND ${CUDA_VERSION} VERSION_GREATER "4.1.99") + if(NOT GCC_PATH ) FIND_GCC(GCC_PATH "4" "6") endif() - if(NOT GCC_PATH AND ${CUDA_VERSION} VERSION_GREATER "4.0.99") + if(NOT GCC_PATH ) FIND_GCC(GCC_PATH "4" "5") endif() - if(NOT GCC_PATH AND ${CUDA_VERSION} VERSION_GREATER "3.99") + if(NOT GCC_PATH ) FIND_GCC(GCC_PATH "4" "4") endif() if(NOT GCC_PATH) @@ -78,7 +78,7 @@ if(CUDA_FOUND) endif() if(GCC_PATH) - if(NOT APPLE OR "${CUDA_VERSION}" LESS 7.0) + if(NOT APPLE) message(STATUS "nvcc-check: Found adequate gcc (${GCC_PATH})... telling nvcc to use it!") #Only append option if not already done list (FIND CUDA_NVCC_FLAGS "--compiler-bindir" _index) @@ -90,23 +90,4 @@ if(CUDA_FOUND) message(FATAL_ERROR "nvcc-check: Please install adequate gcc for cuda.\nNote that gcc-4.x can be installed side-by-side with your current version of gcc.\n") endif() endif() - - - if(CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "APPLE") - # For CUDA 3.2: surface_functions.h does some non-compliant things... - # so we tell g++ to ignore them when called via nvcc - # by passing the -fpermissive flag through the nvcc - # build trajectory. Unfortunately, nvcc will also - # blindly pass this flag to gcc, even though it is not - # valid... resulting in TONS of warnings. So, we go - # version checking again, this time nvcc... - # Get the nvcc version number - - # This issue seems to be only if cuda is installed in system so test CUDA_INCLUDE_DIRS - # (see http://nvidia.custhelp.com/app/answers/detail/a_id/2869/~/linux-based-cuda-v3.x-compiler-issue-affecting-cuda-surface-apis) - if(CUDA_VERSION_MAJOR MATCHES "3" AND CUDA_VERSION_MINOR MATCHES "2" AND CUDA_INCLUDE_DIRS MATCHES "/usr/include") - set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --compiler-options='-fpermissive') - message(STATUS "nvcc-check: CUDA 3.2 exception: CUDA_NVCC_FLAGS set to \"${CUDA_NVCC_FLAGS}\"") - endif() - endif() endif() diff --git a/itk-module-init.cmake b/itk-module-init.cmake index 87ca2c1c5..6288695cb 100644 --- a/itk-module-init.cmake +++ b/itk-module-init.cmake @@ -5,7 +5,7 @@ list(APPEND CMAKE_MODULE_PATH ${RTK_SOURCE_DIR}/cmake) find_package(CUDA_wrap QUIET) if(CUDA_FOUND) - if(${CUDA_VERSION} LESS 4.1) + if(${CUDA_VERSION} VERSION_LESS 8.0) message(WARNING "CUDA version ${CUDA_VERSION} is not supported by RTK.") set(RTK_USE_CUDA_DEFAULT OFF) else() diff --git a/src/rtkCudaBackProjectionImageFilter.cu b/src/rtkCudaBackProjectionImageFilter.cu index 937089daf..3bd36e876 100644 --- a/src/rtkCudaBackProjectionImageFilter.cu +++ b/src/rtkCudaBackProjectionImageFilter.cu @@ -281,7 +281,7 @@ CUDA_back_project(int projSize[3], // Allocate array for input projections, in order to bind them to // either a 2D layered texture (requires GetCudaComputeCapability >= 2.0) or // a 3D texture - if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1) + if(GetCudaComputeCapability(device).first<=1) cudaMalloc3DArray((cudaArray**)&array_proj, &channelDesc, projExtent); else cudaMalloc3DArray((cudaArray**)&array_proj, &channelDesc, projExtent, cudaArrayLayered); @@ -308,7 +308,7 @@ CUDA_back_project(int projSize[3], // Run kernels. Note: Projection data is passed via texture memory, // transform matrix is passed via constant memory - if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1) + if(GetCudaComputeCapability(device).first<=1) { // Compute block and grid sizes dim3 dimGrid = dim3(blocksInX, blocksInY*blocksInZ); diff --git a/src/rtkCudaFDKBackProjectionImageFilter.cu b/src/rtkCudaFDKBackProjectionImageFilter.cu index 45a5a28d8..f451284ef 100644 --- a/src/rtkCudaFDKBackProjectionImageFilter.cu +++ b/src/rtkCudaFDKBackProjectionImageFilter.cu @@ -179,7 +179,7 @@ CUDA_reconstruct_conebeam( // Allocate array for input projections, in order to bind them to // either a 2D layered texture (requires GetCudaComputeCapability >= 2.0) or // a 3D texture - if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1) + if(GetCudaComputeCapability(device).first<=1) cudaMalloc3DArray((cudaArray**)&array_proj, &channelDesc, projExtent); else cudaMalloc3DArray((cudaArray**)&array_proj, &channelDesc, projExtent, cudaArrayLayered); @@ -206,7 +206,7 @@ CUDA_reconstruct_conebeam( // Run kernels. Note: Projection data is passed via texture memory, // transform matrix is passed via constant memory - if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1) + if(GetCudaComputeCapability(device).first<=1) { // Compute block and grid sizes dim3 dimGrid = dim3(blocksInX, blocksInY*blocksInZ); diff --git a/src/rtkCudaFFTConvolutionImageFilter.cu b/src/rtkCudaFFTConvolutionImageFilter.cu index 36f465eda..c638f3246 100644 --- a/src/rtkCudaFFTConvolutionImageFilter.cu +++ b/src/rtkCudaFFTConvolutionImageFilter.cu @@ -95,10 +95,6 @@ CUDA_fft_convolution(const int3 &inputDimension, else result = cufftPlan3d(&fftFwd, inputDimension.z, inputDimension.y, inputDimension.x, CUFFT_R2C); CUFFT_CHECK_ERROR(result); -#if (CUDA_VERSION<8000) - result = cufftSetCompatibilityMode(fftFwd, CUFFT_COMPATIBILITY_FFTW_ALL); - CUFFT_CHECK_ERROR(result); -#endif result = cufftExecR2C(fftFwd, deviceProjection, deviceProjectionFFT); CUFFT_CHECK_ERROR(result); cufftDestroy(fftFwd); @@ -131,10 +127,6 @@ CUDA_fft_convolution(const int3 &inputDimension, else result = cufftPlan3d(&fftInv, inputDimension.z, inputDimension.y, inputDimension.x, CUFFT_C2R); CUFFT_CHECK_ERROR(result); -#if (CUDA_VERSION<8000) - result = cufftSetCompatibilityMode(fftInv, CUFFT_COMPATIBILITY_FFTW_ALL); - CUFFT_CHECK_ERROR(result); -#endif result = cufftExecC2R(fftInv, deviceProjectionFFT, deviceProjection); CUFFT_CHECK_ERROR(result); diff --git a/src/rtkCudaWarpBackProjectionImageFilter.cu b/src/rtkCudaWarpBackProjectionImageFilter.cu index b191b1df6..94bb9c32f 100644 --- a/src/rtkCudaWarpBackProjectionImageFilter.cu +++ b/src/rtkCudaWarpBackProjectionImageFilter.cu @@ -356,7 +356,7 @@ CUDA_warp_back_project(int projSize[3], // Allocate array for input projections, in order to bind them to // either a 2D layered texture (requires GetCudaComputeCapability >= 2.0) or // a 3D texture - if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1) + if(GetCudaComputeCapability(device).first<=1) cudaMalloc3DArray((cudaArray**)&array_proj, &channelDesc, projExtent); else cudaMalloc3DArray((cudaArray**)&array_proj, &channelDesc, projExtent, cudaArrayLayered); @@ -452,7 +452,7 @@ CUDA_warp_back_project(int projSize[3], // Run kernels. Note: Projection data is passed via texture memory, // transform matrix is passed via constant memory - if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1) + if(GetCudaComputeCapability(device).first<=1) { // Compute block and grid sizes dim3 dimGrid = dim3(blocksInX, blocksInY*blocksInZ); diff --git a/src/rtkCudaWarpImageFilter.cu b/src/rtkCudaWarpImageFilter.cu index 4b00e27cf..32464b823 100644 --- a/src/rtkCudaWarpImageFilter.cu +++ b/src/rtkCudaWarpImageFilter.cu @@ -296,7 +296,7 @@ CUDA_warp(int input_vol_dim[3], unsigned int blocksInY = (output_vol_dim[1]-1)/tBlock_y + 1; unsigned int blocksInZ = (output_vol_dim[2]-1)/tBlock_z + 1; - if(CUDA_VERSION<4000 || GetCudaComputeCapability(device).first<=1) + if(GetCudaComputeCapability(device).first<=1) { dim3 dimGrid = dim3(blocksInX, blocksInY*blocksInZ); dim3 dimBlock = dim3(tBlock_x, tBlock_y, tBlock_z);