Hi all -- a follow-up to the previous messages in this thread. I am getting the same error, i.e. "All CUDA-capable devices are busy or unavailable". I have tried to follow the recommendations given in various threads of this forum, but to no avail.
My GPU is a Tesla P100, a Pascal card with compute capability 6.0 ("sm_60" or "compute_60"), which therefore requires CUDA 8.0 or newer. I am using CUDA 10.1, which is almost the latest release (10.2 is the latest). This is the CUDA_ARCH variable in my Makefile (the full Makefile, slightly modified from the original k-Wave Makefile, is at the end of this post; a small device-query sketch follows the listing below):
CUDA_ARCH = --generate-code arch=compute_30,code=sm_30 \
            --generate-code arch=compute_32,code=sm_32 \
            --generate-code arch=compute_35,code=sm_35 \
            --generate-code arch=compute_37,code=sm_37 \
            --generate-code arch=compute_50,code=sm_50 \
            --generate-code arch=compute_52,code=sm_52 \
            --generate-code arch=compute_53,code=sm_53 \
            --generate-code arch=compute_60,code=sm_60 \
            --generate-code arch=compute_61,code=sm_61 \
            --generate-code arch=compute_62,code=sm_62 \
            --generate-code arch=compute_70,code=sm_70 \
            --generate-code arch=compute_72,code=sm_72 \
            --generate-code arch=compute_75,code=sm_75
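For anyone else debugging this, here is a minimal device-query sketch (my own test file, not part of k-Wave; the file name check_cc.cu is just an assumption, compile with "nvcc check_cc.cu -o check_cc") that prints the compute capability the CUDA runtime reports, to confirm that sm_60 really is among the targets needed:

// check_cc.cu -- print each device's compute capability as seen by the runtime
#include <cstdio>
#include <cuda_runtime.h>

int main()
{
  int count = 0;
  cudaError_t err = cudaGetDeviceCount(&count);
  if (err != cudaSuccess)
  {
    printf("cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
    return 1;
  }
  printf("Found %d CUDA device(s)\n", count);
  for (int i = 0; i < count; i++)
  {
    cudaDeviceProp prop;
    cudaGetDeviceProperties(&prop, i);
    printf("Device %d: %s, compute capability %d.%d\n",
           i, prop.name, prop.major, prop.minor);
  }
  return 0;
}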
I am not sure what I am missing... Thanks for your input!
Bastien
************************* ERROR MESSAGE *************************
deepbrain:guerin[152] ../../KWAVE_1.2.1/src/kspaceFirstOrder-CUDA/kspaceFirstOrder-CUDA -i kwave3D_N104_650KHZ_0p75MM_PAR.h5 -o kwave3D_N104_650KHZ_0p75MM_SOL.h5
┌───────────────────────────────────────────────────────────────┐
│                  kspaceFirstOrder-CUDA v1.3                   │
├───────────────────────────────────────────────────────────────┤
│ Reading simulation configuration:                        Done │
│ Selected GPU device id:                                Failed │
└───────────────────────────────────────────────────────────────┘
┌───────────────────────────────────────────────────────────────┐
│           !!! K-Wave experienced a fatal error !!!            │
├───────────────────────────────────────────────────────────────┤
│ Error: All CUDA-capable devices are busy or unavailable.      │
├───────────────────────────────────────────────────────────────┤
│                     Execution terminated                      │
└───────────────────────────────────────────────────────────────┘
deepbrain:guerin[153]
*****************************************************************
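As far as I can tell, the message k-Wave prints is the standard CUDA error string for cudaErrorDevicesUnavailable, which the runtime returns from the first call that creates a context on the device. To check whether the failure is specific to k-Wave or affects any context creation, a minimal sketch like this could help (the file name and the guess about where the failure occurs are mine):

// busy_check.cu -- force context creation on device 0 and report the raw error
#include <cstdio>
#include <cuda_runtime.h>

int main()
{
  cudaError_t err = cudaSetDevice(0);
  if (err == cudaSuccess)
  {
    // cudaFree(0) is a common idiom to force lazy context creation
    err = cudaFree(0);
  }
  printf("Context creation on device 0: %s\n", cudaGetErrorString(err));
  return (err == cudaSuccess) ? 0 : 1;
}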
************************* OUTPUT OF NVIDIA-SMI *************************
Fri May 15 10:57:31 2020
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Tesla P100-PCIE...  Off  | 00000000:04:00.0 Off |                    0 |
| N/A   33C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
*****************************************************************
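nvidia-smi reports the compute mode as "Default" above, so exclusive-process mode should not be the culprit, but in case the runtime sees something different, here is a small sketch (again my own, with a hypothetical file name) that prints the compute mode from the CUDA runtime's point of view:

// mode_check.cu -- print device 0's compute mode as seen by the CUDA runtime
#include <cstdio>
#include <cuda_runtime.h>

int main()
{
  cudaDeviceProp prop;
  cudaError_t err = cudaGetDeviceProperties(&prop, 0);
  if (err != cudaSuccess)
  {
    printf("cudaGetDeviceProperties failed: %s\n", cudaGetErrorString(err));
    return 1;
  }
  // 0 = Default, 1 = Exclusive Thread, 2 = Prohibited, 3 = Exclusive Process
  printf("Device 0 compute mode: %d\n", prop.computeMode);
  return 0;
}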
************************* MAKEFILE *************************
# Select compiler
# GNU is default due to Intel 2018's compatibility issues with Ubuntu 18.04
COMPILER = GNU
#COMPILER = Intel
# SEMI-static linking is the default since the binary is expected to run on
# the same system.
# Everything will be linked statically, may not work on all GPUs
#LINKING = STATIC
# Everything will be linked dynamically
#LINKING = DYNAMIC
# Everything but CUDA will be linked statically
LINKING = SEMI
# Set up paths: If using modules, the paths are set up automatically,
# otherwise, set paths manually
CUDA_DIR = /usr/pubsw/packages/CUDA/10.1
HDF5_DIR = /autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5
ZLIB_DIR = /autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/zlib-1.2.11
SZIP_DIR = /autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/szip-2.1.1
# Select CPU architecture (what instruction set to be used).
# The native architecture will compile and optimize the code for the underlying
# processor.
CPU_ARCH = native
#CPU_ARCH = AVX
#CPU_ARCH = AVX2
#CPU_ARCH = AVX512
############################### Common flags ###################################
# Git hash of release 1.3
GIT_HASH = -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\"
# Replace tabs by spaces
.RECIPEPREFIX +=
# What CUDA GPU architectures to include in the binary
CUDA_ARCH = --generate-code arch=compute_30,code=sm_30 \
            --generate-code arch=compute_32,code=sm_32 \
            --generate-code arch=compute_35,code=sm_35 \
            --generate-code arch=compute_37,code=sm_37 \
            --generate-code arch=compute_50,code=sm_50 \
            --generate-code arch=compute_52,code=sm_52 \
            --generate-code arch=compute_53,code=sm_53 \
            --generate-code arch=compute_60,code=sm_60 \
            --generate-code arch=compute_61,code=sm_61 \
            --generate-code arch=compute_62,code=sm_62 \
            --generate-code arch=compute_70,code=sm_70 \
            --generate-code arch=compute_72,code=sm_72 \
            --generate-code arch=compute_75,code=sm_75
# What libraries to link and how
ifeq ($(LINKING), STATIC)
  LDLIBS = $(HDF5_DIR)/lib/libhdf5_hl.a \
           $(HDF5_DIR)/lib/libhdf5.a \
           $(CUDA_DIR)/lib64/libcufft_static.a \
           $(CUDA_DIR)/lib64/libculibos.a \
           $(CUDA_DIR)/lib64/libcudart_static.a \
           $(ZLIB_DIR)/lib/libz.a \
           $(SZIP_DIR)/lib/libsz.a \
           -ldl
else ifeq ($(LINKING), DYNAMIC)
  LDLIBS = -lhdf5 -lhdf5_hl -lz -lcufft
else ifeq ($(LINKING), SEMI)
  LDLIBS = $(HDF5_DIR)/lib/libhdf5_hl.a \
           $(HDF5_DIR)/lib/libhdf5.a \
           $(ZLIB_DIR)/lib/libz.a \
           $(SZIP_DIR)/lib/libsz.a \
           -lcufft \
           -ldl
endif
############################## NVCC + GNU g++ ##################################
ifeq ($(COMPILER), GNU)
# C++ compiler for CUDA
CXX = /usr/pubsw/packages/CUDA/10.0/bin/nvcc
# C++ standard
CPP_STD = -std=c++11
# Enable OpenMP
OPENMP = -fopenmp
# Set CPU architecture
# Sandy Bridge, Ivy Bridge
ifeq ($(CPU_ARCH), AVX)
CPU_FLAGS = -m64 -mavx
# Haswell, Broadwell
else ifeq ($(CPU_ARCH), AVX2)
CPU_FLAGS = -m64 -mavx2
# Skylake-X, Ice Lake, Cannon Lake
else ifeq ($(CPU_ARCH), AVX512)
CPU_FLAGS = -m64 -mavx512f
# Maximum performance for this CPU
else
CPU_FLAGS = -m64 -march=native -mtune=native
endif
# Use maximum optimization
CPU_OPT = -O3 -ffast-math -fassociative-math
# Use maximum optimization
GPU_OPT = -O3
# CPU Debug flags
CPU_DEBUG =
# Debug flags
GPU_DEBUG =
# Profile flags
PROFILE =
# C++ warning flags
WARNING = -Wall
# Add include directories
INCLUDES = -I$(HDF5_DIR)/include -I.
# Add library directories
LIB_PATHS = -L$(HDF5_DIR)/lib -L$(CUDA_DIR)/lib64
# Set compiler flags and header files directories
CXXFLAGS = -Xcompiler="$(CPU_FLAGS) $(CPU_OPT) $(OPENMP) \
                       $(CPU_DEBUG) $(PROFILE) $(WARNING)" \
           $(GPU_OPT) $(CPP_STD) $(GPU_DEBUG) \
           $(GIT_HASH) \
           $(INCLUDES) \
           --device-c --restrict
# Set linker flags and library files directories
LDFLAGS = -Xcompiler="$(OPENMP)" \
          -Xlinker="-rpath,$(HDF5_DIR)/lib:$(CUDA_DIR)/lib64" \
          -std=c++11 \
          $(LIB_PATHS)
endif
############################ NVCC + Intel icpc #################################
ifeq ($(COMPILER), Intel)
# C++ compiler for CUDA
CXX = /usr/pubsw/packages/CUDA/10.0/bin/nvcc
# C++ standard
CPP_STD = -std=c++11
# Enable OpenMP
OPENMP = -qopenmp
# Set CPU architecture
# Sandy Bridge, Ivy Bridge
ifeq ($(CPU_ARCH), AVX)
CPU_FLAGS = -m64 -xAVX
# Haswell, Broadwell
else ifeq ($(CPU_ARCH), AVX2)
CPU_FLAGS = -m64 -xCORE-AVX2
# Skylake-X, Ice Lake, Cannon Lake
else ifeq ($(CPU_ARCH), AVX512)
CPU_FLAGS = -m64 -xCORE-AVX512
# Maximum performance for this CPU
else
CPU_FLAGS = -m64 -xhost
endif
# Use maximum optimization
CPU_OPT = -Ofast
# Use maximum optimization
GPU_OPT = -O3
# CPU Debug flags
CPU_DEBUG =
# Debug flags
GPU_DEBUG =
# Profile flags
PROFILE =
# C++ warning flags
WARNING = -Wall
# Add include directories
INCLUDES = -I$(HDF5_DIR)/include -I.
# Add library directories
LIB_PATHS = -L$(HDF5_DIR)/lib -L$(CUDA_DIR)/lib64
# Set compiler flags and header files directories
CXXFLAGS = -Xcompiler="$(CPU_FLAGS) $(CPU_OPT) $(OPENMP) \
                       $(CPU_DEBUG) $(PROFILE) $(WARNING)" \
           $(GPU_OPT) $(CPP_STD) $(GPU_DEBUG) \
           $(GIT_HASH) \
           $(INCLUDES) \
           --device-c --restrict -ccbin=icpc
# Set linker flags and library files directories
ifneq ($(LINKING), DYNAMIC)
  LDFLAGS = -Xcompiler="$(OPENMP) -static-intel -qopenmp-link=static"
else
  LDFLAGS = -Xcompiler="$(OPENMP)"
endif
LDFLAGS += -std=c++11 -ccbin=icpc \
           -Xlinker="-rpath,$(HDF5_DIR)/lib:$(CUDA_DIR)/lib64" \
           $(LIB_PATHS)
endif
################################### Build ######################################
# Target binary name
TARGET = kspaceFirstOrder-CUDA
# Units to be compiled
DEPENDENCIES = main.o \
               Containers/MatrixContainer.o \
               Containers/CudaMatrixContainer.o \
               Containers/OutputStreamContainer.o \
               Hdf5/Hdf5File.o \
               Hdf5/Hdf5FileHeader.o \
               KSpaceSolver/KSpaceFirstOrderSolver.o \
               KSpaceSolver/SolverCudaKernels.o \
               Logger/Logger.o \
               MatrixClasses/BaseFloatMatrix.o \
               MatrixClasses/BaseIndexMatrix.o \
               MatrixClasses/CufftComplexMatrix.o \
               MatrixClasses/ComplexMatrix.o \
               MatrixClasses/IndexMatrix.o \
               MatrixClasses/RealMatrix.o \
               MatrixClasses/TransposeCudaKernels.o \
               OutputStreams/BaseOutputStream.o \
               OutputStreams/IndexOutputStream.o \
               OutputStreams/CuboidOutputStream.o \
               OutputStreams/WholeDomainOutputStream.o \
               OutputStreams/OutputStreamsCudaKernels.o \
               Parameters/CommandLineParameters.o \
               Parameters/Parameters.o \
               Parameters/CudaParameters.o \
               Parameters/CudaDeviceConstants.o
# Build target
all: $(TARGET)
# Link target
$(TARGET): $(DEPENDENCIES)
  $(CXX) $(LDFLAGS) $(DEPENDENCIES) $(LDLIBS) -o $@
# Compile CPU units
%.o: %.cpp
  $(CXX) $(CXXFLAGS) -o $@ -c $<
# Compile CUDA units
%.o: %.cu
  $(CXX) $(CXXFLAGS) $(CUDA_ARCH) -o $@ -c $<
# Clean repository
.PHONY: clean
clean:
  rm -f $(DEPENDENCIES) $(TARGET)