Hi -- I am trying to compile the CUDA code from source, since the precompiled binaries require GLIBC_2.27, which my system doesn't have and which is hard to install.
Compilation goes fine, but the build fails at the linking step, which is a bit strange: all of the *.o files are where they are supposed to be, yet I get a ton of "undefined reference" errors. I am on CentOS Linux release 7.7.1908.
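For reference, the GLIBC side can be confirmed with something like the following (just a sketch; /lib64/libc.so.6 is the usual CentOS 7 x86_64 location and may differ on other systems):
ldd --version                                                   # glibc version the system toolchain reports
strings /lib64/libc.so.6 | grep '^GLIBC_' | sort -V | tail -5   # newest symbol versions this libc provides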
Here is my Makefile:
################################################################################
# Set the following flags based on your compiler and library paths #
################################################################################
# Select compiler
# GNU is the default due to Intel 2018's compatibility issues with Ubuntu 18.04
COMPILER = GNU
#COMPILER = Intel
# SEMI static linking is the default since it is expected the binary will run on the
# same system.
# Everything will be linked statically; may not work on all GPUs
#LINKING = STATIC
# Everything will be linked dynamically
#LINKING = DYNAMIC
# Everything but CUDA will be linked statically
LINKING = SEMI
# Set up paths: If using modules, the paths are set up automatically,
# otherwise, set paths manually
CUDA_DIR = /usr/pubsw/packages/CUDA/10.0
HDF5_DIR = /autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5
ZLIB_DIR = /autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/zlib-1.2.11
SZIP_DIR = /autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/szip-2.1.1
# Select CPU architecture (which instruction set to use).
# The native architecture will compile and optimize the code for the underlying
# processor.
CPU_ARCH = native
#CPU_ARCH = AVX
#CPU_ARCH = AVX2
#CPU_ARCH = AVX512
############################## Common flags ###################################
# Git hash of release 1.3
GIT_HASH = -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\"
# Replace tabs by spaces
.RECIPEPREFIX +=
# What CUDA GPU architectures to include in the binary
CUDA_ARCH = --generate-code arch=compute_30,code=sm_30 \
--generate-code arch=compute_32,code=sm_32 \
--generate-code arch=compute_35,code=sm_35 \
--generate-code arch=compute_37,code=sm_37 \
--generate-code arch=compute_50,code=sm_50 \
--generate-code arch=compute_52,code=sm_52 \
--generate-code arch=compute_53,code=sm_53 \
--generate-code arch=compute_60,code=sm_60 \
--generate-code arch=compute_61,code=sm_61 \
--generate-code arch=compute_62,code=sm_62 \
--generate-code arch=compute_70,code=sm_70 \
--generate-code arch=compute_72,code=sm_72 \
--generate-code arch=compute_75,code=sm_75
# What libraries to link and how
ifeq ($(LINKING), STATIC)
LDLIBS = $(HDF5_DIR)/lib/libhdf5_hl.a \
$(HDF5_DIR)/lib/libhdf5.a \
$(CUDA_DIR)/lib64/libcufft_static.a \
$(CUDA_DIR)/lib64/libculibos.a \
$(CUDA_DIR)/lib64/libcudart_static.a \
$(ZLIB_DIR)/lib/libz.a \
$(SZIP_DIR)/lib/libsz.a \
-ldl
else ifeq ($(LINKING), DYNAMIC)
LDLIBS = -lhdf5 -lhdf5_hl -lz -lcufft
else ifeq ($(LINKING), SEMI)
LDLIBS = $(HDF5_DIR)/lib/libhdf5_hl.a \
$(HDF5_DIR)/lib/libhdf5.a \
$(ZLIB_DIR)/lib/libz.a \
$(SZIP_DIR)/lib/libsz.a \
-lcufft \
-ldl
endif
############################## NVCC + GNU g++ ##################################
ifeq ($(COMPILER), GNU)
# C++ compiler for CUDA
CXX = /usr/pubsw/packages/CUDA/10.0/bin/nvcc
# C++ standard
CPP_STD = -std=c++11
# Enable OpenMP
OPENMP = -fopenmp
# Set CPU architecture
# Sandy Bridge, Ivy Bridge
ifeq ($(CPU_ARCH), AVX)
CPU_FLAGS = -m64 -mavx
# Haswell, Broadwell
else ifeq ($(CPU_ARCH), AVX2)
CPU_FLAGS = -m64 -mavx2
# Skylake-X, Ice Lake, Cannon Lake
else ifeq ($(CPU_ARCH), AVX512)
CPU_FLAGS = -m64 -mavx512f
# Maximum performance for this CPU
else
CPU_FLAGS = -m64 -march=native -mtune=native
endif
# Use maximum optimization
CPU_OPT = -O3 -ffast-math -fassociative-math
# Use maximum optimization
GPU_OPT = -O3
# CPU Debug flags
CPU_DEBUG =
# Debug flags
GPU_DEBUG =
# Profile flags
PROFILE =
# C++ warning flags
WARNING = -Wall
# Add include directories
INCLUDES = -I$(HDF5_DIR)/include -I.
# Add library directories
LIB_PATHS = -L$(HDF5_DIR)/lib -L$(CUDA_DIR)/lib64
# Set compiler flags and header files directories
CXXFLAGS = -Xcompiler="$(CPU_FLAGS) $(CPU_OPT) $(OPENMP) \
$(CPU_DEBUG) $(PROFILE) $(WARNING)"\
$(GPU_OPT) $(CPP_STD) $(GPU_DEBUG) \
$(GIT_HASH) \
$(INCLUDES) \
--device-c --restrict
# Set linker flags and library files directories
LDFLAGS = -Xcompiler="$(OPENMP)" \
-Xlinker="-rpath,$(HDF5_DIR)/lib:$(CUDA_DIR)/lib64" \
-std=c++11 \
$(LIB_PATHS)
endif
############################ NVCC + Intel icpc #################################
ifeq ($(COMPILER), Intel)
# C++ compiler for CUDA
CXX = /usr/pubsw/packages/CUDA/10.0/bin/nvcc
# C++ standard
CPP_STD = -std=c++11
# Enable OpenMP
OPENMP = -qopenmp
# Set CPU architecture
# Sandy Bridge, Ivy Bridge
ifeq ($(CPU_ARCH), AVX)
CPU_FLAGS = -m64 -xAVX
# Haswell, Broadwell
else ifeq ($(CPU_ARCH), AVX2)
CPU_FLAGS = -m64 -xCORE-AVX2
# Skylake-X, Ice Lake, Cannon Lake
else ifeq ($(CPU_ARCH), AVX512)
CPU_FLAGS = -m64 -xCORE-AVX512
# Maximum performance for this CPU
else
CPU_FLAGS = -m64 -xhost
endif
# Use maximum optimization
CPU_OPT = -Ofast
# Use maximum optimization
GPU_OPT = -O3
# CPU Debug flags
CPU_DEBUG =
# Debug flags
GPU_DEBUG =
# Profile flags
PROFILE =
# C++ warning flags
WARNING = -Wall
# Add include directories
INCLUDES = -I$(HDF5_DIR)/include -I.
# Add library directories
LIB_PATHS = -L$(HDF5_DIR)/lib -L$(CUDA_DIR)/lib64
# Set compiler flags and header files directories
CXXFLAGS = -Xcompiler="$(CPU_FLAGS) $(CPU_OPT) $(OPENMP) \
$(CPU_DEBUG) $(PROFILE) $(WARNING)" \
$(GPU_OPT) $(CPP_STD) $(GPU_DEBUG) \
$(GIT_HASH) \
$(INCLUDES) \
--device-c --restrict -ccbin=icpc
# Set linker flags and library files directories
ifneq ($(LINKING), DYNAMIC)
LDFLAGS = -Xcompiler="$(OPENMP) -static-intel -qopenmp-link=static"
else
LDFLAGS = -Xcompiler="$(OPENMP)"
endif
LDFLAGS += -std=c++11 -ccbin=icpc \
-Xlinker="-rpath,$(HDF5_DIR)/lib:$(CUDA_DIR)/lib64" \
$(LIB_PATHS)
endif
################################### Build ######################################
# Target binary name
TARGET = kspaceFirstOrder-CUDA
# Units to be compiled
DEPENDENCIES = main.o \
Containers/MatrixContainer.o \
Containers/CudaMatrixContainer.o \
Containers/OutputStreamContainer.o \
Hdf5/Hdf5File.o \
Hdf5/Hdf5FileHeader.o \
KSpaceSolver/KSpaceFirstOrderSolver.o \
KSpaceSolver/SolverCudaKernels.o \
Logger/Logger.o \
MatrixClasses/BaseFloatMatrix.o \
MatrixClasses/BaseIndexMatrix.o \
MatrixClasses/CufftComplexMatrix.o \
MatrixClasses/ComplexMatrix.o \
MatrixClasses/IndexMatrix.o \
MatrixClasses/RealMatrix.o \
MatrixClasses/TransposeCudaKernels.o \
OutputStreams/BaseOutputStream.o \
OutputStreams/IndexOutputStream.o \
OutputStreams/CuboidOutputStream.o \
OutputStreams/WholeDomainOutputStream.o \
OutputStreams/OutputStreamsCudaKernels.o \
Parameters/CommandLineParameters.o \
Parameters/Parameters.o \
Parameters/CudaParameters.o \
Parameters/CudaDeviceConstants.o
# Build target
all: $(TARGET)
# Link target
$(TARGET): $(DEPENDENCIES)
    $(CXX) $(LDFLAGS) $(DEPENDENCIES) $(LDLIBS) -o $@
# Compile CPU units
%.o: %.cpp
    $(CXX) $(CXXFLAGS) -o $@ -c $<
# Compile CUDA units
%.o: %.cu
    $(CXX) $(CXXFLAGS) $(CUDA_ARCH) -o $@ -c $<
# Clean repository
.PHONY: clean
clean:
    rm -f $(DEPENDENCIES) $(TARGET)
Below is the beginning of the build output (the "undefined reference" errors appear at the final link step):
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict -o main.o -c main.cpp
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict -o Containers/MatrixContainer.o -c Containers/MatrixContainer.cpp
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict --generate-code arch=compute_30,code=sm_30 --generate-code arch=compute_32,code=sm_32 --generate-code arch=compute_35,code=sm_35 --generate-code arch=compute_37,code=sm_37 --generate-code arch=compute_50,code=sm_50 --generate-code arch=compute_52,code=sm_52 --generate-code arch=compute_53,code=sm_53 --generate-code arch=compute_60,code=sm_60 --generate-code arch=compute_61,code=sm_61 --generate-code arch=compute_62,code=sm_62 --generate-code arch=compute_70,code=sm_70 --generate-code arch=compute_72,code=sm_72 --generate-code arch=compute_75,code=sm_75 -o Containers/CudaMatrixContainer.o -c Containers/CudaMatrixContainer.cu
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict -o Containers/OutputStreamContainer.o -c Containers/OutputStreamContainer.cpp
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict -o Hdf5/Hdf5File.o -c Hdf5/Hdf5File.cpp
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict -o Hdf5/Hdf5FileHeader.o -c Hdf5/Hdf5FileHeader.cpp
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict -o KSpaceSolver/KSpaceFirstOrderSolver.o -c KSpaceSolver/KSpaceFirstOrderSolver.cpp
KSpaceSolver/KSpaceFirstOrderSolver.cpp:2116:0: warning: ignoring #pragma omp simd [-Wunknown-pragmas]
#pragma omp simd
^
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict --generate-code arch=compute_30,code=sm_30 --generate-code arch=compute_32,code=sm_32 --generate-code arch=compute_35,code=sm_35 --generate-code arch=compute_37,code=sm_37 --generate-code arch=compute_50,code=sm_50 --generate-code arch=compute_52,code=sm_52 --generate-code arch=compute_53,code=sm_53 --generate-code arch=compute_60,code=sm_60 --generate-code arch=compute_61,code=sm_61 --generate-code arch=compute_62,code=sm_62 --generate-code arch=compute_70,code=sm_70 --generate-code arch=compute_72,code=sm_72 --generate-code arch=compute_75,code=sm_75 -o KSpaceSolver/SolverCudaKernels.o -c KSpaceSolver/SolverCudaKernels.cu
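In case it helps with narrowing down the undefined references, this is the kind of cross-check I can run: list the undefined symbols an object file needs, then see whether the static HDF5 library actually defines them (a sketch only; Hdf5/Hdf5File.o and H5Fopen are just example names):
nm -C Hdf5/Hdf5File.o | grep ' U '                              # symbols this object expects the linker to resolve
nm /autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/lib/libhdf5.a | grep ' T H5Fopen'   # is the symbol defined (T) in the static lib?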
Many thanks!