Hi -- I am trying to compile the CUDA code from source, since the precompiled binaries require GLIBC_2.27, which my system doesn't have and which is hard to install.
Compilation goes fine, but the build fails at the linking step, which is a bit strange: all of the *.o files are where they are supposed to be, yet I get a ton of "undefined reference" errors. I am on CentOS Linux release 7.7.1908.
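For reference, the GLIBC side can be confirmed with something like the following (just a sketch; /lib64/libc.so.6 is the usual CentOS 7 x86_64 location and may differ on other systems):
ldd --version                                                   # glibc version the system toolchain reports
strings /lib64/libc.so.6 | grep '^GLIBC_' | sort -V | tail -5   # newest symbol versions this libc provides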
Here is my Makefile:
################################################################################
# Set the following flags based on your compiler and library paths #
################################################################################
# Select compiler
# GNU is the default due to Intel 2018's compatibility issues with Ubuntu 18.04
COMPILER = GNU
#COMPILER = Intel
# SEMI static linking is the default since it is expected the binary will run on the
# same system.
# Everything will be linked statically; may not work on all GPUs
#LINKING = STATIC
# Everything will be linked dynamically
#LINKING = DYNAMIC
# Everything but CUDA will be linked statically
LINKING = SEMI
# Set up paths: If using modules, the paths are set up automatically,
# otherwise, set paths manually
CUDA_DIR = /usr/pubsw/packages/CUDA/10.0
HDF5_DIR = /autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5
ZLIB_DIR = /autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/zlib-1.2.11
SZIP_DIR = /autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/szip-2.1.1
# Select CPU architecture (which instruction set to use).
# The native architecture will compile and optimize the code for the underlying
# processor.
CPU_ARCH = native
#CPU_ARCH = AVX
#CPU_ARCH = AVX2
#CPU_ARCH = AVX512
############################## Common flags ###################################
# Git hash of release 1.3
GIT_HASH = -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\"
# Replace tabs by spaces
.RECIPEPREFIX +=
# What CUDA GPU architectures to include in the binary
CUDA_ARCH = --generate-code arch=compute_30,code=sm_30 \
--generate-code arch=compute_32,code=sm_32 \
--generate-code arch=compute_35,code=sm_35 \
--generate-code arch=compute_37,code=sm_37 \
--generate-code arch=compute_50,code=sm_50 \
--generate-code arch=compute_52,code=sm_52 \
--generate-code arch=compute_53,code=sm_53 \
--generate-code arch=compute_60,code=sm_60 \
--generate-code arch=compute_61,code=sm_61 \
--generate-code arch=compute_62,code=sm_62 \
--generate-code arch=compute_70,code=sm_70 \
--generate-code arch=compute_72,code=sm_72 \
--generate-code arch=compute_75,code=sm_75
# What libraries to link and how
ifeq ($(LINKING), STATIC)
LDLIBS = $(HDF5_DIR)/lib/libhdf5_hl.a \
$(HDF5_DIR)/lib/libhdf5.a \
$(CUDA_DIR)/lib64/libcufft_static.a \
$(CUDA_DIR)/lib64/libculibos.a \
$(CUDA_DIR)/lib64/libcudart_static.a \
$(ZLIB_DIR)/lib/libz.a \
$(SZIP_DIR)/lib/libsz.a \
-ldl
else ifeq ($(LINKING), DYNAMIC)
LDLIBS = -lhdf5 -lhdf5_hl -lz -lcufft
else ifeq ($(LINKING), SEMI)
LDLIBS = $(HDF5_DIR)/lib/libhdf5_hl.a \
$(HDF5_DIR)/lib/libhdf5.a \
$(ZLIB_DIR)/lib/libz.a \
$(SZIP_DIR)/lib/libsz.a \
-lcufft \
-ldl
endif
############################## NVCC + GNU g++ ##################################
ifeq ($(COMPILER), GNU)
# C++ compiler for CUDA
CXX = /usr/pubsw/packages/CUDA/10.0/bin/nvcc
# C++ standard
CPP_STD = -std=c++11
# Enable OpenMP
OPENMP = -fopenmp
# Set CPU architecture
# Sandy Bridge, Ivy Bridge
ifeq ($(CPU_ARCH), AVX)
CPU_FLAGS = -m64 -mavx
# Haswell, Broadwell
else ifeq ($(CPU_ARCH), AVX2)
CPU_FLAGS = -m64 -mavx2
# Skylake-X, Ice Lake, Cannon Lake
else ifeq ($(CPU_ARCH), AVX512)
CPU_FLAGS = -m64 -mavx512f
# Maximum performance for this CPU
else
CPU_FLAGS = -m64 -march=native -mtune=native
endif
# Use maximum optimization
CPU_OPT = -O3 -ffast-math -fassociative-math
# Use maximum optimization
GPU_OPT = -O3
# CPU Debug flags
CPU_DEBUG =
# Debug flags
GPU_DEBUG =
# Profile flags
PROFILE =
# C++ warning flags
WARNING = -Wall
# Add include directories
INCLUDES = -I$(HDF5_DIR)/include -I.
# Add library directories
LIB_PATHS = -L$(HDF5_DIR)/lib -L$(CUDA_DIR)/lib64
# Set compiler flags and header files directories
CXXFLAGS = -Xcompiler="$(CPU_FLAGS) $(CPU_OPT) $(OPENMP) \
$(CPU_DEBUG) $(PROFILE) $(WARNING)"\
$(GPU_OPT) $(CPP_STD) $(GPU_DEBUG) \
$(GIT_HASH) \
$(INCLUDES) \
--device-c --restrict
# Set linker flags and library files directories
LDFLAGS = -Xcompiler="$(OPENMP)" \
-Xlinker="-rpath,$(HDF5_DIR)/lib:$(CUDA_DIR)/lib64" \
-std=c++11 \
$(LIB_PATHS)
endif
############################ NVCC + Intel icpc #################################
ifeq ($(COMPILER), Intel)
# C++ compiler for CUDA
CXX = /usr/pubsw/packages/CUDA/10.0/bin/nvcc
# C++ standard
CPP_STD = -std=c++11
# Enable OpenMP
OPENMP = -qopenmp
# Set CPU architecture
# Sandy Bridge, Ivy Bridge
ifeq ($(CPU_ARCH), AVX)
CPU_FLAGS = -m64 -xAVX
# Haswell, Broadwell
else ifeq ($(CPU_ARCH), AVX2)
CPU_FLAGS = -m64 -xCORE-AVX2
# Skylake-X, Ice Lake, Cannon Lake
else ifeq ($(CPU_ARCH), AVX512)
CPU_FLAGS = -m64 -xCORE-AVX512
# Maximum performance for this CPU
else
CPU_FLAGS = -m64 -xhost
endif
# Use maximum optimization
CPU_OPT = -Ofast
# Use maximum optimization
GPU_OPT = -O3
# CPU Debug flags
CPU_DEBUG =
# Debug flags
GPU_DEBUG =
# Profile flags
PROFILE =
# C++ warning flags
WARNING = -Wall
# Add include directories
INCLUDES = -I$(HDF5_DIR)/include -I.
# Add library directories
LIB_PATHS = -L$(HDF5_DIR)/lib -L$(CUDA_DIR)/lib64
# Set compiler flags and header files directories
CXXFLAGS = -Xcompiler="$(CPU_FLAGS) $(CPU_OPT) $(OPENMP) \
$(CPU_DEBUG) $(PROFILE) $(WARNING)" \
$(GPU_OPT) $(CPP_STD) $(GPU_DEBUG) \
$(GIT_HASH) \
$(INCLUDES) \
--device-c --restrict -ccbin=icpc
# Set linker flags and library files directories
ifneq ($(LINKING), DYNAMIC)
LDFLAGS = -Xcompiler="$(OPENMP) -static-intel -qopenmp-link=static"
else
LDFLAGS = -Xcompiler="$(OPENMP)"
endif
LDFLAGS += -std=c++11 -ccbin=icpc \
-Xlinker="-rpath,$(HDF5_DIR)/lib:$(CUDA_DIR)/lib64" \
$(LIB_PATHS)
endif
################################### Build ######################################
# Target binary name
TARGET = kspaceFirstOrder-CUDA
# Units to be compiled
DEPENDENCIES = main.o \
Containers/MatrixContainer.o \
Containers/CudaMatrixContainer.o \
Containers/OutputStreamContainer.o \
Hdf5/Hdf5File.o \
Hdf5/Hdf5FileHeader.o \
KSpaceSolver/KSpaceFirstOrderSolver.o \
KSpaceSolver/SolverCudaKernels.o \
Logger/Logger.o \
MatrixClasses/BaseFloatMatrix.o \
MatrixClasses/BaseIndexMatrix.o \
MatrixClasses/CufftComplexMatrix.o \
MatrixClasses/ComplexMatrix.o \
MatrixClasses/IndexMatrix.o \
MatrixClasses/RealMatrix.o \
MatrixClasses/TransposeCudaKernels.o \
OutputStreams/BaseOutputStream.o \
OutputStreams/IndexOutputStream.o \
OutputStreams/CuboidOutputStream.o \
OutputStreams/WholeDomainOutputStream.o \
OutputStreams/OutputStreamsCudaKernels.o \
Parameters/CommandLineParameters.o \
Parameters/Parameters.o \
Parameters/CudaParameters.o \
Parameters/CudaDeviceConstants.o
# Build target
all: $(TARGET)
# Link target
$(TARGET): $(DEPENDENCIES)
    $(CXX) $(LDFLAGS) $(DEPENDENCIES) $(LDLIBS) -o $@
# Compile CPU units
%.o: %.cpp
    $(CXX) $(CXXFLAGS) -o $@ -c $<
# Compile CUDA units
%.o: %.cu
    $(CXX) $(CXXFLAGS) $(CUDA_ARCH) -o $@ -c $<
# Clean repository
.PHONY: clean
clean:
    rm -f $(DEPENDENCIES) $(TARGET)
Below is the beginning of the build output (the "undefined reference" errors appear at the final link step):
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict -o main.o -c main.cpp
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict -o Containers/MatrixContainer.o -c Containers/MatrixContainer.cpp
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict --generate-code arch=compute_30,code=sm_30 --generate-code arch=compute_32,code=sm_32 --generate-code arch=compute_35,code=sm_35 --generate-code arch=compute_37,code=sm_37 --generate-code arch=compute_50,code=sm_50 --generate-code arch=compute_52,code=sm_52 --generate-code arch=compute_53,code=sm_53 --generate-code arch=compute_60,code=sm_60 --generate-code arch=compute_61,code=sm_61 --generate-code arch=compute_62,code=sm_62 --generate-code arch=compute_70,code=sm_70 --generate-code arch=compute_72,code=sm_72 --generate-code arch=compute_75,code=sm_75 -o Containers/CudaMatrixContainer.o -c Containers/CudaMatrixContainer.cu
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict -o Containers/OutputStreamContainer.o -c Containers/OutputStreamContainer.cpp
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict -o Hdf5/Hdf5File.o -c Hdf5/Hdf5File.cpp
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict -o Hdf5/Hdf5FileHeader.o -c Hdf5/Hdf5FileHeader.cpp
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict -o KSpaceSolver/KSpaceFirstOrderSolver.o -c KSpaceSolver/KSpaceFirstOrderSolver.cpp
KSpaceSolver/KSpaceFirstOrderSolver.cpp:2116:0: warning: ignoring #pragma omp simd [-Wunknown-pragmas]
#pragma omp simd
^
/usr/pubsw/packages/CUDA/10.0/bin/nvcc -Xcompiler="-m64 -march=native -mtune=native -O3 -ffast-math -fassociative-math -fopenmp -Wall" -O3 -std=c++11 -D__KWAVE_GIT_HASH__=\"468dc31c2842a7df5f2a07c3a13c16c9b0b2b770\" -I/autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/include -I. --device-c --restrict --generate-code arch=compute_30,code=sm_30 --generate-code arch=compute_32,code=sm_32 --generate-code arch=compute_35,code=sm_35 --generate-code arch=compute_37,code=sm_37 --generate-code arch=compute_50,code=sm_50 --generate-code arch=compute_52,code=sm_52 --generate-code arch=compute_53,code=sm_53 --generate-code arch=compute_60,code=sm_60 --generate-code arch=compute_61,code=sm_61 --generate-code arch=compute_62,code=sm_62 --generate-code arch=compute_70,code=sm_70 --generate-code arch=compute_72,code=sm_72 --generate-code arch=compute_75,code=sm_75 -o KSpaceSolver/SolverCudaKernels.o -c KSpaceSolver/SolverCudaKernels.cu
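In case it helps with narrowing down the undefined references, this is the kind of cross-check I can run: list the undefined symbols an object file needs, then see whether the static HDF5 library actually defines them (a sketch only; Hdf5/Hdf5File.o and H5Fopen are just example names):
nm -C Hdf5/Hdf5File.o | grep ' U '                              # symbols this object expects the linker to resolve
nm /autofs/space/guerin/USneuromod/KWAVE_1.2.1/src/hdf5-1.12.0/hdf5/lib/libhdf5.a | grep ' T H5Fopen'   # is the symbol defined (T) in the static lib?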
Many thanks!