从源码编译 rocSolver
说明:本人只在系统中安装单个 ROCm 版本的情景下操作过(记录日期 2024-02-18,系统为 Ubuntu 22.04.1)。
https://docs.amd.com/en/docs-5.1.3/deploy/linux/os-native/uninstall.html
# Uninstall single-version ROCm packages
sudo apt autoremove rocm-core
# Uninstall Kernel-mode Driver
sudo apt autoremove amdgpu-dkms
# remove apt source
sudo rm /etc/apt/sources.list.d/<rocm_repository-name>.list
sudo rm /etc/apt/sources.list.d/<amdgpu_repository-name>.list
sudo rm /etc/apt/sources.list.d/rocm.list
sudo rm /etc/apt/sources.list.d/amdgpu.list
sudo rm -rf /var/cache/apt/*
sudo apt-get clean all
sudo reboot
https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html#rocm-install-quick
sudo apt install "linux-headers-$(uname -r)" "linux-modules-extra-$(uname -r)"
# See prerequisites. Adding current user to Video and Render groups
sudo usermod -a -G render,video $LOGNAME
wget https://repo.radeon.com/amdgpu-install/6.0.2/ubuntu/jammy/amdgpu-install_6.0.60002-1_all.deb
sudo apt install ./amdgpu-install_6.0.60002-1_all.deb
sudo apt update
sudo apt install amdgpu-dkms
sudo apt install rocm
sudo reboot
sudo amdgpu-install --usecase=graphics,rocm
sudo reboot
常见编译错误:Clang 报 fatal error: 'cmath' file not found
(出错位置为 #include_next <cmath>,即找不到 libstdc++ 的头文件),安装对应的开发包即可解决:
sudo apt install libstdc++-12-dev
$ git clone --recursive https://github.com/amd/rocm-examples.git
$ cd rocm-examples/HIP-Basic/device_query
$ make
$ ./hip_device_query
ex_rocsolver_dgeqrf.cpp
/////////////////////////////
// example.cpp source code //
/////////////////////////////
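#include <algorithm> // for std::min
#include <stddef.h> // for size_t
#include <stdio.h> // for printf
#include <string.h> // for memset
#include <vector>
#include <hip/hip_runtime_api.h> // for hip functions
#include <rocsolver/rocsolver.h> // for all the rocsolver C interfaces and type declarations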
// Helper called below as init_vector(hA.data(), size_A); presumably fills the
// n-element array A with initial values — its body is not visible in this listing.
void init_vector(double* A, int n)
{
// NOTE(review): the listing is truncated on the next line. Everything between the
// loop condition of init_vector and the declaration of hA has been lost. The code
// below uses print_matrix, handle, M, N, lda, size_A and size_piv, none of which
// are declared in the visible text, so their definitions (and the opening of main)
// presumably sat in the missing span. The element type of the two std::vector
// declarations also appears to have been stripped. Restore all of this from the
// original source before compiling.
for(int i=0; i hA(size_A); // host (CPU) buffer for the matrix
std::vector hIpiv(size_piv); // host (CPU) buffer for the Householder scalars
// give the host matrix its initial contents and zero the scalar buffer
init_vector(hA.data(), size_A);
memset(hIpiv.data(), 0, size_piv*sizeof(double));
// show the input matrix before factorization (print_matrix is not defined in the visible text)
print_matrix(hA.data(), M, N, lda);
// device (GPU) pointers for the matrix and the Householder scalars
// NOTE(review): none of the hip*/rocsolver*/rocblas* return codes below are checked.
double *dA, *dIpiv;
hipMalloc(&dA, sizeof(double)*size_A); // allocates memory for matrix in GPU
hipMalloc(&dIpiv, sizeof(double)*size_piv); // allocates memory for scalars in GPU
// here is where you would initialize matrix A (array hA) with input data
// note: matrices must be stored in column major format,
// i.e. entry (i,j) should be accessed by hA[i + j*lda]
// copy data to GPU
hipMemcpy(dA, hA.data(), sizeof(double)*size_A, hipMemcpyHostToDevice);
// compute the QR factorization on the GPU; the outputs are read back from dA and dIpiv below
rocsolver_dgeqrf(handle, M, N, dA, lda, dIpiv);
// copy the results back to CPU
hipMemcpy(hA.data(), dA, sizeof(double)*size_A, hipMemcpyDeviceToHost);
hipMemcpy(hIpiv.data(), dIpiv, sizeof(double)*size_piv, hipMemcpyDeviceToHost);
// print the factored matrix under the heading "R ="
printf("\nR =\n");
print_matrix(hA.data(), M, N, lda);
// print the Householder scalars under the heading "tau="
// NOTE(review): this prints N entries while hIpiv holds size_piv elements —
// confirm size_piv >= N, otherwise this reads past the end of the buffer.
printf("\ntau=\n");
print_matrix(hIpiv.data(), 1, N, 1);
// the results are now in hA and hIpiv, so you can use them here
hipFree(dA); // de-allocate GPU memory
hipFree(dIpiv);
rocblas_destroy_handle(handle); // destroy handle
}
Makefile:
# Builds the rocSOLVER dgeqrf example (ex_rocsolver_dgeqrf.cpp) against a ROCm install.
# Typical overrides: `make ROCM_PATH=/opt/rocm-6.0.2`, `make CXX=clang++`, `make CXXFLAGS=-O2`.
# NOTE: every recipe line below must begin with a hard TAB, not spaces.

# Remove a half-written target if its recipe fails, so it never looks up to date.
.DELETE_ON_ERROR:

EXE       := ex_rocsolver_dgeqrf
# Root of the ROCm installation; ?= lets the environment or command line override it.
ROCM_PATH ?= /opt/rocm

# Mandatory compile flags: ROCm headers plus the macro selecting the AMD HIP platform.
INC      := -I $(ROCM_PATH)/include -D__HIP_PLATFORM_AMD__
# Mandatory link flags: HIP runtime, rocBLAS and rocSOLVER.
LD_FLAGS := -L $(ROCM_PATH)/lib -lamdhip64 -lrocblas -lrocsolver

# `all` and `clean` are commands, not files.
.PHONY: all clean

all: $(EXE)

# Compile step; user-tunable CPPFLAGS/CXXFLAGS come first, mandatory flags stay in INC.
$(EXE).o: $(EXE).cpp
	$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(INC) -c $< -o $@

# Link step; libraries follow the object file so the linker resolves symbols in order.
$(EXE): $(EXE).o
	$(CXX) $(LDFLAGS) $< $(LD_FLAGS) -o $@

# Remove only the files this Makefile creates.
clean:
	$(RM) $(EXE).o $(EXE)
运行效果:
使用matlab对结果做验证: