# Packages installed before building (commands as recorded by the author).
sudo apt install python3.10-venv
sudo apt install libmsgpack-dev
sudo pip install joblib
# Fetch rocBLAS together with its submodules and run the project's install script.
# The leading "$ " prompt markers were removed so the lines can be pasted as-is.
git clone --recursive https://github.com/ROCm/rocBLAS.git
cd rocBLAS/
./install.sh -i -g
构建时间也不短
# Clone rocSOLVER over SSH (needs a GitHub SSH key). The address had been
# replaced by an e-mail-protection placeholder; the real remote is git@github.com.
git clone --recursive git@github.com:ROCm/rocSOLVER.git
cd rocSOLVER/
# Run from ~/ex_rocm/rocSOLVER. Installs into ../local/ and points the build at
# the rocBLAS libraries under /opt/rocm/lib.
./install.sh -i -g --install_dir ../local/ --rocblas_dir /opt/rocm/lib
这个编译时间真的长,3个小时的样子,主要是99%后花了两个小时多,跟计算机性能关系不大。
ex_rocsolver_dgeqrf.cpp
/
// example.cpp source code //
/
#include // for std::min
#include // for size_t
#include
#include
#include // for hip functions
#include // for all the rocsolver C interfaces and type declarations
void init_vector(double* A, int n)
{
for(int i=0; i hA(size_A); // creates array for matrix in CPU
std::vector hIpiv(size_piv); // creates array for householder scalars in CPU
init_vector(hA.data(), size_A);
memset(hIpiv.data(), 0, size_piv*sizeof(double));
print_matrix(hA.data(), M, N, lda);
double *dA, *dIpiv;
hipMalloc(&dA, sizeof(double)*size_A); // allocates memory for matrix in GPU
hipMalloc(&dIpiv, sizeof(double)*size_piv); // allocates memory for scalars in GPU
// here is where you would initialize matrix A (array hA) with input data
// note: matrices must be stored in column major format,
// i.e. entry (i,j) should be accessed by hA[i + j*lda]
// copy data to GPU
hipMemcpy(dA, hA.data(), sizeof(double)*size_A, hipMemcpyHostToDevice);
// compute the QR factorization on the GPU
rocsolver_dgeqrf(handle, M, N, dA, lda, dIpiv);
// copy the results back to CPU
hipMemcpy(hA.data(), dA, sizeof(double)*size_A, hipMemcpyDeviceToHost);
hipMemcpy(hIpiv.data(), dIpiv, sizeof(double)*size_piv, hipMemcpyDeviceToHost);
printf("\nR =\n");
print_matrix(hA.data(), M, N, lda);
printf("\ntau=\n");
print_matrix(hIpiv.data(), 1, N, 1);
// the results are now in hA and hIpiv, so you can use them here
hipFree(dA); // de-allocate GPU memory
hipFree(dIpiv);
rocblas_destroy_handle(handle); // destroy handle
}
Makefile
# Builds the rocSOLVER dgeqrf example (ex_rocsolver_dgeqrf.cpp) against a
# locally built rocSOLVER install tree.
# Every recipe line below starts with a hard TAB, as make requires.

EXE := ex_rocsolver_dgeqrf

# Location of the rocSOLVER install tree; override on the command line, e.g.
#   make ROCSOLVER_DIR=/path/to/rocsolver-install
ROCSOLVER_DIR ?= /home/hipper/ex_rocm/rocSOLVER/build/debug/rocsolver-install

# Debug information by default; override with `make CXXFLAGS=-O2`.
CXXFLAGS ?= -g

# Required flags are kept out of CXXFLAGS so a user override cannot drop them.
INC := -I$(ROCSOLVER_DIR)/include/rocsolver -D__HIP_PLATFORM_AMD__
LD_FLAGS := -L$(ROCSOLVER_DIR)/lib -lamdhip64 -lrocblas -lrocsolver

.PHONY: all clean
# Remove a half-written target if its recipe fails.
.DELETE_ON_ERROR:

all: $(EXE)

$(EXE).o: $(EXE).cpp
	$(CXX) $(CXXFLAGS) $(INC) -c $< -o $@

# Libraries follow the object file so the linker resolves its symbols.
$(EXE): $(EXE).o
	$(CXX) $(CXXFLAGS) $< $(LD_FLAGS) -o $@

clean:
	$(RM) *.o $(EXE)
# Let the dynamic loader find the locally built rocSOLVER library at run time.
# The directory is prepended, so entries already in LD_LIBRARY_PATH are kept.
export LD_LIBRARY_PATH=/home/hipper/ex_rocm/rocSOLVER/build/debug/rocsolver-install/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
137 ROCSOLVER_LAUNCH_KERNEL(set_diag
(gdb)
137 ROCSOLVER_LAUNCH_KERNEL(set_diag
(gdb)
145 if(j < n - 1)
(gdb)
147 rocsolver_larf_template(handle, rocblas_side_left, m - j, n - j - 1, A,
(gdb)
154 ROCSOLVER_LAUNCH_KERNEL(restore_diag
(gdb)
154 ROCSOLVER_LAUNCH_KERNEL(restore_diag
(gdb)
129 for(rocblas_int j = 0; j < dim; ++j)
(gdb)
132 rocsolver_larfg_template(handle, m - j, A, shiftA + idx2D(j, j, lda), A,
(gdb)
137 ROCSOLVER_LAUNCH_KERNEL(set_diag
(gdb)
137 ROCSOLVER_LAUNCH_KERNEL(set_diag
(gdb)
145 if(j < n - 1)
(gdb)
154 ROCSOLVER_LAUNCH_KERNEL(restore_diag
(gdb)
154 ROCSOLVER_LAUNCH_KERNEL(restore_diag
(gdb)
129 for(rocblas_int j = 0; j < dim; ++j)
(gdb)
163 }
(gdb)
rocsolver_geqrf_template
174 }
(gdb)