Actual source code: cycliccuda.cu
slepc-3.16.3 2022-04-11
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2021, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: SLEPc singular value solver: "cyclic" (CUDA implementation)
12: */
13: #include <slepc/private/svdimpl.h>
14: #include "../src/svd/impls/cyclic/cyclic.h"
16: PetscErrorCode MatMult_Cyclic_CUDA(Mat B,Vec x,Vec y)
17: {
18: PetscErrorCode ierr;
19: SVD_CYCLIC_SHELL *ctx;
20: const PetscScalar *d_px;
21: PetscScalar *d_py;
22: PetscInt m;
25: MatShellGetContext(B,&ctx);
26: MatGetLocalSize(ctx->A,&m,NULL);
27: VecCUDAGetArrayRead(x,&d_px);
28: VecCUDAGetArrayWrite(y,&d_py);
29: VecCUDAPlaceArray(ctx->x1,d_px);
30: VecCUDAPlaceArray(ctx->x2,d_px+m);
31: VecCUDAPlaceArray(ctx->y1,d_py);
32: VecCUDAPlaceArray(ctx->y2,d_py+m);
33: MatMult(ctx->A,ctx->x2,ctx->y1);
34: MatMult(ctx->AT,ctx->x1,ctx->y2);
35: VecCUDAResetArray(ctx->x1);
36: VecCUDAResetArray(ctx->x2);
37: VecCUDAResetArray(ctx->y1);
38: VecCUDAResetArray(ctx->y2);
39: VecCUDARestoreArrayRead(x,&d_px);
40: VecCUDARestoreArrayWrite(y,&d_py);
41: return(0);
42: }
44: PetscErrorCode MatMult_ECross_CUDA(Mat B,Vec x,Vec y)
45: {
46: PetscErrorCode ierr;
47: SVD_CYCLIC_SHELL *ctx;
48: const PetscScalar *d_px;
49: PetscScalar *d_py;
50: PetscInt mn,m,n;
53: MatShellGetContext(B,&ctx);
54: MatGetLocalSize(ctx->A,NULL,&n);
55: VecGetLocalSize(y,&mn);
56: m = mn-n;
57: VecCUDAGetArrayRead(x,&d_px);
58: VecCUDAGetArrayWrite(y,&d_py);
59: VecCUDAPlaceArray(ctx->x1,d_px);
60: VecCUDAPlaceArray(ctx->x2,d_px+m);
61: VecCUDAPlaceArray(ctx->y1,d_py);
62: VecCUDAPlaceArray(ctx->y2,d_py+m);
63: VecCopy(ctx->x1,ctx->y1);
64: MatMult(ctx->A,ctx->x2,ctx->w);
65: MatMult(ctx->AT,ctx->w,ctx->y2);
66: VecCUDAResetArray(ctx->x1);
67: VecCUDAResetArray(ctx->x2);
68: VecCUDAResetArray(ctx->y1);
69: VecCUDAResetArray(ctx->y2);
70: VecCUDARestoreArrayRead(x,&d_px);
71: VecCUDARestoreArrayWrite(y,&d_py);
72: return(0);
73: }