Actual source code: fninvsqrt.c

slepc-3.18.2 2023-01-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Inverse square root function  x^(-1/2)
 12: */

 14: #include <slepc/private/fnimpl.h>
 15: #include <slepcblaslapack.h>

 17: PetscErrorCode FNEvaluateFunction_Invsqrt(FN fn,PetscScalar x,PetscScalar *y)
 18: {
 20: #if !defined(PETSC_USE_COMPLEX)
 22: #endif
 23:   *y = 1.0/PetscSqrtScalar(x);
 24:   return 0;
 25: }

 27: PetscErrorCode FNEvaluateDerivative_Invsqrt(FN fn,PetscScalar x,PetscScalar *y)
 28: {
 30: #if !defined(PETSC_USE_COMPLEX)
 32: #endif
 33:   *y = -1.0/(2.0*PetscPowScalarReal(x,1.5));
 34:   return 0;
 35: }

 37: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Schur(FN fn,Mat A,Mat B)
 38: {
 39:   PetscBLASInt   n=0,ld,*ipiv,info;
 40:   PetscScalar    *Ba,*Wa;
 41:   PetscInt       m;
 42:   Mat            W;

 44:   FN_AllocateWorkMat(fn,A,&W);
 45:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
 46:   MatDenseGetArray(B,&Ba);
 47:   MatDenseGetArray(W,&Wa);
 48:   /* compute B = sqrtm(A) */
 49:   MatGetSize(A,&m,NULL);
 50:   PetscBLASIntCast(m,&n);
 51:   ld = n;
 52:   FNSqrtmSchur(fn,n,Ba,n,PETSC_FALSE);
 53:   /* compute B = A\B */
 54:   PetscMalloc1(ld,&ipiv);
 55:   PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
 56:   SlepcCheckLapackInfo("gesv",info);
 57:   PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
 58:   PetscFree(ipiv);
 59:   MatDenseRestoreArray(W,&Wa);
 60:   MatDenseRestoreArray(B,&Ba);
 61:   FN_FreeWorkMat(fn,&W);
 62:   return 0;
 63: }

 65: PetscErrorCode FNEvaluateFunctionMatVec_Invsqrt_Schur(FN fn,Mat A,Vec v)
 66: {
 67:   PetscBLASInt   n=0,ld,*ipiv,info,one=1;
 68:   PetscScalar    *Ba,*Wa;
 69:   PetscInt       m;
 70:   Mat            B,W;

 72:   FN_AllocateWorkMat(fn,A,&B);
 73:   FN_AllocateWorkMat(fn,A,&W);
 74:   MatDenseGetArray(B,&Ba);
 75:   MatDenseGetArray(W,&Wa);
 76:   /* compute B_1 = sqrtm(A)*e_1 */
 77:   MatGetSize(A,&m,NULL);
 78:   PetscBLASIntCast(m,&n);
 79:   ld = n;
 80:   FNSqrtmSchur(fn,n,Ba,n,PETSC_TRUE);
 81:   /* compute B_1 = A\B_1 */
 82:   PetscMalloc1(ld,&ipiv);
 83:   PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&one,Wa,&ld,ipiv,Ba,&ld,&info));
 84:   SlepcCheckLapackInfo("gesv",info);
 85:   PetscFree(ipiv);
 86:   MatDenseRestoreArray(W,&Wa);
 87:   MatDenseRestoreArray(B,&Ba);
 88:   MatGetColumnVector(B,v,0);
 89:   FN_FreeWorkMat(fn,&W);
 90:   FN_FreeWorkMat(fn,&B);
 91:   return 0;
 92: }

 94: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_DBP(FN fn,Mat A,Mat B)
 95: {
 96:   PetscBLASInt   n=0;
 97:   PetscScalar    *T;
 98:   PetscInt       m;

100:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
101:   MatDenseGetArray(B,&T);
102:   MatGetSize(A,&m,NULL);
103:   PetscBLASIntCast(m,&n);
104:   FNSqrtmDenmanBeavers(fn,n,T,n,PETSC_TRUE);
105:   MatDenseRestoreArray(B,&T);
106:   return 0;
107: }

109: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_NS(FN fn,Mat A,Mat B)
110: {
111:   PetscBLASInt   n=0;
112:   PetscScalar    *T;
113:   PetscInt       m;

115:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
116:   MatDenseGetArray(B,&T);
117:   MatGetSize(A,&m,NULL);
118:   PetscBLASIntCast(m,&n);
119:   FNSqrtmNewtonSchulz(fn,n,T,n,PETSC_TRUE);
120:   MatDenseRestoreArray(B,&T);
121:   return 0;
122: }

124: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Sadeghi(FN fn,Mat A,Mat B)
125: {
126:   PetscBLASInt   n=0,ld,*ipiv,info;
127:   PetscScalar    *Ba,*Wa;
128:   PetscInt       m;
129:   Mat            W;

131:   FN_AllocateWorkMat(fn,A,&W);
132:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
133:   MatDenseGetArray(B,&Ba);
134:   MatDenseGetArray(W,&Wa);
135:   /* compute B = sqrtm(A) */
136:   MatGetSize(A,&m,NULL);
137:   PetscBLASIntCast(m,&n);
138:   ld = n;
139:   FNSqrtmSadeghi(fn,n,Ba,n);
140:   /* compute B = A\B */
141:   PetscMalloc1(ld,&ipiv);
142:   PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
143:   SlepcCheckLapackInfo("gesv",info);
144:   PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
145:   PetscFree(ipiv);
146:   MatDenseRestoreArray(W,&Wa);
147:   MatDenseRestoreArray(B,&Ba);
148:   FN_FreeWorkMat(fn,&W);
149:   return 0;
150: }

152: #if defined(PETSC_HAVE_CUDA)
153: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_NS_CUDA(FN fn,Mat A,Mat B)
154: {
155:   PetscBLASInt   n=0;
156:   PetscScalar    *Ba;
157:   PetscInt       m;

159:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
160:   MatDenseCUDAGetArray(B,&Ba);
161:   MatGetSize(A,&m,NULL);
162:   PetscBLASIntCast(m,&n);
163:   FNSqrtmNewtonSchulz_CUDA(fn,n,Ba,n,PETSC_TRUE);
164:   MatDenseCUDARestoreArray(B,&Ba);
165:   return 0;
166: }

168: #if defined(PETSC_HAVE_MAGMA)
169: #include <slepcmagma.h>

171: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_DBP_CUDAm(FN fn,Mat A,Mat B)
172: {
173:   PetscBLASInt   n=0;
174:   PetscScalar    *T;
175:   PetscInt       m;

177:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
178:   MatDenseCUDAGetArray(B,&T);
179:   MatGetSize(A,&m,NULL);
180:   PetscBLASIntCast(m,&n);
181:   FNSqrtmDenmanBeavers_CUDAm(fn,n,T,n,PETSC_TRUE);
182:   MatDenseCUDARestoreArray(B,&T);
183:   return 0;
184: }

186: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Sadeghi_CUDAm(FN fn,Mat A,Mat B)
187: {
188:   PetscBLASInt   n=0,ld,*ipiv;
189:   PetscScalar    *Ba,*Wa;
190:   PetscInt       m;
191:   Mat            W;

193:   FN_AllocateWorkMat(fn,A,&W);
194:   if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
195:   MatDenseCUDAGetArray(B,&Ba);
196:   MatDenseCUDAGetArray(W,&Wa);
197:   /* compute B = sqrtm(A) */
198:   MatGetSize(A,&m,NULL);
199:   PetscBLASIntCast(m,&n);
200:   ld = n;
201:   FNSqrtmSadeghi_CUDAm(fn,n,Ba,n);
202:   /* compute B = A\B */
203:   SlepcMagmaInit();
204:   PetscMalloc1(ld,&ipiv);
205:   PetscCallMAGMA(magma_xgesv_gpu,n,n,Wa,ld,ipiv,Ba,ld);
206:   PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
207:   PetscFree(ipiv);
208:   MatDenseCUDARestoreArray(W,&Wa);
209:   MatDenseCUDARestoreArray(B,&Ba);
210:   FN_FreeWorkMat(fn,&W);
211:   return 0;
212: }
213: #endif /* PETSC_HAVE_MAGMA */
214: #endif /* PETSC_HAVE_CUDA */

216: PetscErrorCode FNView_Invsqrt(FN fn,PetscViewer viewer)
217: {
218:   PetscBool      isascii;
219:   char           str[50];
220:   const char     *methodname[] = {
221:                   "Schur method for inv(A)*sqrtm(A)",
222:                   "Denman-Beavers (product form)",
223:                   "Newton-Schulz iteration",
224:                   "Sadeghi iteration"
225:   };
226:   const int      nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);

228:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
229:   if (isascii) {
230:     if (fn->beta==(PetscScalar)1.0) {
231:       if (fn->alpha==(PetscScalar)1.0) PetscViewerASCIIPrintf(viewer,"  inverse square root: x^(-1/2)\n");
232:       else {
233:         SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
234:         PetscViewerASCIIPrintf(viewer,"  inverse square root: (%s*x)^(-1/2)\n",str);
235:       }
236:     } else {
237:       SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE);
238:       if (fn->alpha==(PetscScalar)1.0) PetscViewerASCIIPrintf(viewer,"  inverse square root: %s*x^(-1/2)\n",str);
239:       else {
240:         PetscViewerASCIIPrintf(viewer,"  inverse square root: %s",str);
241:         PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
242:         SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
243:         PetscViewerASCIIPrintf(viewer,"*(%s*x)^(-1/2)\n",str);
244:         PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
245:       }
246:     }
247:     if (fn->method<nmeth) PetscViewerASCIIPrintf(viewer,"  computing matrix functions with: %s\n",methodname[fn->method]);
248:   }
249:   return 0;
250: }

252: SLEPC_EXTERN PetscErrorCode FNCreate_Invsqrt(FN fn)
253: {
254:   fn->ops->evaluatefunction          = FNEvaluateFunction_Invsqrt;
255:   fn->ops->evaluatederivative        = FNEvaluateDerivative_Invsqrt;
256:   fn->ops->evaluatefunctionmat[0]    = FNEvaluateFunctionMat_Invsqrt_Schur;
257:   fn->ops->evaluatefunctionmat[1]    = FNEvaluateFunctionMat_Invsqrt_DBP;
258:   fn->ops->evaluatefunctionmat[2]    = FNEvaluateFunctionMat_Invsqrt_NS;
259:   fn->ops->evaluatefunctionmat[3]    = FNEvaluateFunctionMat_Invsqrt_Sadeghi;
260: #if defined(PETSC_HAVE_CUDA)
261:   fn->ops->evaluatefunctionmatcuda[2] = FNEvaluateFunctionMat_Invsqrt_NS_CUDA;
262: #if defined(PETSC_HAVE_MAGMA)
263:   fn->ops->evaluatefunctionmatcuda[1] = FNEvaluateFunctionMat_Invsqrt_DBP_CUDAm;
264:   fn->ops->evaluatefunctionmatcuda[3] = FNEvaluateFunctionMat_Invsqrt_Sadeghi_CUDAm;
265: #endif /* PETSC_HAVE_MAGMA */
266: #endif /* PETSC_HAVE_CUDA */
267:   fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Invsqrt_Schur;
268:   fn->ops->view                      = FNView_Invsqrt;
269:   return 0;
270: }