Actual source code: fnsqrt.c

slepc-3.16.3 2022-04-11
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2021, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Square root function  sqrt(x)
 12: */

 14: #include <slepc/private/fnimpl.h>
 15: #include <slepcblaslapack.h>

 17: PetscErrorCode FNEvaluateFunction_Sqrt(FN fn,PetscScalar x,PetscScalar *y)
 18: {
 20: #if !defined(PETSC_USE_COMPLEX)
 21:   if (x<0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Function not defined in the requested value");
 22: #endif
 23:   *y = PetscSqrtScalar(x);
 24:   return(0);
 25: }

 27: PetscErrorCode FNEvaluateDerivative_Sqrt(FN fn,PetscScalar x,PetscScalar *y)
 28: {
 30:   if (x==0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
 31: #if !defined(PETSC_USE_COMPLEX)
 32:   if (x<0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
 33: #endif
 34:   *y = 1.0/(2.0*PetscSqrtScalar(x));
 35:   return(0);
 36: }

 38: PetscErrorCode FNEvaluateFunctionMat_Sqrt_Schur(FN fn,Mat A,Mat B)
 39: {
 41:   PetscBLASInt   n=0;
 42:   PetscScalar    *T;
 43:   PetscInt       m;

 46:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
 47:   MatDenseGetArray(B,&T);
 48:   MatGetSize(A,&m,NULL);
 49:   PetscBLASIntCast(m,&n);
 50:   FNSqrtmSchur(fn,n,T,n,PETSC_FALSE);
 51:   MatDenseRestoreArray(B,&T);
 52:   return(0);
 53: }

 55: PetscErrorCode FNEvaluateFunctionMatVec_Sqrt_Schur(FN fn,Mat A,Vec v)
 56: {
 58:   PetscBLASInt   n=0;
 59:   PetscScalar    *T;
 60:   PetscInt       m;
 61:   Mat            B;

 64:   FN_AllocateWorkMat(fn,A,&B);
 65:   MatDenseGetArray(B,&T);
 66:   MatGetSize(A,&m,NULL);
 67:   PetscBLASIntCast(m,&n);
 68:   FNSqrtmSchur(fn,n,T,n,PETSC_TRUE);
 69:   MatDenseRestoreArray(B,&T);
 70:   MatGetColumnVector(B,v,0);
 71:   FN_FreeWorkMat(fn,&B);
 72:   return(0);
 73: }

 75: PetscErrorCode FNEvaluateFunctionMat_Sqrt_DBP(FN fn,Mat A,Mat B)
 76: {
 78:   PetscBLASInt   n=0;
 79:   PetscScalar    *T;
 80:   PetscInt       m;

 83:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
 84:   MatDenseGetArray(B,&T);
 85:   MatGetSize(A,&m,NULL);
 86:   PetscBLASIntCast(m,&n);
 87:   FNSqrtmDenmanBeavers(fn,n,T,n,PETSC_FALSE);
 88:   MatDenseRestoreArray(B,&T);
 89:   return(0);
 90: }

 92: PetscErrorCode FNEvaluateFunctionMat_Sqrt_NS(FN fn,Mat A,Mat B)
 93: {
 95:   PetscBLASInt   n=0;
 96:   PetscScalar    *Ba;
 97:   PetscInt       m;

100:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
101:   MatDenseGetArray(B,&Ba);
102:   MatGetSize(A,&m,NULL);
103:   PetscBLASIntCast(m,&n);
104:   FNSqrtmNewtonSchulz(fn,n,Ba,n,PETSC_FALSE);
105:   MatDenseRestoreArray(B,&Ba);
106:   return(0);
107: }

109: #define MAXIT 50

111: /*
112:    Computes the principal square root of the matrix A using the
113:    Sadeghi iteration. A is overwritten with sqrtm(A).
114:  */
115: PetscErrorCode FNSqrtmSadeghi(FN fn,PetscBLASInt n,PetscScalar *A,PetscBLASInt ld)
116: {
117:   PetscScalar    *M,*M2,*G,*X=A,*work,work1,sqrtnrm;
118:   PetscScalar    szero=0.0,sone=1.0,smfive=-5.0,s1d16=1.0/16.0;
119:   PetscReal      tol,Mres=0.0,nrm,rwork[1],done=1.0;
120:   PetscBLASInt   N,i,it,*piv=NULL,info,lwork=0,query=-1,one=1,zero=0;
121:   PetscBool      converged=PETSC_FALSE;
123:   unsigned int   ftz;

126:   N = n*n;
127:   tol = PetscSqrtReal((PetscReal)n)*PETSC_MACHINE_EPSILON/2;
128:   SlepcSetFlushToZero(&ftz);

130:   /* query work size */
131:   PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,piv,&work1,&query,&info));
132:   PetscBLASIntCast((PetscInt)PetscRealPart(work1),&lwork);

134:   PetscMalloc5(N,&M,N,&M2,N,&G,lwork,&work,n,&piv);
135:   PetscArraycpy(M,A,N);

137:   /* scale M */
138:   nrm = LAPACKlange_("fro",&n,&n,M,&n,rwork);
139:   if (nrm>1.0) {
140:     sqrtnrm = PetscSqrtReal(nrm);
141:     PetscStackCallBLAS("LAPACKlascl",LAPACKlascl_("G",&zero,&zero,&nrm,&done,&N,&one,M,&N,&info));
142:     SlepcCheckLapackInfo("lascl",info);
143:     tol *= nrm;
144:   }
145:   PetscInfo2(fn,"||A||_F = %g, new tol: %g\n",(double)nrm,(double)tol);

147:   /* X = I */
148:   PetscArrayzero(X,N);
149:   for (i=0;i<n;i++) X[i+i*ld] = 1.0;

151:   for (it=0;it<MAXIT && !converged;it++) {

153:     /* G = (5/16)*I + (1/16)*M*(15*I-5*M+M*M) */
154:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,M,&ld,M,&ld,&szero,M2,&ld));
155:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&N,&smfive,M,&one,M2,&one));
156:     for (i=0;i<n;i++) M2[i+i*ld] += 15.0;
157:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&s1d16,M,&ld,M2,&ld,&szero,G,&ld));
158:     for (i=0;i<n;i++) G[i+i*ld] += 5.0/16.0;

160:     /* X = X*G */
161:     PetscArraycpy(M2,X,N);
162:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,M2,&ld,G,&ld,&szero,X,&ld));

164:     /* M = M*inv(G*G) */
165:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,G,&ld,G,&ld,&szero,M2,&ld));
166:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,M2,&ld,piv,&info));
167:     SlepcCheckLapackInfo("getrf",info);
168:     PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,M2,&ld,piv,work,&lwork,&info));
169:     SlepcCheckLapackInfo("getri",info);

171:     PetscArraycpy(G,M,N);
172:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,G,&ld,M2,&ld,&szero,M,&ld));

174:     /* check ||I-M|| */
175:     PetscArraycpy(M2,M,N);
176:     for (i=0;i<n;i++) M2[i+i*ld] -= 1.0;
177:     Mres = LAPACKlange_("fro",&n,&n,M2,&n,rwork);
178:     if (PetscIsNanReal(Mres)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"The computed norm is not-a-number");
179:     if (Mres<=tol) converged = PETSC_TRUE;
180:     PetscInfo2(fn,"it: %D res: %g\n",it,(double)Mres);
181:     PetscLogFlops(8.0*n*n*n+2.0*n*n+2.0*n*n*n/3.0+4.0*n*n*n/3.0+2.0*n*n*n+2.0*n*n);
182:   }

184:   if (Mres>tol) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"SQRTM not converged after %d iterations",MAXIT);

186:   /* undo scaling */
187:   if (nrm>1.0) PetscStackCallBLAS("BLASscal",BLASscal_(&N,&sqrtnrm,A,&one));

189:   PetscFree5(M,M2,G,work,piv);
190:   SlepcResetFlushToZero(&ftz);
191:   return(0);
192: }

194: #if defined(PETSC_HAVE_CUDA)
195: #include "../src/sys/classes/fn/impls/cuda/fnutilcuda.h"
196: #include <slepccublas.h>

198: #if defined(PETSC_HAVE_MAGMA)
199: #include <slepcmagma.h>

201: /*
202:  * Matrix square root by Sadeghi iteration. CUDA version.
203:  * Computes the principal square root of the matrix T using the
204:  * Sadeghi iteration. T is overwritten with sqrtm(T).
205:  */
206: PetscErrorCode FNSqrtmSadeghi_CUDAm(FN fn,PetscBLASInt n,PetscScalar *A,PetscBLASInt ld)
207: {
208:   PetscScalar        *d_X,*d_M,*d_M2,*d_G,*d_work,alpha;
209:   const PetscScalar  szero=0.0,sone=1.0,smfive=-5.0,s15=15.0,s1d16=1.0/16.0;
210:   PetscReal          tol,Mres=0.0,nrm,sqrtnrm;
211:   PetscInt           it,nb,lwork;
212:   PetscBLASInt       info,*piv,N;
213:   const PetscBLASInt one=1,zero=0;
214:   PetscBool          converged=PETSC_FALSE;
215:   cublasHandle_t     cublasv2handle;
216:   PetscErrorCode     ierr;
217:   cublasStatus_t     cberr;
218:   cudaError_t        cerr;
219:   magma_int_t        mierr;

222:   PetscCUDAInitializeCheck(); /* For CUDA event timers */
223:   PetscCUBLASGetHandle(&cublasv2handle);
224:   magma_init();
225:   N = n*n;
226:   tol = PetscSqrtReal((PetscReal)n)*PETSC_MACHINE_EPSILON/2;

228:   PetscMalloc1(n,&piv);
229:   cerr = cudaMalloc((void **)&d_X,sizeof(PetscScalar)*N);CHKERRCUDA(cerr);
230:   cerr = cudaMalloc((void **)&d_M,sizeof(PetscScalar)*N);CHKERRCUDA(cerr);
231:   cerr = cudaMalloc((void **)&d_M2,sizeof(PetscScalar)*N);CHKERRCUDA(cerr);
232:   cerr = cudaMalloc((void **)&d_G,sizeof(PetscScalar)*N);CHKERRCUDA(cerr);

234:   nb = magma_get_xgetri_nb(n);
235:   lwork = nb*n;
236:   cerr = cudaMalloc((void **)&d_work,sizeof(PetscScalar)*lwork);CHKERRCUDA(cerr);
237:   PetscLogGpuTimeBegin();

239:   /* M = A */
240:   cerr = cudaMemcpy(d_M,A,sizeof(PetscScalar)*N,cudaMemcpyHostToDevice);CHKERRCUDA(cerr);

242:   /* scale M */
243:   cberr = cublasXnrm2(cublasv2handle,N,d_M,one,&nrm);CHKERRCUBLAS(cberr);
244:   if (nrm>1.0) {
245:     sqrtnrm = PetscSqrtReal(nrm);
246:     alpha = 1.0/nrm;
247:     cberr = cublasXscal(cublasv2handle,N,&alpha,d_M,one);CHKERRCUBLAS(cberr);
248:     tol *= nrm;
249:   }
250:   PetscInfo2(fn,"||A||_F = %g, new tol: %g\n",(double)nrm,(double)tol);

252:   /* X = I */
253:   cerr = cudaMemset(d_X,zero,sizeof(PetscScalar)*N);CHKERRCUDA(cerr);
254:   set_diagonal(n,d_X,ld,sone);CHKERRQ(cerr);

256:   for (it=0;it<MAXIT && !converged;it++) {

258:     /* G = (5/16)*I + (1/16)*M*(15*I-5*M+M*M) */
259:     cberr = cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_M,ld,d_M,ld,&szero,d_M2,ld);CHKERRCUBLAS(cberr);
260:     cberr = cublasXaxpy(cublasv2handle,N,&smfive,d_M,one,d_M2,one);CHKERRCUBLAS(cberr);
261:     shift_diagonal(n,d_M2,ld,s15);CHKERRQ(cerr);
262:     cberr = cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&s1d16,d_M,ld,d_M2,ld,&szero,d_G,ld);CHKERRCUBLAS(cberr);
263:     shift_diagonal(n,d_G,ld,5.0/16.0);CHKERRQ(cerr);

265:     /* X = X*G */
266:     cerr = cudaMemcpy(d_M2,d_X,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice);CHKERRCUDA(cerr);
267:     cberr = cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_M2,ld,d_G,ld,&szero,d_X,ld);CHKERRCUBLAS(cberr);

269:     /* M = M*inv(G*G) */
270:     cberr = cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_G,ld,d_G,ld,&szero,d_M2,ld);CHKERRCUBLAS(cberr);
271:     /* magma */
272:     mmagma_xgetrf_gpu(n,n,d_M2,ld,piv,&info);CHKERRMAGMA(mierr);
273:     if (info < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"LAPACKgetrf: Illegal value on argument %d",PetscAbsInt(info));
274:     if (info > 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"LAPACKgetrf: Matrix is singular. U(%d,%d) is zero",info,info);
275:     mmagma_xgetri_gpu(n,d_M2,ld,piv,d_work,lwork,&info);CHKERRMAGMA(mierr);
276:     if (info < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"LAPACKgetri: Illegal value on argument %d",PetscAbsInt(info));
277:     if (info > 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"LAPACKgetri: Matrix is singular. U(%d,%d) is zero",info,info);
278:     /* magma */
279:     cerr = cudaMemcpy(d_G,d_M,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice);CHKERRCUDA(cerr);
280:     cberr = cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_G,ld,d_M2,ld,&szero,d_M,ld);CHKERRCUBLAS(cberr);

282:     /* check ||I-M|| */
283:     cerr = cudaMemcpy(d_M2,d_M,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice);CHKERRCUDA(cerr);
284:     shift_diagonal(n,d_M2,ld,-1.0);CHKERRQ(cerr);
285:     cberr = cublasXnrm2(cublasv2handle,N,d_M2,one,&Mres);CHKERRCUBLAS(cberr);
286:     if (PetscIsNanReal(Mres)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"The computed norm is not-a-number");
287:     if (Mres<=tol) converged = PETSC_TRUE;
288:     PetscInfo2(fn,"it: %D res: %g\n",it,(double)Mres);
289:     PetscLogGpuFlops(8.0*n*n*n+2.0*n*n+2.0*n*n*n/3.0+4.0*n*n*n/3.0+2.0*n*n*n+2.0*n*n);
290:   }

292:   if (Mres>tol) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"SQRTM not converged after %d iterations", MAXIT);

294:   if (nrm>1.0) {cberr = cublasXscal(cublasv2handle,N,&sqrtnrm,d_X,one);CHKERRCUBLAS(cberr);}
295:   cerr = cudaMemcpy(A,d_X,sizeof(PetscScalar)*N,cudaMemcpyDeviceToHost);CHKERRCUDA(cerr);
296:   PetscLogGpuTimeEnd();

298:   cerr = cudaFree(d_X);CHKERRCUDA(cerr);
299:   cerr = cudaFree(d_M);CHKERRCUDA(cerr);
300:   cerr = cudaFree(d_M2);CHKERRCUDA(cerr);
301:   cerr = cudaFree(d_G);CHKERRCUDA(cerr);
302:   cerr = cudaFree(d_work);CHKERRCUDA(cerr);
303:   PetscFree(piv);

305:   magma_finalize();
306:   return(0);
307: }
308: #endif /* PETSC_HAVE_MAGMA */
309: #endif /* PETSC_HAVE_CUDA */

311: PetscErrorCode FNEvaluateFunctionMat_Sqrt_Sadeghi(FN fn,Mat A,Mat B)
312: {
314:   PetscBLASInt   n=0;
315:   PetscScalar    *Ba;
316:   PetscInt       m;

319:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
320:   MatDenseGetArray(B,&Ba);
321:   MatGetSize(A,&m,NULL);
322:   PetscBLASIntCast(m,&n);
323:   FNSqrtmSadeghi(fn,n,Ba,n);
324:   MatDenseRestoreArray(B,&Ba);
325:   return(0);
326: }

328: #if defined(PETSC_HAVE_CUDA)
329: PetscErrorCode FNEvaluateFunctionMat_Sqrt_NS_CUDA(FN fn,Mat A,Mat B)
330: {
332:   PetscBLASInt   n=0;
333:   PetscScalar    *Ba;
334:   PetscInt       m;

337:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
338:   MatDenseGetArray(B,&Ba);
339:   MatGetSize(A,&m,NULL);
340:   PetscBLASIntCast(m,&n);
341:   FNSqrtmNewtonSchulz_CUDA(fn,n,Ba,n,PETSC_FALSE);
342:   MatDenseRestoreArray(B,&Ba);
343:   return(0);
344: }

346: #if defined(PETSC_HAVE_MAGMA)
347: PetscErrorCode FNEvaluateFunctionMat_Sqrt_DBP_CUDAm(FN fn,Mat A,Mat B)
348: {
350:   PetscBLASInt   n=0;
351:   PetscScalar    *T;
352:   PetscInt       m;

355:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
356:   MatDenseGetArray(B,&T);
357:   MatGetSize(A,&m,NULL);
358:   PetscBLASIntCast(m,&n);
359:   FNSqrtmDenmanBeavers_CUDAm(fn,n,T,n,PETSC_FALSE);
360:   MatDenseRestoreArray(B,&T);
361:   return(0);
362: }

364: PetscErrorCode FNEvaluateFunctionMat_Sqrt_Sadeghi_CUDAm(FN fn,Mat A,Mat B)
365: {
367:   PetscBLASInt   n=0;
368:   PetscScalar    *Ba;
369:   PetscInt       m;

372:   if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
373:   MatDenseGetArray(B,&Ba);
374:   MatGetSize(A,&m,NULL);
375:   PetscBLASIntCast(m,&n);
376:   FNSqrtmSadeghi_CUDAm(fn,n,Ba,n);
377:   MatDenseRestoreArray(B,&Ba);
378:   return(0);
379: }
380: #endif /* PETSC_HAVE_MAGMA */
381: #endif /* PETSC_HAVE_CUDA */

383: PetscErrorCode FNView_Sqrt(FN fn,PetscViewer viewer)
384: {
386:   PetscBool      isascii;
387:   char           str[50];
388:   const char     *methodname[] = {
389:                   "Schur method for the square root",
390:                   "Denman-Beavers (product form)",
391:                   "Newton-Schulz iteration",
392:                   "Sadeghi iteration"
393: #if defined(PETSC_HAVE_CUDA)
394:                  ,"Newton-Schulz iteration CUDA"
395: #if defined(PETSC_HAVE_MAGMA)
396:                  ,"Denman-Beavers (product form) CUDA/MAGMA",
397:                   "Sadeghi iteration CUDA/MAGMA"
398: #endif
399: #endif
400:   };
401:   const int      nmeth=sizeof(methodname)/sizeof(methodname[0]);

404:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
405:   if (isascii) {
406:     if (fn->beta==(PetscScalar)1.0) {
407:       if (fn->alpha==(PetscScalar)1.0) {
408:         PetscViewerASCIIPrintf(viewer,"  Square root: sqrt(x)\n");
409:       } else {
410:         SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
411:         PetscViewerASCIIPrintf(viewer,"  Square root: sqrt(%s*x)\n",str);
412:       }
413:     } else {
414:       SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE);
415:       if (fn->alpha==(PetscScalar)1.0) {
416:         PetscViewerASCIIPrintf(viewer,"  Square root: %s*sqrt(x)\n",str);
417:       } else {
418:         PetscViewerASCIIPrintf(viewer,"  Square root: %s",str);
419:         PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
420:         SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
421:         PetscViewerASCIIPrintf(viewer,"*sqrt(%s*x)\n",str);
422:         PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
423:       }
424:     }
425:     if (fn->method<nmeth) {
426:       PetscViewerASCIIPrintf(viewer,"  computing matrix functions with: %s\n",methodname[fn->method]);
427:     }
428:   }
429:   return(0);
430: }

432: SLEPC_EXTERN PetscErrorCode FNCreate_Sqrt(FN fn)
433: {
435:   fn->ops->evaluatefunction          = FNEvaluateFunction_Sqrt;
436:   fn->ops->evaluatederivative        = FNEvaluateDerivative_Sqrt;
437:   fn->ops->evaluatefunctionmat[0]    = FNEvaluateFunctionMat_Sqrt_Schur;
438:   fn->ops->evaluatefunctionmat[1]    = FNEvaluateFunctionMat_Sqrt_DBP;
439:   fn->ops->evaluatefunctionmat[2]    = FNEvaluateFunctionMat_Sqrt_NS;
440:   fn->ops->evaluatefunctionmat[3]    = FNEvaluateFunctionMat_Sqrt_Sadeghi;
441: #if defined(PETSC_HAVE_CUDA)
442:   fn->ops->evaluatefunctionmat[4]    = FNEvaluateFunctionMat_Sqrt_NS_CUDA;
443: #if defined(PETSC_HAVE_MAGMA)
444:   fn->ops->evaluatefunctionmat[5]    = FNEvaluateFunctionMat_Sqrt_DBP_CUDAm;
445:   fn->ops->evaluatefunctionmat[6]    = FNEvaluateFunctionMat_Sqrt_Sadeghi_CUDAm;
446: #endif /* PETSC_HAVE_MAGMA */
447: #endif /* PETSC_HAVE_CUDA */
448:   fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Sqrt_Schur;
449:   fn->ops->view                      = FNView_Sqrt;
450:   return(0);
451: }