Actual source code: nepdefl.c
slepc-3.15.2 2021-09-20
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2021, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/nepimpl.h>
12: #include <slepcblaslapack.h>
13: #include "nepdefl.h"
15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
16: {
20: if (X) *X = extop->X;
21: if (H) {
22: MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H);
23: }
24: return(0);
25: }
27: PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
28: {
30: Vec uu;
31: PetscInt ld,i;
32: PetscMPIInt np;
33: PetscReal norm;
36: BVGetColumn(extop->X,k,&uu);
37: ld = extop->szd+1;
38: NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE);
39: BVRestoreColumn(extop->X,k,&uu);
40: BVNormColumn(extop->X,k,NORM_2,&norm);
41: BVScaleColumn(extop->X,k,1.0/norm);
42: MPI_Comm_size(PetscObjectComm((PetscObject)u),&np);CHKERRMPI(ierr);
43: for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
44: extop->H[k*(ld+1)] = lambda;
45: return(0);
46: }
48: PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
49: {
51: PetscScalar *Ap;
52: PetscInt ldh=extop->szd+1,ldds,i,j,k1=k+1;
53: PetscScalar *eigr,*eigi,*t,*Z;
54: Vec x;
57: NEPDeflationExtendInvariantPair(extop,u,lambda,k);
58: PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t);
59: DSReset(ds);
60: DSSetType(ds,DSNHEP);
61: DSAllocate(ds,ldh);
62: DSGetLeadingDimension(ds,&ldds);
63: DSGetArray(ds,DS_MAT_A,&Ap);
64: for (j=0;j<k1;j++)
65: for (i=0;i<k1;i++) Ap[j*ldds+i] = extop->H[j*ldh+i];
66: DSRestoreArray(ds,DS_MAT_A,&Ap);
67: DSSetDimensions(ds,k1,0,0,k1);
68: DSSolve(ds,eigr,eigi);
69: DSVectors(ds,DS_MAT_X,&k,NULL);
70: DSGetArray(ds,DS_MAT_X,&Z);
71: BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds);
72: DSRestoreArray(ds,DS_MAT_X,&Z);
73: BVGetColumn(extop->X,k,&x);
74: NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE);
75: BVRestoreColumn(extop->X,k,&x);
76: PetscFree3(eigr,eigi,t);
77: return(0);
78: }
80: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
81: {
83: PetscMPIInt np,rk,count;
84: PetscScalar *array1,*array2;
85: PetscInt nloc;
88: if (extop->szd) {
89: MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk);CHKERRMPI(ierr);
90: MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np);CHKERRMPI(ierr);
91: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
92: if (v) {
93: VecGetArray(v,&array1);
94: VecGetArray(vex,&array2);
95: if (back) {
96: PetscArraycpy(array1,array2,nloc);
97: } else {
98: PetscArraycpy(array2,array1,nloc);
99: }
100: VecRestoreArray(v,&array1);
101: VecRestoreArray(vex,&array2);
102: }
103: if (a) {
104: VecGetArray(vex,&array2);
105: if (back) {
106: PetscArraycpy(a,array2+nloc,extop->szd);
107: PetscMPIIntCast(extop->szd,&count);
108: MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex));CHKERRMPI(ierr);
109: } else {
110: PetscArraycpy(array2+nloc,a,extop->szd);
111: PetscMPIIntCast(extop->szd,&count);
112: MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex));CHKERRMPI(ierr);
113: }
114: VecRestoreArray(vex,&array2);
115: }
116: } else {
117: if (back) {VecCopy(vex,v);}
118: else {VecCopy(v,vex);}
119: }
120: return(0);
121: }
123: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
124: {
126: PetscInt nloc;
127: Vec u;
128: VecType type;
131: if (extop->szd) {
132: BVGetColumn(extop->nep->V,0,&u);
133: VecGetType(u,&type);
134: BVRestoreColumn(extop->nep->V,0,&u);
135: VecCreate(PetscObjectComm((PetscObject)extop->nep),v);
136: VecSetType(*v,type);
137: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
138: nloc += extop->szd;
139: VecSetSizes(*v,nloc,PETSC_DECIDE);
140: } else {
141: BVCreateVec(extop->nep->V,v);
142: }
143: return(0);
144: }
146: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
147: {
148: PetscErrorCode ierr;
149: PetscInt nloc;
150: BVType type;
151: BVOrthogType otype;
152: BVOrthogRefineType oref;
153: PetscReal oeta;
154: BVOrthogBlockType oblock;
155: NEP nep=extop->nep;
158: if (extop->szd) {
159: BVGetSizes(nep->V,&nloc,NULL,NULL);
160: BVCreate(PetscObjectComm((PetscObject)nep),V);
161: BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz);
162: BVGetType(nep->V,&type);
163: BVSetType(*V,type);
164: BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock);
165: BVSetOrthogonalization(*V,otype,oref,oeta,oblock);
166: PetscObjectStateIncrease((PetscObject)*V);
167: PetscLogObjectParent((PetscObject)nep,(PetscObject)*V);
168: } else {
169: BVDuplicateResize(nep->V,sz,V);
170: }
171: return(0);
172: }
174: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
175: {
177: PetscInt n,next,i;
178: PetscRandom rand;
179: PetscScalar *array;
180: PetscMPIInt nn,np;
183: BVGetRandomContext(extop->nep->V,&rand);
184: VecSetRandom(v,rand);
185: if (extop->szd) {
186: MPI_Comm_size(PetscObjectComm((PetscObject)v),&np);CHKERRMPI(ierr);
187: BVGetSizes(extop->nep->V,&n,NULL,NULL);
188: VecGetLocalSize(v,&next);
189: VecGetArray(v,&array);
190: for (i=n+extop->n;i<next;i++) array[i] = 0.0;
191: for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
192: PetscMPIIntCast(extop->n,&nn);
193: MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v));CHKERRMPI(ierr);
194: VecRestoreArray(v,&array);
195: }
196: return(0);
197: }
199: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
200: {
202: PetscInt i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
203: PetscScalar sone=1.0,zero=0.0;
204: PetscBLASInt ldh_,ldhj_,n_;
207: i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
208: PetscArrayzero(Hj,i);
209: PetscBLASIntCast(ldhj+1,&ldh_);
210: PetscBLASIntCast(ldhj,&ldhj_);
211: PetscBLASIntCast(n,&n_);
212: if (idx<1) {
213: if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
214: else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
215: } else {
216: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
217: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
218: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
219: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
220: }
221: if (idx<0) {
222: idx = -idx;
223: for (k=1;k<idx;k++) {
224: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
225: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
226: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
227: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
228: }
229: }
230: return(0);
231: }
233: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
234: {
236: PetscInt i;
239: NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n);
240: extop->n++;
241: BVSetActiveColumns(extop->X,0,extop->n);
242: if (extop->n <= extop->szd) {
243: /* update XpX */
244: BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd);
245: extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
246: for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
247: /* determine minimality index */
248: extop->midx = PetscMin(extop->max_midx,extop->n);
249: /* polynominal basis coeficients */
250: for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
251: /* evaluate the polynomial basis in H */
252: NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL);
253: }
254: return(0);
255: }
257: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
258: {
259: PetscErrorCode ierr;
260: PetscInt i,j,k,off,ini,fin,sz,ldh,n=extop->n;
261: Mat A,B;
262: PetscScalar *array;
263: const PetscScalar *barray;
266: if (idx<0) {ini = 0; fin = extop->nep->nt;}
267: else {ini = idx; fin = idx+1;}
268: if (y) sz = hfjp?n+2:n+1;
269: else sz = hfjp?3*n:2*n;
270: ldh = extop->szd+1;
271: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A);
272: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B);
273: MatDenseGetArray(A,&array);
274: for (j=0;j<n;j++)
275: for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
276: MatDenseRestoreArrayWrite(A,&array);
277: if (y) {
278: MatDenseGetArray(A,&array);
279: array[extop->n*(sz+1)] = lambda;
280: if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
281: for (i=0;i<n;i++) array[n*sz+i] = y[i];
282: MatDenseRestoreArrayWrite(A,&array);
283: for (j=ini;j<fin;j++) {
284: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
285: MatDenseGetArrayRead(B,&barray);
286: for (i=0;i<n;i++) hfj[j*ld+i] = barray[n*sz+i];
287: if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = barray[(n+1)*sz+i];
288: MatDenseRestoreArrayRead(B,&barray);
289: }
290: } else {
291: off = idx<0?ld*n:0;
292: MatDenseGetArray(A,&array);
293: for (k=0;k<n;k++) {
294: array[(n+k)*sz+k] = 1.0;
295: array[(n+k)*sz+n+k] = lambda;
296: }
297: if (hfjp) for (k=0;k<n;k++) {
298: array[(2*n+k)*sz+n+k] = 1.0;
299: array[(2*n+k)*sz+2*n+k] = lambda;
300: }
301: MatDenseRestoreArray(A,&array);
302: for (j=ini;j<fin;j++) {
303: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
304: MatDenseGetArrayRead(B,&barray);
305: for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = barray[n*sz+i*sz+k];
306: if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = barray[2*n*sz+i*sz+k];
307: MatDenseRestoreArrayRead(B,&barray);
308: }
309: }
310: MatDestroy(&A);
311: MatDestroy(&B);
312: return(0);
313: }
315: static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
316: {
317: NEP_DEF_MATSHELL *matctx;
318: PetscErrorCode ierr;
319: NEP_EXT_OP extop;
320: Vec x1,y1;
321: PetscScalar *yy,sone=1.0,zero=0.0;
322: const PetscScalar *xx;
323: PetscInt nloc,i;
324: PetscMPIInt np;
325: PetscBLASInt n_,one=1,szd_;
328: MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);CHKERRMPI(ierr);
329: MatShellGetContext(M,(void**)&matctx);
330: extop = matctx->extop;
331: if (extop->ref) {
332: VecZeroEntries(y);
333: }
334: if (extop->szd) {
335: x1 = matctx->w[0]; y1 = matctx->w[1];
336: VecGetArrayRead(x,&xx);
337: VecPlaceArray(x1,xx);
338: VecGetArray(y,&yy);
339: VecPlaceArray(y1,yy);
340: MatMult(matctx->T,x1,y1);
341: if (!extop->ref && extop->n) {
342: VecGetLocalSize(x1,&nloc);
343: /* copy for avoiding warning of constant array xx */
344: for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
345: BVMultVec(matctx->U,1.0,1.0,y1,matctx->work);
346: BVDotVec(extop->X,x1,matctx->work);
347: PetscBLASIntCast(extop->n,&n_);
348: PetscBLASIntCast(extop->szd,&szd_);
349: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
350: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
351: for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
352: }
353: VecResetArray(x1);
354: VecRestoreArrayRead(x,&xx);
355: VecResetArray(y1);
356: VecRestoreArray(y,&yy);
357: } else {
358: MatMult(matctx->T,x,y);
359: }
360: return(0);
361: }
363: static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
364: {
365: PetscErrorCode ierr;
366: NEP_DEF_MATSHELL *matctx;
369: MatShellGetContext(M,(void**)&matctx);
370: if (right) {
371: VecDuplicate(matctx->w[0],right);
372: }
373: if (left) {
374: VecDuplicate(matctx->w[0],left);
375: }
376: return(0);
377: }
379: static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
380: {
381: PetscErrorCode ierr;
382: NEP_DEF_MATSHELL *matctx;
385: MatShellGetContext(M,(void**)&matctx);
386: if (matctx->extop->szd) {
387: BVDestroy(&matctx->U);
388: PetscFree2(matctx->hfj,matctx->work);
389: PetscFree2(matctx->A,matctx->B);
390: VecDestroy(&matctx->w[0]);
391: VecDestroy(&matctx->w[1]);
392: }
393: MatDestroy(&matctx->T);
394: PetscFree(matctx);
395: return(0);
396: }
398: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
399: {
400: PetscScalar p;
401: PetscInt i;
404: if (!jacobian) {
405: val[0] = 1.0;
406: for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
407: } else {
408: val[0] = 0.0;
409: p = 1.0;
410: for (i=1;i<extop->n;i++) {
411: val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
412: p *= (lambda-extop->bc[i-1]);
413: }
414: }
415: return(0);
416: }
418: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
419: {
420: PetscErrorCode ierr;
421: NEP_DEF_MATSHELL *matctx,*matctxC;
422: PetscInt nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
423: Mat F,Mshell,Mcomp;
424: PetscBool ini=PETSC_FALSE;
425: PetscScalar *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
426: PetscBLASInt n_,info,szd_;
429: if (!M) Mshell = jacobian?extop->MJ:extop->MF;
430: else Mshell = *M;
431: Mcomp = jacobian?extop->MF:extop->MJ;
432: if (!Mshell) {
433: ini = PETSC_TRUE;
434: PetscNew(&matctx);
435: MatGetLocalSize(extop->nep->function,&mloc,&nloc);
436: nloc += szd; mloc += szd;
437: MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell);
438: MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation);
439: MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation);
440: MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation);
441: matctx->nep = extop->nep;
442: matctx->extop = extop;
443: if (!M) {
444: if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
445: else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
446: PetscObjectReference((PetscObject)matctx->T);
447: } else {
448: matctx->jacob = jacobian;
449: MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES, &matctx->T);
450: *M = Mshell;
451: }
452: if (szd) {
453: BVCreateVec(extop->nep->V,matctx->w);
454: VecDuplicate(matctx->w[0],matctx->w+1);
455: BVDuplicateResize(extop->nep->V,szd,&matctx->U);
456: PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work);
457: PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B);
458: }
459: } else {
460: MatShellGetContext(Mshell,(void**)&matctx);
461: }
462: if (ini || matctx->theta != lambda || matctx->n != extop->n) {
463: if (ini || matctx->theta != lambda) {
464: if (jacobian) {
465: NEPComputeJacobian(extop->nep,lambda,matctx->T);
466: } else {
467: NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->T);
468: }
469: }
470: if (n) {
471: matctx->hfjset = PETSC_FALSE;
472: if (!extop->simpU) {
473: /* likely hfjp has been already computed */
474: if (Mcomp) {
475: MatShellGetContext(Mcomp,(void**)&matctxC);
476: if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
477: PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt);
478: matctx->hfjset = PETSC_TRUE;
479: }
480: }
481: hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
482: if (!matctx->hfjset) {
483: NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n);
484: matctx->hfjset = PETSC_TRUE;
485: }
486: BVSetActiveColumns(matctx->U,0,n);
487: hf = jacobian?hfjp:hfj;
488: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F);
489: BVMatMult(extop->X,extop->nep->A[0],matctx->U);
490: BVMultInPlace(matctx->U,F,0,n);
491: BVSetActiveColumns(extop->W,0,extop->n);
492: for (j=1;j<extop->nep->nt;j++) {
493: BVMatMult(extop->X,extop->nep->A[j],extop->W);
494: MatDensePlaceArray(F,hf+j*n*n);
495: BVMult(matctx->U,1.0,1.0,extop->W,F);
496: MatDenseResetArray(F);
497: }
498: MatDestroy(&F);
499: } else {
500: hfj = matctx->hfj;
501: BVSetActiveColumns(matctx->U,0,n);
502: BVMatMult(extop->X,matctx->T,matctx->U);
503: for (j=0;j<n;j++) {
504: for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
505: hfj[j*(n+1)] += lambda;
506: }
507: PetscBLASIntCast(n,&n_);
508: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
509: PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
510: PetscFPTrapPop();
511: SlepcCheckLapackInfo("trtri",info);
512: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F);
513: BVMultInPlace(matctx->U,F,0,n);
514: if (jacobian) {
515: NEPDeflationComputeFunction(extop,lambda,NULL);
516: MatShellGetContext(extop->MF,(void**)&matctxC);
517: BVMult(matctx->U,-1.0,1.0,matctxC->U,F);
518: }
519: MatDestroy(&F);
520: }
521: PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev);
522: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian);
523: A = matctx->A;
524: PetscArrayzero(A,szd*szd);
525: if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
526: for (j=0;j<n;j++)
527: for (i=0;i<n;i++)
528: for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
529: PetscBLASIntCast(n,&n_);
530: PetscBLASIntCast(szd,&szd_);
531: B = matctx->B;
532: PetscArrayzero(B,szd*szd);
533: for (i=1;i<extop->midx;i++) {
534: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
535: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
536: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
537: pts = hHprev; hHprev = hH; hH = pts;
538: }
539: PetscFree3(basisv,hH,hHprev);
540: }
541: matctx->theta = lambda;
542: matctx->n = extop->n;
543: }
544: return(0);
545: }
547: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
548: {
552: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL);
553: if (F) *F = extop->MF;
554: return(0);
555: }
557: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
558: {
562: NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL);
563: if (J) *J = extop->MJ;
564: return(0);
565: }
567: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
568: {
569: PetscErrorCode ierr;
570: NEP_DEF_MATSHELL *matctx;
571: NEP_DEF_FUN_SOLVE solve;
572: PetscInt i,j,n=extop->n;
573: Vec u,tu;
574: Mat F;
575: PetscScalar snone=-1.0,sone=1.0;
576: PetscBLASInt n_,szd_,ldh_,*p,info;
577: Mat Mshell;
580: solve = extop->solve;
581: if (lambda!=solve->theta || n!=solve->n) {
582: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T);
583: Mshell = (solve->sincf)?extop->MF:solve->T;
584: MatShellGetContext(Mshell,(void**)&matctx);
585: KSPSetOperators(solve->ksp,matctx->T,matctx->T);
586: if (!extop->ref && n) {
587: PetscBLASIntCast(n,&n_);
588: PetscBLASIntCast(extop->szd,&szd_);
589: PetscBLASIntCast(extop->szd+1,&ldh_);
590: if (!extop->simpU) {
591: BVSetActiveColumns(solve->T_1U,0,n);
592: for (i=0;i<n;i++) {
593: BVGetColumn(matctx->U,i,&u);
594: BVGetColumn(solve->T_1U,i,&tu);
595: KSPSolve(solve->ksp,u,tu);
596: BVRestoreColumn(solve->T_1U,i,&tu);
597: BVRestoreColumn(matctx->U,i,&u);
598: }
599: MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F);
600: BVDot(solve->T_1U,extop->X,F);
601: MatDestroy(&F);
602: } else {
603: for (j=0;j<n;j++)
604: for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
605: for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
606: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
607: for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
608: }
609: PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd);
610: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
611: PetscMalloc1(n,&p);
612: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
613: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
614: SlepcCheckLapackInfo("getrf",info);
615: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
616: SlepcCheckLapackInfo("getri",info);
617: PetscFPTrapPop();
618: PetscFree(p);
619: }
620: solve->theta = lambda;
621: solve->n = n;
622: }
623: return(0);
624: }
626: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
627: {
628: PetscErrorCode ierr;
629: Vec b1,x1;
630: PetscScalar *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
631: NEP_DEF_MATSHELL *matctx;
632: NEP_DEF_FUN_SOLVE solve=extop->solve;
633: PetscBLASInt one=1,szd_,n_,ldh_;
634: PetscInt nloc,i;
635: PetscMPIInt np,count;
638: if (extop->ref) {
639: VecZeroEntries(x);
640: }
641: if (extop->szd) {
642: x1 = solve->w[0]; b1 = solve->w[1];
643: VecGetArray(x,&xx);
644: VecPlaceArray(x1,xx);
645: VecGetArray(b,&bb);
646: VecPlaceArray(b1,bb);
647: } else {
648: b1 = b; x1 = x;
649: }
650: KSPSolve(extop->solve->ksp,b1,x1);
651: if (!extop->ref && extop->n) {
652: PetscBLASIntCast(extop->szd,&szd_);
653: PetscBLASIntCast(extop->n,&n_);
654: PetscBLASIntCast(extop->szd+1,&ldh_);
655: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
656: PetscMalloc2(extop->n,&b2,extop->n,&x2);
657: MPI_Comm_size(PetscObjectComm((PetscObject)b),&np);CHKERRMPI(ierr);
658: for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
659: w = solve->work; w2 = solve->work+extop->n;
660: MatShellGetContext(solve->sincf?extop->MF:solve->T,(void**)&matctx);
661: PetscArraycpy(w2,b2,extop->n);
662: BVDotVec(extop->X,x1,w);
663: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
664: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
665: if (extop->simpU) {
666: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
667: for (i=0;i<extop->n;i++) w[i] = x2[i];
668: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
669: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
670: BVMultVec(extop->X,-1.0,1.0,x1,w);
671: } else {
672: BVMultVec(solve->T_1U,-1.0,1.0,x1,x2);
673: }
674: for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
675: PetscMPIIntCast(extop->n,&count);
676: MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b));CHKERRMPI(ierr);
677: }
678: if (extop->szd) {
679: VecResetArray(x1);
680: VecRestoreArray(x,&xx);
681: VecResetArray(b1);
682: VecRestoreArray(b,&bb);
683: if (!extop->ref && extop->n) { PetscFree2(b2,x2);}
684: }
685: return(0);
686: }
688: PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
689: {
691: extop->ref = ref;
692: return(0);
693: }
695: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
696: {
697: PetscErrorCode ierr;
698: PetscInt j;
699: NEP_DEF_FUN_SOLVE solve;
702: if (!extop) return(0);
703: PetscFree(extop->H);
704: BVDestroy(&extop->X);
705: if (extop->szd) {
706: VecDestroy(&extop->w);
707: PetscFree3(extop->Hj,extop->XpX,extop->bc);
708: BVDestroy(&extop->W);
709: }
710: MatDestroy(&extop->MF);
711: MatDestroy(&extop->MJ);
712: if (extop->solve) {
713: solve = extop->solve;
714: if (extop->szd) {
715: if (!extop->simpU) {BVDestroy(&solve->T_1U);}
716: PetscFree2(solve->M,solve->work);
717: VecDestroy(&solve->w[0]);
718: VecDestroy(&solve->w[1]);
719: }
720: if (!solve->sincf) {
721: MatDestroy(&solve->T);
722: }
723: PetscFree(extop->solve);
724: }
725: if (extop->proj) {
726: if (extop->szd) {
727: for (j=0;j<extop->nep->nt;j++) {MatDestroy(&extop->proj->V1pApX[j]);}
728: MatDestroy(&extop->proj->XpV1);
729: PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work);
730: VecDestroy(&extop->proj->w);
731: BVDestroy(&extop->proj->V1);
732: }
733: PetscFree(extop->proj);
734: }
735: PetscFree(extop);
736: return(0);
737: }
739: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
740: {
741: PetscErrorCode ierr;
742: NEP_EXT_OP op;
743: NEP_DEF_FUN_SOLVE solve;
744: PetscInt szd;
745: Vec x;
748: NEPDeflationReset(*extop);
749: PetscNew(&op);
750: *extop = op;
751: op->nep = nep;
752: op->n = 0;
753: op->szd = szd = sz-1;
754: op->max_midx = PetscMin(MAX_MINIDX,szd);
755: op->X = X;
756: if (!X) { BVDuplicateResize(nep->V,sz,&op->X); }
757: else { PetscObjectReference((PetscObject)X); }
758: PetscCalloc1(sz*sz,&(op)->H);
759: if (op->szd) {
760: BVGetColumn(op->X,0,&x);
761: VecDuplicate(x,&op->w);
762: BVRestoreColumn(op->X,0,&x);
763: op->simpU = PETSC_FALSE;
764: if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
765: /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
766: PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL);
767: } else {
768: op->simpU = PETSC_TRUE;
769: }
770: PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc);
771: BVDuplicateResize(op->X,op->szd,&op->W);
772: }
773: if (ksp) {
774: PetscNew(&solve);
775: op->solve = solve;
776: solve->ksp = ksp;
777: solve->sincf = sincfun;
778: solve->n = -1;
779: if (op->szd) {
780: if (!op->simpU) {
781: BVDuplicateResize(nep->V,szd,&solve->T_1U);
782: }
783: PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work);
784: BVCreateVec(nep->V,&solve->w[0]);
785: VecDuplicate(solve->w[0],&solve->w[1]);
786: }
787: }
788: return(0);
789: }
791: PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
792: {
793: PetscScalar *T,*Ei,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
794: PetscScalar alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
795: const PetscScalar *E;
796: PetscInt i,ldds,nwork=0,szd,nv,j,k,n;
797: PetscBLASInt inc=1,nv_,ldds_,dim_,dim2,szdk,szd_,n_,ldh_;
798: PetscMPIInt np;
799: NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
800: NEP_EXT_OP extop=proj->extop;
801: NEP nep=extop->nep;
802: PetscErrorCode ierr;
805: DSGetDimensions(ds,&nv,NULL,NULL,NULL,NULL);
806: DSGetLeadingDimension(ds,&ldds);
807: DSGetArray(ds,mat,&T);
808: PetscArrayzero(T,ldds*nv);
809: PetscBLASIntCast(ldds*nv,&dim2);
810: /* mat = V1^*T(lambda)V1 */
811: for (i=0;i<nep->nt;i++) {
812: if (deriv) {
813: FNEvaluateDerivative(nep->f[i],lambda,&alpha);
814: } else {
815: FNEvaluateFunction(nep->f[i],lambda,&alpha);
816: }
817: DSGetArray(ds,DSMatExtra[i],&Ei);
818: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dim2,&alpha,Ei,&inc,T,&inc));
819: DSRestoreArray(ds,DSMatExtra[i],&Ei);
820: }
821: if (!extop->ref && extop->n) {
822: n = extop->n;
823: szd = extop->szd;
824: PetscArrayzero(proj->work,proj->lwork);
825: PetscBLASIntCast(nv,&nv_);
826: PetscBLASIntCast(n,&n_);
827: PetscBLASIntCast(ldds,&ldds_);
828: PetscBLASIntCast(szd,&szd_);
829: PetscBLASIntCast(proj->dim,&dim_);
830: PetscBLASIntCast(extop->szd+1,&ldh_);
831: w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
832: nwork += 2*proj->dim*proj->dim;
834: /* mat = mat + V1^*U(lambda)V2 */
835: for (i=0;i<nep->nt;i++) {
836: if (extop->simpU) {
837: if (deriv) {
838: FNEvaluateDerivative(nep->f[i],lambda,&alpha);
839: } else {
840: FNEvaluateFunction(nep->f[i],lambda,&alpha);
841: }
842: ww = w1; w = w2;
843: PetscArraycpy(ww,proj->V2,szd*nv);
844: MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);CHKERRMPI(ierr);
845: for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
846: for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
847: alpha = -alpha;
848: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
849: if (deriv) {
850: PetscBLASIntCast(szd*nv,&szdk);
851: FNEvaluateFunction(nep->f[i],lambda,&alpha2);
852: PetscArraycpy(w,proj->V2,szd*nv);
853: for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
854: alpha2 = -alpha2;
855: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
856: alpha2 = 1.0;
857: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
858: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
859: }
860: for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
861: } else {
862: NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd);
863: w = deriv?w2:w1; ww = deriv?w1:w2;
864: MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);CHKERRMPI(ierr);
865: s = PetscSqrtReal(np);
866: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
867: }
868: MatDenseGetArrayRead(proj->V1pApX[i],&E);
869: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
870: MatDenseRestoreArrayRead(proj->V1pApX[i],&E);
871: }
873: /* mat = mat + V2^*A(lambda)V1 */
874: basisv = proj->work+nwork; nwork += szd;
875: hH = proj->work+nwork; nwork += szd*szd;
876: hHprev = proj->work+nwork; nwork += szd*szd;
877: AB = proj->work+nwork;
878: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv);
879: if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
880: for (j=0;j<n;j++)
881: for (i=0;i<n;i++)
882: for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
883: MatDenseGetArrayRead(proj->XpV1,&E);
884: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
885: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
886: MatDenseRestoreArrayRead(proj->XpV1,&E);
888: /* mat = mat + V2^*B(lambda)V2 */
889: PetscArrayzero(AB,szd*szd);
890: for (i=1;i<extop->midx;i++) {
891: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
892: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
893: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
894: pts = hHprev; hHprev = hH; hH = pts;
895: }
896: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
897: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
898: }
899: DSRestoreArray(ds,mat,&T);
900: return(0);
901: }
903: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
904: {
905: PetscErrorCode ierr;
906: PetscInt k,j,n=extop->n,dim;
907: Vec v,ve;
908: BV V1;
909: Mat G;
910: NEP nep=extop->nep;
911: NEP_DEF_PROJECT proj;
914: NEPCheckSplit(extop->nep,1);
915: proj = extop->proj;
916: if (!proj) {
917: /* Initialize the projection data structure */
918: PetscNew(&proj);
919: extop->proj = proj;
920: proj->extop = extop;
921: BVGetSizes(Vext,NULL,NULL,&dim);
922: proj->dim = dim;
923: if (extop->szd) {
924: proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
925: PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work);
926: for (j=0;j<nep->nt;j++) {
927: MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]);
928: }
929: MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1);
930: BVCreateVec(extop->X,&proj->w);
931: BVDuplicateResize(extop->X,proj->dim,&proj->V1);
932: }
933: DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj);
934: }
936: /* Split Vext in V1 and V2 */
937: if (extop->szd) {
938: for (j=j0;j<j1;j++) {
939: BVGetColumn(Vext,j,&ve);
940: BVGetColumn(proj->V1,j,&v);
941: NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE);
942: BVRestoreColumn(proj->V1,j,&v);
943: BVRestoreColumn(Vext,j,&ve);
944: }
945: V1 = proj->V1;
946: } else V1 = Vext;
948: /* Compute matrices V1^* A_i V1 */
949: BVSetActiveColumns(V1,j0,j1);
950: for (k=0;k<nep->nt;k++) {
951: DSGetMat(ds,DSMatExtra[k],&G);
952: BVMatProject(V1,nep->A[k],V1,G);
953: DSRestoreMat(ds,DSMatExtra[k],&G);
954: }
956: if (extop->n) {
957: if (extop->szd) {
958: /* Compute matrices V1^* A_i X and V1^* X */
959: BVSetActiveColumns(extop->W,0,n);
960: for (k=0;k<nep->nt;k++) {
961: BVMatMult(extop->X,nep->A[k],extop->W);
962: BVDot(extop->W,V1,proj->V1pApX[k]);
963: }
964: BVDot(V1,extop->X,proj->XpV1);
965: }
966: }
967: return(0);
968: }