Actual source code: bvtensor.c

slepc-3.15.2 2021-09-20
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2021, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Tensor BV that is represented in compact form as V = (I otimes U) S
 12: */

 14: #include <slepc/private/bvimpl.h>
 15: #include <slepcblaslapack.h>

 17: typedef struct {
 18:   BV          U;        /* first factor */
 19:   Mat         S;        /* second factor */
 20:   PetscScalar *qB;      /* auxiliary matrix used in non-standard inner products */
 21:   PetscScalar *sw;      /* work space */
 22:   PetscInt    d;        /* degree of the tensor BV */
 23:   PetscInt    ld;       /* leading dimension of a single block in S */
 24:   PetscInt    puk;      /* copy of the k value */
 25:   Vec         u;        /* auxiliary work vector */
 26: } BV_TENSOR;

 28: PetscErrorCode BVMultInPlace_Tensor(BV V,Mat Q,PetscInt s,PetscInt e)
 29: {
 30:   PetscErrorCode    ierr;
 31:   BV_TENSOR         *ctx = (BV_TENSOR*)V->data;
 32:   PetscScalar       *pS;
 33:   const PetscScalar *q;
 34:   PetscInt          ldq,lds = ctx->ld*ctx->d;

 37:   MatGetSize(Q,&ldq,NULL);
 38:   MatDenseGetArray(ctx->S,&pS);
 39:   MatDenseGetArrayRead(Q,&q);
 40:   BVMultInPlace_BLAS_Private(V,lds,V->k-V->l,ldq,s-V->l,e-V->l,pS+(V->nc+V->l)*lds,q+V->l*ldq+V->l,PETSC_FALSE);
 41:   MatDenseRestoreArrayRead(Q,&q);
 42:   MatDenseRestoreArray(ctx->S,&pS);
 43:   return(0);
 44: }

 46: PetscErrorCode BVMultInPlaceTranspose_Tensor(BV V,Mat Q,PetscInt s,PetscInt e)
 47: {
 48:   PetscErrorCode    ierr;
 49:   BV_TENSOR         *ctx = (BV_TENSOR*)V->data;
 50:   PetscScalar       *pS;
 51:   const PetscScalar *q;
 52:   PetscInt          ldq,lds = ctx->ld*ctx->d;

 55:   MatGetSize(Q,&ldq,NULL);
 56:   MatDenseGetArray(ctx->S,&pS);
 57:   MatDenseGetArrayRead(Q,&q);
 58:   BVMultInPlace_BLAS_Private(V,lds,V->k-V->l,ldq,s-V->l,e-V->l,pS+(V->nc+V->l)*lds,q+V->l*ldq+V->l,PETSC_TRUE);
 59:   MatDenseRestoreArrayRead(Q,&q);
 60:   MatDenseRestoreArray(ctx->S,&pS);
 61:   return(0);
 62: }

 64: PetscErrorCode BVDot_Tensor(BV X,BV Y,Mat M)
 65: {
 67:   BV_TENSOR         *x = (BV_TENSOR*)X->data,*y = (BV_TENSOR*)Y->data;
 68:   PetscScalar       *m;
 69:   const PetscScalar *px,*py;
 70:   PetscInt          ldm,lds = x->ld*x->d;

 73:   if (x->U!=y->U) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_SUP,"BVDot() in BVTENSOR requires that both operands have the same U factor");
 74:   if (lds!=y->ld*y->d) SETERRQ2(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_SIZ,"Mismatching dimensions ld*d %D %D",lds,y->ld*y->d);
 75:   MatGetSize(M,&ldm,NULL);
 76:   MatDenseGetArrayRead(x->S,&px);
 77:   MatDenseGetArrayRead(y->S,&py);
 78:   MatDenseGetArray(M,&m);
 79:   BVDot_BLAS_Private(X,Y->k-Y->l,X->k-X->l,lds,ldm,py+(Y->nc+Y->l)*lds,px+(X->nc+X->l)*lds,m+X->l*ldm+Y->l,PETSC_FALSE);
 80:   MatDenseRestoreArray(M,&m);
 81:   MatDenseRestoreArrayRead(x->S,&px);
 82:   MatDenseRestoreArrayRead(y->S,&py);
 83:   return(0);
 84: }

 86: PetscErrorCode BVScale_Tensor(BV bv,PetscInt j,PetscScalar alpha)
 87: {
 89:   BV_TENSOR      *ctx = (BV_TENSOR*)bv->data;
 90:   PetscScalar    *pS;
 91:   PetscInt       lds = ctx->ld*ctx->d;

 94:   MatDenseGetArray(ctx->S,&pS);
 95:   if (j<0) {
 96:     BVScale_BLAS_Private(bv,(bv->k-bv->l)*lds,pS+(bv->nc+bv->l)*lds,alpha);
 97:   } else {
 98:     BVScale_BLAS_Private(bv,lds,pS+(bv->nc+j)*lds,alpha);
 99:   }
100:   MatDenseRestoreArray(ctx->S,&pS);
101:   return(0);
102: }

104: PetscErrorCode BVNorm_Tensor(BV bv,PetscInt j,NormType type,PetscReal *val)
105: {
106:   PetscErrorCode    ierr;
107:   BV_TENSOR         *ctx = (BV_TENSOR*)bv->data;
108:   const PetscScalar *pS;
109:   PetscInt          lds = ctx->ld*ctx->d;

112:   MatDenseGetArrayRead(ctx->S,&pS);
113:   if (j<0) {
114:     BVNorm_LAPACK_Private(bv,lds,bv->k-bv->l,pS+(bv->nc+bv->l)*lds,type,val,PETSC_FALSE);
115:   } else {
116:     BVNorm_LAPACK_Private(bv,lds,1,pS+(bv->nc+j)*lds,type,val,PETSC_FALSE);
117:   }
118:   MatDenseRestoreArrayRead(ctx->S,&pS);
119:   return(0);
120: }

122: PetscErrorCode BVCopyColumn_Tensor(BV V,PetscInt j,PetscInt i)
123: {
125:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
126:   PetscScalar    *pS;
127:   PetscInt       lds = ctx->ld*ctx->d;

130:   MatDenseGetArray(ctx->S,&pS);
131:   PetscArraycpy(pS+(V->nc+i)*lds,pS+(V->nc+j)*lds,lds);
132:   MatDenseRestoreArray(ctx->S,&pS);
133:   return(0);
134: }

136: static PetscErrorCode BVTensorNormColumn(BV bv,PetscInt j,PetscReal *norm)
137: {
138:   PetscErrorCode    ierr;
139:   BV_TENSOR         *ctx = (BV_TENSOR*)bv->data;
140:   PetscBLASInt      one=1,lds_;
141:   PetscScalar       sone=1.0,szero=0.0,*x,dot;
142:   const PetscScalar *S;
143:   PetscReal         alpha=1.0,scale=0.0,aval;
144:   PetscInt          i,lds,ld=ctx->ld;

147:   lds = ld*ctx->d;
148:   MatDenseGetArrayRead(ctx->S,&S);
149:   PetscBLASIntCast(lds,&lds_);
150:   if (ctx->qB) {
151:     x = ctx->sw;
152:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,S+j*lds,&one,&szero,x,&one));
153:     dot = PetscRealPart(BLASdot_(&lds_,S+j*lds,&one,x,&one));
154:     BV_SafeSqrt(bv,dot,norm);
155:   } else {
156:     /* Compute *norm = BLASnrm2_(&lds_,S+j*lds,&one); */
157:     if (lds==1) *norm = PetscAbsScalar(S[j*lds]);
158:     else {
159:       for (i=0;i<lds;i++) {
160:         aval = PetscAbsScalar(S[i+j*lds]);
161:         if (aval!=0.0) {
162:           if (scale<aval) {
163:             alpha = 1.0 + alpha*PetscSqr(scale/aval);
164:             scale = aval;
165:           } else alpha += PetscSqr(aval/scale);
166:         }
167:       }
168:       *norm = scale*PetscSqrtReal(alpha);
169:     }
170:   }
171:   MatDenseRestoreArrayRead(ctx->S,&S);
172:   return(0);
173: }

175: PetscErrorCode BVOrthogonalizeGS1_Tensor(BV bv,PetscInt k,Vec v,PetscBool *which,PetscScalar *h,PetscScalar *c,PetscReal *onorm,PetscReal *norm)
176: {
177:   PetscErrorCode    ierr;
178:   BV_TENSOR         *ctx = (BV_TENSOR*)bv->data;
179:   PetscScalar       *pS,*cc,*x,dot,sonem=-1.0,sone=1.0,szero=0.0;
180:   PetscInt          i,lds = ctx->ld*ctx->d;
181:   PetscBLASInt      lds_,k_,one=1;
182:   const PetscScalar *omega;

185:   if (v) SETERRQ(PetscObjectComm((PetscObject)bv),PETSC_ERR_SUP,"Orthogonalization against an external vector is not allowed in BVTENSOR");
186:   MatDenseGetArray(ctx->S,&pS);
187:   if (!c) {
188:     VecGetArray(bv->buffer,&cc);
189:   } else cc = c;
190:   PetscBLASIntCast(lds,&lds_);
191:   PetscBLASIntCast(k,&k_);

193:   if (onorm) { BVTensorNormColumn(bv,k,onorm); }

195:   if (ctx->qB) x = ctx->sw;
196:   else x = pS+k*lds;

198:   if (bv->orthog_type==BV_ORTHOG_MGS) {  /* modified Gram-Schmidt */

200:     if (bv->indef) { /* signature */
201:       VecGetArrayRead(bv->omega,&omega);
202:     }
203:     for (i=-bv->nc;i<k;i++) {
204:       if (which && i>=0 && !which[i]) continue;
205:       if (ctx->qB) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,pS+k*lds,&one,&szero,x,&one));
206:       /* c_i = (s_k, s_i) */
207:       dot = PetscRealPart(BLASdot_(&lds_,pS+i*lds,&one,x,&one));
208:       if (bv->indef) dot /= PetscRealPart(omega[i]);
209:       BV_SetValue(bv,i,0,cc,dot);
210:       /* s_k = s_k - c_i s_i */
211:       dot = -dot;
212:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&lds_,&dot,pS+i*lds,&one,pS+k*lds,&one));
213:     }
214:     if (bv->indef) {
215:       VecRestoreArrayRead(bv->omega,&omega);
216:     }

218:   } else {  /* classical Gram-Schmidt */
219:     if (ctx->qB) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,pS+k*lds,&one,&szero,x,&one));

221:     /* cc = S_{0:k-1}^* s_k */
222:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&lds_,&k_,&sone,pS,&lds_,x,&one,&szero,cc,&one));

224:     /* s_k = s_k - S_{0:k-1} cc */
225:     if (bv->indef) { BV_ApplySignature(bv,k,cc,PETSC_TRUE); }
226:      PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&k_,&sonem,pS,&lds_,cc,&one,&sone,pS+k*lds,&one));
227:     if (bv->indef) { BV_ApplySignature(bv,k,cc,PETSC_FALSE); }
228:   }

230:   if (norm) { BVTensorNormColumn(bv,k,norm); }
231:   BV_AddCoefficients(bv,k,h,cc);
232:   MatDenseRestoreArray(ctx->S,&pS);
233:   VecRestoreArray(bv->buffer,&cc);
234:   return(0);
235: }

237: PetscErrorCode BVView_Tensor(BV bv,PetscViewer viewer)
238: {
239:   PetscErrorCode    ierr;
240:   BV_TENSOR         *ctx = (BV_TENSOR*)bv->data;
241:   PetscViewerFormat format;
242:   PetscBool         isascii;
243:   const char        *bvname,*uname,*sname;

246:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
247:   if (isascii) {
248:     PetscViewerGetFormat(viewer,&format);
249:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
250:       PetscViewerASCIIPrintf(viewer,"number of tensor blocks (degree): %D\n",ctx->d);
251:       PetscViewerASCIIPrintf(viewer,"number of columns of U factor: %D\n",ctx->ld);
252:       return(0);
253:     }
254:     BVView(ctx->U,viewer);
255:     MatView(ctx->S,viewer);
256:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
257:       PetscObjectGetName((PetscObject)bv,&bvname);
258:       PetscObjectGetName((PetscObject)ctx->U,&uname);
259:       PetscObjectGetName((PetscObject)ctx->S,&sname);
260:       PetscViewerASCIIPrintf(viewer,"%s=kron(eye(%D),%s)*%s(:,1:%D);\n",bvname,ctx->d,uname,sname,bv->k);
261:     }
262:   } else {
263:     BVView(ctx->U,viewer);
264:     MatView(ctx->S,viewer);
265:   }
266:   return(0);
267: }

269: static PetscErrorCode BVTensorUpdateMatrix(BV V,PetscInt ini,PetscInt end)
270: {
272:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
273:   PetscInt       i,j,r,c,l,k,ld=ctx->ld,lds=ctx->d*ctx->ld;
274:   PetscScalar    *qB,*sqB;
275:   Vec            u;
276:   Mat            A;

279:   if (!V->matrix) return(0);
280:   l = ctx->U->l; k = ctx->U->k;
281:   /* update inner product matrix */
282:   if (!ctx->qB) {
283:     PetscCalloc2(lds*lds,&ctx->qB,lds,&ctx->sw);
284:     VecDuplicate(ctx->U->t,&ctx->u);
285:   }
286:   ctx->U->l = 0;
287:   for (r=0;r<ctx->d;r++) {
288:     for (c=0;c<=r;c++) {
289:       MatNestGetSubMat(V->matrix,r,c,&A);
290:       if (A) {
291:         qB = ctx->qB+c*ld*lds+r*ld;
292:         for (i=ini;i<end;i++) {
293:           BVGetColumn(ctx->U,i,&u);
294:           MatMult(A,u,ctx->u);
295:           ctx->U->k = i+1;
296:           BVDotVec(ctx->U,ctx->u,qB+i*lds);
297:           BVRestoreColumn(ctx->U,i,&u);
298:           for (j=0;j<i;j++) qB[i+j*lds] = PetscConj(qB[j+i*lds]);
299:           qB[i*lds+i] = PetscRealPart(qB[i+i*lds]);
300:         }
301:         if (c!=r) {
302:           sqB = ctx->qB+r*ld*lds+c*ld;
303:           for (i=ini;i<end;i++) for (j=0;j<=i;j++) {
304:             sqB[i+j*lds] = PetscConj(qB[j+i*lds]);
305:             sqB[j+i*lds] = qB[j+i*lds];
306:           }
307:         }
308:       }
309:     }
310:   }
311:   ctx->U->l = l; ctx->U->k = k;
312:   return(0);
313: }

315: static PetscErrorCode BVTensorBuildFirstColumn_Tensor(BV V,PetscInt k)
316: {
318:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
319:   PetscInt       i,nq=0;
320:   PetscScalar    *pS,*omega;
321:   PetscReal      norm;
322:   PetscBool      breakdown=PETSC_FALSE;

325:   MatDenseGetArray(ctx->S,&pS);
326:   for (i=0;i<ctx->d;i++) {
327:     if (i>=k) {
328:       BVSetRandomColumn(ctx->U,nq);
329:     } else {
330:       BVCopyColumn(ctx->U,i,nq);
331:     }
332:     BVOrthogonalizeColumn(ctx->U,nq,pS+i*ctx->ld,&norm,&breakdown);
333:     if (!breakdown) {
334:       BVScaleColumn(ctx->U,nq,1.0/norm);
335:       pS[nq+i*ctx->ld] = norm;
336:       nq++;
337:     }
338:   }
339:   MatDenseRestoreArray(ctx->S,&pS);
340:   if (!nq) SETERRQ1(PetscObjectComm((PetscObject)V),1,"Cannot build first column of tensor BV; U should contain k=%D nonzero columns",k);
341:   BVTensorUpdateMatrix(V,0,nq);
342:   BVTensorNormColumn(V,0,&norm);
343:   BVScale_Tensor(V,0,1.0/norm);
344:   if (V->indef) {
345:     BV_AllocateSignature(V);
346:     VecGetArray(V->omega,&omega);
347:     omega[0] = (norm<0.0)? -1.0: 1.0;
348:     VecRestoreArray(V->omega,&omega);
349:   }
350:   /* set active columns */
351:   ctx->U->l = 0;
352:   ctx->U->k = nq;
353:   return(0);
354: }

356: /*@
357:    BVTensorBuildFirstColumn - Builds the first column of the tensor basis vectors
358:    V from the data contained in the first k columns of U.

360:    Collective on V

362:    Input Parameters:
363: +  V - the basis vectors context
364: -  k - the number of columns of U with relevant information

366:    Notes:
367:    At most d columns are considered, where d is the degree of the tensor BV.
368:    Given V = (I otimes U) S, this function computes the first column of V, that
369:    is, it computes the coefficients of the first column of S by orthogonalizing
370:    the first d columns of U. If k is less than d (or linearly dependent columns
371:    are found) then additional random columns are used.

373:    The computed column has unit norm.

375:    Level: advanced

377: .seealso: BVCreateTensor()
378: @*/
379: PetscErrorCode BVTensorBuildFirstColumn(BV V,PetscInt k)
380: {

386:   PetscUseMethod(V,"BVTensorBuildFirstColumn_C",(BV,PetscInt),(V,k));
387:   return(0);
388: }

390: static PetscErrorCode BVTensorCompress_Tensor(BV V,PetscInt newc)
391: {
393:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
394:   PetscInt       nwu=0,nnc,nrow,lwa,r,c;
395:   PetscInt       i,j,k,n,lds=ctx->ld*ctx->d,deg=ctx->d,lock,cs1=V->k,rs1=ctx->U->k,rk=0,offu;
396:   PetscScalar    *S,*M,*Z,*pQ,*SS,*SS2,t,sone=1.0,zero=0.0,mone=-1.0,*p,*tau,*work,*qB,*sqB;
397:   PetscReal      *sg,tol,*rwork;
398:   PetscBLASInt   ld_,cs1_,rs1_,cs1tdeg,n_,info,lw_,newc_,newctdeg,nnc_,nrow_,nnctdeg,lds_,rk_;
399:   Mat            Q,A;

402:   if (!cs1) return(0);
403:   lwa = 6*ctx->ld*lds+2*cs1;
404:   n = PetscMin(rs1,deg*cs1);
405:   lock = ctx->U->l;
406:   nnc = cs1-lock-newc;
407:   nrow = rs1-lock;
408:   PetscCalloc6(deg*newc*nnc,&SS,newc*nnc,&SS2,(rs1+lock+newc)*n,&pQ,deg*rs1,&tau,lwa,&work,6*n,&rwork);
409:   offu = lock*(rs1+1);
410:   M = work+nwu;
411:   nwu += rs1*cs1*deg;
412:   sg = rwork;
413:   Z = work+nwu;
414:   nwu += deg*cs1*n;
415:   PetscBLASIntCast(n,&n_);
416:   PetscBLASIntCast(nnc,&nnc_);
417:   PetscBLASIntCast(cs1,&cs1_);
418:   PetscBLASIntCast(rs1,&rs1_);
419:   PetscBLASIntCast(newc,&newc_);
420:   PetscBLASIntCast(newc*deg,&newctdeg);
421:   PetscBLASIntCast(nnc*deg,&nnctdeg);
422:   PetscBLASIntCast(cs1*deg,&cs1tdeg);
423:   PetscBLASIntCast(lwa-nwu,&lw_);
424:   PetscBLASIntCast(nrow,&nrow_);
425:   PetscBLASIntCast(lds,&lds_);
426:   MatDenseGetArray(ctx->S,&S);

428:   if (newc>0) {
429:     /* truncate columns associated with new converged eigenpairs */
430:     for (j=0;j<deg;j++) {
431:       for (i=lock;i<lock+newc;i++) {
432:         PetscArraycpy(M+(i-lock+j*newc)*nrow,S+i*lds+j*ctx->ld+lock,nrow);
433:       }
434:     }
435:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
436: #if !defined (PETSC_USE_COMPLEX)
437:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pQ+offu,&rs1_,Z,&n_,work+nwu,&lw_,&info));
438: #else
439:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pQ+offu,&rs1_,Z,&n_,work+nwu,&lw_,rwork+n,&info));
440: #endif
441:     SlepcCheckLapackInfo("gesvd",info);
442:     PetscFPTrapPop();
443:     /* SVD has rank min(newc,nrow) */
444:     rk = PetscMin(newc,nrow);
445:     for (i=0;i<rk;i++) {
446:       t = sg[i];
447:       PetscStackCallBLAS("BLASscal",BLASscal_(&newctdeg,&t,Z+i,&n_));
448:     }
449:     for (i=0;i<deg;i++) {
450:       for (j=lock;j<lock+newc;j++) {
451:         PetscArraycpy(S+j*lds+i*ctx->ld+lock,Z+(newc*i+j-lock)*n,rk);
452:         PetscArrayzero(S+j*lds+i*ctx->ld+lock+rk,(ctx->ld-lock-rk));
453:       }
454:     }
455:     /*
456:       update columns associated with non-converged vectors, orthogonalize
457:       against pQ so that next M has rank nnc+d-1 insted of nrow+d-1
458:     */
459:     for (i=0;i<deg;i++) {
460:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pQ+offu,&rs1_,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_,&zero,SS+i*newc*nnc,&newc_));
461:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pQ+offu,&rs1_,SS+i*newc*nnc,&newc_,&sone,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_));
462:       /* repeat orthogonalization step */
463:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pQ+offu,&rs1_,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_,&zero,SS2,&newc_));
464:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pQ+offu,&rs1_,SS2,&newc_,&sone,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_));
465:       for (j=0;j<newc*nnc;j++) *(SS+i*newc*nnc+j) += SS2[j];
466:     }
467:   }

469:   /* truncate columns associated with non-converged eigenpairs */
470:   for (j=0;j<deg;j++) {
471:     for (i=lock+newc;i<cs1;i++) {
472:       PetscArraycpy(M+(i-lock-newc+j*nnc)*nrow,S+i*lds+j*ctx->ld+lock,nrow);
473:     }
474:   }
475:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
476: #if !defined (PETSC_USE_COMPLEX)
477:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pQ+offu+newc*rs1,&rs1_,Z,&n_,work+nwu,&lw_,&info));
478: #else
479:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pQ+offu+newc*rs1,&rs1_,Z,&n_,work+nwu,&lw_,rwork+n,&info));
480: #endif
481:   SlepcCheckLapackInfo("gesvd",info);
482:   PetscFPTrapPop();
483:   tol = PetscMax(rs1,deg*cs1)*PETSC_MACHINE_EPSILON*sg[0];
484:   rk = 0;
485:   for (i=0;i<PetscMin(nrow,nnctdeg);i++) if (sg[i]>tol) rk++;
486:   rk = PetscMin(nnc+deg-1,rk);
487:   /* the SVD has rank (at most) nnc+deg-1 */
488:   for (i=0;i<rk;i++) {
489:     t = sg[i];
490:     PetscStackCallBLAS("BLASscal",BLASscal_(&nnctdeg,&t,Z+i,&n_));
491:   }
492:   /* update S */
493:   PetscArrayzero(S+cs1*lds,(V->m-cs1)*lds);
494:   k = ctx->ld-lock-newc-rk;
495:   for (i=0;i<deg;i++) {
496:     for (j=lock+newc;j<cs1;j++) {
497:       PetscArraycpy(S+j*lds+i*ctx->ld+lock+newc,Z+(nnc*i+j-lock-newc)*n,rk);
498:       PetscArrayzero(S+j*lds+i*ctx->ld+lock+newc+rk,k);
499:     }
500:   }
501:   if (newc>0) {
502:     for (i=0;i<deg;i++) {
503:       p = SS+nnc*newc*i;
504:       for (j=lock+newc;j<cs1;j++) {
505:         for (k=0;k<newc;k++) S[j*lds+i*ctx->ld+lock+k] = *(p++);
506:       }
507:     }
508:   }

510:   /* orthogonalize pQ */
511:   rk = rk+newc;
512:   PetscBLASIntCast(rk,&rk_);
513:   PetscBLASIntCast(cs1-lock,&nnc_);
514:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
515:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&nrow_,&rk_,pQ+offu,&rs1_,tau,work+nwu,&lw_,&info));
516:   SlepcCheckLapackInfo("geqrf",info);
517:   for (i=0;i<deg;i++) {
518:     PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&rk_,&nnc_,&sone,pQ+offu,&rs1_,S+lock*lds+lock+i*ctx->ld,&lds_));
519:   }
520:   PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&nrow_,&rk_,&rk_,pQ+offu,&rs1_,tau,work+nwu,&lw_,&info));
521:   SlepcCheckLapackInfo("orgqr",info);
522:   PetscFPTrapPop();

524:   /* update vectors U(:,idx) = U*Q(:,idx) */
525:   rk = rk+lock;
526:   for (i=0;i<lock;i++) pQ[i*(1+rs1)] = 1.0;
527:   MatCreateSeqDense(PETSC_COMM_SELF,rs1,rk,pQ,&Q);
528:   ctx->U->k = rs1;
529:   BVMultInPlace(ctx->U,Q,lock,rk);
530:   MatDestroy(&Q);

532:   if (ctx->qB) {
533:    /* update matrix qB */
534:     PetscBLASIntCast(ctx->ld,&ld_);
535:     PetscBLASIntCast(rk,&rk_);
536:     for (r=0;r<ctx->d;r++) {
537:       for (c=0;c<=r;c++) {
538:         MatNestGetSubMat(V->matrix,r,c,&A);
539:         if (A) {
540:           qB = ctx->qB+r*ctx->ld+c*ctx->ld*lds;
541:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&rs1_,&rk_,&rs1_,&sone,qB,&lds_,pQ,&rs1_,&zero,work+nwu,&rs1_));
542:           PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&rk_,&rk_,&rs1_,&sone,pQ,&rs1_,work+nwu,&rs1_,&zero,qB,&lds_));
543:           for (i=0;i<rk;i++) {
544:             for (j=0;j<i;j++) qB[i+j*lds] = PetscConj(qB[j+i*lds]);
545:             qB[i+i*lds] = PetscRealPart(qB[i+i*lds]);
546:           }
547:           for (i=rk;i<ctx->ld;i++) {
548:             PetscArrayzero(qB+i*lds,ctx->ld);
549:           }
550:           for (i=0;i<rk;i++) {
551:             PetscArrayzero(qB+i*lds+rk,(ctx->ld-rk));
552:           }
553:           if (c!=r) {
554:             sqB = ctx->qB+r*ctx->ld*lds+c*ctx->ld;
555:             for (i=0;i<ctx->ld;i++) for (j=0;j<ctx->ld;j++) sqB[i+j*lds] = PetscConj(qB[j+i*lds]);
556:           }
557:         }
558:       }
559:     }
560:   }

562:   /* free work space */
563:   PetscFree6(SS,SS2,pQ,tau,work,rwork);
564:   MatDenseRestoreArray(ctx->S,&S);

566:   /* set active columns */
567:   if (newc) ctx->U->l += newc;
568:   ctx->U->k = rk;
569:   return(0);
570: }

572: /*@
573:    BVTensorCompress - Updates the U and S factors of the tensor basis vectors
574:    object V by means of an SVD, removing redundant information.

576:    Collective on V

578:    Input Parameters:
579: +  V - the tensor basis vectors context
580: -  newc - additional columns to be locked

582:    Notes:
583:    This function is typically used when restarting Krylov solvers. Truncating a
584:    tensor BV V = (I otimes U) S to its leading columns amounts to keeping the
585:    leading columns of S. However, to effectively reduce the size of the
586:    decomposition, it is necessary to compress it in a way that fewer columns of
587:    U are employed. This can be achieved by means of an update that involves the
588:    SVD of the low-rank matrix [S_0 S_1 ... S_{d-1}], where S_i are the pieces of S.

590:    If newc is nonzero, then newc columns are added to the leading columns of V.
591:    This means that the corresponding columns of the U and S factors will remain
592:    invariant in subsequent operations.

594:    Level: advanced

596: .seealso: BVCreateTensor(), BVSetActiveColumns()
597: @*/
598: PetscErrorCode BVTensorCompress(BV V,PetscInt newc)
599: {

605:   PetscUseMethod(V,"BVTensorCompress_C",(BV,PetscInt),(V,newc));
606:   return(0);
607: }

609: static PetscErrorCode BVTensorGetDegree_Tensor(BV bv,PetscInt *d)
610: {
611:   BV_TENSOR *ctx = (BV_TENSOR*)bv->data;

614:   *d = ctx->d;
615:   return(0);
616: }

618: /*@
619:    BVTensorGetDegree - Returns the number of blocks (degree) of the tensor BV.

621:    Not collective

623:    Input Parameter:
624: .  bv - the basis vectors context

626:    Output Parameter:
627: .  d - the degree

629:    Level: advanced

631: .seealso: BVCreateTensor()
632: @*/
633: PetscErrorCode BVTensorGetDegree(BV bv,PetscInt *d)
634: {

640:   PetscUseMethod(bv,"BVTensorGetDegree_C",(BV,PetscInt*),(bv,d));
641:   return(0);
642: }

644: static PetscErrorCode BVTensorGetFactors_Tensor(BV V,BV *U,Mat *S)
645: {
646:   BV_TENSOR *ctx = (BV_TENSOR*)V->data;

649:   if (ctx->puk>-1) SETERRQ(PetscObjectComm((PetscObject)V),PETSC_ERR_ORDER,"Previous call to BVTensonGetFactors without a BVTensorRestoreFactors call");
650:   ctx->puk = ctx->U->k;
651:   if (U) *U = ctx->U;
652:   if (S) *S = ctx->S;
653:   return(0);
654: }

656: /*@C
657:    BVTensorGetFactors - Returns the two factors involved in the definition of the
658:    tensor basis vectors object, V = (I otimes U) S.

660:    Logically Collective on V

662:    Input Parameter:
663: .  V - the basis vectors context

665:    Output Parameters:
666: +  U - the BV factor
667: -  S - the Mat factor

669:    Notes:
670:    The returned factors are references (not copies) of the internal factors,
671:    so modifying them will change the tensor BV as well. Some operations of the
672:    tensor BV assume that U has orthonormal columns, so if the user modifies U
673:    this restriction must be taken into account.

675:    The returned factors must not be destroyed. BVTensorRestoreFactors() must
676:    be called when they are no longer needed.

678:    Pass a NULL vector for any of the arguments that is not needed.

680:    Level: advanced

682: .seealso: BVTensorRestoreFactors()
683: @*/
684: PetscErrorCode BVTensorGetFactors(BV V,BV *U,Mat *S)
685: {

690:   PetscUseMethod(V,"BVTensorGetFactors_C",(BV,BV*,Mat*),(V,U,S));
691:   return(0);
692: }

694: static PetscErrorCode BVTensorRestoreFactors_Tensor(BV V,BV *U,Mat *S)
695: {
697:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;

700:   PetscObjectStateIncrease((PetscObject)V);
701:   if (U) *U = NULL;
702:   if (S) *S = NULL;
703:   BVTensorUpdateMatrix(V,ctx->puk,ctx->U->k);
704:   ctx->puk = -1;
705:   return(0);
706: }

708: /*@C
709:    BVTensorRestoreFactors - Restore the two factors that were obtained with
710:    BVTensorGetFactors().

712:    Logically Collective on V

714:    Input Parameters:
715: +  V - the basis vectors context
716: .  U - the BV factor (or NULL)
717: -  S - the Mat factor (or NULL)

719:    Nots:
720:    The arguments must match the corresponding call to BVTensorGetFactors().

722:    Level: advanced

724: .seealso: BVTensorGetFactors()
725: @*/
726: PetscErrorCode BVTensorRestoreFactors(BV V,BV *U,Mat *S)
727: {

734:   PetscUseMethod(V,"BVTensorRestoreFactors_C",(BV,BV*,Mat*),(V,U,S));
735:   return(0);
736: }

738: PetscErrorCode BVDestroy_Tensor(BV bv)
739: {
741:   BV_TENSOR      *ctx = (BV_TENSOR*)bv->data;

744:   BVDestroy(&ctx->U);
745:   MatDestroy(&ctx->S);
746:   if (ctx->u) {
747:     PetscFree2(ctx->qB,ctx->sw);
748:     VecDestroy(&ctx->u);
749:   }
750:   PetscFree(bv->data);
751:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorBuildFirstColumn_C",NULL);
752:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorCompress_C",NULL);
753:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetDegree_C",NULL);
754:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetFactors_C",NULL);
755:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorRestoreFactors_C",NULL);
756:   return(0);
757: }

759: SLEPC_EXTERN PetscErrorCode BVCreate_Tensor(BV bv)
760: {
762:   BV_TENSOR      *ctx;

765:   PetscNewLog(bv,&ctx);
766:   bv->data = (void*)ctx;
767:   ctx->puk = -1;

769:   bv->ops->multinplace      = BVMultInPlace_Tensor;
770:   bv->ops->multinplacetrans = BVMultInPlaceTranspose_Tensor;
771:   bv->ops->dot              = BVDot_Tensor;
772:   bv->ops->scale            = BVScale_Tensor;
773:   bv->ops->norm             = BVNorm_Tensor;
774:   bv->ops->copycolumn       = BVCopyColumn_Tensor;
775:   bv->ops->gramschmidt      = BVOrthogonalizeGS1_Tensor;
776:   bv->ops->destroy          = BVDestroy_Tensor;
777:   bv->ops->view             = BVView_Tensor;

779:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorBuildFirstColumn_C",BVTensorBuildFirstColumn_Tensor);
780:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorCompress_C",BVTensorCompress_Tensor);
781:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetDegree_C",BVTensorGetDegree_Tensor);
782:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetFactors_C",BVTensorGetFactors_Tensor);
783:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorRestoreFactors_C",BVTensorRestoreFactors_Tensor);
784:   return(0);
785: }

787: /*@
788:    BVCreateTensor - Creates a tensor BV that is represented in compact form
789:    as V = (I otimes U) S, where U has orthonormal columns.

791:    Collective on U

793:    Input Parameters:
794: +  U - a basis vectors object
795: -  d - the number of blocks (degree) of the tensor BV

797:    Output Parameter:
798: .  V - the new basis vectors context

800:    Notes:
801:    The new basis vectors object is V = (I otimes U) S, where otimes denotes
802:    the Kronecker product, I is the identity matrix of order d, and S is a
803:    sequential matrix allocated internally. This compact representation is
804:    used e.g. to represent the Krylov basis generated with the linearization
805:    of a matrix polynomial of degree d.

807:    The size of V (number of rows) is equal to d times n, where n is the size
808:    of U. The dimensions of S are d times m rows and m-d+1 columns, where m is
809:    the number of columns of U, so m should be at least d.

811:    The communicator of V will be the same as U.

813:    On input, the content of U is irrelevant. Alternatively, it may contain
814:    some nonzero columns that will be used by BVTensorBuildFirstColumn().

816:    Level: advanced

818: .seealso: BVTensorGetDegree(), BVTensorGetFactors(), BVTensorBuildFirstColumn()
819: @*/
820: PetscErrorCode BVCreateTensor(BV U,PetscInt d,BV *V)
821: {
823:   PetscBool      match;
824:   PetscInt       n,N,m;
825:   BV_TENSOR      *ctx;

830:   PetscObjectTypeCompare((PetscObject)U,BVTENSOR,&match);
831:   if (match) SETERRQ(PetscObjectComm((PetscObject)U),PETSC_ERR_SUP,"U cannot be of type tensor");

833:   BVCreate(PetscObjectComm((PetscObject)U),V);
834:   BVGetSizes(U,&n,&N,&m);
835:   if (m<d) SETERRQ2(PetscObjectComm((PetscObject)U),PETSC_ERR_ARG_SIZ,"U has %D columns, it should have at least d=%D",m,d);
836:   BVSetSizes(*V,d*n,d*N,m-d+1);
837:   PetscObjectChangeTypeName((PetscObject)*V,BVTENSOR);
838:   PetscLogEventBegin(BV_Create,*V,0,0,0);
839:   BVCreate_Tensor(*V);
840:   PetscLogEventEnd(BV_Create,*V,0,0,0);

842:   ctx = (BV_TENSOR*)(*V)->data;
843:   ctx->U  = U;
844:   ctx->d  = d;
845:   ctx->ld = m;
846:   PetscObjectReference((PetscObject)U);
847:   PetscLogObjectParent((PetscObject)*V,(PetscObject)U);
848:   MatCreateSeqDense(PETSC_COMM_SELF,d*m,m-d+1,NULL,&ctx->S);
849:   PetscLogObjectParent((PetscObject)*V,(PetscObject)ctx->S);
850:   PetscObjectSetName((PetscObject)ctx->S,"S");

852:   /* Copy user-provided attributes of U */
853:   (*V)->orthog_type  = U->orthog_type;
854:   (*V)->orthog_ref   = U->orthog_ref;
855:   (*V)->orthog_eta   = U->orthog_eta;
856:   (*V)->orthog_block = U->orthog_block;
857:   (*V)->vmm          = U->vmm;
858:   (*V)->rrandom      = U->rrandom;
859:   return(0);
860: }