Actual source code: qslice.c

slepc-3.10.0 2018-09-18
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2018, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    SLEPc polynomial eigensolver: "stoar"

 13:    Method: S-TOAR with spectrum slicing for symmetric quadratic eigenproblems

 15:    Algorithm:

 17:        Symmetric Two-Level Orthogonal Arnoldi.

 19:    References:

 21:        [1] C. Campos and J.E. Roman, "Inertia-based spectrum slicing
 22:            for symmetric quadratic eigenvalue problems", in preparation,
 23:            2018.
 24: */

 26: #include <slepc/private/pepimpl.h>         /*I "slepcpep.h" I*/
 27: #include "../src/pep/impls/krylov/pepkrylov.h"
 28: #include <slepcblaslapack.h>

 30: static PetscBool  cited = PETSC_FALSE;
 31: static const char citation[] =
 32:   "@Article{slepc-slice-qep,\n"
 33:   "   author = \"C. Campos and J. E. Roman\",\n"
 34:   "   title = \"Inertia-based spectrum slicing for symmetric quadratic eigenvalue problems\",\n"
 35:   "   journal = \"In preparation\",\n"
 36:   "   volume = \"xx\",\n"
 37:   "   number = \"x\",\n"
 38:   "   pages = \"xx--xx\",\n"
 39:   "   year = \"2018,\"\n"
 40:   "   doi = \"https://doi.org/10.1007/xxx\"\n"
 41:   "}\n";

 43: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON

 45: static PetscErrorCode PEPQSliceResetSR(PEP pep)
 46: {
 48:   PEP_TOAR       *ctx=(PEP_TOAR*)pep->data;
 49:   PEP_SR         sr=ctx->sr;
 50:   PEP_shift      s;
 51:   PetscInt       i;

 54:   if (sr) {
 55:     /* Reviewing list of shifts to free memory */
 56:     s = sr->s0;
 57:     if (s) {
 58:       while (s->neighb[1]) {
 59:         s = s->neighb[1];
 60:         PetscFree(s->neighb[0]);
 61:       }
 62:       PetscFree(s);
 63:     }
 64:     PetscFree(sr->S);
 65:     for (i=0;i<pep->nconv;i++) {PetscFree(sr->qinfo[i].q);}
 66:     PetscFree(sr->qinfo);
 67:     for (i=0;i<3;i++) {VecDestroy(&sr->v[i]);}
 68:     EPSDestroy(&sr->eps);
 69:     PetscFree(sr);
 70:   }
 71:   ctx->sr = NULL;
 72:   return(0);
 73: }

 75: PetscErrorCode PEPReset_STOAR_QSlice(PEP pep)
 76: {
 78:   PEP_TOAR       *ctx=(PEP_TOAR*)pep->data;

 81:   PEPQSliceResetSR(pep);
 82:   PetscFree(ctx->inertias);
 83:   PetscFree(ctx->shifts);
 84:   return(0);
 85: }

 87: /*
 88:   PEPQSliceAllocateSolution - Allocate memory storage for common variables such
 89:   as eigenvalues and eigenvectors.
 90: */
 91: static PetscErrorCode PEPQSliceAllocateSolution(PEP pep)
 92: {
 94:   PEP_TOAR       *ctx=(PEP_TOAR*)pep->data;
 95:   PetscInt       k;
 96:   PetscLogDouble cnt;
 97:   BVType         type;
 98:   Vec            t;
 99:   PEP_SR         sr = ctx->sr;

102:   /* allocate space for eigenvalues and friends */
103:   k = PetscMax(1,sr->numEigs);
104:   PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
105:   PetscCalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
106:   PetscFree(sr->qinfo);
107:   PetscCalloc1(k,&sr->qinfo);
108:   cnt = 2*k*sizeof(PetscScalar) + 2*k*sizeof(PetscReal) + k*sizeof(PetscInt);
109:   PetscLogObjectMemory((PetscObject)pep,cnt);

111:   /* allocate sr->V and transfer options from pep->V */
112:   BVDestroy(&sr->V);
113:   BVCreate(PetscObjectComm((PetscObject)pep),&sr->V);
114:   PetscLogObjectParent((PetscObject)pep,(PetscObject)sr->V);
115:   if (!pep->V) { PEPGetBV(pep,&pep->V); }
116:   if (!((PetscObject)(pep->V))->type_name) {
117:     BVSetType(sr->V,BVSVEC);
118:   } else {
119:     BVGetType(pep->V,&type);
120:     BVSetType(sr->V,type);
121:   }
122:   STMatCreateVecsEmpty(pep->st,&t,NULL);
123:   BVSetSizesFromVec(sr->V,t,k+1);
124:   VecDestroy(&t);
125:   sr->ld = k;
126:   PetscFree(sr->S);
127:   PetscMalloc1((k+1)*sr->ld*(pep->nmat-1),&sr->S);
128:   return(0);
129: }

131: /* Convergence test to compute positive Ritz values */
132: static PetscErrorCode ConvergedPositive(EPS eps,PetscScalar eigr,PetscScalar eigi,PetscReal res,PetscReal *errest,void *ctx)
133: {
135:   *errest = (PetscRealPart(eigr)>0.0)?0.0:res;
136:   return(0);
137: }

139: static PetscErrorCode PEPQSliceGetInertia(PEP pep,PetscReal shift,PetscInt *inertia,PetscInt *zeros,PetscInt correction)
140: {
142:   KSP            ksp,kspr;
143:   PC             pc;
144:   Mat            F,P;
145:   PetscBool      flg;
146:   PetscReal      nzshift=0.0;
147:   PetscScalar    dot;
148:   PetscRandom    rand;
149:   PetscInt       nconv;
150:   PEP_TOAR       *ctx=(PEP_TOAR*)pep->data;
151:   PEP_SR         sr=ctx->sr;

154:   if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
155:     *inertia = 0;
156:   } else if (shift <= PETSC_MIN_REAL) {
157:     *inertia = 0;
158:     if (zeros) *zeros = 0;
159:   } else {
160:     /* If the shift is zero, perturb it to a very small positive value.
161:        The goal is that the nonzero pattern is the same in all cases and reuse
162:        the symbolic factorizations */
163:     nzshift = (shift==0.0)? 10.0/PETSC_MAX_REAL: shift;
164:     STSetShift(pep->st,nzshift);
165:     PEPEvaluateBasis(pep,nzshift,0,pep->solvematcoeffs,NULL);
166:     STSetUp(pep->st);
167:     STMatSetUp(pep->st,pep->sfactor,pep->solvematcoeffs);
168:     STGetKSP(pep->st,&ksp);
169:     KSPGetPC(ksp,&pc);
170:     PetscObjectTypeCompare((PetscObject)pc,PCREDUNDANT,&flg);
171:     if (flg) {
172:       PCRedundantGetKSP(pc,&kspr);
173:       KSPGetPC(kspr,&pc);
174:     }
175:     PCFactorGetMatrix(pc,&F);
176:     MatGetInertia(F,inertia,zeros,NULL);
177:   }
178:   if (!correction) {
179:     if (shift >= PETSC_MAX_REAL) *inertia = 2*pep->n;
180:     else if (shift>PETSC_MIN_REAL) { 
181:       KSPGetOperators(ksp,&P,NULL);
182:       if (*inertia!=pep->n && !sr->v[0]) {
183:         MatCreateVecs(P,&sr->v[0],NULL);
184:         VecDuplicate(sr->v[0],&sr->v[1]);
185:         VecDuplicate(sr->v[0],&sr->v[2]);
186:         BVGetRandomContext(pep->V,&rand);
187:         VecSetRandom(sr->v[0],rand);
188:       }
189:       if (*inertia<pep->n && *inertia>0) {
190:         if (!sr->eps) {
191:           EPSCreate(PetscObjectComm((PetscObject)pep),&sr->eps);
192:           EPSSetProblemType(sr->eps,EPS_HEP);
193:           EPSSetWhichEigenpairs(sr->eps,EPS_LARGEST_REAL);
194:           EPSSetConvergenceTestFunction(sr->eps,ConvergedPositive,NULL,NULL);
195:         }
196:         EPSSetOperators(sr->eps,P,NULL);
197:         EPSSolve(sr->eps);
198:         EPSGetConverged(sr->eps,&nconv);
199:         if (!nconv) SETERRQ1(((PetscObject)pep)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",nzshift);
200:         EPSGetEigenpair(sr->eps,0,NULL,NULL,sr->v[0],sr->v[1]); 
201:       }
202:       if (*inertia!=pep->n) {
203:         MatMult(pep->A[1],sr->v[0],sr->v[1]);
204:         MatMult(pep->A[2],sr->v[0],sr->v[2]);
205:         VecAXPY(sr->v[1],2*nzshift,sr->v[2]);
206:         VecDot(sr->v[1],sr->v[0],&dot);
207:         if (PetscRealPart(dot)>0.0) *inertia = 2*pep->n-*inertia;
208:       }
209:     }
210:   } else if (correction<0) *inertia = 2*pep->n-*inertia;
211:   return(0);
212: }

214: /*
215:    Dummy backtransform operation
216:  */
217: static PetscErrorCode PEPBackTransform_Skip(PEP pep)
218: {
220:   return(0);
221: }

223: PetscErrorCode PEPSetUp_STOAR_QSlice(PEP pep)
224: {
226:   PEP_TOAR       *ctx=(PEP_TOAR*)pep->data;
227:   PEP_SR         sr;
228:   PetscInt       ld,i,zeros=0;
229:   SlepcSC        sc;
230:   PetscBool      issinv;
231:   PetscReal      r;

234:   if (pep->intb >= PETSC_MAX_REAL && pep->inta <= PETSC_MIN_REAL) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_ARG_WRONG,"The defined computational interval should have at least one of their sides bounded");
235:   PetscObjectTypeCompareAny((PetscObject)pep->st,&issinv,STSINVERT,STCAYLEY,"");
236:   if (!issinv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Shift-and-invert or Cayley ST is needed for spectrum slicing");
237:   if (pep->tol==PETSC_DEFAULT) pep->tol = SLEPC_DEFAULT_TOL*1e-2;  /* use tighter tolerance */
238:   if (ctx->nev==0) ctx->nev = PetscMin(20,pep->n);  /* nev not set, use default value */
239:   if (pep->n>10 && ctx->nev<10) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_ARG_WRONG,"nev cannot be less than 10 in spectrum slicing runs");
240:   pep->ops->backtransform = PEPBackTransform_Skip;

242:   /* create spectrum slicing context and initialize it */
243:   PEPQSliceResetSR(pep);
244:   PetscNewLog(pep,&sr);
245:   ctx->sr   = sr;
246:   sr->itsKs = 0;
247:   sr->nleap = 0;
248:   sr->sPres = NULL;
249:   /* check presence of ends and finding direction */
250:   if (pep->inta > PETSC_MIN_REAL || pep->intb >= PETSC_MAX_REAL) {
251:     sr->int0 = pep->inta;
252:     sr->int1 = pep->intb;
253:     sr->dir = 1;
254:     if (pep->intb >= PETSC_MAX_REAL) { /* Right-open interval */
255:       sr->hasEnd = PETSC_FALSE;
256:     } else sr->hasEnd = PETSC_TRUE;
257:   } else {
258:     sr->int0 = pep->intb;
259:     sr->int1 = pep->inta;
260:     sr->dir = -1;
261:     sr->hasEnd = PetscNot(pep->inta <= PETSC_MIN_REAL);
262:   }

264:   PetscMalloc1(pep->nmat,&pep->solvematcoeffs);
265:   if (!pep->st) {PEPGetST(pep,&pep->st);}
266:   STSetTransform(pep->st,PETSC_FALSE);
267:   STSetUp(pep->st);

269:   /* compute inertia0 */
270:   ctx->hyperbolic = (pep->problem_type==PEP_HYPERBOLIC)? PETSC_TRUE: PETSC_FALSE;
271:   PEPQSliceGetInertia(pep,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL,ctx->hyperbolic?0:1);
272:   if (zeros && (sr->int0==pep->inta || sr->int0==pep->intb)) SETERRQ(((PetscObject)pep)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in the interval endpoint");

274:   /* compute inertia1 */
275:   PEPQSliceGetInertia(pep,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL,ctx->hyperbolic?0:1);
276:   if (zeros) SETERRQ(((PetscObject)pep)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
277:   if (!ctx->hyperbolic) {
278:     if (sr->dir*(sr->inertia1-sr->inertia0)<0) {
279:       sr->intcorr = -1;
280:       sr->inertia0 = 2*pep->n-sr->inertia0;
281:       sr->inertia1 = 2*pep->n-sr->inertia1;
282:     } else sr->intcorr = 1;
283:   } else {
284:     if (sr->inertia0<=pep->n && sr->inertia1<=pep->n) sr->intcorr = 1;
285:     else if (sr->inertia0>=pep->n && sr->inertia1>=pep->n) sr->intcorr = -1;
286:   }
287:   
288:   if (sr->hasEnd) {
289:     sr->dir = -sr->dir; r = sr->int0; sr->int0 = sr->int1; sr->int1 = r;
290:     i = sr->inertia0; sr->inertia0 = sr->inertia1; sr->inertia1 = i;
291:   }

293:   /* number of eigenvalues in interval */
294:   sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
295:   PetscInfo3(pep,"QSlice setup: allocating for %D eigenvalues in [%g,%g]\n",sr->numEigs,(double)pep->inta,(double)pep->intb);
296:   if (sr->numEigs) {
297:     PEPQSliceAllocateSolution(pep);
298:     PEPSetDimensions_Default(pep,ctx->nev,&ctx->ncv,&ctx->mpd);
299:     pep->nev = ctx->nev; pep->ncv = ctx->ncv; pep->mpd = ctx->mpd;
300:     if (!pep->max_it) pep->max_it = 100;
301:     ld   = ctx->ncv+2;
302:     DSSetType(pep->ds,DSGHIEP);
303:     DSSetCompact(pep->ds,PETSC_TRUE);
304:     DSAllocate(pep->ds,ld);
305:     DSGetSlepcSC(pep->ds,&sc);
306:     sc->rg            = NULL;
307:     sc->comparison    = SlepcCompareLargestMagnitude;
308:     sc->comparisonctx = NULL;
309:     sc->map           = NULL;
310:     sc->mapobj        = NULL;
311:   }
312:   return(0);
313: }

315: /*
316:    Fills the fields of a shift structure
317: */
318: static PetscErrorCode PEPCreateShift(PEP pep,PetscReal val,PEP_shift neighb0,PEP_shift neighb1)
319: {
321:   PEP_shift      s,*pending2;
322:   PetscInt       i;
323:   PEP_SR         sr;
324:   PEP_TOAR       *ctx=(PEP_TOAR*)pep->data;

327:   sr = ctx->sr;
328:   PetscNewLog(pep,&s);
329:   s->value = val;
330:   s->neighb[0] = neighb0;
331:   if (neighb0) neighb0->neighb[1] = s;
332:   s->neighb[1] = neighb1;
333:   if (neighb1) neighb1->neighb[0] = s;
334:   s->comp[0] = PETSC_FALSE;
335:   s->comp[1] = PETSC_FALSE;
336:   s->index = -1;
337:   s->neigs = 0;
338:   s->nconv[0] = s->nconv[1] = 0;
339:   s->nsch[0] = s->nsch[1]=0;
340:   /* Inserts in the stack of pending shifts */
341:   /* If needed, the array is resized */
342:   if (sr->nPend >= sr->maxPend) {
343:     sr->maxPend *= 2;
344:     PetscMalloc1(sr->maxPend,&pending2);
345:     PetscLogObjectMemory((PetscObject)pep,sizeof(PEP_shift));
346:     for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
347:     PetscFree(sr->pending);
348:     sr->pending = pending2;
349:   }
350:   sr->pending[sr->nPend++]=s;
351:   return(0);
352: }

354: /* Provides next shift to be computed */
355: static PetscErrorCode PEPExtractShift(PEP pep)
356: {
358:   PetscInt       iner,zeros=0;
359:   PEP_TOAR       *ctx=(PEP_TOAR*)pep->data;
360:   PEP_SR         sr;
361:   PetscReal      newShift,aux;
362:   PEP_shift      sPres;

365:   sr = ctx->sr;
366:   if (sr->nPend > 0) {
367:     if (sr->dirch) {
368:       aux = sr->int1; sr->int1 = sr->int0; sr->int0 = aux;
369:       iner = sr->inertia1; sr->inertia1 = sr->inertia0; sr->inertia0 = iner;
370:       sr->dir *= -1;
371:       PetscFree(sr->s0->neighb[1]);
372:       PetscFree(sr->s0);
373:       sr->nPend--;
374:       PEPCreateShift(pep,sr->int0,NULL,NULL);
375:       sr->sPrev = NULL;
376:       sr->sPres = sr->pending[--sr->nPend];
377:       pep->target = sr->sPres->value;
378:       sr->s0 = sr->sPres;
379:       pep->reason = PEP_CONVERGED_ITERATING;
380:     } else {
381:       sr->sPrev = sr->sPres;
382:       sr->sPres = sr->pending[--sr->nPend];
383:     }
384:     sPres = sr->sPres;
385:     PEPQSliceGetInertia(pep,sPres->value,&iner,ctx->detect?&zeros:NULL,sr->intcorr);
386:     if (zeros) {
387:       newShift = sPres->value*(1.0+SLICE_PTOL);
388:       if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
389:       else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
390:       PEPQSliceGetInertia(pep,newShift,&iner,&zeros,sr->intcorr);
391:       if (zeros) SETERRQ1(((PetscObject)pep)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",newShift);
392:       sPres->value = newShift;
393:     }
394:     sr->sPres->inertia = iner;
395:     pep->target = sr->sPres->value;
396:     pep->reason = PEP_CONVERGED_ITERATING;
397:     pep->its = 0;
398:   } else sr->sPres = NULL;
399:   return(0);
400: }

402: /*
403:   Obtains value of subsequent shift
404: */
405: static PetscErrorCode PEPGetNewShiftValue(PEP pep,PetscInt side,PetscReal *newS)
406: {
407:   PetscReal lambda,d_prev;
408:   PetscInt  i,idxP;
409:   PEP_SR    sr;
410:   PEP_shift sPres,s;
411:   PEP_TOAR  *ctx=(PEP_TOAR*)pep->data;

414:   sr = ctx->sr;
415:   sPres = sr->sPres;
416:   if (sPres->neighb[side]) {
417:   /* Completing a previous interval */
418:     if (!sPres->neighb[side]->neighb[side] && sPres->neighb[side]->nconv[side]==0) { /* One of the ends might be too far from eigenvalues */
419:       if (side) *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[sr->indexEig-1]]))/2;
420:       else *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[0]]))/2;
421:     } else *newS=(sPres->value + sPres->neighb[side]->value)/2;
422:   } else { /* (Only for side=1). Creating a new interval. */
423:     if (sPres->neigs==0) {/* No value has been accepted*/
424:       if (sPres->neighb[0]) {
425:         /* Multiplying by 10 the previous distance */
426:         *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
427:         sr->nleap++;
428:         /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
429:         if (!sr->hasEnd && sr->nleap > 5) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Unable to compute the wanted eigenvalues with open interval");
430:       } else { /* First shift */
431:         if (pep->nconv != 0) {
432:           /* Unaccepted values give information for next shift */
433:           idxP=0;/* Number of values left from shift */
434:           for (i=0;i<pep->nconv;i++) {
435:             lambda = PetscRealPart(pep->eigr[i]);
436:             if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
437:             else break;
438:           }
439:           /* Avoiding subtraction of eigenvalues (might be the same).*/
440:           if (idxP>0) {
441:             d_prev = PetscAbsReal(sPres->value - PetscRealPart(pep->eigr[0]))/(idxP+0.3);
442:           } else {
443:             d_prev = PetscAbsReal(sPres->value - PetscRealPart(pep->eigr[pep->nconv-1]))/(pep->nconv+0.3);
444:           }
445:           *newS = sPres->value + ((sr->dir)*d_prev*pep->nev)/2;
446:           sr->dirch = PETSC_FALSE;
447:         } else { /* No values found, no information for next shift */
448:           if (!sr->dirch) {
449:             sr->dirch = PETSC_TRUE;
450:             *newS = sr->int1;
451:           } else SETERRQ(PetscObjectComm((PetscObject)pep),1,"First shift renders no information");
452:         }
453:       }
454:     } else { /* Accepted values found */
455:       sr->dirch = PETSC_FALSE;
456:       sr->nleap = 0;
457:       /* Average distance of values in previous subinterval */
458:       s = sPres->neighb[0];
459:       while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
460:         s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
461:       }
462:       if (s) {
463:         d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
464:       } else { /* First shift. Average distance obtained with values in this shift */
465:         /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
466:         if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(pep->tol)) {
467:           d_prev =  PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
468:         } else {
469:           d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
470:         }
471:       }
472:       /* Average distance is used for next shift by adding it to value on the right or to shift */
473:       if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
474:         *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(pep->nev))/2;
475:       } else { /* Last accepted value is on the left of shift. Adding to shift */
476:         *newS = sPres->value + ((sr->dir)*d_prev*(pep->nev))/2;
477:       }
478:     }
479:     /* End of interval can not be surpassed */
480:     if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
481:   }/* of neighb[side]==null */
482:   return(0);
483: }

485: /*
486:   Function for sorting an array of real values
487: */
488: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
489: {
490:   PetscReal re;
491:   PetscInt  i,j,tmp;

494:   if (!prev) for (i=0;i<nr;i++) perm[i] = i;
495:   /* Insertion sort */
496:   for (i=1;i<nr;i++) {
497:     re = PetscRealPart(r[perm[i]]);
498:     j = i-1;
499:     while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
500:       tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
501:     }
502:   }
503:   return(0);
504: }

506: /* Stores the pairs obtained since the last shift in the global arrays */
507: static PetscErrorCode PEPStoreEigenpairs(PEP pep)
508: {
510:   PEP_TOAR       *ctx=(PEP_TOAR*)pep->data;
511:   PetscReal      lambda,err,*errest;
512:   PetscInt       i,*aux,count=0,ndef,ld,nconv=pep->nconv,d=pep->nmat-1,idx;
513:   PetscBool      iscayley,divide=PETSC_FALSE;
514:   PEP_SR         sr = ctx->sr;
515:   PEP_shift      sPres;
516:   Vec            w;
517:   Mat            MS;
518:   BV             tV;
519:   PetscScalar    *S,*eigr,*tS;

522:   sPres = sr->sPres;
523:   sPres->index = sr->indexEig;

525:   if (nconv>sr->ndef0+sr->ndef1) {
526:     /* Back-transform */
527:     STBackTransform(pep->st,nconv,pep->eigr,pep->eigi);
528:     for (i=0;i<nconv;i++) {
529: #if defined(PETSC_USE_COMPLEX)
530:       if (PetscImaginaryPart(pep->eigr[i])) pep->eigr[i] = sr->int0-sr->dir;
531: #else
532:       if (pep->eigi[i]) pep->eigr[i] = sr->int0-sr->dir;
533: #endif
534:     }
535:     PetscObjectTypeCompare((PetscObject)pep->st,STCAYLEY,&iscayley);
536:     /* Sort eigenvalues */
537:     sortRealEigenvalues(pep->eigr,pep->perm,nconv,PETSC_FALSE,sr->dir);
538:     BVGetSizes(pep->V,NULL,NULL,&ld);
539:     BVTensorGetFactors(ctx->V,NULL,&MS);
540:     MatDenseGetArray(MS,&S);
541:     /* Values stored in global array */
542:     PetscCalloc4(nconv,&eigr,nconv,&errest,nconv*nconv*d,&tS,nconv,&aux);
543:     ndef = sr->ndef0+sr->ndef1;
544:     for (i=0;i<nconv;i++) {
545:       lambda = PetscRealPart(pep->eigr[pep->perm[i]]);
546:       err = pep->errest[pep->perm[i]];
547:       if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
548:         if (sr->indexEig+count-ndef>=sr->numEigs) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Unexpected error in Spectrum Slicing");
549:         eigr[count] = lambda;
550:         errest[count] = err;
551:         if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) sPres->nconv[0]++;
552:         if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) sPres->nconv[1]++;
553:         PetscMemcpy(tS+count*(d*nconv),S+pep->perm[i]*(d*ld),nconv*sizeof(PetscScalar));
554:         PetscMemcpy(tS+count*(d*nconv)+nconv,S+pep->perm[i]*(d*ld)+ld,nconv*sizeof(PetscScalar));
555:         count++;
556:       }
557:     }
558:     for (i=0;i<count;i++) {
559:       PetscMemcpy(S+i*(d*ld),tS+i*nconv*d,nconv*sizeof(PetscScalar));
560:       PetscMemcpy(S+i*(d*ld)+ld,tS+i*nconv*d+nconv,nconv*sizeof(PetscScalar));
561:     }
562:     MatDenseRestoreArray(MS,&S);
563:     BVTensorRestoreFactors(ctx->V,NULL,&MS);
564:     BVSetActiveColumns(ctx->V,0,count);
565:     BVTensorCompress(ctx->V,count);
566:     if (sr->sPres->nconv[0] && sr->sPres->nconv[1]) {
567:       divide = PETSC_TRUE;
568:       BVTensorGetFactors(ctx->V,NULL,&MS);
569:       MatDenseGetArray(MS,&S);
570:       PetscMemzero(tS,nconv*nconv*d*sizeof(PetscScalar));
571:       for (i=0;i<count;i++) {
572:         PetscMemcpy(tS+i*nconv*d,S+i*(d*ld),count*sizeof(PetscScalar));
573:         PetscMemcpy(tS+i*nconv*d+nconv,S+i*(d*ld)+ld,count*sizeof(PetscScalar));
574:       }
575:       MatDenseRestoreArray(MS,&S);
576:       BVTensorRestoreFactors(ctx->V,NULL,&MS);
577:       BVSetActiveColumns(pep->V,0,count);
578:       BVDuplicateResize(pep->V,count,&tV);
579:       BVCopy(pep->V,tV);
580:     }
581:     if (sr->sPres->nconv[0]) {
582:       if (divide) {
583:         BVSetActiveColumns(ctx->V,0,sr->sPres->nconv[0]);
584:         BVTensorCompress(ctx->V,sr->sPres->nconv[0]);
585:       }
586:       for (i=0;i<sr->ndef0;i++) aux[i] = sr->idxDef0[i];
587:       for (i=sr->ndef0;i<sr->sPres->nconv[0];i++) aux[i] = sr->indexEig+i-sr->ndef0;
588:       BVTensorGetFactors(ctx->V,NULL,&MS);
589:       MatDenseGetArray(MS,&S);
590:       for (i=0;i<sr->sPres->nconv[0];i++) {
591:         sr->eigr[aux[i]] = eigr[i];
592:         sr->errest[aux[i]] = errest[i];
593:         BVGetColumn(pep->V,i,&w);
594:         BVInsertVec(sr->V,aux[i],w);
595:         BVRestoreColumn(pep->V,i,&w);
596:         idx = sr->ld*d*aux[i];
597:         PetscMemzero(sr->S+idx,sr->ld*d*sizeof(PetscScalar));
598:         PetscMemcpy(sr->S+idx,S+i*(ld*d),sr->sPres->nconv[0]*sizeof(PetscScalar));
599:         PetscMemcpy(sr->S+idx+sr->ld,S+i*(ld*d)+ld,sr->sPres->nconv[0]*sizeof(PetscScalar));
600:         PetscFree(sr->qinfo[aux[i]].q);
601:         PetscMalloc1(sr->sPres->nconv[0],&sr->qinfo[aux[i]].q);
602:         PetscMemcpy(sr->qinfo[aux[i]].q,aux,sr->sPres->nconv[0]*sizeof(PetscInt));
603:         sr->qinfo[aux[i]].nq = sr->sPres->nconv[0];
604:       }
605:       MatDenseRestoreArray(MS,&S);
606:       BVTensorRestoreFactors(ctx->V,NULL,&MS);
607:     }
608:   
609:     if (sr->sPres->nconv[1]) {
610:       if (divide) {
611:         BVTensorGetFactors(ctx->V,NULL,&MS);
612:         MatDenseGetArray(MS,&S);
613:         for (i=0;i<sr->sPres->nconv[1];i++) {
614:           PetscMemcpy(S+i*(d*ld),tS+(sr->sPres->nconv[0]+i)*nconv*d,count*sizeof(PetscScalar));
615:           PetscMemcpy(S+i*(d*ld)+ld,tS+(sr->sPres->nconv[0]+i)*nconv*d+nconv,count*sizeof(PetscScalar));
616:         }
617:         MatDenseRestoreArray(MS,&S);
618:         BVTensorRestoreFactors(ctx->V,NULL,&MS);
619:         BVSetActiveColumns(pep->V,0,count);
620:         BVCopy(tV,pep->V);
621:         BVSetActiveColumns(ctx->V,0,sr->sPres->nconv[1]);
622:         BVTensorCompress(ctx->V,sr->sPres->nconv[1]);
623:       }
624:       for (i=0;i<sr->ndef1;i++) aux[i] = sr->idxDef1[i];
625:       for (i=sr->ndef1;i<sr->sPres->nconv[1];i++) aux[i] = sr->indexEig+sr->sPres->nconv[0]-sr->ndef0+i-sr->ndef1;
626:       BVTensorGetFactors(ctx->V,NULL,&MS);
627:       MatDenseGetArray(MS,&S);
628:       for (i=0;i<sr->sPres->nconv[1];i++) {
629:         sr->eigr[aux[i]] = eigr[sr->sPres->nconv[0]+i];
630:         sr->errest[aux[i]] = errest[sr->sPres->nconv[0]+i];
631:         BVGetColumn(pep->V,i,&w);
632:         BVInsertVec(sr->V,aux[i],w);
633:         BVRestoreColumn(pep->V,i,&w);
634:         idx = sr->ld*d*aux[i];
635:         PetscMemzero(sr->S+idx,sr->ld*d*sizeof(PetscScalar));
636:         PetscMemcpy(sr->S+idx,S+i*(ld*d),sr->sPres->nconv[1]*sizeof(PetscScalar));
637:         PetscMemcpy(sr->S+idx+sr->ld,S+i*(ld*d)+ld,sr->sPres->nconv[1]*sizeof(PetscScalar));
638:         PetscFree(sr->qinfo[aux[i]].q);
639:         PetscMalloc1(sr->sPres->nconv[1],&sr->qinfo[aux[i]].q);
640:         PetscMemcpy(sr->qinfo[aux[i]].q,aux,sr->sPres->nconv[1]*sizeof(PetscInt));
641:         sr->qinfo[aux[i]].nq = sr->sPres->nconv[1];
642:       }
643:       MatDenseRestoreArray(MS,&S);
644:       BVTensorRestoreFactors(ctx->V,NULL,&MS);
645:     }
646:     sPres->neigs = count-sr->ndef0-sr->ndef1;
647:     sr->indexEig += sPres->neigs;
648:     sPres->nconv[0]-= sr->ndef0;
649:     sPres->nconv[1]-= sr->ndef1;
650:     PetscFree4(eigr,errest,tS,aux);
651:   } else {
652:     sPres->neigs = 0;
653:     sPres->nconv[0]= 0;
654:     sPres->nconv[1]= 0;
655:   }
656:   /* Global ordering array updating */
657:   sortRealEigenvalues(sr->eigr,sr->perm,sr->indexEig,PETSC_FALSE,sr->dir);
658:   /* Check for completion */
659:   sPres->comp[0] = PetscNot(sPres->nconv[0] < sPres->nsch[0]);
660:   sPres->comp[1] = PetscNot(sPres->nconv[1] < sPres->nsch[1]);
661:   if (sPres->nconv[0] > sPres->nsch[0] || sPres->nconv[1] > sPres->nsch[1]) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Mismatch between number of values found and information from inertia, consider using PEPKrylovSchurSetDetectZeros()");
662:   if (divide) { BVDestroy(&tV); }
663:   return(0);
664: }

666: static PetscErrorCode PEPLookForDeflation(PEP pep)
667: {
668:   PetscReal val;
669:   PetscInt  i,count0=0,count1=0;
670:   PEP_shift sPres;
671:   PetscInt  ini,fin;
672:   PEP_SR    sr;
673:   PEP_TOAR  *ctx=(PEP_TOAR*)pep->data;

676:   sr = ctx->sr;
677:   sPres = sr->sPres;

679:   if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
680:   else ini = 0;
681:   fin = sr->indexEig;
682:   /* Selection of ends for searching new values */
683:   if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
684:   else sPres->ext[0] = sPres->neighb[0]->value;
685:   if (!sPres->neighb[1]) {
686:     if (sr->hasEnd) sPres->ext[1] = sr->int1;
687:     else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
688:   } else sPres->ext[1] = sPres->neighb[1]->value;
689:   /* Selection of values between right and left ends */
690:   for (i=ini;i<fin;i++) {
691:     val=PetscRealPart(sr->eigr[sr->perm[i]]);
692:     /* Values to the right of left shift */
693:     if ((sr->dir)*(val - sPres->ext[1]) < 0) {
694:       if ((sr->dir)*(val - sPres->value) < 0) count0++;
695:       else count1++;
696:     } else break;
697:   }
698:   /* The number of values on each side are found */
699:   if (sPres->neighb[0]) {
700:     sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
701:     if (sPres->nsch[0]<0) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Mismatch between number of values found and information from inertia, consider using PEPSTOARSetDetectZeros()");
702:   } else sPres->nsch[0] = 0;

704:   if (sPres->neighb[1]) {
705:     sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
706:     if (sPres->nsch[1]<0) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Mismatch between number of values found and information from inertia, consider using PEPSTOARSetDetectZeros()");
707:   } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);

709:   /* Completing vector of indexes for deflation */
710:   for (i=0;i<count0;i++) sr->idxDef0[i] = sr->perm[ini+i];
711:   sr->ndef0 = count0;
712:   for (i=0;i<count1;i++) sr->idxDef1[i] = sr->perm[ini+count0+i];
713:   sr->ndef1 = count1;
714:   return(0);
715: }

717: /*
718:   Compute a run of Lanczos iterations
719: */
720: static PetscErrorCode PEPSTOARrun_QSlice(PEP pep,PetscReal *a,PetscReal *b,PetscReal *omega,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscBool *symmlost,Vec *t_)
721: {
723:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
724:   PetscInt       i,j,m=*M,l,lock;
725:   PetscInt       lds,d,ld,offq,nqt;
726:   Vec            v=t_[0],t=t_[1],q=t_[2];
727:   PetscReal      norm,sym=0.0,fro=0.0,*f;
728:   PetscScalar    *y,*S,sigma;
729:   PetscBLASInt   j_,one=1;
730:   PetscBool      lindep;
731:   Mat            MS;

734:   PetscMalloc1(*M,&y);
735:   BVGetSizes(pep->V,NULL,NULL,&ld);
736:   BVTensorGetDegree(ctx->V,&d);
737:   BVGetActiveColumns(pep->V,&lock,&nqt);
738:   lds = d*ld;
739:   offq = ld;

741:   *breakdown = PETSC_FALSE; /* ----- */
742:   STGetShift(pep->st,&sigma);
743:   DSGetDimensions(pep->ds,NULL,NULL,&l,NULL,NULL);
744:   BVSetActiveColumns(ctx->V,0,m);
745:   BVSetActiveColumns(pep->V,0,nqt);
746:   for (j=k;j<m;j++) {
747:     /* apply operator */
748:     BVTensorGetFactors(ctx->V,NULL,&MS);
749:     MatDenseGetArray(MS,&S);
750:     BVGetColumn(pep->V,nqt,&t);
751:     BVMultVec(pep->V,1.0,0.0,v,S+j*lds);
752:     MatMult(pep->A[1],v,q);
753:     MatMult(pep->A[2],v,t);
754:     VecAXPY(q,sigma*pep->sfactor,t);
755:     VecScale(q,pep->sfactor);
756:     BVMultVec(pep->V,1.0,0.0,v,S+offq+j*lds);
757:     MatMult(pep->A[2],v,t);
758:     VecAXPY(q,pep->sfactor*pep->sfactor,t);
759:     STMatSolve(pep->st,q,t);
760:     VecScale(t,-1.0);
761:     BVRestoreColumn(pep->V,nqt,&t);

763:     /* orthogonalize */
764:     BVOrthogonalizeColumn(pep->V,nqt,S+(j+1)*lds,&norm,&lindep);
765:     if (!lindep) {
766:       *(S+(j+1)*lds+nqt) = norm;
767:       BVScaleColumn(pep->V,nqt,1.0/norm);
768:       nqt++;
769:     }
770:     for (i=0;i<nqt;i++) *(S+(j+1)*lds+offq+i) = *(S+j*lds+i)+sigma*(*(S+(j+1)*lds+i));
771:     BVSetActiveColumns(pep->V,0,nqt);
772:     MatDenseRestoreArray(MS,&S);
773:     BVTensorRestoreFactors(ctx->V,NULL,&MS);

775:     /* level-2 orthogonalization */
776:     BVOrthogonalizeColumn(ctx->V,j+1,y,&norm,&lindep);
777:     a[j] = PetscRealPart(y[j]);
778:     omega[j+1] = (norm > 0)?1.0:-1.0;
779:     BVScaleColumn(ctx->V,j+1,1.0/norm);
780:     b[j] = PetscAbsReal(norm);

782:     /* check symmetry */
783:     DSGetArrayReal(pep->ds,DS_MAT_T,&f);
784:     if (j==k) {
785:       for (i=l;i<j-1;i++) y[i] = PetscAbsScalar(y[i])-PetscAbsReal(f[2*ld+i]);
786:       for (i=0;i<l;i++) y[i] = 0.0;
787:     }
788:     DSRestoreArrayReal(pep->ds,DS_MAT_T,&f);
789:     if (j>0) y[j-1] = PetscAbsScalar(y[j-1])-PetscAbsReal(b[j-1]);
790:     PetscBLASIntCast(j,&j_);
791:     sym = SlepcAbs(BLASnrm2_(&j_,y,&one),sym);
792:     fro = SlepcAbs(fro,SlepcAbs(a[j],b[j]));
793:     if (j>0) fro = SlepcAbs(fro,b[j-1]);
794:     if (sym/fro>PetscMax(PETSC_SQRT_MACHINE_EPSILON,10*pep->tol)) {
795:       *symmlost = PETSC_TRUE;
796:       *M=j;
797:       break;
798:     }
799:   }
800:   BVSetActiveColumns(pep->V,lock,nqt);
801:   BVSetActiveColumns(ctx->V,0,*M);
802:   PetscFree(y);
803:   return(0);
804: }

806: static PetscErrorCode PEPSTOAR_QSlice(PEP pep,Mat B)
807: {
809:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
810:   PetscInt       j,k,l,nv=0,ld,ldds,t,nq=0,idx;
811:   PetscInt       nconv=0,deg=pep->nmat-1,count0=0,count1=0;
812:   PetscScalar    *Q,*om,sigma,*back,*S,*pQ;
813:   PetscReal      beta,norm=1.0,*omega,*a,*b,*r,eta,lambda;
814:   PetscBool      breakdown,symmlost=PETSC_FALSE,sinv,falselock=PETSC_TRUE;
815:   Mat            MS,MQ;
816:   Vec            v,vomega;
817:   PEP_SR         sr;
818:   BVOrthogType   otype;
819:   BVOrthogBlockType obtype;

822:   PetscCitationsRegister(citation,&cited);

824:   /* Resize if needed for deflating vectors  */
825:   sr = ctx->sr;
826:   sigma = sr->sPres->value;
827:   k = sr->ndef0+sr->ndef1;
828:   pep->ncv = ctx->ncv+k;
829:   pep->nev = ctx->nev+k;
830:   PEPAllocateSolution(pep,3);
831:   BVDestroy(&ctx->V);
832:   BVCreateTensor(pep->V,pep->nmat-1,&ctx->V);
833:   BVGetOrthogonalization(pep->V,&otype,NULL,&eta,&obtype);
834:   BVSetOrthogonalization(ctx->V,otype,BV_ORTHOG_REFINE_ALWAYS,eta,obtype);
835:   DSAllocate(pep->ds,pep->ncv+2);
836:   PetscMalloc1(pep->ncv,&back);
837:   DSGetLeadingDimension(pep->ds,&ldds);
838:   BVSetMatrix(ctx->V,B,PETSC_TRUE);
839:   if (ctx->lock) {
840:     PetscOptionsGetBool(NULL,NULL,"-pep_stoar_falselocking",&falselock,NULL);
841:   } else SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"A locking variant is needed for spectrum slicing");
842:   PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
843:   RGPushScale(pep->rg,sinv?pep->sfactor:1.0/pep->sfactor);
844:   STScaleShift(pep->st,sinv?pep->sfactor:1.0/pep->sfactor);

846:   /* Get the starting Arnoldi vector */
847:   BVSetActiveColumns(pep->V,0,1);
848:   BVTensorBuildFirstColumn(ctx->V,pep->nini);
849:   BVSetActiveColumns(ctx->V,0,1);
850:   if (k) {
851:     /* Insert deflated vectors */
852:     BVSetActiveColumns(pep->V,0,0);
853:     idx = sr->ndef0?sr->idxDef0[0]:sr->idxDef1[0];
854:     for (j=0;j<k;j++) {
855:       BVGetColumn(pep->V,j,&v);
856:       BVCopyVec(sr->V,sr->qinfo[idx].q[j],v);
857:       BVRestoreColumn(pep->V,j,&v);
858:     }
859:     /* Update innerproduct matrix */
860:     BVSetActiveColumns(ctx->V,0,0);
861:     BVTensorGetFactors(ctx->V,NULL,&MS);
862:     BVSetActiveColumns(pep->V,0,k);
863:     BVTensorRestoreFactors(ctx->V,NULL,&MS);

865:     BVGetSizes(pep->V,NULL,NULL,&ld);
866:     BVTensorGetFactors(ctx->V,NULL,&MS);
867:     MatDenseGetArray(MS,&S);
868:     for (j=0;j<sr->ndef0;j++) {
869:       PetscMemzero(S+j*ld*deg,ld*deg*sizeof(PetscScalar));
870:       PetscMemcpy(S+j*ld*deg,sr->S+sr->idxDef0[j]*sr->ld*deg,k*sizeof(PetscScalar));
871:       PetscMemcpy(S+j*ld*deg+ld,sr->S+sr->idxDef0[j]*sr->ld*deg+sr->ld,k*sizeof(PetscScalar));
872:       pep->eigr[j] = sr->eigr[sr->idxDef0[j]];
873:       pep->errest[j] = sr->errest[sr->idxDef0[j]];
874:     }
875:     for (j=0;j<sr->ndef1;j++) {
876:       PetscMemzero(S+(j+sr->ndef0)*ld*deg,ld*deg*sizeof(PetscScalar));
877:       PetscMemcpy(S+(j+sr->ndef0)*ld*deg,sr->S+sr->idxDef1[j]*sr->ld*deg,k*sizeof(PetscScalar));
878:       PetscMemcpy(S+(j+sr->ndef0)*ld*deg+ld,sr->S+sr->idxDef1[j]*sr->ld*deg+sr->ld,k*sizeof(PetscScalar));
879:       pep->eigr[j+sr->ndef0] = sr->eigr[sr->idxDef1[j]];
880:       pep->errest[j+sr->ndef0] = sr->errest[sr->idxDef1[j]];
881:     }
882:     MatDenseRestoreArray(MS,&S);
883:     BVTensorRestoreFactors(ctx->V,NULL,&MS);
884:     BVSetActiveColumns(ctx->V,0,k+1);
885:     VecCreateSeq(PETSC_COMM_SELF,k+1,&vomega);
886:     VecGetArray(vomega,&om);
887:     for (j=0;j<k;j++) {
888:       BVOrthogonalizeColumn(ctx->V,j,NULL,&norm,NULL);
889:       BVScaleColumn(ctx->V,j,1/norm);
890:       om[j] = (norm>=0.0)?1.0:-1.0;
891:     }
892:     BVTensorGetFactors(ctx->V,NULL,&MS);
893:     MatDenseGetArray(MS,&S);
894:     for (j=0;j<deg;j++) {
895:       BVSetRandomColumn(pep->V,k+j);
896:       BVOrthogonalizeColumn(pep->V,k+j,S+k*ld*deg+j*ld,&norm,NULL);
897:       BVScaleColumn(pep->V,k+j,1.0/norm);
898:       S[k*ld*deg+j*ld+k+j] = norm;
899:     }
900:     MatDenseRestoreArray(MS,&S);
901:     BVSetActiveColumns(pep->V,0,k+deg);
902:     BVTensorRestoreFactors(ctx->V,NULL,&MS);
903:     BVOrthogonalizeColumn(ctx->V,k,NULL,&norm,NULL);
904:     BVScaleColumn(ctx->V,k,1.0/norm);
905:     om[k] = (norm>=0.0)?1.0:-1.0;
906:     VecRestoreArray(vomega,&om);
907:     BVSetSignature(ctx->V,vomega);
908:     DSGetArrayReal(pep->ds,DS_MAT_T,&a);
909:     VecGetArray(vomega,&om);
910:     for (j=0;j<k;j++) a[j] = PetscRealPart(om[j]/(pep->eigr[j]-sigma));
911:     VecRestoreArray(vomega,&om);
912:     VecDestroy(&vomega);
913:     DSRestoreArrayReal(pep->ds,DS_MAT_T,&a);
914:     DSGetArray(pep->ds,DS_MAT_Q,&pQ);
915:     PetscMemzero(pQ,ldds*k*sizeof(PetscScalar));
916:     for (j=0;j<k;j++) pQ[j+j*ldds] = 1.0;
917:     DSRestoreArray(pep->ds,DS_MAT_Q,&pQ);
918:   }
919:   BVSetActiveColumns(ctx->V,0,k+1);
920:   DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
921:   VecCreateSeq(PETSC_COMM_SELF,k+1,&vomega);
922:   BVGetSignature(ctx->V,vomega);
923:   VecGetArray(vomega,&om);
924:   for (j=0;j<k+1;j++) omega[j] = PetscRealPart(om[j]);
925:   VecRestoreArray(vomega,&om);
926:   DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
927:   VecDestroy(&vomega);

929:   PetscInfo7(pep,"Start STOAR: sigma=%g in [%g,%g], for deflation: left=%D right=%D, searching: left=%D right=%D\n",(double)sr->sPres->value,(double)(sr->sPres->neighb[0]?sr->sPres->neighb[0]->value:sr->int0),(double)(sr->sPres->neighb[1]?sr->sPres->neighb[1]->value:sr->int1),sr->ndef0,sr->ndef1,sr->sPres->nsch[0],sr->sPres->nsch[1]);

931:   /* Restart loop */
932:   l = 0;
933:   pep->nconv = k;
934:   while (pep->reason == PEP_CONVERGED_ITERATING) {
935:     pep->its++;
936:     DSGetArrayReal(pep->ds,DS_MAT_T,&a);
937:     b = a+ldds;
938:     DSGetArrayReal(pep->ds,DS_MAT_D,&omega);

940:     /* Compute an nv-step Lanczos factorization */
941:     nv = PetscMin(pep->nconv+pep->mpd,pep->ncv);
942:     PEPSTOARrun_QSlice(pep,a,b,omega,pep->nconv+l,&nv,&breakdown,&symmlost,pep->work);
943:     beta = b[nv-1];
944:     if (symmlost && nv==pep->nconv+l) {
945:       pep->reason = PEP_DIVERGED_SYMMETRY_LOST;
946:       pep->nconv = nconv;
947:       PetscInfo2(pep,"Symmetry lost in STOAR sigma=%g nconv=%D)\n",(double)sr->sPres->value,nconv);
948:       if (falselock || !ctx->lock) {
949:         BVSetActiveColumns(ctx->V,0,pep->nconv);
950:         BVTensorCompress(ctx->V,0);
951:       }
952:       break;
953:     }
954:     DSRestoreArrayReal(pep->ds,DS_MAT_T,&a);
955:     DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
956:     DSSetDimensions(pep->ds,nv,0,pep->nconv,pep->nconv+l);
957:     if (l==0) {
958:       DSSetState(pep->ds,DS_STATE_INTERMEDIATE);
959:     } else {
960:       DSSetState(pep->ds,DS_STATE_RAW);
961:     }

963:     /* Solve projected problem */
964:     DSSolve(pep->ds,pep->eigr,pep->eigi);
965:     DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
966:     DSSynchronize(pep->ds,pep->eigr,pep->eigi);

968:     /* Check convergence */
969:     /* PEPSTOARpreKConvergence(pep,nv,&norm,pep->work);*/
970:     norm = 1.0;
971:     DSGetDimensions(pep->ds,NULL,NULL,NULL,NULL,&t);
972:     PEPKrylovConvergence(pep,PETSC_FALSE,pep->nconv,t-pep->nconv,PetscAbsReal(beta)*norm,&k);
973:     (*pep->stopping)(pep,pep->its,pep->max_it,k,pep->nev,&pep->reason,pep->stoppingctx);
974:     for (j=0;j<k;j++) back[j] = pep->eigr[j];
975:     STBackTransform(pep->st,k,back,pep->eigi);
976:     count0=count1=0;
977:     for (j=0;j<k;j++) {
978:       lambda = PetscRealPart(back[j]);
979:       if (((sr->dir)*(sr->sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sr->sPres->ext[0]) > 0)) count0++;
980:       if (((sr->dir)*(lambda - sr->sPres->value) > 0) && ((sr->dir)*(sr->sPres->ext[1] - lambda) > 0)) count1++;
981:     }
982:     if ((count0-sr->ndef0 >= sr->sPres->nsch[0]) && (count1-sr->ndef1 >= sr->sPres->nsch[1])) pep->reason = PEP_CONVERGED_TOL;
983:     /* Update l */
984:     if (pep->reason != PEP_CONVERGED_ITERATING || breakdown) l = 0;
985:     else {
986:       l = PetscMax(1,(PetscInt)((nv-k)/2));
987:       l = PetscMin(l,t);
988:       if (!breakdown) {
989:         DSGetArrayReal(pep->ds,DS_MAT_T,&a);
990:         if (*(a+ldds+k+l-1)!=0) {
991:           if (k+l<nv-1) l = l+1;
992:           else l = l-1;
993:         }
994:         /* Prepare the Rayleigh quotient for restart */
995:         DSGetArray(pep->ds,DS_MAT_Q,&Q);
996:         DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
997:         r = a + 2*ldds;
998:         for (j=k;j<k+l;j++) {
999:           r[j] = PetscRealPart(Q[nv-1+j*ldds]*beta);
1000:         }
1001:         b = a+ldds;
1002:         b[k+l-1] = r[k+l-1];
1003:         omega[k+l] = omega[nv];
1004:         DSRestoreArray(pep->ds,DS_MAT_Q,&Q);
1005:         DSRestoreArrayReal(pep->ds,DS_MAT_T,&a);
1006:         DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);
1007:       }
1008:     }
1009:     nconv = k;
1010:     if (!ctx->lock && pep->reason == PEP_CONVERGED_ITERATING && !breakdown) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */

1012:     /* Update S */
1013:     DSGetMat(pep->ds,DS_MAT_Q,&MQ);
1014:     BVMultInPlace(ctx->V,MQ,pep->nconv,k+l);
1015:     MatDestroy(&MQ);

1017:     /* Copy last column of S */
1018:     BVCopyColumn(ctx->V,nv,k+l);
1019:     DSGetArrayReal(pep->ds,DS_MAT_D,&omega);
1020:     VecCreateSeq(PETSC_COMM_SELF,k+l,&vomega);
1021:     VecGetArray(vomega,&om);
1022:     for (j=0;j<k+l;j++) om[j] = omega[j];
1023:     VecRestoreArray(vomega,&om);
1024:     BVSetActiveColumns(ctx->V,0,k+l);
1025:     BVSetSignature(ctx->V,vomega);
1026:     VecDestroy(&vomega);
1027:     DSRestoreArrayReal(pep->ds,DS_MAT_D,&omega);

1029:     if (breakdown && pep->reason == PEP_CONVERGED_ITERATING) {
1030:       /* stop if breakdown */
1031:       PetscInfo2(pep,"Breakdown TOAR method (it=%D norm=%g)\n",pep->its,(double)beta);
1032:       pep->reason = PEP_DIVERGED_BREAKDOWN;
1033:     }
1034:     if (pep->reason != PEP_CONVERGED_ITERATING) l--;
1035:     BVGetActiveColumns(pep->V,NULL,&nq);
1036:     if (k+l+deg<=nq) {
1037:       BVSetActiveColumns(ctx->V,pep->nconv,k+l+1);
1038:       if (!falselock && ctx->lock) {
1039:         BVTensorCompress(ctx->V,k-pep->nconv);
1040:       } else {
1041:         BVTensorCompress(ctx->V,0);
1042:       }
1043:     }
1044:     pep->nconv = k;
1045:     PEPMonitor(pep,pep->its,nconv,pep->eigr,pep->eigi,pep->errest,nv);
1046:   }
1047:   sr->itsKs += pep->its;
1048:   if (pep->nconv>0) {
1049:     BVSetActiveColumns(ctx->V,0,pep->nconv);
1050:     BVGetActiveColumns(pep->V,NULL,&nq);
1051:     BVSetActiveColumns(pep->V,0,nq);
1052:     if (nq>pep->nconv) {
1053:       BVTensorCompress(ctx->V,pep->nconv);
1054:       BVSetActiveColumns(pep->V,0,pep->nconv);
1055:     }
1056:     for (j=0;j<pep->nconv;j++) {
1057:       pep->eigr[j] *= pep->sfactor;
1058:       pep->eigi[j] *= pep->sfactor;
1059:     }
1060:   }
1061:   PetscInfo4(pep,"Finished STOAR: nconv=%D (deflated=%D, left=%D, right=%D)\n",pep->nconv,sr->ndef0+sr->ndef1,count0-sr->ndef0,count1-sr->ndef1);
1062:   STScaleShift(pep->st,sinv?1.0/pep->sfactor:pep->sfactor);
1063:   RGPopScale(pep->rg);

1065:   if (pep->reason == PEP_DIVERGED_SYMMETRY_LOST && nconv<sr->ndef0+sr->ndef1) SETERRQ1(PetscObjectComm((PetscObject)pep),1,"Symmetry lost at sigma=%g",(double)sr->sPres->value);
1066:   if (pep->reason == PEP_DIVERGED_SYMMETRY_LOST && nconv==sr->ndef0+sr->ndef1) {
1067:     if (++sr->symmlost>10) SETERRQ1(PetscObjectComm((PetscObject)pep),1,"Symmetry lost at sigma=%g",(double)sr->sPres->value);
1068:   } else sr->symmlost = 0;

1070:   /* truncate Schur decomposition and change the state to raw so that
1071:      DSVectors() computes eigenvectors from scratch */
1072:   DSSetDimensions(pep->ds,pep->nconv,0,0,0);
1073:   DSSetState(pep->ds,DS_STATE_RAW);
1074:   PetscFree(back);
1075:   return(0);
1076: }

1078: #define SWAP(a,b,t) {t=a;a=b;b=t;}

1080: static PetscErrorCode PEPQSliceGetInertias(PEP pep,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
1081: {
1082:   PetscErrorCode  ierr;
1083:   PEP_TOAR       *ctx=(PEP_TOAR*)pep->data;
1084:   PEP_SR          sr=ctx->sr;
1085:   PetscInt        i=0,j,tmpi;
1086:   PetscReal       v,tmpr;
1087:   PEP_shift       s;

1090:   if (!pep->state) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_ARG_WRONGSTATE,"Must call PEPSetUp() first");
1091:   if (!sr) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_ARG_WRONGSTATE,"Only available in interval computations, see PEPSetInterval()");
1092:   if (!sr->s0) {  /* PEPSolve not called yet */
1093:     *n = 2;
1094:   } else {
1095:     *n = 1;
1096:     s = sr->s0;
1097:     while (s) {
1098:       (*n)++;
1099:       s = s->neighb[1];
1100:     }
1101:   }
1102:   PetscMalloc1(*n,shifts);
1103:   PetscMalloc1(*n,inertias);
1104:   if (!sr->s0) {  /* PEPSolve not called yet */
1105:     (*shifts)[0]   = sr->int0;
1106:     (*shifts)[1]   = sr->int1;
1107:     (*inertias)[0] = sr->inertia0;
1108:     (*inertias)[1] = sr->inertia1;
1109:   } else {
1110:     s = sr->s0;
1111:     while (s) {
1112:       (*shifts)[i]     = s->value;
1113:       (*inertias)[i++] = s->inertia;
1114:       s = s->neighb[1];
1115:     }
1116:     (*shifts)[i]   = sr->int1;
1117:     (*inertias)[i] = sr->inertia1;
1118:   }
1119:   /* remove possible duplicate in last position */
1120:   if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
1121:   /* sort result */
1122:   for (i=0;i<*n;i++) {
1123:     v = (*shifts)[i];
1124:     for (j=i+1;j<*n;j++) {
1125:       if (v > (*shifts)[j]) {
1126:         SWAP((*shifts)[i],(*shifts)[j],tmpr);
1127:         SWAP((*inertias)[i],(*inertias)[j],tmpi);
1128:         v = (*shifts)[i];
1129:       }
1130:     }
1131:   }
1132:   return(0);
1133: }

1135: PetscErrorCode PEPSolve_STOAR_QSlice(PEP pep)
1136: {
1138:   PetscInt       i,j,ti,deg=pep->nmat-1;
1139:   PetscReal      newS;
1140:   PEP_TOAR       *ctx=(PEP_TOAR*)pep->data;
1141:   PEP_SR         sr=ctx->sr;
1142:   Mat            S,B;
1143:   PetscScalar    *pS;

1146:   /* Only with eigenvalues present in the interval ...*/
1147:   if (sr->numEigs==0) {
1148:     pep->reason = PEP_CONVERGED_TOL;
1149:     return(0);
1150:   }

1152:   /* Inner product matrix */
1153:   PEPSTOARSetUpInnerMatrix(pep,&B);

1155:   /* Array of pending shifts */
1156:   sr->maxPend = 100; /* Initial size */
1157:   sr->nPend = 0;
1158:   PetscMalloc1(sr->maxPend,&sr->pending);
1159:   PetscLogObjectMemory((PetscObject)pep,(sr->maxPend)*sizeof(PEP_shift));
1160:   PEPCreateShift(pep,sr->int0,NULL,NULL);
1161:   /* extract first shift */
1162:   sr->sPrev = NULL;
1163:   sr->sPres = sr->pending[--sr->nPend];
1164:   sr->sPres->inertia = sr->inertia0;
1165:   pep->target = sr->sPres->value;
1166:   sr->s0 = sr->sPres;
1167:   sr->indexEig = 0;

1169:   /* Memory reservation for auxiliary variables */
1170:   PetscLogObjectMemory((PetscObject)pep,(sr->numEigs+2*pep->ncv)*sizeof(PetscScalar));
1171:   for (i=0;i<sr->numEigs;i++) {
1172:     sr->eigr[i]   = 0.0;
1173:     sr->eigi[i]   = 0.0;
1174:     sr->errest[i] = 0.0;
1175:     sr->perm[i]   = i;
1176:   }
1177:   /* Vectors for deflation */
1178:   PetscMalloc2(sr->numEigs,&sr->idxDef0,sr->numEigs,&sr->idxDef1);
1179:   PetscLogObjectMemory((PetscObject)pep,sr->numEigs*sizeof(PetscInt));
1180:   sr->indexEig = 0;
1181:   while (sr->sPres) {
1182:     /* Search for deflation */
1183:     PEPLookForDeflation(pep);
1184:     /* KrylovSchur */
1185:     PEPSTOAR_QSlice(pep,B);

1187:     PEPStoreEigenpairs(pep);
1188:     /* Select new shift */
1189:     if (!sr->sPres->comp[1]) {
1190:       PEPGetNewShiftValue(pep,1,&newS);
1191:       PEPCreateShift(pep,newS,sr->sPres,sr->sPres->neighb[1]);
1192:     }
1193:     if (!sr->sPres->comp[0]) {
1194:       /* Completing earlier interval */
1195:       PEPGetNewShiftValue(pep,0,&newS);
1196:       PEPCreateShift(pep,newS,sr->sPres->neighb[0],sr->sPres);
1197:     }
1198:     /* Preparing for a new search of values */
1199:     PEPExtractShift(pep);
1200:   }

1202:   /* Updating pep values prior to exit */
1203:   PetscFree2(sr->idxDef0,sr->idxDef1);
1204:   PetscFree(sr->pending);
1205:   pep->nconv  = sr->indexEig;
1206:   pep->reason = PEP_CONVERGED_TOL;
1207:   pep->its    = sr->itsKs;
1208:   pep->nev    = sr->indexEig;
1209:   MatCreateSeqDense(PETSC_COMM_SELF,pep->nconv,pep->nconv,NULL,&S);
1210:   MatDenseGetArray(S,&pS);
1211:   for (i=0;i<pep->nconv;i++) {
1212:     for (j=0;j<sr->qinfo[i].nq;j++) pS[i*pep->nconv+sr->qinfo[i].q[j]] = *(sr->S+i*sr->ld*deg+j);
1213:   }
1214:   MatDenseRestoreArray(S,&pS);
1215:   BVSetActiveColumns(sr->V,0,pep->nconv);
1216:   BVMultInPlace(sr->V,S,0,pep->nconv);
1217:   MatDestroy(&S);
1218:   BVDestroy(&pep->V);
1219:   pep->V = sr->V;
1220:   PetscFree4(pep->eigr,pep->eigi,pep->errest,pep->perm);
1221:   pep->eigr   = sr->eigr;
1222:   pep->eigi   = sr->eigi;
1223:   pep->perm   = sr->perm;
1224:   pep->errest = sr->errest;
1225:   if (sr->dir<0) {
1226:     for (i=0;i<pep->nconv/2;i++) {
1227:       ti = sr->perm[i]; sr->perm[i] = sr->perm[pep->nconv-1-i]; sr->perm[pep->nconv-1-i] = ti;
1228:     }
1229:   }
1230:   PetscFree(ctx->inertias);
1231:   PetscFree(ctx->shifts);
1232:   MatDestroy(&B);
1233:   PEPQSliceGetInertias(pep,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1234:   return(0);
1235: }