Actual source code: ks-slice.c
slepc-3.12.1 2019-11-08
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: SLEPc eigensolver: "krylovschur"
13: Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems
15: References:
17: [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
18: solving sparse symmetric generalized eigenproblems", SIAM J.
19: Matrix Anal. Appl. 15(1):228-272, 1994.
21: [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
22: on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
23: 2012.
24: */
26: #include <slepc/private/epsimpl.h>
27: #include "krylovschur.h"
29: static PetscBool cited = PETSC_FALSE;
30: static const char citation[] =
31: "@Article{slepc-slice,\n"
32: " author = \"C. Campos and J. E. Roman\",\n"
33: " title = \"Strategies for spectrum slicing based on restarted {Lanczos} methods\",\n"
34: " journal = \"Numer. Algorithms\",\n"
35: " volume = \"60\",\n"
36: " number = \"2\",\n"
37: " pages = \"279--295\",\n"
38: " year = \"2012,\"\n"
39: " doi = \"https://doi.org/10.1007/s11075-012-9564-z\"\n"
40: "}\n";
42: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON
44: #define InertiaMismatch(h,d) \
45: do { \
46: SETERRQ1(PetscObjectComm((PetscObject)h),1,"Mismatch between number of values found and information from inertia%s",d?"":", consider using EPSKrylovSchurSetDetectZeros()"); \
47: } while (0)
49: static PetscErrorCode EPSSliceResetSR(EPS eps)
50: {
51: PetscErrorCode ierr;
52: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
53: EPS_SR sr=ctx->sr;
54: EPS_shift s;
57: if (sr) {
58: if (ctx->npart>1) {
59: BVDestroy(&sr->V);
60: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
61: }
62: /* Reviewing list of shifts to free memory */
63: s = sr->s0;
64: if (s) {
65: while (s->neighb[1]) {
66: s = s->neighb[1];
67: PetscFree(s->neighb[0]);
68: }
69: PetscFree(s);
70: }
71: PetscFree(sr);
72: }
73: ctx->sr = NULL;
74: return(0);
75: }
77: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
78: {
79: PetscErrorCode ierr;
80: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
83: if (!ctx->global) return(0);
84: /* Destroy auxiliary EPS */
85: EPSSliceResetSR(ctx->eps);
86: EPSDestroy(&ctx->eps);
87: if (ctx->npart>1) {
88: PetscSubcommDestroy(&ctx->subc);
89: if (ctx->commset) {
90: MPI_Comm_free(&ctx->commrank);
91: ctx->commset = PETSC_FALSE;
92: }
93: }
94: PetscFree(ctx->subintervals);
95: PetscFree(ctx->nconv_loc);
96: EPSSliceResetSR(eps);
97: PetscFree(ctx->inertias);
98: PetscFree(ctx->shifts);
99: if (ctx->npart>1) {
100: ISDestroy(&ctx->isrow);
101: ISDestroy(&ctx->iscol);
102: MatDestroyMatrices(1,&ctx->submata);
103: MatDestroyMatrices(1,&ctx->submatb);
104: }
105: return(0);
106: }
108: /*
109: EPSSliceAllocateSolution - Allocate memory storage for common variables such
110: as eigenvalues and eigenvectors. The argument extra is used for methods
111: that require a working basis slightly larger than ncv.
112: */
113: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
114: {
115: PetscErrorCode ierr;
116: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
117: PetscReal eta;
118: PetscInt k;
119: PetscLogDouble cnt;
120: BVType type;
121: BVOrthogType orthog_type;
122: BVOrthogRefineType orthog_ref;
123: BVOrthogBlockType ob_type;
124: Mat matrix;
125: Vec t;
126: EPS_SR sr = ctx->sr;
129: /* allocate space for eigenvalues and friends */
130: k = PetscMax(1,sr->numEigs);
131: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
132: PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
133: cnt = 2*k*sizeof(PetscScalar) + 2*k*sizeof(PetscReal) + k*sizeof(PetscInt);
134: PetscLogObjectMemory((PetscObject)eps,cnt);
136: /* allocate sr->V and transfer options from eps->V */
137: BVDestroy(&sr->V);
138: BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
139: PetscLogObjectParent((PetscObject)eps,(PetscObject)sr->V);
140: if (!eps->V) { EPSGetBV(eps,&eps->V); }
141: if (!((PetscObject)(eps->V))->type_name) {
142: BVSetType(sr->V,BVSVEC);
143: } else {
144: BVGetType(eps->V,&type);
145: BVSetType(sr->V,type);
146: }
147: STMatCreateVecsEmpty(eps->st,&t,NULL);
148: BVSetSizesFromVec(sr->V,t,k);
149: VecDestroy(&t);
150: EPS_SetInnerProduct(eps);
151: BVGetMatrix(eps->V,&matrix,NULL);
152: BVSetMatrix(sr->V,matrix,PETSC_FALSE);
153: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
154: BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
155: return(0);
156: }
158: static PetscErrorCode EPSSliceGetEPS(EPS eps)
159: {
160: PetscErrorCode ierr;
161: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
162: BV V;
163: BVType type;
164: PetscReal eta;
165: BVOrthogType orthog_type;
166: BVOrthogRefineType orthog_ref;
167: BVOrthogBlockType ob_type;
168: Mat A,B=NULL,Ar=NULL,Br=NULL;
169: PetscInt i;
170: PetscReal h,a,b,zero;
171: PetscMPIInt rank;
172: EPS_SR sr=ctx->sr;
173: PC pc;
174: PCType pctype;
175: KSP ksp;
176: KSPType ksptype;
177: STType sttype;
178: PetscObjectState Astate,Bstate=0;
179: PetscObjectId Aid,Bid=0;
180: MatSolverType stype;
183: EPSGetOperators(eps,&A,&B);
184: if (ctx->npart==1) {
185: if (!ctx->eps) { EPSCreate(((PetscObject)eps)->comm,&ctx->eps); }
186: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
187: EPSSetOperators(ctx->eps,A,B);
188: a = eps->inta; b = eps->intb;
189: } else {
190: PetscObjectStateGet((PetscObject)A,&Astate);
191: PetscObjectGetId((PetscObject)A,&Aid);
192: if (B) {
193: PetscObjectStateGet((PetscObject)B,&Bstate);
194: PetscObjectGetId((PetscObject)B,&Bid);
195: }
196: if (!ctx->subc) {
197: /* Create context for subcommunicators */
198: PetscSubcommCreate(PetscObjectComm((PetscObject)eps),&ctx->subc);
199: PetscSubcommSetNumber(ctx->subc,ctx->npart);
200: PetscSubcommSetType(ctx->subc,PETSC_SUBCOMM_CONTIGUOUS);
201: PetscLogObjectMemory((PetscObject)eps,sizeof(PetscSubcomm));
203: /* Duplicate matrices */
204: MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
205: ctx->Astate = Astate;
206: ctx->Aid = Aid;
207: if (B) {
208: MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br);
209: ctx->Bstate = Bstate;
210: ctx->Bid = Bid;
211: }
212: } else {
213: if (ctx->Astate != Astate || (B && ctx->Bstate != Bstate) || ctx->Aid != Aid || (B && ctx->Bid != Bid)) {
214: EPSGetOperators(ctx->eps,&Ar,&Br);
215: MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
216: ctx->Astate = Astate;
217: ctx->Aid = Aid;
218: if (B) {
219: MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br);
220: ctx->Bstate = Bstate;
221: ctx->Bid = Bid;
222: }
223: EPSSetOperators(ctx->eps,Ar,Br);
224: MatDestroy(&Ar);
225: MatDestroy(&Br);
226: }
227: }
229: /* Determine subintervals */
230: if (!ctx->subintset) { /* uniform distribution if no set by user */
231: if (!sr->hasEnd) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Global interval must be bounded for splitting it in uniform subintervals");
232: h = (eps->intb-eps->inta)/ctx->npart;
233: a = eps->inta+ctx->subc->color*h;
234: b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
235: PetscFree(ctx->subintervals);
236: PetscMalloc1(ctx->npart+1,&ctx->subintervals);
237: for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
238: ctx->subintervals[ctx->npart] = eps->intb;
239: } else {
240: a = ctx->subintervals[ctx->subc->color];
241: b = ctx->subintervals[ctx->subc->color+1];
242: }
244: if (!ctx->eps) {
245: /* Create auxiliary EPS */
246: EPSCreate(PetscSubcommChild(ctx->subc),&ctx->eps);
247: EPSSetOperators(ctx->eps,Ar,Br);
248: MatDestroy(&Ar);
249: MatDestroy(&Br);
250: }
252: /* Create subcommunicator grouping processes with same rank */
253: if (ctx->commset) { MPI_Comm_free(&ctx->commrank); }
254: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
255: MPI_Comm_split(((PetscObject)eps)->comm,rank,ctx->subc->color,&ctx->commrank);
256: ctx->commset = PETSC_TRUE;
257: }
258: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
260: /* Transfer options for ST, KSP and PC */
261: STGetType(eps->st,&sttype);
262: STSetType(ctx->eps->st,sttype);
263: STGetKSP(eps->st,&ksp);
264: KSPGetType(ksp,&ksptype);
265: KSPGetPC(ksp,&pc);
266: PCGetType(pc,&pctype);
267: PCFactorGetMatSolverType(pc,&stype);
268: PCFactorGetZeroPivot(pc,&zero);
269: STGetKSP(ctx->eps->st,&ksp);
270: KSPSetType(ksp,ksptype);
271: KSPGetPC(ksp,&pc);
272: PCSetType(pc,pctype);
273: PCFactorSetZeroPivot(pc,zero);
274: if (stype) { PCFactorSetMatSolverType(pc,stype); }
276: EPSSetConvergenceTest(ctx->eps,eps->conv);
277: EPSSetInterval(ctx->eps,a,b);
278: ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
279: ctx_local->npart = ctx->npart;
280: ctx_local->detect = ctx->detect;
281: ctx_local->global = PETSC_FALSE;
282: ctx_local->eps = eps;
283: ctx_local->subc = ctx->subc;
284: ctx_local->commrank = ctx->commrank;
286: EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
287: EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);
289: /* transfer options from eps->V */
290: EPSGetBV(ctx->eps,&V);
291: if (!eps->V) { EPSGetBV(eps,&eps->V); }
292: if (!((PetscObject)(eps->V))->type_name) {
293: BVSetType(V,BVSVEC);
294: } else {
295: BVGetType(eps->V,&type);
296: BVSetType(V,type);
297: }
298: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
299: BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);
300: ctx->eps->which = eps->which;
301: ctx->eps->max_it = eps->max_it;
302: ctx->eps->tol = eps->tol;
303: ctx->eps->purify = eps->purify;
304: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
305: EPSSetProblemType(ctx->eps,eps->problem_type);
306: EPSSetUp(ctx->eps);
307: ctx->eps->nconv = 0;
308: ctx->eps->its = 0;
309: for (i=0;i<ctx->eps->ncv;i++) {
310: ctx->eps->eigr[i] = 0.0;
311: ctx->eps->eigi[i] = 0.0;
312: ctx->eps->errest[i] = 0.0;
313: }
314: return(0);
315: }
317: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
318: {
320: KSP ksp;
321: PC pc;
322: Mat F;
323: PetscReal nzshift;
326: if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
327: if (inertia) *inertia = eps->n;
328: } else if (shift <= PETSC_MIN_REAL) {
329: if (inertia) *inertia = 0;
330: if (zeros) *zeros = 0;
331: } else {
332: /* If the shift is zero, perturb it to a very small positive value.
333: The goal is that the nonzero pattern is the same in all cases and reuse
334: the symbolic factorizations */
335: nzshift = (shift==0.0)? 10.0/PETSC_MAX_REAL: shift;
336: STSetShift(eps->st,nzshift);
337: STSetUp(eps->st);
338: STGetKSP(eps->st,&ksp);
339: KSPGetPC(ksp,&pc);
340: PCFactorGetMatrix(pc,&F);
341: MatGetInertia(F,inertia,zeros,NULL);
342: }
343: return(0);
344: }
346: /*
347: Dummy backtransform operation
348: */
349: static PetscErrorCode EPSBackTransform_Skip(EPS eps)
350: {
352: return(0);
353: }
355: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
356: {
357: PetscErrorCode ierr;
358: PetscBool issinv;
359: EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
360: EPS_SR sr,sr_loc,sr_glob;
361: PetscInt nEigs,dssz=1,i,zeros=0,off=0,method;
362: PetscMPIInt nproc,rank=0,aux;
363: PetscReal r;
364: MPI_Request req;
365: Mat A,B=NULL;
366: SlepcSC sc;
367: PetscInt flg=0;
368: DSParallelType ptype;
371: if (ctx->global) {
372: if (eps->intb >= PETSC_MAX_REAL && eps->inta <= PETSC_MIN_REAL) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"The defined computational interval should have at least one of their sides bounded");
373: if (!eps->ishermitian) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Spectrum slicing only available for symmetric/Hermitian eigenproblems");
374: if (eps->arbitrary) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Arbitrary selection of eigenpairs cannot be used with spectrum slicing");
375: PetscObjectTypeCompareAny((PetscObject)eps->st,&issinv,STSINVERT,STCAYLEY,"");
376: if (!issinv) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Shift-and-invert or Cayley ST is needed for spectrum slicing");
377: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL*1e-2; /* use tighter tolerance */
378: if (!eps->max_it) eps->max_it = 100;
379: if (ctx->nev==1) ctx->nev = PetscMin(40,eps->n); /* nev not set, use default value */
380: if (eps->n>10 && ctx->nev<10) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"nev cannot be less than 10 in spectrum slicing runs");
381: }
382: eps->ops->backtransform = EPSBackTransform_Skip;
384: /* create spectrum slicing context and initialize it */
385: EPSSliceResetSR(eps);
386: PetscNewLog(eps,&sr);
387: ctx->sr = sr;
388: sr->itsKs = 0;
389: sr->nleap = 0;
390: sr->nMAXCompl = eps->nev/4;
391: sr->iterCompl = eps->max_it/4;
392: sr->sPres = NULL;
393: sr->nS = 0;
395: if (ctx->npart==1 || ctx->global) {
396: /* check presence of ends and finding direction */
397: if ((eps->inta > PETSC_MIN_REAL && !(ctx->subintervals && ctx->subintervals[0]==ctx->subintervals[1])) || eps->intb >= PETSC_MAX_REAL) {
398: sr->int0 = eps->inta;
399: sr->int1 = eps->intb;
400: sr->dir = 1;
401: if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
402: sr->hasEnd = PETSC_FALSE;
403: } else sr->hasEnd = PETSC_TRUE;
404: } else {
405: sr->int0 = eps->intb;
406: sr->int1 = eps->inta;
407: sr->dir = -1;
408: sr->hasEnd = PetscNot(eps->inta <= PETSC_MIN_REAL);
409: }
410: }
411: if (ctx->global) {
412: /* prevent computation of factorization in global eps */
413: STSetTransform(eps->st,PETSC_FALSE);
414: EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
415: /* create subintervals and initialize auxiliary eps for slicing runs */
416: EPSSliceGetEPS(eps);
417: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
418: if (ctx->npart>1) {
419: if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
420: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
421: if (!rank) {
422: MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);
423: }
424: MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
425: PetscFree(ctx->nconv_loc);
426: PetscMalloc1(ctx->npart,&ctx->nconv_loc);
427: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
428: if (sr->dir<0) off = 1;
429: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
430: PetscMPIIntCast(sr_loc->numEigs,&aux);
431: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
432: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
433: } else {
434: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
435: if (!rank) {
436: PetscMPIIntCast(sr_loc->numEigs,&aux);
437: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
438: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
439: }
440: PetscMPIIntCast(ctx->npart,&aux);
441: MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
442: MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
443: }
444: nEigs = 0;
445: for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
446: } else {
447: nEigs = sr_loc->numEigs;
448: sr->inertia0 = sr_loc->inertia0;
449: sr->dir = sr_loc->dir;
450: }
451: sr->inertia1 = sr->inertia0+sr->dir*nEigs;
452: sr->numEigs = nEigs;
453: eps->nev = nEigs;
454: eps->ncv = nEigs;
455: eps->mpd = nEigs;
456: } else {
457: ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
458: sr_glob = ctx_glob->sr;
459: if (ctx->npart>1) {
460: sr->dir = sr_glob->dir;
461: sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
462: sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
463: if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
464: else sr->hasEnd = PETSC_TRUE;
465: }
467: /* compute inertia0 */
468: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
469: PetscOptionsGetInt(NULL,NULL,"-eps_krylovschur_hiteigenvalue",&flg,NULL);
470: if (zeros) { /* error in factorization */
471: if (sr->int0==ctx->eps->inta || sr->int0==ctx->eps->intb) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in the interval endpoint");
472: else if (ctx_glob->subintset && !flg) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
473: else {
474: if (flg==1) { /* idle subgroup */
475: sr->inertia0 = -1;
476: } else { /* perturb shift */
477: sr->int0 *= (1.0+SLICE_PTOL);
478: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
479: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",sr->int1);
480: }
481: }
482: }
483: if (ctx->npart>1) {
484: /* inertia1 is received from neighbour */
485: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
486: if (!rank) {
487: if (sr->inertia0!=-1 && ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1))) { /* send inertia0 to neighbour0 */
488: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
489: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
490: }
491: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
492: MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
493: MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
494: }
495: if (sr->inertia0==-1 && !(sr->dir>0 && ctx->subc->color==ctx->npart-1) && !(sr->dir<0 && ctx->subc->color==0)) {
496: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
497: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
498: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
499: }
500: }
501: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
502: MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
503: MPI_Bcast(&sr->int1,1,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
504: } else sr_glob->inertia1 = sr->inertia1;
505: }
507: /* last process in eps comm computes inertia1 */
508: if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
509: EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
510: if (zeros) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
511: if (!rank && sr->inertia0==-1) {
512: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
513: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
514: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
515: }
516: if (sr->hasEnd) {
517: sr->dir = -sr->dir; r = sr->int0; sr->int0 = sr->int1; sr->int1 = r;
518: i = sr->inertia0; sr->inertia0 = sr->inertia1; sr->inertia1 = i;
519: }
520: }
522: /* number of eigenvalues in interval */
523: sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
524: if (ctx->npart>1) {
525: /* memory allocate for subinterval eigenpairs */
526: EPSSliceAllocateSolution(eps,1);
527: }
528: dssz = eps->ncv+1;
529: if (sr->numEigs>0) {
530: DSGetSlepcSC(eps->ds,&sc);
531: sc->rg = NULL;
532: sc->comparison = SlepcCompareLargestMagnitude;
533: sc->comparisonctx = NULL;
534: sc->map = NULL;
535: sc->mapobj = NULL;
536: }
537: DSGetParallel(ctx->eps->ds,&ptype);
538: DSSetParallel(eps->ds,ptype);
539: DSGetMethod(ctx->eps->ds,&method);
540: DSSetMethod(eps->ds,method);
541: }
542: DSSetType(eps->ds,DSHEP);
543: DSSetCompact(eps->ds,PETSC_TRUE);
544: DSAllocate(eps->ds,dssz);
545: /* keep state of subcomm matrices to check that the user does not modify them */
546: EPSGetOperators(eps,&A,&B);
547: PetscObjectStateGet((PetscObject)A,&ctx->Astate);
548: PetscObjectGetId((PetscObject)A,&ctx->Aid);
549: if (B) {
550: PetscObjectStateGet((PetscObject)B,&ctx->Bstate);
551: PetscObjectGetId((PetscObject)B,&ctx->Bid);
552: } else {
553: ctx->Bstate=0;
554: ctx->Bid=0;
555: }
556: return(0);
557: }
559: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
560: {
561: PetscErrorCode ierr;
562: Vec v,vg,v_loc;
563: IS is1,is2;
564: VecScatter vec_sc;
565: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
566: PetscInt nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
567: PetscScalar *array;
568: EPS_SR sr_loc;
569: BV V_loc;
572: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
573: V_loc = sr_loc->V;
575: /* Gather parallel eigenvectors */
576: BVGetColumn(eps->V,0,&v);
577: VecGetOwnershipRange(v,&n0,&m0);
578: BVRestoreColumn(eps->V,0,&v);
579: BVGetColumn(ctx->eps->V,0,&v);
580: VecGetLocalSize(v,&nloc);
581: BVRestoreColumn(ctx->eps->V,0,&v);
582: PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
583: VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
584: idx = -1;
585: for (si=0;si<ctx->npart;si++) {
586: j = 0;
587: for (i=n0;i<m0;i++) {
588: idx1[j] = i;
589: idx2[j++] = i+eps->n*si;
590: }
591: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
592: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
593: BVGetColumn(eps->V,0,&v);
594: VecScatterCreate(v,is1,vg,is2,&vec_sc);
595: BVRestoreColumn(eps->V,0,&v);
596: ISDestroy(&is1);
597: ISDestroy(&is2);
598: for (i=0;i<ctx->nconv_loc[si];i++) {
599: BVGetColumn(eps->V,++idx,&v);
600: if (ctx->subc->color==si) {
601: BVGetColumn(V_loc,i,&v_loc);
602: VecGetArray(v_loc,&array);
603: VecPlaceArray(vg,array);
604: }
605: VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
606: VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
607: if (ctx->subc->color==si) {
608: VecResetArray(vg);
609: VecRestoreArray(v_loc,&array);
610: BVRestoreColumn(V_loc,i,&v_loc);
611: }
612: BVRestoreColumn(eps->V,idx,&v);
613: }
614: VecScatterDestroy(&vec_sc);
615: }
616: PetscFree2(idx1,idx2);
617: VecDestroy(&vg);
618: return(0);
619: }
621: /*
622: EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
623: */
624: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
625: {
626: PetscErrorCode ierr;
627: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
630: if (ctx->global && ctx->npart>1) {
631: EPSComputeVectors(ctx->eps);
632: EPSSliceGatherEigenVectors(eps);
633: }
634: return(0);
635: }
637: #define SWAP(a,b,t) {t=a;a=b;b=t;}
639: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
640: {
641: PetscErrorCode ierr;
642: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
643: PetscInt i=0,j,tmpi;
644: PetscReal v,tmpr;
645: EPS_shift s;
648: if (!eps->state) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Must call EPSSetUp() first");
649: if (!ctx->sr) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Only available in interval computations, see EPSSetInterval()");
650: if (!ctx->sr->s0) { /* EPSSolve not called yet */
651: *n = 2;
652: } else {
653: *n = 1;
654: s = ctx->sr->s0;
655: while (s) {
656: (*n)++;
657: s = s->neighb[1];
658: }
659: }
660: PetscMalloc1(*n,shifts);
661: PetscMalloc1(*n,inertias);
662: if (!ctx->sr->s0) { /* EPSSolve not called yet */
663: (*shifts)[0] = ctx->sr->int0;
664: (*shifts)[1] = ctx->sr->int1;
665: (*inertias)[0] = ctx->sr->inertia0;
666: (*inertias)[1] = ctx->sr->inertia1;
667: } else {
668: s = ctx->sr->s0;
669: while (s) {
670: (*shifts)[i] = s->value;
671: (*inertias)[i++] = s->inertia;
672: s = s->neighb[1];
673: }
674: (*shifts)[i] = ctx->sr->int1;
675: (*inertias)[i] = ctx->sr->inertia1;
676: }
677: /* remove possible duplicate in last position */
678: if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
679: /* sort result */
680: for (i=0;i<*n;i++) {
681: v = (*shifts)[i];
682: for (j=i+1;j<*n;j++) {
683: if (v > (*shifts)[j]) {
684: SWAP((*shifts)[i],(*shifts)[j],tmpr);
685: SWAP((*inertias)[i],(*inertias)[j],tmpi);
686: v = (*shifts)[i];
687: }
688: }
689: }
690: return(0);
691: }
693: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
694: {
695: PetscErrorCode ierr;
696: PetscMPIInt rank,nproc;
697: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
698: PetscInt i,idx,j;
699: PetscInt *perm_loc,off=0,*inertias_loc,ns;
700: PetscScalar *eigr_loc;
701: EPS_SR sr_loc;
702: PetscReal *shifts_loc;
703: PetscMPIInt *disp,*ns_loc,aux;
706: eps->nconv = 0;
707: for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
708: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
710: /* Gather the shifts used and the inertias computed */
711: EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
712: if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
713: if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
714: ns--;
715: for (i=0;i<ns;i++) {
716: inertias_loc[i] = inertias_loc[i+1];
717: shifts_loc[i] = shifts_loc[i+1];
718: }
719: }
720: PetscMalloc1(ctx->npart,&ns_loc);
721: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
722: PetscMPIIntCast(ns,&aux);
723: if (!rank) { MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank); }
724: PetscMPIIntCast(ctx->npart,&aux);
725: MPI_Bcast(ns_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
726: ctx->nshifts = 0;
727: for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
728: PetscFree(ctx->inertias);
729: PetscFree(ctx->shifts);
730: PetscMalloc1(ctx->nshifts,&ctx->inertias);
731: PetscMalloc1(ctx->nshifts,&ctx->shifts);
733: /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
734: eigr_loc = sr_loc->eigr;
735: perm_loc = sr_loc->perm;
736: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
737: PetscMalloc1(ctx->npart,&disp);
738: disp[0] = 0;
739: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
740: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
741: PetscMPIIntCast(sr_loc->numEigs,&aux);
742: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
743: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
744: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
745: PetscMPIIntCast(ns,&aux);
746: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
747: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
748: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
749: } else { /* subcommunicators with different size */
750: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
751: if (!rank) {
752: PetscMPIIntCast(sr_loc->numEigs,&aux);
753: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
754: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
755: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
756: PetscMPIIntCast(ns,&aux);
757: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
758: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
759: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
760: }
761: PetscMPIIntCast(eps->nconv,&aux);
762: MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,PetscSubcommChild(ctx->subc));
763: MPI_Bcast(eps->perm,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
764: MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
765: PetscMPIIntCast(ctx->nshifts,&aux);
766: MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
767: MPI_Bcast(&eps->its,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
768: }
769: /* Update global array eps->perm */
770: idx = ctx->nconv_loc[0];
771: for (i=1;i<ctx->npart;i++) {
772: off += ctx->nconv_loc[i-1];
773: for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
774: }
776: /* Gather parallel eigenvectors */
777: PetscFree(ns_loc);
778: PetscFree(disp);
779: PetscFree(shifts_loc);
780: PetscFree(inertias_loc);
781: return(0);
782: }
784: /*
785: Fills the fields of a shift structure
786: */
787: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
788: {
789: PetscErrorCode ierr;
790: EPS_shift s,*pending2;
791: PetscInt i;
792: EPS_SR sr;
793: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
796: sr = ctx->sr;
797: PetscNewLog(eps,&s);
798: s->value = val;
799: s->neighb[0] = neighb0;
800: if (neighb0) neighb0->neighb[1] = s;
801: s->neighb[1] = neighb1;
802: if (neighb1) neighb1->neighb[0] = s;
803: s->comp[0] = PETSC_FALSE;
804: s->comp[1] = PETSC_FALSE;
805: s->index = -1;
806: s->neigs = 0;
807: s->nconv[0] = s->nconv[1] = 0;
808: s->nsch[0] = s->nsch[1]=0;
809: /* Inserts in the stack of pending shifts */
810: /* If needed, the array is resized */
811: if (sr->nPend >= sr->maxPend) {
812: sr->maxPend *= 2;
813: PetscMalloc1(sr->maxPend,&pending2);
814: PetscLogObjectMemory((PetscObject)eps,sizeof(EPS_shift));
815: for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
816: PetscFree(sr->pending);
817: sr->pending = pending2;
818: }
819: sr->pending[sr->nPend++]=s;
820: return(0);
821: }
823: /* Prepare for Rational Krylov update */
824: static PetscErrorCode EPSPrepareRational(EPS eps)
825: {
826: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
827: PetscErrorCode ierr;
828: PetscInt dir,i,k,ld,nv;
829: PetscScalar *A;
830: EPS_SR sr = ctx->sr;
831: Vec v;
834: DSGetLeadingDimension(eps->ds,&ld);
835: dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
836: dir*=sr->dir;
837: k = 0;
838: for (i=0;i<sr->nS;i++) {
839: if (dir*PetscRealPart(sr->S[i])>0.0) {
840: sr->S[k] = sr->S[i];
841: sr->S[sr->nS+k] = sr->S[sr->nS+i];
842: BVGetColumn(sr->Vnext,k,&v);
843: BVCopyVec(eps->V,eps->nconv+i,v);
844: BVRestoreColumn(sr->Vnext,k,&v);
845: k++;
846: if (k>=sr->nS/2)break;
847: }
848: }
849: /* Copy to DS */
850: DSGetArray(eps->ds,DS_MAT_A,&A);
851: PetscArrayzero(A,ld*ld);
852: for (i=0;i<k;i++) {
853: A[i*(1+ld)] = sr->S[i];
854: A[k+i*ld] = sr->S[sr->nS+i];
855: }
856: sr->nS = k;
857: DSRestoreArray(eps->ds,DS_MAT_A,&A);
858: DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL,NULL);
859: DSSetDimensions(eps->ds,nv,0,0,k);
860: /* Append u to V */
861: BVGetColumn(sr->Vnext,sr->nS,&v);
862: BVCopyVec(eps->V,sr->nv,v);
863: BVRestoreColumn(sr->Vnext,sr->nS,&v);
864: return(0);
865: }
867: /* Provides next shift to be computed */
868: static PetscErrorCode EPSExtractShift(EPS eps)
869: {
870: PetscErrorCode ierr;
871: PetscInt iner,zeros=0;
872: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
873: EPS_SR sr;
874: PetscReal newShift;
875: EPS_shift sPres;
878: sr = ctx->sr;
879: if (sr->nPend > 0) {
880: sr->sPrev = sr->sPres;
881: sr->sPres = sr->pending[--sr->nPend];
882: sPres = sr->sPres;
883: EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
884: if (zeros) {
885: newShift = sPres->value*(1.0+SLICE_PTOL);
886: if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
887: else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
888: EPSSliceGetInertia(eps,newShift,&iner,&zeros);
889: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",newShift);
890: sPres->value = newShift;
891: }
892: sr->sPres->inertia = iner;
893: eps->target = sr->sPres->value;
894: eps->reason = EPS_CONVERGED_ITERATING;
895: eps->its = 0;
896: } else sr->sPres = NULL;
897: return(0);
898: }
900: /*
901: Symmetric KrylovSchur adapted to spectrum slicing:
902: Allows searching an specific amount of eigenvalues in the subintervals left and right.
903: Returns whether the search has succeeded
904: */
905: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
906: {
907: PetscErrorCode ierr;
908: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
909: PetscInt i,k,l,ld,nv,*iwork,j;
910: Mat U;
911: PetscScalar *Q,*A;
912: PetscReal *a,*b,beta;
913: PetscBool breakdown;
914: PetscInt count0,count1;
915: PetscReal lambda;
916: EPS_shift sPres;
917: PetscBool complIterating;
918: PetscBool sch0,sch1;
919: PetscInt iterCompl=0,n0,n1;
920: EPS_SR sr = ctx->sr;
923: /* Spectrum slicing data */
924: sPres = sr->sPres;
925: complIterating =PETSC_FALSE;
926: sch1 = sch0 = PETSC_TRUE;
927: DSGetLeadingDimension(eps->ds,&ld);
928: PetscMalloc1(2*ld,&iwork);
929: count0=0;count1=0; /* Found on both sides */
930: if (sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
931: /* Rational Krylov */
932: DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
933: DSGetDimensions(eps->ds,NULL,NULL,NULL,&l,NULL);
934: DSSetDimensions(eps->ds,l+1,0,0,0);
935: BVSetActiveColumns(eps->V,0,l+1);
936: DSGetMat(eps->ds,DS_MAT_Q,&U);
937: BVMultInPlace(eps->V,U,0,l+1);
938: MatDestroy(&U);
939: } else {
940: /* Get the starting Lanczos vector */
941: EPSGetStartVector(eps,0,NULL);
942: l = 0;
943: }
944: /* Restart loop */
945: while (eps->reason == EPS_CONVERGED_ITERATING) {
946: eps->its++; sr->itsKs++;
947: /* Compute an nv-step Lanczos factorization */
948: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
949: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
950: b = a + ld;
951: EPSFullLanczos(eps,a,b,eps->nconv+l,&nv,&breakdown);
952: sr->nv = nv;
953: beta = b[nv-1];
954: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
955: DSSetDimensions(eps->ds,nv,0,eps->nconv,eps->nconv+l);
956: if (l==0) {
957: DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
958: } else {
959: DSSetState(eps->ds,DS_STATE_RAW);
960: }
961: BVSetActiveColumns(eps->V,eps->nconv,nv);
963: /* Solve projected problem and compute residual norm estimates */
964: if (eps->its == 1 && l > 0) {/* After rational update */
965: DSGetArray(eps->ds,DS_MAT_A,&A);
966: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
967: b = a + ld;
968: k = eps->nconv+l;
969: A[k*ld+k-1] = A[(k-1)*ld+k];
970: A[k*ld+k] = a[k];
971: for (j=k+1; j< nv; j++) {
972: A[j*ld+j] = a[j];
973: A[j*ld+j-1] = b[j-1] ;
974: A[(j-1)*ld+j] = b[j-1];
975: }
976: DSRestoreArray(eps->ds,DS_MAT_A,&A);
977: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
978: DSSolve(eps->ds,eps->eigr,NULL);
979: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
980: DSSetCompact(eps->ds,PETSC_TRUE);
981: } else { /* Restart */
982: DSSolve(eps->ds,eps->eigr,NULL);
983: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
984: }
985: DSSynchronize(eps->ds,eps->eigr,NULL);
987: /* Residual */
988: EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,0.0,1.0,&k);
989: /* Checking values obtained for completing */
990: for (i=0;i<k;i++) {
991: sr->back[i]=eps->eigr[i];
992: }
993: STBackTransform(eps->st,k,sr->back,eps->eigi);
994: count0=count1=0;
995: for (i=0;i<k;i++) {
996: lambda = PetscRealPart(sr->back[i]);
997: if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
998: if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
999: }
1000: if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
1001: else {
1002: /* Checks completion */
1003: if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
1004: eps->reason = EPS_CONVERGED_TOL;
1005: } else {
1006: if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
1007: if (complIterating) {
1008: if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
1009: } else if (k >= eps->nev) {
1010: n0 = sPres->nsch[0]-count0;
1011: n1 = sPres->nsch[1]-count1;
1012: if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
1013: /* Iterating for completion*/
1014: complIterating = PETSC_TRUE;
1015: if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
1016: if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
1017: iterCompl = sr->iterCompl;
1018: } else eps->reason = EPS_CONVERGED_TOL;
1019: }
1020: }
1021: }
1022: /* Update l */
1023: if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
1024: else l = nv-k;
1025: if (breakdown) l=0;
1026: if (!ctx->lock && l>0 && eps->reason == EPS_CONVERGED_ITERATING) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
1028: if (eps->reason == EPS_CONVERGED_ITERATING) {
1029: if (breakdown) {
1030: /* Start a new Lanczos factorization */
1031: PetscInfo2(eps,"Breakdown in Krylov-Schur method (it=%D norm=%g)\n",eps->its,(double)beta);
1032: EPSGetStartVector(eps,k,&breakdown);
1033: if (breakdown) {
1034: eps->reason = EPS_DIVERGED_BREAKDOWN;
1035: PetscInfo(eps,"Unable to generate more start vectors\n");
1036: }
1037: } else {
1038: /* Prepare the Rayleigh quotient for restart */
1039: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
1040: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1041: b = a + ld;
1042: for (i=k;i<k+l;i++) {
1043: a[i] = PetscRealPart(eps->eigr[i]);
1044: b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
1045: }
1046: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
1047: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1048: }
1049: }
1050: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
1051: DSGetMat(eps->ds,DS_MAT_Q,&U);
1052: BVMultInPlace(eps->V,U,eps->nconv,k+l);
1053: MatDestroy(&U);
1055: /* Normalize u and append it to V */
1056: if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) {
1057: BVCopyColumn(eps->V,nv,k+l);
1058: }
1059: eps->nconv = k;
1060: if (eps->reason != EPS_CONVERGED_ITERATING) {
1061: /* Store approximated values for next shift */
1062: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1063: sr->nS = l;
1064: for (i=0;i<l;i++) {
1065: sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
1066: sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
1067: }
1068: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1069: }
1070: }
1071: /* Check for completion */
1072: for (i=0;i< eps->nconv; i++) {
1073: if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
1074: else sPres->nconv[0]++;
1075: }
1076: sPres->comp[0] = PetscNot(count0 < sPres->nsch[0]);
1077: sPres->comp[1] = PetscNot(count1 < sPres->nsch[1]);
1078: if (count0 > sPres->nsch[0] || count1 > sPres->nsch[1]) InertiaMismatch(eps,ctx->detect);
1079: PetscFree(iwork);
1080: return(0);
1081: }
1083: /*
1084: Obtains value of subsequent shift
1085: */
1086: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
1087: {
1088: PetscReal lambda,d_prev;
1089: PetscInt i,idxP;
1090: EPS_SR sr;
1091: EPS_shift sPres,s;
1092: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1095: sr = ctx->sr;
1096: sPres = sr->sPres;
1097: if (sPres->neighb[side]) {
1098: /* Completing a previous interval */
1099: if (!sPres->neighb[side]->neighb[side] && sPres->neighb[side]->nconv[side]==0) { /* One of the ends might be too far from eigenvalues */
1100: if (side) *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[sr->indexEig-1]]))/2;
1101: else *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[0]]))/2;
1102: } else *newS=(sPres->value + sPres->neighb[side]->value)/2;
1103: } else { /* (Only for side=1). Creating a new interval. */
1104: if (sPres->neigs==0) {/* No value has been accepted*/
1105: if (sPres->neighb[0]) {
1106: /* Multiplying by 10 the previous distance */
1107: *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1108: sr->nleap++;
1109: /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1110: if (!sr->hasEnd && sr->nleap > 5) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unable to compute the wanted eigenvalues with open interval");
1111: } else { /* First shift */
1112: if (eps->nconv != 0) {
1113: /* Unaccepted values give information for next shift */
1114: idxP=0;/* Number of values left from shift */
1115: for (i=0;i<eps->nconv;i++) {
1116: lambda = PetscRealPart(eps->eigr[i]);
1117: if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1118: else break;
1119: }
1120: /* Avoiding subtraction of eigenvalues (might be the same).*/
1121: if (idxP>0) {
1122: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[0]))/(idxP+0.3);
1123: } else {
1124: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1125: }
1126: *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1127: } else { /* No values found, no information for next shift */
1128: SETERRQ(PetscObjectComm((PetscObject)eps),1,"First shift renders no information");
1129: }
1130: }
1131: } else { /* Accepted values found */
1132: sr->nleap = 0;
1133: /* Average distance of values in previous subinterval */
1134: s = sPres->neighb[0];
1135: while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1136: s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1137: }
1138: if (s) {
1139: d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1140: } else { /* First shift. Average distance obtained with values in this shift */
1141: /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1142: if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1143: d_prev = PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1144: } else {
1145: d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1146: }
1147: }
1148: /* Average distance is used for next shift by adding it to value on the right or to shift */
1149: if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1150: *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1151: } else { /* Last accepted value is on the left of shift. Adding to shift */
1152: *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1153: }
1154: }
1155: /* End of interval can not be surpassed */
1156: if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1157: }/* of neighb[side]==null */
1158: return(0);
1159: }
1161: /*
1162: Function for sorting an array of real values
1163: */
1164: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1165: {
1166: PetscReal re;
1167: PetscInt i,j,tmp;
1170: if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1171: /* Insertion sort */
1172: for (i=1;i<nr;i++) {
1173: re = PetscRealPart(r[perm[i]]);
1174: j = i-1;
1175: while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1176: tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1177: }
1178: }
1179: return(0);
1180: }
1182: /* Stores the pairs obtained since the last shift in the global arrays */
1183: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1184: {
1185: PetscErrorCode ierr;
1186: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1187: PetscReal lambda,err,norm;
1188: PetscInt i,count;
1189: PetscBool iscayley;
1190: EPS_SR sr = ctx->sr;
1191: EPS_shift sPres;
1192: Vec v,w;
1195: sPres = sr->sPres;
1196: sPres->index = sr->indexEig;
1197: count = sr->indexEig;
1198: /* Back-transform */
1199: STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1200: PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1201: /* Sort eigenvalues */
1202: sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1203: /* Values stored in global array */
1204: for (i=0;i<eps->nconv;i++) {
1205: lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1206: err = eps->errest[eps->perm[i]];
1208: if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1209: if (count>=sr->numEigs) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unexpected error in Spectrum Slicing");
1210: sr->eigr[count] = lambda;
1211: sr->errest[count] = err;
1212: /* Explicit purification */
1213: BVGetColumn(eps->V,eps->perm[i],&w);
1214: if (eps->purify) {
1215: BVGetColumn(sr->V,count,&v);
1216: STApply(eps->st,w,v);
1217: BVRestoreColumn(sr->V,count,&v);
1218: } else {
1219: BVInsertVec(sr->V,count,w);
1220: }
1221: BVRestoreColumn(eps->V,eps->perm[i],&w);
1222: BVNormColumn(sr->V,count,NORM_2,&norm);
1223: BVScaleColumn(sr->V,count,1.0/norm);
1224: count++;
1225: }
1226: }
1227: sPres->neigs = count - sr->indexEig;
1228: sr->indexEig = count;
1229: /* Global ordering array updating */
1230: sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1231: return(0);
1232: }
1234: static PetscErrorCode EPSLookForDeflation(EPS eps)
1235: {
1236: PetscErrorCode ierr;
1237: PetscReal val;
1238: PetscInt i,count0=0,count1=0;
1239: EPS_shift sPres;
1240: PetscInt ini,fin,k,idx0,idx1;
1241: EPS_SR sr;
1242: Vec v;
1243: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1246: sr = ctx->sr;
1247: sPres = sr->sPres;
1249: if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1250: else ini = 0;
1251: fin = sr->indexEig;
1252: /* Selection of ends for searching new values */
1253: if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1254: else sPres->ext[0] = sPres->neighb[0]->value;
1255: if (!sPres->neighb[1]) {
1256: if (sr->hasEnd) sPres->ext[1] = sr->int1;
1257: else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1258: } else sPres->ext[1] = sPres->neighb[1]->value;
1259: /* Selection of values between right and left ends */
1260: for (i=ini;i<fin;i++) {
1261: val=PetscRealPart(sr->eigr[sr->perm[i]]);
1262: /* Values to the right of left shift */
1263: if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1264: if ((sr->dir)*(val - sPres->value) < 0) count0++;
1265: else count1++;
1266: } else break;
1267: }
1268: /* The number of values on each side are found */
1269: if (sPres->neighb[0]) {
1270: sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1271: if (sPres->nsch[0]<0) InertiaMismatch(eps,ctx->detect);
1272: } else sPres->nsch[0] = 0;
1274: if (sPres->neighb[1]) {
1275: sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1276: if (sPres->nsch[1]<0) InertiaMismatch(eps,ctx->detect);
1277: } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);
1279: /* Completing vector of indexes for deflation */
1280: idx0 = ini;
1281: idx1 = ini+count0+count1;
1282: k=0;
1283: for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1284: BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1285: BVSetNumConstraints(sr->Vnext,k);
1286: for (i=0;i<k;i++) {
1287: BVGetColumn(sr->Vnext,-i-1,&v);
1288: BVCopyVec(sr->V,sr->idxDef[i],v);
1289: BVRestoreColumn(sr->Vnext,-i-1,&v);
1290: }
1292: /* For rational Krylov */
1293: if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) {
1294: EPSPrepareRational(eps);
1295: }
1296: eps->nconv = 0;
1297: /* Get rid of temporary Vnext */
1298: BVDestroy(&eps->V);
1299: eps->V = sr->Vnext;
1300: sr->Vnext = NULL;
1301: return(0);
1302: }
1304: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1305: {
1306: PetscErrorCode ierr;
1307: PetscInt i,lds,ti;
1308: PetscReal newS;
1309: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1310: EPS_SR sr=ctx->sr;
1311: Mat A,B=NULL;
1312: PetscObjectState Astate,Bstate=0;
1313: PetscObjectId Aid,Bid=0;
1316: PetscCitationsRegister(citation,&cited);
1317: if (ctx->global) {
1318: EPSSolve_KrylovSchur_Slice(ctx->eps);
1319: ctx->eps->state = EPS_STATE_SOLVED;
1320: eps->reason = EPS_CONVERGED_TOL;
1321: if (ctx->npart>1) {
1322: /* Gather solution from subsolvers */
1323: EPSSliceGatherSolution(eps);
1324: } else {
1325: eps->nconv = sr->numEigs;
1326: eps->its = ctx->eps->its;
1327: PetscFree(ctx->inertias);
1328: PetscFree(ctx->shifts);
1329: EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1330: }
1331: } else {
1332: if (ctx->npart==1) {
1333: sr->eigr = ctx->eps->eigr;
1334: sr->eigi = ctx->eps->eigi;
1335: sr->perm = ctx->eps->perm;
1336: sr->errest = ctx->eps->errest;
1337: sr->V = ctx->eps->V;
1338: }
1339: /* Check that the user did not modify subcomm matrices */
1340: EPSGetOperators(eps,&A,&B);
1341: PetscObjectStateGet((PetscObject)A,&Astate);
1342: PetscObjectGetId((PetscObject)A,&Aid);
1343: if (B) {
1344: PetscObjectStateGet((PetscObject)B,&Bstate);
1345: PetscObjectGetId((PetscObject)B,&Bid);
1346: }
1347: if (Astate!=ctx->Astate || (B && Bstate!=ctx->Bstate) || Aid!=ctx->Aid || (B && Bid!=ctx->Bid)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcomm matrices have been modified by user");
1348: /* Only with eigenvalues present in the interval ...*/
1349: if (sr->numEigs==0) {
1350: eps->reason = EPS_CONVERGED_TOL;
1351: return(0);
1352: }
1353: /* Array of pending shifts */
1354: sr->maxPend = 100; /* Initial size */
1355: sr->nPend = 0;
1356: PetscMalloc1(sr->maxPend,&sr->pending);
1357: PetscLogObjectMemory((PetscObject)eps,(sr->maxPend)*sizeof(EPS_shift));
1358: EPSCreateShift(eps,sr->int0,NULL,NULL);
1359: /* extract first shift */
1360: sr->sPrev = NULL;
1361: sr->sPres = sr->pending[--sr->nPend];
1362: sr->sPres->inertia = sr->inertia0;
1363: eps->target = sr->sPres->value;
1364: sr->s0 = sr->sPres;
1365: sr->indexEig = 0;
1366: /* Memory reservation for auxiliary variables */
1367: lds = PetscMin(eps->mpd,eps->ncv);
1368: PetscCalloc1(lds*lds,&sr->S);
1369: PetscMalloc1(eps->ncv,&sr->back);
1370: PetscLogObjectMemory((PetscObject)eps,(sr->numEigs+2*eps->ncv)*sizeof(PetscScalar));
1371: for (i=0;i<sr->numEigs;i++) {
1372: sr->eigr[i] = 0.0;
1373: sr->eigi[i] = 0.0;
1374: sr->errest[i] = 0.0;
1375: sr->perm[i] = i;
1376: }
1377: /* Vectors for deflation */
1378: PetscMalloc1(sr->numEigs,&sr->idxDef);
1379: PetscLogObjectMemory((PetscObject)eps,sr->numEigs*sizeof(PetscInt));
1380: sr->indexEig = 0;
1381: /* Main loop */
1382: while (sr->sPres) {
1383: /* Search for deflation */
1384: EPSLookForDeflation(eps);
1385: /* KrylovSchur */
1386: EPSKrylovSchur_Slice(eps);
1388: EPSStoreEigenpairs(eps);
1389: /* Select new shift */
1390: if (!sr->sPres->comp[1]) {
1391: EPSGetNewShiftValue(eps,1,&newS);
1392: EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1393: }
1394: if (!sr->sPres->comp[0]) {
1395: /* Completing earlier interval */
1396: EPSGetNewShiftValue(eps,0,&newS);
1397: EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1398: }
1399: /* Preparing for a new search of values */
1400: EPSExtractShift(eps);
1401: }
1403: /* Updating eps values prior to exit */
1404: PetscFree(sr->S);
1405: PetscFree(sr->idxDef);
1406: PetscFree(sr->pending);
1407: PetscFree(sr->back);
1408: BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1409: BVSetNumConstraints(sr->Vnext,0);
1410: BVDestroy(&eps->V);
1411: eps->V = sr->Vnext;
1412: eps->nconv = sr->indexEig;
1413: eps->reason = EPS_CONVERGED_TOL;
1414: eps->its = sr->itsKs;
1415: eps->nds = 0;
1416: if (sr->dir<0) {
1417: for (i=0;i<eps->nconv/2;i++) {
1418: ti = sr->perm[i]; sr->perm[i] = sr->perm[eps->nconv-1-i]; sr->perm[eps->nconv-1-i] = ti;
1419: }
1420: }
1421: }
1422: return(0);
1423: }