Actual source code: ks-slice.c
slepc-3.10.0 2018-09-18
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2018, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: SLEPc eigensolver: "krylovschur"
13: Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems
15: References:
17: [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
18: solving sparse symmetric generalized eigenproblems", SIAM J.
19: Matrix Anal. Appl. 15(1):228-272, 1994.
21: [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
22: on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
23: 2012.
24: */
26: #include <slepc/private/epsimpl.h>
27: #include "krylovschur.h"
29: static PetscBool cited = PETSC_FALSE;
30: static const char citation[] =
31: "@Article{slepc-slice,\n"
32: " author = \"C. Campos and J. E. Roman\",\n"
33: " title = \"Strategies for spectrum slicing based on restarted {Lanczos} methods\",\n"
34: " journal = \"Numer. Algorithms\",\n"
35: " volume = \"60\",\n"
36: " number = \"2\",\n"
37: " pages = \"279--295\",\n"
38: " year = \"2012,\"\n"
39: " doi = \"https://doi.org/10.1007/s11075-012-9564-z\"\n"
40: "}\n";
42: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON
44: static PetscErrorCode EPSSliceResetSR(EPS eps) {
45: PetscErrorCode ierr;
46: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
47: EPS_SR sr=ctx->sr;
48: EPS_shift s;
51: if (sr) {
52: if (ctx->npart>1) {
53: BVDestroy(&sr->V);
54: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
55: }
56: /* Reviewing list of shifts to free memory */
57: s = sr->s0;
58: if (s) {
59: while (s->neighb[1]) {
60: s = s->neighb[1];
61: PetscFree(s->neighb[0]);
62: }
63: PetscFree(s);
64: }
65: PetscFree(sr);
66: }
67: ctx->sr = NULL;
68: return(0);
69: }
71: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
72: {
73: PetscErrorCode ierr;
74: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
77: if (!ctx->global) return(0);
78: /* Destroy auxiliary EPS */
79: EPSSliceResetSR(ctx->eps);
80: EPSDestroy(&ctx->eps);
81: if (ctx->npart>1) {
82: PetscSubcommDestroy(&ctx->subc);
83: if (ctx->commset) {
84: MPI_Comm_free(&ctx->commrank);
85: ctx->commset = PETSC_FALSE;
86: }
87: }
88: PetscFree(ctx->subintervals);
89: PetscFree(ctx->nconv_loc);
90: EPSSliceResetSR(eps);
91: PetscFree(ctx->inertias);
92: PetscFree(ctx->shifts);
93: if (ctx->npart>1) {
94: ISDestroy(&ctx->isrow);
95: ISDestroy(&ctx->iscol);
96: MatDestroyMatrices(1,&ctx->submata);
97: MatDestroyMatrices(1,&ctx->submatb);
98: }
99: return(0);
100: }
102: /*
103: EPSSliceAllocateSolution - Allocate memory storage for common variables such
104: as eigenvalues and eigenvectors. The argument extra is used for methods
105: that require a working basis slightly larger than ncv.
106: */
107: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
108: {
109: PetscErrorCode ierr;
110: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
111: PetscReal eta;
112: PetscInt k;
113: PetscLogDouble cnt;
114: BVType type;
115: BVOrthogType orthog_type;
116: BVOrthogRefineType orthog_ref;
117: BVOrthogBlockType ob_type;
118: Mat matrix;
119: Vec t;
120: EPS_SR sr = ctx->sr;
123: /* allocate space for eigenvalues and friends */
124: k = PetscMax(1,sr->numEigs);
125: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
126: PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
127: cnt = 2*k*sizeof(PetscScalar) + 2*k*sizeof(PetscReal) + k*sizeof(PetscInt);
128: PetscLogObjectMemory((PetscObject)eps,cnt);
130: /* allocate sr->V and transfer options from eps->V */
131: BVDestroy(&sr->V);
132: BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
133: PetscLogObjectParent((PetscObject)eps,(PetscObject)sr->V);
134: if (!eps->V) { EPSGetBV(eps,&eps->V); }
135: if (!((PetscObject)(eps->V))->type_name) {
136: BVSetType(sr->V,BVSVEC);
137: } else {
138: BVGetType(eps->V,&type);
139: BVSetType(sr->V,type);
140: }
141: STMatCreateVecsEmpty(eps->st,&t,NULL);
142: BVSetSizesFromVec(sr->V,t,k);
143: VecDestroy(&t);
144: EPS_SetInnerProduct(eps);
145: BVGetMatrix(eps->V,&matrix,NULL);
146: BVSetMatrix(sr->V,matrix,PETSC_FALSE);
147: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
148: BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
149: return(0);
150: }
152: static PetscErrorCode EPSSliceGetEPS(EPS eps)
153: {
154: PetscErrorCode ierr;
155: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
156: BV V;
157: BVType type;
158: PetscReal eta;
159: BVOrthogType orthog_type;
160: BVOrthogRefineType orthog_ref;
161: BVOrthogBlockType ob_type;
162: Mat A,B=NULL,Ar=NULL,Br=NULL;
163: PetscInt i;
164: PetscReal h,a,b,zero;
165: PetscMPIInt rank;
166: EPS_SR sr=ctx->sr;
167: PC pc;
168: PCType pctype;
169: KSP ksp;
170: KSPType ksptype;
171: STType sttype;
172: PetscObjectState Astate,Bstate=0;
173: PetscObjectId Aid,Bid=0;
174: MatSolverType stype;
177: EPSGetOperators(eps,&A,&B);
178: if (ctx->npart==1) {
179: if (!ctx->eps) { EPSCreate(((PetscObject)eps)->comm,&ctx->eps); }
180: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
181: EPSSetOperators(ctx->eps,A,B);
182: a = eps->inta; b = eps->intb;
183: } else {
184: PetscObjectStateGet((PetscObject)A,&Astate);
185: PetscObjectGetId((PetscObject)A,&Aid);
186: if (B) {
187: PetscObjectStateGet((PetscObject)B,&Bstate);
188: PetscObjectGetId((PetscObject)B,&Bid);
189: }
190: if (!ctx->subc) {
191: /* Create context for subcommunicators */
192: PetscSubcommCreate(PetscObjectComm((PetscObject)eps),&ctx->subc);
193: PetscSubcommSetNumber(ctx->subc,ctx->npart);
194: PetscSubcommSetType(ctx->subc,PETSC_SUBCOMM_CONTIGUOUS);
195: PetscLogObjectMemory((PetscObject)eps,sizeof(PetscSubcomm));
197: /* Duplicate matrices */
198: MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
199: ctx->Astate = Astate;
200: ctx->Aid = Aid;
201: if (B) {
202: MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br);
203: ctx->Bstate = Bstate;
204: ctx->Bid = Bid;
205: }
206: } else {
207: if (ctx->Astate != Astate || (B && ctx->Bstate != Bstate) || ctx->Aid != Aid || (B && ctx->Bid != Bid)) {
208: EPSGetOperators(ctx->eps,&Ar,&Br);
209: MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
210: ctx->Astate = Astate;
211: ctx->Aid = Aid;
212: if (B) {
213: MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br);
214: ctx->Bstate = Bstate;
215: ctx->Bid = Bid;
216: }
217: EPSSetOperators(ctx->eps,Ar,Br);
218: MatDestroy(&Ar);
219: MatDestroy(&Br);
220: }
221: }
223: /* Determine subintervals */
224: if (!ctx->subintset) { /* uniform distribution if no set by user */
225: if (!sr->hasEnd) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Global interval must be bounded for splitting it in uniform subintervals");
226: h = (eps->intb-eps->inta)/ctx->npart;
227: a = eps->inta+ctx->subc->color*h;
228: b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
229: PetscFree(ctx->subintervals);
230: PetscMalloc1(ctx->npart+1,&ctx->subintervals);
231: for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
232: ctx->subintervals[ctx->npart] = eps->intb;
233: } else {
234: a = ctx->subintervals[ctx->subc->color];
235: b = ctx->subintervals[ctx->subc->color+1];
236: }
238: if (!ctx->eps) {
239: /* Create auxiliary EPS */
240: EPSCreate(PetscSubcommChild(ctx->subc),&ctx->eps);
241: EPSSetOperators(ctx->eps,Ar,Br);
242: MatDestroy(&Ar);
243: MatDestroy(&Br);
244: }
246: /* Create subcommunicator grouping processes with same rank */
247: if (ctx->commset) { MPI_Comm_free(&ctx->commrank); }
248: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
249: MPI_Comm_split(((PetscObject)eps)->comm,rank,ctx->subc->color,&ctx->commrank);
250: ctx->commset = PETSC_TRUE;
251: }
252: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
254: /* Transfer options for ST, KSP and PC */
255: STGetType(eps->st,&sttype);
256: STSetType(ctx->eps->st,sttype);
257: STGetKSP(eps->st,&ksp);
258: KSPGetType(ksp,&ksptype);
259: KSPGetPC(ksp,&pc);
260: PCGetType(pc,&pctype);
261: PCFactorGetMatSolverType(pc,&stype);
262: PCFactorGetZeroPivot(pc,&zero);
263: STGetKSP(ctx->eps->st,&ksp);
264: KSPSetType(ksp,ksptype);
265: KSPGetPC(ksp,&pc);
266: PCSetType(pc,pctype);
267: PCFactorSetZeroPivot(pc,zero);
268: if (stype) { PCFactorSetMatSolverType(pc,stype); }
270: EPSSetConvergenceTest(ctx->eps,eps->conv);
271: EPSSetInterval(ctx->eps,a,b);
272: ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
273: ctx_local->npart = ctx->npart;
274: ctx_local->detect = ctx->detect;
275: ctx_local->global = PETSC_FALSE;
276: ctx_local->eps = eps;
277: ctx_local->subc = ctx->subc;
278: ctx_local->commrank = ctx->commrank;
280: EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
281: EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);
283: /* transfer options from eps->V */
284: EPSGetBV(ctx->eps,&V);
285: if (!eps->V) { EPSGetBV(eps,&eps->V); }
286: if (!((PetscObject)(eps->V))->type_name) {
287: BVSetType(V,BVSVEC);
288: } else {
289: BVGetType(eps->V,&type);
290: BVSetType(V,type);
291: }
292: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
293: BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);
294: ctx->eps->which = eps->which;
295: ctx->eps->max_it = eps->max_it;
296: ctx->eps->tol = eps->tol;
297: ctx->eps->purify = eps->purify;
298: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
299: EPSSetProblemType(ctx->eps,eps->problem_type);
300: EPSSetUp(ctx->eps);
301: ctx->eps->nconv = 0;
302: ctx->eps->its = 0;
303: for (i=0;i<ctx->eps->ncv;i++) {
304: ctx->eps->eigr[i] = 0.0;
305: ctx->eps->eigi[i] = 0.0;
306: ctx->eps->errest[i] = 0.0;
307: }
308: return(0);
309: }
311: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
312: {
314: KSP ksp;
315: PC pc;
316: Mat F;
317: PetscReal nzshift;
320: if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
321: if (inertia) *inertia = eps->n;
322: } else if (shift <= PETSC_MIN_REAL) {
323: if (inertia) *inertia = 0;
324: if (zeros) *zeros = 0;
325: } else {
326: /* If the shift is zero, perturb it to a very small positive value.
327: The goal is that the nonzero pattern is the same in all cases and reuse
328: the symbolic factorizations */
329: nzshift = (shift==0.0)? 10.0/PETSC_MAX_REAL: shift;
330: STSetShift(eps->st,nzshift);
331: STSetUp(eps->st);
332: STGetKSP(eps->st,&ksp);
333: KSPGetPC(ksp,&pc);
334: PCFactorGetMatrix(pc,&F);
335: MatGetInertia(F,inertia,zeros,NULL);
336: }
337: return(0);
338: }
340: /*
341: Dummy backtransform operation
342: */
343: static PetscErrorCode EPSBackTransform_Skip(EPS eps)
344: {
346: return(0);
347: }
349: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
350: {
351: PetscErrorCode ierr;
352: PetscBool issinv;
353: EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
354: EPS_SR sr,sr_loc,sr_glob;
355: PetscInt nEigs,dssz=1,i,zeros=0,off=0,method;
356: PetscMPIInt nproc,rank=0,aux;
357: PetscReal r;
358: MPI_Request req;
359: Mat A,B=NULL;
360: SlepcSC sc;
361: PetscInt flg=0;
362: DSParallelType ptype;
365: if (ctx->global) {
366: if (eps->intb >= PETSC_MAX_REAL && eps->inta <= PETSC_MIN_REAL) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"The defined computational interval should have at least one of their sides bounded");
367: if (!eps->ishermitian) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Spectrum slicing only available for symmetric/Hermitian eigenproblems");
368: if (eps->arbitrary) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Arbitrary selection of eigenpairs cannot be used with spectrum slicing");
369: PetscObjectTypeCompareAny((PetscObject)eps->st,&issinv,STSINVERT,STCAYLEY,"");
370: if (!issinv) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Shift-and-invert or Cayley ST is needed for spectrum slicing");
371: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL*1e-2; /* use tighter tolerance */
372: if (!eps->max_it) eps->max_it = 100;
373: if (ctx->nev==1) ctx->nev = PetscMin(40,eps->n); /* nev not set, use default value */
374: if (eps->n>10 && ctx->nev<10) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"nev cannot be less than 10 in spectrum slicing runs");
375: }
376: eps->ops->backtransform = EPSBackTransform_Skip;
378: /* create spectrum slicing context and initialize it */
379: EPSSliceResetSR(eps);
380: PetscNewLog(eps,&sr);
381: ctx->sr = sr;
382: sr->itsKs = 0;
383: sr->nleap = 0;
384: sr->nMAXCompl = eps->nev/4;
385: sr->iterCompl = eps->max_it/4;
386: sr->sPres = NULL;
387: sr->nS = 0;
389: if (ctx->npart==1 || ctx->global) {
390: /* check presence of ends and finding direction */
391: if ((eps->inta > PETSC_MIN_REAL && !(ctx->subintervals && ctx->subintervals[0]==ctx->subintervals[1])) || eps->intb >= PETSC_MAX_REAL) {
392: sr->int0 = eps->inta;
393: sr->int1 = eps->intb;
394: sr->dir = 1;
395: if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
396: sr->hasEnd = PETSC_FALSE;
397: } else sr->hasEnd = PETSC_TRUE;
398: } else {
399: sr->int0 = eps->intb;
400: sr->int1 = eps->inta;
401: sr->dir = -1;
402: sr->hasEnd = PetscNot(eps->inta <= PETSC_MIN_REAL);
403: }
404: }
405: if (ctx->global) {
406: /* prevent computation of factorization in global eps */
407: STSetTransform(eps->st,PETSC_FALSE);
408: EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
409: /* create subintervals and initialize auxiliary eps for slicing runs */
410: EPSSliceGetEPS(eps);
411: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
412: if (ctx->npart>1) {
413: if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
414: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
415: if (!rank) {
416: MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);
417: }
418: MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
419: PetscFree(ctx->nconv_loc);
420: PetscMalloc1(ctx->npart,&ctx->nconv_loc);
421: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
422: if (sr->dir<0) off = 1;
423: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
424: PetscMPIIntCast(sr_loc->numEigs,&aux);
425: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
426: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
427: } else {
428: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
429: if (!rank) {
430: PetscMPIIntCast(sr_loc->numEigs,&aux);
431: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
432: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
433: }
434: PetscMPIIntCast(ctx->npart,&aux);
435: MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
436: MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
437: }
438: nEigs = 0;
439: for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
440: } else {
441: nEigs = sr_loc->numEigs;
442: sr->inertia0 = sr_loc->inertia0;
443: }
444: sr->inertia1 = sr->inertia0+sr->dir*nEigs;
445: sr->numEigs = nEigs;
446: eps->nev = nEigs;
447: eps->ncv = nEigs;
448: eps->mpd = nEigs;
449: } else {
450: ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
451: sr_glob = ctx_glob->sr;
452: if (ctx->npart>1) {
453: sr->dir = sr_glob->dir;
454: sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
455: sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
456: if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
457: else sr->hasEnd = PETSC_TRUE;
458: }
460: /* compute inertia0 */
461: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
462: PetscOptionsGetInt(NULL,NULL,"-eps_krylovschur_hiteigenvalue",&flg,NULL);
463: if (zeros) { /* error in factorization */
464: if (sr->int0==ctx->eps->inta || sr->int0==ctx->eps->intb) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in the interval endpoint");
465: else if(ctx_glob->subintset && !flg) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
466: else {
467: if (flg==1) { /* idle subgroup */
468: sr->inertia0 = -1;
469: } else { /* perturb shift */
470: sr->int0 *= (1.0+SLICE_PTOL);
471: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
472: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",sr->int1);
473: }
474: }
475: }
476: if (ctx->npart>1) {
477: /* inertia1 is received from neighbour */
478: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
479: if (!rank) {
480: if ( sr->inertia0!=-1 && ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1)) ) { /* send inertia0 to neighbour0 */
481: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
482: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
483: }
484: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
485: MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
486: MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
487: }
488: if ( sr->inertia0==-1 && !(sr->dir>0 && ctx->subc->color==ctx->npart-1) && !(sr->dir<0 && ctx->subc->color==0)) {
489: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
490: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
491: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
492: }
493: }
494: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
495: MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
496: MPI_Bcast(&sr->int1,1,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
497: } else sr_glob->inertia1 = sr->inertia1;
498: }
500: /* last process in eps comm computes inertia1 */
501: if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
502: EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
503: if (zeros) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
504: if (!rank && sr->inertia0==-1) {
505: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
506: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
507: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
508: }
509: if (sr->hasEnd) {
510: sr->dir = -sr->dir; r = sr->int0; sr->int0 = sr->int1; sr->int1 = r;
511: i = sr->inertia0; sr->inertia0 = sr->inertia1; sr->inertia1 = i;
512: }
513: }
515: /* number of eigenvalues in interval */
516: sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
517: if (ctx->npart>1) {
518: /* memory allocate for subinterval eigenpairs */
519: EPSSliceAllocateSolution(eps,1);
520: }
521: dssz = eps->ncv+1;
522: if (sr->numEigs>0) {
523: DSGetSlepcSC(eps->ds,&sc);
524: sc->rg = NULL;
525: sc->comparison = SlepcCompareLargestMagnitude;
526: sc->comparisonctx = NULL;
527: sc->map = NULL;
528: sc->mapobj = NULL;
529: }
530: DSGetParallel(ctx->eps->ds,&ptype);
531: DSSetParallel(eps->ds,ptype);
532: DSGetMethod(ctx->eps->ds,&method);
533: DSSetMethod(eps->ds,method);
534: }
535: DSSetType(eps->ds,DSHEP);
536: DSSetCompact(eps->ds,PETSC_TRUE);
537: DSAllocate(eps->ds,dssz);
538: /* keep state of subcomm matrices to check that the user does not modify them */
539: EPSGetOperators(eps,&A,&B);
540: PetscObjectStateGet((PetscObject)A,&ctx->Astate);
541: PetscObjectGetId((PetscObject)A,&ctx->Aid);
542: if (B) {
543: PetscObjectStateGet((PetscObject)B,&ctx->Bstate);
544: PetscObjectGetId((PetscObject)B,&ctx->Bid);
545: } else {
546: ctx->Bstate=0;
547: ctx->Bid=0;
548: }
549: return(0);
550: }
552: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
553: {
554: PetscErrorCode ierr;
555: Vec v,vg,v_loc;
556: IS is1,is2;
557: VecScatter vec_sc;
558: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
559: PetscInt nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
560: PetscScalar *array;
561: EPS_SR sr_loc;
562: BV V_loc;
565: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
566: V_loc = sr_loc->V;
568: /* Gather parallel eigenvectors */
569: BVGetColumn(eps->V,0,&v);
570: VecGetOwnershipRange(v,&n0,&m0);
571: BVRestoreColumn(eps->V,0,&v);
572: BVGetColumn(ctx->eps->V,0,&v);
573: VecGetLocalSize(v,&nloc);
574: BVRestoreColumn(ctx->eps->V,0,&v);
575: PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
576: VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
577: idx = -1;
578: for (si=0;si<ctx->npart;si++) {
579: j = 0;
580: for (i=n0;i<m0;i++) {
581: idx1[j] = i;
582: idx2[j++] = i+eps->n*si;
583: }
584: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
585: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
586: BVGetColumn(eps->V,0,&v);
587: VecScatterCreate(v,is1,vg,is2,&vec_sc);
588: BVRestoreColumn(eps->V,0,&v);
589: ISDestroy(&is1);
590: ISDestroy(&is2);
591: for (i=0;i<ctx->nconv_loc[si];i++) {
592: BVGetColumn(eps->V,++idx,&v);
593: if (ctx->subc->color==si) {
594: BVGetColumn(V_loc,i,&v_loc);
595: VecGetArray(v_loc,&array);
596: VecPlaceArray(vg,array);
597: }
598: VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
599: VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
600: if (ctx->subc->color==si) {
601: VecResetArray(vg);
602: VecRestoreArray(v_loc,&array);
603: BVRestoreColumn(V_loc,i,&v_loc);
604: }
605: BVRestoreColumn(eps->V,idx,&v);
606: }
607: VecScatterDestroy(&vec_sc);
608: }
609: PetscFree2(idx1,idx2);
610: VecDestroy(&vg);
611: return(0);
612: }
614: /*
615: EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
616: */
617: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
618: {
619: PetscErrorCode ierr;
620: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
623: if (ctx->global && ctx->npart>1) {
624: EPSComputeVectors(ctx->eps);
625: EPSSliceGatherEigenVectors(eps);
626: }
627: return(0);
628: }
630: #define SWAP(a,b,t) {t=a;a=b;b=t;}
632: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
633: {
634: PetscErrorCode ierr;
635: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
636: PetscInt i=0,j,tmpi;
637: PetscReal v,tmpr;
638: EPS_shift s;
641: if (!eps->state) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Must call EPSSetUp() first");
642: if (!ctx->sr) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Only available in interval computations, see EPSSetInterval()");
643: if (!ctx->sr->s0) { /* EPSSolve not called yet */
644: *n = 2;
645: } else {
646: *n = 1;
647: s = ctx->sr->s0;
648: while (s) {
649: (*n)++;
650: s = s->neighb[1];
651: }
652: }
653: PetscMalloc1(*n,shifts);
654: PetscMalloc1(*n,inertias);
655: if (!ctx->sr->s0) { /* EPSSolve not called yet */
656: (*shifts)[0] = ctx->sr->int0;
657: (*shifts)[1] = ctx->sr->int1;
658: (*inertias)[0] = ctx->sr->inertia0;
659: (*inertias)[1] = ctx->sr->inertia1;
660: } else {
661: s = ctx->sr->s0;
662: while (s) {
663: (*shifts)[i] = s->value;
664: (*inertias)[i++] = s->inertia;
665: s = s->neighb[1];
666: }
667: (*shifts)[i] = ctx->sr->int1;
668: (*inertias)[i] = ctx->sr->inertia1;
669: }
670: /* remove possible duplicate in last position */
671: if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
672: /* sort result */
673: for (i=0;i<*n;i++) {
674: v = (*shifts)[i];
675: for (j=i+1;j<*n;j++) {
676: if (v > (*shifts)[j]) {
677: SWAP((*shifts)[i],(*shifts)[j],tmpr);
678: SWAP((*inertias)[i],(*inertias)[j],tmpi);
679: v = (*shifts)[i];
680: }
681: }
682: }
683: return(0);
684: }
686: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
687: {
688: PetscErrorCode ierr;
689: PetscMPIInt rank,nproc;
690: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
691: PetscInt i,idx,j;
692: PetscInt *perm_loc,off=0,*inertias_loc,ns;
693: PetscScalar *eigr_loc;
694: EPS_SR sr_loc;
695: PetscReal *shifts_loc;
696: PetscMPIInt *disp,*ns_loc,aux;
699: eps->nconv = 0;
700: for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
701: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
703: /* Gather the shifts used and the inertias computed */
704: EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
705: if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
706: if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
707: ns--;
708: for (i=0;i<ns;i++) {
709: inertias_loc[i] = inertias_loc[i+1];
710: shifts_loc[i] = shifts_loc[i+1];
711: }
712: }
713: PetscMalloc1(ctx->npart,&ns_loc);
714: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
715: PetscMPIIntCast(ns,&aux);
716: if (!rank) { MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank); }
717: PetscMPIIntCast(ctx->npart,&aux);
718: MPI_Bcast(ns_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
719: ctx->nshifts = 0;
720: for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
721: PetscFree(ctx->inertias);
722: PetscFree(ctx->shifts);
723: PetscMalloc1(ctx->nshifts,&ctx->inertias);
724: PetscMalloc1(ctx->nshifts,&ctx->shifts);
726: /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
727: eigr_loc = sr_loc->eigr;
728: perm_loc = sr_loc->perm;
729: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
730: PetscMalloc1(ctx->npart,&disp);
731: disp[0] = 0;
732: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
733: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
734: PetscMPIIntCast(sr_loc->numEigs,&aux);
735: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
736: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
737: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
738: PetscMPIIntCast(ns,&aux);
739: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
740: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
741: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
742: } else { /* subcommunicators with different size */
743: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
744: if (!rank) {
745: PetscMPIIntCast(sr_loc->numEigs,&aux);
746: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
747: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
748: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
749: PetscMPIIntCast(ns,&aux);
750: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
751: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
752: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
753: }
754: PetscMPIIntCast(eps->nconv,&aux);
755: MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,PetscSubcommChild(ctx->subc));
756: MPI_Bcast(eps->perm,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
757: MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
758: PetscMPIIntCast(ctx->nshifts,&aux);
759: MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
760: MPI_Bcast(&eps->its,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
761: }
762: /* Update global array eps->perm */
763: idx = ctx->nconv_loc[0];
764: for (i=1;i<ctx->npart;i++) {
765: off += ctx->nconv_loc[i-1];
766: for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
767: }
769: /* Gather parallel eigenvectors */
770: PetscFree(ns_loc);
771: PetscFree(disp);
772: PetscFree(shifts_loc);
773: PetscFree(inertias_loc);
774: return(0);
775: }
777: /*
778: Fills the fields of a shift structure
779: */
780: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
781: {
782: PetscErrorCode ierr;
783: EPS_shift s,*pending2;
784: PetscInt i;
785: EPS_SR sr;
786: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
789: sr = ctx->sr;
790: PetscNewLog(eps,&s);
791: s->value = val;
792: s->neighb[0] = neighb0;
793: if (neighb0) neighb0->neighb[1] = s;
794: s->neighb[1] = neighb1;
795: if (neighb1) neighb1->neighb[0] = s;
796: s->comp[0] = PETSC_FALSE;
797: s->comp[1] = PETSC_FALSE;
798: s->index = -1;
799: s->neigs = 0;
800: s->nconv[0] = s->nconv[1] = 0;
801: s->nsch[0] = s->nsch[1]=0;
802: /* Inserts in the stack of pending shifts */
803: /* If needed, the array is resized */
804: if (sr->nPend >= sr->maxPend) {
805: sr->maxPend *= 2;
806: PetscMalloc1(sr->maxPend,&pending2);
807: PetscLogObjectMemory((PetscObject)eps,sizeof(EPS_shift));
808: for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
809: PetscFree(sr->pending);
810: sr->pending = pending2;
811: }
812: sr->pending[sr->nPend++]=s;
813: return(0);
814: }
816: /* Prepare for Rational Krylov update */
817: static PetscErrorCode EPSPrepareRational(EPS eps)
818: {
819: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
820: PetscErrorCode ierr;
821: PetscInt dir,i,k,ld,nv;
822: PetscScalar *A;
823: EPS_SR sr = ctx->sr;
824: Vec v;
827: DSGetLeadingDimension(eps->ds,&ld);
828: dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
829: dir*=sr->dir;
830: k = 0;
831: for (i=0;i<sr->nS;i++) {
832: if (dir*PetscRealPart(sr->S[i])>0.0) {
833: sr->S[k] = sr->S[i];
834: sr->S[sr->nS+k] = sr->S[sr->nS+i];
835: BVGetColumn(sr->Vnext,k,&v);
836: BVCopyVec(eps->V,eps->nconv+i,v);
837: BVRestoreColumn(sr->Vnext,k,&v);
838: k++;
839: if (k>=sr->nS/2)break;
840: }
841: }
842: /* Copy to DS */
843: DSGetArray(eps->ds,DS_MAT_A,&A);
844: PetscMemzero(A,ld*ld*sizeof(PetscScalar));
845: for (i=0;i<k;i++) {
846: A[i*(1+ld)] = sr->S[i];
847: A[k+i*ld] = sr->S[sr->nS+i];
848: }
849: sr->nS = k;
850: DSRestoreArray(eps->ds,DS_MAT_A,&A);
851: DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL,NULL);
852: DSSetDimensions(eps->ds,nv,0,0,k);
853: /* Append u to V */
854: BVGetColumn(sr->Vnext,sr->nS,&v);
855: BVCopyVec(eps->V,sr->nv,v);
856: BVRestoreColumn(sr->Vnext,sr->nS,&v);
857: return(0);
858: }
860: /* Provides next shift to be computed */
861: static PetscErrorCode EPSExtractShift(EPS eps)
862: {
863: PetscErrorCode ierr;
864: PetscInt iner,zeros=0;
865: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
866: EPS_SR sr;
867: PetscReal newShift;
868: EPS_shift sPres;
871: sr = ctx->sr;
872: if (sr->nPend > 0) {
873: sr->sPrev = sr->sPres;
874: sr->sPres = sr->pending[--sr->nPend];
875: sPres = sr->sPres;
876: EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
877: if (zeros) {
878: newShift = sPres->value*(1.0+SLICE_PTOL);
879: if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
880: else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
881: EPSSliceGetInertia(eps,newShift,&iner,&zeros);
882: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",newShift);
883: sPres->value = newShift;
884: }
885: sr->sPres->inertia = iner;
886: eps->target = sr->sPres->value;
887: eps->reason = EPS_CONVERGED_ITERATING;
888: eps->its = 0;
889: } else sr->sPres = NULL;
890: return(0);
891: }
893: /*
894: Symmetric KrylovSchur adapted to spectrum slicing:
895: Allows searching an specific amount of eigenvalues in the subintervals left and right.
896: Returns whether the search has succeeded
897: */
898: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
899: {
900: PetscErrorCode ierr;
901: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
902: PetscInt i,k,l,ld,nv,*iwork,j;
903: Mat U;
904: PetscScalar *Q,*A;
905: PetscReal *a,*b,beta;
906: PetscBool breakdown;
907: PetscInt count0,count1;
908: PetscReal lambda;
909: EPS_shift sPres;
910: PetscBool complIterating;
911: PetscBool sch0,sch1;
912: PetscInt iterCompl=0,n0,n1;
913: EPS_SR sr = ctx->sr;
916: /* Spectrum slicing data */
917: sPres = sr->sPres;
918: complIterating =PETSC_FALSE;
919: sch1 = sch0 = PETSC_TRUE;
920: DSGetLeadingDimension(eps->ds,&ld);
921: PetscMalloc1(2*ld,&iwork);
922: count0=0;count1=0; /* Found on both sides */
923: if (sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
924: /* Rational Krylov */
925: DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
926: DSGetDimensions(eps->ds,NULL,NULL,NULL,&l,NULL);
927: DSSetDimensions(eps->ds,l+1,0,0,0);
928: BVSetActiveColumns(eps->V,0,l+1);
929: DSGetMat(eps->ds,DS_MAT_Q,&U);
930: BVMultInPlace(eps->V,U,0,l+1);
931: MatDestroy(&U);
932: } else {
933: /* Get the starting Lanczos vector */
934: EPSGetStartVector(eps,0,NULL);
935: l = 0;
936: }
937: /* Restart loop */
938: while (eps->reason == EPS_CONVERGED_ITERATING) {
939: eps->its++; sr->itsKs++;
940: /* Compute an nv-step Lanczos factorization */
941: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
942: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
943: b = a + ld;
944: EPSFullLanczos(eps,a,b,eps->nconv+l,&nv,&breakdown);
945: sr->nv = nv;
946: beta = b[nv-1];
947: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
948: DSSetDimensions(eps->ds,nv,0,eps->nconv,eps->nconv+l);
949: if (l==0) {
950: DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
951: } else {
952: DSSetState(eps->ds,DS_STATE_RAW);
953: }
954: BVSetActiveColumns(eps->V,eps->nconv,nv);
956: /* Solve projected problem and compute residual norm estimates */
957: if (eps->its == 1 && l > 0) {/* After rational update */
958: DSGetArray(eps->ds,DS_MAT_A,&A);
959: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
960: b = a + ld;
961: k = eps->nconv+l;
962: A[k*ld+k-1] = A[(k-1)*ld+k];
963: A[k*ld+k] = a[k];
964: for (j=k+1; j< nv; j++) {
965: A[j*ld+j] = a[j];
966: A[j*ld+j-1] = b[j-1] ;
967: A[(j-1)*ld+j] = b[j-1];
968: }
969: DSRestoreArray(eps->ds,DS_MAT_A,&A);
970: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
971: DSSolve(eps->ds,eps->eigr,NULL);
972: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
973: DSSetCompact(eps->ds,PETSC_TRUE);
974: } else { /* Restart */
975: DSSolve(eps->ds,eps->eigr,NULL);
976: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
977: }
978: DSSynchronize(eps->ds,eps->eigr,NULL);
980: /* Residual */
981: EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,1.0,&k);
982: /* Checking values obtained for completing */
983: for (i=0;i<k;i++) {
984: sr->back[i]=eps->eigr[i];
985: }
986: STBackTransform(eps->st,k,sr->back,eps->eigi);
987: count0=count1=0;
988: for (i=0;i<k;i++) {
989: lambda = PetscRealPart(sr->back[i]);
990: if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
991: if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
992: }
993: if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
994: else {
995: /* Checks completion */
996: if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
997: eps->reason = EPS_CONVERGED_TOL;
998: } else {
999: if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
1000: if (complIterating) {
1001: if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
1002: } else if (k >= eps->nev) {
1003: n0 = sPres->nsch[0]-count0;
1004: n1 = sPres->nsch[1]-count1;
1005: if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
1006: /* Iterating for completion*/
1007: complIterating = PETSC_TRUE;
1008: if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
1009: if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
1010: iterCompl = sr->iterCompl;
1011: } else eps->reason = EPS_CONVERGED_TOL;
1012: }
1013: }
1014: }
1015: /* Update l */
1016: if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
1017: else l = nv-k;
1018: if (breakdown) l=0;
1019: if (!ctx->lock && l>0 && eps->reason == EPS_CONVERGED_ITERATING) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
1021: if (eps->reason == EPS_CONVERGED_ITERATING) {
1022: if (breakdown) {
1023: /* Start a new Lanczos factorization */
1024: PetscInfo2(eps,"Breakdown in Krylov-Schur method (it=%D norm=%g)\n",eps->its,(double)beta);
1025: EPSGetStartVector(eps,k,&breakdown);
1026: if (breakdown) {
1027: eps->reason = EPS_DIVERGED_BREAKDOWN;
1028: PetscInfo(eps,"Unable to generate more start vectors\n");
1029: }
1030: } else {
1031: /* Prepare the Rayleigh quotient for restart */
1032: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
1033: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1034: b = a + ld;
1035: for (i=k;i<k+l;i++) {
1036: a[i] = PetscRealPart(eps->eigr[i]);
1037: b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
1038: }
1039: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
1040: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1041: }
1042: }
1043: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
1044: DSGetMat(eps->ds,DS_MAT_Q,&U);
1045: BVMultInPlace(eps->V,U,eps->nconv,k+l);
1046: MatDestroy(&U);
1048: /* Normalize u and append it to V */
1049: if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) {
1050: BVCopyColumn(eps->V,nv,k+l);
1051: }
1052: eps->nconv = k;
1053: if (eps->reason != EPS_CONVERGED_ITERATING) {
1054: /* Store approximated values for next shift */
1055: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1056: sr->nS = l;
1057: for (i=0;i<l;i++) {
1058: sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
1059: sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
1060: }
1061: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1062: }
1063: }
1064: /* Check for completion */
1065: for (i=0;i< eps->nconv; i++) {
1066: if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
1067: else sPres->nconv[0]++;
1068: }
1069: sPres->comp[0] = PetscNot(count0 < sPres->nsch[0]);
1070: sPres->comp[1] = PetscNot(count1 < sPres->nsch[1]);
1071: if (count0 > sPres->nsch[0] || count1 > sPres->nsch[1]) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1072: PetscFree(iwork);
1073: return(0);
1074: }
1076: /*
1077: Obtains value of subsequent shift
1078: */
1079: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
1080: {
1081: PetscReal lambda,d_prev;
1082: PetscInt i,idxP;
1083: EPS_SR sr;
1084: EPS_shift sPres,s;
1085: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1088: sr = ctx->sr;
1089: sPres = sr->sPres;
1090: if (sPres->neighb[side]) {
1091: /* Completing a previous interval */
1092: if (!sPres->neighb[side]->neighb[side] && sPres->neighb[side]->nconv[side]==0) { /* One of the ends might be too far from eigenvalues */
1093: if (side) *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[sr->indexEig-1]]))/2;
1094: else *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[0]]))/2;
1095: } else *newS=(sPres->value + sPres->neighb[side]->value)/2;
1096: } else { /* (Only for side=1). Creating a new interval. */
1097: if (sPres->neigs==0) {/* No value has been accepted*/
1098: if (sPres->neighb[0]) {
1099: /* Multiplying by 10 the previous distance */
1100: *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1101: sr->nleap++;
1102: /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1103: if (!sr->hasEnd && sr->nleap > 5) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unable to compute the wanted eigenvalues with open interval");
1104: } else { /* First shift */
1105: if (eps->nconv != 0) {
1106: /* Unaccepted values give information for next shift */
1107: idxP=0;/* Number of values left from shift */
1108: for (i=0;i<eps->nconv;i++) {
1109: lambda = PetscRealPart(eps->eigr[i]);
1110: if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1111: else break;
1112: }
1113: /* Avoiding subtraction of eigenvalues (might be the same).*/
1114: if (idxP>0) {
1115: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[0]))/(idxP+0.3);
1116: } else {
1117: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1118: }
1119: *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1120: } else { /* No values found, no information for next shift */
1121: SETERRQ(PetscObjectComm((PetscObject)eps),1,"First shift renders no information");
1122: }
1123: }
1124: } else { /* Accepted values found */
1125: sr->nleap = 0;
1126: /* Average distance of values in previous subinterval */
1127: s = sPres->neighb[0];
1128: while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1129: s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1130: }
1131: if (s) {
1132: d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1133: } else { /* First shift. Average distance obtained with values in this shift */
1134: /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1135: if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1136: d_prev = PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1137: } else {
1138: d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1139: }
1140: }
1141: /* Average distance is used for next shift by adding it to value on the right or to shift */
1142: if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1143: *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1144: } else { /* Last accepted value is on the left of shift. Adding to shift */
1145: *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1146: }
1147: }
1148: /* End of interval can not be surpassed */
1149: if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1150: }/* of neighb[side]==null */
1151: return(0);
1152: }
1154: /*
1155: Function for sorting an array of real values
1156: */
1157: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1158: {
1159: PetscReal re;
1160: PetscInt i,j,tmp;
1163: if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1164: /* Insertion sort */
1165: for (i=1;i<nr;i++) {
1166: re = PetscRealPart(r[perm[i]]);
1167: j = i-1;
1168: while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1169: tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1170: }
1171: }
1172: return(0);
1173: }
1175: /* Stores the pairs obtained since the last shift in the global arrays */
1176: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1177: {
1178: PetscErrorCode ierr;
1179: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1180: PetscReal lambda,err,norm;
1181: PetscInt i,count;
1182: PetscBool iscayley;
1183: EPS_SR sr = ctx->sr;
1184: EPS_shift sPres;
1185: Vec v,w;
1188: sPres = sr->sPres;
1189: sPres->index = sr->indexEig;
1190: count = sr->indexEig;
1191: /* Back-transform */
1192: STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1193: PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1194: /* Sort eigenvalues */
1195: sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1196: /* Values stored in global array */
1197: for (i=0;i<eps->nconv;i++) {
1198: lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1199: err = eps->errest[eps->perm[i]];
1201: if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1202: if (count>=sr->numEigs) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unexpected error in Spectrum Slicing");
1203: sr->eigr[count] = lambda;
1204: sr->errest[count] = err;
1205: /* Explicit purification */
1206: BVGetColumn(eps->V,eps->perm[i],&w);
1207: if (eps->purify) {
1208: BVGetColumn(sr->V,count,&v);
1209: STApply(eps->st,w,v);
1210: BVRestoreColumn(sr->V,count,&v);
1211: } else {
1212: BVInsertVec(sr->V,count,w);
1213: }
1214: BVRestoreColumn(eps->V,eps->perm[i],&w);
1215: BVNormColumn(sr->V,count,NORM_2,&norm);
1216: BVScaleColumn(sr->V,count,1.0/norm);
1217: count++;
1218: }
1219: }
1220: sPres->neigs = count - sr->indexEig;
1221: sr->indexEig = count;
1222: /* Global ordering array updating */
1223: sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1224: return(0);
1225: }
1227: static PetscErrorCode EPSLookForDeflation(EPS eps)
1228: {
1229: PetscErrorCode ierr;
1230: PetscReal val;
1231: PetscInt i,count0=0,count1=0;
1232: EPS_shift sPres;
1233: PetscInt ini,fin,k,idx0,idx1;
1234: EPS_SR sr;
1235: Vec v;
1236: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1239: sr = ctx->sr;
1240: sPres = sr->sPres;
1242: if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1243: else ini = 0;
1244: fin = sr->indexEig;
1245: /* Selection of ends for searching new values */
1246: if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1247: else sPres->ext[0] = sPres->neighb[0]->value;
1248: if (!sPres->neighb[1]) {
1249: if (sr->hasEnd) sPres->ext[1] = sr->int1;
1250: else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1251: } else sPres->ext[1] = sPres->neighb[1]->value;
1252: /* Selection of values between right and left ends */
1253: for (i=ini;i<fin;i++) {
1254: val=PetscRealPart(sr->eigr[sr->perm[i]]);
1255: /* Values to the right of left shift */
1256: if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1257: if ((sr->dir)*(val - sPres->value) < 0) count0++;
1258: else count1++;
1259: } else break;
1260: }
1261: /* The number of values on each side are found */
1262: if (sPres->neighb[0]) {
1263: sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1264: if (sPres->nsch[0]<0) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1265: } else sPres->nsch[0] = 0;
1267: if (sPres->neighb[1]) {
1268: sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1269: if (sPres->nsch[1]<0) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1270: } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);
1272: /* Completing vector of indexes for deflation */
1273: idx0 = ini;
1274: idx1 = ini+count0+count1;
1275: k=0;
1276: for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1277: BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1278: BVSetNumConstraints(sr->Vnext,k);
1279: for (i=0;i<k;i++) {
1280: BVGetColumn(sr->Vnext,-i-1,&v);
1281: BVCopyVec(sr->V,sr->idxDef[i],v);
1282: BVRestoreColumn(sr->Vnext,-i-1,&v);
1283: }
1285: /* For rational Krylov */
1286: if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) {
1287: EPSPrepareRational(eps);
1288: }
1289: eps->nconv = 0;
1290: /* Get rid of temporary Vnext */
1291: BVDestroy(&eps->V);
1292: eps->V = sr->Vnext;
1293: sr->Vnext = NULL;
1294: return(0);
1295: }
1297: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1298: {
1299: PetscErrorCode ierr;
1300: PetscInt i,lds,ti;
1301: PetscReal newS;
1302: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1303: EPS_SR sr=ctx->sr;
1304: Mat A,B=NULL;
1305: PetscObjectState Astate,Bstate=0;
1306: PetscObjectId Aid,Bid=0;
1309: PetscCitationsRegister(citation,&cited);
1310: if (ctx->global) {
1311: EPSSolve_KrylovSchur_Slice(ctx->eps);
1312: ctx->eps->state = EPS_STATE_SOLVED;
1313: eps->reason = EPS_CONVERGED_TOL;
1314: if (ctx->npart>1) {
1315: /* Gather solution from subsolvers */
1316: EPSSliceGatherSolution(eps);
1317: } else {
1318: eps->nconv = sr->numEigs;
1319: eps->its = ctx->eps->its;
1320: PetscFree(ctx->inertias);
1321: PetscFree(ctx->shifts);
1322: EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1323: }
1324: } else {
1325: if (ctx->npart==1) {
1326: sr->eigr = ctx->eps->eigr;
1327: sr->eigi = ctx->eps->eigi;
1328: sr->perm = ctx->eps->perm;
1329: sr->errest = ctx->eps->errest;
1330: sr->V = ctx->eps->V;
1331: }
1332: /* Check that the user did not modify subcomm matrices */
1333: EPSGetOperators(eps,&A,&B);
1334: PetscObjectStateGet((PetscObject)A,&Astate);
1335: PetscObjectGetId((PetscObject)A,&Aid);
1336: if (B) {
1337: PetscObjectStateGet((PetscObject)B,&Bstate);
1338: PetscObjectGetId((PetscObject)B,&Bid);
1339: }
1340: if (Astate!=ctx->Astate || (B && Bstate!=ctx->Bstate) || Aid!=ctx->Aid || (B && Bid!=ctx->Bid)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcomm matrices have been modified by user");
1341: /* Only with eigenvalues present in the interval ...*/
1342: if (sr->numEigs==0) {
1343: eps->reason = EPS_CONVERGED_TOL;
1344: return(0);
1345: }
1346: /* Array of pending shifts */
1347: sr->maxPend = 100; /* Initial size */
1348: sr->nPend = 0;
1349: PetscMalloc1(sr->maxPend,&sr->pending);
1350: PetscLogObjectMemory((PetscObject)eps,(sr->maxPend)*sizeof(EPS_shift));
1351: EPSCreateShift(eps,sr->int0,NULL,NULL);
1352: /* extract first shift */
1353: sr->sPrev = NULL;
1354: sr->sPres = sr->pending[--sr->nPend];
1355: sr->sPres->inertia = sr->inertia0;
1356: eps->target = sr->sPres->value;
1357: sr->s0 = sr->sPres;
1358: sr->indexEig = 0;
1359: /* Memory reservation for auxiliary variables */
1360: lds = PetscMin(eps->mpd,eps->ncv);
1361: PetscCalloc1(lds*lds,&sr->S);
1362: PetscMalloc1(eps->ncv,&sr->back);
1363: PetscLogObjectMemory((PetscObject)eps,(sr->numEigs+2*eps->ncv)*sizeof(PetscScalar));
1364: for (i=0;i<sr->numEigs;i++) {
1365: sr->eigr[i] = 0.0;
1366: sr->eigi[i] = 0.0;
1367: sr->errest[i] = 0.0;
1368: sr->perm[i] = i;
1369: }
1370: /* Vectors for deflation */
1371: PetscMalloc1(sr->numEigs,&sr->idxDef);
1372: PetscLogObjectMemory((PetscObject)eps,sr->numEigs*sizeof(PetscInt));
1373: sr->indexEig = 0;
1374: /* Main loop */
1375: while (sr->sPres) {
1376: /* Search for deflation */
1377: EPSLookForDeflation(eps);
1378: /* KrylovSchur */
1379: EPSKrylovSchur_Slice(eps);
1381: EPSStoreEigenpairs(eps);
1382: /* Select new shift */
1383: if (!sr->sPres->comp[1]) {
1384: EPSGetNewShiftValue(eps,1,&newS);
1385: EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1386: }
1387: if (!sr->sPres->comp[0]) {
1388: /* Completing earlier interval */
1389: EPSGetNewShiftValue(eps,0,&newS);
1390: EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1391: }
1392: /* Preparing for a new search of values */
1393: EPSExtractShift(eps);
1394: }
1396: /* Updating eps values prior to exit */
1397: PetscFree(sr->S);
1398: PetscFree(sr->idxDef);
1399: PetscFree(sr->pending);
1400: PetscFree(sr->back);
1401: BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1402: BVSetNumConstraints(sr->Vnext,0);
1403: BVDestroy(&eps->V);
1404: eps->V = sr->Vnext;
1405: eps->nconv = sr->indexEig;
1406: eps->reason = EPS_CONVERGED_TOL;
1407: eps->its = sr->itsKs;
1408: eps->nds = 0;
1409: if (sr->dir<0) {
1410: for (i=0;i<eps->nconv/2;i++) {
1411: ti = sr->perm[i]; sr->perm[i] = sr->perm[eps->nconv-1-i]; sr->perm[eps->nconv-1-i] = ti;
1412: }
1413: }
1414: }
1415: return(0);
1416: }