Actual source code: ks-slice.c
slepc-3.17.0 2022-03-31
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: SLEPc eigensolver: "krylovschur"
13: Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems
15: References:
17: [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
18: solving sparse symmetric generalized eigenproblems", SIAM J.
19: Matrix Anal. Appl. 15(1):228-272, 1994.
21: [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
22: on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
23: 2012.
24: */
26: #include <slepc/private/epsimpl.h>
27: #include "krylovschur.h"
29: static PetscBool cited = PETSC_FALSE;
30: static const char citation[] =
31: "@Article{slepc-slice,\n"
32: " author = \"C. Campos and J. E. Roman\",\n"
33: " title = \"Strategies for spectrum slicing based on restarted {Lanczos} methods\",\n"
34: " journal = \"Numer. Algorithms\",\n"
35: " volume = \"60\",\n"
36: " number = \"2\",\n"
37: " pages = \"279--295\",\n"
38: " year = \"2012,\"\n"
39: " doi = \"https://doi.org/10.1007/s11075-012-9564-z\"\n"
40: "}\n";
42: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON
44: static PetscErrorCode EPSSliceResetSR(EPS eps)
45: {
46: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
47: EPS_SR sr=ctx->sr;
48: EPS_shift s;
50: if (sr) {
51: if (ctx->npart>1) {
52: BVDestroy(&sr->V);
53: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
54: }
55: /* Reviewing list of shifts to free memory */
56: s = sr->s0;
57: if (s) {
58: while (s->neighb[1]) {
59: s = s->neighb[1];
60: PetscFree(s->neighb[0]);
61: }
62: PetscFree(s);
63: }
64: PetscFree(sr);
65: }
66: ctx->sr = NULL;
67: PetscFunctionReturn(0);
68: }
70: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
71: {
72: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
74: if (!ctx->global) PetscFunctionReturn(0);
75: /* Reset auxiliary EPS */
76: EPSSliceResetSR(ctx->eps);
77: EPSReset(ctx->eps);
78: EPSSliceResetSR(eps);
79: PetscFree(ctx->inertias);
80: PetscFree(ctx->shifts);
81: PetscFunctionReturn(0);
82: }
84: PetscErrorCode EPSDestroy_KrylovSchur_Slice(EPS eps)
85: {
86: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
88: if (!ctx->global) PetscFunctionReturn(0);
89: /* Destroy auxiliary EPS */
90: EPSReset_KrylovSchur_Slice(eps);
91: EPSDestroy(&ctx->eps);
92: if (ctx->npart>1) {
93: PetscSubcommDestroy(&ctx->subc);
94: if (ctx->commset) {
95: MPI_Comm_free(&ctx->commrank);
96: ctx->commset = PETSC_FALSE;
97: }
98: ISDestroy(&ctx->isrow);
99: ISDestroy(&ctx->iscol);
100: MatDestroyMatrices(1,&ctx->submata);
101: MatDestroyMatrices(1,&ctx->submatb);
102: }
103: PetscFree(ctx->subintervals);
104: PetscFree(ctx->nconv_loc);
105: PetscFunctionReturn(0);
106: }
108: /*
109: EPSSliceAllocateSolution - Allocate memory storage for common variables such
110: as eigenvalues and eigenvectors. The argument extra is used for methods
111: that require a working basis slightly larger than ncv.
112: */
113: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
114: {
115: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
116: PetscReal eta;
117: PetscInt k;
118: PetscLogDouble cnt;
119: BVType type;
120: BVOrthogType orthog_type;
121: BVOrthogRefineType orthog_ref;
122: BVOrthogBlockType ob_type;
123: Mat matrix;
124: Vec t;
125: EPS_SR sr = ctx->sr;
127: /* allocate space for eigenvalues and friends */
128: k = PetscMax(1,sr->numEigs);
129: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
130: PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
131: cnt = 2*k*sizeof(PetscScalar) + 2*k*sizeof(PetscReal) + k*sizeof(PetscInt);
132: PetscLogObjectMemory((PetscObject)eps,cnt);
134: /* allocate sr->V and transfer options from eps->V */
135: BVDestroy(&sr->V);
136: BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
137: PetscLogObjectParent((PetscObject)eps,(PetscObject)sr->V);
138: if (!eps->V) EPSGetBV(eps,&eps->V);
139: if (!((PetscObject)(eps->V))->type_name) BVSetType(sr->V,BVSVEC);
140: else {
141: BVGetType(eps->V,&type);
142: BVSetType(sr->V,type);
143: }
144: STMatCreateVecsEmpty(eps->st,&t,NULL);
145: BVSetSizesFromVec(sr->V,t,k);
146: VecDestroy(&t);
147: EPS_SetInnerProduct(eps);
148: BVGetMatrix(eps->V,&matrix,NULL);
149: BVSetMatrix(sr->V,matrix,PETSC_FALSE);
150: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
151: BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
152: PetscFunctionReturn(0);
153: }
155: static PetscErrorCode EPSSliceGetEPS(EPS eps)
156: {
157: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
158: BV V;
159: BVType type;
160: PetscReal eta;
161: BVOrthogType orthog_type;
162: BVOrthogRefineType orthog_ref;
163: BVOrthogBlockType ob_type;
164: PetscInt i;
165: PetscReal h,a,b;
166: PetscRandom rand;
167: EPS_SR sr=ctx->sr;
169: if (!ctx->eps) EPSKrylovSchurGetChildEPS(eps,&ctx->eps);
171: /* Determine subintervals */
172: if (ctx->npart==1) {
173: a = eps->inta; b = eps->intb;
174: } else {
175: if (!ctx->subintset) { /* uniform distribution if no set by user */
177: h = (eps->intb-eps->inta)/ctx->npart;
178: a = eps->inta+ctx->subc->color*h;
179: b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
180: PetscFree(ctx->subintervals);
181: PetscMalloc1(ctx->npart+1,&ctx->subintervals);
182: for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
183: ctx->subintervals[ctx->npart] = eps->intb;
184: } else {
185: a = ctx->subintervals[ctx->subc->color];
186: b = ctx->subintervals[ctx->subc->color+1];
187: }
188: }
189: EPSSetInterval(ctx->eps,a,b);
190: EPSSetConvergenceTest(ctx->eps,eps->conv);
191: EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
192: EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);
194: ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
195: ctx_local->detect = ctx->detect;
197: /* transfer options from eps->V */
198: EPSGetBV(ctx->eps,&V);
199: BVGetRandomContext(V,&rand); /* make sure the random context is available when duplicating */
200: if (!eps->V) EPSGetBV(eps,&eps->V);
201: if (!((PetscObject)(eps->V))->type_name) BVSetType(V,BVSVEC);
202: else {
203: BVGetType(eps->V,&type);
204: BVSetType(V,type);
205: }
206: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
207: BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);
209: ctx->eps->which = eps->which;
210: ctx->eps->max_it = eps->max_it;
211: ctx->eps->tol = eps->tol;
212: ctx->eps->purify = eps->purify;
213: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
214: EPSSetProblemType(ctx->eps,eps->problem_type);
215: EPSSetUp(ctx->eps);
216: ctx->eps->nconv = 0;
217: ctx->eps->its = 0;
218: for (i=0;i<ctx->eps->ncv;i++) {
219: ctx->eps->eigr[i] = 0.0;
220: ctx->eps->eigi[i] = 0.0;
221: ctx->eps->errest[i] = 0.0;
222: }
223: PetscFunctionReturn(0);
224: }
226: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
227: {
228: KSP ksp,kspr;
229: PC pc;
230: Mat F;
231: PetscReal nzshift=shift;
232: PetscBool flg;
234: if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
235: if (inertia) *inertia = eps->n;
236: } else if (shift <= PETSC_MIN_REAL) {
237: if (inertia) *inertia = 0;
238: if (zeros) *zeros = 0;
239: } else {
240: /* If the shift is zero, perturb it to a very small positive value.
241: The goal is that the nonzero pattern is the same in all cases and reuse
242: the symbolic factorizations */
243: nzshift = (shift==0.0)? 10.0/PETSC_MAX_REAL: shift;
244: STSetShift(eps->st,nzshift);
245: STGetKSP(eps->st,&ksp);
246: KSPGetPC(ksp,&pc);
247: PetscObjectTypeCompare((PetscObject)pc,PCREDUNDANT,&flg);
248: if (flg) {
249: PCRedundantGetKSP(pc,&kspr);
250: KSPGetPC(kspr,&pc);
251: }
252: PCFactorGetMatrix(pc,&F);
253: MatGetInertia(F,inertia,zeros,NULL);
254: }
255: if (inertia) PetscInfo(eps,"Computed inertia at shift %g: %" PetscInt_FMT "\n",(double)nzshift,*inertia);
256: PetscFunctionReturn(0);
257: }
259: /*
260: Dummy backtransform operation
261: */
262: static PetscErrorCode EPSBackTransform_Skip(EPS eps)
263: {
264: PetscFunctionReturn(0);
265: }
267: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
268: {
269: EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
270: EPS_SR sr,sr_loc,sr_glob;
271: PetscInt nEigs,dssz=1,i,zeros=0,off=0,method,hiteig=0;
272: PetscMPIInt nproc,rank=0,aux;
273: PetscReal r;
274: MPI_Request req;
275: Mat A,B=NULL;
276: DSParallelType ptype;
277: MPI_Comm child;
279: if (ctx->global) {
280: EPSCheckHermitianDefiniteCondition(eps,PETSC_TRUE," with spectrum slicing");
281: EPSCheckSinvertCayleyCondition(eps,PETSC_TRUE," with spectrum slicing");
285: EPSCheckUnsupportedCondition(eps,EPS_FEATURE_ARBITRARY | EPS_FEATURE_REGION | EPS_FEATURE_STOPPING,PETSC_TRUE," with spectrum slicing");
286: EPSCheckIgnoredCondition(eps,EPS_FEATURE_BALANCE,PETSC_TRUE," with spectrum slicing");
287: if (eps->tol==PETSC_DEFAULT) {
288: #if defined(PETSC_USE_REAL_SINGLE)
289: eps->tol = SLEPC_DEFAULT_TOL;
290: #else
291: /* use tighter tolerance */
292: eps->tol = SLEPC_DEFAULT_TOL*1e-2;
293: #endif
294: }
295: if (eps->max_it==PETSC_DEFAULT) eps->max_it = 100;
296: if (ctx->nev==1) ctx->nev = PetscMin(40,eps->n); /* nev not set, use default value */
298: }
299: eps->ops->backtransform = EPSBackTransform_Skip;
301: /* create spectrum slicing context and initialize it */
302: EPSSliceResetSR(eps);
303: PetscNewLog(eps,&sr);
304: ctx->sr = sr;
305: sr->itsKs = 0;
306: sr->nleap = 0;
307: sr->nMAXCompl = eps->nev/4;
308: sr->iterCompl = eps->max_it/4;
309: sr->sPres = NULL;
310: sr->nS = 0;
312: if (ctx->npart==1 || ctx->global) {
313: /* check presence of ends and finding direction */
314: if ((eps->inta > PETSC_MIN_REAL && !(ctx->subintervals && ctx->subintervals[0]==ctx->subintervals[1])) || eps->intb >= PETSC_MAX_REAL) {
315: sr->int0 = eps->inta;
316: sr->int1 = eps->intb;
317: sr->dir = 1;
318: if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
319: sr->hasEnd = PETSC_FALSE;
320: } else sr->hasEnd = PETSC_TRUE;
321: } else {
322: sr->int0 = eps->intb;
323: sr->int1 = eps->inta;
324: sr->dir = -1;
325: sr->hasEnd = PetscNot(eps->inta <= PETSC_MIN_REAL);
326: }
327: }
328: if (ctx->global) {
329: EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
330: /* create subintervals and initialize auxiliary eps for slicing runs */
331: EPSKrylovSchurGetChildEPS(eps,&ctx->eps);
332: /* prevent computation of factorization in global eps */
333: STSetTransform(eps->st,PETSC_FALSE);
334: EPSSliceGetEPS(eps);
335: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
336: if (ctx->npart>1) {
337: PetscSubcommGetChild(ctx->subc,&child);
338: if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
339: MPI_Comm_rank(child,&rank);
340: if (!rank) MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);
341: MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,child);
342: PetscFree(ctx->nconv_loc);
343: PetscMalloc1(ctx->npart,&ctx->nconv_loc);
344: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
345: if (sr->dir<0) off = 1;
346: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
347: PetscMPIIntCast(sr_loc->numEigs,&aux);
348: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
349: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
350: } else {
351: MPI_Comm_rank(child,&rank);
352: if (!rank) {
353: PetscMPIIntCast(sr_loc->numEigs,&aux);
354: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
355: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
356: }
357: PetscMPIIntCast(ctx->npart,&aux);
358: MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,child);
359: MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,child);
360: }
361: nEigs = 0;
362: for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
363: } else {
364: nEigs = sr_loc->numEigs;
365: sr->inertia0 = sr_loc->inertia0;
366: sr->dir = sr_loc->dir;
367: }
368: sr->inertia1 = sr->inertia0+sr->dir*nEigs;
369: sr->numEigs = nEigs;
370: eps->nev = nEigs;
371: eps->ncv = nEigs;
372: eps->mpd = nEigs;
373: } else {
374: ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
375: sr_glob = ctx_glob->sr;
376: if (ctx->npart>1) {
377: sr->dir = sr_glob->dir;
378: sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
379: sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
380: if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
381: else sr->hasEnd = PETSC_TRUE;
382: }
383: /* sets first shift */
384: STSetShift(eps->st,(sr->int0==0.0)?10.0/PETSC_MAX_REAL:sr->int0);
385: STSetUp(eps->st);
387: /* compute inertia0 */
388: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
389: /* undocumented option to control what to do when an eigenvalue is found:
390: - error out if it's the endpoint of the user-provided interval (or sub-interval)
391: - if it's an endpoint computed internally:
392: + if hiteig=0 error out
393: + else if hiteig=1 the subgroup that hit the eigenvalue does nothing
394: + otherwise the subgroup that hit the eigenvalue perturbs the shift and recomputes inertia
395: */
396: PetscOptionsGetInt(NULL,NULL,"-eps_krylovschur_hiteigenvalue",&hiteig,NULL);
397: if (zeros) { /* error in factorization */
400: if (hiteig==1) { /* idle subgroup */
401: sr->inertia0 = -1;
402: } else { /* perturb shift */
403: sr->int0 *= (1.0+SLICE_PTOL);
404: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
406: }
407: }
408: if (ctx->npart>1) {
409: PetscSubcommGetChild(ctx->subc,&child);
410: /* inertia1 is received from neighbour */
411: MPI_Comm_rank(child,&rank);
412: if (!rank) {
413: if (sr->inertia0!=-1 && ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1))) { /* send inertia0 to neighbour0 */
414: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
415: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
416: }
417: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
418: MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
419: MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
420: }
421: if (sr->inertia0==-1 && !(sr->dir>0 && ctx->subc->color==ctx->npart-1) && !(sr->dir<0 && ctx->subc->color==0)) {
422: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
423: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
424: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
425: }
426: }
427: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
428: MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,child);
429: MPI_Bcast(&sr->int1,1,MPIU_REAL,0,child);
430: } else sr_glob->inertia1 = sr->inertia1;
431: }
433: /* last process in eps comm computes inertia1 */
434: if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
435: EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
437: if (!rank && sr->inertia0==-1) {
438: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
439: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
440: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
441: }
442: if (sr->hasEnd) {
443: sr->dir = -sr->dir; r = sr->int0; sr->int0 = sr->int1; sr->int1 = r;
444: i = sr->inertia0; sr->inertia0 = sr->inertia1; sr->inertia1 = i;
445: }
446: }
448: /* number of eigenvalues in interval */
449: sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
450: if (ctx->npart>1) {
451: /* memory allocate for subinterval eigenpairs */
452: EPSSliceAllocateSolution(eps,1);
453: }
454: dssz = eps->ncv+1;
455: DSGetParallel(ctx->eps->ds,&ptype);
456: DSSetParallel(eps->ds,ptype);
457: DSGetMethod(ctx->eps->ds,&method);
458: DSSetMethod(eps->ds,method);
459: }
460: DSSetType(eps->ds,DSHEP);
461: DSSetCompact(eps->ds,PETSC_TRUE);
462: DSAllocate(eps->ds,dssz);
463: /* keep state of subcomm matrices to check that the user does not modify them */
464: EPSGetOperators(eps,&A,&B);
465: PetscObjectStateGet((PetscObject)A,&ctx->Astate);
466: PetscObjectGetId((PetscObject)A,&ctx->Aid);
467: if (B) {
468: PetscObjectStateGet((PetscObject)B,&ctx->Bstate);
469: PetscObjectGetId((PetscObject)B,&ctx->Bid);
470: } else {
471: ctx->Bstate=0;
472: ctx->Bid=0;
473: }
474: PetscFunctionReturn(0);
475: }
477: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
478: {
479: Vec v,vg,v_loc;
480: IS is1,is2;
481: VecScatter vec_sc;
482: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
483: PetscInt nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
484: PetscScalar *array;
485: EPS_SR sr_loc;
486: BV V_loc;
488: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
489: V_loc = sr_loc->V;
491: /* Gather parallel eigenvectors */
492: BVGetColumn(eps->V,0,&v);
493: VecGetOwnershipRange(v,&n0,&m0);
494: BVRestoreColumn(eps->V,0,&v);
495: BVGetColumn(ctx->eps->V,0,&v);
496: VecGetLocalSize(v,&nloc);
497: BVRestoreColumn(ctx->eps->V,0,&v);
498: PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
499: VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
500: idx = -1;
501: for (si=0;si<ctx->npart;si++) {
502: j = 0;
503: for (i=n0;i<m0;i++) {
504: idx1[j] = i;
505: idx2[j++] = i+eps->n*si;
506: }
507: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
508: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
509: BVGetColumn(eps->V,0,&v);
510: VecScatterCreate(v,is1,vg,is2,&vec_sc);
511: BVRestoreColumn(eps->V,0,&v);
512: ISDestroy(&is1);
513: ISDestroy(&is2);
514: for (i=0;i<ctx->nconv_loc[si];i++) {
515: BVGetColumn(eps->V,++idx,&v);
516: if (ctx->subc->color==si) {
517: BVGetColumn(V_loc,i,&v_loc);
518: VecGetArray(v_loc,&array);
519: VecPlaceArray(vg,array);
520: }
521: VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
522: VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
523: if (ctx->subc->color==si) {
524: VecResetArray(vg);
525: VecRestoreArray(v_loc,&array);
526: BVRestoreColumn(V_loc,i,&v_loc);
527: }
528: BVRestoreColumn(eps->V,idx,&v);
529: }
530: VecScatterDestroy(&vec_sc);
531: }
532: PetscFree2(idx1,idx2);
533: VecDestroy(&vg);
534: PetscFunctionReturn(0);
535: }
537: /*
538: EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
539: */
540: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
541: {
542: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
544: if (ctx->global && ctx->npart>1) {
545: EPSComputeVectors(ctx->eps);
546: EPSSliceGatherEigenVectors(eps);
547: }
548: PetscFunctionReturn(0);
549: }
551: #define SWAP(a,b,t) {t=a;a=b;b=t;}
553: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
554: {
555: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
556: PetscInt i=0,j,tmpi;
557: PetscReal v,tmpr;
558: EPS_shift s;
562: if (!ctx->sr->s0) { /* EPSSolve not called yet */
563: *n = 2;
564: } else {
565: *n = 1;
566: s = ctx->sr->s0;
567: while (s) {
568: (*n)++;
569: s = s->neighb[1];
570: }
571: }
572: PetscMalloc1(*n,shifts);
573: PetscMalloc1(*n,inertias);
574: if (!ctx->sr->s0) { /* EPSSolve not called yet */
575: (*shifts)[0] = ctx->sr->int0;
576: (*shifts)[1] = ctx->sr->int1;
577: (*inertias)[0] = ctx->sr->inertia0;
578: (*inertias)[1] = ctx->sr->inertia1;
579: } else {
580: s = ctx->sr->s0;
581: while (s) {
582: (*shifts)[i] = s->value;
583: (*inertias)[i++] = s->inertia;
584: s = s->neighb[1];
585: }
586: (*shifts)[i] = ctx->sr->int1;
587: (*inertias)[i] = ctx->sr->inertia1;
588: }
589: /* remove possible duplicate in last position */
590: if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
591: /* sort result */
592: for (i=0;i<*n;i++) {
593: v = (*shifts)[i];
594: for (j=i+1;j<*n;j++) {
595: if (v > (*shifts)[j]) {
596: SWAP((*shifts)[i],(*shifts)[j],tmpr);
597: SWAP((*inertias)[i],(*inertias)[j],tmpi);
598: v = (*shifts)[i];
599: }
600: }
601: }
602: PetscFunctionReturn(0);
603: }
605: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
606: {
607: PetscMPIInt rank,nproc,*disp,*ns_loc,aux;
608: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
609: PetscInt i,idx,j,*perm_loc,off=0,*inertias_loc,ns;
610: PetscScalar *eigr_loc;
611: EPS_SR sr_loc;
612: PetscReal *shifts_loc;
613: MPI_Comm child;
615: eps->nconv = 0;
616: for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
617: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
619: /* Gather the shifts used and the inertias computed */
620: EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
621: if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
622: if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
623: ns--;
624: for (i=0;i<ns;i++) {
625: inertias_loc[i] = inertias_loc[i+1];
626: shifts_loc[i] = shifts_loc[i+1];
627: }
628: }
629: PetscMalloc1(ctx->npart,&ns_loc);
630: PetscSubcommGetChild(ctx->subc,&child);
631: MPI_Comm_rank(child,&rank);
632: PetscMPIIntCast(ns,&aux);
633: if (!rank) MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank);
634: PetscMPIIntCast(ctx->npart,&aux);
635: MPI_Bcast(ns_loc,aux,MPI_INT,0,child);
636: ctx->nshifts = 0;
637: for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
638: PetscFree(ctx->inertias);
639: PetscFree(ctx->shifts);
640: PetscMalloc1(ctx->nshifts,&ctx->inertias);
641: PetscMalloc1(ctx->nshifts,&ctx->shifts);
643: /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
644: eigr_loc = sr_loc->eigr;
645: perm_loc = sr_loc->perm;
646: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
647: PetscMalloc1(ctx->npart,&disp);
648: disp[0] = 0;
649: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
650: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
651: PetscMPIIntCast(sr_loc->numEigs,&aux);
652: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
653: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
654: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
655: PetscMPIIntCast(ns,&aux);
656: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
657: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
658: MPIU_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
659: } else { /* subcommunicators with different size */
660: if (!rank) {
661: PetscMPIIntCast(sr_loc->numEigs,&aux);
662: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
663: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
664: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
665: PetscMPIIntCast(ns,&aux);
666: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
667: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
668: MPIU_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
669: }
670: PetscMPIIntCast(eps->nconv,&aux);
671: MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,child);
672: MPI_Bcast(eps->perm,aux,MPIU_INT,0,child);
673: MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,child);
674: PetscMPIIntCast(ctx->nshifts,&aux);
675: MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,child);
676: MPI_Bcast(&eps->its,1,MPIU_INT,0,child);
677: }
678: /* Update global array eps->perm */
679: idx = ctx->nconv_loc[0];
680: for (i=1;i<ctx->npart;i++) {
681: off += ctx->nconv_loc[i-1];
682: for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
683: }
685: /* Gather parallel eigenvectors */
686: PetscFree(ns_loc);
687: PetscFree(disp);
688: PetscFree(shifts_loc);
689: PetscFree(inertias_loc);
690: PetscFunctionReturn(0);
691: }
693: /*
694: Fills the fields of a shift structure
695: */
696: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
697: {
698: EPS_shift s,*pending2;
699: PetscInt i;
700: EPS_SR sr;
701: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
703: sr = ctx->sr;
704: if ((neighb0 && val==neighb0->value) || (neighb1 && val==neighb1->value)) {
705: sr->nPend++;
706: PetscFunctionReturn(0);
707: }
708: PetscNewLog(eps,&s);
709: s->value = val;
710: s->neighb[0] = neighb0;
711: if (neighb0) neighb0->neighb[1] = s;
712: s->neighb[1] = neighb1;
713: if (neighb1) neighb1->neighb[0] = s;
714: s->comp[0] = PETSC_FALSE;
715: s->comp[1] = PETSC_FALSE;
716: s->index = -1;
717: s->neigs = 0;
718: s->nconv[0] = s->nconv[1] = 0;
719: s->nsch[0] = s->nsch[1]=0;
720: /* Inserts in the stack of pending shifts */
721: /* If needed, the array is resized */
722: if (sr->nPend >= sr->maxPend) {
723: sr->maxPend *= 2;
724: PetscMalloc1(sr->maxPend,&pending2);
725: PetscLogObjectMemory((PetscObject)eps,sr->maxPend*sizeof(EPS_shift*));
726: for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
727: PetscFree(sr->pending);
728: sr->pending = pending2;
729: }
730: sr->pending[sr->nPend++]=s;
731: PetscFunctionReturn(0);
732: }
734: /* Prepare for Rational Krylov update */
735: static PetscErrorCode EPSPrepareRational(EPS eps)
736: {
737: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
738: PetscInt dir,i,k,ld,nv;
739: PetscScalar *A;
740: EPS_SR sr = ctx->sr;
741: Vec v;
743: DSGetLeadingDimension(eps->ds,&ld);
744: dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
745: dir*=sr->dir;
746: k = 0;
747: for (i=0;i<sr->nS;i++) {
748: if (dir*PetscRealPart(sr->S[i])>0.0) {
749: sr->S[k] = sr->S[i];
750: sr->S[sr->nS+k] = sr->S[sr->nS+i];
751: BVGetColumn(sr->Vnext,k,&v);
752: BVCopyVec(eps->V,eps->nconv+i,v);
753: BVRestoreColumn(sr->Vnext,k,&v);
754: k++;
755: if (k>=sr->nS/2)break;
756: }
757: }
758: /* Copy to DS */
759: DSGetArray(eps->ds,DS_MAT_A,&A);
760: PetscArrayzero(A,ld*ld);
761: for (i=0;i<k;i++) {
762: A[i*(1+ld)] = sr->S[i];
763: A[k+i*ld] = sr->S[sr->nS+i];
764: }
765: sr->nS = k;
766: DSRestoreArray(eps->ds,DS_MAT_A,&A);
767: DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL);
768: DSSetDimensions(eps->ds,nv,0,k);
769: /* Append u to V */
770: BVGetColumn(sr->Vnext,sr->nS,&v);
771: BVCopyVec(eps->V,sr->nv,v);
772: BVRestoreColumn(sr->Vnext,sr->nS,&v);
773: PetscFunctionReturn(0);
774: }
776: /* Provides next shift to be computed */
777: static PetscErrorCode EPSExtractShift(EPS eps)
778: {
779: PetscInt iner,zeros=0;
780: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
781: EPS_SR sr;
782: PetscReal newShift,diam,ptol;
783: EPS_shift sPres;
785: sr = ctx->sr;
786: if (sr->nPend > 0) {
787: if (sr->sPres==sr->pending[sr->nPend-1]) {
788: eps->reason = EPS_CONVERGED_ITERATING;
789: eps->its = 0;
790: sr->nPend--;
791: sr->sPres->rep = PETSC_TRUE;
792: PetscFunctionReturn(0);
793: }
794: sr->sPrev = sr->sPres;
795: sr->sPres = sr->pending[--sr->nPend];
796: sPres = sr->sPres;
797: EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
798: if (zeros) {
799: diam = PetscMin(PetscAbsReal(sPres->neighb[0]->value-sPres->value),PetscAbsReal(sPres->neighb[1]->value-sPres->value));
800: ptol = PetscMin(SLICE_PTOL,diam/2);
801: newShift = sPres->value*(1.0+ptol);
802: if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
803: else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
804: EPSSliceGetInertia(eps,newShift,&iner,&zeros);
806: sPres->value = newShift;
807: }
808: sr->sPres->inertia = iner;
809: eps->target = sr->sPres->value;
810: eps->reason = EPS_CONVERGED_ITERATING;
811: eps->its = 0;
812: } else sr->sPres = NULL;
813: PetscFunctionReturn(0);
814: }
816: /*
817: Symmetric KrylovSchur adapted to spectrum slicing:
818: Allows searching an specific amount of eigenvalues in the subintervals left and right.
819: Returns whether the search has succeeded
820: */
821: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
822: {
823: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
824: PetscInt i,k,l,ld,nv,*iwork,j,count0,count1,iterCompl=0,n0,n1;
825: Mat U,Op;
826: PetscScalar *Q,*A;
827: PetscReal *a,*b,beta,lambda;
828: EPS_shift sPres;
829: PetscBool breakdown,complIterating,sch0,sch1;
830: EPS_SR sr = ctx->sr;
832: /* Spectrum slicing data */
833: sPres = sr->sPres;
834: complIterating =PETSC_FALSE;
835: sch1 = sch0 = PETSC_TRUE;
836: DSGetLeadingDimension(eps->ds,&ld);
837: PetscMalloc1(2*ld,&iwork);
838: count0=0;count1=0; /* Found on both sides */
839: if (!sPres->rep && sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
840: /* Rational Krylov */
841: DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
842: DSGetDimensions(eps->ds,NULL,NULL,&l,NULL);
843: DSSetDimensions(eps->ds,l+1,0,0);
844: BVSetActiveColumns(eps->V,0,l+1);
845: DSGetMat(eps->ds,DS_MAT_Q,&U);
846: BVMultInPlace(eps->V,U,0,l+1);
847: MatDestroy(&U);
848: } else {
849: /* Get the starting Lanczos vector */
850: EPSGetStartVector(eps,0,NULL);
851: l = 0;
852: }
853: /* Restart loop */
854: while (eps->reason == EPS_CONVERGED_ITERATING) {
855: eps->its++; sr->itsKs++;
856: /* Compute an nv-step Lanczos factorization */
857: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
858: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
859: b = a + ld;
860: STGetOperator(eps->st,&Op);
861: BVMatLanczos(eps->V,Op,a,b,eps->nconv+l,&nv,&breakdown);
862: STRestoreOperator(eps->st,&Op);
863: sr->nv = nv;
864: beta = b[nv-1];
865: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
866: DSSetDimensions(eps->ds,nv,eps->nconv,eps->nconv+l);
867: if (l==0) DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
868: else DSSetState(eps->ds,DS_STATE_RAW);
869: BVSetActiveColumns(eps->V,eps->nconv,nv);
871: /* Solve projected problem and compute residual norm estimates */
872: if (eps->its == 1 && l > 0) {/* After rational update */
873: DSGetArray(eps->ds,DS_MAT_A,&A);
874: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
875: b = a + ld;
876: k = eps->nconv+l;
877: A[k*ld+k-1] = A[(k-1)*ld+k];
878: A[k*ld+k] = a[k];
879: for (j=k+1; j< nv; j++) {
880: A[j*ld+j] = a[j];
881: A[j*ld+j-1] = b[j-1] ;
882: A[(j-1)*ld+j] = b[j-1];
883: }
884: DSRestoreArray(eps->ds,DS_MAT_A,&A);
885: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
886: DSSolve(eps->ds,eps->eigr,NULL);
887: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
888: DSSetCompact(eps->ds,PETSC_TRUE);
889: } else { /* Restart */
890: DSSolve(eps->ds,eps->eigr,NULL);
891: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
892: }
893: DSSynchronize(eps->ds,eps->eigr,NULL);
895: /* Residual */
896: EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,0.0,1.0,&k);
897: /* Checking values obtained for completing */
898: for (i=0;i<k;i++) {
899: sr->back[i]=eps->eigr[i];
900: }
901: STBackTransform(eps->st,k,sr->back,eps->eigi);
902: count0=count1=0;
903: for (i=0;i<k;i++) {
904: lambda = PetscRealPart(sr->back[i]);
905: if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
906: if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
907: }
908: if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
909: else {
910: /* Checks completion */
911: if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
912: eps->reason = EPS_CONVERGED_TOL;
913: } else {
914: if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
915: if (complIterating) {
916: if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
917: } else if (k >= eps->nev) {
918: n0 = sPres->nsch[0]-count0;
919: n1 = sPres->nsch[1]-count1;
920: if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
921: /* Iterating for completion*/
922: complIterating = PETSC_TRUE;
923: if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
924: if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
925: iterCompl = sr->iterCompl;
926: } else eps->reason = EPS_CONVERGED_TOL;
927: }
928: }
929: }
930: /* Update l */
931: if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
932: else l = nv-k;
933: if (breakdown) l=0;
934: if (!ctx->lock && l>0 && eps->reason == EPS_CONVERGED_ITERATING) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
936: if (eps->reason == EPS_CONVERGED_ITERATING) {
937: if (breakdown) {
938: /* Start a new Lanczos factorization */
939: PetscInfo(eps,"Breakdown in Krylov-Schur method (it=%" PetscInt_FMT " norm=%g)\n",eps->its,(double)beta);
940: EPSGetStartVector(eps,k,&breakdown);
941: if (breakdown) {
942: eps->reason = EPS_DIVERGED_BREAKDOWN;
943: PetscInfo(eps,"Unable to generate more start vectors\n");
944: }
945: } else {
946: /* Prepare the Rayleigh quotient for restart */
947: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
948: DSGetArray(eps->ds,DS_MAT_Q,&Q);
949: b = a + ld;
950: for (i=k;i<k+l;i++) {
951: a[i] = PetscRealPart(eps->eigr[i]);
952: b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
953: }
954: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
955: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
956: }
957: }
958: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
959: DSGetMat(eps->ds,DS_MAT_Q,&U);
960: BVMultInPlace(eps->V,U,eps->nconv,k+l);
961: MatDestroy(&U);
963: /* Normalize u and append it to V */
964: if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) BVCopyColumn(eps->V,nv,k+l);
965: eps->nconv = k;
966: if (eps->reason != EPS_CONVERGED_ITERATING) {
967: /* Store approximated values for next shift */
968: DSGetArray(eps->ds,DS_MAT_Q,&Q);
969: sr->nS = l;
970: for (i=0;i<l;i++) {
971: sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
972: sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
973: }
974: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
975: }
976: }
977: /* Check for completion */
978: for (i=0;i< eps->nconv; i++) {
979: if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
980: else sPres->nconv[0]++;
981: }
982: sPres->comp[0] = PetscNot(count0 < sPres->nsch[0]);
983: sPres->comp[1] = PetscNot(count1 < sPres->nsch[1]);
984: PetscInfo(eps,"Lanczos: %" PetscInt_FMT " evals in [%g,%g]%s and %" PetscInt_FMT " evals in [%g,%g]%s\n",count0,(double)(sr->dir==1?sPres->ext[0]:sPres->value),(double)(sr->dir==1?sPres->value:sPres->ext[0]),sPres->comp[0]?"*":"",count1,(double)(sr->dir==1?sPres->value:sPres->ext[1]),(double)(sr->dir==1?sPres->ext[1]:sPres->value),sPres->comp[1]?"*":"");
986: PetscFree(iwork);
987: PetscFunctionReturn(0);
988: }
990: /*
991: Obtains value of subsequent shift
992: */
993: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
994: {
995: PetscReal lambda,d_prev;
996: PetscInt i,idxP;
997: EPS_SR sr;
998: EPS_shift sPres,s;
999: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1001: sr = ctx->sr;
1002: sPres = sr->sPres;
1003: if (sPres->neighb[side]) {
1004: /* Completing a previous interval */
1005: *newS = (sPres->value + sPres->neighb[side]->value)/2;
1006: if (PetscAbsReal(sPres->value - *newS)/PetscAbsReal(sPres->value)<=100*PETSC_SQRT_MACHINE_EPSILON) *newS = sPres->value;
1007: } else { /* (Only for side=1). Creating a new interval. */
1008: if (sPres->neigs==0) {/* No value has been accepted*/
1009: if (sPres->neighb[0]) {
1010: /* Multiplying by 10 the previous distance */
1011: *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1012: sr->nleap++;
1013: /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1015: } else { /* First shift */
1017: /* Unaccepted values give information for next shift */
1018: idxP=0;/* Number of values left from shift */
1019: for (i=0;i<eps->nconv;i++) {
1020: lambda = PetscRealPart(eps->eigr[i]);
1021: if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1022: else break;
1023: }
1024: /* Avoiding subtraction of eigenvalues (might be the same).*/
1025: if (idxP>0) {
1026: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[0]))/(idxP+0.3);
1027: } else {
1028: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1029: }
1030: *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1031: }
1032: } else { /* Accepted values found */
1033: sr->nleap = 0;
1034: /* Average distance of values in previous subinterval */
1035: s = sPres->neighb[0];
1036: while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1037: s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1038: }
1039: if (s) {
1040: d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1041: } else { /* First shift. Average distance obtained with values in this shift */
1042: /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1043: if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1044: d_prev = PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1045: } else {
1046: d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1047: }
1048: }
1049: /* Average distance is used for next shift by adding it to value on the right or to shift */
1050: if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1051: *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1052: } else { /* Last accepted value is on the left of shift. Adding to shift */
1053: *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1054: }
1055: }
1056: /* End of interval can not be surpassed */
1057: if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1058: }/* of neighb[side]==null */
1059: PetscFunctionReturn(0);
1060: }
1062: /*
1063: Function for sorting an array of real values
1064: */
1065: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1066: {
1067: PetscReal re;
1068: PetscInt i,j,tmp;
1070: if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1071: /* Insertion sort */
1072: for (i=1;i<nr;i++) {
1073: re = PetscRealPart(r[perm[i]]);
1074: j = i-1;
1075: while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1076: tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1077: }
1078: }
1079: PetscFunctionReturn(0);
1080: }
1082: /* Stores the pairs obtained since the last shift in the global arrays */
1083: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1084: {
1085: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1086: PetscReal lambda,err,norm;
1087: PetscInt i,count;
1088: PetscBool iscayley;
1089: EPS_SR sr = ctx->sr;
1090: EPS_shift sPres;
1091: Vec v,w;
1093: sPres = sr->sPres;
1094: sPres->index = sr->indexEig;
1095: count = sr->indexEig;
1096: /* Back-transform */
1097: STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1098: PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1099: /* Sort eigenvalues */
1100: sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1101: /* Values stored in global array */
1102: for (i=0;i<eps->nconv;i++) {
1103: lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1104: err = eps->errest[eps->perm[i]];
1106: if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1108: sr->eigr[count] = lambda;
1109: sr->errest[count] = err;
1110: /* Explicit purification */
1111: BVGetColumn(eps->V,eps->perm[i],&w);
1112: if (eps->purify) {
1113: BVGetColumn(sr->V,count,&v);
1114: STApply(eps->st,w,v);
1115: BVRestoreColumn(sr->V,count,&v);
1116: } else BVInsertVec(sr->V,count,w);
1117: BVRestoreColumn(eps->V,eps->perm[i],&w);
1118: BVNormColumn(sr->V,count,NORM_2,&norm);
1119: BVScaleColumn(sr->V,count,1.0/norm);
1120: count++;
1121: }
1122: }
1123: sPres->neigs = count - sr->indexEig;
1124: sr->indexEig = count;
1125: /* Global ordering array updating */
1126: sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1127: PetscFunctionReturn(0);
1128: }
1130: static PetscErrorCode EPSLookForDeflation(EPS eps)
1131: {
1132: PetscReal val;
1133: PetscInt i,count0=0,count1=0;
1134: EPS_shift sPres;
1135: PetscInt ini,fin,k,idx0,idx1;
1136: EPS_SR sr;
1137: Vec v;
1138: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1140: sr = ctx->sr;
1141: sPres = sr->sPres;
1143: if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1144: else ini = 0;
1145: fin = sr->indexEig;
1146: /* Selection of ends for searching new values */
1147: if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1148: else sPres->ext[0] = sPres->neighb[0]->value;
1149: if (!sPres->neighb[1]) {
1150: if (sr->hasEnd) sPres->ext[1] = sr->int1;
1151: else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1152: } else sPres->ext[1] = sPres->neighb[1]->value;
1153: /* Selection of values between right and left ends */
1154: for (i=ini;i<fin;i++) {
1155: val=PetscRealPart(sr->eigr[sr->perm[i]]);
1156: /* Values to the right of left shift */
1157: if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1158: if ((sr->dir)*(val - sPres->value) < 0) count0++;
1159: else count1++;
1160: } else break;
1161: }
1162: /* The number of values on each side are found */
1163: if (sPres->neighb[0]) {
1164: sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1166: } else sPres->nsch[0] = 0;
1168: if (sPres->neighb[1]) {
1169: sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1171: } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);
1173: /* Completing vector of indexes for deflation */
1174: idx0 = ini;
1175: idx1 = ini+count0+count1;
1176: k=0;
1177: for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1178: BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1179: BVSetNumConstraints(sr->Vnext,k);
1180: for (i=0;i<k;i++) {
1181: BVGetColumn(sr->Vnext,-i-1,&v);
1182: BVCopyVec(sr->V,sr->idxDef[i],v);
1183: BVRestoreColumn(sr->Vnext,-i-1,&v);
1184: }
1186: /* For rational Krylov */
1187: if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) EPSPrepareRational(eps);
1188: eps->nconv = 0;
1189: /* Get rid of temporary Vnext */
1190: BVDestroy(&eps->V);
1191: eps->V = sr->Vnext;
1192: sr->Vnext = NULL;
1193: PetscFunctionReturn(0);
1194: }
1196: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1197: {
1198: PetscInt i,lds,ti;
1199: PetscReal newS;
1200: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1201: EPS_SR sr=ctx->sr;
1202: Mat A,B=NULL;
1203: PetscObjectState Astate,Bstate=0;
1204: PetscObjectId Aid,Bid=0;
1206: PetscCitationsRegister(citation,&cited);
1207: if (ctx->global) {
1208: EPSSolve_KrylovSchur_Slice(ctx->eps);
1209: ctx->eps->state = EPS_STATE_SOLVED;
1210: eps->reason = EPS_CONVERGED_TOL;
1211: if (ctx->npart>1) {
1212: /* Gather solution from subsolvers */
1213: EPSSliceGatherSolution(eps);
1214: } else {
1215: eps->nconv = sr->numEigs;
1216: eps->its = ctx->eps->its;
1217: PetscFree(ctx->inertias);
1218: PetscFree(ctx->shifts);
1219: EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1220: }
1221: } else {
1222: if (ctx->npart==1) {
1223: sr->eigr = ctx->eps->eigr;
1224: sr->eigi = ctx->eps->eigi;
1225: sr->perm = ctx->eps->perm;
1226: sr->errest = ctx->eps->errest;
1227: sr->V = ctx->eps->V;
1228: }
1229: /* Check that the user did not modify subcomm matrices */
1230: EPSGetOperators(eps,&A,&B);
1231: PetscObjectStateGet((PetscObject)A,&Astate);
1232: PetscObjectGetId((PetscObject)A,&Aid);
1233: if (B) {
1234: PetscObjectStateGet((PetscObject)B,&Bstate);
1235: PetscObjectGetId((PetscObject)B,&Bid);
1236: }
1238: /* Only with eigenvalues present in the interval ...*/
1239: if (sr->numEigs==0) {
1240: eps->reason = EPS_CONVERGED_TOL;
1241: PetscFunctionReturn(0);
1242: }
1243: /* Array of pending shifts */
1244: sr->maxPend = 100; /* Initial size */
1245: sr->nPend = 0;
1246: PetscMalloc1(sr->maxPend,&sr->pending);
1247: PetscLogObjectMemory((PetscObject)eps,sr->maxPend*sizeof(EPS_shift*));
1248: EPSCreateShift(eps,sr->int0,NULL,NULL);
1249: /* extract first shift */
1250: sr->sPrev = NULL;
1251: sr->sPres = sr->pending[--sr->nPend];
1252: sr->sPres->inertia = sr->inertia0;
1253: eps->target = sr->sPres->value;
1254: sr->s0 = sr->sPres;
1255: sr->indexEig = 0;
1256: /* Memory reservation for auxiliary variables */
1257: lds = PetscMin(eps->mpd,eps->ncv);
1258: PetscCalloc1(lds*lds,&sr->S);
1259: PetscMalloc1(eps->ncv,&sr->back);
1260: PetscLogObjectMemory((PetscObject)eps,(lds*lds+eps->ncv)*sizeof(PetscScalar));
1261: for (i=0;i<sr->numEigs;i++) {
1262: sr->eigr[i] = 0.0;
1263: sr->eigi[i] = 0.0;
1264: sr->errest[i] = 0.0;
1265: sr->perm[i] = i;
1266: }
1267: /* Vectors for deflation */
1268: PetscMalloc1(sr->numEigs,&sr->idxDef);
1269: PetscLogObjectMemory((PetscObject)eps,sr->numEigs*sizeof(PetscInt));
1270: sr->indexEig = 0;
1271: /* Main loop */
1272: while (sr->sPres) {
1273: /* Search for deflation */
1274: EPSLookForDeflation(eps);
1275: /* KrylovSchur */
1276: EPSKrylovSchur_Slice(eps);
1278: EPSStoreEigenpairs(eps);
1279: /* Select new shift */
1280: if (!sr->sPres->comp[1]) {
1281: EPSGetNewShiftValue(eps,1,&newS);
1282: EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1283: }
1284: if (!sr->sPres->comp[0]) {
1285: /* Completing earlier interval */
1286: EPSGetNewShiftValue(eps,0,&newS);
1287: EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1288: }
1289: /* Preparing for a new search of values */
1290: EPSExtractShift(eps);
1291: }
1293: /* Updating eps values prior to exit */
1294: PetscFree(sr->S);
1295: PetscFree(sr->idxDef);
1296: PetscFree(sr->pending);
1297: PetscFree(sr->back);
1298: BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1299: BVSetNumConstraints(sr->Vnext,0);
1300: BVDestroy(&eps->V);
1301: eps->V = sr->Vnext;
1302: eps->nconv = sr->indexEig;
1303: eps->reason = EPS_CONVERGED_TOL;
1304: eps->its = sr->itsKs;
1305: eps->nds = 0;
1306: if (sr->dir<0) {
1307: for (i=0;i<eps->nconv/2;i++) {
1308: ti = sr->perm[i]; sr->perm[i] = sr->perm[eps->nconv-1-i]; sr->perm[eps->nconv-1-i] = ti;
1309: }
1310: }
1311: }
1312: PetscFunctionReturn(0);
1313: }