Actual source code: dshep.c
slepc-3.12.0 2019-09-30
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
15: {
19: DSAllocateMat_Private(ds,DS_MAT_A);
20: DSAllocateMat_Private(ds,DS_MAT_Q);
21: DSAllocateMatReal_Private(ds,DS_MAT_T);
22: PetscFree(ds->perm);
23: PetscMalloc1(ld,&ds->perm);
24: PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
25: return(0);
26: }
28: /* 0 l k n-1
29: -----------------------------------------
30: |* . . |
31: | * . . |
32: | * . . |
33: | * . . |
34: |. . . . o o |
35: | o o |
36: | o o |
37: | o o |
38: | o o |
39: | o o |
40: |. . . . o o o o o o o x |
41: | x x x |
42: | x x x |
43: | x x x |
44: | x x x |
45: | x x x |
46: | x x x |
47: | x x x |
48: | x x x|
49: | x x|
50: -----------------------------------------
51: */
53: static PetscErrorCode DSSwitchFormat_HEP(DS ds)
54: {
56: PetscReal *T = ds->rmat[DS_MAT_T];
57: PetscScalar *A = ds->mat[DS_MAT_A];
58: PetscInt i,n=ds->n,k=ds->k,ld=ds->ld;
61: /* switch from compact (arrow) to dense storage */
62: PetscArrayzero(A,ld*ld);
63: for (i=0;i<k;i++) {
64: A[i+i*ld] = T[i];
65: A[k+i*ld] = T[i+ld];
66: A[i+k*ld] = T[i+ld];
67: }
68: A[k+k*ld] = T[k];
69: for (i=k+1;i<n;i++) {
70: A[i+i*ld] = T[i];
71: A[i-1+i*ld] = T[i-1+ld];
72: A[i+(i-1)*ld] = T[i-1+ld];
73: }
74: if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
75: return(0);
76: }
78: PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
79: {
80: PetscErrorCode ierr;
81: PetscViewerFormat format;
82: PetscInt i,j,r,c,rows;
83: PetscReal value;
84: const char *methodname[] = {
85: "Implicit QR method (_steqr)",
86: "Relatively Robust Representations (_stevr)",
87: "Divide and Conquer method (_stedc)",
88: "Block Divide and Conquer method (dsbtdc)"
89: };
90: const int nmeth=sizeof(methodname)/sizeof(methodname[0]);
93: PetscViewerGetFormat(viewer,&format);
94: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
95: if (ds->bs>1) {
96: PetscViewerASCIIPrintf(viewer,"block size: %D\n",ds->bs);
97: }
98: if (ds->method<nmeth) {
99: PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
100: }
101: return(0);
102: }
103: if (ds->compact) {
104: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
105: rows = ds->extrarow? ds->n+1: ds->n;
106: if (format == PETSC_VIEWER_ASCII_MATLAB) {
107: PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",rows,ds->n);
108: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
109: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
110: for (i=0;i<ds->n;i++) {
111: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+1,(double)*(ds->rmat[DS_MAT_T]+i));
112: }
113: for (i=0;i<rows-1;i++) {
114: r = PetscMax(i+2,ds->k+1);
115: c = i+1;
116: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",r,c,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
117: if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
118: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",c,r,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
119: }
120: }
121: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
122: } else {
123: for (i=0;i<rows;i++) {
124: for (j=0;j<ds->n;j++) {
125: if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
126: else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
127: else if (i==j+1 && i>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+i-1);
128: else if (i+1==j && j>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+j-1);
129: else value = 0.0;
130: PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
131: }
132: PetscViewerASCIIPrintf(viewer,"\n");
133: }
134: }
135: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
136: PetscViewerFlush(viewer);
137: } else {
138: DSViewMat(ds,viewer,DS_MAT_A);
139: }
140: if (ds->state>DS_STATE_INTERMEDIATE) { DSViewMat(ds,viewer,DS_MAT_Q); }
141: return(0);
142: }
144: PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
145: {
146: PetscScalar *Q = ds->mat[DS_MAT_Q];
147: PetscInt ld = ds->ld,i;
151: switch (mat) {
152: case DS_MAT_X:
153: case DS_MAT_Y:
154: if (j) {
155: if (ds->state>=DS_STATE_CONDENSED) {
156: PetscArraycpy(ds->mat[mat]+(*j)*ld,Q+(*j)*ld,ld);
157: } else {
158: PetscArrayzero(ds->mat[mat]+(*j)*ld,ld);
159: *(ds->mat[mat]+(*j)+(*j)*ld) = 1.0;
160: }
161: } else {
162: if (ds->state>=DS_STATE_CONDENSED) {
163: PetscArraycpy(ds->mat[mat],Q,ld*ld);
164: } else {
165: PetscArrayzero(ds->mat[mat],ld*ld);
166: for (i=0;i<ds->n;i++) *(ds->mat[mat]+i+i*ld) = 1.0;
167: }
168: }
169: if (rnorm && j) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
170: break;
171: case DS_MAT_U:
172: case DS_MAT_VT:
173: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
174: break;
175: default:
176: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
177: }
178: return(0);
179: }
181: /*
182: ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form
184: [ d 0 0 0 e ]
185: [ 0 d 0 0 e ]
186: A = [ 0 0 d 0 e ]
187: [ 0 0 0 d e ]
188: [ e e e e d ]
190: to tridiagonal form
192: [ d e 0 0 0 ]
193: [ e d e 0 0 ]
194: T = Q'*A*Q = [ 0 e d e 0 ],
195: [ 0 0 e d e ]
196: [ 0 0 0 e d ]
198: where Q is an orthogonal matrix. Rutishauser's algorithm is used to
199: perform the reduction, which requires O(n**2) flops. The accumulation
200: of the orthogonal factor Q, however, requires O(n**3) flops.
202: Arguments
203: =========
205: N (input) INTEGER
206: The order of the matrix A. N >= 0.
208: D (input/output) DOUBLE PRECISION array, dimension (N)
209: On entry, the diagonal entries of the matrix A to be
210: reduced.
211: On exit, the diagonal entries of the reduced matrix T.
213: E (input/output) DOUBLE PRECISION array, dimension (N-1)
214: On entry, the off-diagonal entries of the matrix A to be
215: reduced.
216: On exit, the subdiagonal entries of the reduced matrix T.
218: Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
219: On exit, the orthogonal matrix Q.
221: LDQ (input) INTEGER
222: The leading dimension of the array Q.
224: Note
225: ====
226: Based on Fortran code contributed by Daniel Kressner
227: */
228: static PetscErrorCode ArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
229: {
230: #if defined(SLEPC_MISSING_LAPACK_LARTG)
232: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARTG - Lapack routine is unavailable");
233: #else
234: PetscBLASInt i,j,j2,one=1;
235: PetscReal c,s,p,off,temp;
238: if (n<=2) return(0);
240: for (j=0;j<n-2;j++) {
242: /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
243: temp = e[j+1];
244: PetscStackCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&e[j],&c,&s,&e[j+1]));
245: s = -s;
247: /* Apply rotation to diagonal elements */
248: temp = d[j+1];
249: e[j] = c*s*(temp-d[j]);
250: d[j+1] = s*s*d[j] + c*c*temp;
251: d[j] = c*c*d[j] + s*s*temp;
253: /* Apply rotation to Q */
254: j2 = j+2;
255: PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));
257: /* Chase newly introduced off-diagonal entry to the top left corner */
258: for (i=j-1;i>=0;i--) {
259: off = -s*e[i];
260: e[i] = c*e[i];
261: temp = e[i+1];
262: PetscStackCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&off,&c,&s,&e[i+1]));
263: s = -s;
264: temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
265: p = s*temp;
266: d[i+1] += p;
267: d[i] -= p;
268: e[i] = -e[i] - c*temp;
269: j2 = j+2;
270: PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
271: }
272: }
273: return(0);
274: #endif
275: }
277: /*
278: Reduce to tridiagonal form by means of ArrowTridiag.
279: */
280: static PetscErrorCode DSIntermediate_HEP(DS ds)
281: {
282: #if defined(SLEPC_MISSING_LAPACK_SYTRD) || defined(SLEPC_MISSING_LAPACK_ORGTR)
284: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYTRD/ORGTR - Lapack routine is unavailable");
285: #else
287: PetscInt i;
288: PetscBLASInt n1,n2,n3,lwork,info,l,n,ld,off;
289: PetscScalar *A,*Q,*work,*tau;
290: PetscReal *d,*e;
293: PetscBLASIntCast(ds->n,&n);
294: PetscBLASIntCast(ds->l,&l);
295: PetscBLASIntCast(ds->ld,&ld);
296: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
297: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
298: n3 = n1+n2;
299: off = l+l*ld;
300: A = ds->mat[DS_MAT_A];
301: Q = ds->mat[DS_MAT_Q];
302: d = ds->rmat[DS_MAT_T];
303: e = ds->rmat[DS_MAT_T]+ld;
304: PetscArrayzero(Q,ld*ld);
305: for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
307: if (ds->compact) {
309: if (ds->state<DS_STATE_INTERMEDIATE) ArrowTridiag(n1,d+l,e+l,Q+off,ld);
311: } else {
313: for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }
315: if (ds->state<DS_STATE_INTERMEDIATE) {
316: DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
317: DSAllocateWork_Private(ds,ld+ld*ld,0,0);
318: tau = ds->work;
319: work = ds->work+ld;
320: lwork = ld*ld;
321: PetscStackCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n3,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
322: SlepcCheckLapackInfo("sytrd",info);
323: PetscStackCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n3,Q+off,&ld,tau,work,&lwork,&info));
324: SlepcCheckLapackInfo("orgtr",info);
325: } else {
326: /* copy tridiagonal to d,e */
327: for (i=l;i<n;i++) d[i] = PetscRealPart(A[i+i*ld]);
328: for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
329: }
330: }
331: return(0);
332: #endif
333: }
335: PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
336: {
338: PetscInt n,l,i,*perm,ld=ds->ld;
339: PetscScalar *A;
340: PetscReal *d;
343: if (!ds->sc) return(0);
344: n = ds->n;
345: l = ds->l;
346: A = ds->mat[DS_MAT_A];
347: d = ds->rmat[DS_MAT_T];
348: perm = ds->perm;
349: if (!rr) {
350: DSSortEigenvaluesReal_Private(ds,d,perm);
351: } else {
352: DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE);
353: }
354: for (i=l;i<n;i++) wr[i] = d[perm[i]];
355: DSPermuteColumns_Private(ds,l,n,DS_MAT_Q,perm);
356: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
357: if (!ds->compact) {
358: for (i=l;i<n;i++) A[i+i*ld] = wr[i];
359: }
360: return(0);
361: }
363: PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
364: {
366: PetscInt i;
367: PetscBLASInt n,ld,incx=1;
368: PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0;
369: PetscReal *e,beta;
372: PetscBLASIntCast(ds->n,&n);
373: PetscBLASIntCast(ds->ld,&ld);
374: A = ds->mat[DS_MAT_A];
375: Q = ds->mat[DS_MAT_Q];
376: e = ds->rmat[DS_MAT_T]+ld;
378: if (ds->compact) {
379: beta = e[n-1]; /* in compact, we assume all entries are zero except the last one */
380: for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
381: ds->k = n;
382: } else {
383: DSAllocateWork_Private(ds,2*ld,0,0);
384: x = ds->work;
385: y = ds->work+ld;
386: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
387: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
388: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
389: ds->k = n;
390: }
391: return(0);
392: }
394: PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
395: {
396: #if defined(PETSC_MISSING_LAPACK_STEQR)
398: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEQR - Lapack routine is unavailable");
399: #else
401: PetscInt i;
402: PetscBLASInt n1,n2,n3,info,l,n,ld,off;
403: PetscScalar *Q,*A;
404: PetscReal *d,*e;
407: if (ds->bs>1) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
408: PetscBLASIntCast(ds->n,&n);
409: PetscBLASIntCast(ds->l,&l);
410: PetscBLASIntCast(ds->ld,&ld);
411: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
412: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
413: n3 = n1+n2;
414: off = l+l*ld;
415: Q = ds->mat[DS_MAT_Q];
416: A = ds->mat[DS_MAT_A];
417: d = ds->rmat[DS_MAT_T];
418: e = ds->rmat[DS_MAT_T]+ld;
420: /* Reduce to tridiagonal form */
421: DSIntermediate_HEP(ds);
423: /* Solve the tridiagonal eigenproblem */
424: for (i=0;i<l;i++) wr[i] = d[i];
426: DSAllocateWork_Private(ds,0,2*ld,0);
427: PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n3,d+l,e+l,Q+off,&ld,ds->rwork,&info));
428: SlepcCheckLapackInfo("steqr",info);
429: for (i=l;i<n;i++) wr[i] = d[i];
431: /* Create diagonal matrix as a result */
432: if (ds->compact) {
433: PetscArrayzero(e,n-1);
434: } else {
435: for (i=l;i<n;i++) {
436: PetscArrayzero(A+l+i*ld,n-l);
437: }
438: for (i=l;i<n;i++) A[i+i*ld] = d[i];
439: }
441: /* Set zero wi */
442: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
443: return(0);
444: #endif
445: }
447: PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
448: {
449: #if defined(SLEPC_MISSING_LAPACK_STEVR)
451: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEVR - Lapack routine is unavailable");
452: #else
454: PetscInt i;
455: PetscBLASInt n1,n2,n3,lwork,liwork,info,l,n,m,ld,off,il,iu,*isuppz;
456: PetscScalar *A,*Q,*W=NULL,one=1.0,zero=0.0;
457: PetscReal *d,*e,abstol=0.0,vl,vu;
458: #if defined(PETSC_USE_COMPLEX)
459: PetscInt j;
460: PetscReal *ritz;
461: #endif
464: if (ds->bs>1) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
465: PetscBLASIntCast(ds->n,&n);
466: PetscBLASIntCast(ds->l,&l);
467: PetscBLASIntCast(ds->ld,&ld);
468: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
469: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
470: n3 = n1+n2;
471: off = l+l*ld;
472: A = ds->mat[DS_MAT_A];
473: Q = ds->mat[DS_MAT_Q];
474: d = ds->rmat[DS_MAT_T];
475: e = ds->rmat[DS_MAT_T]+ld;
477: /* Reduce to tridiagonal form */
478: DSIntermediate_HEP(ds);
480: /* Solve the tridiagonal eigenproblem */
481: for (i=0;i<l;i++) wr[i] = d[i];
483: if (ds->state<DS_STATE_INTERMEDIATE) { /* Q contains useful info */
484: DSAllocateMat_Private(ds,DS_MAT_W);
485: DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
486: W = ds->mat[DS_MAT_W];
487: }
488: #if defined(PETSC_USE_COMPLEX)
489: DSAllocateMatReal_Private(ds,DS_MAT_Q);
490: #endif
491: lwork = 20*ld;
492: liwork = 10*ld;
493: DSAllocateWork_Private(ds,0,lwork+ld,liwork+2*ld);
494: isuppz = ds->iwork+liwork;
495: #if defined(PETSC_USE_COMPLEX)
496: ritz = ds->rwork+lwork;
497: PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,ds->rmat[DS_MAT_Q]+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
498: for (i=l;i<n;i++) wr[i] = ritz[i];
499: #else
500: PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
501: #endif
502: SlepcCheckLapackInfo("stevr",info);
503: #if defined(PETSC_USE_COMPLEX)
504: for (i=l;i<n;i++)
505: for (j=l;j<n;j++)
506: Q[i+j*ld] = (ds->rmat[DS_MAT_Q])[i+j*ld];
507: #endif
508: if (ds->state<DS_STATE_INTERMEDIATE) { /* accumulate previous Q */
509: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
510: DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
511: }
512: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
514: /* Create diagonal matrix as a result */
515: if (ds->compact) {
516: PetscArrayzero(e,n-1);
517: } else {
518: for (i=l;i<n;i++) {
519: PetscArrayzero(A+l+i*ld,n-l);
520: }
521: for (i=l;i<n;i++) A[i+i*ld] = d[i];
522: }
524: /* Set zero wi */
525: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
526: return(0);
527: #endif
528: }
530: PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
531: {
532: #if defined(SLEPC_MISSING_LAPACK_STEDC)
534: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEDC - Lapack routine is unavailable");
535: #else
537: PetscInt i;
538: PetscBLASInt n1,info,l,ld,off,lrwork,liwork;
539: PetscScalar *Q,*A;
540: PetscReal *d,*e;
541: #if defined(PETSC_USE_COMPLEX)
542: PetscBLASInt lwork;
543: PetscInt j;
544: #endif
547: if (ds->bs>1) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
548: PetscBLASIntCast(ds->l,&l);
549: PetscBLASIntCast(ds->ld,&ld);
550: PetscBLASIntCast(ds->n-ds->l,&n1);
551: off = l+l*ld;
552: Q = ds->mat[DS_MAT_Q];
553: A = ds->mat[DS_MAT_A];
554: d = ds->rmat[DS_MAT_T];
555: e = ds->rmat[DS_MAT_T]+ld;
557: /* Reduce to tridiagonal form */
558: DSIntermediate_HEP(ds);
560: /* Solve the tridiagonal eigenproblem */
561: for (i=0;i<l;i++) wr[i] = d[i];
563: lrwork = 5*n1*n1+3*n1+1;
564: liwork = 5*n1*n1+6*n1+6;
565: #if !defined(PETSC_USE_COMPLEX)
566: DSAllocateWork_Private(ds,0,lrwork,liwork);
567: PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
568: #else
569: lwork = ld*ld;
570: DSAllocateWork_Private(ds,lwork,lrwork,liwork);
571: PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
572: /* Fixing Lapack bug*/
573: for (j=ds->l;j<ds->n;j++)
574: for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
575: #endif
576: SlepcCheckLapackInfo("stedc",info);
577: for (i=l;i<ds->n;i++) wr[i] = d[i];
579: /* Create diagonal matrix as a result */
580: if (ds->compact) {
581: PetscArrayzero(e,ds->n-1);
582: } else {
583: for (i=l;i<ds->n;i++) {
584: PetscArrayzero(A+l+i*ld,ds->n-l);
585: }
586: for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
587: }
589: /* Set zero wi */
590: if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
591: return(0);
592: #endif
593: }
595: #if !defined(PETSC_USE_COMPLEX)
596: PetscErrorCode DSSolve_HEP_BDC(DS ds,PetscScalar *wr,PetscScalar *wi)
597: {
599: PetscBLASInt i,j,k,m,n,info,nblks,bs,ld,lde,lrwork,liwork,*ksizes,*iwork,mingapi;
600: PetscScalar *Q,*A;
601: PetscReal *D,*E,*d,*e,tol=PETSC_MACHINE_EPSILON/2,tau1=1e-16,tau2=1e-18,*rwork,mingap;
604: if (ds->l>0) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for l>1");
605: if (ds->compact) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for compact storage");
606: PetscBLASIntCast(ds->ld,&ld);
607: PetscBLASIntCast(ds->bs,&bs);
608: PetscBLASIntCast(ds->n,&n);
609: nblks = n/bs;
610: Q = ds->mat[DS_MAT_Q];
611: A = ds->mat[DS_MAT_A];
612: d = ds->rmat[DS_MAT_T];
613: e = ds->rmat[DS_MAT_T]+ld;
614: lrwork = 4*n*n+60*n+1;
615: liwork = 5*n+5*nblks-1;
616: lde = 2*bs+1;
617: DSAllocateWork_Private(ds,bs*n+lde*lde*(nblks-1),lrwork,nblks+liwork);
618: D = ds->work;
619: E = ds->work+bs*n;
620: rwork = ds->rwork;
621: ksizes = ds->iwork;
622: iwork = ds->iwork+nblks;
623: PetscArrayzero(iwork,liwork);
625: /* Copy matrix to block tridiagonal format */
626: j=0;
627: for (i=0;i<nblks;i++) {
628: ksizes[i]=bs;
629: for (k=0;k<bs;k++)
630: for (m=0;m<bs;m++)
631: D[k+m*bs+i*bs*bs] = PetscRealPart(A[j+k+(j+m)*n]);
632: j = j + bs;
633: }
634: j=0;
635: for (i=0;i<nblks-1;i++) {
636: for (k=0;k<bs;k++)
637: for (m=0;m<bs;m++)
638: E[k+m*lde+i*lde*lde] = PetscRealPart(A[j+bs+k+(j+m)*n]);
639: j = j + bs;
640: }
642: /* Solve the block tridiagonal eigenproblem */
643: BDC_dsbtdc_("D","A",n,nblks,ksizes,D,bs,bs,E,lde,lde,tol,tau1,tau2,d,
644: Q,n,rwork,lrwork,iwork,liwork,&mingap,&mingapi,&info,1,1);
645: for (i=0;i<ds->n;i++) wr[i] = d[i];
647: /* Create diagonal matrix as a result */
648: if (ds->compact) {
649: PetscArrayzero(e,ds->n-1);
650: } else {
651: for (i=0;i<ds->n;i++) {
652: PetscArrayzero(A+i*ld,ds->n);
653: }
654: for (i=0;i<ds->n;i++) A[i+i*ld] = wr[i];
655: }
657: /* Set zero wi */
658: if (wi) for (i=0;i<ds->n;i++) wi[i] = 0.0;
659: return(0);
660: }
661: #endif
663: PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n)
664: {
665: PetscInt i,ld=ds->ld,l=ds->l;
666: PetscScalar *A;
669: if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
670: A = ds->mat[DS_MAT_A];
671: if (!ds->compact && ds->extrarow && ds->k==ds->n) {
672: for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
673: }
674: if (ds->extrarow) ds->k = n;
675: else ds->k = 0;
676: ds->n = n;
677: return(0);
678: }
680: PetscErrorCode DSSynchronize_HEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
681: {
683: PetscInt ld=ds->ld,l=ds->l,k=0,kr=0;
684: PetscMPIInt n,rank,off=0,size,ldn,ld3;
687: if (ds->compact) kr = 3*ld;
688: else k = (ds->n-l)*ld;
689: if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
690: if (eigr) k += (ds->n-l);
691: DSAllocateWork_Private(ds,k+kr,0,0);
692: PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size);
693: PetscMPIIntCast(ds->n-l,&n);
694: PetscMPIIntCast(ld*(ds->n-l),&ldn);
695: PetscMPIIntCast(ld*3,&ld3);
696: MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
697: if (!rank) {
698: if (ds->compact) {
699: MPI_Pack(ds->rmat[DS_MAT_T],ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
700: } else {
701: MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
702: }
703: if (ds->state>DS_STATE_RAW) {
704: MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
705: }
706: if (eigr) {
707: MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
708: }
709: }
710: MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
711: if (rank) {
712: if (ds->compact) {
713: MPI_Unpack(ds->work,size,&off,ds->rmat[DS_MAT_T],ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds));
714: } else {
715: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
716: }
717: if (ds->state>DS_STATE_RAW) {
718: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
719: }
720: if (eigr) {
721: MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
722: }
723: }
724: return(0);
725: }
727: PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
728: {
729: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE)
731: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE - Lapack routines are unavailable");
732: #else
734: PetscScalar *work;
735: PetscReal *rwork;
736: PetscBLASInt *ipiv;
737: PetscBLASInt lwork,info,n,ld;
738: PetscReal hn,hin;
739: PetscScalar *A;
742: PetscBLASIntCast(ds->n,&n);
743: PetscBLASIntCast(ds->ld,&ld);
744: lwork = 8*ld;
745: DSAllocateWork_Private(ds,lwork,ld,ld);
746: work = ds->work;
747: rwork = ds->rwork;
748: ipiv = ds->iwork;
749: DSSwitchFormat_HEP(ds);
751: /* use workspace matrix W to avoid overwriting A */
752: DSAllocateMat_Private(ds,DS_MAT_W);
753: A = ds->mat[DS_MAT_W];
754: PetscArraycpy(A,ds->mat[DS_MAT_A],ds->ld*ds->ld);
756: /* norm of A */
757: hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
759: /* norm of inv(A) */
760: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
761: SlepcCheckLapackInfo("getrf",info);
762: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
763: SlepcCheckLapackInfo("getri",info);
764: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
766: *cond = hn*hin;
767: return(0);
768: #endif
769: }
771: PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
772: {
773: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
775: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routines are unavailable");
776: #else
778: PetscInt i,j,k=ds->k;
779: PetscScalar *Q,*A,*R,*tau,*work;
780: PetscBLASInt ld,n1,n0,lwork,info;
783: PetscBLASIntCast(ds->ld,&ld);
784: DSAllocateWork_Private(ds,ld*ld,0,0);
785: tau = ds->work;
786: work = ds->work+ld;
787: PetscBLASIntCast(ld*(ld-1),&lwork);
788: DSAllocateMat_Private(ds,DS_MAT_W);
789: A = ds->mat[DS_MAT_A];
790: Q = ds->mat[DS_MAT_Q];
791: R = ds->mat[DS_MAT_W];
793: /* copy I+alpha*A */
794: PetscArrayzero(Q,ld*ld);
795: PetscArrayzero(R,ld*ld);
796: for (i=0;i<k;i++) {
797: Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
798: Q[k+i*ld] = alpha*A[k+i*ld];
799: }
801: /* compute qr */
802: PetscBLASIntCast(k+1,&n1);
803: PetscBLASIntCast(k,&n0);
804: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
805: SlepcCheckLapackInfo("geqrf",info);
807: /* copy R from Q */
808: for (j=0;j<k;j++)
809: for (i=0;i<=j;i++)
810: R[i+j*ld] = Q[i+j*ld];
812: /* compute orthogonal matrix in Q */
813: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
814: SlepcCheckLapackInfo("orgqr",info);
816: /* compute the updated matrix of projected problem */
817: for (j=0;j<k;j++)
818: for (i=0;i<k+1;i++)
819: A[j*ld+i] = Q[i*ld+j];
820: alpha = -1.0/alpha;
821: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
822: for (i=0;i<k;i++)
823: A[ld*i+i] -= alpha;
824: return(0);
825: #endif
826: }
828: PetscErrorCode DSHermitian_HEP(DS ds,DSMatType m,PetscBool *flg)
829: {
831: if (m==DS_MAT_A && !ds->extrarow) *flg = PETSC_TRUE;
832: else *flg = PETSC_FALSE;
833: return(0);
834: }
836: SLEPC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
837: {
839: ds->ops->allocate = DSAllocate_HEP;
840: ds->ops->view = DSView_HEP;
841: ds->ops->vectors = DSVectors_HEP;
842: ds->ops->solve[0] = DSSolve_HEP_QR;
843: ds->ops->solve[1] = DSSolve_HEP_MRRR;
844: ds->ops->solve[2] = DSSolve_HEP_DC;
845: #if !defined(PETSC_USE_COMPLEX)
846: ds->ops->solve[3] = DSSolve_HEP_BDC;
847: #endif
848: ds->ops->sort = DSSort_HEP;
849: ds->ops->synchronize = DSSynchronize_HEP;
850: ds->ops->truncate = DSTruncate_HEP;
851: ds->ops->update = DSUpdateExtraRow_HEP;
852: ds->ops->cond = DSCond_HEP;
853: ds->ops->transrks = DSTranslateRKS_HEP;
854: ds->ops->hermitian = DSHermitian_HEP;
855: return(0);
856: }