Actual source code: dsnhep.c
slepc-3.12.0 2019-09-30
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
15: {
19: DSAllocateMat_Private(ds,DS_MAT_A);
20: DSAllocateMat_Private(ds,DS_MAT_Q);
21: PetscFree(ds->perm);
22: PetscMalloc1(ld,&ds->perm);
23: PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
24: return(0);
25: }
27: PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
28: {
29: PetscErrorCode ierr;
30: PetscViewerFormat format;
33: PetscViewerGetFormat(viewer,&format);
34: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return(0);
35: DSViewMat(ds,viewer,DS_MAT_A);
36: if (ds->state>DS_STATE_INTERMEDIATE) { DSViewMat(ds,viewer,DS_MAT_Q); }
37: if (ds->mat[DS_MAT_X]) { DSViewMat(ds,viewer,DS_MAT_X); }
38: if (ds->mat[DS_MAT_Y]) { DSViewMat(ds,viewer,DS_MAT_Y); }
39: return(0);
40: }
42: static PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
43: {
44: #if defined(PETSC_MISSING_LAPACK_GESVD)
46: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
47: #else
49: PetscInt i,j;
50: PetscBLASInt info,ld,n,n1,lwork,inc=1;
51: PetscScalar sdummy,done=1.0,zero=0.0;
52: PetscReal *sigma;
53: PetscBool iscomplex = PETSC_FALSE;
54: PetscScalar *A = ds->mat[DS_MAT_A];
55: PetscScalar *Q = ds->mat[DS_MAT_Q];
56: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
57: PetscScalar *W;
60: if (left) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for left vectors");
61: PetscBLASIntCast(ds->n,&n);
62: PetscBLASIntCast(ds->ld,&ld);
63: n1 = n+1;
64: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
65: if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
66: DSAllocateWork_Private(ds,5*ld,6*ld,0);
67: DSAllocateMat_Private(ds,DS_MAT_W);
68: W = ds->mat[DS_MAT_W];
69: lwork = 5*ld;
70: sigma = ds->rwork+5*ld;
72: /* build A-w*I in W */
73: for (j=0;j<n;j++)
74: for (i=0;i<=n;i++)
75: W[i+j*ld] = A[i+j*ld];
76: for (i=0;i<n;i++)
77: W[i+i*ld] -= A[(*k)+(*k)*ld];
79: /* compute SVD of W */
80: #if !defined(PETSC_USE_COMPLEX)
81: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
82: #else
83: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
84: #endif
85: SlepcCheckLapackInfo("gesvd",info);
87: /* the smallest singular value is the new error estimate */
88: if (rnorm) *rnorm = sigma[n-1];
90: /* update vector with right singular vector associated to smallest singular value,
91: accumulating the transformation matrix Q */
92: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
93: return(0);
94: #endif
95: }
97: static PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
98: {
100: PetscInt i;
103: for (i=0;i<ds->n;i++) {
104: DSVectors_NHEP_Refined_Some(ds,&i,NULL,left);
105: }
106: return(0);
107: }
109: static PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
110: {
111: #if defined(SLEPC_MISSING_LAPACK_TREVC)
113: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
114: #else
116: PetscInt i;
117: PetscBLASInt mm=1,mout,info,ld,n,*select,inc = 1;
118: PetscScalar tmp,done=1.0,zero=0.0;
119: PetscReal norm;
120: PetscBool iscomplex = PETSC_FALSE;
121: PetscScalar *A = ds->mat[DS_MAT_A];
122: PetscScalar *Q = ds->mat[DS_MAT_Q];
123: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
124: PetscScalar *Y;
127: PetscBLASIntCast(ds->n,&n);
128: PetscBLASIntCast(ds->ld,&ld);
129: DSAllocateWork_Private(ds,0,0,ld);
130: select = ds->iwork;
131: for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;
133: /* compute k-th eigenvector Y of A */
134: Y = X+(*k)*ld;
135: select[*k] = (PetscBLASInt)PETSC_TRUE;
136: #if !defined(PETSC_USE_COMPLEX)
137: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
138: mm = iscomplex? 2: 1;
139: if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
140: DSAllocateWork_Private(ds,3*ld,0,0);
141: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
142: #else
143: DSAllocateWork_Private(ds,2*ld,ld,0);
144: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
145: #endif
146: SlepcCheckLapackInfo("trevc",info);
147: if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");
149: /* accumulate and normalize eigenvectors */
150: if (ds->state>=DS_STATE_CONDENSED) {
151: PetscArraycpy(ds->work,Y,mout*ld);
152: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
153: #if !defined(PETSC_USE_COMPLEX)
154: if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
155: #endif
156: norm = BLASnrm2_(&n,Y,&inc);
157: #if !defined(PETSC_USE_COMPLEX)
158: if (iscomplex) {
159: tmp = BLASnrm2_(&n,Y+ld,&inc);
160: norm = SlepcAbsEigenvalue(norm,tmp);
161: }
162: #endif
163: tmp = 1.0 / norm;
164: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
165: #if !defined(PETSC_USE_COMPLEX)
166: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
167: #endif
168: }
170: /* set output arguments */
171: if (iscomplex) (*k)++;
172: if (rnorm) {
173: if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
174: else *rnorm = PetscAbsScalar(Y[n-1]);
175: }
176: return(0);
177: #endif
178: }
180: static PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
181: {
182: #if defined(SLEPC_MISSING_LAPACK_TREVC)
184: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
185: #else
187: PetscInt i;
188: PetscBLASInt n,ld,mout,info,inc = 1;
189: PetscBool iscomplex;
190: PetscScalar *X,*Y,*Z,*A = ds->mat[DS_MAT_A],tmp;
191: PetscReal norm;
192: const char *side,*back;
195: PetscBLASIntCast(ds->n,&n);
196: PetscBLASIntCast(ds->ld,&ld);
197: if (left) {
198: X = NULL;
199: Y = ds->mat[DS_MAT_Y];
200: side = "L";
201: } else {
202: X = ds->mat[DS_MAT_X];
203: Y = NULL;
204: side = "R";
205: }
206: Z = left? Y: X;
207: if (ds->state>=DS_STATE_CONDENSED) {
208: /* DSSolve() has been called, backtransform with matrix Q */
209: back = "B";
210: PetscArraycpy(Z,ds->mat[DS_MAT_Q],ld*ld);
211: } else back = "A";
212: #if !defined(PETSC_USE_COMPLEX)
213: DSAllocateWork_Private(ds,3*ld,0,0);
214: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
215: #else
216: DSAllocateWork_Private(ds,2*ld,ld,0);
217: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
218: #endif
219: SlepcCheckLapackInfo("trevc",info);
221: /* normalize eigenvectors */
222: for (i=0;i<n;i++) {
223: iscomplex = (i<n-1 && A[i+1+i*ld]!=0.0)? PETSC_TRUE: PETSC_FALSE;
224: norm = BLASnrm2_(&n,Z+i*ld,&inc);
225: #if !defined(PETSC_USE_COMPLEX)
226: if (iscomplex) {
227: tmp = BLASnrm2_(&n,Z+(i+1)*ld,&inc);
228: norm = SlepcAbsEigenvalue(norm,tmp);
229: }
230: #endif
231: tmp = 1.0 / norm;
232: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+i*ld,&inc));
233: #if !defined(PETSC_USE_COMPLEX)
234: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+(i+1)*ld,&inc));
235: #endif
236: if (iscomplex) i++;
237: }
238: return(0);
239: #endif
240: }
242: PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
243: {
247: switch (mat) {
248: case DS_MAT_X:
249: if (ds->refined) {
250: if (!ds->extrarow) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Refined vectors require activating the extra row");
251: if (j) {
252: DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE);
253: } else {
254: DSVectors_NHEP_Refined_All(ds,PETSC_FALSE);
255: }
256: } else {
257: if (j) {
258: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE);
259: } else {
260: DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE);
261: }
262: }
263: break;
264: case DS_MAT_Y:
265: if (ds->refined) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
266: if (j) {
267: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE);
268: } else {
269: DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE);
270: }
271: break;
272: case DS_MAT_U:
273: case DS_MAT_VT:
274: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
275: break;
276: default:
277: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
278: }
279: if (ds->state < DS_STATE_CONDENSED) {
280: DSSetState(ds,DS_STATE_CONDENSED);
281: }
282: return(0);
283: }
285: static PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
286: {
287: #if defined(PETSC_MISSING_LAPACK_TRSEN)
289: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable");
290: #else
292: PetscInt i;
293: PetscBLASInt info,n,ld,mout,lwork,*selection;
294: PetscScalar *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
295: #if !defined(PETSC_USE_COMPLEX)
296: PetscBLASInt *iwork,liwork;
297: #endif
300: if (!k) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_WRONG,"Must supply argument k");
301: PetscBLASIntCast(ds->n,&n);
302: PetscBLASIntCast(ds->ld,&ld);
303: #if !defined(PETSC_USE_COMPLEX)
304: lwork = n;
305: liwork = 1;
306: DSAllocateWork_Private(ds,lwork,0,liwork+n);
307: work = ds->work;
308: lwork = ds->lwork;
309: selection = ds->iwork;
310: iwork = ds->iwork + n;
311: liwork = ds->liwork - n;
312: #else
313: lwork = 1;
314: DSAllocateWork_Private(ds,lwork,0,n);
315: work = ds->work;
316: selection = ds->iwork;
317: #endif
318: /* Compute the selected eigenvalue to be in the leading position */
319: DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
320: PetscArrayzero(selection,n);
321: for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
322: #if !defined(PETSC_USE_COMPLEX)
323: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
324: #else
325: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
326: #endif
327: SlepcCheckLapackInfo("trsen",info);
328: *k = mout;
329: return(0);
330: #endif
331: }
333: static PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
334: {
335: #if defined(SLEPC_MISSING_LAPACK_TREXC)
337: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
338: #else
340: PetscScalar re;
341: PetscInt i,j,pos,result;
342: PetscBLASInt ifst,ilst,info,n,ld;
343: PetscScalar *T = ds->mat[DS_MAT_A];
344: PetscScalar *Q = ds->mat[DS_MAT_Q];
345: #if !defined(PETSC_USE_COMPLEX)
346: PetscScalar *work,im;
347: #endif
350: PetscBLASIntCast(ds->n,&n);
351: PetscBLASIntCast(ds->ld,&ld);
352: #if !defined(PETSC_USE_COMPLEX)
353: DSAllocateWork_Private(ds,ld,0,0);
354: work = ds->work;
355: #endif
356: /* selection sort */
357: for (i=ds->l;i<n-1;i++) {
358: re = wr[i];
359: #if !defined(PETSC_USE_COMPLEX)
360: im = wi[i];
361: #endif
362: pos = 0;
363: j=i+1; /* j points to the next eigenvalue */
364: #if !defined(PETSC_USE_COMPLEX)
365: if (im != 0) j=i+2;
366: #endif
367: /* find minimum eigenvalue */
368: for (;j<n;j++) {
369: #if !defined(PETSC_USE_COMPLEX)
370: SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);
371: #else
372: SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);
373: #endif
374: if (result > 0) {
375: re = wr[j];
376: #if !defined(PETSC_USE_COMPLEX)
377: im = wi[j];
378: #endif
379: pos = j;
380: }
381: #if !defined(PETSC_USE_COMPLEX)
382: if (wi[j] != 0) j++;
383: #endif
384: }
385: if (pos) {
386: /* interchange blocks */
387: PetscBLASIntCast(pos+1,&ifst);
388: PetscBLASIntCast(i+1,&ilst);
389: #if !defined(PETSC_USE_COMPLEX)
390: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
391: #else
392: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
393: #endif
394: SlepcCheckLapackInfo("trexc",info);
395: /* recover original eigenvalues from T matrix */
396: for (j=i;j<n;j++) {
397: wr[j] = T[j+j*ld];
398: #if !defined(PETSC_USE_COMPLEX)
399: if (j<n-1 && T[j+1+j*ld] != 0.0) {
400: /* complex conjugate eigenvalue */
401: wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
402: PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
403: wr[j+1] = wr[j];
404: wi[j+1] = -wi[j];
405: j++;
406: } else {
407: wi[j] = 0.0;
408: }
409: #endif
410: }
411: }
412: #if !defined(PETSC_USE_COMPLEX)
413: if (wi[i] != 0) i++;
414: #endif
415: }
416: return(0);
417: #endif
418: }
420: PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
421: {
425: if (!rr || wr == rr) {
426: DSSort_NHEP_Total(ds,wr,wi);
427: } else {
428: DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k);
429: }
430: return(0);
431: }
433: static PetscErrorCode DSSortWithPermutation_NHEP(DS ds,PetscInt *perm,PetscScalar *wr,PetscScalar *wi)
434: {
435: #if defined(SLEPC_MISSING_LAPACK_TREXC)
437: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
438: #else
440: PetscInt i,j,pos,inc=1;
441: PetscBLASInt ifst,ilst,info,n,ld;
442: PetscScalar *T = ds->mat[DS_MAT_A];
443: PetscScalar *Q = ds->mat[DS_MAT_Q];
444: #if !defined(PETSC_USE_COMPLEX)
445: PetscScalar *work;
446: #endif
449: PetscBLASIntCast(ds->n,&n);
450: PetscBLASIntCast(ds->ld,&ld);
451: #if !defined(PETSC_USE_COMPLEX)
452: DSAllocateWork_Private(ds,ld,0,0);
453: work = ds->work;
454: #endif
455: for (i=ds->l;i<n-1;i++) {
456: pos = perm[i];
457: #if !defined(PETSC_USE_COMPLEX)
458: inc = (pos<n-1 && T[pos+1+pos*ld] != 0.0)? 2: 1;
459: #endif
460: if (pos!=i) {
461: #if !defined(PETSC_USE_COMPLEX)
462: if ((T[pos+(pos-1)*ld] != 0.0 && perm[i+1]!=pos-1) || (T[pos+1+pos*ld] != 0.0 && perm[i+1]!=pos+1))
463: SETERRQ1(PETSC_COMM_SELF,1,"Invalid permutation due to a 2x2 block at position %D",pos);
464: #endif
466: /* interchange blocks */
467: PetscBLASIntCast(pos+1,&ifst);
468: PetscBLASIntCast(i+1,&ilst);
469: #if !defined(PETSC_USE_COMPLEX)
470: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
471: #else
472: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
473: #endif
474: SlepcCheckLapackInfo("trexc",info);
475: for (j=i+1;j<n;j++) {
476: if (perm[j]>=i && perm[j]<pos) perm[j]+=inc;
477: }
478: perm[i] = i;
479: if (inc==2) perm[i+1] = i+1;
480: }
481: if (inc==2) i++;
482: }
483: /* recover original eigenvalues from T matrix */
484: for (j=ds->l;j<n;j++) {
485: wr[j] = T[j+j*ld];
486: #if !defined(PETSC_USE_COMPLEX)
487: if (j<n-1 && T[j+1+j*ld] != 0.0) {
488: /* complex conjugate eigenvalue */
489: wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) * PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
490: wr[j+1] = wr[j];
491: wi[j+1] = -wi[j];
492: j++;
493: } else {
494: wi[j] = 0.0;
495: }
496: #endif
497: }
498: return(0);
499: #endif
500: }
502: PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
503: {
505: PetscInt i;
506: PetscBLASInt n,ld,incx=1;
507: PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0;
510: PetscBLASIntCast(ds->n,&n);
511: PetscBLASIntCast(ds->ld,&ld);
512: A = ds->mat[DS_MAT_A];
513: Q = ds->mat[DS_MAT_Q];
514: DSAllocateWork_Private(ds,2*ld,0,0);
515: x = ds->work;
516: y = ds->work+ld;
517: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
518: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
519: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
520: ds->k = n;
521: return(0);
522: }
524: /*
525: Reduce a matrix A to upper Hessenberg form Q'*A*Q, where Q is an orthogonal matrix.
526: The result overwrites A. Matrix A has the form
528: [ S | * ]
529: A = [-------]
530: [ R | H ]
532: where S is an upper (quasi-)triangular matrix of order k, H is an upper Hessenberg
533: matrix of order n-k, and R is all zeros except the first row (the arrow).
534: The algorithm uses elementary reflectors to annihilate entries in the arrow, and
535: then proceeds upwards.
536: If ilo>1, then it is assumed that the first ilo-1 entries of the arrow are zero, and
537: hence the first ilo-1 rows and columns of Q are set to the identity matrix.
539: Required workspace is 2*n.
540: */
541: static PetscErrorCode ArrowHessenberg(PetscBLASInt n,PetscBLASInt k,PetscBLASInt ilo,PetscScalar *A,PetscBLASInt lda,PetscScalar *Q,PetscBLASInt ldq,PetscScalar *work)
542: {
543: #if defined(SLEPC_MISSING_LAPACK_LARFG) || defined(SLEPC_MISSING_LAPACK_LARF)
545: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
546: #else
547: PetscBLASInt i,j,n0,m,inc=1,incn=-1;
548: PetscScalar t,*v=work,*w=work+n,tau,tauc;
551: m = n-ilo+1;
552: for (i=k;i>ilo;i--) {
553: for (j=0;j<=i-ilo;j++) v[j] = A[i+(i-j-1)*lda]; /* _larfg does not allow negative inc, so use buffer */
554: n0 = i-ilo+1;
555: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,v,v+1,&inc,&tau));
556: for (j=1;j<=i-ilo;j++) v[j] = PetscConj(v[j]);
557: tauc = PetscConj(tau);
558: A[i+(i-1)*lda] = v[0];
559: v[0] = 1.0;
560: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&i,&n0,v,&incn,&tauc,A+(ilo-1)*lda,&lda,w));
561: for (j=1;j<=i-ilo;j++) A[i+(i-j-1)*lda] = 0.0;
562: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,A+ilo-1+(ilo-1)*lda,&lda,w));
563: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,Q+ilo-1+(ilo-1)*ldq,&ldq,w));
564: }
565: /* trivial in-place transposition of Q */
566: for (j=ilo-1;j<k;j++) {
567: for (i=j;i<k;i++) {
568: t = Q[i+j*ldq];
569: if (i!=j) Q[i+j*ldq] = PetscConj(Q[j+i*ldq]);
570: Q[j+i*ldq] = PetscConj(t);
571: }
572: }
573: return(0);
574: #endif
575: }
577: PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
578: {
579: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
581: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
582: #else
584: PetscScalar *work,*tau;
585: PetscInt i,j;
586: PetscBLASInt ilo,lwork,info,n,k,ld;
587: PetscScalar *A = ds->mat[DS_MAT_A];
588: PetscScalar *Q = ds->mat[DS_MAT_Q];
591: #if !defined(PETSC_USE_COMPLEX)
593: #endif
594: PetscBLASIntCast(ds->n,&n);
595: PetscBLASIntCast(ds->ld,&ld);
596: PetscBLASIntCast(ds->l+1,&ilo);
597: PetscBLASIntCast(ds->k,&k);
598: DSAllocateWork_Private(ds,ld+6*ld,0,0);
599: tau = ds->work;
600: work = ds->work+ld;
601: lwork = 6*ld;
603: /* initialize orthogonal matrix */
604: PetscArrayzero(Q,ld*ld);
605: for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
606: if (n==1) { /* quick return */
607: wr[0] = A[0];
608: wi[0] = 0.0;
609: return(0);
610: }
612: /* reduce to upper Hessenberg form */
613: if (ds->state<DS_STATE_INTERMEDIATE) {
614: if (PETSC_FALSE && k>0) {
615: ArrowHessenberg(n,k,ilo,A,ld,Q,ld,work);
616: } else {
617: PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
618: SlepcCheckLapackInfo("gehrd",info);
619: for (j=0;j<n-1;j++) {
620: for (i=j+2;i<n;i++) {
621: Q[i+j*ld] = A[i+j*ld];
622: A[i+j*ld] = 0.0;
623: }
624: }
625: PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
626: SlepcCheckLapackInfo("orghr",info);
627: }
628: }
630: /* compute the (real) Schur form */
631: #if !defined(PETSC_USE_COMPLEX)
632: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
633: for (j=0;j<ds->l;j++) {
634: if (j==n-1 || A[j+1+j*ld] == 0.0) {
635: /* real eigenvalue */
636: wr[j] = A[j+j*ld];
637: wi[j] = 0.0;
638: } else {
639: /* complex eigenvalue */
640: wr[j] = A[j+j*ld];
641: wr[j+1] = A[j+j*ld];
642: wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
643: PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
644: wi[j+1] = -wi[j];
645: j++;
646: }
647: }
648: #else
649: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
650: if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
651: #endif
652: SlepcCheckLapackInfo("hseqr",info);
653: return(0);
654: #endif
655: }
657: PetscErrorCode DSSynchronize_NHEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
658: {
660: PetscInt ld=ds->ld,l=ds->l,k;
661: PetscMPIInt n,rank,off=0,size,ldn;
664: k = (ds->n-l)*ld;
665: if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
666: if (eigr) k += ds->n-l;
667: if (eigi) k += ds->n-l;
668: DSAllocateWork_Private(ds,k,0,0);
669: PetscMPIIntCast(k*sizeof(PetscScalar),&size);
670: PetscMPIIntCast(ds->n-l,&n);
671: PetscMPIIntCast(ld*(ds->n-l),&ldn);
672: MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
673: if (!rank) {
674: MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
675: if (ds->state>DS_STATE_RAW) {
676: MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
677: }
678: if (eigr) {
679: MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
680: }
681: if (eigi) {
682: MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
683: }
684: }
685: MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
686: if (rank) {
687: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
688: if (ds->state>DS_STATE_RAW) {
689: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
690: }
691: if (eigr) {
692: MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
693: }
694: if (eigi) {
695: MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
696: }
697: }
698: return(0);
699: }
701: PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n)
702: {
703: PetscInt i,newn,ld=ds->ld,l=ds->l;
704: PetscScalar *A;
707: if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
708: A = ds->mat[DS_MAT_A];
709: /* be careful not to break a diagonal 2x2 block */
710: if (A[n+(n-1)*ld]==0.0) newn = n;
711: else {
712: if (n<ds->n-1) newn = n+1;
713: else newn = n-1;
714: }
715: if (ds->extrarow && ds->k==ds->n) {
716: /* copy entries of extra row to the new position, then clean last row */
717: for (i=l;i<newn;i++) A[newn+i*ld] = A[ds->n+i*ld];
718: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
719: }
720: ds->k = 0;
721: ds->n = newn;
722: return(0);
723: }
725: PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
726: {
727: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
729: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
730: #else
732: PetscScalar *work;
733: PetscReal *rwork;
734: PetscBLASInt *ipiv;
735: PetscBLASInt lwork,info,n,ld;
736: PetscReal hn,hin;
737: PetscScalar *A;
740: PetscBLASIntCast(ds->n,&n);
741: PetscBLASIntCast(ds->ld,&ld);
742: lwork = 8*ld;
743: DSAllocateWork_Private(ds,lwork,ld,ld);
744: work = ds->work;
745: rwork = ds->rwork;
746: ipiv = ds->iwork;
748: /* use workspace matrix W to avoid overwriting A */
749: DSAllocateMat_Private(ds,DS_MAT_W);
750: A = ds->mat[DS_MAT_W];
751: PetscArraycpy(A,ds->mat[DS_MAT_A],ds->ld*ds->ld);
753: /* norm of A */
754: if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
755: else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);
757: /* norm of inv(A) */
758: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
759: SlepcCheckLapackInfo("getrf",info);
760: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
761: SlepcCheckLapackInfo("getri",info);
762: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
764: *cond = hn*hin;
765: return(0);
766: #endif
767: }
769: PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
770: {
771: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
773: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
774: #else
776: PetscInt i,j;
777: PetscBLASInt *ipiv,info,n,ld,one=1,ncol;
778: PetscScalar *A,*B,*Q,*g=gin,*ghat;
779: PetscScalar done=1.0,dmone=-1.0,dzero=0.0;
780: PetscReal gnorm;
783: PetscBLASIntCast(ds->n,&n);
784: PetscBLASIntCast(ds->ld,&ld);
785: A = ds->mat[DS_MAT_A];
787: if (!recover) {
789: DSAllocateWork_Private(ds,0,0,ld);
790: ipiv = ds->iwork;
791: if (!g) {
792: DSAllocateWork_Private(ds,ld,0,0);
793: g = ds->work;
794: }
795: /* use workspace matrix W to factor A-tau*eye(n) */
796: DSAllocateMat_Private(ds,DS_MAT_W);
797: B = ds->mat[DS_MAT_W];
798: PetscArraycpy(B,A,ld*ld);
800: /* Vector g initialy stores b = beta*e_n^T */
801: PetscArrayzero(g,n);
802: g[n-1] = beta;
804: /* g = (A-tau*eye(n))'\b */
805: for (i=0;i<n;i++) B[i+i*ld] -= tau;
806: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
807: SlepcCheckLapackInfo("getrf",info);
808: PetscLogFlops(2.0*n*n*n/3.0);
809: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
810: SlepcCheckLapackInfo("getrs",info);
811: PetscLogFlops(2.0*n*n-n);
813: /* A = A + g*b' */
814: for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta;
816: } else { /* recover */
818: DSAllocateWork_Private(ds,ld,0,0);
819: ghat = ds->work;
820: Q = ds->mat[DS_MAT_Q];
822: /* g^ = -Q(:,idx)'*g */
823: PetscBLASIntCast(ds->l+ds->k,&ncol);
824: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));
826: /* A = A + g^*b' */
827: for (i=0;i<ds->l+ds->k;i++)
828: for (j=ds->l;j<ds->l+ds->k;j++)
829: A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;
831: /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
832: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
833: }
835: /* Compute gamma factor */
836: if (gamma) {
837: gnorm = 0.0;
838: for (i=0;i<n;i++) gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
839: *gamma = PetscSqrtReal(1.0+gnorm);
840: }
841: return(0);
842: #endif
843: }
845: SLEPC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
846: {
848: ds->ops->allocate = DSAllocate_NHEP;
849: ds->ops->view = DSView_NHEP;
850: ds->ops->vectors = DSVectors_NHEP;
851: ds->ops->solve[0] = DSSolve_NHEP;
852: ds->ops->sort = DSSort_NHEP;
853: ds->ops->sortperm = DSSortWithPermutation_NHEP;
854: ds->ops->synchronize = DSSynchronize_NHEP;
855: ds->ops->truncate = DSTruncate_NHEP;
856: ds->ops->update = DSUpdateExtraRow_NHEP;
857: ds->ops->cond = DSCond_NHEP;
858: ds->ops->transharm = DSTranslateHarmonic_NHEP;
859: return(0);
860: }