Actual source code: dshep.c

slepc-3.12.0 2019-09-30
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
 15: {

 19:   DSAllocateMat_Private(ds,DS_MAT_A);
 20:   DSAllocateMat_Private(ds,DS_MAT_Q);
 21:   DSAllocateMatReal_Private(ds,DS_MAT_T);
 22:   PetscFree(ds->perm);
 23:   PetscMalloc1(ld,&ds->perm);
 24:   PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
 25:   return(0);
 26: }

 28: /*   0       l           k                 n-1
 29:     -----------------------------------------
 30:     |*       .           .                  |
 31:     |  *     .           .                  |
 32:     |    *   .           .                  |
 33:     |      * .           .                  |
 34:     |. . . . o           o                  |
 35:     |          o         o                  |
 36:     |            o       o                  |
 37:     |              o     o                  |
 38:     |                o   o                  |
 39:     |                  o o                  |
 40:     |. . . . o o o o o o o x                |
 41:     |                    x x x              |
 42:     |                      x x x            |
 43:     |                        x x x          |
 44:     |                          x x x        |
 45:     |                            x x x      |
 46:     |                              x x x    |
 47:     |                                x x x  |
 48:     |                                  x x x|
 49:     |                                    x x|
 50:     -----------------------------------------
 51: */

 53: static PetscErrorCode DSSwitchFormat_HEP(DS ds)
 54: {
 56:   PetscReal      *T = ds->rmat[DS_MAT_T];
 57:   PetscScalar    *A = ds->mat[DS_MAT_A];
 58:   PetscInt       i,n=ds->n,k=ds->k,ld=ds->ld;

 61:   /* switch from compact (arrow) to dense storage */
 62:   PetscArrayzero(A,ld*ld);
 63:   for (i=0;i<k;i++) {
 64:     A[i+i*ld] = T[i];
 65:     A[k+i*ld] = T[i+ld];
 66:     A[i+k*ld] = T[i+ld];
 67:   }
 68:   A[k+k*ld] = T[k];
 69:   for (i=k+1;i<n;i++) {
 70:     A[i+i*ld]     = T[i];
 71:     A[i-1+i*ld]   = T[i-1+ld];
 72:     A[i+(i-1)*ld] = T[i-1+ld];
 73:   }
 74:   if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
 75:   return(0);
 76: }

 78: PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
 79: {
 80:   PetscErrorCode    ierr;
 81:   PetscViewerFormat format;
 82:   PetscInt          i,j,r,c,rows;
 83:   PetscReal         value;
 84:   const char        *methodname[] = {
 85:                      "Implicit QR method (_steqr)",
 86:                      "Relatively Robust Representations (_stevr)",
 87:                      "Divide and Conquer method (_stedc)",
 88:                      "Block Divide and Conquer method (dsbtdc)"
 89:   };
 90:   const int         nmeth=sizeof(methodname)/sizeof(methodname[0]);

 93:   PetscViewerGetFormat(viewer,&format);
 94:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
 95:     if (ds->bs>1) {
 96:       PetscViewerASCIIPrintf(viewer,"block size: %D\n",ds->bs);
 97:     }
 98:     if (ds->method<nmeth) {
 99:       PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
100:     }
101:     return(0);
102:   }
103:   if (ds->compact) {
104:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
105:     rows = ds->extrarow? ds->n+1: ds->n;
106:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
107:       PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",rows,ds->n);
108:       PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
109:       PetscViewerASCIIPrintf(viewer,"zzz = [\n");
110:       for (i=0;i<ds->n;i++) {
111:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,i+1,(double)*(ds->rmat[DS_MAT_T]+i));
112:       }
113:       for (i=0;i<rows-1;i++) {
114:         r = PetscMax(i+2,ds->k+1);
115:         c = i+1;
116:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",r,c,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
117:         if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
118:           PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",c,r,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
119:         }
120:       }
121:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
122:     } else {
123:       for (i=0;i<rows;i++) {
124:         for (j=0;j<ds->n;j++) {
125:           if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
126:           else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
127:           else if (i==j+1 && i>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+i-1);
128:           else if (i+1==j && j>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+j-1);
129:           else value = 0.0;
130:           PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
131:         }
132:         PetscViewerASCIIPrintf(viewer,"\n");
133:       }
134:     }
135:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
136:     PetscViewerFlush(viewer);
137:   } else {
138:     DSViewMat(ds,viewer,DS_MAT_A);
139:   }
140:   if (ds->state>DS_STATE_INTERMEDIATE) { DSViewMat(ds,viewer,DS_MAT_Q); }
141:   return(0);
142: }

144: PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
145: {
146:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
147:   PetscInt       ld = ds->ld,i;

151:   switch (mat) {
152:     case DS_MAT_X:
153:     case DS_MAT_Y:
154:       if (j) {
155:         if (ds->state>=DS_STATE_CONDENSED) {
156:           PetscArraycpy(ds->mat[mat]+(*j)*ld,Q+(*j)*ld,ld);
157:         } else {
158:           PetscArrayzero(ds->mat[mat]+(*j)*ld,ld);
159:           *(ds->mat[mat]+(*j)+(*j)*ld) = 1.0;
160:         }
161:       } else {
162:         if (ds->state>=DS_STATE_CONDENSED) {
163:           PetscArraycpy(ds->mat[mat],Q,ld*ld);
164:         } else {
165:           PetscArrayzero(ds->mat[mat],ld*ld);
166:           for (i=0;i<ds->n;i++) *(ds->mat[mat]+i+i*ld) = 1.0;
167:         }
168:       }
169:       if (rnorm && j) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
170:       break;
171:     case DS_MAT_U:
172:     case DS_MAT_VT:
173:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
174:       break;
175:     default:
176:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
177:   }
178:   return(0);
179: }

181: /*
182:   ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form

184:                 [ d 0 0 0 e ]
185:                 [ 0 d 0 0 e ]
186:             A = [ 0 0 d 0 e ]
187:                 [ 0 0 0 d e ]
188:                 [ e e e e d ]

190:   to tridiagonal form

192:                 [ d e 0 0 0 ]
193:                 [ e d e 0 0 ]
194:    T = Q'*A*Q = [ 0 e d e 0 ],
195:                 [ 0 0 e d e ]
196:                 [ 0 0 0 e d ]

198:   where Q is an orthogonal matrix. Rutishauser's algorithm is used to
199:   perform the reduction, which requires O(n**2) flops. The accumulation
200:   of the orthogonal factor Q, however, requires O(n**3) flops.

202:   Arguments
203:   =========

205:   N       (input) INTEGER
206:           The order of the matrix A.  N >= 0.

208:   D       (input/output) DOUBLE PRECISION array, dimension (N)
209:           On entry, the diagonal entries of the matrix A to be
210:           reduced.
211:           On exit, the diagonal entries of the reduced matrix T.

213:   E       (input/output) DOUBLE PRECISION array, dimension (N-1)
214:           On entry, the off-diagonal entries of the matrix A to be
215:           reduced.
216:           On exit, the subdiagonal entries of the reduced matrix T.

218:   Q       (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
219:           On exit, the orthogonal matrix Q.

221:   LDQ     (input) INTEGER
222:           The leading dimension of the array Q.

224:   Note
225:   ====
226:   Based on Fortran code contributed by Daniel Kressner
227: */
228: static PetscErrorCode ArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
229: {
230: #if defined(SLEPC_MISSING_LAPACK_LARTG)
232:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARTG - Lapack routine is unavailable");
233: #else
234:   PetscBLASInt i,j,j2,one=1;
235:   PetscReal    c,s,p,off,temp;

238:   if (n<=2) return(0);

240:   for (j=0;j<n-2;j++) {

242:     /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
243:     temp = e[j+1];
244:     PetscStackCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&e[j],&c,&s,&e[j+1]));
245:     s = -s;

247:     /* Apply rotation to diagonal elements */
248:     temp   = d[j+1];
249:     e[j]   = c*s*(temp-d[j]);
250:     d[j+1] = s*s*d[j] + c*c*temp;
251:     d[j]   = c*c*d[j] + s*s*temp;

253:     /* Apply rotation to Q */
254:     j2 = j+2;
255:     PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));

257:     /* Chase newly introduced off-diagonal entry to the top left corner */
258:     for (i=j-1;i>=0;i--) {
259:       off  = -s*e[i];
260:       e[i] = c*e[i];
261:       temp = e[i+1];
262:       PetscStackCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&off,&c,&s,&e[i+1]));
263:       s = -s;
264:       temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
265:       p = s*temp;
266:       d[i+1] += p;
267:       d[i] -= p;
268:       e[i] = -e[i] - c*temp;
269:       j2 = j+2;
270:       PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
271:     }
272:   }
273:   return(0);
274: #endif
275: }

277: /*
278:    Reduce to tridiagonal form by means of ArrowTridiag.
279: */
280: static PetscErrorCode DSIntermediate_HEP(DS ds)
281: {
282: #if defined(SLEPC_MISSING_LAPACK_SYTRD) || defined(SLEPC_MISSING_LAPACK_ORGTR)
284:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYTRD/ORGTR - Lapack routine is unavailable");
285: #else
287:   PetscInt       i;
288:   PetscBLASInt   n1,n2,n3,lwork,info,l,n,ld,off;
289:   PetscScalar    *A,*Q,*work,*tau;
290:   PetscReal      *d,*e;

293:   PetscBLASIntCast(ds->n,&n);
294:   PetscBLASIntCast(ds->l,&l);
295:   PetscBLASIntCast(ds->ld,&ld);
296:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
297:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
298:   n3 = n1+n2;
299:   off = l+l*ld;
300:   A  = ds->mat[DS_MAT_A];
301:   Q  = ds->mat[DS_MAT_Q];
302:   d  = ds->rmat[DS_MAT_T];
303:   e  = ds->rmat[DS_MAT_T]+ld;
304:   PetscArrayzero(Q,ld*ld);
305:   for (i=0;i<n;i++) Q[i+i*ld] = 1.0;

307:   if (ds->compact) {

309:     if (ds->state<DS_STATE_INTERMEDIATE) ArrowTridiag(n1,d+l,e+l,Q+off,ld);

311:   } else {

313:     for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }

315:     if (ds->state<DS_STATE_INTERMEDIATE) {
316:       DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
317:       DSAllocateWork_Private(ds,ld+ld*ld,0,0);
318:       tau  = ds->work;
319:       work = ds->work+ld;
320:       lwork = ld*ld;
321:       PetscStackCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n3,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
322:       SlepcCheckLapackInfo("sytrd",info);
323:       PetscStackCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n3,Q+off,&ld,tau,work,&lwork,&info));
324:       SlepcCheckLapackInfo("orgtr",info);
325:     } else {
326:       /* copy tridiagonal to d,e */
327:       for (i=l;i<n;i++)   d[i] = PetscRealPart(A[i+i*ld]);
328:       for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
329:     }
330:   }
331:   return(0);
332: #endif
333: }

335: PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
336: {
338:   PetscInt       n,l,i,*perm,ld=ds->ld;
339:   PetscScalar    *A;
340:   PetscReal      *d;

343:   if (!ds->sc) return(0);
344:   n = ds->n;
345:   l = ds->l;
346:   A = ds->mat[DS_MAT_A];
347:   d = ds->rmat[DS_MAT_T];
348:   perm = ds->perm;
349:   if (!rr) {
350:     DSSortEigenvaluesReal_Private(ds,d,perm);
351:   } else {
352:     DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE);
353:   }
354:   for (i=l;i<n;i++) wr[i] = d[perm[i]];
355:   DSPermuteColumns_Private(ds,l,n,DS_MAT_Q,perm);
356:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
357:   if (!ds->compact) {
358:     for (i=l;i<n;i++) A[i+i*ld] = wr[i];
359:   }
360:   return(0);
361: }

363: PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
364: {
366:   PetscInt       i;
367:   PetscBLASInt   n,ld,incx=1;
368:   PetscScalar    *A,*Q,*x,*y,one=1.0,zero=0.0;
369:   PetscReal      *e,beta;

372:   PetscBLASIntCast(ds->n,&n);
373:   PetscBLASIntCast(ds->ld,&ld);
374:   A  = ds->mat[DS_MAT_A];
375:   Q  = ds->mat[DS_MAT_Q];
376:   e  = ds->rmat[DS_MAT_T]+ld;

378:   if (ds->compact) {
379:     beta = e[n-1];   /* in compact, we assume all entries are zero except the last one */
380:     for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
381:     ds->k = n;
382:   } else {
383:     DSAllocateWork_Private(ds,2*ld,0,0);
384:     x = ds->work;
385:     y = ds->work+ld;
386:     for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
387:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
388:     for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
389:     ds->k = n;
390:   }
391:   return(0);
392: }

394: PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
395: {
396: #if defined(PETSC_MISSING_LAPACK_STEQR)
398:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEQR - Lapack routine is unavailable");
399: #else
401:   PetscInt       i;
402:   PetscBLASInt   n1,n2,n3,info,l,n,ld,off;
403:   PetscScalar    *Q,*A;
404:   PetscReal      *d,*e;

407:   if (ds->bs>1) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
408:   PetscBLASIntCast(ds->n,&n);
409:   PetscBLASIntCast(ds->l,&l);
410:   PetscBLASIntCast(ds->ld,&ld);
411:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
412:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
413:   n3 = n1+n2;
414:   off = l+l*ld;
415:   Q  = ds->mat[DS_MAT_Q];
416:   A  = ds->mat[DS_MAT_A];
417:   d  = ds->rmat[DS_MAT_T];
418:   e  = ds->rmat[DS_MAT_T]+ld;

420:   /* Reduce to tridiagonal form */
421:   DSIntermediate_HEP(ds);

423:   /* Solve the tridiagonal eigenproblem */
424:   for (i=0;i<l;i++) wr[i] = d[i];

426:   DSAllocateWork_Private(ds,0,2*ld,0);
427:   PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n3,d+l,e+l,Q+off,&ld,ds->rwork,&info));
428:   SlepcCheckLapackInfo("steqr",info);
429:   for (i=l;i<n;i++) wr[i] = d[i];

431:   /* Create diagonal matrix as a result */
432:   if (ds->compact) {
433:     PetscArrayzero(e,n-1);
434:   } else {
435:     for (i=l;i<n;i++) {
436:       PetscArrayzero(A+l+i*ld,n-l);
437:     }
438:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
439:   }

441:   /* Set zero wi */
442:   if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
443:   return(0);
444: #endif
445: }

447: PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
448: {
449: #if defined(SLEPC_MISSING_LAPACK_STEVR)
451:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEVR - Lapack routine is unavailable");
452: #else
454:   PetscInt       i;
455:   PetscBLASInt   n1,n2,n3,lwork,liwork,info,l,n,m,ld,off,il,iu,*isuppz;
456:   PetscScalar    *A,*Q,*W=NULL,one=1.0,zero=0.0;
457:   PetscReal      *d,*e,abstol=0.0,vl,vu;
458: #if defined(PETSC_USE_COMPLEX)
459:   PetscInt       j;
460:   PetscReal      *ritz;
461: #endif

464:   if (ds->bs>1) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
465:   PetscBLASIntCast(ds->n,&n);
466:   PetscBLASIntCast(ds->l,&l);
467:   PetscBLASIntCast(ds->ld,&ld);
468:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
469:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
470:   n3 = n1+n2;
471:   off = l+l*ld;
472:   A  = ds->mat[DS_MAT_A];
473:   Q  = ds->mat[DS_MAT_Q];
474:   d  = ds->rmat[DS_MAT_T];
475:   e  = ds->rmat[DS_MAT_T]+ld;

477:   /* Reduce to tridiagonal form */
478:   DSIntermediate_HEP(ds);

480:   /* Solve the tridiagonal eigenproblem */
481:   for (i=0;i<l;i++) wr[i] = d[i];

483:   if (ds->state<DS_STATE_INTERMEDIATE) {  /* Q contains useful info */
484:     DSAllocateMat_Private(ds,DS_MAT_W);
485:     DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
486:     W = ds->mat[DS_MAT_W];
487:   }
488: #if defined(PETSC_USE_COMPLEX)
489:   DSAllocateMatReal_Private(ds,DS_MAT_Q);
490: #endif
491:   lwork = 20*ld;
492:   liwork = 10*ld;
493:   DSAllocateWork_Private(ds,0,lwork+ld,liwork+2*ld);
494:   isuppz = ds->iwork+liwork;
495: #if defined(PETSC_USE_COMPLEX)
496:   ritz = ds->rwork+lwork;
497:   PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,ds->rmat[DS_MAT_Q]+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
498:   for (i=l;i<n;i++) wr[i] = ritz[i];
499: #else
500:   PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
501: #endif
502:   SlepcCheckLapackInfo("stevr",info);
503: #if defined(PETSC_USE_COMPLEX)
504:   for (i=l;i<n;i++)
505:     for (j=l;j<n;j++)
506:       Q[i+j*ld] = (ds->rmat[DS_MAT_Q])[i+j*ld];
507: #endif
508:   if (ds->state<DS_STATE_INTERMEDIATE) {  /* accumulate previous Q */
509:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
510:     DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
511:   }
512:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);

514:   /* Create diagonal matrix as a result */
515:   if (ds->compact) {
516:     PetscArrayzero(e,n-1);
517:   } else {
518:     for (i=l;i<n;i++) {
519:       PetscArrayzero(A+l+i*ld,n-l);
520:     }
521:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
522:   }

524:   /* Set zero wi */
525:   if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
526:   return(0);
527: #endif
528: }

530: PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
531: {
532: #if defined(SLEPC_MISSING_LAPACK_STEDC)
534:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEDC - Lapack routine is unavailable");
535: #else
537:   PetscInt       i;
538:   PetscBLASInt   n1,info,l,ld,off,lrwork,liwork;
539:   PetscScalar    *Q,*A;
540:   PetscReal      *d,*e;
541: #if defined(PETSC_USE_COMPLEX)
542:   PetscBLASInt   lwork;
543:   PetscInt       j;
544: #endif

547:   if (ds->bs>1) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
548:   PetscBLASIntCast(ds->l,&l);
549:   PetscBLASIntCast(ds->ld,&ld);
550:   PetscBLASIntCast(ds->n-ds->l,&n1);
551:   off = l+l*ld;
552:   Q  = ds->mat[DS_MAT_Q];
553:   A  = ds->mat[DS_MAT_A];
554:   d  = ds->rmat[DS_MAT_T];
555:   e  = ds->rmat[DS_MAT_T]+ld;

557:   /* Reduce to tridiagonal form */
558:   DSIntermediate_HEP(ds);

560:   /* Solve the tridiagonal eigenproblem */
561:   for (i=0;i<l;i++) wr[i] = d[i];

563:   lrwork = 5*n1*n1+3*n1+1;
564:   liwork = 5*n1*n1+6*n1+6;
565: #if !defined(PETSC_USE_COMPLEX)
566:   DSAllocateWork_Private(ds,0,lrwork,liwork);
567:   PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
568: #else
569:   lwork = ld*ld;
570:   DSAllocateWork_Private(ds,lwork,lrwork,liwork);
571:   PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
572:   /* Fixing Lapack bug*/
573:   for (j=ds->l;j<ds->n;j++)
574:     for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
575: #endif
576:   SlepcCheckLapackInfo("stedc",info);
577:   for (i=l;i<ds->n;i++) wr[i] = d[i];

579:   /* Create diagonal matrix as a result */
580:   if (ds->compact) {
581:     PetscArrayzero(e,ds->n-1);
582:   } else {
583:     for (i=l;i<ds->n;i++) {
584:       PetscArrayzero(A+l+i*ld,ds->n-l);
585:     }
586:     for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
587:   }

589:   /* Set zero wi */
590:   if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
591:   return(0);
592: #endif
593: }

595: #if !defined(PETSC_USE_COMPLEX)
596: PetscErrorCode DSSolve_HEP_BDC(DS ds,PetscScalar *wr,PetscScalar *wi)
597: {
599:   PetscBLASInt   i,j,k,m,n,info,nblks,bs,ld,lde,lrwork,liwork,*ksizes,*iwork,mingapi;
600:   PetscScalar    *Q,*A;
601:   PetscReal      *D,*E,*d,*e,tol=PETSC_MACHINE_EPSILON/2,tau1=1e-16,tau2=1e-18,*rwork,mingap;

604:   if (ds->l>0) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for l>1");
605:   if (ds->compact) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for compact storage");
606:   PetscBLASIntCast(ds->ld,&ld);
607:   PetscBLASIntCast(ds->bs,&bs);
608:   PetscBLASIntCast(ds->n,&n);
609:   nblks = n/bs;
610:   Q  = ds->mat[DS_MAT_Q];
611:   A  = ds->mat[DS_MAT_A];
612:   d  = ds->rmat[DS_MAT_T];
613:   e  = ds->rmat[DS_MAT_T]+ld;
614:   lrwork = 4*n*n+60*n+1;
615:   liwork = 5*n+5*nblks-1;
616:   lde = 2*bs+1;
617:   DSAllocateWork_Private(ds,bs*n+lde*lde*(nblks-1),lrwork,nblks+liwork);
618:   D      = ds->work;
619:   E      = ds->work+bs*n;
620:   rwork  = ds->rwork;
621:   ksizes = ds->iwork;
622:   iwork  = ds->iwork+nblks;
623:   PetscArrayzero(iwork,liwork);

625:   /* Copy matrix to block tridiagonal format */
626:   j=0;
627:   for (i=0;i<nblks;i++) {
628:     ksizes[i]=bs;
629:     for (k=0;k<bs;k++)
630:       for (m=0;m<bs;m++)
631:         D[k+m*bs+i*bs*bs] = PetscRealPart(A[j+k+(j+m)*n]);
632:     j = j + bs;
633:   }
634:   j=0;
635:   for (i=0;i<nblks-1;i++) {
636:     for (k=0;k<bs;k++)
637:       for (m=0;m<bs;m++)
638:         E[k+m*lde+i*lde*lde] = PetscRealPart(A[j+bs+k+(j+m)*n]);
639:     j = j + bs;
640:   }

642:   /* Solve the block tridiagonal eigenproblem */
643:   BDC_dsbtdc_("D","A",n,nblks,ksizes,D,bs,bs,E,lde,lde,tol,tau1,tau2,d,
644:            Q,n,rwork,lrwork,iwork,liwork,&mingap,&mingapi,&info,1,1);
645:   for (i=0;i<ds->n;i++) wr[i] = d[i];

647:   /* Create diagonal matrix as a result */
648:   if (ds->compact) {
649:     PetscArrayzero(e,ds->n-1);
650:   } else {
651:     for (i=0;i<ds->n;i++) {
652:       PetscArrayzero(A+i*ld,ds->n);
653:     }
654:     for (i=0;i<ds->n;i++) A[i+i*ld] = wr[i];
655:   }

657:   /* Set zero wi */
658:   if (wi) for (i=0;i<ds->n;i++) wi[i] = 0.0;
659:   return(0);
660: }
661: #endif

663: PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n)
664: {
665:   PetscInt    i,ld=ds->ld,l=ds->l;
666:   PetscScalar *A;

669:   if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
670:   A = ds->mat[DS_MAT_A];
671:   if (!ds->compact && ds->extrarow && ds->k==ds->n) {
672:     for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
673:   }
674:   if (ds->extrarow) ds->k = n;
675:   else ds->k = 0;
676:   ds->n = n;
677:   return(0);
678: }

680: PetscErrorCode DSSynchronize_HEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
681: {
683:   PetscInt       ld=ds->ld,l=ds->l,k=0,kr=0;
684:   PetscMPIInt    n,rank,off=0,size,ldn,ld3;

687:   if (ds->compact) kr = 3*ld;
688:   else k = (ds->n-l)*ld;
689:   if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
690:   if (eigr) k += (ds->n-l);
691:   DSAllocateWork_Private(ds,k+kr,0,0);
692:   PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size);
693:   PetscMPIIntCast(ds->n-l,&n);
694:   PetscMPIIntCast(ld*(ds->n-l),&ldn);
695:   PetscMPIIntCast(ld*3,&ld3);
696:   MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
697:   if (!rank) {
698:     if (ds->compact) {
699:       MPI_Pack(ds->rmat[DS_MAT_T],ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
700:     } else {
701:       MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
702:     }
703:     if (ds->state>DS_STATE_RAW) {
704:       MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
705:     }
706:     if (eigr) {
707:       MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
708:     }
709:   }
710:   MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
711:   if (rank) {
712:     if (ds->compact) {
713:       MPI_Unpack(ds->work,size,&off,ds->rmat[DS_MAT_T],ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds));
714:     } else {
715:       MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
716:     }
717:     if (ds->state>DS_STATE_RAW) {
718:       MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
719:     }
720:     if (eigr) {
721:       MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
722:     }
723:   }
724:   return(0);
725: }

727: PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
728: {
729: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE)
731:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE - Lapack routines are unavailable");
732: #else
734:   PetscScalar    *work;
735:   PetscReal      *rwork;
736:   PetscBLASInt   *ipiv;
737:   PetscBLASInt   lwork,info,n,ld;
738:   PetscReal      hn,hin;
739:   PetscScalar    *A;

742:   PetscBLASIntCast(ds->n,&n);
743:   PetscBLASIntCast(ds->ld,&ld);
744:   lwork = 8*ld;
745:   DSAllocateWork_Private(ds,lwork,ld,ld);
746:   work  = ds->work;
747:   rwork = ds->rwork;
748:   ipiv  = ds->iwork;
749:   DSSwitchFormat_HEP(ds);

751:   /* use workspace matrix W to avoid overwriting A */
752:   DSAllocateMat_Private(ds,DS_MAT_W);
753:   A = ds->mat[DS_MAT_W];
754:   PetscArraycpy(A,ds->mat[DS_MAT_A],ds->ld*ds->ld);

756:   /* norm of A */
757:   hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);

759:   /* norm of inv(A) */
760:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
761:   SlepcCheckLapackInfo("getrf",info);
762:   PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
763:   SlepcCheckLapackInfo("getri",info);
764:   hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);

766:   *cond = hn*hin;
767:   return(0);
768: #endif
769: }

771: PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
772: {
773: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
775:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routines are unavailable");
776: #else
778:   PetscInt       i,j,k=ds->k;
779:   PetscScalar    *Q,*A,*R,*tau,*work;
780:   PetscBLASInt   ld,n1,n0,lwork,info;

783:   PetscBLASIntCast(ds->ld,&ld);
784:   DSAllocateWork_Private(ds,ld*ld,0,0);
785:   tau = ds->work;
786:   work = ds->work+ld;
787:   PetscBLASIntCast(ld*(ld-1),&lwork);
788:   DSAllocateMat_Private(ds,DS_MAT_W);
789:   A  = ds->mat[DS_MAT_A];
790:   Q  = ds->mat[DS_MAT_Q];
791:   R  = ds->mat[DS_MAT_W];

793:   /* copy I+alpha*A */
794:   PetscArrayzero(Q,ld*ld);
795:   PetscArrayzero(R,ld*ld);
796:   for (i=0;i<k;i++) {
797:     Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
798:     Q[k+i*ld] = alpha*A[k+i*ld];
799:   }

801:   /* compute qr */
802:   PetscBLASIntCast(k+1,&n1);
803:   PetscBLASIntCast(k,&n0);
804:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
805:   SlepcCheckLapackInfo("geqrf",info);

807:   /* copy R from Q */
808:   for (j=0;j<k;j++)
809:     for (i=0;i<=j;i++)
810:       R[i+j*ld] = Q[i+j*ld];

812:   /* compute orthogonal matrix in Q */
813:   PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
814:   SlepcCheckLapackInfo("orgqr",info);

816:   /* compute the updated matrix of projected problem */
817:   for (j=0;j<k;j++)
818:     for (i=0;i<k+1;i++)
819:       A[j*ld+i] = Q[i*ld+j];
820:   alpha = -1.0/alpha;
821:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
822:   for (i=0;i<k;i++)
823:     A[ld*i+i] -= alpha;
824:   return(0);
825: #endif
826: }

828: PetscErrorCode DSHermitian_HEP(DS ds,DSMatType m,PetscBool *flg)
829: {
831:   if (m==DS_MAT_A && !ds->extrarow) *flg = PETSC_TRUE;
832:   else *flg = PETSC_FALSE;
833:   return(0);
834: }

836: SLEPC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
837: {
839:   ds->ops->allocate      = DSAllocate_HEP;
840:   ds->ops->view          = DSView_HEP;
841:   ds->ops->vectors       = DSVectors_HEP;
842:   ds->ops->solve[0]      = DSSolve_HEP_QR;
843:   ds->ops->solve[1]      = DSSolve_HEP_MRRR;
844:   ds->ops->solve[2]      = DSSolve_HEP_DC;
845: #if !defined(PETSC_USE_COMPLEX)
846:   ds->ops->solve[3]      = DSSolve_HEP_BDC;
847: #endif
848:   ds->ops->sort          = DSSort_HEP;
849:   ds->ops->synchronize   = DSSynchronize_HEP;
850:   ds->ops->truncate      = DSTruncate_HEP;
851:   ds->ops->update        = DSUpdateExtraRow_HEP;
852:   ds->ops->cond          = DSCond_HEP;
853:   ds->ops->transrks      = DSTranslateRKS_HEP;
854:   ds->ops->hermitian     = DSHermitian_HEP;
855:   return(0);
856: }