Actual source code: matutil.c
slepc-3.18.2 2023-01-26
/*
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   SLEPc - Scalable Library for Eigenvalue Problem Computations
   Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

   This file is part of SLEPc.
   SLEPc is distributed under a 2-clause BSD license (see LICENSE).
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/

#include <slepc/private/slepcimpl.h>
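
/* Sequential kernel of MatCreateTile(): preallocates and fills the already created
   matrix G with the blocks [ a*A b*B; c*C d*D ] when all blocks live on one process */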
static PetscErrorCode MatCreateTile_Seq(PetscScalar a,Mat A,PetscScalar b,Mat B,PetscScalar c,Mat C,PetscScalar d,Mat D,Mat G)
{
  PetscInt          i,j,M1,M2,N1,N2,*nnz,ncols,*scols,bs;
  PetscScalar       *svals,*buf;
  const PetscInt    *cols;
  const PetscScalar *vals;

  MatGetSize(A,&M1,&N1);
  MatGetSize(D,&M2,&N2);
  MatGetBlockSize(A,&bs);

  PetscCalloc1((M1+M2)/bs,&nnz);
  /* Preallocate for A */
  if (a!=0.0) {
    for (i=0;i<(M1+bs-1)/bs;i++) {
      MatGetRow(A,i*bs,&ncols,NULL,NULL);
      nnz[i] += ncols/bs;
      MatRestoreRow(A,i*bs,&ncols,NULL,NULL);
    }
  }
  /* Preallocate for B */
  if (b!=0.0) {
    for (i=0;i<(M1+bs-1)/bs;i++) {
      MatGetRow(B,i*bs,&ncols,NULL,NULL);
      nnz[i] += ncols/bs;
      MatRestoreRow(B,i*bs,&ncols,NULL,NULL);
    }
  }
  /* Preallocate for C */
  if (c!=0.0) {
    for (i=0;i<(M2+bs-1)/bs;i++) {
      MatGetRow(C,i*bs,&ncols,NULL,NULL);
      nnz[i+M1/bs] += ncols/bs;
      MatRestoreRow(C,i*bs,&ncols,NULL,NULL);
    }
  }
  /* Preallocate for D */
  if (d!=0.0) {
    for (i=0;i<(M2+bs-1)/bs;i++) {
      MatGetRow(D,i*bs,&ncols,NULL,NULL);
      nnz[i+M1/bs] += ncols/bs;
      MatRestoreRow(D,i*bs,&ncols,NULL,NULL);
    }
  }
  MatXAIJSetPreallocation(G,bs,nnz,NULL,NULL,NULL);
  PetscFree(nnz);

  PetscMalloc2(PetscMax(N1,N2),&buf,PetscMax(N1,N2),&scols);
  /* Transfer A */
  if (a!=0.0) {
    for (i=0;i<M1;i++) {
      MatGetRow(A,i,&ncols,&cols,&vals);
      if (a!=1.0) {
        svals=buf;
        for (j=0;j<ncols;j++) svals[j] = vals[j]*a;
      } else svals=(PetscScalar*)vals;
      MatSetValues(G,1,&i,ncols,cols,svals,INSERT_VALUES);
      MatRestoreRow(A,i,&ncols,&cols,&vals);
    }
  }
  /* Transfer B */
  if (b!=0.0) {
    for (i=0;i<M1;i++) {
      MatGetRow(B,i,&ncols,&cols,&vals);
      if (b!=1.0) {
        svals=buf;
        for (j=0;j<ncols;j++) svals[j] = vals[j]*b;
      } else svals=(PetscScalar*)vals;
      for (j=0;j<ncols;j++) scols[j] = cols[j]+N1;
      MatSetValues(G,1,&i,ncols,scols,svals,INSERT_VALUES);
      MatRestoreRow(B,i,&ncols,&cols,&vals);
    }
  }
  /* Transfer C */
  if (c!=0.0) {
    for (i=0;i<M2;i++) {
      MatGetRow(C,i,&ncols,&cols,&vals);
      if (c!=1.0) {
        svals=buf;
        for (j=0;j<ncols;j++) svals[j] = vals[j]*c;
      } else svals=(PetscScalar*)vals;
      j = i+M1;
      MatSetValues(G,1,&j,ncols,cols,svals,INSERT_VALUES);
      MatRestoreRow(C,i,&ncols,&cols,&vals);
    }
  }
  /* Transfer D */
  if (d!=0.0) {
    for (i=0;i<M2;i++) {
      MatGetRow(D,i,&ncols,&cols,&vals);
      if (d!=1.0) {
        svals=buf;
        for (j=0;j<ncols;j++) svals[j] = vals[j]*d;
      } else svals=(PetscScalar*)vals;
      for (j=0;j<ncols;j++) scols[j] = cols[j]+N1;
      j = i+M1;
      MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
      MatRestoreRow(D,i,&ncols,&cols,&vals);
    }
  }
  PetscFree2(buf,scols);
  return 0;
}
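
/* Parallel kernel of MatCreateTile(): same role as MatCreateTile_Seq(), but rows of the
   bottom blocks and columns of the right blocks are remapped (perfect shuffle) so that
   the local part of each block stays in the local part of G on the same process */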
static PetscErrorCode MatCreateTile_MPI(PetscScalar a,Mat A,PetscScalar b,Mat B,PetscScalar c,Mat C,PetscScalar d,Mat D,Mat G)
{
  PetscMPIInt       np;
  PetscInt          p,i,j,N1,N2,m1,m2,n1,n2,*map1,*map2;
  PetscInt          *dnz,*onz,ncols,*scols,start,gstart;
  PetscScalar       *svals,*buf;
  const PetscInt    *cols,*mapptr1,*mapptr2;
  const PetscScalar *vals;

  MatGetSize(A,NULL,&N1);
  MatGetLocalSize(A,&m1,&n1);
  MatGetSize(D,NULL,&N2);
  MatGetLocalSize(D,&m2,&n2);

  /* Create column mappings: G uses a perfect-shuffle numbering, so that on each process
     the locally owned columns of the left blocks (A,C) are followed by the locally owned
     columns of the right blocks (B,D) */
  MPI_Comm_size(PetscObjectComm((PetscObject)G),&np);
  MatGetOwnershipRangesColumn(A,&mapptr1);
  MatGetOwnershipRangesColumn(B,&mapptr2);
  PetscMalloc4(PetscMax(N1,N2),&buf,PetscMax(N1,N2),&scols,N1,&map1,N2,&map2);
  for (p=0;p<np;p++) {
    for (i=mapptr1[p];i<mapptr1[p+1];i++) map1[i] = i+mapptr2[p];
  }
  for (p=0;p<np;p++) {
    for (i=mapptr2[p];i<mapptr2[p+1];i++) map2[i] = i+mapptr1[p+1];
  }

  MatPreallocateBegin(PetscObjectComm((PetscObject)G),m1+m2,n1+n2,dnz,onz);
  MatGetOwnershipRange(G,&gstart,NULL);
  /* Preallocate for A */
  if (a!=0.0) {
    MatGetOwnershipRange(A,&start,NULL);
    for (i=0;i<m1;i++) {
      MatGetRow(A,i+start,&ncols,&cols,NULL);
      for (j=0;j<ncols;j++) scols[j] = map1[cols[j]];
      MatPreallocateSet(gstart+i,ncols,scols,dnz,onz);
      MatRestoreRow(A,i+start,&ncols,&cols,NULL);
    }
  }
  /* Preallocate for B */
  if (b!=0.0) {
    MatGetOwnershipRange(B,&start,NULL);
    for (i=0;i<m1;i++) {
      MatGetRow(B,i+start,&ncols,&cols,NULL);
      for (j=0;j<ncols;j++) scols[j] = map2[cols[j]];
      MatPreallocateSet(gstart+i,ncols,scols,dnz,onz);
      MatRestoreRow(B,i+start,&ncols,&cols,NULL);
    }
  }
  /* Preallocate for C */
  if (c!=0.0) {
    MatGetOwnershipRange(C,&start,NULL);
    for (i=0;i<m2;i++) {
      MatGetRow(C,i+start,&ncols,&cols,NULL);
      for (j=0;j<ncols;j++) scols[j] = map1[cols[j]];
      MatPreallocateSet(gstart+m1+i,ncols,scols,dnz,onz);
      MatRestoreRow(C,i+start,&ncols,&cols,NULL);
    }
  }
  /* Preallocate for D */
  if (d!=0.0) {
    MatGetOwnershipRange(D,&start,NULL);
    for (i=0;i<m2;i++) {
      MatGetRow(D,i+start,&ncols,&cols,NULL);
      for (j=0;j<ncols;j++) scols[j] = map2[cols[j]];
      MatPreallocateSet(gstart+m1+i,ncols,scols,dnz,onz);
      MatRestoreRow(D,i+start,&ncols,&cols,NULL);
    }
  }
  MatMPIAIJSetPreallocation(G,0,dnz,0,onz);
  MatPreallocateEnd(dnz,onz);

  /* Transfer A */
  if (a!=0.0) {
    MatGetOwnershipRange(A,&start,NULL);
    for (i=0;i<m1;i++) {
      MatGetRow(A,i+start,&ncols,&cols,&vals);
      if (a!=1.0) {
        svals=buf;
        for (j=0;j<ncols;j++) svals[j] = vals[j]*a;
      } else svals=(PetscScalar*)vals;
      for (j=0;j<ncols;j++) scols[j] = map1[cols[j]];
      j = gstart+i;
      MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
      MatRestoreRow(A,i+start,&ncols,&cols,&vals);
    }
  }
  /* Transfer B */
  if (b!=0.0) {
    MatGetOwnershipRange(B,&start,NULL);
    for (i=0;i<m1;i++) {
      MatGetRow(B,i+start,&ncols,&cols,&vals);
      if (b!=1.0) {
        svals=buf;
        for (j=0;j<ncols;j++) svals[j] = vals[j]*b;
      } else svals=(PetscScalar*)vals;
      for (j=0;j<ncols;j++) scols[j] = map2[cols[j]];
      j = gstart+i;
      MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
      MatRestoreRow(B,i+start,&ncols,&cols,&vals);
    }
  }
  /* Transfer C */
  if (c!=0.0) {
    MatGetOwnershipRange(C,&start,NULL);
    for (i=0;i<m2;i++) {
      MatGetRow(C,i+start,&ncols,&cols,&vals);
      if (c!=1.0) {
        svals=buf;
        for (j=0;j<ncols;j++) svals[j] = vals[j]*c;
      } else svals=(PetscScalar*)vals;
      for (j=0;j<ncols;j++) scols[j] = map1[cols[j]];
      j = gstart+m1+i;
      MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
      MatRestoreRow(C,i+start,&ncols,&cols,&vals);
    }
  }
  /* Transfer D */
  if (d!=0.0) {
    MatGetOwnershipRange(D,&start,NULL);
    for (i=0;i<m2;i++) {
      MatGetRow(D,i+start,&ncols,&cols,&vals);
      if (d!=1.0) {
        svals=buf;
        for (j=0;j<ncols;j++) svals[j] = vals[j]*d;
      } else svals=(PetscScalar*)vals;
      for (j=0;j<ncols;j++) scols[j] = map2[cols[j]];
      j = gstart+m1+i;
      MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
      MatRestoreRow(D,i+start,&ncols,&cols,&vals);
    }
  }
  PetscFree4(buf,scols,map1,map2);
  return 0;
}

/*@
   MatCreateTile - Explicitly build a matrix from four blocks, G = [ a*A b*B; c*C d*D ].

   Collective on A

   Input Parameters:
+  A - matrix for top-left block
.  a - scaling factor for block A
.  B - matrix for top-right block
.  b - scaling factor for block B
.  C - matrix for bottom-left block
.  c - scaling factor for block C
.  D - matrix for bottom-right block
-  d - scaling factor for block D

   Output Parameter:
.  G - the resulting matrix

   Notes:
   In the case of a parallel matrix, a permuted version of G is returned. The permutation
   is a perfect shuffle, such that the local parts of A, B, C, D remain in the local part
   of G on the same process.

   Matrix G must be destroyed by the user.

   The blocks can be of different types. Each block can be either ConstantDiagonal, a
   standard type such as AIJ, or any other type that supports the MatGetRow operation.
   The type of the output matrix will be the same as the first block that is not
   ConstantDiagonal (checked in the A,B,C,D order).
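
   Example Usage:
   A minimal sketch (hypothetical usage; K and M are assumed to be previously assembled
   square matrices of the same dimensions), building G = [ K 2*M; -M K ]:
.vb
   Mat G;
   MatCreateTile(1.0,K,2.0,M,-1.0,M,1.0,K,&G);
   /* ... use G ... */
   MatDestroy(&G);
.ve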

   Level: developer

.seealso: MatCreateNest()
@*/
PetscErrorCode MatCreateTile(PetscScalar a,Mat A,PetscScalar b,Mat B,PetscScalar c,Mat C,PetscScalar d,Mat D,Mat *G)
{
  PetscInt    i,k,M1,M2,N1,N2,M,N,m1,m2,n1,n2,m,n,bs;
  PetscBool   diag[4];
  Mat         block[4] = {A,B,C,D};
  MatType     type[4];
  PetscMPIInt size;

  /* check row 1 */
  MatGetSize(A,&M1,NULL);
  MatGetLocalSize(A,&m1,NULL);
  MatGetSize(B,&M,NULL);
  MatGetLocalSize(B,&m,NULL);
  /* check row 2 */
  MatGetSize(C,&M2,NULL);
  MatGetLocalSize(C,&m2,NULL);
  MatGetSize(D,&M,NULL);
  MatGetLocalSize(D,&m,NULL);
  /* check column 1 */
  MatGetSize(A,NULL,&N1);
  MatGetLocalSize(A,NULL,&n1);
  MatGetSize(C,NULL,&N);
  MatGetLocalSize(C,NULL,&n);
  /* check column 2 */
  MatGetSize(B,NULL,&N2);
  MatGetLocalSize(B,NULL,&n2);
  MatGetSize(D,NULL,&N);
  MatGetLocalSize(D,NULL,&n);

  /* check matrix types */
  for (i=0;i<4;i++) {
    MatGetType(block[i],&type[i]);
    PetscStrcmp(type[i],MATCONSTANTDIAGONAL,&diag[i]);
  }
  for (k=0;k<4;k++) if (!diag[k]) break;

  MatGetBlockSize(block[k],&bs);
  MatCreate(PetscObjectComm((PetscObject)block[k]),G);
  MatSetSizes(*G,m1+m2,n1+n2,M1+M2,N1+N2);
  MatSetType(*G,type[k]);
  MatSetBlockSize(*G,bs);
  MatSetUp(*G);

  MPI_Comm_size(PetscObjectComm((PetscObject)*G),&size);
  if (size>1) MatCreateTile_MPI(a,A,b,B,c,C,d,D,*G);
  else MatCreateTile_Seq(a,A,b,B,c,C,d,D,*G);
  MatAssemblyBegin(*G,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*G,MAT_FINAL_ASSEMBLY);
  return 0;
}

/*@C
   MatCreateVecsEmpty - Get vector(s) compatible with the matrix, i.e. with the same
   parallel layout, but without an internal array.

   Collective on mat

   Input Parameter:
.  mat - the matrix

   Output Parameters:
+  right - (optional) vector that the matrix can be multiplied against
-  left  - (optional) vector that the matrix-vector product can be stored in

   Note:
   This is similar to MatCreateVecs(), but the new vectors do not have an internal
   array, so the intended usage is with VecPlaceArray().
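
   Example Usage:
   A minimal sketch (hypothetical usage; 'a' is assumed to be a user-owned array with as
   many entries as local columns of the matrix):
.vb
   Vec x;
   MatCreateVecsEmpty(mat,&x,NULL);
   VecPlaceArray(x,a);
   /* ... use x, e.g. in MatMult(mat,x,y) ... */
   VecResetArray(x);
   VecDestroy(&x);
.ve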

   Level: developer

.seealso: VecDuplicateEmpty()
@*/
PetscErrorCode MatCreateVecsEmpty(Mat mat,Vec *right,Vec *left)
{
  PetscBool   standard,cuda=PETSC_FALSE,skip=PETSC_FALSE;
  PetscInt    M,N,mloc,nloc,rbs,cbs;
  PetscMPIInt size;
  Vec         v;

  PetscObjectTypeCompareAny((PetscObject)mat,&standard,MATSEQAIJ,MATMPIAIJ,MATSEQBAIJ,MATMPIBAIJ,MATSEQSBAIJ,MATMPISBAIJ,MATSEQDENSE,MATMPIDENSE,"");
  PetscObjectTypeCompareAny((PetscObject)mat,&cuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
  if (!standard && !cuda) {
    MatCreateVecs(mat,right,left);
    v = right? *right: *left;
    if (v) {
      PetscObjectTypeCompareAny((PetscObject)v,&standard,VECSEQ,VECMPI,"");
      PetscObjectTypeCompareAny((PetscObject)v,&cuda,VECSEQCUDA,VECMPICUDA,"");
    }
    if (!standard && !cuda) skip = PETSC_TRUE;
    else {
      if (right) VecDestroy(right);
      if (left) VecDestroy(left);
    }
  }
  if (!skip) {
    MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
    MatGetLocalSize(mat,&mloc,&nloc);
    MatGetSize(mat,&M,&N);
    MatGetBlockSizes(mat,&rbs,&cbs);
    if (right) {
      if (cuda) {
#if defined(PETSC_HAVE_CUDA)
        if (size>1) VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)mat),cbs,nloc,N,NULL,right);
        else VecCreateSeqCUDAWithArray(PetscObjectComm((PetscObject)mat),cbs,N,NULL,right);
#endif
      } else {
        if (size>1) VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),cbs,nloc,N,NULL,right);
        else VecCreateSeqWithArray(PetscObjectComm((PetscObject)mat),cbs,N,NULL,right);
      }
    }
    if (left) {
      if (cuda) {
#if defined(PETSC_HAVE_CUDA)
        if (size>1) VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)mat),rbs,mloc,M,NULL,left);
        else VecCreateSeqCUDAWithArray(PetscObjectComm((PetscObject)mat),rbs,M,NULL,left);
#endif
      } else {
        if (size>1) VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),rbs,mloc,M,NULL,left);
        else VecCreateSeqWithArray(PetscObjectComm((PetscObject)mat),rbs,M,NULL,left);
      }
    }
  }
  return 0;
}

/*@C
   MatNormEstimate - Estimate the 2-norm of a matrix.

   Collective on A

   Input Parameters:
+  A   - the matrix
.  vrn - random vector with normally distributed entries (can be NULL)
-  w   - workspace vector (can be NULL)

   Output Parameter:
.  nrm - the norm estimate

   Notes:
   Does not need access to the matrix entries; it just performs a matrix-vector product.
   Based on work by I. Ipsen and coworkers, https://ipsen.math.ncsu.edu/ps/slides_ima.pdf

   The input vector vrn must have unit 2-norm.
   If vrn is NULL, then it is created internally and filled with VecSetRandomNormal().
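
   Example Usage:
   A minimal sketch (the random vector and the workspace are created internally when
   NULL is passed for them):
.vb
   PetscReal nrm;
   MatNormEstimate(A,NULL,NULL,&nrm);
.ve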

   Level: developer

.seealso: VecSetRandomNormal()
@*/
PetscErrorCode MatNormEstimate(Mat A,Vec vrn,Vec w,PetscReal *nrm)
{
  PetscInt n;
  Vec      vv=NULL,ww=NULL;

  if (!vrn) {
    MatCreateVecs(A,&vv,NULL);
    vrn = vv;
    VecSetRandomNormal(vv,NULL,NULL,NULL);
    VecNormalize(vv,NULL);
  }
  if (!w) {
    MatCreateVecs(A,&ww,NULL);
    w = ww;
  }
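
  /* Norm estimate: nrm = sqrt(n)*||A*vrn||_2, where n is the global row dimension of A
     and vrn is a unit-norm random vector with normally distributed entries */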
  MatGetSize(A,&n,NULL);
  MatMult(A,vrn,w);
  VecNorm(w,NORM_2,nrm);
  *nrm *= PetscSqrtReal((PetscReal)n);

  VecDestroy(&vv);
  VecDestroy(&ww);
  return 0;
}