Actual source code: mpirowbs.c
1: #define PETSCMAT_DLL
3: #include "src/mat/impls/rowbs/mpi/mpirowbs.h"
5: #define CHUNCKSIZE_LOCAL 10
9: static PetscErrorCode MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v)
10: {
14: if (v) {
15: #if defined(PETSC_USE_LOG)
16: int len = -n*(sizeof(int)+sizeof(PetscScalar));
17: #endif
18: PetscFree(v);
19: PetscLogObjectMemory(A,len);
20: }
21: return(0);
22: }
26: static PetscErrorCode MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v)
27: {
29: int len;
32: if (!n) {
33: *i = 0; *v = 0;
34: } else {
35: len = n*(sizeof(int) + sizeof(PetscScalar));
36: PetscMalloc(len,v);
37: PetscLogObjectMemory(A,len);
38: *i = (int*)(*v + n);
39: }
40: return(0);
41: }
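/*
   Editorial note (not part of the original source): MatMallocRowbs_Private
   above obtains one block of n*(sizeof(int)+sizeof(PetscScalar)) bytes, puts
   the n PetscScalar values first and the n column indices immediately after
   them (*i = (int*)(*v + n)).  This is why MatFreeRowbs_Private frees only
   the value pointer: releasing v releases the column indices as well.
*/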
45: PetscErrorCode MatScale_MPIRowbs(Mat inA,PetscScalar alpha)
46: {
47: Mat_MPIRowbs *a = (Mat_MPIRowbs*)inA->data;
48: BSspmat *A = a->A;
49: BSsprow *vs;
50: PetscScalar *ap;
51: int i,m = inA->rmap.n,nrow,j;
55: for (i=0; i<m; i++) {
56: vs = A->rows[i];
57: nrow = vs->length;
58: ap = vs->nz;
59: for (j=0; j<nrow; j++) {
60: ap[j] *= alpha;
61: }
62: }
63: PetscLogFlops(a->nz);
64: return(0);
65: }
67: /* ----------------------------------------------------------------- */
70: static PetscErrorCode MatCreateMPIRowbs_local(Mat A,int nz,const int nnz[])
71: {
72: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data;
74: int i,len,m = A->rmap.n,*tnnz;
75: BSspmat *bsmat;
76: BSsprow *vs;
79: PetscMalloc((m+1)*sizeof(int),&tnnz);
80: if (!nnz) {
81: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
82: if (nz <= 0) nz = 1;
83: for (i=0; i<m; i++) tnnz[i] = nz;
84: nz = nz*m;
85: } else {
86: nz = 0;
87: for (i=0; i<m; i++) {
88: if (nnz[i] <= 0) tnnz[i] = 1;
89: else tnnz[i] = nnz[i];
90: nz += tnnz[i];
91: }
92: }
94: /* Allocate BlockSolve matrix context */
95: PetscNew(BSspmat,&bsif->A);
96: bsmat = bsif->A;
97: BSset_mat_icc_storage(bsmat,PETSC_FALSE);
98: BSset_mat_symmetric(bsmat,PETSC_FALSE);
99: len = m*(sizeof(BSsprow*)+ sizeof(BSsprow)) + 1;
100: PetscMalloc(len,&bsmat->rows);
101: bsmat->num_rows = m;
102: bsmat->global_num_rows = A->rmap.N;
103: bsmat->map = bsif->bsmap;
104: vs = (BSsprow*)(bsmat->rows + m);
105: for (i=0; i<m; i++) {
106: bsmat->rows[i] = vs;
107: bsif->imax[i] = tnnz[i];
108: vs->diag_ind = -1;
109: MatMallocRowbs_Private(A,tnnz[i],&(vs->col),&(vs->nz));
110: /* put zero on diagonal */
111: /*vs->length = 1;
112: vs->col[0] = i + bsif->rstart;
113: vs->nz[0] = 0.0;*/
114: vs->length = 0;
115: vs++;
116: }
117: PetscLogObjectMemory(A,sizeof(BSspmat) + len);
118: bsif->nz = 0;
119: bsif->maxnz = nz;
120: bsif->sorted = 0;
121: bsif->roworiented = PETSC_TRUE;
122: bsif->nonew = 0;
123: bsif->bs_color_single = 0;
125: PetscFree(tnnz);
126: return(0);
127: }
131: static PetscErrorCode MatSetValues_MPIRowbs_local(Mat AA,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
132: {
133: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
134: BSspmat *A = mat->A;
135: BSsprow *vs;
137: int *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax;
138: int *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted;
139: PetscScalar *ap,value;
142: for (k=0; k<m; k++) { /* loop over added rows */
143: row = im[k];
144: if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
145: if (row >= AA->rmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,AA->rmap.n-1);
146: vs = A->rows[row];
147: ap = vs->nz; rp = vs->col;
148: rmax = imax[row]; nrow = vs->length;
149: a = 0;
150: for (l=0; l<n; l++) { /* loop over added columns */
151: if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative col: %d",in[l]);
152: if (in[l] >= AA->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],AA->cmap.N-1);
153: col = in[l]; value = *v++;
154: if (!sorted) {a = 0;} b = nrow;
155: while (b-a > 5) {
156: t = (b+a)/2;
157: if (rp[t] > col) b = t;
158: else a = t;
159: }
160: for (i=a; i<b; i++) {
161: if (rp[i] > col) break;
162: if (rp[i] == col) {
163: if (addv == ADD_VALUES) ap[i] += value;
164: else ap[i] = value;
165: goto noinsert;
166: }
167: }
168: if (nonew) goto noinsert;
169: if (nrow >= rmax) {
170: /* there is no extra room in row, therefore enlarge */
171: int *itemp,*iout,*iin = vs->col;
172: PetscScalar *vout,*vin = vs->nz,*vtemp;
174: /* malloc new storage space */
175: imax[row] += CHUNCKSIZE_LOCAL;
176: MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);
177: vout = vtemp; iout = itemp;
178: for (ii=0; ii<i; ii++) {
179: vout[ii] = vin[ii];
180: iout[ii] = iin[ii];
181: }
182: vout[i] = value;
183: iout[i] = col;
184: for (ii=i+1; ii<=nrow; ii++) {
185: vout[ii] = vin[ii-1];
186: iout[ii] = iin[ii-1];
187: }
188: /* free old row storage */
189: if (rmax > 0) {
190: MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);
191: }
192: vs->col = iout; vs->nz = vout;
193: rmax = imax[row];
194: mat->maxnz += CHUNCKSIZE_LOCAL;
195: mat->reallocs++;
196: } else {
197: /* shift higher columns over to make room for the new entry */
198: for (ii=nrow-1; ii>=i; ii--) {
199: rp[ii+1] = rp[ii];
200: ap[ii+1] = ap[ii];
201: }
202: rp[i] = col;
203: ap[i] = value;
204: }
205: nrow++;
206: mat->nz++;
207: AA->same_nonzero = PETSC_FALSE;
208: noinsert:;
209: a = i + 1;
210: }
211: vs->length = nrow;
212: }
213: return(0);
214: }
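/*
   Editorial note (not part of the original source): MatSetValues_MPIRowbs_local
   above keeps each row's column indices sorted, narrows the search window by
   bisection while the window holds more than 5 entries, and grows a full row
   by CHUNCKSIZE_LOCAL entries before shifting the tail to make room.  A
   minimal stand-alone sketch of the same insert-into-sorted-row idea (plain C
   with malloc/free and hypothetical names, not the PETSc/BlockSolve API):

     #include <stdlib.h>
     #include <string.h>

     static int row_insert(int **col,double **val,int *len,int *max,
                           int c,double v,int add)
     {
       int    i,*nc;
       double *nv;
       for (i=0; i<*len && (*col)[i] < c; i++) ;
       if (i < *len && (*col)[i] == c) {
         if (add) (*val)[i] += v; else (*val)[i] = v;
         return 0;
       }
       if (*len == *max) {
         *max += 10;
         nc = (int*)malloc((*max)*sizeof(int));
         nv = (double*)malloc((*max)*sizeof(double));
         if (!nc || !nv) return 1;
         if (*len) {
           memcpy(nc,*col,(*len)*sizeof(int));
           memcpy(nv,*val,(*len)*sizeof(double));
         }
         free(*col); free(*val);
         *col = nc; *val = nv;
       }
       memmove(*col+i+1,*col+i,(*len-i)*sizeof(int));
       memmove(*val+i+1,*val+i,(*len-i)*sizeof(double));
       (*col)[i] = c; (*val)[i] = v;
       (*len)++;
       return 0;
     }
*/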
219: static PetscErrorCode MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode)
220: {
222: return(0);
223: }
227: static PetscErrorCode MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode)
228: {
229: Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data;
230: BSspmat *A = a->A;
231: BSsprow *vs;
232: int i,j,rstart = AA->rmap.rstart;
235: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
237: /* Mark location of diagonal */
238: for (i=0; i<AA->rmap.n; i++) {
239: vs = A->rows[i];
240: for (j=0; j<vs->length; j++) {
241: if (vs->col[j] == i + rstart) {
242: vs->diag_ind = j;
243: break;
244: }
245: }
246: if (vs->diag_ind == -1) {
247: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry");
248: }
249: }
250: return(0);
251: }
255: static PetscErrorCode MatZeroRows_MPIRowbs_local(Mat A,PetscInt N,const PetscInt rz[],PetscScalar diag)
256: {
257: Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
258: BSspmat *l = a->A;
260: int i,m = A->rmap.n - 1,col,base=A->rmap.rstart;
263: if (a->keepzeroedrows) {
264: for (i=0; i<N; i++) {
265: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
266: PetscMemzero(l->rows[rz[i]]->nz,l->rows[rz[i]]->length*sizeof(PetscScalar));
267: if (diag != 0.0) {
268: col=rz[i]+base;
269: MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);
270: }
271: }
272: } else {
273: if (diag != 0.0) {
274: for (i=0; i<N; i++) {
275: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
276: if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */
277: l->rows[rz[i]]->length = 1;
278: l->rows[rz[i]]->nz[0] = diag;
279: l->rows[rz[i]]->col[0] = A->rmap.rstart + rz[i];
280: } else {
281: col=rz[i]+base;
282: MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);
283: }
284: }
285: } else {
286: for (i=0; i<N; i++) {
287: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
288: l->rows[rz[i]]->length = 0;
289: }
290: }
291: A->same_nonzero = PETSC_FALSE;
292: }
293: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
294: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
295: return(0);
296: }
300: static PetscErrorCode MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm)
301: {
302: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
303: BSsprow *vs,**rs;
304: PetscScalar *xv;
305: PetscReal sum = 0.0;
307: int *xi,nz,i,j;
310: rs = mat->A->rows;
311: if (type == NORM_FROBENIUS) {
312: for (i=0; i<A->rmap.n; i++) {
313: vs = *rs++;
314: nz = vs->length;
315: xv = vs->nz;
316: while (nz--) {
317: #if defined(PETSC_USE_COMPLEX)
318: sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
319: #else
320: sum += (*xv)*(*xv); xv++;
321: #endif
322: }
323: }
324: *norm = sqrt(sum);
325: } else if (type == NORM_1) { /* max column norm */
326: PetscReal *tmp;
327: PetscMalloc(A->cmap.n*sizeof(PetscReal),&tmp);
328: PetscMemzero(tmp,A->cmap.n*sizeof(PetscReal));
329: *norm = 0.0;
330: for (i=0; i<A->rmap.n; i++) {
331: vs = *rs++;
332: nz = vs->length;
333: xi = vs->col;
334: xv = vs->nz;
335: while (nz--) {
336: tmp[*xi] += PetscAbsScalar(*xv);
337: xi++; xv++;
338: }
339: }
340: for (j=0; j<A->cmap.n; j++) {
341: if (tmp[j] > *norm) *norm = tmp[j];
342: }
343: PetscFree(tmp);
344: } else if (type == NORM_INFINITY) { /* max row norm */
345: *norm = 0.0;
346: for (i=0; i<A->rmap.n; i++) {
347: vs = *rs++;
348: nz = vs->length;
349: xv = vs->nz;
350: sum = 0.0;
351: while (nz--) {
352: sum += PetscAbsScalar(*xv); xv++;
353: }
354: if (sum > *norm) *norm = sum;
355: }
356: } else {
357: SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
358: }
359: return(0);
360: }
362: /* ----------------------------------------------------------------- */
366: PetscErrorCode MatSetValues_MPIRowbs(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode av)
367: {
368: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
370: int i,j,row,col,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
371: PetscTruth roworiented = a->roworiented;
374: /* Note: There's no need to "unscale" the matrix, since scaling is
375: confined to a->pA, and we're working with a->A here */
376: for (i=0; i<m; i++) {
377: if (im[i] < 0) continue;
378: if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->rmap.N-1);
379: if (im[i] >= rstart && im[i] < rend) {
380: row = im[i] - rstart;
381: for (j=0; j<n; j++) {
382: if (in[j] < 0) continue;
383: if (in[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->cmap.N-1);
384: if (in[j] >= 0 && in[j] < mat->cmap.N){
385: col = in[j];
386: if (roworiented) {
387: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);
388: } else {
389: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);
390: }
391: } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");}
392: }
393: } else {
394: if (!a->donotstash) {
395: if (roworiented) {
396: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
397: } else {
398: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
399: }
400: }
401: }
402: }
403: return(0);
404: }
408: PetscErrorCode MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode)
409: {
410: MPI_Comm comm = mat->comm;
412: int nstash,reallocs;
413: InsertMode addv;
416: /* Note: There's no need to "unscale" the matrix, since scaling is
417: confined to a->pA, and we're working with a->A here */
419: /* make sure all processors are either in INSERTMODE or ADDMODE */
420: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
421: if (addv == (ADD_VALUES|INSERT_VALUES)) {
422: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added");
423: }
424: mat->insertmode = addv; /* in case this processor had no cache */
426: MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);
427: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
428: PetscInfo2(0,"Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
429: return(0);
430: }
434: static PetscErrorCode MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer)
435: {
436: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
438: int i,j;
439: PetscTruth iascii;
440: BSspmat *A = a->A;
441: BSsprow **rs = A->rows;
442: PetscViewerFormat format;
445: PetscViewerGetFormat(viewer,&format);
446: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
448: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
449: int ind_l,ind_g,clq_l,clq_g,color;
450: ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0);
451: ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0);
452: clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0);
453: clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0);
454: color = BSnum_colors(a->pA);CHKERRBS(0);
455: PetscViewerASCIIPrintf(viewer," %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);
456: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l);
457: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
458: for (i=0; i<A->num_rows; i++) {
459: PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+mat->rmap.rstart);
460: for (j=0; j<rs[i]->length; j++) {
461: if (rs[i]->nz[j]) {PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);}
462: }
463: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
464: }
465: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
466: SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
467: } else {
468: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
469: for (i=0; i<A->num_rows; i++) {
470: PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+mat->rmap.rstart);
471: for (j=0; j<rs[i]->length; j++) {
472: PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);
473: }
474: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
475: }
476: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
477: }
478: PetscViewerFlush(viewer);
479: return(0);
480: }
484: static PetscErrorCode MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer)
485: {
486: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
488: PetscMPIInt rank,size;
489: PetscInt i,M,m,*sbuff,*rowlengths;
490: PetscInt *recvcts,*recvdisp,fd,*cols,maxnz,nz,j;
491: BSspmat *A = a->A;
492: BSsprow **rs = A->rows;
493: MPI_Comm comm = mat->comm;
494: MPI_Status status;
495: PetscScalar *vals;
496: MatInfo info;
499: MPI_Comm_size(comm,&size);
500: MPI_Comm_rank(comm,&rank);
502: M = mat->rmap.N; m = mat->rmap.n;
503: /* First gather together on the first processor the lengths of
504: each row, and write them out to the file */
505: PetscMalloc(m*sizeof(int),&sbuff);
506: for (i=0; i<A->num_rows; i++) {
507: sbuff[i] = rs[i]->length;
508: }
509: MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
510: if (!rank) {
511: PetscViewerBinaryGetDescriptor(viewer,&fd);
512: PetscMalloc((4+M)*sizeof(int),&rowlengths);
513: PetscMalloc(size*sizeof(int),&recvcts);
514: recvdisp = mat->rmap.range;
515: for (i=0; i<size; i++) {
516: recvcts[i] = recvdisp[i+1] - recvdisp[i];
517: }
518: /* first four elements of rowlength are the header */
519: rowlengths[0] = mat->cookie;
520: rowlengths[1] = mat->rmap.N;
521: rowlengths[2] = mat->cmap.N;
522: rowlengths[3] = (int)info.nz_used;
523: MPI_Gatherv(sbuff,m,MPI_INT,rowlengths+4,recvcts,recvdisp,MPI_INT,0,comm);
524: PetscFree(sbuff);
525: PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,PETSC_FALSE);
526: /* count the number of nonzeros on each processor */
527: PetscMemzero(recvcts,size*sizeof(int));
528: for (i=0; i<size; i++) {
529: for (j=recvdisp[i]; j<recvdisp[i+1]; j++) {
530: recvcts[i] += rowlengths[j+4];
531: }
532: }
533: /* allocate buffer long enough to hold largest one */
534: maxnz = 0;
535: for (i=0; i<size; i++) {
536: maxnz = PetscMax(maxnz,recvcts[i]);
537: }
538: PetscFree(rowlengths);
539: PetscFree(recvcts);
540: PetscMalloc(maxnz*sizeof(int),&cols);
542: /* binary store column indices for 0th processor */
543: nz = 0;
544: for (i=0; i<A->num_rows; i++) {
545: for (j=0; j<rs[i]->length; j++) {
546: cols[nz++] = rs[i]->col[j];
547: }
548: }
549: PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);
551: /* receive and store column indices for all other processors */
552: for (i=1; i<size; i++) {
553: /* should tell processor that I am now ready and to begin the send */
554: MPI_Recv(cols,maxnz,MPI_INT,i,mat->tag,comm,&status);
555: MPI_Get_count(&status,MPI_INT,&nz);
556: PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);
557: }
558: PetscFree(cols);
559: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
561: /* binary store values for 0th processor */
562: nz = 0;
563: for (i=0; i<A->num_rows; i++) {
564: for (j=0; j<rs[i]->length; j++) {
565: vals[nz++] = rs[i]->nz[j];
566: }
567: }
568: PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);
570: /* receive and store nonzeros for all other processors */
571: for (i=1; i<size; i++) {
572: /* should tell processor that I am now ready and to begin the send */
573: MPI_Recv(vals,maxnz,MPIU_SCALAR,i,mat->tag,comm,&status);
574: MPI_Get_count(&status,MPIU_SCALAR,&nz);
575: PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);
576: }
577: PetscFree(vals);
578: } else {
579: MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);
580: PetscFree(sbuff);
582: /* count local nonzeros */
583: nz = 0;
584: for (i=0; i<A->num_rows; i++) {
585: for (j=0; j<rs[i]->length; j++) {
586: nz++;
587: }
588: }
589: /* copy into buffer column indices */
590: PetscMalloc(nz*sizeof(int),&cols);
591: nz = 0;
592: for (i=0; i<A->num_rows; i++) {
593: for (j=0; j<rs[i]->length; j++) {
594: cols[nz++] = rs[i]->col[j];
595: }
596: }
597: /* send */ /* should wait until processor zero tells me to go */
598: MPI_Send(cols,nz,MPI_INT,0,mat->tag,comm);
599: PetscFree(cols);
601: /* copy into buffer column values */
602: PetscMalloc(nz*sizeof(PetscScalar),&vals);
603: nz = 0;
604: for (i=0; i<A->num_rows; i++) {
605: for (j=0; j<rs[i]->length; j++) {
606: vals[nz++] = rs[i]->nz[j];
607: }
608: }
609: /* send */ /* should wait until processor zero tells me to go */
610: MPI_Send(vals,nz,MPIU_SCALAR,0,mat->tag,comm);
611: PetscFree(vals);
612: }
614: return(0);
615: }
619: PetscErrorCode MatView_MPIRowbs(Mat mat,PetscViewer viewer)
620: {
621: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
623: PetscTruth iascii,isbinary;
626: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
627: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
628: if (!bsif->blocksolveassembly) {
629: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
630: }
631: if (iascii) {
632: MatView_MPIRowbs_ASCII(mat,viewer);
633: } else if (isbinary) {
634: MatView_MPIRowbs_Binary(mat,viewer);
635: } else {
636: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name);
637: }
638: return(0);
639: }
640:
643: static PetscErrorCode MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat)
644: {
645: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
646: BSspmat *A = a->A;
647: BSsprow *vs;
648: int size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs;
649: int msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,idx,row;
651: int ctr_j,*sbuf1_j,k;
652: PetscScalar val=0.0;
653: MPI_Comm comm;
654: MPI_Request *s_waits1,*r_waits1;
655: MPI_Status *s_status,*r_status;
658: comm = mat->comm;
659: tag = mat->tag;
660: size = a->size;
661: rank = a->rank;
662: M = mat->rmap.N;
663: rstart = mat->rmap.rstart;
665: PetscMalloc(M*sizeof(int),&rtable);
666: /* Create hash table for the mapping :row -> proc */
667: for (i=0,j=0; i<size; i++) {
668: len = mat->rmap.range[i+1];
669: for (; j<len; j++) {
670: rtable[j] = i;
671: }
672: }
674: /* Evaluate communication - mesg to whom, length of mesg, and buffer space
675: required. Based on this, buffers are allocated, and data copied into them. */
676: PetscMalloc(size*4*sizeof(int),&w1);/* mesg size */
677: w3 = w1 + 2*size; /* no of IS that needs to be sent to proc i */
678: w4 = w3 + size; /* temp work space used in determining w1, w3 */
679: PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector */
681: for (i=0; i<mat->rmap.n; i++) {
682: PetscMemzero(w4,size*sizeof(int)); /* initialize work vector */
683: vs = A->rows[i];
684: for (j=0; j<vs->length; j++) {
685: proc = rtable[vs->col[j]];
686: w4[proc]++;
687: }
688: for (j=0; j<size; j++) {
689: if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
690: }
691: }
692:
693: nrqs = 0; /* number of outgoing messages */
694: msz = 0; /* total mesg length (for all proc */
695: w1[2*rank] = 0; /* no mesg sent to itself */
696: w3[rank] = 0;
697: for (i=0; i<size; i++) {
698: if (w1[2*i]) {w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
699: }
700: /* pa - is list of processors to communicate with */
701: PetscMalloc((nrqs+1)*sizeof(int),&pa);
702: for (i=0,j=0; i<size; i++) {
703: if (w1[2*i]) {pa[j] = i; j++;}
704: }
706: /* Each message would have a header = 1 + 2*(no of ROWS) + data */
707: for (i=0; i<nrqs; i++) {
708: j = pa[i];
709: w1[2*j] += w1[2*j+1] + 2*w3[j];
710: msz += w1[2*j];
711: }
712:
713: /* Do a global reduction to determine how many messages to expect */
714: PetscMaxSum(comm,w1,&bsz,&nrqr);
716: /* Allocate memory for recv buffers (possibly none if nrqr = 0) */
717: len = (nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int);
718: PetscMalloc(len,&rbuf1);
719: rbuf1[0] = (int*)(rbuf1 + nrqr);
720: for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;
722: /* Post the receives */
723: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);
724: for (i=0; i<nrqr; ++i){
725: MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits1+i);
726: }
727:
728: /* Allocate Memory for outgoing messages */
729: len = 2*size*sizeof(int*) + (size+msz)*sizeof(int);
730: PetscMalloc(len,&sbuf1);
731: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
732: PetscMemzero(sbuf1,2*size*sizeof(int*));
733: tmp = (int*)(sbuf1 + 2*size);
734: ctr = tmp + msz;
736: {
737: int *iptr = tmp,ict = 0;
738: for (i=0; i<nrqs; i++) {
739: j = pa[i];
740: iptr += ict;
741: sbuf1[j] = iptr;
742: ict = w1[2*j];
743: }
744: }
746: /* Form the outgoing messages */
747: /* Clean up the header space */
748: for (i=0; i<nrqs; i++) {
749: j = pa[i];
750: sbuf1[j][0] = 0;
751: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
752: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
753: }
755: /* Parse the matrix and copy the data into sbuf1 */
756: for (i=0; i<mat->rmap.n; i++) {
757: PetscMemzero(ctr,size*sizeof(int));
758: vs = A->rows[i];
759: for (j=0; j<vs->length; j++) {
760: col = vs->col[j];
761: proc = rtable[col];
762: if (proc != rank) { /* copy to the outgoing buffer */
763: ctr[proc]++;
764: *ptr[proc] = col;
765: ptr[proc]++;
766: } else {
767: row = col - rstart;
768: col = i + rstart;
769: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
770: }
771: }
772: /* Update the headers for the current row */
773: for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
774: if ((ctr_j = ctr[j])) {
775: sbuf1_j = sbuf1[j];
776: k = ++sbuf1_j[0];
777: sbuf1_j[2*k] = ctr_j;
778: sbuf1_j[2*k-1] = i + rstart;
779: }
780: }
781: }
782: /* Check Validity of the outgoing messages */
783: {
784: int sum;
785: for (i=0 ; i<nrqs ; i++) {
786: j = pa[i];
787: if (w3[j] != sbuf1[j][0]) {SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[1] mismatch!\n"); }
788: }
790: for (i=0 ; i<nrqs ; i++) {
791: j = pa[i];
792: sum = 1;
793: for (k = 1; k <= w3[j]; k++) sum += sbuf1[j][2*k]+2;
794: if (sum != w1[2*j]) { SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[2-n] mismatch!\n"); }
795: }
796: }
797:
798: /* Now post the sends */
799: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
800: for (i=0; i<nrqs; ++i) {
801: j = pa[i];
802: MPI_Isend(sbuf1[j],w1[2*j],MPI_INT,j,tag,comm,s_waits1+i);
803: }
804:
805: /* Receive messages*/
806: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status);
807: for (i=0; i<nrqr; ++i) {
808: MPI_Waitany(nrqr,r_waits1,&idx,r_status+i);
809: /* Process the Message */
810: {
811: int *rbuf1_i,n_row,ct1;
813: rbuf1_i = rbuf1[idx];
814: n_row = rbuf1_i[0];
815: ct1 = 2*n_row+1;
816: val = 0.0;
817: /* Optimise this later */
818: for (j=1; j<=n_row; j++) {
819: col = rbuf1_i[2*j-1];
820: for (k=0; k<rbuf1_i[2*j]; k++,ct1++) {
821: row = rbuf1_i[ct1] - rstart;
822: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
823: }
824: }
825: }
826: }
828: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
829: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}
831: PetscFree(rtable);
832: PetscFree(w1);
833: PetscFree(pa);
834: PetscFree(rbuf1);
835: PetscFree(sbuf1);
836: PetscFree(r_waits1);
837: PetscFree(s_waits1);
838: PetscFree(r_status);
839: PetscFree(s_status);
840: return(0);
841: }
843: /*
844: This does the BlockSolve portion of the matrix assembly.
845: It is provided in a separate routine so that users can
846: operate on the matrix (using MatScale(), MatShift() etc.) after
847: the matrix has been assembled but before BlockSolve has sucked it
848: in and devoured it.
849: */
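/*
   Editorial sketch (not part of the original source): the deferred BlockSolve
   assembly described above is triggered the first time a routine such as
   MatMult_MPIRowbs(), MatGetDiagonal_MPIRowbs() or MatView_MPIRowbs() finds
   blocksolveassembly unset, so a user may still modify the assembled matrix
   first, e.g. (standard PETSc calls assumed; A, alpha, beta, x, y are
   placeholders created elsewhere):

       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
       MatScale(A,alpha);
       MatShift(A,beta);
       MatMult(A,x,y);

   The call to MatMult() then performs the BlockSolve permutation and diagonal
   scaling via MatAssemblyEnd_MPIRowbs_ForBlockSolve() below.
*/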
852: PetscErrorCode MatAssemblyEnd_MPIRowbs_ForBlockSolve(Mat mat)
853: {
854: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
856: int ldim,low,high,i;
857: PetscScalar *diag;
860: if ((mat->was_assembled) && (!mat->same_nonzero)) { /* Free the old info */
861: if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);}
862: if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);}
863: }
865: if ((!mat->same_nonzero) || (!mat->was_assembled)) {
866: /* Indicates bypassing cliques in coloring */
867: if (a->bs_color_single) {
868: BSctx_set_si(a->procinfo,100);
869: }
870: /* Form permuted matrix for efficient parallel execution */
871: a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0);
872: /* Set up the communication */
873: a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0);
874: } else {
875: /* Repermute the matrix */
876: BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0);
877: }
879: /* Symmetrically scale the matrix by the diagonal */
880: BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0);
882: /* Store inverse of square root of permuted diagonal scaling matrix */
883: VecGetLocalSize(a->diag,&ldim);
884: VecGetOwnershipRange(a->diag,&low,&high);
885: VecGetArray(a->diag,&diag);
886: for (i=0; i<ldim; i++) {
887: if (a->pA->scale_diag[i] != 0.0) {
888: diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i]));
889: } else {
890: diag[i] = 1.0;
891: }
892: }
893: VecRestoreArray(a->diag,&diag);
894: a->assembled_icc_storage = a->A->icc_storage;
895: a->blocksolveassembly = 1;
896: mat->was_assembled = PETSC_TRUE;
897: mat->same_nonzero = PETSC_TRUE;
898: PetscInfo(mat,"Completed BlockSolve95 matrix assembly\n");
899: return(0);
900: }
904: PetscErrorCode MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode)
905: {
906: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
908: int i,n,row,col,*rows,*cols,rstart,nzcount,flg,j,ncols;
909: PetscScalar *vals,val;
910: InsertMode addv = mat->insertmode;
913: while (1) {
914: MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
915: if (!flg) break;
916:
917: for (i=0; i<n;) {
918: /* Now identify the consecutive vals belonging to the same row */
919: for (j=i,rstart=rows[j]; j<n; j++) { if (rows[j] != rstart) break; }
920: if (j < n) ncols = j-i;
921: else ncols = n-i;
922: /* Now assemble all these values with a single function call */
923: MatSetValues_MPIRowbs(mat,1,rows+i,ncols,cols+i,vals+i,addv);
924: i = j;
925: }
926: }
927: MatStashScatterEnd_Private(&mat->stash);
929: rstart = mat->rmap.rstart;
930: nzcount = a->nz; /* This is the number of nonzeros entered by the user */
931: /* BlockSolve requires that the matrix is structurally symmetric */
932: if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) {
933: MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);
934: }
935:
936: /* BlockSolve requires that all the diagonal elements are set */
937: val = 0.0;
938: for (i=0; i<mat->rmap.n; i++) {
939: row = i; col = i + rstart;
940: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
941: }
942:
943: MatAssemblyBegin_MPIRowbs_local(mat,mode);
944: MatAssemblyEnd_MPIRowbs_local(mat,mode);
945:
946: a->blocksolveassembly = 0;
947: PetscInfo4(mat,"Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->rmap.n,mat->cmap.n,a->maxnz-a->nz,a->nz);
948: PetscInfo2(mat,"User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount);
949: PetscInfo1(mat,"Number of mallocs during MatSetValues is %d\n",a->reallocs);
950: return(0);
951: }
955: PetscErrorCode MatZeroEntries_MPIRowbs(Mat mat)
956: {
957: Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data;
958: BSspmat *A = l->A;
959: BSsprow *vs;
960: int i,j;
963: for (i=0; i <mat->rmap.n; i++) {
964: vs = A->rows[i];
965: for (j=0; j< vs->length; j++) vs->nz[j] = 0.0;
966: }
967: return(0);
968: }
970: /* the code does not do the diagonal entries correctly unless the
971: matrix is square and the column and row ownerships are identical.
972: This is a BUG.
973: */
977: PetscErrorCode MatZeroRows_MPIRowbs(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
978: {
979: Mat_MPIRowbs *l = (Mat_MPIRowbs*)A->data;
981: int i,*owners = A->rmap.range,size = l->size;
982: int *nprocs,j,idx,nsends;
983: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
984: int *rvalues,tag = A->tag,count,base,slen,n,*source;
985: int *lens,imdex,*lrows,*values;
986: MPI_Comm comm = A->comm;
987: MPI_Request *send_waits,*recv_waits;
988: MPI_Status recv_status,*send_status;
989: PetscTruth found;
992: /* first count number of contributors to each processor */
993: PetscMalloc(2*size*sizeof(int),&nprocs);
994: PetscMemzero(nprocs,2*size*sizeof(int));
995: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
996: for (i=0; i<N; i++) {
997: idx = rows[i];
998: found = PETSC_FALSE;
999: for (j=0; j<size; j++) {
1000: if (idx >= owners[j] && idx < owners[j+1]) {
1001: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1002: }
1003: }
1004: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
1005: }
1006: nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}
1008: /* inform other processors of number of messages and max length*/
1009: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
1011: /* post receives: */
1012: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1013: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1014: for (i=0; i<nrecvs; i++) {
1015: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1016: }
1018: /* do sends:
1019: 1) starts[i] gives the starting index in svalues for stuff going to
1020: the ith processor
1021: */
1022: PetscMalloc((N+1)*sizeof(int),&svalues);
1023: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1024: PetscMalloc((size+1)*sizeof(int),&starts);
1025: starts[0] = 0;
1026: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1027: for (i=0; i<N; i++) {
1028: svalues[starts[owner[i]]++] = rows[i];
1029: }
1031: starts[0] = 0;
1032: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1033: count = 0;
1034: for (i=0; i<size; i++) {
1035: if (nprocs[2*i+1]) {
1036: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1037: }
1038: }
1039: PetscFree(starts);
1041: base = owners[rank];
1043: /* wait on receives */
1044: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1045: source = lens + nrecvs;
1046: count = nrecvs; slen = 0;
1047: while (count) {
1048: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1049: /* unpack receives into our local space */
1050: MPI_Get_count(&recv_status,MPI_INT,&n);
1051: source[imdex] = recv_status.MPI_SOURCE;
1052: lens[imdex] = n;
1053: slen += n;
1054: count--;
1055: }
1056: PetscFree(recv_waits);
1057:
1058: /* move the data into the send scatter */
1059: PetscMalloc((slen+1)*sizeof(int),&lrows);
1060: count = 0;
1061: for (i=0; i<nrecvs; i++) {
1062: values = rvalues + i*nmax;
1063: for (j=0; j<lens[i]; j++) {
1064: lrows[count++] = values[j] - base;
1065: }
1066: }
1067: PetscFree(rvalues);
1068: PetscFree(lens);
1069: PetscFree(owner);
1070: PetscFree(nprocs);
1071:
1072: /* actually zap the local rows */
1073: MatZeroRows_MPIRowbs_local(A,slen,lrows,diag);
1074: PetscFree(lrows);
1076: /* wait on sends */
1077: if (nsends) {
1078: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1079: MPI_Waitall(nsends,send_waits,send_status);
1080: PetscFree(send_status);
1081: }
1082: PetscFree(send_waits);
1083: PetscFree(svalues);
1085: return(0);
1086: }
1090: PetscErrorCode MatNorm_MPIRowbs(Mat mat,NormType type,PetscReal *norm)
1091: {
1092: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1093: BSsprow *vs,**rs;
1094: PetscScalar *xv;
1095: PetscReal sum = 0.0;
1097: int *xi,nz,i,j;
1100: if (a->size == 1) {
1101: MatNorm_MPIRowbs_local(mat,type,norm);
1102: } else {
1103: rs = a->A->rows;
1104: if (type == NORM_FROBENIUS) {
1105: for (i=0; i<mat->rmap.n; i++) {
1106: vs = *rs++;
1107: nz = vs->length;
1108: xv = vs->nz;
1109: while (nz--) {
1110: #if defined(PETSC_USE_COMPLEX)
1111: sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
1112: #else
1113: sum += (*xv)*(*xv); xv++;
1114: #endif
1115: }
1116: }
1117: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1118: *norm = sqrt(*norm);
1119: } else if (type == NORM_1) { /* max column norm */
1120: PetscReal *tmp,*tmp2;
1121: PetscMalloc(mat->cmap.N*sizeof(PetscReal),&tmp);
1122: PetscMalloc(mat->cmap.N*sizeof(PetscReal),&tmp2);
1123: PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));
1124: *norm = 0.0;
1125: for (i=0; i<mat->rmap.n; i++) {
1126: vs = *rs++;
1127: nz = vs->length;
1128: xi = vs->col;
1129: xv = vs->nz;
1130: while (nz--) {
1131: tmp[*xi] += PetscAbsScalar(*xv);
1132: xi++; xv++;
1133: }
1134: }
1135: MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);
1136: for (j=0; j<mat->cmap.N; j++) {
1137: if (tmp2[j] > *norm) *norm = tmp2[j];
1138: }
1139: PetscFree(tmp);
1140: PetscFree(tmp2);
1141: } else if (type == NORM_INFINITY) { /* max row norm */
1142: PetscReal ntemp = 0.0;
1143: for (i=0; i<mat->rmap.n; i++) {
1144: vs = *rs++;
1145: nz = vs->length;
1146: xv = vs->nz;
1147: sum = 0.0;
1148: while (nz--) {
1149: sum += PetscAbsScalar(*xv); xv++;
1150: }
1151: if (sum > ntemp) ntemp = sum;
1152: }
1153: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1154: } else {
1155: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1156: }
1157: }
1158: return(0);
1159: }
1163: PetscErrorCode MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy)
1164: {
1165: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
1166: BSprocinfo *bspinfo = bsif->procinfo;
1167: PetscScalar *xxa,*xworka,*yya;
1171: if (!bsif->blocksolveassembly) {
1172: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1173: }
1175: /* Permute and apply diagonal scaling: [ xwork = D^{1/2} * x ] */
1176: if (!bsif->vecs_permscale) {
1177: VecGetArray(bsif->xwork,&xworka);
1178: VecGetArray(xx,&xxa);
1179: BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0);
1180: VecRestoreArray(bsif->xwork,&xworka);
1181: VecRestoreArray(xx,&xxa);
1182: VecPointwiseDivide(xx,bsif->xwork,bsif->diag);
1183: }
1185: VecGetArray(xx,&xxa);
1186: VecGetArray(yy,&yya);
1187: /* Do lower triangular multiplication: [ y = L * xwork ] */
1188: if (bspinfo->single) {
1189: BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1190: } else {
1191: BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1192: }
1193:
1194: /* Do upper triangular multiplication: [ y = y + L^{T} * xwork ] */
1195: if (mat->symmetric) {
1196: if (bspinfo->single){
1197: BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1198: } else {
1199: BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1200: }
1201: }
1202: /* not needed for ILU version since forward does it all */
1203: VecRestoreArray(xx,&xxa);
1204: VecRestoreArray(yy,&yya);
1206: /* Apply diagonal scaling to vector: [ y = D^{1/2} * y ] */
1207: if (!bsif->vecs_permscale) {
1208: VecGetArray(bsif->xwork,&xworka);
1209: VecGetArray(xx,&xxa);
1210: BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0);
1211: VecRestoreArray(bsif->xwork,&xworka);
1212: VecRestoreArray(xx,&xxa);
1213: VecPointwiseDivide(bsif->xwork,yy,bsif->diag);
1214: VecGetArray(bsif->xwork,&xworka);
1215: VecGetArray(yy,&yya);
1216: BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0);
1217: VecRestoreArray(bsif->xwork,&xworka);
1218: VecRestoreArray(yy,&yya);
1219: }
1220: PetscLogFlops(2*bsif->nz - mat->cmap.n);
1222: return(0);
1223: }
1227: PetscErrorCode MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz)
1228: {
1230: PetscScalar one = 1.0;
1233: (*mat->ops->mult)(mat,xx,zz);
1234: VecAXPY(zz,one,yy);
1235: return(0);
1236: }
1240: PetscErrorCode MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info)
1241: {
1242: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
1243: PetscReal isend[5],irecv[5];
1247: info->rows_global = (double)A->rmap.N;
1248: info->columns_global = (double)A->cmap.N;
1249: info->rows_local = (double)A->rmap.n;
1250: info->columns_local = (double)A->cmap.n;
1251: info->block_size = 1.0;
1252: info->mallocs = (double)mat->reallocs;
1253: isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] = mat->maxnz - mat->nz;
1254: isend[3] = A->mem; isend[4] = info->mallocs;
1256: if (flag == MAT_LOCAL) {
1257: info->nz_used = isend[0];
1258: info->nz_allocated = isend[1];
1259: info->nz_unneeded = isend[2];
1260: info->memory = isend[3];
1261: info->mallocs = isend[4];
1262: } else if (flag == MAT_GLOBAL_MAX) {
1263: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
1264: info->nz_used = irecv[0];
1265: info->nz_allocated = irecv[1];
1266: info->nz_unneeded = irecv[2];
1267: info->memory = irecv[3];
1268: info->mallocs = irecv[4];
1269: } else if (flag == MAT_GLOBAL_SUM) {
1270: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
1271: info->nz_used = irecv[0];
1272: info->nz_allocated = irecv[1];
1273: info->nz_unneeded = irecv[2];
1274: info->memory = irecv[3];
1275: info->mallocs = irecv[4];
1276: }
1277: return(0);
1278: }
1282: PetscErrorCode MatGetDiagonal_MPIRowbs(Mat mat,Vec v)
1283: {
1284: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1285: BSsprow **rs = a->A->rows;
1287: int i,n;
1288: PetscScalar *x,zero = 0.0;
1291: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1292: if (!a->blocksolveassembly) {
1293: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1294: }
1296: VecSet(v,zero);
1297: VecGetLocalSize(v,&n);
1298: if (n != mat->rmap.n) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
1299: VecGetArray(v,&x);
1300: for (i=0; i<mat->rmap.n; i++) {
1301: x[i] = rs[i]->nz[rs[i]->diag_ind];
1302: }
1303: VecRestoreArray(v,&x);
1304: return(0);
1305: }
1309: PetscErrorCode MatDestroy_MPIRowbs(Mat mat)
1310: {
1311: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1312: BSspmat *A = a->A;
1313: BSsprow *vs;
1315: int i;
1318: #if defined(PETSC_USE_LOG)
1319: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->rmap.N,mat->cmap.N);
1320: #endif
1321: MatStashDestroy_Private(&mat->stash);
1322: if (a->bsmap) {
1323: PetscFree(a->bsmap->vlocal2global);
1324: PetscFree(a->bsmap->vglobal2local);
1325: if (a->bsmap->vglobal2proc) (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc);
1326: PetscFree(a->bsmap);
1327: }
1329: if (A) {
1330: for (i=0; i<mat->rmap.n; i++) {
1331: vs = A->rows[i];
1332: MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);
1333: }
1334: /* Note: A->map = a->bsmap is freed above */
1335: PetscFree(A->rows);
1336: PetscFree(A);
1337: }
1338: if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);}
1339: if (a->diag) {VecDestroy(a->diag);}
1340: if (a->xwork) {VecDestroy(a->xwork);}
1341: if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);}
1342: if (a->fpA) {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);}
1343: if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);}
1344: if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);}
1345: PetscFree(a->imax);
1346: MPI_Comm_free(&(a->comm_mpirowbs));
1347: PetscFree(a);
1348: PetscObjectComposeFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C","",PETSC_NULL);
1349: return(0);
1350: }
1354: PetscErrorCode MatSetOption_MPIRowbs(Mat A,MatOption op)
1355: {
1356: Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
1360: switch (op) {
1361: case MAT_ROW_ORIENTED:
1362: a->roworiented = PETSC_TRUE;
1363: break;
1364: case MAT_COLUMN_ORIENTED:
1365: a->roworiented = PETSC_FALSE;
1366: break;
1367: case MAT_COLUMNS_SORTED:
1368: a->sorted = 1;
1369: break;
1370: case MAT_COLUMNS_UNSORTED:
1371: a->sorted = 0;
1372: break;
1373: case MAT_NO_NEW_NONZERO_LOCATIONS:
1374: a->nonew = 1;
1375: break;
1376: case MAT_YES_NEW_NONZERO_LOCATIONS:
1377: a->nonew = 0;
1378: break;
1379: case MAT_DO_NOT_USE_INODES:
1380: a->bs_color_single = 1;
1381: break;
1382: case MAT_YES_NEW_DIAGONALS:
1383: case MAT_ROWS_SORTED:
1384: case MAT_NEW_NONZERO_LOCATION_ERR:
1385: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1386: case MAT_ROWS_UNSORTED:
1387: case MAT_USE_HASH_TABLE:
1388: PetscInfo(A,"Option ignored\n");
1389: break;
1390: case MAT_IGNORE_OFF_PROC_ENTRIES:
1391: a->donotstash = PETSC_TRUE;
1392: break;
1393: case MAT_NO_NEW_DIAGONALS:
1394: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1395: break;
1396: case MAT_KEEP_ZEROED_ROWS:
1397: a->keepzeroedrows = PETSC_TRUE;
1398: break;
1399: case MAT_SYMMETRIC:
1400: BSset_mat_symmetric(a->A,PETSC_TRUE);CHKERRBS(0);
1401: break;
1402: case MAT_STRUCTURALLY_SYMMETRIC:
1403: case MAT_NOT_SYMMETRIC:
1404: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1405: case MAT_HERMITIAN:
1406: case MAT_NOT_HERMITIAN:
1407: case MAT_SYMMETRY_ETERNAL:
1408: case MAT_NOT_SYMMETRY_ETERNAL:
1409: break;
1410: default:
1411: SETERRQ(PETSC_ERR_SUP,"unknown option");
1412: break;
1413: }
1414: return(0);
1415: }
1419: PetscErrorCode MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v)
1420: {
1421: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
1422: BSspmat *A = mat->A;
1423: BSsprow *rs;
1424:
1426: if (row < AA->rmap.rstart || row >= AA->rmap.rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1428: rs = A->rows[row - AA->rmap.rstart];
1429: *nz = rs->length;
1430: if (v) *v = rs->nz;
1431: if (idx) *idx = rs->col;
1432: return(0);
1433: }
1437: PetscErrorCode MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v)
1438: {
1440: return(0);
1441: }
1443: /* ------------------------------------------------------------------ */
1447: PetscErrorCode MatPrintHelp_MPIRowbs(Mat A)
1448: {
1449: static PetscTruth called = PETSC_FALSE;
1450: MPI_Comm comm = A->comm;
1454: if (called) {return(0);} else called = PETSC_TRUE;
1455: (*PetscHelpPrintf)(comm," Options for MATMPIROWBS matrix format (needed for BlockSolve):\n");
1456: (*PetscHelpPrintf)(comm," -mat_rowbs_no_inode - Do not use inodes\n");
1457: return(0);
1458: }
1462: PetscErrorCode MatSetUpPreallocation_MPIRowbs(Mat A)
1463: {
1467: MatMPIRowbsSetPreallocation(A,PETSC_DEFAULT,0);
1468: return(0);
1469: }
1471: /* -------------------------------------------------------------------*/
1472: static struct _MatOps MatOps_Values = {MatSetValues_MPIRowbs,
1473: MatGetRow_MPIRowbs,
1474: MatRestoreRow_MPIRowbs,
1475: MatMult_MPIRowbs,
1476: /* 4*/ MatMultAdd_MPIRowbs,
1477: MatMult_MPIRowbs,
1478: MatMultAdd_MPIRowbs,
1479: MatSolve_MPIRowbs,
1480: 0,
1481: 0,
1482: /*10*/ 0,
1483: 0,
1484: 0,
1485: 0,
1486: 0,
1487: /*15*/ MatGetInfo_MPIRowbs,
1488: 0,
1489: MatGetDiagonal_MPIRowbs,
1490: 0,
1491: MatNorm_MPIRowbs,
1492: /*20*/ MatAssemblyBegin_MPIRowbs,
1493: MatAssemblyEnd_MPIRowbs,
1494: 0,
1495: MatSetOption_MPIRowbs,
1496: MatZeroEntries_MPIRowbs,
1497: /*25*/ MatZeroRows_MPIRowbs,
1498: 0,
1499: MatLUFactorNumeric_MPIRowbs,
1500: 0,
1501: MatCholeskyFactorNumeric_MPIRowbs,
1502: /*30*/ MatSetUpPreallocation_MPIRowbs,
1503: MatILUFactorSymbolic_MPIRowbs,
1504: MatIncompleteCholeskyFactorSymbolic_MPIRowbs,
1505: 0,
1506: 0,
1507: /*35*/ 0,
1508: MatForwardSolve_MPIRowbs,
1509: MatBackwardSolve_MPIRowbs,
1510: 0,
1511: 0,
1512: /*40*/ 0,
1513: MatGetSubMatrices_MPIRowbs,
1514: 0,
1515: 0,
1516: 0,
1517: /*45*/ MatPrintHelp_MPIRowbs,
1518: MatScale_MPIRowbs,
1519: 0,
1520: 0,
1521: 0,
1522: /*50*/ 0,
1523: 0,
1524: 0,
1525: 0,
1526: 0,
1527: /*55*/ 0,
1528: 0,
1529: 0,
1530: 0,
1531: 0,
1532: /*60*/ MatGetSubMatrix_MPIRowbs,
1533: MatDestroy_MPIRowbs,
1534: MatView_MPIRowbs,
1535: 0,
1536: MatUseScaledForm_MPIRowbs,
1537: /*65*/ MatScaleSystem_MPIRowbs,
1538: MatUnScaleSystem_MPIRowbs,
1539: 0,
1540: 0,
1541: 0,
1542: /*70*/ 0,
1543: 0,
1544: 0,
1545: 0,
1546: 0,
1547: /*75*/ 0,
1548: 0,
1549: 0,
1550: 0,
1551: 0,
1552: /*80*/ 0,
1553: 0,
1554: 0,
1555: 0,
1556: MatLoad_MPIRowbs,
1557: /*85*/ 0,
1558: 0,
1559: 0,
1560: 0,
1561: 0,
1562: /*90*/ 0,
1563: 0,
1564: 0,
1565: 0,
1566: 0,
1567: /*95*/ 0,
1568: 0,
1569: 0,
1570: 0};
1572: /* ------------------------------------------------------------------- */
1577: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIRowbsSetPreallocation_MPIRowbs(Mat mat,int nz,const int nnz[])
1578: {
1582: mat->preallocated = PETSC_TRUE;
1583: MatCreateMPIRowbs_local(mat,nz,nnz);
1584: return(0);
1585: }
1588: /*MC
1589: MATMPIROWBS - MATMPIROWBS = "mpirowbs" - A matrix type providing ILU and ICC for distributed sparse matrices for use
1590: with the external package BlockSolve95. If BlockSolve95 is installed (see the manual for instructions
1591: on how to declare the existence of external packages), a matrix type can be constructed which invokes
1592: BlockSolve95 preconditioners and solvers.
1594: Options Database Keys:
1595: . -mat_type mpirowbs - sets the matrix type to "mpirowbs" during a call to MatSetFromOptions()
1597: Level: beginner
1599: .seealso: MatCreateMPIRowbs
1600: M*/
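/*
   Editorial sketch (not part of the original source): a minimal calling
   sequence for obtaining a MATMPIROWBS matrix through the options database,
   assuming standard PETSc calls of this release; comm, m and M are
   placeholders supplied by the caller and the program is run with
   -mat_type mpirowbs.

       Mat A;
       MatCreate(comm,&A);
       MatSetSizes(A,m,m,M,M);
       MatSetFromOptions(A);
       MatMPIRowbsSetPreallocation(A,5,PETSC_NULL);

   Alternatively, MatSetType(A,MATMPIROWBS) selects the type directly, as is
   done in MatLoad_MPIRowbs() below.
*/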
1605: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIRowbs(Mat A)
1606: {
1607: Mat_MPIRowbs *a;
1608: BSmapping *bsmap;
1609: BSoff_map *bsoff;
1611: int *offset,m,M;
1612: PetscTruth flg1,flg2,flg3;
1613: BSprocinfo *bspinfo;
1614: MPI_Comm comm;
1615:
1617: comm = A->comm;
1619: PetscNew(Mat_MPIRowbs,&a);
1620: A->data = (void*)a;
1621: PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1622: A->factor = 0;
1623: A->mapping = 0;
1624: a->vecs_permscale = PETSC_FALSE;
1625: A->insertmode = NOT_SET_VALUES;
1626: a->blocksolveassembly = 0;
1627: a->keepzeroedrows = PETSC_FALSE;
1629: MPI_Comm_rank(comm,&a->rank);
1630: MPI_Comm_size(comm,&a->size);
1633: PetscMapInitialize(comm,&A->rmap);
1634: PetscMapInitialize(comm,&A->cmap);
1635: m = A->rmap.n;
1636: M = A->rmap.N;
1638: PetscMalloc((A->rmap.n+1)*sizeof(int),&a->imax);
1639: a->reallocs = 0;
1641: /* build cache for off array entries formed */
1642: MatStashCreate_Private(A->comm,1,&A->stash);
1643: a->donotstash = PETSC_FALSE;
1645: /* Initialize BlockSolve information */
1646: a->A = 0;
1647: a->pA = 0;
1648: a->comm_pA = 0;
1649: a->fpA = 0;
1650: a->comm_fpA = 0;
1651: a->alpha = 1.0;
1652: a->ierr = 0;
1653: a->failures = 0;
1654: MPI_Comm_dup(A->comm,&(a->comm_mpirowbs));
1655: VecCreateMPI(A->comm,A->rmap.n,A->rmap.N,&(a->diag));
1656: VecDuplicate(a->diag,&(a->xwork));
1657: PetscLogObjectParent(A,a->diag); PetscLogObjectParent(A,a->xwork);
1658: PetscLogObjectMemory(A,(A->rmap.n+1)*sizeof(PetscScalar));
1659: bspinfo = BScreate_ctx();CHKERRBS(0);
1660: a->procinfo = bspinfo;
1661: BSctx_set_id(bspinfo,a->rank);CHKERRBS(0);
1662: BSctx_set_np(bspinfo,a->size);CHKERRBS(0);
1663: BSctx_set_ps(bspinfo,a->comm_mpirowbs);CHKERRBS(0);
1664: BSctx_set_cs(bspinfo,INT_MAX);CHKERRBS(0);
1665: BSctx_set_is(bspinfo,INT_MAX);CHKERRBS(0);
1666: BSctx_set_ct(bspinfo,IDO);CHKERRBS(0);
1667: #if defined(PETSC_USE_DEBUG)
1668: BSctx_set_err(bspinfo,1);CHKERRBS(0); /* BS error checking */
1669: #endif
1670: BSctx_set_rt(bspinfo,1);CHKERRBS(0);
1671: #if defined (PETSC_USE_INFO)
1672: PetscOptionsHasName(PETSC_NULL,"-info",&flg1);
1673: if (flg1) {
1674: BSctx_set_pr(bspinfo,1);CHKERRBS(0);
1675: }
1676: #endif
1677: PetscOptionsHasName(PETSC_NULL,"-pc_ilu_factorpointwise",&flg1);
1678: PetscOptionsHasName(PETSC_NULL,"-pc_icc_factorpointwise",&flg2);
1679: PetscOptionsHasName(PETSC_NULL,"-mat_rowbs_no_inode",&flg3);
1680: if (flg1 || flg2 || flg3) {
1681: BSctx_set_si(bspinfo,1);CHKERRBS(0);
1682: } else {
1683: BSctx_set_si(bspinfo,0);CHKERRBS(0);
1684: }
1685: #if defined(PETSC_USE_LOG)
1686: MLOG_INIT(); /* Initialize logging */
1687: #endif
1689: /* Compute global offsets */
1690: offset = &A->rmap.rstart;
1692: PetscNew(BSmapping,&a->bsmap);
1693: PetscLogObjectMemory(A,sizeof(BSmapping));
1694: bsmap = a->bsmap;
1695: PetscMalloc(sizeof(int),&bsmap->vlocal2global);
1696: *((int*)bsmap->vlocal2global) = (*offset);
1697: bsmap->flocal2global = BSloc2glob;
1698: bsmap->free_l2g = 0;
1699: PetscMalloc(sizeof(int),&bsmap->vglobal2local);
1700: *((int*)bsmap->vglobal2local) = (*offset);
1701: bsmap->fglobal2local = BSglob2loc;
1702: bsmap->free_g2l = 0;
1703: bsoff = BSmake_off_map(*offset,bspinfo,A->rmap.N);
1704: bsmap->vglobal2proc = (void*)bsoff;
1705: bsmap->fglobal2proc = BSglob2proc;
1706: bsmap->free_g2p = (void(*)(void*)) BSfree_off_map;
1707: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIRowbsSetPreallocation_C",
1708: "MatMPIRowbsSetPreallocation_MPIRowbs",
1709: MatMPIRowbsSetPreallocation_MPIRowbs);
1710: return(0);
1711: }
1716: /* @
1717: MatMPIRowbsSetPreallocation - Sets the number of expected nonzeros
1718: per row in the matrix.
1720: Input Parameters:
1721: + mat - matrix
1722: . nz - maximum number of nonzeros expected in any row
1723: - nnz - number of nonzeros expected in each row, or PETSC_NULL to use nz for every row
1725: Note:
1726: This routine is valid only for matrices stored in the MATMPIROWBS
1727: format.
1728: @ */
1729: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIRowbsSetPreallocation(Mat mat,int nz,const int nnz[])
1730: {
1731: PetscErrorCode ierr,(*f)(Mat,int,const int[]);
1734: PetscObjectQueryFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C",(void (**)(void))&f);
1735: if (f) {
1736: (*f)(mat,nz,nnz);
1737: }
1738: return(0);
1739: }
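/*
   Editorial sketch (not part of the original source): the two preallocation
   modes accepted by the routine above, assuming a MATMPIROWBS matrix A
   (names are placeholders).  Either give one bound nz used for every row,

       MatMPIRowbsSetPreallocation(A,10,PETSC_NULL);

   or give a per-local-row array nnz of expected nonzero counts,

       MatMPIRowbsSetPreallocation(A,0,nnz);

   MatLoad_MPIRowbs() below uses the second form with the row lengths read
   from the binary file, and MatSetUpPreallocation_MPIRowbs() above falls
   back to the first form with nz = PETSC_DEFAULT.
*/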
1741: /* --------------- extra BlockSolve-specific routines -------------- */
1744: /* @
1745: MatGetBSProcinfo - Gets the BlockSolve BSprocinfo context, which the
1746: user can then manipulate to alter the default parameters.
1748: Input Parameter:
1749: mat - matrix
1751: Output Parameter:
1752: procinfo - processor information context
1754: Note:
1755: This routine is valid only for matrices stored in the MATMPIROWBS
1756: format.
1757: @ */
1758: PetscErrorCode PETSCMAT_DLLEXPORT MatGetBSProcinfo(Mat mat,BSprocinfo *procinfo)
1759: {
1760: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1761: PetscTruth ismpirowbs;
1765: PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1766: if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1767: procinfo = a->procinfo;
1768: return(0);
1769: }
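/*
   Editorial note (not part of the original source): as written above,
   MatGetBSProcinfo() only assigns a->procinfo to its local parameter
   (procinfo = a->procinfo), so the context never reaches the caller; an
   interface returning the pointer through a BSprocinfo** argument (or
   copying *a->procinfo into *procinfo) would be needed for the documented
   output behavior.
*/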
1773: PetscErrorCode MatLoad_MPIRowbs(PetscViewer viewer,MatType type,Mat *newmat)
1774: {
1775: Mat_MPIRowbs *a;
1776: BSspmat *A;
1777: BSsprow **rs;
1778: Mat mat;
1780: int i,nz,j,rstart,rend,fd,*ourlens,*sndcounts = 0,*procsnz;
1781: int header[4],rank,size,*rowlengths = 0,M,m,*rowners,maxnz,*cols;
1782: PetscScalar *vals;
1783: MPI_Comm comm = ((PetscObject)viewer)->comm;
1784: MPI_Status status;
1787: MPI_Comm_size(comm,&size);
1788: MPI_Comm_rank(comm,&rank);
1789: if (!rank) {
1790: PetscViewerBinaryGetDescriptor(viewer,&fd);
1791: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1792: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not matrix object");
1793: if (header[3] < 0) {
1794: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIRowbs");
1795: }
1796: }
1798: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1799: M = header[1];
1801: /* determine ownership of all rows */
1802: m = M/size + ((M % size) > rank);
1803: PetscMalloc((size+2)*sizeof(int),&rowners);
1804: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1805: rowners[0] = 0;
1806: for (i=2; i<=size; i++) {
1807: rowners[i] += rowners[i-1];
1808: }
1809: rstart = rowners[rank];
1810: rend = rowners[rank+1];
1812: /* distribute row lengths to all processors */
1813: PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1814: if (!rank) {
1815: PetscMalloc(M*sizeof(int),&rowlengths);
1816: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1817: PetscMalloc(size*sizeof(int),&sndcounts);
1818: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1819: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1820: PetscFree(sndcounts);
1821: } else {
1822: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1823: }
1825: /* create our matrix */
1826: MatCreate(comm,newmat);
1827: MatSetSizes(*newmat,m,m,M,M);
1828: MatSetType(*newmat,type);
1829: MatMPIRowbsSetPreallocation(*newmat,0,ourlens);
1830: mat = *newmat;
1831: PetscFree(ourlens);
1833: a = (Mat_MPIRowbs*)mat->data;
1834: A = a->A;
1835: rs = A->rows;
1837: if (!rank) {
1838: /* calculate the number of nonzeros on each processor */
1839: PetscMalloc(size*sizeof(int),&procsnz);
1840: PetscMemzero(procsnz,size*sizeof(int));
1841: for (i=0; i<size; i++) {
1842: for (j=rowners[i]; j< rowners[i+1]; j++) {
1843: procsnz[i] += rowlengths[j];
1844: }
1845: }
1846: PetscFree(rowlengths);
1848: /* determine max buffer needed and allocate it */
1849: maxnz = 0;
1850: for (i=0; i<size; i++) {
1851: maxnz = PetscMax(maxnz,procsnz[i]);
1852: }
1853: PetscMalloc(maxnz*sizeof(int),&cols);
1855: /* read in my part of the matrix column indices */
1856: nz = procsnz[0];
1857: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1858:
1859: /* insert it into my part of matrix */
1860: nz = 0;
1861: for (i=0; i<A->num_rows; i++) {
1862: for (j=0; j<a->imax[i]; j++) {
1863: rs[i]->col[j] = cols[nz++];
1864: }
1865: rs[i]->length = a->imax[i];
1866: }
1867: /* read in parts for all other processors */
1868: for (i=1; i<size; i++) {
1869: nz = procsnz[i];
1870: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1871: MPI_Send(cols,nz,MPI_INT,i,mat->tag,comm);
1872: }
1873: PetscFree(cols);
1874: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1876: /* read in my part of the matrix numerical values */
1877: nz = procsnz[0];
1878: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1879:
1880: /* insert it into my part of matrix */
1881: nz = 0;
1882: for (i=0; i<A->num_rows; i++) {
1883: for (j=0; j<a->imax[i]; j++) {
1884: rs[i]->nz[j] = vals[nz++];
1885: }
1886: }
1887: /* read in parts for all other processors */
1888: for (i=1; i<size; i++) {
1889: nz = procsnz[i];
1890: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1891: MPI_Send(vals,nz,MPIU_SCALAR,i,mat->tag,comm);
1892: }
1893: PetscFree(vals);
1894: PetscFree(procsnz);
1895: } else {
1896: /* determine buffer space needed for message */
1897: nz = 0;
1898: for (i=0; i<A->num_rows; i++) {
1899: nz += a->imax[i];
1900: }
1901: PetscMalloc(nz*sizeof(int),&cols);
1903: /* receive message of column indices*/
1904: MPI_Recv(cols,nz,MPI_INT,0,mat->tag,comm,&status);
1905: MPI_Get_count(&status,MPI_INT,&maxnz);
1906: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");
1908: /* insert it into my part of matrix */
1909: nz = 0;
1910: for (i=0; i<A->num_rows; i++) {
1911: for (j=0; j<a->imax[i]; j++) {
1912: rs[i]->col[j] = cols[nz++];
1913: }
1914: rs[i]->length = a->imax[i];
1915: }
1916: PetscFree(cols);
1917: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1919: /* receive message of values*/
1920: MPI_Recv(vals,nz,MPIU_SCALAR,0,mat->tag,comm,&status);
1921: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1922: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");
1924: /* insert it into my part of matrix */
1925: nz = 0;
1926: for (i=0; i<A->num_rows; i++) {
1927: for (j=0; j<a->imax[i]; j++) {
1928: rs[i]->nz[j] = vals[nz++];
1929: }
1930: rs[i]->length = a->imax[i];
1931: }
1932: PetscFree(vals);
1933: }
1934: PetscFree(rowners);
1935: a->nz = a->maxnz;
1936: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1937: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
1938: return(0);
1939: }
1941: /*
1942: Special destroy and view routines for factored matrices
1943: */
1946: static PetscErrorCode MatDestroy_MPIRowbs_Factored(Mat mat)
1947: {
1949: #if defined(PETSC_USE_LOG)
1950: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->rmap.N,mat->cmap.N);
1951: #endif
1952: return(0);
1953: }
1957: static PetscErrorCode MatView_MPIRowbs_Factored(Mat mat,PetscViewer viewer)
1958: {
1962: MatView((Mat) mat->data,viewer);
1963: return(0);
1964: }
1968: PetscErrorCode MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat mat,IS isrow,MatFactorInfo *info,Mat *newfact)
1969: {
1970: /* Note: f is not currently used in BlockSolve */
1971: Mat newmat;
1972: Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1974: PetscTruth idn;
1977: if (isrow) {
1978: ISIdentity(isrow,&idn);
1979: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
1980: }
1982: if (!mat->symmetric) {
1983: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use incomplete Cholesky \n\
1984: preconditioning with a MATMPIROWBS matrix you must declare it to be \n\
1985: symmetric using the option MatSetOption(A,MAT_SYMMETRIC)");
1986: }
1988: /* If the icc_storage flag wasn't set before the last blocksolveassembly, */
1989: /* we must completely redo the assembly as a different storage format is required. */
1990: if (mbs->blocksolveassembly && !mbs->assembled_icc_storage) {
1991: mat->same_nonzero = PETSC_FALSE;
1992: mbs->blocksolveassembly = 0;
1993: }
1995: if (!mbs->blocksolveassembly) {
1996: BSset_mat_icc_storage(mbs->A,PETSC_TRUE);CHKERRBS(0);
1997: BSset_mat_symmetric(mbs->A,PETSC_TRUE);CHKERRBS(0);
1998: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1999: }
2001: /* Copy permuted matrix */
2002: if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2003: mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);
2005: /* Set up the communication for factorization */
2006: if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2007: mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);
2009: /*
2010: Create a new Mat structure to hold the "factored" matrix,
2011:       note that this merely contains a pointer to the original matrix, since
2012:       the original matrix holds the factor information.
2013: */
2014: PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2015: PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));
2017: newmat->data = (void*)mat;
2018: PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2019: newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2020: newmat->ops->view = MatView_MPIRowbs_Factored;
2021: newmat->factor = 1;
2022: newmat->preallocated = PETSC_TRUE;
2023: PetscMapCopy(mat->comm,&mat->rmap,&newmat->rmap);
2024: PetscMapCopy(mat->comm,&mat->cmap,&newmat->cmap);
2026: PetscStrallocpy(MATMPIROWBS,&newmat->type_name);
2028: *newfact = newmat;
2029: return(0);
2030: }
2034: PetscErrorCode MatILUFactorSymbolic_MPIRowbs(Mat mat,IS isrow,IS iscol,MatFactorInfo* info,Mat *newfact)
2035: {
2036: Mat newmat;
2037: Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
2039: PetscTruth idn;
2042: if (info->levels) SETERRQ(PETSC_ERR_SUP,"Blocksolve ILU only supports 0 fill");
2043: if (isrow) {
2044: ISIdentity(isrow,&idn);
2045: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
2046: }
2047: if (iscol) {
2048: ISIdentity(iscol,&idn);
2049: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
2050: }
2052: if (!mbs->blocksolveassembly) {
2053: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2054: }
2055:
2056: /* if (mat->symmetric) { */
2057: /* SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use ILU preconditioner with \n\ */
2058: /* MatCreateMPIRowbs() matrix you CANNOT declare it to be a symmetric matrix\n\ */
2059: /* using the option MatSetOption(A,MAT_SYMMETRIC)"); */
2060: /* } */
2062: /* Copy permuted matrix */
2063: if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2064: mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);
2066: /* Set up the communication for factorization */
2067: if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2068: mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);
2070: /*
2071: Create a new Mat structure to hold the "factored" matrix,
2072:       note that this merely contains a pointer to the original matrix, since
2073:       the original matrix holds the factor information.
2074: */
2075: PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2076: PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));
2078: newmat->data = (void*)mat;
2079: PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2080: newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2081: newmat->ops->view = MatView_MPIRowbs_Factored;
2082: newmat->factor = 1;
2083: newmat->preallocated = PETSC_TRUE;
2085: PetscMapCopy(mat->comm,&mat->rmap,&newmat->rmap);
2086: PetscMapCopy(mat->comm,&mat->cmap,&newmat->cmap);
2088: PetscStrallocpy(MATMPIROWBS,&newmat->type_name);
2090: *newfact = newmat;
2091: return(0);
2092: }
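/*
   Illustrative sketch (added note, not part of the original interface): the two
   symbolic factorization routines above are normally reached through the PC
   interface rather than called directly.  Assuming a MATMPIROWBS matrix A that
   was marked symmetric before assembly (required for incomplete Cholesky, see
   the error message above), and vectors b and x, a typical setup might look like:

      KSP ksp;
      PC  pc;
      KSPCreate(comm,&ksp);
      KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCICC);
      KSPSolve(ksp,b,x);
      KSPDestroy(ksp);

   Use PCSetType(pc,PCILU) instead for the 0-fill ILU above.  PCICC/PCILU are the
   standard PETSc preconditioner type names and are assumed here; this file only
   provides the MATMPIROWBS back-end for them.
*/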
2096: /*@C
2097: MatCreateMPIRowbs - Creates a sparse parallel matrix in the MATMPIROWBS
2098: format. This format is intended primarily as an interface for BlockSolve95.
2100: Collective on MPI_Comm
2102: Input Parameters:
2103: + comm - MPI communicator
2104: . m - number of local rows (or PETSC_DECIDE to have it calculated)
2105: . M - number of global rows (or PETSC_DECIDE to have it calculated)
2106: . nz - number of nonzeros per row (same for all local rows)
2107: - nnz - number of nonzeros per row (possibly different for each row).
2109: Output Parameter:
2110: . newA - the matrix
2112: Notes:
2113: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2114:    then it must be used on all processors that share the object for that argument.
2116: The user MUST specify either the local or global matrix dimensions
2117: (possibly both).
2119: Specify the preallocated storage with either nz or nnz (not both). Set
2120: nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2121: allocation.
2123: Notes:
2124: By default, the matrix is assumed to be nonsymmetric; the user can
2125: take advantage of special optimizations for symmetric matrices by calling
2126: $ MatSetOption(mat,MAT_SYMMETRIC)
2127: $ MatSetOption(mat,MAT_SYMMETRY_ETERNAL)
2128: BEFORE calling the routine MatAssemblyBegin().
2130:    Internally, the MATMPIROWBS format inserts zero elements into the
2131: matrix if necessary, so that nonsymmetric matrices are considered
2132: to be symmetric in terms of their sparsity structure; this format
2133: is required for use of the parallel communication routines within
2134: BlockSolve95. In particular, if the matrix element A[i,j] exists,
2135: then PETSc will internally allocate a 0 value for the element
2136: A[j,i] during MatAssemblyEnd() if the user has not already set
2137: a value for the matrix element A[j,i].
2139: Options Database Keys:
2140: . -mat_rowbs_no_inode - Do not use inodes.
2142: Level: intermediate
2143:
2144: .keywords: matrix, row, symmetric, sparse, parallel, BlockSolve
2146: .seealso: MatCreate(), MatSetValues()
2147: @*/
2148: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIRowbs(MPI_Comm comm,int m,int M,int nz,const int nnz[],Mat *newA)
2149: {
2151:
2153: MatCreate(comm,newA);
2154: MatSetSizes(*newA,m,m,M,M);
2155: MatSetType(*newA,MATMPIROWBS);
2156: MatMPIRowbsSetPreallocation(*newA,nz,nnz);
2157: return(0);
2158: }
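/*
   Illustrative sketch (added note): minimal use of MatCreateMPIRowbs(), assuming
   m local rows and preallocating roughly 5 nonzeros per row; only the diagonal
   is filled, and error checking is omitted.

      Mat         A;
      int         i,rstart,rend;
      PetscScalar one = 1.0;

      MatCreateMPIRowbs(PETSC_COMM_WORLD,m,PETSC_DECIDE,5,PETSC_NULL,&A);
      MatGetOwnershipRange(A,&rstart,&rend);
      for (i=rstart; i<rend; i++) {
        MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/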
2161: /* -------------------------------------------------------------------------*/
2163: #include src/mat/impls/aij/seq/aij.h
2164: #include src/mat/impls/aij/mpi/mpiaij.h
2168: PetscErrorCode MatGetSubMatrices_MPIRowbs(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
2169: {
2171: int nmax,nstages_local,nstages,i,pos,max_no;
2175: /* Allocate memory to hold all the submatrices */
2176: if (scall != MAT_REUSE_MATRIX) {
2177: PetscMalloc((ismax+1)*sizeof(Mat),submat);
2178: }
2179:
2180: /* Determine the number of stages through which submatrices are done */
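  /* (added explanatory note) The constant below appears to cap the per-stage
     integer workspace (e.g. the per-IS column maps of length C->cmap.N) at
     roughly 20 MB: nmax is the number of index sets whose C->cmap.N column
     indices fit within that budget. */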
2181: nmax = 20*1000000 / (C->cmap.N * sizeof(int));
2182: if (!nmax) nmax = 1;
2183: nstages_local = ismax/nmax + ((ismax % nmax)?1:0);
2185: /* Make sure every processor loops through the nstages */
2186: MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);
2188: for (i=0,pos=0; i<nstages; i++) {
2189: if (pos+nmax <= ismax) max_no = nmax;
2190: else if (pos == ismax) max_no = 0;
2191: else max_no = ismax-pos;
2192: MatGetSubMatrices_MPIRowbs_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);
2193: pos += max_no;
2194: }
2195: return(0);
2196: }
2197: /* -------------------------------------------------------------------------*/
2198: /* For now MatGetSubMatrices_MPIRowbs_Local gets SeqAIJ submatrices of the input
2199:    matrix and preserves the zeros that arise from structural symmetry
2200: */
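/*
   Communication pattern used below (descriptive note added for clarity):
     tag0 - each process sends the owning processes the list of rows it needs
     tag1 - the owners reply with the length of every requested row
     tag2 - the owners send the column indices of the requested rows
     tag3 - the owners send the numerical values (FLOAT) of the requested rows
   The replies are then merged with the locally owned rows to fill the SeqAIJ
   submatrices.
*/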
2203: PetscErrorCode MatGetSubMatrices_MPIRowbs_Local(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
2204: {
2205: Mat_MPIRowbs *c = (Mat_MPIRowbs *)(C->data);
2206: BSspmat *A = c->A;
2207: Mat_SeqAIJ *mat;
2209: int **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size;
2210: int **sbuf1,**sbuf2,rank,m,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
2211: int nrqs,msz,**ptr,idx,*req_size,*ctr,*pa,*tmp,tcol,nrqr;
2212: int **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2,**rmap;
2213: int **cmap,**lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax,*irow_i;
2214: int len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*cmap_i,*lens_i;
2215: int *rmap_i,tag0,tag1,tag2,tag3;
2216: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2217: MPI_Request *r_waits4,*s_waits3,*s_waits4;
2218: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2219: MPI_Status *r_status3,*r_status4,*s_status4;
2220: MPI_Comm comm;
2221: FLOAT **rbuf4,**sbuf_aa,*vals,*sbuf_aa_i;
2222: PetscScalar *mat_a;
2223: PetscTruth sorted;
2224: int *onodes1,*olengths1;
2227: comm = C->comm;
2228: tag0 = C->tag;
2229: size = c->size;
2230: rank = c->rank;
2231: m = C->rmap.N;
2232:
2233: /* Get some new tags to keep the communication clean */
2234: PetscObjectGetNewTag((PetscObject)C,&tag1);
2235: PetscObjectGetNewTag((PetscObject)C,&tag2);
2236: PetscObjectGetNewTag((PetscObject)C,&tag3);
2238: /* Check if the col indices are sorted */
2239: for (i=0; i<ismax; i++) {
2240: ISSorted(isrow[i],&sorted);
2241: if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2242: ISSorted(iscol[i],&sorted);
2243: /* if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); */
2244: }
2246: len = (2*ismax+1)*(sizeof(int*)+ sizeof(int)) + (m+1)*sizeof(int);
2247: PetscMalloc(len,&irow);
2248: icol = irow + ismax;
2249: nrow = (int*)(icol + ismax);
2250: ncol = nrow + ismax;
2251: rtable = ncol + ismax;
2253: for (i=0; i<ismax; i++) {
2254: ISGetIndices(isrow[i],&irow[i]);
2255: ISGetIndices(iscol[i],&icol[i]);
2256: ISGetLocalSize(isrow[i],&nrow[i]);
2257: ISGetLocalSize(iscol[i],&ncol[i]);
2258: }
2260: /* Create hash table for the mapping :row -> proc*/
2261: for (i=0,j=0; i<size; i++) {
2262: jmax = C->rmap.range[i+1];
2263: for (; j<jmax; j++) {
2264: rtable[j] = i;
2265: }
2266: }
2268: /* evaluate communication - mesg to who, length of mesg, and buffer space
2269: required. Based on this, buffers are allocated, and data copied into them*/
2270: PetscMalloc(size*4*sizeof(int),&w1); /* mesg size */
2271: w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/
2272: w3 = w2 + size; /* no of IS that needs to be sent to proc i */
2273: w4 = w3 + size; /* temp work space used in determining w1, w2, w3 */
2274: PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector*/
2275: for (i=0; i<ismax; i++) {
2276: PetscMemzero(w4,size*sizeof(int)); /* initialize work vector*/
2277: jmax = nrow[i];
2278: irow_i = irow[i];
2279: for (j=0; j<jmax; j++) {
2280: row = irow_i[j];
2281: proc = rtable[row];
2282: w4[proc]++;
2283: }
2284: for (j=0; j<size; j++) {
2285: if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
2286: }
2287: }
2288:
2289: nrqs = 0; /* no of outgoing messages */
2290: msz = 0; /* total mesg length (for all procs) */
2291: w1[rank] = 0; /* no mesg sent to self */
2292: w3[rank] = 0;
2293: for (i=0; i<size; i++) {
2294: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2295: }
2296: PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2297: for (i=0,j=0; i<size; i++) {
2298: if (w1[i]) { pa[j] = i; j++; }
2299: }
2301: /* Each message would have a header = 1 + 2*(no of IS) + data */
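  /* Layout of sbuf1[j] (descriptive note): sbuf1[j][0] is the number of index
     sets that request rows owned by process j; for each such set k the pair
     (sbuf1[j][2k-1], sbuf1[j][2k]) holds the IS number and the count of rows
     requested from it, and the requested row indices follow this header. */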
2302: for (i=0; i<nrqs; i++) {
2303: j = pa[i];
2304: w1[j] += w2[j] + 2* w3[j];
2305: msz += w1[j];
2306: }
2308:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2309: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2310: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2312: /* Now post the Irecvs corresponding to these messages */
2313: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2314:
2315: PetscFree(onodes1);
2316: PetscFree(olengths1);
2317:
2318: /* Allocate Memory for outgoing messages */
2319: len = 2*size*sizeof(int*) + 2*msz*sizeof(int) + size*sizeof(int);
2320: PetscMalloc(len,&sbuf1);
2321: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
2322: PetscMemzero(sbuf1,2*size*sizeof(int*));
2323: /* allocate memory for outgoing data + buf to receive the first reply */
2324: tmp = (int*)(ptr + size);
2325: ctr = tmp + 2*msz;
2327: {
2328: int *iptr = tmp,ict = 0;
2329: for (i=0; i<nrqs; i++) {
2330: j = pa[i];
2331: iptr += ict;
2332: sbuf1[j] = iptr;
2333: ict = w1[j];
2334: }
2335: }
2337: /* Form the outgoing messages */
2338: /* Initialize the header space */
2339: for (i=0; i<nrqs; i++) {
2340: j = pa[i];
2341: sbuf1[j][0] = 0;
2342: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
2343: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
2344: }
2345:
2346: /* Parse the isrow and copy data into outbuf */
2347: for (i=0; i<ismax; i++) {
2348: PetscMemzero(ctr,size*sizeof(int));
2349: irow_i = irow[i];
2350: jmax = nrow[i];
2351: for (j=0; j<jmax; j++) { /* parse the indices of each IS */
2352: row = irow_i[j];
2353: proc = rtable[row];
2354: if (proc != rank) { /* copy to the outgoing buf*/
2355: ctr[proc]++;
2356: *ptr[proc] = row;
2357: ptr[proc]++;
2358: }
2359: }
2360: /* Update the headers for the current IS */
2361: for (j=0; j<size; j++) { /* Can Optimise this loop too */
2362: if ((ctr_j = ctr[j])) {
2363: sbuf1_j = sbuf1[j];
2364: k = ++sbuf1_j[0];
2365: sbuf1_j[2*k] = ctr_j;
2366: sbuf1_j[2*k-1] = i;
2367: }
2368: }
2369: }
2371: /* Now post the sends */
2372: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2373: for (i=0; i<nrqs; ++i) {
2374: j = pa[i];
2375: MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2376: }
2378: /* Post Receives to capture the buffer size */
2379: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2380: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
2381: rbuf2[0] = tmp + msz;
2382: for (i=1; i<nrqs; ++i) {
2383: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2384: }
2385: for (i=0; i<nrqs; ++i) {
2386: j = pa[i];
2387: MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2388: }
2390: /* Send to other procs the buf size they should allocate */
2391:
2393: /* Receive messages*/
2394: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
2395: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
2396: len = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
2397: PetscMalloc(len,&sbuf2);
2398: req_size = (int*)(sbuf2 + nrqr);
2399: req_source = req_size + nrqr;
2400:
2401: {
2402: BSsprow **sAi = A->rows;
2403: int id,rstart = C->rmap.rstart;
2404: int *sbuf2_i;
2406: for (i=0; i<nrqr; ++i) {
2407: MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
2408: req_size[idx] = 0;
2409: rbuf1_i = rbuf1[idx];
2410: start = 2*rbuf1_i[0] + 1;
2411: MPI_Get_count(r_status1+i,MPI_INT,&end);
2412: PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
2413: sbuf2_i = sbuf2[idx];
2414: for (j=start; j<end; j++) {
2415: id = rbuf1_i[j] - rstart;
2416: ncols = (sAi[id])->length;
2417: sbuf2_i[j] = ncols;
2418: req_size[idx] += ncols;
2419: }
2420: req_source[idx] = r_status1[i].MPI_SOURCE;
2421: /* form the header */
2422: sbuf2_i[0] = req_size[idx];
2423: for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
2424: MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
2425: }
2426: }
2427: PetscFree(r_status1);
2428: PetscFree(r_waits1);
2430: /* recv buffer sizes */
2431: /* Receive messages*/
2432:
2433: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
2434: PetscMalloc((nrqs+1)*sizeof(FLOAT *),&rbuf4);
2435: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
2436: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
2437: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);
2439: for (i=0; i<nrqs; ++i) {
2440: MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
2441: PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
2442: PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
2443: MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
2444: MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
2445: }
2446: PetscFree(r_status2);
2447: PetscFree(r_waits2);
2448:
2449: /* Wait on sends1 and sends2 */
2450: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
2451: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);
2453: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
2454: if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
2455: PetscFree(s_status1);
2456: PetscFree(s_status2);
2457: PetscFree(s_waits1);
2458: PetscFree(s_waits2);
2460: /* Now allocate buffers for a->j, and send them off */
2461: PetscMalloc((nrqr+1)*sizeof(int*),&sbuf_aj);
2462: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2463: PetscMalloc((j+1)*sizeof(int),&sbuf_aj[0]);
2464: for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
2465:
2466: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
2467: {
2468: BSsprow *brow;
2469: int *Acol;
2470: int rstart = C->rmap.rstart;
2472: for (i=0; i<nrqr; i++) {
2473: rbuf1_i = rbuf1[i];
2474: sbuf_aj_i = sbuf_aj[i];
2475: ct1 = 2*rbuf1_i[0] + 1;
2476: ct2 = 0;
2477: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2478: kmax = rbuf1[i][2*j];
2479: for (k=0; k<kmax; k++,ct1++) {
2480: brow = A->rows[rbuf1_i[ct1] - rstart];
2481: ncols = brow->length;
2482: Acol = brow->col;
2483: /* load the column indices for this row into cols*/
2484: cols = sbuf_aj_i + ct2;
2485: PetscMemcpy(cols,Acol,ncols*sizeof(int));
2486:         /* for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* How does this interact with mappings? */
2488: ct2 += ncols;
2489: }
2490: }
2491: MPI_Isend(sbuf_aj_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
2492: }
2493: }
2494: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
2495: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);
2497: /* Allocate buffers for a->a, and send them off */
2498: PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf_aa);
2499: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2500: PetscMalloc((j+1)*sizeof(FLOAT),&sbuf_aa[0]);
2501: for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
2502:
2503: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
2504: {
2505: BSsprow *brow;
2506: FLOAT *Aval;
2507: int rstart = C->rmap.rstart;
2508:
2509: for (i=0; i<nrqr; i++) {
2510: rbuf1_i = rbuf1[i];
2511: sbuf_aa_i = sbuf_aa[i];
2512: ct1 = 2*rbuf1_i[0]+1;
2513: ct2 = 0;
2514: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2515: kmax = rbuf1_i[2*j];
2516: for (k=0; k<kmax; k++,ct1++) {
2517: brow = A->rows[rbuf1_i[ct1] - rstart];
2518: ncols = brow->length;
2519: Aval = brow->nz;
2520: /* load the column values for this row into vals*/
2521: vals = sbuf_aa_i+ct2;
2522: PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
2523: ct2 += ncols;
2524: }
2525: }
2526: MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
2527: }
2528: }
2529: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
2530: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
2531: PetscFree(rbuf1);
2533: /* Form the matrix */
2534: /* create col map */
2535: {
2536: int *icol_i;
2537:
2538: len = (1+ismax)*sizeof(int*)+ ismax*C->cmap.N*sizeof(int);
2539: PetscMalloc(len,&cmap);
2540: cmap[0] = (int*)(cmap + ismax);
2541: PetscMemzero(cmap[0],(1+ismax*C->cmap.N)*sizeof(int));
2542: for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + C->cmap.N; }
2543: for (i=0; i<ismax; i++) {
2544: jmax = ncol[i];
2545: icol_i = icol[i];
2546: cmap_i = cmap[i];
2547: for (j=0; j<jmax; j++) {
2548: cmap_i[icol_i[j]] = j+1;
2549: }
2550: }
2551: }
2553: /* Create lens which is required for MatCreate... */
2554: for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
2555: len = (1+ismax)*sizeof(int*)+ j*sizeof(int);
2556: PetscMalloc(len,&lens);
2557: lens[0] = (int*)(lens + ismax);
2558: PetscMemzero(lens[0],j*sizeof(int));
2559: for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
2560:
2561: /* Update lens from local data */
2562: { BSsprow *Arow;
2563: for (i=0; i<ismax; i++) {
2564: jmax = nrow[i];
2565: cmap_i = cmap[i];
2566: irow_i = irow[i];
2567: lens_i = lens[i];
2568: for (j=0; j<jmax; j++) {
2569: row = irow_i[j];
2570: proc = rtable[row];
2571: if (proc == rank) {
2572: Arow=A->rows[row-C->rmap.rstart];
2573: ncols=Arow->length;
2574: cols=Arow->col;
2575: for (k=0; k<ncols; k++) {
2576: if (cmap_i[cols[k]]) { lens_i[j]++;}
2577: }
2578: }
2579: }
2580: }
2581: }
2582:
2583: /* Create row map*/
2584: len = (1+ismax)*sizeof(int*)+ ismax*C->rmap.N*sizeof(int);
2585: PetscMalloc(len,&rmap);
2586: rmap[0] = (int*)(rmap + ismax);
2587: PetscMemzero(rmap[0],ismax*C->rmap.N*sizeof(int));
2588: for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->rmap.N;}
2589: for (i=0; i<ismax; i++) {
2590: rmap_i = rmap[i];
2591: irow_i = irow[i];
2592: jmax = nrow[i];
2593: for (j=0; j<jmax; j++) {
2594: rmap_i[irow_i[j]] = j;
2595: }
2596: }
2597:
2598: /* Update lens from offproc data */
2599: {
2600: int *rbuf2_i,*rbuf3_i,*sbuf1_i;
2602: for (tmp2=0; tmp2<nrqs; tmp2++) {
2603: MPI_Waitany(nrqs,r_waits3,&i,r_status3+tmp2);
2604: idx = pa[i];
2605: sbuf1_i = sbuf1[idx];
2606: jmax = sbuf1_i[0];
2607: ct1 = 2*jmax+1;
2608: ct2 = 0;
2609: rbuf2_i = rbuf2[i];
2610: rbuf3_i = rbuf3[i];
2611: for (j=1; j<=jmax; j++) {
2612: is_no = sbuf1_i[2*j-1];
2613: max1 = sbuf1_i[2*j];
2614: lens_i = lens[is_no];
2615: cmap_i = cmap[is_no];
2616: rmap_i = rmap[is_no];
2617: for (k=0; k<max1; k++,ct1++) {
2618: row = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */
2619: max2 = rbuf2_i[ct1];
2620: for (l=0; l<max2; l++,ct2++) {
2621: if (cmap_i[rbuf3_i[ct2]]) {
2622: lens_i[row]++;
2623: }
2624: }
2625: }
2626: }
2627: }
2628: }
2629: PetscFree(r_status3);
2630: PetscFree(r_waits3);
2631: if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
2632: PetscFree(s_status3);
2633: PetscFree(s_waits3);
2635: /* Create the submatrices */
2636: if (scall == MAT_REUSE_MATRIX) {
2637: PetscTruth same;
2638:
2639: /*
2640:        Assumes the new rows are the same length as the old rows, hence a bug!
2641: */
2642: for (i=0; i<ismax; i++) {
2643: PetscTypeCompare((PetscObject)(submats[i]),MATSEQAIJ,&same);
2644: if (!same) {
2645: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
2646: }
2647: mat = (Mat_SeqAIJ*)(submats[i]->data);
2648: if ((submats[i]->rmap.n != nrow[i]) || (submats[i]->cmap.n != ncol[i])) {
2649: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
2650: }
2651: PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap.n*sizeof(int),&same);
2652: if (!same) {
2653: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
2654: }
2655: /* Initial matrix as if empty */
2656: PetscMemzero(mat->ilen,submats[i]->rmap.n*sizeof(int));
2657: submats[i]->factor = C->factor;
2658: }
2659: } else {
2660: for (i=0; i<ismax; i++) {
2661: /* Here we want to explicitly generate SeqAIJ matrices */
2662: MatCreate(PETSC_COMM_SELF,submats+i);
2663: MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);
2664: MatSetType(submats[i],MATSEQAIJ);
2665: MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
2666: }
2667: }
2669: /* Assemble the matrices */
2670: /* First assemble the local rows */
2671: {
2672: int ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
2673: PetscScalar *imat_a;
2674: BSsprow *Arow;
2675:
2676: for (i=0; i<ismax; i++) {
2677: mat = (Mat_SeqAIJ*)submats[i]->data;
2678: imat_ilen = mat->ilen;
2679: imat_j = mat->j;
2680: imat_i = mat->i;
2681: imat_a = mat->a;
2682: cmap_i = cmap[i];
2683: rmap_i = rmap[i];
2684: irow_i = irow[i];
2685: jmax = nrow[i];
2686: for (j=0; j<jmax; j++) {
2687: row = irow_i[j];
2688: proc = rtable[row];
2689: if (proc == rank) {
2690: old_row = row;
2691: row = rmap_i[row];
2692: ilen_row = imat_ilen[row];
2693:
2694: Arow=A->rows[old_row-C->rmap.rstart];
2695: ncols=Arow->length;
2696: cols=Arow->col;
2697: vals=Arow->nz;
2698:
2699: mat_i = imat_i[row];
2700: mat_a = imat_a + mat_i;
2701: mat_j = imat_j + mat_i;
2702: for (k=0; k<ncols; k++) {
2703: if ((tcol = cmap_i[cols[k]])) {
2704: *mat_j++ = tcol - 1;
2705: *mat_a++ = (PetscScalar)vals[k];
2706: ilen_row++;
2707: }
2708: }
2709: imat_ilen[row] = ilen_row;
2710: }
2711: }
2712: }
2713: }
2715: /* Now assemble the off proc rows*/
2716: {
2717: int *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
2718: int *imat_j,*imat_i;
2719: PetscScalar *imat_a;
2720: FLOAT *rbuf4_i;
2721:
2722: for (tmp2=0; tmp2<nrqs; tmp2++) {
2723: MPI_Waitany(nrqs,r_waits4,&i,r_status4+tmp2);
2724: idx = pa[i];
2725: sbuf1_i = sbuf1[idx];
2726: jmax = sbuf1_i[0];
2727: ct1 = 2*jmax + 1;
2728: ct2 = 0;
2729: rbuf2_i = rbuf2[i];
2730: rbuf3_i = rbuf3[i];
2731: rbuf4_i = rbuf4[i];
2732: for (j=1; j<=jmax; j++) {
2733: is_no = sbuf1_i[2*j-1];
2734: rmap_i = rmap[is_no];
2735: cmap_i = cmap[is_no];
2736: mat = (Mat_SeqAIJ*)submats[is_no]->data;
2737: imat_ilen = mat->ilen;
2738: imat_j = mat->j;
2739: imat_i = mat->i;
2740: imat_a = mat->a;
2741: max1 = sbuf1_i[2*j];
2742: for (k=0; k<max1; k++,ct1++) {
2743: row = sbuf1_i[ct1];
2744: row = rmap_i[row];
2745: ilen = imat_ilen[row];
2746: mat_i = imat_i[row];
2747: mat_a = imat_a + mat_i;
2748: mat_j = imat_j + mat_i;
2749: max2 = rbuf2_i[ct1];
2750: for (l=0; l<max2; l++,ct2++) {
2751: if ((tcol = cmap_i[rbuf3_i[ct2]])) {
2752: *mat_j++ = tcol - 1;
2753: *mat_a++ = (PetscScalar)rbuf4_i[ct2];
2754: ilen++;
2755: }
2756: }
2757: imat_ilen[row] = ilen;
2758: }
2759: }
2760: }
2761: }
2762: PetscFree(r_status4);
2763: PetscFree(r_waits4);
2764: if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
2765: PetscFree(s_waits4);
2766: PetscFree(s_status4);
2768: /* Restore the indices */
2769: for (i=0; i<ismax; i++) {
2770: ISRestoreIndices(isrow[i],irow+i);
2771: ISRestoreIndices(iscol[i],icol+i);
2772: }
2774: /* Destroy allocated memory */
2775: PetscFree(irow);
2776: PetscFree(w1);
2777: PetscFree(pa);
2779: PetscFree(sbuf1);
2780: PetscFree(rbuf2);
2781: for (i=0; i<nrqr; ++i) {
2782: PetscFree(sbuf2[i]);
2783: }
2784: for (i=0; i<nrqs; ++i) {
2785: PetscFree(rbuf3[i]);
2786: PetscFree(rbuf4[i]);
2787: }
2789: PetscFree(sbuf2);
2790: PetscFree(rbuf3);
2791: PetscFree(rbuf4);
2792: PetscFree(sbuf_aj[0]);
2793: PetscFree(sbuf_aj);
2794: PetscFree(sbuf_aa[0]);
2795: PetscFree(sbuf_aa);
2796:
2797: PetscFree(cmap);
2798: PetscFree(rmap);
2799: PetscFree(lens);
2801: for (i=0; i<ismax; i++) {
2802: MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
2803: MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
2804: }
2805: return(0);
2806: }
2808: /*
2809:    This can be optimized by sending only the nonzeros selected by the iscol IS,
2810:    i.e., prebuilding the submatrix on the sending side, including the A,B partitioning.
2811: */
2814: #include src/vec/is/impls/general/general.h
2815: PetscErrorCode MatGetSubMatrix_MPIRowbs(Mat C,IS isrow,IS iscol,int csize,MatReuse scall,Mat *submat)
2816: {
2817: Mat_MPIRowbs *c = (Mat_MPIRowbs*)C->data;
2818: BSspmat *A = c->A;
2819: BSsprow *Arow;
2820:   Mat_SeqAIJ *matA,*matB; /* on-proc and off-proc parts of submat */
2821: Mat_MPIAIJ *mat; /* submat->data */
2823: int *irow,*icol,nrow,ncol,*rtable,size,rank,tag0,tag1,tag2,tag3;
2824: int *w1,*w2,*pa,nrqs,nrqr,msz,row_t;
2825: int i,j,k,l,len,jmax,proc,idx;
2826: int **sbuf1,**sbuf2,**rbuf1,**rbuf2,*req_size,**sbuf3,**rbuf3;
2827: FLOAT **rbuf4,**sbuf4; /* FLOAT is from Block Solve 95 library */
2829: int *cmap,*rmap,nlocal,*o_nz,*d_nz,cstart,cend;
2830: int *req_source;
2831: int ncols_t;
2832:
2833:
2834: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2835: MPI_Request *r_waits4,*s_waits3,*s_waits4;
2836:
2837: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2838: MPI_Status *r_status3,*r_status4,*s_status4;
2839: MPI_Comm comm;
2843: comm = C->comm;
2844: tag0 = C->tag;
2845: size = c->size;
2846: rank = c->rank;
2848: if (size==1) {
2849: if (scall == MAT_REUSE_MATRIX) {
2850: ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_REUSE_MATRIX,&submat);
2851: return(0);
2852: } else {
2853: Mat *newsubmat;
2854:
2855: ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&newsubmat);
2856: *submat=*newsubmat;
2857: ierr=PetscFree(newsubmat);
2858: return(0);
2859: }
2860: }
2861:
2862: /* Get some new tags to keep the communication clean */
2863: PetscObjectGetNewTag((PetscObject)C,&tag1);
2864: PetscObjectGetNewTag((PetscObject)C,&tag2);
2865: PetscObjectGetNewTag((PetscObject)C,&tag3);
2867: /* Check if the col indices are sorted */
2868: {PetscTruth sorted;
2869: ISSorted(isrow,&sorted);
2870: if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2871: ISSorted(iscol,&sorted);
2872: if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
2873: }
2874:
2875:   if (!isrow) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty ISrow");
2876:   if (!iscol) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty IScol");
2877:
2878:   ISGetIndices(isrow,&irow);
2879:   ISGetIndices(iscol,&icol);
2880:   ISGetLocalSize(isrow,&nrow);
2881:   ISGetLocalSize(iscol,&ncol);
2882:
2883:
2884: len = (C->rmap.N+1)*sizeof(int);
2885: PetscMalloc(len,&rtable);
2886: /* Create hash table for the mapping :row -> proc*/
2887: for (i=0,j=0; i<size; i++) {
2888: jmax = C->rmap.range[i+1];
2889: for (; j<jmax; j++) {
2890: rtable[j] = i;
2891: }
2892: }
2894: /* evaluate communication - mesg to who, length of mesg, and buffer space
2895: required. Based on this, buffers are allocated, and data copied into them*/
2896: PetscMalloc(size*2*sizeof(int),&w1); /* mesg size */
2897: w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/
2898: PetscMemzero(w1,size*2*sizeof(int)); /* initialize work vector*/
2899: for (j=0; j<nrow; j++) {
2900: row_t = irow[j];
2901: proc = rtable[row_t];
2902: w1[proc]++;
2903: }
2904: nrqs = 0; /* no of outgoing messages */
2905: msz = 0; /* total mesg length (for all procs) */
2906: w1[rank] = 0; /* no mesg sent to self */
2907: for (i=0; i<size; i++) {
2908: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2909: }
2910:
2911: PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2912: for (i=0,j=0; i<size; i++) {
2913: if (w1[i]) {
2914: pa[j++] = i;
2915: w1[i]++; /* header for return data */
2916: msz+=w1[i];
2917: }
2918: }
2919:
2920: {int *onodes1,*olengths1;
2921:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2922: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2923: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2924: /* Now post the Irecvs corresponding to these messages */
2925: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2926: PetscFree(onodes1);
2927: PetscFree(olengths1);
2928: }
2929:
2930: { int **ptr,*iptr,*tmp;
2931: /* Allocate Memory for outgoing messages */
2932: len = 2*size*sizeof(int*) + msz*sizeof(int);
2933: PetscMalloc(len,&sbuf1);
2934: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
2935: PetscMemzero(sbuf1,2*size*sizeof(int*));
2936: /* allocate memory for outgoing data + buf to receive the first reply */
2937: tmp = (int*)(ptr + size);
2939: for (i=0,iptr=tmp; i<nrqs; i++) {
2940: j = pa[i];
2941: sbuf1[j] = iptr;
2942: iptr += w1[j];
2943: }
2945: /* Form the outgoing messages */
2946: for (i=0; i<nrqs; i++) {
2947: j = pa[i];
2948: sbuf1[j][0] = 0; /*header */
2949: ptr[j] = sbuf1[j] + 1;
2950: }
2951:
2952: /* Parse the isrow and copy data into outbuf */
2953: for (j=0; j<nrow; j++) {
2954: row_t = irow[j];
2955: proc = rtable[row_t];
2956: if (proc != rank) { /* copy to the outgoing buf*/
2957: sbuf1[proc][0]++;
2958: *ptr[proc] = row_t;
2959: ptr[proc]++;
2960: }
2961: }
2962: } /* block */
2964: /* Now post the sends */
2965:
2966:   /* structure of sbuf1[i]/rbuf1[i] : 1 (num of rows) + nrow-local rows (numbers
2967:    * of the requested rows) */
2969: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2970: for (i=0; i<nrqs; ++i) {
2971: j = pa[i];
2972: MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2973: }
2975: /* Post Receives to capture the buffer size */
2976: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2977: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
2978: PetscMalloc(msz*sizeof(int)+1,&(rbuf2[0]));
2979: for (i=1; i<nrqs; ++i) {
2980: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2981: }
2982: for (i=0; i<nrqs; ++i) {
2983: j = pa[i];
2984: MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2985: }
2987: /* Send to other procs the buf size they should allocate */
2988:   /* structure of sbuf2[i]/rbuf2[i]: 1 (total size to allocate) + nrow-local rows
2989:    * (row sizes) */
2991: /* Receive messages*/
2992: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
2993: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
2994: len = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
2995: PetscMalloc(len,&sbuf2);
2996: req_size = (int*)(sbuf2 + nrqr);
2997: req_source = req_size + nrqr;
2998:
2999: {
3000: BSsprow **sAi = A->rows;
3001: int id,rstart = C->rmap.rstart;
3002: int *sbuf2_i,*rbuf1_i,end;
3004: for (i=0; i<nrqr; ++i) {
3005: MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
3006: req_size[idx] = 0;
3007: rbuf1_i = rbuf1[idx];
3008: MPI_Get_count(r_status1+i,MPI_INT,&end);
3009: PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
3010: sbuf2_i = sbuf2[idx];
3011: for (j=1; j<end; j++) {
3012: id = rbuf1_i[j] - rstart;
3013: ncols_t = (sAi[id])->length;
3014: sbuf2_i[j] = ncols_t;
3015: req_size[idx] += ncols_t;
3016: }
3017: req_source[idx] = r_status1[i].MPI_SOURCE;
3018: /* form the header */
3019: sbuf2_i[0] = req_size[idx];
3020: MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
3021: }
3022: }
3023: PetscFree(r_status1);
3024: PetscFree(r_waits1);
3026: /* recv buffer sizes */
3027: /* Receive messages*/
3028:
3029: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
3030: PetscMalloc((nrqs+1)*sizeof(FLOAT*),&rbuf4);
3031: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
3032: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
3033: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);
3035: for (i=0; i<nrqs; ++i) {
3036: MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
3037: PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
3038: PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
3039: MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
3040: MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
3041: }
3042: PetscFree(r_status2);
3043: PetscFree(r_waits2);
3044:
3045: /* Wait on sends1 and sends2 */
3046: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
3047: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);
3049: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
3050: if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
3051: PetscFree(s_status1);
3052: PetscFree(s_status2);
3053: PetscFree(s_waits1);
3054: PetscFree(s_waits2);
3056: /* Now allocate buffers for a->j, and send them off */
3057:   /* structure of sbuf3[i]/rbuf3[i],sbuf4[i]/rbuf4[i]: req_size[i] (cols resp.
3058:    * vals of all requested rows; the row sizes were sent in rbuf2; vals are of FLOAT type) */
3059:
3060: PetscMalloc((nrqr+1)*sizeof(int*),&sbuf3);
3061: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3062: PetscMalloc((j+1)*sizeof(int),&sbuf3[0]);
3063: for (i=1; i<nrqr; i++) sbuf3[i] = sbuf3[i-1] + req_size[i-1];
3064:
3065: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
3066: {
3067: int *Acol,*rbuf1_i,*sbuf3_i,rqrow,noutcols,kmax,*cols,ncols;
3068: int rstart = C->rmap.rstart;
3070: for (i=0; i<nrqr; i++) {
3071: rbuf1_i = rbuf1[i];
3072: sbuf3_i = sbuf3[i];
3073: noutcols = 0;
3074: kmax = rbuf1_i[0]; /* num. of req. rows */
3075: for (k=0,rqrow=1; k<kmax; k++,rqrow++) {
3076: Arow = A->rows[rbuf1_i[rqrow] - rstart];
3077: ncols = Arow->length;
3078: Acol = Arow->col;
3079: /* load the column indices for this row into cols*/
3080: cols = sbuf3_i + noutcols;
3081: PetscMemcpy(cols,Acol,ncols*sizeof(int));
3082:         /* for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* How does this interact with mappings? */
3083: noutcols += ncols;
3084: }
3085: MPI_Isend(sbuf3_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
3086: }
3087: }
3088: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
3089: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);
3091: /* Allocate buffers for a->a, and send them off */
3092:   /* can be optimized by connecting it with the previous block */
3093: PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf4);
3094: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3095: PetscMalloc((j+1)*sizeof(FLOAT),&sbuf4[0]);
3096: for (i=1; i<nrqr; i++) sbuf4[i] = sbuf4[i-1] + req_size[i-1];
3097:
3098: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
3099: {
3100: FLOAT *Aval,*vals,*sbuf4_i;
3101: int rstart = C->rmap.rstart,*rbuf1_i,rqrow,noutvals,kmax,ncols;
3102:
3103:
3104: for (i=0; i<nrqr; i++) {
3105: rbuf1_i = rbuf1[i];
3106: sbuf4_i = sbuf4[i];
3107: rqrow = 1;
3108: noutvals = 0;
3109: kmax = rbuf1_i[0]; /* num of req. rows */
3110: for (k=0; k<kmax; k++,rqrow++) {
3111: Arow = A->rows[rbuf1_i[rqrow] - rstart];
3112: ncols = Arow->length;
3113: Aval = Arow->nz;
3114: /* load the column values for this row into vals*/
3115: vals = sbuf4_i+noutvals;
3116: PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
3117: noutvals += ncols;
3118: }
3119: MPI_Isend(sbuf4_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
3120: }
3121: }
3122: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
3123: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
3124: PetscFree(rbuf1);
3126: /* Form the matrix */
3128: /* create col map */
3129: len = C->cmap.N*sizeof(int)+1;
3130: PetscMalloc(len,&cmap);
3131: PetscMemzero(cmap,C->cmap.N*sizeof(int));
3132: for (j=0; j<ncol; j++) {
3133: cmap[icol[j]] = j+1;
3134: }
3135:
3136:   /* Create row map (a global row map may eventually be needed, but a local row map suffices here) */
3137: len = C->rmap.N*sizeof(int)+1;
3138: PetscMalloc(len,&rmap);
3139: PetscMemzero(rmap,C->rmap.N*sizeof(int));
3140: for (j=0; j<nrow; j++) {
3141: rmap[irow[j]] = j;
3142: }
3144: /*
3145: Determine the number of non-zeros in the diagonal and off-diagonal
3146: portions of the matrix in order to do correct preallocation
3147: */
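/*
   Descriptive note: cmap[] maps a global column number to its 1-based position
   in iscol (0 means the column is not selected), and the MPI_Scan below gives
   this process's share (cstart,cend] of the submatrix columns, so a selected
   column contributes to d_nz (diagonal block) when cstart < cmap[col] <= cend
   and to o_nz (off-diagonal block) otherwise.
*/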
3149: /* first get start and end of "diagonal" columns */
3150: if (csize == PETSC_DECIDE) {
3151: nlocal = ncol/size + ((ncol % size) > rank);
3152: } else {
3153: nlocal = csize;
3154: }
3155: {
3156: int ncols,*cols,olen,dlen,thecol;
3157: int *rbuf2_i,*rbuf3_i,*sbuf1_i,row,kmax,cidx;
3158:
3159: MPI_Scan(&nlocal,&cend,1,MPI_INT,MPI_SUM,comm);
3160: cstart = cend - nlocal;
3161: if (rank == size - 1 && cend != ncol) {
3162: SETERRQ(PETSC_ERR_ARG_SIZ,"Local column sizes do not add up to total number of columns");
3163: }
3165: PetscMalloc((2*nrow+1)*sizeof(int),&d_nz);
3166: o_nz = d_nz + nrow;
3167:
3168: /* Update lens from local data */
3169: for (j=0; j<nrow; j++) {
3170: row = irow[j];
3171: proc = rtable[row];
3172: if (proc == rank) {
3173: Arow=A->rows[row-C->rmap.rstart];
3174: ncols=Arow->length;
3175: cols=Arow->col;
3176: olen=dlen=0;
3177: for (k=0; k<ncols; k++) {
3178: if ((thecol=cmap[cols[k]])) {
3179: if (cstart<thecol && thecol<=cend) dlen++; /* thecol is from 1 */
3180: else olen++;
3181: }
3182: }
3183: o_nz[j]=olen;
3184: d_nz[j]=dlen;
3185: } else d_nz[j]=o_nz[j]=0;
3186: }
3187: /* Update lens from offproc data and done waits */
3188:     /* this will be much simpler after sending only the appropriate columns */
3189: for (j=0; j<nrqs;j++) {
3190: MPI_Waitany(nrqs,r_waits3,&i,r_status3+j);
3191: proc = pa[i];
3192: sbuf1_i = sbuf1[proc];
3193: cidx = 0;
3194: rbuf2_i = rbuf2[i];
3195: rbuf3_i = rbuf3[i];
3196: kmax = sbuf1_i[0]; /*num of rq. rows*/
3197: for (k=1; k<=kmax; k++) {
3198: row = rmap[sbuf1_i[k]]; /* the val in the new matrix to be */
3199: for (l=0; l<rbuf2_i[k]; l++,cidx++) {
3200: if ((thecol=cmap[rbuf3_i[cidx]])) {
3201:
3202: if (cstart<thecol && thecol<=cend) d_nz[row]++; /* thecol is from 1 */
3203: else o_nz[row]++;
3204: }
3205: }
3206: }
3207: }
3208: }
3209: PetscFree(r_status3);
3210: PetscFree(r_waits3);
3211: if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
3212: PetscFree(s_status3);
3213: PetscFree(s_waits3);
3215: if (scall == MAT_INITIAL_MATRIX) {
3216: MatCreate(comm,submat);
3217: MatSetSizes(*submat,nrow,nlocal,PETSC_DECIDE,ncol);
3218: MatSetType(*submat,C->type_name);
3219: MatMPIAIJSetPreallocation(*submat,0,d_nz,0,o_nz);
3220: mat=(Mat_MPIAIJ *)((*submat)->data);
3221: matA=(Mat_SeqAIJ *)(mat->A->data);
3222: matB=(Mat_SeqAIJ *)(mat->B->data);
3223:
3224: } else {
3225: PetscTruth same;
3226:     /* The following checks could optionally be dropped for debugged versions of a user's
3227:      * program, but I am not aware of a PETSc option that can switch off such safety
3228:      * tests - in the same way the counting of o_nz,d_nz could be dropped for the REUSE
3229:      * case */
3230:
3231: PetscTypeCompare((PetscObject)(*submat),MATMPIAIJ,&same);
3232: if (!same) {
3233: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
3234: }
3235: if (((*submat)->rmap.n != nrow) || ((*submat)->cmap.N != ncol)) {
3236: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
3237: }
3238: mat=(Mat_MPIAIJ *)((*submat)->data);
3239: matA=(Mat_SeqAIJ *)(mat->A->data);
3240: matB=(Mat_SeqAIJ *)(mat->B->data);
3241: PetscMemcmp(matA->ilen,d_nz,nrow*sizeof(int),&same);
3242: if (!same) {
3243: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3244: }
3245: PetscMemcmp(matB->ilen,o_nz,nrow*sizeof(int),&same);
3246: if (!same) {
3247: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3248: }
3249: /* Initial matrix as if empty */
3250: PetscMemzero(matA->ilen,nrow*sizeof(int));
3251: PetscMemzero(matB->ilen,nrow*sizeof(int));
3252:     /* Perhaps MatZeroEntries would be better - check exactly what it does - all
3253:      * possibly stale information must be deleted */
3254:     /* submats[i]->factor = C->factor; !!! ??? if the factor is to be the same then some
3255:      * factor information must be copied - where is it stored? */
3256: (*submat)->was_assembled=PETSC_FALSE;
3257: (*submat)->assembled=PETSC_FALSE;
3258:
3259: }
3260: PetscFree(d_nz);
3262: /* Assemble the matrix */
3263: /* First assemble from local rows */
3264: {
3265: int i_row,oldrow,row,ncols,*cols,*matA_j,*matB_j,ilenA,ilenB,tcol;
3266: FLOAT *vals;
3267: PetscScalar *matA_a,*matB_a;
3268:
3269: for (j=0; j<nrow; j++) {
3270: oldrow = irow[j];
3271: proc = rtable[oldrow];
3272: if (proc == rank) {
3273: row = rmap[oldrow];
3274:
3275: Arow = A->rows[oldrow-C->rmap.rstart];
3276: ncols = Arow->length;
3277: cols = Arow->col;
3278: vals = Arow->nz;
3279:
3280: i_row = matA->i[row];
3281: matA_a = matA->a + i_row;
3282: matA_j = matA->j + i_row;
3283: i_row = matB->i[row];
3284: matB_a = matB->a + i_row;
3285: matB_j = matB->j + i_row;
3286: for (k=0,ilenA=0,ilenB=0; k<ncols; k++) {
3287: if ((tcol = cmap[cols[k]])) {
3288: if (tcol<=cstart) {
3289: *matB_j++ = tcol-1;
3290: *matB_a++ = vals[k];
3291: ilenB++;
3292: } else if (tcol<=cend) {
3293: *matA_j++ = (tcol-1)-cstart;
3294: *matA_a++ = (PetscScalar)(vals[k]);
3295: ilenA++;
3296: } else {
3297: *matB_j++ = tcol-1;
3298: *matB_a++ = vals[k];
3299: ilenB++;
3300: }
3301: }
3302: }
3303: matA->ilen[row]=ilenA;
3304: matB->ilen[row]=ilenB;
3305:
3306: }
3307: }
3308: }
3310: /* Now assemble the off proc rows*/
3311: {
3312: int *sbuf1_i,*rbuf2_i,*rbuf3_i,cidx,kmax,row,i_row;
3313: int *matA_j,*matB_j,lmax,tcol,ilenA,ilenB;
3314: PetscScalar *matA_a,*matB_a;
3315: FLOAT *rbuf4_i;
3317: for (j=0; j<nrqs; j++) {
3318: MPI_Waitany(nrqs,r_waits4,&i,r_status4+j);
3319: proc = pa[i];
3320: sbuf1_i = sbuf1[proc];
3321:
3322: cidx = 0;
3323: rbuf2_i = rbuf2[i];
3324: rbuf3_i = rbuf3[i];
3325: rbuf4_i = rbuf4[i];
3326: kmax = sbuf1_i[0];
3327: for (k=1; k<=kmax; k++) {
3328: row = rmap[sbuf1_i[k]];
3329:
3330: i_row = matA->i[row];
3331: matA_a = matA->a + i_row;
3332: matA_j = matA->j + i_row;
3333: i_row = matB->i[row];
3334: matB_a = matB->a + i_row;
3335: matB_j = matB->j + i_row;
3336:
3337: lmax = rbuf2_i[k];
3338: for (l=0,ilenA=0,ilenB=0; l<lmax; l++,cidx++) {
3339: if ((tcol = cmap[rbuf3_i[cidx]])) {
3340: if (tcol<=cstart) {
3341: *matB_j++ = tcol-1;
3342:               *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3343: ilenB++;
3344: } else if (tcol<=cend) {
3345: *matA_j++ = (tcol-1)-cstart;
3346: *matA_a++ = (PetscScalar)(rbuf4_i[cidx]);
3347: ilenA++;
3348: } else {
3349: *matB_j++ = tcol-1;
3350: *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3351: ilenB++;
3352: }
3353: }
3354: }
3355: matA->ilen[row]=ilenA;
3356: matB->ilen[row]=ilenB;
3357: }
3358: }
3359: }
3361: PetscFree(r_status4);
3362: PetscFree(r_waits4);
3363: if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
3364: PetscFree(s_waits4);
3365: PetscFree(s_status4);
3367: /* Restore the indices */
3368: ISRestoreIndices(isrow,&irow);
3369: ISRestoreIndices(iscol,&icol);
3371: /* Destroy allocated memory */
3372: PetscFree(rtable);
3373: PetscFree(w1);
3374: PetscFree(pa);
3376: PetscFree(sbuf1);
3377: PetscFree(rbuf2[0]);
3378: PetscFree(rbuf2);
3379: for (i=0; i<nrqr; ++i) {
3380: PetscFree(sbuf2[i]);
3381: }
3382: for (i=0; i<nrqs; ++i) {
3383: PetscFree(rbuf3[i]);
3384: PetscFree(rbuf4[i]);
3385: }
3387: PetscFree(sbuf2);
3388: PetscFree(rbuf3);
3389: PetscFree(rbuf4);
3390: PetscFree(sbuf3[0]);
3391: PetscFree(sbuf3);
3392: PetscFree(sbuf4[0]);
3393: PetscFree(sbuf4);
3394:
3395: PetscFree(cmap);
3396: PetscFree(rmap);
3399: MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);
3400: MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);
3403: return(0);
3404: }