Actual source code: matpapt.c

  1: #define PETSCMAT_DLL

  3: /*
  4:   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
  5:           C = P * A * P^T
  6: */

 8:  #include src/mat/impls/aij/seq/aij.h
 9:  #include src/mat/utils/freespace.h

 11: static PetscEvent logkey_matapplypapt          = 0;
 12: static PetscEvent logkey_matapplypapt_symbolic = 0;
 13: static PetscEvent logkey_matapplypapt_numeric  = 0;

 15: /*
 16:      MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
 17:            C = P * A * P^T;

 19:      Note: C is assumed to be uncreated.
 20:            If this is not the case, Destroy C before calling this routine.
 21: */
 24: PetscErrorCode MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
 25: {
 26:   /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
 27:   /*        and MatMatMult_SeqAIJ_SeqAIJ_Symbolic.  Perhaps they could be merged nicely. */
 28:   PetscErrorCode     ierr;
 29:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
 30:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
 31:   PetscInt           *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
 32:   PetscInt           *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
 33:   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N;
 34:   PetscInt           i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
 35:   MatScalar          *ca;

 38:   /* some error checking which could be moved into interface layer */
 39:   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
 40:   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);

 42:   /* Set up timers */
 43:   if (!logkey_matapplypapt_symbolic) {
 44:     PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);
 45:   }
 46:   PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);

 48:   /* Create ij structure of P^T */
 49:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

 51:   /* Allocate ci array, arrays for fill computation and */
 52:   /* free space for accumulating nonzero column info */
 53:   PetscMalloc(((pm+1)*1)*sizeof(PetscInt),&ci);
 54:   ci[0] = 0;

 56:   PetscMalloc((2*an+2*pm+1)*sizeof(PetscInt),&padenserow);
 57:   PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(PetscInt));
 58:   pasparserow  = padenserow  + an;
 59:   denserow     = pasparserow + an;
 60:   sparserow    = denserow    + pm;

 62:   /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
 63:   /* This should be reasonable if sparsity of PAPt is similar to that of A. */
 64:   PetscFreeSpaceGet((ai[am]/pn)*pm,&free_space);
 65:   current_space = free_space;

 67:   /* Determine fill for each row of C: */
 68:   for (i=0;i<pm;i++) {
 69:     pnzi  = pi[i+1] - pi[i];
 70:     panzi = 0;
 71:     /* Get symbolic sparse row of PA: */
 72:     for (j=0;j<pnzi;j++) {
 73:       arow = *pj++;
 74:       anzj = ai[arow+1] - ai[arow];
 75:       ajj  = aj + ai[arow];
 76:       for (k=0;k<anzj;k++) {
 77:         if (!padenserow[ajj[k]]) {
 78:           padenserow[ajj[k]]   = -1;
 79:           pasparserow[panzi++] = ajj[k];
 80:         }
 81:       }
 82:     }
 83:     /* Using symbolic row of PA, determine symbolic row of C: */
 84:     paj    = pasparserow;
 85:     cnzi   = 0;
 86:     for (j=0;j<panzi;j++) {
 87:       ptrow = *paj++;
 88:       ptnzj = pti[ptrow+1] - pti[ptrow];
 89:       ptjj  = ptj + pti[ptrow];
 90:       for (k=0;k<ptnzj;k++) {
 91:         if (!denserow[ptjj[k]]) {
 92:           denserow[ptjj[k]] = -1;
 93:           sparserow[cnzi++] = ptjj[k];
 94:         }
 95:       }
 96:     }

 98:     /* sort sparse representation */
 99:     PetscSortInt(cnzi,sparserow);

101:     /* If free space is not available, make more free space */
102:     /* Double the amount of total space in the list */
103:     if (current_space->local_remaining<cnzi) {
104:       PetscFreeSpaceGet(current_space->total_array_size,&current_space);
105:     }

107:     /* Copy data into free space, and zero out dense row */
108:     PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
109:     current_space->array           += cnzi;
110:     current_space->local_used      += cnzi;
111:     current_space->local_remaining -= cnzi;

113:     for (j=0;j<panzi;j++) {
114:       padenserow[pasparserow[j]] = 0;
115:     }
116:     for (j=0;j<cnzi;j++) {
117:       denserow[sparserow[j]] = 0;
118:     }
119:     ci[i+1] = ci[i] + cnzi;
120:   }
121:   /* column indices are in the list of free space */
122:   /* Allocate space for cj, initialize cj, and */
123:   /* destroy list of free space and other temporary array(s) */
124:   PetscMalloc((ci[pm]+1)*sizeof(PetscInt),&cj);
125:   PetscFreeSpaceContiguous(&free_space,cj);
126:   PetscFree(padenserow);
127: 
128:   /* Allocate space for ca */
129:   PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);
130:   PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));
131: 
132:   /* put together the new matrix */
133:   MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);

135:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
136:   /* Since these are PETSc arrays, change flags to free them as necessary. */
137:   c = (Mat_SeqAIJ *)((*C)->data);
138:   c->freedata = PETSC_TRUE;
139:   c->nonew    = 0;

141:   /* Clean up. */
142:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

144:   PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);
145:   return(0);
146: }

148: /*
149:      MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
150:            C = P * A * P^T;
151:      Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
152: */
155: PetscErrorCode MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
156: {
158:   PetscInt       flops=0;
159:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
160:   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
161:   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
162:   PetscInt       *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
163:   PetscInt       *ci=c->i,*cj=c->j;
164:   PetscInt       an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,cn=C->cmap.N,cm=C->rmap.N;
165:   PetscInt       i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
166:   MatScalar      *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;


170:   /* This error checking should be unnecessary if the symbolic was performed */
171:   if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm,cm);
172:   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
173:   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);
174:   if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm, cn);

176:   /* Set up timers */
177:   if (!logkey_matapplypapt_numeric) {
178:     PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);
179:   }
180:   PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);

182:   PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(PetscInt)),&paa);
183:   PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(PetscInt)));
184:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

186:   paj      = (PetscInt*)(paa + an);
187:   pajdense = paj + an;

189:   for (i=0;i<pm;i++) {
190:     /* Form sparse row of P*A */
191:     pnzi  = pi[i+1] - pi[i];
192:     panzj = 0;
193:     for (j=0;j<pnzi;j++) {
194:       arow = *pj++;
195:       anzj = ai[arow+1] - ai[arow];
196:       ajj  = aj + ai[arow];
197:       aaj  = aa + ai[arow];
198:       for (k=0;k<anzj;k++) {
199:         if (!pajdense[ajj[k]]) {
200:           pajdense[ajj[k]] = -1;
201:           paj[panzj++]     = ajj[k];
202:         }
203:         paa[ajj[k]] += (*pa)*aaj[k];
204:       }
205:       flops += 2*anzj;
206:       pa++;
207:     }

209:     /* Sort the j index array for quick sparse axpy. */
210:     PetscSortInt(panzj,paj);

212:     /* Compute P*A*P^T using sparse inner products. */
213:     /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
214:     cnzi = ci[i+1] - ci[i];
215:     for (j=0;j<cnzi;j++) {
216:       /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
217:       ptcol = *cj++;
218:       ptnzj = pi[ptcol+1] - pi[ptcol];
219:       ptj   = pjj + pi[ptcol];
220:       ptaj  = pta + pi[ptcol];
221:       sum   = 0.;
222:       k1    = 0;
223:       k2    = 0;
224:       while ((k1<panzj) && (k2<ptnzj)) {
225:         if (paj[k1]==ptj[k2]) {
226:           sum += paa[paj[k1++]]*ptaj[k2++];
227:         } else if (paj[k1] < ptj[k2]) {
228:           k1++;
229:         } else /* if (paj[k1] > ptj[k2]) */ {
230:           k2++;
231:         }
232:       }
233:       *ca++ = sum;
234:     }

236:     /* Zero the current row info for P*A */
237:     for (j=0;j<panzj;j++) {
238:       paa[paj[j]]      = 0.;
239:       pajdense[paj[j]] = 0;
240:     }
241:   }

243:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
244:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
245:   PetscLogFlops(flops);
246:   PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);
247:   return(0);
248: }
249: 
252: PetscErrorCode MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
253: {

257:   if (!logkey_matapplypapt) {
258:     PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);
259:   }
260:   PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);
261:   MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);
262:   MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);
263:   PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);
264:   return(0);
265: }