Actual source code: csrperm.c

  1: #define PETSCMAT_DLL

  3: /*
  4:   Defines basic operations for the MATSEQCSRPERM matrix class.
  5:   This class is derived from the MATSEQAIJ class and retains the 
  6:   compressed row storage (aka Yale sparse matrix format) but augments 
  7:   it with some permutation information that enables some operations 
  8:   to be more vectorizable.  A physically rearranged copy of the matrix 
  9:   may be stored if the user desires.

 11:   Eventually a variety of permutations may be supported.
 12: */

 14:  #include src/mat/impls/aij/seq/aij.h

 16: #define NDIM 512
 17:     /* NDIM specifies how many rows at a time we should work with when 
 18:      * performing the vectorized mat-vec.  This depends on various factors 
 19:      * such as vector register length, etc., and I really need to add a 
 20:      * way for the user (or the library) to tune this.  I'm setting it to  
 21:      * 512 for now since that is what Ed D'Azevedo was using in his Fortran 
 22:      * routines. */

 24: typedef struct {
 25:   PetscInt  ngroup;
 26:   PetscInt *xgroup;
 27:     /* Denotes where groups of rows with same number of nonzeros 
 28:      * begin and end, i.e., xgroup[i] gives us the position in iperm[] 
 29:      * where the ith group begins. */
 30:   PetscInt *nzgroup; /*  how many nonzeros each row that is a member of group i has. */
 31:   PetscInt *iperm;  /* The permutation vector. */

 33:   /* Flag that indicates whether we need to clean up permutation 
 34:    * information during the MatDestroy. */
 35:   PetscTruth CleanUpCSRPERM;

 37:   /* Some of this stuff is for Ed's recursive triangular solve.
 38:    * I'm not sure what I need yet. */
 39:   PetscInt blocksize;
 40:   PetscInt nstep;
 41:   PetscInt *jstart_list;
 42:   PetscInt *jend_list;
 43:   PetscInt *action_list;
 44:   PetscInt *ngroup_list;
 45:   PetscInt **ipointer_list;
 46:   PetscInt **xgroup_list;
 47:   PetscInt **nzgroup_list;
 48:   PetscInt **iperm_list;

 50:   /* We need to keep a pointer to MatAssemblyEnd_SeqAIJ because we 
 51:    * actually want to call this function from within the 
 52:    * MatAssemblyEnd_SeqCSRPERM function.  Similarly, we also need 
 53:    * MatDestroy_SeqAIJ and MatDuplicate_SeqAIJ. */
 54:   PetscErrorCode (*AssemblyEnd_SeqAIJ)(Mat,MatAssemblyType);
 55:   PetscErrorCode (*MatDestroy_SeqAIJ)(Mat);
 56:   PetscErrorCode (*MatDuplicate_SeqAIJ)(Mat,MatDuplicateOption,Mat*);
 57: } Mat_SeqCSRPERM;

 61: PetscErrorCode MatDestroy_SeqCSRPERM(Mat A)
 62: {
 64:   Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;

 66:   /* We are going to convert A back into a SEQAIJ matrix, since we are 
 67:    * eventually going to use MatDestroy_SeqAIJ() to destroy everything 
 68:    * that is not specific to CSRPERM.
 69:    * In preparation for this, reset the operations pointers in A to 
 70:    * their SeqAIJ versions. */
 71:   A->ops->assemblyend = csrperm->AssemblyEnd_SeqAIJ;
 72:     /* I suppose I don't actually need to point A->ops->assemblyend back 
 73:      * to the right thing, but I do it anyway for completeness. */
 74:   A->ops->destroy   = csrperm->MatDestroy_SeqAIJ;
 75:   A->ops->duplicate = csrperm->MatDuplicate_SeqAIJ;

 77:   /* Free everything in the Mat_SeqCSRPERM data structure. */
 78:   if(csrperm->CleanUpCSRPERM) {
 79:     PetscFree(csrperm->xgroup);
 80:     PetscFree(csrperm->nzgroup);
 81:     PetscFree(csrperm->iperm);
 82:   }

 84:   /* Free the Mat_SeqCSRPERM struct itself. */
 85:   PetscFree(csrperm);

 87:   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 
 88:    * to destroy everything that remains. */
 89:   PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);
 90:   /* Note that I don't call MatSetType().  I believe this is because that 
 91:    * is only to be called when *building* a matrix.  I could be wrong, but 
 92:    * that is how things work for the SuperLU matrix class. */
 93:   (*A->ops->destroy)(A);
 94:   return(0);
 95: }

 97: PetscErrorCode MatDuplicate_SeqCSRPERM(Mat A, MatDuplicateOption op, Mat *M)
 98: {
100:   Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;

103:   (*csrperm->MatDuplicate_SeqAIJ)(A,op,M);
104:   SETERRQ(PETSC_ERR_SUP,"Cannot duplicate CSRPERM matrices yet");
105:   return(0);
106: }

110: PetscErrorCode SeqCSRPERM_create_perm(Mat A)
111: {
112:   PetscInt   m;  /* Number of rows in the matrix. */
113:   Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data;
114:   PetscInt   *ia;  /* From the CSR representation; points to the beginning  of each row. */
115:   PetscInt   maxnz; /* Maximum number of nonzeros in any row. */
116:   PetscInt   *rows_in_bucket;
117:     /* To construct the permutation, we sort each row into one of maxnz 
118:      * buckets based on how many nonzeros are in the row. */
119:   PetscInt   nz;
120:   PetscInt   *nz_in_row;  /* the number of nonzero elements in row k. */
121:   PetscInt   *ipnz;
122:     /* When constructing the iperm permutation vector, 
123:      * ipnz[nz] is used to point to the next place in the permutation vector 
124:      * that a row with nz nonzero elements should be placed.*/
125:   Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM*) A->spptr;
126:     /* Points to the MATSEQCSRPERM-specific data in the matrix B. */
128:   PetscInt       i, ngroup, istart, ipos;

130:   /* I really ought to put something in here to check if B is of 
131:    * type MATSEQCSRPERM and return an error code if it is not.
132:    * Come back and do this! */
133: 
134:   m  = A->rmap.n;
135:   ia = a->i;
136: 
137:   /* Allocate the arrays that will hold the permutation vector. */
138:   PetscMalloc(m*sizeof(PetscInt), &csrperm->iperm);

140:   /* Allocate some temporary work arrays that will be used in 
141:    * calculating the permuation vector and groupings. */
142:   PetscMalloc((m+1)*sizeof(PetscInt), &rows_in_bucket);
143:   PetscMalloc((m+1)*sizeof(PetscInt), &ipnz);
144:   PetscMalloc(m*sizeof(PetscInt), &nz_in_row);

146:   /* Now actually figure out the permutation and grouping. */

148:   /* First pass: Determine number of nonzeros in each row, maximum 
149:    * number of nonzeros in any row, and how many rows fall into each  
150:    * "bucket" of rows with same number of nonzeros. */
151:   maxnz = 0;
152:   for (i=0; i<m; i++) {
153:     nz_in_row[i] = ia[i+1]-ia[i];
154:     if (nz_in_row[i] > maxnz) maxnz = nz_in_row[i];
155:   }

157:   for (i=0; i<=maxnz; i++) {
158:     rows_in_bucket[i] = 0;
159:   }
160:   for (i=0; i<m; i++) {
161:     nz = nz_in_row[i];
162:     rows_in_bucket[nz]++;
163:   }

165:   /* Allocate space for the grouping info.  There will be at most (maxnz + 1) 
166:    * groups.  (It is maxnz + 1 instead of simply maxnz because there may be 
167:    * rows with no nonzero elements.)  If there are (maxnz + 1) groups, 
168:    * then xgroup[] must consist of (maxnz + 2) elements, since the last 
169:    * element of xgroup will tell us where the (maxnz + 1)th group ends.
170:    * We allocate space for the maximum number of groups; 
171:    * that is potentially a little wasteful, but not too much so.  
172:    * Perhaps I should fix it later. */
173:   PetscMalloc((maxnz+2)*sizeof(PetscInt), &csrperm->xgroup);
174:   PetscMalloc((maxnz+1)*sizeof(PetscInt), &csrperm->nzgroup);

176:   /* Second pass.  Look at what is in the buckets and create the groupings.
177:    * Note that it is OK to have a group of rows with no non-zero values. */
178:   ngroup = 0;
179:   istart = 0;
180:   for (i=0; i<=maxnz; i++) {
181:     if (rows_in_bucket[i] > 0) {
182:       csrperm->nzgroup[ngroup] = i;
183:       csrperm->xgroup[ngroup] = istart;
184:       ngroup++;
185:       istart += rows_in_bucket[i];
186:     }
187:   }

189:   csrperm->xgroup[ngroup] = istart;
190:   csrperm->ngroup = ngroup;

192:   /* Now fill in the permutation vector iperm. */
193:   ipnz[0] = 0;
194:   for (i=0; i<maxnz; i++) {
195:     ipnz[i+1] = ipnz[i] + rows_in_bucket[i];
196:   }

198:   for (i=0; i<m; i++) {
199:     nz = nz_in_row[i];
200:     ipos = ipnz[nz];
201:     csrperm->iperm[ipos] = i;
202:     ipnz[nz]++;
203:   }

205:   /* Clean up temporary work arrays. */
206:   PetscFree(rows_in_bucket);
207:   PetscFree(ipnz);
208:   PetscFree(nz_in_row);

210:   /* Since we've allocated some memory to hold permutation info, 
211:    * flip the CleanUpCSRPERM flag to true. */
212:   csrperm->CleanUpCSRPERM = PETSC_TRUE;
213:   return(0);
214: }


219: PetscErrorCode MatAssemblyEnd_SeqCSRPERM(Mat A, MatAssemblyType mode)
220: {
222:   Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM*) A->spptr;
223:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;

225:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);
226: 
227:   /* Since a MATSEQCSRPERM matrix is really just a MATSEQAIJ with some 
228:    * extra information, call the AssemblyEnd routine for a MATSEQAIJ. 
229:    * I'm not sure if this is the best way to do this, but it avoids 
230:    * a lot of code duplication.
231:    * I also note that currently MATSEQCSRPERM doesn't know anything about 
232:    * the Mat_CompressedRow data structure that SeqAIJ now uses when there 
233:    * are many zero rows.  If the SeqAIJ assembly end routine decides to use 
234:    * this, this may break things.  (Don't know... haven't looked at it.) */
235:   a->inode.use = PETSC_FALSE;
236:   (*csrperm->AssemblyEnd_SeqAIJ)(A, mode);

238:   /* Now calculate the permutation and grouping information. */
239:   SeqCSRPERM_create_perm(A);
240:   return(0);
241: }

243:  #include src/inline/dot.h

247: PetscErrorCode MatMult_SeqCSRPERM(Mat A,Vec xx,Vec yy)
248: {
249:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
250:   PetscScalar    *x,*y,*aa;
252:   PetscInt       *aj,*ai;
253: #if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM) && defined(notworking))
254:   PetscInt       i,j;
255: #endif

257:   /* Variables that don't appear in MatMult_SeqAIJ. */
258:   Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;
259:   PetscInt       *iperm;  /* Points to the permutation vector. */
260:   PetscInt       *xgroup;
261:     /* Denotes where groups of rows with same number of nonzeros 
262:      * begin and end in iperm. */
263:   PetscInt *nzgroup;
264:   PetscInt ngroup;
265:   PetscInt igroup;
266:   PetscInt jstart,jend;
267:     /* jstart is used in loops to denote the position in iperm where a 
268:      * group starts; jend denotes the position where it ends.
269:      * (jend + 1 is where the next group starts.) */
270:   PetscInt iold,nz;
271:   PetscInt istart,iend,isize;
272:   PetscInt ipos;
273:   PetscScalar yp[NDIM];
274:   PetscInt    ip[NDIM]; /* yp[] and ip[] are treated as vector "registers" for performing the mat-vec. */

276: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
277: #pragma disjoint(*x,*y,*aa)
278: #endif

281:   VecGetArray(xx,&x);
282:   VecGetArray(yy,&y);
283:   aj  = a->j;    /* aj[k] gives column index for element aa[k]. */
284:   aa  = a->a;  /* Nonzero elements stored row-by-row. */
285:   ai  = a->i;   /* ai[k] is the position in aa and aj where row k starts. */

287:   /* Get the info we need about the permutations and groupings. */
288:   iperm  = csrperm->iperm;
289:   ngroup = csrperm->ngroup;
290:   xgroup = csrperm->xgroup;
291:   nzgroup = csrperm->nzgroup;
292: 
293: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM) && defined(notworking)
294:   fortranmultcsrperm_(&m,x,ii,aj,aa,y);
295: #else

297:   for (igroup=0; igroup<ngroup; igroup++) {
298:     jstart = xgroup[igroup];
299:     jend   = xgroup[igroup+1] - 1;
300:     nz     = nzgroup[igroup];

302:     /* Handle the special cases where the number of nonzeros per row 
303:      * in the group is either 0 or 1. */
304:     if (nz == 0) {
305:       for (i=jstart; i<=jend; i++) {
306:         y[iperm[i]] = 0.0;
307:       }
308:     } else if (nz == 1) {
309:       for (i=jstart; i<=jend; i++) {
310:         iold = iperm[i];
311:         ipos = ai[iold];
312:         y[iold] = aa[ipos] * x[aj[ipos]];
313:       }
314:     } else {
315: 
316:       /* We work our way through the current group in chunks of NDIM rows 
317:        * at a time. */

319:       for (istart=jstart; istart<=jend; istart+=NDIM) {
320:         /* Figure out where the chunk of 'isize' rows ends in iperm.
321:          * 'isize may of course be less than NDIM for the last chunk. */
322:         iend = istart + (NDIM - 1);
323:         if (iend > jend) { iend = jend; }
324:         isize = iend - istart + 1;

326:         /* Initialize the yp[] array that will be used to hold part of 
327:          * the permuted results vector, and figure out where in aa each 
328:          * row of the chunk will begin. */
329:         for (i=0; i<isize; i++) {
330:           iold = iperm[istart + i];
331:             /* iold is a row number from the matrix A *before* reordering. */
332:           ip[i] = ai[iold];
333:             /* ip[i] tells us where the ith row of the chunk begins in aa. */
334:           yp[i] = (PetscScalar) 0.0;
335:         }

337:         /* If the number of zeros per row exceeds the number of rows in 
338:          * the chunk, we should vectorize along nz, that is, perform the 
339:          * mat-vec one row at a time as in the usual CSR case. */
340:         if (nz > isize) {
341: #if defined(PETSC_HAVE_CRAYC)
342: #pragma _CRI preferstream
343: #endif
344:           for (i=0; i<isize; i++) {
345: #if defined(PETSC_HAVE_CRAYC)
346: #pragma _CRI prefervector
347: #endif
348:             for (j=0; j<nz; j++) {
349:               ipos = ip[i] + j;
350:               yp[i] += aa[ipos] * x[aj[ipos]];
351:             }
352:           }
353:         } else {
354:         /* Otherwise, there are enough rows in the chunk to make it 
355:          * worthwhile to vectorize across the rows, that is, to do the 
356:          * matvec by operating with "columns" of the chunk. */
357:           for (j=0; j<nz; j++) {
358:             for(i=0; i<isize; i++) {
359:               ipos = ip[i] + j;
360:               yp[i] += aa[ipos] * x[aj[ipos]];
361:             }
362:           }
363:         }

365: #if defined(PETSC_HAVE_CRAYC)
366: #pragma _CRI ivdep
367: #endif
368:         /* Put results from yp[] into non-permuted result vector y. */
369:         for (i=0; i<isize; i++) {
370:           y[iperm[istart+i]] = yp[i];
371:         }
372:       } /* End processing chunk of isize rows of a group. */
373:     } /* End handling matvec for chunk with nz > 1. */
374:   } /* End loop over igroup. */
375: #endif
376:   PetscLogFlops(2*a->nz - A->rmap.n);
377:   VecRestoreArray(xx,&x);
378:   VecRestoreArray(yy,&y);
379:   return(0);
380: }


383: /* MatMultAdd_SeqCSRPERM() calculates yy = ww + A * xx.
384:  * Note that the names I used to designate the vectors differs from that 
385:  * used in MatMultAdd_SeqAIJ().  I did this to keep my notation consistent 
386:  * with the MatMult_SeqCSRPERM() routine, which is very similar to this one. */
387: /*
388:     I hate having virtually identical code for the mult and the multadd!!!
389: */
392: PetscErrorCode MatMultAdd_SeqCSRPERM(Mat A,Vec xx,Vec ww,Vec yy)
393: {
394:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
395:   PetscScalar    *x,*y,*w,*aa;
397:   PetscInt       *aj,*ai;
398: #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTADDCSRPERM)
399:   PetscInt       i,j;
400: #endif

402:   /* Variables that don't appear in MatMultAdd_SeqAIJ. */
403:   Mat_SeqCSRPERM *csrperm;
404:   PetscInt *iperm;  /* Points to the permutation vector. */
405:   PetscInt *xgroup;
406:     /* Denotes where groups of rows with same number of nonzeros 
407:      * begin and end in iperm. */
408:   PetscInt *nzgroup;
409:   PetscInt ngroup;
410:   PetscInt igroup;
411:   PetscInt jstart,jend;
412:     /* jstart is used in loops to denote the position in iperm where a 
413:      * group starts; jend denotes the position where it ends.
414:      * (jend + 1 is where the next group starts.) */
415:   PetscInt iold,nz;
416:   PetscInt istart,iend,isize;
417:   PetscInt ipos;
418:   PetscScalar yp[NDIM];
419:   PetscInt ip[NDIM];
420:     /* yp[] and ip[] are treated as vector "registers" for performing 
421:      * the mat-vec. */

423: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
424: #pragma disjoint(*x,*y,*aa)
425: #endif

428:   VecGetArray(xx,&x);
429:   VecGetArray(yy,&y);
430:   if (yy != ww) {
431:     VecGetArray(ww,&w);
432:   } else {
433:     w = y;
434:   }

436:   aj  = a->j;  /* aj[k] gives column index for element aa[k]. */
437:   aa  = a->a;  /* Nonzero elements stored row-by-row. */
438:   ai  = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

440:   /* Get the info we need about the permutations and groupings. */
441:   csrperm = (Mat_SeqCSRPERM *) A->spptr;
442:   iperm = csrperm->iperm;
443:   ngroup = csrperm->ngroup;
444:   xgroup = csrperm->xgroup;
445:   nzgroup = csrperm->nzgroup;
446: 
447: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDCSRPERM)
448:   fortranmultaddcsrperm_(&m,x,ii,aj,aa,y,w);
449: #else

451:   for(igroup=0; igroup<ngroup; igroup++) {
452:     jstart = xgroup[igroup];
453:     jend = xgroup[igroup+1] - 1;

455:     nz = nzgroup[igroup];

457:     /* Handle the special cases where the number of nonzeros per row 
458:      * in the group is either 0 or 1. */
459:     if(nz == 0) {
460:       for(i=jstart; i<=jend; i++) {
461:         iold = iperm[i];
462:         y[iold] = w[iold];
463:       }
464:     }
465:     else if(nz == 1) {
466:       for(i=jstart; i<=jend; i++) {
467:         iold = iperm[i];
468:         ipos = ai[iold];
469:         y[iold] = w[iold] + aa[ipos] * x[aj[ipos]];
470:       }
471:     }
472:     /* For the general case: */
473:     else {
474: 
475:       /* We work our way through the current group in chunks of NDIM rows 
476:        * at a time. */

478:       for(istart=jstart; istart<=jend; istart+=NDIM) {
479:         /* Figure out where the chunk of 'isize' rows ends in iperm.
480:          * 'isize may of course be less than NDIM for the last chunk. */
481:         iend = istart + (NDIM - 1);
482:         if(iend > jend) { iend = jend; }
483:         isize = iend - istart + 1;

485:         /* Initialize the yp[] array that will be used to hold part of 
486:          * the permuted results vector, and figure out where in aa each 
487:          * row of the chunk will begin. */
488:         for(i=0; i<isize; i++) {
489:           iold = iperm[istart + i];
490:             /* iold is a row number from the matrix A *before* reordering. */
491:           ip[i] = ai[iold];
492:             /* ip[i] tells us where the ith row of the chunk begins in aa. */
493:           yp[i] = w[iold];
494:         }

496:         /* If the number of zeros per row exceeds the number of rows in 
497:          * the chunk, we should vectorize along nz, that is, perform the 
498:          * mat-vec one row at a time as in the usual CSR case. */
499:         if(nz > isize) {
500: #if defined(PETSC_HAVE_CRAYC)
501: #pragma _CRI preferstream
502: #endif
503:           for(i=0; i<isize; i++) {
504: #if defined(PETSC_HAVE_CRAYC)
505: #pragma _CRI prefervector
506: #endif
507:             for(j=0; j<nz; j++) {
508:               ipos = ip[i] + j;
509:               yp[i] += aa[ipos] * x[aj[ipos]];
510:             }
511:           }
512:         }
513:         /* Otherwise, there are enough rows in the chunk to make it 
514:          * worthwhile to vectorize across the rows, that is, to do the 
515:          * matvec by operating with "columns" of the chunk. */
516:         else {
517:           for(j=0; j<nz; j++) {
518:             for(i=0; i<isize; i++) {
519:               ipos = ip[i] + j;
520:               yp[i] += aa[ipos] * x[aj[ipos]];
521:             }
522:           }
523:         }

525: #if defined(PETSC_HAVE_CRAYC)
526: #pragma _CRI ivdep
527: #endif
528:         /* Put results from yp[] into non-permuted result vector y. */
529:         for(i=0; i<isize; i++) {
530:           y[iperm[istart+i]] = yp[i];
531:         }
532:       } /* End processing chunk of isize rows of a group. */
533: 
534:     } /* End handling matvec for chunk with nz > 1. */
535:   } /* End loop over igroup. */

537: #endif
538:   PetscLogFlops(2*a->nz - A->rmap.n);
539:   VecRestoreArray(xx,&x);
540:   VecRestoreArray(yy,&y);
541:   if (yy != ww) {
542:     VecRestoreArray(ww,&w);
543:   }
544:   return(0);
545: }


548: /* MatConvert_SeqAIJ_SeqCSRPERM converts a SeqAIJ matrix into a 
549:  * SeqCSRPERM matrix.  This routine is called by the MatCreate_SeqCSRPERM() 
550:  * routine, but can also be used to convert an assembled SeqAIJ matrix 
551:  * into a SeqCSRPERM one. */
555: PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqCSRPERM(Mat A,MatType type,MatReuse reuse,Mat *newmat)
556: {
557:   /* This routine is only called to convert to MATSEQCSRPERM
558:    * from MATSEQAIJ, so we can ignore 'MatType Type'. */
560:   Mat            B = *newmat;
561:   Mat_SeqCSRPERM *csrperm;

564:   if (reuse == MAT_INITIAL_MATRIX) {
565:     MatDuplicate(A,MAT_COPY_VALUES,&B);
566:   }

568:   PetscNew(Mat_SeqCSRPERM,&csrperm);
569:   B->spptr = (void *) csrperm;

571:   /* Save a pointer to the original SeqAIJ assembly end routine, because we 
572:    * will want to use it later in the CSRPERM assembly end routine. 
573:    * Also, save a pointer to the original SeqAIJ Destroy routine, because we 
574:    * will want to use it in the CSRPERM destroy routine. */
575:   csrperm->AssemblyEnd_SeqAIJ  = A->ops->assemblyend;
576:   csrperm->MatDestroy_SeqAIJ   = A->ops->destroy;
577:   csrperm->MatDuplicate_SeqAIJ = A->ops->duplicate;

579:   /* Set function pointers for methods that we inherit from AIJ but 
580:    * override. */
581:   B->ops->duplicate   = MatDuplicate_SeqCSRPERM;
582:   B->ops->assemblyend = MatAssemblyEnd_SeqCSRPERM;
583:   B->ops->destroy     = MatDestroy_SeqCSRPERM;
584:   B->ops->mult        = MatMult_SeqCSRPERM;
585:   B->ops->multadd     = MatMultAdd_SeqCSRPERM;

587:   /* If A has already been assembled, compute the permutation. */
588:   if (A->assembled == PETSC_TRUE) {
589:     SeqCSRPERM_create_perm(B);
590:   }
591:   PetscObjectChangeTypeName((PetscObject)B,MATSEQCSRPERM);
592:   *newmat = B;
593:   return(0);
594: }


600: /*@C
601:    MatCreateSeqCSRPERM - Creates a sparse matrix of type SEQCSRPERM.
602:    This type inherits from AIJ, but calculates some additional permutation 
603:    information that is used to allow better vectorization of some 
604:    operations.  At the cost of increased storage, the AIJ formatted 
605:    matrix can be copied to a format in which pieces of the matrix are 
606:    stored in ELLPACK format, allowing the vectorized matrix multiply 
607:    routine to use stride-1 memory accesses.  As with the AIJ type, it is 
608:    important to preallocate matrix storage in order to get good assembly 
609:    performance.
610:    
611:    Collective on MPI_Comm

613:    Input Parameters:
614: +  comm - MPI communicator, set to PETSC_COMM_SELF
615: .  m - number of rows
616: .  n - number of columns
617: .  nz - number of nonzeros per row (same for all rows)
618: -  nnz - array containing the number of nonzeros in the various rows 
619:          (possibly different for each row) or PETSC_NULL

621:    Output Parameter:
622: .  A - the matrix 

624:    Notes:
625:    If nnz is given then nz is ignored

627:    Level: intermediate

629: .keywords: matrix, cray, sparse, parallel

631: .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues()
632: @*/
633: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCSRPERM(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
634: {

638:   MatCreate(comm,A);
639:   MatSetSizes(*A,m,n,m,n);
640:   MatSetType(*A,MATSEQCSRPERM);
641:   MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);
642:   return(0);
643: }


649: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqCSRPERM(Mat A)
650: {

654:   /* Following the example of the SuperLU class, I change the type name 
655:    * before calling MatSetType() to force proper construction of SeqAIJ 
656:    * and MATSEQCSRPERM types. */
657:   PetscObjectChangeTypeName((PetscObject)A,MATSEQCSRPERM);
658:   MatSetType(A,MATSEQAIJ);
659:   MatConvert_SeqAIJ_SeqCSRPERM(A,MATSEQCSRPERM,MAT_REUSE_MATRIX,&A);
660:   return(0);
661: }