Actual source code: maij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  5:   This format is used for restriction and interpolation operations for 
  6:   multicomponent problems. It interpolates each component the same way
  7:   independently.

  9:      We provide:
 10:          MatMult()
 11:          MatMultTranspose()
 12:          MatMultTransposeAdd()
 13:          MatMultAdd()
 14:           and
 15:          MatCreateMAIJ(Mat,dof,Mat*)

 17:      This single directory handles both the sequential and parallel codes
 18: */

 20:  #include src/mat/impls/maij/maij.h
 21:  #include src/mat/utils/freespace.h
 22:  #include private/vecimpl.h

 26: PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
 27: {
 29:   PetscTruth     ismpimaij,isseqmaij;

 32:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 33:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 34:   if (ismpimaij) {
 35:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 37:     *B = b->A;
 38:   } else if (isseqmaij) {
 39:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 41:     *B = b->AIJ;
 42:   } else {
 43:     *B = A;
 44:   }
 45:   return(0);
 46: }

 50: PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 51: {
 53:   Mat            Aij;

 56:   MatMAIJGetAIJ(A,&Aij);
 57:   MatCreateMAIJ(Aij,dof,B);
 58:   return(0);
 59: }

 63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 64: {
 66:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 69:   if (b->AIJ) {
 70:     MatDestroy(b->AIJ);
 71:   }
 72:   PetscFree(b);
 73:   return(0);
 74: }

 78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
 79: {
 81:   Mat            B;

 84:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
 85:   MatView(B,viewer);
 86:   MatDestroy(B);
 87:   return(0);
 88: }

 92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
 93: {
 95:   Mat            B;

 98:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
 99:   MatView(B,viewer);
100:   MatDestroy(B);
101:   return(0);
102: }

106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

112:   if (b->AIJ) {
113:     MatDestroy(b->AIJ);
114:   }
115:   if (b->OAIJ) {
116:     MatDestroy(b->OAIJ);
117:   }
118:   if (b->A) {
119:     MatDestroy(b->A);
120:   }
121:   if (b->ctx) {
122:     VecScatterDestroy(b->ctx);
123:   }
124:   if (b->w) {
125:     VecDestroy(b->w);
126:   }
127:   PetscFree(b);
128:   return(0);
129: }

131: /*MC
132:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
133:   multicomponent problems, interpolating or restricting each component the same way independently.
134:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

136:   Operations provided:
137: . MatMult
138: . MatMultTranspose
139: . MatMultAdd
140: . MatMultTransposeAdd

142:   Level: advanced

144: .seealso: MatCreateSeqDense
145: M*/

150: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
151: {
153:   Mat_MPIMAIJ    *b;

156:   PetscNew(Mat_MPIMAIJ,&b);
157:   A->data  = (void*)b;
158:   PetscMemzero(A->ops,sizeof(struct _MatOps));
159:   A->factor           = 0;
160:   A->mapping          = 0;

162:   b->AIJ  = 0;
163:   b->dof  = 0;
164:   b->OAIJ = 0;
165:   b->ctx  = 0;
166:   b->w    = 0;
167:   return(0);
168: }

171: /* --------------------------------------------------------------------------------------*/
174: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
175: {
176:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
177:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
178:   PetscScalar    *x,*y,*v,sum1, sum2;
180:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
181:   PetscInt       n,i,jrow,j;

184:   VecGetArray(xx,&x);
185:   VecGetArray(yy,&y);
186:   idx  = a->j;
187:   v    = a->a;
188:   ii   = a->i;

190:   for (i=0; i<m; i++) {
191:     jrow = ii[i];
192:     n    = ii[i+1] - jrow;
193:     sum1  = 0.0;
194:     sum2  = 0.0;
195:     for (j=0; j<n; j++) {
196:       sum1 += v[jrow]*x[2*idx[jrow]];
197:       sum2 += v[jrow]*x[2*idx[jrow]+1];
198:       jrow++;
199:      }
200:     y[2*i]   = sum1;
201:     y[2*i+1] = sum2;
202:   }

204:   PetscLogFlops(4*a->nz - 2*m);
205:   VecRestoreArray(xx,&x);
206:   VecRestoreArray(yy,&y);
207:   return(0);
208: }

212: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
213: {
214:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
215:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
216:   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
218:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

221:   VecSet(yy,zero);
222:   VecGetArray(xx,&x);
223:   VecGetArray(yy,&y);
224: 
225:   for (i=0; i<m; i++) {
226:     idx    = a->j + a->i[i] ;
227:     v      = a->a + a->i[i] ;
228:     n      = a->i[i+1] - a->i[i];
229:     alpha1 = x[2*i];
230:     alpha2 = x[2*i+1];
231:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
232:   }
233:   PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
234:   VecRestoreArray(xx,&x);
235:   VecRestoreArray(yy,&y);
236:   return(0);
237: }

241: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
242: {
243:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
244:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
245:   PetscScalar    *x,*y,*v,sum1, sum2;
247:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
248:   PetscInt       n,i,jrow,j;

251:   if (yy != zz) {VecCopy(yy,zz);}
252:   VecGetArray(xx,&x);
253:   VecGetArray(zz,&y);
254:   idx  = a->j;
255:   v    = a->a;
256:   ii   = a->i;

258:   for (i=0; i<m; i++) {
259:     jrow = ii[i];
260:     n    = ii[i+1] - jrow;
261:     sum1  = 0.0;
262:     sum2  = 0.0;
263:     for (j=0; j<n; j++) {
264:       sum1 += v[jrow]*x[2*idx[jrow]];
265:       sum2 += v[jrow]*x[2*idx[jrow]+1];
266:       jrow++;
267:      }
268:     y[2*i]   += sum1;
269:     y[2*i+1] += sum2;
270:   }

272:   PetscLogFlops(4*a->nz - 2*m);
273:   VecRestoreArray(xx,&x);
274:   VecRestoreArray(zz,&y);
275:   return(0);
276: }
279: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
280: {
281:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
282:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
283:   PetscScalar    *x,*y,*v,alpha1,alpha2;
285:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

288:   if (yy != zz) {VecCopy(yy,zz);}
289:   VecGetArray(xx,&x);
290:   VecGetArray(zz,&y);
291: 
292:   for (i=0; i<m; i++) {
293:     idx   = a->j + a->i[i] ;
294:     v     = a->a + a->i[i] ;
295:     n     = a->i[i+1] - a->i[i];
296:     alpha1 = x[2*i];
297:     alpha2 = x[2*i+1];
298:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
299:   }
300:   PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
301:   VecRestoreArray(xx,&x);
302:   VecRestoreArray(zz,&y);
303:   return(0);
304: }
305: /* --------------------------------------------------------------------------------------*/
308: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
309: {
310:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
311:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
312:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
314:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
315:   PetscInt       n,i,jrow,j;

318:   VecGetArray(xx,&x);
319:   VecGetArray(yy,&y);
320:   idx  = a->j;
321:   v    = a->a;
322:   ii   = a->i;

324:   for (i=0; i<m; i++) {
325:     jrow = ii[i];
326:     n    = ii[i+1] - jrow;
327:     sum1  = 0.0;
328:     sum2  = 0.0;
329:     sum3  = 0.0;
330:     for (j=0; j<n; j++) {
331:       sum1 += v[jrow]*x[3*idx[jrow]];
332:       sum2 += v[jrow]*x[3*idx[jrow]+1];
333:       sum3 += v[jrow]*x[3*idx[jrow]+2];
334:       jrow++;
335:      }
336:     y[3*i]   = sum1;
337:     y[3*i+1] = sum2;
338:     y[3*i+2] = sum3;
339:   }

341:   PetscLogFlops(6*a->nz - 3*m);
342:   VecRestoreArray(xx,&x);
343:   VecRestoreArray(yy,&y);
344:   return(0);
345: }

349: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
350: {
351:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
352:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
353:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
355:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

358:   VecSet(yy,zero);
359:   VecGetArray(xx,&x);
360:   VecGetArray(yy,&y);
361: 
362:   for (i=0; i<m; i++) {
363:     idx    = a->j + a->i[i];
364:     v      = a->a + a->i[i];
365:     n      = a->i[i+1] - a->i[i];
366:     alpha1 = x[3*i];
367:     alpha2 = x[3*i+1];
368:     alpha3 = x[3*i+2];
369:     while (n-->0) {
370:       y[3*(*idx)]   += alpha1*(*v);
371:       y[3*(*idx)+1] += alpha2*(*v);
372:       y[3*(*idx)+2] += alpha3*(*v);
373:       idx++; v++;
374:     }
375:   }
376:   PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);
377:   VecRestoreArray(xx,&x);
378:   VecRestoreArray(yy,&y);
379:   return(0);
380: }

384: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
385: {
386:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
387:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
388:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
390:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
391:   PetscInt       n,i,jrow,j;

394:   if (yy != zz) {VecCopy(yy,zz);}
395:   VecGetArray(xx,&x);
396:   VecGetArray(zz,&y);
397:   idx  = a->j;
398:   v    = a->a;
399:   ii   = a->i;

401:   for (i=0; i<m; i++) {
402:     jrow = ii[i];
403:     n    = ii[i+1] - jrow;
404:     sum1  = 0.0;
405:     sum2  = 0.0;
406:     sum3  = 0.0;
407:     for (j=0; j<n; j++) {
408:       sum1 += v[jrow]*x[3*idx[jrow]];
409:       sum2 += v[jrow]*x[3*idx[jrow]+1];
410:       sum3 += v[jrow]*x[3*idx[jrow]+2];
411:       jrow++;
412:      }
413:     y[3*i]   += sum1;
414:     y[3*i+1] += sum2;
415:     y[3*i+2] += sum3;
416:   }

418:   PetscLogFlops(6*a->nz);
419:   VecRestoreArray(xx,&x);
420:   VecRestoreArray(zz,&y);
421:   return(0);
422: }
425: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
426: {
427:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
428:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
429:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
431:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

434:   if (yy != zz) {VecCopy(yy,zz);}
435:   VecGetArray(xx,&x);
436:   VecGetArray(zz,&y);
437:   for (i=0; i<m; i++) {
438:     idx    = a->j + a->i[i] ;
439:     v      = a->a + a->i[i] ;
440:     n      = a->i[i+1] - a->i[i];
441:     alpha1 = x[3*i];
442:     alpha2 = x[3*i+1];
443:     alpha3 = x[3*i+2];
444:     while (n-->0) {
445:       y[3*(*idx)]   += alpha1*(*v);
446:       y[3*(*idx)+1] += alpha2*(*v);
447:       y[3*(*idx)+2] += alpha3*(*v);
448:       idx++; v++;
449:     }
450:   }
451:   PetscLogFlops(6*a->nz);
452:   VecRestoreArray(xx,&x);
453:   VecRestoreArray(zz,&y);
454:   return(0);
455: }

457: /* ------------------------------------------------------------------------------*/
460: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
461: {
462:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
463:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
464:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
466:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
467:   PetscInt       n,i,jrow,j;

470:   VecGetArray(xx,&x);
471:   VecGetArray(yy,&y);
472:   idx  = a->j;
473:   v    = a->a;
474:   ii   = a->i;

476:   for (i=0; i<m; i++) {
477:     jrow = ii[i];
478:     n    = ii[i+1] - jrow;
479:     sum1  = 0.0;
480:     sum2  = 0.0;
481:     sum3  = 0.0;
482:     sum4  = 0.0;
483:     for (j=0; j<n; j++) {
484:       sum1 += v[jrow]*x[4*idx[jrow]];
485:       sum2 += v[jrow]*x[4*idx[jrow]+1];
486:       sum3 += v[jrow]*x[4*idx[jrow]+2];
487:       sum4 += v[jrow]*x[4*idx[jrow]+3];
488:       jrow++;
489:      }
490:     y[4*i]   = sum1;
491:     y[4*i+1] = sum2;
492:     y[4*i+2] = sum3;
493:     y[4*i+3] = sum4;
494:   }

496:   PetscLogFlops(8*a->nz - 4*m);
497:   VecRestoreArray(xx,&x);
498:   VecRestoreArray(yy,&y);
499:   return(0);
500: }

504: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
505: {
506:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
507:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
508:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
510:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

513:   VecSet(yy,zero);
514:   VecGetArray(xx,&x);
515:   VecGetArray(yy,&y);
516:   for (i=0; i<m; i++) {
517:     idx    = a->j + a->i[i] ;
518:     v      = a->a + a->i[i] ;
519:     n      = a->i[i+1] - a->i[i];
520:     alpha1 = x[4*i];
521:     alpha2 = x[4*i+1];
522:     alpha3 = x[4*i+2];
523:     alpha4 = x[4*i+3];
524:     while (n-->0) {
525:       y[4*(*idx)]   += alpha1*(*v);
526:       y[4*(*idx)+1] += alpha2*(*v);
527:       y[4*(*idx)+2] += alpha3*(*v);
528:       y[4*(*idx)+3] += alpha4*(*v);
529:       idx++; v++;
530:     }
531:   }
532:   PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
533:   VecRestoreArray(xx,&x);
534:   VecRestoreArray(yy,&y);
535:   return(0);
536: }

540: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
541: {
542:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
543:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
544:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
546:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
547:   PetscInt       n,i,jrow,j;

550:   if (yy != zz) {VecCopy(yy,zz);}
551:   VecGetArray(xx,&x);
552:   VecGetArray(zz,&y);
553:   idx  = a->j;
554:   v    = a->a;
555:   ii   = a->i;

557:   for (i=0; i<m; i++) {
558:     jrow = ii[i];
559:     n    = ii[i+1] - jrow;
560:     sum1  = 0.0;
561:     sum2  = 0.0;
562:     sum3  = 0.0;
563:     sum4  = 0.0;
564:     for (j=0; j<n; j++) {
565:       sum1 += v[jrow]*x[4*idx[jrow]];
566:       sum2 += v[jrow]*x[4*idx[jrow]+1];
567:       sum3 += v[jrow]*x[4*idx[jrow]+2];
568:       sum4 += v[jrow]*x[4*idx[jrow]+3];
569:       jrow++;
570:      }
571:     y[4*i]   += sum1;
572:     y[4*i+1] += sum2;
573:     y[4*i+2] += sum3;
574:     y[4*i+3] += sum4;
575:   }

577:   PetscLogFlops(8*a->nz - 4*m);
578:   VecRestoreArray(xx,&x);
579:   VecRestoreArray(zz,&y);
580:   return(0);
581: }
584: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
585: {
586:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
587:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
588:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
590:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

593:   if (yy != zz) {VecCopy(yy,zz);}
594:   VecGetArray(xx,&x);
595:   VecGetArray(zz,&y);
596: 
597:   for (i=0; i<m; i++) {
598:     idx    = a->j + a->i[i] ;
599:     v      = a->a + a->i[i] ;
600:     n      = a->i[i+1] - a->i[i];
601:     alpha1 = x[4*i];
602:     alpha2 = x[4*i+1];
603:     alpha3 = x[4*i+2];
604:     alpha4 = x[4*i+3];
605:     while (n-->0) {
606:       y[4*(*idx)]   += alpha1*(*v);
607:       y[4*(*idx)+1] += alpha2*(*v);
608:       y[4*(*idx)+2] += alpha3*(*v);
609:       y[4*(*idx)+3] += alpha4*(*v);
610:       idx++; v++;
611:     }
612:   }
613:   PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
614:   VecRestoreArray(xx,&x);
615:   VecRestoreArray(zz,&y);
616:   return(0);
617: }
618: /* ------------------------------------------------------------------------------*/

622: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
623: {
624:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
625:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
626:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
628:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
629:   PetscInt       n,i,jrow,j;

632:   VecGetArray(xx,&x);
633:   VecGetArray(yy,&y);
634:   idx  = a->j;
635:   v    = a->a;
636:   ii   = a->i;

638:   for (i=0; i<m; i++) {
639:     jrow = ii[i];
640:     n    = ii[i+1] - jrow;
641:     sum1  = 0.0;
642:     sum2  = 0.0;
643:     sum3  = 0.0;
644:     sum4  = 0.0;
645:     sum5  = 0.0;
646:     for (j=0; j<n; j++) {
647:       sum1 += v[jrow]*x[5*idx[jrow]];
648:       sum2 += v[jrow]*x[5*idx[jrow]+1];
649:       sum3 += v[jrow]*x[5*idx[jrow]+2];
650:       sum4 += v[jrow]*x[5*idx[jrow]+3];
651:       sum5 += v[jrow]*x[5*idx[jrow]+4];
652:       jrow++;
653:      }
654:     y[5*i]   = sum1;
655:     y[5*i+1] = sum2;
656:     y[5*i+2] = sum3;
657:     y[5*i+3] = sum4;
658:     y[5*i+4] = sum5;
659:   }

661:   PetscLogFlops(10*a->nz - 5*m);
662:   VecRestoreArray(xx,&x);
663:   VecRestoreArray(yy,&y);
664:   return(0);
665: }

669: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
670: {
671:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
672:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
673:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
675:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

678:   VecSet(yy,zero);
679:   VecGetArray(xx,&x);
680:   VecGetArray(yy,&y);
681: 
682:   for (i=0; i<m; i++) {
683:     idx    = a->j + a->i[i] ;
684:     v      = a->a + a->i[i] ;
685:     n      = a->i[i+1] - a->i[i];
686:     alpha1 = x[5*i];
687:     alpha2 = x[5*i+1];
688:     alpha3 = x[5*i+2];
689:     alpha4 = x[5*i+3];
690:     alpha5 = x[5*i+4];
691:     while (n-->0) {
692:       y[5*(*idx)]   += alpha1*(*v);
693:       y[5*(*idx)+1] += alpha2*(*v);
694:       y[5*(*idx)+2] += alpha3*(*v);
695:       y[5*(*idx)+3] += alpha4*(*v);
696:       y[5*(*idx)+4] += alpha5*(*v);
697:       idx++; v++;
698:     }
699:   }
700:   PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);
701:   VecRestoreArray(xx,&x);
702:   VecRestoreArray(yy,&y);
703:   return(0);
704: }

708: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
709: {
710:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
711:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
712:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
714:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
715:   PetscInt       n,i,jrow,j;

718:   if (yy != zz) {VecCopy(yy,zz);}
719:   VecGetArray(xx,&x);
720:   VecGetArray(zz,&y);
721:   idx  = a->j;
722:   v    = a->a;
723:   ii   = a->i;

725:   for (i=0; i<m; i++) {
726:     jrow = ii[i];
727:     n    = ii[i+1] - jrow;
728:     sum1  = 0.0;
729:     sum2  = 0.0;
730:     sum3  = 0.0;
731:     sum4  = 0.0;
732:     sum5  = 0.0;
733:     for (j=0; j<n; j++) {
734:       sum1 += v[jrow]*x[5*idx[jrow]];
735:       sum2 += v[jrow]*x[5*idx[jrow]+1];
736:       sum3 += v[jrow]*x[5*idx[jrow]+2];
737:       sum4 += v[jrow]*x[5*idx[jrow]+3];
738:       sum5 += v[jrow]*x[5*idx[jrow]+4];
739:       jrow++;
740:      }
741:     y[5*i]   += sum1;
742:     y[5*i+1] += sum2;
743:     y[5*i+2] += sum3;
744:     y[5*i+3] += sum4;
745:     y[5*i+4] += sum5;
746:   }

748:   PetscLogFlops(10*a->nz);
749:   VecRestoreArray(xx,&x);
750:   VecRestoreArray(zz,&y);
751:   return(0);
752: }

756: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
757: {
758:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
759:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
760:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
762:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

765:   if (yy != zz) {VecCopy(yy,zz);}
766:   VecGetArray(xx,&x);
767:   VecGetArray(zz,&y);
768: 
769:   for (i=0; i<m; i++) {
770:     idx    = a->j + a->i[i] ;
771:     v      = a->a + a->i[i] ;
772:     n      = a->i[i+1] - a->i[i];
773:     alpha1 = x[5*i];
774:     alpha2 = x[5*i+1];
775:     alpha3 = x[5*i+2];
776:     alpha4 = x[5*i+3];
777:     alpha5 = x[5*i+4];
778:     while (n-->0) {
779:       y[5*(*idx)]   += alpha1*(*v);
780:       y[5*(*idx)+1] += alpha2*(*v);
781:       y[5*(*idx)+2] += alpha3*(*v);
782:       y[5*(*idx)+3] += alpha4*(*v);
783:       y[5*(*idx)+4] += alpha5*(*v);
784:       idx++; v++;
785:     }
786:   }
787:   PetscLogFlops(10*a->nz);
788:   VecRestoreArray(xx,&x);
789:   VecRestoreArray(zz,&y);
790:   return(0);
791: }

793: /* ------------------------------------------------------------------------------*/
796: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
797: {
798:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
799:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
800:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
802:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
803:   PetscInt       n,i,jrow,j;

806:   VecGetArray(xx,&x);
807:   VecGetArray(yy,&y);
808:   idx  = a->j;
809:   v    = a->a;
810:   ii   = a->i;

812:   for (i=0; i<m; i++) {
813:     jrow = ii[i];
814:     n    = ii[i+1] - jrow;
815:     sum1  = 0.0;
816:     sum2  = 0.0;
817:     sum3  = 0.0;
818:     sum4  = 0.0;
819:     sum5  = 0.0;
820:     sum6  = 0.0;
821:     for (j=0; j<n; j++) {
822:       sum1 += v[jrow]*x[6*idx[jrow]];
823:       sum2 += v[jrow]*x[6*idx[jrow]+1];
824:       sum3 += v[jrow]*x[6*idx[jrow]+2];
825:       sum4 += v[jrow]*x[6*idx[jrow]+3];
826:       sum5 += v[jrow]*x[6*idx[jrow]+4];
827:       sum6 += v[jrow]*x[6*idx[jrow]+5];
828:       jrow++;
829:      }
830:     y[6*i]   = sum1;
831:     y[6*i+1] = sum2;
832:     y[6*i+2] = sum3;
833:     y[6*i+3] = sum4;
834:     y[6*i+4] = sum5;
835:     y[6*i+5] = sum6;
836:   }

838:   PetscLogFlops(12*a->nz - 6*m);
839:   VecRestoreArray(xx,&x);
840:   VecRestoreArray(yy,&y);
841:   return(0);
842: }

846: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
847: {
848:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
849:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
850:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
852:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

855:   VecSet(yy,zero);
856:   VecGetArray(xx,&x);
857:   VecGetArray(yy,&y);

859:   for (i=0; i<m; i++) {
860:     idx    = a->j + a->i[i] ;
861:     v      = a->a + a->i[i] ;
862:     n      = a->i[i+1] - a->i[i];
863:     alpha1 = x[6*i];
864:     alpha2 = x[6*i+1];
865:     alpha3 = x[6*i+2];
866:     alpha4 = x[6*i+3];
867:     alpha5 = x[6*i+4];
868:     alpha6 = x[6*i+5];
869:     while (n-->0) {
870:       y[6*(*idx)]   += alpha1*(*v);
871:       y[6*(*idx)+1] += alpha2*(*v);
872:       y[6*(*idx)+2] += alpha3*(*v);
873:       y[6*(*idx)+3] += alpha4*(*v);
874:       y[6*(*idx)+4] += alpha5*(*v);
875:       y[6*(*idx)+5] += alpha6*(*v);
876:       idx++; v++;
877:     }
878:   }
879:   PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);
880:   VecRestoreArray(xx,&x);
881:   VecRestoreArray(yy,&y);
882:   return(0);
883: }

887: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
888: {
889:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
890:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
891:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
893:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
894:   PetscInt       n,i,jrow,j;

897:   if (yy != zz) {VecCopy(yy,zz);}
898:   VecGetArray(xx,&x);
899:   VecGetArray(zz,&y);
900:   idx  = a->j;
901:   v    = a->a;
902:   ii   = a->i;

904:   for (i=0; i<m; i++) {
905:     jrow = ii[i];
906:     n    = ii[i+1] - jrow;
907:     sum1  = 0.0;
908:     sum2  = 0.0;
909:     sum3  = 0.0;
910:     sum4  = 0.0;
911:     sum5  = 0.0;
912:     sum6  = 0.0;
913:     for (j=0; j<n; j++) {
914:       sum1 += v[jrow]*x[6*idx[jrow]];
915:       sum2 += v[jrow]*x[6*idx[jrow]+1];
916:       sum3 += v[jrow]*x[6*idx[jrow]+2];
917:       sum4 += v[jrow]*x[6*idx[jrow]+3];
918:       sum5 += v[jrow]*x[6*idx[jrow]+4];
919:       sum6 += v[jrow]*x[6*idx[jrow]+5];
920:       jrow++;
921:      }
922:     y[6*i]   += sum1;
923:     y[6*i+1] += sum2;
924:     y[6*i+2] += sum3;
925:     y[6*i+3] += sum4;
926:     y[6*i+4] += sum5;
927:     y[6*i+5] += sum6;
928:   }

930:   PetscLogFlops(12*a->nz);
931:   VecRestoreArray(xx,&x);
932:   VecRestoreArray(zz,&y);
933:   return(0);
934: }

938: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
939: {
940:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
941:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
942:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
944:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

947:   if (yy != zz) {VecCopy(yy,zz);}
948:   VecGetArray(xx,&x);
949:   VecGetArray(zz,&y);
950: 
951:   for (i=0; i<m; i++) {
952:     idx    = a->j + a->i[i] ;
953:     v      = a->a + a->i[i] ;
954:     n      = a->i[i+1] - a->i[i];
955:     alpha1 = x[6*i];
956:     alpha2 = x[6*i+1];
957:     alpha3 = x[6*i+2];
958:     alpha4 = x[6*i+3];
959:     alpha5 = x[6*i+4];
960:     alpha6 = x[6*i+5];
961:     while (n-->0) {
962:       y[6*(*idx)]   += alpha1*(*v);
963:       y[6*(*idx)+1] += alpha2*(*v);
964:       y[6*(*idx)+2] += alpha3*(*v);
965:       y[6*(*idx)+3] += alpha4*(*v);
966:       y[6*(*idx)+4] += alpha5*(*v);
967:       y[6*(*idx)+5] += alpha6*(*v);
968:       idx++; v++;
969:     }
970:   }
971:   PetscLogFlops(12*a->nz);
972:   VecRestoreArray(xx,&x);
973:   VecRestoreArray(zz,&y);
974:   return(0);
975: }

977: /* ------------------------------------------------------------------------------*/
980: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
981: {
982:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
983:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
984:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
986:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
987:   PetscInt       n,i,jrow,j;

990:   VecGetArray(xx,&x);
991:   VecGetArray(yy,&y);
992:   idx  = a->j;
993:   v    = a->a;
994:   ii   = a->i;

996:   for (i=0; i<m; i++) {
997:     jrow = ii[i];
998:     n    = ii[i+1] - jrow;
999:     sum1  = 0.0;
1000:     sum2  = 0.0;
1001:     sum3  = 0.0;
1002:     sum4  = 0.0;
1003:     sum5  = 0.0;
1004:     sum6  = 0.0;
1005:     sum7  = 0.0;
1006:     for (j=0; j<n; j++) {
1007:       sum1 += v[jrow]*x[7*idx[jrow]];
1008:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1009:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1010:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1011:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1012:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1013:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1014:       jrow++;
1015:      }
1016:     y[7*i]   = sum1;
1017:     y[7*i+1] = sum2;
1018:     y[7*i+2] = sum3;
1019:     y[7*i+3] = sum4;
1020:     y[7*i+4] = sum5;
1021:     y[7*i+5] = sum6;
1022:     y[7*i+6] = sum7;
1023:   }

1025:   PetscLogFlops(14*a->nz - 7*m);
1026:   VecRestoreArray(xx,&x);
1027:   VecRestoreArray(yy,&y);
1028:   return(0);
1029: }

1033: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1034: {
1035:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1036:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1037:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1039:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1042:   VecSet(yy,zero);
1043:   VecGetArray(xx,&x);
1044:   VecGetArray(yy,&y);

1046:   for (i=0; i<m; i++) {
1047:     idx    = a->j + a->i[i] ;
1048:     v      = a->a + a->i[i] ;
1049:     n      = a->i[i+1] - a->i[i];
1050:     alpha1 = x[7*i];
1051:     alpha2 = x[7*i+1];
1052:     alpha3 = x[7*i+2];
1053:     alpha4 = x[7*i+3];
1054:     alpha5 = x[7*i+4];
1055:     alpha6 = x[7*i+5];
1056:     alpha7 = x[7*i+6];
1057:     while (n-->0) {
1058:       y[7*(*idx)]   += alpha1*(*v);
1059:       y[7*(*idx)+1] += alpha2*(*v);
1060:       y[7*(*idx)+2] += alpha3*(*v);
1061:       y[7*(*idx)+3] += alpha4*(*v);
1062:       y[7*(*idx)+4] += alpha5*(*v);
1063:       y[7*(*idx)+5] += alpha6*(*v);
1064:       y[7*(*idx)+6] += alpha7*(*v);
1065:       idx++; v++;
1066:     }
1067:   }
1068:   PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);
1069:   VecRestoreArray(xx,&x);
1070:   VecRestoreArray(yy,&y);
1071:   return(0);
1072: }

1076: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1077: {
1078:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1079:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1080:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1082:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1083:   PetscInt       n,i,jrow,j;

1086:   if (yy != zz) {VecCopy(yy,zz);}
1087:   VecGetArray(xx,&x);
1088:   VecGetArray(zz,&y);
1089:   idx  = a->j;
1090:   v    = a->a;
1091:   ii   = a->i;

1093:   for (i=0; i<m; i++) {
1094:     jrow = ii[i];
1095:     n    = ii[i+1] - jrow;
1096:     sum1  = 0.0;
1097:     sum2  = 0.0;
1098:     sum3  = 0.0;
1099:     sum4  = 0.0;
1100:     sum5  = 0.0;
1101:     sum6  = 0.0;
1102:     sum7  = 0.0;
1103:     for (j=0; j<n; j++) {
1104:       sum1 += v[jrow]*x[7*idx[jrow]];
1105:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1106:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1107:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1108:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1109:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1110:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1111:       jrow++;
1112:      }
1113:     y[7*i]   += sum1;
1114:     y[7*i+1] += sum2;
1115:     y[7*i+2] += sum3;
1116:     y[7*i+3] += sum4;
1117:     y[7*i+4] += sum5;
1118:     y[7*i+5] += sum6;
1119:     y[7*i+6] += sum7;
1120:   }

1122:   PetscLogFlops(14*a->nz);
1123:   VecRestoreArray(xx,&x);
1124:   VecRestoreArray(zz,&y);
1125:   return(0);
1126: }

1130: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1131: {
1132:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1133:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1134:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1136:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1139:   if (yy != zz) {VecCopy(yy,zz);}
1140:   VecGetArray(xx,&x);
1141:   VecGetArray(zz,&y);
1142:   for (i=0; i<m; i++) {
1143:     idx    = a->j + a->i[i] ;
1144:     v      = a->a + a->i[i] ;
1145:     n      = a->i[i+1] - a->i[i];
1146:     alpha1 = x[7*i];
1147:     alpha2 = x[7*i+1];
1148:     alpha3 = x[7*i+2];
1149:     alpha4 = x[7*i+3];
1150:     alpha5 = x[7*i+4];
1151:     alpha6 = x[7*i+5];
1152:     alpha7 = x[7*i+6];
1153:     while (n-->0) {
1154:       y[7*(*idx)]   += alpha1*(*v);
1155:       y[7*(*idx)+1] += alpha2*(*v);
1156:       y[7*(*idx)+2] += alpha3*(*v);
1157:       y[7*(*idx)+3] += alpha4*(*v);
1158:       y[7*(*idx)+4] += alpha5*(*v);
1159:       y[7*(*idx)+5] += alpha6*(*v);
1160:       y[7*(*idx)+6] += alpha7*(*v);
1161:       idx++; v++;
1162:     }
1163:   }
1164:   PetscLogFlops(14*a->nz);
1165:   VecRestoreArray(xx,&x);
1166:   VecRestoreArray(zz,&y);
1167:   return(0);
1168: }

1172: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1173: {
1174:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1175:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1176:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1178:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1179:   PetscInt       n,i,jrow,j;

1182:   VecGetArray(xx,&x);
1183:   VecGetArray(yy,&y);
1184:   idx  = a->j;
1185:   v    = a->a;
1186:   ii   = a->i;

1188:   for (i=0; i<m; i++) {
1189:     jrow = ii[i];
1190:     n    = ii[i+1] - jrow;
1191:     sum1  = 0.0;
1192:     sum2  = 0.0;
1193:     sum3  = 0.0;
1194:     sum4  = 0.0;
1195:     sum5  = 0.0;
1196:     sum6  = 0.0;
1197:     sum7  = 0.0;
1198:     sum8  = 0.0;
1199:     for (j=0; j<n; j++) {
1200:       sum1 += v[jrow]*x[8*idx[jrow]];
1201:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1202:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1203:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1204:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1205:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1206:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1207:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1208:       jrow++;
1209:      }
1210:     y[8*i]   = sum1;
1211:     y[8*i+1] = sum2;
1212:     y[8*i+2] = sum3;
1213:     y[8*i+3] = sum4;
1214:     y[8*i+4] = sum5;
1215:     y[8*i+5] = sum6;
1216:     y[8*i+6] = sum7;
1217:     y[8*i+7] = sum8;
1218:   }

1220:   PetscLogFlops(16*a->nz - 8*m);
1221:   VecRestoreArray(xx,&x);
1222:   VecRestoreArray(yy,&y);
1223:   return(0);
1224: }

1228: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1229: {
1230:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1231:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1232:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1234:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1237:   VecSet(yy,zero);
1238:   VecGetArray(xx,&x);
1239:   VecGetArray(yy,&y);

1241:   for (i=0; i<m; i++) {
1242:     idx    = a->j + a->i[i] ;
1243:     v      = a->a + a->i[i] ;
1244:     n      = a->i[i+1] - a->i[i];
1245:     alpha1 = x[8*i];
1246:     alpha2 = x[8*i+1];
1247:     alpha3 = x[8*i+2];
1248:     alpha4 = x[8*i+3];
1249:     alpha5 = x[8*i+4];
1250:     alpha6 = x[8*i+5];
1251:     alpha7 = x[8*i+6];
1252:     alpha8 = x[8*i+7];
1253:     while (n-->0) {
1254:       y[8*(*idx)]   += alpha1*(*v);
1255:       y[8*(*idx)+1] += alpha2*(*v);
1256:       y[8*(*idx)+2] += alpha3*(*v);
1257:       y[8*(*idx)+3] += alpha4*(*v);
1258:       y[8*(*idx)+4] += alpha5*(*v);
1259:       y[8*(*idx)+5] += alpha6*(*v);
1260:       y[8*(*idx)+6] += alpha7*(*v);
1261:       y[8*(*idx)+7] += alpha8*(*v);
1262:       idx++; v++;
1263:     }
1264:   }
1265:   PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);
1266:   VecRestoreArray(xx,&x);
1267:   VecRestoreArray(yy,&y);
1268:   return(0);
1269: }

1273: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1274: {
1275:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1276:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1277:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1279:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1280:   PetscInt       n,i,jrow,j;

1283:   if (yy != zz) {VecCopy(yy,zz);}
1284:   VecGetArray(xx,&x);
1285:   VecGetArray(zz,&y);
1286:   idx  = a->j;
1287:   v    = a->a;
1288:   ii   = a->i;

1290:   for (i=0; i<m; i++) {
1291:     jrow = ii[i];
1292:     n    = ii[i+1] - jrow;
1293:     sum1  = 0.0;
1294:     sum2  = 0.0;
1295:     sum3  = 0.0;
1296:     sum4  = 0.0;
1297:     sum5  = 0.0;
1298:     sum6  = 0.0;
1299:     sum7  = 0.0;
1300:     sum8  = 0.0;
1301:     for (j=0; j<n; j++) {
1302:       sum1 += v[jrow]*x[8*idx[jrow]];
1303:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1304:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1305:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1306:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1307:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1308:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1309:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1310:       jrow++;
1311:      }
1312:     y[8*i]   += sum1;
1313:     y[8*i+1] += sum2;
1314:     y[8*i+2] += sum3;
1315:     y[8*i+3] += sum4;
1316:     y[8*i+4] += sum5;
1317:     y[8*i+5] += sum6;
1318:     y[8*i+6] += sum7;
1319:     y[8*i+7] += sum8;
1320:   }

1322:   PetscLogFlops(16*a->nz);
1323:   VecRestoreArray(xx,&x);
1324:   VecRestoreArray(zz,&y);
1325:   return(0);
1326: }

1330: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1331: {
1332:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1333:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1334:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1336:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1339:   if (yy != zz) {VecCopy(yy,zz);}
1340:   VecGetArray(xx,&x);
1341:   VecGetArray(zz,&y);
1342:   for (i=0; i<m; i++) {
1343:     idx    = a->j + a->i[i] ;
1344:     v      = a->a + a->i[i] ;
1345:     n      = a->i[i+1] - a->i[i];
1346:     alpha1 = x[8*i];
1347:     alpha2 = x[8*i+1];
1348:     alpha3 = x[8*i+2];
1349:     alpha4 = x[8*i+3];
1350:     alpha5 = x[8*i+4];
1351:     alpha6 = x[8*i+5];
1352:     alpha7 = x[8*i+6];
1353:     alpha8 = x[8*i+7];
1354:     while (n-->0) {
1355:       y[8*(*idx)]   += alpha1*(*v);
1356:       y[8*(*idx)+1] += alpha2*(*v);
1357:       y[8*(*idx)+2] += alpha3*(*v);
1358:       y[8*(*idx)+3] += alpha4*(*v);
1359:       y[8*(*idx)+4] += alpha5*(*v);
1360:       y[8*(*idx)+5] += alpha6*(*v);
1361:       y[8*(*idx)+6] += alpha7*(*v);
1362:       y[8*(*idx)+7] += alpha8*(*v);
1363:       idx++; v++;
1364:     }
1365:   }
1366:   PetscLogFlops(16*a->nz);
1367:   VecRestoreArray(xx,&x);
1368:   VecRestoreArray(zz,&y);
1369:   return(0);
1370: }

1372: /* ------------------------------------------------------------------------------*/
1375: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1376: {
1377:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1378:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1379:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1381:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1382:   PetscInt       n,i,jrow,j;

1385:   VecGetArray(xx,&x);
1386:   VecGetArray(yy,&y);
1387:   idx  = a->j;
1388:   v    = a->a;
1389:   ii   = a->i;

1391:   for (i=0; i<m; i++) {
1392:     jrow = ii[i];
1393:     n    = ii[i+1] - jrow;
1394:     sum1  = 0.0;
1395:     sum2  = 0.0;
1396:     sum3  = 0.0;
1397:     sum4  = 0.0;
1398:     sum5  = 0.0;
1399:     sum6  = 0.0;
1400:     sum7  = 0.0;
1401:     sum8  = 0.0;
1402:     sum9  = 0.0;
1403:     for (j=0; j<n; j++) {
1404:       sum1 += v[jrow]*x[9*idx[jrow]];
1405:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1406:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1407:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1408:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1409:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1410:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1411:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1412:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1413:       jrow++;
1414:      }
1415:     y[9*i]   = sum1;
1416:     y[9*i+1] = sum2;
1417:     y[9*i+2] = sum3;
1418:     y[9*i+3] = sum4;
1419:     y[9*i+4] = sum5;
1420:     y[9*i+5] = sum6;
1421:     y[9*i+6] = sum7;
1422:     y[9*i+7] = sum8;
1423:     y[9*i+8] = sum9;
1424:   }

1426:   PetscLogFlops(18*a->nz - 9*m);
1427:   VecRestoreArray(xx,&x);
1428:   VecRestoreArray(yy,&y);
1429:   return(0);
1430: }

1432: /* ------------------------------------------------------------------------------*/

1436: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1437: {
1438:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1439:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1440:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1442:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1445:   VecSet(yy,zero);
1446:   VecGetArray(xx,&x);
1447:   VecGetArray(yy,&y);

1449:   for (i=0; i<m; i++) {
1450:     idx    = a->j + a->i[i] ;
1451:     v      = a->a + a->i[i] ;
1452:     n      = a->i[i+1] - a->i[i];
1453:     alpha1 = x[9*i];
1454:     alpha2 = x[9*i+1];
1455:     alpha3 = x[9*i+2];
1456:     alpha4 = x[9*i+3];
1457:     alpha5 = x[9*i+4];
1458:     alpha6 = x[9*i+5];
1459:     alpha7 = x[9*i+6];
1460:     alpha8 = x[9*i+7];
1461:     alpha9 = x[9*i+8];
1462:     while (n-->0) {
1463:       y[9*(*idx)]   += alpha1*(*v);
1464:       y[9*(*idx)+1] += alpha2*(*v);
1465:       y[9*(*idx)+2] += alpha3*(*v);
1466:       y[9*(*idx)+3] += alpha4*(*v);
1467:       y[9*(*idx)+4] += alpha5*(*v);
1468:       y[9*(*idx)+5] += alpha6*(*v);
1469:       y[9*(*idx)+6] += alpha7*(*v);
1470:       y[9*(*idx)+7] += alpha8*(*v);
1471:       y[9*(*idx)+8] += alpha9*(*v);
1472:       idx++; v++;
1473:     }
1474:   }
1475:   PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);
1476:   VecRestoreArray(xx,&x);
1477:   VecRestoreArray(yy,&y);
1478:   return(0);
1479: }

1483: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1484: {
1485:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1486:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1487:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1489:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1490:   PetscInt       n,i,jrow,j;

1493:   if (yy != zz) {VecCopy(yy,zz);}
1494:   VecGetArray(xx,&x);
1495:   VecGetArray(zz,&y);
1496:   idx  = a->j;
1497:   v    = a->a;
1498:   ii   = a->i;

1500:   for (i=0; i<m; i++) {
1501:     jrow = ii[i];
1502:     n    = ii[i+1] - jrow;
1503:     sum1  = 0.0;
1504:     sum2  = 0.0;
1505:     sum3  = 0.0;
1506:     sum4  = 0.0;
1507:     sum5  = 0.0;
1508:     sum6  = 0.0;
1509:     sum7  = 0.0;
1510:     sum8  = 0.0;
1511:     sum9  = 0.0;
1512:     for (j=0; j<n; j++) {
1513:       sum1 += v[jrow]*x[9*idx[jrow]];
1514:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1515:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1516:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1517:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1518:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1519:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1520:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1521:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1522:       jrow++;
1523:      }
1524:     y[9*i]   += sum1;
1525:     y[9*i+1] += sum2;
1526:     y[9*i+2] += sum3;
1527:     y[9*i+3] += sum4;
1528:     y[9*i+4] += sum5;
1529:     y[9*i+5] += sum6;
1530:     y[9*i+6] += sum7;
1531:     y[9*i+7] += sum8;
1532:     y[9*i+8] += sum9;
1533:   }

1535:   PetscLogFlops(18*a->nz);
1536:   VecRestoreArray(xx,&x);
1537:   VecRestoreArray(zz,&y);
1538:   return(0);
1539: }

1543: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1544: {
1545:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1546:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1547:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1549:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1552:   if (yy != zz) {VecCopy(yy,zz);}
1553:   VecGetArray(xx,&x);
1554:   VecGetArray(zz,&y);
1555:   for (i=0; i<m; i++) {
1556:     idx    = a->j + a->i[i] ;
1557:     v      = a->a + a->i[i] ;
1558:     n      = a->i[i+1] - a->i[i];
1559:     alpha1 = x[9*i];
1560:     alpha2 = x[9*i+1];
1561:     alpha3 = x[9*i+2];
1562:     alpha4 = x[9*i+3];
1563:     alpha5 = x[9*i+4];
1564:     alpha6 = x[9*i+5];
1565:     alpha7 = x[9*i+6];
1566:     alpha8 = x[9*i+7];
1567:     alpha9 = x[9*i+8];
1568:     while (n-->0) {
1569:       y[9*(*idx)]   += alpha1*(*v);
1570:       y[9*(*idx)+1] += alpha2*(*v);
1571:       y[9*(*idx)+2] += alpha3*(*v);
1572:       y[9*(*idx)+3] += alpha4*(*v);
1573:       y[9*(*idx)+4] += alpha5*(*v);
1574:       y[9*(*idx)+5] += alpha6*(*v);
1575:       y[9*(*idx)+6] += alpha7*(*v);
1576:       y[9*(*idx)+7] += alpha8*(*v);
1577:       y[9*(*idx)+8] += alpha9*(*v);
1578:       idx++; v++;
1579:     }
1580:   }
1581:   PetscLogFlops(18*a->nz);
1582:   VecRestoreArray(xx,&x);
1583:   VecRestoreArray(zz,&y);
1584:   return(0);
1585: }
1586: /*--------------------------------------------------------------------------------------------*/
1589: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1590: {
1591:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1592:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1593:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1595:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1596:   PetscInt       n,i,jrow,j;

1599:   VecGetArray(xx,&x);
1600:   VecGetArray(yy,&y);
1601:   idx  = a->j;
1602:   v    = a->a;
1603:   ii   = a->i;

1605:   for (i=0; i<m; i++) {
1606:     jrow = ii[i];
1607:     n    = ii[i+1] - jrow;
1608:     sum1  = 0.0;
1609:     sum2  = 0.0;
1610:     sum3  = 0.0;
1611:     sum4  = 0.0;
1612:     sum5  = 0.0;
1613:     sum6  = 0.0;
1614:     sum7  = 0.0;
1615:     sum8  = 0.0;
1616:     sum9  = 0.0;
1617:     sum10 = 0.0;
1618:     for (j=0; j<n; j++) {
1619:       sum1  += v[jrow]*x[10*idx[jrow]];
1620:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1621:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1622:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1623:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1624:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1625:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1626:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1627:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1628:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1629:       jrow++;
1630:      }
1631:     y[10*i]   = sum1;
1632:     y[10*i+1] = sum2;
1633:     y[10*i+2] = sum3;
1634:     y[10*i+3] = sum4;
1635:     y[10*i+4] = sum5;
1636:     y[10*i+5] = sum6;
1637:     y[10*i+6] = sum7;
1638:     y[10*i+7] = sum8;
1639:     y[10*i+8] = sum9;
1640:     y[10*i+9] = sum10;
1641:   }

1643:   PetscLogFlops(20*a->nz - 10*m);
1644:   VecRestoreArray(xx,&x);
1645:   VecRestoreArray(yy,&y);
1646:   return(0);
1647: }

1651: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1652: {
1653:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1654:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1655:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1657:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1658:   PetscInt       n,i,jrow,j;

1661:   if (yy != zz) {VecCopy(yy,zz);}
1662:   VecGetArray(xx,&x);
1663:   VecGetArray(zz,&y);
1664:   idx  = a->j;
1665:   v    = a->a;
1666:   ii   = a->i;

1668:   for (i=0; i<m; i++) {
1669:     jrow = ii[i];
1670:     n    = ii[i+1] - jrow;
1671:     sum1  = 0.0;
1672:     sum2  = 0.0;
1673:     sum3  = 0.0;
1674:     sum4  = 0.0;
1675:     sum5  = 0.0;
1676:     sum6  = 0.0;
1677:     sum7  = 0.0;
1678:     sum8  = 0.0;
1679:     sum9  = 0.0;
1680:     sum10 = 0.0;
1681:     for (j=0; j<n; j++) {
1682:       sum1  += v[jrow]*x[10*idx[jrow]];
1683:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1684:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1685:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1686:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1687:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1688:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1689:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1690:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1691:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1692:       jrow++;
1693:      }
1694:     y[10*i]   += sum1;
1695:     y[10*i+1] += sum2;
1696:     y[10*i+2] += sum3;
1697:     y[10*i+3] += sum4;
1698:     y[10*i+4] += sum5;
1699:     y[10*i+5] += sum6;
1700:     y[10*i+6] += sum7;
1701:     y[10*i+7] += sum8;
1702:     y[10*i+8] += sum9;
1703:     y[10*i+9] += sum10;
1704:   }

1706:   PetscLogFlops(20*a->nz - 10*m);
1707:   VecRestoreArray(xx,&x);
1708:   VecRestoreArray(yy,&y);
1709:   return(0);
1710: }

1714: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1715: {
1716:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1717:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1718:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1720:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1723:   VecSet(yy,zero);
1724:   VecGetArray(xx,&x);
1725:   VecGetArray(yy,&y);

1727:   for (i=0; i<m; i++) {
1728:     idx    = a->j + a->i[i] ;
1729:     v      = a->a + a->i[i] ;
1730:     n      = a->i[i+1] - a->i[i];
1731:     alpha1 = x[10*i];
1732:     alpha2 = x[10*i+1];
1733:     alpha3 = x[10*i+2];
1734:     alpha4 = x[10*i+3];
1735:     alpha5 = x[10*i+4];
1736:     alpha6 = x[10*i+5];
1737:     alpha7 = x[10*i+6];
1738:     alpha8 = x[10*i+7];
1739:     alpha9 = x[10*i+8];
1740:     alpha10 = x[10*i+9];
1741:     while (n-->0) {
1742:       y[10*(*idx)]   += alpha1*(*v);
1743:       y[10*(*idx)+1] += alpha2*(*v);
1744:       y[10*(*idx)+2] += alpha3*(*v);
1745:       y[10*(*idx)+3] += alpha4*(*v);
1746:       y[10*(*idx)+4] += alpha5*(*v);
1747:       y[10*(*idx)+5] += alpha6*(*v);
1748:       y[10*(*idx)+6] += alpha7*(*v);
1749:       y[10*(*idx)+7] += alpha8*(*v);
1750:       y[10*(*idx)+8] += alpha9*(*v);
1751:       y[10*(*idx)+9] += alpha10*(*v);
1752:       idx++; v++;
1753:     }
1754:   }
1755:   PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);
1756:   VecRestoreArray(xx,&x);
1757:   VecRestoreArray(yy,&y);
1758:   return(0);
1759: }

1763: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1764: {
1765:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1766:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1767:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1769:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1772:   if (yy != zz) {VecCopy(yy,zz);}
1773:   VecGetArray(xx,&x);
1774:   VecGetArray(zz,&y);
1775:   for (i=0; i<m; i++) {
1776:     idx    = a->j + a->i[i] ;
1777:     v      = a->a + a->i[i] ;
1778:     n      = a->i[i+1] - a->i[i];
1779:     alpha1 = x[10*i];
1780:     alpha2 = x[10*i+1];
1781:     alpha3 = x[10*i+2];
1782:     alpha4 = x[10*i+3];
1783:     alpha5 = x[10*i+4];
1784:     alpha6 = x[10*i+5];
1785:     alpha7 = x[10*i+6];
1786:     alpha8 = x[10*i+7];
1787:     alpha9 = x[10*i+8];
1788:     alpha10 = x[10*i+9];
1789:     while (n-->0) {
1790:       y[10*(*idx)]   += alpha1*(*v);
1791:       y[10*(*idx)+1] += alpha2*(*v);
1792:       y[10*(*idx)+2] += alpha3*(*v);
1793:       y[10*(*idx)+3] += alpha4*(*v);
1794:       y[10*(*idx)+4] += alpha5*(*v);
1795:       y[10*(*idx)+5] += alpha6*(*v);
1796:       y[10*(*idx)+6] += alpha7*(*v);
1797:       y[10*(*idx)+7] += alpha8*(*v);
1798:       y[10*(*idx)+8] += alpha9*(*v);
1799:       y[10*(*idx)+9] += alpha10*(*v);
1800:       idx++; v++;
1801:     }
1802:   }
1803:   PetscLogFlops(20*a->nz);
1804:   VecRestoreArray(xx,&x);
1805:   VecRestoreArray(zz,&y);
1806:   return(0);
1807: }


1810: /*--------------------------------------------------------------------------------------------*/
1813: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1814: {
1815:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1816:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1817:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1818:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1820:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1821:   PetscInt       n,i,jrow,j;

1824:   VecGetArray(xx,&x);
1825:   VecGetArray(yy,&y);
1826:   idx  = a->j;
1827:   v    = a->a;
1828:   ii   = a->i;

1830:   for (i=0; i<m; i++) {
1831:     jrow = ii[i];
1832:     n    = ii[i+1] - jrow;
1833:     sum1  = 0.0;
1834:     sum2  = 0.0;
1835:     sum3  = 0.0;
1836:     sum4  = 0.0;
1837:     sum5  = 0.0;
1838:     sum6  = 0.0;
1839:     sum7  = 0.0;
1840:     sum8  = 0.0;
1841:     sum9  = 0.0;
1842:     sum10 = 0.0;
1843:     sum11 = 0.0;
1844:     sum12 = 0.0;
1845:     sum13 = 0.0;
1846:     sum14 = 0.0;
1847:     sum15 = 0.0;
1848:     sum16 = 0.0;
1849:     for (j=0; j<n; j++) {
1850:       sum1  += v[jrow]*x[16*idx[jrow]];
1851:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1852:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1853:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1854:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1855:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1856:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1857:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1858:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1859:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1860:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1861:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1862:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1863:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1864:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1865:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1866:       jrow++;
1867:      }
1868:     y[16*i]    = sum1;
1869:     y[16*i+1]  = sum2;
1870:     y[16*i+2]  = sum3;
1871:     y[16*i+3]  = sum4;
1872:     y[16*i+4]  = sum5;
1873:     y[16*i+5]  = sum6;
1874:     y[16*i+6]  = sum7;
1875:     y[16*i+7]  = sum8;
1876:     y[16*i+8]  = sum9;
1877:     y[16*i+9]  = sum10;
1878:     y[16*i+10] = sum11;
1879:     y[16*i+11] = sum12;
1880:     y[16*i+12] = sum13;
1881:     y[16*i+13] = sum14;
1882:     y[16*i+14] = sum15;
1883:     y[16*i+15] = sum16;
1884:   }

1886:   PetscLogFlops(32*a->nz - 16*m);
1887:   VecRestoreArray(xx,&x);
1888:   VecRestoreArray(yy,&y);
1889:   return(0);
1890: }

1894: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1895: {
1896:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1897:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1898:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1899:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1901:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1904:   VecSet(yy,zero);
1905:   VecGetArray(xx,&x);
1906:   VecGetArray(yy,&y);

1908:   for (i=0; i<m; i++) {
1909:     idx    = a->j + a->i[i] ;
1910:     v      = a->a + a->i[i] ;
1911:     n      = a->i[i+1] - a->i[i];
1912:     alpha1  = x[16*i];
1913:     alpha2  = x[16*i+1];
1914:     alpha3  = x[16*i+2];
1915:     alpha4  = x[16*i+3];
1916:     alpha5  = x[16*i+4];
1917:     alpha6  = x[16*i+5];
1918:     alpha7  = x[16*i+6];
1919:     alpha8  = x[16*i+7];
1920:     alpha9  = x[16*i+8];
1921:     alpha10 = x[16*i+9];
1922:     alpha11 = x[16*i+10];
1923:     alpha12 = x[16*i+11];
1924:     alpha13 = x[16*i+12];
1925:     alpha14 = x[16*i+13];
1926:     alpha15 = x[16*i+14];
1927:     alpha16 = x[16*i+15];
1928:     while (n-->0) {
1929:       y[16*(*idx)]    += alpha1*(*v);
1930:       y[16*(*idx)+1]  += alpha2*(*v);
1931:       y[16*(*idx)+2]  += alpha3*(*v);
1932:       y[16*(*idx)+3]  += alpha4*(*v);
1933:       y[16*(*idx)+4]  += alpha5*(*v);
1934:       y[16*(*idx)+5]  += alpha6*(*v);
1935:       y[16*(*idx)+6]  += alpha7*(*v);
1936:       y[16*(*idx)+7]  += alpha8*(*v);
1937:       y[16*(*idx)+8]  += alpha9*(*v);
1938:       y[16*(*idx)+9]  += alpha10*(*v);
1939:       y[16*(*idx)+10] += alpha11*(*v);
1940:       y[16*(*idx)+11] += alpha12*(*v);
1941:       y[16*(*idx)+12] += alpha13*(*v);
1942:       y[16*(*idx)+13] += alpha14*(*v);
1943:       y[16*(*idx)+14] += alpha15*(*v);
1944:       y[16*(*idx)+15] += alpha16*(*v);
1945:       idx++; v++;
1946:     }
1947:   }
1948:   PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);
1949:   VecRestoreArray(xx,&x);
1950:   VecRestoreArray(yy,&y);
1951:   return(0);
1952: }

1956: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1957: {
1958:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1959:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1960:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1961:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1963:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1964:   PetscInt       n,i,jrow,j;

1967:   if (yy != zz) {VecCopy(yy,zz);}
1968:   VecGetArray(xx,&x);
1969:   VecGetArray(zz,&y);
1970:   idx  = a->j;
1971:   v    = a->a;
1972:   ii   = a->i;

1974:   for (i=0; i<m; i++) {
1975:     jrow = ii[i];
1976:     n    = ii[i+1] - jrow;
1977:     sum1  = 0.0;
1978:     sum2  = 0.0;
1979:     sum3  = 0.0;
1980:     sum4  = 0.0;
1981:     sum5  = 0.0;
1982:     sum6  = 0.0;
1983:     sum7  = 0.0;
1984:     sum8  = 0.0;
1985:     sum9  = 0.0;
1986:     sum10 = 0.0;
1987:     sum11 = 0.0;
1988:     sum12 = 0.0;
1989:     sum13 = 0.0;
1990:     sum14 = 0.0;
1991:     sum15 = 0.0;
1992:     sum16 = 0.0;
1993:     for (j=0; j<n; j++) {
1994:       sum1  += v[jrow]*x[16*idx[jrow]];
1995:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1996:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1997:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1998:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1999:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2000:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2001:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2002:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2003:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2004:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2005:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2006:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2007:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2008:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2009:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2010:       jrow++;
2011:      }
2012:     y[16*i]    += sum1;
2013:     y[16*i+1]  += sum2;
2014:     y[16*i+2]  += sum3;
2015:     y[16*i+3]  += sum4;
2016:     y[16*i+4]  += sum5;
2017:     y[16*i+5]  += sum6;
2018:     y[16*i+6]  += sum7;
2019:     y[16*i+7]  += sum8;
2020:     y[16*i+8]  += sum9;
2021:     y[16*i+9]  += sum10;
2022:     y[16*i+10] += sum11;
2023:     y[16*i+11] += sum12;
2024:     y[16*i+12] += sum13;
2025:     y[16*i+13] += sum14;
2026:     y[16*i+14] += sum15;
2027:     y[16*i+15] += sum16;
2028:   }

2030:   PetscLogFlops(32*a->nz);
2031:   VecRestoreArray(xx,&x);
2032:   VecRestoreArray(zz,&y);
2033:   return(0);
2034: }

2038: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2039: {
2040:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2041:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2042:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2043:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2045:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

2048:   if (yy != zz) {VecCopy(yy,zz);}
2049:   VecGetArray(xx,&x);
2050:   VecGetArray(zz,&y);
2051:   for (i=0; i<m; i++) {
2052:     idx    = a->j + a->i[i] ;
2053:     v      = a->a + a->i[i] ;
2054:     n      = a->i[i+1] - a->i[i];
2055:     alpha1 = x[16*i];
2056:     alpha2 = x[16*i+1];
2057:     alpha3 = x[16*i+2];
2058:     alpha4 = x[16*i+3];
2059:     alpha5 = x[16*i+4];
2060:     alpha6 = x[16*i+5];
2061:     alpha7 = x[16*i+6];
2062:     alpha8 = x[16*i+7];
2063:     alpha9  = x[16*i+8];
2064:     alpha10 = x[16*i+9];
2065:     alpha11 = x[16*i+10];
2066:     alpha12 = x[16*i+11];
2067:     alpha13 = x[16*i+12];
2068:     alpha14 = x[16*i+13];
2069:     alpha15 = x[16*i+14];
2070:     alpha16 = x[16*i+15];
2071:     while (n-->0) {
2072:       y[16*(*idx)]   += alpha1*(*v);
2073:       y[16*(*idx)+1] += alpha2*(*v);
2074:       y[16*(*idx)+2] += alpha3*(*v);
2075:       y[16*(*idx)+3] += alpha4*(*v);
2076:       y[16*(*idx)+4] += alpha5*(*v);
2077:       y[16*(*idx)+5] += alpha6*(*v);
2078:       y[16*(*idx)+6] += alpha7*(*v);
2079:       y[16*(*idx)+7] += alpha8*(*v);
2080:       y[16*(*idx)+8]  += alpha9*(*v);
2081:       y[16*(*idx)+9]  += alpha10*(*v);
2082:       y[16*(*idx)+10] += alpha11*(*v);
2083:       y[16*(*idx)+11] += alpha12*(*v);
2084:       y[16*(*idx)+12] += alpha13*(*v);
2085:       y[16*(*idx)+13] += alpha14*(*v);
2086:       y[16*(*idx)+14] += alpha15*(*v);
2087:       y[16*(*idx)+15] += alpha16*(*v);
2088:       idx++; v++;
2089:     }
2090:   }
2091:   PetscLogFlops(32*a->nz);
2092:   VecRestoreArray(xx,&x);
2093:   VecRestoreArray(zz,&y);
2094:   return(0);
2095: }

2097: /*===================================================================================*/
2100: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2101: {
2102:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2106:   /* start the scatter */
2107:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2108:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2109:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2110:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2111:   return(0);
2112: }

2116: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2117: {
2118:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2122:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2123:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2124:   VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2125:   VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2126:   return(0);
2127: }

2131: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2132: {
2133:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2137:   /* start the scatter */
2138:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2139:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2140:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2141:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2142:   return(0);
2143: }

2147: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2148: {
2149:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2153:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2154:   VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2155:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2156:   VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2157:   return(0);
2158: }

2162: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2163: {
2164:   /* This routine requires testing -- but it's getting better. */
2165:   PetscErrorCode     ierr;
2166:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2167:   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2168:   Mat                P=pp->AIJ;
2169:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2170:   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2171:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2172:   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2173:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2174:   MatScalar          *ca;

2177:   /* Start timer */
2178:   PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);

2180:   /* Get ij structure of P^T */
2181:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2183:   cn = pn*ppdof;
2184:   /* Allocate ci array, arrays for fill computation and */
2185:   /* free space for accumulating nonzero column info */
2186:   PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2187:   ci[0] = 0;

2189:   /* Work arrays for rows of P^T*A */
2190:   PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2191:   PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2192:   ptasparserow = ptadenserow  + an;
2193:   denserow     = ptasparserow + an;
2194:   sparserow    = denserow     + cn;

2196:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2197:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2198:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2199:   PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2200:   current_space = free_space;

2202:   /* Determine symbolic info for each row of C: */
2203:   for (i=0;i<pn;i++) {
2204:     ptnzi  = pti[i+1] - pti[i];
2205:     ptJ    = ptj + pti[i];
2206:     for (dof=0;dof<ppdof;dof++) {
2207:       ptanzi = 0;
2208:       /* Determine symbolic row of PtA: */
2209:       for (j=0;j<ptnzi;j++) {
2210:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2211:         arow = ptJ[j]*ppdof + dof;
2212:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2213:         anzj = ai[arow+1] - ai[arow];
2214:         ajj  = aj + ai[arow];
2215:         for (k=0;k<anzj;k++) {
2216:           if (!ptadenserow[ajj[k]]) {
2217:             ptadenserow[ajj[k]]    = -1;
2218:             ptasparserow[ptanzi++] = ajj[k];
2219:           }
2220:         }
2221:       }
2222:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2223:       ptaj = ptasparserow;
2224:       cnzi   = 0;
2225:       for (j=0;j<ptanzi;j++) {
2226:         /* Get offset within block of P */
2227:         pshift = *ptaj%ppdof;
2228:         /* Get block row of P */
2229:         prow = (*ptaj++)/ppdof; /* integer division */
2230:         /* P has same number of nonzeros per row as the compressed form */
2231:         pnzj = pi[prow+1] - pi[prow];
2232:         pjj  = pj + pi[prow];
2233:         for (k=0;k<pnzj;k++) {
2234:           /* Locations in C are shifted by the offset within the block */
2235:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2236:           if (!denserow[pjj[k]*ppdof+pshift]) {
2237:             denserow[pjj[k]*ppdof+pshift] = -1;
2238:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2239:           }
2240:         }
2241:       }

2243:       /* sort sparserow */
2244:       PetscSortInt(cnzi,sparserow);
2245: 
2246:       /* If free space is not available, make more free space */
2247:       /* Double the amount of total space in the list */
2248:       if (current_space->local_remaining<cnzi) {
2249:         PetscFreeSpaceGet(current_space->total_array_size,&current_space);
2250:       }

2252:       /* Copy data into free space, and zero out denserows */
2253:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2254:       current_space->array           += cnzi;
2255:       current_space->local_used      += cnzi;
2256:       current_space->local_remaining -= cnzi;

2258:       for (j=0;j<ptanzi;j++) {
2259:         ptadenserow[ptasparserow[j]] = 0;
2260:       }
2261:       for (j=0;j<cnzi;j++) {
2262:         denserow[sparserow[j]] = 0;
2263:       }
2264:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2265:       /*        For now, we will recompute what is needed. */
2266:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2267:     }
2268:   }
2269:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2270:   /* Allocate space for cj, initialize cj, and */
2271:   /* destroy list of free space and other temporary array(s) */
2272:   PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2273:   PetscFreeSpaceContiguous(&free_space,cj);
2274:   PetscFree(ptadenserow);
2275: 
2276:   /* Allocate space for ca */
2277:   PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2278:   PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2279: 
2280:   /* put together the new matrix */
2281:   MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);

2283:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2284:   /* Since these are PETSc arrays, change flags to free them as necessary. */
2285:   c = (Mat_SeqAIJ *)((*C)->data);
2286:   c->freedata = PETSC_TRUE;
2287:   c->nonew    = 0;

2289:   /* Clean up. */
2290:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2292:   PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
2293:   return(0);
2294: }

2298: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2299: {
2300:   /* This routine requires testing -- first draft only */
2302:   PetscInt       flops=0;
2303:   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2304:   Mat            P=pp->AIJ;
2305:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2306:   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2307:   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2308:   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2309:   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2310:   PetscInt       am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2311:   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2312:   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

2315:   /* Allocate temporary array for storage of one row of A*P */
2316:   PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2317:   PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));

2319:   apj      = (PetscInt *)(apa + cn);
2320:   apjdense = apj + cn;

2322:   /* Clear old values in C */
2323:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

2325:   for (i=0;i<am;i++) {
2326:     /* Form sparse row of A*P */
2327:     anzi  = ai[i+1] - ai[i];
2328:     apnzj = 0;
2329:     for (j=0;j<anzi;j++) {
2330:       /* Get offset within block of P */
2331:       pshift = *aj%ppdof;
2332:       /* Get block row of P */
2333:       prow   = *aj++/ppdof; /* integer division */
2334:       pnzj = pi[prow+1] - pi[prow];
2335:       pjj  = pj + pi[prow];
2336:       paj  = pa + pi[prow];
2337:       for (k=0;k<pnzj;k++) {
2338:         poffset = pjj[k]*ppdof+pshift;
2339:         if (!apjdense[poffset]) {
2340:           apjdense[poffset] = -1;
2341:           apj[apnzj++]      = poffset;
2342:         }
2343:         apa[poffset] += (*aa)*paj[k];
2344:       }
2345:       flops += 2*pnzj;
2346:       aa++;
2347:     }

2349:     /* Sort the j index array for quick sparse axpy. */
2350:     /* Note: a array does not need sorting as it is in dense storage locations. */
2351:     PetscSortInt(apnzj,apj);

2353:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2354:     prow    = i/ppdof; /* integer division */
2355:     pshift  = i%ppdof;
2356:     poffset = pi[prow];
2357:     pnzi = pi[prow+1] - poffset;
2358:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2359:     pJ   = pj+poffset;
2360:     pA   = pa+poffset;
2361:     for (j=0;j<pnzi;j++) {
2362:       crow   = (*pJ)*ppdof+pshift;
2363:       cjj    = cj + ci[crow];
2364:       caj    = ca + ci[crow];
2365:       pJ++;
2366:       /* Perform sparse axpy operation.  Note cjj includes apj. */
2367:       for (k=0,nextap=0;nextap<apnzj;k++) {
2368:         if (cjj[k]==apj[nextap]) {
2369:           caj[k] += (*pA)*apa[apj[nextap++]];
2370:         }
2371:       }
2372:       flops += 2*apnzj;
2373:       pA++;
2374:     }

2376:     /* Zero the current row info for A*P */
2377:     for (j=0;j<apnzj;j++) {
2378:       apa[apj[j]]      = 0.;
2379:       apjdense[apj[j]] = 0;
2380:     }
2381:   }

2383:   /* Assemble the final matrix and clean up */
2384:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2385:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2386:   PetscFree(apa);
2387:   PetscLogFlops(flops);

2389:   return(0);
2390: }

2395: PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2396: {
2397:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2398:   Mat               a = b->AIJ,B;
2399:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2400:   PetscErrorCode    ierr;
2401:   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2402:   PetscInt          *cols;
2403:   PetscScalar       *vals;

2406:   MatGetSize(a,&m,&n);
2407:   PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2408:   for (i=0; i<m; i++) {
2409:     nmax = PetscMax(nmax,aij->ilen[i]);
2410:     for (j=0; j<dof; j++) {
2411:       ilen[dof*i+j] = aij->ilen[i];
2412:     }
2413:   }
2414:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2415:   MatSetOption(B,MAT_COLUMNS_SORTED);
2416:   PetscFree(ilen);
2417:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
2418:   ii   = 0;
2419:   for (i=0; i<m; i++) {
2420:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2421:     for (j=0; j<dof; j++) {
2422:       for (k=0; k<ncols; k++) {
2423:         icols[k] = dof*cols[k]+j;
2424:       }
2425:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2426:       ii++;
2427:     }
2428:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2429:   }
2430:   PetscFree(icols);
2431:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2432:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2434:   if (reuse == MAT_REUSE_MATRIX) {
2435:     MatHeaderReplace(A,B);
2436:   } else {
2437:     *newmat = B;
2438:   }
2439:   return(0);
2440: }

2443:  #include src/mat/impls/aij/mpi/mpiaij.h

2448: PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2449: {
2450:   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2451:   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2452:   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2453:   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2454:   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2455:   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2456:   PetscInt          dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols;
2457:   PetscInt          rstart,cstart,*garray,ii,k;
2458:   PetscErrorCode    ierr;
2459:   PetscScalar       *vals,*ovals;

2462:   PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);
2463:   for (i=0; i<A->rmap.n/dof; i++) {
2464:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2465:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2466:     for (j=0; j<dof; j++) {
2467:       dnz[dof*i+j] = AIJ->ilen[i];
2468:       onz[dof*i+j] = OAIJ->ilen[i];
2469:     }
2470:   }
2471:   MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);
2472:   MatSetOption(B,MAT_COLUMNS_SORTED);
2473:   PetscFree2(dnz,onz);

2475:   PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2476:   rstart = dof*maij->A->rmap.rstart;
2477:   cstart = dof*maij->A->cmap.rstart;
2478:   garray = mpiaij->garray;

2480:   ii = rstart;
2481:   for (i=0; i<A->rmap.n/dof; i++) {
2482:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2483:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2484:     for (j=0; j<dof; j++) {
2485:       for (k=0; k<ncols; k++) {
2486:         icols[k] = cstart + dof*cols[k]+j;
2487:       }
2488:       for (k=0; k<oncols; k++) {
2489:         oicols[k] = dof*garray[ocols[k]]+j;
2490:       }
2491:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2492:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2493:       ii++;
2494:     }
2495:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2496:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2497:   }
2498:   PetscFree2(icols,oicols);

2500:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2501:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2503:   if (reuse == MAT_REUSE_MATRIX) {
2504:     MatHeaderReplace(A,B);
2505:   } else {
2506:     *newmat = B;
2507:   }
2508:   return(0);
2509: }


2513: /* ---------------------------------------------------------------------------------- */
2514: /*MC
2515:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 
2516:   operations for multicomponent problems.  It interpolates each component the same
2517:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2518:   and MATMPIAIJ for distributed matrices.

2520:   Operations provided:
2521: + MatMult
2522: . MatMultTranspose
2523: . MatMultAdd
2524: . MatMultTransposeAdd
2525: - MatView

2527:   Level: advanced

2529: M*/
2532: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2533: {
2535:   PetscMPIInt    size;
2536:   PetscInt       n;
2537:   Mat_MPIMAIJ    *b;
2538:   Mat            B;

2541:   PetscObjectReference((PetscObject)A);

2543:   if (dof == 1) {
2544:     *maij = A;
2545:   } else {
2546:     MatCreate(A->comm,&B);
2547:     MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);
2548:     B->assembled    = PETSC_TRUE;

2550:     MPI_Comm_size(A->comm,&size);
2551:     if (size == 1) {
2552:       MatSetType(B,MATSEQMAIJ);
2553:       B->ops->destroy = MatDestroy_SeqMAIJ;
2554:       B->ops->view    = MatView_SeqMAIJ;
2555:       b      = (Mat_MPIMAIJ*)B->data;
2556:       b->dof = dof;
2557:       b->AIJ = A;
2558:       if (dof == 2) {
2559:         B->ops->mult             = MatMult_SeqMAIJ_2;
2560:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2561:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2562:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2563:       } else if (dof == 3) {
2564:         B->ops->mult             = MatMult_SeqMAIJ_3;
2565:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2566:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2567:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2568:       } else if (dof == 4) {
2569:         B->ops->mult             = MatMult_SeqMAIJ_4;
2570:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2571:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2572:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2573:       } else if (dof == 5) {
2574:         B->ops->mult             = MatMult_SeqMAIJ_5;
2575:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2576:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2577:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2578:       } else if (dof == 6) {
2579:         B->ops->mult             = MatMult_SeqMAIJ_6;
2580:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2581:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2582:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2583:       } else if (dof == 7) {
2584:         B->ops->mult             = MatMult_SeqMAIJ_7;
2585:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2586:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2587:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2588:       } else if (dof == 8) {
2589:         B->ops->mult             = MatMult_SeqMAIJ_8;
2590:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2591:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2592:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2593:       } else if (dof == 9) {
2594:         B->ops->mult             = MatMult_SeqMAIJ_9;
2595:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2596:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2597:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2598:       } else if (dof == 10) {
2599:         B->ops->mult             = MatMult_SeqMAIJ_10;
2600:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2601:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2602:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2603:       } else if (dof == 16) {
2604:         B->ops->mult             = MatMult_SeqMAIJ_16;
2605:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2606:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2607:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2608:       } else {
2609:         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2610:       }
2611:       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2612:       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2613:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2614:     } else {
2615:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2616:       IS         from,to;
2617:       Vec        gvec;
2618:       PetscInt   *garray,i;

2620:       MatSetType(B,MATMPIMAIJ);
2621:       B->ops->destroy = MatDestroy_MPIMAIJ;
2622:       B->ops->view    = MatView_MPIMAIJ;
2623:       b      = (Mat_MPIMAIJ*)B->data;
2624:       b->dof = dof;
2625:       b->A   = A;
2626:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2627:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

2629:       VecGetSize(mpiaij->lvec,&n);
2630:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2631:       VecSetBlockSize(b->w,dof);

2633:       /* create two temporary Index sets for build scatter gather */
2634:       PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2635:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2636:       ISCreateBlock(A->comm,dof,n,garray,&from);
2637:       PetscFree(garray);
2638:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

2640:       /* create temporary global vector to generate scatter context */
2641:       VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);
2642:       VecSetBlockSize(gvec,dof);

2644:       /* generate the scatter context */
2645:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

2647:       ISDestroy(from);
2648:       ISDestroy(to);
2649:       VecDestroy(gvec);

2651:       B->ops->mult             = MatMult_MPIMAIJ_dof;
2652:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
2653:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
2654:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2655:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
2656:     }
2657:     *maij = B;
2658:     MatView_Private(B);
2659:   }
2660:   return(0);
2661: }