Actual source code: bdfact.c
1: #define PETSCMAT_DLL
3: /* Block diagonal matrix format - factorization and triangular solves */
5: #include src/mat/impls/bdiag/seq/bdiag.h
6: #include src/inline/ilu.h
10: PetscErrorCode MatILUFactorSymbolic_SeqBDiag(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
11: {
12: PetscTruth idn;
16: if (A->rmap.N != A->cmap.n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
17: if (isrow) {
18: ISIdentity(isrow,&idn);
19: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
20: }
21: if (iscol) {
22: ISIdentity(iscol,&idn);
23: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
24: }
25: if (info->levels != 0) {
26: SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
27: }
28: MatConvert(A,MATSAME,MAT_INITIAL_MATRIX,B);
30: /* Must set to zero for repeated calls with different nonzero structure */
31: (*B)->factor = 0;
32: return(0);
33: }
37: PetscErrorCode MatILUFactor_SeqBDiag(Mat A,IS isrow,IS iscol,MatFactorInfo *info)
38: {
39: PetscTruth idn;
43: /* For now, no fill is allocated in symbolic factorization phase, so we
44: directly use the input matrix for numeric factorization. */
45: if (A->rmap.N != A->cmap.n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
46: if (isrow) {
47: ISIdentity(isrow,&idn);
48: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
49: }
50: if (iscol) {
51: ISIdentity(iscol,&idn);
52: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
53: }
54: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
55: MatLUFactorNumeric(A,info,&A);
56: return(0);
57: }
59: /* --------------------------------------------------------------------------*/
62: PetscErrorCode MatLUFactorNumeric_SeqBDiag_N(Mat A,MatFactorInfo *info,Mat *B)
63: {
64: Mat C = *B;
65: Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
66: PetscInt k,d,d2,dgk,elim_row,elim_col,bs = A->rmap.bs,knb,knb2,bs2 = bs*bs;
68: PetscInt dnum,nd = a->nd,mblock = a->mblock,nblock = a->nblock;
69: PetscInt *diag = a->diag, m = A->rmap.N,mainbd = a->mainbd,*dgptr,len,i;
70: PetscScalar **dv = a->diagv,*dd = dv[mainbd],*v_work;
71: PetscScalar *multiplier;
74: /* Copy input matrix to factored matrix if we've already factored the
75: matrix before AND the nonzero structure remains the same. This is done
76: in symbolic factorization the first time through, but there's no symbolic
77: factorization for successive calls with same matrix sparsity structure. */
78: if (C->factor == FACTOR_LU) {
79: for (i=0; i<a->nd; i++) {
80: len = a->bdlen[i] * bs2 * sizeof(PetscScalar);
81: d = diag[i];
82: if (d > 0) {
83: PetscMemcpy(dv[i]+bs2*d,a1->diagv[i]+bs2*d,len);
84: } else {
85: PetscMemcpy(dv[i],a1->diagv[i],len);
86: }
87: }
88: }
90: if (!a->pivot) {
91: PetscMalloc((m+1)*sizeof(PetscInt),&a->pivot);
92: PetscLogObjectMemory(C,m*sizeof(PetscInt));
93: }
94: PetscMalloc((bs2+bs+1)*sizeof(PetscScalar),&v_work);
95: multiplier = v_work + bs;
96: PetscMalloc((mblock+nblock+1)*sizeof(PetscInt),&dgptr);
97: PetscMemzero(dgptr,(mblock+nblock)*sizeof(PetscInt));
98: for (k=0; k<nd; k++) dgptr[diag[k]+mblock] = k+1;
99: for (k=0; k<mblock; k++) { /* k = block pivot_row */
100: knb = k*bs; knb2 = knb*bs;
101: /* invert the diagonal block */
102: Kernel_A_gets_inverse_A(bs,dd+knb2,a->pivot+knb,v_work);
103: for (d=mainbd-1; d>=0; d--) {
104: elim_row = k + diag[d];
105: if (elim_row < mblock) { /* sweep down */
106: /* dv[d][knb2]: test if entire block is zero? */
107: Kernel_A_gets_A_times_B(bs,&dv[d][elim_row*bs2],dd+knb2,multiplier);
108: for (d2=d+1; d2<nd; d2++) {
109: elim_col = elim_row - diag[d2];
110: if (elim_col >=0 && elim_col < nblock) {
111: dgk = k - elim_col;
112: if ((dnum = dgptr[dgk+mblock])) {
113: Kernel_A_gets_A_minus_B_times_C(bs,&dv[d2][elim_row*bs2],
114: &dv[d][elim_row*bs2],&dv[dnum-1][knb2]);
115: }
116: }
117: }
118: }
119: }
120: }
121: PetscFree(dgptr);
122: PetscFree(v_work);
123: if (!a->solvework) {
124: PetscMalloc(bs*sizeof(PetscScalar),&a->solvework);
125: }
126: C->factor = FACTOR_LU;
127: return(0);
128: }
132: PetscErrorCode MatLUFactorNumeric_SeqBDiag_1(Mat A,MatFactorInfo *info,Mat *B)
133: {
134: Mat C = *B;
135: Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
137: PetscInt k,d,d2,dgk,elim_row,elim_col,dnum,nd = a->nd,i,len;
138: PetscInt *diag = a->diag,n = A->cmap.n,m = A->rmap.N,mainbd = a->mainbd,*dgptr;
139: PetscScalar **dv = a->diagv,*dd = dv[mainbd],mult;
142: /* Copy input matrix to factored matrix if we've already factored the
143: matrix before AND the nonzero structure remains the same. This is done
144: in symbolic factorization the first time through, but there's no symbolic
145: factorization for successive calls with same matrix sparsity structure. */
146: if (C->factor == FACTOR_LU) {
147: for (i=0; i<nd; i++) {
148: len = a->bdlen[i] * sizeof(PetscScalar);
149: d = diag[i];
150: if (d > 0) {
151: PetscMemcpy(dv[i]+d,a1->diagv[i]+d,len);
152: } else {
153: PetscMemcpy(dv[i],a1->diagv[i],len);
154: }
155: }
156: }
158: PetscMalloc((m+n+1)*sizeof(PetscInt),&dgptr);
159: PetscMemzero(dgptr,(m+n)*sizeof(PetscInt));
160: for (k=0; k<nd; k++) dgptr[diag[k]+m] = k+1;
161: for (k=0; k<m; k++) { /* k = pivot_row */
162: dd[k] = 1.0/dd[k];
163: for (d=mainbd-1; d>=0; d--) {
164: elim_row = k + diag[d];
165: if (elim_row < m) { /* sweep down */
166: if (dv[d][elim_row] != 0.0) {
167: dv[d][elim_row] *= dd[k];
168: mult = dv[d][elim_row];
169: for (d2=d+1; d2<nd; d2++) {
170: elim_col = elim_row - diag[d2];
171: dgk = k - elim_col;
172: if (elim_col >=0 && elim_col < n) {
173: if ((dnum = dgptr[dgk+m])) {
174: dv[d2][elim_row] -= mult * dv[dnum-1][k];
175: }
176: }
177: }
178: }
179: }
180: }
181: }
182: PetscFree(dgptr);
183: C->factor = FACTOR_LU;
184: return(0);
185: }
187: /* -----------------------------------------------------------------*/
191: PetscErrorCode MatSolve_SeqBDiag_1(Mat A,Vec xx,Vec yy)
192: {
193: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
195: PetscInt i,d,loc,mainbd = a->mainbd;
196: PetscInt n = A->cmap.n,m = A->rmap.N,*diag = a->diag,col;
197: PetscScalar *x,*y,*dd = a->diagv[mainbd],sum,**dv = a->diagv;
200: VecGetArray(xx,&x);
201: VecGetArray(yy,&y);
202: /* forward solve the lower triangular part */
203: for (i=0; i<m; i++) {
204: sum = x[i];
205: for (d=0; d<mainbd; d++) {
206: loc = i - diag[d];
207: if (loc >= 0) sum -= dv[d][i] * y[loc];
208: }
209: y[i] = sum;
210: }
211: /* backward solve the upper triangular part */
212: for (i=m-1; i>=0; i--) {
213: sum = y[i];
214: for (d=mainbd+1; d<a->nd; d++) {
215: col = i - diag[d];
216: if (col < n) sum -= dv[d][i] * y[col];
217: }
218: y[i] = sum*dd[i];
219: }
220: VecRestoreArray(xx,&x);
221: VecRestoreArray(yy,&y);
222: PetscLogFlops(2*a->nz - A->cmap.n);
223: return(0);
224: }
228: PetscErrorCode MatSolve_SeqBDiag_2(Mat A,Vec xx,Vec yy)
229: {
230: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
231: PetscInt i,d,loc,mainbd = a->mainbd;
232: PetscInt mblock = a->mblock,nblock = a->nblock,inb,inb2;
234: PetscInt m = A->rmap.N,*diag = a->diag,col;
235: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
236: PetscScalar w0,w1,sum0,sum1;
239: VecGetArray(xx,&x);
240: VecGetArray(yy,&y);
241: PetscMemcpy(y,x,m*sizeof(PetscScalar));
243: /* forward solve the lower triangular part */
244: if (mainbd != 0) {
245: inb = 0;
246: for (i=0; i<mblock; i++) {
247: sum0 = sum1 = 0.0;
248: for (d=0; d<mainbd; d++) {
249: loc = 2*(i - diag[d]);
250: if (loc >= 0) {
251: dvt = &dv[d][4*i];
252: w0 = y[loc]; w1 = y[loc+1];
253: sum0 += dvt[0]*w0 + dvt[2]*w1;
254: sum1 += dvt[1]*w0 + dvt[3]*w1;
255: }
256: }
257: y[inb] -= sum0; y[inb+1] -= sum1;
259: inb += 2;
260: }
261: }
262: /* backward solve the upper triangular part */
263: inb = 2*(mblock-1); inb2 = 2*inb;
264: for (i=mblock-1; i>=0; i--) {
265: sum0 = y[inb]; sum1 = y[inb+1];
266: for (d=mainbd+1; d<a->nd; d++) {
267: col = 2*(i - diag[d]);
268: if (col < 2*nblock) {
269: dvt = &dv[d][4*i];
270: w0 = y[col]; w1 = y[col+1];
271: sum0 -= dvt[0]*w0 + dvt[2]*w1;
272: sum1 -= dvt[1]*w0 + dvt[3]*w1;
273: }
274: }
275: dvt = dd+inb2;
276: y[inb] = dvt[0]*sum0 + dvt[2]*sum1;
277: y[inb+1] = dvt[1]*sum0 + dvt[3]*sum1;
278: inb -= 2; inb2 -= 4;
279: }
280: VecRestoreArray(xx,&x);
281: VecRestoreArray(yy,&y);
282: PetscLogFlops(2*a->nz - A->cmap.n);
283: return(0);
284: }
288: PetscErrorCode MatSolve_SeqBDiag_3(Mat A,Vec xx,Vec yy)
289: {
290: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
291: PetscInt i,d,loc,mainbd = a->mainbd;
292: PetscInt mblock = a->mblock,nblock = a->nblock,inb,inb2;
294: PetscInt m = A->rmap.N,*diag = a->diag,col;
295: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
296: PetscScalar w0,w1,w2,sum0,sum1,sum2;
299: VecGetArray(xx,&x);
300: VecGetArray(yy,&y);
301: PetscMemcpy(y,x,m*sizeof(PetscScalar));
303: /* forward solve the lower triangular part */
304: if (mainbd != 0) {
305: inb = 0;
306: for (i=0; i<mblock; i++) {
307: sum0 = sum1 = sum2 = 0.0;
308: for (d=0; d<mainbd; d++) {
309: loc = 3*(i - diag[d]);
310: if (loc >= 0) {
311: dvt = &dv[d][9*i];
312: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];
313: sum0 += dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
314: sum1 += dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
315: sum2 += dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
316: }
317: }
318: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;
319: inb += 3;
320: }
321: }
322: /* backward solve the upper triangular part */
323: inb = 3*(mblock-1); inb2 = 3*inb;
324: for (i=mblock-1; i>=0; i--) {
325: sum0 = y[inb]; sum1 = y[inb+1]; sum2 = y[inb+2];
326: for (d=mainbd+1; d<a->nd; d++) {
327: col = 3*(i - diag[d]);
328: if (col < 3*nblock) {
329: dvt = &dv[d][9*i];
330: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];
331: sum0 -= dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
332: sum1 -= dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
333: sum2 -= dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
334: }
335: }
336: dvt = dd+inb2;
337: y[inb] = dvt[0]*sum0 + dvt[3]*sum1 + dvt[6]*sum2;
338: y[inb+1] = dvt[1]*sum0 + dvt[4]*sum1 + dvt[7]*sum2;
339: y[inb+2] = dvt[2]*sum0 + dvt[5]*sum1 + dvt[8]*sum2;
340: inb -= 3; inb2 -= 9;
341: }
342: VecRestoreArray(xx,&x);
343: VecRestoreArray(yy,&y);
344: PetscLogFlops(2*a->nz - A->cmap.n);
345: return(0);
346: }
350: PetscErrorCode MatSolve_SeqBDiag_4(Mat A,Vec xx,Vec yy)
351: {
352: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
353: PetscInt i,d,loc,mainbd = a->mainbd;
354: PetscInt mblock = a->mblock,nblock = a->nblock,inb,inb2;
356: PetscInt m = A->rmap.N,*diag = a->diag,col;
357: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
358: PetscScalar w0,w1,w2,w3,sum0,sum1,sum2,sum3;
361: VecGetArray(xx,&x);
362: VecGetArray(yy,&y);
363: PetscMemcpy(y,x,m*sizeof(PetscScalar));
365: /* forward solve the lower triangular part */
366: if (mainbd != 0) {
367: inb = 0;
368: for (i=0; i<mblock; i++) {
369: sum0 = sum1 = sum2 = sum3 = 0.0;
370: for (d=0; d<mainbd; d++) {
371: loc = 4*(i - diag[d]);
372: if (loc >= 0) {
373: dvt = &dv[d][16*i];
374: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];
375: sum0 += dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2 + dvt[12]*w3;
376: sum1 += dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2 + dvt[13]*w3;
377: sum2 += dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
378: sum3 += dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
379: }
380: }
381: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
382: inb += 4;
383: }
384: }
385: /* backward solve the upper triangular part */
386: inb = 4*(mblock-1); inb2 = 4*inb;
387: for (i=mblock-1; i>=0; i--) {
388: sum0 = y[inb]; sum1 = y[inb+1]; sum2 = y[inb+2]; sum3 = y[inb+3];
389: for (d=mainbd+1; d<a->nd; d++) {
390: col = 4*(i - diag[d]);
391: if (col < 4*nblock) {
392: dvt = &dv[d][16*i];
393: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];
394: sum0 -= dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2 + dvt[12]*w3;
395: sum1 -= dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2 + dvt[13]*w3;
396: sum2 -= dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
397: sum3 -= dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
398: }
399: }
400: dvt = dd+inb2;
401: y[inb] = dvt[0]*sum0 + dvt[4]*sum1 + dvt[8]*sum2 + dvt[12]*sum3;
402: y[inb+1] = dvt[1]*sum0 + dvt[5]*sum1 + dvt[9]*sum2 + dvt[13]*sum3;
403: y[inb+2] = dvt[2]*sum0 + dvt[6]*sum1 + dvt[10]*sum2 + dvt[14]*sum3;
404: y[inb+3] = dvt[3]*sum0 + dvt[7]*sum1 + dvt[11]*sum2 + dvt[15]*sum3;
405: inb -= 4; inb2 -= 16;
406: }
407: VecRestoreArray(xx,&x);
408: VecRestoreArray(yy,&y);
409: PetscLogFlops(2*a->nz - A->cmap.n);
410: return(0);
411: }
415: PetscErrorCode MatSolve_SeqBDiag_5(Mat A,Vec xx,Vec yy)
416: {
417: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
418: PetscInt i,d,loc,mainbd = a->mainbd;
419: PetscInt mblock = a->mblock,nblock = a->nblock,inb,inb2;
421: PetscInt m = A->rmap.N,*diag = a->diag,col;
422: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
423: PetscScalar w0,w1,w2,w3,w4,sum0,sum1,sum2,sum3,sum4;
426: VecGetArray(xx,&x);
427: VecGetArray(yy,&y);
428: PetscMemcpy(y,x,m*sizeof(PetscScalar));
430: /* forward solve the lower triangular part */
431: if (mainbd != 0) {
432: inb = 0;
433: for (i=0; i<mblock; i++) {
434: sum0 = sum1 = sum2 = sum3 = sum4 = 0.0;
435: for (d=0; d<mainbd; d++) {
436: loc = 5*(i - diag[d]);
437: if (loc >= 0) {
438: dvt = &dv[d][25*i];
439: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];w4 = y[loc+4];
440: sum0 += dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
441: sum1 += dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
442: sum2 += dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
443: sum3 += dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
444: sum4 += dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
445: }
446: }
447: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
448: y[inb+4] -= sum4;
449: inb += 5;
450: }
451: }
452: /* backward solve the upper triangular part */
453: inb = 5*(mblock-1); inb2 = 5*inb;
454: for (i=mblock-1; i>=0; i--) {
455: sum0 = y[inb];sum1 = y[inb+1];sum2 = y[inb+2];sum3 = y[inb+3];sum4 = y[inb+4];
456: for (d=mainbd+1; d<a->nd; d++) {
457: col = 5*(i - diag[d]);
458: if (col < 5*nblock) {
459: dvt = &dv[d][25*i];
460: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];w4 = y[col+4];
461: sum0 -= dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
462: sum1 -= dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
463: sum2 -= dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
464: sum3 -= dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
465: sum4 -= dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
466: }
467: }
468: dvt = dd+inb2;
469: y[inb] = dvt[0]*sum0 + dvt[5]*sum1 + dvt[10]*sum2 + dvt[15]*sum3
470: + dvt[20]*sum4;
471: y[inb+1] = dvt[1]*sum0 + dvt[6]*sum1 + dvt[11]*sum2 + dvt[16]*sum3
472: + dvt[21]*sum4;
473: y[inb+2] = dvt[2]*sum0 + dvt[7]*sum1 + dvt[12]*sum2 + dvt[17]*sum3
474: + dvt[22]*sum4;
475: y[inb+3] = dvt[3]*sum0 + dvt[8]*sum1 + dvt[13]*sum2 + dvt[18]*sum3
476: + dvt[23]*sum4;
477: y[inb+4] = dvt[4]*sum0 + dvt[9]*sum1 + dvt[14]*sum2 + dvt[19]*sum3
478: + dvt[24]*sum4;
479: inb -= 5; inb2 -= 25;
480: }
481: VecRestoreArray(xx,&x);
482: VecRestoreArray(yy,&y);
483: PetscLogFlops(2*a->nz - A->cmap.n);
484: return(0);
485: }
489: PetscErrorCode MatSolve_SeqBDiag_N(Mat A,Vec xx,Vec yy)
490: {
491: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
492: PetscInt i,d,loc,mainbd = a->mainbd;
493: PetscInt mblock = a->mblock,nblock = a->nblock,inb,inb2;
495: PetscInt bs = A->rmap.bs,m = A->rmap.N,*diag = a->diag,col,bs2 = bs*bs;
496: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv;
497: PetscScalar *work = a->solvework;
500: VecGetArray(xx,&x);
501: VecGetArray(yy,&y);
502: PetscMemcpy(y,x,m*sizeof(PetscScalar));
504: /* forward solve the lower triangular part */
505: if (mainbd != 0) {
506: inb = 0;
507: for (i=0; i<mblock; i++) {
508: for (d=0; d<mainbd; d++) {
509: loc = i - diag[d];
510: if (loc >= 0) {
511: Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][i*bs2],y+loc*bs);
512: }
513: }
514: inb += bs;
515: }
516: }
517: /* backward solve the upper triangular part */
518: inb = bs*(mblock-1); inb2 = inb*bs;
519: for (i=mblock-1; i>=0; i--) {
520: for (d=mainbd+1; d<a->nd; d++) {
521: col = i - diag[d];
522: if (col < nblock) {
523: Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][inb2],y+col*bs);
524: }
525: }
526: Kernel_w_gets_A_times_v(bs,y+inb,dd+inb2,work);
527: PetscMemcpy(y+inb,work,bs*sizeof(PetscScalar));
528: inb -= bs; inb2 -= bs2;
529: }
530: VecRestoreArray(xx,&x);
531: VecRestoreArray(yy,&y);
532: PetscLogFlops(2*a->nz - A->cmap.n);
533: return(0);
534: }