Actual source code: cholbs.c

  1: #define PETSCMAT_DLL

 3:  #include "petsc.h"

  5: /* We must define MLOG for BlockSolve logging */
  6: #if defined(PETSC_USE_LOG)
  7: #define MLOG
  8: #endif

 10:  #include "src/mat/impls/rowbs/mpi/mpirowbs.h"

 14: PetscErrorCode MatCholeskyFactorNumeric_MPIRowbs(Mat mat,MatFactorInfo *info,Mat *factp)
 15: {
 16:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
 18: #if defined(PETSC_USE_LOG)
 19:   PetscReal flop1 = BSlocal_flops();
 20: #endif


 24:   if (!mbs->blocksolveassembly) {
 25:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
 26:   }

 28:   /* Do prep work if same nonzero structure as previously factored matrix */
 29:   if (mbs->factor == FACTOR_CHOLESKY) {
 30:     /* Copy the nonzeros */
 31:     BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
 32:   }
 33:   /* Form incomplete Cholesky factor */
 34:   mbs->ierr = 0; mbs->failures = 0; mbs->alpha = 1.0;
 35:   while ((mbs->ierr = BSfactor(mbs->fpA,mbs->comm_fpA,mbs->procinfo))) {
 36:     CHKERRBS(0); mbs->failures++;
 37:     /* Copy only the nonzeros */
 38:     BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
 39:     /* Increment the diagonal shift */
 40:     mbs->alpha += 0.1;
 41:     BSset_diag(mbs->fpA,mbs->alpha,mbs->procinfo);CHKERRBS(0);
 42:     PetscInfo3(mat,"BlockSolve95: %d failed factor(s), err=%d, alpha=%g\n",mbs->failures,mbs->ierr,mbs->alpha);
 43:   }
 44: #if defined(PETSC_USE_LOG)
 45:   PetscLogFlops((int)(BSlocal_flops()-flop1));
 46: #endif

 48:   mbs->factor = FACTOR_CHOLESKY;
 49:   return(0);
 50: }
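
The retry loop above is a simple shifted incomplete factorization: whenever BSfactor() reports a failure, the original nonzeros are restored with BScopy_nz(), the diagonal shift alpha is increased, and the factorization is attempted again. The following self-contained sketch illustrates only that control flow; toy_factor() is a hypothetical stand-in for BSfactor(), and no BlockSolve call is made.

#include <stdio.h>

/* Hypothetical stand-in for BSfactor(): reports failure (nonzero return)
   until the diagonal shift is large enough for the factorization to succeed. */
static int toy_factor(double alpha)
{
  return (alpha < 1.3) ? 1 : 0;
}

int main(void)
{
  double alpha    = 1.0;   /* initial shift, as in mbs->alpha = 1.0    */
  int    failures = 0;     /* failure counter, as in mbs->failures = 0 */

  while (toy_factor(alpha)) {
    failures++;
    /* In cholbs.c: BScopy_nz() restores the original nonzeros here and
       BSset_diag() applies the new shift before the next attempt.      */
    alpha += 0.1;
    printf("factorization failed %d time(s), retrying with alpha=%g\n",
           failures, alpha);
  }
  printf("factorization succeeded with alpha=%g after %d failure(s)\n",
         alpha, failures);
  return 0;
}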

 54: PetscErrorCode MatLUFactorNumeric_MPIRowbs(Mat mat,MatFactorInfo *info,Mat *factp)
 55: {
 56:   Mat_MPIRowbs   *mbs = (Mat_MPIRowbs*)mat->data;

 58: #if defined(PETSC_USE_LOG)
 60:   PetscReal      flop1 = BSlocal_flops();
 61: #endif

 64:   if (!mbs->blocksolveassembly) {
 65:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
 66:   }

 68:   /* Do prep work if same nonzero structure as previously factored matrix */
 69:   if (mbs->factor == FACTOR_LU) {
 70:     /* Copy the nonzeros */
 71:     BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
 72:   }
 73:   /* Form incomplete LU factor */
 74:   mbs->ierr = 0; mbs->failures = 0; mbs->alpha = 1.0;
 75:   while ((mbs->ierr = BSfactor(mbs->fpA,mbs->comm_fpA,mbs->procinfo))) {
 76:     CHKERRBS(0); mbs->failures++;
 77:     /* Copy only the nonzeros */
 78:     BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
 79:     /* Increment the diagonal shift */
 80:     mbs->alpha += 0.1;
 81:     BSset_diag(mbs->fpA,mbs->alpha,mbs->procinfo);CHKERRBS(0);
 82:     PetscInfo3(mat,"BlockSolve95: %d failed factor(s), err=%d, alpha=%g\n",mbs->failures,mbs->ierr,mbs->alpha);
 83:   }
 84:   mbs->factor = FACTOR_LU;
 85:   (*factp)->assembled = PETSC_TRUE;
 86: #if defined(PETSC_USE_LOG)
 87:   PetscLogFlops((int)(BSlocal_flops()-flop1));
 88: #endif
 89:   return(0);
 90: }
 91: /* ------------------------------------------------------------------- */
 94: PetscErrorCode MatSolve_MPIRowbs(Mat mat,Vec x,Vec y)
 95: {
 96:   Mat          submat = (Mat) mat->data;
 97:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
 99:   PetscScalar  *ya,*xa,*xworka;

101: #if defined(PETSC_USE_LOG)
102:   PetscReal flop1 = BSlocal_flops();
103: #endif

106:   /* Permute and apply diagonal scaling to the vector (mbs->diag stores D^{-1/2}) */
107:   if (!mbs->vecs_permscale) {
108:     VecGetArray(x,&xa);
109:     VecGetArray(mbs->xwork,&xworka);
110:     BSperm_dvec(xa,xworka,mbs->pA->perm);CHKERRBS(0);
111:     VecRestoreArray(x,&xa);
112:     VecRestoreArray(mbs->xwork,&xworka);
113:     VecPointwiseMult(y,mbs->diag,mbs->xwork);
114:   } else {
115:     VecCopy(x,y);
116:   }

118:   VecGetArray(y,&ya);
119:   if (mbs->procinfo->single) {
120:     /* Use BlockSolve routine for no cliques/inodes */
121:     BSfor_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
122:     BSback_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
123:   } else {
124:     BSfor_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
125:     BSback_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
126:   }
127:   VecRestoreArray(y,&ya);

129:   /* Apply diagonal scaling and unpermute the vector (mbs->diag stores D^{-1/2}) */
130:   if (!mbs->vecs_permscale) {
131:     VecPointwiseMult(mbs->xwork,y,mbs->diag);
132:     VecGetArray(y,&ya);
133:     VecGetArray(mbs->xwork,&xworka);
134:     BSiperm_dvec(xworka,ya,mbs->pA->perm);CHKERRBS(0);
135:     VecRestoreArray(y,&ya);
136:     VecRestoreArray(mbs->xwork,&xworka);
137:   }
138: #if defined(PETSC_USE_LOG)
139:   PetscLogFlops((int)(BSlocal_flops()-flop1));
140: #endif
141:   return(0);
142: }
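
MatSolve_MPIRowbs() above wraps the BlockSolve forward/backward solves in a permutation and a symmetric diagonal scaling: the right-hand side is permuted, multiplied by D^{-1/2} (stored in mbs->diag), solved with the factor of the scaled matrix, then scaled by D^{-1/2} again and unpermuted. Below is a minimal dense sketch of that wrapping; perm[] and dinvsqrt[] are hypothetical stand-ins for mbs->pA->perm and mbs->diag, and the triangular solve is a placeholder.

#include <stdio.h>

#define N 4

/* Placeholder for BSfor_solve()/BSback_solve(): a real solve would apply
   the incomplete factor; here the scaled system is left unchanged.       */
static void triangular_solve(double y[N])
{
  (void)y;
}

int main(void)
{
  int    perm[N]     = {2, 0, 3, 1};          /* row permutation            */
  double dinvsqrt[N] = {1.0, 0.5, 2.0, 1.0};  /* plays the role of D^{-1/2} */
  double x[N]        = {1.0, 2.0, 3.0, 4.0};  /* right-hand side            */
  double w[N], y[N];
  int    i;

  for (i = 0; i < N; i++) w[i] = x[perm[i]];          /* permute (cf. BSperm_dvec)    */
  for (i = 0; i < N; i++) y[i] = dinvsqrt[i] * w[i];  /* scale by D^{-1/2}            */
  triangular_solve(y);                                /* forward/backward solve       */
  for (i = 0; i < N; i++) w[i] = dinvsqrt[i] * y[i];  /* scale by D^{-1/2} again      */
  for (i = 0; i < N; i++) y[perm[i]] = w[i];          /* unpermute (cf. BSiperm_dvec) */

  for (i = 0; i < N; i++) printf("y[%d] = %g\n", i, y[i]);
  return 0;
}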

144: /* ------------------------------------------------------------------- */
147: PetscErrorCode MatForwardSolve_MPIRowbs(Mat mat,Vec x,Vec y)
148: {
149:   Mat          submat = (Mat) mat->data;
150:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
152:   PetscScalar  *ya,*xa,*xworka;

154: #if defined(PETSC_USE_LOG)
155:   PetscReal flop1 = BSlocal_flops();
156: #endif

159:   /* Permute and apply diagonal scaling to the vector (mbs->diag stores D^{-1/2}) */
160:   if (!mbs->vecs_permscale) {
161:     VecGetArray(x,&xa);
162:     VecGetArray(mbs->xwork,&xworka);
163:     BSperm_dvec(xa,xworka,mbs->pA->perm);CHKERRBS(0);
164:     VecRestoreArray(x,&xa);
165:     VecRestoreArray(mbs->xwork,&xworka);
166:     VecPointwiseMult(y,mbs->diag,mbs->xwork);
167:   } else {
168:     VecCopy(x,y);
169:   }

171:   VecGetArray(y,&ya);
172:   if (mbs->procinfo->single){
173:     /* Use BlockSolve routine for no cliques/inodes */
174:     BSfor_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
175:   } else {
176:     BSfor_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
177:   }
178:   VecRestoreArray(y,&ya);

180: #if defined(PETSC_USE_LOG)
181:   PetscLogFlops((int)(BSlocal_flops()-flop1));
182: #endif

184:   return(0);
185: }

187: /* ------------------------------------------------------------------- */
190: PetscErrorCode MatBackwardSolve_MPIRowbs(Mat mat,Vec x,Vec y)
191: {
192:   Mat          submat = (Mat) mat->data;
193:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
195:   PetscScalar  *ya,*xworka;

197: #if defined (PETSC_USE_LOG)
198:   PetscReal flop1 = BSlocal_flops();
199: #endif

202:   VecCopy(x,y);

204:   VecGetArray(y,&ya);
205:   if (mbs->procinfo->single) {
206:     /* Use BlockSolve routine for no cliques/inodes */
207:     BSback_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
208:   } else {
209:     BSback_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
210:   }
211:   VecRestoreArray(y,&ya);

213:   /* Apply diagonal scaling and unpermute the vector (mbs->diag stores D^{-1/2}) */
214:   if (!mbs->vecs_permscale) {
215:     VecPointwiseMult(mbs->xwork,y,mbs->diag);
216:     VecGetArray(y,&ya);
217:     VecGetArray(mbs->xwork,&xworka);
218:     BSiperm_dvec(xworka,ya,mbs->pA->perm);CHKERRBS(0);
219:     VecRestoreArray(y,&ya);
220:     VecRestoreArray(mbs->xwork,&xworka);
221:   }
222: #if defined (PETSC_USE_LOG)
223:   PetscLogFlops((int)(BSlocal_flops()-flop1));
224: #endif
225:   return(0);
226: }


229: /*
230:     The logging variables required by BlockSolve.

232:     This is an ugly hack that allows PETSc to run properly with BlockSolve regardless
233:   of whether PETSc or BlockSolve is compiled with logging turned on.

235:     It is fragile because it relies on BlockSolve's logging internals not changing,
236:   but we have no choice; besides, it is unlikely that BlockSolve will be developed
237:   further in the near future.
238: */
239: PETSCMAT_DLLEXPORT double MLOG_flops;
240: PETSCMAT_DLLEXPORT double MLOG_event_flops;
241: PETSCMAT_DLLEXPORT double MLOG_time_stamp;
242: PETSCMAT_DLLEXPORT PetscErrorCode    MLOG_sequence_num;
243: #if defined (MLOG_MAX_EVNTS) 
244: PETSCMAT_DLLEXPORT MLOG_log_type MLOG_event_log[MLOG_MAX_EVNTS];
245: PETSCMAT_DLLEXPORT MLOG_log_type MLOG_accum_log[MLOG_MAX_ACCUM];
246: #else
247: typedef struct __MLOG_log_type {
248:   double time_stamp;
249:   double total_time;
250:   double flops;
251:   int    event_num;
252: } MLOG_log_type;
253: #define MLOG_MAX_EVNTS 1300
254: #define MLOG_MAX_ACCUM 75
255: PETSCMAT_DLLEXPORT MLOG_log_type MLOG_event_log[MLOG_MAX_EVNTS];
256: PETSCMAT_DLLEXPORT MLOG_log_type MLOG_accum_log[MLOG_MAX_ACCUM];
257: #endif
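
The MLOG_* definitions above follow a common pattern for satisfying an optional logging dependency at link time: the application defines the globals a library may reference, so linking succeeds whether or not the library was built with logging enabled. A minimal sketch of the idea with hypothetical names (liblog_counter and liblog_events are illustrative, not BlockSolve symbols):

/* What a (hypothetical) library header might declare when its optional
   logging support is compiled in: */
extern double liblog_counter;
extern double liblog_events[16];

/* Definitions supplied by the application, mirroring MLOG_flops,
   MLOG_event_log, etc. above; harmless if the library never touches them. */
double liblog_counter;
double liblog_events[16];

int main(void)
{
  liblog_counter = 0.0;
  return 0;
}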