Actual source code: gs.c
1: #define PETSCKSP_DLL
3: /***********************************gs.c***************************************
5: Author: Henry M. Tufo III
7: e-mail: hmt@cs.brown.edu
9: snail-mail:
10: Division of Applied Mathematics
11: Brown University
12: Providence, RI 02912
14: Last Modification:
15: 6.21.97
16: ************************************gs.c**************************************/
18: /***********************************gs.c***************************************
19: File Description:
20: -----------------
22: ************************************gs.c**************************************/
24: #include src/ksp/pc/impls/tfs/tfs.h
26: /* default length of number of items via tree - doubles if exceeded */
27: #define TREE_BUF_SZ 2048;
28: #define GS_VEC_SZ 1
32: /***********************************gs.c***************************************
33: Type: struct gather_scatter_id
34: ------------------------------
36: ************************************gs.c**************************************/
37: typedef struct gather_scatter_id {
38: int id;
39: int nel_min;
40: int nel_max;
41: int nel_sum;
42: int negl;
43: int gl_max;
44: int gl_min;
45: int repeats;
46: int ordered;
47: int positive;
48: PetscScalar *vals;
50: /* bit mask info */
51: int *my_proc_mask;
52: int mask_sz;
53: int *ngh_buf;
54: int ngh_buf_sz;
55: int *nghs;
56: int num_nghs;
57: int max_nghs;
58: int *pw_nghs;
59: int num_pw_nghs;
60: int *tree_nghs;
61: int num_tree_nghs;
63: int num_loads;
65: /* repeats == true -> local info */
66: int nel; /* number of unique elememts */
67: int *elms; /* of size nel */
68: int nel_total;
69: int *local_elms; /* of size nel_total */
70: int *companion; /* of size nel_total */
72: /* local info */
73: int num_local_total;
74: int local_strength;
75: int num_local;
76: int *num_local_reduce;
77: int **local_reduce;
78: int num_local_gop;
79: int *num_gop_local_reduce;
80: int **gop_local_reduce;
82: /* pairwise info */
83: int level;
84: int num_pairs;
85: int max_pairs;
86: int loc_node_pairs;
87: int max_node_pairs;
88: int min_node_pairs;
89: int avg_node_pairs;
90: int *pair_list;
91: int *msg_sizes;
92: int **node_list;
93: int len_pw_list;
94: int *pw_elm_list;
95: PetscScalar *pw_vals;
97: MPI_Request *msg_ids_in;
98: MPI_Request *msg_ids_out;
100: PetscScalar *out;
101: PetscScalar *in;
102: int msg_total;
104: /* tree - crystal accumulator info */
105: int max_left_over;
106: int *pre;
107: int *in_num;
108: int *out_num;
109: int **in_list;
110: int **out_list;
112: /* new tree work*/
113: int tree_nel;
114: int *tree_elms;
115: PetscScalar *tree_buf;
116: PetscScalar *tree_work;
118: int tree_map_sz;
119: int *tree_map_in;
120: int *tree_map_out;
122: /* current memory status */
123: int gl_bss_min;
124: int gl_perm_min;
126: /* max segment size for gs_gop_vec() */
127: int vec_sz;
129: /* hack to make paul happy */
130: MPI_Comm gs_comm;
132: } gs_id;
135: /* to be made public */
137: /* PRIVATE - and definitely not exported */
138: /*static void gs_print_template( gs_id* gs, int who);*/
139: /*static void gs_print_stemplate( gs_id* gs, int who);*/
141: static gs_id *gsi_check_args(int *elms, int nel, int level);
142: static void gsi_via_bit_mask(gs_id *gs);
143: static void get_ngh_buf(gs_id *gs);
144: static void set_pairwise(gs_id *gs);
145: static gs_id * gsi_new(void);
146: static void set_tree(gs_id *gs);
148: /* same for all but vector flavor */
149: static void gs_gop_local_out(gs_id *gs, PetscScalar *vals);
150: /* vector flavor */
151: static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);
153: static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
154: static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
155: static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
156: static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
157: static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);
160: static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
161: static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
162: static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
163: static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
164: static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
166: static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
167: static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
168: static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);
170: static void gs_gop_times(gs_id *gs, PetscScalar *in_vals);
171: static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
172: static void gs_gop_local_times(gs_id *gs, PetscScalar *vals);
173: static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
174: static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
176: static void gs_gop_min(gs_id *gs, PetscScalar *in_vals);
177: static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
178: static void gs_gop_local_min(gs_id *gs, PetscScalar *vals);
179: static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
180: static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
182: static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
183: static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
184: static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
185: static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
186: static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
188: static void gs_gop_max(gs_id *gs, PetscScalar *in_vals);
189: static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
190: static void gs_gop_local_max(gs_id *gs, PetscScalar *vals);
191: static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
192: static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
194: static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
195: static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
196: static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
197: static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
198: static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
200: static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
201: static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
202: static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
203: static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
204: static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
206: static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
207: static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
208: static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
209: static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
213: /* global vars */
214: /* from comm.c module */
216: /* module state inf and fortran interface */
217: static int num_gs_ids = 0;
219: /* should make this dynamic ... later */
220: static int msg_buf=MAX_MSG_BUF;
221: static int vec_sz=GS_VEC_SZ;
222: static int *tree_buf=NULL;
223: static int tree_buf_sz=0;
224: static int ntree=0;
227: /******************************************************************************
228: Function: gs_init_()
230: Input :
231: Output:
232: Return:
233: Description:
234: ******************************************************************************/
235: void gs_init_vec_sz(int size)
236: {
237: /* vec_ch = TRUE; */
239: vec_sz = size;
240: }
242: /******************************************************************************
243: Function: gs_init_()
245: Input :
246: Output:
247: Return:
248: Description:
249: ******************************************************************************/
250: void gs_init_msg_buf_sz(int buf_size)
251: {
252: /* msg_ch = TRUE; */
254: msg_buf = buf_size;
255: }
257: /******************************************************************************
258: Function: gs_init()
260: Input :
262: Output:
264: RETURN:
266: Description:
267: ******************************************************************************/
268: gs_id *
269: gs_init( int *elms, int nel, int level)
270: {
271: gs_id *gs;
272: MPI_Group gs_group;
273: MPI_Comm gs_comm;
275: /* ensure that communication package has been initialized */
276: comm_init();
279: /* determines if we have enough dynamic/semi-static memory */
280: /* checks input, allocs and sets gd_id template */
281: gs = gsi_check_args(elms,nel,level);
283: /* only bit mask version up and working for the moment */
284: /* LATER :: get int list version working for sparse pblms */
285: gsi_via_bit_mask(gs);
288: MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
289: MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
290: gs->gs_comm=gs_comm;
292: return(gs);
293: }
297: /******************************************************************************
298: Function: gsi_new()
300: Input :
301: Output:
302: Return:
303: Description:
305: elm list must >= 0!!!
306: elm repeats allowed
307: ******************************************************************************/
308: static
309: gs_id *
310: gsi_new(void)
311: {
312: gs_id *gs;
313: gs = (gs_id *) malloc(sizeof(gs_id));
314: PetscMemzero(gs,sizeof(gs_id));
315: return(gs);
316: }
320: /******************************************************************************
321: Function: gsi_check_args()
323: Input :
324: Output:
325: Return:
326: Description:
328: elm list must >= 0!!!
329: elm repeats allowed
330: local working copy of elms is sorted
331: ******************************************************************************/
332: static
333: gs_id *
334: gsi_check_args(int *in_elms, int nel, int level)
335: {
336: int i, j, k, t2;
337: int *companion, *elms, *unique, *iptr;
338: int num_local=0, *num_to_reduce, **local_reduce;
339: int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
340: int vals[sizeof(oprs)/sizeof(oprs[0])-1];
341: int work[sizeof(oprs)/sizeof(oprs[0])-1];
342: gs_id *gs;
346: #ifdef SAFE
347: if (!in_elms)
348: {error_msg_fatal("elms point to nothing!!!\n");}
350: if (nel<0)
351: {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
353: if (nel==0)
354: {error_msg_warning("I don't have any elements!!!\n");}
355: #endif
357: /* get space for gs template */
358: gs = gsi_new();
359: gs->id = ++num_gs_ids;
361: /* hmt 6.4.99 */
362: /* caller can set global ids that don't participate to 0 */
363: /* gs_init ignores all zeros in elm list */
364: /* negative global ids are still invalid */
365: for (i=j=0;i<nel;i++)
366: {if (in_elms[i]!=0) {j++;}}
368: k=nel; nel=j;
370: /* copy over in_elms list and create inverse map */
371: elms = (int*) malloc((nel+1)*sizeof(PetscInt));
372: companion = (int*) malloc(nel*sizeof(PetscInt));
373: /* ivec_c_index(companion,nel); */
374: /* ivec_copy(elms,in_elms,nel); */
375: for (i=j=0;i<k;i++)
376: {
377: if (in_elms[i]!=0)
378: {elms[j] = in_elms[i]; companion[j++] = i;}
379: }
381: if (j!=nel)
382: {error_msg_fatal("nel j mismatch!\n");}
384: #ifdef SAFE
385: /* pre-pass ... check to see if sorted */
386: elms[nel] = INT_MAX;
387: iptr = elms;
388: unique = elms+1;
389: j=0;
390: while (*iptr!=INT_MAX)
391: {
392: if (*iptr++>*unique++)
393: {j=1; break;}
394: }
396: /* set up inverse map */
397: if (j)
398: {
399: error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
400: SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
401: }
402: else
403: {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
404: #else
405: SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
406: #endif
407: elms[nel] = INT_MIN;
409: /* first pass */
410: /* determine number of unique elements, check pd */
411: for (i=k=0;i<nel;i+=j)
412: {
413: t2 = elms[i];
414: j=++i;
415:
416: /* clump 'em for now */
417: while (elms[j]==t2) {j++;}
418:
419: /* how many together and num local */
420: if (j-=i)
421: {num_local++; k+=j;}
422: }
424: /* how many unique elements? */
425: gs->repeats=k;
426: gs->nel = nel-k;
429: /* number of repeats? */
430: gs->num_local = num_local;
431: num_local+=2;
432: gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
433: gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));
435: unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
436: gs->elms = unique;
437: gs->nel_total = nel;
438: gs->local_elms = elms;
439: gs->companion = companion;
441: /* compess map as well as keep track of local ops */
442: for (num_local=i=j=0;i<gs->nel;i++)
443: {
444: k=j;
445: t2 = unique[i] = elms[j];
446: companion[i] = companion[j];
447:
448: while (elms[j]==t2) {j++;}
450: if ((t2=(j-k))>1)
451: {
452: /* number together */
453: num_to_reduce[num_local] = t2++;
454: iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));
456: /* to use binary searching don't remap until we check intersection */
457: *iptr++ = i;
458:
459: /* note that we're skipping the first one */
460: while (++k<j)
461: {*(iptr++) = companion[k];}
462: *iptr = -1;
463: }
464: }
466: /* sentinel for ngh_buf */
467: unique[gs->nel]=INT_MAX;
469: /* for two partition sort hack */
470: num_to_reduce[num_local] = 0;
471: local_reduce[num_local] = NULL;
472: num_to_reduce[++num_local] = 0;
473: local_reduce[num_local] = NULL;
475: /* load 'em up */
476: /* note one extra to hold NON_UNIFORM flag!!! */
477: vals[2] = vals[1] = vals[0] = nel;
478: if (gs->nel>0)
479: {
480: vals[3] = unique[0]; /* ivec_lb(elms,nel); */
481: vals[4] = unique[gs->nel-1]; /* ivec_ub(elms,nel); */
482: }
483: else
484: {
485: vals[3] = INT_MAX; /* ivec_lb(elms,nel); */
486: vals[4] = INT_MIN; /* ivec_ub(elms,nel); */
487: }
488: vals[5] = level;
489: vals[6] = num_gs_ids;
491: /* GLOBAL: send 'em out */
492: giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
494: /* must be semi-pos def - only pairwise depends on this */
495: /* LATER - remove this restriction */
496: if (vals[3]<0)
497: {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
499: if (vals[4]==INT_MAX)
500: {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
502: gs->nel_min = vals[0];
503: gs->nel_max = vals[1];
504: gs->nel_sum = vals[2];
505: gs->gl_min = vals[3];
506: gs->gl_max = vals[4];
507: gs->negl = vals[4]-vals[3]+1;
509: if (gs->negl<=0)
510: {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
511:
512: /* LATER :: add level == -1 -> program selects level */
513: if (vals[5]<0)
514: {vals[5]=0;}
515: else if (vals[5]>num_nodes)
516: {vals[5]=num_nodes;}
517: gs->level = vals[5];
519: return(gs);
520: }
523: /******************************************************************************
524: Function: gsi_via_bit_mask()
526: Input :
527: Output:
528: Return:
529: Description:
532: ******************************************************************************/
533: static
534: void
535: gsi_via_bit_mask(gs_id *gs)
536: {
537: int i, nel, *elms;
538: int t1;
539: int **reduce;
540: int *map;
542: /* totally local removes ... ct_bits == 0 */
543: get_ngh_buf(gs);
545: if (gs->level)
546: {set_pairwise(gs);}
548: if (gs->max_left_over)
549: {set_tree(gs);}
551: /* intersection local and pairwise/tree? */
552: gs->num_local_total = gs->num_local;
553: gs->gop_local_reduce = gs->local_reduce;
554: gs->num_gop_local_reduce = gs->num_local_reduce;
556: map = gs->companion;
558: /* is there any local compression */
559: if (!gs->num_local) {
560: gs->local_strength = NONE;
561: gs->num_local_gop = 0;
562: } else {
563: /* ok find intersection */
564: map = gs->companion;
565: reduce = gs->local_reduce;
566: for (i=0, t1=0; i<gs->num_local; i++, reduce++)
567: {
568: if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
569: ||
570: ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
571: {
572: /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
573: t1++;
574: if (gs->num_local_reduce[i]<=0)
575: {error_msg_fatal("nobody in list?");}
576: gs->num_local_reduce[i] *= -1;
577: }
578: **reduce=map[**reduce];
579: }
581: /* intersection is empty */
582: if (!t1)
583: {
584: gs->local_strength = FULL;
585: gs->num_local_gop = 0;
586: }
587: /* intersection not empty */
588: else
589: {
590: gs->local_strength = PARTIAL;
591: SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
592: gs->num_local + 1, SORT_INT_PTR);
594: gs->num_local_gop = t1;
595: gs->num_local_total = gs->num_local;
596: gs->num_local -= t1;
597: gs->gop_local_reduce = gs->local_reduce;
598: gs->num_gop_local_reduce = gs->num_local_reduce;
600: for (i=0; i<t1; i++)
601: {
602: if (gs->num_gop_local_reduce[i]>=0)
603: {error_msg_fatal("they aren't negative?");}
604: gs->num_gop_local_reduce[i] *= -1;
605: gs->local_reduce++;
606: gs->num_local_reduce++;
607: }
608: gs->local_reduce++;
609: gs->num_local_reduce++;
610: }
611: }
613: elms = gs->pw_elm_list;
614: nel = gs->len_pw_list;
615: for (i=0; i<nel; i++)
616: {elms[i] = map[elms[i]];}
618: elms = gs->tree_map_in;
619: nel = gs->tree_map_sz;
620: for (i=0; i<nel; i++)
621: {elms[i] = map[elms[i]];}
623: /* clean up */
624: free((void*) gs->local_elms);
625: free((void*) gs->companion);
626: free((void*) gs->elms);
627: free((void*) gs->ngh_buf);
628: gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
629: }
633: /******************************************************************************
634: Function: place_in_tree()
636: Input :
637: Output:
638: Return:
639: Description:
642: ******************************************************************************/
643: static
644: void
645: place_in_tree( int elm)
646: {
647: int *tp, n;
650: if (ntree==tree_buf_sz)
651: {
652: if (tree_buf_sz)
653: {
654: tp = tree_buf;
655: n = tree_buf_sz;
656: tree_buf_sz<<=1;
657: tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
658: ivec_copy(tree_buf,tp,n);
659: free(tp);
660: }
661: else
662: {
663: tree_buf_sz = TREE_BUF_SZ;
664: tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
665: }
666: }
668: tree_buf[ntree++] = elm;
669: }
673: /******************************************************************************
674: Function: get_ngh_buf()
676: Input :
677: Output:
678: Return:
679: Description:
682: ******************************************************************************/
683: static
684: void
685: get_ngh_buf(gs_id *gs)
686: {
687: int i, j, npw=0, ntree_map=0;
688: int p_mask_size, ngh_buf_size, buf_size;
689: int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
690: int *ngh_buf, *buf1, *buf2;
691: int offset, per_load, num_loads, or_ct, start, end;
692: int *ptr1, *ptr2, i_start, negl, nel, *elms;
693: int oper=GL_B_OR;
694: int *ptr3, *t_mask, level, ct1, ct2;
696: /* to make life easier */
697: nel = gs->nel;
698: elms = gs->elms;
699: level = gs->level;
700:
701: /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
702: p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
703: set_bit_mask(p_mask,p_mask_size,my_id);
705: /* allocate space for masks and info bufs */
706: gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
707: gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
708: gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
709: t_mask = (int*) malloc(p_mask_size);
710: gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);
712: /* comm buffer size ... memory usage bounded by ~2*msg_buf */
713: /* had thought I could exploit rendezvous threshold */
715: /* default is one pass */
716: per_load = negl = gs->negl;
717: gs->num_loads = num_loads = 1;
718: i=p_mask_size*negl;
720: /* possible overflow on buffer size */
721: /* overflow hack */
722: if (i<0) {i=INT_MAX;}
724: buf_size = PetscMin(msg_buf,i);
726: /* can we do it? */
727: if (p_mask_size>buf_size)
728: {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
730: /* get giop buf space ... make *only* one malloc */
731: buf1 = (int*) malloc(buf_size<<1);
733: /* more than one gior exchange needed? */
734: if (buf_size!=i)
735: {
736: per_load = buf_size/p_mask_size;
737: buf_size = per_load*p_mask_size;
738: gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
739: }
742: /* convert buf sizes from #bytes to #ints - 32 bit only! */
743: #ifdef SAFE
744: p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
745: #else
746: p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2;
747: #endif
748:
749: /* find giop work space */
750: buf2 = buf1+buf_size;
752: /* hold #ints needed for processor masks */
753: gs->mask_sz=p_mask_size;
755: /* init buffers */
756: ivec_zero(sh_proc_mask,p_mask_size);
757: ivec_zero(pw_sh_proc_mask,p_mask_size);
758: ivec_zero(ngh_buf,ngh_buf_size);
760: /* HACK reset tree info */
761: tree_buf=NULL;
762: tree_buf_sz=ntree=0;
764: /* queue the tree elements for now */
765: /* elms_q = new_queue(); */
766:
767: /* can also queue tree info for pruned or forest implememtation */
768: /* mask_q = new_queue(); */
770: /* ok do it */
771: for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
772: {
773: /* identity for bitwise or is 000...000 */
774: ivec_zero(buf1,buf_size);
776: /* load msg buffer */
777: for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
778: {
779: offset = (offset-start)*p_mask_size;
780: ivec_copy(buf1+offset,p_mask,p_mask_size);
781: }
783: /* GLOBAL: pass buffer */
784: giop(buf1,buf2,buf_size,&oper);
787: /* unload buffer into ngh_buf */
788: ptr2=(elms+i_start);
789: for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
790: {
791: /* I own it ... may have to pairwise it */
792: if (j==*ptr2)
793: {
794: /* do i share it w/anyone? */
795: #ifdef SAFE
796: ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
797: #else
798: ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
799: #endif
800: /* guess not */
801: if (ct1<2)
802: {ptr2++; ptr1+=p_mask_size; continue;}
804: /* i do ... so keep info and turn off my bit */
805: ivec_copy(ptr1,ptr3,p_mask_size);
806: ivec_xor(ptr1,p_mask,p_mask_size);
807: ivec_or(sh_proc_mask,ptr1,p_mask_size);
808:
809: /* is it to be done pairwise? */
810: if (--ct1<=level)
811: {
812: npw++;
813:
814: /* turn on high bit to indicate pw need to process */
815: *ptr2++ |= TOP_BIT;
816: ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
817: ptr1+=p_mask_size;
818: continue;
819: }
821: /* get set for next and note that I have a tree contribution */
822: /* could save exact elm index for tree here -> save a search */
823: ptr2++; ptr1+=p_mask_size; ntree_map++;
824: }
825: /* i don't but still might be involved in tree */
826: else
827: {
829: /* shared by how many? */
830: #ifdef SAFE
831: ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
832: #else
833: ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
834: #endif
836: /* none! */
837: if (ct1<2)
838: {continue;}
840: /* is it going to be done pairwise? but not by me of course!*/
841: if (--ct1<=level)
842: {continue;}
843: }
844: /* LATER we're going to have to process it NOW */
845: /* nope ... tree it */
846: place_in_tree(j);
847: }
848: }
850: free((void*)t_mask);
851: free((void*)buf1);
853: gs->len_pw_list=npw;
854: gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
856: /* expand from bit mask list to int list and save ngh list */
857: gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
858: bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
860: gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
862: oper = GL_MAX;
863: ct1 = gs->num_nghs;
864: giop(&ct1,&ct2,1,&oper);
865: gs->max_nghs = ct1;
867: gs->tree_map_sz = ntree_map;
868: gs->max_left_over=ntree;
870: free((void*)p_mask);
871: free((void*)sh_proc_mask);
873: }
879: /******************************************************************************
880: Function: pairwise_init()
882: Input :
883: Output:
884: Return:
885: Description:
887: if an element is shared by fewer that level# of nodes do pairwise exch
888: ******************************************************************************/
889: static
890: void
891: set_pairwise(gs_id *gs)
892: {
893: int i, j;
894: int p_mask_size;
895: int *p_mask, *sh_proc_mask, *tmp_proc_mask;
896: int *ngh_buf, *buf2;
897: int offset;
898: int *msg_list, *msg_size, **msg_nodes, nprs;
899: int *pairwise_elm_list, len_pair_list=0;
900: int *iptr, t1, i_start, nel, *elms;
901: int ct;
905: /* to make life easier */
906: nel = gs->nel;
907: elms = gs->elms;
908: ngh_buf = gs->ngh_buf;
909: sh_proc_mask = gs->pw_nghs;
911: /* need a few temp masks */
912: p_mask_size = len_bit_mask(num_nodes);
913: p_mask = (int*) malloc(p_mask_size);
914: tmp_proc_mask = (int*) malloc(p_mask_size);
916: /* set mask to my my_id's bit mask */
917: set_bit_mask(p_mask,p_mask_size,my_id);
919: #ifdef SAFE
920: p_mask_size /= sizeof(PetscInt);
921: #else
922: p_mask_size >>= 2;
923: #endif
924:
925: len_pair_list=gs->len_pw_list;
926: gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));
928: /* how many processors (nghs) do we have to exchange with? */
929: nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
932: /* allocate space for gs_gop() info */
933: gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs);
934: gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs);
935: gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));
937: /* init msg_size list */
938: ivec_zero(msg_size,nprs);
940: /* expand from bit mask list to int list */
941: bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
942:
943: /* keep list of elements being handled pairwise */
944: for (i=j=0;i<nel;i++)
945: {
946: if (elms[i] & TOP_BIT)
947: {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
948: }
949: pairwise_elm_list[j] = -1;
951: gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1));
952: gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
953: gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1));
954: gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
955: gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
957: /* find who goes to each processor */
958: for (i_start=i=0;i<nprs;i++)
959: {
960: /* processor i's mask */
961: set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);
963: /* det # going to processor i */
964: for (ct=j=0;j<len_pair_list;j++)
965: {
966: buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
967: ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
968: if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
969: {ct++;}
970: }
971: msg_size[i] = ct;
972: i_start = PetscMax(i_start,ct);
974: /*space to hold nodes in message to first neighbor */
975: msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));
977: for (j=0;j<len_pair_list;j++)
978: {
979: buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
980: ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
981: if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
982: {*iptr++ = j;}
983: }
984: *iptr = -1;
985: }
986: msg_nodes[nprs] = NULL;
988: j=gs->loc_node_pairs=i_start;
989: t1 = GL_MAX;
990: giop(&i_start,&offset,1,&t1);
991: gs->max_node_pairs = i_start;
993: i_start=j;
994: t1 = GL_MIN;
995: giop(&i_start,&offset,1,&t1);
996: gs->min_node_pairs = i_start;
998: i_start=j;
999: t1 = GL_ADD;
1000: giop(&i_start,&offset,1,&t1);
1001: gs->avg_node_pairs = i_start/num_nodes + 1;
1003: i_start=nprs;
1004: t1 = GL_MAX;
1005: giop(&i_start,&offset,1,&t1);
1006: gs->max_pairs = i_start;
1009: /* remap pairwise in tail of gsi_via_bit_mask() */
1010: gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
1011: gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1012: gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1014: /* reset malloc pool */
1015: free((void*)p_mask);
1016: free((void*)tmp_proc_mask);
1018: }
1022: /******************************************************************************
1023: Function: set_tree()
1025: Input :
1026: Output:
1027: Return:
1028: Description:
1030: to do pruned tree just save ngh buf copy for each one and decode here!
1031: ******************************************************************************/
1032: static
1033: void
1034: set_tree(gs_id *gs)
1035: {
1036: int i, j, n, nel;
1037: int *iptr_in, *iptr_out, *tree_elms, *elms;
1040: /* local work ptrs */
1041: elms = gs->elms;
1042: nel = gs->nel;
1044: /* how many via tree */
1045: gs->tree_nel = n = ntree;
1046: gs->tree_elms = tree_elms = iptr_in = tree_buf;
1047: gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1048: gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1049: j=gs->tree_map_sz;
1050: gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1));
1051: gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));
1053: /* search the longer of the two lists */
1054: /* note ... could save this info in get_ngh_buf and save searches */
1055: if (n<=nel)
1056: {
1057: /* bijective fct w/remap - search elm list */
1058: for (i=0; i<n; i++)
1059: {
1060: if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1061: {*iptr_in++ = j; *iptr_out++ = i;}
1062: }
1063: }
1064: else
1065: {
1066: for (i=0; i<nel; i++)
1067: {
1068: if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1069: {*iptr_in++ = i; *iptr_out++ = j;}
1070: }
1071: }
1073: /* sentinel */
1074: *iptr_in = *iptr_out = -1;
1076: }
1079: /******************************************************************************
1080: Function: gather_scatter
1082: Input :
1083: Output:
1084: Return:
1085: Description:
1086: ******************************************************************************/
1087: static
1088: void
1089: gs_gop_local_out( gs_id *gs, PetscScalar *vals)
1090: {
1091: int *num, *map, **reduce;
1092: PetscScalar tmp;
1095: num = gs->num_gop_local_reduce;
1096: reduce = gs->gop_local_reduce;
1097: while ((map = *reduce++))
1098: {
1099: /* wall */
1100: if (*num == 2)
1101: {
1102: num ++;
1103: vals[map[1]] = vals[map[0]];
1104: }
1105: /* corner shared by three elements */
1106: else if (*num == 3)
1107: {
1108: num ++;
1109: vals[map[2]] = vals[map[1]] = vals[map[0]];
1110: }
1111: /* corner shared by four elements */
1112: else if (*num == 4)
1113: {
1114: num ++;
1115: vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1116: }
1117: /* general case ... odd geoms ... 3D*/
1118: else
1119: {
1120: num++;
1121: tmp = *(vals + *map++);
1122: while (*map >= 0)
1123: {*(vals + *map++) = tmp;}
1124: }
1125: }
1126: }
1130: /******************************************************************************
1131: Function: gather_scatter
1133: Input :
1134: Output:
1135: Return:
1136: Description:
1137: ******************************************************************************/
1138: void
1139: gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1140: {
1141: /* local only operations!!! */
1142: if (gs->num_local)
1143: {gs_gop_local_binary(gs,vals,fct);}
1144:
1145: /* if intersection tree/pairwise and local isn't empty */
1146: if (gs->num_local_gop)
1147: {
1148: gs_gop_local_in_binary(gs,vals,fct);
1149:
1150: /* pairwise */
1151: if (gs->num_pairs)
1152: {gs_gop_pairwise_binary(gs,vals,fct);}
1153:
1154: /* tree */
1155: else if (gs->max_left_over)
1156: {gs_gop_tree_binary(gs,vals,fct);}
1157:
1158: gs_gop_local_out(gs,vals);
1159: }
1160: /* if intersection tree/pairwise and local is empty */
1161: else
1162: {
1163: /* pairwise */
1164: if (gs->num_pairs)
1165: {gs_gop_pairwise_binary(gs,vals,fct);}
1166:
1167: /* tree */
1168: else if (gs->max_left_over)
1169: {gs_gop_tree_binary(gs,vals,fct);}
1170: }
1171: }
1175: /******************************************************************************
1176: Function: gather_scatter
1178: Input :
1179: Output:
1180: Return:
1181: Description:
1182: ******************************************************************************/
1183: static
1184: void
1185: gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct)
1186: {
1187: int *num, *map, **reduce;
1188: PetscScalar tmp;
1191: num = gs->num_local_reduce;
1192: reduce = gs->local_reduce;
1193: while ((map = *reduce))
1194: {
1195: num ++;
1196: (*fct)(&tmp,NULL,1);
1197: /* tmp = 0.0; */
1198: while (*map >= 0)
1199: {(*fct)(&tmp,(vals + *map),1); map++;}
1200: /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1201:
1202: map = *reduce++;
1203: while (*map >= 0)
1204: {*(vals + *map++) = tmp;}
1205: }
1206: }
1210: /******************************************************************************
1211: Function: gather_scatter
1213: Input :
1214: Output:
1215: Return:
1216: Description:
1217: ******************************************************************************/
1218: static
1219: void
1220: gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct)
1221: {
1222: int *num, *map, **reduce;
1223: PetscScalar *base;
1226: num = gs->num_gop_local_reduce;
1228: reduce = gs->gop_local_reduce;
1229: while ((map = *reduce++))
1230: {
1231: num++;
1232: base = vals + *map++;
1233: while (*map >= 0)
1234: {(*fct)(base,(vals + *map),1); map++;}
1235: }
1236: }
1240: /******************************************************************************
1241: Function: gather_scatter
1243: VERSION 3 ::
1245: Input :
1246: Output:
1247: Return:
1248: Description:
1249: ******************************************************************************/
1250: static
1251: void
1252: gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals,
1253: rbfp fct)
1254: {
1255: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1256: int *iptr, *msg_list, *msg_size, **msg_nodes;
1257: int *pw, *list, *size, **nodes;
1258: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1259: MPI_Status status;
1262: /* strip and load s */
1263: msg_list =list = gs->pair_list;
1264: msg_size =size = gs->msg_sizes;
1265: msg_nodes=nodes = gs->node_list;
1266: iptr=pw = gs->pw_elm_list;
1267: dptr1=dptr3 = gs->pw_vals;
1268: msg_ids_in = ids_in = gs->msg_ids_in;
1269: msg_ids_out = ids_out = gs->msg_ids_out;
1270: dptr2 = gs->out;
1271: in1=in2 = gs->in;
1273: /* post the receives */
1274: /* msg_nodes=nodes; */
1275: do
1276: {
1277: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1278: second one *list and do list++ afterwards */
1279: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1280: gs->gs_comm, msg_ids_in++);
1281: in1 += *size++;
1282: }
1283: while (*++msg_nodes);
1284: msg_nodes=nodes;
1286: /* load gs values into in out gs buffers */
1287: while (*iptr >= 0)
1288: {*dptr3++ = *(in_vals + *iptr++);}
1290: /* load out buffers and post the sends */
1291: while ((iptr = *msg_nodes++))
1292: {
1293: dptr3 = dptr2;
1294: while (*iptr >= 0)
1295: {*dptr2++ = *(dptr1 + *iptr++);}
1296: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1297: /* is msg_ids_out++ correct? */
1298: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1299: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1300: }
1302: if (gs->max_left_over)
1303: {gs_gop_tree_binary(gs,in_vals,fct);}
1305: /* process the received data */
1306: msg_nodes=nodes;
1307: while ((iptr = *nodes++))
1308: {
1309: /* Should I check the return value of MPI_Wait() or status? */
1310: /* Can this loop be replaced by a call to MPI_Waitall()? */
1311: MPI_Wait(ids_in++, &status);
1312: while (*iptr >= 0)
1313: {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1314: /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1315: }
1317: /* replace vals */
1318: while (*pw >= 0)
1319: {*(in_vals + *pw++) = *dptr1++;}
1321: /* clear isend message handles */
1322: /* This changed for clarity though it could be the same */
1323: while (*msg_nodes++)
1324: /* Should I check the return value of MPI_Wait() or status? */
1325: /* Can this loop be replaced by a call to MPI_Waitall()? */
1326: {MPI_Wait(ids_out++, &status);}
1327: }
1331: /******************************************************************************
1332: Function: gather_scatter
1334: Input :
1335: Output:
1336: Return:
1337: Description:
1338: ******************************************************************************/
1339: static
1340: void
1341: gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct)
1342: {
1343: int size;
1344: int *in, *out;
1345: PetscScalar *buf, *work;
1347: in = gs->tree_map_in;
1348: out = gs->tree_map_out;
1349: buf = gs->tree_buf;
1350: work = gs->tree_work;
1351: size = gs->tree_nel;
1353: /* load vals vector w/identity */
1354: (*fct)(buf,NULL,size);
1355:
1356: /* load my contribution into val vector */
1357: while (*in >= 0)
1358: {(*fct)((buf + *out++),(vals + *in++),-1);}
1360: gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1362: in = gs->tree_map_in;
1363: out = gs->tree_map_out;
1364: while (*in >= 0)
1365: {(*fct)((vals + *in++),(buf + *out++),-1);}
1367: }
1372: /******************************************************************************
1373: Function: gather_scatter
1375: Input :
1376: Output:
1377: Return:
1378: Description:
1379: ******************************************************************************/
1380: void
1381: gs_gop( gs_id *gs, PetscScalar *vals, const char *op)
1382: {
1383: switch (*op) {
1384: case '+':
1385: gs_gop_plus(gs,vals);
1386: break;
1387: case '*':
1388: gs_gop_times(gs,vals);
1389: break;
1390: case 'a':
1391: gs_gop_min_abs(gs,vals);
1392: break;
1393: case 'A':
1394: gs_gop_max_abs(gs,vals);
1395: break;
1396: case 'e':
1397: gs_gop_exists(gs,vals);
1398: break;
1399: case 'm':
1400: gs_gop_min(gs,vals);
1401: break;
1402: case 'M':
1403: gs_gop_max(gs,vals); break;
1404: /*
1405: if (*(op+1)=='\0')
1406: {gs_gop_max(gs,vals); break;}
1407: else if (*(op+1)=='X')
1408: {gs_gop_max_abs(gs,vals); break;}
1409: else if (*(op+1)=='N')
1410: {gs_gop_min_abs(gs,vals); break;}
1411: */
1412: default:
1413: error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1414: error_msg_warning("gs_gop() :: default :: plus");
1415: gs_gop_plus(gs,vals);
1416: break;
1417: }
1418: }
1421: /******************************************************************************
1422: Function: gather_scatter
1424: Input :
1425: Output:
1426: Return:
1427: Description:
1428: ******************************************************************************/
1429: static void
1430: gs_gop_exists( gs_id *gs, PetscScalar *vals)
1431: {
1432: /* local only operations!!! */
1433: if (gs->num_local)
1434: {gs_gop_local_exists(gs,vals);}
1436: /* if intersection tree/pairwise and local isn't empty */
1437: if (gs->num_local_gop)
1438: {
1439: gs_gop_local_in_exists(gs,vals);
1441: /* pairwise */
1442: if (gs->num_pairs)
1443: {gs_gop_pairwise_exists(gs,vals);}
1444:
1445: /* tree */
1446: else if (gs->max_left_over)
1447: {gs_gop_tree_exists(gs,vals);}
1448:
1449: gs_gop_local_out(gs,vals);
1450: }
1451: /* if intersection tree/pairwise and local is empty */
1452: else
1453: {
1454: /* pairwise */
1455: if (gs->num_pairs)
1456: {gs_gop_pairwise_exists(gs,vals);}
1457:
1458: /* tree */
1459: else if (gs->max_left_over)
1460: {gs_gop_tree_exists(gs,vals);}
1461: }
1462: }
1466: /******************************************************************************
1467: Function: gather_scatter
1469: Input :
1470: Output:
1471: Return:
1472: Description:
1473: ******************************************************************************/
1474: static
1475: void
1476: gs_gop_local_exists( gs_id *gs, PetscScalar *vals)
1477: {
1478: int *num, *map, **reduce;
1479: PetscScalar tmp;
1482: num = gs->num_local_reduce;
1483: reduce = gs->local_reduce;
1484: while ((map = *reduce))
1485: {
1486: num ++;
1487: tmp = 0.0;
1488: while (*map >= 0)
1489: {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1490:
1491: map = *reduce++;
1492: while (*map >= 0)
1493: {*(vals + *map++) = tmp;}
1494: }
1495: }
1499: /******************************************************************************
1500: Function: gather_scatter
1502: Input :
1503: Output:
1504: Return:
1505: Description:
1506: ******************************************************************************/
1507: static
1508: void
1509: gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals)
1510: {
1511: int *num, *map, **reduce;
1512: PetscScalar *base;
1515: num = gs->num_gop_local_reduce;
1516: reduce = gs->gop_local_reduce;
1517: while ((map = *reduce++))
1518: {
1519: num++;
1520: base = vals + *map++;
1521: while (*map >= 0)
1522: {*base = EXISTS(*base,*(vals + *map)); map++;}
1523: }
1524: }
1528: /******************************************************************************
1529: Function: gather_scatter
1531: VERSION 3 ::
1533: Input :
1534: Output:
1535: Return:
1536: Description:
1537: ******************************************************************************/
1538: static
1539: void
1540: gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals)
1541: {
1542: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1543: int *iptr, *msg_list, *msg_size, **msg_nodes;
1544: int *pw, *list, *size, **nodes;
1545: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1546: MPI_Status status;
1549: /* strip and load s */
1550: msg_list =list = gs->pair_list;
1551: msg_size =size = gs->msg_sizes;
1552: msg_nodes=nodes = gs->node_list;
1553: iptr=pw = gs->pw_elm_list;
1554: dptr1=dptr3 = gs->pw_vals;
1555: msg_ids_in = ids_in = gs->msg_ids_in;
1556: msg_ids_out = ids_out = gs->msg_ids_out;
1557: dptr2 = gs->out;
1558: in1=in2 = gs->in;
1560: /* post the receives */
1561: /* msg_nodes=nodes; */
1562: do
1563: {
1564: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1565: second one *list and do list++ afterwards */
1566: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1567: gs->gs_comm, msg_ids_in++);
1568: in1 += *size++;
1569: }
1570: while (*++msg_nodes);
1571: msg_nodes=nodes;
1573: /* load gs values into in out gs buffers */
1574: while (*iptr >= 0)
1575: {*dptr3++ = *(in_vals + *iptr++);}
1577: /* load out buffers and post the sends */
1578: while ((iptr = *msg_nodes++))
1579: {
1580: dptr3 = dptr2;
1581: while (*iptr >= 0)
1582: {*dptr2++ = *(dptr1 + *iptr++);}
1583: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1584: /* is msg_ids_out++ correct? */
1585: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1586: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1587: }
1589: if (gs->max_left_over)
1590: {gs_gop_tree_exists(gs,in_vals);}
1592: /* process the received data */
1593: msg_nodes=nodes;
1594: while ((iptr = *nodes++))
1595: {
1596: /* Should I check the return value of MPI_Wait() or status? */
1597: /* Can this loop be replaced by a call to MPI_Waitall()? */
1598: MPI_Wait(ids_in++, &status);
1599: while (*iptr >= 0)
1600: {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1601: }
1603: /* replace vals */
1604: while (*pw >= 0)
1605: {*(in_vals + *pw++) = *dptr1++;}
1607: /* clear isend message handles */
1608: /* This changed for clarity though it could be the same */
1609: while (*msg_nodes++)
1610: /* Should I check the return value of MPI_Wait() or status? */
1611: /* Can this loop be replaced by a call to MPI_Waitall()? */
1612: {MPI_Wait(ids_out++, &status);}
1613: }
1617: /******************************************************************************
1618: Function: gather_scatter
1620: Input :
1621: Output:
1622: Return:
1623: Description:
1624: ******************************************************************************/
1625: static
1626: void
1627: gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1628: {
1629: int size;
1630: int *in, *out;
1631: PetscScalar *buf, *work;
1632: int op[] = {GL_EXISTS,0};
1635: in = gs->tree_map_in;
1636: out = gs->tree_map_out;
1637: buf = gs->tree_buf;
1638: work = gs->tree_work;
1639: size = gs->tree_nel;
1641: rvec_zero(buf,size);
1643: while (*in >= 0)
1644: {
1645: /*
1646: printf("%d :: out=%d\n",my_id,*out);
1647: printf("%d :: in=%d\n",my_id,*in);
1648: */
1649: *(buf + *out++) = *(vals + *in++);
1650: }
1652: grop(buf,work,size,op);
1654: in = gs->tree_map_in;
1655: out = gs->tree_map_out;
1657: while (*in >= 0)
1658: {*(vals + *in++) = *(buf + *out++);}
1660: }
1664: /******************************************************************************
1665: Function: gather_scatter
1667: Input :
1668: Output:
1669: Return:
1670: Description:
1671: ******************************************************************************/
1672: static void
1673: gs_gop_max_abs( gs_id *gs, PetscScalar *vals)
1674: {
1675: /* local only operations!!! */
1676: if (gs->num_local)
1677: {gs_gop_local_max_abs(gs,vals);}
1679: /* if intersection tree/pairwise and local isn't empty */
1680: if (gs->num_local_gop)
1681: {
1682: gs_gop_local_in_max_abs(gs,vals);
1684: /* pairwise */
1685: if (gs->num_pairs)
1686: {gs_gop_pairwise_max_abs(gs,vals);}
1687:
1688: /* tree */
1689: else if (gs->max_left_over)
1690: {gs_gop_tree_max_abs(gs,vals);}
1691:
1692: gs_gop_local_out(gs,vals);
1693: }
1694: /* if intersection tree/pairwise and local is empty */
1695: else
1696: {
1697: /* pairwise */
1698: if (gs->num_pairs)
1699: {gs_gop_pairwise_max_abs(gs,vals);}
1700:
1701: /* tree */
1702: else if (gs->max_left_over)
1703: {gs_gop_tree_max_abs(gs,vals);}
1704: }
1705: }
1709: /******************************************************************************
1710: Function: gather_scatter
1712: Input :
1713: Output:
1714: Return:
1715: Description:
1716: ******************************************************************************/
1717: static
1718: void
1719: gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals)
1720: {
1721: int *num, *map, **reduce;
1722: PetscScalar tmp;
1725: num = gs->num_local_reduce;
1726: reduce = gs->local_reduce;
1727: while ((map = *reduce))
1728: {
1729: num ++;
1730: tmp = 0.0;
1731: while (*map >= 0)
1732: {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1733:
1734: map = *reduce++;
1735: while (*map >= 0)
1736: {*(vals + *map++) = tmp;}
1737: }
1738: }
1742: /******************************************************************************
1743: Function: gather_scatter
1745: Input :
1746: Output:
1747: Return:
1748: Description:
1749: ******************************************************************************/
1750: static
1751: void
1752: gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals)
1753: {
1754: int *num, *map, **reduce;
1755: PetscScalar *base;
1758: num = gs->num_gop_local_reduce;
1759: reduce = gs->gop_local_reduce;
1760: while ((map = *reduce++))
1761: {
1762: num++;
1763: base = vals + *map++;
1764: while (*map >= 0)
1765: {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1766: }
1767: }
1771: /******************************************************************************
1772: Function: gather_scatter
1774: VERSION 3 ::
1776: Input :
1777: Output:
1778: Return:
1779: Description:
1780: ******************************************************************************/
1781: static
1782: void
1783: gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals)
1784: {
1785: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1786: int *iptr, *msg_list, *msg_size, **msg_nodes;
1787: int *pw, *list, *size, **nodes;
1788: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1789: MPI_Status status;
1792: /* strip and load s */
1793: msg_list =list = gs->pair_list;
1794: msg_size =size = gs->msg_sizes;
1795: msg_nodes=nodes = gs->node_list;
1796: iptr=pw = gs->pw_elm_list;
1797: dptr1=dptr3 = gs->pw_vals;
1798: msg_ids_in = ids_in = gs->msg_ids_in;
1799: msg_ids_out = ids_out = gs->msg_ids_out;
1800: dptr2 = gs->out;
1801: in1=in2 = gs->in;
1803: /* post the receives */
1804: /* msg_nodes=nodes; */
1805: do
1806: {
1807: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1808: second one *list and do list++ afterwards */
1809: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1810: gs->gs_comm, msg_ids_in++);
1811: in1 += *size++;
1812: }
1813: while (*++msg_nodes);
1814: msg_nodes=nodes;
1816: /* load gs values into in out gs buffers */
1817: while (*iptr >= 0)
1818: {*dptr3++ = *(in_vals + *iptr++);}
1820: /* load out buffers and post the sends */
1821: while ((iptr = *msg_nodes++))
1822: {
1823: dptr3 = dptr2;
1824: while (*iptr >= 0)
1825: {*dptr2++ = *(dptr1 + *iptr++);}
1826: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1827: /* is msg_ids_out++ correct? */
1828: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1829: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1830: }
1832: if (gs->max_left_over)
1833: {gs_gop_tree_max_abs(gs,in_vals);}
1835: /* process the received data */
1836: msg_nodes=nodes;
1837: while ((iptr = *nodes++))
1838: {
1839: /* Should I check the return value of MPI_Wait() or status? */
1840: /* Can this loop be replaced by a call to MPI_Waitall()? */
1841: MPI_Wait(ids_in++, &status);
1842: while (*iptr >= 0)
1843: {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1844: }
1846: /* replace vals */
1847: while (*pw >= 0)
1848: {*(in_vals + *pw++) = *dptr1++;}
1850: /* clear isend message handles */
1851: /* This changed for clarity though it could be the same */
1852: while (*msg_nodes++)
1853: /* Should I check the return value of MPI_Wait() or status? */
1854: /* Can this loop be replaced by a call to MPI_Waitall()? */
1855: {MPI_Wait(ids_out++, &status);}
1856: }
1860: /******************************************************************************
1861: Function: gather_scatter
1863: Input :
1864: Output:
1865: Return:
1866: Description:
1867: ******************************************************************************/
1868: static
1869: void
1870: gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1871: {
1872: int size;
1873: int *in, *out;
1874: PetscScalar *buf, *work;
1875: int op[] = {GL_MAX_ABS,0};
1878: in = gs->tree_map_in;
1879: out = gs->tree_map_out;
1880: buf = gs->tree_buf;
1881: work = gs->tree_work;
1882: size = gs->tree_nel;
1884: rvec_zero(buf,size);
1886: while (*in >= 0)
1887: {
1888: /*
1889: printf("%d :: out=%d\n",my_id,*out);
1890: printf("%d :: in=%d\n",my_id,*in);
1891: */
1892: *(buf + *out++) = *(vals + *in++);
1893: }
1895: grop(buf,work,size,op);
1897: in = gs->tree_map_in;
1898: out = gs->tree_map_out;
1900: while (*in >= 0)
1901: {*(vals + *in++) = *(buf + *out++);}
1903: }
1907: /******************************************************************************
1908: Function: gather_scatter
1910: Input :
1911: Output:
1912: Return:
1913: Description:
1914: ******************************************************************************/
1915: static void
1916: gs_gop_max( gs_id *gs, PetscScalar *vals)
1917: {
1919: /* local only operations!!! */
1920: if (gs->num_local)
1921: {gs_gop_local_max(gs,vals);}
1923: /* if intersection tree/pairwise and local isn't empty */
1924: if (gs->num_local_gop)
1925: {
1926: gs_gop_local_in_max(gs,vals);
1928: /* pairwise */
1929: if (gs->num_pairs)
1930: {gs_gop_pairwise_max(gs,vals);}
1931:
1932: /* tree */
1933: else if (gs->max_left_over)
1934: {gs_gop_tree_max(gs,vals);}
1935:
1936: gs_gop_local_out(gs,vals);
1937: }
1938: /* if intersection tree/pairwise and local is empty */
1939: else
1940: {
1941: /* pairwise */
1942: if (gs->num_pairs)
1943: {gs_gop_pairwise_max(gs,vals);}
1944:
1945: /* tree */
1946: else if (gs->max_left_over)
1947: {gs_gop_tree_max(gs,vals);}
1948: }
1949: }
1953: /******************************************************************************
1954: Function: gather_scatter
1956: Input :
1957: Output:
1958: Return:
1959: Description:
1960: ******************************************************************************/
1961: static
1962: void
1963: gs_gop_local_max( gs_id *gs, PetscScalar *vals)
1964: {
1965: int *num, *map, **reduce;
1966: PetscScalar tmp;
1969: num = gs->num_local_reduce;
1970: reduce = gs->local_reduce;
1971: while ((map = *reduce))
1972: {
1973: num ++;
1974: tmp = -REAL_MAX;
1975: while (*map >= 0)
1976: {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1977:
1978: map = *reduce++;
1979: while (*map >= 0)
1980: {*(vals + *map++) = tmp;}
1981: }
1982: }
1986: /******************************************************************************
1987: Function: gather_scatter
1989: Input :
1990: Output:
1991: Return:
1992: Description:
1993: ******************************************************************************/
1994: static
1995: void
1996: gs_gop_local_in_max( gs_id *gs, PetscScalar *vals)
1997: {
1998: int *num, *map, **reduce;
1999: PetscScalar *base;
2002: num = gs->num_gop_local_reduce;
2003: reduce = gs->gop_local_reduce;
2004: while ((map = *reduce++))
2005: {
2006: num++;
2007: base = vals + *map++;
2008: while (*map >= 0)
2009: {*base = PetscMax(*base,*(vals + *map)); map++;}
2010: }
2011: }
2015: /******************************************************************************
2016: Function: gather_scatter
2018: VERSION 3 ::
2020: Input :
2021: Output:
2022: Return:
2023: Description:
2024: ******************************************************************************/
2025: static
2026: void
2027: gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals)
2028: {
2029: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2030: int *iptr, *msg_list, *msg_size, **msg_nodes;
2031: int *pw, *list, *size, **nodes;
2032: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2033: MPI_Status status;
2036: /* strip and load s */
2037: msg_list =list = gs->pair_list;
2038: msg_size =size = gs->msg_sizes;
2039: msg_nodes=nodes = gs->node_list;
2040: iptr=pw = gs->pw_elm_list;
2041: dptr1=dptr3 = gs->pw_vals;
2042: msg_ids_in = ids_in = gs->msg_ids_in;
2043: msg_ids_out = ids_out = gs->msg_ids_out;
2044: dptr2 = gs->out;
2045: in1=in2 = gs->in;
2047: /* post the receives */
2048: /* msg_nodes=nodes; */
2049: do
2050: {
2051: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2052: second one *list and do list++ afterwards */
2053: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2054: gs->gs_comm, msg_ids_in++);
2055: in1 += *size++;
2056: }
2057: while (*++msg_nodes);
2058: msg_nodes=nodes;
2060: /* load gs values into in out gs buffers */
2061: while (*iptr >= 0)
2062: {*dptr3++ = *(in_vals + *iptr++);}
2064: /* load out buffers and post the sends */
2065: while ((iptr = *msg_nodes++))
2066: {
2067: dptr3 = dptr2;
2068: while (*iptr >= 0)
2069: {*dptr2++ = *(dptr1 + *iptr++);}
2070: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2071: /* is msg_ids_out++ correct? */
2072: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2073: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2074: }
2076: if (gs->max_left_over)
2077: {gs_gop_tree_max(gs,in_vals);}
2079: /* process the received data */
2080: msg_nodes=nodes;
2081: while ((iptr = *nodes++))
2082: {
2083: /* Should I check the return value of MPI_Wait() or status? */
2084: /* Can this loop be replaced by a call to MPI_Waitall()? */
2085: MPI_Wait(ids_in++, &status);
2086: while (*iptr >= 0)
2087: {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2088: }
2090: /* replace vals */
2091: while (*pw >= 0)
2092: {*(in_vals + *pw++) = *dptr1++;}
2094: /* clear isend message handles */
2095: /* This changed for clarity though it could be the same */
2096: while (*msg_nodes++)
2097: /* Should I check the return value of MPI_Wait() or status? */
2098: /* Can this loop be replaced by a call to MPI_Waitall()? */
2099: {MPI_Wait(ids_out++, &status);}
2100: }
2104: /******************************************************************************
2105: Function: gather_scatter
2107: Input :
2108: Output:
2109: Return:
2110: Description:
2111: ******************************************************************************/
2112: static
2113: void
2114: gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2115: {
2116: int size;
2117: int *in, *out;
2118: PetscScalar *buf, *work;
2119:
2120: in = gs->tree_map_in;
2121: out = gs->tree_map_out;
2122: buf = gs->tree_buf;
2123: work = gs->tree_work;
2124: size = gs->tree_nel;
2126: rvec_set(buf,-REAL_MAX,size);
2128: while (*in >= 0)
2129: {*(buf + *out++) = *(vals + *in++);}
2131: in = gs->tree_map_in;
2132: out = gs->tree_map_out;
2133: MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);
2134: while (*in >= 0)
2135: {*(vals + *in++) = *(work + *out++);}
2137: }
2141: /******************************************************************************
2142: Function: gather_scatter
2144: Input :
2145: Output:
2146: Return:
2147: Description:
2148: ******************************************************************************/
2149: static void
2150: gs_gop_min_abs( gs_id *gs, PetscScalar *vals)
2151: {
2153: /* local only operations!!! */
2154: if (gs->num_local)
2155: {gs_gop_local_min_abs(gs,vals);}
2157: /* if intersection tree/pairwise and local isn't empty */
2158: if (gs->num_local_gop)
2159: {
2160: gs_gop_local_in_min_abs(gs,vals);
2162: /* pairwise */
2163: if (gs->num_pairs)
2164: {gs_gop_pairwise_min_abs(gs,vals);}
2165:
2166: /* tree */
2167: else if (gs->max_left_over)
2168: {gs_gop_tree_min_abs(gs,vals);}
2169:
2170: gs_gop_local_out(gs,vals);
2171: }
2172: /* if intersection tree/pairwise and local is empty */
2173: else
2174: {
2175: /* pairwise */
2176: if (gs->num_pairs)
2177: {gs_gop_pairwise_min_abs(gs,vals);}
2178:
2179: /* tree */
2180: else if (gs->max_left_over)
2181: {gs_gop_tree_min_abs(gs,vals);}
2182: }
2183: }
2187: /******************************************************************************
2188: Function: gather_scatter
2190: Input :
2191: Output:
2192: Return:
2193: Description:
2194: ******************************************************************************/
2195: static
2196: void
2197: gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals)
2198: {
2199: int *num, *map, **reduce;
2200: PetscScalar tmp;
2203: num = gs->num_local_reduce;
2204: reduce = gs->local_reduce;
2205: while ((map = *reduce))
2206: {
2207: num ++;
2208: tmp = REAL_MAX;
2209: while (*map >= 0)
2210: {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2211:
2212: map = *reduce++;
2213: while (*map >= 0)
2214: {*(vals + *map++) = tmp;}
2215: }
2216: }
2220: /******************************************************************************
2221: Function: gather_scatter
2223: Input :
2224: Output:
2225: Return:
2226: Description:
2227: ******************************************************************************/
2228: static
2229: void
2230: gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals)
2231: {
2232: int *num, *map, **reduce;
2233: PetscScalar *base;
2235: num = gs->num_gop_local_reduce;
2236: reduce = gs->gop_local_reduce;
2237: while ((map = *reduce++))
2238: {
2239: num++;
2240: base = vals + *map++;
2241: while (*map >= 0)
2242: {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2243: }
2244: }
2248: /******************************************************************************
2249: Function: gather_scatter
2251: VERSION 3 ::
2253: Input :
2254: Output:
2255: Return:
2256: Description:
2257: ******************************************************************************/
2258: static
2259: void
2260: gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals)
2261: {
2262: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2263: int *iptr, *msg_list, *msg_size, **msg_nodes;
2264: int *pw, *list, *size, **nodes;
2265: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2266: MPI_Status status;
2269: /* strip and load s */
2270: msg_list =list = gs->pair_list;
2271: msg_size =size = gs->msg_sizes;
2272: msg_nodes=nodes = gs->node_list;
2273: iptr=pw = gs->pw_elm_list;
2274: dptr1=dptr3 = gs->pw_vals;
2275: msg_ids_in = ids_in = gs->msg_ids_in;
2276: msg_ids_out = ids_out = gs->msg_ids_out;
2277: dptr2 = gs->out;
2278: in1=in2 = gs->in;
2280: /* post the receives */
2281: /* msg_nodes=nodes; */
2282: do
2283: {
2284: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2285: second one *list and do list++ afterwards */
2286: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2287: gs->gs_comm, msg_ids_in++);
2288: in1 += *size++;
2289: }
2290: while (*++msg_nodes);
2291: msg_nodes=nodes;
2293: /* load gs values into in out gs buffers */
2294: while (*iptr >= 0)
2295: {*dptr3++ = *(in_vals + *iptr++);}
2297: /* load out buffers and post the sends */
2298: while ((iptr = *msg_nodes++))
2299: {
2300: dptr3 = dptr2;
2301: while (*iptr >= 0)
2302: {*dptr2++ = *(dptr1 + *iptr++);}
2303: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2304: /* is msg_ids_out++ correct? */
2305: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2306: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2307: }
2309: if (gs->max_left_over)
2310: {gs_gop_tree_min_abs(gs,in_vals);}
2312: /* process the received data */
2313: msg_nodes=nodes;
2314: while ((iptr = *nodes++))
2315: {
2316: /* Should I check the return value of MPI_Wait() or status? */
2317: /* Can this loop be replaced by a call to MPI_Waitall()? */
2318: MPI_Wait(ids_in++, &status);
2319: while (*iptr >= 0)
2320: {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2321: }
2323: /* replace vals */
2324: while (*pw >= 0)
2325: {*(in_vals + *pw++) = *dptr1++;}
2327: /* clear isend message handles */
2328: /* This changed for clarity though it could be the same */
2329: while (*msg_nodes++)
2330: /* Should I check the return value of MPI_Wait() or status? */
2331: /* Can this loop be replaced by a call to MPI_Waitall()? */
2332: {MPI_Wait(ids_out++, &status);}
2333: }
2337: /******************************************************************************
2338: Function: gather_scatter
2340: Input :
2341: Output:
2342: Return:
2343: Description:
2344: ******************************************************************************/
2345: static
2346: void
2347: gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2348: {
2349: int size;
2350: int *in, *out;
2351: PetscScalar *buf, *work;
2352: int op[] = {GL_MIN_ABS,0};
2355: in = gs->tree_map_in;
2356: out = gs->tree_map_out;
2357: buf = gs->tree_buf;
2358: work = gs->tree_work;
2359: size = gs->tree_nel;
2361: rvec_set(buf,REAL_MAX,size);
2363: while (*in >= 0)
2364: {*(buf + *out++) = *(vals + *in++);}
2366: in = gs->tree_map_in;
2367: out = gs->tree_map_out;
2368: grop(buf,work,size,op);
2369: while (*in >= 0)
2370: {*(vals + *in++) = *(buf + *out++);}
2372: }
2376: /******************************************************************************
2377: Function: gather_scatter
2379: Input :
2380: Output:
2381: Return:
2382: Description:
2383: ******************************************************************************/
2384: static void
2385: gs_gop_min( gs_id *gs, PetscScalar *vals)
2386: {
2388: /* local only operations!!! */
2389: if (gs->num_local)
2390: {gs_gop_local_min(gs,vals);}
2392: /* if intersection tree/pairwise and local isn't empty */
2393: if (gs->num_local_gop)
2394: {
2395: gs_gop_local_in_min(gs,vals);
2397: /* pairwise */
2398: if (gs->num_pairs)
2399: {gs_gop_pairwise_min(gs,vals);}
2400:
2401: /* tree */
2402: else if (gs->max_left_over)
2403: {gs_gop_tree_min(gs,vals);}
2404:
2405: gs_gop_local_out(gs,vals);
2406: }
2407: /* if intersection tree/pairwise and local is empty */
2408: else
2409: {
2410: /* pairwise */
2411: if (gs->num_pairs)
2412: {gs_gop_pairwise_min(gs,vals);}
2413:
2414: /* tree */
2415: else if (gs->max_left_over)
2416: {gs_gop_tree_min(gs,vals);}
2417: }
2418: }
2422: /******************************************************************************
2423: Function: gather_scatter
2425: Input :
2426: Output:
2427: Return:
2428: Description:
2429: ******************************************************************************/
2430: static
2431: void
2432: gs_gop_local_min( gs_id *gs, PetscScalar *vals)
2433: {
2434: int *num, *map, **reduce;
2435: PetscScalar tmp;
2437: num = gs->num_local_reduce;
2438: reduce = gs->local_reduce;
2439: while ((map = *reduce))
2440: {
2441: num ++;
2442: tmp = REAL_MAX;
2443: while (*map >= 0)
2444: {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2445:
2446: map = *reduce++;
2447: while (*map >= 0)
2448: {*(vals + *map++) = tmp;}
2449: }
2450: }
2454: /******************************************************************************
2455: Function: gather_scatter
2457: Input :
2458: Output:
2459: Return:
2460: Description:
2461: ******************************************************************************/
2462: static
2463: void
2464: gs_gop_local_in_min( gs_id *gs, PetscScalar *vals)
2465: {
2466: int *num, *map, **reduce;
2467: PetscScalar *base;
2469: num = gs->num_gop_local_reduce;
2470: reduce = gs->gop_local_reduce;
2471: while ((map = *reduce++))
2472: {
2473: num++;
2474: base = vals + *map++;
2475: while (*map >= 0)
2476: {*base = PetscMin(*base,*(vals + *map)); map++;}
2477: }
2478: }
2482: /******************************************************************************
2483: Function: gather_scatter
2485: VERSION 3 ::
2487: Input :
2488: Output:
2489: Return:
2490: Description:
2491: ******************************************************************************/
2492: static
2493: void
2494: gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals)
2495: {
2496: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2497: int *iptr, *msg_list, *msg_size, **msg_nodes;
2498: int *pw, *list, *size, **nodes;
2499: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2500: MPI_Status status;
2503: /* strip and load s */
2504: msg_list =list = gs->pair_list;
2505: msg_size =size = gs->msg_sizes;
2506: msg_nodes=nodes = gs->node_list;
2507: iptr=pw = gs->pw_elm_list;
2508: dptr1=dptr3 = gs->pw_vals;
2509: msg_ids_in = ids_in = gs->msg_ids_in;
2510: msg_ids_out = ids_out = gs->msg_ids_out;
2511: dptr2 = gs->out;
2512: in1=in2 = gs->in;
2514: /* post the receives */
2515: /* msg_nodes=nodes; */
2516: do
2517: {
2518: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2519: second one *list and do list++ afterwards */
2520: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2521: gs->gs_comm, msg_ids_in++);
2522: in1 += *size++;
2523: }
2524: while (*++msg_nodes);
2525: msg_nodes=nodes;
2527: /* load gs values into in out gs buffers */
2528: while (*iptr >= 0)
2529: {*dptr3++ = *(in_vals + *iptr++);}
2531: /* load out buffers and post the sends */
2532: while ((iptr = *msg_nodes++))
2533: {
2534: dptr3 = dptr2;
2535: while (*iptr >= 0)
2536: {*dptr2++ = *(dptr1 + *iptr++);}
2537: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2538: /* is msg_ids_out++ correct? */
2539: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2540: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2541: }
2543: /* process the received data */
2544: if (gs->max_left_over)
2545: {gs_gop_tree_min(gs,in_vals);}
2547: msg_nodes=nodes;
2548: while ((iptr = *nodes++))
2549: {
2550: /* Should I check the return value of MPI_Wait() or status? */
2551: /* Can this loop be replaced by a call to MPI_Waitall()? */
2552: MPI_Wait(ids_in++, &status);
2553: while (*iptr >= 0)
2554: {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2555: }
2557: /* replace vals */
2558: while (*pw >= 0)
2559: {*(in_vals + *pw++) = *dptr1++;}
2561: /* clear isend message handles */
2562: /* This changed for clarity though it could be the same */
2563: while (*msg_nodes++)
2564: /* Should I check the return value of MPI_Wait() or status? */
2565: /* Can this loop be replaced by a call to MPI_Waitall()? */
2566: {MPI_Wait(ids_out++, &status);}
2567: }
2571: /******************************************************************************
2572: Function: gather_scatter
2574: Input :
2575: Output:
2576: Return:
2577: Description:
2578: ******************************************************************************/
2579: static
2580: void
2581: gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2582: {
2583: int size;
2584: int *in, *out;
2585: PetscScalar *buf, *work;
2586:
2587: in = gs->tree_map_in;
2588: out = gs->tree_map_out;
2589: buf = gs->tree_buf;
2590: work = gs->tree_work;
2591: size = gs->tree_nel;
2593: rvec_set(buf,REAL_MAX,size);
2595: while (*in >= 0)
2596: {*(buf + *out++) = *(vals + *in++);}
2598: in = gs->tree_map_in;
2599: out = gs->tree_map_out;
2600: MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);
2601: while (*in >= 0)
2602: {*(vals + *in++) = *(work + *out++);}
2603: }
2607: /******************************************************************************
2608: Function: gather_scatter
2610: Input :
2611: Output:
2612: Return:
2613: Description:
2614: ******************************************************************************/
2615: static void
2616: gs_gop_times( gs_id *gs, PetscScalar *vals)
2617: {
2619: /* local only operations!!! */
2620: if (gs->num_local)
2621: {gs_gop_local_times(gs,vals);}
2623: /* if intersection tree/pairwise and local isn't empty */
2624: if (gs->num_local_gop)
2625: {
2626: gs_gop_local_in_times(gs,vals);
2628: /* pairwise */
2629: if (gs->num_pairs)
2630: {gs_gop_pairwise_times(gs,vals);}
2631:
2632: /* tree */
2633: else if (gs->max_left_over)
2634: {gs_gop_tree_times(gs,vals);}
2635:
2636: gs_gop_local_out(gs,vals);
2637: }
2638: /* if intersection tree/pairwise and local is empty */
2639: else
2640: {
2641: /* pairwise */
2642: if (gs->num_pairs)
2643: {gs_gop_pairwise_times(gs,vals);}
2644:
2645: /* tree */
2646: else if (gs->max_left_over)
2647: {gs_gop_tree_times(gs,vals);}
2648: }
2649: }
2653: /******************************************************************************
2654: Function: gather_scatter
2656: Input :
2657: Output:
2658: Return:
2659: Description:
2660: ******************************************************************************/
2661: static
2662: void
2663: gs_gop_local_times( gs_id *gs, PetscScalar *vals)
2664: {
2665: int *num, *map, **reduce;
2666: PetscScalar tmp;
2668: num = gs->num_local_reduce;
2669: reduce = gs->local_reduce;
2670: while ((map = *reduce))
2671: {
2672: /* wall */
2673: if (*num == 2)
2674: {
2675: num ++; reduce++;
2676: vals[map[1]] = vals[map[0]] *= vals[map[1]];
2677: }
2678: /* corner shared by three elements */
2679: else if (*num == 3)
2680: {
2681: num ++; reduce++;
2682: vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2683: }
2684: /* corner shared by four elements */
2685: else if (*num == 4)
2686: {
2687: num ++; reduce++;
2688: vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2689: (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2690: }
2691: /* general case ... odd geoms ... 3D*/
2692: else
2693: {
2694: num ++;
2695: tmp = 1.0;
2696: while (*map >= 0)
2697: {tmp *= *(vals + *map++);}
2699: map = *reduce++;
2700: while (*map >= 0)
2701: {*(vals + *map++) = tmp;}
2702: }
2703: }
2704: }
2708: /******************************************************************************
2709: Function: gather_scatter
2711: Input :
2712: Output:
2713: Return:
2714: Description:
2715: ******************************************************************************/
2716: static
2717: void
2718: gs_gop_local_in_times( gs_id *gs, PetscScalar *vals)
2719: {
2720: int *num, *map, **reduce;
2721: PetscScalar *base;
2723: num = gs->num_gop_local_reduce;
2724: reduce = gs->gop_local_reduce;
2725: while ((map = *reduce++))
2726: {
2727: /* wall */
2728: if (*num == 2)
2729: {
2730: num ++;
2731: vals[map[0]] *= vals[map[1]];
2732: }
2733: /* corner shared by three elements */
2734: else if (*num == 3)
2735: {
2736: num ++;
2737: vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2738: }
2739: /* corner shared by four elements */
2740: else if (*num == 4)
2741: {
2742: num ++;
2743: vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2744: }
2745: /* general case ... odd geoms ... 3D*/
2746: else
2747: {
2748: num++;
2749: base = vals + *map++;
2750: while (*map >= 0)
2751: {*base *= *(vals + *map++);}
2752: }
2753: }
2754: }
2758: /******************************************************************************
2759: Function: gather_scatter
2761: VERSION 3 ::
2763: Input :
2764: Output:
2765: Return:
2766: Description:
2767: ******************************************************************************/
2768: static
2769: void
2770: gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals)
2771: {
2772: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2773: int *iptr, *msg_list, *msg_size, **msg_nodes;
2774: int *pw, *list, *size, **nodes;
2775: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2776: MPI_Status status;
2779: /* strip and load s */
2780: msg_list =list = gs->pair_list;
2781: msg_size =size = gs->msg_sizes;
2782: msg_nodes=nodes = gs->node_list;
2783: iptr=pw = gs->pw_elm_list;
2784: dptr1=dptr3 = gs->pw_vals;
2785: msg_ids_in = ids_in = gs->msg_ids_in;
2786: msg_ids_out = ids_out = gs->msg_ids_out;
2787: dptr2 = gs->out;
2788: in1=in2 = gs->in;
2790: /* post the receives */
2791: /* msg_nodes=nodes; */
2792: do
2793: {
2794: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2795: second one *list and do list++ afterwards */
2796: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2797: gs->gs_comm, msg_ids_in++);
2798: in1 += *size++;
2799: }
2800: while (*++msg_nodes);
2801: msg_nodes=nodes;
2803: /* load gs values into in out gs buffers */
2804: while (*iptr >= 0)
2805: {*dptr3++ = *(in_vals + *iptr++);}
2807: /* load out buffers and post the sends */
2808: while ((iptr = *msg_nodes++))
2809: {
2810: dptr3 = dptr2;
2811: while (*iptr >= 0)
2812: {*dptr2++ = *(dptr1 + *iptr++);}
2813: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2814: /* is msg_ids_out++ correct? */
2815: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2816: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2817: }
2819: if (gs->max_left_over)
2820: {gs_gop_tree_times(gs,in_vals);}
2822: /* process the received data */
2823: msg_nodes=nodes;
2824: while ((iptr = *nodes++))
2825: {
2826: /* Should I check the return value of MPI_Wait() or status? */
2827: /* Can this loop be replaced by a call to MPI_Waitall()? */
2828: MPI_Wait(ids_in++, &status);
2829: while (*iptr >= 0)
2830: {*(dptr1 + *iptr++) *= *in2++;}
2831: }
2833: /* replace vals */
2834: while (*pw >= 0)
2835: {*(in_vals + *pw++) = *dptr1++;}
2837: /* clear isend message handles */
2838: /* This changed for clarity though it could be the same */
2839: while (*msg_nodes++)
2840: /* Should I check the return value of MPI_Wait() or status? */
2841: /* Can this loop be replaced by a call to MPI_Waitall()? */
2842: {MPI_Wait(ids_out++, &status);}
2843: }
2847: /******************************************************************************
2848: Function: gather_scatter
2850: Input :
2851: Output:
2852: Return:
2853: Description:
2854: ******************************************************************************/
2855: static
2856: void
2857: gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2858: {
2859: int size;
2860: int *in, *out;
2861: PetscScalar *buf, *work;
2862:
2863: in = gs->tree_map_in;
2864: out = gs->tree_map_out;
2865: buf = gs->tree_buf;
2866: work = gs->tree_work;
2867: size = gs->tree_nel;
2869: rvec_one(buf,size);
2871: while (*in >= 0)
2872: {*(buf + *out++) = *(vals + *in++);}
2874: in = gs->tree_map_in;
2875: out = gs->tree_map_out;
2876: MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);
2877: while (*in >= 0)
2878: {*(vals + *in++) = *(work + *out++);}
2880: }
2884: /******************************************************************************
2885: Function: gather_scatter
2888: Input :
2889: Output:
2890: Return:
2891: Description:
2892: ******************************************************************************/
2893: static void
2894: gs_gop_plus( gs_id *gs, PetscScalar *vals)
2895: {
2897: /* local only operations!!! */
2898: if (gs->num_local)
2899: {gs_gop_local_plus(gs,vals);}
2901: /* if intersection tree/pairwise and local isn't empty */
2902: if (gs->num_local_gop)
2903: {
2904: gs_gop_local_in_plus(gs,vals);
2906: /* pairwise will NOT do tree inside ... */
2907: if (gs->num_pairs)
2908: {gs_gop_pairwise_plus(gs,vals);}
2910: /* tree */
2911: if (gs->max_left_over)
2912: {gs_gop_tree_plus(gs,vals);}
2913:
2914: gs_gop_local_out(gs,vals);
2915: }
2916: /* if intersection tree/pairwise and local is empty */
2917: else
2918: {
2919: /* pairwise will NOT do tree inside */
2920: if (gs->num_pairs)
2921: {gs_gop_pairwise_plus(gs,vals);}
2922:
2923: /* tree */
2924: if (gs->max_left_over)
2925: {gs_gop_tree_plus(gs,vals);}
2926: }
2928: }
2932: /******************************************************************************
2933: Function: gather_scatter
2935: Input :
2936: Output:
2937: Return:
2938: Description:
2939: ******************************************************************************/
2940: static
2941: void
2942: gs_gop_local_plus( gs_id *gs, PetscScalar *vals)
2943: {
2944: int *num, *map, **reduce;
2945: PetscScalar tmp;
2948: num = gs->num_local_reduce;
2949: reduce = gs->local_reduce;
2950: while ((map = *reduce))
2951: {
2952: /* wall */
2953: if (*num == 2)
2954: {
2955: num ++; reduce++;
2956: vals[map[1]] = vals[map[0]] += vals[map[1]];
2957: }
2958: /* corner shared by three elements */
2959: else if (*num == 3)
2960: {
2961: num ++; reduce++;
2962: vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2963: }
2964: /* corner shared by four elements */
2965: else if (*num == 4)
2966: {
2967: num ++; reduce++;
2968: vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2969: (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2970: }
2971: /* general case ... odd geoms ... 3D*/
2972: else
2973: {
2974: num ++;
2975: tmp = 0.0;
2976: while (*map >= 0)
2977: {tmp += *(vals + *map++);}
2979: map = *reduce++;
2980: while (*map >= 0)
2981: {*(vals + *map++) = tmp;}
2982: }
2983: }
2984: }
2988: /******************************************************************************
2989: Function: gather_scatter
2991: Input :
2992: Output:
2993: Return:
2994: Description:
2995: ******************************************************************************/
2996: static
2997: void
2998: gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals)
2999: {
3000: int *num, *map, **reduce;
3001: PetscScalar *base;
3004: num = gs->num_gop_local_reduce;
3005: reduce = gs->gop_local_reduce;
3006: while ((map = *reduce++))
3007: {
3008: /* wall */
3009: if (*num == 2)
3010: {
3011: num ++;
3012: vals[map[0]] += vals[map[1]];
3013: }
3014: /* corner shared by three elements */
3015: else if (*num == 3)
3016: {
3017: num ++;
3018: vals[map[0]] += (vals[map[1]] + vals[map[2]]);
3019: }
3020: /* corner shared by four elements */
3021: else if (*num == 4)
3022: {
3023: num ++;
3024: vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
3025: }
3026: /* general case ... odd geoms ... 3D*/
3027: else
3028: {
3029: num++;
3030: base = vals + *map++;
3031: while (*map >= 0)
3032: {*base += *(vals + *map++);}
3033: }
3034: }
3035: }
3039: /******************************************************************************
3040: Function: gather_scatter
3042: VERSION 3 ::
3044: Input :
3045: Output:
3046: Return:
3047: Description:
3048: ******************************************************************************/
3049: static
3050: void
3051: gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals)
3052: {
3053: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3054: int *iptr, *msg_list, *msg_size, **msg_nodes;
3055: int *pw, *list, *size, **nodes;
3056: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3057: MPI_Status status;
3060: /* strip and load s */
3061: msg_list =list = gs->pair_list;
3062: msg_size =size = gs->msg_sizes;
3063: msg_nodes=nodes = gs->node_list;
3064: iptr=pw = gs->pw_elm_list;
3065: dptr1=dptr3 = gs->pw_vals;
3066: msg_ids_in = ids_in = gs->msg_ids_in;
3067: msg_ids_out = ids_out = gs->msg_ids_out;
3068: dptr2 = gs->out;
3069: in1=in2 = gs->in;
3071: /* post the receives */
3072: /* msg_nodes=nodes; */
3073: do
3074: {
3075: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3076: second one *list and do list++ afterwards */
3077: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3078: gs->gs_comm, msg_ids_in++);
3079: in1 += *size++;
3080: }
3081: while (*++msg_nodes);
3082: msg_nodes=nodes;
3084: /* load gs values into in out gs buffers */
3085: while (*iptr >= 0)
3086: {*dptr3++ = *(in_vals + *iptr++);}
3088: /* load out buffers and post the sends */
3089: while ((iptr = *msg_nodes++))
3090: {
3091: dptr3 = dptr2;
3092: while (*iptr >= 0)
3093: {*dptr2++ = *(dptr1 + *iptr++);}
3094: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3095: /* is msg_ids_out++ correct? */
3096: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
3097: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3098: }
3100: /* do the tree while we're waiting */
3101: if (gs->max_left_over)
3102: {gs_gop_tree_plus(gs,in_vals);}
3104: /* process the received data */
3105: msg_nodes=nodes;
3106: while ((iptr = *nodes++))
3107: {
3108: /* Should I check the return value of MPI_Wait() or status? */
3109: /* Can this loop be replaced by a call to MPI_Waitall()? */
3110: MPI_Wait(ids_in++, &status);
3111: while (*iptr >= 0)
3112: {*(dptr1 + *iptr++) += *in2++;}
3113: }
3115: /* replace vals */
3116: while (*pw >= 0)
3117: {*(in_vals + *pw++) = *dptr1++;}
3119: /* clear isend message handles */
3120: /* This changed for clarity though it could be the same */
3121: while (*msg_nodes++)
3122: /* Should I check the return value of MPI_Wait() or status? */
3123: /* Can this loop be replaced by a call to MPI_Waitall()? */
3124: {MPI_Wait(ids_out++, &status);}
3126: }
3130: /******************************************************************************
3131: Function: gather_scatter
3133: Input :
3134: Output:
3135: Return:
3136: Description:
3137: ******************************************************************************/
3138: static
3139: void
3140: gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3141: {
3142: int size;
3143: int *in, *out;
3144: PetscScalar *buf, *work;
3145:
3146: in = gs->tree_map_in;
3147: out = gs->tree_map_out;
3148: buf = gs->tree_buf;
3149: work = gs->tree_work;
3150: size = gs->tree_nel;
3152: rvec_zero(buf,size);
3154: while (*in >= 0)
3155: {*(buf + *out++) = *(vals + *in++);}
3157: in = gs->tree_map_in;
3158: out = gs->tree_map_out;
3159: MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);
3160: while (*in >= 0)
3161: {*(vals + *in++) = *(work + *out++);}
3163: }
3165: /******************************************************************************
3166: Function: gs_free()
3168: Input :
3170: Output:
3172: Return:
3174: Description:
3175: if (gs->sss) {free((void*) gs->sss);}
3176: ******************************************************************************/
3177: void
3178: gs_free( gs_id *gs)
3179: {
3180: int i;
3183: if (gs->nghs) {free((void*) gs->nghs);}
3184: if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
3186: /* tree */
3187: if (gs->max_left_over)
3188: {
3189: if (gs->tree_elms) {free((void*) gs->tree_elms);}
3190: if (gs->tree_buf) {free((void*) gs->tree_buf);}
3191: if (gs->tree_work) {free((void*) gs->tree_work);}
3192: if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3193: if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3194: }
3196: /* pairwise info */
3197: if (gs->num_pairs)
3198: {
3199: /* should be NULL already */
3200: if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3201: if (gs->elms) {free((void*) gs->elms);}
3202: if (gs->local_elms) {free((void*) gs->local_elms);}
3203: if (gs->companion) {free((void*) gs->companion);}
3204:
3205: /* only set if pairwise */
3206: if (gs->vals) {free((void*) gs->vals);}
3207: if (gs->in) {free((void*) gs->in);}
3208: if (gs->out) {free((void*) gs->out);}
3209: if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3210: if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3211: if (gs->pw_vals) {free((void*) gs->pw_vals);}
3212: if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3213: if (gs->node_list)
3214: {
3215: for (i=0;i<gs->num_pairs;i++)
3216: {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3217: free((void*) gs->node_list);
3218: }
3219: if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3220: if (gs->pair_list) {free((void*) gs->pair_list);}
3221: }
3223: /* local info */
3224: if (gs->num_local_total>=0)
3225: {
3226: for (i=0;i<gs->num_local_total+1;i++)
3227: /* for (i=0;i<gs->num_local_total;i++) */
3228: {
3229: if (gs->num_gop_local_reduce[i])
3230: {free((void*) gs->gop_local_reduce[i]);}
3231: }
3232: }
3234: /* if intersection tree/pairwise and local isn't empty */
3235: if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3236: if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
3238: free((void*) gs);
3239: }
3246: /******************************************************************************
3247: Function: gather_scatter
3249: Input :
3250: Output:
3251: Return:
3252: Description:
3253: ******************************************************************************/
3254: void
3255: gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step)
3256: {
3258: switch (*op) {
3259: case '+':
3260: gs_gop_vec_plus(gs,vals,step);
3261: break;
3262: default:
3263: error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3264: error_msg_warning("gs_gop_vec() :: default :: plus");
3265: gs_gop_vec_plus(gs,vals,step);
3266: break;
3267: }
3268: }
3272: /******************************************************************************
3273: Function: gather_scatter
3275: Input :
3276: Output:
3277: Return:
3278: Description:
3279: ******************************************************************************/
3280: static void
3281: gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step)
3282: {
3283: if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
3285: /* local only operations!!! */
3286: if (gs->num_local)
3287: {gs_gop_vec_local_plus(gs,vals,step);}
3289: /* if intersection tree/pairwise and local isn't empty */
3290: if (gs->num_local_gop)
3291: {
3292: gs_gop_vec_local_in_plus(gs,vals,step);
3294: /* pairwise */
3295: if (gs->num_pairs)
3296: {gs_gop_vec_pairwise_plus(gs,vals,step);}
3298: /* tree */
3299: else if (gs->max_left_over)
3300: {gs_gop_vec_tree_plus(gs,vals,step);}
3302: gs_gop_vec_local_out(gs,vals,step);
3303: }
3304: /* if intersection tree/pairwise and local is empty */
3305: else
3306: {
3307: /* pairwise */
3308: if (gs->num_pairs)
3309: {gs_gop_vec_pairwise_plus(gs,vals,step);}
3311: /* tree */
3312: else if (gs->max_left_over)
3313: {gs_gop_vec_tree_plus(gs,vals,step);}
3314: }
3315: }
3319: /******************************************************************************
3320: Function: gather_scatter
3322: Input :
3323: Output:
3324: Return:
3325: Description:
3326: ******************************************************************************/
3327: static
3328: void
3329: gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals,
3330: int step)
3331: {
3332: int *num, *map, **reduce;
3333: PetscScalar *base;
3336: num = gs->num_local_reduce;
3337: reduce = gs->local_reduce;
3338: while ((map = *reduce))
3339: {
3340: base = vals + map[0] * step;
3342: /* wall */
3343: if (*num == 2)
3344: {
3345: num++; reduce++;
3346: rvec_add (base,vals+map[1]*step,step);
3347: rvec_copy(vals+map[1]*step,base,step);
3348: }
3349: /* corner shared by three elements */
3350: else if (*num == 3)
3351: {
3352: num++; reduce++;
3353: rvec_add (base,vals+map[1]*step,step);
3354: rvec_add (base,vals+map[2]*step,step);
3355: rvec_copy(vals+map[2]*step,base,step);
3356: rvec_copy(vals+map[1]*step,base,step);
3357: }
3358: /* corner shared by four elements */
3359: else if (*num == 4)
3360: {
3361: num++; reduce++;
3362: rvec_add (base,vals+map[1]*step,step);
3363: rvec_add (base,vals+map[2]*step,step);
3364: rvec_add (base,vals+map[3]*step,step);
3365: rvec_copy(vals+map[3]*step,base,step);
3366: rvec_copy(vals+map[2]*step,base,step);
3367: rvec_copy(vals+map[1]*step,base,step);
3368: }
3369: /* general case ... odd geoms ... 3D */
3370: else
3371: {
3372: num++;
3373: while (*++map >= 0)
3374: {rvec_add (base,vals+*map*step,step);}
3375:
3376: map = *reduce;
3377: while (*++map >= 0)
3378: {rvec_copy(vals+*map*step,base,step);}
3379:
3380: reduce++;
3381: }
3382: }
3383: }
3387: /******************************************************************************
3388: Function: gather_scatter
3390: Input :
3391: Output:
3392: Return:
3393: Description:
3394: ******************************************************************************/
3395: static
3396: void
3397: gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals,
3398: int step)
3399: {
3400: int *num, *map, **reduce;
3401: PetscScalar *base;
3403: num = gs->num_gop_local_reduce;
3404: reduce = gs->gop_local_reduce;
3405: while ((map = *reduce++))
3406: {
3407: base = vals + map[0] * step;
3409: /* wall */
3410: if (*num == 2)
3411: {
3412: num ++;
3413: rvec_add(base,vals+map[1]*step,step);
3414: }
3415: /* corner shared by three elements */
3416: else if (*num == 3)
3417: {
3418: num ++;
3419: rvec_add(base,vals+map[1]*step,step);
3420: rvec_add(base,vals+map[2]*step,step);
3421: }
3422: /* corner shared by four elements */
3423: else if (*num == 4)
3424: {
3425: num ++;
3426: rvec_add(base,vals+map[1]*step,step);
3427: rvec_add(base,vals+map[2]*step,step);
3428: rvec_add(base,vals+map[3]*step,step);
3429: }
3430: /* general case ... odd geoms ... 3D*/
3431: else
3432: {
3433: num++;
3434: while (*++map >= 0)
3435: {rvec_add(base,vals+*map*step,step);}
3436: }
3437: }
3438: }
3441: /******************************************************************************
3442: Function: gather_scatter
3444: Input :
3445: Output:
3446: Return:
3447: Description:
3448: ******************************************************************************/
3449: static
3450: void
3451: gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals,
3452: int step)
3453: {
3454: int *num, *map, **reduce;
3455: PetscScalar *base;
3458: num = gs->num_gop_local_reduce;
3459: reduce = gs->gop_local_reduce;
3460: while ((map = *reduce++))
3461: {
3462: base = vals + map[0] * step;
3464: /* wall */
3465: if (*num == 2)
3466: {
3467: num ++;
3468: rvec_copy(vals+map[1]*step,base,step);
3469: }
3470: /* corner shared by three elements */
3471: else if (*num == 3)
3472: {
3473: num ++;
3474: rvec_copy(vals+map[1]*step,base,step);
3475: rvec_copy(vals+map[2]*step,base,step);
3476: }
3477: /* corner shared by four elements */
3478: else if (*num == 4)
3479: {
3480: num ++;
3481: rvec_copy(vals+map[1]*step,base,step);
3482: rvec_copy(vals+map[2]*step,base,step);
3483: rvec_copy(vals+map[3]*step,base,step);
3484: }
3485: /* general case ... odd geoms ... 3D*/
3486: else
3487: {
3488: num++;
3489: while (*++map >= 0)
3490: {rvec_copy(vals+*map*step,base,step);}
3491: }
3492: }
3493: }
3497: /******************************************************************************
3498: Function: gather_scatter
3500: VERSION 3 ::
3502: Input :
3503: Output:
3504: Return:
3505: Description:
3506: ******************************************************************************/
3507: static
3508: void
3509: gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals,
3510: int step)
3511: {
3512: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3513: int *iptr, *msg_list, *msg_size, **msg_nodes;
3514: int *pw, *list, *size, **nodes;
3515: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3516: MPI_Status status;
3517: PetscBLASInt i1;
3520: /* strip and load s */
3521: msg_list =list = gs->pair_list;
3522: msg_size =size = gs->msg_sizes;
3523: msg_nodes=nodes = gs->node_list;
3524: iptr=pw = gs->pw_elm_list;
3525: dptr1=dptr3 = gs->pw_vals;
3526: msg_ids_in = ids_in = gs->msg_ids_in;
3527: msg_ids_out = ids_out = gs->msg_ids_out;
3528: dptr2 = gs->out;
3529: in1=in2 = gs->in;
3531: /* post the receives */
3532: /* msg_nodes=nodes; */
3533: do
3534: {
3535: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3536: second one *list and do list++ afterwards */
3537: MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3538: gs->gs_comm, msg_ids_in++);
3539: in1 += *size++ *step;
3540: }
3541: while (*++msg_nodes);
3542: msg_nodes=nodes;
3544: /* load gs values into in out gs buffers */
3545: while (*iptr >= 0)
3546: {
3547: rvec_copy(dptr3,in_vals + *iptr*step,step);
3548: dptr3+=step;
3549: iptr++;
3550: }
3552: /* load out buffers and post the sends */
3553: while ((iptr = *msg_nodes++))
3554: {
3555: dptr3 = dptr2;
3556: while (*iptr >= 0)
3557: {
3558: rvec_copy(dptr2,dptr1 + *iptr*step,step);
3559: dptr2+=step;
3560: iptr++;
3561: }
3562: MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++,
3563: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3564: }
3566: /* tree */
3567: if (gs->max_left_over)
3568: {gs_gop_vec_tree_plus(gs,in_vals,step);}
3570: /* process the received data */
3571: msg_nodes=nodes;
3572: while ((iptr = *nodes++)){
3573: PetscScalar d1 = 1.0;
3574: /* Should I check the return value of MPI_Wait() or status? */
3575: /* Can this loop be replaced by a call to MPI_Waitall()? */
3576: MPI_Wait(ids_in++, &status);
3577: while (*iptr >= 0) {
3578: BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3579: in2+=step;
3580: iptr++;
3581: }
3582: }
3584: /* replace vals */
3585: while (*pw >= 0)
3586: {
3587: rvec_copy(in_vals + *pw*step,dptr1,step);
3588: dptr1+=step;
3589: pw++;
3590: }
3592: /* clear isend message handles */
3593: /* This changed for clarity though it could be the same */
3594: while (*msg_nodes++)
3595: /* Should I check the return value of MPI_Wait() or status? */
3596: /* Can this loop be replaced by a call to MPI_Waitall()? */
3597: {MPI_Wait(ids_out++, &status);}
3600: }
3604: /******************************************************************************
3605: Function: gather_scatter
3607: Input :
3608: Output:
3609: Return:
3610: Description:
3611: ******************************************************************************/
3612: static
3613: void
3614: gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step)
3615: {
3616: int size, *in, *out;
3617: PetscScalar *buf, *work;
3618: int op[] = {GL_ADD,0};
3619: PetscBLASInt i1 = 1;
3622: /* copy over to local variables */
3623: in = gs->tree_map_in;
3624: out = gs->tree_map_out;
3625: buf = gs->tree_buf;
3626: work = gs->tree_work;
3627: size = gs->tree_nel*step;
3629: /* zero out collection buffer */
3630: rvec_zero(buf,size);
3633: /* copy over my contributions */
3634: while (*in >= 0)
3635: {
3636: BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3637: }
3639: /* perform fan in/out on full buffer */
3640: /* must change grop to handle the blas */
3641: grop(buf,work,size,op);
3643: /* reset */
3644: in = gs->tree_map_in;
3645: out = gs->tree_map_out;
3647: /* get the portion of the results I need */
3648: while (*in >= 0)
3649: {
3650: BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3651: }
3653: }
3657: /******************************************************************************
3658: Function: gather_scatter
3660: Input :
3661: Output:
3662: Return:
3663: Description:
3664: ******************************************************************************/
3665: void
3666: gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim)
3667: {
3669: switch (*op) {
3670: case '+':
3671: gs_gop_plus_hc(gs,vals,dim);
3672: break;
3673: default:
3674: error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3675: error_msg_warning("gs_gop_hc() :: default :: plus\n");
3676: gs_gop_plus_hc(gs,vals,dim);
3677: break;
3678: }
3679: }
3683: /******************************************************************************
3684: Function: gather_scatter
3686: Input :
3687: Output:
3688: Return:
3689: Description:
3690: ******************************************************************************/
3691: static void
3692: gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim)
3693: {
3694: /* if there's nothing to do return */
3695: if (dim<=0)
3696: {return;}
3698: /* can't do more dimensions then exist */
3699: dim = PetscMin(dim,i_log2_num_nodes);
3701: /* local only operations!!! */
3702: if (gs->num_local)
3703: {gs_gop_local_plus(gs,vals);}
3705: /* if intersection tree/pairwise and local isn't empty */
3706: if (gs->num_local_gop)
3707: {
3708: gs_gop_local_in_plus(gs,vals);
3710: /* pairwise will do tree inside ... */
3711: if (gs->num_pairs)
3712: {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3714: /* tree only */
3715: else if (gs->max_left_over)
3716: {gs_gop_tree_plus_hc(gs,vals,dim);}
3717:
3718: gs_gop_local_out(gs,vals);
3719: }
3720: /* if intersection tree/pairwise and local is empty */
3721: else
3722: {
3723: /* pairwise will do tree inside */
3724: if (gs->num_pairs)
3725: {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3726:
3727: /* tree */
3728: else if (gs->max_left_over)
3729: {gs_gop_tree_plus_hc(gs,vals,dim);}
3730: }
3732: }
3735: /******************************************************************************
3736: VERSION 3 ::
3738: Input :
3739: Output:
3740: Return:
3741: Description:
3742: ******************************************************************************/
3743: static
3744: void
3745: gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim)
3746: {
3747: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3748: int *iptr, *msg_list, *msg_size, **msg_nodes;
3749: int *pw, *list, *size, **nodes;
3750: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3751: MPI_Status status;
3752: int i, mask=1;
3754: for (i=1; i<dim; i++)
3755: {mask<<=1; mask++;}
3758: /* strip and load s */
3759: msg_list =list = gs->pair_list;
3760: msg_size =size = gs->msg_sizes;
3761: msg_nodes=nodes = gs->node_list;
3762: iptr=pw = gs->pw_elm_list;
3763: dptr1=dptr3 = gs->pw_vals;
3764: msg_ids_in = ids_in = gs->msg_ids_in;
3765: msg_ids_out = ids_out = gs->msg_ids_out;
3766: dptr2 = gs->out;
3767: in1=in2 = gs->in;
3769: /* post the receives */
3770: /* msg_nodes=nodes; */
3771: do
3772: {
3773: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3774: second one *list and do list++ afterwards */
3775: if ((my_id|mask)==(*list|mask))
3776: {
3777: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3778: gs->gs_comm, msg_ids_in++);
3779: in1 += *size++;
3780: }
3781: else
3782: {list++; size++;}
3783: }
3784: while (*++msg_nodes);
3786: /* load gs values into in out gs buffers */
3787: while (*iptr >= 0)
3788: {*dptr3++ = *(in_vals + *iptr++);}
3790: /* load out buffers and post the sends */
3791: msg_nodes=nodes;
3792: list = msg_list;
3793: while ((iptr = *msg_nodes++))
3794: {
3795: if ((my_id|mask)==(*list|mask))
3796: {
3797: dptr3 = dptr2;
3798: while (*iptr >= 0)
3799: {*dptr2++ = *(dptr1 + *iptr++);}
3800: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3801: /* is msg_ids_out++ correct? */
3802: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++,
3803: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3804: }
3805: else
3806: {list++; msg_size++;}
3807: }
3809: /* do the tree while we're waiting */
3810: if (gs->max_left_over)
3811: {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3813: /* process the received data */
3814: msg_nodes=nodes;
3815: list = msg_list;
3816: while ((iptr = *nodes++))
3817: {
3818: if ((my_id|mask)==(*list|mask))
3819: {
3820: /* Should I check the return value of MPI_Wait() or status? */
3821: /* Can this loop be replaced by a call to MPI_Waitall()? */
3822: MPI_Wait(ids_in++, &status);
3823: while (*iptr >= 0)
3824: {*(dptr1 + *iptr++) += *in2++;}
3825: }
3826: list++;
3827: }
3829: /* replace vals */
3830: while (*pw >= 0)
3831: {*(in_vals + *pw++) = *dptr1++;}
3833: /* clear isend message handles */
3834: /* This changed for clarity though it could be the same */
3835: while (*msg_nodes++)
3836: {
3837: if ((my_id|mask)==(*msg_list|mask))
3838: {
3839: /* Should I check the return value of MPI_Wait() or status? */
3840: /* Can this loop be replaced by a call to MPI_Waitall()? */
3841: MPI_Wait(ids_out++, &status);
3842: }
3843: msg_list++;
3844: }
3847: }
3851: /******************************************************************************
3852: Function: gather_scatter
3854: Input :
3855: Output:
3856: Return:
3857: Description:
3858: ******************************************************************************/
3859: static
3860: void
3861: gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3862: {
3863: int size;
3864: int *in, *out;
3865: PetscScalar *buf, *work;
3866: int op[] = {GL_ADD,0};
3868: in = gs->tree_map_in;
3869: out = gs->tree_map_out;
3870: buf = gs->tree_buf;
3871: work = gs->tree_work;
3872: size = gs->tree_nel;
3874: rvec_zero(buf,size);
3876: while (*in >= 0)
3877: {*(buf + *out++) = *(vals + *in++);}
3879: in = gs->tree_map_in;
3880: out = gs->tree_map_out;
3882: grop_hc(buf,work,size,op,dim);
3884: while (*in >= 0)
3885: {*(vals + *in++) = *(buf + *out++);}
3887: }