553 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			553 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 * Copyright (c) 2003, 2007-14 Matteo Frigo
							 | 
						||
| 
								 | 
							
								 * Copyright (c) 1999-2003, 2007-8 Massachusetts Institute of Technology
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * This program is free software; you can redistribute it and/or modify
							 | 
						||
| 
								 | 
							
								 * it under the terms of the GNU General Public License as published by
							 | 
						||
| 
								 | 
							
								 * the Free Software Foundation; either version 2 of the License, or
							 | 
						||
| 
								 | 
							
								 * (at your option) any later version.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * This program is distributed in the hope that it will be useful,
							 | 
						||
| 
								 | 
							
								 * but WITHOUT ANY WARRANTY; without even the implied warranty of
							 | 
						||
| 
								 | 
							
								 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
							 | 
						||
| 
								 | 
							
								 * GNU General Public License for more details.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * You should have received a copy of the GNU General Public License
							 | 
						||
| 
								 | 
							
								 * along with this program; if not, write to the Free Software
							 | 
						||
| 
								 | 
							
								 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**********************************************************************/
							 | 
						||
| 
								 | 
							
								/* This is a modified and combined version of the sched.c and
							 | 
						||
| 
								 | 
							
								   test_sched.c files shipped with FFTW 2, written to implement and
							 | 
						||
| 
								 | 
							
								   test various all-to-all communications scheduling patterns.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   It is not used in FFTW 3, but I keep it around in case we ever want
							 | 
						||
| 
								 | 
							
								   to play with this again or to change algorithms.  In particular, I
							 | 
						||
| 
								 | 
							
								   used it to implement and test the fill1_comm_sched routine in
							 | 
						||
| 
								 | 
							
								   transpose-pairwise.c, which allows us to create a schedule for one
							 | 
						||
| 
								 | 
							
								   process at a time and is much more compact than the FFTW 2 code.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   Note that the scheduling algorithm is somewhat modified from that
							 | 
						||
| 
								 | 
							
								   of FFTW 2.  Originally, I thought that one "stall" in the schedule
							 | 
						||
| 
								 | 
							
								   was unavoidable for odd numbers of processes, since this is the
							 | 
						||
| 
								 | 
							
								   case for the soccer-timetabling problem.  However, because of the
							 | 
						||
| 
								 | 
							
								   self-communication step, we can use the self-communication to fill
							 | 
						||
| 
								 | 
							
								   in the stalls.  (Thanks to Ralf Wildenhues for pointing this out.)
							 | 
						||
| 
								 | 
							
								   This greatly simplifies the process re-sorting algorithm. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**********************************************************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include <stdio.h>
							 | 
						||
| 
								 | 
							
								#include <stdlib.h>
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* This file contains routines to compute communications schedules for
							 | 
						||
| 
								 | 
							
								   all-to-all communications (complete exchanges) that are performed
							 | 
						||
| 
								 | 
							
								   in-place.  (That is, the block that processor x sends to processor
							 | 
						||
| 
								 | 
							
								   y gets replaced on processor x by a block received from processor y.)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   A schedule, int **sched, is a two-dimensional array where
							 | 
						||
| 
								 | 
							
								   sched[pe][i] is the processor that pe expects to exchange a message
							 | 
						||
| 
								 | 
							
								   with on the i-th step of the exchange.  sched[pe][i] == -1 for the
							 | 
						||
| 
								 | 
							
								   i after the last exchange scheduled on pe.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   Here, processors (pe's, for processing elements), are numbered from
							 | 
						||
| 
								 | 
							
								   0 to npes-1.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   There are a couple of constraints that a schedule should satisfy
							 | 
						||
| 
								 | 
							
								   (besides the obvious one that every processor has to communicate
							 | 
						||
| 
								 | 
							
								   with every other processor exactly once).
							 | 
						||
| 
								 | 
							
								   
							 | 
						||
| 
								 | 
							
								   * First, and most importantly, there must be no deadlocks.
							 | 
						||
| 
								 | 
							
								   
							 | 
						||
| 
								 | 
							
								   * Second, we would like to overlap communications as much as possible,
							 | 
						||
| 
								 | 
							
								   so that all exchanges occur in parallel.  It turns out that perfect
							 | 
						||
| 
								 | 
							
								   overlap is possible for any number of processes (npes).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   It turns out that this scheduling problem is actually well-studied,
							 | 
						||
| 
								 | 
							
								   and good solutions are known.  The problem is known as a
							 | 
						||
| 
								 | 
							
								   "time-tabling" problem, and is specifically the problem of
							 | 
						||
| 
								 | 
							
								   scheduling a sports competition (where n teams must compete exactly
							 | 
						||
| 
								 | 
							
								   once with every other team).  The problem is discussed and
							 | 
						||
| 
								 | 
							
								   algorithms are presented in:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   [1] J. A. M. Schreuder, "Constructing Timetables for Sport
							 | 
						||
| 
								 | 
							
								   Competitions," Mathematical Programming Study 13, pp. 58-67 (1980).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   [2] A. Schaerf, "Scheduling Sport Tournaments using Constraint
							 | 
						||
| 
								 | 
							
								   Logic Programming," Proc. of 12th Europ. Conf. on
							 | 
						||
| 
								 | 
							
								   Artif. Intell. (ECAI-96), pp. 634-639 (Budapest 1996).
							 | 
						||
| 
								 | 
							
								   http://hermes.dis.uniroma1.it/~aschaerf/publications.html
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   (These people actually impose a lot of additional constraints that
							 | 
						||
| 
								 | 
							
								   we don't care about, so they are solving harder problems. [1] gives
							 | 
						||
| 
								 | 
							
								   a simple enough algorithm for our purposes, though.)
							 | 
						||
| 
								 | 
							
								   
							 | 
						||
| 
								 | 
							
								   In the timetabling problem, N teams can all play one another in N-1
							 | 
						||
| 
								 | 
							
								   steps if N is even, and N steps if N is odd.  Here, however,
							 | 
						||
| 
								 | 
							
								   there is a "self-communication" step (a team must also "play itself")
							 | 
						||
| 
								 | 
							
								   and so we can always make an optimal N-step schedule regardless of N.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   However, we have to do more: for a particular processor, the
							 | 
						||
| 
								 | 
							
								   communications schedule must be sorted in ascending or descending
							 | 
						||
| 
								 | 
							
								   order of processor index.  (This is necessary so that the data
							 | 
						||
| 
								 | 
							
								   coming in for the transpose does not overwrite data that will be
							 | 
						||
| 
								 | 
							
								   sent later; for that processor the incoming and outgoing blocks are
							 | 
						||
| 
								 | 
							
								   of different non-zero sizes.)  Fortunately, because the schedule
							 | 
						||
| 
								 | 
							
								   is stall free, each parallel step of the schedule is independent
							 | 
						||
| 
								 | 
							
								   of every other step, and we can reorder the steps arbitrarily
							 | 
						||
| 
								 | 
							
								   to achieve any desired order on a particular process.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Release a schedule: free each of the npes rows sched[0..npes-1]
								   and then the row table itself.  A NULL sched is ignored. */
								void free_comm_schedule(int **sched, int npes)
								{
								     int row = 0;

								     if (!sched)
								          return;

								     while (row < npes) {
								          free(sched[row]);
								          ++row;
								     }
								     free(sched);
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Mark each of the npes rows of sched as empty by storing the -1
								   terminator in its first slot.  Other slots are left untouched. */
								void empty_comm_schedule(int **sched, int npes)
								{
								     int pe = 0;

								     while (pe < npes) {
								          *sched[pe] = -1;
								          ++pe;
								     }
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								extern void fill_comm_schedule(int **sched, int npes);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* check_comm_schedule is defined further down in this file; declare
								   it here so that the call in make_comm_schedule has a prototype in
								   scope (implicit function declarations are not valid since C99). */
								int check_comm_schedule(int **sched, int npes);

								/* Create a new communications schedule for a given number of processors.
								   The schedule is filled by fill_comm_schedule and then verified by
								   check_comm_schedule.

								   Returns the newly allocated schedule (release it with
								   free_comm_schedule), or NULL if npes is negative, if an allocation
								   fails, or if the verification fails (check_comm_schedule may print
								   a message to stderr when it detects a program bug).  */
								int **make_comm_schedule(int npes)
								{
								     int **sched;
								     int i;

								     /* a negative count would otherwise wrap to a huge allocation size */
								     if (npes < 0)
								          return NULL;

								     sched = malloc(sizeof *sched * (size_t) npes);
								     if (!sched)
								          return NULL;

								     /* NULL every row first, so that free_comm_schedule is safe to call
								        if one of the row allocations below fails part-way through */
								     for (i = 0; i < npes; ++i)
								          sched[i] = NULL;

								     for (i = 0; i < npes; ++i) {
								          /* same (generous) row size as before: 10 * (npes + 1) ints */
								          sched[i] = malloc(sizeof **sched * 10 * ((size_t) npes + 1));
								          if (!sched[i]) {
								               free_comm_schedule(sched, npes);
								               return NULL;
								          }
								     }

								     empty_comm_schedule(sched, npes);
								     fill_comm_schedule(sched, npes);

								     if (!check_comm_schedule(sched, npes)) {
								          free_comm_schedule(sched, npes);
								          return NULL;
								     }

								     return sched;
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Append dest to the -1-terminated row sched[pe]: the slot that
								   currently holds the terminator receives dest, and a new -1
								   terminator is written immediately after it.  The row must have
								   room for that extra entry. */
								static void add_dest_to_comm_schedule(int **sched, int pe, int dest)
								{
								     int *slot = sched[pe];

								     while (*slot != -1)
								          ++slot;

								     slot[0] = dest;
								     slot[1] = -1;
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Record one exchange between pe1 and pe2: pe2 is appended to the
								   row of pe1 and, unless the two are the same process, pe1 is
								   appended to the row of pe2. */
								static void add_pair_to_comm_schedule(int **sched, int pe1, int pe2)
								{
								     add_dest_to_comm_schedule(sched, pe1, pe2);

								     if (pe1 == pe2)
								          return;   /* self-communication is recorded only once */

								     add_dest_to_comm_schedule(sched, pe2, pe1);
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Simplification of the algorithm presented in [1] (we have fewer
								   constraints).  Appends npes entries to every row of sched, which
								   is expected to have been emptied beforehand.

								   For even npes every process first gets its self-exchange; then
								   each of the npes-1 rounds pairs process `round' with npes-1.  For
								   odd npes, process `round' exchanges with itself in that round.
								   In both cases the remaining processes are paired symmetrically
								   around `round', modulo n-1, where n is npes rounded up to even. */
								void fill_comm_schedule(int **sched, int npes)
								{
								     const int even = (npes % 2 == 0);
								     const int n = even ? npes : npes + 1;
								     const int ring = n - 1;
								     int round, off;

								     if (even)
								          for (round = 0; round < npes; ++round)
								               add_pair_to_comm_schedule(sched, round, round);

								     for (round = 0; round < ring; ++round) {
								          add_pair_to_comm_schedule(sched, round,
								                                    even ? npes - 1 : round);

								          for (off = 1; off < n / 2; ++off) {
								               int below = round - off;
								               int above = (round + off) % ring;

								               if (below < 0)
								                    below += ring;   /* wrap around the ring */

								               add_pair_to_comm_schedule(sched, below, above);
								          }
								     }
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Given an array sched[npes], fill it with the communications
								   schedule for the single process which_pe: sched[i] is the process
								   that which_pe exchanges with on step i.  No -1 terminator is
								   written.  If the number of entries produced differs from npes, a
								   message is printed to stderr and the program exits. */
								void fill1_comm_sched(int *sched, int which_pe, int npes)
								{
								     const int even = (npes % 2 == 0);
								     const int n = even ? npes : npes + 1;
								     const int ring = n - 1;
								     int count = 0;
								     int pe;

								     /* for even npes the self-exchange comes first */
								     if (even)
								          sched[count++] = which_pe;

								     for (pe = 0; pe < ring; ++pe) {
								          if (pe == which_pe) {
								               /* the round "owned" by which_pe: partner is npes-1 for
								                  even npes, which_pe itself for odd npes */
								               sched[count++] = even ? npes - 1 : pe;
								               continue;
								          }

								          if (even && which_pe == npes - 1)
								               sched[count++] = pe;

								          if (which_pe < ring) {
								               int fwd = (pe - which_pe + ring) % ring;
								               int back = (which_pe - pe + ring) % ring;

								               if (fwd < n / 2)
								                    sched[count++] = (pe + fwd) % ring;
								               if (back < n / 2)
								                    sched[count++] = (pe - back + ring) % ring;
								          }
								     }

								     if (count != npes) {
								          fprintf(stderr, "bug in fill1_com_schedule (%d, %d/%d)\n",
								                  count, which_pe, npes);
								          exit(EXIT_FAILURE);
								     }
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Reorder sched[0..npes-1], the schedule of one process, using the
								   permutation of steps that makes the schedule of process sortpe
								   ascending (ascending != 0) or descending (ascending == 0).

								   The schedule of sortpe is recomputed with fill1_comm_sched; step i
								   of sched is moved to the position given by sortpe's partner on
								   step i.  This relies on fill1_comm_sched producing each of
								   0..npes-1 exactly once.

								   If the scratch buffer cannot be allocated, a message is printed
								   to stderr and the program exits, in the same way fill1_comm_sched
								   reports its own errors. */
								static void sort1_comm_sched(int *sched, int npes, int sortpe, int ascending)
								{
								     int *sortsched, i;

								     /* first npes ints: schedule of sortpe;
								        last npes ints: the reordered copy of sched */
								     sortsched = malloc(sizeof *sortsched * 2 * npes);
								     if (!sortsched) {
								          fprintf(stderr, "out of memory in sort1_comm_sched (%d/%d)\n",
								                  sortpe, npes);
								          exit(EXIT_FAILURE);
								     }

								     fill1_comm_sched(sortsched, sortpe, npes);

								     if (ascending) {
								          for (i = 0; i < npes; ++i)
								               sortsched[npes + sortsched[i]] = sched[i];
								     }
								     else {
								          for (i = 0; i < npes; ++i)
								               sortsched[2*npes - 1 - sortsched[i]] = sched[i];
								     }

								     for (i = 0; i < npes; ++i)
								          sched[i] = sortsched[npes + i];

								     free(sortsched);
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Below, we have various checks in case of bugs: */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Check for deadlocks by simulating the schedule and looking for
								   cycles in the dependency list.  Returns 0 if there are deadlocks
								   (or other errors, including allocation failure); otherwise returns
								   the number of simulation rounds that were needed. */
								static int check_schedule_deadlock(int **sched, int npes)
								{
								     /* step[pe] is the step in the schedule that a given pe is on */
								     int *step = malloc(sizeof *step * npes);
								     /* depend[pe] is the pe' that pe is currently waiting for a message
								        from (-1 if none) */
								     int *depend = malloc(sizeof *depend * npes);
								     /* visited[pe] is nonzero if pe was already reached during the
								        current walk; its value is the 1-based position in the walk */
								     int *visited = malloc(sizeof *visited * npes);
								     int rounds = 0, result = 0;
								     int pe, cur, hops, busy;

								     if (!step || !depend || !visited)
								          goto cleanup;

								     for (pe = 0; pe < npes; ++pe)
								          step[pe] = 0;

								     do {
								          ++rounds;

								          /* snapshot what everybody is waiting for in this round */
								          for (pe = 0; pe < npes; ++pe)
								               depend[pe] = sched[pe][step[pe]];

								          /* now look for cycles in the dependencies with period > 2: */
								          for (pe = 0; pe < npes; ++pe) {
								               if (depend[pe] == -1)
								                    continue;

								               for (cur = 0; cur < npes; ++cur)
								                    visited[cur] = 0;

								               /* follow the chain of dependencies starting at pe until
								                  it ends or revisits a process */
								               hops = 0;
								               cur = pe;
								               do {
								                    visited[cur] = ++hops;
								                    cur = depend[cur];
								               } while (cur != -1 && !visited[cur]);

								               if (cur == -1) {
								                    fprintf(stderr,
								                            "BUG: unterminated cycle in schedule!\n");
								                    goto cleanup;
								               }

								               /* length of the cycle that the walk ran into */
								               if (hops - (visited[cur] - 1) > 2) {
								                    fprintf(stderr,"BUG: deadlock in schedule!\n");
								                    goto cleanup;
								               }

								               /* pe itself is on the cycle, so its exchange completes */
								               if (cur == pe)
								                    step[pe]++;
								          }

								          /* keep going while any process still has a pending entry */
								          busy = 0;
								          for (pe = 0; pe < npes && !busy; ++pe)
								               busy = (sched[pe][step[pe]] != -1);
								     } while (busy);

								     result = (rounds > 0 ? rounds : 1);

								cleanup:
								     free(step);
								     free(depend);
								     free(visited);
								     return result;
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Sanity checks; prints a message and returns 0 on failure.  Every
								   row must contain each of 0..npes-1 and hold exactly npes entries
								   before its -1 terminator; after that the deadlock check is run.
								   Undocumented feature: the return value on success is the value
								   returned by check_schedule_deadlock, i.e. the number of steps
								   required for the schedule to complete, counting stalls. */
								int check_comm_schedule(int **sched, int npes)
								{
								     int pe;

								     for (pe = 0; pe < npes; ++pe) {
								          const int *row = sched[pe];
								          int target, len;

								          /* every destination must appear somewhere in the row */
								          for (target = 0; target < npes; ++target) {
								               int k = 0;

								               while (row[k] != -1 && row[k] != target)
								                    ++k;

								               if (row[k] == -1) {
								                    fprintf(stderr,"BUG: schedule never sends message from "
								                            "%d to %d.\n",pe,target);
								                    return 0;  /* never send message to target */
								               }
								          }

								          /* ... and the row must not hold anything beyond those */
								          len = 0;
								          while (row[len] != -1)
								               ++len;

								          if (len != npes) {
								               fprintf(stderr,"BUG: schedule sends too many messages from "
								                       "%d\n",pe);
								               return 0;
								          }
								     }

								     return check_schedule_deadlock(sched,npes);
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* invert the order of all the schedules; this has no effect on
							 | 
						||
| 
								 | 
							
								   its required properties. */
							 | 
						||
| 
								 | 
							
								void invert_comm_schedule(int **sched, int npes)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								     int pe, i;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								     for (pe = 0; pe < npes; ++pe)
							 | 
						||
| 
								 | 
							
									  for (i = 0; i < npes/2; ++i) {
							 | 
						||
| 
								 | 
							
									       int dummy = sched[pe][i];
							 | 
						||
| 
								 | 
							
									       sched[pe][i] = sched[pe][npes-1-i];
							 | 
						||
| 
								 | 
							
									       sched[pe][npes-1-i] = dummy;
							 | 
						||
| 
								 | 
							
									  }
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Sort the schedule for sort_pe in ascending order of processor
								   index.  Whenever two steps of sort_pe's row are out of order, those
								   two steps are exchanged in the rows of all npes processes, so the
								   pairing within each step is kept intact.

								   NOTE(review): an earlier version of this comment said that for odd
								   npes the sort may introduce an extra stall unless sort_pe is 0 or
								   npes-1, whereas the file header describes the schedule as
								   stall-free -- confirm which statement is current. */
								void sort_comm_schedule(int **sched, int npes, int sort_pe)
								{
								     int a, b;

								     /* Note that this sort could be done in O(npes) swaps because the
								        numbers being sorted are just 0...npes-1, but a simple
								        quadratic exchange sort is used here for simplicity. */
								     for (a = 0; a < npes - 1; ++a) {
								          for (b = a + 1; b < npes; ++b) {
								               int pe;

								               if (sched[sort_pe][a] <= sched[sort_pe][b])
								                    continue;

								               /* exchange steps a and b for every process */
								               pe = 0;
								               while (pe < npes) {
								                    int *row = sched[pe];
								                    int tmp = row[a];

								                    row[a] = row[b];
								                    row[b] = tmp;
								                    ++pe;
								               }
								          }
								     }
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Print the schedule to stdout (for debugging purposes).

								   One line is written per processor: "pe <n> schedule:" followed by
								   that processor's entries.  A row is printed up to, but not
								   including, its first -1 entry, so every row must contain a -1
								   terminator; npes is not used to bound the row length.

								   Numbers are right-aligned to a minimum field width of 1, 2 or 3
								   depending on whether npes is below 10, below 100, or larger. */
								void print_comm_schedule(int **sched, int npes)
								{
								     int pe;
								     int width = npes < 10 ? 1 : (npes < 100 ? 2 : 3);

								     for (pe = 0; pe < npes; ++pe) {
								          const int *row = sched[pe];
								          int i = 0;

								          printf("pe %*d schedule:", width, pe);
								          while (row[i] != -1) {
								               printf("  %*d", width, row[i]);
								               ++i;
								          }
								          printf("\n");
								     }
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Check sched with check_comm_schedule, print the verdict (including
								   the step count it returned when that is non-zero), then print the
								   schedule itself. */
								static void report_schedule_status(int **sched, int npes)
								{
								     int steps = check_comm_schedule(sched, npes);

								     if (steps)
								          printf("schedule OK (takes %d steps to complete).\n", steps);
								     else
								          printf("schedule not OK.\n");

								     print_comm_schedule(sched, npes);
								}

								/* Test driver.

								   Usage: <program> [npes [sortpe]]

								   With npes given: build the schedule for npes processors, check it
								   and print it.  If sortpe is also given, additionally re-create the
								   row of sortpe with fill1_comm_sched and print it, then sort the
								   schedule for sortpe and finally invert it, checking and printing
								   the schedule after each of those two steps.

								   Without arguments: run the fill / fill1 / sort / invert checks for
								   npes = 1, 2, 3, ... until a check fails or the user interrupts.

								   Exit status: 0 on success; 1 for invalid arguments; 6 (single run)
								   or 5 (infinite tests) when make_comm_schedule returns NULL or the
								   scratch row cannot be allocated; 2, 3 or 4 when the fill, sort or
								   invert check fails in the infinite tests. */
								int main(int argc, char **argv)
								{
								     int **sched = NULL;
								     int *sched1 = NULL;
								     int npes = -1, sortpe = -1, i;
								     int status = 0;

								     if (argc >= 2) {
								          npes = atoi(argv[1]);
								          if (npes <= 0) {
								               fprintf(stderr,"npes must be positive!\n");
								               return 1;
								          }
								     }
								     if (argc >= 3) {
								          sortpe = atoi(argv[2]);
								          if (sortpe < 0 || sortpe >= npes) {
								               fprintf(stderr,"sortpe must be between 0 and npes-1.\n");
								               return 1;
								          }
								     }

								     if (npes != -1) {
								          printf("Computing schedule for npes = %d:\n",npes);
								          sched = make_comm_schedule(npes);
								          if (!sched) {
								               fprintf(stderr,"Out of memory!\n");
								               return 6;
								          }

								          report_schedule_status(sched, npes);

								          if (sortpe != -1) {
								               int width = npes < 10 ? 1 : (npes < 100 ? 2 : 3);

								               printf("\nRe-creating schedule for pe = %d...\n", sortpe);
								               sched1 = malloc(sizeof *sched1 * npes);
								               if (!sched1) {
								                    fprintf(stderr,"Out of memory!\n");
								                    free_comm_schedule(sched,npes);
								                    return 6;
								               }
								               for (i = 0; i < npes; ++i) sched1[i] = -1;
								               fill1_comm_sched(sched1, sortpe, npes);
								               printf("  =");
								               for (i = 0; i < npes; ++i)
								                    printf("  %*d", width, sched1[i]);
								               printf("\n");

								               printf("\nSorting schedule for sortpe = %d...\n", sortpe);
								               sort_comm_schedule(sched,npes,sortpe);
								               report_schedule_status(sched, npes);

								               printf("\nInverting schedule...\n");
								               invert_comm_schedule(sched,npes);
								               report_schedule_status(sched, npes);

								               free(sched1);
								          }

								          /* Release the schedule whether or not sortpe was given. */
								          free_comm_schedule(sched,npes);
								     }
								     else {
								          printf("Doing infinite tests...\n");
								          for (npes = 1; ; ++npes) {
								               sched1 = malloc(sizeof *sched1 * npes);
								               if (!sched1) {
								                    fprintf(stderr,"Out of memory!\n");
								                    return 5;
								               }
								               printf("npes = %d...",npes);
								               sched = make_comm_schedule(npes);
								               if (!sched) {
								                    fprintf(stderr,"Out of memory!\n");
								                    free(sched1);
								                    return 5;
								               }
								               for (sortpe = 0; sortpe < npes; ++sortpe) {
								                    /* Start every iteration from a freshly filled
								                       schedule, since sorting and inverting below
								                       modify it in place. */
								                    empty_comm_schedule(sched,npes);
								                    fill_comm_schedule(sched,npes);
								                    if (!check_comm_schedule(sched,npes)) {
								                         fprintf(stderr,
								                                 "\n -- fill error for sortpe = %d!\n",sortpe);
								                         status = 2;
								                         goto fail;
								                    }

								                    /* The single-row fill must reproduce row sortpe of
								                       the full schedule.  A mismatch is reported once
								                       per differing entry but does not stop the run. */
								                    for (i = 0; i < npes; ++i) sched1[i] = -1;
								                    fill1_comm_sched(sched1, sortpe, npes);
								                    for (i = 0; i < npes; ++i)
								                         if (sched1[i] != sched[sortpe][i])
								                              fprintf(stderr,
								                                      "\n -- fill1 error for pe = %d!\n",
								                                      sortpe);

								                    sort_comm_schedule(sched,npes,sortpe);
								                    if (!check_comm_schedule(sched,npes)) {
								                         fprintf(stderr,
								                                 "\n -- sort error for sortpe = %d!\n",sortpe);
								                         status = 3;
								                         goto fail;
								                    }
								                    invert_comm_schedule(sched,npes);
								                    if (!check_comm_schedule(sched,npes)) {
								                         fprintf(stderr,
								                                 "\n -- invert error for sortpe = %d!\n",
								                                 sortpe);
								                         status = 4;
								                         goto fail;
								                    }
								               }
								               free_comm_schedule(sched,npes);
								               printf("OK\n");
								               if (npes % 50 == 0)
								                    printf("(...Hit Ctrl-C to stop...)\n");
								               free(sched1);
								          }
								     }

								     return 0;

								fail:
								     /* Only reached from the infinite tests, with both allocations live. */
								     free_comm_schedule(sched,npes);
								     free(sched1);
								     return status;
								}
							 |