304 lines
		
	
	
		
			7.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			304 lines
		
	
	
		
			7.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
|   | /* See bench.c.  We keep a few common subroutines in this file so
 | ||
|  |    that they can be re-used in the MPI test program. */ | ||
|  | 
 | ||
|  | #include <math.h>
 | ||
|  | #include <stdio.h>
 | ||
|  | #include <string.h>
 | ||
|  | #include "tests/fftw-bench.h"
 | ||
|  | 
 | ||
|  | /* define to enable code that traps floating-point exceptions.
 | ||
|  |    Disabled by default because I don't want to worry about the | ||
|  |    portability of such code.  feenableexcept() seems to be a GNU | ||
|  |    thing */ | ||
|  | #undef TRAP_FP_EXCEPTIONS
 | ||
|  | 
 | ||
|  | #ifdef TRAP_FP_EXCEPTIONS
 | ||
|  | #  include <signal.h>
 | ||
|  | #  include <fenv.h>
 | ||
|  | #endif
 | ||
|  | 
 | ||
|  | #ifdef _OPENMP
 | ||
|  | #  include <omp.h>
 | ||
|  | #endif
 | ||
|  | 
 | ||
|  | #ifdef HAVE_SMP
 | ||
|  | int threads_ok = 1; | ||
|  | #endif
 | ||
|  | 
 | ||
|  | FFTW(plan) the_plan = 0; | ||
|  | 
 | ||
|  | static const char *wisdat = "wis.dat"; | ||
|  | unsigned the_flags = 0; | ||
|  | int paranoid = 0; | ||
|  | int usewisdom = 0; | ||
|  | int havewisdom = 0; | ||
|  | int nthreads = 1; | ||
|  | int amnesia = 0; | ||
|  | 
 | ||
|  | extern void install_hook(void);  /* in hook.c */ | ||
|  | extern void uninstall_hook(void);  /* in hook.c */ | ||
|  | 
 | ||
|  | #ifdef FFTW_RANDOM_ESTIMATOR
 | ||
|  | extern unsigned FFTW(random_estimate_seed); | ||
|  | #endif
 | ||
|  | 
 | ||
|  | #ifdef TRAP_FP_EXCEPTIONS
 | ||
|  | static void sigfpe_handler(int sig, siginfo_t *info, void *context) | ||
|  | { | ||
|  |      /* fftw code is not supposed to generate FP exceptions */ | ||
|  |      UNUSED(sig); UNUSED(info); UNUSED(context); | ||
|  |      fprintf(stderr, "caught FPE, aborting\n"); | ||
|  |      abort(); | ||
|  | } | ||
|  | 
 | ||
|  | static void setup_sigfpe_handler(void) | ||
|  | { | ||
|  |   struct sigaction a; | ||
|  |   feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW); | ||
|  |   memset(&a, 0, sizeof(a)); | ||
|  |   a.sa_sigaction = sigfpe_handler; | ||
|  |   a.sa_flags = SA_SIGINFO; | ||
|  |   if (sigaction(SIGFPE, &a, NULL) == -1) { | ||
|  |        fprintf(stderr, "cannot install sigfpe handler\n"); | ||
|  |        exit(1); | ||
|  |   } | ||
|  | } | ||
|  | #else
 | ||
|  | static void setup_sigfpe_handler(void) | ||
|  | { | ||
|  | } | ||
|  | #endif
 | ||
|  | 
 | ||
|  | /* dummy serial threads backend for testing threads_set_callback */ | ||
|  | static void serial_threads(void *(*work)(char *), char *jobdata, size_t elsize, int njobs, void *data) | ||
|  | { | ||
|  |      int i; | ||
|  |      (void) data; /* unused */ | ||
|  |      for (i = 0; i < njobs; ++i) | ||
|  |           work(jobdata + elsize * i); | ||
|  | } | ||
|  | 
 | ||
|  | void useropt(const char *arg) | ||
|  | { | ||
|  |      int x; | ||
|  |      double y; | ||
|  | 
 | ||
|  |      if (!strcmp(arg, "patient")) the_flags |= FFTW_PATIENT; | ||
|  |      else if (!strcmp(arg, "estimate")) the_flags |= FFTW_ESTIMATE; | ||
|  |      else if (!strcmp(arg, "estimatepat")) the_flags |= FFTW_ESTIMATE_PATIENT; | ||
|  |      else if (!strcmp(arg, "exhaustive")) the_flags |= FFTW_EXHAUSTIVE; | ||
|  |      else if (!strcmp(arg, "unaligned")) the_flags |= FFTW_UNALIGNED; | ||
|  |      else if (!strcmp(arg, "nosimd")) the_flags |= FFTW_NO_SIMD; | ||
|  |      else if (!strcmp(arg, "noindirectop")) the_flags |= FFTW_NO_INDIRECT_OP; | ||
|  |      else if (!strcmp(arg, "wisdom-only")) the_flags |= FFTW_WISDOM_ONLY; | ||
|  |      else if (sscanf(arg, "flag=%d", &x) == 1) the_flags |= x; | ||
|  |      else if (sscanf(arg, "bflag=%d", &x) == 1) the_flags |= 1U << x; | ||
|  |      else if (!strcmp(arg, "paranoid")) paranoid = 1; | ||
|  |      else if (!strcmp(arg, "wisdom")) usewisdom = 1; | ||
|  |      else if (!strcmp(arg, "amnesia")) amnesia = 1; | ||
|  |      else if (!strcmp(arg, "threads_callback")) | ||
|  | #ifdef HAVE_SMP
 | ||
|  |           FFTW(threads_set_callback)(serial_threads, NULL); | ||
|  | #else
 | ||
|  |           fprintf(stderr, "Serial FFTW; ignoring threads_callback option.\n"); | ||
|  | #endif
 | ||
|  |      else if (sscanf(arg, "nthreads=%d", &x) == 1) nthreads = x; | ||
|  | #ifdef FFTW_RANDOM_ESTIMATOR
 | ||
|  |      else if (sscanf(arg, "eseed=%d", &x) == 1) FFTW(random_estimate_seed) = x; | ||
|  | #endif
 | ||
|  |      else if (sscanf(arg, "timelimit=%lg", &y) == 1) { | ||
|  | 	  FFTW(set_timelimit)(y); | ||
|  |      } | ||
|  | 
 | ||
|  |      else fprintf(stderr, "unknown user option: %s.  Ignoring.\n", arg); | ||
|  | } | ||
|  | 
 | ||
|  | void rdwisdom(void) | ||
|  | { | ||
|  |      FILE *f; | ||
|  |      double tim; | ||
|  |      int success = 0; | ||
|  | 
 | ||
|  |      if (havewisdom) return; | ||
|  | 
 | ||
|  | #ifdef HAVE_SMP
 | ||
|  |      if (threads_ok) { | ||
|  | 	  BENCH_ASSERT(FFTW(init_threads)()); | ||
|  | 	  FFTW(plan_with_nthreads)(nthreads); | ||
|  | 	  BENCH_ASSERT(FFTW(planner_nthreads)() == nthreads); | ||
|  |           FFTW(make_planner_thread_safe)(); | ||
|  | #ifdef _OPENMP
 | ||
|  | 	  omp_set_num_threads(nthreads); | ||
|  | #endif
 | ||
|  |      } | ||
|  |      else if (nthreads > 1 && verbose > 1) { | ||
|  | 	  fprintf(stderr, "bench: WARNING - nthreads = %d, but threads not supported\n", nthreads); | ||
|  | 	  nthreads = 1; | ||
|  |      } | ||
|  | #endif
 | ||
|  | 
 | ||
|  |      if (!usewisdom) return; | ||
|  | 
 | ||
|  |      timer_start(USER_TIMER); | ||
|  |      if ((f = fopen(wisdat, "r"))) { | ||
|  | 	  if (!import_wisdom(f)) | ||
|  | 	       fprintf(stderr, "bench: ERROR reading wisdom\n"); | ||
|  | 	  else | ||
|  | 	       success = 1; | ||
|  | 	  fclose(f); | ||
|  |      } | ||
|  |      tim = timer_stop(USER_TIMER); | ||
|  | 
 | ||
|  |      if (success) { | ||
|  | 	  if (verbose > 1) printf("READ WISDOM (%g seconds): ", tim); | ||
|  | 
 | ||
|  | 	  if (verbose > 3) | ||
|  | 	       export_wisdom(stdout); | ||
|  | 	  if (verbose > 1) | ||
|  | 	       printf("\n"); | ||
|  |      } | ||
|  |      havewisdom = 1; | ||
|  | } | ||
|  | 
 | ||
|  | void wrwisdom(void) | ||
|  | { | ||
|  |      FILE *f; | ||
|  |      double tim; | ||
|  |      if (!havewisdom) return; | ||
|  | 
 | ||
|  |      timer_start(USER_TIMER); | ||
|  |      if ((f = fopen(wisdat, "w"))) { | ||
|  | 	  export_wisdom(f); | ||
|  | 	  fclose(f); | ||
|  |      } | ||
|  |      tim = timer_stop(USER_TIMER); | ||
|  |      if (verbose > 1) printf("write wisdom took %g seconds\n", tim); | ||
|  | } | ||
|  | 
 | ||
|  | static unsigned preserve_input_flags(bench_problem *p) | ||
|  | { | ||
|  |      /*
 | ||
|  |       * fftw3 cannot preserve input for multidimensional c2r transforms. | ||
|  |       * Enforce FFTW_DESTROY_INPUT | ||
|  |       */ | ||
|  |      if (p->kind == PROBLEM_REAL && | ||
|  | 	 p->sign > 0 && | ||
|  | 	 !p->in_place && | ||
|  | 	 p->sz->rnk > 1) | ||
|  | 	  p->destroy_input = 1; | ||
|  | 
 | ||
|  |      if (p->destroy_input) | ||
|  | 	  return FFTW_DESTROY_INPUT; | ||
|  |      else | ||
|  | 	  return FFTW_PRESERVE_INPUT; | ||
|  | } | ||
|  | 
 | ||
|  | int can_do(bench_problem *p) | ||
|  | { | ||
|  |      double tim; | ||
|  | 
 | ||
|  |      if (verbose > 2 && p->pstring) | ||
|  | 	  printf("Planning %s...\n", p->pstring); | ||
|  |      rdwisdom(); | ||
|  | 
 | ||
|  |      timer_start(USER_TIMER); | ||
|  |      the_plan = mkplan(p, preserve_input_flags(p) | the_flags | FFTW_ESTIMATE); | ||
|  |      tim = timer_stop(USER_TIMER); | ||
|  |      if (verbose > 2) printf("estimate-planner time: %g s\n", tim); | ||
|  | 
 | ||
|  |      if (the_plan) { | ||
|  | 	  FFTW(destroy_plan)(the_plan); | ||
|  | 	  return 1; | ||
|  |      } | ||
|  |      return 0; | ||
|  | } | ||
|  | 
 | ||
|  | void setup(bench_problem *p) | ||
|  | { | ||
|  |      double tim; | ||
|  | 
 | ||
|  |      setup_sigfpe_handler(); | ||
|  | 
 | ||
|  |      if (amnesia) { | ||
|  | 	  FFTW(forget_wisdom)(); | ||
|  | 	  havewisdom = 0; | ||
|  |      } | ||
|  | 
 | ||
|  |      /* Regression test: check that fftw_malloc exists and links
 | ||
|  |       * properly */ | ||
|  |      { | ||
|  |           void *ptr = FFTW(malloc(42)); | ||
|  |           BENCH_ASSERT(FFTW(alignment_of)((bench_real *)ptr) == 0); | ||
|  |           FFTW(free(ptr)); | ||
|  |      } | ||
|  | 
 | ||
|  |      rdwisdom(); | ||
|  |      install_hook(); | ||
|  | 
 | ||
|  | #ifdef HAVE_SMP
 | ||
|  |      if (verbose > 1 && nthreads > 1) printf("NTHREADS = %d\n", nthreads); | ||
|  | #endif
 | ||
|  | 
 | ||
|  |      timer_start(USER_TIMER); | ||
|  |      the_plan = mkplan(p, preserve_input_flags(p) | the_flags); | ||
|  |      tim = timer_stop(USER_TIMER); | ||
|  |      if (verbose > 1) printf("planner time: %g s\n", tim); | ||
|  | 
 | ||
|  |      BENCH_ASSERT(the_plan); | ||
|  | 
 | ||
|  |      { | ||
|  | 	  double add, mul, nfma, cost, pcost; | ||
|  | 	  FFTW(flops)(the_plan, &add, &mul, &nfma); | ||
|  | 	  cost = FFTW(estimate_cost)(the_plan); | ||
|  | 	  pcost = FFTW(cost)(the_plan); | ||
|  | 	  if (verbose > 1) { | ||
|  | 	       FFTW(print_plan)(the_plan); | ||
|  | 	       printf("\n"); | ||
|  | 	       printf("flops: %0.0f add, %0.0f mul, %0.0f fma\n", | ||
|  | 		      add, mul, nfma); | ||
|  | 	       printf("estimated cost: %f, pcost = %f\n", cost, pcost); | ||
|  | 	  } | ||
|  |      } | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | void doit(int iter, bench_problem *p) | ||
|  | { | ||
|  |      int i; | ||
|  |      FFTW(plan) q = the_plan; | ||
|  | 
 | ||
|  |      UNUSED(p); | ||
|  |      for (i = 0; i < iter; ++i) | ||
|  | 	  FFTW(execute)(q); | ||
|  | } | ||
|  | 
 | ||
|  | void done(bench_problem *p) | ||
|  | { | ||
|  |      UNUSED(p); | ||
|  | 
 | ||
|  |      FFTW(destroy_plan)(the_plan); | ||
|  |      uninstall_hook(); | ||
|  | } | ||
|  | 
 | ||
|  | void cleanup(void) | ||
|  | { | ||
|  |      initial_cleanup(); | ||
|  | 
 | ||
|  |      wrwisdom(); | ||
|  | #ifdef HAVE_SMP
 | ||
|  |      FFTW(cleanup_threads)(); | ||
|  | #else
 | ||
|  |      FFTW(cleanup)(); | ||
|  | #endif
 | ||
|  | 
 | ||
|  | #    ifdef FFTW_DEBUG_MALLOC
 | ||
|  |      { | ||
|  | 	  /* undocumented memory checker */ | ||
|  | 	  FFTW_EXTERN void FFTW(malloc_print_minfo)(int v); | ||
|  | 	  FFTW(malloc_print_minfo)(verbose); | ||
|  |      } | ||
|  | #    endif
 | ||
|  | 
 | ||
|  |      final_cleanup(); | ||
|  | } |