304 lines
		
	
	
		
			7.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			304 lines
		
	
	
		
			7.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* See bench.c.  We keep a few common subroutines in this file so
 | |
|    that they can be re-used in the MPI test program. */
 | |
| 
 | |
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include "tests/fftw-bench.h"
 | |
| 
 | |
| /* define to enable code that traps floating-point exceptions.
 | |
|    Disabled by default because I don't want to worry about the
 | |
|    portability of such code.  feenableexcept() seems to be a GNU
 | |
|    thing */
 | |
| #undef TRAP_FP_EXCEPTIONS
 | |
| 
 | |
| #ifdef TRAP_FP_EXCEPTIONS
 | |
| #  include <signal.h>
 | |
| #  include <fenv.h>
 | |
| #endif
 | |
| 
 | |
| #ifdef _OPENMP
 | |
| #  include <omp.h>
 | |
| #endif
 | |
| 
 | |
| #ifdef HAVE_SMP
 | |
| int threads_ok = 1;
 | |
| #endif
 | |
| 
 | |
| FFTW(plan) the_plan = 0;
 | |
| 
 | |
| static const char *wisdat = "wis.dat";
 | |
| unsigned the_flags = 0;
 | |
| int paranoid = 0;
 | |
| int usewisdom = 0;
 | |
| int havewisdom = 0;
 | |
| int nthreads = 1;
 | |
| int amnesia = 0;
 | |
| 
 | |
| extern void install_hook(void);  /* in hook.c */
 | |
| extern void uninstall_hook(void);  /* in hook.c */
 | |
| 
 | |
| #ifdef FFTW_RANDOM_ESTIMATOR
 | |
| extern unsigned FFTW(random_estimate_seed);
 | |
| #endif
 | |
| 
 | |
| #ifdef TRAP_FP_EXCEPTIONS
 | |
| static void sigfpe_handler(int sig, siginfo_t *info, void *context)
 | |
| {
 | |
|      /* fftw code is not supposed to generate FP exceptions */
 | |
|      UNUSED(sig); UNUSED(info); UNUSED(context);
 | |
|      fprintf(stderr, "caught FPE, aborting\n");
 | |
|      abort();
 | |
| }
 | |
| 
 | |
| static void setup_sigfpe_handler(void)
 | |
| {
 | |
|   struct sigaction a;
 | |
|   feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW);
 | |
|   memset(&a, 0, sizeof(a));
 | |
|   a.sa_sigaction = sigfpe_handler;
 | |
|   a.sa_flags = SA_SIGINFO;
 | |
|   if (sigaction(SIGFPE, &a, NULL) == -1) {
 | |
|        fprintf(stderr, "cannot install sigfpe handler\n");
 | |
|        exit(1);
 | |
|   }
 | |
| }
 | |
| #else
 | |
/* Stub used when TRAP_FP_EXCEPTIONS is not defined: floating-point
   exception trapping is compiled out, so there is nothing to install. */
static void setup_sigfpe_handler(void)
{
     return;
}
 | |
| #endif
 | |
| 
 | |
/* Dummy serial "threads" backend for testing threads_set_callback.
 *
 * Calls work() once per job, in order, on the calling thread.  Job k
 * receives the address jobdata + k * elsize.  The return value of work()
 * is discarded and `data` is not used.
 */
static void serial_threads(void *(*work)(char *), char *jobdata, size_t elsize, int njobs, void *data)
{
     int job = 0;

     (void) data; /* unused */

     while (job < njobs) {
          work(jobdata + elsize * job);
          ++job;
     }
}
 | |
| 
 | |
| void useropt(const char *arg)
 | |
| {
 | |
|      int x;
 | |
|      double y;
 | |
| 
 | |
|      if (!strcmp(arg, "patient")) the_flags |= FFTW_PATIENT;
 | |
|      else if (!strcmp(arg, "estimate")) the_flags |= FFTW_ESTIMATE;
 | |
|      else if (!strcmp(arg, "estimatepat")) the_flags |= FFTW_ESTIMATE_PATIENT;
 | |
|      else if (!strcmp(arg, "exhaustive")) the_flags |= FFTW_EXHAUSTIVE;
 | |
|      else if (!strcmp(arg, "unaligned")) the_flags |= FFTW_UNALIGNED;
 | |
|      else if (!strcmp(arg, "nosimd")) the_flags |= FFTW_NO_SIMD;
 | |
|      else if (!strcmp(arg, "noindirectop")) the_flags |= FFTW_NO_INDIRECT_OP;
 | |
|      else if (!strcmp(arg, "wisdom-only")) the_flags |= FFTW_WISDOM_ONLY;
 | |
|      else if (sscanf(arg, "flag=%d", &x) == 1) the_flags |= x;
 | |
|      else if (sscanf(arg, "bflag=%d", &x) == 1) the_flags |= 1U << x;
 | |
|      else if (!strcmp(arg, "paranoid")) paranoid = 1;
 | |
|      else if (!strcmp(arg, "wisdom")) usewisdom = 1;
 | |
|      else if (!strcmp(arg, "amnesia")) amnesia = 1;
 | |
|      else if (!strcmp(arg, "threads_callback"))
 | |
| #ifdef HAVE_SMP
 | |
|           FFTW(threads_set_callback)(serial_threads, NULL);
 | |
| #else
 | |
|           fprintf(stderr, "Serial FFTW; ignoring threads_callback option.\n");
 | |
| #endif
 | |
|      else if (sscanf(arg, "nthreads=%d", &x) == 1) nthreads = x;
 | |
| #ifdef FFTW_RANDOM_ESTIMATOR
 | |
|      else if (sscanf(arg, "eseed=%d", &x) == 1) FFTW(random_estimate_seed) = x;
 | |
| #endif
 | |
|      else if (sscanf(arg, "timelimit=%lg", &y) == 1) {
 | |
| 	  FFTW(set_timelimit)(y);
 | |
|      }
 | |
| 
 | |
|      else fprintf(stderr, "unknown user option: %s.  Ignoring.\n", arg);
 | |
| }
 | |
| 
 | |
| void rdwisdom(void)
 | |
| {
 | |
|      FILE *f;
 | |
|      double tim;
 | |
|      int success = 0;
 | |
| 
 | |
|      if (havewisdom) return;
 | |
| 
 | |
| #ifdef HAVE_SMP
 | |
|      if (threads_ok) {
 | |
| 	  BENCH_ASSERT(FFTW(init_threads)());
 | |
| 	  FFTW(plan_with_nthreads)(nthreads);
 | |
| 	  BENCH_ASSERT(FFTW(planner_nthreads)() == nthreads);
 | |
|           FFTW(make_planner_thread_safe)();
 | |
| #ifdef _OPENMP
 | |
| 	  omp_set_num_threads(nthreads);
 | |
| #endif
 | |
|      }
 | |
|      else if (nthreads > 1 && verbose > 1) {
 | |
| 	  fprintf(stderr, "bench: WARNING - nthreads = %d, but threads not supported\n", nthreads);
 | |
| 	  nthreads = 1;
 | |
|      }
 | |
| #endif
 | |
| 
 | |
|      if (!usewisdom) return;
 | |
| 
 | |
|      timer_start(USER_TIMER);
 | |
|      if ((f = fopen(wisdat, "r"))) {
 | |
| 	  if (!import_wisdom(f))
 | |
| 	       fprintf(stderr, "bench: ERROR reading wisdom\n");
 | |
| 	  else
 | |
| 	       success = 1;
 | |
| 	  fclose(f);
 | |
|      }
 | |
|      tim = timer_stop(USER_TIMER);
 | |
| 
 | |
|      if (success) {
 | |
| 	  if (verbose > 1) printf("READ WISDOM (%g seconds): ", tim);
 | |
| 
 | |
| 	  if (verbose > 3)
 | |
| 	       export_wisdom(stdout);
 | |
| 	  if (verbose > 1)
 | |
| 	       printf("\n");
 | |
|      }
 | |
|      havewisdom = 1;
 | |
| }
 | |
| 
 | |
| void wrwisdom(void)
 | |
| {
 | |
|      FILE *f;
 | |
|      double tim;
 | |
|      if (!havewisdom) return;
 | |
| 
 | |
|      timer_start(USER_TIMER);
 | |
|      if ((f = fopen(wisdat, "w"))) {
 | |
| 	  export_wisdom(f);
 | |
| 	  fclose(f);
 | |
|      }
 | |
|      tim = timer_stop(USER_TIMER);
 | |
|      if (verbose > 1) printf("write wisdom took %g seconds\n", tim);
 | |
| }
 | |
| 
 | |
| static unsigned preserve_input_flags(bench_problem *p)
 | |
| {
 | |
|      /*
 | |
|       * fftw3 cannot preserve input for multidimensional c2r transforms.
 | |
|       * Enforce FFTW_DESTROY_INPUT
 | |
|       */
 | |
|      if (p->kind == PROBLEM_REAL &&
 | |
| 	 p->sign > 0 &&
 | |
| 	 !p->in_place &&
 | |
| 	 p->sz->rnk > 1)
 | |
| 	  p->destroy_input = 1;
 | |
| 
 | |
|      if (p->destroy_input)
 | |
| 	  return FFTW_DESTROY_INPUT;
 | |
|      else
 | |
| 	  return FFTW_PRESERVE_INPUT;
 | |
| }
 | |
| 
 | |
| int can_do(bench_problem *p)
 | |
| {
 | |
|      double tim;
 | |
| 
 | |
|      if (verbose > 2 && p->pstring)
 | |
| 	  printf("Planning %s...\n", p->pstring);
 | |
|      rdwisdom();
 | |
| 
 | |
|      timer_start(USER_TIMER);
 | |
|      the_plan = mkplan(p, preserve_input_flags(p) | the_flags | FFTW_ESTIMATE);
 | |
|      tim = timer_stop(USER_TIMER);
 | |
|      if (verbose > 2) printf("estimate-planner time: %g s\n", tim);
 | |
| 
 | |
|      if (the_plan) {
 | |
| 	  FFTW(destroy_plan)(the_plan);
 | |
| 	  return 1;
 | |
|      }
 | |
|      return 0;
 | |
| }
 | |
| 
 | |
| void setup(bench_problem *p)
 | |
| {
 | |
|      double tim;
 | |
| 
 | |
|      setup_sigfpe_handler();
 | |
| 
 | |
|      if (amnesia) {
 | |
| 	  FFTW(forget_wisdom)();
 | |
| 	  havewisdom = 0;
 | |
|      }
 | |
| 
 | |
|      /* Regression test: check that fftw_malloc exists and links
 | |
|       * properly */
 | |
|      {
 | |
|           void *ptr = FFTW(malloc(42));
 | |
|           BENCH_ASSERT(FFTW(alignment_of)((bench_real *)ptr) == 0);
 | |
|           FFTW(free(ptr));
 | |
|      }
 | |
| 
 | |
|      rdwisdom();
 | |
|      install_hook();
 | |
| 
 | |
| #ifdef HAVE_SMP
 | |
|      if (verbose > 1 && nthreads > 1) printf("NTHREADS = %d\n", nthreads);
 | |
| #endif
 | |
| 
 | |
|      timer_start(USER_TIMER);
 | |
|      the_plan = mkplan(p, preserve_input_flags(p) | the_flags);
 | |
|      tim = timer_stop(USER_TIMER);
 | |
|      if (verbose > 1) printf("planner time: %g s\n", tim);
 | |
| 
 | |
|      BENCH_ASSERT(the_plan);
 | |
| 
 | |
|      {
 | |
| 	  double add, mul, nfma, cost, pcost;
 | |
| 	  FFTW(flops)(the_plan, &add, &mul, &nfma);
 | |
| 	  cost = FFTW(estimate_cost)(the_plan);
 | |
| 	  pcost = FFTW(cost)(the_plan);
 | |
| 	  if (verbose > 1) {
 | |
| 	       FFTW(print_plan)(the_plan);
 | |
| 	       printf("\n");
 | |
| 	       printf("flops: %0.0f add, %0.0f mul, %0.0f fma\n",
 | |
| 		      add, mul, nfma);
 | |
| 	       printf("estimated cost: %f, pcost = %f\n", cost, pcost);
 | |
| 	  }
 | |
|      }
 | |
| }
 | |
| 
 | |
| 
 | |
| void doit(int iter, bench_problem *p)
 | |
| {
 | |
|      int i;
 | |
|      FFTW(plan) q = the_plan;
 | |
| 
 | |
|      UNUSED(p);
 | |
|      for (i = 0; i < iter; ++i)
 | |
| 	  FFTW(execute)(q);
 | |
| }
 | |
| 
 | |
| void done(bench_problem *p)
 | |
| {
 | |
|      UNUSED(p);
 | |
| 
 | |
|      FFTW(destroy_plan)(the_plan);
 | |
|      uninstall_hook();
 | |
| }
 | |
| 
 | |
| void cleanup(void)
 | |
| {
 | |
|      initial_cleanup();
 | |
| 
 | |
|      wrwisdom();
 | |
| #ifdef HAVE_SMP
 | |
|      FFTW(cleanup_threads)();
 | |
| #else
 | |
|      FFTW(cleanup)();
 | |
| #endif
 | |
| 
 | |
| #    ifdef FFTW_DEBUG_MALLOC
 | |
|      {
 | |
| 	  /* undocumented memory checker */
 | |
| 	  FFTW_EXTERN void FFTW(malloc_print_minfo)(int v);
 | |
| 	  FFTW(malloc_print_minfo)(verbose);
 | |
|      }
 | |
| #    endif
 | |
| 
 | |
|      final_cleanup();
 | |
| }
 | 
