260 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			260 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* fftw hook to be used in the benchmark program.

   We keep it in a separate file because

   1) bench.c is supposed to test the API---we do not want to #include
      "ifftw.h" and accidentally use internal symbols/macros.
   2) this code is a royal mess.  The messiness is due to
      A) confusion between internal fftw tensors and bench_tensor's
         (which we want to keep separate because the benchmark
         program tests other routines too)
      B) despite A), our desire to recycle the libbench verifier.
*/
|  | 
 | ||
|  | #include <stdio.h>
 | ||
|  | #include "libbench2/bench-user.h"
 | ||
|  | 
 | ||
|  | #define CALLING_FFTW /* hack for Windows DLL nonsense */
 | ||
|  | #include "api/api.h"
 | ||
|  | #include "dft/dft.h"
 | ||
|  | #include "rdft/rdft.h"
 | ||
|  | 
 | ||
|  | extern int paranoid; /* in bench.c */ | ||
|  | extern X(plan) the_plan; /* in bench.c */ | ||
|  | 
 | ||
|  | /*
 | ||
|  |   transform an fftw tensor into a bench_tensor. | ||
|  | */ | ||
|  | static bench_tensor *fftw_tensor_to_bench_tensor(tensor *t) | ||
|  | { | ||
|  |      bench_tensor *bt = mktensor(t->rnk); | ||
|  | 
 | ||
|  |      if (FINITE_RNK(t->rnk)) { | ||
|  | 	  int i; | ||
|  | 	  for (i = 0; i < t->rnk; ++i) { | ||
|  | 	       /* FIXME: 64-bit unclean because of INT -> int conversion */ | ||
|  | 	       bt->dims[i].n = t->dims[i].n; | ||
|  | 	       bt->dims[i].is = t->dims[i].is; | ||
|  | 	       bt->dims[i].os = t->dims[i].os; | ||
|  | 	       BENCH_ASSERT(bt->dims[i].n == t->dims[i].n); | ||
|  | 	       BENCH_ASSERT(bt->dims[i].is == t->dims[i].is); | ||
|  | 	       BENCH_ASSERT(bt->dims[i].os == t->dims[i].os); | ||
|  | 	  } | ||
|  |      } | ||
|  |      return bt; | ||
|  | } | ||
|  | 
 | ||
|  | /*
 | ||
|  |   transform an fftw problem into a bench_problem. | ||
|  | */ | ||
|  | static bench_problem *fftw_problem_to_bench_problem(planner *plnr, | ||
|  | 						    const problem *p_) | ||
|  | { | ||
|  |      bench_problem *bp = 0; | ||
|  |      switch (p_->adt->problem_kind) { | ||
|  | 	 case PROBLEM_DFT: | ||
|  | 	 { | ||
|  | 	      const problem_dft *p = (const problem_dft *) p_; | ||
|  | 	   | ||
|  | 	      if (!p->ri || !p->ii) | ||
|  | 		   abort(); | ||
|  | 
 | ||
|  | 	      bp = (bench_problem *) bench_malloc(sizeof(bench_problem)); | ||
|  | 
 | ||
|  | 	      bp->kind = PROBLEM_COMPLEX; | ||
|  | 	      bp->sign = FFT_SIGN; | ||
|  | 	      bp->split = 1; /* tensor strides are in R's, not C's */ | ||
|  | 	      bp->in = UNTAINT(p->ri); | ||
|  | 	      bp->out = UNTAINT(p->ro); | ||
|  | 	      bp->ini = UNTAINT(p->ii); | ||
|  | 	      bp->outi = UNTAINT(p->io); | ||
|  | 	      bp->inphys = bp->outphys = 0; | ||
|  | 	      bp->iphyssz = bp->ophyssz = 0; | ||
|  | 	      bp->in_place = p->ri == p->ro; | ||
|  | 	      bp->sz = fftw_tensor_to_bench_tensor(p->sz); | ||
|  | 	      bp->vecsz = fftw_tensor_to_bench_tensor(p->vecsz); | ||
|  | 	      bp->k = 0; | ||
|  | 	      break; | ||
|  | 	 } | ||
|  | 	 case PROBLEM_RDFT: | ||
|  | 	 { | ||
|  | 	      const problem_rdft *p = (const problem_rdft *) p_; | ||
|  | 	      int i; | ||
|  | 
 | ||
|  | 	      if (!p->I || !p->O) | ||
|  | 		   abort(); | ||
|  | 
 | ||
|  | 	      for (i = 0; i < p->sz->rnk; ++i) | ||
|  | 		   switch (p->kind[i]) { | ||
|  | 		       case R2HC01: | ||
|  | 		       case R2HC10: | ||
|  | 		       case R2HC11: | ||
|  | 		       case HC2R01: | ||
|  | 		       case HC2R10: | ||
|  | 		       case HC2R11: | ||
|  | 			    return bp; | ||
|  | 		       default: | ||
|  | 			    ; | ||
|  | 		   } | ||
|  | 	   | ||
|  | 	      bp = (bench_problem *) bench_malloc(sizeof(bench_problem)); | ||
|  | 
 | ||
|  | 	      bp->kind = PROBLEM_R2R; | ||
|  | 	      bp->sign = FFT_SIGN; | ||
|  | 	      bp->split = 0; | ||
|  | 	      bp->in = UNTAINT(p->I); | ||
|  | 	      bp->out = UNTAINT(p->O); | ||
|  | 	      bp->ini = bp->outi = 0; | ||
|  | 	      bp->inphys = bp->outphys = 0; | ||
|  | 	      bp->iphyssz = bp->ophyssz = 0; | ||
|  | 	      bp->in_place = p->I == p->O; | ||
|  | 	      bp->sz = fftw_tensor_to_bench_tensor(p->sz); | ||
|  | 	      bp->vecsz = fftw_tensor_to_bench_tensor(p->vecsz); | ||
|  | 	      bp->k = (r2r_kind_t *) bench_malloc(sizeof(r2r_kind_t) * p->sz->rnk); | ||
|  | 	      for (i = 0; i < p->sz->rnk; ++i) | ||
|  | 		   switch (p->kind[i]) { | ||
|  | 		       case R2HC: bp->k[i] = R2R_R2HC; break; | ||
|  | 		       case HC2R: bp->k[i] = R2R_HC2R; break; | ||
|  | 		       case DHT: bp->k[i] = R2R_DHT; break; | ||
|  | 		       case REDFT00: bp->k[i] = R2R_REDFT00; break; | ||
|  | 		       case REDFT01: bp->k[i] = R2R_REDFT01; break; | ||
|  | 		       case REDFT10: bp->k[i] = R2R_REDFT10; break; | ||
|  | 		       case REDFT11: bp->k[i] = R2R_REDFT11; break; | ||
|  | 		       case RODFT00: bp->k[i] = R2R_RODFT00; break; | ||
|  | 		       case RODFT01: bp->k[i] = R2R_RODFT01; break; | ||
|  | 		       case RODFT10: bp->k[i] = R2R_RODFT10; break; | ||
|  | 		       case RODFT11: bp->k[i] = R2R_RODFT11; break; | ||
|  | 		       default: CK(0); | ||
|  | 		   } | ||
|  | 	      break; | ||
|  | 	 } | ||
|  | 	 case PROBLEM_RDFT2: | ||
|  | 	 { | ||
|  | 	      const problem_rdft2 *p = (const problem_rdft2 *) p_; | ||
|  | 	      int rnk = p->sz->rnk; | ||
|  | 	   | ||
|  | 	      if (!p->r0 || !p->r1 || !p->cr || !p->ci) | ||
|  | 		   abort(); | ||
|  | 	       | ||
|  | 	      /* give up verifying rdft2 R2HCII */ | ||
|  | 	      if (p->kind != R2HC && p->kind != HC2R) | ||
|  | 		   return bp; | ||
|  | 
 | ||
|  | 	      if (rnk > 0) { | ||
|  | 		   /* can't verify separate even/odd arrays for now */ | ||
|  | 		   if (2 * (p->r1 - p->r0) != | ||
|  | 		       ((p->kind == R2HC) ?  | ||
|  | 			p->sz->dims[rnk-1].is : p->sz->dims[rnk-1].os)) | ||
|  | 			return bp; | ||
|  | 	      } | ||
|  | 
 | ||
|  | 	      bp = (bench_problem *) bench_malloc(sizeof(bench_problem)); | ||
|  | 
 | ||
|  | 	      bp->kind = PROBLEM_REAL; | ||
|  | 	      bp->sign = p->kind == R2HC ? FFT_SIGN : -FFT_SIGN; | ||
|  | 	      bp->split = 1; /* tensor strides are in R's, not C's */ | ||
|  | 	      if (p->kind == R2HC) { | ||
|  | 		   bp->sign = FFT_SIGN; | ||
|  | 		   bp->in = UNTAINT(p->r0); | ||
|  | 		   bp->out = UNTAINT(p->cr); | ||
|  | 		   bp->ini = 0; | ||
|  | 		   bp->outi = UNTAINT(p->ci); | ||
|  | 	      } | ||
|  | 	      else { | ||
|  | 		   bp->sign = -FFT_SIGN; | ||
|  | 		   bp->out = UNTAINT(p->r0); | ||
|  | 		   bp->in = UNTAINT(p->cr); | ||
|  | 		   bp->outi = 0; | ||
|  | 		   bp->ini = UNTAINT(p->ci); | ||
|  | 	      } | ||
|  | 	      bp->inphys = bp->outphys = 0; | ||
|  | 	      bp->iphyssz = bp->ophyssz = 0; | ||
|  | 	      bp->in_place = p->r0 == p->cr; | ||
|  | 	      bp->sz = fftw_tensor_to_bench_tensor(p->sz); | ||
|  | 	      if (rnk > 0) { | ||
|  | 		   if (p->kind == R2HC) | ||
|  | 			bp->sz->dims[rnk-1].is /= 2; | ||
|  | 		   else  | ||
|  | 			bp->sz->dims[rnk-1].os /= 2; | ||
|  | 	      } | ||
|  | 	      bp->vecsz = fftw_tensor_to_bench_tensor(p->vecsz); | ||
|  | 	      bp->k = 0; | ||
|  | 	      break; | ||
|  | 	 } | ||
|  | 	 default:  | ||
|  | 	      abort(); | ||
|  |      } | ||
|  | 
 | ||
|  |      bp->userinfo = 0; | ||
|  |      bp->pstring = 0; | ||
|  |      bp->destroy_input = !NO_DESTROY_INPUTP(plnr); | ||
|  | 
 | ||
|  |      return bp; | ||
|  | } | ||
|  | 
 | ||
|  | static void hook(planner *plnr, plan *pln, const problem *p_, int optimalp) | ||
|  | { | ||
|  |      int rounds = 5; | ||
|  |      double tol = SINGLE_PRECISION ? 1.0e-3 : 1.0e-10; | ||
|  |      UNUSED(optimalp); | ||
|  | 
 | ||
|  |      if (verbose > 5) { | ||
|  | 	  printer *pr = X(mkprinter_file)(stdout); | ||
|  | 	  pr->print(pr, "%P:%(%p%)\n", p_, pln); | ||
|  | 	  X(printer_destroy)(pr); | ||
|  | 	  printf("cost %g  \n\n", pln->pcost); | ||
|  |      } | ||
|  | 
 | ||
|  |      if (paranoid) { | ||
|  | 	  bench_problem *bp; | ||
|  | 
 | ||
|  | 	  bp = fftw_problem_to_bench_problem(plnr, p_); | ||
|  | 	  if (bp) { | ||
|  | 	       X(plan) the_plan_save = the_plan; | ||
|  | 
 | ||
|  | 	       the_plan = (apiplan *) MALLOC(sizeof(apiplan), PLANS); | ||
|  | 	       the_plan->pln = pln; | ||
|  | 	       the_plan->prb = (problem *) p_; | ||
|  | 
 | ||
|  | 	       X(plan_awake)(pln, AWAKE_SQRTN_TABLE); | ||
|  | 	       verify_problem(bp, rounds, tol); | ||
|  | 	       X(plan_awake)(pln, SLEEPY); | ||
|  | 
 | ||
|  | 	       X(ifree)(the_plan); | ||
|  | 	       the_plan = the_plan_save; | ||
|  | 
 | ||
|  | 	       problem_destroy(bp); | ||
|  | 	  } | ||
|  | 
 | ||
|  |      } | ||
|  | } | ||
|  | 
 | ||
|  | static void paranoid_checks(void) | ||
|  | { | ||
|  |      /* FIXME: assumes char = 8 bits, which is false on at least one
 | ||
|  | 	DSP I know of. */ | ||
|  | #if 0
 | ||
|  |      /* if flags_t is not 64 bits i want to know it. */ | ||
|  |      CK(sizeof(flags_t) == 8); | ||
|  | 
 | ||
|  |      CK(sizeof(md5uint) >= 4); | ||
|  | #endif
 | ||
|  | 
 | ||
|  |      CK(sizeof(uintptr_t) >= sizeof(R *)); | ||
|  | 
 | ||
|  |      CK(sizeof(INT) >= sizeof(R *)); | ||
|  | } | ||
|  | 
 | ||
|  | void install_hook(void) | ||
|  | { | ||
|  |      planner *plnr = X(the_planner)(); | ||
|  |      plnr->hook = hook; | ||
|  |      paranoid_checks(); | ||
|  | } | ||
|  | 
 | ||
|  | void uninstall_hook(void) | ||
|  | { | ||
|  |      planner *plnr = X(the_planner)(); | ||
|  |      plnr->hook = 0; | ||
|  | } |