173 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			173 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 * Copyright (c) 2003, 2007-14 Matteo Frigo
							 | 
						||
| 
								 | 
							
								 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * This program is free software; you can redistribute it and/or modify
							 | 
						||
| 
								 | 
							
								 * it under the terms of the GNU General Public License as published by
							 | 
						||
| 
								 | 
							
								 * the Free Software Foundation; either version 2 of the License, or
							 | 
						||
| 
								 | 
							
								 * (at your option) any later version.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * This program is distributed in the hope that it will be useful,
							 | 
						||
| 
								 | 
							
								 * but WITHOUT ANY WARRANTY; without even the implied warranty of
							 | 
						||
| 
								 | 
							
								 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
							 | 
						||
| 
								 | 
							
								 * GNU General Public License for more details.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * You should have received a copy of the GNU General Public License
							 | 
						||
| 
								 | 
							
								 * along with this program; if not, write to the Free Software
							 | 
						||
| 
								 | 
							
								 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 * This header file must include every file or define every
							 | 
						||
| 
								 | 
							
								 * type or macro which is required to compile a codelet.
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifndef __RDFT_CODELET_H__
							 | 
						||
| 
								 | 
							
								#define __RDFT_CODELET_H__
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include "kernel/ifftw.h"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**************************************************************
							 | 
						||
| 
								 | 
							
								 * types of codelets
							 | 
						||
| 
								 | 
							
								 **************************************************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* FOOab, with a,b in {0,1}, denotes the FOO transform
							 | 
						||
| 
								 | 
							
								   where a/b say whether the input/output are shifted by
							 | 
						||
| 
								 | 
							
								   half a sample/slot. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * Every real-data transform kind known to the rdft codelets.
								 *
								 * In a name of the form FOOab the digits a and b (each 0 or 1) tell
								 * whether the input (a) and the output (b) are shifted by half a
								 * sample/slot.
								 *
								 * The order of the enumerators is significant: the *_KINDP macros
								 * classify a kind by comparing it against the first and last member
								 * of each family, so each family must remain contiguous.
								 */
								typedef enum {
									/* R2HC family */
									R2HC00,
									R2HC01,
									R2HC10,
									R2HC11,

									/* HC2R family */
									HC2R00,
									HC2R01,
									HC2R10,
									HC2R11,

									DHT,

									/* real-even == DCT's */
									REDFT00,
									REDFT01,
									REDFT10,
									REDFT11,

									/* real-odd == DST's */
									RODFT00,
									RODFT01,
									RODFT10,
									RODFT11
								} rdft_kind;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* The plain R2HC / HC2R names refer to the unshifted transforms. */
								#define R2HC    R2HC00
								#define HC2R    HC2R00

								/* Alternate spellings for two of the shifted kinds. */
								#define R2HCII  R2HC01
								#define HC2RIII HC2R10
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * Predicates that classify an rdft_kind value by range.
								 *
								 * The lower-bound test against R2HC00 draws a warning under gcc,
								 * because comparing an unsigned value against 0 is superfluous.
								 * It is kept on purpose: other compilers (e.g. icc) may define the
								 * enum to be a signed int.
								 *
								 * Each two-sided macro expands its argument twice, so pass an
								 * expression without side effects.
								 */
								#define R2HC_KINDP(k) (R2HC00 <= (k) && (k) <= R2HC11) /* uses kr2hc_genus */
								#define HC2R_KINDP(k) (HC2R00 <= (k) && (k) <= HC2R11) /* uses khc2r_genus */

								/* true for DHT and every enumerator that follows it */
								#define R2R_KINDP(k) (DHT <= (k)) /* uses kr2r_genus */

								#define REDFT_KINDP(k) (REDFT00 <= (k) && (k) <= REDFT11)
								#define RODFT_KINDP(k) (RODFT00 <= (k) && (k) <= RODFT11)
								/* spans both families: relies on RODFT* directly following REDFT* */
								#define REODFT_KINDP(k) (REDFT00 <= (k) && (k) <= RODFT11)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* codelets with real input (output) and complex output (input) */
							 | 
						||
| 
								 | 
							
								typedef struct kr2c_desc_s kr2c_desc;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef struct {
							 | 
						||
| 
								 | 
							
								     rdft_kind kind;
							 | 
						||
| 
								 | 
							
								     INT vl;
							 | 
						||
| 
								 | 
							
								} kr2c_genus;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								struct kr2c_desc_s {
							 | 
						||
| 
								 | 
							
								     INT n;    /* size of transform computed */
							 | 
						||
| 
								 | 
							
								     const char *nam;
							 | 
						||
| 
								 | 
							
								     opcnt ops;
							 | 
						||
| 
								 | 
							
								     const kr2c_genus *genus;
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef void (*kr2c) (R *R0, R *R1, R *Cr, R *Ci,
							 | 
						||
| 
								 | 
							
										      stride rs, stride csr, stride csi,
							 | 
						||
| 
								 | 
							
										      INT vl, INT ivs, INT ovs);
							 | 
						||
| 
								 | 
							
								void X(kr2c_register)(planner *p, kr2c codelet, const kr2c_desc *desc);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* half-complex to half-complex DIT/DIF codelets: */
							 | 
						||
| 
								 | 
							
								typedef struct hc2hc_desc_s hc2hc_desc;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef struct {
							 | 
						||
| 
								 | 
							
								     rdft_kind kind;
							 | 
						||
| 
								 | 
							
								     INT vl;
							 | 
						||
| 
								 | 
							
								} hc2hc_genus;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								struct hc2hc_desc_s {
							 | 
						||
| 
								 | 
							
								     INT radix;
							 | 
						||
| 
								 | 
							
								     const char *nam;
							 | 
						||
| 
								 | 
							
								     const tw_instr *tw;
							 | 
						||
| 
								 | 
							
								     const hc2hc_genus *genus;
							 | 
						||
| 
								 | 
							
								     opcnt ops;
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef void (*khc2hc) (R *rioarray, R *iioarray, const R *W,
							 | 
						||
| 
								 | 
							
											stride rs, INT mb, INT me, INT ms);
							 | 
						||
| 
								 | 
							
								void X(khc2hc_register)(planner *p, khc2hc codelet, const hc2hc_desc *desc);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* half-complex to rdft2-complex DIT/DIF codelets: */
							 | 
						||
| 
								 | 
							
								/* forward declaration; struct hc2c_desc_s is defined further below */
								typedef struct hc2c_desc_s hc2c_desc;

								/*
								 * Variant selector handed to the hc2c registration function.
								 * NOTE(review): the names suggest "implemented via an rdft" versus
								 * "implemented via a complex dft" -- confirm with the solver code.
								 */
								typedef enum {
									HC2C_VIA_RDFT,
									HC2C_VIA_DFT
								} hc2c_kind;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef struct {
							 | 
						||
| 
								 | 
							
								     int (*okp)(
							 | 
						||
| 
								 | 
							
									  const R *Rp, const R *Ip, const R *Rm, const R *Im, 
							 | 
						||
| 
								 | 
							
									  INT rs, INT mb, INT me, INT ms, 
							 | 
						||
| 
								 | 
							
									  const planner *plnr);
							 | 
						||
| 
								 | 
							
								     rdft_kind kind;
							 | 
						||
| 
								 | 
							
								     INT vl;
							 | 
						||
| 
								 | 
							
								} hc2c_genus;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								struct hc2c_desc_s {
							 | 
						||
| 
								 | 
							
								     INT radix;
							 | 
						||
| 
								 | 
							
								     const char *nam;
							 | 
						||
| 
								 | 
							
								     const tw_instr *tw;
							 | 
						||
| 
								 | 
							
								     const hc2c_genus *genus;
							 | 
						||
| 
								 | 
							
								     opcnt ops;
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef void (*khc2c) (R *Rp, R *Ip, R *Rm, R *Im, const R *W,
							 | 
						||
| 
								 | 
							
										       stride rs, INT mb, INT me, INT ms);
							 | 
						||
| 
								 | 
							
								void X(khc2c_register)(planner *p, khc2c codelet, const hc2c_desc *desc,
							 | 
						||
| 
								 | 
							
										       hc2c_kind hc2ckind);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * Solver tables for the rdft codelets.  These are declarations only;
								 * the definitions live in other translation units.
								 * NOTE(review): the suffixes appear to name instruction-set specific
								 * variants -- confirm which ones are built for a given configuration.
								 */

								/* r2c forward / backward */
								extern const solvtab X(solvtab_rdft_r2cf);
								extern const solvtab X(solvtab_rdft_r2cb);

								/* sse2 / avx / avx2 / avx512 / kcvi variants */
								extern const solvtab X(solvtab_rdft_sse2);
								extern const solvtab X(solvtab_rdft_avx);
								extern const solvtab X(solvtab_rdft_avx_128_fma);
								extern const solvtab X(solvtab_rdft_avx2);
								extern const solvtab X(solvtab_rdft_avx2_128);
								extern const solvtab X(solvtab_rdft_avx512);
								extern const solvtab X(solvtab_rdft_kcvi);

								/* altivec / vsx variants */
								extern const solvtab X(solvtab_rdft_altivec);
								extern const solvtab X(solvtab_rdft_vsx);

								/* neon variant */
								extern const solvtab X(solvtab_rdft_neon);

								/* generic simd variants */
								extern const solvtab X(solvtab_rdft_generic_simd128);
								extern const solvtab X(solvtab_rdft_generic_simd256);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* real-input & output DFT-like codelets (DHT, etc.) */
							 | 
						||
| 
								 | 
							
								typedef struct kr2r_desc_s kr2r_desc;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef struct {
							 | 
						||
| 
								 | 
							
								     INT vl;
							 | 
						||
| 
								 | 
							
								} kr2r_genus;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								struct kr2r_desc_s {
							 | 
						||
| 
								 | 
							
								     INT n;    /* size of transform computed */
							 | 
						||
| 
								 | 
							
								     const char *nam;
							 | 
						||
| 
								 | 
							
								     opcnt ops;
							 | 
						||
| 
								 | 
							
								     const kr2r_genus *genus;
							 | 
						||
| 
								 | 
							
								     rdft_kind kind;
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								typedef void (*kr2r) (const R *I, R *O, stride is, stride os,
							 | 
						||
| 
								 | 
							
										      INT vl, INT ivs, INT ovs);
							 | 
						||
| 
								 | 
							
								void X(kr2r_register)(planner *p, kr2r codelet, const kr2r_desc *desc);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								extern const solvtab X(solvtab_rdft_r2r);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#endif				/* __RDFT_CODELET_H__ */
							 |