-
Notifications
You must be signed in to change notification settings - Fork 58
/
pffft_double.c
147 lines (117 loc) · 5.37 KB
/
pffft_double.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/* Copyright (c) 2013 Julien Pommier ( [email protected] )
Copyright (c) 2020 Hayati Ayguen ( [email protected] )
Copyright (c) 2020 Dario Mambro ( [email protected] )
Based on original fortran 77 code from FFTPACKv4 from NETLIB
(http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber
of NCAR, in 1985.
As confirmed by the NCAR fftpack software curators, the following
FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
released under the same terms.
FFTPACK license:
http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
Copyright (c) 2004 the University Corporation for Atmospheric
Research ("UCAR"). All rights reserved. Developed by NCAR's
Computational and Information Systems Laboratory, UCAR,
www.cisl.ucar.edu.
Redistribution and use of the Software in source and binary forms,
with or without modification, is permitted provided that the
following conditions are met:
- Neither the names of NCAR's Computational and Information Systems
Laboratory, the University Corporation for Atmospheric Research,
nor the names of its sponsors or contributors may be used to
endorse or promote products derived from this Software without
specific prior written permission.
- Redistributions of source code must retain the above copyright
notices, this list of conditions, and the disclaimer below.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the disclaimer below in the
documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.
PFFFT : a Pretty Fast FFT.
This file is largely based on the original FFTPACK implementation, modified in
order to take advantage of SIMD instructions of modern CPUs.
*/
/*
NOTE: This file is adapted from Julien Pommier's original PFFFT,
which works on 32 bit floating point precision using SSE instructions,
to work with 64 bit floating point precision using AVX instructions.
Author: Dario Mambro @ https://github.com/unevens/pffft
*/
#include "pffft_double.h"
/* identify the compiler family: MSVC is tested first, GCC-compatible compilers second;
   at most one of COMPILER_MSVC / COMPILER_GCC ends up defined */
#ifdef _MSC_VER
# define COMPILER_MSVC
#else
# ifdef __GNUC__
#  define COMPILER_GCC
# endif
#endif
#ifdef COMPILER_MSVC
# define _USE_MATH_DEFINES
# include <malloc.h>
#elif defined(__MINGW32__) || defined(__MINGW64__)
# include <malloc.h>
#else
# include <alloca.h>
#endif
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <assert.h>
/*
   compiler-specific helpers:
   ALWAYS_INLINE(T) / NEVER_INLINE(T) wrap a function return type T with the
   specifier that forces / forbids inlining,
   RESTRICT expands to the compiler's __restrict pointer qualifier,
   VLA_ARRAY_ON_STACK(type, name, n) declares `name` as storage for n elements
   of `type`: an array declaration with gcc, a pointer to _alloca memory with msvc.
   Nothing is defined here when neither COMPILER_GCC nor COMPILER_MSVC is set.
*/
#if defined(COMPILER_GCC)
# define ALWAYS_INLINE(return_type) inline return_type __attribute__ ((always_inline))
# define NEVER_INLINE(return_type) return_type __attribute__ ((noinline))
# define RESTRICT __restrict
/* note: this expansion already ends with ';' */
# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__];
#elif defined(COMPILER_MSVC)
# define ALWAYS_INLINE(return_type) __forceinline return_type
# define NEVER_INLINE(return_type) __declspec(noinline) return_type
# define RESTRICT __restrict
/* size__ is parenthesized so that an expression argument such as `a + b` or
   `c ? x : y` is multiplied as a whole by the element size */
# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca((size__) * sizeof(type__))
#endif
#ifdef COMPILER_MSVC
/* MSVC only: turn off warnings C4244, C4305, C4204 and C4456 from this point on.
   NOTE(review): presumably the conversion / truncation, non-constant aggregate
   initializer and local-shadowing warnings -- confirm against the MSVC docs */
#pragma warning( disable : 4244 4305 4204 4456 )
#endif
/*
vector support macros: the rest of the code is independent of
AVX -- adding support for other platforms with 4-element
vectors should be limited to these macros
*/
#include "simd/pf_double.h"
/*
   `float` is redefined to `double` so that the implementation included at the
   bottom of this file stays comparable with the single precision code (see
   the NOTE comment earlier in this file). The define is placed after every
   other #include of this file, so only pffft_priv_impl.h (and whatever it
   includes) is preprocessed with it.
*/
#define float double
/* generic names mapped to the identifiers of the double precision build;
   presumably consumed by pffft_priv_impl.h -- confirm against that header */
#define SETUP_STRUCT PFFFTD_Setup
#define FUNC_NEW_SETUP pffftd_new_setup
#define FUNC_DESTROY pffftd_destroy_setup
#define FUNC_TRANSFORM_UNORDRD pffftd_transform
#define FUNC_TRANSFORM_ORDERED pffftd_transform_ordered
#define FUNC_ZREORDER pffftd_zreorder
#define FUNC_ZCONVOLVE_ACCUMULATE pffftd_zconvolve_accumulate
#define FUNC_ZCONVOLVE_NO_ACCU pffftd_zconvolve_no_accu
#define FUNC_ALIGNED_MALLOC pffftd_aligned_malloc
#define FUNC_ALIGNED_FREE pffftd_aligned_free
#define FUNC_SIMD_SIZE pffftd_simd_size
#define FUNC_MIN_FFT_SIZE pffftd_min_fft_size
#define FUNC_IS_VALID_SIZE pffftd_is_valid_size
#define FUNC_NEAREST_SIZE pffftd_nearest_transform_size
#define FUNC_SIMD_ARCH pffftd_simd_arch
#define FUNC_VALIDATE_SIMD_A validate_pffftd_simd
#define FUNC_VALIDATE_SIMD_EX validate_pffftd_simd_ex
#define FUNC_CPLX_FINALIZE pffftd_cplx_finalize
#define FUNC_CPLX_PREPROCESS pffftd_cplx_preprocess
#define FUNC_REAL_PREPROCESS_4X4 pffftd_real_preprocess_4x4
#define FUNC_REAL_PREPROCESS pffftd_real_preprocess
#define FUNC_REAL_FINALIZE_4X4 pffftd_real_finalize_4x4
#define FUNC_REAL_FINALIZE pffftd_real_finalize
#define FUNC_TRANSFORM_INTERNAL pffftd_transform_internal
/* the <math.h> cosine / sine that take and return double */
#define FUNC_COS cos
#define FUNC_SIN sin
/* included last: every macro defined above is in effect inside this header */
#include "pffft_priv_impl.h"