root / synthbench / euroben-ports / base / C / mod2f / .svn / text-base / cfft4.c.svn-base @ 0:839f52ef7657
History | View | Annotate | Download (13.3 kB)
1 |
#include <math.h> |
---|---|
2 |
#include <stdio.h> |
3 |
#include <stdlib.h> |
4 |
|
5 |
void cfft4( int is, int m, double ur[], double ui[], double xr[], double xi[], |
6 |
double wr[], double wi[] ) |
7 |
/* ---------------------------------------------------------------------- |
8 |
! --- CFFT4 performs an N-point complex-to-complex FFT, where N = 2**M. |
9 |
! On input XR holds the Real part of the complex array; |
10 |
! XI holds the Imaginary part. |
11 |
! On output XR contains the Real part of the transformed array. |
12 |
! XI then contains the Imaginary part of the result. |
13 |
! WR and WI are work arrays that store Real and Imaginary parts |
14 |
! of intermediate results. |
15 |
! UR and UI contain rotation factors for various stages of the |
16 |
! transformation. |
17 |
! |
18 |
! --- Before doing the actual transformation UR and UI must be |
19 |
! initialised by calling CFFT4 with IS = 0, and M = MX, where MX |
20 |
! is the maximum value of M that will be needed for any call of |
21 |
! CFFT4. |
22 |
! |
23 |
! --- A call to CFFT4 with IS = 1 (or -1) indicates a call to perform |
24 |
! a FFT with positive (or negative) exponentials. |
25 |
! |
26 |
! For any call to CFFT4 M must be M >= 2. |
27 |
! Dimensions of the arrays XR, XI, UR, UI, WR, WI are: |
28 |
! |
29 |
! XR[N], XI[N], WR[N], WI[N]. |
30 |
! UR[2**MX+1], UI[2**MX+1] for MX even. |
31 |
! UR[2*2**(MX/3)+1], UI[2*2**(MX/3)+1] for MX odd. |
32 |
! |
33 |
! In this routine we use 8-byte Real elements (doubles). |
34 |
! ---------------------------------------------------------------------- */ |
35 |
{ |
36 |
int i, it, j, j0, j1, j2, j3, j4, j5, k , kn, ku, k0, |
37 |
k1, k2, k3, ln, l1, l2, l3, l4, mx, n , nu, n1, n2, n3; |
38 |
const double pi = 3.141592653589793; |
39 |
double c0r, c0i, c1r, c1i, c2r, c2i, c3r, c3i, |
40 |
d0r, d0i, d1r, d1i, d2r, d2i, d3r, d3i, |
41 |
t , ti , u1r, u1i, u2r, u2i, u3r, u3i, |
42 |
x1 , x2 , w1 , w2; |
43 |
// --------------------------------------------------------------------- |
44 |
// --- Initialise the U array with sines and cosines in a manner that |
45 |
// permits stride one access at each FFT iteration. |
46 |
|
47 |
if ( is == 0 ) { |
48 |
nu = (int)pow( 2, ((m+1)/2*2) )/3; |
49 |
ur[0] = 64*nu + m; |
50 |
ku = 1; |
51 |
ln = 1; |
52 |
for( j = 0; j <= (m+1)/2 - 1; j++ ) { |
53 |
t = pi/(2*ln); |
54 |
for( i = 0; i <= ln - 1; i++ ) { |
55 |
ti = i*t; |
56 |
ur[i+ku] = cos( ti ); |
57 |
ui[i+ku] = sin( ti ); |
58 |
} |
59 |
ku = ku + ln; |
60 |
ln = 4*ln; |
61 |
} |
62 |
return; |
63 |
} |
64 |
// --------------------------------------------------------------------- |
65 |
// --- Transformation call to CFFT4: set initial parameters. |
66 |
|
67 |
n = (int)pow( 2, m ); |
68 |
k = ur[0]; |
69 |
mx = k%64; |
70 |
// --------------------------------------------------------------------- |
71 |
// --- Check for invalid input parameters. |
72 |
|
73 |
if ( ( is != 1 && is != -1 ) || m < 2 || m > mx ) { |
74 |
printf( " *** Error in CFFT4: Either U has not been initialised\n" ); |
75 |
printf( " or one of the input parameters IS or M is invalid:\n" ); |
76 |
printf( " is = %5d; m = %5d\n", is, m ); |
77 |
abort(); |
78 |
} |
79 |
// --------------------------------------------------------------------- |
80 |
|
81 |
nu = k/64; |
82 |
n1 = n/4; |
83 |
n2 = 2*n1; |
84 |
n3 = 3*n1; |
85 |
ln = n1; |
86 |
// --------------------------------------------------------------------- |
87 |
// --- For the first two radix-4 iterations, the inner loop increments J |
88 |
// instead of K. Non-unit stride memory access is used but the longer |
89 |
// vector lengths should compensate for that. |
90 |
|
91 |
j0 = 0; |
92 |
j1 = j0 + 1; |
93 |
j2 = j0 + 2; |
94 |
j3 = j0 + 3; |
95 |
k0 = j0; |
96 |
k1 = k0 + n1; |
97 |
k2 = k0 + n2; |
98 |
k3 = k0 + n3; |
99 |
for( j = 0; j <= ln - 1; j++ ) { |
100 |
c0r = xr[j+k0]; |
101 |
c0i = xi[j+k0]; |
102 |
c1r = xr[j+k1]; |
103 |
c1i = xi[j+k1]; |
104 |
c2r = xr[j+k2]; |
105 |
c2i = xi[j+k2]; |
106 |
c3r = xr[j+k3]; |
107 |
c3i = xi[j+k3]; |
108 |
d0r = c0r + c2r; |
109 |
d0i = c0i + c2i; |
110 |
d1r = c0r - c2r; |
111 |
d1i = c0i - c2i; |
112 |
d2r = c1r + c3r; |
113 |
d2i = c1i + c3i; |
114 |
d3r = is*(-c1i + c3i); |
115 |
d3i = is*( c1r - c3r); |
116 |
j4 = 4*j; |
117 |
wr[j4+j0] = d0r + d2r; |
118 |
wi[j4+j0] = d0i + d2i; |
119 |
wr[j4+j1] = d1r + d3r; |
120 |
wi[j4+j1] = d1i + d3i; |
121 |
wr[j4+j2] = d0r - d2r; |
122 |
wi[j4+j2] = d0i - d2i; |
123 |
wr[j4+j3] = d1r - d3r; |
124 |
wi[j4+j3] = d1i - d3i; |
125 |
} |
126 |
// --------------------------------------------------------------------- |
127 |
// --- If M = 2 all transformation work is done, just copy WR/WI into |
128 |
// XR/XI (label L200). |
129 |
// If M = 3, one radix-2 iteration still has to be done (label L300). |
130 |
|
131 |
if ( m == 2 ) goto L200; |
132 |
if ( m == 3 ) goto L300; |
133 |
|
134 |
// --- Else: |
135 |
// ---------------------------------------------------------------------- |
136 |
// --- Perform the second radix-4 iteration. |
137 |
|
138 |
ln = ln/4; |
139 |
for( k = 0; k <= 3; k++ ) { |
140 |
j0 = k; |
141 |
j1 = j0 + 4; |
142 |
j2 = j0 + 8; |
143 |
j3 = j0 + 12; |
144 |
k0 = j0; |
145 |
k1 = k0 + n1; |
146 |
k2 = k0 + n2; |
147 |
k3 = k0 + n3; |
148 |
u1r = ur[k+2]; |
149 |
u1i = is*ui[k+2]; |
150 |
u2r = u1r*u1r - u1i*u1i; |
151 |
u2i = 2.0*u1r*u1i; |
152 |
u3r = u1r*u2r - u1i*u2i; |
153 |
u3i = u1r*u2i + u1i*u2r; |
154 |
for( j = 0; j <= ln - 1; j++ ) { |
155 |
j4 = 4*j; |
156 |
c0r = wr[j4+k0]; |
157 |
c0i = wi[j4+k0]; |
158 |
w1 = wr[j4+k1]; |
159 |
w2 = wi[j4+k1]; |
160 |
c1r = u1r*w1 - u1i*w2; |
161 |
c1i = u1r*w2 + u1i*w1; |
162 |
w1 = wr[j4+k2]; |
163 |
w2 = wi[j4+k2]; |
164 |
c2r = u2r*w1 - u2i*w2; |
165 |
c2i = u2r*w2 + u2i*w1; |
166 |
w1 = wr[j4+k3]; |
167 |
w2 = wi[j4+k3]; |
168 |
c3r = u3r*w1 - u3i*w2; |
169 |
c3i = u3r*w2 + u3i*w1; |
170 |
d0r = c0r + c2r; |
171 |
d0i = c0i + c2i; |
172 |
d1r = c0r - c2r; |
173 |
d1i = c0i - c2i; |
174 |
d2r = c1r + c3r; |
175 |
d2i = c1i + c3i; |
176 |
d3r = is*(-c1i + c3i); |
177 |
d3i = is*( c1r - c3r); |
178 |
j5 = 16*j; |
179 |
xr[j5+j0] = d0r + d2r; |
180 |
xi[j5+j0] = d0i + d2i; |
181 |
xr[j5+j1] = d1r + d3r; |
182 |
xi[j5+j1] = d1i + d3i; |
183 |
xr[j5+j2] = d0r - d2r; |
184 |
xi[j5+j2] = d0i - d2i; |
185 |
xr[j5+j3] = d1r - d3r; |
186 |
xi[j5+j3] = d1i - d3i; |
187 |
} |
188 |
} |
189 |
// --------------------------------------------------------------------- |
190 |
// --- Set parameters for subsequent iterations. |
191 |
|
192 |
ku = 7; |
193 |
ln = ln/4; |
194 |
l1 = 16; |
195 |
l2 = 32; |
196 |
l3 = 48; |
197 |
l4 = 64; |
198 |
// --------------------------------------------------------------------- |
199 |
// --- For all other radix-4 iterations, the inner loops increment K. In |
200 |
// this manner, all array references have unit stride. The radix-4 |
201 |
// iterations are performed in pairs: XR/XI to WR/WI and vice |
202 |
// versa. This eliminates the need to copy data between X and W |
203 |
// after each iteration. |
204 |
// --- If IT = M - 1 then only one radix-2 iteration needs to be done |
205 |
// (label L100). |
206 |
|
207 |
for( it = 4; it <= m - 1; it+= 4 ) { |
208 |
if ( it == m - 1 ) goto L100; |
209 |
for( j = 0; j <= ln - 1; j++ ) { |
210 |
j0 = j*l4; |
211 |
j1 = j0 + l1; |
212 |
j2 = j0 + l2; |
213 |
j3 = j0 + l3; |
214 |
k0 = j*l1; |
215 |
k1 = k0 + n1; |
216 |
k2 = k0 + n2; |
217 |
k3 = k0 + n3; |
218 |
for( k = 0; k <= l1 - 1; k++ ) { |
219 |
u1r = ur[k+ku-1]; |
220 |
u1i = is*ui[k+ku-1]; |
221 |
u2r = u1r*u1r - u1i*u1i; |
222 |
u2i = 2.0*u1r*u1i; |
223 |
u3r = u1r*u2r - u1i*u2i; |
224 |
u3i = u1r*u2i + u1i*u2r; |
225 |
c0r = xr[k+k0]; |
226 |
c0i = xi[k+k0]; |
227 |
x1 = xr[k+k1]; |
228 |
x2 = xi[k+k1]; |
229 |
c1r = u1r*x1 - u1i*x2; |
230 |
c1i = u1r*x2 + u1i*x1; |
231 |
x1 = xr[k+k2]; |
232 |
x2 = xi[k+k2]; |
233 |
c2r = u2r*x1 - u2i*x2; |
234 |
c2i = u2r*x2 + u2i*x1; |
235 |
x1 = xr[k+k3]; |
236 |
x2 = xi[k+k3]; |
237 |
c3r = u3r*x1 - u3i*x2; |
238 |
c3i = u3r*x2 + u3i*x1; |
239 |
d0r = c0r + c2r; |
240 |
d0i = c0i + c2i; |
241 |
d1r = c0r - c2r; |
242 |
d1i = c0i - c2i; |
243 |
d2r = c1r + c3r; |
244 |
d2i = c1i + c3i; |
245 |
d3r = is*(-c1i + c3i); |
246 |
d3i = is*( c1r - c3r); |
247 |
wr[k+j0] = d0r + d2r; |
248 |
wi[k+j0] = d0i + d2i; |
249 |
wr[k+j1] = d1r + d3r; |
250 |
wi[k+j1] = d1i + d3i; |
251 |
wr[k+j2] = d0r - d2r; |
252 |
wi[k+j2] = d0i - d2i; |
253 |
wr[k+j3] = d1r - d3r; |
254 |
wi[k+j3] = d1i - d3i; |
255 |
} |
256 |
} |
257 |
ku = ku + l1; |
258 |
ln = ln/4; |
259 |
l1 = l4; |
260 |
l2 = 2*l1; |
261 |
l3 = 3*l1; |
262 |
l4 = 4*l1; |
263 |
// ---------------------------------------------------------------------- |
264 |
// --- If IT = M - 2 the computation is complete except for copying |
265 |
// WR/WI to XR/XI (label L200). |
266 |
// --- If IT = M - 3 then only one radix-2 iteration remains to be done |
267 |
// (label L300). |
268 |
|
269 |
if ( it == m - 2 ) goto L200; |
270 |
if ( it == m - 3 ) goto L300; |
271 |
|
272 |
for( j = 0; j <= ln - 1; j++ ) { |
273 |
j0 = j*l4; |
274 |
j1 = j0 + l1; |
275 |
j2 = j0 + l2; |
276 |
j3 = j0 + l3; |
277 |
k0 = j*l1; |
278 |
k1 = k0 + n1; |
279 |
k2 = k0 + n2; |
280 |
k3 = k0 + n3; |
281 |
for( k = 0; k <= l1 - 1; k++ ) { |
282 |
u1r = ur[k+ku-1]; |
283 |
u1i = is*ui[k+ku-1]; |
284 |
u2r = u1r*u1r - u1i*u1i; |
285 |
u2i = 2.0*u1r*u1i; |
286 |
u3r = u1r*u2r - u1i*u2i; |
287 |
u3i = u1r*u2i + u1i*u2r; |
288 |
c0r = wr[k+k0]; |
289 |
c0i = wi[k+k0]; |
290 |
w1 = wr[k+k1]; |
291 |
w2 = wi[k+k1]; |
292 |
c1r = u1r*w1 - u1i*w2; |
293 |
c1i = u1r*w2 + u1i*w1; |
294 |
w1 = wr[k+k2]; |
295 |
w2 = wi[k+k2]; |
296 |
c2r = u2r*w1 - u2i*w2; |
297 |
c2i = u2r*w2 + u2i*w1; |
298 |
w1 = wr[k+k3]; |
299 |
w2 = wi[k+k3]; |
300 |
c3r = u3r*w1 - u3i*w2; |
301 |
c3i = u3r*w2 + u3i*w1; |
302 |
d0r = c0r + c2r; |
303 |
d0i = c0i + c2i; |
304 |
d1r = c0r - c2r; |
305 |
d1i = c0i - c2i; |
306 |
d2r = c1r + c3r; |
307 |
d2i = c1i + c3i; |
308 |
d3r = is*(-c1i + c3i); |
309 |
d3i = is*( c1r - c3r); |
310 |
xr[k+j0] = d0r + d2r; |
311 |
xi[k+j0] = d0i + d2i; |
312 |
xr[k+j1] = d1r + d3r; |
313 |
xi[k+j1] = d1i + d3i; |
314 |
xr[k+j2] = d0r - d2r; |
315 |
xi[k+j2] = d0i - d2i; |
316 |
xr[k+j3] = d1r - d3r; |
317 |
xi[k+j3] = d1i - d3i; |
318 |
} |
319 |
} |
320 |
ku = ku + l1; |
321 |
ln = ln/4; |
322 |
l1 = l4; |
323 |
l2 = 2*l1; |
324 |
l3 = 3*l1; |
325 |
l4 = 4*l1; |
326 |
} |
327 |
return; |
328 |
// ---------------------------------------------------------------------- |
329 |
// --- Perform one radix-2 iteration to complete the calculation. This |
330 |
// branch is taken when the last iteration performed above is stored |
331 |
// in XR/XI. |
332 |
|
333 |
L100: l2 = l1/2; |
334 |
j0 = 0; |
335 |
j1 = j0 + l1; |
336 |
j2 = j0 + l2; |
337 |
j3 = j1 + l2; |
338 |
k0 = 0; |
339 |
k1 = k0 + l1; |
340 |
k2 = k0 + l2; |
341 |
k3 = k1 + l2; |
342 |
for( k = 0; k <= l2 - 1; k++ ) { |
343 |
u1r = ur[2*k+ku-1]; |
344 |
u1i = is*ui[2*k+ku-1]; |
345 |
c0r = xr[k+k0]; |
346 |
c0i = xi[k+k0]; |
347 |
x1 = xr[k+k1]; |
348 |
x2 = xi[k+k1]; |
349 |
c1r = u1r*x1 - u1i*x2; |
350 |
c1i = u1r*x2 + u1i*x1; |
351 |
wr[k+j0] = c0r + c1r; |
352 |
wi[k+j0] = c0i + c1i; |
353 |
wr[k+j1] = c0r - c1r; |
354 |
wi[k+j1] = c0i - c1i; |
355 |
u2r = -is*u1i; |
356 |
u2i = is*u1r; |
357 |
c0r = xr[k+k2]; |
358 |
c0i = xi[k+k2]; |
359 |
x1 = xr[k+k3]; |
360 |
x2 = xi[k+k3]; |
361 |
c1r = u2r*x1 - u2i*x2; |
362 |
c1i = u2r*x2 + u2i*x1; |
363 |
wr[k+j2] = c0r + c1r; |
364 |
wi[k+j2] = c0i + c1i; |
365 |
wr[k+j3] = c0r - c1r; |
366 |
wi[k+j3] = c0i - c1i; |
367 |
} |
368 |
L200: for( k = 0; k <= n - 1; k++ ) { |
369 |
xr[k] = wr[k]; |
370 |
xi[k] = wi[k]; |
371 |
} |
372 |
return; |
373 |
// --------------------------------------------------------------------- |
374 |
// --- Perform one radix-2 iteration to complete the calculation. This |
375 |
// branch is taken if the last iteration performed above is stored |
376 |
// in W. If M = 3, the usual settings of L1 and KU do not hold, so |
377 |
// they have to be set in this case. |
378 |
|
379 |
L300: if ( m == 3 ) { |
380 |
l1 = 4; |
381 |
ku = 3; |
382 |
} |
383 |
l2 = l1/2; |
384 |
j0 = 0; |
385 |
j1 = j0 + l1; |
386 |
j2 = j0 + l2; |
387 |
j3 = j1 + l2; |
388 |
k0 = 0; |
389 |
k1 = k0 + l1; |
390 |
k2 = k0 + l2; |
391 |
k3 = k1 + l2; |
392 |
for( k = 0; k <= l2 - 1; k++ ) { |
393 |
u1r = ur[2*k+ku-1]; |
394 |
u1i = is*ui[2*k+ku-1]; |
395 |
c0r = wr[k+k0]; |
396 |
c0i = wi[k+k0]; |
397 |
w1 = wr[k+k1]; |
398 |
w2 = wi[k+k1]; |
399 |
c1r = u1r*w1 - u1i*w2; |
400 |
c1i = u1r*w2 + u1i*w1; |
401 |
xr[k+j0] = c0r + c1r; |
402 |
xi[k+j0] = c0i + c1i; |
403 |
xr[k+j1] = c0r - c1r; |
404 |
xi[k+j1] = c0i - c1i; |
405 |
u2r = -is*u1i; |
406 |
u2i = is*u1r; |
407 |
c0r = wr[k+k2]; |
408 |
c0i = wi[k+k2]; |
409 |
w1 = wr[k+k3]; |
410 |
w2 = wi[k+k3]; |
411 |
c1r = u2r*w1 - u2i*w2; |
412 |
c1i = u2r*w2 + u2i*w1; |
413 |
xr[k+j2] = c0r + c1r; |
414 |
xi[k+j2] = c0i + c1i; |
415 |
xr[k+j3] = c0r - c1r; |
416 |
xi[k+j3] = c0i - c1i; |
417 |
} |
418 |
// --------------------------------------------------------------------- |
419 |
// --- Finished: Return |
420 |
} |