Statistics
| Branch: | Revision:

root / synthbench / euroben-ports / base / C / mod2f / .svn / text-base / cfft4.c.svn-base @ 0:839f52ef7657

History | View | Annotate | Download (13.3 kB)

1
#include <math.h>
2
#include <stdio.h>
3
#include <stdlib.h>
4

    
5
void cfft4( int is, int m, double ur[], double ui[], double xr[], double xi[],
6
            double wr[], double wi[] )
7
/* ----------------------------------------------------------------------
8
! --- CFFT4 performs an N-point complex-to-complex FFT, where N = 2**M.
9
!     On input XR holds the Real part of the complex array;
10
!     XI holds the Imaginary part.
11
!     On output XR contains the Real part of the transformed array.
12
!     XI then contains the Imaginary part of the result.
13
!     WR and WI are work arrays that store Real and Imaginary parts
14
!     of intermediate results.
15
!     UR and UI contain rotation factors for various stages of the
16
!     transformation.
17
!
18
! --- Before doing the actual transformation UR and UI must be
19
!     initialised by calling CFFT4 with IS = 0, and M = MX, where MX
20
!     is the maximum value of M that will be needed for any call of
21
!     CFFT4.
22
!
23
! --- A call to CFFT4 with IS = 1 (or -1) indicates a call to perform
24
!     a FFT with positive (or negative) exponentials.
25
!
26
!     For any call to CFFT4 M must be M >= 2.
27
!     Dimensions of the arrays XR, XI, UR, UI, WR, WI are:
28
!
29
!     XR[N], XI[N], WR[N], WI[N].
30
!     UR[2**MX+1],       UI[2**MX+1]       for MX even.
31
!     UR[2*2**(MX/3)+1], UI[2*2**(MX/3)+1] for MX odd.
32
!
33
!     In this routine we use 8-byte Real elements (doubles).
34
! ---------------------------------------------------------------------- */
35
{
36
   int          i,  it, j,  j0, j1, j2, j3, j4, j5, k , kn, ku, k0,
37
                k1, k2, k3, ln, l1, l2, l3, l4, mx, n , nu, n1, n2, n3;
38
   const double pi = 3.141592653589793;
39
   double       c0r, c0i, c1r, c1i, c2r, c2i, c3r, c3i,
40
                d0r, d0i, d1r, d1i, d2r, d2i, d3r, d3i,
41
                t  , ti , u1r, u1i, u2r, u2i, u3r, u3i,
42
                x1 , x2 , w1 , w2;
43
// ---------------------------------------------------------------------
44
// --- Initialise the U array with sines and cosines in a manner that
45
//     permits stride one access at each FFT iteration.
46

    
47
       if ( is == 0 ) {
48
         nu     = (int)pow( 2, ((m+1)/2*2) )/3;
49
         ur[0] = 64*nu + m;
50
         ku    = 1;
51
         ln    = 1;
52
         for( j = 0; j <= (m+1)/2 - 1; j++ ) {
53
            t = pi/(2*ln);
54
            for( i = 0; i <= ln - 1; i++ ) {
55
               ti       = i*t;
56
               ur[i+ku] = cos( ti );
57
               ui[i+ku] = sin( ti );
58
            }
59
            ku = ku + ln;
60
            ln = 4*ln;
61
         }
62
         return;
63
      }
64
// ---------------------------------------------------------------------
65
// --- Transformation call to CFFT4: set initial parameters.
66

    
67
      n  = (int)pow( 2, m );
68
      k  = ur[0];
69
      mx = k%64;
70
// ---------------------------------------------------------------------
71
// --- Check for invalid input parameters.
72

    
73
      if ( ( is != 1 && is != -1 ) || m < 2 || m > mx ) {
74
         printf( " *** Error in CFFT4: Either U has not been initialised\n" );
75
         printf( "  or one of the input parameters IS or M is invalid:\n" );
76
         printf( " is = %5d; m = %5d\n", is, m );
77
         abort();
78
      }
79
// ---------------------------------------------------------------------
80

    
81
      nu = k/64;
82
      n1 = n/4;
83
      n2 = 2*n1;
84
      n3 = 3*n1;
85
      ln = n1;
86
// ---------------------------------------------------------------------
87
// --- For the first two radix-4 iterations, the inner loop increments J
88
//     instead of K. Non-unit stride memory access is used but the longer
89
//     vector lengths should compensate for that.
90

    
91
      j0 = 0;
92
      j1 = j0 + 1;
93
      j2 = j0 + 2;
94
      j3 = j0 + 3;
95
      k0 = j0;
96
      k1 = k0 + n1;
97
      k2 = k0 + n2;
98
      k3 = k0 + n3;
99
      for( j = 0; j <= ln - 1; j++ ) {
100
         c0r = xr[j+k0];
101
         c0i = xi[j+k0];
102
         c1r = xr[j+k1];
103
         c1i = xi[j+k1];
104
         c2r = xr[j+k2];
105
         c2i = xi[j+k2];
106
         c3r = xr[j+k3];
107
         c3i = xi[j+k3];
108
         d0r = c0r + c2r;
109
         d0i = c0i + c2i;
110
         d1r = c0r - c2r;
111
         d1i = c0i - c2i;
112
         d2r = c1r + c3r;
113
         d2i = c1i + c3i;
114
         d3r = is*(-c1i + c3i);
115
         d3i = is*( c1r - c3r);
116
         j4  = 4*j;
117
         wr[j4+j0] = d0r + d2r;
118
         wi[j4+j0] = d0i + d2i;
119
         wr[j4+j1] = d1r + d3r;
120
         wi[j4+j1] = d1i + d3i;
121
         wr[j4+j2] = d0r - d2r;
122
         wi[j4+j2] = d0i - d2i;
123
         wr[j4+j3] = d1r - d3r;
124
         wi[j4+j3] = d1i - d3i;
125
      }
126
// ---------------------------------------------------------------------
127
// --- If M = 2 all transformation work is done, just copy WR/WI into
128
//     XR/XI (label L200).
129
//     If M = 3, one radix-2 iteration still has to be done (label L300).
130

    
131
      if ( m == 2 ) goto L200;
132
      if ( m == 3 ) goto L300;
133

    
134
// --- Else:
135
// ----------------------------------------------------------------------
136
// --- Perform the second radix-4 iteration.
137

    
138
      ln = ln/4;
139
      for( k = 0; k <= 3; k++ ) {
140
         j0  = k;
141
         j1  = j0 + 4;
142
         j2  = j0 + 8;
143
         j3  = j0 + 12;
144
         k0  = j0;
145
         k1  = k0 + n1;
146
         k2  = k0 + n2;
147
         k3  = k0 + n3;
148
         u1r =    ur[k+2];
149
         u1i = is*ui[k+2];
150
         u2r = u1r*u1r - u1i*u1i;
151
         u2i = 2.0*u1r*u1i;
152
         u3r = u1r*u2r - u1i*u2i;
153
         u3i = u1r*u2i + u1i*u2r;
154
         for( j = 0; j <= ln - 1; j++ ) {
155
            j4  = 4*j;
156
            c0r = wr[j4+k0];
157
            c0i = wi[j4+k0];
158
            w1  = wr[j4+k1];
159
            w2  = wi[j4+k1];
160
            c1r = u1r*w1 - u1i*w2;
161
            c1i = u1r*w2 + u1i*w1;
162
            w1  = wr[j4+k2];
163
            w2  = wi[j4+k2];
164
            c2r = u2r*w1 - u2i*w2;
165
            c2i = u2r*w2 + u2i*w1;
166
            w1  = wr[j4+k3];
167
            w2  = wi[j4+k3];
168
            c3r = u3r*w1 - u3i*w2;
169
            c3i = u3r*w2 + u3i*w1;
170
            d0r = c0r + c2r;
171
            d0i = c0i + c2i;
172
            d1r = c0r - c2r;
173
            d1i = c0i - c2i;
174
            d2r = c1r + c3r;
175
            d2i = c1i + c3i;
176
            d3r = is*(-c1i + c3i);
177
            d3i = is*( c1r - c3r);
178
            j5  = 16*j;
179
            xr[j5+j0] = d0r + d2r;
180
            xi[j5+j0] = d0i + d2i;
181
            xr[j5+j1] = d1r + d3r;
182
            xi[j5+j1] = d1i + d3i;
183
            xr[j5+j2] = d0r - d2r;
184
            xi[j5+j2] = d0i - d2i;
185
            xr[j5+j3] = d1r - d3r;
186
            xi[j5+j3] = d1i - d3i;
187
         }
188
      }
189
// ---------------------------------------------------------------------
190
// --- Set parameters for subsequent iterations.
191

    
192
      ku = 7;
193
      ln = ln/4;
194
      l1 = 16;
195
      l2 = 32;
196
      l3 = 48;
197
      l4 = 64;
198
// ---------------------------------------------------------------------
199
// --- For all other radix-4 iterations, the inner loops increment K. In
200
//     this manner, all array references have unit stride. The radix-4
201
//     iterations are performed in pairs: XR/XI to WR/WI and vice
202
//     versa. This eliminates the need to copy data between X and W
203
//     after each iteration.
204
// --- If IT = M - 1 then only one radix-2 iteration needs to be done
205
//     (label L100).
206

    
207
      for( it = 4; it <= m - 1; it+= 4 ) {
208
         if ( it == m - 1 ) goto L100;
209
         for(  j = 0; j <= ln - 1; j++ ) {
210
            j0 = j*l4;
211
            j1 = j0 + l1;
212
            j2 = j0 + l2;
213
            j3 = j0 + l3;
214
            k0 = j*l1;
215
            k1 = k0 + n1;
216
            k2 = k0 + n2;
217
            k3 = k0 + n3;
218
            for( k = 0; k <= l1 - 1; k++ ) {
219
               u1r =    ur[k+ku-1];
220
               u1i = is*ui[k+ku-1];
221
               u2r = u1r*u1r - u1i*u1i;
222
               u2i = 2.0*u1r*u1i;
223
               u3r = u1r*u2r - u1i*u2i;
224
               u3i = u1r*u2i + u1i*u2r;
225
               c0r = xr[k+k0];
226
               c0i = xi[k+k0];
227
               x1  = xr[k+k1];
228
               x2  = xi[k+k1];
229
               c1r = u1r*x1 - u1i*x2;
230
               c1i = u1r*x2 + u1i*x1;
231
               x1  = xr[k+k2];
232
               x2  = xi[k+k2];
233
               c2r = u2r*x1 - u2i*x2;
234
               c2i = u2r*x2 + u2i*x1;
235
               x1  = xr[k+k3];
236
               x2  = xi[k+k3];
237
               c3r = u3r*x1 - u3i*x2;
238
               c3i = u3r*x2 + u3i*x1;
239
               d0r = c0r + c2r;
240
               d0i = c0i + c2i;
241
               d1r = c0r - c2r;
242
               d1i = c0i - c2i;
243
               d2r = c1r + c3r;
244
               d2i = c1i + c3i;
245
               d3r = is*(-c1i + c3i);
246
               d3i = is*( c1r - c3r);
247
               wr[k+j0] = d0r + d2r;
248
               wi[k+j0] = d0i + d2i;
249
               wr[k+j1] = d1r + d3r;
250
               wi[k+j1] = d1i + d3i;
251
               wr[k+j2] = d0r - d2r;
252
               wi[k+j2] = d0i - d2i;
253
               wr[k+j3] = d1r - d3r;
254
               wi[k+j3] = d1i - d3i;
255
            }
256
         }
257
         ku = ku + l1;
258
         ln = ln/4;
259
         l1 = l4;
260
         l2 = 2*l1;
261
         l3 = 3*l1;
262
         l4 = 4*l1;
263
//  ----------------------------------------------------------------------
264
// --- If IT = M - 2 the computation is complete except for copying
265
//                   WR/WI to XR/XI (label L200).
266
// --- If IT = M - 3 then only one radix-2 iteration remains to be done
267
//                   (label L300).
268

    
269
         if ( it == m - 2 ) goto L200;
270
         if ( it == m - 3 ) goto L300;
271

    
272
         for( j = 0; j <= ln - 1; j++ ) {
273
            j0 = j*l4;
274
            j1 = j0 + l1;
275
            j2 = j0 + l2;
276
            j3 = j0 + l3;
277
            k0 = j*l1;
278
            k1 = k0 + n1;
279
            k2 = k0 + n2;
280
            k3 = k0 + n3;
281
            for( k = 0; k <= l1 - 1; k++ ) {
282
               u1r =    ur[k+ku-1];
283
               u1i = is*ui[k+ku-1];
284
               u2r = u1r*u1r - u1i*u1i;
285
               u2i = 2.0*u1r*u1i;
286
               u3r = u1r*u2r - u1i*u2i;
287
               u3i = u1r*u2i + u1i*u2r;
288
               c0r = wr[k+k0];
289
               c0i = wi[k+k0];
290
               w1  = wr[k+k1];
291
               w2  = wi[k+k1];
292
               c1r = u1r*w1 - u1i*w2;
293
               c1i = u1r*w2 + u1i*w1;
294
               w1  = wr[k+k2];
295
               w2  = wi[k+k2];
296
               c2r = u2r*w1 - u2i*w2;
297
               c2i = u2r*w2 + u2i*w1;
298
               w1  = wr[k+k3];
299
               w2  = wi[k+k3];
300
               c3r = u3r*w1 - u3i*w2;
301
               c3i = u3r*w2 + u3i*w1;
302
               d0r = c0r + c2r;
303
               d0i = c0i + c2i;
304
               d1r = c0r - c2r;
305
               d1i = c0i - c2i;
306
               d2r = c1r + c3r;
307
               d2i = c1i + c3i;
308
               d3r = is*(-c1i + c3i);
309
               d3i = is*( c1r - c3r);
310
               xr[k+j0] = d0r + d2r;
311
               xi[k+j0] = d0i + d2i;
312
               xr[k+j1] = d1r + d3r;
313
               xi[k+j1] = d1i + d3i;
314
               xr[k+j2] = d0r - d2r;
315
               xi[k+j2] = d0i - d2i;
316
               xr[k+j3] = d1r - d3r;
317
               xi[k+j3] = d1i - d3i;
318
            }
319
         }
320
         ku = ku + l1;
321
         ln = ln/4;
322
         l1 = l4;
323
         l2 = 2*l1;
324
         l3 = 3*l1;
325
         l4 = 4*l1;
326
      }
327
      return;
328
//  ----------------------------------------------------------------------
329
// --- Perform one radix-2 iteration to complete the calculation. This
330
//     branch is taken when the last iteration performed above is stored
331
//     in XR/XI.
332

    
333
L100: l2 = l1/2;
334
      j0 = 0;
335
      j1 = j0 + l1;
336
      j2 = j0 + l2;
337
      j3 = j1 + l2;
338
      k0 = 0;
339
      k1 = k0 + l1;
340
      k2 = k0 + l2;
341
      k3 = k1 + l2;
342
      for( k = 0; k <= l2 - 1; k++ ) {
343
         u1r =    ur[2*k+ku-1];
344
         u1i = is*ui[2*k+ku-1];
345
         c0r = xr[k+k0];
346
         c0i = xi[k+k0];
347
         x1  = xr[k+k1];
348
         x2  = xi[k+k1];
349
         c1r = u1r*x1 - u1i*x2;
350
         c1i = u1r*x2 + u1i*x1;
351
         wr[k+j0] = c0r + c1r;
352
         wi[k+j0] = c0i + c1i;
353
         wr[k+j1] = c0r - c1r;
354
         wi[k+j1] = c0i - c1i;
355
         u2r = -is*u1i;
356
         u2i =  is*u1r;
357
         c0r = xr[k+k2];
358
         c0i = xi[k+k2];
359
         x1  = xr[k+k3];
360
         x2  = xi[k+k3];
361
         c1r = u2r*x1 - u2i*x2;
362
         c1i = u2r*x2 + u2i*x1;
363
         wr[k+j2] = c0r + c1r;
364
         wi[k+j2] = c0i + c1i;
365
         wr[k+j3] = c0r - c1r;
366
         wi[k+j3] = c0i - c1i;
367
      }
368
L200: for( k = 0; k <= n - 1; k++ ) {
369
         xr[k] = wr[k];
370
         xi[k] = wi[k];
371
      }
372
      return;
373
// ---------------------------------------------------------------------
374
// --- Perform one radix-2 iteration to complete the calculation. This
375
//     branch is taken if the last iteration performed above is stored
376
//     in W. If M = 3, the usual settings of L1 and KU do not hold, so
377
//     they have to be set in this case.
378

    
379
L300: if ( m == 3 ) {
380
         l1 = 4;
381
         ku = 3; 
382
      }
383
      l2 = l1/2;
384
      j0 = 0;
385
      j1 = j0 + l1;
386
      j2 = j0 + l2;
387
      j3 = j1 + l2;
388
      k0 = 0;
389
      k1 = k0 + l1;
390
      k2 = k0 + l2;
391
      k3 = k1 + l2;
392
      for( k = 0; k <= l2 - 1; k++ ) {
393
         u1r =    ur[2*k+ku-1];
394
         u1i = is*ui[2*k+ku-1];
395
         c0r = wr[k+k0];
396
         c0i = wi[k+k0];
397
         w1  = wr[k+k1];
398
         w2  = wi[k+k1];
399
         c1r = u1r*w1 - u1i*w2;
400
         c1i = u1r*w2 + u1i*w1;
401
         xr[k+j0] = c0r + c1r;
402
         xi[k+j0] = c0i + c1i;
403
         xr[k+j1] = c0r - c1r;
404
         xi[k+j1] = c0i - c1i;
405
         u2r = -is*u1i;
406
         u2i =  is*u1r;
407
         c0r = wr[k+k2];
408
         c0i = wi[k+k2];
409
         w1  = wr[k+k3];
410
         w2  = wi[k+k3];
411
         c1r = u2r*w1 - u2i*w2;
412
         c1i = u2r*w2 + u2i*w1;
413
         xr[k+j2] = c0r + c1r;
414
         xi[k+j2] = c0i + c1i;
415
         xr[k+j3] = c0r - c1r;
416
         xi[k+j3] = c0i - c1i;
417
      }
418
// ---------------------------------------------------------------------
419
// --- Finished: Return
420
}