Statistics
| Branch: | Revision:

root / synthbench / euroben-dm / mod2f / cfft4.f @ 0:839f52ef7657

History | View | Annotate | Download (13.3 kB)

1
      Subroutine cfft4( is, m, ur, ui, xr, xi, wr, wi )
2
! ----------------------------------------------------------------------
3
! --- CFFT4 performs an N-point complex-to-complex FFT, where N = 2**M.
4
!     On input XR holds the Real part of the complex array;
5
!     XI holds the Imaginary part.
6
!     On output XR contains the Real part of the transformed array.
7
!     XI then contains the Imaginary part of the result.
8
!     WR and WI are work arrays that store Real and Imaginary parts
9
!     of intermediate results.
10
!     UR and UI contain rotation factors for various stages of the
11
!     transformation.
12
! 
13
! --- Before doing the actual transformation UR and UI must be
14
!     initialised by calling CFFT4 with IS = 0, and M = MX, where MX
15
!     is the maximum value of M that will be needed for any call of
16
!     CFFT4. 
17
!
18
! --- A call to CFFT4 with IS = 1 (or -1) indicates a call to perform
19
!     a FFT with positive (or negative) exponentials.
20
!
21
!     For any call to CFFT4 M must be M >= 2.
22
!     Dimensions of the arrays XR, XI, UR, UI, WR, WI are:
23
!
24
!     XR(N), XI(N), WR(N), WI(N).
25
!     UR(2**MX+1),       UI(2**MX+1)       for MX even.
26
!     UR(2*2**(MX/3)+1), UI(2*2**(MX/3)+1) for MX odd.     
27
!
28
!     In this routine we use 8-byte Real elements.
29
! ----------------------------------------------------------------------
30
      Use                    numerics
31
      Real(l_)            :: ur(*), ui(*), xr(*), xi(*), wr(*), wi(*)
32
      Integer             :: is, m
33

    
34
      Integer             :: i,  it, j,  j0, j1, j2, j3, j4, j5, k , kn,
35
     &                       ku, k0, k1, k2, k3, ln, l1, l2, l3, l4, mx,
36
     &                       n , nu, n1, n2, n3
37
      Real(l_), Parameter :: pi = 3.141592653589793_l_
38
      Real(l_)            :: c0r, c0i, c1r, c1i, c2r, c2i, c3r, c3i,
39
     &                       d0r, d0i, d1r, d1i, d2r, d2i, d3r, d3i,
40
     &                       t  , ti , u1r, u1i, u2r, u2i, u3r, u3i,
41
     &                       x1 , x2 , w1 , w2
42
! ----------------------------------------------------------------------
43
!
44
      If ( is == 0 ) Then
45
! ----------------------------------------------------------------------
46
! --- Initialise the U array with sines and cosines in a manner that
47
!     permits stride one access at each FFT iteration.
48

    
49
         nu    = 2**((m+1)/2*2)/3
50
         ur(1) = 64*nu + m
51
         ku    = 2
52
         ln    = 1
53

    
54
         Do j = 0, (m+1)/2 - 1
55
            t = pi/(2*ln)
56
cdir$ ivdep
57
            Do i = 0, ln - 1
58
               ti       = i*t
59
               ur(i+ku) = Cos( ti )
60
               ui(i+ku) = Sin( ti )
61
            End Do
62
            ku = ku + ln
63
            ln = 4*ln
64
         End Do
65
         Return
66
      End If
67
! ----------------------------------------------------------------------
68
! --- Transformation call to CFFT4: set initial parameters.
69

    
70
      n  = 2**m
71
      k  = ur(1)
72
      mx = Mod( k, 64 )
73
! ----------------------------------------------------------------------
74
! --- Check for invalid input parameters.
75

    
76
      If ( ( is /= 1 .AND. is /= -1 ) .OR. m < 2 .OR. m > mx ) Then
77
         Print 1000, is, m
78
         Stop
79
      End If
80
! ----------------------------------------------------------------------
81
!
82
      nu = k/64
83
      n1 = n/4
84
      n2 = 2*n1
85
      n3 = 3*n1
86
      ln = n1
87
! ----------------------------------------------------------------------
88
! --- For the first two radix-4 iterations, the inner loop increments J
89
!     instead of K. Non-unit stride memory access is used but the longer
90
!     vector lengths should compensate for that.
91

    
92
      j0 = 1
93
      j1 = j0 + 1
94
      j2 = j0 + 2
95
      j3 = j0 + 3
96
      k0 = j0
97
      k1 = k0 + n1
98
      k2 = k0 + n2
99
      k3 = k0 + n3
100
cdir$ ivdep
101
      Do j = 0, ln - 1
102
         c0r = xr(j+k0)
103
         c0i = xi(j+k0)
104
         c1r = xr(j+k1)
105
         c1i = xi(j+k1)
106
         c2r = xr(j+k2)
107
         c2i = xi(j+k2)
108
         c3r = xr(j+k3)
109
         c3i = xi(j+k3)
110
         d0r = c0r + c2r
111
         d0i = c0i + c2i
112
         d1r = c0r - c2r
113
         d1i = c0i - c2i
114
         d2r = c1r + c3r
115
         d2i = c1i + c3i
116
         d3r = is*(-c1i + c3i)
117
         d3i = is*( c1r - c3r)
118
         j4  = 4*j
119
         wr(j4+j0) = d0r + d2r
120
         wi(j4+j0) = d0i + d2i
121
         wr(j4+j1) = d1r + d3r
122
         wi(j4+j1) = d1i + d3i
123
         wr(j4+j2) = d0r - d2r
124
         wi(j4+j2) = d0i - d2i
125
         wr(j4+j3) = d1r - d3r
126
         wi(j4+j3) = d1i - d3i
127
      End Do
128
! ----------------------------------------------------------------------
129
! --- If M = 2 all transformation work is done, just copy WR/WI into
130
!     XR/XI (label 200). 
131
!     If M = 3, one radix-2 iteration still has to be done (label 300).
132

    
133
      If ( m == 2 ) Go To 200
134
      If ( m == 3 ) Go To 300
135

    
136
! --- Else:
137
! ----------------------------------------------------------------------
138
! --- Perform the second radix-4 iteration.
139

    
140
      ln = ln/4
141
      Do k = 0, 3
142
         j0  = k + 1
143
         j1  = j0 + 4
144
         j2  = j0 + 8
145
         j3  = j0 + 12
146
         k0  = j0
147
         k1  = k0 + n1
148
         k2  = k0 + n2
149
         k3  = k0 + n3
150
         u1r =    ur(k+3)
151
         u1i = is*ui(k+3)
152
         u2r = u1r*u1r - u1i*u1i
153
         u2i = 2.0D0*u1r*u1i
154
         u3r = u1r*u2r - u1i*u2i
155
         u3i = u1r*u2i + u1i*u2r
156
cdir$ ivdep
157
         Do j = 0, ln - 1
158
            j4  = 4*j
159
            c0r = wr(j4+k0)
160
            c0i = wi(j4+k0)
161
            w1  = wr(j4+k1)
162
            w2  = wi(j4+k1)
163
            c1r = u1r*w1 - u1i*w2
164
            c1i = u1r*w2 + u1i*w1
165
            w1  = wr(j4+k2)
166
            w2  = wi(j4+k2)
167
            c2r = u2r*w1 - u2i*w2
168
            c2i = u2r*w2 + u2i*w1
169
            w1  = wr(j4+k3)
170
            w2  = wi(j4+k3)
171
            c3r = u3r*w1 - u3i*w2
172
            c3i = u3r*w2 + u3i*w1
173
            d0r = c0r + c2r
174
            d0i = c0i + c2i
175
            d1r = c0r - c2r
176
            d1i = c0i - c2i
177
            d2r = c1r + c3r
178
            d2i = c1i + c3i
179
            d3r = is*(-c1i + c3i)
180
            d3i = is*( c1r - c3r)
181
            j5  = 16*j
182
            xr(j5+j0) = d0r + d2r
183
            xi(j5+j0) = d0i + d2i
184
            xr(j5+j1) = d1r + d3r
185
            xi(j5+j1) = d1i + d3i
186
            xr(j5+j2) = d0r - d2r
187
            xi(j5+j2) = d0i - d2i
188
            xr(j5+j3) = d1r - d3r
189
            xi(j5+j3) = d1i - d3i
190
         End Do
191
      End Do
192
! ----------------------------------------------------------------------
193
! --- Set parameters for subsequent iterations.
194

    
195
      ku = 7
196
      ln = ln/4
197
      l1 = 16
198
      l2 = 32
199
      l3 = 48
200
      l4 = 64
201
! ----------------------------------------------------------------------
202
! --- For all other radix-4 iterations, the inner loops increment K. In
203
!     this manner, all array references have unit stride. The radix-4 
204
!     iterations are performed in pairs: XR/XI to WR/WI and vice 
205
!     versa. This eliminates the need to copy data between X and W
206
!     after each iteration.
207
! --- If IT = M - 1 then only one radix-2 iteration needs to be done
208
!     (label 100).
209

    
210
      Do it = 4, m - 1, 4
211

    
212
         If ( it == m - 1 ) Go To 100
213

    
214
         Do j = 0, ln - 1
215
            j0 = j*l4 + 1
216
            j1 = j0 + l1
217
            j2 = j0 + l2
218
            j3 = j0 + l3
219
            k0 = j*l1 + 1
220
            k1 = k0 + n1
221
            k2 = k0 + n2
222
            k3 = k0 + n3
223
cdir$ ivdep
224
            Do k = 0, l1 - 1
225
               u1r =    ur(k+ku)
226
               u1i = is*ui(k+ku)
227
               u2r = u1r*u1r - u1i*u1i
228
               u2i = 2.0D0*u1r*u1i
229
               u3r = u1r*u2r - u1i*u2i
230
               u3i = u1r*u2i + u1i*u2r
231
               c0r = xr(k+k0)
232
               c0i = xi(k+k0)
233
               x1  = xr(k+k1)
234
               x2  = xi(k+k1)
235
               c1r = u1r*x1 - u1i*x2
236
               c1i = u1r*x2 + u1i*x1
237
               x1  = xr(k+k2)
238
               x2  = xi(k+k2)
239
               c2r = u2r*x1 - u2i*x2
240
               c2i = u2r*x2 + u2i*x1
241
               x1  = xr(k+k3)
242
               x2  = xi(k+k3)
243
               c3r = u3r*x1 - u3i*x2
244
               c3i = u3r*x2 + u3i*x1
245
               d0r = c0r + c2r
246
               d0i = c0i + c2i
247
               d1r = c0r - c2r
248
               d1i = c0i - c2i
249
               d2r = c1r + c3r
250
               d2i = c1i + c3i
251
               d3r = is*(-c1i + c3i)
252
               d3i = is*( c1r - c3r)
253
               wr(k+j0) = d0r + d2r
254
               wi(k+j0) = d0i + d2i
255
               wr(k+j1) = d1r + d3r
256
               wi(k+j1) = d1i + d3i
257
               wr(k+j2) = d0r - d2r
258
               wi(k+j2) = d0i - d2i
259
               wr(k+j3) = d1r - d3r
260
               wi(k+j3) = d1i - d3i
261
            End Do
262
         End Do
263

    
264
         ku = ku + l1
265
         ln = ln/4
266
         l1 = l4
267
         l2 = 2*l1
268
         l3 = 3*l1
269
         l4 = 4*l1
270
! ----------------------------------------------------------------------
271
! --- If IT = M - 2 the computation is complete except for copying
272
!                   WR/WI to XR/XI (label 200).
273
! --- If IT = M - 3 then only one radix-2 iteration remains to be done
274
!                   (label 300).
275

    
276
         If ( it == m - 2 ) Go To 200
277
         If ( it == m - 3 ) Go To 300
278
!
279
         Do j = 0, ln - 1
280
            j0 = j*l4 + 1
281
            j1 = j0 + l1
282
            j2 = j0 + l2
283
            j3 = j0 + l3
284
            k0 = j*l1 + 1
285
            k1 = k0 + n1
286
            k2 = k0 + n2
287
            k3 = k0 + n3
288
cdir$ ivdep
289
            Do k = 0, l1 - 1
290
               u1r =    ur(k+ku)
291
               u1i = is*ui(k+ku)
292
               u2r = u1r*u1r - u1i*u1i
293
               u2i = 2.0D0*u1r*u1i
294
               u3r = u1r*u2r - u1i*u2i
295
               u3i = u1r*u2i + u1i*u2r
296
               c0r = wr(k+k0)
297
               c0i = wi(k+k0)
298
               w1  = wr(k+k1)
299
               w2  = wi(k+k1)
300
               c1r = u1r*w1 - u1i*w2
301
               c1i = u1r*w2 + u1i*w1
302
               w1  = wr(k+k2)
303
               w2  = wi(k+k2)
304
               c2r = u2r*w1 - u2i*w2
305
               c2i = u2r*w2 + u2i*w1
306
               w1  = wr(k+k3)
307
               w2  = wi(k+k3)
308
               c3r = u3r*w1 - u3i*w2
309
               c3i = u3r*w2 + u3i*w1
310
               d0r = c0r + c2r
311
               d0i = c0i + c2i
312
               d1r = c0r - c2r
313
               d1i = c0i - c2i
314
               d2r = c1r + c3r
315
               d2i = c1i + c3i
316
               d3r = is*(-c1i + c3i)
317
               d3i = is*( c1r - c3r)
318
               xr(k+j0) = d0r + d2r
319
               xi(k+j0) = d0i + d2i
320
               xr(k+j1) = d1r + d3r
321
               xi(k+j1) = d1i + d3i
322
               xr(k+j2) = d0r - d2r
323
               xi(k+j2) = d0i - d2i
324
               xr(k+j3) = d1r - d3r
325
               xi(k+j3) = d1i - d3i
326
            End Do
327
         End Do
328
         ku = ku + l1
329
         ln = ln/4
330
         l1 = l4
331
         l2 = 2*l1
332
         l3 = 3*l1
333
         l4 = 4*l1
334
      End Do
335
      Go To 400
336
! ----------------------------------------------------------------------
337
! --- Perform one radix-2 iteration to complete the calculation. This
338
!     branch is taken when the last iteration performed above is stored
339
!     in XR/XI.
340

    
341
  100 l2 = l1/2
342
      j0 = 1
343
      j1 = j0 + l1
344
      j2 = j0 + l2
345
      j3 = j1 + l2
346
      k0 = 1
347
      k1 = k0 + l1
348
      k2 = k0 + l2
349
      k3 = k1 + l2
350
cdir$ ivdep
351
      Do k = 0, l2 - 1
352
         u1r =    ur(2*k + ku)
353
         u1i = is*ui(2*k + ku)
354
         c0r = xr(k+k0)
355
         c0i = xi(k+k0)
356
         x1  = xr(k+k1)
357
         x2  = xi(k+k1)
358
         c1r = u1r*x1 - u1i*x2
359
         c1i = u1r*x2 + u1i*x1
360
         wr(k+j0) = c0r + c1r
361
         wi(k+j0) = c0i + c1i
362
         wr(k+j1) = c0r - c1r
363
         wi(k+j1) = c0i - c1i
364
         u2r = -is*u1i
365
         u2i =  is*u1r
366
         c0r = xr(k+k2)
367
         c0i = xi(k+k2)
368
         x1  = xr(k+k3)
369
         x2  = xi(k+k3)
370
         c1r = u2r*x1 - u2i*x2
371
         c1i = u2r*x2 + u2i*x1
372
         wr(k+j2) = c0r + c1r
373
         wi(k+j2) = c0i + c1i
374
         wr(k+j3) = c0r - c1r
375
         wi(k+j3) = c0i - c1i
376
      End Do
377

    
378
  200 Do k = 1, n
379
         xr(k) = wr(k)
380
         xi(k) = wi(k)
381
      End Do
382
      Go To 400
383
! ----------------------------------------------------------------------
384
! --- Perform one radix-2 iteration to complete the calculation. This
385
!     branch is taken if the last iteration performed above is stored
386
!     in W. If M = 3, the usual settings of L1 and KU do not hold, so
387
!     they have to be set in this case.
388

    
389
  300 If ( m == 3 ) Then
390
         l1 = 4
391
         ku = 3
392
      End If
393
      l2 = l1/2
394
      j0 = 1
395
      j1 = j0 + l1
396
      j2 = j0 + l2
397
      j3 = j1 + l2
398
      k0 = 1
399
      k1 = k0 + l1
400
      k2 = k0 + l2
401
      k3 = k1 + l2
402
cdir$ ivdep
403
      Do k = 0, l2 - 1
404
         u1r =    ur(2*k + ku)
405
         u1i = is*ui(2*k + ku)
406
         c0r = wr(k+k0)
407
         c0i = wi(k+k0)
408
         w1  = wr(k+k1)
409
         w2  = wi(k+k1)
410
         c1r = u1r*w1 - u1i*w2
411
         c1i = u1r*w2 + u1i*w1
412
         xr(k+j0) = c0r + c1r
413
         xi(k+j0) = c0i + c1i
414
         xr(k+j1) = c0r - c1r
415
         xi(k+j1) = c0i - c1i
416
         u2r = -is*u1i
417
         u2i =  is*u1r
418
         c0r = wr(k+k2)
419
         c0i = wi(k+k2)
420
         w1  = wr(k+k3)
421
         w2  = wi(k+k3)
422
         c1r = u2r*w1 - u2i*w2
423
         c1i = u2r*w2 + u2i*w1
424
         xr(k+j2) = c0r + c1r
425
         xi(k+j2) = c0i + c1i
426
         xr(k+j3) = c0r - c1r
427
         xi(k+j3) = c0i - c1i
428
      End Do
429
! ----------------------------------------------------------------------
430
! --- Finished: Return
431
 
432
  400 Return
433
! ----------------------------------------------------------------------
434
! --- Formats:
435

    
436
 1000 Format(' *** Error in CFFT4: Either U has not been initialised',/
437
     &       ,' or one of the input parameters IS or M is invalid:',/
438
     &       ,' IS = ', I5, '; M = ', I5)
439
! ----------------------------------------------------------------------
440
      End Subroutine cfft4