root / synthbench / euroben-dm / mod2f / cfft4.f @ 0:839f52ef7657
History | View | Annotate | Download (13.3 kB)
1 |
Subroutine cfft4( is, m, ur, ui, xr, xi, wr, wi ) |
---|---|
2 |
! ---------------------------------------------------------------------- |
3 |
! --- CFFT4 performs an N-point complex-to-complex FFT, where N = 2**M. |
4 |
! On input XR holds the Real part of the complex array; |
5 |
! XI holds the Imaginary part. |
6 |
! On output XR contains the Real part of the transformed array. |
7 |
! XI then contains the Imaginary part of the result. |
8 |
! WR and WI are work arrays that store Real and Imaginary parts |
9 |
! of intermediate results. |
10 |
! UR and UI contain rotation factors for various stages of the |
11 |
! transformation. |
12 |
! |
13 |
! --- Before doing the actual transformation UR and UI must be |
14 |
! initialised by calling CFFT4 with IS = 0, and M = MX, where MX |
15 |
! is the maximum value of M that will be needed for any call of |
16 |
! CFFT4. |
17 |
! |
18 |
! --- A call to CFFT4 with IS = 1 (or -1) indicates a call to perform |
19 |
! a FFT with positive (or negative) exponentials. |
20 |
! |
21 |
! For any call to CFFT4 M must be M >= 2. |
22 |
! Dimensions of the arrays XR, XI, UR, UI, WR, WI are: |
23 |
! |
24 |
! XR(N), XI(N), WR(N), WI(N). |
25 |
! UR(2**MX+1), UI(2**MX+1) for MX even. |
26 |
! UR(2*2**(MX/3)+1), UI(2*2**(MX/3)+1) for MX odd. |
27 |
! |
28 |
! In this routine we use 8-byte Real elements. |
29 |
! ---------------------------------------------------------------------- |
30 |
Use numerics |
31 |
Real(l_) :: ur(*), ui(*), xr(*), xi(*), wr(*), wi(*) |
32 |
Integer :: is, m |
33 |
|
34 |
Integer :: i, it, j, j0, j1, j2, j3, j4, j5, k , kn, |
35 |
& ku, k0, k1, k2, k3, ln, l1, l2, l3, l4, mx, |
36 |
& n , nu, n1, n2, n3 |
37 |
Real(l_), Parameter :: pi = 3.141592653589793_l_ |
38 |
Real(l_) :: c0r, c0i, c1r, c1i, c2r, c2i, c3r, c3i, |
39 |
& d0r, d0i, d1r, d1i, d2r, d2i, d3r, d3i, |
40 |
& t , ti , u1r, u1i, u2r, u2i, u3r, u3i, |
41 |
& x1 , x2 , w1 , w2 |
42 |
! ---------------------------------------------------------------------- |
43 |
! |
44 |
If ( is == 0 ) Then |
45 |
! ---------------------------------------------------------------------- |
46 |
! --- Initialise the U array with sines and cosines in a manner that |
47 |
! permits stride one access at each FFT iteration. |
48 |
|
49 |
nu = 2**((m+1)/2*2)/3 |
50 |
ur(1) = 64*nu + m |
51 |
ku = 2 |
52 |
ln = 1 |
53 |
|
54 |
Do j = 0, (m+1)/2 - 1 |
55 |
t = pi/(2*ln) |
56 |
cdir$ ivdep |
57 |
Do i = 0, ln - 1 |
58 |
ti = i*t |
59 |
ur(i+ku) = Cos( ti ) |
60 |
ui(i+ku) = Sin( ti ) |
61 |
End Do |
62 |
ku = ku + ln |
63 |
ln = 4*ln |
64 |
End Do |
65 |
Return |
66 |
End If |
67 |
! ---------------------------------------------------------------------- |
68 |
! --- Transformation call to CFFT4: set initial parameters. |
69 |
|
70 |
n = 2**m |
71 |
k = ur(1) |
72 |
mx = Mod( k, 64 ) |
73 |
! ---------------------------------------------------------------------- |
74 |
! --- Check for invalid input parameters. |
75 |
|
76 |
If ( ( is /= 1 .AND. is /= -1 ) .OR. m < 2 .OR. m > mx ) Then |
77 |
Print 1000, is, m |
78 |
Stop |
79 |
End If |
80 |
! ---------------------------------------------------------------------- |
81 |
! |
82 |
nu = k/64 |
83 |
n1 = n/4 |
84 |
n2 = 2*n1 |
85 |
n3 = 3*n1 |
86 |
ln = n1 |
87 |
! ---------------------------------------------------------------------- |
88 |
! --- For the first two radix-4 iterations, the inner loop increments J |
89 |
! instead of K. Non-unit stride memory access is used but the longer |
90 |
! vector lengths should compensate for that. |
91 |
|
92 |
j0 = 1 |
93 |
j1 = j0 + 1 |
94 |
j2 = j0 + 2 |
95 |
j3 = j0 + 3 |
96 |
k0 = j0 |
97 |
k1 = k0 + n1 |
98 |
k2 = k0 + n2 |
99 |
k3 = k0 + n3 |
100 |
cdir$ ivdep |
101 |
Do j = 0, ln - 1 |
102 |
c0r = xr(j+k0) |
103 |
c0i = xi(j+k0) |
104 |
c1r = xr(j+k1) |
105 |
c1i = xi(j+k1) |
106 |
c2r = xr(j+k2) |
107 |
c2i = xi(j+k2) |
108 |
c3r = xr(j+k3) |
109 |
c3i = xi(j+k3) |
110 |
d0r = c0r + c2r |
111 |
d0i = c0i + c2i |
112 |
d1r = c0r - c2r |
113 |
d1i = c0i - c2i |
114 |
d2r = c1r + c3r |
115 |
d2i = c1i + c3i |
116 |
d3r = is*(-c1i + c3i) |
117 |
d3i = is*( c1r - c3r) |
118 |
j4 = 4*j |
119 |
wr(j4+j0) = d0r + d2r |
120 |
wi(j4+j0) = d0i + d2i |
121 |
wr(j4+j1) = d1r + d3r |
122 |
wi(j4+j1) = d1i + d3i |
123 |
wr(j4+j2) = d0r - d2r |
124 |
wi(j4+j2) = d0i - d2i |
125 |
wr(j4+j3) = d1r - d3r |
126 |
wi(j4+j3) = d1i - d3i |
127 |
End Do |
128 |
! ---------------------------------------------------------------------- |
129 |
! --- If M = 2 all transformation work is done, just copy WR/WI into |
130 |
! XR/XI (label 200). |
131 |
! If M = 3, one radix-2 iteration still has to be done (label 300). |
132 |
|
133 |
If ( m == 2 ) Go To 200 |
134 |
If ( m == 3 ) Go To 300 |
135 |
|
136 |
! --- Else: |
137 |
! ---------------------------------------------------------------------- |
138 |
! --- Perform the second radix-4 iteration. |
139 |
|
140 |
ln = ln/4 |
141 |
Do k = 0, 3 |
142 |
j0 = k + 1 |
143 |
j1 = j0 + 4 |
144 |
j2 = j0 + 8 |
145 |
j3 = j0 + 12 |
146 |
k0 = j0 |
147 |
k1 = k0 + n1 |
148 |
k2 = k0 + n2 |
149 |
k3 = k0 + n3 |
150 |
u1r = ur(k+3) |
151 |
u1i = is*ui(k+3) |
152 |
u2r = u1r*u1r - u1i*u1i |
153 |
u2i = 2.0D0*u1r*u1i |
154 |
u3r = u1r*u2r - u1i*u2i |
155 |
u3i = u1r*u2i + u1i*u2r |
156 |
cdir$ ivdep |
157 |
Do j = 0, ln - 1 |
158 |
j4 = 4*j |
159 |
c0r = wr(j4+k0) |
160 |
c0i = wi(j4+k0) |
161 |
w1 = wr(j4+k1) |
162 |
w2 = wi(j4+k1) |
163 |
c1r = u1r*w1 - u1i*w2 |
164 |
c1i = u1r*w2 + u1i*w1 |
165 |
w1 = wr(j4+k2) |
166 |
w2 = wi(j4+k2) |
167 |
c2r = u2r*w1 - u2i*w2 |
168 |
c2i = u2r*w2 + u2i*w1 |
169 |
w1 = wr(j4+k3) |
170 |
w2 = wi(j4+k3) |
171 |
c3r = u3r*w1 - u3i*w2 |
172 |
c3i = u3r*w2 + u3i*w1 |
173 |
d0r = c0r + c2r |
174 |
d0i = c0i + c2i |
175 |
d1r = c0r - c2r |
176 |
d1i = c0i - c2i |
177 |
d2r = c1r + c3r |
178 |
d2i = c1i + c3i |
179 |
d3r = is*(-c1i + c3i) |
180 |
d3i = is*( c1r - c3r) |
181 |
j5 = 16*j |
182 |
xr(j5+j0) = d0r + d2r |
183 |
xi(j5+j0) = d0i + d2i |
184 |
xr(j5+j1) = d1r + d3r |
185 |
xi(j5+j1) = d1i + d3i |
186 |
xr(j5+j2) = d0r - d2r |
187 |
xi(j5+j2) = d0i - d2i |
188 |
xr(j5+j3) = d1r - d3r |
189 |
xi(j5+j3) = d1i - d3i |
190 |
End Do |
191 |
End Do |
192 |
! ---------------------------------------------------------------------- |
193 |
! --- Set parameters for subsequent iterations. |
194 |
|
195 |
ku = 7 |
196 |
ln = ln/4 |
197 |
l1 = 16 |
198 |
l2 = 32 |
199 |
l3 = 48 |
200 |
l4 = 64 |
201 |
! ---------------------------------------------------------------------- |
202 |
! --- For all other radix-4 iterations, the inner loops increment K. In |
203 |
! this manner, all array references have unit stride. The radix-4 |
204 |
! iterations are performed in pairs: XR/XI to WR/WI and vice |
205 |
! versa. This eliminates the need to copy data between X and W |
206 |
! after each iteration. |
207 |
! --- If IT = M - 1 then only one radix-2 iteration needs to be done |
208 |
! (label 100). |
209 |
|
210 |
Do it = 4, m - 1, 4 |
211 |
|
212 |
If ( it == m - 1 ) Go To 100 |
213 |
|
214 |
Do j = 0, ln - 1 |
215 |
j0 = j*l4 + 1 |
216 |
j1 = j0 + l1 |
217 |
j2 = j0 + l2 |
218 |
j3 = j0 + l3 |
219 |
k0 = j*l1 + 1 |
220 |
k1 = k0 + n1 |
221 |
k2 = k0 + n2 |
222 |
k3 = k0 + n3 |
223 |
cdir$ ivdep |
224 |
Do k = 0, l1 - 1 |
225 |
u1r = ur(k+ku) |
226 |
u1i = is*ui(k+ku) |
227 |
u2r = u1r*u1r - u1i*u1i |
228 |
u2i = 2.0D0*u1r*u1i |
229 |
u3r = u1r*u2r - u1i*u2i |
230 |
u3i = u1r*u2i + u1i*u2r |
231 |
c0r = xr(k+k0) |
232 |
c0i = xi(k+k0) |
233 |
x1 = xr(k+k1) |
234 |
x2 = xi(k+k1) |
235 |
c1r = u1r*x1 - u1i*x2 |
236 |
c1i = u1r*x2 + u1i*x1 |
237 |
x1 = xr(k+k2) |
238 |
x2 = xi(k+k2) |
239 |
c2r = u2r*x1 - u2i*x2 |
240 |
c2i = u2r*x2 + u2i*x1 |
241 |
x1 = xr(k+k3) |
242 |
x2 = xi(k+k3) |
243 |
c3r = u3r*x1 - u3i*x2 |
244 |
c3i = u3r*x2 + u3i*x1 |
245 |
d0r = c0r + c2r |
246 |
d0i = c0i + c2i |
247 |
d1r = c0r - c2r |
248 |
d1i = c0i - c2i |
249 |
d2r = c1r + c3r |
250 |
d2i = c1i + c3i |
251 |
d3r = is*(-c1i + c3i) |
252 |
d3i = is*( c1r - c3r) |
253 |
wr(k+j0) = d0r + d2r |
254 |
wi(k+j0) = d0i + d2i |
255 |
wr(k+j1) = d1r + d3r |
256 |
wi(k+j1) = d1i + d3i |
257 |
wr(k+j2) = d0r - d2r |
258 |
wi(k+j2) = d0i - d2i |
259 |
wr(k+j3) = d1r - d3r |
260 |
wi(k+j3) = d1i - d3i |
261 |
End Do |
262 |
End Do |
263 |
|
264 |
ku = ku + l1 |
265 |
ln = ln/4 |
266 |
l1 = l4 |
267 |
l2 = 2*l1 |
268 |
l3 = 3*l1 |
269 |
l4 = 4*l1 |
270 |
! ---------------------------------------------------------------------- |
271 |
! --- If IT = M - 2 the computation is complete except for copying |
272 |
! WR/WI to XR/XI (label 200). |
273 |
! --- If IT = M - 3 then only one radix-2 iteration remains to be done |
274 |
! (label 300). |
275 |
|
276 |
If ( it == m - 2 ) Go To 200 |
277 |
If ( it == m - 3 ) Go To 300 |
278 |
! |
279 |
Do j = 0, ln - 1 |
280 |
j0 = j*l4 + 1 |
281 |
j1 = j0 + l1 |
282 |
j2 = j0 + l2 |
283 |
j3 = j0 + l3 |
284 |
k0 = j*l1 + 1 |
285 |
k1 = k0 + n1 |
286 |
k2 = k0 + n2 |
287 |
k3 = k0 + n3 |
288 |
cdir$ ivdep |
289 |
Do k = 0, l1 - 1 |
290 |
u1r = ur(k+ku) |
291 |
u1i = is*ui(k+ku) |
292 |
u2r = u1r*u1r - u1i*u1i |
293 |
u2i = 2.0D0*u1r*u1i |
294 |
u3r = u1r*u2r - u1i*u2i |
295 |
u3i = u1r*u2i + u1i*u2r |
296 |
c0r = wr(k+k0) |
297 |
c0i = wi(k+k0) |
298 |
w1 = wr(k+k1) |
299 |
w2 = wi(k+k1) |
300 |
c1r = u1r*w1 - u1i*w2 |
301 |
c1i = u1r*w2 + u1i*w1 |
302 |
w1 = wr(k+k2) |
303 |
w2 = wi(k+k2) |
304 |
c2r = u2r*w1 - u2i*w2 |
305 |
c2i = u2r*w2 + u2i*w1 |
306 |
w1 = wr(k+k3) |
307 |
w2 = wi(k+k3) |
308 |
c3r = u3r*w1 - u3i*w2 |
309 |
c3i = u3r*w2 + u3i*w1 |
310 |
d0r = c0r + c2r |
311 |
d0i = c0i + c2i |
312 |
d1r = c0r - c2r |
313 |
d1i = c0i - c2i |
314 |
d2r = c1r + c3r |
315 |
d2i = c1i + c3i |
316 |
d3r = is*(-c1i + c3i) |
317 |
d3i = is*( c1r - c3r) |
318 |
xr(k+j0) = d0r + d2r |
319 |
xi(k+j0) = d0i + d2i |
320 |
xr(k+j1) = d1r + d3r |
321 |
xi(k+j1) = d1i + d3i |
322 |
xr(k+j2) = d0r - d2r |
323 |
xi(k+j2) = d0i - d2i |
324 |
xr(k+j3) = d1r - d3r |
325 |
xi(k+j3) = d1i - d3i |
326 |
End Do |
327 |
End Do |
328 |
ku = ku + l1 |
329 |
ln = ln/4 |
330 |
l1 = l4 |
331 |
l2 = 2*l1 |
332 |
l3 = 3*l1 |
333 |
l4 = 4*l1 |
334 |
End Do |
335 |
Go To 400 |
336 |
! ---------------------------------------------------------------------- |
337 |
! --- Perform one radix-2 iteration to complete the calculation. This |
338 |
! branch is taken when the last iteration performed above is stored |
339 |
! in XR/XI. |
340 |
|
341 |
100 l2 = l1/2 |
342 |
j0 = 1 |
343 |
j1 = j0 + l1 |
344 |
j2 = j0 + l2 |
345 |
j3 = j1 + l2 |
346 |
k0 = 1 |
347 |
k1 = k0 + l1 |
348 |
k2 = k0 + l2 |
349 |
k3 = k1 + l2 |
350 |
cdir$ ivdep |
351 |
Do k = 0, l2 - 1 |
352 |
u1r = ur(2*k + ku) |
353 |
u1i = is*ui(2*k + ku) |
354 |
c0r = xr(k+k0) |
355 |
c0i = xi(k+k0) |
356 |
x1 = xr(k+k1) |
357 |
x2 = xi(k+k1) |
358 |
c1r = u1r*x1 - u1i*x2 |
359 |
c1i = u1r*x2 + u1i*x1 |
360 |
wr(k+j0) = c0r + c1r |
361 |
wi(k+j0) = c0i + c1i |
362 |
wr(k+j1) = c0r - c1r |
363 |
wi(k+j1) = c0i - c1i |
364 |
u2r = -is*u1i |
365 |
u2i = is*u1r |
366 |
c0r = xr(k+k2) |
367 |
c0i = xi(k+k2) |
368 |
x1 = xr(k+k3) |
369 |
x2 = xi(k+k3) |
370 |
c1r = u2r*x1 - u2i*x2 |
371 |
c1i = u2r*x2 + u2i*x1 |
372 |
wr(k+j2) = c0r + c1r |
373 |
wi(k+j2) = c0i + c1i |
374 |
wr(k+j3) = c0r - c1r |
375 |
wi(k+j3) = c0i - c1i |
376 |
End Do |
377 |
|
378 |
200 Do k = 1, n |
379 |
xr(k) = wr(k) |
380 |
xi(k) = wi(k) |
381 |
End Do |
382 |
Go To 400 |
383 |
! ---------------------------------------------------------------------- |
384 |
! --- Perform one radix-2 iteration to complete the calculation. This |
385 |
! branch is taken if the last iteration performed above is stored |
386 |
! in W. If M = 3, the usual settings of L1 and KU do not hold, so |
387 |
! they have to be set in this case. |
388 |
|
389 |
300 If ( m == 3 ) Then |
390 |
l1 = 4 |
391 |
ku = 3 |
392 |
End If |
393 |
l2 = l1/2 |
394 |
j0 = 1 |
395 |
j1 = j0 + l1 |
396 |
j2 = j0 + l2 |
397 |
j3 = j1 + l2 |
398 |
k0 = 1 |
399 |
k1 = k0 + l1 |
400 |
k2 = k0 + l2 |
401 |
k3 = k1 + l2 |
402 |
cdir$ ivdep |
403 |
Do k = 0, l2 - 1 |
404 |
u1r = ur(2*k + ku) |
405 |
u1i = is*ui(2*k + ku) |
406 |
c0r = wr(k+k0) |
407 |
c0i = wi(k+k0) |
408 |
w1 = wr(k+k1) |
409 |
w2 = wi(k+k1) |
410 |
c1r = u1r*w1 - u1i*w2 |
411 |
c1i = u1r*w2 + u1i*w1 |
412 |
xr(k+j0) = c0r + c1r |
413 |
xi(k+j0) = c0i + c1i |
414 |
xr(k+j1) = c0r - c1r |
415 |
xi(k+j1) = c0i - c1i |
416 |
u2r = -is*u1i |
417 |
u2i = is*u1r |
418 |
c0r = wr(k+k2) |
419 |
c0i = wi(k+k2) |
420 |
w1 = wr(k+k3) |
421 |
w2 = wi(k+k3) |
422 |
c1r = u2r*w1 - u2i*w2 |
423 |
c1i = u2r*w2 + u2i*w1 |
424 |
xr(k+j2) = c0r + c1r |
425 |
xi(k+j2) = c0i + c1i |
426 |
xr(k+j3) = c0r - c1r |
427 |
xi(k+j3) = c0i - c1i |
428 |
End Do |
429 |
! ---------------------------------------------------------------------- |
430 |
! --- Finished: Return |
431 |
|
432 |
400 Return |
433 |
! ---------------------------------------------------------------------- |
434 |
! --- Formats: |
435 |
|
436 |
1000 Format(' *** Error in CFFT4: Either U has not been initialised',/ |
437 |
& ,' or one of the input parameters IS or M is invalid:',/ |
438 |
& ,' IS = ', I5, '; M = ', I5) |
439 |
! ---------------------------------------------------------------------- |
440 |
End Subroutine cfft4 |