/synthbench/euroben-dm/mod2f/cfft4.f - PRACE NPT - Greek Research and Technology Network's projects

root / synthbench / euroben-dm / mod2f / cfft4.f @ 0:839f52ef7657

History | View | Annotate | Download (13.3 kB)

             Subroutine cfft4( is, m, ur, ui, xr, xi, wr, wi )
       ! ----------------------------------------------------------------------
       ! --- CFFT4 performs an N-point complex-to-complex FFT, where N = 2**M.
       !     On input XR holds the Real part of the complex array;
       !     XI holds the Imaginary part.
       !     On output XR contains the Real part of the transformed array.
       !     XI then contains the Imaginary part of the result.
       !     WR and WI are work arrays that store Real and Imaginary parts
       !     of intermediate results.
       !     UR and UI contain rotation factors for various stages of the
       !     transformation.
+      !
       ! --- Before doing the actual transformation UR and UI must be
       !     initialised by calling CFFT4 with IS = 0, and M = MX, where MX
       !     is the maximum value of M that will be needed for any call of
       !     CFFT4.
+      !
       ! --- A call to CFFT4 with IS = 1 (or -1) indicates a call to perform
       !     a FFT with positive (or negative) exponentials.
+      !
       !     For any call to CFFT4 M must be M >= 2.
       !     Dimensions of the arrays XR, XI, UR, UI, WR, WI are:
+      !
       !     XR(N), XI(N), WR(N), WI(N).
       !     UR(2**MX+1),       UI(2**MX+1)       for MX even.
       !     UR(2*2**(MX/3)+1), UI(2*2**(MX/3)+1) for MX odd.
+      !
       !     In this routine we use 8-byte Real elements.
       ! ----------------------------------------------------------------------
             Use                    numerics
             Real(l_)            :: ur(*), ui(*), xr(*), xi(*), wr(*), wi(*)
             Integer             :: is, m
             Integer             :: i,  it, j,  j0, j1, j2, j3, j4, j5, k , kn,
            &                       ku, k0, k1, k2, k3, ln, l1, l2, l3, l4, mx,
            &                       n , nu, n1, n2, n3
             Real(l_), Parameter :: pi = 3.141592653589793_l_
             Real(l_)            :: c0r, c0i, c1r, c1i, c2r, c2i, c3r, c3i,
            &                       d0r, d0i, d1r, d1i, d2r, d2i, d3r, d3i,
            &                       t  , ti , u1r, u1i, u2r, u2i, u3r, u3i,
            &                       x1 , x2 , w1 , w2
       ! ----------------------------------------------------------------------
+      !
             If ( is == 0 ) Then
       ! ----------------------------------------------------------------------
       ! --- Initialise the U array with sines and cosines in a manner that
       !     permits stride one access at each FFT iteration.
                nu    = 2**((m+1)/2*2)/3
                ur(1) = 64*nu + m
                ku    = 2
                ln    = 1
                Do j = 0, (m+1)/2 - 1
                   t = pi/(2*ln)
       cdir$ ivdep
                   Do i = 0, ln - 1
                      ti       = i*t
                      ur(i+ku) = Cos( ti )
                      ui(i+ku) = Sin( ti )
                   End Do
                   ku = ku + ln
                   ln = 4*ln
                End Do
                Return
             End If
       ! ----------------------------------------------------------------------
       ! --- Transformation call to CFFT4: set initial parameters.
             n  = 2**m
             k  = ur(1)
             mx = Mod( k, 64 )
       ! ----------------------------------------------------------------------
       ! --- Check for invalid input parameters.
             If ( ( is /= 1 .AND. is /= -1 ) .OR. m < 2 .OR. m > mx ) Then
                Print 1000, is, m
                Stop
             End If
       ! ----------------------------------------------------------------------
+      !
             nu = k/64
             n1 = n/4
             n2 = 2*n1
             n3 = 3*n1
             ln = n1
       ! ----------------------------------------------------------------------
       ! --- For the first two radix-4 iterations, the inner loop increments J
       !     instead of K. Non-unit stride memory access is used but the longer
       !     vector lengths should compensate for that.
             j0 = 1
             j1 = j0 + 1
             j2 = j0 + 2
             j3 = j0 + 3
             k0 = j0
             k1 = k0 + n1
             k2 = k0 + n2
             k3 = k0 + n3
       cdir$ ivdep
             Do j = 0, ln - 1
                c0r = xr(j+k0)
                c0i = xi(j+k0)
                c1r = xr(j+k1)
                c1i = xi(j+k1)
                c2r = xr(j+k2)
                c2i = xi(j+k2)
                c3r = xr(j+k3)
                c3i = xi(j+k3)
                d0r = c0r + c2r
                d0i = c0i + c2i
                d1r = c0r - c2r
                d1i = c0i - c2i
                d2r = c1r + c3r
                d2i = c1i + c3i
                d3r = is*(-c1i + c3i)
                d3i = is*( c1r - c3r)
                j4  = 4*j
                wr(j4+j0) = d0r + d2r
                wi(j4+j0) = d0i + d2i
                wr(j4+j1) = d1r + d3r
                wi(j4+j1) = d1i + d3i
                wr(j4+j2) = d0r - d2r
                wi(j4+j2) = d0i - d2i
                wr(j4+j3) = d1r - d3r
                wi(j4+j3) = d1i - d3i
             End Do
       ! ----------------------------------------------------------------------
       ! --- If M = 2 all transformation work is done, just copy WR/WI into
       !     XR/XI (label 200).
       !     If M = 3, one radix-2 iteration still has to be done (label 300).
             If ( m == 2 ) Go To 200
             If ( m == 3 ) Go To 300
       ! --- Else:
       ! ----------------------------------------------------------------------
       ! --- Perform the second radix-4 iteration.
             ln = ln/4
             Do k = 0, 3
                j0  = k + 1
                j1  = j0 + 4
                j2  = j0 + 8
                j3  = j0 + 12
                k0  = j0
                k1  = k0 + n1
                k2  = k0 + n2
                k3  = k0 + n3
                u1r =    ur(k+3)
                u1i = is*ui(k+3)
                u2r = u1r*u1r - u1i*u1i
                u2i = 2.0D0*u1r*u1i
                u3r = u1r*u2r - u1i*u2i
                u3i = u1r*u2i + u1i*u2r
       cdir$ ivdep
                Do j = 0, ln - 1
                   j4  = 4*j
                   c0r = wr(j4+k0)
                   c0i = wi(j4+k0)
                   w1  = wr(j4+k1)
                   w2  = wi(j4+k1)
                   c1r = u1r*w1 - u1i*w2
                   c1i = u1r*w2 + u1i*w1
                   w1  = wr(j4+k2)
                   w2  = wi(j4+k2)
                   c2r = u2r*w1 - u2i*w2
                   c2i = u2r*w2 + u2i*w1
                   w1  = wr(j4+k3)
                   w2  = wi(j4+k3)
                   c3r = u3r*w1 - u3i*w2
                   c3i = u3r*w2 + u3i*w1
                   d0r = c0r + c2r
                   d0i = c0i + c2i
                   d1r = c0r - c2r
                   d1i = c0i - c2i
                   d2r = c1r + c3r
                   d2i = c1i + c3i
                   d3r = is*(-c1i + c3i)
                   d3i = is*( c1r - c3r)
                   j5  = 16*j
                   xr(j5+j0) = d0r + d2r
                   xi(j5+j0) = d0i + d2i
                   xr(j5+j1) = d1r + d3r
                   xi(j5+j1) = d1i + d3i
                   xr(j5+j2) = d0r - d2r
                   xi(j5+j2) = d0i - d2i
                   xr(j5+j3) = d1r - d3r
                   xi(j5+j3) = d1i - d3i
                End Do
             End Do
       ! ----------------------------------------------------------------------
       ! --- Set parameters for subsequent iterations.
             ku = 7
             ln = ln/4
             l1 = 16
             l2 = 32
             l3 = 48
             l4 = 64
       ! ----------------------------------------------------------------------
       ! --- For all other radix-4 iterations, the inner loops increment K. In
       !     this manner, all array references have unit stride. The radix-4
       !     iterations are performed in pairs: XR/XI to WR/WI and vice
       !     versa. This eliminates the need to copy data between X and W
       !     after each iteration.
       ! --- If IT = M - 1 then only one radix-2 iteration needs to be done
       !     (label 100).
             Do it = 4, m - 1, 4
                If ( it == m - 1 ) Go To 100
                Do j = 0, ln - 1
                   j0 = j*l4 + 1
                   j1 = j0 + l1
                   j2 = j0 + l2
                   j3 = j0 + l3
                   k0 = j*l1 + 1
                   k1 = k0 + n1
                   k2 = k0 + n2
                   k3 = k0 + n3
       cdir$ ivdep
                   Do k = 0, l1 - 1
                      u1r =    ur(k+ku)
                      u1i = is*ui(k+ku)
                      u2r = u1r*u1r - u1i*u1i
                      u2i = 2.0D0*u1r*u1i
                      u3r = u1r*u2r - u1i*u2i
                      u3i = u1r*u2i + u1i*u2r
                      c0r = xr(k+k0)
                      c0i = xi(k+k0)
                      x1  = xr(k+k1)
                      x2  = xi(k+k1)
                      c1r = u1r*x1 - u1i*x2
                      c1i = u1r*x2 + u1i*x1
                      x1  = xr(k+k2)
                      x2  = xi(k+k2)
                      c2r = u2r*x1 - u2i*x2
                      c2i = u2r*x2 + u2i*x1
                      x1  = xr(k+k3)
                      x2  = xi(k+k3)
                      c3r = u3r*x1 - u3i*x2
                      c3i = u3r*x2 + u3i*x1
                      d0r = c0r + c2r
                      d0i = c0i + c2i
                      d1r = c0r - c2r
                      d1i = c0i - c2i
                      d2r = c1r + c3r
                      d2i = c1i + c3i
                      d3r = is*(-c1i + c3i)
                      d3i = is*( c1r - c3r)
                      wr(k+j0) = d0r + d2r
                      wi(k+j0) = d0i + d2i
                      wr(k+j1) = d1r + d3r
                      wi(k+j1) = d1i + d3i
                      wr(k+j2) = d0r - d2r
                      wi(k+j2) = d0i - d2i
                      wr(k+j3) = d1r - d3r
                      wi(k+j3) = d1i - d3i
                   End Do
                End Do
                ku = ku + l1
                ln = ln/4
                l1 = l4
                l2 = 2*l1
                l3 = 3*l1
                l4 = 4*l1
       ! ----------------------------------------------------------------------
       ! --- If IT = M - 2 the computation is complete except for copying
       !                   WR/WI to XR/XI (label 200).
       ! --- If IT = M - 3 then only one radix-2 iteration remains to be done
       !                   (label 300).
                If ( it == m - 2 ) Go To 200
                If ( it == m - 3 ) Go To 300
+      !
                Do j = 0, ln - 1
                   j0 = j*l4 + 1
                   j1 = j0 + l1
                   j2 = j0 + l2
                   j3 = j0 + l3
                   k0 = j*l1 + 1
                   k1 = k0 + n1
                   k2 = k0 + n2
                   k3 = k0 + n3
       cdir$ ivdep
                   Do k = 0, l1 - 1
                      u1r =    ur(k+ku)
                      u1i = is*ui(k+ku)
                      u2r = u1r*u1r - u1i*u1i
                      u2i = 2.0D0*u1r*u1i
                      u3r = u1r*u2r - u1i*u2i
                      u3i = u1r*u2i + u1i*u2r
                      c0r = wr(k+k0)
                      c0i = wi(k+k0)
                      w1  = wr(k+k1)
                      w2  = wi(k+k1)
                      c1r = u1r*w1 - u1i*w2
                      c1i = u1r*w2 + u1i*w1
                      w1  = wr(k+k2)
                      w2  = wi(k+k2)
                      c2r = u2r*w1 - u2i*w2
                      c2i = u2r*w2 + u2i*w1
                      w1  = wr(k+k3)
                      w2  = wi(k+k3)
                      c3r = u3r*w1 - u3i*w2
                      c3i = u3r*w2 + u3i*w1
                      d0r = c0r + c2r
                      d0i = c0i + c2i
                      d1r = c0r - c2r
                      d1i = c0i - c2i
                      d2r = c1r + c3r
                      d2i = c1i + c3i
                      d3r = is*(-c1i + c3i)
                      d3i = is*( c1r - c3r)
                      xr(k+j0) = d0r + d2r
                      xi(k+j0) = d0i + d2i
                      xr(k+j1) = d1r + d3r
                      xi(k+j1) = d1i + d3i
                      xr(k+j2) = d0r - d2r
                      xi(k+j2) = d0i - d2i
                      xr(k+j3) = d1r - d3r
                      xi(k+j3) = d1i - d3i
                   End Do
                End Do
                ku = ku + l1
                ln = ln/4
                l1 = l4
                l2 = 2*l1
                l3 = 3*l1
                l4 = 4*l1
             End Do
             Go To 400
       ! ----------------------------------------------------------------------
       ! --- Perform one radix-2 iteration to complete the calculation. This
       !     branch is taken when the last iteration performed above is stored
       !     in XR/XI.
 l2 = l1/2
             j0 = 1
             j1 = j0 + l1
             j2 = j0 + l2
             j3 = j1 + l2
             k0 = 1
             k1 = k0 + l1
             k2 = k0 + l2
             k3 = k1 + l2
       cdir$ ivdep
             Do k = 0, l2 - 1
                u1r =    ur(2*k + ku)
                u1i = is*ui(2*k + ku)
                c0r = xr(k+k0)
                c0i = xi(k+k0)
                x1  = xr(k+k1)
                x2  = xi(k+k1)
                c1r = u1r*x1 - u1i*x2
                c1i = u1r*x2 + u1i*x1
                wr(k+j0) = c0r + c1r
                wi(k+j0) = c0i + c1i
                wr(k+j1) = c0r - c1r
                wi(k+j1) = c0i - c1i
                u2r = -is*u1i
                u2i =  is*u1r
                c0r = xr(k+k2)
                c0i = xi(k+k2)
                x1  = xr(k+k3)
                x2  = xi(k+k3)
                c1r = u2r*x1 - u2i*x2
                c1i = u2r*x2 + u2i*x1
                wr(k+j2) = c0r + c1r
                wi(k+j2) = c0i + c1i
                wr(k+j3) = c0r - c1r
                wi(k+j3) = c0i - c1i
             End Do
 Do k = 1, n
                xr(k) = wr(k)
                xi(k) = wi(k)
             End Do
             Go To 400
       ! ----------------------------------------------------------------------
       ! --- Perform one radix-2 iteration to complete the calculation. This
       !     branch is taken if the last iteration performed above is stored
       !     in W. If M = 3, the usual settings of L1 and KU do not hold, so
       !     they have to be set in this case.
 If ( m == 3 ) Then
                l1 = 4
                ku = 3
             End If
             l2 = l1/2
             j0 = 1
             j1 = j0 + l1
             j2 = j0 + l2
             j3 = j1 + l2
             k0 = 1
             k1 = k0 + l1
             k2 = k0 + l2
             k3 = k1 + l2
       cdir$ ivdep
             Do k = 0, l2 - 1
                u1r =    ur(2*k + ku)
                u1i = is*ui(2*k + ku)
                c0r = wr(k+k0)
                c0i = wi(k+k0)
                w1  = wr(k+k1)
                w2  = wi(k+k1)
                c1r = u1r*w1 - u1i*w2
                c1i = u1r*w2 + u1i*w1
                xr(k+j0) = c0r + c1r
                xi(k+j0) = c0i + c1i
                xr(k+j1) = c0r - c1r
                xi(k+j1) = c0i - c1i
                u2r = -is*u1i
                u2i =  is*u1r
                c0r = wr(k+k2)
                c0i = wi(k+k2)
                w1  = wr(k+k3)
                w2  = wi(k+k3)
                c1r = u2r*w1 - u2i*w2
                c1i = u2r*w2 + u2i*w1
                xr(k+j2) = c0r + c1r
                xi(k+j2) = c0i + c1i
                xr(k+j3) = c0r - c1r
                xi(k+j3) = c0i - c1i
             End Do
       ! ----------------------------------------------------------------------
       ! --- Finished: Return
 Return
       ! ----------------------------------------------------------------------
       ! --- Formats:
 Format(' *** Error in CFFT4: Either U has not been initialised',/
            &       ,' or one of the input parameters IS or M is invalid:',/
            &       ,' IS = ', I5, '; M = ', I5)
       ! ----------------------------------------------------------------------
             End Subroutine cfft4