        program incomp


*                    incomp system 
* The code features
* incompressible fluid
* spheriod in precession frame; Omegap=Omegapx
* 3 grid system: 1 cube; 2 north pole; 3 south pole (2 and 3 with 
*   orthographic projection)
* @ Runge-Kutta in time: 2nd and 3rd order(TVD), 4th order(non-TVD);
*********************************************************************
c The code uses a Poisson solver in spheroidal coordinate, which
c uses FFT routines and routines from FISHPACK by NCAR.
c c FFT programs are provided by
c http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
c Copyright Takuya OOURA, 1996-2001 
c You may use, copy, modify and distribute this code for any purpose 
c (include commercial use) and without fee. Please refer to his package 
c when you modify the code.
c
c The program Blktri.f and other routines it uses are used in this 
c program, but are not included here;
c they are available from
c http://www.cisl.ucar.edu/css/software/fishpack/
c copyright (c) 1999 by UCAR
c
c typical compilation using ifort:
c ifort /O3 /4R8 /QxS webfluidnonlinear.f
c


        parameter( mtm=4, mn=4, mngp=20000, mx1=51, my1=51, mz1=51 )
        parameter( mx2=71, my2=71, mz2=61+1 )
        parameter( mx3=71, my3=71, mz3=61+1 )

c for grid 5 and 5b and 5 ex
      integer nphi, nmu, nnu, nmuh
      parameter (nphi=64*2, nmu=nphi, nnu=nphi/2, nmuh=nmu/2)
      real amu(nmu), aaanu(nnu), aphi(nphi), amuh(nmuh)
      integer ig5(nphi+2, nmuh+2, nnu+2), ngp5(nphi+2, nmuh+2, nnu+2, 3)
      real drngp5(nphi+2, nmuh+2, nnu+2, 3)
      integer ig5b(nphi+2,nnu+2), ngp5b(nphi+2, nnu+2,3)
      real drngp5b(nphi+2,nnu+2,3), g5b(nphi+2,nnu+2)
      real uc5(nphi+2, nmuh+2, nnu+2, 3), uc5b(nphi+2, nnu+2, 3)
      real uc5old(nphi+2, nmuh+2, nnu+2, 3)
      real cc, aa, aa2, amu0, philength, amulength, anulength
      real dphi, dmu, dnu, pi, pphi, aamu, aanu
c for double
      real uc5p(nphi, nmuh, nnu),uc5pdouble(nphi, nmu, nnu)
      real uc5pex(nphi+2, nmuh+2, nnu+2)
      real bndrynu(nphi,nmu,2), bndrymu(nphi,nnu)
      real bndrynumu(nphi,2), errmax, diff
      real uc5bndry(nphi,nnu,2)
      
c for 5 ex and Grid 1, 2, 3
c      real uc5ex(nphi+2, nmuh+2, nnu+2)
      integer ngp15(mx1,my1,mz1,3)
      real drngp15(mx1,my1,mz1,3)      
      integer ngp25(mx2,my2,mz2,3)
      real drngp25(mx2,my2,mz2,3)      
      integer ngp35(mx3,my3,mz3,3)
      real drngp35(mx3,my3,mz3,3)      
      integer ngp25b(mx2,my2,2)
      real drngp25b(mx2,my2,2)      
      integer ngp35b(mx3,my3,2)
      real drngp35b(mx3,my3,2)      

c
c

        dimension uc1(mx1,my1,mz1,mn), rhs1(mx1,my1,mz1,mn-1,mtm)
     1 , ig1(mx1,my1,mz1), g1(mx1,my1,mz1,3), rhsp1(mx1,my1,mz1)
     1 , ngp12(mngp,3), dr12(mngp,3)
     1 , ngp13(mngp,3), dr13(mngp,3)

        dimension uc2(mx2,my2,mz2,mn), rhs2(mx2,my2,mz2,mn-1,mtm)
     1 , ig2(mx2,my2,mz2), g2(mx2,my2,mz2,3), trans2(mx2,my2,mz2,3,3)
     1 , ngp21(mngp,3), dr21(mngp,3), rhsp2(mx2,my2,mz2)
     1 , ngp23(mngp,3), dr23(mngp,3)
     1 , uc2bndry(mx2,my2,3,9)

        dimension uc3(mx3,my3,mz3,mn), rhs3(mx3,my3,mz3,mn-1,mtm)
     1 , ig3(mx3,my3,mz3), g3(mx3,my3,mz3,3), trans3(mx3,my3,mz3,3,3)
     1 , ngp31(mngp,3), dr31(mngp,3), rhsp3(mx3,my3,mz3)
     1 , ngp32(mngp,3), dr32(mngp,3)
     1 , uc3bndry(mx3,my3,3,9)



      common /var1/ omega, omegap, omegapx,omegapz,ca,cb,anu, factor 


c
c for grid 5
c cc=aspect ratio of the spheroid
      cc=0.8
      amu0=0.5*(alog(1.+cc)-alog(1.-cc))
      aa2=1.-cc*cc
      aa=sqrt(aa2)
        pi=4.*atan(1.)
        philength=2.*pi
        amulength=2.*amu0
        anulength=pi

        dphi = philength / float(nphi)
        
        dmu = amulength / float(nmu)       

        dnu = anulength / float(nnu)       
c
        do i = 1, nphi
          aphi(i) = (i-1) * dphi
        enddo

        do j = 1, nmu
          amu(j) = -amu0 + (float(j)-0.5) * dmu
        enddo

        do j = 1, nmuh
          amuh(j) =  (float(j)-0.5) * dmu
        enddo


        do k = 1, nnu
          aaanu(k) = (float(k)-0.5) * dnu
        enddo
        
c



c parameters
      factor=cc
      ca=1.
      cb=ca*factor

	omega=1.
      omegap=0.25

      anu=0.001
c
      omegapx=omegap

      dr1=2.*0.65/(mx1-1)
      ds1=dr1
      dt1=dr1

      dr2=2./float(mx2-1)
      ds2=2./float(my2-1)
      dt2=(1.-0.4)/float(mz2-2)

      dr3=2./float(mx3-1)
      ds3=2./float(my3-1)
      dt3=(1.-0.4)/float(mz3-2)

c lrstrt=0, initialization; =1 restart
c tf= final time; mt: order in time integration
      lrstrt=0
      ntot=10000
      tf=600.*2.
      mt=2
      cfl=0.6

c**************  begin reading in parameters  ***********************

      nx1=mx1
      ny1=my1
      nz1=mz1
      nxs1=1
      nxe1=nx1
      nys1=1
      nye1=ny1
      nzs1=1
      nze1=nz1

      nx2=mx2
      ny2=my2
      nz2=mz2
      nxs2=1
      nxe2=nx2
      nys2=1
      nye2=ny2
      nzs2=1
      nze2=nz2-1

      nx3=mx3
      ny3=my3
      nz3=mz3
      nxs3=1
      nxe3=nx3
      nys3=1
      nye3=ny3
      nzs3=1
      nze3=nz3-1


c read grid info
c G1
      read(81) ig1, g1
      icount=0

      do i=1,nx1
      do j=1,ny1
      do k=1,nz1
      if(ig1(i,j,k).eq.-2) then
      icount=icount+1
      read(82, 821) (ngp12(icount,m),m=1,3), (dr12(icount,m),m=1,3)
  821 format(3i5,3e20.12)
	end if 
      end do
      end do
      end do
      numngp12=icount      
      if(icount.gt.mngp) 
     1 write(*,*) 'trouble in interpolating points', icount

      icount=0
      do i=1,nx1
      do j=1,ny1
      do k=1,nz1
      if(ig1(i,j,k).eq.-3) then
      icount=icount+1
      read(83, 821) (ngp13(icount,m),m=1,3), (dr13(icount,m),m=1,3)
	end if 
      end do
      end do
      end do
      numngp13=icount      
      if(icount.gt.mngp) 
     1 write(*,*) 'trouble in interpolating points', icount

c G2
      read(84) ig2, g2, trans2
       icount=0
      do i=1,nx2
      do j=1,ny2
      do k=1,nz2
      if(ig2(i,j,k).eq.-1) then
      icount=icount+1
      read(85, 821) (ngp21(icount,m),m=1,3), (dr21(icount,m),m=1,3)
	end if 
      end do
      end do
      end do
      numngp21=icount      

      icount=0
      do i=1,nx2
      do j=1,ny2
      do k=1,nz2
      if(ig2(i,j,k).eq.-3) then
      icount=icount+1
      read(86, 821) (ngp23(icount,m),m=1,3), (dr23(icount,m),m=1,3)
	end if 
      end do
      end do
      end do

      numngp23=icount      
      if(icount.gt.mngp) 
     1 write(*,*) 'trouble in interpolating points', icount



c G3

      read(87) ig3, g3, trans3

      icount=0
      do i=1,nx3
      do j=1,ny3
      do k=1,nz3
      if(ig3(i,j,k).eq.-1) then
      icount=icount+1
      read(88, 821) (ngp31(icount,m),m=1,3), (dr31(icount,m),m=1,3)
	end if 
      end do
      end do
      end do
      numngp31=icount      
      if(icount.gt.mngp) 
     1 write(*,*) 'trouble in interpolating points', icount

      icount=0
      do i=1,nx3
      do j=1,ny3
      do k=1,nz3
      if(ig3(i,j,k).eq.-2) then
      icount=icount+1
      read(89, 821) (ngp32(icount,m),m=1,3), (dr32(icount,m),m=1,3)
	end if 
      end do
      end do
      end do

      numngp32=icount      
      if(icount.gt.mngp) 
     1 write(*,*) 'trouble in interpolating points', icount


c for G5 and G5b
      read(91) ig5, ngp5, drngp5, ig5b, ngp5b, drngp5b
      read(92) ngp15, drngp15, ngp25, drngp25, ngp35, drngp35
      read(93) ngp25b, drngp25b, ngp35b, drngp35b



c set up coefficients in grids 2 and 3
      igrid=2
      call bndryvdd(uc2bndry,g2, trans2,ig2,mx2,my2,mz2,
     1 dr2,ds2,dt2, nxs2,nxe2,nys2,nye2,nzs2,nze2,igrid)
      igrid=3
      call bndryvdd(uc3bndry,g3, trans3,ig3,mx3,my3,mz3,
     1 dr3,ds3,dt3, nxs3,nxe3,nys3,nye3,nzs3,nze3,igrid)


c Initial  state:
c

	if( lrstrt .ne. 0 ) then
c restart
	  read(8) nts, tc, uc1, uc2, uc3
      write(*,*) 'nts= ', nts
	  close(8)
       else
c initialization
	  nts = 0
        tc = 0.0
c
c set uc=0
      do 11 m=1,4
      do 11 i=1,nx1
      do 11 j=1,ny1
      do 11 k=1,nz1
      uc1(i,j,k,m)=0.
   11 continue


      do 12 m=1,4
      do 12 i=1,nx2
      do 12 j=1,ny2
      do 12 k=1,nz2
      uc2(i,j,k,m)=0.
   12 continue


      do 13 m=1,4
      do 13 i=1,nx3
      do 13 j=1,ny3
      do 13 k=1,nz3
      uc3(i,j,k,m)=0.
   13 continue

      igrid=1
      call setup(uc1,ig1,g1,mx1,my1,mz1,
     1 nxs1,nxe1,nys1,nye1,nzs1,nze1,igrid)
      igrid=2
      call setup(uc2,ig2,g2,mx2,my2,mz2,
     1 nxs2,nxe2,nys2,nye2,nzs2,nze2+1,igrid)
      igrid=3
      call setup(uc3,ig3,g3,mx3,my3,mz3,
     1 nxs3,nxe3,nys3,nye3,nzs3,nze3+1,igrid)

      ifort=10
c	do k=1,nz1
c	do j=1,ny1
c	do i=1,nx1
c	write(ifort,1102) g1(i,j,k,1), g1(i,j,k,2), g1(i,j,k,3), 
c     1      ig1(i,j,k),(uc1(i,j,k,ii),ii=1,4) 
c	end do
c	end do
c	end do
c      ifort=10
c	do k=1,nz2
c	do j=1,ny2
c	do i=1,nx2
c	write(ifort,1102) g2(i,j,k,1), g2(i,j,k,2), g2(i,j,k,3), 
c     1      ig2(i,j,k),(uc2(i,j,k,ii),ii=1,4) 
c	end do
c	end do
c	end do
 1102 format(3f10.5,i3,4e13.4)

       end if

c

***************  end of setting up initial condition ****************


         istop = 0


	do 1001 nt = nts+1, ntot
	do 1000 io = 1, mt

c for grid 1
      igrid=1
      call motion1(uc1,g1, rhs1,ig1,mx1,my1,mz1,dr1,ds1,dt1,
     1 nxs1,nxe1,nys1,nye1,nzs1,nze1,igrid,io)


c for grid 2
      igrid=2
      call motion2(uc2,g2, trans2,rhs2,ig2,mx2,my2,mz2,
     1 dr2,ds2,dt2, nxs2,nxe2,nys2,nye2,nzs2,nze2,igrid,io)

c for grid 3
      igrid=3
      call motion2(uc3,g3, trans3,rhs3,ig3,mx3,my3,mz3,
     1 dr3,ds3,dt3, nxs3,nxe3,nys3,nye3,nzs3,nze3,igrid,io)

c solve pressure eq
c interpolating data among grids and set bndry conditions
c
c
c points on grid 2 from grid 3
      igrid=-3
      call interpRHS2(rhs2,ig2, mx2, my2, mz2,igrid, 
     1 nxs2,nxe2,nys2,nye2,nzs2,nze2+1,ngp23, dr23, 
     1 numngp23, rhs3,mx3,my3,mz3, dr3,ds3,dt3,mngp,io)

c grid 3 from grid 2
      igrid=-2
      call interpRHS2(rhs3,ig3, mx3, my3, mz3,igrid, 
     1 nxs3,nxe3,nys3,nye3,nzs3,nze3+1,ngp32, dr32, 
     1 numngp32, rhs2,mx2,my2,mz2, dr2,ds2,dt2,mngp,io)


c  interpolating points on G1, obtaining from G2
      igrid=-2
      call interpRHS1(rhs1, ig1,mx1,my1,mz1,igrid,
     1 nxs1,nxe1,nys1,nye1,nzs1,nze1,
     1 ngp12, dr12, numngp12, rhs2,mx2,my2,mz2, dr2,ds2,dt2,mngp,io)

c  interpolating points on G1, obtaining from G3
      igrid=-3
      call interpRHS1(rhs1, ig1,mx1,my1,mz1,igrid, 
     1 nxs1,nxe1,nys1,nye1,nzs1,nze1,
     1 ngp13, dr13, numngp13, rhs3,mx3,my3,mz3, dr3,ds3,dt3,mngp,io)

c points on grid 2 from grid 1
      igrid=-1
      call interpRHS1(rhs2,ig2, mx2, my2, mz2,igrid, 
     1 nxs2,nxe2,nys2,nye2,nzs2,nze2,
     1 ngp21, dr21, numngp21, rhs1,mx1,my1,mz1, dr1,ds1,dt1,mngp,io)

c points on grid 2 from grid 3
      igrid=-3
      call interpRHS2(rhs2,ig2, mx2, my2, mz2,igrid, 
     1 nxs2,nxe2,nys2,nye2,nzs2,nze2+1,
     1 ngp23, dr23, numngp23, rhs3,mx3,my3,mz3, dr3,ds3,dt3,mngp,io)

c grid 3 from grid 1
      igrid=-1
      call interpRHS1(rhs3,ig3, mx3, my3, mz3,igrid, 
     1 nxs3,nxe3,nys3,nye3,nzs3,nze3,
     1 ngp31, dr31, numngp31, rhs1,mx1,my1,mz1, dr1,ds1,dt1,mngp,io)

c grid 3 from grid 2
      igrid=-2
      call interpRHS2(rhs3,ig3, mx3, my3, mz3,igrid, 
     1 nxs3,nxe3,nys3,nye3,nzs3,nze3+1,
     1 ngp32, dr32, numngp32, rhs2,mx2,my2,mz2, dr2,ds2,dt2,mngp,io)

c modifications: (a) find u* on G5 and its bndry
c                (b) on G5 find div u* and solve Laplacian(p)=div u*
c                    subject the bndry condition Grad p= u*_mu
c                (c) find grad p and get u(new)= u*- grad p
c                (d) find u(new) on G1, G2, and G3    
c 
      
      igrid=1
      call interpRHS5(uc5, ig5, ngp5, drngp5, nphi, nmuh, nnu, 
     1  igrid,  rhs1, mx1, my1, mz1, dr1, ds1, dt1, io)
c from grid 2
      igrid=2
      call interpRHS5(uc5, ig5, ngp5, drngp5, nphi, nmuh, nnu, 
     1  igrid,  rhs2, mx2, my2, mz2, dr2, ds2, dt2, io)
c from grid 3
      igrid=3
      call interpRHS5(uc5, ig5, ngp5, drngp5, nphi, nmuh, nnu, 
     1  igrid,  rhs3, mx3, my3, mz3, dr3, ds3, dt3, io)

c bndry of G5 from grid 2 and from grid3
      igrid = 2
      call interpRHS5b(uc5b,  ig5b, ngp5b, drngp5b, nphi, nnu, 
     1 igrid, rhs2,  mx2, my2, mz2, dr2, ds2, dt2, xi0, aa, io)
c  from grid 3
      igrid = 3
      call interpRHS5b(uc5b,  ig5b, ngp5b, drngp5b, nphi, nnu, 
     1 igrid, rhs3,  mx3, my3, mz3, dr3, ds3, dt3, xi0, aa, io)
     
      call bndryuc5b(uc5b, nphi, nmuh, nnu)      
      

            

c move uc5 and uc5b to uc5

      call getuc5(uc5, uc5b, nphi, nmuh, nnu)  
      
     
c           
c Compute div u on G5
      call divu5(uc5, uc5p, nphi, nmuh, nnu, dphi,dmu,dnu,aa)

      

c double the grid
      call doublediv(uc5p, uc5pdouble, nphi, nmu, nnu)


c set bndry condition on UC5b
      call Vn5mu0(uc5b, uc5bndry, nphi, nnu, aphi, aaanu, amu0,aa)
           
c (b3) find Vn at negative mu0 (amu=-amu0)
      call Vn5nmu0(uc5bndry, nphi, nnu)


      call PoisSpheroid(uc5pdouble, uc5bndry, bndrynu, bndrymu,
     1 bndrynumu, aphi, amu, aaanu, nphi, nmu, nnu, aa, dphi, dmu, dnu)

     
cc move to uc5pex
      call reducep(uc5pex, uc5pdouble, bndrynu, bndrymu,
     1 bndrynumu, nphi, nmu, nnu)



c move to grid 1,2 and 3
c (d2) get p on grid 2, 3, and 1
c grid 1  

      igrid=1
      call interpPf5(uc1, ig1, mx1,my1,mz1, igrid, nxs1,nxe1,nys1,nye1,
     1 nzs1,nze1, ngp15, drngp15, uc5pex, nphi+2,nmuh+2,nnu+2, 
     1 dphi, dmu, dnu)

     
c grid 2
      igrid=2
      call interpPf5(uc2, ig2, mx2,my2,mz2, igrid, nxs2,nxe2,nys2,nye2,
     1 nzs2,nze2+1, ngp25, drngp25, uc5pex, nphi+2,nmuh+2,nnu+2,
     1 dphi, dmu, dnu)
      

c grid 3
      igrid=3
      call interpPf5(uc3, ig3, mx3,my3,mz3, igrid, nxs3,nxe3,nys3,nye3,
     1 nzs3,nze3+1, ngp35, drngp35, uc5pex, nphi+2,nmuh+2,nnu+2, 
     1 dphi, dmu, dnu)
     

c v at new step: v=v*-grad(p)
c for grid 1
      igrid=1
      call newRHS1(rhs1, uc1,g1, ig1,mx1,my1,mz1,dr1,ds1,dt1,
     1 nxs1,nxe1,nys1,nye1,nzs1,nze1,igrid,io)

c for grid 2
      igrid=2
      call newRHS2(rhs2,uc2,uc2bndry,g2, trans2,ig2,mx2,my2,mz2,
     1 dr2,ds2,dt2, nxs2,nxe2,nys2,nye2,nzs2,nze2,igrid,io)

c for grid 3
      igrid=3
      call newRHS2(rhs3,uc3,uc3bndry,g3, trans3,ig3,mx3,my3,mz3,
     1 dr3,ds3,dt3, nxs3,nxe3,nys3,nye3,nzs3,nze3,igrid,io)

c

c
c set dt
c
      if(io.eq.1) then
      igrid=1
      call setdt(g1,ig1,uc1,mx1,my1,mz1,
     1 nxs1,nxe1,nys1,nye1,nzs1,nze1,igrid,dtime1,anu,cfl)
      igrid=2
      call setdt(g2,ig2,uc2,mx2,my2,mz2,
     1 nxs2,nxe2,nys2,nye2,nzs2,nze2,igrid,dtime2,anu,cfl)
      igrid=3
      call setdt(g3,ig3,uc3,mx3,my3,mz3,
     1 nxs3,nxe3,nys3,nye3,nzs3,nze3,igrid,dtime3,anu,cfl)

      dtime=amin1(amin1(dtime1,dtime2), dtime3)
      dtime=dtime/sqrt(3.)
      dtime=amin1(dtime, 2.*3.1415926/50.)
      dtime=dtime/2.  

          if( ( tc + dtime ) .ge. tf ) then
            dtime = tf - tc
            istop = 1
          endif

	write(9,*) 'nt,dtime=',nt,dtime
      end if

c
c
      igrid=1
	call rk(uc1,rhs1,ig1,mx1,my1,mz1,mt,io,dtime,
     1 nxs1,nxe1,nys1,nye1,nzs1,nze1,igrid)

      igrid=2
	call rk(uc2,rhs2,ig2,mx2,my2,mz2,mt,io,dtime,
     1 nxs2,nxe2,nys2,nye2,nzs2,nze2,igrid)

      igrid=3
	call rk(uc3,rhs3,ig3,mx3,my3,mz3,mt,io,dtime,
     1 nxs3,nxe3,nys3,nye3,nzs3,nze3,igrid)


c
c interpolating data among grids and set bndry conditions
c
c
c points on grid 2 from grid 3
      igrid=-3
      call interpv2(uc2,ig2, mx2, my2, mz2,igrid, 
     1 nxs2,nxe2,nys2,nye2,nzs2,nze2+1,
     1 ngp23, dr23, numngp23, uc3,mx3,my3,mz3, dr3,ds3,dt3,mngp)

c grid 3 from grid 2
      igrid=-2
      call interpv2(uc3,ig3, mx3, my3, mz3,igrid, 
     1 nxs3,nxe3,nys3,nye3,nzs3,nze3+1,
     1 ngp32, dr32, numngp32, uc2,mx2,my2,mz2, dr2,ds2,dt2,mngp)


c  interpolating points on G1, obtaining from G2
      igrid=-2
      call interpv1(uc1, ig1,mx1,my1,mz1,igrid,
     1 nxs1,nxe1,nys1,nye1,nzs1,nze1,
     1 ngp12, dr12, numngp12, uc2,mx2,my2,mz2, dr2,ds2,dt2,mngp)

c  interpolating points on G1, obtaining from G3
      igrid=-3
      call interpv1(uc1, ig1,mx1,my1,mz1,igrid, 
     1 nxs1,nxe1,nys1,nye1,nzs1,nze1,
     1 ngp13, dr13, numngp13, uc3,mx3,my3,mz3, dr3,ds3,dt3,mngp)

c points on grid 2 from grid 1
      igrid=-1
      call interpv1(uc2,ig2, mx2, my2, mz2,igrid, 
     1 nxs2,nxe2,nys2,nye2,nzs2,nze2,
     1 ngp21, dr21, numngp21, uc1,mx1,my1,mz1, dr1,ds1,dt1,mngp)

c points on grid 2 from grid 3
      igrid=-3
      call interpv2(uc2,ig2, mx2, my2, mz2,igrid, 
     1 nxs2,nxe2,nys2,nye2,nzs2,nze2+1,
     1 ngp23, dr23, numngp23, uc3,mx3,my3,mz3, dr3,ds3,dt3,mngp)

c grid 3 from grid 1
      igrid=-1
      call interpv1(uc3,ig3, mx3, my3, mz3,igrid, 
     1 nxs3,nxe3,nys3,nye3,nzs3,nze3,
     1 ngp31, dr31, numngp31, uc1,mx1,my1,mz1, dr1,ds1,dt1,mngp)

c grid 3 from grid 2
      igrid=-2
      call interpv2(uc3,ig3, mx3, my3, mz3,igrid, 
     1 nxs3,nxe3,nys3,nye3,nzs3,nze3+1,
     1 ngp32, dr32, numngp32, uc2,mx2,my2,mz2, dr2,ds2,dt2,mngp)

c  
c set guard values for v
c
c
c
      igrid=2
      igridi=-3
      call bndryvaa(uc2,uc2bndry,g2, trans2,ig2,mx2,my2,mz2,
     1 dr2,ds2,dt2, nxs2,nxe2,nys2,nye2,nzs2,nze2,igrid,igridi)

      igrid=2
      call bndryvbb(uc2,uc2bndry,g2, trans2,ig2,mx2,my2,mz2,
     1 dr2,ds2,dt2, nxs2,nxe2,nys2,nye2,nzs2,nze2,igrid)

      igrid=3
      igridi=-2
      call bndryvaa(uc3,uc3bndry,g3, trans3,ig3,mx3,my3,mz3,
     1 dr3,ds3,dt3, nxs3,nxe3,nys3,nye3,nzs3,nze3,igrid,igridi)

      igrid=3
      call bndryvbb(uc3,uc3bndry,g3, trans3,ig3,mx3,my3,mz3,
     1 dr3,ds3,dt3, nxs3,nxe3,nys3,nye3,nzs3,nze3,igrid)

c points on grid 2 from grid 3
      igrid=-3
      call interpv(uc2,ig2, mx2, my2, mz2,igrid, 
     1 nxs2,nxe2,nys2,nye2,nzs2,nze2+1,
     1 ngp23, dr23, numngp23, uc3,mx3,my3,mz3, dr3,ds3,dt3,mngp)

c grid 3 from grid 2
      igrid=-2
      call interpv(uc3,ig3, mx3, my3, mz3,igrid, 
     1 nxs3,nxe3,nys3,nye3,nzs3,nze3+1,
     1 ngp32, dr32, numngp32, uc2,mx2,my2,mz2, dr2,ds2,dt2,mngp)




 1000 continue



          tc = tc + dtime

      ifort=10
      if(nt.eq.ntot) then
      ifort=ifort+1
      rewind ifort
	do k=1,nz1
	do j=1,ny1
	do i=1,nx1
	write(ifort,1102) g1(i,j,k,1), g1(i,j,k,2), g1(i,j,k,3), 
     1      ig1(i,j,k),(uc1(i,j,k,ii),ii=1,4)
	end do
	end do
	end do

      ifort=ifort+1
      rewind ifort
	do k=1,nz2
	do j=1,ny2
	do i=1,nx2
	write(ifort,1102) g2(i,j,k,1), g2(i,j,k,2), g2(i,j,k,3), 
     1      ig2(i,j,k),(uc2(i,j,k,ii),ii=1,4)
	end do
	end do
	end do


      ifort=ifort+1
      rewind ifort
        do k=1,nz3
        do j=1,ny3
        do i=1,nx3
        write(ifort,1102) g3(i,j,k,1), g3(i,j,k,2), g3(i,j,k,3), 
     1      ig3(i,j,k),(uc3(i,j,k,ii),ii=1,4)
	end do
	end do
	end do
      end if


 1101 format(3e17.8,i5,e17.8)

c	end if

      if(mod(nt,10).eq.0) then
      igrid=1
      call interpv5(uc5, ig5, ngp5, drngp5, nphi, nmuh, nnu, 
     1  igrid,  uc1, mx1, my1, mz1, dr1, ds1, dt1)
c from grid 2

      igrid=2
      call interpv5(uc5, ig5, ngp5, drngp5, nphi, nmuh, nnu, 
     1  igrid,  uc2, mx2, my2, mz2, dr2, ds2, dt2)
c from grid 3
      
      igrid=3
      call interpv5(uc5, ig5, ngp5, drngp5, nphi, nmuh, nnu, 
     1  igrid,  uc3, mx3, my3, mz3, dr3, ds3, dt3)
      
      call energy5(uc5, nphi,nmuh,nnu,dmu,dnu,dphi,aa, eke51,eke52)

      write(*,1091) nt, tc, eke51,eke52,eke51+eke52
      write(62,1091) nt, tc, eke51,eke52,eke51+eke52
      end if


c energy
      igrid=1
      call energy(uc1,ig1,g1,mx1,my1,mz1,
     1 nxs1,nxe1,nys1,nye1,nzs1,nze1,igrid,eke1)
      igrid=2
      call energy(uc2,ig2,g2,mx2,my2,mz2,
     1 nxs2,nxe2,nys2,nye2,nzs2,nze2+1,igrid,eke2)
      igrid=3
      call energy(uc3,ig3,g3,mx3,my3,mz3,
     1 nxs3,nxe3,nys3,nye3,nzs3,nze3+1,igrid,eke3)

c      write(*,1091) nt, tc, eke1,eke2,eke3

      write(61,1091) nt, tc, eke1,eke2,eke3
 1091 format(i7,4e16.6)

          if( istop.eq.1 ) goto 1002
c      write(9,*) 'nt = ', nt, '  tc = ', tc
 1001 continue

*****678***************  end  time evolution ***************************       
 1002 continue

 1004  continue

 9991 format(3i4,4e13.5)

       write(99) nt-1,tc,uc1,uc2,uc3


	stop
	end



	subroutine rk(uc,rhs,ig,mx,my,mz,mt,io,dt,
     1 nxs,nxe,nys,nye,nzs,nze,igrid)

        parameter( mtm=4, mn=3)

        dimension uc(mx,my,mz,mn+1),rhs(mx,my,mz,mn,mtm),ig(mx,my,mz)
	if( io.lt.1 .or. io.gt.mt ) then
	  write(*,*) 'Check the RK iteration controller "io"!'
	endif

*   mt = 2 ---> 2nd order Runge-Kutta (TVD)

        if(mt.eq.2) then

cc
cc
          if( io.eq.1 ) then
            do k = nzs, nze
            do j = nys, nye
            do i = nxs, nxe
            if(ig(i,j,k).eq.igrid) then
            do m = 1, mn
              uc(i,j,k,m) = uc(i,j,k,m) + dt * rhs(i,j,k,m,1)
            enddo
            end if
            enddo
            enddo
            end do

          else

            do k = nzs, nze
            do j = nys, nye
            do i = nxs, nxe

            if(ig(i,j,k).eq.igrid) then
            do m = 1, mn
              uc(i,j,k,m) = uc(i,j,k,m) + 0.5 * dt * (-rhs(i,j,k,m,1) 
     &                                           + rhs(i,j,k,m,2))
cc
cc
            enddo
            end if

            enddo
            enddo
            end do

          endif

        endif

*   mt = 3 ---> 3rd order Runge-Kutta (TVD)

        if(mt.eq.3) then

          if( io.eq.1 ) then
            do k = nzs, nze
            do j = nys, nye
            do i = nxs, nxe
            if(ig(i,j,k).eq.igrid) then
	      do m = 1, mn
              uc(i,j,k,m) = uc(i,j,k,m) + dt * rhs(i,j,k,m,1)
            enddo
            end if
            enddo
            enddo
            end do

          else if( io.eq.2 ) then

            do k = nzs, nze
            do j = nys, nye
            do i = nxs, nxe
            if(ig(i,j,k).eq.igrid) then
	    do m = 1, mn
            uc(i,j,k,m) = uc(i,j,k,m) + 0.25 * dt * (-3.*rhs(i,j,k,m,1) 
     &                                              + rhs(i,j,k,m,2))
            enddo
            end if

            enddo
            end do
            enddo

          else
            do k = nzs, nze
            do j = nys, nye
            do i = nxs, nxe

            if(ig(i,j,k).eq.igrid) then

	    do m = 1, mn
              uc(i,j,k,m) = uc(i,j,k,m) + dt * (-rhs(i,j,k,m,1) 
     &                 - rhs(i,j,k,m,2) + 8.*rhs(i,j,k,m,3) ) / 12.
            enddo
            end if
            enddo
            end do
            enddo
          endif
        endif

*   mt = 4 ---> 4th order Runge-Kutta (non-TVD)

        if(mt.eq.4) then

          if( io.eq.1 ) then
            do k = nzs, nze
            do j = nys, nye
            do i = nxs, nxe
            if(ig(i,j,k).eq.igrid) then
	    do m = 1, mn
              uc(i,j,k,m) = uc(i,j,k,m) + 0.5 * dt * rhs(i,j,k,m,1) 
            enddo
            end if

            enddo
            end do
            enddo

         else if( io.eq.2 ) then
            do k = nzs, nze
            do j = nys, nye
            do i = nxs, nxe

            if(ig(i,j,k).eq.igrid) then
	    do m = 1, mn
              uc(i,j,k,m) = uc(i,j,k,m) + 0.5 * dt * (-rhs(i,j,k,m,1) 
     &                                           + rhs(i,j,k,m,2))
            enddo
            end if

            end do
            enddo
            enddo

          else if( io.eq.3 ) then

            do k = nzs, nze
            do j = nys, nye
            do i = nxs, nxe
            if(ig(i,j,k).eq.igrid) then
	    do m = 1, mn
              uc(i,j,k,m) = uc(i,j,k,m) + 0.5 * dt * (  -rhs(i,j,k,m,2) 
     &                                           + 2.*rhs(i,j,k,m,3) )
            enddo
            end if

            enddo
            end do
            enddo
          else

            do k = nzs, nze
            do j = nys, nye
            do i = nxs, nxe
            if(ig(i,j,k).eq.igrid) then
	    do m = 1, mn
              uc(i,j,k,m) = uc(i,j,k,m) + dt * ( rhs(i,j,k,m,1) 
     &                                   + 2.*rhs(i,j,k,m,2)
     &                    - 4.*rhs(i,j,k,m,3) + rhs(i,j,k,m,4) ) / 6.
            enddo
            end if
            end do
            enddo
            enddo
          endif

        endif

	return

	end





      subroutine newRHS2(rhs,uc,ucbndry,g,trans,ig,mx,my,mz,dr,ds,dt,
     1  nxs,nxe,nys,nye,nzs,nze, igrid, io)

      parameter (mtm=4, mn=4)
      dimension rhs(mx,my,mz,3,mtm)
      dimension uc(mx,my,mz,mn), trans(mx,my,mz,3,3), ig(mx,my,mz)
      dimension g(mx,my,mz,3)
      dimension drdx(3,3), dudr(3,3), dudx(3,3), dpdr(3), dpdx(3)
      dimension d2uxdr(3,3,3), drxdr(3,3,3), d2u(3)
      dimension ucbndry(mx,my,3,9)
      common /var1/ omega, omegap, omegapx,omegapz,ca,cb,anu, factor 


c for grid 2

      do 200 i = nxs, nxe
	do 200 j = nys, nye
	do 200 k = nzs, nze 

      if (ig(i,j,k).eq.igrid) then
c interior points
c 
c drdx(1,1)=dr/dx, drdx(1,2)=dr/dy, drdx(1,3)=dr/dz,
c drdx(2,1)=ds/dx, drdx(2,2)=ds/dy, drdx(2,3)=ds/dz,
c drdx(3,1)=dt/dx, drdx(3,2)=dt/dy, drdx(3,3)=dt/dz.

      do m=1,3
      do n=1,3
      drdx(m,n)=trans(i,j,k,m,n)
      end do
      end do

c dpdr(1)=dp/dr, dpdr(2)=dp/ds, dpdr(3)=dp/dt.
      m=4
      dpdr(1)=(uc(i+1,j,k,m)-uc(i-1,j,k,m))/(2.*dr)
      dpdr(2)=(uc(i,j+1,k,m)-uc(i,j-1,k,m))/(2.*ds)
      dpdr(3)=(uc(i,j,k+1,m)-uc(i,j,k-1,m))/(2.*dt)
c
c dpdx(1)=dp/dx, dpdx(2)=dp/dy, dpdx(3)=dp/dz
      do n=1,3
      dpdx(n)=dpdr(1)*drdx(1,n)+dpdr(2)*drdx(2,n)+ dpdr(3)*drdx(3,n)
      end do
c
      rhs(i,j,k,1,io) = rhs(i,j,k,1,io)- dpdx(1)         
      rhs(i,j,k,2,io) = rhs(i,j,k,2,io)- dpdx(2)         
      rhs(i,j,k,3,io) = rhs(i,j,k,3,io)- dpdx(3)

      end if         

  200 continue

c on bndry

c      cL=sqrt(ca**2-cb**2)
      k = nze  
      do 300 i = nxs, nxe
	do 300 j = nys, nye

      if (ig(i,j,k).eq.igrid) then
c interior points

c      x = g(i,j,k,1)
c      y = g(i,j,k,2)
c      z = g(i,j,k,3)

c into spheroidal coordinate ksi, th, phi
c      coshksi=ca/cL
c      sinhksi=cb/cL
c      costh=z/cb
c      sinth=sqrt(x*x+y*y)/ca
c      if(sinth.gt.1.e-10) then
c      sinphi=y/(ca*sinth)
c      cosphi=x/(ca*sinth)
c      else
c      sinphi=0.
c      cosphi=1.
c      end if
c into spheroidal coordinate ksi, th, phi
      coshksi= ucbndry(i,j,2,4)
      sinhksi= ucbndry(i,j,2,5)
      costh= ucbndry(i,j,2,6)
      sinth= ucbndry(i,j,2,7)
      cosphi= ucbndry(i,j,2,8)
      sinphi= ucbndry(i,j,2,9)

c
      gg=1./sqrt(costh**2+sinhksi**2)

c set vksi=0 (bndry condition)
      vksi=0.

      vth = gg*( (rhs(i,j,k,1,io)* cosphi + rhs(i,j,k,2,io)* sinphi)
     1          * coshksi* costh
     1 - rhs(i,j,k,3,io)* sinhksi* sinth )

      vphi =  -rhs(i,j,k,1,io)* sinphi + rhs(i,j,k,2,io)* cosphi

c 
c now convert to vx, vy, and vz
c
      rhs(i,j,k,1,io) = gg*cosphi*(vksi*sinhksi*sinth+vth*coshksi*costh)
     1       -sinphi*vphi
      rhs(i,j,k,2,io) = gg*sinphi*(vksi*sinhksi*sinth+vth*coshksi*costh)
     1      +cosphi*vphi
      rhs(i,j,k,3,io) = gg*(coshksi*costh*vksi-sinhksi*sinth*vth)
  
      end if

  300 continue


      return
      end




      subroutine motion2(uc,g,trans,rhs,ig,mx,my,mz,dr,ds,dt,
     1  nxs,nxe,nys,nye,nzs,nze, igrid, io, cost, sint)

      parameter (mtm=4, mn=4)
      dimension uc(mx,my,mz,mn), trans(mx,my,mz,3,3), ig(mx,my,mz)
      dimension rhs(mx,my,mz,mn-1,mtm), g(mx,my,mz,3)
      dimension drdx(3,3), dudr(3,3), dudx(3,3), dpdr(3), dpdx(3)
      dimension d2uxdr(3,3,3), drxdr(3,3,3), d2u(3) 
      dimension duudx(3,3), duudr(3,3,3)
      common /var1/ omega, omegap, omegapx,omegapz,ca,cb,anu, factor 

c for grid 2
c      consta=2.* ca**2*omegap/ (ca**2-cb**2)
c      constb=2.* cb**2*omegap/ (ca**2-cb**2)
      cc=cb
      aeta=1./cc**2-1
      amu=2.*omegap/aeta
      oetamu=(1.+aeta)*amu


      do 200 i = nxs, nxe
	do 200 j = nys, nye
	do 200 k = nzs, nze

      if (ig(i,j,k).eq.igrid) then
c interior points
      x = g(i,j,k,1)
      y = g(i,j,k,2)
      z = g(i,j,k,3)

c      vx=uc(i,j,k,1)
c      vy=uc(i,j,k,2)
c      vz=uc(i,j,k,3)

c 
c drdx(1,1)=dr/dx, drdx(1,2)=dr/dy, drdx(1,3)=dr/dz,
c drdx(2,1)=ds/dx, drdx(2,2)=ds/dy, drdx(2,3)=ds/dz,
c drdx(3,1)=dt/dx, drdx(3,2)=dt/dy, drdx(3,3)=dt/dz.

      do m=1,3
      do n=1,3
      drdx(m,n)=trans(i,j,k,m,n)
      end do
      end do

c
c dudr(1,1)=dux/dr, dudr(1,2)=dux/ds, dudt(1,3)=dux/dt,
c dudr(2,1)=duy/dr, dudr(2,2)=duy/ds, dudt(2,3)=duy/dt,
c dudr(3,1)=duz/dr, dudr(3,2)=duz/ds, dudt(3,3)=duz/dt.

      do m=1,3
      dudr(m,1)=(uc(i+1,j,k,m)-uc(i-1,j,k,m))/(2.*dr)
      dudr(m,2)=(uc(i,j+1,k,m)-uc(i,j-1,k,m))/(2.*ds)
      dudr(m,3)=(uc(i,j,k+1,m)-uc(i,j,k-1,m))/(2.*dt)
      end do

c
c
c  duudr(1,1,1)=d(ux ux)/dr, duudr(1,1,2)=d(ux ux)/ds, duudr(1,1,3)=d(ux ux)/dt
c  duudr(1,2,1)=d(ux uy)/dr, duudr(1,2,2)=d(ux uy)/ds, duudr(1,2,3)=d(ux uy)/dt
c  duudr(1,3,1)=d(ux uz)/dr, duudr(1,3,2)=d(ux uz)/ds, duudr(1,3,3)=d(ux uz)/dt
c  duudr(2,2,1)=d(uy uy)/dr, duudr(2,2,2)=d(uy uy)/ds, duudr(2,2,3)=d(uy uy)/dt
c  duudr(2,3,1)=d(uy uz)/dr, duudr(2,3,2)=d(uy uz)/ds, duudr(2,3,3)=d(uy uz)/dt
c  duudr(3,3,1)=d(uz uz)/dr, duudr(3,3,2)=d(uz uz)/ds, duudr(3,3,3)=d(uz uz)/dt
c  duudr(2,1,m)=duudr(1,2,m)
c  duudr(3,1,m)=duudr(1,3,m)
c  duudr(3,2,m)=duudr(2,3,m)
c
c the following routine can be simplified because of duudr(2,1,:)=duudr(2,1,:) etc
      do n=1,3
      do m=1,3
      duudr(m,n,1)=(uc(i+1,j,k,m)*uc(i+1,j,k,n)
     1             -uc(i-1,j,k,m)*uc(i-1,j,k,n))/(2.* dr)
      duudr(m,n,2)=(uc(i,j+1,k,m)*uc(i,j+1,k,n)
     1             -uc(i,j-1,k,m)*uc(i,j-1,k,n))/(2.* ds)
      duudr(m,n,3)=(uc(i,j,k+1,m)*uc(i,j,k+1,n)
     1             -uc(i,j,k-1,m)*uc(i,j,k-1,n))/(2.* dt)
      end do
      end do
c
c duudx(1,1)=d(ux ux)/dx, duudx(1,2)=d(uy ux)/dy, duudx(1,3)=d(uz ux)/dz
c duudx(2,1)=d(ux uy)/dx, duudx(2,2)=d(uy uy)/dy, duudx(2,3)=d(uz uy)/dz
c duudx(3,1)=d(ux uz)/dx, duudx(3,2)=d(uy uz)/dy, duudx(3,3)=d(uz u3)/dz

      do n=1,3
      do m=1,3
      duudx(m,n)=duudr(n,m,1)*drdx(1,n)+duudr(n,m,2)*drdx(2,n)
     1          +duudr(n,m,3)*drdx(3,n)
      end do
      end do


c d2uxdr(1,1,1)=d2ux/dr/dr, d2uxdr(1,1,2)=d2ux/dr/ds, 
c d2uxdr(1,1,3)=d2ux/dr/dt,  
c d2uxdr(1,2,1)=d2ux/ds/dr, d2uxdr(1,2,2)=d2ux/ds/ds,
c d2uxdr(1,2,3)=d2ux/ds/dt,  
c d2uxdr(1,3,1)=d2ux/dt/dr, d2uxdr(1,3,2)=d2ux/dt/ds,
c d2uxdr(1,3,3)=d2ux/dt/dt,
c d2uxdr(2,1,1)=d2uy/dr/dr; d2uxdr(3,1,1)=d2uz/dr/dr.  

      do m=1,3
      d2uxdr(m,1,1)=(uc(i+1,j,k,m)-2.*uc(i,j,k,m)+ uc(i-1,j,k,m))/dr**2
      d2uxdr(m,1,2)=(uc(i+1,j+1,k,m)-uc(i+1,j-1,k,m)
     1                 -uc(i-1,j+1,k,m)+uc(i-1,j-1,k,m))/(4.*dr*ds)
      d2uxdr(m,1,3)=(uc(i+1,j,k+1,m)-uc(i+1,j,k-1,m)
     1                 -uc(i-1,j,k+1,m)+uc(i-1,j,k-1,m))/(4.*dr*dt)
      d2uxdr(m,2,2)=(uc(i,j+1,k,m)-2.*uc(i,j,k,m)+ uc(i,j-1,k,m))/ds**2
      d2uxdr(m,2,3)=(uc(i,j+1,k+1,m)-uc(i,j+1,k-1,m)
     1                 -uc(i,j-1,k+1,m)+uc(i,j-1,k-1,m))/(4.*ds*dt)
      d2uxdr(m,3,3)=(uc(i,j,k+1,m)-2.*uc(i,j,k,m)+ uc(i,j,k-1,m))/dt**2
      d2uxdr(m,2,1)=d2uxdr(m,1,2)
      d2uxdr(m,3,1)=d2uxdr(m,1,3)
      d2uxdr(m,3,2)=d2uxdr(m,2,3)

      end do

c
c

c drxdr(1,1,1)=d(drdx)/dr, drxdr(1,1,2)=d(drdx)/ds, drxdr(1,1,3)=d(drdx)/dt,
c drxdr(2,1,1)=d(dsdx)/dr, drxdr(2,1,2)=d(dsdx)/ds, drxdr(2,1,3)=d(dsdx)/dt,
c drxdr(3,1,1)=d(dtdx)/dr, drxdr(3,1,2)=d(dtdx)/ds, drxdr(3,1,3)=d(dsdx)/dt.
c

c drxdr(1,2,1)=d(drdy)/dr, drxdr(1,2,2)=d(drdy)/ds, drxdr(1,2,3)=d(drdy)/dt,
c drxdr(2,2,1)=d(dsdy)/dr, drxdr(2,2,2)=d(dsdy)/ds, drxdr(2,2,3)=d(dsdy)/dt,
c drxdr(3,2,1)=d(dtdy)/dr, drxdr(3,2,2)=d(dtdy)/ds, drxdr(3,2,3)=d(dtdy)/dt.

c

c drxdr(1,3,1)=d(drdz)/dr, drxdr(1,3,2)=d(drdz)/ds, drxdr(1,3,3)=d(drdz)/dt,
c drxdr(2,3,1)=d(dsdz)/dr, drxdr(2,3,2)=d(dsdz)/ds, drxdr(2,3,3)=d(dsdz)/dt,
c drxdr(3,3,1)=d(dtdz)/dr, drxdr(3,3,2)=d(dtdz)/ds, drxdr(3,3,3)=d(dtdz)/dt.

c

      do m=1,3
      do n=1,3

      drxdr(m,n,1)=(trans(i+1,j,k,m,n)- trans(i-1,j,k,m,n))/(2.*dr)
      drxdr(m,n,2)=(trans(i,j+1,k,m,n)- trans(i,j-1,k,m,n))/(2.*ds)
      drxdr(m,n,3)=(trans(i,j,k+1,m,n)- trans(i,j,k-1,m,n))/(2.*dt)

      end do
      end do

c

c dudx(1,1)=dux/dx, dudx(1,2)=dux/dy dudx(1,3)=dux/dz
c dudx(2,1)=duy/dx, dudx(2,2)=duy/dy dudx(2,3)=duy/dz
c dudx(3,1)=duz/dx, dudx(3,2)=duz/dy dudx(3,3)=duy/dz

      do m=1,3
      do n=1,3
      dudx(m,n)=dudr(m,1)*drdx(1,n)+dudr(m,2)*drdx(2,n)
     1	+ dudr(m,3)*drdx(3,n)
      end do
      end do

c
c Laplacian(Ux): d2Uxdxx+d2Uxdyy+d2Uxdzz
      do nn=1,3
      d2u(nn)=0
      do n=1,3
      do mm=1,3
      do m=1,3
      d2u(nn)=d2u(nn)+d2uxdr(nn,mm,m)*drdx(mm,n)*drdx(m,n)
     1 +drxdr(m,n,mm)*drdx(mm,n)*dudr(nn,m)
      end do
      end do
      end do
      end do
      rhs(i,j,k,1,io) =
c -d(vx vx)/dx - d(vy vx)/dy - d(vz vx)/dz
c     1 -duudx(1,1)-duudx(1,2)-duudx(1,3) 
     1 + y*dudx(1,1)-(x-oetamu*z)*dudx(1,2) - amu*y*dudx(1,3)
     1 + uc(i,j,k,2) +  anu*d2u(1)

      rhs(i,j,k,2,io) =
c -d(vx vy)/dx - d(vy vy)/dy - d(vz vy)/dz
c     1 -duudx(2,1)-duudx(2,2)-duudx(2,3)  
     1 + y*dudx(2,1)-(x-oetamu*z)*dudx(2,2) - amu*y*dudx(2,3)
     1 - uc(i,j,k,1) + (oetamu+2.*omegap)*uc(i,j,k,3) + anu*d2u(2)

      rhs(i,j,k,3,io) =
c -d(vx vz)/dx - d(vy vz)/dy - d(vz vz)/dz   
c     1 -duudx(3,1)-duudx(3,2)-duudx(3,3)  
     1 + y*dudx(3,1)-(x-oetamu*z)*dudx(3,2) - amu*y*dudx(3,3)
     1 - (amu+2.*omegap)*uc(i,j,k,2) + anu*d2u(3)
   
      end if

200	continue
      return
      end


      subroutine setdt(g1,ig,uc,mx,my,mz
     1 ,nxs,nxe,nys,nye,nzs,nze,igrid,dt,anumax,cfl)
      parameter (mtm=4, mn=4) 
      dimension uc(mx,my,mz,mn), ig(mx,my,mz), g1(mx,my,mz,3)
      common /var1/omega,omegap, omegapx,omegapz,ca,cb,anu, factor 

      cc=cb
      aeta=1./cc**2-1
      amu=2.*omegap/aeta
      oetamu=(1.+aeta)*amu

      dt=1.e20
      do 200 i = nxs, nxe
	do 200 j = nys, nye
	do 200 k = nzs, nze


      if (ig(i,j,k).eq.igrid) then
      vx0 = -g1(i,j,k,2)
      vy0 = g1(i,j,k,1) -oetamu *g1(i,j,k,3)
      vz0 = amu * g1(i,j,k,2)

      dr=sqrt((g1(i+1,j+1,k+1,1)-g1(i,j,k,1))**2
     1  +(g1(i+1,j+1,k+1,2)-g1(i,j,k,2))**2
     1  +(g1(i+1,j+1,k+1,3)-g1(i,j,k,3))**2)
      v=sqrt((uc(i,j,k,1)+vx0)**2+(uc(i,j,k,2)+vy0)**2
     1  +(uc(i,j,k,3)+vz0)**2)
      if (v.ne. 0.) dt=amin1(dt,dr/v*cfl)
	dt=amin1(dt,0.2*dr**2/anumax)
c	if(i.ge.11 .and. j.ge.10 .and. k.ge.18) then
c	 write(*,*) i,j,k,dt,dr,v
c      end if

	end if
  200 continue

      return
      end








      subroutine newRHS1(rhs,uc,g,ig,mx,my,mz,dx,dy,dz,
     1  nxs,nxe,nys,nye,nzs,nze, igrid, io)

      parameter (mtm=4, mn=4) 
      dimension rhs(mx,my,mz,3,mtm),uc(mx,my,mz,mn), 
     1 ig(mx,my,mz), g(mx,my,mz,3)
      dimension dudx(3,3), dpdx(3), d2u(3)
      common /var1/ omega, omegap, omegapx,omegapz,ca,cb,anu, factor 

c for grid 1
      
      consta=2.* ca**2*omegap/ (ca**2-cb**2)
      constb=2.* cb**2*omegap/ (ca**2-cb**2)

      do 200 i = nxs, nxe
	do 200 j = nys, nye
	do 200 k = nzs, nze

      if (ig(i,j,k).eq.igrid) then

c interior points
      m=4
      rhs(i,j,k,1,io)=rhs(i,j,k,1,io) 
     1  - (uc(i+1,j,k,m)-uc(i-1,j,k,m))/(2.*dx)
      rhs(i,j,k,2,io)=rhs(i,j,k,2,io) 
     1  - (uc(i,j+1,k,m)-uc(i,j-1,k,m))/(2.*dy)
      rhs(i,j,k,3,io)=rhs(i,j,k,3,io) 
     1  - (uc(i,j,k+1,m)-uc(i,j,k-1,m))/(2.*dz)
      end if

200	continue

      return

      end



      subroutine motion1(uc,g,rhs,ig,mx,my,mz,dx,dy,dz,
     1  nxs,nxe,nys,nye,nzs,nze, igrid, io, cost, sint)

      parameter (mtm=4, mn=4) 
      dimension uc(mx,my,mz,mn), ig(mx,my,mz), g(mx,my,mz,3)
      dimension rhs(mx,my,mz,mn-1,mtm)
      dimension dudx(3,3), dpdx(3), d2u(3), duudx(3,3)
      common /var1/ omega, omegap, omegapx,omegapz,ca,cb,anu, factor 

c for grid 1
      
c      consta=2.* ca**2*omegap/ (ca**2-cb**2)
c      constb=2.* cb**2*omegap/ (ca**2-cb**2)
      cc=cb
      aeta=1./cc**2-1
      amu=2.*omegap/aeta
      oetamu=(1.+aeta)*amu

      do 200 i = nxs, nxe
	do 200 j = nys, nye
	do 200 k = nzs, nze

      if (ig(i,j,k).eq.igrid) then

c interior points

      x = g(i,j,k,1)
      y = g(i,j,k,2)
      z = g(i,j,k,3)
c      vx=uc(i,j,k,1)
c      vy=uc(i,j,k,2)
c      vz=uc(i,j,k,3)
c
c duudx(1,1)=d(ux ux)/dx, duudx(1,2)=d(uy ux)/dy, duudx(1,3)=d(uz ux)/dz
c duudx(2,1)=d(ux uy)/dx, duudx(2,2)=d(uy uy)/dy, duudx(2,3)=d(uz uy)/dz
c duudx(3,1)=d(ux uz)/dx, duudx(3,2)=d(uy uz)/dy, duudx(3,3)=d(uz uz)/dz
      do m=1,3
      duudx(m,1)= (uc(i+1,j,k,1)*uc(i+1,j,k,m)
     1            -uc(i-1,j,k,1)*uc(i-1,j,k,m))/(2.*dx) 
      duudx(m,2)= (uc(i,j+1,k,2)*uc(i,j+1,k,m)
     1            -uc(i,j-1,k,2)*uc(i,j-1,k,m))/(2.*dy) 
      duudx(m,3)= (uc(i,j,k+1,3)*uc(i,j,k+1,m)
     1            -uc(i,j,k-1,3)*uc(i,j,k-1,m))/(2.*dz)
      end do
      

C
c dudx(1,1)=dux/dx, dudx(1,2)=dux/dy dudx(1,3)=dux/dz
c dudx(2,1)=duy/dx, dudx(2,2)=duy/dy dudx(2,3)=duy/dz
c dudx(3,1)=duz/dx, dudx(3,2)=duz/dy dudx(3,3)=duy/dz

      do m=1,3
      dudx(m,1)= (uc(i+1,j,k,m)-uc(i-1,j,k,m))/(2.*dx) 
      dudx(m,2)= (uc(i,j+1,k,m)-uc(i,j-1,k,m))/(2.*dy) 
      dudx(m,3)= (uc(i,j,k+1,m)-uc(i,j,k-1,m))/(2.*dz)
      end do


c
c Laplacian(Ux): d2u(1)=d2(ux); d2u(2)=d2(uy); d2u(3)=d2(uz)

      do m=1,3
      d2u(m)=(uc(i+1,j,k,m)-2.*uc(i,j,k,m)+uc(i-1,j,k,m))/dx**2 
     1  +(uc(i,j+1,k,m)-2.*uc(i,j,k,m)+uc(i,j-1,k,m))/dy**2
     1  +(uc(i,j,k+1,m)-2.*uc(i,j,k,m)+uc(i,j,k-1,m))/dz**2
      end do
c
      rhs(i,j,k,1,io) =
c -d(vx vx)/dx - d(vy vx)/dy - d(vz vx)/dz
c     1 -duudx(1,1)-duudx(1,2)-duudx(1,3) +
     1 y*dudx(1,1)-(x-oetamu*z)*dudx(1,2) - amu*y*dudx(1,3)
     1 + uc(i,j,k,2) +  anu*d2u(1)

      rhs(i,j,k,2,io) =
c -d(vx vy)/dx - d(vy vy)/dy - d(vz vy)/dz
c     1 -duudx(2,1)-duudx(2,2)-duudx(2,3) + 
     1 y*dudx(2,1)-(x-oetamu*z)*dudx(2,2) - amu*y*dudx(2,3)
     1 - uc(i,j,k,1) + (oetamu+2.*omegap)*uc(i,j,k,3) + anu*d2u(2)


      rhs(i,j,k,3,io) =
c -d(vx vz)/dx - d(vy vz)/dy - d(vz vz)/dz   
c     1 -duudx(3,1)-duudx(3,2)-duudx(3,3) + 
     1 y*dudx(3,1)-(x-oetamu*z)*dudx(3,2) - amu*y*dudx(3,3)
     1 - (amu+2.*omegap)*uc(i,j,k,2) + anu*d2u(3)

      end if

200	continue
      return
      end










      subroutine bndryvaa(uc,ucbndry,g,trans,ig,mx,my,mz,dr,ds,dt,
     1 nxs,nxe,nys,nye,nzs,nze, igrid, igridI)
c      ig=igrid --> interior point
c         igridI --> interpolation points

      parameter (mtm=4, mn=4)
      dimension uc(mx,my,mz,mn), trans(mx,my,mz,3,3), ig(mx,my,mz)
      dimension g(mx,my,mz,3)
      dimension drdx(3,3), dudr(3,3), dudx(3,3), dpdr(3), dpdx(3)
      dimension d2uxdr(3,3,3), drxdr(3,3,3)
c ucbndry=vksi, vth, vphi (spheroidal) at 3 shells
      dimension ucbndry(mx,my,3,9)
      common /var1/ omega, omegap, omegapx,omegapz,ca,cb,anu, factor 

c ca: major radius (x, y); cb: minor radius (z)
c	
      cL=sqrt(ca**2-cb**2)

c find v on the first inner point
      k=nze-1   

      do 200 i = nxs, nxe
	do 200 j = nys, nye
c
c interior points and also bndry points from other grid

      if (ig(i,j,k).eq.igrid .or. ig(i,j,k) .eq. igridI) then
c interior points

c      x = g(i,j,k,1)
c      y = g(i,j,k,2)
c      z = g(i,j,k,3)

c into spheroidal coordinate ksi, th, phi
      coshksi= ucbndry(i,j,1,4)
      sinhksi= ucbndry(i,j,1,5)
      costh= ucbndry(i,j,1,6)
      sinth= ucbndry(i,j,1,7)
      cosphi= ucbndry(i,j,1,8)
      sinphi= ucbndry(i,j,1,9)
c
      gg=1./sqrt(costh**2+sinhksi**2)


c vksi, vth, vphi

      ucbndry(i,j,1,1)=gg*( (uc(i,j,k,1)* cosphi + uc(i,j,k,2)* sinphi)
     1          * sinhksi* sinth
     1 + uc(i,j,k,3)* coshksi* costh )

      ucbndry(i,j,1,2)=gg*( (uc(i,j,k,1)* cosphi + uc(i,j,k,2)* sinphi)
     1          * coshksi* costh
     1 - uc(i,j,k,3)* sinhksi* sinth )

      ucbndry(i,j,1,3)=- uc(i,j,k,1)* sinphi + uc(i,j,k,2)* cosphi
	end if

200	continue


c find v on boundary
      k=nze   

c      vksimax=0.
      do 300 i = nxs, nxe
	do 300 j = nys, nye

      if (ig(i,j,k).eq.igrid .or. ig(i,j,k) .eq. igridI) then

c      if (ig(i,j,k).eq.igrid) then

c interior points

c      x = g(i,j,k,1)
c      y = g(i,j,k,2)
c      z = g(i,j,k,3)

c into spheroidal coordinate ksi, th, phi
      coshksi= ucbndry(i,j,2,4)
      sinhksi= ucbndry(i,j,2,5)
      costh= ucbndry(i,j,2,6)
      sinth= ucbndry(i,j,2,7)
      cosphi= ucbndry(i,j,2,8)
      sinphi= ucbndry(i,j,2,9)

c
      gg=1./sqrt(costh**2+sinhksi**2)

c vksi, vth, vphi

      ucbndry(i,j,2,1)=gg*( (uc(i,j,k,1)* cosphi + uc(i,j,k,2)* sinphi)
     1          * sinhksi* sinth
     1 + uc(i,j,k,3)* coshksi* costh )

c      vksimax=amax1(vksimax,abs(ucbndry(i,j,2,1)))
      ucbndry(i,j,2,1)=0.

      ucbndry(i,j,2,2)=gg*( (uc(i,j,k,1)* cosphi + uc(i,j,k,2)* sinphi)
     1          * coshksi* costh
     1 - uc(i,j,k,3)* sinhksi* sinth )

      ucbndry(i,j,2,3)=- uc(i,j,k,1)* sinphi + uc(i,j,k,2)* cosphi
	end if

300	continue

c      write(*,*) 'vksimax= ', vksimax

      return

      end

      subroutine bndryvbb(uc,ucbndry,g,trans,ig,mx,my,mz,dr,ds,dt,
     1  nxs,nxe,nys,nye,nzs,nze, igrid)

      parameter (mtm=4, mn=4)
      dimension uc(mx,my,mz,mn), trans(mx,my,mz,3,3), ig(mx,my,mz)
      dimension g(mx,my,mz,3)
      dimension drdx(3,3), dudr(3,3), dudx(3,3), dpdr(3), dpdx(3)
      dimension d2uxdr(3,3,3), drxdr(3,3,3)
c ucbndry=vksi, vth, vphi (spheroidal) at 3 shells
      dimension ucbndry(mx,my,3,9), dxdksi(3), drdksi(3)
      common /var1/ omega, omegap, omegapx,omegapz,ca,cb,anu, factor 

c ca: major radius (x, y); cb: minor radius (z)
c	
      cL=sqrt(ca**2-cb**2)

c find v on the first inner point
      k=nze
      kk=nze+1
      do 200 i = nxs, nxe
	do 200 j = nys, nye

      if (ig(i,j,k).eq.igrid) then
c interior points

c     x = g(i,j,k,1)
c     y = g(i,j,k,2)
c     z = g(i,j,k,3)

c into spheroidal coordinate ksi, th, phi
      coshksi= ucbndry(i,j,2,4)
      sinhksi= ucbndry(i,j,2,5)
      costh= ucbndry(i,j,2,6)
      sinth= ucbndry(i,j,2,7)
      cosphi= ucbndry(i,j,2,8)
      sinphi= ucbndry(i,j,2,9)
c
      gg=1./sqrt(costh**2+sinhksi**2)

c 
c drdx(1,1)=dr/dx, drdx(1,2)=dr/dy, drdx(1,3)=dr/dz,
c drdx(2,1)=ds/dx, drdx(2,2)=ds/dy, drdx(2,3)=ds/dz,
c drdx(3,1)=dt/dx, drdx(3,2)=dt/dy, drdx(3,3)=dt/dz.

      do m=1,3
      do n=1,3
      drdx(m,n)=trans(i,j,k,m,n)
      end do
      end do
c
c dxdksi(1)=dx/dksi, dxdksi(2)=dy/dksi, dxdksi(3)=dz/dksi
      dxdksi(1)=cL*sinhksi*sinth*cosphi
      dxdksi(2)=cL*sinhksi*sinth*sinphi
      dxdksi(3)=cL*coshksi*costh


c drdksi(1)=dr/dksi= dr/dx dx/dksi + dr/dy dy/dksi + dr/dz dz/dksi
c drdksi(2)=ds/dksi= ds/dx dx/dksi + ds/dy dy/dksi + ds/dz dz/dksi
c drdksi(3)=dt/dksi= dt/dx dx/dksi + dt/dy dy/dksi + dt/dz dz/dksi
      do n=1,3
      drdksi(n)=0.
      do m=1,3
      drdksi(n)=drdksi(n)+drdx(n,m)*dxdksi(m)
      end do
      end do

c dpdr(1)=dvth/dr, dpdr(2)=dvth/ds, dpdr(3)=dvth/dt.
      m=2     
      dpdr(1)=(ucbndry(i+1,j,2,m)-ucbndry(i-1,j,2,m))/(2.*dr)
      dpdr(2)=(ucbndry(i,j+1,2,m)-ucbndry(i,j-1,2,m))/(2.*ds)
c dvth/dksi=gg^2*coshksi*sinhksi*vth
      vthG=ucbndry(i,j,1,m)
     1  +2.*dt* (gg*gg*coshksi*sinhksi*ucbndry(i,j,2,2) 
     2           -dpdr(1)*drdksi(1)- dpdr(2)*drdksi(2)) / drdksi(3)



c dpdr(1)=dvphi/dr, dpdr(2)=dvphi/ds, dpdr(3)=dvphi/dt.
      m=3     
      dpdr(1)=(ucbndry(i+1,j,2,m)-ucbndry(i-1,j,2,m))/(2.*dr)
      dpdr(2)=(ucbndry(i,j+1,2,m)-ucbndry(i,j-1,2,m))/(2.*ds)

c stress free:
c dvphi/dksi=sinhksi/coshksi*vphi
      vphiG=ucbndry(i,j,1,m)
     1  +2.*dt* (sinhksi/coshksi*ucbndry(i,j,2,3)
     2          -dpdr(1)*drdksi(1)- dpdr(2)*drdksi(2)) / drdksi(3)
c
c div v=0
c
c
c dudr(1,1)=dux/dr, dudr(1,2)=dux/ds, dudt(1,3)=dux/dt,
c dudr(2,1)=duy/dr, dudr(2,2)=duy/ds, dudt(2,3)=duy/dt,
c dudr(3,1)=duz/dr, dudr(3,2)=duz/ds, dudt(3,3)=duz/dt.

      do m=1,3
      dudr(m,1)=(uc(i+1,j,k,m)-uc(i-1,j,k,m))/(2.*dr)
      dudr(m,2)=(uc(i,j+1,k,m)-uc(i,j-1,k,m))/(2.*ds)
c      dudr(m,3)=(uc(i,j,k+1,m)-uc(i,j,k-1,m))/(2.*dt)
      end do

      aa=dudr(1,1)*drdx(1,1)+dudr(1,2)*drdx(2,1) 
     1 + dudr(2,1)*drdx(1,2)+dudr(2,2)*drdx(2,2)
     1 + dudr(3,1)*drdx(1,3)+dudr(3,2)*drdx(2,3)

c at the guard point: vx= ax vksi + bx vth + cx vphi
c                     vy= ay vksi + by vth + cy vphi
c similarly for vz

c guard point
c into spheroidal coordinate ksi, th, phi
      coshksi= ucbndry(i,j,3,4)
      sinhksi= ucbndry(i,j,3,5)
      costh= ucbndry(i,j,3,6)
      sinth= ucbndry(i,j,3,7)
      cosphi= ucbndry(i,j,3,8)
      sinphi= ucbndry(i,j,3,9)

      gg=1./sqrt(costh**2+sinhksi**2)
      ax=gg*sinhksi*sinth*cosphi
      bx=gg*coshksi*costh*cosphi
      cx=-sinphi
      ay=gg*sinhksi*sinth*sinphi
      by=gg*coshksi*costh*sinphi
      cy=+cosphi
      az=gg*coshksi*costh
      bz=-gg*sinhksi*sinth
      cz=0.
c
      aa=2*dt*aa+(bx*vthG+cx*vphiG)*drdx(3,1)
     1 + (by*vthG+cy*vphiG)*drdx(3,2)
     1 +(bz*vthG+cz*vphiG)*drdx(3,3)
     1  - uc(i,j,nze-1,1)*drdx(3,1) - uc(i,j,nze-1,2)*drdx(3,2)
     1  - uc(i,j,nze-1,3)*drdx(3,3)

      vksiG=-aa/(ax*drdx(3,1)+ay*drdx(3,2)+az*drdx(3,3))

c now combine them to form vxG, vyG, and vzG
      uc(i,j,kk,1)=ax*vksiG+bx*vthG+cx*vphiG
      uc(i,j,kk,2)=ay*vksiG+by*vthG+cy*vphiG
      uc(i,j,kk,3)=az*vksiG+bz*vthG+cz*vphiG
c
      end if
200	continue


      return

      end


      subroutine bndryvdd(ucbndry,g,trans,ig,mx,my,mz,dr,ds,dt,
     1  nxs,nxe,nys,nye,nzs,nze, igrid)

c ucbndry(i,j,k,m)
c k=1,2,3: k=1 => k=nze-1 (inner point)
c          k=2  k=nze bndry point
c          k=3  k=nze+1 guard point
c m=1,2,3 vx,vy, vz
c m=4,5 coshksi, sinhksi
c m=6,7 costh, sinth
c m=8,9 cosphi sinphi


      parameter (mtm=4, mn=4)
      dimension ucbndry(mx,my,3,9), trans(mx,my,mz,3,3), ig(mx,my,mz)
      dimension g(mx,my,mz,3)
      dimension drdx(3,3), dudr(3,3), dudx(3,3), dpdr(3), dpdx(3)
      dimension d2uxdr(3,3,3), drxdr(3,3,3)
c ucbndry=vksi, vth, vphi (spheroidal) at 3 shells
c      dimension ucbndry(mx,my,3,3)
      common /var1/ omega, omegap, omegapx,omegapz,ca,cb,anu, factor 

c ca: major radius (x, y); cb: minor radius (z)
c	
      cL=sqrt(ca**2-cb**2)
      cL2=cL*cL

      kk=0
      do 200 k = nze-1, nze+1
      kk=kk+1
      do 200 i = nxs, nxe
	do 200 j = nys, nye

      x = g(i,j,k,1)
      y = g(i,j,k,2)
      z = g(i,j,k,3)

c compute cosh(ksi), costh, and sinphi and cosphi
      rr=x*x+y*y
      r=sqrt(rr)
      zz=z*z
      bb=-(1+(rr+zz)/cL2)
      cc=rr/cL2
      Q=0.5* (-bb+sqrt(bb**2-4.*cc))
      coshksi=sqrt(Q)
      sinhksi=sqrt(Q-1.)
      sinth=r/(cL*coshksi)
      costh=z/(cL*sinhksi)
      if(r.ge.1.e-7) then
      cosphi=x/r
      sinphi=y/r
      else
      sinphi=0.
      cosphi=1.
      end if

c save them in ucbndry
      ucbndry(i,j,kk,4)=coshksi
      ucbndry(i,j,kk,5)=sinhksi
      ucbndry(i,j,kk,6)=costh
      ucbndry(i,j,kk,7)=sinth
      ucbndry(i,j,kk,8)=cosphi
      ucbndry(i,j,kk,9)=sinphi

200	continue


      return

      end


      subroutine interpv1(uc1, ig, mx,my,mz, igrid, nxs,nxe,nys,nye,
     1 nzs,nze, ngp, drngp, numngp, uc2,mx2,my2,mz2, dr,ds,dt,mngp)
      dimension uc1(mx,my,mz,4), ig(mx,my,mz), ngp(mngp,3)
     1	, drngp(mngp,3)

      dimension uc2(mx2,my2,mz2,4)
c interpolating data among grids and set bndry conditions
c

c grid 1 from grid 2 (3d interpolation)
      intnum=0
      do 10 i=nxs, nxe
      do 10 j=nys, nye
      do 10 k=nzs, nze
      if(ig(i,j,k).eq.igrid) then
c	if(i.eq.14 .and.j.eq.24 .and. k.eq.57) then
c	temppp=1.
c	end if
      intnum=intnum+1
      ngpi=ngp(intnum,1)
      ngpj=ngp(intnum,2)
      ngpk=ngp(intnum,3)
      ddr=drngp(intnum,1)
      dds=drngp(intnum,2)
      ddt=drngp(intnum,3)
      alpha=ddr/dr
      beta=dds/ds
	gamma=ddt/dt


      do m=1,3
       fmm0 = uc2(ngpi-1,ngpj-1, ngpk ,m) 
       fm00 = uc2(ngpi-1,ngpj, ngpk ,m)  
       fm10 = uc2(ngpi-1,ngpj+1, ngpk ,m)  
       f0m0 = uc2(ngpi,ngpj-1, ngpk ,m)  
       f000 = uc2(ngpi,ngpj, ngpk ,m)   
       f010 = uc2(ngpi,ngpj+1, ngpk ,m)   
       f1m0 = uc2(ngpi+1,ngpj-1, ngpk ,m)  
       f100 = uc2(ngpi+1,ngpj, ngpk ,m)   
       f110 = uc2(ngpi+1,ngpj+1, ngpk ,m)
       fmm1 = uc2(ngpi-1,ngpj-1,ngpk+1 ,m) 
       fm01 = uc2(ngpi-1,ngpj,ngpk+1 ,m)  
       fm11 = uc2(ngpi-1,ngpj+1,ngpk+1 ,m)  
       f0m1 = uc2(ngpi,ngpj-1,ngpk+1 ,m)  
       f001 = uc2(ngpi,ngpj,ngpk+1 ,m)   
       f011 = uc2(ngpi,ngpj+1,ngpk+1 ,m)   
       f1m1 = uc2(ngpi+1,ngpj-1,ngpk+1 ,m)  
       f101 = uc2(ngpi+1,ngpj,ngpk+1 ,m)   
       f111 = uc2(ngpi+1,ngpj+1,ngpk+1 ,m)
       fmmm = uc2(ngpi-1,ngpj-1,ngpk-1 ,m) 
       fm0m = uc2(ngpi-1,ngpj,ngpk-1 ,m)  
       fm1m = uc2(ngpi-1,ngpj+1,ngpk-1 ,m)  
       f0mm = uc2(ngpi,ngpj-1,ngpk-1 ,m)  
       f00m = uc2(ngpi,ngpj,ngpk-1 ,m)   
       f01m = uc2(ngpi,ngpj+1,ngpk-1 ,m)   
       f1mm = uc2(ngpi+1,ngpj-1,ngpk-1 ,m)  
       f10m = uc2(ngpi+1,ngpj,ngpk-1 ,m)   
       f11m = uc2(ngpi+1,ngpj+1,ngpk-1 ,m)

      c000=f000
      c100=0.5*(f100-fm00)
      c010=0.5*(f010-f0m0)
      c001=0.5*(f001-f00m)
      c200=0.5*(-2.*f000+f100+fm00)
      c020=0.5*(-2.*f000 + f010 + f0m0)
      c002=0.5*(-2.* f000 + f001 + f00m)
      c110=0.25*(f110 - f1m0 - fm10 + fmm0)
      c011=0.25*(f011 - f01m - f0m1 + f0mm)
      c101=0.25*(f101 - f10m - fm01 + fm0m)
      c210=0.25*(-2.* f010 + 2.* f0m0 + f110 - f1m0 + fm10 - fmm0)
      c201=0.25*(-2.* f001 + 2.* f00m + f101 - f10m + fm01 - fm0m)
      c120=0.25*(-2.*f100 + f110 + f1m0 + 2.*fm00 - fm10 - fmm0)
      c021=0.25*(-2.*f001 + 2.*f00m + f011 - f01m + f0m1 - f0mm)
      c102=0.25*(-2.*f100 + f101 + f10m + 2.*fm00 - fm01 - fm0m)
      c012=0.25*(-2.*f010 + f011 + f01m + 2.*f0m0 - f0m1 - f0mm)
      c111=0.125*(f111 - f11m - f1m1 + f1mm - fm11 + fm1m + fmm1 - fmmm)
      c220=0.25*(4.*f000 - 2.*f010 - 2.*f0m0 - 2.*f100 + f110 
     1 + f1m0 - 2.*fm00 + fm10 + fmm0)
      c202=0.25*(4.*f000 - 2.*f001 - 2.*f00m - 2.*f100 + f101 
     1 + f10m - 2.*fm00 + fm01 + fm0m)
      c022=0.25*(4.*f000 - 2.*f001 - 2.*f00m - 2.*f010 + f011 
     1 + f01m - 2.*f0m0 + f0m1 + f0mm)
      c211=0.125*(-2.*f011 + 2.*f01m + 2.*f0m1 - 2.*f0mm 
     1 + f111 - f11m - f1m1 + f1mm + fm11 - fm1m - fmm1 + fmmm)
      c121=0.125*(-2.*f101 + 2.*f10m + f111 - f11m 
     1 + f1m1 - f1mm + 2.*fm01 - 2.*fm0m - fm11 + fm1m - fmm1 + fmmm )
      c112=0.125*(-2.*f110 + f111 + f11m + 2.*f1m0 
     1 - f1m1 - f1mm + 2.*fm10 - fm11 - fm1m - 2.*fmm0 + fmm1 + fmmm)
      c221=0.125*(4.*f001 - 4.*f00m - 2.*f011 + 2.*f01m 
     1 - 2.*f0m1 + 2.*f0mm - 2.*f101 + 2.*f10m + f111 
     1 - f11m + f1m1 - f1mm - 2.*fm01 + 2.*fm0m 
     1 + fm11 -fm1m + fmm1 - fmmm )
      c212=0.125*(4.*f010 - 2.*f011 - 2.*f01m - 4.*f0m0 
     1 + 2.*f0m1 + 2.*f0mm - 2.*f110  
     1 + f111 + f11m + 2.*f1m0 - f1m1 - f1mm - 2.*fm10 + fm11 + fm1m  
     1 + 2.*fmm0 - fmm1 - fmmm) 
      c122=0.125*(4.*f100 - 2.*f101 - 2.*f10m - 2.*f110 
     1 + f111 + f11m - 2.*f1m0  
     1 + f1m1 + f1mm - 4.*fm00 + 2.*fm01 + 2.*fm0m 
     1 + 2.*fm10 - fm11 - fm1m + 2.*fmm0 - fmm1 - fmmm)
      c222=0.125*(-8.*f000 + 4.*f001 + 4.*f00m + 4.*f010 
     1 - 2.*f011 - 2.*f01m + 4.*f0m0 - 2.*f0m1 - 2.*f0mm 
     1 + 4.*f100 - 2.*f101 - 2.*f10m - 2.*f110  
     1 + f111 + f11m - 2.*f1m0 + f1m1 + f1mm + 4.*fm00 
     1 - 2.*fm01 - 2.*fm0m - 2.*fm10 + fm11 
     1 + fm1m - 2.*fmm0 + fmm1 + fmmm) 

      uc1(i,j,k,m)=
     1 c000 + c100*alpha + c010*beta + c001*gamma +
     1 c200*alpha**2 + c020*beta**2 + c002*gamma**2 +
     1 c110*alpha*beta + c011*beta*gamma + c101*alpha*gamma +
     1 c210*alpha**2*beta + c201*alpha**2*gamma 
     1 + c120*alpha*beta**2 + c021*beta**2*gamma +
     1 c102*alpha*gamma**2 + c012*beta*gamma**2 + c111*alpha*beta*gamma 
     1 + c220*alpha**2*beta**2 + c202*alpha**2*gamma**2 
     1 + c022*beta**2*gamma**2 +
     1 c211*alpha**2*beta*gamma + c121*alpha*beta**2*gamma 
     1 + c112*alpha*beta*gamma**2 +
     1 c221*alpha**2*beta**2*gamma + c212*alpha**2*beta*gamma**2 
     1 + c122*alpha*beta**2*gamma**2 +
     1 c222*alpha**2*beta**2*gamma**2

      end do

      end if

  10  continue
      return
      end

      subroutine interpRHS1(uc1, ig, mx,my,mz, igrid, nxs,nxe,nys,nye,
     1 nzs,nze, ngp, drngp, numngp, uc2,mx2,my2,mz2, dr,ds,dt,mngp,io)
      parameter(mtm=4)
      dimension uc1(mx,my,mz,3,mtm), ig(mx,my,mz), ngp(mngp,3)
     1	, drngp(mngp,3)

      dimension uc2(mx2,my2,mz2,3,mtm)
c interpolating data among grids and set bndry conditions
c

c grid 1 from grid 2 (3d interpolation)
      intnum=0
      do 10 i=nxs, nxe
      do 10 j=nys, nye
      do 10 k=nzs, nze
      if(ig(i,j,k).eq.igrid) then
c	if(i.eq.14 .and.j.eq.24 .and. k.eq.57) then
c	temppp=1.
c	end if
      intnum=intnum+1
      ngpi=ngp(intnum,1)
      ngpj=ngp(intnum,2)
      ngpk=ngp(intnum,3)
      ddr=drngp(intnum,1)
      dds=drngp(intnum,2)
      ddt=drngp(intnum,3)
      alpha=ddr/dr
      beta=dds/ds
	gamma=ddt/dt


      do m=1,3
       fmm0 = uc2(ngpi-1,ngpj-1, ngpk, m, io) 
       fm00 = uc2(ngpi-1,ngpj, ngpk, m, io)  
       fm10 = uc2(ngpi-1,ngpj+1, ngpk, m, io)  
       f0m0 = uc2(ngpi,ngpj-1, ngpk, m, io)  
       f000 = uc2(ngpi,ngpj, ngpk, m, io)   
       f010 = uc2(ngpi,ngpj+1, ngpk, m, io)   
       f1m0 = uc2(ngpi+1,ngpj-1, ngpk, m, io)  
       f100 = uc2(ngpi+1,ngpj, ngpk, m, io)   
       f110 = uc2(ngpi+1,ngpj+1, ngpk, m, io)
       fmm1 = uc2(ngpi-1,ngpj-1,ngpk+1, m, io) 
       fm01 = uc2(ngpi-1,ngpj,ngpk+1, m, io)  
       fm11 = uc2(ngpi-1,ngpj+1,ngpk+1, m, io)  
       f0m1 = uc2(ngpi,ngpj-1,ngpk+1, m, io)  
       f001 = uc2(ngpi,ngpj,ngpk+1, m, io)   
       f011 = uc2(ngpi,ngpj+1,ngpk+1, m, io)   
       f1m1 = uc2(ngpi+1,ngpj-1,ngpk+1, m, io)  
       f101 = uc2(ngpi+1,ngpj,ngpk+1, m, io)   
       f111 = uc2(ngpi+1,ngpj+1,ngpk+1, m, io)
       fmmm = uc2(ngpi-1,ngpj-1,ngpk-1, m, io) 
       fm0m = uc2(ngpi-1,ngpj,ngpk-1, m, io)  
       fm1m = uc2(ngpi-1,ngpj+1,ngpk-1, m, io)  
       f0mm = uc2(ngpi,ngpj-1,ngpk-1, m, io)  
       f00m = uc2(ngpi,ngpj,ngpk-1, m, io)   
       f01m = uc2(ngpi,ngpj+1,ngpk-1, m, io)   
       f1mm = uc2(ngpi+1,ngpj-1,ngpk-1, m, io)  
       f10m = uc2(ngpi+1,ngpj,ngpk-1, m, io)   
       f11m = uc2(ngpi+1,ngpj+1,ngpk-1, m, io)

      c000=f000
      c100=0.5*(f100-fm00)
      c010=0.5*(f010-f0m0)
      c001=0.5*(f001-f00m)
      c200=0.5*(-2.*f000+f100+fm00)
      c020=0.5*(-2.*f000 + f010 + f0m0)
      c002=0.5*(-2.* f000 + f001 + f00m)
      c110=0.25*(f110 - f1m0 - fm10 + fmm0)
      c011=0.25*(f011 - f01m - f0m1 + f0mm)
      c101=0.25*(f101 - f10m - fm01 + fm0m)
      c210=0.25*(-2.* f010 + 2.* f0m0 + f110 - f1m0 + fm10 - fmm0)
      c201=0.25*(-2.* f001 + 2.* f00m + f101 - f10m + fm01 - fm0m)
      c120=0.25*(-2.*f100 + f110 + f1m0 + 2.*fm00 - fm10 - fmm0)
      c021=0.25*(-2.*f001 + 2.*f00m + f011 - f01m + f0m1 - f0mm)
      c102=0.25*(-2.*f100 + f101 + f10m + 2.*fm00 - fm01 - fm0m)
      c012=0.25*(-2.*f010 + f011 + f01m + 2.*f0m0 - f0m1 - f0mm)
      c111=0.125*(f111 - f11m - f1m1 + f1mm - fm11 + fm1m + fmm1 - fmmm)
      c220=0.25*(4.*f000 - 2.*f010 - 2.*f0m0 - 2.*f100 + f110 
     1 + f1m0 - 2.*fm00 + fm10 + fmm0)
      c202=0.25*(4.*f000 - 2.*f001 - 2.*f00m - 2.*f100 + f101 
     1 + f10m - 2.*fm00 + fm01 + fm0m)
      c022=0.25*(4.*f000 - 2.*f001 - 2.*f00m - 2.*f010 + f011 
     1 + f01m - 2.*f0m0 + f0m1 + f0mm)
      c211=0.125*(-2.*f011 + 2.*f01m + 2.*f0m1 - 2.*f0mm 
     1 + f111 - f11m - f1m1 + f1mm + fm11 - fm1m - fmm1 + fmmm)
      c121=0.125*(-2.*f101 + 2.*f10m + f111 - f11m 
     1 + f1m1 - f1mm + 2.*fm01 - 2.*fm0m - fm11 + fm1m - fmm1 + fmmm )
      c112=0.125*(-2.*f110 + f111 + f11m + 2.*f1m0 
     1 - f1m1 - f1mm + 2.*fm10 - fm11 - fm1m - 2.*fmm0 + fmm1 + fmmm)
      c221=0.125*(4.*f001 - 4.*f00m - 2.*f011 + 2.*f01m 
     1 - 2.*f0m1 + 2.*f0mm - 2.*f101 + 2.*f10m + f111 
     1 - f11m + f1m1 - f1mm - 2.*fm01 + 2.*fm0m 
     1 + fm11 -fm1m + fmm1 - fmmm )
      c212=0.125*(4.*f010 - 2.*f011 - 2.*f01m - 4.*f0m0 
     1 + 2.*f0m1 + 2.*f0mm - 2.*f110  
     1 + f111 + f11m + 2.*f1m0 - f1m1 - f1mm - 2.*fm10 + fm11 + fm1m  
     1 + 2.*fmm0 - fmm1 - fmmm) 
      c122=0.125*(4.*f100 - 2.*f101 - 2.*f10m - 2.*f110 
     1 + f111 + f11m - 2.*f1m0  
     1 + f1m1 + f1mm - 4.*fm00 + 2.*fm01 + 2.*fm0m 
     1 + 2.*fm10 - fm11 - fm1m + 2.*fmm0 - fmm1 - fmmm)
      c222=0.125*(-8.*f000 + 4.*f001 + 4.*f00m + 4.*f010 
     1 - 2.*f011 - 2.*f01m + 4.*f0m0 - 2.*f0m1 - 2.*f0mm 
     1 + 4.*f100 - 2.*f101 - 2.*f10m - 2.*f110  
     1 + f111 + f11m - 2.*f1m0 + f1m1 + f1mm + 4.*fm00 
     1 - 2.*fm01 - 2.*fm0m - 2.*fm10 + fm11 
     1 + fm1m - 2.*fmm0 + fmm1 + fmmm) 

      uc1(i,j,k,m,io)=
     1 c000 + c100*alpha + c010*beta + c001*gamma +
     1 c200*alpha**2 + c020*beta**2 + c002*gamma**2 +
     1 c110*alpha*beta + c011*beta*gamma + c101*alpha*gamma +
     1 c210*alpha**2*beta + c201*alpha**2*gamma 
     1 + c120*alpha*beta**2 + c021*beta**2*gamma +
     1 c102*alpha*gamma**2 + c012*beta*gamma**2 + c111*alpha*beta*gamma 
     1 + c220*alpha**2*beta**2 + c202*alpha**2*gamma**2 
     1 + c022*beta**2*gamma**2 +
     1 c211*alpha**2*beta*gamma + c121*alpha*beta**2*gamma 
     1 + c112*alpha*beta*gamma**2 +
     1 c221*alpha**2*beta**2*gamma + c212*alpha**2*beta*gamma**2 
     1 + c122*alpha*beta**2*gamma**2 +
     1 c222*alpha**2*beta**2*gamma**2

      end do

      end if

  10  continue
      return
      end


      subroutine interpv2(uc1, ig, mx,my,mz, igrid, nxs,nxe,nys,nye,
     1 nzs,nze, ngp, drngp, numngp, uc2,mx2,my2,mz2, dr,ds,dt,mngp)
      dimension uc1(mx,my,mz,4), ig(mx,my,mz), ngp(mngp,3)
     1	, drngp(mngp,3)

      dimension uc2(mx2,my2,mz2,4)
c interpolating data among grids and set bndry conditions
c

c grid 1 from grid 2 (3d interpolation)
      intnum=0
      do 10 i=nxs, nxe
      do 10 j=nys, nye
      do 10 k=nzs, nze
      if(ig(i,j,k).eq.igrid) then
c	if(i.eq.14 .and.j.eq.24 .and. k.eq.57) then
c	temppp=1.
c	end if
      intnum=intnum+1
      ngpi=ngp(intnum,1)
      ngpj=ngp(intnum,2)
      ngpk=ngp(intnum,3)
      ddr=drngp(intnum,1)
      dds=drngp(intnum,2)
      ddt=drngp(intnum,3)
      alpha=ddr/dr
      beta=dds/ds
	gamma=ddt/dt

c gamma should be zero!  -> interpolation on [r, s] surface

      do m=1,3
      fmm = uc2(ngpi-1, ngpj-1 , ngpk, m)  
      fm0 = uc2(ngpi-1, ngpj , ngpk, m) 
      fm1 = uc2(ngpi-1, ngpj+1 , ngpk, m)   
      f0m = uc2(ngpi, ngpj-1 , ngpk, m)   
      f00 = uc2(ngpi, ngpj , ngpk, m) 
      f01 = uc2(ngpi, ngpj+1 , ngpk, m)    
      f1m = uc2(ngpi+1, ngpj-1 , ngpk, m)   
      f10 = uc2(ngpi+1, ngpj , ngpk, m) 
      f11 = uc2(ngpi+1, ngpj+1 , ngpk, m)
       
      c00=f00
      c01=0.5*(f01-f0m)
      c02=0.5*(-2.*f00 + f01 + f0m)
      c10=0.5*(f10 - fm0 )
      c11=0.25*(f11 - f1m - fm1 + fmm)
      c12=0.25*(-2.*f10 + f11 + f1m + 2.*fm0 - fm1 - fmm)
      c20=0.5*(-2.*f00 + f10 + fm0)
      c21=0.25*(-2.*f01 + 2.*f0m + f11 - f1m + fm1 - fmm)
      c22=0.25*(4.*f00 - 2.*f01 - 2.*f0m - 2.*f10 
     1  + f11 + f1m - 2.*fm0 + fm1 + fmm)
     
      uc1(i,j,k,m)= c00 + c10*alpha + c01 *beta +
     1  c20*alpha**2 + c11*alpha *beta + c02 *beta**2 +
     1  c21*alpha**2 *beta + c12*alpha *beta**2 + c22*alpha**2 *beta**2


      end do

      end if

  10  continue
      return 
      end

      subroutine interpRHS2(uc1, ig, mx,my,mz, igrid, nxs,nxe,nys,nye,
     1 nzs,nze, ngp, drngp, numngp, uc2,mx2,my2,mz2, dr,ds,dt,mngp,io)
      parameter(mtm=4)
      dimension uc1(mx,my,mz,3,mtm), ig(mx,my,mz), ngp(mngp,3)
     1	, drngp(mngp,3)

      dimension uc2(mx2,my2,mz2,3,mtm)
c interpolating data among grids and set bndry conditions
c

c grid 1 from grid 2 (3d interpolation)
      intnum=0
      do 10 i=nxs, nxe
      do 10 j=nys, nye
      do 10 k=nzs, nze
      if(ig(i,j,k).eq.igrid) then
c	if(i.eq.14 .and.j.eq.24 .and. k.eq.57) then
c	temppp=1.
c	end if
      intnum=intnum+1
      ngpi=ngp(intnum,1)
      ngpj=ngp(intnum,2)
      ngpk=ngp(intnum,3)
      ddr=drngp(intnum,1)
      dds=drngp(intnum,2)
      ddt=drngp(intnum,3)
      alpha=ddr/dr
      beta=dds/ds
	gamma=ddt/dt

c gamma should be zero!  -> interpolation on [r, s] surface

      do m=1,3
      fmm = uc2(ngpi-1, ngpj-1 , ngpk, m, io)  
      fm0 = uc2(ngpi-1, ngpj , ngpk, m, io) 
      fm1 = uc2(ngpi-1, ngpj+1 , ngpk, m, io)   
      f0m = uc2(ngpi, ngpj-1 , ngpk, m, io)   
      f00 = uc2(ngpi, ngpj , ngpk, m, io) 
      f01 = uc2(ngpi, ngpj+1 , ngpk, m, io)    
      f1m = uc2(ngpi+1, ngpj-1 , ngpk, m, io)   
      f10 = uc2(ngpi+1, ngpj , ngpk, m, io) 
      f11 = uc2(ngpi+1, ngpj+1 , ngpk, m, io)
       
      c00=f00
      c01=0.5*(f01-f0m)
      c02=0.5*(-2.*f00 + f01 + f0m)
      c10=0.5*(f10 - fm0 )
      c11=0.25*(f11 - f1m - fm1 + fmm)
      c12=0.25*(-2.*f10 + f11 + f1m + 2.*fm0 - fm1 - fmm)
      c20=0.5*(-2.*f00 + f10 + fm0)
      c21=0.25*(-2.*f01 + 2.*f0m + f11 - f1m + fm1 - fmm)
      c22=0.25*(4.*f00 - 2.*f01 - 2.*f0m - 2.*f10 
     1  + f11 + f1m - 2.*fm0 + fm1 + fmm)
     
      uc1(i,j,k,m,io)= c00 + c10*alpha + c01 *beta +
     1  c20*alpha**2 + c11*alpha *beta + c02 *beta**2 +
     1  c21*alpha**2 *beta + c12*alpha *beta**2 + c22*alpha**2 *beta**2


      end do

      end if

  10  continue
      return
      end











      subroutine interpv(uc1, ig, mx,my,mz, igrid, nxs,nxe,nys,nye,
     1 nzs,nze, ngp, drngp, numngp, uc2,mx2,my2,mz2, dr,ds,dt,mngp)
      dimension uc1(mx,my,mz,4), ig(mx,my,mz), ngp(mngp,3)
     1	, drngp(mngp,3)

      dimension uc2(mx2,my2,mz2,4)
c interpolating data among grids and set bndry conditions
c

c grid 1 from grid 2
      intnum=0
      do 10 i=nxs, nxe
      do 10 j=nys, nye
      do 10 k=nzs, nze
      if(ig(i,j,k).eq.igrid) then
c	if(i.eq.14 .and.j.eq.24 .and. k.eq.57) then
c	temppp=1.
c	end if
      intnum=intnum+1
      ngpi=ngp(intnum,1)
      ngpj=ngp(intnum,2)
      ngpk=ngp(intnum,3)
      ddr=drngp(intnum,1)
      dds=drngp(intnum,2)
      ddt=drngp(intnum,3)
      alpha=ddr/dr
      beta=dds/ds
	gamma=ddt/dt

      if(ngpk.ne.nze) then

      do m=1,3
	uc111=uc2(ngpi,ngpj,ngpk,m)
      uc121=uc2(ngpi,ngpj+1,ngpk,m)
      uc211=uc2(ngpi+1,ngpj,ngpk,m)
      uc221=uc2(ngpi+1,ngpj+1,ngpk,m)
	uc112=uc2(ngpi,ngpj,ngpk+1,m)
      uc122=uc2(ngpi,ngpj+1,ngpk+1,m)
      uc212=uc2(ngpi+1,ngpj,ngpk+1,m)
      uc222=uc2(ngpi+1,ngpj+1,ngpk+1,m)


	uc1old=uc1(i,j,k,m)



      uc1(i,j,k,m)=((1.-alpha)*(1.-beta)*uc2(ngpi,ngpj,ngpk,m)
     1 +(1.-alpha)* beta *uc2(ngpi,ngpj+1,ngpk,m)
     1 + alpha*(1.-beta)*uc2(ngpi+1,ngpj,ngpk,m)
     1 + alpha*beta*uc2(ngpi+1,ngpj+1,ngpk,m))*(1.-gamma)
     1         +((1.-alpha)*(1.-beta)*uc2(ngpi,ngpj,ngpk+1,m)
     1 +(1.-alpha)* beta *uc2(ngpi,ngpj+1,ngpk+1,m)
     1 + alpha*(1.-beta)*uc2(ngpi+1,ngpj,ngpk+1,m)
     1 + alpha*beta*uc2(ngpi+1,ngpj+1,ngpk+1,m))*gamma

      end do

      else
c ngpk=nze      
      do m=1,3
	uc111=uc2(ngpi,ngpj,ngpk,m)
      uc121=uc2(ngpi,ngpj+1,ngpk,m)
      uc211=uc2(ngpi+1,ngpj,ngpk,m)
      uc221=uc2(ngpi+1,ngpj+1,ngpk,m)

	uc1old=uc1(i,j,k,m)

      uc1(i,j,k,m)=((1.-alpha)*(1.-beta)*uc2(ngpi,ngpj,ngpk,m)
     1 +(1.-alpha)* beta *uc2(ngpi,ngpj+1,ngpk,m)
     1 + alpha*(1.-beta)*uc2(ngpi+1,ngpj,ngpk,m)
     1 + alpha*beta*uc2(ngpi+1,ngpj+1,ngpk,m))

      end do
      end if
      
      end if

  10  continue
      return
      end


    






      subroutine setup(uc,ig,g,mx,my,mz,
     1 nxs,nxe,nys,nye,nzs,nze,igrid)
      parameter (mtm=4, mn=4)
      dimension uc(mx,my,mz,mn), g(mx,my,mz,3), ig(mx,my,mz)
      common /var1/ omega, omegap, omegapx,omegapz,ca,cb,anu, factor 

c      factor=0.8
c      ca=1.
c      cb=ca*factor
      c2=cb**2
      pi=3.14159265359
      
      aa0=1.e-3*0.01
c      aa0=1.
c      
      aa1=0.0150871 * aa0
      aa2=-0.0446851* aa0
      aa3=-0.166318* aa0
      aa4=0.073174* aa0
      aa5=0.177694* aa0
      aa6=0.0861582* aa0
      aa7=0.755539* aa0
      aa8=-0.0767787* aa0

      do i=nxs, nxe
      do j=nys, nye
      do k=nzs, nze
c      if(ig(i,j,k).eq.igrid) then
      x=g(i,j,k,1)     
      y=g(i,j,k,2)
      z=g(i,j,k,3)
      u1x =(y**2-z**2/c2)
      u1y = - x * y 
      u1z = x * z 
      u2x =(1.- x**2 - 2.* y**2  - 2. * z**2/c2)  
      u2y =x* y  
      u2z =x* z 
      u3x=x * y  
      u3y=+(z*z/c2 - x*x)
      u3z= - y* z 
      u4x=x* y  
      u4y=+(1. - 2. * x*x - y*y - 2.* z*z/c2)  
      u4z=+ y *z 
      u5x=-x* z /c2  
      u5y=+y* z/c2  
      u5z=+ (x*x-y*y) 
      u6x=x * z /c2  
      u6y=+y * z/c2  
      u6z=+ (1.- 2.* x*x- 2.* y*y- z*z/c2) 
      u7x=y * z /c2 
      u7y=0.
      u7z= - x * y 
      u8x=0.
      u8y= - x * z /c2  
      u8z=+x * y 


      uc(i,j,k,1)=aa1*u1x+aa2*u2x+ aa3*u3x+aa4*u4x+
     1 aa5*u5x+aa6*u6x+ aa7*u7x+aa8*u8x
      uc(i,j,k,2)=aa1*u1y+aa2*u2y+ aa3*u3y+aa4*u4y+
     1 aa5*u5y+aa6*u6y+ aa7*u7y+aa8*u8y
      uc(i,j,k,3)=aa1*u1z+aa2*u2z+ aa3*u3z+aa4*u4z+
     1 aa5*u5z+aa6*u6z+ aa7*u7z+aa8*u8z
      uc(i,j,k,4)=0.

c      end if
      end do
      end do
      end do
      return
      end

     
      subroutine energy(uc,ig,g,mx,my,mz,
     1 nxs,nxe,nys,nye,nzs,nze,igrid,ekeout)
      parameter (mtm=4, mn=4)
      dimension uc(mx,my,mz,mn), g(mx,my,mz,3), ig(mx,my,mz)

      pi=3.14159265359
      eke=0.
      do 100 i=nxs, nxe
      do 100 j=nys, nye
      do 100 k=nzs, nze
      if(ig(i,j,k).eq.igrid) then
      eke=eke+0.5*(uc(i,j,k,1)**2+uc(i,j,k,2)**2+uc(i,j,k,3)**2)
      end if
  100 continue
      ekeout=eke
c     write(9,*) 'eke=',eke
      return
      end

      subroutine energy5(uc5,nphi, nmuh, nnu, dmu, dnu, dphi, aa,
     1   eke1, eke2)
      dimension uc5(nphi+2,nmuh+2,nnu+2,3)
      
        eke1=0.
        eke2=0.
        do j = 2, nmuh+1 
        aamu= (j-1.5) *dmu    
        sinhmu=sinh(aamu)
        coshmu=cosh(aamu)
                             
        do i = 2, nphi+1
        pphi=(i-2) * dphi
        cosphi=cos(pphi)
        sinphi=sin(pphi)


        do k = 2, nnu/2+1
        aanu= (k-1.5)* dnu
        cosnu=cos(aanu)
        sinnu=sin(aanu)
        eke1=eke1+ 0.5*(uc5(i,j,k,1)**2
     1   + uc5(i,j,k,2)**2 + uc5(i,j,k,3)**2)
     2   *aa**3*coshmu*sinnu*(sinhmu**2+cosnu**2)
        end do

        do k = nnu/2+2, nnu+1
        aanu= (k-1.5)* dnu
        cosnu=cos(aanu)
        sinnu=sin(aanu)
        eke2=eke2+ 0.5*(uc5(i,j,k,1)**2
     1   + uc5(i,j,k,2)**2 + uc5(i,j,k,3)**2)
     2   *aa**3*coshmu*sinnu*(sinhmu**2+cosnu**2)
        end do
        end do
        end do
        eke1=eke1*dmu*dnu*dphi
        eke2=eke2*dmu*dnu*dphi

      return
      end



      subroutine interpv5(uc5, ig5, ngp5, drngp5, mx5, my5, mz5, 
     1  igrid, uc, mx,my,mz, dr, ds, dt)
      dimension uc5(mx5+2,my5+2,mz5+2,3), ig5(mx5+2,my5+2,mz5+2)
     1 ,ngp5(mx5+2,my5+2,mz5+2,3),drngp5(mx5+2,my5+2,mz5+2,3)
      dimension uc(mx,my,mz,4)

c input uc(mx,my,mz)
c output uc5(mx5,my5,mz5) interpolated from uc      
c
c find div on Grid 5 from Grid 1, 2, or 3
c 
      do 10 i=2, mx5+1
      do 10 j=2, my5+1
      do 10 k=2, mz5+1
      if(ig5(i,j,k).eq.igrid) then
      ngpi=ngp5(i,j,k,1)
      ngpj=ngp5(i,j,k,2)
      ngpk=ngp5(i,j,k,3)
      ddr=drngp5(i,j,k,1)
      dds=drngp5(i,j,k,2)
      ddt=drngp5(i,j,k,3)    
c      write(*,*) i,j, ngpi,ngpj,ngpk, ddr, dds,ddt
      alpha=ddr/dr
      beta=dds/ds
	gamma=ddt/dt    
c
      do m=1,3
       fmm0 = uc(ngpi-1,ngpj-1, ngpk, m) 
       fm00 = uc(ngpi-1,ngpj, ngpk, m)  
       fm10 = uc(ngpi-1,ngpj+1, ngpk, m)  
       f0m0 = uc(ngpi,ngpj-1, ngpk, m)  
       f000 = uc(ngpi,ngpj, ngpk, m)   
       f010 = uc(ngpi,ngpj+1, ngpk, m)   
       f1m0 = uc(ngpi+1,ngpj-1, ngpk, m)  
       f100 = uc(ngpi+1,ngpj, ngpk, m)   
       f110 = uc(ngpi+1,ngpj+1, ngpk, m)
       fmm1 = uc(ngpi-1,ngpj-1,ngpk+1, m) 
       fm01 = uc(ngpi-1,ngpj,ngpk+1, m)  
       fm11 = uc(ngpi-1,ngpj+1,ngpk+1, m)  
       f0m1 = uc(ngpi,ngpj-1,ngpk+1, m)  
       f001 = uc(ngpi,ngpj,ngpk+1, m)   
       f011 = uc(ngpi,ngpj+1,ngpk+1, m)   
       f1m1 = uc(ngpi+1,ngpj-1,ngpk+1, m)  
       f101 = uc(ngpi+1,ngpj,ngpk+1, m)   
       f111 = uc(ngpi+1,ngpj+1,ngpk+1, m)
       fmmm = uc(ngpi-1,ngpj-1,ngpk-1, m) 
       fm0m = uc(ngpi-1,ngpj,ngpk-1, m)  
       fm1m = uc(ngpi-1,ngpj+1,ngpk-1, m)  
       f0mm = uc(ngpi,ngpj-1,ngpk-1, m)  
       f00m = uc(ngpi,ngpj,ngpk-1, m)   
       f01m = uc(ngpi,ngpj+1,ngpk-1, m)   
       f1mm = uc(ngpi+1,ngpj-1,ngpk-1, m)  
       f10m = uc(ngpi+1,ngpj,ngpk-1, m)   
       f11m = uc(ngpi+1,ngpj+1,ngpk-1, m)

      c000=f000
      c100=0.5*(f100-fm00)
      c010=0.5*(f010-f0m0)
      c001=0.5*(f001-f00m)
      c200=0.5*(-2.*f000+f100+fm00)
      c020=0.5*(-2.*f000 + f010 + f0m0)
      c002=0.5*(-2.* f000 + f001 + f00m)
      c110=0.25*(f110 - f1m0 - fm10 + fmm0)
      c011=0.25*(f011 - f01m - f0m1 + f0mm)
      c101=0.25*(f101 - f10m - fm01 + fm0m)
      c210=0.25*(-2.* f010 + 2.* f0m0 + f110 - f1m0 + fm10 - fmm0)
      c201=0.25*(-2.* f001 + 2.* f00m + f101 - f10m + fm01 - fm0m)
      c120=0.25*(-2.*f100 + f110 + f1m0 + 2.*fm00 - fm10 - fmm0)
      c021=0.25*(-2.*f001 + 2.*f00m + f011 - f01m + f0m1 - f0mm)
      c102=0.25*(-2.*f100 + f101 + f10m + 2.*fm00 - fm01 - fm0m)
      c012=0.25*(-2.*f010 + f011 + f01m + 2.*f0m0 - f0m1 - f0mm)
      c111=0.125*(f111 - f11m - f1m1 + f1mm - fm11 + fm1m + fmm1 - fmmm)
      c220=0.25*(4.*f000 - 2.*f010 - 2.*f0m0 - 2.*f100 + f110 
     1 + f1m0 - 2.*fm00 + fm10 + fmm0)
      c202=0.25*(4.*f000 - 2.*f001 - 2.*f00m - 2.*f100 + f101 
     1 + f10m - 2.*fm00 + fm01 + fm0m)
      c022=0.25*(4.*f000 - 2.*f001 - 2.*f00m - 2.*f010 + f011 
     1 + f01m - 2.*f0m0 + f0m1 + f0mm)
      c211=0.125*(-2.*f011 + 2.*f01m + 2.*f0m1 - 2.*f0mm 
     1 + f111 - f11m - f1m1 + f1mm + fm11 - fm1m - fmm1 + fmmm)
      c121=0.125*(-2.*f101 + 2.*f10m + f111 - f11m 
     1 + f1m1 - f1mm + 2.*fm01 - 2.*fm0m - fm11 + fm1m - fmm1 + fmmm )
      c112=0.125*(-2.*f110 + f111 + f11m + 2.*f1m0 
     1 - f1m1 - f1mm + 2.*fm10 - fm11 - fm1m - 2.*fmm0 + fmm1 + fmmm)
      c221=0.125*(4.*f001 - 4.*f00m - 2.*f011 + 2.*f01m 
     1 - 2.*f0m1 + 2.*f0mm - 2.*f101 + 2.*f10m + f111 
     1 - f11m + f1m1 - f1mm - 2.*fm01 + 2.*fm0m 
     1 + fm11 -fm1m + fmm1 - fmmm )
      c212=0.125*(4.*f010 - 2.*f011 - 2.*f01m - 4.*f0m0 
     1 + 2.*f0m1 + 2.*f0mm - 2.*f110  
     1 + f111 + f11m + 2.*f1m0 - f1m1 - f1mm - 2.*fm10 + fm11 + fm1m  
     1 + 2.*fmm0 - fmm1 - fmmm) 
      c122=0.125*(4.*f100 - 2.*f101 - 2.*f10m - 2.*f110 
     1 + f111 + f11m - 2.*f1m0  
     1 + f1m1 + f1mm - 4.*fm00 + 2.*fm01 + 2.*fm0m 
     1 + 2.*fm10 - fm11 - fm1m + 2.*fmm0 - fmm1 - fmmm)
      c222=0.125*(-8.*f000 + 4.*f001 + 4.*f00m + 4.*f010 
     1 - 2.*f011 - 2.*f01m + 4.*f0m0 - 2.*f0m1 - 2.*f0mm 
     1 + 4.*f100 - 2.*f101 - 2.*f10m - 2.*f110  
     1 + f111 + f11m - 2.*f1m0 + f1m1 + f1mm + 4.*fm00 
     1 - 2.*fm01 - 2.*fm0m - 2.*fm10 + fm11 
     1 + fm1m - 2.*fmm0 + fmm1 + fmmm) 

      uc5(i,j,k,m)=
     1 c000 + c100*alpha + c010*beta + c001*gamma +
     1 c200*alpha**2 + c020*beta**2 + c002*gamma**2 +
     1 c110*alpha*beta + c011*beta*gamma + c101*alpha*gamma +
     1 c210*alpha**2*beta + c201*alpha**2*gamma 
     1 + c120*alpha*beta**2 + c021*beta**2*gamma +
     1 c102*alpha*gamma**2 + c012*beta*gamma**2 + c111*alpha*beta*gamma 
     1 + c220*alpha**2*beta**2 + c202*alpha**2*gamma**2 
     1 + c022*beta**2*gamma**2 +
     1 c211*alpha**2*beta*gamma + c121*alpha*beta**2*gamma 
     1 + c112*alpha*beta*gamma**2 +
     1 c221*alpha**2*beta**2*gamma + c212*alpha**2*beta*gamma**2 
     1 + c122*alpha*beta**2*gamma**2 +
     1 c222*alpha**2*beta**2*gamma**2
      end do
 
      end if
  10  continue
      return
      end

      subroutine interpRHS5(uc5, ig5, ngp5, drngp5, mx5, my5, mz5, 
     1  igrid, uc, mx,my,mz, dr, ds, dt, io)
      parameter(mtm=4)
      dimension uc5(mx5+2,my5+2,mz5+2,3), ig5(mx5+2,my5+2,mz5+2)
     1 ,ngp5(mx5+2,my5+2,mz5+2,3),drngp5(mx5+2,my5+2,mz5+2,3)
      dimension uc(mx,my,mz,3,mtm)

c input uc(mx,my,mz)
c output uc5(mx5,my5,mz5) interpolated from uc      
c
c find div on Grid 5 from Grid 1, 2, or 3
c 


      do 10 i=2, mx5+1
      do 10 j=2, my5+1
      do 10 k=2, mz5+1
      if(ig5(i,j,k).eq.igrid) then
      ngpi=ngp5(i,j,k,1)
      ngpj=ngp5(i,j,k,2)
      ngpk=ngp5(i,j,k,3)
      ddr=drngp5(i,j,k,1)
      dds=drngp5(i,j,k,2)
      ddt=drngp5(i,j,k,3)    
c      write(*,*) i,j, ngpi,ngpj,ngpk, ddr, dds,ddt
      alpha=ddr/dr
      beta=dds/ds
	gamma=ddt/dt    
c
      do m=1,3
       fmm0 = uc(ngpi-1,ngpj-1, ngpk, m, io) 
       fm00 = uc(ngpi-1,ngpj, ngpk, m, io)  
       fm10 = uc(ngpi-1,ngpj+1, ngpk, m, io)  
       f0m0 = uc(ngpi,ngpj-1, ngpk, m, io)  
       f000 = uc(ngpi,ngpj, ngpk, m, io)   
       f010 = uc(ngpi,ngpj+1, ngpk, m, io)   
       f1m0 = uc(ngpi+1,ngpj-1, ngpk, m, io)  
       f100 = uc(ngpi+1,ngpj, ngpk, m, io)   
       f110 = uc(ngpi+1,ngpj+1, ngpk, m, io)
       fmm1 = uc(ngpi-1,ngpj-1,ngpk+1, m, io) 
       fm01 = uc(ngpi-1,ngpj,ngpk+1, m, io)  
       fm11 = uc(ngpi-1,ngpj+1,ngpk+1, m, io)  
       f0m1 = uc(ngpi,ngpj-1,ngpk+1, m, io)  
       f001 = uc(ngpi,ngpj,ngpk+1, m, io)   
       f011 = uc(ngpi,ngpj+1,ngpk+1, m, io)   
       f1m1 = uc(ngpi+1,ngpj-1,ngpk+1, m, io)  
       f101 = uc(ngpi+1,ngpj,ngpk+1, m, io)   
       f111 = uc(ngpi+1,ngpj+1,ngpk+1, m, io)
       fmmm = uc(ngpi-1,ngpj-1,ngpk-1, m, io) 
       fm0m = uc(ngpi-1,ngpj,ngpk-1, m, io)  
       fm1m = uc(ngpi-1,ngpj+1,ngpk-1, m, io)  
       f0mm = uc(ngpi,ngpj-1,ngpk-1, m, io)  
       f00m = uc(ngpi,ngpj,ngpk-1, m, io)   
       f01m = uc(ngpi,ngpj+1,ngpk-1, m, io)   
       f1mm = uc(ngpi+1,ngpj-1,ngpk-1, m, io)  
       f10m = uc(ngpi+1,ngpj,ngpk-1, m, io)   
       f11m = uc(ngpi+1,ngpj+1,ngpk-1, m, io)

      c000=f000
      c100=0.5*(f100-fm00)
      c010=0.5*(f010-f0m0)
      c001=0.5*(f001-f00m)
      c200=0.5*(-2.*f000+f100+fm00)
      c020=0.5*(-2.*f000 + f010 + f0m0)
      c002=0.5*(-2.* f000 + f001 + f00m)
      c110=0.25*(f110 - f1m0 - fm10 + fmm0)
      c011=0.25*(f011 - f01m - f0m1 + f0mm)
      c101=0.25*(f101 - f10m - fm01 + fm0m)
      c210=0.25*(-2.* f010 + 2.* f0m0 + f110 - f1m0 + fm10 - fmm0)
      c201=0.25*(-2.* f001 + 2.* f00m + f101 - f10m + fm01 - fm0m)
      c120=0.25*(-2.*f100 + f110 + f1m0 + 2.*fm00 - fm10 - fmm0)
      c021=0.25*(-2.*f001 + 2.*f00m + f011 - f01m + f0m1 - f0mm)
      c102=0.25*(-2.*f100 + f101 + f10m + 2.*fm00 - fm01 - fm0m)
      c012=0.25*(-2.*f010 + f011 + f01m + 2.*f0m0 - f0m1 - f0mm)
      c111=0.125*(f111 - f11m - f1m1 + f1mm - fm11 + fm1m + fmm1 - fmmm)
      c220=0.25*(4.*f000 - 2.*f010 - 2.*f0m0 - 2.*f100 + f110 
     1 + f1m0 - 2.*fm00 + fm10 + fmm0)
      c202=0.25*(4.*f000 - 2.*f001 - 2.*f00m - 2.*f100 + f101 
     1 + f10m - 2.*fm00 + fm01 + fm0m)
      c022=0.25*(4.*f000 - 2.*f001 - 2.*f00m - 2.*f010 + f011 
     1 + f01m - 2.*f0m0 + f0m1 + f0mm)
      c211=0.125*(-2.*f011 + 2.*f01m + 2.*f0m1 - 2.*f0mm 
     1 + f111 - f11m - f1m1 + f1mm + fm11 - fm1m - fmm1 + fmmm)
      c121=0.125*(-2.*f101 + 2.*f10m + f111 - f11m 
     1 + f1m1 - f1mm + 2.*fm01 - 2.*fm0m - fm11 + fm1m - fmm1 + fmmm )
      c112=0.125*(-2.*f110 + f111 + f11m + 2.*f1m0 
     1 - f1m1 - f1mm + 2.*fm10 - fm11 - fm1m - 2.*fmm0 + fmm1 + fmmm)
      c221=0.125*(4.*f001 - 4.*f00m - 2.*f011 + 2.*f01m 
     1 - 2.*f0m1 + 2.*f0mm - 2.*f101 + 2.*f10m + f111 
     1 - f11m + f1m1 - f1mm - 2.*fm01 + 2.*fm0m 
     1 + fm11 -fm1m + fmm1 - fmmm )
      c212=0.125*(4.*f010 - 2.*f011 - 2.*f01m - 4.*f0m0 
     1 + 2.*f0m1 + 2.*f0mm - 2.*f110  
     1 + f111 + f11m + 2.*f1m0 - f1m1 - f1mm - 2.*fm10 + fm11 + fm1m  
     1 + 2.*fmm0 - fmm1 - fmmm) 
      c122=0.125*(4.*f100 - 2.*f101 - 2.*f10m - 2.*f110 
     1 + f111 + f11m - 2.*f1m0  
     1 + f1m1 + f1mm - 4.*fm00 + 2.*fm01 + 2.*fm0m 
     1 + 2.*fm10 - fm11 - fm1m + 2.*fmm0 - fmm1 - fmmm)
      c222=0.125*(-8.*f000 + 4.*f001 + 4.*f00m + 4.*f010 
     1 - 2.*f011 - 2.*f01m + 4.*f0m0 - 2.*f0m1 - 2.*f0mm 
     1 + 4.*f100 - 2.*f101 - 2.*f10m - 2.*f110  
     1 + f111 + f11m - 2.*f1m0 + f1m1 + f1mm + 4.*fm00 
     1 - 2.*fm01 - 2.*fm0m - 2.*fm10 + fm11 
     1 + fm1m - 2.*fmm0 + fmm1 + fmmm) 

      uc5(i,j,k,m)=
     1 c000 + c100*alpha + c010*beta + c001*gamma +
     1 c200*alpha**2 + c020*beta**2 + c002*gamma**2 +
     1 c110*alpha*beta + c011*beta*gamma + c101*alpha*gamma +
     1 c210*alpha**2*beta + c201*alpha**2*gamma 
     1 + c120*alpha*beta**2 + c021*beta**2*gamma +
     1 c102*alpha*gamma**2 + c012*beta*gamma**2 + c111*alpha*beta*gamma 
     1 + c220*alpha**2*beta**2 + c202*alpha**2*gamma**2 
     1 + c022*beta**2*gamma**2 +
     1 c211*alpha**2*beta*gamma + c121*alpha*beta**2*gamma 
     1 + c112*alpha*beta*gamma**2 +
     1 c221*alpha**2*beta**2*gamma + c212*alpha**2*beta*gamma**2 
     1 + c122*alpha*beta**2*gamma**2 +
     1 c222*alpha**2*beta**2*gamma**2
      end do
 
      end if
  10  continue
      return
      end





      subroutine interpRHS5b(uc5b,ig5b, ngp5b, drngp5b,
     1  mx5b,my5b, igrid, uc2, mx2,my2,mz2, dr,ds,dt, xi, aa, io)
c
      parameter(mtm=4) 
c mx5b=nphi, my5b=nnu 
      dimension uc5b(mx5b+2,my5b+2,2), 
     1  ig5b(mx5b+2,my5b+2),ngp5b(mx5b+2,my5b+2,3)
     1	, drngp5b(mx5b+2,my5b+2,3)
      dimension uc2(mx2,my2,mz2,3,mtm)
c
ccc      
      do 10 i=2, mx5b+1
      do 10 j=2, my5b+1

      if(ig5b(i,j).eq.igrid) then
      ngpi=ngp5b(i,j,1)
      ngpj=ngp5b(i,j,2)
      ngpk=ngp5b(i,j,3)
      ddr=drngp5b(i,j,1)
      dds=drngp5b(i,j,2)
      ddt=drngp5b(i,j,3)
      alpha=ddr/dr
      beta=dds/ds
	gamma=ddt/dt    
c
      do m=1,3
      fmm = uc2(ngpi-1, ngpj-1 , ngpk, m, io)  
      fm0 = uc2(ngpi-1, ngpj , ngpk, m, io) 
      fm1 = uc2(ngpi-1, ngpj+1 ,ngpk, m, io)   
      f0m = uc2(ngpi, ngpj-1 ,ngpk, m, io)   
      f00 = uc2(ngpi, ngpj ,ngpk, m, io) 
      f01 = uc2(ngpi, ngpj+1 ,ngpk, m, io)    
      f1m = uc2(ngpi+1, ngpj-1 ,ngpk, m, io)   
      f10 = uc2(ngpi+1, ngpj ,ngpk, m, io) 
      f11 = uc2(ngpi+1, ngpj+1 ,ngpk, m, io)
       
      c00=f00
      c01=0.5*(f01-f0m)
      c02=0.5*(-2.*f00 + f01 + f0m)
      c10=0.5*(f10 - fm0 )
      c11=0.25*(f11 - f1m - fm1 + fmm)
      c12=0.25*(-2.*f10 + f11 + f1m + 2.*fm0 - fm1 - fmm)
      c20=0.5*(-2.*f00 + f10 + fm0)
      c21=0.25*(-2.*f01 + 2.*f0m + f11 - f1m + fm1 - fmm)
      c22=0.25*(4.*f00 - 2.*f01 - 2.*f0m - 2.*f10 
     1  + f11 + f1m - 2.*fm0 + fm1 + fmm)
     
      uc5b(i,j,m)= c00 + c10*alpha + c01 *beta +
     1  c20*alpha**2 + c11*alpha *beta + c02 *beta**2 +
     1  c21*alpha**2 *beta + c12*alpha *beta**2 + c22*alpha**2 *beta**2

      end do

      end if
  10  continue
      return
      end



      subroutine divu5(uc5ex,divu,nphi,nmuh, nnu, dphi,dmu,dnu,aa)
      dimension divu(nphi, nmuh, nnu), uc5ex(nphi+2,nmuh+2, nnu+2,3)
      
      icount=0
c      write(*,*) 'in divu5'
        do j = 1, nmuh 
        aamu= (j-.5) *dmu    
        sinhmu=sinh(aamu)
        coshmu=cosh(aamu)
                             
        do i = 1, nphi
        pphi=(i-1) * dphi
        cosphi=cos(pphi)
        sinphi=sin(pphi)


        do k = 1, nnu
        aanu= (k-.5)* dnu
        cosnu=cos(aanu)
        sinnu=sin(aanu)


c      g=1./sqrt(cosh(aamu)**2-sin(aanu)**2)
      g2=1./(cosh(aamu)**2-sin(aanu)**2)

c here tmp=dVx/dx
      dvxdmu=(uc5ex(i+1,j+2,k+1,1)-uc5ex(i+1,j,k+1,1))/(2.*dmu)
      dvxdnu=(uc5ex(i+1,j+1,k+2,1)-uc5ex(i+1,j+1,k,1))/(2.*dnu)
      dvxdph=(uc5ex(i+2,j+1,k+1,1)-uc5ex(i,j+1,k+1,1))/(2.*dphi)
      dvxdnu1=dvxdnu
      if(k.eq.1) then
      dvxdnu= 
     1  (4.*uc5ex(i+1,j+1,k+2,1)-uc5ex(i+1,j+1,k+3,1)
     1   -3.*uc5ex(i+1,j+1,k+1,1))  /(2.*dnu)

      end if
      if(k.eq.nnu) then
      dvxdnu= 
     1  (4.*uc5ex(i+1,j+1,k,1)-uc5ex(i+1,j+1,k-1,1)
     1  -3.*uc5ex(i+1,j+1,k+1,1)) /(-2.*dnu)

      end if


      vx1=1./aa*(g2*sinhmu*sinnu*cosphi* dvxdmu
     2 +g2*coshmu*cosnu*cosphi* dvxdnu )
     3 -1./aa*sinphi/(coshmu*sinnu)* dvxdph
  


c here tmp=dVy/dy        
      vy1=1./aa*(g2*sinhmu*sinnu*sinphi*
     1 (uc5ex(i+1,j+2,k+1,2)-uc5ex(i+1,j,k+1,2))/(2.*dmu)
     2 +g2*coshmu*cosnu*sinphi*
     1 (uc5ex(i+1,j+1,k+2,2)-uc5ex(i+1,j+1,k,2))/(2.*dnu) )
     3 +1./aa*cosphi/(coshmu*sinnu)*
     3  (uc5ex(i+2,j+1,k+1,2)-uc5ex(i,j+1,k+1,2))/(2.*dphi) 


c here tmp=dVz/dz        
      vz1=1./aa*(g2*coshmu*cosnu*
     1 (uc5ex(i+1,j+2,k+1,3)-uc5ex(i+1,j,k+1,3))/(2.*dmu)
     2 -g2*sinhmu*sinnu*
     1 (uc5ex(i+1,j+1,k+2,3)-uc5ex(i+1,j+1,k,3))/(2.*dnu) )


      x=aa*cosh(aamu)* sin(aanu)* cos(pphi)
      y=aa*cosh(aamu)* sin(aanu)* sin(pphi)
      z=aa*sinh(aamu)* cos(aanu)
      
c      tmp=exp(x+y+z)*0.
      
c      icount=icount+1
c      if(i.eq.49 .and. j.eq. 54 .and. k.ge.60 .and. k.le.64) then
      
c      write(*,'(3i4,6f10.5)') i,j,k, x,y,z, aamu, aanu, pphi
c      write(*,'(5e17.7)') vx1, vy1, vz1, tmp, g2
c      write(*,'(4e17.7)') cosh(aamu), sinh(aamu), cos(pphi), sin(pphi)
c      write(*,'(2e17.7)') uc5ex(i+1,j+2,k+1,1),uc5ex(i+1,j,k+1,1)
c      write(*,'(2e17.7)') uc5ex(i+1,j+1,k+2,1),uc5ex(i+1,j+1,k,1)
c      write(*,'(2e17.7)') uc5ex(i+2,j+1,k+1,1),uc5ex(i,j+1,k+1,1) 
c      write(*,'(2e17.7)') dvxdnu1, dvxdnu


c      end if

c     if(i.eq.49 .and. j.eq. 54 .and. k.ge.1 .and. k.le.5) then
      
c     write(*,'(3i4,6f10.5)') i,j,k, x,y,z, aamu, aanu, pphi
c     write(*,*) vx1, vy1, vz1, tmp, g2
c     end if

      divu(i,j,k)=vx1+vy1+vz1
      
      end do
      end do
      end do
      return
      end

      subroutine bndryuc5b(uc5b, nphi, nmuh, nnu) 
      dimension  uc5b(nphi+2, nnu+2, 3)


c      
      do k=2, nnu+1
      uc5b(1,k,1)=uc5b(nphi+1,k,1)      
      uc5b(1,k,2)=uc5b(nphi+1,k,2)      
      uc5b(1,k,3)=uc5b(nphi+1,k,3)      
      uc5b(nphi+2,k,1)=uc5b(2,k,1)      
      uc5b(nphi+2,k,2)=uc5b(2,k,2)      
      uc5b(nphi+2,k,3)=uc5b(2,k,3)      
      end do


      do i= 1, nphi+2
      ii=i+nphi/2
      if(ii.gt. nphi+2) ii=ii-nphi     
      do m= 1, 3
      uc5b(i,1,m)= uc5b(ii,2,m)
      uc5b(i,nnu+2,m)=uc5b(ii,nnu+1,m)
      end do      
      end do

      
      return
      end




      subroutine getuc5(uc5, uc5b, nphi, nmuh, nnu) 
      dimension uc5(nphi+2, nmuh+2, nnu+2, 3), uc5b(nphi+2, nnu+2, 3)

      dimension data2(nphi,nmuh+2)
      
      real  w(0:nphi/2-1), factorn
      integer ipp(0:2+nphi), kk(nphi)
c
c check


      ipp(0)=0
      factorn=2./float(nphi)
      
      n1=nphi
      do i=1,n1/2
      kk(2*i)=i-1
      kk(2*i-1)=i-1
      end do
      kk(2)=n1/2

      
      
      j=nmuh+2
      do i=2, nphi+1
      do k=2, nnu+1
      uc5(i,j,k,1)=uc5b(i,k,1)            
      uc5(i,j,k,2)=uc5b(i,k,2)            
      uc5(i,j,k,3)=uc5b(i,k,3)            
      end do
      end do
      
c      icount=0
      j=1
      do i=2, nphi+1
      do k=2, nnu+1
      uc5(i,j,k,1)=uc5(i,2,nnu+3-k,1)            
      uc5(i,j,k,2)=uc5(i,2,nnu+3-k,2)            
      uc5(i,j,k,3)=uc5(i,2,nnu+3-k,3)
c      icount=icount+1
c      if(icount.le.30) then
c      write(*,*) 'in getuc', i,j,k
c      write(*,*) (uc5(i,j,k,ii),ii=1,3)
c      end if          
      end do
      end do
c      stop
c      
      do j=1, nmuh+2
      do k=2, nnu+1
      uc5(1,j,k,1)=uc5(nphi+1,j,k,1)      
      uc5(1,j,k,2)=uc5(nphi+1,j,k,2)      
      uc5(1,j,k,3)=uc5(nphi+1,j,k,3)      
      uc5(nphi+2,j,k,1)=uc5(2,j,k,1)      
      uc5(nphi+2,j,k,2)=uc5(2,j,k,2)      
      uc5(nphi+2,j,k,3)=uc5(2,j,k,3)      
      end do
      end do




c      k=1
c      do m=1,3
c      do j = 1, nmuh+2
c      do i = 1, nphi
c      data2(i,j)=uc5(i+1,j,k+1,m)
c      end do
c      end do
      
c      do j = 1, nmuh+2     
c      call rdft(n1,1,data2(1,j),ipp,w)
c      end do

c      do i=1, nphi
c      ii=kk(i)
c      factor=1.
c      if(mod(ii,2).ne.0) factor=-1.
c      do j = 1, nmuh+2
c      data2(i,j)=factor *data2(i,j)
c      end do
c      end do

      
c      do j = 1, nmuh+2
c      call rdft(n1,-1,data2(1,j),ipp,w)
c      end do

c      do i = 1, nphi
c      do j = 1, nmuh+2
c      uc5(i+1,j,1,m)=data2(i,j)*factorn
c      end do
c      end do
c      end do

c      k=nnu
c      do m=1,3
c      do j = 1, nmuh+2
c      do i = 1, nphi
c      data2(i,j)=uc5(i+1,j,k+1,m)
c      end do
c      end do
      
c      do j = 1, nmuh+2     
c      call rdft(n1,1,data2(1,j),ipp,w)
c      end do

c      do i=1, nphi
c      ii=kk(i)
c      factor=1.
c      if(mod(ii,2).ne.0) factor=-1.
c      do j = 1, nmuh+2
c      data2(i,j)=factor *data2(i,j)
c      end do
c      end do

      
c      do j = 1, nmuh+2
c      call rdft(n1,-1,data2(1,j),ipp,w)
c      end do

c      do i = 1, nphi
c      do j = 1, nmuh+2
c      uc5(i+1,j,nnu+2,m)=data2(i,j)*factorn
c      end do
c      end do
c      end do
      
c the above should be the same as the following  
c find values at nu=-dnu/2 and nu=pi+dnu/2
c using the fact f(-nu, phi)= f(nu, phi+pi)
c                f(pi+nu,phi)=f(pi-nu, phi+pi)
c check this out
      do m= 1, 3
      do j= 1, nmuh+2
      do i= 1, nphi+2
      ii=i+nphi/2
      if(ii.gt. nphi+2) ii=ii-nphi       
c      uc5(i,j,1,m)= uc5(i+nphi/2,j,2,m)
c      uc5(i,j,nnu+2,m)=uc5(i+nphi/2,j,nnu+1,m)
      uc5(i,j,1,m)= uc5(ii,j,2,m)
      uc5(i,j,nnu+2,m)=uc5(ii,j,nnu+1,m)
      end do      
      end do
      end do    



c
      
      
      
c linear extrapolation to j=nmuh+2
c note before the routine uc5ex(:,nmuh+2,:) is at mu=mu0
c from uc5b
      do m=1,3
      do i = 1, nphi+2
      do k = 1, nnu+2
      uc5(i,nmuh+2,k,m)=2.* uc5(i,nmuh+2,k,m) -uc5(i,nmuh+1,k,m)
      end do
      end do
      end do
      
c      i=1
c      j=1
c      do k=1,10
c      write(*,'(3i4,3e16.6)') i,j,k, (uc5(i,j,k,ii),ii=1,3)
c      end do
      
      return
      end
      
      subroutine doublediv(half, full, nphi, nmu, nnu)
      real half(nphi, nmu/2, nnu), full(nphi, nmu, nnu)
c full system  obtained from half system
c mu>0
        do i = 1, nphi
        do j = nmu/2+1, nmu
        do k = 1, nnu
        full(i,j,k)= half(i,j-nmu/2,k)
        end do
        end do
        end do	  
c mu<0 by symmetry
        do i = 1, nphi
        do j = 1, nmu/2
        do k = 1, nnu
        full(i,j,k)= full(i,nmu+1-j,nnu+1-k) 
        end do
        end do
        end do	  
        return
        end
        
      subroutine reducep(halfex, full, bndrynu, 
     1   bndrymu, bndrymunu, nphi, nmu, nnu)
      real halfex(nphi+2, nmu/2+2, nnu+2), full(nphi, nmu, nnu)
      real bndrynu(nphi, nmu, 2)
      real bndrymu(nphi, nnu)
      real bndrymunu(nphi,2)

        do i = 1, nphi
        do j = nmu/2, nmu
        do k = 1, nnu
        halfex(i+1,j+1-nmu/2,k+1)= full(i,j,k)
        end do
        end do
        end do	  

        
c for nu=-dnu/2 and nu=pi+dnu/2
        
        do i = 1, nphi
        do j = nmu/2, nmu
        halfex(i+1,j+1-nmu/2,1)= bndrynu(i,j,1)
        halfex(i+1,j+1-nmu/2,nnu+2)= bndrynu(i,j,2)
        end do
        end do

       
c for mu=nmu/2+2 in halfex
        j=nmu+1
        do i = 1, nphi
        do k = 1, nnu
        halfex(i+1,j+1-nmu/2,k+1)= bndrymu(i,k)
        end do
        end do

c for mu=nmu/2+2 and nu=-dnu/2, dnu/2+pi
        j=nmu+1
        do i=1, nphi
        halfex(i+1,j+1-nmu/2,1)= bndrymunu(i,1)
        halfex(i+1,j+1-nmu/2,nnu+2)= bndrymunu(i,2)
        end do

c for phi=-dphi and phi=2*pi
        do j = 1, nmu/2+2
        do k = 1, nnu+2
        halfex(1,j,k)=halfex(nphi+1,j,k)       
        halfex(nphi+2,j,k)=halfex(2,j,k)
        end do
        end do       
        
        return
        end
        
      subroutine Vn5nmu0(uc5bndry, nphi, nnu)
      real uc5bndry(nphi, nnu, 2)
      
        do i = 1, nphi
        do k = 1, nnu
              uc5bndry(i,k,2)= -uc5bndry(i,nnu+1-k,1)
        end do
        end do

        return
        end

      subroutine Vn5mu0(uc5b, uc5bndry, nphi, nnu, aphi, aaanu, amu0,aa)
      dimension uc5b(nphi+2, nnu+2,3), uc5bndry(nphi, nnu, 2)
      dimension aphi(nphi), aaanu(nnu)
      
    	sinhxi=sinh(amu0)
	coshxi=cosh(amu0)
      
      do i=1, nphi
      pphi=aphi(i)
      cosphi=cos(pphi)
      sinphi=sin(pphi)
      do j=1, nnu
      aanu=aaanu(j)
    	costh=cos(aanu)
      sinth=sin(aanu)
      

c
c      x=aa*cosh(amu0)* sin(aanu)* cos(pphi)
c      y=aa*cosh(amu0)* sin(aanu)* sin(pphi)
c      z=aa*sinh(amu0)* cos(aanu)
c      uc5b(i+1,j+1,1)=exp(x+y+z)
c      uc5b(i+1,j+1,2)=exp(x+y+z)
c      uc5b(i+1,j+1,3)=exp(x+y+z)
c
      uc5bndry(i,j,1)=aa*( 
     1 (uc5b(i+1,j+1,1)* cosphi + uc5b(i+1,j+1,2)* sinphi)
     1          * sinhxi* sinth
     1 + uc5b(i+1,j+1,3)* coshxi* costh )
      end do
      end do
      return
      end

      
      subroutine interpPf5(uc1, ig, mx,my,mz, igrid, nxs,nxe,nys,nye,
     1 nzs,nze, ngp, drngp, uc2,mx2,my2,mz2, dr,ds,dt)
      dimension uc1(mx,my,mz,4), ig(mx,my,mz), ngp(mx,my,mz,3)
     1	, drngp(mx,my,mz,3)
      dimension uc2(mx2,my2,mz2)
c interpolating data among grids and set bndry conditions
c
c grid 1 from grid 2 
     
      do 10 i=nxs, nxe
      do 10 j=nys, nye
      do 10 k=nzs, nze
c      if(ig(i,j,k).eq.igrid) then
      if(ig(i,j,k).ne.0) then
      
      ngpi=ngp(i,j,k,1)
      ngpj=ngp(i,j,k,2)
      ngpk=ngp(i,j,k,3)
      ddr=drngp(i,j,k,1)
      dds=drngp(i,j,k,2)
      ddt=drngp(i,j,k,3)
      alpha=ddr/dr
      beta=dds/ds
	gamma=ddt/dt
	
	
       fmm0 = uc2(ngpi-1,ngpj-1, ngpk) 
       fm00 = uc2(ngpi-1,ngpj, ngpk)  
       fm10 = uc2(ngpi-1,ngpj+1, ngpk)  
       f0m0 = uc2(ngpi,ngpj-1, ngpk)  
       f000 = uc2(ngpi,ngpj, ngpk)   
       f010 = uc2(ngpi,ngpj+1, ngpk)   
       f1m0 = uc2(ngpi+1,ngpj-1, ngpk)  
       f100 = uc2(ngpi+1,ngpj, ngpk)   
       f110 = uc2(ngpi+1,ngpj+1, ngpk)
       fmm1 = uc2(ngpi-1,ngpj-1,ngpk+1) 
       fm01 = uc2(ngpi-1,ngpj,ngpk+1)  
       fm11 = uc2(ngpi-1,ngpj+1,ngpk+1)  
       f0m1 = uc2(ngpi,ngpj-1,ngpk+1)  
       f001 = uc2(ngpi,ngpj,ngpk+1)   
       f011 = uc2(ngpi,ngpj+1,ngpk+1)   
       f1m1 = uc2(ngpi+1,ngpj-1,ngpk+1)  
       f101 = uc2(ngpi+1,ngpj,ngpk+1)   
       f111 = uc2(ngpi+1,ngpj+1,ngpk+1)
       fmmm = uc2(ngpi-1,ngpj-1,ngpk-1) 
       fm0m = uc2(ngpi-1,ngpj,ngpk-1)  
       fm1m = uc2(ngpi-1,ngpj+1,ngpk-1)  
       f0mm = uc2(ngpi,ngpj-1,ngpk-1)  
       f00m = uc2(ngpi,ngpj,ngpk-1)   
       f01m = uc2(ngpi,ngpj+1,ngpk-1)   
       f1mm = uc2(ngpi+1,ngpj-1,ngpk-1)  
       f10m = uc2(ngpi+1,ngpj,ngpk-1)   
       f11m = uc2(ngpi+1,ngpj+1,ngpk-1)

      c000=f000
      c100=0.5*(f100-fm00)
      c010=0.5*(f010-f0m0)
      c001=0.5*(f001-f00m)
      c200=0.5*(-2.*f000+f100+fm00)
      c020=0.5*(-2.*f000 + f010 + f0m0)
      c002=0.5*(-2.* f000 + f001 + f00m)
      c110=0.25*(f110 - f1m0 - fm10 + fmm0)
      c011=0.25*(f011 - f01m - f0m1 + f0mm)
      c101=0.25*(f101 - f10m - fm01 + fm0m)
      c210=0.25*(-2.* f010 + 2.* f0m0 + f110 - f1m0 + fm10 - fmm0)
      c201=0.25*(-2.* f001 + 2.* f00m + f101 - f10m + fm01 - fm0m)
      c120=0.25*(-2.*f100 + f110 + f1m0 + 2.*fm00 - fm10 - fmm0)
      c021=0.25*(-2.*f001 + 2.*f00m + f011 - f01m + f0m1 - f0mm)
      c102=0.25*(-2.*f100 + f101 + f10m + 2.*fm00 - fm01 - fm0m)
      c012=0.25*(-2.*f010 + f011 + f01m + 2.*f0m0 - f0m1 - f0mm)
      c111=0.125*(f111 - f11m - f1m1 + f1mm - fm11 + fm1m + fmm1 - fmmm)
      c220=0.25*(4.*f000 - 2.*f010 - 2.*f0m0 - 2.*f100 + f110 
     1 + f1m0 - 2.*fm00 + fm10 + fmm0)
      c202=0.25*(4.*f000 - 2.*f001 - 2.*f00m - 2.*f100 + f101 
     1 + f10m - 2.*fm00 + fm01 + fm0m)
      c022=0.25*(4.*f000 - 2.*f001 - 2.*f00m - 2.*f010 + f011 
     1 + f01m - 2.*f0m0 + f0m1 + f0mm)
      c211=0.125*(-2.*f011 + 2.*f01m + 2.*f0m1 - 2.*f0mm 
     1 + f111 - f11m - f1m1 + f1mm + fm11 - fm1m - fmm1 + fmmm)
      c121=0.125*(-2.*f101 + 2.*f10m + f111 - f11m 
     1 + f1m1 - f1mm + 2.*fm01 - 2.*fm0m - fm11 + fm1m - fmm1 + fmmm )
      c112=0.125*(-2.*f110 + f111 + f11m + 2.*f1m0 
     1 - f1m1 - f1mm + 2.*fm10 - fm11 - fm1m - 2.*fmm0 + fmm1 + fmmm)
      c221=0.125*(4.*f001 - 4.*f00m - 2.*f011 + 2.*f01m 
     1 - 2.*f0m1 + 2.*f0mm - 2.*f101 + 2.*f10m + f111 
     1 - f11m + f1m1 - f1mm - 2.*fm01 + 2.*fm0m 
     1 + fm11 -fm1m + fmm1 - fmmm )
      c212=0.125*(4.*f010 - 2.*f011 - 2.*f01m - 4.*f0m0 
     1 + 2.*f0m1 + 2.*f0mm - 2.*f110  
     1 + f111 + f11m + 2.*f1m0 - f1m1 - f1mm - 2.*fm10 + fm11 + fm1m  
     1 + 2.*fmm0 - fmm1 - fmmm) 
      c122=0.125*(4.*f100 - 2.*f101 - 2.*f10m - 2.*f110 
     1 + f111 + f11m - 2.*f1m0  
     1 + f1m1 + f1mm - 4.*fm00 + 2.*fm01 + 2.*fm0m 
     1 + 2.*fm10 - fm11 - fm1m + 2.*fmm0 - fmm1 - fmmm)
      c222=0.125*(-8.*f000 + 4.*f001 + 4.*f00m + 4.*f010 
     1 - 2.*f011 - 2.*f01m + 4.*f0m0 - 2.*f0m1 - 2.*f0mm 
     1 + 4.*f100 - 2.*f101 - 2.*f10m - 2.*f110  
     1 + f111 + f11m - 2.*f1m0 + f1m1 + f1mm + 4.*fm00 
     1 - 2.*fm01 - 2.*fm0m - 2.*fm10 + fm11 
     1 + fm1m - 2.*fmm0 + fmm1 + fmmm) 

      uc1(i,j,k,4)=
     1 c000 + c100*alpha + c010*beta + c001*gamma +
     1 c200*alpha**2 + c020*beta**2 + c002*gamma**2 +
     1 c110*alpha*beta + c011*beta*gamma + c101*alpha*gamma +
     1 c210*alpha**2*beta + c201*alpha**2*gamma 
     1 + c120*alpha*beta**2 + c021*beta**2*gamma +
     1 c102*alpha*gamma**2 + c012*beta*gamma**2 + c111*alpha*beta*gamma 
     1 + c220*alpha**2*beta**2 + c202*alpha**2*gamma**2 
     1 + c022*beta**2*gamma**2 +
     1 c211*alpha**2*beta*gamma + c121*alpha*beta**2*gamma 
     1 + c112*alpha*beta*gamma**2 +
     1 c221*alpha**2*beta**2*gamma + c212*alpha**2*beta*gamma**2 
     1 + c122*alpha*beta**2*gamma**2 +
     1 c222*alpha**2*beta**2*gamma**2

      

 8811 format(3f10.6)
 8812 format(4f10.6)
 

      end if
  10  continue
      return
      end






      subroutine PoisSpheroid(data3, data3bndry, bndrynu, bndrymu,
     1 bndrynumu, phi, amu, anu, n1, n2, n3, aa, dphi, dmu, dnu)
      implicit none
c
      integer n1, n2, n3
      real data3(n1,n2,n3), data3bndry(n1,n3,2)
      real bndrynu(n1,n2,2), bndrymu(n1,n3)
      real bndrynumu(n1,2)

      real  w(0:n1/2-1), factorn
      integer ipp(0:2+n1), kk(n1)
          
c for Blktri.f
      integer iflag, ierror
      real am(n2), bm(n2), cm(n2), bbm(n2)      
      real an(n3), bn(n3), cn(n3), bbn(n3), yy(n2,n3), yyave
c work space (here M=n2, N=n3)
c C                          A ONE-DIMENSIONAL ARRAY THAT MUST BE         
C                          PROVIDED BY THE USER FOR WORK SPACE.         
C                          IF NP=1 DEFINE K=INT(LOG2(N))+1 AND          
C                          SET L=2**(K+1) THEN W MUST HAVE DIMENSION    
C                          (K-2)*L+K+5+MAX(2N,6M)
      real wsave(30000)
c            
c
      real phi(n1), amu(n2), anu(n3), pphi, aamu, aanu, pi
      real aasin(n3), aacos(n3), aasinh(n2), aacosh(n2)
      real philength, amulength, anulength, dphi, dmu, dnu, dmu2, dnu2
      real ssin, ccos, ccosh, ssinh, aa
      integer nphi, nmu, nnu, i, j, k, ii, imax
      
      imax=n1/2/3
      
      ipp(0)=0
      factorn=2./float(n1)
      
      nphi = n1
	nmu = n2
      nnu = n3
       
c      dphi=phi(n1)-phi(n1-1)
c      dmu=amu(n2)-amu(n2-1)
c      dnu=anu(n3)-anu(n3-1)
  
      
      do j = 1, nmu
      aamu=amu(j)
      aasinh(j)=sinh(aamu)
      aacosh(j)=cosh(aamu)
      end do

      do k = 1, nnu
      aanu=anu(k)
      aasin(k)=sin(aanu)
      aacos(k)=cos(aanu)
      end do


c
c
c kk
      do i=1,n1/2
      kk(2*i)=i-1
      kk(2*i-1)=i-1
      end do
      kk(2)=n1/2

cc set AM, BM, CM, AN, BN, CN

      do j = 1, nmu
c      aamu=amu(j)
      ssinh=aasinh(j)
      ccosh=aacosh(j)
      am(j)=(1./dmu-0.5*ssinh/ccosh)/dmu
      bbm(j)= -2./dmu**2
      cm(j)=(1./dmu+0.5*ssinh/ccosh)/dmu
      end do

c conservative      
c      do j = 1, nmu
c      aamu=amu(j)
c      ssinh=aasinh(j)
c      ccosh=aacosh(j)
c      am(j)=cosh(aamu-0.5*dmu)/(ccosh*dmu**2)
c      bbm(j)= -(cosh(aamu-0.5*dmu)+cosh(aamu+0.5*dmu))/(ccosh*dmu**2)
c      cm(j)=cosh(aamu+0.5*dmu)/(ccosh*dmu**2)
c      end do
      
c for Neumann bndry condition      
      j=nmu
      bbm(j)=bbm(j)+cm(j)

      j=1
      bbm(j)=bbm(j)+am(j)

      
      do k = 1, nnu
c      aanu=anu(k)
      ccos=aacos(k)
      ssin=aasin(k)
      an(k)= (1./dnu- 0.5*ccos/ssin) /dnu
      bbn(k)= -2./dnu**2 
      cn(k)= (1./dnu+ 0.5*ccos/ssin) /dnu
      end do      

c conservative:
c      do k = 1, nnu
c      aanu=anu(k)
c      ccos=aacos(k)
c      ssin=aasin(k)
c      an(k)= sin(aanu-0.5*dnu)/(ssin*dnu**2)
c      bbn(k)=-(sin(aanu-0.5*dnu)+sin(aanu+0.5*dnu))/(ssin*dnu**2)
c      cn(k)= sin(aanu+0.5*dnu)/(ssin*dnu**2)
c      end do      
      
c
c      call date_and_time(real_clock(1),real_clock(2))
c      print *, real_clock(1), real_clock(2)
      
      do j = 1, nmu
      do k = 1, nnu
      call rdft(n1,1,data3(1,j,k),ipp,w)
      end do
      end do

      do k = 1, nnu
      call rdft(n1,1,data3bndry(1,k,1),ipp,w)
      end do

      do k = 1, nnu
      call rdft(n1,1,data3bndry(1,k,2),ipp,w)
      end do


      do i=1, nphi

      ii=kk(i)
     
      do  k = 1, nnu
      bn(k) = bbn(k) - float(ii**2)/aasin(k)**2
      end do
      do  j = 1, nmu
      bm(j) = bbm(j) + float(ii**2)/aacosh(j)**2
      end do


      
c set bndry conditions for nu:
      bn(1)=bn(1)+(-1)**ii*an(1)
      bn(nnu)=bn(nnu)+(-1)**ii*cn(nnu)
      
      
      do j = 1, nmu
      do k = 1, nnu
      yy(j,k)=data3(i,j,k)*(aasinh(j)**2+aacos(k)**2)*aa**2
c      yy(j,k)=data3(i,j,k)*aa**2
      end do
      end do
      
c      if(ii.eq.0) then
c      do j = 1, nmu
c      do k = 1, nnu
c      yy(j,k)=(data3(i,j,k)-0.0001*0.)
c     1   *(aasinh(j)**2+aacos(k)**2)*aa**2
c      end do
c      end do
c      end if

c bndry conditions at surface

      j=nmu
      do k=1, nnu
      yy(j,k)=yy(j,k)-cm(j)*data3bndry(i,k,1)*dmu
      end do

      j=1
      do k=1, nnu
      yy(j,k)=yy(j,k)+am(j)*data3bndry(i,k,2)*dmu
      end do

c      if(i.le.4) then
c       write(*,*) 'yy=', i, yy(5,5), yy(5,6), yy(5,7)
c       write(*,*) 'An=', an(2), an(3), an(4)
c       write(*,*) 'Bn=', bn(2), bn(3), bn(4)
c       write(*,*) 'Cn=', cn(2), cn(3), cn(4)
c       write(*,*) 'Am=', am(2), am(3), am(4)
c       write(*,*) 'Bm=', bm(2), bm(3), bm(4)
c       write(*,*) 'Cm=', cm(2), cm(3), cm(4)
c      end if


      
c 
      iflag=0 
c     CALL BLKTRI (IFLG,NP,N,AN,BN,CN,MP,M,AM,BM,CM,IDIMY,Y,IERROR,W)
      CALL BLKTRI (IFLaG,1,Nnu,AN,BN,CN,1,nmu,AM,BM,CM,nmu,
     1  yy,IERROR,Wsave)
      iflag=1 
      CALL BLKTRI (IFLaG,1,Nnu,AN,BN,CN,1,nmu,AM,BM,CM,nmu,
     1  yy,IERROR,Wsave)
c      write(*,*) i, ierror

      do j = 1, nmu
      do k = 1, nnu
      data3(i,j,k)=yy(j,k)
      end do
      end do

      if(ii.gt.imax) then
      do j = 1, nmu
      do k = 1, nnu
      data3(i,j,k)=0.
      end do
      end do
      end if

      
      if(ii.eq.0) then
      yyave=0.
      do j = 1, nmu
      do k = 1, nnu
      yyave=yyave+yy(j,k)
      end do
      end do
      yyave=yyave/float(nmu*nnu)


      do j = 1, nmu
      do k = 1, nnu
      data3(i,j,k)=yy(j,k)-yyave
c      data3(i,j,k)=0.
      end do
      end do
      end if

c      if(i.le.4) write(*,*) 'yy=', i, yy(5,5), yy(5,6), yy(5,7)
      
c      if(ii.ne.0) then
c      do j = 1, nmu
c      do k = 1, nnu
cc      data3(i,j,k)=0.
c      end do
c      end do
c      end if
      
c for anu=0 and anu=pi
c      if(mod(ii,1).eq.0) then
c      if(ii.eq.0) then
c      do j=1, nmu
c      bndrynu0(i,j)=data3(i,j,1)
c      bndrynupi(i,j)=data3(i,j,nnu)

c      end do
c      else
c      do j=1, nmu
c      bndrynu0(i,j)=0.
c      bndrynupi(i,j)=0.
c      end do
c      end if      

c for anu=-dnu/2 and anu=pi+dnu/2
      if(mod(ii,2).eq.0) then
      do j=1, nmu
      bndrynu(i,j,1)=data3(i,j,1)
      bndrynu(i,j,2)=data3(i,j,nnu)
      end do
      else
      do j=1, nmu
      bndrynu(i,j,1)=-data3(i,j,1)
      bndrynu(i,j,2)=-data3(i,j,nnu)
      end do
      end if      

c for amu=amu0+dmu/2
      do k = 1, nnu
      bndrymu(i,k)= data3(i, nmu, k)+ data3bndry(i,k,1) *dmu
      end do
      
c for anu=-dnu/2 and anu=pi+dnu/2 and amu=amu0+dmu0/2
      if(mod(ii,2).eq.0) then
      bndrynumu(i,1)=bndrymu(i,1)
      bndrynumu(i,2)=bndrymu(i,nnu)
      else
      bndrynumu(i,1)=-bndrymu(i,1)
      bndrynumu(i,2)=-bndrymu(i,nnu)
      end if      

     
      end do

      do j = 1, nmu
      do k = 1, nnu
      call rdft(n1,-1,data3(1,j,k),ipp,w)
      end do
      end do
      
c for anu=0 and pi      
      do j=1, nmu
      call rdft(n1,-1,bndrynu(1,j,1),ipp,w)
      call rdft(n1,-1,bndrynu(1,j,2),ipp,w)
      end do
      
c for amu=amu0+dmu/2      
      do k=1, nnu
      call rdft(n1,-1,bndrymu(1,k),ipp,w)
      end do

c for anu=-dnu/2, pi+dnu/2 and mu=amu0+dmu/2
      call rdft(n1,-1,bndrynumu(1,1),ipp,w)
      call rdft(n1,-1,bndrynumu(1,2),ipp,w)
      
      
c      call date_and_time(real_clock(1),real_clock(2))
c      print *, real_clock(1), real_clock(2)


      do i = 1, nphi
	do j = 1, nmu
      do k = 1, nnu
	  data3(i,j,k)=data3(i,j,k)*factorn
      end do
	end do
	end do

      do i = 1, nphi
      do j = 1, nmu
      bndrynu(i,j,1)=bndrynu(i,j,1)*factorn
      bndrynu(i,j,2)=bndrynu(i,j,2)*factorn
      end do
      end do
      
      do i = 1, nphi
      do k = 1, nnu
      bndrymu(i,k)=bndrymu(i,k)*factorn
      end do
      end do
      
      do i = 1, nphi
      bndrynumu(i,1)=bndrynumu(i,1)*factorn
      bndrynumu(i,2)=bndrynumu(i,2)*factorn
      end do

      return
      end

c     file blktri.f
c
c  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
c  .                                                             .
c  .                  copyright (c) 1999 by UCAR                 .
c  .                                                             .
c  .       UNIVERSITY CORPORATION for ATMOSPHERIC RESEARCH       .
c  .                                                             .
c  .                      all rights reserved                    .
c  .                                                             .
c  .                                                             .
c  .                      FISHPACK version 4.0                   .
c  .                                                             .
c  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
c
c
C
C     * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
C     *                                                               *
C     *                        F I S H P A C K                        *
C     *                                                               *
C     *                                                               *
C     *     A PACKAGE OF FORTRAN SUBPROGRAMS FOR THE SOLUTION OF      *
C     *                                                               *
C     *      SEPARABLE ELLIPTIC PARTIAL DIFFERENTIAL EQUATIONS        *
C     *                                                               *
C     *                  (VERSION 4.0 , JUNE 1999)                    *
C     *                                                               *
C     *                             BY                                *
C     *                                                               *
C     *        JOHN ADAMS, PAUL SWARZTRAUBER AND ROLAND SWEET         *
C     *                                                               *
C     *                             OF                                *
C     *                                                               *
C     *         THE NATIONAL CENTER FOR ATMOSPHERIC RESEARCH          *
C     *                                                               *
C     *                BOULDER, COLORADO  (80307)  U.S.A.             *
C     *                                                               *
C     *                   WHICH IS SPONSORED BY                       *
C     *                                                               *
C     *              THE NATIONAL SCIENCE FOUNDATION                  *
C     *                                                               *
C     * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
C
C
c      SUBROUTINE BLKTRI (IFLG,NP,N,AN,BN,CN,MP,M,AM,BM,CM,IDIMY,Y,      
c     +                   IERROR,W)
C

! Fast Fourier/Cosine/Sine Transform
!     dimension   :one
!     data length :power of 2
!     decimation  :frequency
!     radix       :split-radix
!     data        :inplace
!     table       :use
! subroutines
!     cdft: Complex Discrete Fourier Transform
!     rdft: Real Discrete Fourier Transform
!     ddct: Discrete Cosine Transform
!     ddst: Discrete Sine Transform
!     dfct: Cosine Transform of RDFT (Real Symmetric DFT)
!     dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
!
!
! -------- Complex DFT (Discrete Fourier Transform) --------
!     [definition]
!         <case1>
!             X(k) = sum_j=0^n-1 x(j)*exp(2*pi*i*j*k/n), 0<=k<n
!         <case2>
!             X(k) = sum_j=0^n-1 x(j)*exp(-2*pi*i*j*k/n), 0<=k<n
!         (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
!     [usage]
!         <case1>
!             ip(0) = 0  ! first time only
!             call cdft(2*n, 1, a, ip, w)
!         <case2>
!             ip(0) = 0  ! first time only
!             call cdft(2*n, -1, a, ip, w)
!     [parameters]
!         2*n          :data length (integer)
!                       n >= 1, n = power of 2
!         a(0:2*n-1)   :input/output data (real*8)
!                       input data
!                           a(2*j) = Re(x(j)),
!                           a(2*j+1) = Im(x(j)), 0<=j<n
!                       output data
!                           a(2*k) = Re(X(k)),
!                           a(2*k+1) = Im(X(k)), 0<=k<n
!         ip(0:*)      :work area for bit reversal (integer)
!                       length of ip >= 2+sqrt(n)
!                       strictly,
!                       length of ip >=
!                           2+2**(int(log(n+0.5)/log(2.0))/2).
!                       ip(0),ip(1) are pointers of the cos/sin table.
!         w(0:n/2-1)   :cos/sin table (real*8)
!                       w(),ip() are initialized if ip(0) = 0.
!     [remark]
!         Inverse of
!             call cdft(2*n, -1, a, ip, w)
!         is
!             call cdft(2*n, 1, a, ip, w)
!             do j = 0, 2 * n - 1
!                 a(j) = a(j) / n
!             end do
!         .
!
!
! -------- Real DFT / Inverse of Real DFT --------
!     [definition]
!         <case1> RDFT
!             R(k) = sum_j=0^n-1 a(j)*cos(2*pi*j*k/n), 0<=k<=n/2
!             I(k) = sum_j=0^n-1 a(j)*sin(2*pi*j*k/n), 0<k<n/2
!         <case2> IRDFT (excluding scale)
!             a(k) = (R(0) + R(n/2)*cos(pi*k))/2 +
!                    sum_j=1^n/2-1 R(j)*cos(2*pi*j*k/n) +
!                    sum_j=1^n/2-1 I(j)*sin(2*pi*j*k/n), 0<=k<n
!     [usage]
!         <case1>
!             ip(0) = 0  ! first time only
!             call rdft(n, 1, a, ip, w)
!         <case2>
!             ip(0) = 0  ! first time only
!             call rdft(n, -1, a, ip, w)
!     [parameters]
!         n            :data length (integer)
!                       n >= 2, n = power of 2
!         a(0:n-1)     :input/output data (real*8)
!                       <case1>
!                           output data
!                               a(2*k) = R(k), 0<=k<n/2
!                               a(2*k+1) = I(k), 0<k<n/2
!                               a(1) = R(n/2)
!                       <case2>
!                           input data
!                               a(2*j) = R(j), 0<=j<n/2
!                               a(2*j+1) = I(j), 0<j<n/2
!                               a(1) = R(n/2)
!         ip(0:*)      :work area for bit reversal (integer)
!                       length of ip >= 2+sqrt(n/2)
!                       strictly,
!                       length of ip >=
!                           2+2**(int(log(n/2+0.5)/log(2.0))/2).
!                       ip(0),ip(1) are pointers of the cos/sin table.
!         w(0:n/2-1)   :cos/sin table (real*8)
!                       w(),ip() are initialized if ip(0) = 0.
!     [remark]
!         Inverse of
!             call rdft(n, 1, a, ip, w)
!         is
!             call rdft(n, -1, a, ip, w)
!             do j = 0, n - 1
!                 a(j) = a(j) * 2 / n
!             end do
!         .
!
!
! -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
!     [definition]
!         <case1> IDCT (excluding scale)
!             C(k) = sum_j=0^n-1 a(j)*cos(pi*j*(k+1/2)/n), 0<=k<n
!         <case2> DCT
!             C(k) = sum_j=0^n-1 a(j)*cos(pi*(j+1/2)*k/n), 0<=k<n
!     [usage]
!         <case1>
!             ip(0) = 0  ! first time only
!             call ddct(n, 1, a, ip, w)
!         <case2>
!             ip(0) = 0  ! first time only
!             call ddct(n, -1, a, ip, w)
!     [parameters]
!         n            :data length (integer)
!                       n >= 2, n = power of 2
!         a(0:n-1)     :input/output data (real*8)
!                       output data
!                           a(k) = C(k), 0<=k<n
!         ip(0:*)      :work area for bit reversal (integer)
!                       length of ip >= 2+sqrt(n/2)
!                       strictly,
!                       length of ip >=
!                           2+2**(int(log(n/2+0.5)/log(2.0))/2).
!                       ip(0),ip(1) are pointers of the cos/sin table.
!         w(0:n*5/4-1) :cos/sin table (real*8)
!                       w(),ip() are initialized if ip(0) = 0.
!     [remark]
!         Inverse of
!             call ddct(n, -1, a, ip, w)
!         is
!             a(0) = a(0) / 2
!             call ddct(n, 1, a, ip, w)
!             do j = 0, n - 1
!                 a(j) = a(j) * 2 / n
!             end do
!         .
!
!
! -------- DST (Discrete Sine Transform) / Inverse of DST --------
!     [definition]
!         <case1> IDST (excluding scale)
!             S(k) = sum_j=1^n A(j)*sin(pi*j*(k+1/2)/n), 0<=k<n
!         <case2> DST
!             S(k) = sum_j=0^n-1 a(j)*sin(pi*(j+1/2)*k/n), 0<k<=n
!     [usage]
!         <case1>
!             ip(0) = 0  ! first time only
!             call ddst(n, 1, a, ip, w)
!         <case2>
!             ip(0) = 0  ! first time only
!             call ddst(n, -1, a, ip, w)
!     [parameters]
!         n            :data length (integer)
!                       n >= 2, n = power of 2
!         a(0:n-1)     :input/output data (real*8)
!                       <case1>
!                           input data
!                               a(j) = A(j), 0<j<n
!                               a(0) = A(n)
!                           output data
!                               a(k) = S(k), 0<=k<n
!                       <case2>
!                           output data
!                               a(k) = S(k), 0<k<n
!                               a(0) = S(n)
!         ip(0:*)      :work area for bit reversal (integer)
!                       length of ip >= 2+sqrt(n/2)
!                       strictly,
!                       length of ip >=
!                           2+2**(int(log(n/2+0.5)/log(2.0))/2).
!                       ip(0),ip(1) are pointers of the cos/sin table.
!         w(0:n*5/4-1) :cos/sin table (real*8)
!                       w(),ip() are initialized if ip(0) = 0.
!     [remark]
!         Inverse of
!             call ddst(n, -1, a, ip, w)
!         is
!             a(0) = a(0) / 2
!             call ddst(n, 1, a, ip, w)
!             do j = 0, n - 1
!                 a(j) = a(j) * 2 / n
!             end do
!         .
!
!
! -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
!     [definition]
!         C(k) = sum_j=0^n a(j)*cos(pi*j*k/n), 0<=k<=n
!     [usage]
!         ip(0) = 0  ! first time only
!         call dfct(n, a, t, ip, w)
!     [parameters]
!         n            :data length - 1 (integer)
!                       n >= 2, n = power of 2
!         a(0:n)       :input/output data (real*8)
!                       output data
!                           a(k) = C(k), 0<=k<=n
!         t(0:n/2)     :work area (real*8)
!         ip(0:*)      :work area for bit reversal (integer)
!                       length of ip >= 2+sqrt(n/4)
!                       strictly,
!                       length of ip >=
!                           2+2**(int(log(n/4+0.5)/log(2.0))/2).
!                       ip(0),ip(1) are pointers of the cos/sin table.
!         w(0:n*5/8-1) :cos/sin table (real*8)
!                       w(),ip() are initialized if ip(0) = 0.
!     [remark]
!         Inverse of
!             a(0) = a(0) / 2
!             a(n) = a(n) / 2
!             call dfct(n, a, t, ip, w)
!         is
!             a(0) = a(0) / 2
!             a(n) = a(n) / 2
!             call dfct(n, a, t, ip, w)
!             do j = 0, n
!                 a(j) = a(j) * 2 / n
!             end do
!         .
!
!
! -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
!     [definition]
!         S(k) = sum_j=1^n-1 a(j)*sin(pi*j*k/n), 0<k<n
!     [usage]
!         ip(0) = 0  ! first time only
!         call dfst(n, a, t, ip, w)
!     [parameters]
!         n            :data length + 1 (integer)
!                       n >= 2, n = power of 2
!         a(0:n-1)     :input/output data (real*8)
!                       output data
!                           a(k) = S(k), 0<k<n
!                       (a(0) is used for work area)
!         t(0:n/2-1)   :work area (real*8)
!         ip(0:*)      :work area for bit reversal (integer)
!                       length of ip >= 2+sqrt(n/4)
!                       strictly,
!                       length of ip >=
!                           2+2**(int(log(n/4+0.5)/log(2.0))/2).
!                       ip(0),ip(1) are pointers of the cos/sin table.
!         w(0:n*5/8-1) :cos/sin table (real*8)
!                       w(),ip() are initialized if ip(0) = 0.
!     [remark]
!         Inverse of
!             call dfst(n, a, t, ip, w)
!         is
!             call dfst(n, a, t, ip, w)
!             do j = 1, n - 1
!                 a(j) = a(j) * 2 / n
!             end do
!         .
!
!
! Appendix :
!     The cos/sin table is recalculated when the larger table required.
!     w() and ip() are compatible with all routines.
!
!
      subroutine cdft(n, isgn, a, ip, w)
      integer n, isgn, ip(0 : *), nw
      real*8 a(0 : n - 1), w(0 : *)
      nw = ip(0)
      if (n .gt. 4 * nw) then
          nw = n / 4
          call makewt(nw, ip, w)
      end if
      if (isgn .ge. 0) then
          call cftfsub(n, a, ip, nw, w)
      else
          call cftbsub(n, a, ip, nw, w)
      end if
      end
!
      subroutine rdft(n, isgn, a, ip, w)
      integer n, isgn, ip(0 : *), nw, nc
      real*8 a(0 : n - 1), w(0 : *), xi
      nw = ip(0)
      if (n .gt. 4 * nw) then
          nw = n / 4
          call makewt(nw, ip, w)
      end if
      nc = ip(1)
      if (n .gt. 4 * nc) then
          nc = n / 4
          call makect(nc, ip, w(nw))
      end if
      if (isgn .ge. 0) then
          if (n .gt. 4) then
              call cftfsub(n, a, ip, nw, w)
              call rftfsub(n, a, nc, w(nw))
          else if (n .eq. 4) then
              call cftfsub(n, a, ip, nw, w)
          end if
          xi = a(0) - a(1)
          a(0) = a(0) + a(1)
          a(1) = xi
      else
          a(1) = 0.5d0 * (a(0) - a(1))
          a(0) = a(0) - a(1)
          if (n .gt. 4) then
              call rftbsub(n, a, nc, w(nw))
              call cftbsub(n, a, ip, nw, w)
          else if (n .eq. 4) then
              call cftbsub(n, a, ip, nw, w)
          end if
      end if
      end
!
      subroutine ddct(n, isgn, a, ip, w)
      integer n, isgn, ip(0 : *), j, nw, nc
      real*8 a(0 : n - 1), w(0 : *), xr
      nw = ip(0)
      if (n .gt. 4 * nw) then
          nw = n / 4
          call makewt(nw, ip, w)
      end if
      nc = ip(1)
      if (n .gt. nc) then
          nc = n
          call makect(nc, ip, w(nw))
      end if
      if (isgn .lt. 0) then
          xr = a(n - 1)
          do j = n - 2, 2, -2
              a(j + 1) = a(j) - a(j - 1)
              a(j) = a(j) + a(j - 1)
          end do
          a(1) = a(0) - xr
          a(0) = a(0) + xr
          if (n .gt. 4) then
              call rftbsub(n, a, nc, w(nw))
              call cftbsub(n, a, ip, nw, w)
          else if (n .eq. 4) then
              call cftbsub(n, a, ip, nw, w)
          end if
      end if
      call dctsub(n, a, nc, w(nw))
      if (isgn .ge. 0) then
          if (n .gt. 4) then
              call cftfsub(n, a, ip, nw, w)
              call rftfsub(n, a, nc, w(nw))
          else if (n .eq. 4) then
              call cftfsub(n, a, ip, nw, w)
          end if
          xr = a(0) - a(1)
          a(0) = a(0) + a(1)
          do j = 2, n - 2, 2
              a(j - 1) = a(j) - a(j + 1)
              a(j) = a(j) + a(j + 1)
          end do
          a(n - 1) = xr
      end if
      end
!
      subroutine ddst(n, isgn, a, ip, w)
      integer n, isgn, ip(0 : *), j, nw, nc
      real*8 a(0 : n - 1), w(0 : *), xr
      nw = ip(0)
      if (n .gt. 4 * nw) then
          nw = n / 4
          call makewt(nw, ip, w)
      end if
      nc = ip(1)
      if (n .gt. nc) then
          nc = n
          call makect(nc, ip, w(nw))
      end if
      if (isgn .lt. 0) then
          xr = a(n - 1)
          do j = n - 2, 2, -2
              a(j + 1) = -a(j) - a(j - 1)
              a(j) = a(j) - a(j - 1)
          end do
          a(1) = a(0) + xr
          a(0) = a(0) - xr
          if (n .gt. 4) then
              call rftbsub(n, a, nc, w(nw))
              call cftbsub(n, a, ip, nw, w)
          else if (n .eq. 4) then
              call cftbsub(n, a, ip, nw, w)
          end if
      end if
      call dstsub(n, a, nc, w(nw))
      if (isgn .ge. 0) then
          if (n .gt. 4) then
              call cftfsub(n, a, ip, nw, w)
              call rftfsub(n, a, nc, w(nw))
          else if (n .eq. 4) then
              call cftfsub(n, a, ip, nw, w)
          end if
          xr = a(0) - a(1)
          a(0) = a(0) + a(1)
          do j = 2, n - 2, 2
              a(j - 1) = -a(j) - a(j + 1)
              a(j) = a(j) - a(j + 1)
          end do
          a(n - 1) = -xr
      end if
      end
!
      subroutine dfct(n, a, t, ip, w)
      integer n, ip(0 : *), j, k, l, m, mh, nw, nc
      real*8 a(0 : n), t(0 : n / 2), w(0 : *), xr, xi, yr, yi
      nw = ip(0)
      if (n .gt. 8 * nw) then
          nw = n / 8
          call makewt(nw, ip, w)
      end if
      nc = ip(1)
      if (n .gt. 2 * nc) then
          nc = n / 2
          call makect(nc, ip, w(nw))
      end if
      m = n / 2
      yi = a(m)
      xi = a(0) + a(n)
      a(0) = a(0) - a(n)
      t(0) = xi - yi
      t(m) = xi + yi
      if (n .gt. 2) then
          mh = m / 2
          do j = 1, mh - 1
              k = m - j
              xr = a(j) - a(n - j)
              xi = a(j) + a(n - j)
              yr = a(k) - a(n - k)
              yi = a(k) + a(n - k)
              a(j) = xr
              a(k) = yr
              t(j) = xi - yi
              t(k) = xi + yi
          end do
          t(mh) = a(mh) + a(n - mh)
          a(mh) = a(mh) - a(n - mh)
          call dctsub(m, a, nc, w(nw))
          if (m .gt. 4) then
              call cftfsub(m, a, ip, nw, w)
              call rftfsub(m, a, nc, w(nw))
          else if (m .eq. 4) then
              call cftfsub(m, a, ip, nw, w)
          end if
          a(n - 1) = a(0) - a(1)
          a(1) = a(0) + a(1)
          do j = m - 2, 2, -2
              a(2 * j + 1) = a(j) + a(j + 1)
              a(2 * j - 1) = a(j) - a(j + 1)
          end do
          l = 2
          m = mh
          do while (m .ge. 2)
              call dctsub(m, t, nc, w(nw))
              if (m .gt. 4) then
                  call cftfsub(m, t, ip, nw, w)
                  call rftfsub(m, t, nc, w(nw))
              else if (m .eq. 4) then
                  call cftfsub(m, t, ip, nw, w)
              end if
              a(n - l) = t(0) - t(1)
              a(l) = t(0) + t(1)
              k = 0
              do j = 2, m - 2, 2
                  k = k + 4 * l
                  a(k - l) = t(j) - t(j + 1)
                  a(k + l) = t(j) + t(j + 1)
              end do
              l = 2 * l
              mh = m / 2
              do j = 0, mh - 1
                  k = m - j
                  t(j) = t(m + k) - t(m + j)
                  t(k) = t(m + k) + t(m + j)
              end do
              t(mh) = t(m + mh)
              m = mh
          end do
          a(l) = t(0)
          a(n) = t(2) - t(1)
          a(0) = t(2) + t(1)
      else
          a(1) = a(0)
          a(2) = t(0)
          a(0) = t(1)
      end if
      end
!
      subroutine dfst(n, a, t, ip, w)
      integer n, ip(0 : *), j, k, l, m, mh, nw, nc
      real*8 a(0 : n - 1), t(0 : n / 2 - 1), w(0 : *), xr, xi, yr, yi
      nw = ip(0)
      if (n .gt. 8 * nw) then
          nw = n / 8
          call makewt(nw, ip, w)
      end if
      nc = ip(1)
      if (n .gt. 2 * nc) then
          nc = n / 2
          call makect(nc, ip, w(nw))
      end if
      if (n .gt. 2) then
          m = n / 2
          mh = m / 2
          do j = 1, mh - 1
              k = m - j
              xr = a(j) + a(n - j)
              xi = a(j) - a(n - j)
              yr = a(k) + a(n - k)
              yi = a(k) - a(n - k)
              a(j) = xr
              a(k) = yr
              t(j) = xi + yi
              t(k) = xi - yi
          end do
          t(0) = a(mh) - a(n - mh)
          a(mh) = a(mh) + a(n - mh)
          a(0) = a(m)
          call dstsub(m, a, nc, w(nw))
          if (m .gt. 4) then
              call cftfsub(m, a, ip, nw, w)
              call rftfsub(m, a, nc, w(nw))
          else if (m .eq. 4) then
              call cftfsub(m, a, ip, nw, w)
          end if
          a(n - 1) = a(1) - a(0)
          a(1) = a(0) + a(1)
          do j = m - 2, 2, -2
              a(2 * j + 1) = a(j) - a(j + 1)
              a(2 * j - 1) = -a(j) - a(j + 1)
          end do
          l = 2
          m = mh
          do while (m .ge. 2)
              call dstsub(m, t, nc, w(nw))
              if (m .gt. 4) then
                  call cftfsub(m, t, ip, nw, w)
                  call rftfsub(m, t, nc, w(nw))
              else if (m .eq. 4) then
                  call cftfsub(m, t, ip, nw, w)
              end if
              a(n - l) = t(1) - t(0)
              a(l) = t(0) + t(1)
              k = 0
              do j = 2, m - 2, 2
                  k = k + 4 * l
                  a(k - l) = -t(j) - t(j + 1)
                  a(k + l) = t(j) - t(j + 1)
              end do
              l = 2 * l
              mh = m / 2
              do j = 1, mh - 1
                  k = m - j
                  t(j) = t(m + k) + t(m + j)
                  t(k) = t(m + k) - t(m + j)
              end do
              t(0) = t(m + mh)
              m = mh
          end do
          a(l) = t(0)
      end if
      a(0) = 0
      end
!
! -------- initializing routines --------
!
      subroutine makewt(nw, ip, w)
      integer nw, ip(0 : *), j, nwh, nw0, nw1
      real*8 w(0 : nw - 1), delta, wn4r, wk1r, wk1i, wk3r, wk3i
      ip(0) = nw
      ip(1) = 1
      if (nw .gt. 2) then
          nwh = nw / 2
          delta = atan(1.0d0) / nwh
          wn4r = cos(delta * nwh)
          w(0) = 1
          w(1) = wn4r
          if (nwh .eq. 4) then
              w(2) = cos(delta * 2)
              w(3) = sin(delta * 2)
          else if (nwh .gt. 4) then
              call makeipt(nw, ip)
              w(2) = 0.5d0 / cos(delta * 2)
              w(3) = 0.5d0 / cos(delta * 6)
              do j = 4, nwh - 4, 4
                  w(j) = cos(delta * j)
                  w(j + 1) = sin(delta * j)
                  w(j + 2) = cos(3 * delta * j)
                  w(j + 3) = -sin(3 * delta * j)
              end do
          end if
          nw0 = 0
          do while (nwh .gt. 2)
              nw1 = nw0 + nwh
              nwh = nwh / 2
              w(nw1) = 1
              w(nw1 + 1) = wn4r
              if (nwh .eq. 4) then
                  wk1r = w(nw0 + 4)
                  wk1i = w(nw0 + 5)
                  w(nw1 + 2) = wk1r
                  w(nw1 + 3) = wk1i
              else if (nwh .gt. 4) then
                  wk1r = w(nw0 + 4)
                  wk3r = w(nw0 + 6)
                  w(nw1 + 2) = 0.5d0 / wk1r
                  w(nw1 + 3) = 0.5d0 / wk3r
                  do j = 4, nwh - 4, 4
                      wk1r = w(nw0 + 2 * j)
                      wk1i = w(nw0 + 2 * j + 1)
                      wk3r = w(nw0 + 2 * j + 2)
                      wk3i = w(nw0 + 2 * j + 3)
                      w(nw1 + j) = wk1r
                      w(nw1 + j + 1) = wk1i
                      w(nw1 + j + 2) = wk3r
                      w(nw1 + j + 3) = wk3i
                  end do
              end if
              nw0 = nw1
          end do
      end if
      end
!
      subroutine makeipt(nw, ip)
      integer nw, ip(0 : *), j, l, m, m2, p, q
      ip(2) = 0
      ip(3) = 16
      m = 2
      l = nw
      do while (l .gt. 32)
          m2 = 2 * m
          q = 8 * m2
          do j = m, m2 - 1
              p = 4 * ip(j)
              ip(m + j) = p
              ip(m2 + j) = p + q
          end do
          m = m2
          l = l / 4
      end do
      end
!
      subroutine makect(nc, ip, c)
      integer nc, ip(0 : *), j, nch
      real*8 c(0 : nc - 1), delta
      ip(1) = nc
      if (nc .gt. 1) then
          nch = nc / 2
          delta = atan(1.0d0) / nch
          c(0) = cos(delta * nch)
          c(nch) = 0.5d0 * c(0)
          do j = 1, nch - 1
              c(j) = 0.5d0 * cos(delta * j)
              c(nc - j) = 0.5d0 * sin(delta * j)
          end do
      end if
      end
!
! -------- child routines --------
!
      subroutine cftfsub(n, a, ip, nw, w)
      integer n, ip(0 : *), nw
      real*8 a(0 : n - 1), w(0 : nw - 1)
      if (n .gt. 8) then
          if (n .gt. 32) then
              call cftf1st(n, a, w(nw - n / 4))
              if (n .gt. 512) then
                  call cftrec4(n, a, nw, w)
              else if (n .gt. 128) then
                  call cftleaf(n, 1, a, nw, w)
              else
                  call cftfx41(n, a, nw, w)
              end if
              call bitrv2(n, ip, a)
          else if (n .eq. 32) then
              call cftf161(a, w(nw - 8))
              call bitrv216(a)
          else
              call cftf081(a, w)
              call bitrv208(a)
          end if
      else if (n .eq. 8) then
          call cftf040(a)
      else if (n .eq. 4) then
          call cftx020(a)
      end if
      end
!
      subroutine cftbsub(n, a, ip, nw, w)
      integer n, ip(0 : *), nw
      real*8 a(0 : n - 1), w(0 : nw - 1)
      if (n .gt. 8) then
          if (n .gt. 32) then
              call cftb1st(n, a, w(nw - n / 4))
              if (n .gt. 512) then
                  call cftrec4(n, a, nw, w)
              else if (n .gt. 128) then
                  call cftleaf(n, 1, a, nw, w)
              else
                  call cftfx41(n, a, nw, w)
              end if
              call bitrv2conj(n, ip, a)
          else if (n .eq. 32) then
              call cftf161(a, w(nw - 8))
              call bitrv216neg(a)
          else
              call cftf081(a, w)
              call bitrv208neg(a)
          end if
      else if (n .eq. 8) then
          call cftb040(a)
      else if (n .eq. 4) then
          call cftx020(a)
      end if
      end
!
      subroutine bitrv2(n, ip, a)
      integer n, ip(0 : *), j, j1, k, k1, l, m, nh, nm
      real*8 a(0 : n - 1), xr, xi, yr, yi
      m = 1
      l = n / 4
      do while (l .gt. 8)
          m = m * 2
          l = l / 4
      end do
      nh = n / 2
      nm = 4 * m
      if (l .eq. 8) then
          do k = 0, m - 1
              do j = 0, k - 1
                  j1 = 4 * j + 2 * ip(m + k)
                  k1 = 4 * k + 2 * ip(m + j)
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 + 2 * nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 - nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 + 2 * nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nh
                  k1 = k1 + 2
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 - 2 * nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 + nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 - 2 * nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + 2
                  k1 = k1 + nh
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 + 2 * nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 - nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 + 2 * nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nh
                  k1 = k1 - 2
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 - 2 * nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 + nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 - 2 * nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
              end do
              k1 = 4 * k + 2 * ip(m + k)
              j1 = k1 + 2
              k1 = k1 + nh
              xr = a(j1)
              xi = a(j1 + 1)
              yr = a(k1)
              yi = a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              j1 = j1 + nm
              k1 = k1 + 2 * nm
              xr = a(j1)
              xi = a(j1 + 1)
              yr = a(k1)
              yi = a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              j1 = j1 + nm
              k1 = k1 - nm
              xr = a(j1)
              xi = a(j1 + 1)
              yr = a(k1)
              yi = a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              j1 = j1 - 2
              k1 = k1 - nh
              xr = a(j1)
              xi = a(j1 + 1)
              yr = a(k1)
              yi = a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              j1 = j1 + nh + 2
              k1 = k1 + nh + 2
              xr = a(j1)
              xi = a(j1 + 1)
              yr = a(k1)
              yi = a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              j1 = j1 - nh + nm
              k1 = k1 + 2 * nm - 2
              xr = a(j1)
              xi = a(j1 + 1)
              yr = a(k1)
              yi = a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
          end do
      else
          do k = 0, m - 1
              do j = 0, k - 1
                  j1 = 4 * j + ip(m + k)
                  k1 = 4 * k + ip(m + j)
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 + nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nh
                  k1 = k1 + 2
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 - nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + 2
                  k1 = k1 + nh
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 + nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nh
                  k1 = k1 - 2
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 - nm
                  xr = a(j1)
                  xi = a(j1 + 1)
                  yr = a(k1)
                  yi = a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
              end do
              k1 = 4 * k + ip(m + k)
              j1 = k1 + 2
              k1 = k1 + nh
              xr = a(j1)
              xi = a(j1 + 1)
              yr = a(k1)
              yi = a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              j1 = j1 + nm
              k1 = k1 + nm
              xr = a(j1)
              xi = a(j1 + 1)
              yr = a(k1)
              yi = a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
          end do
      end if
      end
!
      subroutine bitrv2conj(n, ip, a)
      integer n, ip(0 : *), j, j1, k, k1, l, m, nh, nm
      real*8 a(0 : n - 1), xr, xi, yr, yi
      m = 1
      l = n / 4
      do while (l .gt. 8)
          m = m * 2
          l = l / 4
      end do
      nh = n / 2
      nm = 4 * m
      if (l .eq. 8) then
          do k = 0, m - 1
              do j = 0, k - 1
                  j1 = 4 * j + 2 * ip(m + k)
                  k1 = 4 * k + 2 * ip(m + j)
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 + 2 * nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 - nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 + 2 * nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nh
                  k1 = k1 + 2
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 - 2 * nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 + nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 - 2 * nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + 2
                  k1 = k1 + nh
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 + 2 * nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 - nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 + 2 * nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nh
                  k1 = k1 - 2
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 - 2 * nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 + nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 - 2 * nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
              end do
              k1 = 4 * k + 2 * ip(m + k)
              j1 = k1 + 2
              k1 = k1 + nh
              a(j1 - 1) = -a(j1 - 1)
              xr = a(j1)
              xi = -a(j1 + 1)
              yr = a(k1)
              yi = -a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              a(k1 + 3) = -a(k1 + 3)
              j1 = j1 + nm
              k1 = k1 + 2 * nm
              xr = a(j1)
              xi = -a(j1 + 1)
              yr = a(k1)
              yi = -a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              j1 = j1 + nm
              k1 = k1 - nm
              xr = a(j1)
              xi = -a(j1 + 1)
              yr = a(k1)
              yi = -a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              j1 = j1 - 2
              k1 = k1 - nh
              xr = a(j1)
              xi = -a(j1 + 1)
              yr = a(k1)
              yi = -a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              j1 = j1 + nh + 2
              k1 = k1 + nh + 2
              xr = a(j1)
              xi = -a(j1 + 1)
              yr = a(k1)
              yi = -a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              j1 = j1 - nh + nm
              k1 = k1 + 2 * nm - 2
              a(j1 - 1) = -a(j1 - 1)
              xr = a(j1)
              xi = -a(j1 + 1)
              yr = a(k1)
              yi = -a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              a(k1 + 3) = -a(k1 + 3)
          end do
      else
          do k = 0, m - 1
              do j = 0, k - 1
                  j1 = 4 * j + ip(m + k)
                  k1 = 4 * k + ip(m + j)
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 + nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nh
                  k1 = k1 + 2
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 - nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + 2
                  k1 = k1 + nh
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 + nm
                  k1 = k1 + nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nh
                  k1 = k1 - 2
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
                  j1 = j1 - nm
                  k1 = k1 - nm
                  xr = a(j1)
                  xi = -a(j1 + 1)
                  yr = a(k1)
                  yi = -a(k1 + 1)
                  a(j1) = yr
                  a(j1 + 1) = yi
                  a(k1) = xr
                  a(k1 + 1) = xi
              end do
              k1 = 4 * k + ip(m + k)
              j1 = k1 + 2
              k1 = k1 + nh
              a(j1 - 1) = -a(j1 - 1)
              xr = a(j1)
              xi = -a(j1 + 1)
              yr = a(k1)
              yi = -a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              a(k1 + 3) = -a(k1 + 3)
              j1 = j1 + nm
              k1 = k1 + nm
              a(j1 - 1) = -a(j1 - 1)
              xr = a(j1)
              xi = -a(j1 + 1)
              yr = a(k1)
              yi = -a(k1 + 1)
              a(j1) = yr
              a(j1 + 1) = yi
              a(k1) = xr
              a(k1 + 1) = xi
              a(k1 + 3) = -a(k1 + 3)
          end do
      end if
      end
!
      subroutine bitrv216(a)
      real*8 a(0 : 31), x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i
      real*8 x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i
      real*8 x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i
      x1r = a(2)
      x1i = a(3)
      x2r = a(4)
      x2i = a(5)
      x3r = a(6)
      x3i = a(7)
      x4r = a(8)
      x4i = a(9)
      x5r = a(10)
      x5i = a(11)
      x7r = a(14)
      x7i = a(15)
      x8r = a(16)
      x8i = a(17)
      x10r = a(20)
      x10i = a(21)
      x11r = a(22)
      x11i = a(23)
      x12r = a(24)
      x12i = a(25)
      x13r = a(26)
      x13i = a(27)
      x14r = a(28)
      x14i = a(29)
      a(2) = x8r
      a(3) = x8i
      a(4) = x4r
      a(5) = x4i
      a(6) = x12r
      a(7) = x12i
      a(8) = x2r
      a(9) = x2i
      a(10) = x10r
      a(11) = x10i
      a(14) = x14r
      a(15) = x14i
      a(16) = x1r
      a(17) = x1i
      a(20) = x5r
      a(21) = x5i
      a(22) = x13r
      a(23) = x13i
      a(24) = x3r
      a(25) = x3i
      a(26) = x11r
      a(27) = x11i
      a(28) = x7r
      a(29) = x7i
      end
!
      subroutine bitrv216neg(a)
      real*8 a(0 : 31), x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i
      real*8 x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i
      real*8 x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i
      real*8 x13r, x13i, x14r, x14i, x15r, x15i
      x1r = a(2)
      x1i = a(3)
      x2r = a(4)
      x2i = a(5)
      x3r = a(6)
      x3i = a(7)
      x4r = a(8)
      x4i = a(9)
      x5r = a(10)
      x5i = a(11)
      x6r = a(12)
      x6i = a(13)
      x7r = a(14)
      x7i = a(15)
      x8r = a(16)
      x8i = a(17)
      x9r = a(18)
      x9i = a(19)
      x10r = a(20)
      x10i = a(21)
      x11r = a(22)
      x11i = a(23)
      x12r = a(24)
      x12i = a(25)
      x13r = a(26)
      x13i = a(27)
      x14r = a(28)
      x14i = a(29)
      x15r = a(30)
      x15i = a(31)
      a(2) = x15r
      a(3) = x15i
      a(4) = x7r
      a(5) = x7i
      a(6) = x11r
      a(7) = x11i
      a(8) = x3r
      a(9) = x3i
      a(10) = x13r
      a(11) = x13i
      a(12) = x5r
      a(13) = x5i
      a(14) = x9r
      a(15) = x9i
      a(16) = x1r
      a(17) = x1i
      a(18) = x14r
      a(19) = x14i
      a(20) = x6r
      a(21) = x6i
      a(22) = x10r
      a(23) = x10i
      a(24) = x2r
      a(25) = x2i
      a(26) = x12r
      a(27) = x12i
      a(28) = x4r
      a(29) = x4i
      a(30) = x8r
      a(31) = x8i
      end
!
      subroutine bitrv208(a)
      real*8 a(0 : 15), x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i
      x1r = a(2)
      x1i = a(3)
      x3r = a(6)
      x3i = a(7)
      x4r = a(8)
      x4i = a(9)
      x6r = a(12)
      x6i = a(13)
      a(2) = x4r
      a(3) = x4i
      a(6) = x6r
      a(7) = x6i
      a(8) = x1r
      a(9) = x1i
      a(12) = x3r
      a(13) = x3i
      end
!
      subroutine bitrv208neg(a)
      real*8 a(0 : 15), x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i
      real*8 x5r, x5i, x6r, x6i, x7r, x7i
      x1r = a(2)
      x1i = a(3)
      x2r = a(4)
      x2i = a(5)
      x3r = a(6)
      x3i = a(7)
      x4r = a(8)
      x4i = a(9)
      x5r = a(10)
      x5i = a(11)
      x6r = a(12)
      x6i = a(13)
      x7r = a(14)
      x7i = a(15)
      a(2) = x7r
      a(3) = x7i
      a(4) = x3r
      a(5) = x3i
      a(6) = x5r
      a(7) = x5i
      a(8) = x1r
      a(9) = x1i
      a(10) = x6r
      a(11) = x6i
      a(12) = x2r
      a(13) = x2i
      a(14) = x4r
      a(15) = x4i
      end
!
      subroutine cftf1st(n, a, w)
      integer n, j, j0, j1, j2, j3, k, m, mh
      real*8 a(0 : n - 1), w(0 : *)
      real*8 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i
      real*8 wd1r, wd1i, wd3r, wd3i
      real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i
      real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i
      mh = n / 8
      m = 2 * mh
      j1 = m
      j2 = j1 + m
      j3 = j2 + m
      x0r = a(0) + a(j2)
      x0i = a(1) + a(j2 + 1)
      x1r = a(0) - a(j2)
      x1i = a(1) - a(j2 + 1)
      x2r = a(j1) + a(j3)
      x2i = a(j1 + 1) + a(j3 + 1)
      x3r = a(j1) - a(j3)
      x3i = a(j1 + 1) - a(j3 + 1)
      a(0) = x0r + x2r
      a(1) = x0i + x2i
      a(j1) = x0r - x2r
      a(j1 + 1) = x0i - x2i
      a(j2) = x1r - x3i
      a(j2 + 1) = x1i + x3r
      a(j3) = x1r + x3i
      a(j3 + 1) = x1i - x3r
      wn4r = w(1)
      csc1 = w(2)
      csc3 = w(3)
      wd1r = 1
      wd1i = 0
      wd3r = 1
      wd3i = 0
      k = 0
      do j = 2, mh - 6, 4
          k = k + 4
          wk1r = csc1 * (wd1r + w(k))
          wk1i = csc1 * (wd1i + w(k + 1))
          wk3r = csc3 * (wd3r + w(k + 2))
          wk3i = csc3 * (wd3i + w(k + 3))
          wd1r = w(k)
          wd1i = w(k + 1)
          wd3r = w(k + 2)
          wd3i = w(k + 3)
          j1 = j + m
          j2 = j1 + m
          j3 = j2 + m
          x0r = a(j) + a(j2)
          x0i = a(j + 1) + a(j2 + 1)
          x1r = a(j) - a(j2)
          x1i = a(j + 1) - a(j2 + 1)
          y0r = a(j + 2) + a(j2 + 2)
          y0i = a(j + 3) + a(j2 + 3)
          y1r = a(j + 2) - a(j2 + 2)
          y1i = a(j + 3) - a(j2 + 3)
          x2r = a(j1) + a(j3)
          x2i = a(j1 + 1) + a(j3 + 1)
          x3r = a(j1) - a(j3)
          x3i = a(j1 + 1) - a(j3 + 1)
          y2r = a(j1 + 2) + a(j3 + 2)
          y2i = a(j1 + 3) + a(j3 + 3)
          y3r = a(j1 + 2) - a(j3 + 2)
          y3i = a(j1 + 3) - a(j3 + 3)
          a(j) = x0r + x2r
          a(j + 1) = x0i + x2i
          a(j + 2) = y0r + y2r
          a(j + 3) = y0i + y2i
          a(j1) = x0r - x2r
          a(j1 + 1) = x0i - x2i
          a(j1 + 2) = y0r - y2r
          a(j1 + 3) = y0i - y2i
          x0r = x1r - x3i
          x0i = x1i + x3r
          a(j2) = wk1r * x0r - wk1i * x0i
          a(j2 + 1) = wk1r * x0i + wk1i * x0r
          x0r = y1r - y3i
          x0i = y1i + y3r
          a(j2 + 2) = wd1r * x0r - wd1i * x0i
          a(j2 + 3) = wd1r * x0i + wd1i * x0r
          x0r = x1r + x3i
          x0i = x1i - x3r
          a(j3) = wk3r * x0r + wk3i * x0i
          a(j3 + 1) = wk3r * x0i - wk3i * x0r
          x0r = y1r + y3i
          x0i = y1i - y3r
          a(j3 + 2) = wd3r * x0r + wd3i * x0i
          a(j3 + 3) = wd3r * x0i - wd3i * x0r
          j0 = m - j
          j1 = j0 + m
          j2 = j1 + m
          j3 = j2 + m
          x0r = a(j0) + a(j2)
          x0i = a(j0 + 1) + a(j2 + 1)
          x1r = a(j0) - a(j2)
          x1i = a(j0 + 1) - a(j2 + 1)
          y0r = a(j0 - 2) + a(j2 - 2)
          y0i = a(j0 - 1) + a(j2 - 1)
          y1r = a(j0 - 2) - a(j2 - 2)
          y1i = a(j0 - 1) - a(j2 - 1)
          x2r = a(j1) + a(j3)
          x2i = a(j1 + 1) + a(j3 + 1)
          x3r = a(j1) - a(j3)
          x3i = a(j1 + 1) - a(j3 + 1)
          y2r = a(j1 - 2) + a(j3 - 2)
          y2i = a(j1 - 1) + a(j3 - 1)
          y3r = a(j1 - 2) - a(j3 - 2)
          y3i = a(j1 - 1) - a(j3 - 1)
          a(j0) = x0r + x2r
          a(j0 + 1) = x0i + x2i
          a(j0 - 2) = y0r + y2r
          a(j0 - 1) = y0i + y2i
          a(j1) = x0r - x2r
          a(j1 + 1) = x0i - x2i
          a(j1 - 2) = y0r - y2r
          a(j1 - 1) = y0i - y2i
          x0r = x1r - x3i
          x0i = x1i + x3r
          a(j2) = wk1i * x0r - wk1r * x0i
          a(j2 + 1) = wk1i * x0i + wk1r * x0r
          x0r = y1r - y3i
          x0i = y1i + y3r
          a(j2 - 2) = wd1i * x0r - wd1r * x0i
          a(j2 - 1) = wd1i * x0i + wd1r * x0r
          x0r = x1r + x3i
          x0i = x1i - x3r
          a(j3) = wk3i * x0r + wk3r * x0i
          a(j3 + 1) = wk3i * x0i - wk3r * x0r
          x0r = y1r + y3i
          x0i = y1i - y3r
          a(j3 - 2) = wd3i * x0r + wd3r * x0i
          a(j3 - 1) = wd3i * x0i - wd3r * x0r
      end do
      wk1r = csc1 * (wd1r + wn4r)
      wk1i = csc1 * (wd1i + wn4r)
      wk3r = csc3 * (wd3r - wn4r)
      wk3i = csc3 * (wd3i - wn4r)
      j0 = mh
      j1 = j0 + m
      j2 = j1 + m
      j3 = j2 + m
      x0r = a(j0 - 2) + a(j2 - 2)
      x0i = a(j0 - 1) + a(j2 - 1)
      x1r = a(j0 - 2) - a(j2 - 2)
      x1i = a(j0 - 1) - a(j2 - 1)
      x2r = a(j1 - 2) + a(j3 - 2)
      x2i = a(j1 - 1) + a(j3 - 1)
      x3r = a(j1 - 2) - a(j3 - 2)
      x3i = a(j1 - 1) - a(j3 - 1)
      a(j0 - 2) = x0r + x2r
      a(j0 - 1) = x0i + x2i
      a(j1 - 2) = x0r - x2r
      a(j1 - 1) = x0i - x2i
      x0r = x1r - x3i
      x0i = x1i + x3r
      a(j2 - 2) = wk1r * x0r - wk1i * x0i
      a(j2 - 1) = wk1r * x0i + wk1i * x0r
      x0r = x1r + x3i
      x0i = x1i - x3r
      a(j3 - 2) = wk3r * x0r + wk3i * x0i
      a(j3 - 1) = wk3r * x0i - wk3i * x0r
      x0r = a(j0) + a(j2)
      x0i = a(j0 + 1) + a(j2 + 1)
      x1r = a(j0) - a(j2)
      x1i = a(j0 + 1) - a(j2 + 1)
      x2r = a(j1) + a(j3)
      x2i = a(j1 + 1) + a(j3 + 1)
      x3r = a(j1) - a(j3)
      x3i = a(j1 + 1) - a(j3 + 1)
      a(j0) = x0r + x2r
      a(j0 + 1) = x0i + x2i
      a(j1) = x0r - x2r
      a(j1 + 1) = x0i - x2i
      x0r = x1r - x3i
      x0i = x1i + x3r
      a(j2) = wn4r * (x0r - x0i)
      a(j2 + 1) = wn4r * (x0i + x0r)
      x0r = x1r + x3i
      x0i = x1i - x3r
      a(j3) = -wn4r * (x0r + x0i)
      a(j3 + 1) = -wn4r * (x0i - x0r)
      x0r = a(j0 + 2) + a(j2 + 2)
      x0i = a(j0 + 3) + a(j2 + 3)
      x1r = a(j0 + 2) - a(j2 + 2)
      x1i = a(j0 + 3) - a(j2 + 3)
      x2r = a(j1 + 2) + a(j3 + 2)
      x2i = a(j1 + 3) + a(j3 + 3)
      x3r = a(j1 + 2) - a(j3 + 2)
      x3i = a(j1 + 3) - a(j3 + 3)
      a(j0 + 2) = x0r + x2r
      a(j0 + 3) = x0i + x2i
      a(j1 + 2) = x0r - x2r
      a(j1 + 3) = x0i - x2i
      x0r = x1r - x3i
      x0i = x1i + x3r
      a(j2 + 2) = wk1i * x0r - wk1r * x0i
      a(j2 + 3) = wk1i * x0i + wk1r * x0r
      x0r = x1r + x3i
      x0i = x1i - x3r
      a(j3 + 2) = wk3i * x0r + wk3r * x0i
      a(j3 + 3) = wk3i * x0i - wk3r * x0r
      end
!
      subroutine cftb1st(n, a, w)
      integer n, j, j0, j1, j2, j3, k, m, mh
      real*8 a(0 : n - 1), w(0 : *)
      real*8 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i
      real*8 wd1r, wd1i, wd3r, wd3i
      real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i
      real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i
      mh = n / 8
      m = 2 * mh
      j1 = m
      j2 = j1 + m
      j3 = j2 + m
      x0r = a(0) + a(j2)
      x0i = -a(1) - a(j2 + 1)
      x1r = a(0) - a(j2)
      x1i = -a(1) + a(j2 + 1)
      x2r = a(j1) + a(j3)
      x2i = a(j1 + 1) + a(j3 + 1)
      x3r = a(j1) - a(j3)
      x3i = a(j1 + 1) - a(j3 + 1)
      a(0) = x0r + x2r
      a(1) = x0i - x2i
      a(j1) = x0r - x2r
      a(j1 + 1) = x0i + x2i
      a(j2) = x1r + x3i
      a(j2 + 1) = x1i + x3r
      a(j3) = x1r - x3i
      a(j3 + 1) = x1i - x3r
      wn4r = w(1)
      csc1 = w(2)
      csc3 = w(3)
      wd1r = 1
      wd1i = 0
      wd3r = 1
      wd3i = 0
      k = 0
      do j = 2, mh - 6, 4
          k = k + 4
          wk1r = csc1 * (wd1r + w(k))
          wk1i = csc1 * (wd1i + w(k + 1))
          wk3r = csc3 * (wd3r + w(k + 2))
          wk3i = csc3 * (wd3i + w(k + 3))
          wd1r = w(k)
          wd1i = w(k + 1)
          wd3r = w(k + 2)
          wd3i = w(k + 3)
          j1 = j + m
          j2 = j1 + m
          j3 = j2 + m
          x0r = a(j) + a(j2)
          x0i = -a(j + 1) - a(j2 + 1)
          x1r = a(j) - a(j2)
          x1i = -a(j + 1) + a(j2 + 1)
          y0r = a(j + 2) + a(j2 + 2)
          y0i = -a(j + 3) - a(j2 + 3)
          y1r = a(j + 2) - a(j2 + 2)
          y1i = -a(j + 3) + a(j2 + 3)
          x2r = a(j1) + a(j3)
          x2i = a(j1 + 1) + a(j3 + 1)
          x3r = a(j1) - a(j3)
          x3i = a(j1 + 1) - a(j3 + 1)
          y2r = a(j1 + 2) + a(j3 + 2)
          y2i = a(j1 + 3) + a(j3 + 3)
          y3r = a(j1 + 2) - a(j3 + 2)
          y3i = a(j1 + 3) - a(j3 + 3)
          a(j) = x0r + x2r
          a(j + 1) = x0i - x2i
          a(j + 2) = y0r + y2r
          a(j + 3) = y0i - y2i
          a(j1) = x0r - x2r
          a(j1 + 1) = x0i + x2i
          a(j1 + 2) = y0r - y2r
          a(j1 + 3) = y0i + y2i
          x0r = x1r + x3i
          x0i = x1i + x3r
          a(j2) = wk1r * x0r - wk1i * x0i
          a(j2 + 1) = wk1r * x0i + wk1i * x0r
          x0r = y1r + y3i
          x0i = y1i + y3r
          a(j2 + 2) = wd1r * x0r - wd1i * x0i
          a(j2 + 3) = wd1r * x0i + wd1i * x0r
          x0r = x1r - x3i
          x0i = x1i - x3r
          a(j3) = wk3r * x0r + wk3i * x0i
          a(j3 + 1) = wk3r * x0i - wk3i * x0r
          x0r = y1r - y3i
          x0i = y1i - y3r
          a(j3 + 2) = wd3r * x0r + wd3i * x0i
          a(j3 + 3) = wd3r * x0i - wd3i * x0r
          j0 = m - j
          j1 = j0 + m
          j2 = j1 + m
          j3 = j2 + m
          x0r = a(j0) + a(j2)
          x0i = -a(j0 + 1) - a(j2 + 1)
          x1r = a(j0) - a(j2)
          x1i = -a(j0 + 1) + a(j2 + 1)
          y0r = a(j0 - 2) + a(j2 - 2)
          y0i = -a(j0 - 1) - a(j2 - 1)
          y1r = a(j0 - 2) - a(j2 - 2)
          y1i = -a(j0 - 1) + a(j2 - 1)
          x2r = a(j1) + a(j3)
          x2i = a(j1 + 1) + a(j3 + 1)
          x3r = a(j1) - a(j3)
          x3i = a(j1 + 1) - a(j3 + 1)
          y2r = a(j1 - 2) + a(j3 - 2)
          y2i = a(j1 - 1) + a(j3 - 1)
          y3r = a(j1 - 2) - a(j3 - 2)
          y3i = a(j1 - 1) - a(j3 - 1)
          a(j0) = x0r + x2r
          a(j0 + 1) = x0i - x2i
          a(j0 - 2) = y0r + y2r
          a(j0 - 1) = y0i - y2i
          a(j1) = x0r - x2r
          a(j1 + 1) = x0i + x2i
          a(j1 - 2) = y0r - y2r
          a(j1 - 1) = y0i + y2i
          x0r = x1r + x3i
          x0i = x1i + x3r
          a(j2) = wk1i * x0r - wk1r * x0i
          a(j2 + 1) = wk1i * x0i + wk1r * x0r
          x0r = y1r + y3i
          x0i = y1i + y3r
          a(j2 - 2) = wd1i * x0r - wd1r * x0i
          a(j2 - 1) = wd1i * x0i + wd1r * x0r
          x0r = x1r - x3i
          x0i = x1i - x3r
          a(j3) = wk3i * x0r + wk3r * x0i
          a(j3 + 1) = wk3i * x0i - wk3r * x0r
          x0r = y1r - y3i
          x0i = y1i - y3r
          a(j3 - 2) = wd3i * x0r + wd3r * x0i
          a(j3 - 1) = wd3i * x0i - wd3r * x0r
      end do
      wk1r = csc1 * (wd1r + wn4r)
      wk1i = csc1 * (wd1i + wn4r)
      wk3r = csc3 * (wd3r - wn4r)
      wk3i = csc3 * (wd3i - wn4r)
      j0 = mh
      j1 = j0 + m
      j2 = j1 + m
      j3 = j2 + m
      x0r = a(j0 - 2) + a(j2 - 2)
      x0i = -a(j0 - 1) - a(j2 - 1)
      x1r = a(j0 - 2) - a(j2 - 2)
      x1i = -a(j0 - 1) + a(j2 - 1)
      x2r = a(j1 - 2) + a(j3 - 2)
      x2i = a(j1 - 1) + a(j3 - 1)
      x3r = a(j1 - 2) - a(j3 - 2)
      x3i = a(j1 - 1) - a(j3 - 1)
      a(j0 - 2) = x0r + x2r
      a(j0 - 1) = x0i - x2i
      a(j1 - 2) = x0r - x2r
      a(j1 - 1) = x0i + x2i
      x0r = x1r + x3i
      x0i = x1i + x3r
      a(j2 - 2) = wk1r * x0r - wk1i * x0i
      a(j2 - 1) = wk1r * x0i + wk1i * x0r
      x0r = x1r - x3i
      x0i = x1i - x3r
      a(j3 - 2) = wk3r * x0r + wk3i * x0i
      a(j3 - 1) = wk3r * x0i - wk3i * x0r
      x0r = a(j0) + a(j2)
      x0i = -a(j0 + 1) - a(j2 + 1)
      x1r = a(j0) - a(j2)
      x1i = -a(j0 + 1) + a(j2 + 1)
      x2r = a(j1) + a(j3)
      x2i = a(j1 + 1) + a(j3 + 1)
      x3r = a(j1) - a(j3)
      x3i = a(j1 + 1) - a(j3 + 1)
      a(j0) = x0r + x2r
      a(j0 + 1) = x0i - x2i
      a(j1) = x0r - x2r
      a(j1 + 1) = x0i + x2i
      x0r = x1r + x3i
      x0i = x1i + x3r
      a(j2) = wn4r * (x0r - x0i)
      a(j2 + 1) = wn4r * (x0i + x0r)
      x0r = x1r - x3i
      x0i = x1i - x3r
      a(j3) = -wn4r * (x0r + x0i)
      a(j3 + 1) = -wn4r * (x0i - x0r)
      x0r = a(j0 + 2) + a(j2 + 2)
      x0i = -a(j0 + 3) - a(j2 + 3)
      x1r = a(j0 + 2) - a(j2 + 2)
      x1i = -a(j0 + 3) + a(j2 + 3)
      x2r = a(j1 + 2) + a(j3 + 2)
      x2i = a(j1 + 3) + a(j3 + 3)
      x3r = a(j1 + 2) - a(j3 + 2)
      x3i = a(j1 + 3) - a(j3 + 3)
      a(j0 + 2) = x0r + x2r
      a(j0 + 3) = x0i - x2i
      a(j1 + 2) = x0r - x2r
      a(j1 + 3) = x0i + x2i
      x0r = x1r + x3i
      x0i = x1i + x3r
      a(j2 + 2) = wk1i * x0r - wk1r * x0i
      a(j2 + 3) = wk1i * x0i + wk1r * x0r
      x0r = x1r - x3i
      x0i = x1i - x3r
      a(j3 + 2) = wk3i * x0r + wk3r * x0i
      a(j3 + 3) = wk3i * x0i - wk3r * x0r
      end
!
      subroutine cftrec4(n, a, nw, w)
      integer n, nw, cfttree, isplt, j, k, m
      real*8 a(0 : n - 1), w(0 : nw - 1)
      m = n
      do while (m .gt. 512)
          m = m / 4
          call cftmdl1(m, a(n - m), w(nw - m / 2))
      end do
      call cftleaf(m, 1, a(n - m), nw, w)
      k = 0
      do j = n - m, m, -m
          k = k + 1
          isplt = cfttree(m, j, k, a, nw, w)
          call cftleaf(m, isplt, a(j - m), nw, w)
      end do
      end
!
      integer function cfttree(n, j, k, a, nw, w)
      integer n, j, k, nw, i, isplt, m
      real*8 a(0 : n - 1), w(0 : nw - 1)
      if (mod(k, 4) .ne. 0) then
          isplt = mod(k, 2)
          if (isplt .ne. 0) then
              call cftmdl1(n, a(j - n), w(nw - n / 2))
          else
              call cftmdl2(n, a(j - n), w(nw - n))
          end if
      else
          m = n
          i = k
          do while (mod(i, 4) .eq. 0)
              m = m * 4
              i = i / 4
          end do
          isplt = mod(i, 2)
          if (isplt .ne. 0) then
              do while (m .gt. 128)
                  call cftmdl1(m, a(j - m), w(nw - m / 2))
                  m = m / 4
              end do
          else
              do while (m .gt. 128)
                  call cftmdl2(m, a(j - m), w(nw - m))
                  m = m / 4
              end do
          end if
      end if
      cfttree = isplt
      end
!
      subroutine cftleaf(n, isplt, a, nw, w)
      integer n, isplt, nw
      real*8 a(0 : n - 1), w(0 : nw - 1)
      if (n .eq. 512) then
          call cftmdl1(128, a, w(nw - 64))
          call cftf161(a, w(nw - 8))
          call cftf162(a(32), w(nw - 32))
          call cftf161(a(64), w(nw - 8))
          call cftf161(a(96), w(nw - 8))
          call cftmdl2(128, a(128), w(nw - 128))
          call cftf161(a(128), w(nw - 8))
          call cftf162(a(160), w(nw - 32))
          call cftf161(a(192), w(nw - 8))
          call cftf162(a(224), w(nw - 32))
          call cftmdl1(128, a(256), w(nw - 64))
          call cftf161(a(256), w(nw - 8))
          call cftf162(a(288), w(nw - 32))
          call cftf161(a(320), w(nw - 8))
          call cftf161(a(352), w(nw - 8))
          if (isplt .ne. 0) then
              call cftmdl1(128, a(384), w(nw - 64))
              call cftf161(a(480), w(nw - 8))
          else
              call cftmdl2(128, a(384), w(nw - 128))
              call cftf162(a(480), w(nw - 32))
          end if
          call cftf161(a(384), w(nw - 8))
          call cftf162(a(416), w(nw - 32))
          call cftf161(a(448), w(nw - 8))
      else
          call cftmdl1(64, a, w(nw - 32))
          call cftf081(a, w(nw - 8))
          call cftf082(a(16), w(nw - 8))
          call cftf081(a(32), w(nw - 8))
          call cftf081(a(48), w(nw - 8))
          call cftmdl2(64, a(64), w(nw - 64))
          call cftf081(a(64), w(nw - 8))
          call cftf082(a(80), w(nw - 8))
          call cftf081(a(96), w(nw - 8))
          call cftf082(a(112), w(nw - 8))
          call cftmdl1(64, a(128), w(nw - 32))
          call cftf081(a(128), w(nw - 8))
          call cftf082(a(144), w(nw - 8))
          call cftf081(a(160), w(nw - 8))
          call cftf081(a(176), w(nw - 8))
          if (isplt .ne. 0) then
              call cftmdl1(64, a(192), w(nw - 32))
              call cftf081(a(240), w(nw - 8))
          else
              call cftmdl2(64, a(192), w(nw - 64))
              call cftf082(a(240), w(nw - 8))
          end if
          call cftf081(a(192), w(nw - 8))
          call cftf082(a(208), w(nw - 8))
          call cftf081(a(224), w(nw - 8))
      end if
      end
!
      subroutine cftmdl1(n, a, w)
      integer n, j, j0, j1, j2, j3, k, m, mh
      real*8 a(0 : n - 1), w(0 : *)
      real*8 wn4r, wk1r, wk1i, wk3r, wk3i
      real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i
      mh = n / 8
      m = 2 * mh
      j1 = m
      j2 = j1 + m
      j3 = j2 + m
      x0r = a(0) + a(j2)
      x0i = a(1) + a(j2 + 1)
      x1r = a(0) - a(j2)
      x1i = a(1) - a(j2 + 1)
      x2r = a(j1) + a(j3)
      x2i = a(j1 + 1) + a(j3 + 1)
      x3r = a(j1) - a(j3)
      x3i = a(j1 + 1) - a(j3 + 1)
      a(0) = x0r + x2r
      a(1) = x0i + x2i
      a(j1) = x0r - x2r
      a(j1 + 1) = x0i - x2i
      a(j2) = x1r - x3i
      a(j2 + 1) = x1i + x3r
      a(j3) = x1r + x3i
      a(j3 + 1) = x1i - x3r
      wn4r = w(1)
      k = 0
      do j = 2, mh - 2, 2
          k = k + 4
          wk1r = w(k)
          wk1i = w(k + 1)
          wk3r = w(k + 2)
          wk3i = w(k + 3)
          j1 = j + m
          j2 = j1 + m
          j3 = j2 + m
          x0r = a(j) + a(j2)
          x0i = a(j + 1) + a(j2 + 1)
          x1r = a(j) - a(j2)
          x1i = a(j + 1) - a(j2 + 1)
          x2r = a(j1) + a(j3)
          x2i = a(j1 + 1) + a(j3 + 1)
          x3r = a(j1) - a(j3)
          x3i = a(j1 + 1) - a(j3 + 1)
          a(j) = x0r + x2r
          a(j + 1) = x0i + x2i
          a(j1) = x0r - x2r
          a(j1 + 1) = x0i - x2i
          x0r = x1r - x3i
          x0i = x1i + x3r
          a(j2) = wk1r * x0r - wk1i * x0i
          a(j2 + 1) = wk1r * x0i + wk1i * x0r
          x0r = x1r + x3i
          x0i = x1i - x3r
          a(j3) = wk3r * x0r + wk3i * x0i
          a(j3 + 1) = wk3r * x0i - wk3i * x0r
          j0 = m - j
          j1 = j0 + m
          j2 = j1 + m
          j3 = j2 + m
          x0r = a(j0) + a(j2)
          x0i = a(j0 + 1) + a(j2 + 1)
          x1r = a(j0) - a(j2)
          x1i = a(j0 + 1) - a(j2 + 1)
          x2r = a(j1) + a(j3)
          x2i = a(j1 + 1) + a(j3 + 1)
          x3r = a(j1) - a(j3)
          x3i = a(j1 + 1) - a(j3 + 1)
          a(j0) = x0r + x2r
          a(j0 + 1) = x0i + x2i
          a(j1) = x0r - x2r
          a(j1 + 1) = x0i - x2i
          x0r = x1r - x3i
          x0i = x1i + x3r
          a(j2) = wk1i * x0r - wk1r * x0i
          a(j2 + 1) = wk1i * x0i + wk1r * x0r
          x0r = x1r + x3i
          x0i = x1i - x3r
          a(j3) = wk3i * x0r + wk3r * x0i
          a(j3 + 1) = wk3i * x0i - wk3r * x0r
      end do
      j0 = mh
      j1 = j0 + m
      j2 = j1 + m
      j3 = j2 + m
      x0r = a(j0) + a(j2)
      x0i = a(j0 + 1) + a(j2 + 1)
      x1r = a(j0) - a(j2)
      x1i = a(j0 + 1) - a(j2 + 1)
      x2r = a(j1) + a(j3)
      x2i = a(j1 + 1) + a(j3 + 1)
      x3r = a(j1) - a(j3)
      x3i = a(j1 + 1) - a(j3 + 1)
      a(j0) = x0r + x2r
      a(j0 + 1) = x0i + x2i
      a(j1) = x0r - x2r
      a(j1 + 1) = x0i - x2i
      x0r = x1r - x3i
      x0i = x1i + x3r
      a(j2) = wn4r * (x0r - x0i)
      a(j2 + 1) = wn4r * (x0i + x0r)
      x0r = x1r + x3i
      x0i = x1i - x3r
      a(j3) = -wn4r * (x0r + x0i)
      a(j3 + 1) = -wn4r * (x0i - x0r)
      end
!
      subroutine cftmdl2(n, a, w)
      integer n, j, j0, j1, j2, j3, k, kr, m, mh
      real*8 a(0 : n - 1), w(0 : *)
      real*8 wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i
      real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i
      real*8 y0r, y0i, y2r, y2i
      mh = n / 8
      m = 2 * mh
      wn4r = w(1)
      j1 = m
      j2 = j1 + m
      j3 = j2 + m
      x0r = a(0) - a(j2 + 1)
      x0i = a(1) + a(j2)
      x1r = a(0) + a(j2 + 1)
      x1i = a(1) - a(j2)
      x2r = a(j1) - a(j3 + 1)
      x2i = a(j1 + 1) + a(j3)
      x3r = a(j1) + a(j3 + 1)
      x3i = a(j1 + 1) - a(j3)
      y0r = wn4r * (x2r - x2i)
      y0i = wn4r * (x2i + x2r)
      a(0) = x0r + y0r
      a(1) = x0i + y0i
      a(j1) = x0r - y0r
      a(j1 + 1) = x0i - y0i
      y0r = wn4r * (x3r - x3i)
      y0i = wn4r * (x3i + x3r)
      a(j2) = x1r - y0i
      a(j2 + 1) = x1i + y0r
      a(j3) = x1r + y0i
      a(j3 + 1) = x1i - y0r
      k = 0
      kr = 2 * m
      do j = 2, mh - 2, 2
          k = k + 4
          wk1r = w(k)
          wk1i = w(k + 1)
          wk3r = w(k + 2)
          wk3i = w(k + 3)
          kr = kr - 4
          wd1i = w(kr)
          wd1r = w(kr + 1)
          wd3i = w(kr + 2)
          wd3r = w(kr + 3)
          j1 = j + m
          j2 = j1 + m
          j3 = j2 + m
          x0r = a(j) - a(j2 + 1)
          x0i = a(j + 1) + a(j2)
          x1r = a(j) + a(j2 + 1)
          x1i = a(j + 1) - a(j2)
          x2r = a(j1) - a(j3 + 1)
          x2i = a(j1 + 1) + a(j3)
          x3r = a(j1) + a(j3 + 1)
          x3i = a(j1 + 1) - a(j3)
          y0r = wk1r * x0r - wk1i * x0i
          y0i = wk1r * x0i + wk1i * x0r
          y2r = wd1r * x2r - wd1i * x2i
          y2i = wd1r * x2i + wd1i * x2r
          a(j) = y0r + y2r
          a(j + 1) = y0i + y2i
          a(j1) = y0r - y2r
          a(j1 + 1) = y0i - y2i
          y0r = wk3r * x1r + wk3i * x1i
          y0i = wk3r * x1i - wk3i * x1r
          y2r = wd3r * x3r + wd3i * x3i
          y2i = wd3r * x3i - wd3i * x3r
          a(j2) = y0r + y2r
          a(j2 + 1) = y0i + y2i
          a(j3) = y0r - y2r
          a(j3 + 1) = y0i - y2i
          j0 = m - j
          j1 = j0 + m
          j2 = j1 + m
          j3 = j2 + m
          x0r = a(j0) - a(j2 + 1)
          x0i = a(j0 + 1) + a(j2)
          x1r = a(j0) + a(j2 + 1)
          x1i = a(j0 + 1) - a(j2)
          x2r = a(j1) - a(j3 + 1)
          x2i = a(j1 + 1) + a(j3)
          x3r = a(j1) + a(j3 + 1)
          x3i = a(j1 + 1) - a(j3)
          y0r = wd1i * x0r - wd1r * x0i
          y0i = wd1i * x0i + wd1r * x0r
          y2r = wk1i * x2r - wk1r * x2i
          y2i = wk1i * x2i + wk1r * x2r
          a(j0) = y0r + y2r
          a(j0 + 1) = y0i + y2i
          a(j1) = y0r - y2r
          a(j1 + 1) = y0i - y2i
          y0r = wd3i * x1r + wd3r * x1i
          y0i = wd3i * x1i - wd3r * x1r
          y2r = wk3i * x3r + wk3r * x3i
          y2i = wk3i * x3i - wk3r * x3r
          a(j2) = y0r + y2r
          a(j2 + 1) = y0i + y2i
          a(j3) = y0r - y2r
          a(j3 + 1) = y0i - y2i
      end do
      wk1r = w(m)
      wk1i = w(m + 1)
      j0 = mh
      j1 = j0 + m
      j2 = j1 + m
      j3 = j2 + m
      x0r = a(j0) - a(j2 + 1)
      x0i = a(j0 + 1) + a(j2)
      x1r = a(j0) + a(j2 + 1)
      x1i = a(j0 + 1) - a(j2)
      x2r = a(j1) - a(j3 + 1)
      x2i = a(j1 + 1) + a(j3)
      x3r = a(j1) + a(j3 + 1)
      x3i = a(j1 + 1) - a(j3)
      y0r = wk1r * x0r - wk1i * x0i
      y0i = wk1r * x0i + wk1i * x0r
      y2r = wk1i * x2r - wk1r * x2i
      y2i = wk1i * x2i + wk1r * x2r
      a(j0) = y0r + y2r
      a(j0 + 1) = y0i + y2i
      a(j1) = y0r - y2r
      a(j1 + 1) = y0i - y2i
      y0r = wk1i * x1r - wk1r * x1i
      y0i = wk1i * x1i + wk1r * x1r
      y2r = wk1r * x3r - wk1i * x3i
      y2i = wk1r * x3i + wk1i * x3r
      a(j2) = y0r - y2r
      a(j2 + 1) = y0i - y2i
      a(j3) = y0r + y2r
      a(j3 + 1) = y0i + y2i
      end
!
      subroutine cftfx41(n, a, nw, w)
      integer n, nw
      real*8 a(0 : n - 1), w(0 : nw - 1)
      if (n .eq. 128) then
          call cftf161(a, w(nw - 8))
          call cftf162(a(32), w(nw - 32))
          call cftf161(a(64), w(nw - 8))
          call cftf161(a(96), w(nw - 8))
      else
          call cftf081(a, w(nw - 8))
          call cftf082(a(16), w(nw - 8))
          call cftf081(a(32), w(nw - 8))
          call cftf081(a(48), w(nw - 8))
      end if
      end
!
      subroutine cftf161(a, w)
      real*8 a(0 : 31), w(0 : *), wn4r, wk1r, wk1i
      real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i
      real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i
      real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i
      real*8 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i
      real*8 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i
      wn4r = w(1)
      wk1r = w(2)
      wk1i = w(3)
      x0r = a(0) + a(16)
      x0i = a(1) + a(17)
      x1r = a(0) - a(16)
      x1i = a(1) - a(17)
      x2r = a(8) + a(24)
      x2i = a(9) + a(25)
      x3r = a(8) - a(24)
      x3i = a(9) - a(25)
      y0r = x0r + x2r
      y0i = x0i + x2i
      y4r = x0r - x2r
      y4i = x0i - x2i
      y8r = x1r - x3i
      y8i = x1i + x3r
      y12r = x1r + x3i
      y12i = x1i - x3r
      x0r = a(2) + a(18)
      x0i = a(3) + a(19)
      x1r = a(2) - a(18)
      x1i = a(3) - a(19)
      x2r = a(10) + a(26)
      x2i = a(11) + a(27)
      x3r = a(10) - a(26)
      x3i = a(11) - a(27)
      y1r = x0r + x2r
      y1i = x0i + x2i
      y5r = x0r - x2r
      y5i = x0i - x2i
      x0r = x1r - x3i
      x0i = x1i + x3r
      y9r = wk1r * x0r - wk1i * x0i
      y9i = wk1r * x0i + wk1i * x0r
      x0r = x1r + x3i
      x0i = x1i - x3r
      y13r = wk1i * x0r - wk1r * x0i
      y13i = wk1i * x0i + wk1r * x0r
      x0r = a(4) + a(20)
      x0i = a(5) + a(21)
      x1r = a(4) - a(20)
      x1i = a(5) - a(21)
      x2r = a(12) + a(28)
      x2i = a(13) + a(29)
      x3r = a(12) - a(28)
      x3i = a(13) - a(29)
      y2r = x0r + x2r
      y2i = x0i + x2i
      y6r = x0r - x2r
      y6i = x0i - x2i
      x0r = x1r - x3i
      x0i = x1i + x3r
      y10r = wn4r * (x0r - x0i)
      y10i = wn4r * (x0i + x0r)
      x0r = x1r + x3i
      x0i = x1i - x3r
      y14r = wn4r * (x0r + x0i)
      y14i = wn4r * (x0i - x0r)
      x0r = a(6) + a(22)
      x0i = a(7) + a(23)
      x1r = a(6) - a(22)
      x1i = a(7) - a(23)
      x2r = a(14) + a(30)
      x2i = a(15) + a(31)
      x3r = a(14) - a(30)
      x3i = a(15) - a(31)
      y3r = x0r + x2r
      y3i = x0i + x2i
      y7r = x0r - x2r
      y7i = x0i - x2i
      x0r = x1r - x3i
      x0i = x1i + x3r
      y11r = wk1i * x0r - wk1r * x0i
      y11i = wk1i * x0i + wk1r * x0r
      x0r = x1r + x3i
      x0i = x1i - x3r
      y15r = wk1r * x0r - wk1i * x0i
      y15i = wk1r * x0i + wk1i * x0r
      x0r = y12r - y14r
      x0i = y12i - y14i
      x1r = y12r + y14r
      x1i = y12i + y14i
      x2r = y13r - y15r
      x2i = y13i - y15i
      x3r = y13r + y15r
      x3i = y13i + y15i
      a(24) = x0r + x2r
      a(25) = x0i + x2i
      a(26) = x0r - x2r
      a(27) = x0i - x2i
      a(28) = x1r - x3i
      a(29) = x1i + x3r
      a(30) = x1r + x3i
      a(31) = x1i - x3r
      x0r = y8r + y10r
      x0i = y8i + y10i
      x1r = y8r - y10r
      x1i = y8i - y10i
      x2r = y9r + y11r
      x2i = y9i + y11i
      x3r = y9r - y11r
      x3i = y9i - y11i
      a(16) = x0r + x2r
      a(17) = x0i + x2i
      a(18) = x0r - x2r
      a(19) = x0i - x2i
      a(20) = x1r - x3i
      a(21) = x1i + x3r
      a(22) = x1r + x3i
      a(23) = x1i - x3r
      x0r = y5r - y7i
      x0i = y5i + y7r
      x2r = wn4r * (x0r - x0i)
      x2i = wn4r * (x0i + x0r)
      x0r = y5r + y7i
      x0i = y5i - y7r
      x3r = wn4r * (x0r - x0i)
      x3i = wn4r * (x0i + x0r)
      x0r = y4r - y6i
      x0i = y4i + y6r
      x1r = y4r + y6i
      x1i = y4i - y6r
      a(8) = x0r + x2r
      a(9) = x0i + x2i
      a(10) = x0r - x2r
      a(11) = x0i - x2i
      a(12) = x1r - x3i
      a(13) = x1i + x3r
      a(14) = x1r + x3i
      a(15) = x1i - x3r
      x0r = y0r + y2r
      x0i = y0i + y2i
      x1r = y0r - y2r
      x1i = y0i - y2i
      x2r = y1r + y3r
      x2i = y1i + y3i
      x3r = y1r - y3r
      x3i = y1i - y3i
      a(0) = x0r + x2r
      a(1) = x0i + x2i
      a(2) = x0r - x2r
      a(3) = x0i - x2i
      a(4) = x1r - x3i
      a(5) = x1i + x3r
      a(6) = x1r + x3i
      a(7) = x1i - x3r
      end
!
      subroutine cftf162(a, w)
      real*8 a(0 : 31), w(0 : *)
      real*8 wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i
      real*8 x0r, x0i, x1r, x1i, x2r, x2i
      real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i
      real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i
      real*8 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i
      real*8 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i
      wn4r = w(1)
      wk1r = w(4)
      wk1i = w(5)
      wk3r = w(6)
      wk3i = -w(7)
      wk2r = w(8)
      wk2i = w(9)
      x1r = a(0) - a(17)
      x1i = a(1) + a(16)
      x0r = a(8) - a(25)
      x0i = a(9) + a(24)
      x2r = wn4r * (x0r - x0i)
      x2i = wn4r * (x0i + x0r)
      y0r = x1r + x2r
      y0i = x1i + x2i
      y4r = x1r - x2r
      y4i = x1i - x2i
      x1r = a(0) + a(17)
      x1i = a(1) - a(16)
      x0r = a(8) + a(25)
      x0i = a(9) - a(24)
      x2r = wn4r * (x0r - x0i)
      x2i = wn4r * (x0i + x0r)
      y8r = x1r - x2i
      y8i = x1i + x2r
      y12r = x1r + x2i
      y12i = x1i - x2r
      x0r = a(2) - a(19)
      x0i = a(3) + a(18)
      x1r = wk1r * x0r - wk1i * x0i
      x1i = wk1r * x0i + wk1i * x0r
      x0r = a(10) - a(27)
      x0i = a(11) + a(26)
      x2r = wk3i * x0r - wk3r * x0i
      x2i = wk3i * x0i + wk3r * x0r
      y1r = x1r + x2r
      y1i = x1i + x2i
      y5r = x1r - x2r
      y5i = x1i - x2i
      x0r = a(2) + a(19)
      x0i = a(3) - a(18)
      x1r = wk3r * x0r - wk3i * x0i
      x1i = wk3r * x0i + wk3i * x0r
      x0r = a(10) + a(27)
      x0i = a(11) - a(26)
      x2r = wk1r * x0r + wk1i * x0i
      x2i = wk1r * x0i - wk1i * x0r
      y9r = x1r - x2r
      y9i = x1i - x2i
      y13r = x1r + x2r
      y13i = x1i + x2i
      x0r = a(4) - a(21)
      x0i = a(5) + a(20)
      x1r = wk2r * x0r - wk2i * x0i
      x1i = wk2r * x0i + wk2i * x0r
      x0r = a(12) - a(29)
      x0i = a(13) + a(28)
      x2r = wk2i * x0r - wk2r * x0i
      x2i = wk2i * x0i + wk2r * x0r
      y2r = x1r + x2r
      y2i = x1i + x2i
      y6r = x1r - x2r
      y6i = x1i - x2i
      x0r = a(4) + a(21)
      x0i = a(5) - a(20)
      x1r = wk2i * x0r - wk2r * x0i
      x1i = wk2i * x0i + wk2r * x0r
      x0r = a(12) + a(29)
      x0i = a(13) - a(28)
      x2r = wk2r * x0r - wk2i * x0i
      x2i = wk2r * x0i + wk2i * x0r
      y10r = x1r - x2r
      y10i = x1i - x2i
      y14r = x1r + x2r
      y14i = x1i + x2i
      x0r = a(6) - a(23)
      x0i = a(7) + a(22)
      x1r = wk3r * x0r - wk3i * x0i
      x1i = wk3r * x0i + wk3i * x0r
      x0r = a(14) - a(31)
      x0i = a(15) + a(30)
      x2r = wk1i * x0r - wk1r * x0i
      x2i = wk1i * x0i + wk1r * x0r
      y3r = x1r + x2r
      y3i = x1i + x2i
      y7r = x1r - x2r
      y7i = x1i - x2i
      x0r = a(6) + a(23)
      x0i = a(7) - a(22)
      x1r = wk1i * x0r + wk1r * x0i
      x1i = wk1i * x0i - wk1r * x0r
      x0r = a(14) + a(31)
      x0i = a(15) - a(30)
      x2r = wk3i * x0r - wk3r * x0i
      x2i = wk3i * x0i + wk3r * x0r
      y11r = x1r + x2r
      y11i = x1i + x2i
      y15r = x1r - x2r
      y15i = x1i - x2i
      x1r = y0r + y2r
      x1i = y0i + y2i
      x2r = y1r + y3r
      x2i = y1i + y3i
      a(0) = x1r + x2r
      a(1) = x1i + x2i
      a(2) = x1r - x2r
      a(3) = x1i - x2i
      x1r = y0r - y2r
      x1i = y0i - y2i
      x2r = y1r - y3r
      x2i = y1i - y3i
      a(4) = x1r - x2i
      a(5) = x1i + x2r
      a(6) = x1r + x2i
      a(7) = x1i - x2r
      x1r = y4r - y6i
      x1i = y4i + y6r
      x0r = y5r - y7i
      x0i = y5i + y7r
      x2r = wn4r * (x0r - x0i)
      x2i = wn4r * (x0i + x0r)
      a(8) = x1r + x2r
      a(9) = x1i + x2i
      a(10) = x1r - x2r
      a(11) = x1i - x2i
      x1r = y4r + y6i
      x1i = y4i - y6r
      x0r = y5r + y7i
      x0i = y5i - y7r
      x2r = wn4r * (x0r - x0i)
      x2i = wn4r * (x0i + x0r)
      a(12) = x1r - x2i
      a(13) = x1i + x2r
      a(14) = x1r + x2i
      a(15) = x1i - x2r
      x1r = y8r + y10r
      x1i = y8i + y10i
      x2r = y9r - y11r
      x2i = y9i - y11i
      a(16) = x1r + x2r
      a(17) = x1i + x2i
      a(18) = x1r - x2r
      a(19) = x1i - x2i
      x1r = y8r - y10r
      x1i = y8i - y10i
      x2r = y9r + y11r
      x2i = y9i + y11i
      a(20) = x1r - x2i
      a(21) = x1i + x2r
      a(22) = x1r + x2i
      a(23) = x1i - x2r
      x1r = y12r - y14i
      x1i = y12i + y14r
      x0r = y13r + y15i
      x0i = y13i - y15r
      x2r = wn4r * (x0r - x0i)
      x2i = wn4r * (x0i + x0r)
      a(24) = x1r + x2r
      a(25) = x1i + x2i
      a(26) = x1r - x2r
      a(27) = x1i - x2i
      x1r = y12r + y14i
      x1i = y12i - y14r
      x0r = y13r - y15i
      x0i = y13i + y15r
      x2r = wn4r * (x0r - x0i)
      x2i = wn4r * (x0i + x0r)
      a(28) = x1r - x2i
      a(29) = x1i + x2r
      a(30) = x1r + x2i
      a(31) = x1i - x2r
      end
!
      subroutine cftf081(a, w)
      real*8 a(0 : 15), w(0 : *)
      real*8 wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i
      real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i
      real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i
      wn4r = w(1)
      x0r = a(0) + a(8)
      x0i = a(1) + a(9)
      x1r = a(0) - a(8)
      x1i = a(1) - a(9)
      x2r = a(4) + a(12)
      x2i = a(5) + a(13)
      x3r = a(4) - a(12)
      x3i = a(5) - a(13)
      y0r = x0r + x2r
      y0i = x0i + x2i
      y2r = x0r - x2r
      y2i = x0i - x2i
      y1r = x1r - x3i
      y1i = x1i + x3r
      y3r = x1r + x3i
      y3i = x1i - x3r
      x0r = a(2) + a(10)
      x0i = a(3) + a(11)
      x1r = a(2) - a(10)
      x1i = a(3) - a(11)
      x2r = a(6) + a(14)
      x2i = a(7) + a(15)
      x3r = a(6) - a(14)
      x3i = a(7) - a(15)
      y4r = x0r + x2r
      y4i = x0i + x2i
      y6r = x0r - x2r
      y6i = x0i - x2i
      x0r = x1r - x3i
      x0i = x1i + x3r
      x2r = x1r + x3i
      x2i = x1i - x3r
      y5r = wn4r * (x0r - x0i)
      y5i = wn4r * (x0r + x0i)
      y7r = wn4r * (x2r - x2i)
      y7i = wn4r * (x2r + x2i)
      a(8) = y1r + y5r
      a(9) = y1i + y5i
      a(10) = y1r - y5r
      a(11) = y1i - y5i
      a(12) = y3r - y7i
      a(13) = y3i + y7r
      a(14) = y3r + y7i
      a(15) = y3i - y7r
      a(0) = y0r + y4r
      a(1) = y0i + y4i
      a(2) = y0r - y4r
      a(3) = y0i - y4i
      a(4) = y2r - y6i
      a(5) = y2i + y6r
      a(6) = y2r + y6i
      a(7) = y2i - y6r
      end
!
      subroutine cftf082(a, w)
      real*8 a(0 : 15), w(0 : *)
      real*8 wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i
      real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i
      real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i
      wn4r = w(1)
      wk1r = w(2)
      wk1i = w(3)
      y0r = a(0) - a(9)
      y0i = a(1) + a(8)
      y1r = a(0) + a(9)
      y1i = a(1) - a(8)
      x0r = a(4) - a(13)
      x0i = a(5) + a(12)
      y2r = wn4r * (x0r - x0i)
      y2i = wn4r * (x0i + x0r)
      x0r = a(4) + a(13)
      x0i = a(5) - a(12)
      y3r = wn4r * (x0r - x0i)
      y3i = wn4r * (x0i + x0r)
      x0r = a(2) - a(11)
      x0i = a(3) + a(10)
      y4r = wk1r * x0r - wk1i * x0i
      y4i = wk1r * x0i + wk1i * x0r
      x0r = a(2) + a(11)
      x0i = a(3) - a(10)
      y5r = wk1i * x0r - wk1r * x0i
      y5i = wk1i * x0i + wk1r * x0r
      x0r = a(6) - a(15)
      x0i = a(7) + a(14)
      y6r = wk1i * x0r - wk1r * x0i
      y6i = wk1i * x0i + wk1r * x0r
      x0r = a(6) + a(15)
      x0i = a(7) - a(14)
      y7r = wk1r * x0r - wk1i * x0i
      y7i = wk1r * x0i + wk1i * x0r
      x0r = y0r + y2r
      x0i = y0i + y2i
      x1r = y4r + y6r
      x1i = y4i + y6i
      a(0) = x0r + x1r
      a(1) = x0i + x1i
      a(2) = x0r - x1r
      a(3) = x0i - x1i
      x0r = y0r - y2r
      x0i = y0i - y2i
      x1r = y4r - y6r
      x1i = y4i - y6i
      a(4) = x0r - x1i
      a(5) = x0i + x1r
      a(6) = x0r + x1i
      a(7) = x0i - x1r
      x0r = y1r - y3i
      x0i = y1i + y3r
      x1r = y5r - y7r
      x1i = y5i - y7i
      a(8) = x0r + x1r
      a(9) = x0i + x1i
      a(10) = x0r - x1r
      a(11) = x0i - x1i
      x0r = y1r + y3i
      x0i = y1i - y3r
      x1r = y5r + y7r
      x1i = y5i + y7i
      a(12) = x0r - x1i
      a(13) = x0i + x1r
      a(14) = x0r + x1i
      a(15) = x0i - x1r
      end
!
      subroutine cftf040(a)
      real*8 a(0 : 7), x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i
      x0r = a(0) + a(4)
      x0i = a(1) + a(5)
      x1r = a(0) - a(4)
      x1i = a(1) - a(5)
      x2r = a(2) + a(6)
      x2i = a(3) + a(7)
      x3r = a(2) - a(6)
      x3i = a(3) - a(7)
      a(0) = x0r + x2r
      a(1) = x0i + x2i
      a(2) = x1r - x3i
      a(3) = x1i + x3r
      a(4) = x0r - x2r
      a(5) = x0i - x2i
      a(6) = x1r + x3i
      a(7) = x1i - x3r
      end
!
      subroutine cftb040(a)
      real*8 a(0 : 7), x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i
      x0r = a(0) + a(4)
      x0i = a(1) + a(5)
      x1r = a(0) - a(4)
      x1i = a(1) - a(5)
      x2r = a(2) + a(6)
      x2i = a(3) + a(7)
      x3r = a(2) - a(6)
      x3i = a(3) - a(7)
      a(0) = x0r + x2r
      a(1) = x0i + x2i
      a(2) = x1r + x3i
      a(3) = x1i - x3r
      a(4) = x0r - x2r
      a(5) = x0i - x2i
      a(6) = x1r - x3i
      a(7) = x1i + x3r
      end
!
      subroutine cftx020(a)
      real*8 a(0 : 3), x0r, x0i
      x0r = a(0) - a(2)
      x0i = a(1) - a(3)
      a(0) = a(0) + a(2)
      a(1) = a(1) + a(3)
      a(2) = x0r
      a(3) = x0i
      end
!
      subroutine rftfsub(n, a, nc, c)
      integer n, nc, j, k, kk, ks, m
      real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr, xi, yr, yi
      m = n / 2
      ks = 2 * nc / m
      kk = 0
      do j = 2, m - 2, 2
          k = n - j
          kk = kk + ks
          wkr = 0.5d0 - c(nc - kk)
          wki = c(kk)
          xr = a(j) - a(k)
          xi = a(j + 1) + a(k + 1)
          yr = wkr * xr - wki * xi
          yi = wkr * xi + wki * xr
          a(j) = a(j) - yr
          a(j + 1) = a(j + 1) - yi
          a(k) = a(k) + yr
          a(k + 1) = a(k + 1) - yi
      end do
      end
!
      subroutine rftbsub(n, a, nc, c)
      integer n, nc, j, k, kk, ks, m
      real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr, xi, yr, yi
      m = n / 2
      ks = 2 * nc / m
      kk = 0
      do j = 2, m - 2, 2
          k = n - j
          kk = kk + ks
          wkr = 0.5d0 - c(nc - kk)
          wki = c(kk)
          xr = a(j) - a(k)
          xi = a(j + 1) + a(k + 1)
          yr = wkr * xr + wki * xi
          yi = wkr * xi - wki * xr
          a(j) = a(j) - yr
          a(j + 1) = a(j + 1) - yi
          a(k) = a(k) + yr
          a(k + 1) = a(k + 1) - yi
      end do
      end
!
      subroutine dctsub(n, a, nc, c)
      integer n, nc, j, k, kk, ks, m
      real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr
      m = n / 2
      ks = nc / n
      kk = 0
      do j = 1, m - 1
          k = n - j
          kk = kk + ks
          wkr = c(kk) - c(nc - kk)
          wki = c(kk) + c(nc - kk)
          xr = wki * a(j) - wkr * a(k)
          a(j) = wkr * a(j) + wki * a(k)
          a(k) = xr
      end do
      a(m) = c(0) * a(m)
      end
!
      subroutine dstsub(n, a, nc, c)
      integer n, nc, j, k, kk, ks, m
      real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr
      m = n / 2
      ks = nc / n
      kk = 0
      do j = 1, m - 1
          k = n - j
          kk = kk + ks
          wkr = c(kk) - c(nc - kk)
          wki = c(kk) + c(nc - kk)
          xr = wki * a(k) - wkr * a(j)
          a(k) = wkr * a(k) + wki * a(j)
          a(j) = xr
      end do
      a(m) = c(0) * a(m)
      end
!








      
     

                  

      




     









     


