cndaqiang Web Linux DFT

Fortran使用Scalapack求解HC=SCE

2019-08-04
cndaqiang
RSS

Scalapack的内容和用法见Fortran 科学计算

参考

ScaLAPACK: a portable linear algebra library for distributed memory computers — design issues and performance
SIESTA源码

数据

H原子的scf中由五条轨道(NAOs)作为基组得到的H,S: H.data S.data

计算方法

源码

HC_SCE.f90

program HC_SCE
    use m_mpi_my
    implicit none
    
    INTEGER,parameter :: dp=8,DLEN_=9
    REAL(dp),allocatable    ::  H(:,:),S(:,:),C(:,:),E(:)
    REAL(dp),allocatable    ::  H_2d(:,:),S_2d(:,:),C_2d(:,:),E_2d(:)
    INTEGER :: m,lm,ln,blocksize,i,j
    !矩阵H_aux(m,m), local H(lm,m)
    REAL(dp) :: work(80),iwork(80)
    INTEGER :: lwork=80,liwork=45    
    
    INTEGER :: ICTXT,ICTXT_2d,NPROW,NPCOL,myROW,myCOL
    INTEGER :: DESCH(DLEN_),DESCH_2d(DLEN_),DESCE(DLEN_),INFO,DSCALE
    INTEGER :: ICTXT_1d,ICTXT_3d,ICTXT_4d
    call mpi_start()


    
!********************************************************************************   
!1D网格计算
!********************************************************************************
!**************************
!处理器grid
!**************************
    NPCOL=1
    NPROW=4
    CALL BLACS_GET( -1, 0, ICTXT ) 
    
    !call SL_INIT(ICTXT,NPROW,NPCOL)
    !BLACS_GET与SL_INIT貌似写一个就行
    
    call blacs_gridinit(ICTXT,'C',NPROW,NPCOL)
    call blacs_gridinfo(ICTXT,NPROW,NPCOL,myROW,myCOL)
        
    !write(*,*) node,NPROW,NPCOL,myROW,mycol
!**************************
!数据网格
!读入数据H,S
!**************************
    m=5
    lm=1
    if(node .eq. 0) lm=2
    allocate(H(lm,m),S(lm,m),C(lm,m),E(m))
    blocksize=1
    call DESCINIT(DESCH,m,m,blocksize,blocksize,0,0,ICTXT,lm,INFO)
    call DESCINIT(DESCH,5,5,1,1,0,0,ICTXT,lm,INFO)
    !write(*,*) node,DESCH
    !write(*,*) INFO
    !call DESCINIT(DESCE_2d,m,m,blocksize,blocksize,0,0,ICTXT_2d,lm,INFO)
    call PdLAREAD("H.data",H,DESCH,0,0,WORK)
    call PdLAREAD("S.data",S,DESCH,0,0,WORK)
    if(IOnode) write(*,*) "H is : -------------------------------"
    DO i=0,4  
        
        if(node .eq. mod(i,np)  ) write(*,*) node,i,":",H(1+i/np,:)   !DO i=1,lm
        call sleep(1)           !    write(*,*) node,i,":",S(i,:) 
    END DO 
    !call sleep(1)
    if(IOnode) write(*,*) "S is : -------------------------------"
    DO i=0,4  
        
        if(node .eq. mod(i,np)  ) write(*,*) node,i,":",S(1+i/np,:)   !DO i=1,lm
        call sleep(1)           !    write(*,*) node,i,":",S(i,:) 
    END DO 
    
!****************************
!Hz=Szw   
!此处:S=U^T*U ,S=U
!Hz=U^T*Uzw  
!C=U^(-T)*H*U^(-1) U是上三角矩阵
!Cy=wy -> w,y ! C是一个上三角矩阵,可以告诉scalapack 
!U^z=y;z=U^(-1)*y
!函数:https://scc.ustc.edu.cn/zlsc/tc4600/intel/2017.0.098/mkl/common/mklman_f/index.htm
!****************************
    call pdpotrf('U',m,S,1,1,DESCH,INFO)
            
            !call pdpotrf(uplo, n, a, ia, ja, desca, info)
            !uplo= 'U' or 'L'
                !sub(A) = UH*U if uplo='U',
                !sub(A) = L*LH if uplo='L'
            !n (global) INTEGER. The order of the distributed matrix sub(A) (n≥0).
!
!    DO i=0,4
!        if(node .eq. mod(i,np)) write(*,*) node,i,"SU",S(1+i/np,:)
!        !call sleep(1)        
!    END DO 
 
!****************************
!Hz=Szw   
!S=U^T*U ,S=U
!Hz=U^T*Uzw  
!此处:C=U^(-T)*H*U^(-1) U是上三角矩阵
!Cy=wy -> w,y ! C是一个上三角矩阵,可以告诉scalapack 
!U^z=y;z=U^(-1)*y
!函数:https://scc.ustc.edu.cn/zlsc/tc4600/intel/2017.0.098/mkl/common/mklman_f/index.htm
!*****************************    
    call pdsyngst(1,'U',m,H,1,1,DESCH,S,1,1, DESCH,DSCALE,work,lwork,info)
    !H=inv(UH)*sub( A )*inv(U)
    
    !call pdsyngst (ibtype, uplo, n, a, ia, ja, desca, b, ib, jb, descb, scale, work, lwork, info )
    !结果保存到a
    !ibtype
        != 1: compute inv(UH)*sub( A )*inv(U) or inv(L)*sub( A )*inv(LH);
        != 2 or 3: compute U*sub( A )*UH or LH*sub( A )*L.
    !uplo
        != 'U': Upper triangle of sub( A ) is stored and sub( B ) is factored as UH*U;
        != 'L': Lower triangle of sub( A ) is stored and sub( B ) is factored as L*LH.
    !n The order of the matrices sub( A ) and sub( B ). n >= 0.
    !SCALE : output ,no use
    !work
    !lwork 需要判断!!!
    !lwork
    !(local or global)
    !INTEGER.
    !The size of the array work.
    !lwork is local input and must be at least lwork >= MAX( NB * ( NP0 +1 ), 3 * NB )
    !
    !When ibtype = 1 and uplo = 'L', p?syngst provides improved performance when lwork >= 2 * NP0 * NB + NQ0 * NB + NB * NB,
    !
    !where NB = mb_a = nb_a,
    !
    !NP0 = numroc( n, NB, 0, 0, NPROW ),
    !
    !NQ0 = numroc( n, NB, 0, 0, NPROW ),
    !
    !numroc is a ScaLAPACK tool functions
    !
    !MYROW, MYCOL, NPROW and NPCOL can be determined by calling the subroutine blacs_gridinfo.
    !
    !If lwork = -1, then lwork is global input and a workspace query is assumed; the routine only calculates the optimal size for all work arrays. Each of these values is returned in the first entry of the corresponding work array, and no error message is issued by pxerbla.

!    DO i=0,4
!        if(node .eq. mod(i,np)) write(*,*) node,i,":",H(1+i/np,:)
!        call sleep(1)        
!    END DO 
    
!****************************
!Hz=Szw   
!S=U^T*U ,S=U
!Hz=U^T*Uzw  
!C=U^(-T)*H*U^(-1) U是上三角矩阵
!此处 Cy=wy -> w,y ! C是一个上三角矩阵,可以告诉scalapack 
!U^z=y;z=U^(-1)*y
!函数:https://scc.ustc.edu.cn/zlsc/tc4600/intel/2017.0.098/mkl/common/mklman_f/index.htm
!*****************************  

     call pdsyevd('V','U',m,H,1,1,DESCH,E,C,1,1,DESCH,work,lwork,iwork,liwork,INFO)
     
     !call pdsyevd(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, iwork, liwork, info)
        !jobz
           !(global) CHARACTER*1. Must be 'N' or 'V'.
           !Specifies if it is necessary to compute the eigenvectors:
           !If jobz = 'N', then only eigenvalues are computed.
           !If jobz = 'V', then eigenvalues and eigenvectors are computed.
        !uplo
           !(global) CHARACTER*1. Must be 'U' or 'L'.
           !Specifies whether the upper or lower triangular part of the Hermitian matrix A is stored:
           !If uplo = 'U', a stores the upper triangular part of A.
           !If uplo = 'L', a stores the lower triangular part of A.
           
        !w 的维度w(n), n是A(n,n)的维度w

        !lwork需要计算最小值

        !iwork
            !(local) INTEGER. Workspace array of size liwork.
        !liwork
            !(local) INTEGER , size of iwork.
            !liwork = 7*n + 8*npcol + 2.    
            !liwork >= 7*5+8*1+2=45 都可以

     !DO i=0,4
     !    if(node .eq. mod(i,np)) write(*,*) node,i,":",C(1+i/np,:)
     !    !call sleep(1)        
     !END DO 
     !
!****************************
!Hz=Szw   
!S=U^T*U ,S=U
!Hz=U^T*Uzw  
!C=U^(-T)*H*U^(-1) U是上三角矩阵
!Cy=wy -> w,y ! C是一个上三角矩阵,可以告诉scalapack 
!此处 U^z=y;z=U^(-1)*y
!函数:https://scc.ustc.edu.cn/zlsc/tc4600/intel/2017.0.098/mkl/common/mklman_f/index.htm
!*****************************  
     call pdtrsm('Left','U','N','Non-unit',m,m,1.0_dp,S,1,1,DESCH,C,1,1,DESCH)
              
              !求解S*X=Z
              !X的解保存到z2d中去,此时z2d才变成真的z2d
              
              !call pdtrsm(side, uplo, transa, diag, m, n, alpha,a , ia, ja, desca, b, ib, jb, descb)
              !The p?trsm routines solve one of the following distributed matrix equations:
              !op(sub(A))*X = alpha*sub(B),
              !or
              !X*op(sub(A)) = alpha*sub(B),
              !if side = 'L' or 'l', then op(sub(A))*X = alpha*sub(B);
              !if side = 'R' or 'r', then X*op(sub(A)) = alpha*sub(B).
              !uplo
                  !if uplo = 'U' or 'u', then the matrix is upper triangular;
                  !if uplo = 'L' or 'l', then the matrix is low triangular.
              !transa
                  !if transa = 'N' or 'n', then op(sub(A)) = sub(A);
                  !if transa = 'T' or 't', then op(sub(A)) = sub(A)';
                  !if transa = 'C' or 'c', then op(sub(A)) = conjg(sub(A)').
              !diag
              !(global) CHARACTER*1. Specifies whether the matrix sub(A) is unit triangular:
                  !if diag = 'U' or 'u' then the matrix is unit triangular;
                  !if diag = 'N' or 'n', then the matrix is not unit triangular.
              !b  Overwritten by the solution distributed matrix X. 
             
     DO i=0,4
         if(node .eq. mod(i,np)) then
            write(*,*) "Eig",E(i+1),"Vector",C(1+i/np,:)
         endif
         call sleep(1)        
     END DO 
       
    !CALL BLACS_GRIDEXIT( ICTXT )
    
    !deallocate(H,S,C,E)


!********************************************************************************
!2D网格计算    
!********************************************************************************

!**************************
!处理器grid
!**************************
    
    NPCOL=2
    NPROW=2
!建立多个处理器网格的方法
    CALL BLACS_GET( -1, 0, ICTXT_1d )
    call blacs_gridinit(ICTXT_1d,'C',NPROW,NPCOL)
    call blacs_gridinfo(ICTXT_1d,NPROW,NPCOL,myROW,myCOL)
    !
    CALL BLACS_GET( -1, 0, ICTXT_2d )
    call blacs_gridinit(ICTXT_2d,'C',NPROW,NPCOL)
    call blacs_gridinfo(ICTXT_2d,NPROW,NPCOL,myROW,myCOL)
    !
    CALL BLACS_GET( -1, 0, ICTXT_3d )
    call blacs_gridinit(ICTXT_3d,'C',NPROW,NPCOL)
    !call blacs_gridinfo(ICTXT_3d,NPROW,NPCOL,myROW,myCOL)
    CALL BLACS_GET( -1, 0, ICTXT_4d )
    call blacs_gridinit(ICTXT_4d,'C',NPROW,NPCOL)
    !call blacs_gridinfo(ICTXT_4d,NPROW,NPCOL,myROW,myCOL)    
    if(IOnode) write(*,*) "CNQ:ICTXT" ,ICTXT,ICTXT_1d,ICTXT_2d,ICTXT_3d,ICTXT_4d
!不同的处理器网不会互相干扰,有各自的ICTXT

!**************************
!数据网格

!**************************
    !在ICTXT情况下读入数据,测试不同网格下的转换
    call PdLAREAD("H.data",H,DESCH,0,0,WORK)
    call PdLAREAD("S.data",S,DESCH,0,0,WORK)  
    
    m=5
    blocksize=1
    lm=3
    ln=3
    if(myROW .eq. 1) lm=2
    if(myCol .eq. 1) ln=2
    allocate(H_2d(lm,ln),S_2d(lm,ln),C_2d(lm,ln),E_2d(m))
    call DESCINIT(DESCH_2d,m,m,blocksize,blocksize,0,0,ICTXT_2d,lm,INFO)
    !write(*,*) INFO
    !call DESCINIT(DESCE_2d,m,m,blocksize,blocksize,0,0,ICTXT_2d,lm,INFO)
    call pdgemr2d(m, m, H, 1, 1, DESCH, H_2d, 1, 1, DESCH_2d, ictxt_2d)
    !在不同的处理器/数据网格间复制矩阵
    !call pdgemr2d(m, n, a, ia, ja, desca, b, ib, jb, descb, ictxt)
    !b "=" a 
    !ictxt 当ictxt与ictxt_2d的所有处理器一样时,用谁都可以
    !The context encompassing at least the union of all processes
    !                 in context A and context B. All processes in 
    !                  the context ictxt must call this routine, 
    !                   even if they do not own a piece of either matrix.
    
     !This can check H have be copied succeed
     !DO i=0,4
     !    if(myROW .eq. mod(i,NPROW) .and. myCOL .eq. 0 ) then
     !       j=i/2
     !       write(*,*) "H2d",j+1,i,H_2d(j+1,1)
     !    endif
     !    call sleep(1)        
     !END DO
     call pdgemr2d(m, m, S, 1, 1, DESCH, S_2d, 1, 1, DESCH_2d, ictxt_2d)
  
     !计算过程同1D
     call pdpotrf('U',m,S_2d,1,1,DESCH_2d,INFO)
     call pdsyngst(1,'U',m,H_2d,1,1,DESCH_2d,S_2d,1,1, DESCH_2d,DSCALE,work,lwork,info)
     liwork=53
     !liwork=7*5+8*2+2=53
     call pdsyevd('V','U',m,H_2d,1,1,DESCH_2d,E_2d,C_2d,1,1,DESCH_2d,work,lwork,iwork,liwork,INFO)
     call pdtrsm('Left','U','N','Non-unit',m,m,1.0_dp,S_2d,1,1,DESCH_2d,C_2d,1,1,DESCH_2d)
     if(node .eq. 0) write(*,*) "Use 2D Eigvalue is:"
     if(node .eq. 0) write(*,*) E_2d
    
    deallocate(H,S,C,E)
    deallocate(H_2d,S_2d,C_2d,E_2d)
    CALL BLACS_GRIDEXIT( ICTXT )
    CALL BLACS_GRIDEXIT( ICTXT_1d )    
    CALL BLACS_GRIDEXIT( ICTXT_2d )    
    CALL BLACS_GRIDEXIT( ICTXT_3d )    
    CALL BLACS_GRIDEXIT( ICTXT_4d )    
    call mpi_end()
end program HC_SCE

将代码中的’U’换为’L’可以得到同样的本征值解,只是本征矢量有些不同

处理结果

使用matlab检验计算结果

>> H

H =

   -0.4240   -0.4014   -0.0000    0.0000   -0.0000
   -0.4014   -0.2996   -0.0000    0.0000   -0.0000
   -0.0000   -0.0000    0.8041   -0.0000   -0.0000
    0.0000    0.0000   -0.0000    0.8042    0.0000
   -0.0000   -0.0000   -0.0000    0.0000    0.8041

>> S

S =

    1.0000    0.9548         0   -0.0000         0
    0.9548    1.0000         0   -0.0000         0
         0         0    1.0000         0         0
    0.0000    0.0000         0    1.0000         0
         0         0         0         0    1.0000

>> C

C =

   -1.0275    0.0004   -0.0000   -0.0000   -3.2032
    0.0289   -0.0004    0.0000    0.0000    3.3639
   -0.0000   -0.7084   -0.7058   -0.0000   -0.0001
    0.0000   -0.0000   -0.0000    1.0000   -0.0000
   -0.0000   -0.7058    0.7084    0.0000   -0.0001

>> %HC=SCE
>> %inv(C)*inv(S)*HC=E
>> inv(C)*inv(S)*H*C

ans =

   -0.4241    0.0000    0.0000    0.0000   -0.0000
    0.0000    0.8041   -0.0000    0.0000   -0.0000
    0.0000   -0.0000    0.8041    0.0000   -0.0000
   -0.0000   -0.0000    0.0000    0.8042    0.0000
    0.0000    0.0000    0.0000    0.0000    0.9098

>> 

对角线的值即为E


本文首发于我的博客@cndaqiang.
本博客所有文章除特别声明外,均采用 CC BY-SA 4.0 协议 ,转载请注明出处!


目录

访客数据