/**********************************************************************
  MD_pac.c:

     MD_pac.c is a subroutine to perform molecular dynamics
     simulations and geometry optimization.

  Log of MD_pac.c:

     22/Nov/2001  Released by T. Ozaki
     15/Dec/2003  DIISs are added by H. Kino
     14/May/2004  NVT_VS is added by M. Ohfuti
     25/May/2004  Modified by T. Ozaki

***********************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include "openmx_common.h"
#include "lapack_prototypes.h"

#ifdef nompi
#include "mimic_mpi.h"
#else
#include "mpi.h"
#endif

static void NoMD(int iter);
static void VerletXYZ(int iter);
static void NVT_VS(int iter);  /* added by mari */
static void NVT_NH(int iter); 
static void Steepest_Descent(int iter, int SD_scaling_flag);
static void GDIIS(int iter, int iter0);
static void Geometry_Opt_DIIS(int iter);
static void Correct_Position_In_First_Cell();


/*
 * MD_pac: run one molecular-dynamics / geometry-optimization step.
 *
 *   iter  index of the current MD step; it is shown in the banner and
 *         passed on to the selected driver.
 *
 * The driver is selected by the global MD_switch:
 *   0 -> NoMD, 1 -> VerletXYZ, 2 -> NVT_VS, 3 -> Steepest_Descent(iter,1),
 *   7 -> Geometry_Opt_DIIS, 9 -> NVT_NH.
 * Every other value runs no driver.  Unless MD_switch is 0,
 * Correct_Position_In_First_Cell() is called afterwards.
 *
 * Returns the difference between the dtime() reading taken just before
 * returning and the one taken on entry.
 */
double MD_pac(int iter)
{
  double t_begin,t_end;
  int numprocs,myid;

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  dtime(&t_begin);

  /* the banner is printed by the host rank only */
  if (myid==Host_ID){
    printf("\n*******************************************************\n"); 
    printf("             MD or geometry opt. at MD =%2d              \n",iter);
    printf("*******************************************************\n\n"); 
  }

  switch (MD_switch) {
    case 0:
      NoMD(iter);
      break;
    case 1:
      VerletXYZ(iter);
      break;
    case 2:
      NVT_VS(iter);
      break;
    case 3:
      Steepest_Descent(iter,1);
      break;
    case 7:
      Geometry_Opt_DIIS(iter);
      break;
    case 9:
      NVT_NH(iter);
      break;
    default:
      /* 4, 5, 6, 8, 10 and anything else: not used */
      break;
  }

  /* correct atoms which are out of the first unit cell */
  if (MD_switch!=0){
    Correct_Position_In_First_Cell();
  }

  dtime(&t_end);
  return t_end - t_begin;
}


void NoMD(int iter)
{
  char fileCoord[YOUSO10];
  FILE *fp_crd,*fp_SD;
  int i,j,k;
  char buf[fp_bsize];          /* setvbuf */
  int numprocs,myid,ID;

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);
 
  MD_Opt_OK = 1;

  if (myid==Host_ID){ 

    if (MD_Opt_OK==1 || iter==MD_IterNumber){

      strcpy(fileCoord,".crd");
      fnjoint(filepath,filename,fileCoord);
      if ((fp_crd = fopen(fileCoord,"w")) != NULL){

#ifdef xt3
        setvbuf(fp_crd,buf,_IOFBF,fp_bsize);  /* setvbuf */
#endif

        fprintf(fp_crd,"\n");
        fprintf(fp_crd,"***********************************************************\n");
        fprintf(fp_crd,"***********************************************************\n");
        fprintf(fp_crd,"       xyz-coordinates (Ang) and forces (Hartree/Bohr)  \n");
        fprintf(fp_crd,"***********************************************************\n");
        fprintf(fp_crd,"***********************************************************\n\n");

        fprintf(fp_crd,"<coordinates.forces\n");
        fprintf(fp_crd,"  %i\n",atomnum);
        for (k=1; k<=atomnum; k++){
          i = WhatSpecies[k];
          j = Spe_WhatAtom[i];
          fprintf(fp_crd," %4d  %4s   %9.5f %9.5f %9.5f  %15.12f %15.12f %15.12f\n",
                  k,Atom_Symbol[j],
	          Gxyz[k][1]*BohrR,Gxyz[k][2]*BohrR,Gxyz[k][3]*BohrR,
	    	  -Gxyz[k][17],-Gxyz[k][18],-Gxyz[k][19]);
        }
        fprintf(fp_crd,"coordinates.forces>\n");
        fclose(fp_crd);
      }
      else
        printf("error(1) in MD_pac.c\n");
    }

  } /* if (myid==Host_ID) */

}



/*
 * Correct_Position_In_First_Cell: fold atoms back toward the first cell.
 *
 * For every atom handled by this rank (M2G[1..Matomnum]):
 *   1. the position relative to Grid_Origin is projected on rtv[1..3]
 *      and divided by 2*PI, giving Cell_Gxyz[][1..3];
 *   2. any component whose magnitude exceeds 1.0 is shifted by an
 *      integer (components with magnitude <= 1.0 are left alone);
 *   3. Gxyz[][1..3] is rebuilt from Cell_Gxyz, tv and Grid_Origin.
 * Finally each atom's Gxyz[][0..3] is broadcast from the rank given by
 * G2ID so that all ranks hold the same coordinates.
 */
void Correct_Position_In_First_Cell()
{
  int axis,Mc_AN,Gc_AN,ct_AN;
  int nshift;
  int numprocs,myid,ID;
  double Cxyz[4];
  double frac;

  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    Gc_AN = M2G[Mc_AN];

    /* position measured from the grid origin */
    for (axis=1; axis<=3; axis++){
      Cxyz[axis] = Gxyz[Gc_AN][axis] - Grid_Origin[axis];
    }

    /* coordinates in units of the cell vectors */
    for (axis=1; axis<=3; axis++){
      Cell_Gxyz[Gc_AN][axis] = Dot_Product(Cxyz,rtv[axis])*0.5/PI;
    }

    /* shift components lying more than one cell away */
    for (axis=1; axis<=3; axis++){

      frac = Cell_Gxyz[Gc_AN][axis];

      if (!(1.0<fabs(frac))) continue;

      if (0.0<=frac){
        nshift = (int)frac;
        Cell_Gxyz[Gc_AN][axis] = frac - (double)nshift;
      }
      else{
        nshift = abs((int)frac) + 1;
        Cell_Gxyz[Gc_AN][axis] = frac + (double)nshift;
      }
    }

    /* back to Cartesian coordinates */
    for (axis=1; axis<=3; axis++){
      Gxyz[Gc_AN][axis] =  Cell_Gxyz[Gc_AN][1]*tv[1][axis]
                         + Cell_Gxyz[Gc_AN][2]*tv[2][axis]
                         + Cell_Gxyz[Gc_AN][3]*tv[3][axis] + Grid_Origin[axis];
    }
  }

  /****************
    MPI:  Gxyz
  *****************/

  for (ct_AN=1; ct_AN<=atomnum; ct_AN++){
    ID = G2ID[ct_AN];
    MPI_Bcast(&Gxyz[ct_AN][0], 4, MPI_DOUBLE, ID, mpi_comm_level1);
  }
}


void VerletXYZ(int iter)
{
  /***********************************************************
   NVE molecular dynamics with velocity-Verlet integrator
  ***********************************************************/
  /*********************************************************** 
   1 a.u.=2.4189*10^-2 fs, 1fs=41.341105 a.u. 
   Atom weight trasformation: proton = 1836.1526 a.u 
  ***********************************************************/
  /****************************************************
    Gxyz[][1] = x-coordinate at current step
    Gxyz[][2] = y-coordinate at current step
    Gxyz[][3] = z-coordinate at current step

    Gxyz[][14] = dEtot/dx at previous step
    Gxyz[][15] = dEtot/dy at previous step
    Gxyz[][16] = dEtot/dz at previous step

    Gxyz[][17] = dEtot/dx at current step
    Gxyz[][18] = dEtot/dy at current step
    Gxyz[][19] = dEtot/dz at current step

    Gxyz[][20] = atomic mass

    Gxyz[][21] = x-coordinate at previous step
    Gxyz[][22] = y-coordinate at previous step
    Gxyz[][23] = z-coordinate at previous step

    Gxyz[][24] = x-component of velocity at current step
    Gxyz[][25] = y-component of velocity at current step
    Gxyz[][26] = z-component of velocity at current step

    Gxyz[][27] = x-component of velocity at t+dt/2
    Gxyz[][28] = y-component of velocity at t+dt/2
    Gxyz[][29] = z-component of velocity at t+dt/2

    Gxyz[][30] = hx
    Gxyz[][31] = hy
    Gxyz[][32] = hz

  ****************************************************/

  double dt,dt2,back,sum,My_Ukc;
  double Wscale,scaled_force;
  int Mc_AN,Gc_AN,j,k,l;
  int numprocs,myid,ID;

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);

  MD_Opt_OK = 0;
  dt = 41.3411*MD_TimeStep;
  dt2 = dt*dt;
  Wscale = 1836.1526;

  /****************************************************
                 velocity-Verlet algorithm
  ****************************************************/

  if (iter==1){

    /****************************************************
      first step in velocity Verlet 
    ****************************************************/

    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];

      for (j=1; j<=3; j++){

        if (atom_Fixed_XYZ[Gc_AN][j]==0){

          scaled_force = -Gxyz[Gc_AN][16+j]/(Gxyz[Gc_AN][20]*Wscale);  

          /* v( r+0.5*dt ) */
          Gxyz[Gc_AN][26+j] = Gxyz[Gc_AN][23+j] + scaled_force*0.5*dt;
          Gxyz[Gc_AN][23+j] = Gxyz[Gc_AN][26+j];

          /* r( r+dt ) */
          Gxyz[Gc_AN][20+j] = Gxyz[Gc_AN][j];
 	  Gxyz[Gc_AN][j] =  Gxyz[Gc_AN][j] + Gxyz[Gc_AN][26+j]*dt;

	}
      }
    }
  }
  else{

    /****************************************************
      second step in velocity Verlet 
    ****************************************************/

    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];
      for (j=1; j<=3; j++){

        if (atom_Fixed_XYZ[Gc_AN][j]==0){
          scaled_force = -Gxyz[Gc_AN][16+j]/(Gxyz[Gc_AN][20]*Wscale);  
          Gxyz[Gc_AN][23+j] = Gxyz[Gc_AN][26+j] + scaled_force*0.5*dt;
	}
      }
    }

    /****************************************************
                       Kinetic Energy 
    ****************************************************/

    My_Ukc = 0.0;

    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];
      sum = 0.0;
      for (j=1; j<=3; j++){
        if (atom_Fixed_XYZ[Gc_AN][j]==0){
          sum += Gxyz[Gc_AN][j+23]*Gxyz[Gc_AN][j+23];
	}
      }
      My_Ukc = My_Ukc + 0.5*Gxyz[Gc_AN][20]*Wscale*sum;
    }

    /****************************************************
     MPI, Ukc
    ****************************************************/

    MPI_Allreduce(&My_Ukc, &Ukc, 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);

    /* calculation of temperature (K) */
    Temp = Ukc/(1.5*kB*(double)atomnum)*eV2Hartree;

    /****************************************************
      first step in velocity Verlet 
    ****************************************************/

    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];
      for (j=1; j<=3; j++){

        if (atom_Fixed_XYZ[Gc_AN][j]==0){

          scaled_force = -Gxyz[Gc_AN][16+j]/(Gxyz[Gc_AN][20]*Wscale);  
          /* v( r+0.5*dt ) */
          Gxyz[Gc_AN][26+j] = Gxyz[Gc_AN][23+j] + scaled_force*0.5*dt;
          Gxyz[Gc_AN][23+j] = Gxyz[Gc_AN][26+j];

          /* r( r+dt ) */
          Gxyz[Gc_AN][20+j] = Gxyz[Gc_AN][j];
 	  Gxyz[Gc_AN][j] =  Gxyz[Gc_AN][j] + Gxyz[Gc_AN][26+j]*dt;
	}

      }
    }
  }

  for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
    ID = G2ID[Gc_AN];
    MPI_Bcast(&Gxyz[Gc_AN][1],  3, MPI_DOUBLE, ID, mpi_comm_level1);
    MPI_Bcast(&Gxyz[Gc_AN][17],13, MPI_DOUBLE, ID, mpi_comm_level1);
  }
}




/*
 * Steepest_Descent: move the atoms one step along the negative gradient.
 *
 *   iter             MD step index; at iter==1 the step length is
 *                    initialised and the header of the .SD file is written.
 *   SD_scaling_flag  0: recompute SD_scaling from SD_scaling_user at
 *                       every call;
 *                    otherwise: adapt SD_scaling from the energy history
 *                    Past_Utot[] (only for iter>1).
 *
 * Steps:
 *   1. Max_Force = largest gradient norm over all atoms (fixed components
 *      excluded), reduced over all ranks; MD_Opt_OK is set to 1 when it
 *      is below MD_Opt_criterion, otherwise it stays 0.
 *   2. SD_scaling is chosen and clamped to [SD_min, SD_max].
 *   3. Each non-fixed coordinate is updated by -SD_scaling*gradient and
 *      the coordinates and gradients are broadcast from the owning rank.
 *   4. The host rank prints a summary, appends one line to the ".SD"
 *      history file and, when converged or at the last step
 *      (iter==MD_IterNumber), writes the ".crd" file.
 *
 * 1 a.u. = 2.4189*10^-2 fs, 1 fs = 41.341105 a.u. (from the original notes)
 */
void Steepest_Descent(int iter, int SD_scaling_flag)
{
  int i,j,k,Mc_AN,Gc_AN;
  double dt,SD_max,SD_min,SD_init,tmp0;
  double My_Max_Force;
  char fileCoord[YOUSO10];
  char fileSD[YOUSO10];
  FILE *fp_crd,*fp_SD;
  int numprocs,myid,ID;
  char buf[fp_bsize];          /* setvbuf (only referenced when xt3 is defined) */

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);
  MPI_Barrier(mpi_comm_level1);

  MD_Opt_OK = 0;

  /****************************************************
   find the maximum value of force 
  ****************************************************/

  My_Max_Force = 0.0;
  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    Gc_AN = M2G[Mc_AN];

    tmp0 = 0.0;
    for (j=1; j<=3; j++){
      if (atom_Fixed_XYZ[Gc_AN][j]==0){
        tmp0 += Gxyz[Gc_AN][16+j]*Gxyz[Gc_AN][16+j];
      }
    }
    tmp0 = sqrt(tmp0); 
    if (My_Max_Force<tmp0) My_Max_Force = tmp0;
  }

  /****************************************************
   MPI, Max_Force
  ****************************************************/

  MPI_Allreduce(&My_Max_Force, &Max_Force, 1, MPI_DOUBLE, MPI_MAX, mpi_comm_level1);
  if (Max_Force<MD_Opt_criterion) MD_Opt_OK = 1;

  /****************************************************
   set up SD_scaling
  ****************************************************/

  dt = 41.3411*1.0;
  SD_init = dt*dt/1836.1526;
  SD_max = SD_init*10.0;   /* upper bound of SD_scaling */
  SD_min = SD_init*0.005;  /* lower bound of SD_scaling */

  if (iter==1 || SD_scaling_flag==0){

    /* the small constant avoids a division by zero for vanishing forces */
    SD_scaling = SD_scaling_user/(Max_Force+1.0e-10);

    if (SD_max<SD_scaling) SD_scaling = SD_max;
    if (SD_scaling<SD_min) SD_scaling = SD_min;
  }

  else{

    /* the step length is only revised when iter%4==1:
       shrink it if the energy went up, enlarge it after two decreases */

    if (Past_Utot[1]<Utot && iter%4==1){ 
      SD_scaling = SD_scaling/4.0;
    }
    else if (Past_Utot[1]<Past_Utot[2] && Utot<Past_Utot[1] && iter%4==1){
      SD_scaling = SD_scaling*1.2;
    }

    if (SD_max<SD_scaling) SD_scaling = SD_max;
    if (SD_scaling<SD_min) SD_scaling = SD_min;

    /* shift the energy history by one step */
    Past_Utot[5] = Past_Utot[4];
    Past_Utot[4] = Past_Utot[3];
    Past_Utot[3] = Past_Utot[2];
    Past_Utot[2] = Past_Utot[1];
    Past_Utot[1] = Utot;
  }
  
  if (myid==Host_ID) printf("<Steepest_Descent>  SD_scaling=%15.12f\n",SD_scaling);

  /****************************************************
    move atoms
  ****************************************************/

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){

    Gc_AN = M2G[Mc_AN];

    for (j=1; j<=3; j++){
      if (atom_Fixed_XYZ[Gc_AN][j]==0){
        Gxyz[Gc_AN][j] = Gxyz[Gc_AN][j] - SD_scaling*Gxyz[Gc_AN][16+j];
      }
    }

  }

  /****************************************************
   MPI, Gxyz
  ****************************************************/

  for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
    ID = G2ID[Gc_AN];
    MPI_Bcast(&Gxyz[Gc_AN][1],  3, MPI_DOUBLE, ID, mpi_comm_level1);
    MPI_Bcast(&Gxyz[Gc_AN][17], 3, MPI_DOUBLE, ID, mpi_comm_level1);
  }

  if (myid==Host_ID){ 

    printf("<Steepest_Descent>  |Maximum force| (Hartree/Bohr) =%15.12f\n",
             Max_Force);
    printf("<Steepest_Descent>  Criterion       (Hartree/Bohr) =%15.12f\n",
            MD_Opt_criterion);

    /* positions printed here are the ones after the move */
    printf("\n");
    for (i=1; i<=atomnum; i++){
      printf("  atom=%3d, XYZ(ang) Fxyz(a.u.)=%9.4f %9.4f %9.4f  %9.4f %9.4f %9.4f\n",
              i,BohrR*Gxyz[i][1],BohrR*Gxyz[i][2],BohrR*Gxyz[i][3],
              Gxyz[i][17],Gxyz[i][18],Gxyz[i][19] ); 
    }   

    /* append one line to the optimization history */

    strcpy(fileSD,".SD");
    fnjoint(filepath,filename,fileSD);
    if ((fp_SD = fopen(fileSD,"a")) != NULL){

#ifdef xt3
      setvbuf(fp_SD,buf,_IOFBF,fp_bsize);  /* setvbuf */
#endif

      if (iter==1){
        fprintf(fp_SD,"\n");
        fprintf(fp_SD,"***********************************************************\n");
        fprintf(fp_SD,"***********************************************************\n");
        fprintf(fp_SD,"              History of geometry optimization             \n");
        fprintf(fp_SD,"***********************************************************\n");
        fprintf(fp_SD,"***********************************************************\n\n");

        fprintf(fp_SD,"  MD_iter   SD_scaling     |Maximum force|   Maximum step        Utot\n");
        fprintf(fp_SD,"                           (Hartree/Bohr)        (Ang)         (Hartree)\n\n");
      }
      fprintf(fp_SD,"  %3d  %15.8f  %15.8f  %15.8f  %15.8f\n",
              iter,SD_scaling,Max_Force,SD_scaling*Max_Force*BohrR, Utot);
      fclose(fp_SD);
    }
    else{
      /* the previous message claimed "no error" although fopen failed */
      printf("Could not open the file %s in Steepest_Descent of MD_pac.c\n",fileSD);
    }

    /* final coordinates and forces */

    if (MD_Opt_OK==1 || iter==MD_IterNumber){

      strcpy(fileCoord,".crd");
      fnjoint(filepath,filename,fileCoord);
      if ((fp_crd = fopen(fileCoord,"w")) != NULL){

#ifdef xt3
        setvbuf(fp_crd,buf,_IOFBF,fp_bsize);  /* setvbuf */
#endif

        fprintf(fp_crd,"\n");
        fprintf(fp_crd,"***********************************************************\n");
        fprintf(fp_crd,"***********************************************************\n");
        fprintf(fp_crd,"       xyz-coordinates (Ang) and forces (Hartree/Bohr)  \n");
        fprintf(fp_crd,"***********************************************************\n");
        fprintf(fp_crd,"***********************************************************\n\n");

        fprintf(fp_crd,"<coordinates.forces\n");
        fprintf(fp_crd,"  %i\n",atomnum);
        for (k=1; k<=atomnum; k++){
          i = WhatSpecies[k];
          j = Spe_WhatAtom[i];
          fprintf(fp_crd," %4d  %4s   %9.5f %9.5f %9.5f  %15.12f %15.12f %15.12f\n",
                  k,Atom_Symbol[j],
                  Gxyz[k][1]*BohrR,Gxyz[k][2]*BohrR,Gxyz[k][3]*BohrR,
                  -Gxyz[k][17],-Gxyz[k][18],-Gxyz[k][19]);
        }
        fprintf(fp_crd,"coordinates.forces>\n");
        fclose(fp_crd);
      }
      else
        printf("error(1) in MD_pac.c\n");
    }

  } /* if (myid==Host_ID) */

}




/*
 * Geometry_Opt_DIIS: geometry optimization alternating between
 * steepest-descent (SD) steps and GDIIS steps.
 *
 *   iter  global MD step index.
 *
 * A static flag selects the method for this call (0: SD, 1: GDIIS):
 *   - while iter<OptStartDIIS the flag is forced to 0;
 *   - at iter==OptStartDIIS it becomes 1;
 *   - afterwards the counter of the active method is incremented and,
 *     once a counter reaches OptEveryDIIS, the flag is toggled and both
 *     counters are cleared (this takes effect at the next call).
 *
 * After an SD step the position/residual history (GxyzHistoryIn,
 * GxyzHistoryR) is shifted by one slot and the current position and
 * -dt*gradient are stored in slot 0 (residual 0.0 for fixed
 * components); the GDIIS-local iteration counter restarts at 1.
 */
void Geometry_Opt_DIIS(int iter)
{
  static int local_iter=1;              /* iteration counter handed to GDIIS */
  static int SD_iter=0,GDIIS_iter=0;    /* consecutive steps of each method  */
  static int flag=0;                    /* 0: SD, 1: GDIIS                   */
  int slot,iatom,k,diis_iter;
  int Every_iter;
  double sMD_TimeStep,dt;
  int numprocs,myid;

  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);
  MPI_Barrier(mpi_comm_level1);

  Every_iter = OptEveryDIIS;

  /* number of history slots in use, limited by M_GDIIS_HISTORY */
  diis_iter = (iter<M_GDIIS_HISTORY) ? iter : M_GDIIS_HISTORY;

  /* choose the method and count its steps */

  if (iter<OptStartDIIS){
    flag = 0;
  }
  else if (iter==OptStartDIIS){
    flag = 1;
    GDIIS_iter++;
  }
  else if (flag==0){
    SD_iter++;
  }
  else if (flag==1){
    GDIIS_iter++;
  }

  if (flag!=0){

    /* GDIIS */

    GDIIS(local_iter,iter);
    local_iter++;
  }
  else{

    /* SD */

    Steepest_Descent(iter,1);

    /* shift the history by one slot, oldest entries first */

    for (slot=diis_iter-2; 0<=slot; slot--){
      for (iatom=1; iatom<=atomnum; iatom++){
        for (k=1; k<=3; k++){
          GxyzHistoryIn[slot+1][iatom][k] = GxyzHistoryIn[slot][iatom][k];
          GxyzHistoryR[slot+1][iatom][k]  = GxyzHistoryR[slot][iatom][k];
        }
      }
    }

    /* store the current position and residual in slot 0 */

    sMD_TimeStep = 0.05/(0.01*41.341105);
    dt = 41.341105 *  sMD_TimeStep;

    for (iatom=1; iatom<=atomnum; iatom++){
      for (k=1; k<=3; k++){
        GxyzHistoryIn[0][iatom][k] = Gxyz[iatom][k];

        if (atom_Fixed_XYZ[iatom][k]==0)
          GxyzHistoryR[0][iatom][k] = -dt*Gxyz[iatom][16+k];
        else
          GxyzHistoryR[0][iatom][k] = 0.0;
      }
    }

    /* the next GDIIS sequence starts from scratch */

    local_iter = 1;
  }

  /* switch the method once the active one has run Every_iter times */

  if (Every_iter<=SD_iter){
    flag = 1;
    SD_iter = 0;
    GDIIS_iter = 0;
  }
  else if (Every_iter<=GDIIS_iter){
    flag = 0;
    SD_iter = 0;
    GDIIS_iter = 0;
  }
}


void GDIIS(int iter, int iter0)
{
  /* 1au=2.4189*10^-2 fs, 1fs=41.341105 au */

  char *func_name="DIIS";
  char *JOBB="L";
  double dt;
  double *A,*B,sumB,max_A, RR,dRi[4],dRj[4];
  double *work;
  double mixing,force_Max;
  static double sMD_TimeStep,dx_max=0.05; 
  double diff_dx;
  int *ipiv;
  INTEGER i,j,k,iatom,N,LDA,LWORK,info;
  int diis_iter;
  char fileCoord[YOUSO10];
  char fileSD[YOUSO10];
  FILE *fp_crd,*fp_SD;
  char buf[fp_bsize];          /* setvbuf */

  /* variables for MPI */
  int Gc_AN;
  int numprocs,myid,ID;  

  /* MPI myid */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);
  MPI_Barrier(mpi_comm_level1);

  /* share Gxyz */
  for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
    ID = G2ID[Gc_AN];
    MPI_Bcast(&Gxyz[Gc_AN][1],  3, MPI_DOUBLE, ID, mpi_comm_level1);
    MPI_Bcast(&Gxyz[Gc_AN][17], 3, MPI_DOUBLE, ID, mpi_comm_level1);
  }

  if (myid!=Host_ID)   goto Last_Bcast; 

  /*********************** for myid==Host_ID **************************/

  if (iter<M_GDIIS_HISTORY)
    diis_iter = iter;
  else   
    diis_iter = M_GDIIS_HISTORY;

  /* shift one */

  for (i=(diis_iter-2); 0<=i; i--) {
    for (iatom=1; iatom<=atomnum; iatom++) {
      for (k=1; k<=3; k++) {
	GxyzHistoryIn[i+1][iatom][k] = GxyzHistoryIn[i][iatom][k];
	GxyzHistoryR[i+1][iatom][k]  = GxyzHistoryR[i][iatom][k];
      }
    }
  }

  /* allocation of arrays */

  A = (double*)malloc(sizeof(double)*(diis_iter+1)*(diis_iter+1));
  for (i=0; i<(diis_iter+1)*(diis_iter+1); i++) A[i] = 0.0;
  B = (double*)malloc(sizeof(double)*(diis_iter+1));

  /* Max of force */

  force_Max=0.0;
  for (iatom=1;iatom<=atomnum;iatom++)   {
    for (k=1;k<=3;k++) {
      if (atom_Fixed_XYZ[iatom][k]==0){
	if (force_Max< fabs(Gxyz[iatom][16+k]) ) force_Max = fabs(Gxyz[iatom][16+k]);
      }
    }
  }

  sMD_TimeStep = 0.05/(0.01*41.341105);

  if (2<=level_stdout){
    printf("<%s>  |Maximum force| (Hartree/Bohr) = %15.12f tuned_dt= %f\n",func_name,force_Max, sMD_TimeStep);
    printf("<%s>  Criterion      (Hartree/Bohr) = %15.12f\n", func_name, MD_Opt_criterion);
  }

  dt = 41.341105 *  sMD_TimeStep;

  /* add GxyzHistoryIn and GxyzHisotryR */

  for (iatom=1; iatom<=atomnum; iatom++)   {

    for (k=1;k<=3;k++) {
      if (atom_Fixed_XYZ[iatom][k]==0){
	GxyzHistoryIn[0][iatom][k] =     Gxyz[iatom][k];
	GxyzHistoryR[0][iatom][k]  = -dt*Gxyz[iatom][16+k];
      }
      else{
	GxyzHistoryIn[0][iatom][k] = Gxyz[iatom][k];
	GxyzHistoryR[0][iatom][k]  = 0.0;
      }
    }
  }

  for (i=0; i<diis_iter; i++) {
    for (j=0; j<diis_iter; j++) {

      RR = 0.0;

      for (iatom=1; iatom<=atomnum; iatom++)   {
	for (k=1; k<=3; k++) {
	  dRi[k] = GxyzHistoryR[i][iatom][k];
	  dRj[k] = GxyzHistoryR[j][iatom][k];
	}

	RR += dRi[1]*dRj[1] + dRi[2]*dRj[2] + dRi[3]*dRj[3];
      }

      A[ i*(diis_iter+1)+j ]= RR;
    }
  }

  /* find max of A */

  max_A = 0.0;

  for (i=0;i<diis_iter;i++) {
    for (j=0;j<diis_iter;j++) {
      RR = fabs(A[i*(diis_iter+1)+j]) ;
      if (max_A< RR ) max_A = RR;
    }
  }

  max_A = 1.0/max_A;

  for (i=0; i<diis_iter; i++) {
    for (j=0; j<diis_iter; j++) {
      A[ i*(diis_iter+1)+j ] *= max_A;
    }
  }

  for (i=0; i<diis_iter; i++) {
    A[ i*(diis_iter+1)+diis_iter ] = 1.0;
    A[ diis_iter*(diis_iter+1)+i ] = 1.0;
  }

  A[diis_iter*(diis_iter+1)+diis_iter] = 0.0;

  for (i=0; i<diis_iter; i++) B[i] = 0.0;
  B[diis_iter] = 1.0;

  if (2<=level_stdout){
    printf("<%s>  DIIS matrix\n",func_name);
    for (i=0; i<(diis_iter+1); i++) {
      printf("<%s> ",func_name);
      for (j=0; j<(diis_iter+1); j++) {
        printf("%6.3f ",A[i*(diis_iter+1)+j]);
      }
      printf("\n");
    }
  }

  /* lapack routine */

  N=diis_iter+1;
  LDA=diis_iter+1;
  LWORK=diis_iter+1;
  work=(double*)malloc(sizeof(double)*LWORK);
  ipiv = (int*)malloc(sizeof(int)*(diis_iter+1));

  i = 1; 

  if (2<=level_stdout){
    printf("M_GDIIS_HISTORY=%2d diis_iter=%2d\n",M_GDIIS_HISTORY,diis_iter);
  }

  F77_NAME(dsysv,DSYSV)( JOBB, &N, &i, A, &LDA,  ipiv, B, &LDA, work, &LWORK, &info);

  if (info!=0) {
    printf("<%s> dsysv_, info=%d\n",func_name,info);
    printf("<%s> \n",func_name);
    printf("<%s> ERROR, aborting\n",func_name);
    printf("<%s> \n",func_name);

    MD_Opt_OK =1; 
    /* no change */

    goto Last_Bcast ;
  }

  if (2<=level_stdout){
    printf("<%s> diis alpha=",func_name);
    sumB = 0;
    for (i=0; i<diis_iter; i++) {
      printf("%f ",B[i]);
      sumB += B[i];
    }
    printf("%lf\n",B[diis_iter]);
  }

  /* initialize */

  for (iatom=1; iatom<=atomnum; iatom++) {
    for (j=1; j<=3; j++) {
      Gxyz[iatom][j] = 0.0;
    }
  }

  /* add tilde{R} */

  for (iatom=1; iatom<=atomnum; iatom++) {
    for (j=1; j<=3; j++) {
      if (atom_Fixed_XYZ[iatom][j]==0){
	for (i=0; i<diis_iter; i++) {
	  Gxyz[iatom][j] += GxyzHistoryR[i][iatom][j]*B[i];
	}
      }
    }
  }

  sumB = 0.0;
  for (iatom=1; iatom<=atomnum; iatom++) {
    for (j=1; j<=3; j++) {
      sumB += Gxyz[iatom][j]*Gxyz[iatom][j] ;
    }
  }

  sumB = sqrt(sumB)/(double)atomnum;

  if (2<=level_stdout){
    printf("<%s> |tilde{R}|=%E\n",func_name, sumB);
  }

  if      (1.0e-2<sumB)  mixing = 0.1;
  else if (1.0e-3<sumB)  mixing = 0.2;
  else if (1.0e-4<sumB)  mixing = 0.3;
  else if (1.0e-5<sumB)  mixing = 0.4;
  else if (1.0e-6<sumB)  mixing = 0.5;
  else                   mixing = 0.6;

  if (2<=level_stdout){
    printf("<%s> mixing=%15.12f\n",func_name,mixing);
  }

  for (iatom=1; iatom<=atomnum; iatom++) {
    for (j=1;j<=3;j++) {
      Gxyz[iatom][j] *= mixing;
    }
  }

  /* tilde{x} */

  for (iatom=1;iatom<=atomnum;iatom++) {
    for (j=1;j<=3;j++) {
      for (i=0; i<diis_iter; i++) {
        Gxyz[iatom][j] += GxyzHistoryIn[i][iatom][j]*B[i]; 
      }
    }
  }

  diff_dx = 0.0;

  for (iatom=1;iatom<=atomnum;iatom++)   {
    for (k=1;k<=3;k++) {
      if (diff_dx< fabs(Gxyz[iatom][k]-GxyzHistoryIn[1][iatom][k]) ) {
        diff_dx =  fabs(Gxyz[iatom][k]-GxyzHistoryIn[1][iatom][k]) ;
      }
    }
  }

  if (2<=level_stdout){

    printf("<%s> diff_x= %f , dE= %f\n",func_name,diff_dx, fabs(Utot-Past_Utot[1]) );

    /* print atomic positions */
    printf("<%s> atomnum= %d\n",func_name,atomnum);
    for (i=1; i<=atomnum; i++){
      j = Spe_WhatAtom[WhatSpecies[i]];
      printf("  %3d %s XYZ(ang) Fxyz(a.u.)= %9.4f %9.4f %9.4f  %9.4f %9.4f %9.4f\n",
	   i,Atom_Symbol[j],
	   BohrR*Gxyz[i][1],BohrR*Gxyz[i][2],BohrR*Gxyz[i][3],
	   Gxyz[i][17],Gxyz[i][18],Gxyz[i][19] ); 
    }   
  }

  Past_Utot[1]=Utot;

  if (force_Max<MD_Opt_criterion )  MD_Opt_OK = 1;

  Max_Force = force_Max;
  SD_scaling_user = SD_scaling*Max_Force*0.2;

  /* free arrays */

  free(A);
  free(B);
  free(ipiv);
  free(work);

  /*********************** end of "myid==Host_ID" **************************/

 Last_Bcast: 

  MPI_Bcast(&MD_Opt_OK,1,MPI_INT, Host_ID, mpi_comm_level1);

  for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
    /* ID = G2ID[Gc_AN]; */
    MPI_Bcast(&Gxyz[Gc_AN][1],  3, MPI_DOUBLE, Host_ID, mpi_comm_level1);
    /*    MPI_Bcast(&Gxyz[Gc_AN][17], 3, MPI_DOUBLE, Host_ID, mpi_comm_level1); */
  }


  if (myid==Host_ID){ 

    printf("<%s>  |Maximum force| (Hartree/Bohr) =%15.12f\n",
	   func_name,Max_Force);fflush(stdout);
    printf("<%s>  Criterion       (Hartree/Bohr) =%15.12f\n",
	   func_name,MD_Opt_criterion);fflush(stdout);

    printf("\n");
    for (i=1; i<=atomnum; i++){
      printf("     atom=%4d, XYZ(ang) Fxyz(a.u.)=%9.4f %9.4f %9.4f  %9.4f %9.4f %9.4f\n",
	     i,BohrR*Gxyz[i][1],BohrR*Gxyz[i][2],BohrR*Gxyz[i][3],
	     Gxyz[i][17],Gxyz[i][18],Gxyz[i][19] ); fflush(stdout);
    }   

    strcpy(fileSD,".SD");
    fnjoint(filepath,filename,fileSD);
    if ((fp_SD = fopen(fileSD,"a")) != NULL){

#ifdef xt3
      setvbuf(fp_SD,buf,_IOFBF,fp_bsize);  /* setvbuf */
#endif

      if (iter0==1){

        fprintf(fp_SD,"\n");
        fprintf(fp_SD,"***********************************************************\n");
        fprintf(fp_SD,"***********************************************************\n");
        fprintf(fp_SD,"              History of geometry optimization             \n");
        fprintf(fp_SD,"***********************************************************\n");
        fprintf(fp_SD,"***********************************************************\n\n");


        fprintf(fp_SD,"  MD_iter   SD_scaling     |Maximum force|   Maximum step        Utot\n");
        fprintf(fp_SD,"                           (Hartree/Bohr)        (Ang)         (Hartree)\n\n");
      }

      fprintf(fp_SD,"  %3d  %15.8f  %15.8f  %15.8f  %15.8f\n",
              iter0,SD_scaling,Max_Force,SD_scaling*Max_Force*BohrR,Utot);
      fclose(fp_SD);
    }
    else{
      printf("Could not open a file in MD_pac.!\n");
    }

    if (MD_Opt_OK==1 || iter==MD_IterNumber){

      strcpy(fileCoord,".crd");
      fnjoint(filepath,filename,fileCoord);
      if ((fp_crd = fopen(fileCoord,"w")) != NULL){

#ifdef xt3
        setvbuf(fp_crd,buf,_IOFBF,fp_bsize);  /* setvbuf */
#endif

        fprintf(fp_crd,"\n");
        fprintf(fp_crd,"***********************************************************\n");
        fprintf(fp_crd,"***********************************************************\n");
        fprintf(fp_crd,"       xyz-coordinates (Ang) and forces (Hartree/Bohr)  \n");
        fprintf(fp_crd,"***********************************************************\n");
        fprintf(fp_crd,"***********************************************************\n\n");

        fprintf(fp_crd,"<coordinates.forces\n");
        fprintf(fp_crd,"  %i\n",atomnum);
        for (k=1; k<=atomnum; k++){
          i = WhatSpecies[k];
          j = Spe_WhatAtom[i];
          fprintf(fp_crd," %4d  %4s   %9.5f %9.5f %9.5f  %15.12f %15.12f %15.12f\n",
                  k,Atom_Symbol[j],
	          Gxyz[k][1]*BohrR,Gxyz[k][2]*BohrR,Gxyz[k][3]*BohrR,
	    	  -Gxyz[k][17],-Gxyz[k][18],-Gxyz[k][19]);
        }
        fprintf(fp_crd,"coordinates.forces>\n");
        fclose(fp_crd);
      }
      else
        printf("error(1) in MD_pac.c\n");
    }

  } /* if (myid==Host_ID) */

}











void NVT_VS(int iter)
{
  /* added by mari */
  /********************************************************
   This routine is added by Mari Ohfuti (May 20004).                

   a constant temperature molecular dynamics by a velocity
   scaling method with velocity-Verlet integrator
  ********************************************************/
  /******************************************************* 
   1 a.u.=2.4189*10^-2 fs, 1fs=41.341105 a.u. 
   Atom weight trasformation: proton = 1836.1526 a.u 
  ********************************************************/

  /****************************************************
    Gxyz[][1] = x-coordinate at current step
    Gxyz[][2] = y-coordinate at current step
    Gxyz[][3] = z-coordinate at current step

    Gxyz[][14] = dEtot/dx at previous step
    Gxyz[][15] = dEtot/dy at previous step
    Gxyz[][16] = dEtot/dz at previous step

    Gxyz[][17] = dEtot/dx at current step
    Gxyz[][18] = dEtot/dy at current step
    Gxyz[][19] = dEtot/dz at current step

    Gxyz[][20] = atomic mass

    Gxyz[][21] = x-coordinate at previous step
    Gxyz[][22] = y-coordinate at previous step
    Gxyz[][23] = z-coordinate at previous step

    Gxyz[][24] = x-component of velocity at current step
    Gxyz[][25] = y-component of velocity at current step
    Gxyz[][26] = z-component of velocity at current step

    Gxyz[][27] = x-component of velocity at t+dt/2
    Gxyz[][28] = y-component of velocity at t+dt/2
    Gxyz[][29] = z-component of velocity at t+dt/2

    Gxyz[][30] = hx
    Gxyz[][31] = hy
    Gxyz[][32] = hz

  ****************************************************/

  double dt,dt2,sum,My_Ukc,x,t,xyz0[4],xyz0_l[4];
  double Wscale;
  int Mc_AN,Gc_AN,i,j,k,l;
  int numprocs,myid,ID;

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);
  MPI_Barrier(mpi_comm_level1);

  MD_Opt_OK = 0;
  dt = 41.3411*MD_TimeStep;
  dt2 = dt*dt;
  Wscale = 1836.1526;

  /****************************************************
                Velocity Verlet algorithm
  ****************************************************/

  if (iter==1){
    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];

      for (j=1; j<=3; j++){

        if (atom_Fixed_XYZ[Gc_AN][j]==0){

	  Gxyz[Gc_AN][j] = Gxyz[Gc_AN][j]+dt*Gxyz[Gc_AN][23+j]
                          -Gxyz[Gc_AN][16+j]/(Gxyz[Gc_AN][20]*Wscale)*dt2*0.50;
          Gxyz[Gc_AN][13+j] = Gxyz[Gc_AN][16+j];

	}
      }
    }
  }
  else{
    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];

      for (j=1; j<=3; j++){

        if (atom_Fixed_XYZ[Gc_AN][j]==0){
 	  Gxyz[Gc_AN][23+j] = Gxyz[Gc_AN][23+j]-(Gxyz[Gc_AN][16+j]
                             +Gxyz[Gc_AN][13+j])/(Gxyz[Gc_AN][20]*Wscale)*dt*0.50;

	}
      }
    }
  }

  /****************************************************
                     Kinetic Energy 
  ****************************************************/

  Ukc=0.0;
  My_Ukc = 0.0;

  for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
    Gc_AN = M2G[Mc_AN];

    sum = 0.0;
    for (j=1; j<=3; j++){
      sum = sum + Gxyz[Gc_AN][j+23]*Gxyz[Gc_AN][j+23];
    }
    My_Ukc = My_Ukc + 0.5*Gxyz[Gc_AN][20]*Wscale*sum;
  }

  /****************************************************
   MPI: Ukc 
  ****************************************************/

  MPI_Allreduce(&My_Ukc, &Ukc, 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);

  /* calculation of temperature (K) */
  Temp = Ukc/(1.5*kB*(double)atomnum)*eV2Hartree;

  /* calculation of a given temperature (K) */
  for (i=1; i<=TempNum; i++) {
    if( (iter>NumScale[i-1]) && (iter<=NumScale[i]) ) {
      GivenTemp = TempPara[i-1][2] + (TempPara[i][2] - TempPara[i-1][2])*
          ((double)iter-(double)TempPara[i-1][1])/((double)TempPara[i][1]-(double)TempPara[i-1][1]);
    }
  }

  if(iter!=1) {

    x = 1.0;
    for (i=1; i<=TempNum; i++) {

      if( (iter>NumScale[i-1]) && (iter<=NumScale[i]) ) {

        /**************************************************
         find a scaling parameter, x, when MD step matches
         at the step where the temperature scaling is made.
         Otherwise, x = 1.0.
        **************************************************/

        if((iter-NumScale[i-1])%IntScale[i]==0) {

          GivenTemp = TempPara[i-1][2] + (TempPara[i][2] - TempPara[i-1][2])*
               ((double)iter-(double)TempPara[i-1][1])/((double)TempPara[i][1]-(double)TempPara[i-1][1]);
 
          x = GivenTemp + (Temp-GivenTemp)*RatScale[i];
          x = sqrt(1.5*kB*x/(Ukc*eV2Hartree)*(double)atomnum);
        }
      }
    }

    /* do scaling */

    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];
      for (j=1; j<=3; j++){

        if (atom_Fixed_XYZ[Gc_AN][j]==0){

          Gxyz[Gc_AN][23+j] = Gxyz[Gc_AN][23+j]*x;
	  Gxyz[Gc_AN][j] = Gxyz[Gc_AN][j]+dt*Gxyz[Gc_AN][23+j]
                          -Gxyz[Gc_AN][16+j]/(Gxyz[Gc_AN][20]*Wscale)*dt2*0.50;
          Gxyz[Gc_AN][13+j] = Gxyz[Gc_AN][16+j];

	}
      }
    }

  }

  /****************************************************
   MPI: Gxyz
  ****************************************************/

  for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
    ID = G2ID[Gc_AN];
    MPI_Bcast(&Gxyz[Gc_AN][1],  3, MPI_DOUBLE, ID, mpi_comm_level1);
    MPI_Bcast(&Gxyz[Gc_AN][14], 3, MPI_DOUBLE, ID, mpi_comm_level1);
    MPI_Bcast(&Gxyz[Gc_AN][24], 3, MPI_DOUBLE, ID, mpi_comm_level1);
  }

}




void NVT_NH(int iter)
{
  /***********************************************************
   a constant temperature molecular dynamics by a Nose-Hoover
   method with velocity-Verlet integrator
  ***********************************************************/
  /*********************************************************** 
   1 a.u.=2.4189*10^-2 fs, 1fs=41.341105 a.u. 
   Atom weight trasformation: proton = 1836.1526 a.u 
  ***********************************************************/
  /****************************************************
    Gxyz[][1] = x-coordinate at current step
    Gxyz[][2] = y-coordinate at current step
    Gxyz[][3] = z-coordinate at current step

    Gxyz[][14] = dEtot/dx at previous step
    Gxyz[][15] = dEtot/dy at previous step
    Gxyz[][16] = dEtot/dz at previous step

    Gxyz[][17] = dEtot/dx at current step
    Gxyz[][18] = dEtot/dy at current step
    Gxyz[][19] = dEtot/dz at current step

    Gxyz[][20] = atomic mass

    Gxyz[][21] = x-coordinate at previous step
    Gxyz[][22] = y-coordinate at previous step
    Gxyz[][23] = z-coordinate at previous step

    Gxyz[][24] = x-component of velocity at current step
    Gxyz[][25] = y-component of velocity at current step
    Gxyz[][26] = z-component of velocity at current step

    Gxyz[][27] = x-component of velocity at t+dt/2
    Gxyz[][28] = y-component of velocity at t+dt/2
    Gxyz[][29] = z-component of velocity at t+dt/2

    Gxyz[][30] = hx
    Gxyz[][31] = hy
    Gxyz[][32] = hz

  ****************************************************/

  int Mc_AN,Gc_AN,i,j,k,l,po,num,NH_switch;
  int numprocs,myid,ID;

  double dt,dt2,sum,My_sum,My_Ukc,x,t,xyz0[4],xyz0_l[4];
  double scaled_force,Wscale,back;
  double dzeta,dv,h_zeta;
  double My_sum1,sum1,My_sum2,sum2;

  /* MPI */
  MPI_Comm_size(mpi_comm_level1,&numprocs);
  MPI_Comm_rank(mpi_comm_level1,&myid);
  MPI_Barrier(mpi_comm_level1);

  MD_Opt_OK = 0;
  dt = 41.3411*MD_TimeStep;
  dt2 = dt*dt;
  Wscale = 1836.1526;

  /* find a given temperature by a linear interpolation */

  NH_switch = 0;
  i = 1;
  do {

    if ( TempPara[i][1]<=iter && iter<TempPara[i+1][1] ){

      GivenTemp = TempPara[i][2] + (TempPara[i+1][2] - TempPara[i][2])*
            ((double)iter-(double)TempPara[i][1])/((double)TempPara[i+1][1]-(double)TempPara[i][1]);

      NH_switch = 1; 
    }

    i++;
  } while (NH_switch==0 && i<=(TempNum-1));  

  /****************************************************
                Velocity Verlet algorithm
  ****************************************************/

  if (iter==1){

    NH_czeta = 0.0;
    NH_R = 0.0;

    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];
      for (j=1; j<=3; j++){

        if (atom_Fixed_XYZ[Gc_AN][j]==0){
        
          scaled_force = -Gxyz[Gc_AN][16+j]/(Gxyz[Gc_AN][20]*Wscale);  

          /* v( r+0.5*dt ) */
          Gxyz[Gc_AN][26+j] = Gxyz[Gc_AN][23+j] + (scaled_force - NH_czeta*Gxyz[Gc_AN][23+j])*0.5*dt;
          Gxyz[Gc_AN][23+j] = Gxyz[Gc_AN][26+j];

          /* r( r+dt ) */
          Gxyz[Gc_AN][20+j] = Gxyz[Gc_AN][j];
 	  Gxyz[Gc_AN][j] =  Gxyz[Gc_AN][j] + Gxyz[Gc_AN][26+j]*dt;
	}

      }
    }

    /* zeta( t+0.5*dt ) */

    NH_nzeta = NH_czeta + (Ukc - 1.5*kB*(double)atomnum*GivenTemp/eV2Hartree)*dt/(TempQ*Wscale);
    NH_czeta = NH_nzeta;

    /* R( r+dt ) */
    NH_R = NH_R + NH_nzeta*dt;
  }

  else if (NH_switch==1) {

    /*****************************************************
     second step:

     refinement of v and zeta by a Newton-Raphson method
    *****************************************************/

    po = 0;
    num = 0;

    do {

      /* Ukc */

      Ukc = 0.0;
      My_Ukc = 0.0;

      for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
        Gc_AN = M2G[Mc_AN];

        sum = 0.0;
        for (j=1; j<=3; j++){
          if (atom_Fixed_XYZ[Gc_AN][j]==0){
            sum = sum + Gxyz[Gc_AN][j+23]*Gxyz[Gc_AN][j+23];
	  }
        }
        My_Ukc += 0.5*Gxyz[Gc_AN][20]*Wscale*sum;
      }

      /* MPI: Ukc */

      MPI_Allreduce(&My_Ukc, &Ukc, 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);

      /* calculation of h */ 

      h_zeta = NH_nzeta
              + (Ukc - 1.5*kB*(double)atomnum*GivenTemp/eV2Hartree)*dt/(TempQ*Wscale) - NH_czeta;

      for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
        Gc_AN = M2G[Mc_AN];
        for (j=1; j<=3; j++){

          if (atom_Fixed_XYZ[Gc_AN][j]==0){

            scaled_force = -Gxyz[Gc_AN][16+j]/(Gxyz[Gc_AN][20]*Wscale);  
            Gxyz[Gc_AN][j+29] = Gxyz[Gc_AN][26+j] + (scaled_force - NH_czeta*Gxyz[Gc_AN][23+j])*0.5*dt
                                -Gxyz[Gc_AN][23+j];
	  }
	}  
      }

      /* sum1 */
     
      sum1=0.0;
      My_sum1 = 0.0;

      for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
        Gc_AN = M2G[Mc_AN];

        sum = 0.0;
        for (j=1; j<=3; j++){
          if (atom_Fixed_XYZ[Gc_AN][j]==0){
            sum += Gxyz[Gc_AN][j+29]*Gxyz[Gc_AN][j+23];
	  }
        }
        My_sum1 += Gxyz[Gc_AN][20]*Wscale*sum*dt/(TempQ*Wscale);
      }

      /* MPI: sum1 */

      MPI_Allreduce(&My_sum1, &sum1, 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);

      /* sum2 */
     
      sum2=0.0;
      My_sum2 = 0.0;

      for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
        Gc_AN = M2G[Mc_AN];

        sum = 0.0;
        for (j=1; j<=3; j++){
          if (atom_Fixed_XYZ[Gc_AN][j]==0){
            sum += Gxyz[Gc_AN][j+23]*Gxyz[Gc_AN][j+23];
	  }
        }
        My_sum2 -= 0.5*Gxyz[Gc_AN][20]*Wscale*sum*dt*dt/(TempQ*Wscale);
      }

      /* MPI: sum2 */

      MPI_Allreduce(&My_sum2, &sum2, 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);

      /* new NH_czeta and new v */

      dzeta = (-h_zeta*(NH_czeta*0.5*dt+1.0)-sum1)/(-(NH_czeta*0.5*dt+1.0)+sum2);

      My_sum = 0.0;
      for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
        Gc_AN = M2G[Mc_AN];
        for (j=1; j<=3; j++){

          if (atom_Fixed_XYZ[Gc_AN][j]==0){
            dv = (Gxyz[Gc_AN][j+29] - 0.5*Gxyz[Gc_AN][j+23]*dt*dzeta)/(NH_czeta*0.5*dt + 1.0); 
            Gxyz[Gc_AN][j+23] += dv;
            My_sum += dv*dv; 
	  }
        }
      }

      NH_czeta += dzeta; 

      MPI_Allreduce(&My_sum, &sum, 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);

      sum += dzeta*dzeta;

      if (sum<1.0e-12) po = 1;

      num++; 

      if (20<num) po = 1;

    } while(po==0);

    /****************************************************
                       Kinetic Energy 
    ****************************************************/

    Ukc = 0.0;
    My_Ukc = 0.0;

    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];

      sum = 0.0;
      for (j=1; j<=3; j++){
        if (atom_Fixed_XYZ[Gc_AN][j]==0){
          sum = sum + Gxyz[Gc_AN][j+23]*Gxyz[Gc_AN][j+23];
	}
      }
      My_Ukc += 0.5*Gxyz[Gc_AN][20]*Wscale*sum;
    }

    /****************************************************
     MPI: Ukc 
    ****************************************************/

    MPI_Allreduce(&My_Ukc, &Ukc, 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);

    /* calculation of temperature (K) */

    Temp = Ukc/(1.5*kB*(double)atomnum)*eV2Hartree;

    /****************************************************
     Nose-Hoover Hamiltonian which is a conserved quantity
    ****************************************************/

    NH_Ham = Utot + Ukc + 0.5*NH_czeta*NH_czeta*TempQ*Wscale
                        + 3.0*kB*(double)atomnum*GivenTemp*NH_R/eV2Hartree; 

    /*****************************************************
     first step:

       v(t)    ->  v(t+0.5*dt) 
       r(t)    ->  r(t+dt) 
       zeta(t) ->  zeta(t+0.5*dt)
       R(t)    ->  R(t+dt) 
    *****************************************************/

    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];
      for (j=1; j<=3; j++){

        if (atom_Fixed_XYZ[Gc_AN][j]==0){
        
          scaled_force = -Gxyz[Gc_AN][16+j]/(Gxyz[Gc_AN][20]*Wscale);  

          /* v( r+0.5*dt ) */
          Gxyz[Gc_AN][26+j] = Gxyz[Gc_AN][23+j] + (scaled_force - NH_czeta*Gxyz[Gc_AN][23+j])*0.5*dt;
          Gxyz[Gc_AN][23+j] = Gxyz[Gc_AN][26+j];

          /* r( r+dt ) */
          Gxyz[Gc_AN][20+j] = Gxyz[Gc_AN][j];
 	  Gxyz[Gc_AN][j] =  Gxyz[Gc_AN][j] + Gxyz[Gc_AN][26+j]*dt;
	}
      }
    }

    /* zeta( t+0.5*dt ) */

    NH_nzeta = NH_czeta + (Ukc - 1.5*kB*(double)atomnum*GivenTemp/eV2Hartree)*dt/(TempQ*Wscale);
    NH_czeta = NH_nzeta;

    /* R( r+dt ) */
    NH_R = NH_R + NH_nzeta*dt;
  }

  else {

    /****************************************************
      second step in velocity Verlet 
    ****************************************************/

    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];
      for (j=1; j<=3; j++){

        if (atom_Fixed_XYZ[Gc_AN][j]==0){
          scaled_force = -Gxyz[Gc_AN][16+j]/(Gxyz[Gc_AN][20]*Wscale);  
          Gxyz[Gc_AN][23+j] = Gxyz[Gc_AN][26+j] + scaled_force*0.5*dt;
	}
      }
    }

    /****************************************************
                       Kinetic Energy 
    ****************************************************/

    My_Ukc = 0.0;

    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];

      sum = 0.0;
      for (j=1; j<=3; j++){
        if (atom_Fixed_XYZ[Gc_AN][j]==0){
          sum = sum + Gxyz[Gc_AN][j+23]*Gxyz[Gc_AN][j+23];
	}
      }
      My_Ukc = My_Ukc + 0.5*Gxyz[Gc_AN][20]*Wscale*sum;
    }

    /****************************************************
     MPI: Ukc 
    ****************************************************/

    MPI_Allreduce(&My_Ukc, &Ukc, 1, MPI_DOUBLE, MPI_SUM, mpi_comm_level1);

    /* calculation of temperature (K) */
    Temp = Ukc/(1.5*kB*(double)atomnum)*eV2Hartree;

    /****************************************************
      first step in velocity Verlet 
    ****************************************************/

    for (Mc_AN=1; Mc_AN<=Matomnum; Mc_AN++){
      Gc_AN = M2G[Mc_AN];
      for (j=1; j<=3; j++){

        if (atom_Fixed_XYZ[Gc_AN][j]==0){

          scaled_force = -Gxyz[Gc_AN][16+j]/(Gxyz[Gc_AN][20]*Wscale);  
          /* v( r+0.5*dt ) */
          Gxyz[Gc_AN][26+j] = Gxyz[Gc_AN][23+j] + scaled_force*0.5*dt;
          Gxyz[Gc_AN][23+j] = Gxyz[Gc_AN][26+j];

          /* r( r+dt ) */
          Gxyz[Gc_AN][20+j] = Gxyz[Gc_AN][j];
 	  Gxyz[Gc_AN][j] =  Gxyz[Gc_AN][j] + Gxyz[Gc_AN][26+j]*dt;
	}
      }
    }
  }

  /****************************************************
   MPI: Gxyz
  ****************************************************/

  for (Gc_AN=1; Gc_AN<=atomnum; Gc_AN++){
    ID = G2ID[Gc_AN];
    MPI_Bcast(&Gxyz[Gc_AN][1],   3, MPI_DOUBLE, ID, mpi_comm_level1);
    MPI_Bcast(&Gxyz[Gc_AN][14], 19, MPI_DOUBLE, ID, mpi_comm_level1);
  }
}
