#!/bin/bash -e 
###########################################################################
# This file is part of ST-CpHMD, version v4.1.2_GMX2018.
# 
# Copyright (c) 2005-2020, Instituto de Tecnologia Quimica e Biologica,
# Universidade Nova de Lisboa, Portugal.
# 
# ST-CpHMD is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 2 of the License, or (at your
# option) any later version.
# 
# ST-CpHMD is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with ST-CpHMD.  If not, see <http://www.gnu.org/licenses/>.
# 
# For further details and info check the manual.pdf file.
# 
# You can get ST-CpHMD at www.itqb.unl.pt/simulation
###########################################################################

#######################################################################
#
# This is the main program of the ST-CpHMD package.
#
# It runs the blocks of PB/MC and MD, which compose each cycle of the
# stochastic titration constant-pH MD method. It calls MEAD for PB
# calculations, PETIT for Monte Carlo, and Gromacs for MD. The update
# of the topology file after charge assignment is done with the
# program update-top present in this directory. The preparation of
# files for MEAD is done using programs from the meadTools
# package. The centering of the solute in the box, needed for the PB
# calculations, is done with the program fixbox, unless the user
# chooses not to (see manual).
#
# It takes as argument the name of the .pHmdp file (see templates
# directory and manual) with the definition of all settings/variables.
#
#######################################################################

#######################################################################
# main: run one segment of stochastic titration constant-pH MD.
#
# Arguments:
#   $1 - .pHmdp parameter file. It is sourced as shell code; its name
#        without extension becomes the basename of the output files.
# Globals written (read by the other functions of this file):
#   prog usage blockname runname extraname TimeStep WriteStep WriteTime
#   InitTime WriteInitTime Cycle sim_time states_f sitenumb
# Exit status:
#   0 on success. Errors reported through 'message E|U' exit with 1.
#######################################################################
main ()
{

prog=$(basename "$0")
usage="Usage: $prog <MyProtein_001>.pHmdp
       <MyProtein_001>.pHmdp  : Constant-pH MD parameter file"

# Parse arguments
if [ $# != 1 ]; then message U "Wrong number of arguments."; fi

# Read Simulation Parameters
#Check if it exists; SC 2015
if [[ -f $1 ]]; then
    # 'source' looks a slash-less name up in PATH before the current
    # directory, so make the path explicit: the file that is sourced must
    # be the very file that was tested just above.
    case $1 in
        */*) source "$1" ;;
        *)   source "./$1" ;;
    esac
else
    message E "Constant-pH MD parameter file called $1 is missing."
fi

# Name of your simulation segment (the output files will
# be generated with this name)
blockname=${1%.*}
runname=${blockname%_*}
extraname=CpHMD_${blockname}    # basename for extra files #AB 2019


# functions.sh was removed and all functions are now in this file.  #AB 2019


#Check parameters in .pHmdp and assign defaults if needed
check_parameters "$1"

#Correct/convert a few variables
correct_variables

# Do some housekeeping
clean_up

# Check if all files are present:
check_files "$1"

# Let's keep track of the simulation place and time:
echo -e "Simulation run by $USER  @  $HOSTNAME\nInitial time: $(date)" \
    > "${blockname}.info"

# Build ff links needed for simulations
#gmx2018 #SC 2020
ln -s -f "$ffDIR/${ffID}.ff" .
ln -s -f "$ffDIR/residuetypes.dat" .

# Get .st files
"$MToolsDIR"/getst "${runname}.sites" "$StDIR"

# Make auxiliary files
make_auxiliary_files

#### Starts the constant-pH MD cycle ####

#dt=0.001 ps is the default value in gromacs
TimeStep=$(gawk '/^ *dt *=/{match($0,/=([^;$]+)/,a); dt=a[1]}END{print dt ~ /^ *$/ ? 0.001 : dt+0}' "${runname}.mdp") #SC 2015

#gmx2018 #SC 2020
#nstxout-compressed used to be called nstxtcout
#nstxout-compressed=0 is the default value in gromacs
WriteStep=$(gawk '/^ *nstxout-compressed *=/{match($0,/=([^;$]+)/,a); wrtst=a[1]}END{print wrtst ~ /^ *$/ ? 0 :  wrtst+0}' "${runname}.mdp") #SC 2015
WriteTime=$(echo "$WriteStep*$TimeStep" | bc -l)

#NEW: SC 2018-10-24
#InitTime=`echo "$EffectiveSteps*$TimeStep*($InitCycle-1)+$WriteTime" | bc -l`
# InitTime: simulation time at which this segment starts.
# WriteInitTime: time of the first frame written after that start.
InitTime=$(echo "$EffectiveSteps*$TimeStep*($InitCycle-1)" | bc -l)
WriteInitTime=$(echo "$InitTime+$WriteTime" | bc -l)

for (( Cycle=$InitCycle ; Cycle <=$EndCycle ; Cycle++ )); do

    # This section is just an index to keep track of the simulation
    sim_time=$(echo "$EffectiveSteps*$TimeStep*($Cycle-1)+$WriteTime" | bc -l)
    echo -e "\nCycle = $Cycle; time = $sim_time ps; Date: $(date "+%D %T")" \
        >> "${blockname}.info"

    ################### PB/MC PART #####################


    # Option $allstates == y only available in-house
    if [[ $allstates == y ]]; then
        :
    else
        states_f=TMP_MCarlo_std.out

        # Call the Reduced Titration function when needed, i.e. on cycles
        # 1, 1+RTFrequency, 1+2*RTFrequency, ...  Written as
        # (Cycle-1) % RTFrequency == 0 so that RTFrequency=1 selects every
        # cycle ("Cycle % 1" can never be equal to 1).
        if [[ $RTThreshold != 0 ]] && (( (Cycle - 1) % RTFrequency == 0 )); then
            echo -n "PB/MC (All) -  Cycle = $Cycle; Date: $(date "+%D %T") - " \
                >> "${blockname}.info"
            run_PBMC red
            echo "$(date "+%D %T")" >> "${blockname}.info"
        fi

        # Make sure the sites file is not empty...
        sitenumb=$(($(wc -l < "${runname}.sites")))
        if [ $sitenumb -ne 0 ]; then
        # ... and call the PB/MC function,...
            echo -n "PB/MC -        Cycle = $Cycle; Date: $(date "+%D %T") - " \
                >> "${blockname}.info"
            run_PBMC std
            echo "$(date "+%D %T")" >> "${blockname}.info"

        # ...write fractions to files and build a new topology.
            write_fractions
            build_topology

        # Otherwise...
        else
        # ... skip the PB/MC and write fractions to files
        #(when RTThreshold = 0 there are no fractions to write;
        # useful for tests without PB/MC).
            message W "File ${runname}.sites is empty. PB/MC step is not performed in cycle $Cycle."
            if [[ $RTThreshold != 0 ]]; then
                # Same cycle selection as for the reduced titration call above.
                if (( (Cycle - 1) % RTFrequency == 0 )); then
                    #Use the output from the Reduced Titration routine
                    cp -f TMP_MCarlo_mod.out TMP_MCarlo_std.out #SC 2017
                    write_fractions
                    cp -f TMP_CpHMD_red.top TMP_CpHMD.top  #SC 2017
                else
                    write_fractions
                fi
            fi
        fi
    fi

    #### MD PART ####
    # Call dynamics with solvent relaxation
    if [[ $RelaxSteps != 0 ]]; then
        echo -n "MD relax     - Cycle = $Cycle; Date: $(date "+%D %T") - " \
            >> "${blockname}.info"
        run_relaxation
        echo "$(date "+%D %T")" >> "${blockname}.info"
    else
        # No relaxation: the effective MD starts from the previous
        # effective structure, under the name run_dynamics expects.
        mv TMP_effective.gro TMP_relax.gro
    fi

    # Call effective (full) dynamics
    echo -n "MD effective - Cycle = $Cycle; Date: $(date "+%D %T") - " \
        >> "${blockname}.info"
    run_dynamics effective relax
    echo "$(date "+%D %T")" >> "${blockname}.info"

    # Call Append data function
    data_append
done
#### Ends the constant-pH MD cycle ####

# Rename original .sites file
mv -f "${runname}-all.sites" "${runname}.sites"

#SC 2021-09-22
# Exclude first frame from xtc and rename it
echo -e "System" | \
    "$GroDIR"/gmx trjconv -f TMP_CpHMD.xtc -o "${blockname}.xtc" \
             -b $WriteInitTime -quiet \
             -s TMP_effective.tpr

# Store segment outputs with unambiguous name
for e in gro tpr edr; do                     #AB 2019 (log removed)
    mv -fb TMP_CpHMD.$e "${blockname}.$e"
done
mv -fb TMP_CpHMD.log "${extraname}.log"              #AB 2019

if [[ $RTThreshold != 0 ]]; then
    #for e in occ_red{,_mod} mocc_red ; do
    #    mv -fb TMP_CpHMD.$e ${blockname}.$e
    #for e in occ_RT occ_RTmax mocc_RT ; do    #AB 2019
    #    mv -fb TMP_CpHMD.$e ${extraname}.$e        #AB 2019
    #done
    mv -fb TMP_CpHMD.sites "${extraname}.sites"
fi

if [ -f TMP_CpHMD.occ ]; then     # if .sites empty, write_fractions will never run #SC 2020
    for e in occ mocc ; do mv -fb TMP_CpHMD.$e "${blockname}.$e"; done
fi


# Let's keep track of the simulation end time:
echo -e "\nEnd time:     $(date)" >> "${blockname}.info"

# Clean up function
clean_up

exit 0

}



#######################################################################
# check_parameters: validate the settings read from the .pHmdp file and
# fill in defaults for the optional ones.
#
# Arguments:
#   $1 - name of the .pHmdp file (only used in error messages)
# Globals:
#   reads  the mandatory parameters listed in the first loop
#   writes nCPU_MD epsin epssol seed bsize pot GridSize SOL1st StDIR
#          ffDIR mdrun dpH use_fixbox (only when they are empty)
# Errors are reported with 'message E', which terminates the program.
#######################################################################
check_parameters ()
{
    local Param

    #These parameters are mandatory
    #mdrun is now optional #gmx2018 #SC 2020
    for Param in pH ffID temp ionicstr NRES \
                 RTThreshold RTFrequency \
                 RelaxSteps EffectiveSteps \
                 CpHDIR GroDIR MeadDIR MToolsDIR PetitDIR \
                 TOPin GROin \
                 nCPU \
                 InitCycle EndCycle
    do
        # ${!Param} is the value of the variable whose name is in $Param;
        # [[ ]] does not word-split, so multi-word values are fine.
        if [[ -z ${!Param} ]]; then
            message E  "Parameter $Param is empty. Check $1. Program will crash."
        fi
    done

    #These parameters are optional, taking default values
    if [[ -z $nCPU_MD ]]; then
        nCPU_MD=$nCPU
        message W  "Parameter nCPU_MD was empty. Default value is nCPU ($nCPU)."
    fi

    if [[ -z $epsin ]]; then
        epsin=2
        message W  "Parameter epsin was empty. Default value is $epsin."
    fi

    if [[ -z $epssol ]]; then
        epssol=80
        message W  "Parameter epssol was empty. Default value is $epssol."
    fi

    if [[ -z $seed ]]; then
        seed=1234567
        message W  "Parameter seed was empty. Default value is $seed."
    fi

    if [[ -z $bsize ]]; then
        bsize=10000
        message W  "Parameter bsize was empty. Default value is $bsize."
    fi

    # pot is defaulted silently (no warning).
    if [[ -z $pot ]]; then
        pot=0
    fi

    if [[ -z $GridSize ]]; then
        GridSize=0
        message W  "Parameter GridSize was empty. Default value is $GridSize."
    fi

    if [[ -z $SOL1st ]]; then
        SOL1st="SOL"
        message W  "Parameter SOL1st was empty. Default value is $SOL1st."
    fi

    if [[ -z $StDIR ]]; then
        StDIR=$CpHDIR/St-G54a7
        message W  "Parameter StDIR was empty. Default value is $StDIR"
    fi

    if [[ -z $ffDIR ]]; then
        ffDIR=$CpHDIR/top
        message W  "Parameter ffDIR was empty. Default value is $ffDIR."
    fi

    #mdrun is now optional #gmx2018 #SC 2020
    if [[ -z $mdrun ]]; then
        mdrun="$GroDIR/gmx mdrun"
        message W  "Parameter mdrun was empty. Default value is $mdrun."
    fi

    # dpH is defaulted silently (no warning).
    if [[ -z $dpH ]]; then
        dpH=0
    fi

    #$SitesIN is mandatory if $dpH!=0
    if [[ $dpH != 0 && -z $SitesIN ]]; then
        message E  "Parameter SitesIN is empty and dpH=$dpH. Check $1. Program will crash."
    fi

    # Rcon removed  #AB 2019


    #NEW: SC 2018-05-23
    if [[ -z $use_fixbox ]]; then
        use_fixbox="y"
    fi

    # fixboxDIR is mandatory whenever fixbox is used.   #AB 2019 #SC 2020
    # (The test must be on the VALUE of fixboxDIR: the literal word
    # "fixboxDIR" is never empty, so testing it would never fail.)
    if [[ $use_fixbox == "y" && -z $fixboxDIR ]]; then
        message E  "Parameter fixboxDIR is empty. Check $1. Program will crash."
    fi

    # Yes/no switches. They have no default (except use_fixbox above), so
    # an empty value is rejected here as well.
    for Param in allstates write_states use_fixbox     #NEW: SC 2018-05-23
    do
        if [[ ${!Param} != "n" && ${!Param} != "y" ]]; then
             message E  "Parameter $Param must be either y (yes) or n (no). Check $1. Program will crash."
        fi
    done

}

#######################################################################
# correct_variables: normalise / derive a few settings after the
# parameter file was read.
#
# Globals:
#   reads  GridSize RTThreshold dpH NRES ionicstr allstates
#   writes GridSize RTThreshold dpH offset ionicstrMolecule
#
# Values are handed to gawk with -v rather than pasted into the program
# text, so that a parameter value can never be parsed as awk code.
#######################################################################
correct_variables ()
{
    #Turns "0.0", "0.00", "0.000", etc into "0"
    GridSize=$(gawk -v x="$GridSize" 'BEGIN{print x+0}')
    RTThreshold=$(gawk -v x="$RTThreshold" 'BEGIN{print x+0}')
    dpH=$(gawk -v x="$dpH" 'BEGIN{print x+0}') #SC 2015    # Don't use InHouse !!!

    # Define variable for stmodels: 10^(1 + number of digits of NRES),
    # e.g. 150 -> 10000.  The digits are counted on the integer itself:
    # log(r)/log(10) is slightly below the exact value for some powers of
    # ten (1000 -> 2.9999...), which int() would truncate one too low.
    offset=$(gawk -v r="$NRES" 'BEGIN{print 10**(1+length(int(r)))}') #SC 2015

    # Convert Ionic Strength from Molar to molecule/nm^3.
    ionicstrMolecule=$(gawk -v m="$ionicstr" 'BEGIN{print m*0.6022}') #SC 2015

    #Check and adjust $RTThreshold if allstates==y
    if [[ $allstates == "y" && $RTThreshold != 0 ]]; then
        RTThreshold=0
        message W  "Reduced titration incompatible with allstates=y. RTThreshold turned to $RTThreshold."
    fi
}


#######################################################################
# check_files: make sure the input files and program directories that
# the run needs are present before anything is started.
#
# Arguments:
#   $1 - name of the .pHmdp file (only used in error messages)
# Globals read:
#   runname GROin TOPin RTThreshold use_fixbox fixboxDIR
#   GroDIR MeadDIR MToolsDIR PetitDIR StDIR ffDIR
# Errors are reported with 'message E', which terminates the program.
#######################################################################
check_files ()
{
    local f d nlines

    # Check for other necessary files (filenames must be respected)
    for f in "${runname}.sites" "${runname}.mdp" "$GROin" "$TOPin"; do
        if [[ ! -f $f ]]; then
            message E  "File $f is missing. Program will crash"
        fi
    done

    # Compare the line count as a number (as main does), so that padding
    # in the output of wc cannot defeat the test.
    nlines=$(wc -l < "${runname}.sites")
    if [[ $RTThreshold != 0 ]] && (( nlines == 0 )); then
        message E  "File ${runname}.sites is empty. Not compatible with reduced titration option. Program will crash"
    fi

    # Check directories. The names are quoted so that an empty setting is
    # tested (and reported) instead of silently dropping out of the list.
    for d in "$GroDIR" "$MeadDIR" "$MToolsDIR" "$PetitDIR" "$StDIR" "$ffDIR"; do
        if [[ ! -d $d ]]; then
            message E  "Directory $d does not exist. Program will crash"
        fi
    done
    #NEW: SC 2018-05-23
    if [[ $use_fixbox == "y" ]]; then
        if [[ ! -f ${runname}.mdf ]]; then
            message E  "File ${runname}.mdf is missing. Program will crash.\n If you don't want to use fixbox and ${runname}.mdf, set use_fixbox=n in $1"
        fi
        # With an empty fixboxDIR the old unquoted '[ ! -d $fixboxDIR ]'
        # collapsed to '[ ! -d ]', which is always false; [[ ]] tests the
        # (empty) name and therefore reports the problem.
        if [[ ! -d $fixboxDIR ]]; then
            message E  "Directory $fixboxDIR does not exist. Program will crash"
        fi
    else
        message W "use_fixbox=n only works for single molecule solute in water."
    fi

}


#######################################################################
# make_auxiliary_files: build, once per run, the working files used by
# the PB/MC and MD steps:
#   ${runname}-all.sites              copy of the original .sites file
#   TMP_CpHMD.top, TMP_effective.gro  copies of $TOPin and $GROin
#   ${extraname}.mdp, TMP_effective.mdp   .mdp for the effective MD
#   TMP_relax.mdp, ${extraname}.mdp_SR    .mdp for solvent relaxation
#   TMP_CpHMD.ndx, ${extraname}.ndx   index file containing a PBMC group
#   TMP_CpHMD.tpr, TMP_processed.top  output of the initial grompp
#   ${runname}.mgm, ${runname}.ogm    grid files (only if GridSize != 0)
# Globals read:
#   runname extraname TOPin GROin temp EffectiveSteps RelaxSteps
#   ionicstr ionicstrMolecule PosRe NDXin GroDIR SOL1st GridSize
# Globals written: tcgroups
#######################################################################
make_auxiliary_files ()
{
    # Keep a copy of the original .sites file (run_PBMC rewrites
    # ${runname}.sites when reduced titration is used)
    cp -fa ${runname}.sites ${runname}-all.sites  #SC 2015; preserve links


    # Working copies of the input topology and structure
    cp -f $TOPin TMP_CpHMD.top
    cp -f $GROin TMP_effective.gro

    # Correct Ionic Strength and Temperature in the .mdp file according 
    # to your parameters

    #Check how many tcgroups are there
    # (number of whitespace-separated names after '=' on the tc-grps line)
    tcgroups=`gawk '/^ *tc-grps/{match($0,/=([^;$]+)/,a);print split(a[1],b)}' ${runname}.mdp`

    if [[ "$tcgroups" =~ ^$ || "$tcgroups" == 0 ]]; then
	message E  "Temperature coupling groups (tc-grps) are missing in mdp file. Check  ${runname}.mdp."
    fi

    # Copy the user .mdp without the ionicstrength, ref_t and nsteps
    # lines; these settings are appended below with the CpHMD values.
    gawk '!/^ *ionicstrength *=/ && !/^ *ref_t *=/ && !/^ *nsteps *=/' \
	${runname}.mdp > ${extraname}.mdp #SC 2015

    # ref_t gets $temp repeated once per temperature coupling group.
    cat >> ${extraname}.mdp <<EOF 

;Added by CpHMD

nsteps = $EffectiveSteps 
ref_t = $(for ((i=1; i<=$tcgroups; i++)); do printf "%s " $temp; done)
EOF
    message W "Temperature in ${runname}.mdp file was changed to $temp."

    #gmx2018 #SC 2020
    # Only add ionic strength if it is defined in the original mdp
    # (gawk exits with 1 as soon as such a line is found)
    if ! gawk '/^ *ionicstrength *=/{exit 1}' ${runname}.mdp; then
        cat >> ${extraname}.mdp <<EOF
ionicstrength = $ionicstrMolecule
EOF
        message W "Ionic strength in ${runname}.mdp file was changed to $ionicstrMolecule molecules/nm^3 (which corresponds to $ionicstr Molar)."
    fi

    # Make effective .mdp gromacs file
    cp ${extraname}.mdp TMP_effective.mdp

    # Make relaxation .mdp gromacs file
    # Change NPT to NVT: because frozen positions only make sense
    # when volume is fixed

    #SC 2018-05-23 -NEW PBMC
    # Remember that energygrp_excl must be consistent with energygrps
    #gmx2018 #SC 2020
    # energygrp_excl were removed for compatibility with gmx recent
    # versions. energygrps were removed for compatibility with gmx_2018+gpu.
    # Drop the nsteps, Pcoupl and constraints lines; they are re-added
    # below with the values used for solvent relaxation.
    gawk '!/^ *nsteps *=/ && !/^ *Pcoupl *=/ && !/^ *constraints *=/' \
	 ${extraname}.mdp > TMP_relax.mdp

    #gmx2018 #SC 2020
    # The PBMC index group (created further down) is frozen in x, y and z.
    cat >> TMP_relax.mdp <<EOF

;Solvent relaxation parameters
nsteps = $RelaxSteps
Pcoupl = No
constraints = none
freezegrps = PBMC
freezedim = Y Y Y
EOF

    #POSRE: remove lines with string $PosRe from relaxation mdp
    #SC 2015
    # NOTE(review): $PosRe is pasted into the awk program as a regular
    # expression, so it must not contain '/' or whitespace - confirm
    # against the manual.
    if ! [ -z $PosRe ]; then 
	mv TMP_relax.mdp TMP_relax-aux.mdp 
	gawk "!/$PosRe/" TMP_relax-aux.mdp > TMP_relax.mdp
	rm -f TMP_relax-aux.mdp
	#message W "The position restraints signaled will not be used during solvent relaxation."
	message W "Lines with string $PosRe were removed from solvent relaxation mdp file."
    fi

    # Keep a copy of the solvent relaxation .mdp among the extra files
    cp TMP_relax.mdp ${extraname}.mdp_SR # SC 2015 + #AB 2019

    #Create an index file and allow the option of giving it
    #SC 2015
    if [[ -f "$NDXin" ]]
    then
        cp $NDXin TMP_CpHMD.ndx
	message W "File $NDXin will be used as index file instead of file automatically generated by make_ndx (GROMACS)."
    else
	# A name was given but the file does not exist: warn and fall back
	if ! [ -z $NDXin ] 
	then
	    message W "File $NDXin is missing. A file generated by make_ndx (GROMACS) will be used instead."
	fi
	#gmx2018 #SC 2020
	# "q" makes make_ndx save its default groups and quit
	echo -e "q" | "$GroDIR"/gmx make_ndx -f $GROin -o TMP_CpHMD.ndx -quiet
    fi

    #Create PBMC entry... NEW
    # Only when the index file has no [ PBMC ] group yet (gawk exits with
    # 1 when it finds one). The group lists the atom numbers (columns
    # 16-20 of the .gro lines, from the 3rd line on), 15 per row, up to
    # the first line whose first field matches $SOL1st.
    if gawk '/\[ *PBMC *\]/{exit 1}' TMP_CpHMD.ndx 
    then
	gawk -v s=$SOL1st 'BEGIN{print "\n[ PBMC ]"}; $1 ~ s {print ""; exit}; \
                               NR>2{printf "%4d ", substr($0,16,5); n++; if (n%15==0) print ""}' \
	     $GROin >> TMP_CpHMD.ndx  
	message W "Group PBMC was added to index file."
    fi

    cp TMP_CpHMD.ndx ${extraname}.ndx #Debug SC 2015


    # Make initial .tpr file + TMP_processed.top
    # option -r in case of posre #gmx2018 #SC 2020
    "$GroDIR"/gmx grompp -f TMP_effective.mdp -po TMP_effective_out.mdp \
        -c  $GROin -r $GROin -p $TOPin -pp TMP_processed.top -n TMP_CpHMD.ndx \
        -o TMP_CpHMD.tpr -maxwarn 1000 -quiet

    # Make grid files .mgm and .ogm (in case you don't have your own)
    # GridSize = 0 means that the user supplies both files, so they must exist.
    if [ $GridSize != 0 ]; then 
        echo -e "ON_GEOM_CENT 61 1.0\nON_CENT_OF_INTR 65 0.25" \
            > ${runname}.mgm
        echo -e "ON_GEOM_CENT $GridSize 1.0\nON_CENT_OF_INTR 65 0.25" \
            > ${runname}.ogm
    else
        for f in ${runname}.{o,m}gm ;do
            if [ ! -f $f ];then message E \
            "File $f is missing. Program will crash"; fi
        done    
    fi
}

#NEW: SC 2018-05-29

# process_sites FILE
#
# Print FILE (a .sites file) with every site name reduced to a residue
# name: each line is cut right after its first run of uppercase letters,
# and names starting with NT / CT are then shortened to plain NT / CT,
# except NTPRO and NTGLY, which are kept.
#
# Globals written: r2x (the sed script that is applied).
process_sites ()
{
    # Why gawk and not sed -r 's/([A-Z]+).*/\1/' for the first step:
    # (1) -r (extended regular expressions) is neither needed nor spelled
    # the same way everywhere; (2) in some locales the uppercase letters
    # are not a contiguous A-Z range and sed has no character classes
    # (see man GAWK). The [[:upper:]] class of gawk is the safer choice.

    # Lines holding NTPRO or NTGLY skip the rest of the sed script ...
    local keep_as_is='/NTPRO/{b};/NTGLY/{b}'
    # ... every other terminus name loses its residue part.
    local shorten_termini='s/NT.*/NT/;s/CT.*/CT/'

    r2x="${keep_as_is};${shorten_termini}"

    # uncomment to use resX
    #r2x=$r2x$(gawk '!/;/{printf ";s/%s/%s/",$1,$2}' convert.def)

    gawk '{ shortened = gensub(/([[:upper:]]+).*/, "\\1", "g", $0) ; print shortened }' $1 \
        | sed "$r2x"
}


#######################################################################
# run_PBMC MODE: one PB (MEAD) + MC (PETIT) calculation on the current
# structure (TMP_effective.gro).
#
# Arguments:
#   $1 - "std": titrate the sites currently listed in ${runname}.sites;
#        "red": reduced titration - titrate all sites
#               (${runname}-all.sites), then rewrite ${runname}.sites
#               with the sites that remain titrable and build
#               TMP_CpHMD_red.top with the other ones fixed.
# Main outputs:
#   TMP_MCarlo_$1.out / .err    petit output
#   "red" only: TMP_MCarlo_mod.out TMP_statepqr.in TMP_template_occ
#               TMP_template_mocc TMP_CpHMD_red.top, and the log
#               TMP_CpHMD.sites
# Globals read:
#   use_fixbox GroDIR fixboxDIR MToolsDIR MeadDIR PetitDIR CpHDIR runname
#   extraname Cycle RTThreshold offset nCPU bsize epsin epssol ionicstr
#   temp dpH pH pot seed write_states ffID TOPin
#######################################################################
run_PBMC ()
{

    # Preparing files for pqr creation.
    #System must be centered     #SC 2018-05-23
    if [[ $use_fixbox == "y" ]]; then     #SC 2018-05-23
        #SC 2018-05-28
        #molecules whole before fixbox
        #gmx2018 #SC 2020
        echo "PBMC" | "$GroDIR"/gmx trjconv -f TMP_effective.gro -o TMP_aux0.gro \
                               -s TMP_CpHMD.tpr -n TMP_CpHMD.ndx  -pbc mol -quiet
        #${runname}.mdf only for PBMC
        "$fixboxDIR"/fixbox TMP_aux0.gro ${runname}.mdf > TMP_aux.gro #SC 2018-05-23
    else
        # Making protein whole removing PBC:
        #gmx2018 #SC 2020
        echo "PBMC" | "$GroDIR"/gmx trjconv -f TMP_effective.gro -o TMP_aux.gro \
                                  -s TMP_CpHMD.tpr -n TMP_CpHMD.ndx -pbc mol -quiet
    fi     #SC 2018-05-23

    # The conversion can be obtained with MAKEPQR
    # This only works for versions after meadTools-2.1, where the
    # .gro file may have a smaller number of atoms than the topology; SC 2018-05-28
    "$MToolsDIR"/makepqr W 2RT TMP_processed.top \
        TMP_processed.top TMP_aux.gro > TMP_aux.pqr #SC 2018-05-23


    # DEBUG - store PQR in PDB trajectory format for later analysis
    # (filename was changed #AB 2019)
    if [[ $1 == "std" ]]; then
        echo "TITLE     PQR from simulation" >> ${extraname}.pqr
        echo "MODEL     $(printf "% 3d\n" $Cycle)" >> ${extraname}.pqr
        cat TMP_aux.pqr >> ${extraname}.pqr
        echo -e "TER\nENDMDL" >> ${extraname}.pqr
    fi
    # Using the whole set of sites
    if [[ $1 == "red" ]]; then
        rm -f ${runname}.sites #SC 2015; in case both are links for same file
        cp -f ${runname}-all.sites ${runname}.sites
    fi

    #Set sites excluded in reduced titration with right charges
    #SC 2018-05-30: this step could be avoided if right topology
    #was used in first makepqr
    if [[ $1 != red && $RTThreshold != 0 ]]; then
        "$MToolsDIR"/statepqr s=TMP_statepqr.in TMP_aux.pqr \
            ${runname}-all.sites > TMP_aux2.pqr
        mv  TMP_aux2.pqr TMP_aux.pqr
    fi

    # Set our pqr in the charged state
    "$MToolsDIR"/statepqr r=c TMP_aux.pqr ${runname}.sites \
        > TMP_${runname}.pqr

    # Make .sites and corresponding .pqr with offset
    "$MToolsDIR"/stmodels $offset TMP_${runname}.pqr ${runname}.sites

    mv ${runname}_stmod.sites dummy.sites #SC 2015
    mv TMP_${runname}_stmod.pqr dummy.pqr #SC 2015
    for ext in ogm mgm; do cp ${runname}.$ext dummy.$ext; done #SC 2015

    # Run MeadT and Multiflex:
    "$MToolsDIR"/meadT -n $nCPU -s "$MToolsDIR" -m "$MeadDIR" \
        -b $bsize -blab3 -epsin $epsin -epssol $epssol \
        -ionicstr $ionicstr  -T $temp \
        dummy >TMP_mead_out 2>TMP_mead_error #SC 2015; To avoid files being overwritten; e.g. $runname.err


    # Second part of the correction: We change back to the original a.a.
    # res. numbers.
    cp dummy.g ${runname}.g #SC 2015
    # The trailing residue number of each line had 1, 2 or 3 times the
    # offset added (regular, NT and CT sites respectively); subtract it.
    gawk -v off=$offset '{match($0,/(^.+-)([0-9]+)$/,a);print a[1] a[2]-off*(1+($3~/^NT/)+2*($3~/^CT/))}' dummy.pkcrg > ${runname}.pkcrg #SC 2015


    # Run cconvert
    "$MToolsDIR"/cconvert/cconvert ${runname}.pkcrg ${runname}.g \
        $temp  ${runname}.dat
    # Run PETIT
    #Inclusion of pH gradient SC 2013-07-30
    # String comparison, as in check_parameters: dpH may be a fraction,
    # which '-eq' cannot evaluate.
    if [[ $dpH == 0 ]]; then
        "$PetitDIR"/petit -H "$pH,$pH,1" -E "$pot,$pot,1" -T $temp \
            -c 2 -r $seed -q 1000 100000 <${runname}.dat \
            >TMP_MCarlo_$1.out 2>TMP_MCarlo_$1.err
    else
        # pH gradient: nothing is run here in this version, so no
        # TMP_MCarlo_$1.out is produced.
        :
    fi

    # Reduced Titration calculations to determine the sites to use
    # in the next cycles
    if [[ $1 == "red" ]]; then
        # Clear the .sites file before the new one is created
        # (need to do this in order to work in case of empty .sites
        # from reduced titration)
        rm -f ${runname}.sites ; touch ${runname}.sites
        ################################################################
#       # ATTEMPTS TO SUBSTITUTE THE HUGE AWK CODE BELOW:  #AB 2020
#       occFull=$(gawk -v t=$RTThreshold '
#          /^\./ && ! /tot/ {
#            mpr=0 ;
#            for(i=6 ; i<=NF ; i++) if($i > mpr) { mst=i-6 ; mpr=$i }
#            # states = states " " (mpr > 1-t ? mst : "-") ;
#            print (mpr > 1-t ? mst : "-") ;
#          }
#          # END{print states}
#          ' TMP_MCarlo_red.out)
#       echo "$occFull" >> oink.occFull
        # Write (append) state populations for all sites (full PB/MC):
        if [[ $write_states == "y" ]]; then  # added #AB 2020
            gawk -v c=$Cycle '
              /^\./ && $4!~"tot" {s=$4; gsub($1" +"$2" +"$3" +"$4,"");p[s]=$0};
              /^>/ {n[$3]=$2};
              END {print "# Cycle "c;for(s in n)printf "%-13s %s\n",n[s],p[s];
                   print "#"}
              ' TMP_MCarlo_red.out >> ${extraname}.pocc_RT
        fi
        ################################################################
        # #AB 2020 (removed some stuff now replaced by .pocc_RT)
        # getline returns 1 per line read, 0 at end of file and -1 when
        # the file cannot be read, so the loops below test "> 0"; a plain
        # "while (getline < file)" never ends on an unreadable file.
        gawk -v t=$RTThreshold -v Allsites=${runname}-all.sites \
            -v Redsites=${runname}.sites '
        BEGIN{
          # Read petit output (all-site CE/MC):
          while ((status = (getline < "TMP_MCarlo_red.out")) > 0)
          {
          # Read occR (from all-site CE/MC):
          if ($0 ~ /^f/)
          {
            nsites = NF - 1 ;
            for(i = 2 ; i <= NF ; i++) occR[i-1] = $i ;
          }
          # Read moccR (from all-site CE/MC):
          if ($0 ~ /^\./ && $0 !~ /tot/)
          {
            moccR[$4+1] = $5 ;
            m = 0 ;
            for (i = 6 ; i <= NF ; i++)
              if($i > m)
              {
                m = $i ;
                # state with maximum population:
                maxstate[$4+1] = i - 6 ;
                # maximum population of that state:
                maxocc[$4+1] = $i ;
              }
            # If maximum population is above threshold, make switched=0,
            # indicating that site would be fixed (in state maxstate)
            # during next MD segments.
            switched[$4+1] = (maxocc[$4+1] > 1-t ? 0 : 1) ;
          }
          # Make input (first part of) for update of charges in .top:
          if ($0 !~ /^f/) print $0 > "TMP_MCarlo_mod.out" ; #SC 2018-05-29
          }
          if (status < 0)
          {
            print "run_PBMC: cannot read TMP_MCarlo_red.out (petit output)." > "/dev/stderr" ;
            exit 1 ;
          }
          close("TMP_MCarlo_red.out") ;

          # Make input (second part of) for update of charges in .top (among other things):
          printf("f ") > "TMP_MCarlo_mod.out" ; #SC 2018-05-29
          for (i = 1 ; i <= nsites ; i++)
          {
            printf ("%d ", maxstate[i]) > "TMP_MCarlo_mod.out" ; #SC 2018-05-29

            #Make input file for state_pqr with most abundant state
            printf ("%d\n", maxstate[i]) > "TMP_statepqr.in"

            printf ("%s ", switched[i] == 0 ? maxstate[i] : "-") > "TMP_template_occ" ;
            printf ("%s ", switched[i] == 0 ? moccR[i] : "-") > "TMP_template_mocc" ;
          }
          printf("\n") > "TMP_MCarlo_mod.out" ;
          printf("\n") > "TMP_template_occ" ;
          printf("\n") > "TMP_template_mocc" ;

          # Make .sites:
          n = 1 ;
          while ((getline < Allsites) > 0)
          {
            if (switched[n] == 1) print $0 > Redsites ;
            n++ ;
          }
          close(Allsites) ;
        }'

        # Making Log for .sites
        echo "This is the .sites file at Cycle = $Cycle" >>  TMP_CpHMD.sites
        cat ${runname}.sites >> TMP_CpHMD.sites

        # Create file with final site states: #SC;2015
        process_sites ${runname}-all.sites | paste TMP_statepqr.in - \
            > TMP_CpHMD_red.states

        # Update the topology:
        "$CpHDIR"/scripts/update-top TMP_CpHMD_red.states ${ffID}.ff/protstates.dic $TOPin > TMP_CpHMD_red.top #SC;2015


    fi

    # Removing PB related auxiliary files
    rm -f dummy.{summ,pqr*,pkcrg,g,pkint,out,sites,ogm,mgm}  \
        ${runname}.{dat,pkcrg,g,pqr*} \
        *.potat TMP_aux* TMP_mead_out TMP_mead_error \
        TMP_MCarlo_red* TMP_CpHMD_red.states #SC;2015
}



#######################################################################
# write_fractions: append one line per call to TMP_CpHMD.occ (final
# state of every site) and to TMP_CpHMD.mocc (mean occupancy of every
# site), taken from the petit output file $states_f.
#
# When RTThreshold != 0 the lines are built from the templates
# TMP_template_occ / TMP_template_mocc written by the reduced titration:
# entries that are "-" there are filled, in order, with the values read
# from $states_f.
#
# Globals read: RTThreshold states_f
# Returns non-zero (after a message on stderr) when $states_f, or a
# template that is needed, cannot be read.
#######################################################################
write_fractions ()
{
    # Write the occupation files
    #SC 2015
    # getline returns 1 per line read, 0 at end of file and -1 on error,
    # hence the explicit "> 0" / "<= 0" tests below: a bare
    # "while (getline < file)" never stops when the file is unreadable.
    gawk -v t="$RTThreshold" '
    BEGIN{
      statesfile = ARGV[1] ;

      # Read petit output (CE/MC):
      while ((status = (getline < statesfile)) > 0) #SC 2015
      {
        # Read occ (from CE/MC):
        if ($0 ~ /^f/)
        {
          nsites = NF - 1 ;
          for(i = 2 ; i <= NF ; i++) tocc[i-1] = occ[i-1] = $i ;
        }
        # Read mocc (from CE/MC):
        if ($0 ~ /^\./ && $0 !~ /tot/) tmocc[$4+1] = mocc[$4+1] = $5 ;
      }
      if (status < 0)
      {
        print "write_fractions: cannot read MC output file \"" statesfile "\"." > "/dev/stderr" ;
        exit 1 ;
      }
      close(statesfile) ;

      # If t != 0 read templates and override tocc and tmocc:
      if (t != 0)
      {
        if ((getline < "TMP_template_occ") <= 0)
        {
          print "write_fractions: cannot read TMP_template_occ." > "/dev/stderr" ;
          exit 1 ;
        }
        nsites = split($0, tocc) ;
        if ((getline < "TMP_template_mocc") <= 0)
        {
          print "write_fractions: cannot read TMP_template_mocc." > "/dev/stderr" ;
          exit 1 ;
        }
        split($0, tmocc) ;
        c = 0 ;
      }
      # Write tocc and tmocc:
      for (i = 1 ; i <= nsites ; i++)
      {
        # Substitute "-" with corresponding occ and mocc entries:
        if (t != 0 && tocc[i] == "-")
        {
          c++ ;
          tocc[i] = occ[c] ;
          tmocc[i] = mocc[c] ;
        }
        printf ("%d ", tocc[i]) >> "TMP_CpHMD.occ" ;
        printf ("%f ", tmocc[i]) >> "TMP_CpHMD.mocc" ;
      }
      printf("\n") >> "TMP_CpHMD.occ" ;
      printf("\n") >> "TMP_CpHMD.mocc" ;

    }' "$states_f"


}


# build_topology: write TMP_CpHMD.top with the protonation states sampled
# in the last PB/MC step.
#
# Globals read:    states_f runname RTThreshold TOPin CpHDIR ffID
# Globals written: top_file
# Files written:   TMP_CpHMD_std.states, TMP_CpHMD.top
#                  (TMP_state is created and removed again)
build_topology ()
{
    # One state per line, taken from the "f" line of the MC output
    gawk '$0 ~ /^f / { for (col = 2 ; col <= NF ; col++) print $col }' $states_f \
        > TMP_state

    # Put each state next to the (processed) site it belongs to
    process_sites ${runname}.sites \
        | paste TMP_state - \
        > TMP_CpHMD_std.states

    # The top file to be updated depends upon reduced titration being on/off
    #This should be defined somewhere else and be used also in function run_PBMC #SC 2020
    case $RTThreshold in
        0) top_file=$TOPin ;;
        *) top_file=TMP_CpHMD_red.top ;;
    esac

    # Update the topology:
    "$CpHDIR"/scripts/update-top \
        TMP_CpHMD_std.states ${ffID}.ff/protstates.dic $top_file \
        > TMP_CpHMD.top

    rm TMP_state
}

# run_dynamics STAGE START
#
# Run one MD block with GROMACS: grompp builds TMP_STAGE.tpr from
# TMP_STAGE.mdp, the structure TMP_START.gro and the current topology
# TMP_CpHMD.top; $mdrun then runs it, writing TMP_STAGE.{xtc,gro,edr,log,trr}.
#
# Arguments: $1 - stage name (basename of the .mdp and of the outputs)
#            $2 - basename of the starting structure
# Globals read: GroDIR mdrun nCPU_MD
run_dynamics ()
{
    local stage=$1 start=$2
    local -a grompp_opts mdrun_opts

    #option -r in case of posre #gmx2018 #SC 2020
    grompp_opts=(
        -f TMP_${stage}.mdp -po TMP_${stage}_out.mdp
        -c TMP_${start}.gro -r TMP_${start}.gro
        -p TMP_CpHMD.top -pp TMP_processed.top
        -n TMP_CpHMD.ndx -o TMP_${stage}.tpr
        -maxwarn 1000 -quiet
    )
    "$GroDIR"/gmx grompp "${grompp_opts[@]}"

    #gmx2018 #SC 2020
    #-quiet and -nice are gmx options
    mdrun_opts=(
        -nt $nCPU_MD
        -s TMP_${stage}.tpr -x TMP_${stage}.xtc -c TMP_${stage}.gro
        -e TMP_${stage}.edr -g TMP_${stage}.log -o TMP_${stage}.trr
        -quiet -nice 19
    )
    # $mdrun is deliberately left unquoted: it may hold several words
    # (e.g. "gmx mdrun").
    $mdrun "${mdrun_opts[@]}"
    #AB 2019: Rcon removed

    # Remove the '#...' files left in the working directory
    rm -f \#*
}

# run_relaxation: run the solvent relaxation MD and prepare the starting
# structure (TMP_relax.gro) of the effective MD that follows.
#
# Globals read: SOL1st
# Files: reads TMP_effective.gro; rewrites TMP_relax.gro; the untouched
#        output of the relaxation run is kept as TMP_relax_DEBUG.gro.
run_relaxation ()
{
    #Solvent relaxation
    run_dynamics relax effective

    #Prepare input GRO for dynamics
    {
        # everything before the first line whose 1st field matches
        # $SOL1st comes from the structure before relaxation ...
        gawk -v s=$SOL1st '$1 ~ s { exit } { print }' TMP_effective.gro
        # ... and everything from such a line onwards comes from the
        # relaxed structure.
        gawk -v s=$SOL1st '$1 ~ s { in_solvent = 1 } in_solvent' TMP_relax.gro
    } > TMP_aux.gro

    mv -f TMP_relax.gro TMP_relax_DEBUG.gro
    mv -f TMP_aux.gro TMP_relax.gro
}

#SC 2021-09-22: changes in variable names and trjcat appending to
#correct a bug
# data_append: add the results of the MD cycle that just finished
# (TMP_effective.*) to the accumulated files of the segment (TMP_CpHMD.*).
#
# Globals read: Cycle InitCycle InitTime WriteInitTime WriteTime sim_time
#               GroDIR
data_append ()
{
    if test "$Cycle" -eq "$InitCycle"; then
        # First cycle of the segment: the accumulated files are created.
        #gmx2018 #SC 2020
        printf '%s\nc\n' "$InitTime" \
            | "$GroDIR"/gmx eneconv  -o TMP_CpHMD.edr -f TMP_effective.edr \
                  -settime -b $WriteInitTime -quiet

        # In the 1st MD step of each block, trjconv is used to set the time
        #trjconv in gmx 2018 only works with option -s SC 2018-09-11
        printf 'System\n' \
            | "$GroDIR"/gmx trjconv -f TMP_effective.xtc -o TMP_CpHMD.xtc \
                  -t0 $InitTime -quiet \
                  -s TMP_effective.tpr
    else
        # Later cycles: concatenate to what was accumulated so far.
        # The two lines sent to eneconv are WriteInitTime and
        # sim_time - WriteTime (computed with bc).
        #gmx2018 #SC 2020
        printf '%s\n%s\n' "$WriteInitTime" "$(echo $sim_time-$WriteTime | bc -l)" \
            | "$GroDIR"/gmx eneconv -o TMP_aux.edr -f TMP_CpHMD.edr \
                  TMP_effective.edr -settime -quiet
        mv -f TMP_aux.edr TMP_CpHMD.edr

        # The flag -app of trjconv is deprecated: trjcat used instead.
        printf '%s\nc\n' "$InitTime" \
            | "$GroDIR"/gmx trjcat -f TMP_CpHMD.xtc TMP_effective.xtc -o TMP_aux.xtc \
                  -settime -quiet
        mv -f TMP_aux.xtc TMP_CpHMD.xtc
    fi

    # Append and backup remaining files
    if [[ -f TMP_effective.log || -f TMP_effective0.log ]]; then
        cat TMP_effective*.log >> TMP_CpHMD.log
    fi
    cp -f TMP_effective.gro TMP_CpHMD.gro
    rm -f TMP_aux* \#* TMP_effective*.log
}

# SC 2021-09-23 : update regarding FF files 
# clean_up: remove, from the current directory, the scratch files and the
# force field links used by a run. It is called both before a run starts
# and after it ends.
#
# Globals read: runname ffID
clean_up ()
{
    local -a leftovers

    # PB scratch files (dummy.* and ${runname}.*)
    leftovers=( dummy.{summ,pqr*,pkcrg,g,pkint,out,sites,ogm,mgm} dummy_cpu* )
    leftovers+=( ${runname}.{dat,pkcrg,g,pqr*} )

    # .st files and force field entries placed in this directory
    leftovers+=( *.st residuetypes.dat ${ffID}.ff )

    # MD leftovers
    leftovers+=( traj.trr state*.cpt last_line )

    # Every TMP_<tag>* working file
    leftovers+=( TMP_{statepqr.in,${runname},effective,relax,CpHMD,aux,mead,MCarlo,template,posre,processed,allstates}* )

    leftovers+=( SitesIN-original SitesIN-red )

    rm -rf "${leftovers[@]}"
}



# Function to write errors (with or without usage info) and warnings:
# message TYPE TEXT...
#
# Write a diagnostic to stderr, prefixed with the program name ($prog).
#   E  error: print TEXT and exit with status 1
#   U  usage problem: print TEXT followed by $usage and exit with status 1
#   W  warning: print TEXT and carry on
# Any other TYPE is itself reported as an error (and exits).
# TEXT goes through 'echo -e', so "\n" in it starts a new line.
message ()
{
    local kind=$1

    if [[ $kind == E ]]; then
        shift
        echo -e "$prog: Error: $*" >&2
        exit 1
    elif [[ $kind == U ]]; then
        shift
        echo -e "$prog: Warning: $*\n$usage" >&2
        exit 1
    elif [[ $kind == W ]]; then
        shift
        echo -e "$prog: Warning: $*" >&2
    else
        message E "Wrong use of 'message' function."
    fi
}

# Stop at the first failing command. The '-e' on the '#!' line is only
# applied when the script is executed directly; stating it here keeps
# the same behaviour when the script is started as 'bash <script>'.
set -e

# Run the program; all the functions used by main are defined above.
main "$@"

