# INSTRUCTIONS TO PREPARE AND INITIALIZE THE SYSTEM
#
# Although this file is written as a shell script, you should execute each
# command individually on the terminal and check the output at each step,
# reading the corresponding comments and trying to understand what you are
# doing.


############################################################################
# 1. Define some parameters and get files:

# Installation directories of the external programs. These are specific
# to each machine, so you will most likely have to edit them:
#   gmxbin   - GROMACS binaries (pdb2gmx, editconf, genbox, genrestr)
#   meadTdir - meadTools programs (makepqr, makesites, getst, meadT, ...)
#   meadbin  - MEAD binaries (handed to meadT with -m)
#   petitdir - petit program
#   CpHDIR   - CpHMD distribution (tools/, top/ and scripts/ are used below)
#   Stdir    - directory with the .st files (handed to getst)
gmxbin=/gromacs/gromacs-4.0.7_pH_I/bin
meadTdir=/programs/meadTools-2.1
meadbin=/compilations/mead-2.2.9/bin
petitdir=/programs/petit-1.6.0
CpHDIR=/data/simulation/programs/CpHMD/ST-CpHMD-v4.1_GMX4.07
Stdir=$CpHDIR/St-G54a7

# Calculation parameters. The values of ionstr, temp and pH must be the
# same ones used later for the CpHMD runs.

# Number of CPUs to be used:
ncpus=4
# Numbering offset for titrable fragments:
offset=100000
# Molecular interior dielectric constant:
eps=2.0
# Ionic strength:
ionstr=0.1
# Temperature:
temp=300
# pH:
pH=7

# Copy the input structure, the solvent box, the .mdp files and the helper
# scripts from ../files into the current directory (-i asks before
# overwriting anything that is already here):
cp -i \
    ../files/4lzt.pdb \
    ../files/spc902_1atm_300K.gro \
    ../files/min1.mdp \
    ../files/min2.mdp \
    ../files/en_min.sh \
    ../files/init.sh \
    ../files/md_i50.mdp \
    ../files/md_i100.mdp \
    ../files/md_i200.mdp \
    .


############################################################################
# 2. Make topology and other MM/MD files:


# 2.1. Make pdb with extended residue names:

# The tool convert-pdb (check its header for usage) renames the titrating
# residues according to the definitions in the file convert.def, so that
# file is symlinked here before the tool is run. The path variables are
# quoted so that installation directories containing spaces still work.
ln -s "$CpHDIR/tools/convert.def" .
"$CpHDIR/tools/convert-pdb" 4lzt.pdb > lyso.pdb


# 2.2. Make topology with extended residue names:

# Symlink here the FF files with CpHMD building blocks (only the variable
# is quoted; the brace list and the ff* glob must stay unquoted to expand):
ln -s "$CpHDIR"/top/{ff*,{FF,aminoacids,specbond}.dat} .

# Select NTX and CTX types for the N- and C-terminus (answer "0" to each
# of the two -ter questions). printf is used instead of 'echo -e', whose
# option handling differs between shells; the bytes sent to pdb2gmx are
# unchanged ("0\n0\n" followed by one extra newline).
printf '0\n0\n\n' \
    | "$gmxbin/pdb2gmx" -f lyso.pdb -o lyso.gro -p lyso.top -ter -ignh -merge

# We will need to process the topology in subsequent steps, so if you have
# more than one chain, do not forget to use the option -merge to avoid
# included .itp files. As an alternative, use grompp to generate a
# processed topology.


# 2.3. Fix the Ctr atom names (problem with pdb2gmx and ffG54a7pHt-c.tdb):

# Keep copies of the untouched files so the fix can be inspected afterwards:
cp lyso.top lyso.top1
cp lyso.gro lyso.gro1
"$CpHDIR/tools/fixCTER" lyso.top lyso.gro
# Check the changes in lyso.top and lyso.gro by comparing them with their
# previous forms (lyso.top1 and lyso.gro1), e.g. with diff.

############################################################################
# 3. Make general files for PB/MC:

# The .sites file is created here using programs from meadTools, but
# it can be created some other way (e.g., by hand). The .sites file is
# crucial and, if it's created automatically as here, it should
# *always* be checked to see if it truly contains what is
# intended. Special attention should be given to N- and C-terminal
# sites, to CYS sites, and to any other unusual sites.


# 3.1. Create a general .sites file:

# Build a temporary .pqr from the topology and coordinates, pass it
# through revert-res, and let makesites list the sites found in it. Lines
# mentioning ARG, SER or THR are then dropped from that list. The path
# variables are quoted so directories containing spaces still work; the
# ff*nb.itp glob must stay unquoted.
"$meadTdir/makepqr" W 2RT ff*nb.itp lyso.top lyso.gro \
    | "$CpHDIR/tools/revert-res" > _tmp.pqr
"$meadTdir/makesites" t _tmp.pqr | gawk '!/(ARG|SER|THR)/' > lyso.sites
rm -f _tmp.pqr
# Arg won't be titrated and tautomers for Ser and Thr won't be
# considered, unlike our usual approach in rigid calculations.


# 3.2. Get all required .st files (done here using getst):
"$meadTdir/getst" lyso.sites "$Stdir"


# 3.3. Create MEAD grid files .mgm and .ogm:

# Both files hold the same two lines and differ only in the second field
# (61 in the .mgm file, 81 in the .ogm file).
printf '%s\n' \
    'ON_GEOM_CENT 61 1.0' \
    'ON_CENT_OF_INTR 61 0.25' \
    > lyso.mgm
printf '%s\n' \
    'ON_GEOM_CENT 81 1.0' \
    'ON_CENT_OF_INTR 81 0.25' \
    > lyso.ogm


############################################################################
# 4. Run PB/MC and update topology:

# These calculations are needed because we want to start the simulations
# from a likely charge state at the selected pH.

# 4.1. Run PB calculations

# All variable expansions below are quoted so that directories containing
# spaces still work; the ff*nb.itp and *.potat globs must stay unquoted.

# Create .pqr file (without offset):
"$meadTdir/makepqr" W 2RT ff*nb.itp lyso.top lyso.gro > aux.pqr

# Use the charged reference state and apply numbering offset:
"$meadTdir/statepqr" r=c aux.pqr lyso.sites > lyso.pqr
"$meadTdir/stmodels" "$offset" lyso.pqr lyso.sites

# meadT is run below on the base name "dummy": copy the grid files to that
# name and rename the lyso_stmod.* files (presumably the ones just written
# by stmodels) accordingly:
for ext in ogm mgm; do cp "lyso.$ext" "dummy.$ext"; done
for ext in pqr sites; do mv "lyso_stmod.$ext" "dummy.$ext"; done

# Run meadT (should take 20-30 seconds):
"$meadTdir/meadT" -n "$ncpus" -b 10000 -s "$meadTdir" -m "$meadbin" \
    -epsin "$eps" -ionicstr "$ionstr" -T "$temp" dummy \
    1> meadT_out  2> meadT_err
rm -f *.potat

# Revert offset: every line of dummy.pkcrg is expected to end in
# "-<number>". The text up to and including that last "-" is kept, and
# the number is reduced by 1x the offset, or by 2x / 3x the offset when
# the third field starts with NT / CT, respectively.
# (The three-argument form of match() is a gawk extension.)
gawk -v off="$offset" \
     '{match($0,/(^.+-)([0-9]+)$/,a);
       print a[1] a[2]-off*(1+($3~/^NT/)+2*($3~/^CT/))}' dummy.pkcrg \
     > lyso.pkcrg

cp dummy.g lyso.g


# 4.2. Run MC simulation

# Create the input file lyso.dat from the PB results (variable expansions
# are quoted so that directories containing spaces still work):
"$meadTdir/cconvert/cconvert" lyso.pkcrg lyso.g "$temp" lyso.dat

# Run petit (should take just a few seconds). The -H argument is given as
# "$pH,$pH,1", i.e. the same pH value twice followed by 1, so only the
# selected pH is sampled:
"$petitdir/petit" -H "$pH,$pH,1" -T "$temp" -c 2.0 -q 1000 100000 \
    < lyso.dat  1> petit_out  2> petit_err


# 4.3. Change the topology

# Create file with final site states. The pipeline works as follows:
#   1. gawk takes the lines of petit_out starting with "f " and prints
#      every field after the first one on a line of its own;
#   2. paste puts each of those values side by side with the matching
#      line of lyso.sites;
#   3. gensub cuts each line right after its first run of uppercase
#      letters;
#   4. sed (script in r2x) shortens NT... and CT... names to plain NT and
#      CT, except on lines containing NTPRO or NTGLY, which are left
#      untouched.
r2x="/NTPRO/{b};/NTGLY/{b};s/NT.*/NT/;s/CT.*/CT/"
gawk '/^f /{for(i=2;i<=NF;i++)print $i}' petit_out \
    | paste - lyso.sites \
    | gawk '{print gensub(/([[:upper:]]+).*/, "\\1","g")}' \
    | sed "$r2x" \
    > lyso.states

# Update the topology with the new states (the path variable is quoted so
# that directories containing spaces still work; the ff*.dic glob must
# stay unquoted):
"$CpHDIR/scripts/update-top" lyso.states ff*.dic lyso.top > newlyso.top

# Make sure that the protein is called just 'Protein':
sed 's/Protein_A/Protein/g' newlyso.top > lysoX.top


############################################################################
# 5. Solvate the system:

# Make a box around the protein (all program output goes to editconf.out).
# $gmxbin is quoted so that a directory containing spaces still works.
"$gmxbin/editconf" -f lyso.gro -o box.gro -bt dode -d 1.4 &> editconf.out

# Add waters, using the topology with the updated site states (all program
# output goes to genbox.out):
"$gmxbin/genbox" -cp box.gro -cs spc902_1atm_300K.gro -p lysoX.top \
    -o lysoX.gro &> genbox.out


############################################################################
# 6. Perform energy minimization

# Run 2-step energy minimization with steepest descent (check script en_min.sh)
# (should take ~5 minutes).
# The GROMACS bin directory is given as the only argument; it is quoted so
# that it is passed as a single word even if the path contains spaces.
./en_min.sh "$gmxbin"


############################################################################
# 7. Perform restrained MM/MD initiation

# Create .itp file with CA position restraints. The "3" answers the
# interactive group prompt of genrestr (presumably the C-alpha group in
# the default list -- confirm in the program output). $gmxbin is quoted
# so that a directory containing spaces still works.
"$gmxbin/genrestr" -f lysoX.gro -o posre_CA.itp <<EOF
3
EOF

# Create topology with additional posres info: right after the line that
# includes posre.itp, close the existing #ifdef and open a new POSRES_CA
# one that includes posre_CA.itp. The #endif that originally followed the
# posre.itp include then closes the new block.
awk '{print}/#include "posre.itp"/{printf "#endif\n\n; Include Position restraint (posre_CA) file\n#ifdef POSRES_CA\n#include \"posre_CA.itp\"\n"}' \
 lysoX.top > lysoX-r.top

# Run 3-step initiation (check script init.sh)
# (should take 10-15 minutes).
# The GROMACS bin directory is given as the only argument.
./init.sh "$gmxbin"

# Clean files (the trajectory and the "#...#" backup files):
rm -f traj.trr \#*


############################################################################
# 8. Check if the system is well equilibrated

# Check the structure, rmsd, temperature, pressure, etc, as you would do
# for a standard MD simulation.


############################################################################
# 9. Proceed to the production CpHMD

# The initiation steps are over. Move to the directory CpHMD (a sibling of
# the current directory) and follow the instructions given there.

cd ../CpHMD

