# INSTRUCTIONS TO ANALYZE PROTONATION DATA
#
# Although this file is written as a shell script, you should execute each
# command individually on the terminal and check the output at each step,
# reading the corresponding comments and trying to understand what you are
# doing.


############################################################################
# 1. Define some parameters:

# Installation directory of the ST-CpHMD package; the helper scripts used
# below are looked up in its tools/ subdirectory:
CpHDIR='/data/simulation/programs/CpHMD/ST-CpHMD-v4.1_GMX4.07'

# Directory containing the lyso_*.occ and lyso_*.mocc files to analyze
# (not to be confused with CpHDIR above):
CpHMD_DIR='../../CpHMD/'
# Sites file, passed as first argument to the helper scripts:
sites_file='../../CpHMD/lyso.sites'


############################################################################
# 2. Compute protonations vs cycle number:

# Create .prot files (from occ) for individual sites and total system.
# The variable expansions are quoted so that paths containing spaces reach
# the tool as single arguments; the glob itself stays outside the quotes so
# that the shell still expands it to the list of .occ files:
"$CpHDIR"/tools/occ_to_prot.awk \
    "$sites_file" "$CpHMD_DIR"/lyso_*.occ
# These are the instantaneous protonations used explicitly during the MM/MD
# stages of the CpHMD simulation.

# Create .mprot files (from mocc) for individual sites and total system.
# The variable expansions are quoted so that paths containing spaces reach
# the tool as single arguments; the glob itself stays outside the quotes so
# that the shell still expands it to the list of .mocc files:
"$CpHDIR"/tools/mocc_to_mprot.awk \
    "$sites_file" "$CpHMD_DIR"/lyso_*.mocc
# These are the mean protonations computed in the Monte Carlo simulations
# (by PETIT) and should be consistent with those computed from the .occ
# files.

# Since the (non-mean) protonation occupancies are integer values, their
# temporal evolution can be difficult to visualize. Before plotting them,
# you can perform some kind of window averaging, like the simple one
# illustrated here for the total protonation. Each output line holds the
# cycle number of the last sample of a window and the mean of column 2 over
# that window; w is the window size in samples (change it in one place).
# The window test is done only on data lines, so comment lines never
# produce output. A trailing incomplete window is not printed:
awk -v w=10 '!/^#/{s+=$2; if(++n%w==0){print $1,s/w; s=0}}' total.prot > total.wprot

# In order to check the convergence of the protonations, you can compute
# the cumulative averages (which should converge as the simulation
# proceeds). For each X.prot this writes X.cprot with one line per
# non-comment input line: column 1 unchanged, followed by the mean of
# column 2 over all non-comment lines read so far:
for f in *.prot; do
    # If no .prot file exists the glob stays literal; skip it instead of
    # creating a bogus "*.cprot" file.
    [ -e "$f" ] || continue
    # Quoted so that file names containing spaces are handled correctly.
    awk '!/^#/{n++;print $1,(s+=$2)/n}' "$f" > "${f%.prot}.cprot"
done


############################################################################
# 3. Plot the protonations vs time:

# You can now plot the protonations computed above, using your favorite
# plotting program. The example below uses gnuplot.

# Create gnuplot script. The here-document delimiter is quoted ('EOF') so
# that the shell copies the text verbatim into plots.gp: "$" needs no
# escaping, and the trailing backslashes (gnuplot line continuations) are
# kept, leaving the plot commands in plots.gp split over readable lines.
cat <<'EOF' > plots.gp
set term pdfcairo lw 1
set output "plots.pdf"   # output file

sites=system("for f in *-*.prot; do echo ${f%.prot}; done | sort -t - -k 2g")

set xlabel "Time / ns"
set ylabel "Protonation"
set key out

# Since each cycle corresponds to 2 ps (see parameter EffectiveSteps in
# the .pHmdp file), the cycle index is multiplied by the conversion factor
# 0.002 below, to give time in ns.
c = 0.002

# Get minimum and maximum of total protonation:
stats "total.prot" using 2 name "Total" nooutput

# Plot with total protonation:
set title "Total protonation"
plot [] [Total_min-0.2:Total_max+0.2] \
    "total.mprot" using ($1*c):2 title "mprot" w lines lt 4, \
    "total.prot" using ($1*c):2 title "prot" w p lt 1 pt 7 ps 0.1, \
    "total.wprot" using ($1*c):2 title "wprot" w lp lt 2 pt 6 ps 0.5, \
    "total.cprot" using ($1*c):2 title "cprot" w l lt -1 dt 2

# One plot for each individual protonation:
do for [s in sites] {
    set title "Protonation of ".s
    plot [] [-0.05:1.05] \
        s.".mprot" using ($1*c):2 title "mprot" with lines lt 4, \
        s.".prot" using ($1*c):2 title "prot" w p lt 1 pt 7 ps 0.1, \
        s.".cprot" using ($1*c):2 title "cprot" w l lt -1 dt 2
}
EOF
# You can edit the file plots.gp and change whatever you want.

# Run gnuplot, which creates PDF file plots.pdf:
gnuplot plots.gp
# Then use a PDF viewer to see the plots in file plots.pdf.


############################################################################
# 4. Other analyses:

# You can compute all sorts of quantities (statistics) from the sampled
# protonation states in the .occ and .mocc files. In particular, if you run
# CpHMD simulations at different pH values, you can compute the average
# protonation at each pH and get the total and individual titration curves.

