#!/bin/bash -e
###########################################################################
# This file is part of meadTools, version 2.2.
# 
# Copyright (c) 2001-2019, Instituto de Tecnologia Quimica e Biologica,
# Universidade Nova de Lisboa, Portugal.
# 
# meadTools is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 2 of the License, or (at your
# option) any later version.
# 
# meadTools is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with meadTools.  If not, see <http://www.gnu.org/licenses/>.
# 
# For further details and info check the README file.
# 
# You can get meadTools at www.itqb.unl.pt/simulation
###########################################################################


############################################################################
# selectWacc: a program to remove "external" waters from a .pqr file.
#
# This program reads a .pqr file and writes another one retaining only
# the water molecules whose individual relative accessibility
# (computed in the absence of the other waters) is below a given
# cutoff; thus, only waters of low accessibility are left.  Surface
# areas are computed by ASC, using a probe with a given radius (not
# necessarily equal to the OW radius in the .pqr file).
#
# Note: There is a curious problem with this script.  I would have
# preferred to have it read from stdin and write to stdout, since I
# find that always more flexible.  However, the simple act of
# redirecting the stdout of the script, even as it is (i.e., not writing
# anything to stdout), leads to wrong results in $outpqr (no waters
# are included)! This puzzling result seems to derive from running
# ASC, because if one saves a previous $tmp.asc file (ASC output) and
# comments out the line where ASC is run, this problem disappears.
# Thus, although ASC is being run with the option -i
# (non-interactive), which *seems* to send nothing to stdout (or
# stderr), it still somehow interferes with the stdout of even other
# commands in the script! Running ASC in a subshell (i.e., enclosing the
# command in parentheses) does not solve the problem.  Using the
# executable $ascdir/asc.Linux instead of $ascdir/asc doesn't seem to
# make any difference either.  Suggestions for solving this problem
# are welcome.
############################################################################


# Defaults (may be overridden with the -c and -r options):
cutoff=0.5
rprobe=1.4
#ascdir=/programs/ASC_2.14   # old default

# Program name and usage text, as printed in diagnostics.  The usage text
# embeds the default values, so it must be built after they are assigned.
prog="selectWacc"
usage="\
Usage: $prog [OPTIONS] PQR_INFILE PQR_OUTFILE ASC_DIR
Options:
  -c ACCESS_CUTOFF   Relative accessibility cutoff for water molecules.
  -r PROBE_RADIUS    Radius of the solvent probe (in angstroms).
Defaults:  -c $cutoff  -r $rprobe"

# message TYPE TEXT...
#   Print a diagnostic on stderr, prefixed with the program name ($prog).
#   TYPE selects what happens:
#     E  error: print TEXT and exit with status 1.
#     U  usage error: print TEXT followed by $usage, then exit with status 1.
#     W  warning: print TEXT and return.
#   Any other TYPE is itself reported as an error (exit status 1).
#   TEXT is printed literally: printf with a fixed format is used instead
#   of 'echo -e', so backslashes in TEXT (e.g. in a file name) are not
#   interpreted as escape sequences.
function message {
  case "$1" in
    E ) shift; printf '%s: Error: %s\n' "$prog" "$*" >&2; exit 1;;
    U ) shift; printf '%s: Error: %s\n%s\n' "$prog" "$*" "$usage" >&2; exit 1;;
    W ) shift; printf '%s: Warning: %s\n' "$prog" "$*" >&2;;
    * ) message E "Wrong use of 'message' function.";;
  esac
}

# Parse command line.
# Leading words that start with '-' are taken as options; -c and -r each
# consume the following word as their value.  An option given without a
# value is reported as a usage error (previously the trailing 'shift'
# failed and, under 'bash -e', the script exited without any message).
while [[ "$1" == -* ]]; do
  case "$1" in
    -c )
      [ $# -ge 2 ] || message U "Option $1 requires an argument."
      shift; cutoff=$1;;
    -r )
      [ $# -ge 2 ] || message U "Option $1 requires an argument."
      shift; rprobe=$1;;
    *  ) message U "Unknown option $1";;
  esac
  shift
done

# Exactly three positional arguments must remain.
if [ $# -ne 3 ]; then
  message U "Wrong number of arguments."
fi
inpqr=$1
outpqr=$2
ascdir=$3

# Check the arguments.  The expansions are quoted so that names with
# whitespace work and an empty argument is rejected (an unquoted empty
# value turns the test into '[ -f ]', which is always true).
[ -f "$inpqr" ] || message E "File $inpqr does not exist."
if [ -f "$outpqr" ]; then
  message E "File $outpqr already exists."
fi
[ -d "$ascdir" ] || message E "Directory $ascdir does not exist."

# Set base name for temporary files (they are created in the current
# directory, which is also where ASC is run):
tmp=_selW_$$

# Remove the temporary files and the symlinks made by this script.  This is
# installed as an EXIT trap so that nothing is left behind when a step fails.
# Only the symlinks recorded in 'links' are removed, so a pre-existing
# SCRIPTS or lib entry that made 'ln' fail is never deleted.
links=()
function cleanup {
  rm -f -- "${tmp}".{xyzr,cmd,asc}
  if [ ${#links[@]} -gt 0 ]; then
    rm -f -- "${links[@]}"
  fi
}
trap cleanup EXIT

# Make necessary symlinks to ASC directory:
for entry in SCRIPTS lib; do
  ln -s "$ascdir/$entry" . \
    || message E "Cannot create symlink to $ascdir/$entry in the current directory."
  links+=("$entry")
done

# Make auxiliary .xyzr file
# (special ASC format, with residue number, etc; zero-radius atoms removed):
awk '/^ATOM/ && $10>1e-10{printf "%8.3f%8.3f%8.3f%8.3f  _%-4s%6d%6d %s\n",$6,$7,$8,$10,$3,$2,$5,$4}' "$inpqr" > "${tmp}.xyzr"

# Get total area of OW, using its .pqr radius and a probe with radius $rprobe
# (atan2(0,-1) is pi, so this is the area of a sphere of radius R_OW+rprobe):
areaW=$(awk -v rprobe="$rprobe" '$3=="OW"{print 4*atan2(0,-1)*($10+rprobe)^2;exit 0}' "$inpqr")

# Get atom number (line number in the .xyzr file) for first and last waters:
firstW=$(awk '$5=="_OW"{print NR;exit 0}' "${tmp}.xyzr")
lastW=$(awk '$5=="_OW"{n=NR};END{print n}' "${tmp}.xyzr")

if [ -n "$firstW" ]; then
  # The atoms before the first water are taken as the solute, so there must
  # be at least one.  (The former 'let lastP=$firstW-1' returned a failure
  # status when the result was 0, which ended the script silently under -e.)
  [ "$firstW" -gt 1 ] \
    || message E "The first atom in $inpqr is a water oxygen; waters must come after the other atoms."
  # Atom number of the last protein atom:
  lastP=$((firstW - 1))

  # Write the ASC command file:
  cat <<EOF > "${tmp}.cmd"
include "std.def"                   # Apparently not needed, but left anyway.
solrad($rprobe)                     # Assign probe radius.
r_xyzr "./${tmp}.xyzr"              # Read .xyzr file.
int i                               # Declare variable i as integer.
for (i=$firstW; i<=$lastW; i++){    # Make loop over all water molecules.
  active_at "::1-$lastP,i"             # Active atoms = protein + water i.
  asc                                  # Compute analytical surface.
  select_at "::i"                      # Select water i.
  sfatom(asc)                          # Print surface of water i.
}
quit
EOF

  # Run ASC on the command file:
  "$ascdir/asc" -i -c "./${tmp}.cmd" -p "./${tmp}.asc" \
    || message E "ASC run failed."
else
  # No OW atom survived into the .xyzr file: there is nothing for ASC to
  # compute, and the loop bounds above would be empty.  Use an empty ASC
  # output instead, so that no water is retained.
  message W "No OW atoms found in $inpqr; skipping ASC, no water is retained."
  : > "${tmp}.asc"
fi

# Rescan original .pqr file, letting pass only waters below the cutoff.
# Lines of the ASC output are recognized by file name rather than by the
# FNR==NR idiom, which mistakes the .pqr file for the first file (and thus
# prints nothing) whenever the ASC output is empty.
awk -v areaW="$areaW" -v cutoff="$cutoff" '
  FILENAME==ARGV[1] {
    # ASC output: remember the residues whose relative area is below cutoff.
    if ($2~/^\(/ && $(NF-2)/areaW<cutoff) a[$NF]=1
    next
  }
  # .pqr input: keep every non-water line, plus the remembered waters.
  $4!~/^(HOH|H2O|WAT)$/ || a[$5]==1
' "${tmp}.asc" "$inpqr" > "$outpqr"

# Clean up stuff (done explicitly here; the trap covers the failure paths):
cleanup
trap - EXIT

