#!/usr/bin/gawk -f
###########################################################################
# This file is part of meadTools, version 2.2.
# 
# Copyright (c) 2001-2019, Instituto de Tecnologia Quimica e Biologica,
# Universidade Nova de Lisboa, Portugal.
# 
# meadTools is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 2 of the License, or (at your
# option) any later version.
# 
# meadTools is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with meadTools.  If not, see <http://www.gnu.org/licenses/>.
# 
# For further details and info check the README file.
# 
# You can get meadTools at www.itqb.unl.pt/simulation
###########################################################################


############################################################################
# addHtaut: a program to add protons for tautomeric calculations.
#
# The program presently recognizes 4 different chemical groups:
# carboxyl, phenyl, alcohol/thiol, and waters.  New site types
# containing these chemical groups can be easily added by defining the
# appropriate atoms within the function define_sites().  Addition of
# other chemical groups will require writing the corresponding
# process_*() function that adds the tautomeric protons.
############################################################################


# Main program.  Everything runs inside BEGIN: the two files named on the
# command line are read explicitly with getline, so awk's normal input loop
# is never entered.
#
#   ARGV[1] : PQR file; every line is echoed through print_pqr_line() and
#             must have exactly 10 fields.
#   ARGV[2] : sites file; $1 is a residue number and $2 a site name.
#
# The atoms of the current residue are collected in the globals r (the
# coordinates) and line (the raw text), then handed to process_residue()
# whenever the residue number in field 5 changes, and once more after the
# last line.
BEGIN{
  cmd = "addHtaut" ;
  usage = "Usage: "cmd" PQR_FILE SITES_FILE" ;
  if (ARGC != 3) error("Wrong number of arguments.\n" usage) ;
  filecheck(pqr_file = ARGV[1]) ;
  filecheck(sites_file = ARGV[2]) ;
  pi = atan2(0,-1) ;

  # getline returns 1 on success, 0 at end of file and -1 on error.  The
  # result must be compared with 0 explicitly: a bare "while (getline < f)"
  # treats -1 as true and never terminates when the read fails.
  while ((read_status = (getline < sites_file)) > 0)
  {
    if ($2 ~ "^CT") cterm[$1] = 1 ;
    else if ($2 ~ "^NT") nterm[$1] = 1 ;    # nterm is not really used...
    else tit[$1] = substr($2,1,3) ;  # this may not be totally general...
  }
  if (read_status < 0) error("Could not read file "sites_file".") ;
  close(sites_file) ;

  define_sites() ;
  resnumb = "" ;
  while ((read_status = (getline < pqr_file)) > 0)
  {
    nline++ ;
    # A change in residue number means the previous residue is complete.
    if (nline != 1 && $5 != resnumb) process_residue() ;
    if (NF != 10) error("Wrong number of fields ("NF") in pqr line "nline) ;
    resname = $4 ;
    resnumb = $5 ;
    atm = $3 ;
    for (k = 1 ; k <= 3 ; k++) r[atm,k] = $(5+k) ;
    line[atm] = $0 ;
    print_pqr_line($2, $3, $4, $5, $6, $7, $8, $9, $10) ;
    # the next 2nd condition may not be totally general...
    if (tit[resnumb] != "" && tit[resnumb] != substr(resname,1,3))
    {
      warning("tit[resnumb] = "tit[resnumb]) ;
      error(resname " " resnumb " : name in .pqr and .sites do not match!\n") ;
    }
  }
  if (read_status < 0) error("Could not read file "pqr_file".") ;
  close(pqr_file) ;
  # The last residue is not followed by a residue-number change.
  process_residue() ;

}

# Add the tautomeric protons for the residue currently held in the globals
# r and line, then empty both arrays so the next residue starts clean.
# The site type recorded in tit[resnumb] is matched against the group
# regexps set by define_sites(); a residue flagged in cterm is additionally
# treated as a "CTR" carboxyl site.
function process_residue()
{
  if (tit[resnumb] ~ carboxyl)
  {
    process_carboxyl(resname) ;
  }
  if (cterm[resnumb] == 1)
  {
    process_carboxyl("CTR") ;
  }
  if (tit[resnumb] ~ phenyl)
  {
    process_phenyl(resname) ;
  }
  if (tit[resnumb] ~ alcohol)
  {
    process_alcohol(resname) ;
  }
  if (tit[resnumb] ~ water)
  {
    process_water(resname) ;
  }
  # Splitting an empty string into an array removes all of its elements.
  split("", r) ;
  split("", line) ;
}

# Print the four tautomeric protons of a carboxyl site as PQR lines, in the
# order h1, h3, h2, h4.
#
# stype : key into the o1/o2/c/h1..h4 atom-name tables (a residue name, or
#         "CTR" for a C-terminus).
#
# Reads the globals r, line, resname, resnumb and pi.  All names after
# stype in the parameter list are local scratch variables (awk declares
# locals as extra parameters); callers pass stype only.
#
# Each oxygen gets two protons lying in the O1-C-O2 plane at distance distOH
# from the oxygen and making the angle angCOH with the O-C bond: one on the
# side of the other oxygen (h1, h2) and one on the opposite side (the "back"
# protons h3, h4).
#
# Calls error(), which exits, if any of the four protons is already present,
# if a heavy atom is missing, or if the geometry is degenerate.
function process_carboxyl(stype,    k, angCOH, distOH, chargeH, radiusH,
                          rCO1, rCO2, rb, rc1, rc2, ra, ru, rH,
                          lenCO1, lenCO2, lenPerp1, lenPerp2)
{
  # Some parameters that depend on the force field:
  # (These are for G43a1; if necessary can be made stype-dependent):
  angCOH = 109.5 * pi / 180 ;
  distOH = 1.000 ;
  chargeH = 0.398 ;  # "Aesthetic", does not affect MEAD calculations.
  radiusH = 0.000 ;

  # Check if site is OK.  All four protons written below are tested, so a
  # residue that already has any of them is rejected instead of getting
  # duplicate atoms.
  if (line[h1[stype]] != "" || line[h2[stype]] != "" ||
      line[h3[stype]] != "" || line[h4[stype]] != "")
    error("Site "stype" "resnumb" has proton(s) already.") ;
  if (line[o1[stype]] == "" || line[o2[stype]] == "" || line[c[stype]] == "")
    error("Site "stype" "resnumb" has missing atoms.") ;

  # Compute bond vectors (from the carbon to each oxygen):
  for (k = 1 ; k <= 3 ; k++)
  {
    rCO1[k] = r[o1[stype],k] - r[c[stype],k] ;
    rCO2[k] = r[o2[stype],k] - r[c[stype],k] ;
  }

  # In-plane directions perpendicular to each C-O bond, pointing towards the
  # other oxygen: rc1 = rCO1 x (rCO2 x rCO1) and rc2 = rCO2 x (rCO1 x rCO2).
  cross(rCO2,rCO1,rb) ;
  cross(rCO1,rb,rc1) ;
  cross(rCO1,rCO2,rb) ;
  cross(rCO2,rb,rc2) ;

  # The norms are used as divisors below.  They vanish when an oxygen
  # coincides with the carbon or when the three atoms are collinear, so
  # report that clearly before anything is printed for this site.
  lenCO1 = norm(rCO1) ;
  lenCO2 = norm(rCO2) ;
  lenPerp1 = norm(rc1) ;
  lenPerp2 = norm(rc2) ;
  if (lenCO1 == 0 || lenCO2 == 0 || lenPerp1 == 0 || lenPerp2 == 0)
    error("Site "stype" "resnumb" has degenerate geometry.") ;

  # Compute and write h1:
  for (k = 1 ; k <= 3 ; k++)
  {
    ra[k] = -cos(angCOH) * distOH * rCO1[k] / lenCO1 ;
    ru[k] = sin(angCOH) * distOH * rc1[k] / lenPerp1 ;
    rH[k] = r[o1[stype],k] + ra[k] + ru[k] ;
  }
  print_pqr_line(0, h1[stype], resname, resnumb,
                 rH[1], rH[2], rH[3], chargeH, radiusH) ;
  # Compute and write h3 ("back" proton):
  for (k = 1 ; k <= 3 ; k++)
    rH[k] = r[o1[stype],k] + ra[k] - ru[k] ;
  print_pqr_line(0, h3[stype], resname, resnumb,
                 rH[1], rH[2], rH[3], chargeH, radiusH) ;

  # Compute and write h2:
  for (k = 1 ; k <= 3 ; k++)
  {
    ra[k] = -cos(angCOH) * distOH * rCO2[k] / lenCO2 ;
    ru[k] = sin(angCOH) * distOH * rc2[k] / lenPerp2 ;
    rH[k] = r[o2[stype],k] + ra[k] + ru[k] ;
  }
  print_pqr_line(0, h2[stype], resname, resnumb,
                 rH[1], rH[2], rH[3], chargeH, radiusH) ;
  # Compute and write h4 ("back" proton):
  for (k = 1 ; k <= 3 ; k++)
    rH[k] = r[o2[stype],k] + ra[k] - ru[k] ;
  print_pqr_line(0, h4[stype], resname, resnumb,
                 rH[1], rH[2], rH[3], chargeH, radiusH) ;
}


# Print the second tautomeric proton (h2) of a phenyl-type site.
#
# stype : key into the o/c/h1/h2 atom-name tables.
#
# The existing proton h1 is mirrored through the O-C axis, staying in the
# H-O-C plane: the component of the O-H bond perpendicular to O-C is
# subtracted twice.  Residue name, residue number, charge and radius are
# copied from the PQR line of h1.
function process_phenyl(stype)
{
  # All three reference atoms must have been read for this residue.
  if (!(line[h1[stype]] != "" && line[o[stype]] != "" && line[c[stype]] != ""))
    error("Site "stype" "resnumb" has missing atoms.") ;

  # Bond vectors from the oxygen to the proton and to the carbon.
  for (k = 1 ; k <= 3 ; k++)
  {
    rOH[k] = r[h1[stype],k] - r[o[stype],k] ;
    rOC[k] = r[c[stype],k] - r[o[stype],k] ;
  }

  # ra is normal to the H-O-C plane; rb lies in that plane, perpendicular
  # to O-C.
  cross(rOH,rOC,ra) ;
  cross(rOC,ra,rb) ;

  # Compute and write h2.
  k = 0 ;
  while (++k <= 3)
  {
    rH[k] = r[h1[stype],k] - 2 * dot(rOH,rb) / dot(rb,rb) * rb[k] ;
  }
  split(line[h1[stype]], f) ;
  print_pqr_line(0, h2[stype], f[4], f[5],
                 rH[1], rH[2], rH[3],
                 f[9], f[10]) ;
}

# Print the two extra tautomeric protons (h2, h3) of an alcohol/thiol site.
#
# stype : key into the o/c/h1/h2/h3 atom-name tables.
#
# rc is the component of the O-H bond perpendicular to the O-C bond, and ru
# points along the normal of the H-O-C plane with length sqrt(3)/2 * |rc|.
# h1 - 1.5*rc +/- ru therefore places the new protons at +/-120 degrees
# from h1 around the O-C axis.  Residue name, residue number, charge and
# radius are copied from the PQR line of h1.
function process_alcohol(stype)
{
  # All three reference atoms must have been read for this residue.
  if (!(line[h1[stype]] != "" && line[o[stype]] != "" && line[c[stype]] != ""))
    error("Site "stype" "resnumb" has missing atoms.") ;

  # Bond vectors from the oxygen (or sulfur) to the proton and to the carbon.
  for (k = 1 ; k <= 3 ; k++)
  {
    rOH[k] = r[h1[stype],k] - r[o[stype],k] ;
    rOC[k] = r[c[stype],k] - r[o[stype],k] ;
  }

  cross(rOH,rOC,ra) ;   # normal to the H-O-C plane
  cross(rOC,ra,rb) ;    # in-plane, perpendicular to O-C

  # rc must be complete before ru is computed, because ru uses norm(rc).
  for (k = 1 ; k <= 3 ; k++)
  {
    rc[k] = dot(rOH,rb) / dot(rb,rb) * rb[k] ;
  }
  for (k = 1 ; k <= 3 ; k++)
  {
    ru[k] = 0.5 * sqrt(3) * norm(rc) * ra[k] / norm(ra) ;
  }

  # Both new protons reuse the fields of the h1 line.
  split(line[h1[stype]], f) ;

  # Compute and write h2:
  k = 0 ;
  while (++k <= 3) rH[k] = r[h1[stype],k] - 1.5 * rc[k] + ru[k] ;
  print_pqr_line(0, h2[stype], f[4], f[5],
                 rH[1], rH[2], rH[3],
                 f[9], f[10]) ;

  # Compute and write h3:
  k = 0 ;
  while (++k <= 3) rH[k] = r[h1[stype],k] - 1.5 * rc[k] - ru[k] ;
  print_pqr_line(0, h3[stype], f[4], f[5],
                 rH[1], rH[2], rH[3],
                 f[9], f[10]) ;
}

# Print four proton positions (h1..h4) around the oxygen of a water site.
#
# stype : key into the o/h1..h4 atom-name tables.
#
# h1 is placed at distance distOH from the oxygen in a direction drawn with
# rand(); h2 is placed at angle angHOH from h1, in a plane chosen with a
# second rand() direction; h3 and h4 are h1 turned by +/-120 degrees around
# the O-h2 axis.  Residue name and number are copied from the PQR line of
# the oxygen.
function process_water(stype)
{
  # Some parameters that depend on the force field:
  # (These are for G43a1; if necessary can be made stype-dependent):
  radiusH = 0.000 ;
  chargeH = 0.410 ;  # "Aesthetic", does not affect MEAD calculations.
  distOH = 1.000 ;
  angHOH = 109.5 * pi / 180 ;

  # The site must have its oxygen and none of the four protons.
  if (!(line[h1[stype]] == "" && line[h2[stype]] == "" &&
        line[h3[stype]] == "" && line[h4[stype]] == ""))
    error("Site "stype" "resnumb" has proton(s) already.") ;
  if (line[o[stype]] == "")
    error("Site "stype" "resnumb" has missing atoms.") ;

  # Fields of the oxygen line supply residue name and number for all protons.
  split(line[o[stype]], f) ;

  # h1: random direction given by the spherical angles (theta, phi).
  theta = rand() * 2 * pi ;
  phi = rand() * pi ;
  rOH1[1] = distOH * sin(phi) * cos(theta) ;
  rOH1[2] = distOH * sin(phi) * sin(theta) ;
  rOH1[3] = distOH * cos(phi) ;
  for (k = 1 ; k <= 3 ; k++)
  {
    rH1[k] = r[o[stype],k] + rOH1[k] ;
  }
  print_pqr_line(0, h1[stype], f[4], f[5],
                 rH1[1], rH1[2], rH1[3],
                 chargeH, radiusH) ;

  # h2: a second random vector rv only selects the plane; ru = rOH1 x rv is
  # perpendicular to O-h1, so ra + rb makes the angle angHOH with O-h1.
  theta = rand() * 2 * pi ;
  phi = rand() * pi ;
  rv[1] = distOH * sin(phi) * cos(theta) ;
  rv[2] = distOH * sin(phi) * sin(theta) ;
  rv[3] = distOH * cos(phi) ;
  cross(rOH1,rv,ru) ;
  for (k = 1 ; k <= 3 ; k++)
  {
    ra[k] = distOH * sin(angHOH) * ru[k] / norm(ru) ;
    rb[k] = rOH1[k] * cos(angHOH) ;
    rH2[k] = r[o[stype],k] + ra[k] + rb[k] ;
    rOH2[k] = rH2[k] - r[o[stype],k] ;
  }
  print_pqr_line(0, h2[stype], f[4], f[5],
                 rH2[1], rH2[2], rH2[3],
                 chargeH, radiusH) ;

  # h3 and h4: rc is the component of O-h1 perpendicular to O-h2, and ru
  # lies along the normal of the h1-O-h2 plane.  rc has to be complete
  # before ru is computed, because ru uses norm(rc).
  cross(rOH1,rOH2,ra) ;
  cross(rOH2,ra,rb) ;
  for (k = 1 ; k <= 3 ; k++)
  {
    rc[k] = dot(rOH1,rb) / dot(rb,rb) * rb[k] ;
  }
  for (k = 1 ; k <= 3 ; k++)
  {
    ru[k] = 0.5 * sqrt(3) * norm(rc) * ra[k] / norm(ra) ;
    rH3[k] = rH1[k] - 1.5 * rc[k] + ru[k] ;
    rH4[k] = rH1[k] - 1.5 * rc[k] - ru[k] ;
  }
  print_pqr_line(0, h3[stype], f[4], f[5],
                 rH3[1], rH3[2], rH3[3],
                 chargeH, radiusH) ;
  print_pqr_line(0, h4[stype], f[4], f[5],
                 rH4[1], rH4[2], rH4[3],
                 chargeH, radiusH) ;
}

# Write one ATOM record to standard output.
#   atnu          : atom number, printed as an integer
#   atna, resna   : atom and residue names, left-justified
#   resnu         : residue number, right-justified
#   x, y, z       : coordinates, three decimals
#   charge, rad   : charge and radius, three decimals
function print_pqr_line(atnu,atna,resna,resnu,x,y,z,charge,rad)
{
  printf "ATOM  %5d %-4s %-4s %4s     %7.3f %7.3f %7.3f %6.3f %6.3f\n",
         atnu, atna, resna, resnu,
         x, y, z,
         charge, rad ;
}

# Vector stuff:
# Scalar product of the 3-component vectors a and b (elements 1..3).
function dot(a,b)
{
  return (b[1] * a[1]) + (b[2] * a[2]) + (b[3] * a[3]) ;
}
# Euclidean length of the 3-component vector a: the square root of the
# scalar product of a with itself, written out term by term.
function norm(a)
{
  return sqrt(a[1] * a[1] + a[2] * a[2] + a[3] * a[3]) ;
}
# Vector product of the 3-component vectors a and b.  awk functions cannot
# return arrays, so the result is stored in elements 1..3 of the array axb
# supplied by the caller.
function cross(a,b,axb)
{
  axb[1] = b[3] * a[2] - b[2] * a[3] ;
  axb[2] = b[1] * a[3] - b[3] * a[1] ;
  axb[3] = b[2] * a[1] - b[1] * a[2] ;
}

# Fill the global site tables.  For every chemical group a regexp
# (carboxyl, phenyl, alcohol, water) lists the site names that belong to
# it, and the arrays o1, o2, o, c, h1, h2, h3 and h4, indexed by site name,
# give the atom names that the process_*() functions look up.
function define_sites()
{
  # Carboxyl groups: Asp, Glu, C-terminus, propionates A and D, acetate.
  carboxyl = "(ASP|GLU|CTR|PRA|PRD|ACE)" ;
  add_carboxyl_site("ASP", "OD1", "OD2", "CG",  "HD1",  "HD2",  "HD12", "HD22") ;
  add_carboxyl_site("GLU", "OE1", "OE2", "CD",  "HE1",  "HE2",  "HE12", "HE22") ;
  add_carboxyl_site("CTR", "OT1", "OT2", "CT",  "HT1",  "HT2",  "HT12", "HT22") ;
  add_carboxyl_site("PRA", "O1A", "O2A", "CGA", "HO1A", "HO2A", "HO3A", "HO4A") ;
  add_carboxyl_site("PRD", "O1D", "O2D", "CGD", "HO1D", "HO2D", "HO3D", "HO4D") ;
  add_carboxyl_site("ACE", "O1",  "O2",  "C1",  "H1",   "H2",   "H12",  "H22") ;

  # Phenyl groups: Tyr.
  phenyl = "TYR" ;
  add_phenyl_site("TYR", "OH", "CZ", "HH", "HH2") ;

  # Alcohol and thiol groups: Ser, Thr, Cys.
  alcohol = "(SER|THR|CYS)" ;
  add_alcohol_site("SER", "OG",  "CB", "HG",  "HG2",  "HG3") ;
  add_alcohol_site("THR", "OG1", "CB", "HG1", "HG12", "HG13") ;
  add_alcohol_site("CYS", "SG",  "CB", "HG",  "HG2",  "HG3") ;

  # Waters, under either residue name.
  water = "(HOH|H2O)" ;
  add_water_site("HOH", "OW", "HW1", "HW2", "HW3", "HW4") ;
  add_water_site("H2O", "OW", "HW1", "HW2", "HW3", "HW4") ;
}

# Record the atom names of one carboxyl site: both oxygens, the carbon and
# the four tautomeric protons.
function add_carboxyl_site(site, oxy1, oxy2, carbon, hyd1, hyd2, hyd3, hyd4)
{
  o1[site] = oxy1 ;
  o2[site] = oxy2 ;
  c[site] = carbon ;
  h1[site] = hyd1 ;
  h2[site] = hyd2 ;
  h3[site] = hyd3 ;
  h4[site] = hyd4 ;
}

# Record the atom names of one phenyl site: oxygen, carbon and two protons.
function add_phenyl_site(site, oxy, carbon, hyd1, hyd2)
{
  o[site] = oxy ;
  c[site] = carbon ;
  h1[site] = hyd1 ;
  h2[site] = hyd2 ;
}

# Record the atom names of one alcohol/thiol site: the oxygen (or sulfur),
# the carbon and three protons.
function add_alcohol_site(site, oxy, carbon, hyd1, hyd2, hyd3)
{
  o[site] = oxy ;
  c[site] = carbon ;
  h1[site] = hyd1 ;
  h2[site] = hyd2 ;
  h3[site] = hyd3 ;
}

# Record the atom names of one water site: the oxygen and four protons.
function add_water_site(site, oxy, hyd1, hyd2, hyd3, hyd4)
{
  o[site] = oxy ;
  h1[site] = hyd1 ;
  h2[site] = hyd2 ;
  h3[site] = hyd3 ;
  h4[site] = hyd4 ;
}

# Stop with an error unless file is an existing, readable regular file.
#
# file   : path to check.
# quoted : local variable (awk declares locals as extra parameters);
#          callers pass file only.
#
# The path is handed to the shell's test command, so it is wrapped in single
# quotes first, with every embedded single quote written as '\''.  Without
# this, a name containing spaces or shell metacharacters would be split or
# interpreted by the shell.
function filecheck(file,    quoted)
{
  quoted = file ;
  gsub(/'/, "'\\''", quoted) ;
  quoted = "'" quoted "'" ;
  # system() returns the exit status of the command; test exits non-zero
  # when the condition does not hold.
  if (system("test -f " quoted))
    error("File "file" does not exist.") ;
  if (system("test -r " quoted))
    error("File "file" exists but is not readable.") ;
}

# Write "<cmd>: Warning: <msg>" to standard error and carry on.
# The text is piped through "cat 1>&2" to reach stderr; the pipe is closed
# right away so that the message is flushed immediately.
function warning(msg)
{
  printf("%s: Warning: %s%s", cmd, msg, ORS) | "cat 1>&2" ;
  close("cat 1>&2") ;
}

# Write "<cmd>: Error: <msg>" to standard error and terminate the program
# with exit status 1.  The text is piped through "cat 1>&2" to reach
# stderr; the pipe is closed before exiting so that the message is flushed.
function error(msg)
{
  printf("%s: Error: %s%s", cmd, msg, ORS) | "cat 1>&2" ;
  close("cat 1>&2") ;
  exit 1 ;
}

