#!/usr/bin/gawk -f
###########################################################################
# This file is part of meadTools, version 2.2.
# 
# Copyright (c) 2001-2019, Instituto de Tecnologia Quimica e Biologica,
# Universidade Nova de Lisboa, Portugal.
# 
# meadTools is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 2 of the License, or (at your
# option) any later version.
# 
# meadTools is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with meadTools.  If not, see <http://www.gnu.org/licenses/>.
# 
# For further details and info check the README file.
# 
# You can get meadTools at www.itqb.unl.pt/simulation
###########################################################################


############################################################################
# nulltit: a program to compute null-model titrations.
#
# This program reads a .sites file and computes the titration curve
# obtained under the assumption of a "null model", i.e., of
# non-interacting sites.  The output columns (written to stdout) are:
# pH, total number of protons, total charge.
#
# Two sets of "null model" pKa values can be used, namely the classic
# Tanford set [Nozaki & Tanford (1967) Methods. Enzymol. 11:715] and
# those obtained recently at Nick Pace's lab using pentapeptides
# [Thurlkill et al (2006) Protein Sci. 15:1214; Grimsley et al (2009)
# Protein Sci. 18:247]. Since no pentapeptide data was measured for
# the Arg guanidinium group, the Tanford value is used in both cases.
############################################################################


# Main program.  All the work is done in this BEGIN rule:
#   1. check the command line (NULL_SET MIN_pH MAX_pH DELTA_pH SITES_FILE);
#   2. select the table of null-model pKa values;
#   3. read the sites file, taking the site name from the second field
#      and collecting one pKa per recognized site;
#   4. print one line "pH  protons  charge" for each pH value
#      MIN_pH, MIN_pH + DELTA_pH, ... that does not exceed MAX_pH.
BEGIN{
  cmd = "nulltit" ;
  usage = "Usage: "cmd" NULL_SET MIN_pH MAX_pH DELTA_pH SITES_FILE\n" \
    "NULL_SET = T or P, for Tanford or Pace pKa sets.";
  if (ARGC != 6) error("Wrong number of arguments.\n" usage) ;
  if (ARGV[1] !~ /^[TP]$/) error("Wrong argument value.\n" usage) ;

  # The three pH arguments must be plain decimal numbers; otherwise the
  # loop below would compare strings or never terminate.
  number = "^[-+]?([0-9]+[.]?[0-9]*|[.][0-9]+)([eE][-+]?[0-9]+)?$" ;
  for (a = 2 ; a <= 4 ; a++)
    if (ARGV[a] !~ number) error("Wrong argument value.\n" usage) ;
  # A zero or negative step would never reach MAX_pH.
  if (ARGV[4] + 0 <= 0) error("Wrong argument value.\n" usage) ;

  nullset = ARGV[1] ;
  pHmin = ARGV[2] + 0 ;   # "+ 0" forces a numeric value
  pHmax = ARGV[3] + 0 ;
  dpH   = ARGV[4] + 0 ;
  sites_file = ARGV[5] ;
  filecheck(sites_file) ;

  if (nullset == "T")    # Define Tanford pKa set:
  {
    pknull["ARG"] = 12.0 ;
    pknull["ASP"] =  4.0 ;
    pknull["CTR"] =  3.8 ;
    pknull["CYS"] =  9.5 ;
    pknull["GLU"] =  4.4 ;
    pknull["HIS"] =  6.3 ;
    pknull["LYS"] = 10.4 ;
    pknull["NTR"] =  7.5 ;
    pknull["TYR"] =  9.6 ;
  }
  else    # Define Pace pKa set:
  {
    pknull["ARG"] = 12.0 ; # Not measured; using value from Tanford set.
    pknull["ASP"] =  3.9 ;
    pknull["CTR"] =  3.7 ;
    pknull["CYS"] =  8.6 ;
    pknull["GLU"] =  4.3 ;
    pknull["HIS"] =  6.5 ;
    pknull["LYS"] = 10.4 ;
    pknull["NTR"] =  8.0 ;
    pknull["TYR"] =  9.8 ;
  }

  # Read the sites file.  ns counts the recognized sites and nA counts
  # those among them that are anionic (ASP, GLU, C-terminus, CYS, TYR).
  ns = 0 ;
  nA = 0 ;
  # getline returns 1 per record, 0 at end of file and -1 on error, so
  # the result must be tested with "> 0" or a read error loops forever.
  while ((status = (getline < sites_file)) > 0)
  {
    # Site names starting with NT/CT are the termini; otherwise the
    # first three characters of the name select the pKa.
    if ($2 ~ /^NT/) m = "NTR" ;
    else if ($2 ~ /^CT/) m = "CTR" ;
    else m = substr($2, 1, 3) ;
    # "in" tests membership without creating an empty pknull[m] entry.
    if (!(m in pknull))
    {
      warning("Unknown site "$2" will be ignored!") ;
      continue ;
    }
    ns++ ;
    pk[ns] = pknull[m] ;
    if ($2 ~ /^(ASP|GLU|CT|CYS|TYR)/) nA++ ;
  }
  if (status < 0) error("Error while reading file "sites_file".") ;
  close(sites_file) ;

  # Titration curve.  Each pH value is computed as pHmin + i*dpH rather
  # than by repeated addition, so rounding errors do not accumulate, and
  # the small tolerance eps makes sure that pHmax itself is included
  # when it lies on the grid, without ever printing a point beyond it.
  eps = 1e-6 * dpH ;
  for (i = 0 ; (pH = pHmin + i * dpH) <= pHmax + eps ; i++)
  {
    # Non-interacting sites: site s binds 1/(1+10^(pH-pK)) protons.
    nH = 0 ;
    for (s = 1 ; s <= ns ; s++) nH += 1 / (1 + 10^(pH-pk[s])) ;
    # The total charge is the bound protons minus the anionic sites.
    printf "%.6f %.6f %.6f\n", pH, nH, nH - nA ;
  }
}

# filecheck(file): stop the program through error() unless FILE names an
# existing regular file that is readable.  The checks are done with the
# shell's "test" command.  QUOTED is a local variable, not an argument.
function filecheck(file,    quoted)
{
  # Wrap the name in single quotes (writing each embedded ' as '\'') so
  # that the shell sees it as one literal word; otherwise names with
  # spaces or shell metacharacters would be split or interpreted, and an
  # empty name would make "test -f" succeed.
  quoted = file ;
  gsub(/'/, "'\\''", quoted) ;
  quoted = "'" quoted "'" ;
  # system() returns the exit status of "test": non-zero means failure.
  if (system("test -f " quoted))
    error("File "file" does not exist.") ;
  if (system("test -r " quoted))
    error("File "file" exists but is not readable.") ;
}

# warning(msg): write MSG to stderr, prefixed with the program name and
# "Warning:", and let the program continue.
function warning(msg)
{
  # msg is a by-value scalar, so prefixing it here is invisible to callers.
  msg = cmd ": Warning: " msg ;
  # Piping through "cat 1>&2" sends the line to standard error.
  print msg | "cat 1>&2" ;
}

# error(msg): write MSG to stderr, prefixed with the program name and
# "Error:", then terminate the program with exit status 1.
function error(msg)
{
  # msg is a by-value scalar, so prefixing it here is invisible to callers.
  msg = cmd ": Error: " msg ;
  # Piping through "cat 1>&2" sends the line to standard error.
  print msg | "cat 1>&2" ;
  exit 1 ;
}

