#!/usr/bin/gawk -f
###########################################################################
# This file is part of meadTools, version 2.2.
# 
# Copyright (c) 2001-2019, Instituto de Tecnologia Quimica e Biologica,
# Universidade Nova de Lisboa, Portugal.
# 
# meadTools is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 2 of the License, or (at your
# option) any later version.
# 
# meadTools is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with meadTools.  If not, see <http://www.gnu.org/licenses/>.
# 
# For further details and info check the README file.
# 
# You can get meadTools at www.itqb.unl.pt/simulation
###########################################################################


############################################################################
# convert: a program to convert meadT output into PETIT input.
#
# This program converts the .pkcrg and .g files created by meadT into
# the single input file required by PETIT.
############################################################################


# Program entry point. Everything happens in this BEGIN rule: the command
# line is checked, both input files are read, and the converted data is
# written to standard output. The rule ends with an explicit exit, so no
# input records are ever processed by pattern rules.
#
# Command line: convert PKCRG_FILE G_FILE TEMPERATURE
BEGIN{

  kBoltz_au  = 5.98435e-6 ;  #  e^2/(Angstrom*K)
  kBoltz_meV = 0.0861734 ;   #  meV/K
  LN10 = log(10) ;

  cmd = "convert" ;
  usage = "Usage: " cmd " PKCRG_FILE G_FILE TEMPERATURE" ;
  if (ARGC != 4) error("Wrong number of arguments.\n" usage) ;

  filecheck(pkcrg_file = ARGV[1]) ;
  filecheck(inter_file = ARGV[2]) ;

  # The temperature multiplies every individual energy term. A string
  # that is not a number converts to 0 and would silently zero all of
  # them, so reject anything that is not strictly positive.
  T = ARGV[3] ;
  if (T + 0 <= 0)
    error("Temperature must be a positive number (got \"" T "\").\n" usage) ;

  read_pkcrg() ;
  read_inter() ;
  make_initializations() ;
  make_new_matrix() ;
  warning("Make sure the redox sites are properly marked in the .pkcrg file!") ;
  exit 0 ;
}

# Read the .pkcrg file (global pkcrg_file) and fill, for each pseudo-site
# i (0-based, in file order), the global arrays:
#   pkcrg[i]      field 1
#   z0[i]         -1 if field 2 is "A" (anionic), 0 if it is "C" (cationic)
#   site_name[i]  field 3
#   site_type[i]  field 4 ("P" or "R"), "P" when the field is absent
#   titration[i]  numeric code derived from field 5 (see below)
#   state[i]      field 6, only stored when field 5 is "f"
# npseudos is set to the number of lines having at least three fields.
#
# The file is read twice: once to count pseudo-sites and once to parse
# them. Any malformed line terminates the program through error().
#
# 'status' is a local variable, not an argument.
function read_pkcrg(    status)
{
  npseudos = 0 ;
  # getline returns -1 on a read error; testing its bare value would
  # loop forever in that case, so compare explicitly against 0.
  while ((status = (getline < pkcrg_file)) > 0)
  {
    if (NF >= 3) npseudos++ ;
  }
  if (status < 0) error("Cannot read file " pkcrg_file) ;
  close(pkcrg_file) ;

  for (i = 0 ; i < npseudos ; i++)
  {
    if ((getline < pkcrg_file) <= 0)
      error("Unexpected end of file " pkcrg_file) ;
    if (NF < 3)
      error("Not enough fields in file " pkcrg_file) ;
    pkcrg[i] = $1 ;

    # Of the following fields only the ones referring to the first
    # tautomer are relevant for the calculations.
    if ($2 == "A") z0[i] = -1 ;  # anionic
    else if ($2 == "C") z0[i] = 0 ;  # cationic
    else error("Pseudo-site "$3" has wrong charge type") ;

    site_name[i] = $3 ;

    if (NF >= 4) site_type[i] = $4 ;
    else site_type[i] = "P" ;
    if (site_type[i] != "P" && site_type[i] != "R")
      error("Pseudo-site "$3" has wrong ligand type.") ;

    # When the titration field is absent it defaults to "n" for
    # pseudo-sites whose first field is below -70, and to "*" otherwise.
    if (NF >= 5) stit = $5 ;
    else if (pkcrg[i] < -70) stit = "n" ;
    else stit = "*" ;
    if (stit == "*") titration[i] = 3 ;       # titrable through single and double flips
    else if (stit == "d") titration[i] = 2 ;  # titrable through double flips only
    else if (stit == "s") titration[i] = 1 ;  # titrable through single flips only
    else if (stit == "n") titration[i] = 4 ;  # neutral non-titrable but tautomerizable
    else if (stit == "f")  # fixed state (only needed in the first pseudo-site)
    {
      titration[i] = 0 ;
      state[i] = $6 ;
    }
    else error("Pseudo-site "$3" has wrong titration.") ;
  }
  close(pkcrg_file) ;
}


# Read the .g file (global inter_file) and group pseudo-sites into sites.
#
# The file must contain npseudos * npseudos lines of exactly three
# fields each, ordered with the second pseudo-site index running
# fastest. The third field of each line is stored in w[i,j]; the first
# two fields are not checked.
#
# On return:
#   w[i,j]     value read for the pseudo-site pair (i,j)
#   ntauts[k]  number of pseudo-sites (tautomers) grouped into site k
#   nsites     number of sites found
function read_inter()
{
  for (i = 0 ; i < npseudos ; i++)
  for (j = 0 ; j < npseudos ; j++)
  {
    # Without this check a truncated file would go unnoticed: at end of
    # file getline leaves $0 and NF untouched, so the previous line
    # would be reused for every remaining pair.
    if ((getline < inter_file) <= 0)
      error("Unexpected end of file "inter_file" (expected " (npseudos * npseudos) " lines)") ;
    if (NF != 3) error("Wrong number of fields in file "inter_file) ;
    w[i,j] = $3 ;
  }
  close(inter_file) ;
  # ATTENTION! The following assumes that pseudo-sites of the same site
  # are contiguous in the .g file.
  # Starting at pseudo-site i, every following pseudo-site j with
  # w[i,j] > 0.5 is counted as another tautomer of the same site; the
  # first j failing the test starts the next site.
  # NOTE(review): the 0.5 threshold presumably relies on the .g file
  # holding a large value for pairs of the same site -- confirm.
  k = 0 ;
  for (i = 0 ; i < npseudos ; i += t)
  {
    t = 1 ;
    for (j = i + 1 ; j < npseudos ; j++)
    {
      if (w[i,j] > 0.5) t++ ;
      else break ;
    }
    ntauts[k++] = t ;
  }
  nsites = k ;
}


# Collapse the per-pseudo-site data into per-site data.
#
# Builds taut[i,t], the pseudo-site index of tautomer t of site i, and
# then rewrites site_name, site_type, z0, titration and state so that
# index i refers to site i (taken from its first tautomer). Also sets
# nstates[i], the number of states of site i.
function make_initializations()
{
  # Pseudo-sites are numbered consecutively, site after site.
  j = 0 ;
  for (i = 0 ; i < nsites ; i++)
  {
    t = 0 ;
    while (t < ntauts[i]) taut[i,t++] = j++ ;
  }

  # Arrays are overwritten in place. This is safe because every site has
  # at least one tautomer, hence i <= taut[i,0]: an entry is only
  # overwritten once it is no longer needed by a later site.
  for (i = 0 ; i < nsites ; i++)
  {
    # Strip the tautomer tag ("all", "avx" or "tau" followed by digits)
    # from the part of the name before the first "-". This procedure may
    # not be the most general one. split() empties aux before filling it.
    split(site_name[taut[i,0]], aux, "-") ;
    sub(/(all|avx|tau[[:digit:]]+)/, "", aux[1]) ;
    site_name[i] = aux[1]"-"aux[2] ;

    z0[i] = z0[taut[i,0]] ;
    site_type[i] = site_type[taut[i,0]] ;
    titration[i] = titration[taut[i,0]] ;

    # Sites with titration code 4 have one state per tautomer; all
    # other sites have one additional state.
    nstates[i] = (titration[i] == 4) ? ntauts[i] : ntauts[i] + 1 ;

    # A fixed site (code 0) takes the state given for its first
    # tautomer in the .pkcrg file; every other site starts at state 0.
    state[i] = (titration[i] == 0) ? state[taut[i,0]] : 0 ;
  }
}


# Write the converted data to standard output:
#   - one line with the number of sites;
#   - for each site, a header line (name, number of states, site type,
#     titration letter) followed by one line per state with its
#     occupancy and individual energy term;
#   - for each pair of sites i < j and each pair of their states, one
#     line with the interaction term.
# For every site the state whose index equals ntauts[i], when it exists,
# has zero individual and interaction terms.
function make_new_matrix()
{
  # Titration letters, indexed by the numeric codes stored in titration[].
  tit[0] = "f" ;
  tit[1] = "s" ;
  tit[2] = "d" ;
  tit[3] = "*" ;
  tit[4] = "n" ;

  printf "%d\n", nsites ;

  # Occupancy of every state of every site.
  for (i = 0 ; i < nsites ; i++)
    for (si = 0 ; si < nstates[i] ; si++)
      stateocc[i,si] = (si == ntauts[i]) ? z0[i] + 1 : -z0[i] ;

  # Site headers and individual terms.
  for (i = 0 ; i < nsites ; i++)
  {
    printf "%-10s %2d  %c  %c\n", site_name[i], nstates[i], site_type[i], tit[titration[i]] ;
    for (si = 0 ; si < nstates[i] ; si++)
    {
      # (2 * z0 + 1) is -1 for anionic and +1 for cationic sites.
      g = (si == ntauts[i]) ? 0 : (2 * z0[i] + 1) * LN10 * (kBoltz_au * T) * pkcrg[taut[i,si]] ;
      printf "%1d %13.6e \n", stateocc[i,si], g ;
    }
  }

  # Pairwise interaction terms, upper triangle of the site matrix only.
  for (i = 0 ; i < nsites - 1 ; i++)
    for (j = i + 1 ; j < nsites ; j++)
      for (si = 0 ; si < nstates[i] ; si++)
        for (sj = 0 ; sj < nstates[j] ; sj++)
        {
          gg = (si == ntauts[i] || sj == ntauts[j]) ? 0.0 : (2 * z0[i] + 1) * (2 * z0[j] + 1) * w[taut[i,si],taut[j,sj]] ;
          printf "%3d %2d   %3d %2d   %13.6e\n", i, si, j, sj, gg ;
        }

}


# Terminate the program through error() unless 'file' is an existing,
# readable regular file.
#
# The checks are run by the shell through test(1). The name is wrapped
# in single quotes, with embedded single quotes escaped, so that names
# containing spaces or shell metacharacters are tested literally instead
# of being interpreted by the shell.
#
# 'quoted' is a local variable, not an argument.
function filecheck(file,    quoted)
{
  quoted = file ;
  gsub(/'/, "'\\''", quoted) ;   # ' becomes '\'' inside a quoted word
  quoted = "'" quoted "'" ;

  if (system("test -f " quoted))
    error("File "file" does not exist.") ;
  if (system("test -r " quoted))
    error("File "file" exists but is not readable.") ;
}

# Write "<cmd>: Warning: <msg>" to standard error and carry on.
function warning(msg)
{
  # The message goes through a shell redirection to reach stderr; the
  # pipe is closed right away so the text is emitted immediately.
  printf "%s: Warning: %s\n", cmd, msg | "cat 1>&2" ;
  close("cat 1>&2") ;
}

# Write "<cmd>: Error: <msg>" to standard error and terminate the
# program with exit status 1.
function error(msg)
{
  # The message goes through a shell redirection to reach stderr; the
  # pipe is closed before exiting so the text is emitted first.
  printf "%s: Error: %s\n", cmd, msg | "cat 1>&2" ;
  close("cat 1>&2") ;
  exit 1 ;
}

