#!/bin/bash -e

# This is a quick-and-dirty fix to rename some atoms of the
# C-terminus, which should work also for multi-chain proteins. This
# should be unnecessary if, as stated in the gmx manual, the .tdb
# format allowed for name replacement, which it doesn't in
# practice... This fix assumes that an atom named "O1" indicates a
# C-terminal residue, which is true for plain proteins but not
# necessarily for other systems (that may contain other atoms
# named "O1").
#
# ATTENTION: This is specific to the G54A7 force field.


# Validate the command line: exactly two arguments are required, the first
# ending in ".top" and the second ending in ".gro". Diagnostics go to stderr
# and the script exits with status 1 on any violation.
if [[ $# -ne 2 || ${1##*.} != top || ${2##*.} != gro ]]; then
  printf '%s\n' \
    "Error: Usage: fixCTER some.top some.gro" \
    "Specific for G54A7!" >&2
  exit 1
fi

# Both files are read and then overwritten in place, so refuse to continue
# when either one is missing, not a regular file, unreadable or unwritable.
for input in "$1" "$2"; do
  if [[ ! -f $input || ! -r $input || ! -w $input ]]; then
    printf 'Error: %s is not a readable and writable regular file\n' \
      "$input" >&2
    exit 1
  fi
done

# Rewrite the topology ($1) and the coordinate file ($2) in place, renaming
# atom "O" to "O2" in every residue that the topology marks as C-terminal,
# i.e. a residue owning an atom named "O1" inside an "[ atoms ]" section.
#
# The fixed files are first written to a private temporary directory and are
# copied over the originals only after awk has finished successfully, so a
# failure never leaves a half-written input or stray files behind.
top_file=$1
gro_file=$2

tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/fixCTER.XXXXXX") || exit
trap 'rm -rf -- "$tmp_dir"' EXIT

# Pre-create the outputs so that an empty input yields an empty output
# instead of a missing file.
: > "$tmp_dir/fixed.top" || exit
: > "$tmp_dir/fixed.gro" || exit

# The output paths travel through the environment (not -v) so that awk does
# not apply backslash-escape processing to them.
FIXCTER_TOP_OUT="$tmp_dir/fixed.top" FIXCTER_GRO_OUT="$tmp_dir/fixed.gro" \
  awk '
BEGIN {
  top = ARGV[1]
  gro = ARGV[2]
  # An empty ARGV element is skipped, so the main input loop scans the
  # topology only; the coordinate file is read explicitly in END.
  ARGV[2] = ""
  top_out = ENVIRON["FIXCTER_TOP_OUT"]
  gro_out = ENVIRON["FIXCTER_GRO_OUT"]

  # Old atom name -> new atom name inside a C-terminal residue.
  rename["O"] = "O2"
  # Disabled alternative mappings, kept for reference:
  #   rename["C"]="CT"; rename["O1"]="OT1"; rename["O"]="OT2";
  #   rename["HO11"]="HT11"; rename["HO12"]="HT12";
  #   rename["HO21"]="HT21"; rename["HO22"]="HT22";
}

# Pass 1 (main loop, topology): collect the residue numbers (field 3) of all
# residues that contain an atom named "O1" (field 5). An "[ atoms ]" section
# ends at the first line whose first field is empty.
/\[ atoms \]/           { in_atoms = 1 }
in_atoms && $5 == "O1"  { cterm[$3] = 1 }
$1 == ""                { in_atoms = 0 }

END {
  # Pass 2: copy the topology, replacing the atom-name column of matching
  # atoms. getline returns -1 on error, hence the explicit "> 0" test.
  in_atoms = 0
  while ((rc = (getline < top)) > 0) {
    if ($0 ~ /\[ atoms \]/) in_atoms = 1
    if (in_atoms && ($3 in cterm) && ($5 in rename))
      line = sprintf("%s %4s %s", substr($0, 1, 33), rename[$5], substr($0, 40))
    else
      line = $0
    print line > top_out
    if ($1 == "") in_atoms = 0
  }
  if (rc < 0) {
    print "fixCTER: cannot read " top | "cat 1>&2"
    exit 1
  }
  close(top)
  close(top_out)

  # Pass 3: copy the coordinate file. Atom records start after the first
  # line that consists of a single integer (the atom count). The residue
  # number is taken from columns 1-5 and the atom name from columns 11-15.
  in_atoms = 0
  while ((rc = (getline < gro)) > 0) {
    if ($1 ~ /^[0-9]+$/) in_atoms = 1
    split(substr($0, 11, 5), parts)
    atom = parts[1]
    resnr = substr($0, 1, 5) + 0
    if (in_atoms && (resnr in cterm) && (atom in rename))
      line = sprintf("%s%5s%s", substr($0, 1, 10), rename[atom], substr($0, 16))
    else
      line = $0
    print line > gro_out
    if ($1 == "") in_atoms = 0
  }
  if (rc < 0) {
    print "fixCTER: cannot read " gro | "cat 1>&2"
    exit 1
  }
  close(gro)
  close(gro_out)

  # Report every residue that was treated as C-terminal (on stderr).
  for (r in cterm)
    print "fixCTER: atom name(s) fixed in residue " r | "cat 1>&2"
}
' "$top_file" "$gro_file" || exit

# cp (rather than mv) keeps the inode and permissions of the original files.
cp -- "$tmp_dir/fixed.top" "$top_file" || exit
cp -- "$tmp_dir/fixed.gro" "$gro_file" || exit

exit 0

############################################################################
