/*
FILE:     CovalentLinkage.C
*/
/*
VERSION:  11.308
*/
/*
DATE:     10/16/2025
*/
/*
  Comments and Questions to: sw-help@rcsb.rutgers.edu
*/
/*
COPYRIGHT 1999-2025 Rutgers - The State University of New Jersey

This software is provided WITHOUT WARRANTY OF MERCHANTABILITY OR
FITNESS FOR A PARTICULAR PURPOSE OR ANY OTHER WARRANTY, EXPRESS OR
IMPLIED.  RUTGERS MAKE NO REPRESENTATION OR WARRANTY THAT THE
SOFTWARE WILL NOT INFRINGE ANY PATENT, COPYRIGHT OR OTHER
PROPRIETARY RIGHT.

The user of this software shall indemnify, hold harmless and defend
Rutgers, its governors, trustees, officers, employees, students,
agents and the authors against any and all claims, suits,
losses, liabilities, damages, costs, fees, and expenses including
reasonable attorneys' fees resulting from or arising out of the
use of this software.  This indemnification shall include, but is
not limited to, any and all claims alleging products liability.
*/
/*
               RCSB PDB SOFTWARE LICENSE AGREEMENT

BY CLICKING THE ACCEPTANCE BUTTON OR INSTALLING OR USING 
THIS "SOFTWARE, THE INDIVIDUAL OR ENTITY LICENSING THE  
SOFTWARE ("LICENSEE") IS CONSENTING TO BE BOUND BY AND IS 
BECOMING A PARTY TO THIS AGREEMENT.  IF LICENSEE DOES NOT 
AGREE TO ALL OF THE TERMS OF THIS AGREEMENT
THE LICENSEE MUST NOT INSTALL OR USE THE SOFTWARE.

1. LICENSE AGREEMENT

This is a license between you ("Licensee") and the Protein Data Bank (PDB) 
at Rutgers, The State University of New Jersey (hereafter referred to 
as "RUTGERS").   The software is owned by RUTGERS and protected by 
copyright laws, and some elements are protected by laws governing 
trademarks, trade dress and trade secrets, and may be protected by 
patent laws. 

2. LICENSE GRANT

RUTGERS grants you, and you hereby accept, non-exclusive, royalty-free 
perpetual license to install, use, modify, prepare derivative works, 
incorporate into other computer software, and distribute in binary 
and source code format, or any derivative work thereof, together with 
any associated media, printed materials, and on-line or electronic 
documentation (if any) provided by RUTGERS (collectively, the "SOFTWARE"), 
subject to the following terms and conditions: (i) any distribution 
of the SOFTWARE shall bind the receiver to the terms and conditions 
of this Agreement; (ii) any distribution of the SOFTWARE in modified 
form shall clearly state that the SOFTWARE has been modified from 
the version originally obtained from RUTGERS.  

2. COPYRIGHT; RETENTION OF RIGHTS.  

The above license grant is conditioned on the following: (i) you must 
reproduce all copyright notices and other proprietary notices on any 
copies of the SOFTWARE and you must not remove such notices; (ii) in 
the event you compile the SOFTWARE, you will include the copyright 
notice with the binary in such a manner as to allow it to be easily 
viewable; (iii) if you incorporate the SOFTWARE into other code, you 
must provide notice that the code contains the SOFTWARE and include 
a copy of the copyright notices and other proprietary notices.  All 
copies of the SOFTWARE shall be subject to the terms of this Agreement.  

3. NO MAINTENANCE OR SUPPORT; TREATMENT OF ENHANCEMENTS 

RUTGERS is under no obligation whatsoever to: (i) provide maintenance 
or support for the SOFTWARE; or (ii) to notify you of bug fixes, patches, 
or upgrades to the features, functionality or performance of the 
SOFTWARE ("Enhancements") (if any), whether developed by RUTGERS 
or third parties.  If, in its sole discretion, RUTGERS makes an 
Enhancement available to you and RUTGERS does not separately enter 
into a written license agreement with you relating to such bug fix, 
patch or upgrade, then it shall be deemed incorporated into the SOFTWARE 
and subject to this Agreement. You are under no obligation whatsoever 
to provide any Enhancements to RUTGERS or the public that you may 
develop over time; however, if you choose to provide your Enhancements 
to RUTGERS, or if you choose to otherwise publish or distribute your 
Enhancements, in source code form without contemporaneously requiring 
end users or RUTGERS to enter into a separate written license agreement 
for such Enhancements, then you hereby grant RUTGERS a non-exclusive,
royalty-free perpetual license to install, use, modify, prepare
derivative works, incorporate into the SOFTWARE or other computer
software, distribute, and sublicense your Enhancements or derivative
works thereof, in binary and source code form.

4. FEES.  There is no license fee for the SOFTWARE.  If Licensee
wishes to receive the SOFTWARE on media, there may be a small charge
for the media and for shipping and handling.  Licensee is
responsible for any and all taxes.

5. TERMINATION.  Without prejudice to any other rights, Licensor
may terminate this Agreement if Licensee breaches any of its terms
and conditions.  Upon termination, Licensee shall destroy all
copies of the SOFTWARE.

6. PROPRIETARY RIGHTS.  Title, ownership rights, and intellectual
property rights in the Product shall remain with RUTGERS.  Licensee 
acknowledges such ownership and intellectual property rights and will 
not take any action to jeopardize, limit or interfere in any manner 
with RUTGERS' ownership of or rights with respect to the SOFTWARE.  
The SOFTWARE is protected by copyright and other intellectual 
property laws and by international treaties.  Title and related 
rights in the content accessed through the SOFTWARE is the property 
of the applicable content owner and is protected by applicable law.  
The license granted under this Agreement gives Licensee no rights to such
content.

7. DISCLAIMER OF WARRANTY.  THE SOFTWARE IS PROVIDED FREE OF 
CHARGE, AND, THEREFORE, ON AN "AS IS" BASIS, WITHOUT WARRANTY OF 
ANY KIND, INCLUDING WITHOUT LIMITATION THE WARRANTIES THAT IT 
IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE 
OR NON-INFRINGING.  THE ENTIRE RISK AS TO THE QUALITY AND 
PERFORMANCE OF THE SOFTWARE IS BORNE BY LICENSEE.  SHOULD THE 
SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, THE LICENSEE AND NOT 
LICENSOR ASSUMES THE ENTIRE COST OF ANY SERVICE AND REPAIR.  
THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF 
THIS AGREEMENT.  NO USE OF THE PRODUCT IS AUTHORIZED HEREUNDER 
EXCEPT UNDER THIS DISCLAIMER.

8. LIMITATION OF LIABILITY.  TO THE MAXIMUM EXTENT PERMITTED BY
APPLICABLE LAW,  IN NO EVENT WILL LICENSOR BE LIABLE FOR ANY 
INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 
OUT OF THE USE OF OR INABILITY TO USE THE SOFTWARE, INCLUDING, 
WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK 
STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL 
OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF ADVISED OF THE
POSSIBILITY THEREOF. 
*/
#include <stdio.h>
#include <stdlib.h>

#include "CompositeIndex.h"
#include "CovalentLinkage.h"
#include "CovalentLinkage_global.h"
#include "utillib.h"

/*
 * Default constructor.
 *
 * Delegates to clear(), which nulls the dictionary pointer and empties every
 * lookup container. The lookup tables are not populated here; that is done
 * by initialize().
 */
CovalentLinkage::CovalentLinkage()
{
       this->clear();
}

/*
 * Destructor.
 *
 * Resets the members through clear(). The dictionary pointer is only set to
 * NULL there; the pointed-to object is never deleted by this class.
 */
CovalentLinkage::~CovalentLinkage()
{
       this->clear();
}

/*
 * Returns the object to its empty state: every lookup container is emptied
 * and the dictionary pointer is reset to NULL (without deleting the object
 * it referred to). After this call initialize() and setCCDic() have to be
 * invoked again before the lookups can yield anything.
 */
void CovalentLinkage::clear()
{
       // Tables filled by initialize().
       _glyco_site_map.clear();
       _glyco_site_res_pair_set.clear();
       _glyco_link_set.clear();
       _iso_peptide_link_set.clear();
       _iso_peptide_atom_set.clear();

       // Caches filled lazily by the peptide-linkage queries.
       _backbone_peptide_link_set.clear();
       _ccd_backbone_terminal_atom_mapping.clear();

       // Shared "no atoms" result handed out by _get_B_C_N_atom_set().
       _empty_set.clear();

       // Detach the dictionary.
       _ccDic = NULL;
}

/*
 * Populates the static lookup tables from the __glyco_site_list,
 * __glyco_link_list and __iso_peptide_link_list tables.
 *
 * Every table row supplies two (column 0, column 1) / (column 2, column 3)
 * pairs; the query methods of this class look these keys up with
 * (residue name, atom name) pairs. Each row is registered in both
 * orientations so that lookups do not depend on argument order.
 */
void CovalentLinkage::initialize()
{
       // Glycosylation sites: 4-part key -> column 4 of the row, plus the
       // (column 0, column 2) pair in both orders.
       for (int site = 0; site < NUM_GLYCO_SITE; ++site) {
            const std::string forward_key = CompositeIndex::getIndex(__glyco_site_list[site][0], __glyco_site_list[site][1],
                                                                     __glyco_site_list[site][2], __glyco_site_list[site][3]);
            const std::string reverse_key = CompositeIndex::getIndex(__glyco_site_list[site][2], __glyco_site_list[site][3],
                                                                     __glyco_site_list[site][0], __glyco_site_list[site][1]);
            _glyco_site_map.insert(std::make_pair(forward_key, __glyco_site_list[site][4]));
            _glyco_site_map.insert(std::make_pair(reverse_key, __glyco_site_list[site][4]));

            const std::string forward_pair = CompositeIndex::getIndex(__glyco_site_list[site][0], __glyco_site_list[site][2]);
            const std::string reverse_pair = CompositeIndex::getIndex(__glyco_site_list[site][2], __glyco_site_list[site][0]);
            _glyco_site_res_pair_set.insert(forward_pair);
            _glyco_site_res_pair_set.insert(reverse_pair);
       }

       // Glyco-like linkages: 4-part key only.
       for (int link = 0; link < NUM_GLYCO_LINK; ++link) {
            const std::string forward_key = CompositeIndex::getIndex(__glyco_link_list[link][0], __glyco_link_list[link][1],
                                                                     __glyco_link_list[link][2], __glyco_link_list[link][3]);
            const std::string reverse_key = CompositeIndex::getIndex(__glyco_link_list[link][2], __glyco_link_list[link][3],
                                                                     __glyco_link_list[link][0], __glyco_link_list[link][1]);
            _glyco_link_set.insert(forward_key);
            _glyco_link_set.insert(reverse_key);
       }

       // Iso-peptide linkages: 4-part key in both orders, plus each
       // individual (column 0, column 1) and (column 2, column 3) pair.
       for (int link = 0; link < NUM_ISO_PEPTIDE_LINK; ++link) {
            const std::string forward_key = CompositeIndex::getIndex(__iso_peptide_link_list[link][0], __iso_peptide_link_list[link][1],
                                                                     __iso_peptide_link_list[link][2], __iso_peptide_link_list[link][3]);
            const std::string reverse_key = CompositeIndex::getIndex(__iso_peptide_link_list[link][2], __iso_peptide_link_list[link][3],
                                                                     __iso_peptide_link_list[link][0], __iso_peptide_link_list[link][1]);
            _iso_peptide_link_set.insert(forward_key);
            _iso_peptide_link_set.insert(reverse_key);

            const std::string first_atom = CompositeIndex::getIndex(__iso_peptide_link_list[link][0], __iso_peptide_link_list[link][1]);
            const std::string second_atom = CompositeIndex::getIndex(__iso_peptide_link_list[link][2], __iso_peptide_link_list[link][3]);
            _iso_peptide_atom_set.insert(first_atom);
            _iso_peptide_atom_set.insert(second_atom);
       }
}

/*
 * Attaches the dictionary consulted by the peptide-linkage queries.
 *
 * Only the pointer is stored; this class never deletes it, so the caller
 * presumably keeps ownership and must keep the object alive while it is
 * attached (confirm against callers).
 */
void CovalentLinkage::setCCDic(ConnectDic* ccdic)
{
       this->_ccDic = ccdic;
}

/*
 * Tells whether the composite key idx is registered in the glycosylation
 * site table.
 *
 * Returns true exactly when findGlycosylationType(idx) yields a non-empty
 * string.
 */
bool CovalentLinkage::isGlycosylationSite(const std::string& idx)
{
       return !findGlycosylationType(idx).empty();
}

/*
 * Atom-pair overload: true exactly when findGlycosylationType(atom1, atom2)
 * yields a non-empty string.
 *
 * Both pointers are dereferenced by the callee and must not be NULL.
 */
bool CovalentLinkage::isGlycosylationSite(RCSB::Atom* atom1, RCSB::Atom* atom2)
{
       return !findGlycosylationType(atom1, atom2).empty();
}

/*
 * Looks up the composite key idx in the glycosylation site table.
 *
 * Returns the value stored for the key, or an empty string when the key is
 * not registered.
 */
std::string CovalentLinkage::findGlycosylationType(const std::string& idx)
{
       std::map<std::string, std::string>::const_iterator hit = _glyco_site_map.find(idx);
       if (hit == _glyco_site_map.end()) return std::string();

       return hit->second;
}

/*
 * Atom-pair overload: builds the composite key from each atom's
 * pdb_resnam() and atmtype() values and forwards to the key-based lookup.
 *
 * Both pointers are dereferenced unconditionally and must not be NULL.
 */
std::string CovalentLinkage::findGlycosylationType(RCSB::Atom* atom1, RCSB::Atom* atom2)
{
       const std::string site_key = CompositeIndex::getIndex(atom1->pdb_resnam(), atom1->atmtype(), atom2->pdb_resnam(), atom2->atmtype());

       return findGlycosylationType(site_key);
}

/*
 * Tells whether the two-part composite key idx is one of the pairs
 * registered by initialize() from the glycosylation site table.
 */
bool CovalentLinkage::isGlycosylationSiteResiduePair(const std::string& idx)
{
       if (_glyco_site_res_pair_set.find(idx) == _glyco_site_res_pair_set.end()) return false;

       return true;
}

/*
 * Tells whether the (residue, atom, residue, atom) key built from the two
 * atoms' pdb_resnam() and atmtype() values is present in the glyco-link
 * table.
 *
 * Both pointers are dereferenced unconditionally and must not be NULL.
 */
bool CovalentLinkage::isGlycoLikeLinkage(RCSB::Atom* atom1, RCSB::Atom* atom2)
{
       const std::string link_key = CompositeIndex::getIndex(atom1->pdb_resnam(), atom1->atmtype(), atom2->pdb_resnam(), atom2->atmtype());
       if (_glyco_link_set.find(link_key) == _glyco_link_set.end()) return false;

       return true;
}

bool CovalentLinkage::isIsoPeptideLinkage(const std::string& resName1, const std::string& atomName1, const std::string& resName2, const std::string& atomName2)
{
       if (_iso_peptide_link_set.find(CompositeIndex::getIndex(resName1, atomName1, resName2, atomName2)) != _iso_peptide_link_set.end()) return true;

       if ((_iso_peptide_atom_set.find(CompositeIndex::getIndex(resName1, atomName1)) == _iso_peptide_atom_set.end()) &&
           (_iso_peptide_atom_set.find(CompositeIndex::getIndex(resName2, atomName2)) == _iso_peptide_atom_set.end())) return false;

       std::string standard_residue_name = "";
       std::string standard_atom_name = "";
       std::string other_residue_name = "";
       std::string other_atom_name = "";
       std::string expected_other_atom_type = "";
       if (_iso_peptide_atom_set.find(CompositeIndex::getIndex(resName1, atomName1)) != _iso_peptide_atom_set.end()) {
            standard_residue_name = resName1;
            standard_atom_name = atomName1;
            other_residue_name = resName2;
            other_atom_name = atomName2;
            if (atomName1.substr(0, 1) == "N")
                 expected_other_atom_type = "C";
            else if (atomName1.substr(0, 1) == "C")
                 expected_other_atom_type = "N";
       } else if (_iso_peptide_atom_set.find(CompositeIndex::getIndex(resName2, atomName2)) != _iso_peptide_atom_set.end()) {
            standard_residue_name = resName2;
            standard_atom_name = atomName2;
            other_residue_name = resName1;
            other_atom_name = atomName1;
            if (atomName2.substr(0, 1)  == "N")
                 expected_other_atom_type = "C";
            else if (atomName2.substr(0, 1) == "C")
                 expected_other_atom_type = "N";
       }

       if (standard_residue_name.empty() || standard_atom_name.empty() || other_residue_name.empty() ||
           other_atom_name.empty() || expected_other_atom_type.empty()) return false;

       try {
            const ConnectFormat& drug = _ccDic->find_drug(other_residue_name);

            _insert_B_C_N_atom_set(other_residue_name, drug);

            bool is_linked_to_a_terminal_atom = false;
            const AtomFormat& atom = drug.find_atom(other_atom_name);
            if (atom.atomtype() == expected_other_atom_type) {
                 const std::set<std::string>& terminalAtomSet = _get_B_C_N_atom_set(other_residue_name, expected_other_atom_type);
                 if (terminalAtomSet.find(other_atom_name) != terminalAtomSet.end()) is_linked_to_a_terminal_atom = true;
            }
            if (is_linked_to_a_terminal_atom) {
                 _iso_peptide_link_set.insert(CompositeIndex::getIndex(standard_residue_name, standard_atom_name, other_residue_name, other_atom_name));
                 _iso_peptide_link_set.insert(CompositeIndex::getIndex(other_residue_name, other_atom_name, standard_residue_name, standard_atom_name));
                 return true;
            }
       } catch (const std::exception& exc) {}

       return false;
}

/*
 * Atom-pair overload: extracts pdb_resnam() and atmtype() from both atoms
 * and forwards to the name-based isIsoPeptideLinkage().
 *
 * Both pointers are dereferenced unconditionally and must not be NULL.
 */
bool CovalentLinkage::isIsoPeptideLinkage(RCSB::Atom* atom1, RCSB::Atom* atom2)
{
       const bool linked = isIsoPeptideLinkage(atom1->pdb_resnam(), atom1->atmtype(),
                                               atom2->pdb_resnam(), atom2->atmtype());
       return linked;
}

/*
 * Decides whether the bond (resName1, atomName1) - (resName2, atomName2)
 * joins an atom from one residue's "C" atom set to an atom from the other
 * residue's "N" atom set, in either direction.
 *
 * Positive results are cached in _backbone_peptide_link_set in both
 * orientations, so repeated queries take the fast path. The four atom sets
 * are obtained through _get_B_C_N_atom_set().
 */
bool CovalentLinkage::isBackbonePeptideLinkage(const std::string& resName1, const std::string& atomName1,
                                               const std::string& resName2, const std::string& atomName2)
{
       // Fast path: result already cached.
       const std::string forward_key = CompositeIndex::getIndex(resName1, atomName1, resName2, atomName2);
       if (_backbone_peptide_link_set.find(forward_key) != _backbone_peptide_link_set.end()) return true;

       // All four sets are fetched up front, in this order.
       const std::set<std::string>& c_atoms_of_first = _get_B_C_N_atom_set(resName1, "C");
       const std::set<std::string>& n_atoms_of_first = _get_B_C_N_atom_set(resName1, "N");
       const std::set<std::string>& c_atoms_of_second = _get_B_C_N_atom_set(resName2, "C");
       const std::set<std::string>& n_atoms_of_second = _get_B_C_N_atom_set(resName2, "N");

       const bool first_c_to_second_n =
            (c_atoms_of_first.count(atomName1) != 0) && (n_atoms_of_second.count(atomName2) != 0);
       const bool first_n_to_second_c =
            (n_atoms_of_first.count(atomName1) != 0) && (c_atoms_of_second.count(atomName2) != 0);

       if (!first_c_to_second_n && !first_n_to_second_c) return false;

       // Cache the positive result for both argument orders.
       _backbone_peptide_link_set.insert(CompositeIndex::getIndex(resName1, atomName1, resName2, atomName2));
       _backbone_peptide_link_set.insert(CompositeIndex::getIndex(resName2, atomName2, resName1, atomName1));
       return true;
}

/*
 * Atom-pair overload: extracts pdb_resnam() and atmtype() from both atoms
 * and forwards to the name-based isBackbonePeptideLinkage().
 *
 * Both pointers are dereferenced unconditionally and must not be NULL.
 */
bool CovalentLinkage::isBackbonePeptideLinkage(RCSB::Atom* atom1, RCSB::Atom* atom2)
{
       const bool linked = isBackbonePeptideLinkage(atom1->pdb_resnam(), atom1->atmtype(),
                                                    atom2->pdb_resnam(), atom2->atmtype());
       return linked;
}

/*
 * Caches the atom sets of one dictionary entry under residue_name.
 *
 * The non-empty results of getBackboneAtoms(), getNTerminalAtoms() and
 * getCTerminalAtoms() are stored under the type keys "B", "N" and "C"
 * respectively. Nothing is stored when the residue is already cached or
 * when all three sets are empty.
 */
void CovalentLinkage::_insert_B_C_N_atom_set(const std::string& residue_name, const ConnectFormat& drug)
{
       // Already cached: keep the existing entry untouched.
       if (_ccd_backbone_terminal_atom_mapping.find(residue_name) != _ccd_backbone_terminal_atom_mapping.end()) return;

       std::map<std::string, std::set<std::string> > sets_by_type;

       const std::set<std::string>& backbone_atoms = drug.getBackboneAtoms();
       if (!backbone_atoms.empty()) sets_by_type["B"] = backbone_atoms;

       const std::set<std::string>& n_terminal_atoms = drug.getNTerminalAtoms();
       if (!n_terminal_atoms.empty()) sets_by_type["N"] = n_terminal_atoms;

       const std::set<std::string>& c_terminal_atoms = drug.getCTerminalAtoms();
       if (!c_terminal_atoms.empty()) sets_by_type["C"] = c_terminal_atoms;

       if (sets_by_type.empty()) return;

       _ccd_backbone_terminal_atom_mapping.insert(std::make_pair(residue_name, sets_by_type));
}

const std::set<std::string>& CovalentLinkage::_get_B_C_N_atom_set(const std::string& residue_name, const std::string& type)
{
       std::map<std::string, std::map<std::string, std::set<std::string> > >::const_iterator mpos = _ccd_backbone_terminal_atom_mapping.find(residue_name);
       if (mpos == _ccd_backbone_terminal_atom_mapping.end()) {
            try {
                 const ConnectFormat& drug = _ccDic->find_drug(residue_name);
                 _insert_B_C_N_atom_set(residue_name, drug);
            } catch (const std::exception& exc) {}

            mpos = _ccd_backbone_terminal_atom_mapping.find(residue_name);
       }

       if (mpos == _ccd_backbone_terminal_atom_mapping.end()) return _empty_set;

       std::map<std::string, std::set<std::string> >::const_iterator mpos1 = mpos->second.find(type);
       if (mpos1 != mpos->second.end()) return mpos1->second;

       return _empty_set;
}
