Source code for molecule

# Copyright (c) 2014-2017 Matteo Degiacomi
#
# BiobOx is free software ;
# you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation ;
# either version 2 of the License, or (at your option) any later version.
# BiobOx is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY ;
# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with BiobOx ;
# if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
#
# Author : Matteo Degiacomi, matteothomas.degiacomi@gmail.com

import os
from copy import deepcopy
import numpy as np
import scipy.signal
import pandas as pd

# Definiton of constants for later calculations
epsilon0 = 8.8542 * 10**(-12) # m**-3 kg**-1 s**4 A**2, Permitivitty of free space
kB = 1.3806 * 10**(-23) # m**2 kg s**-2 K-1, Lattice Boltzmann constant
e = 1.602 * 10**(-19) # A s, electronic charge
m = 1 * 10**(-9) # number of nm in 1 m
c = 3.336 * 10**(-30) # conversion from debye to e m
Na = 6.022 * 10**(23) # Avagadros Number

from biobox.classes.structure import Structure
from biobox.lib import e_density

[docs]class Molecule(Structure):
    '''
    Subclass of :func:`Structure <structure.Structure>`, allows reading, manipulating and analyzing molecular structures.
    '''

    chain_names = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
                   'U', 'V', 'W', 'X', 'Y', 'Z', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'a', 'b', 'c', 'd',
                   'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
                   'y', 'z', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0')

    def __init__(self):
        '''
        At instantiation, properties associated to every individual atoms are stored in a pandas Dataframe self.data.
        The columns of the self.data have the following names:
        atom, index, name, resname, chain, resid, beta, occupancy, atomtype, radius, charge.

        self.knowledge contains a knowledge base about atoms and residues properties. Default values are:

        * 'residue_mass' property stores the average mass for most common aminoacids (values from Expasy website)
        * 'atom_vdw' vdw radius of common atoms
        * 'atom_mass' mass of common atoms

        The knowledge base can be edited. For instance, to add information about residue "TST" mass in molecule M type: M.knowledge['mass_residue']["TST"]=142.42
        '''

        super(Molecule, self).__init__(r=np.array([]))

        # knowledge base about atoms and residues properties (entry keys:
        # 'residue_mass', 'atom_vdw', 'atom_, mass' can be edited)
        self.knowledge = {}
        self.knowledge['residue_mass'] = {"ALA": 71.0788, "ARG": 156.1875, "ASN": 114.1038, "ASP": 115.0886, "CYS": 103.1388, "CYX": 103.1388, "GLU": 129.1155, "GLN": 128.1307, "GLY": 57.0519,
                                          "HIS": 137.1411, "HSE": 137.1411, "HSD": 137.1411, "HSP": 137.1411, "HIE": 137.1411, "HID": 137.1411, "HIP": 137.1411, "ILE": 113.1594, "LEU": 113.1594,
                                          "LYS": 128.1741, "MET": 131.1926, "MSE": 131.1926, "PHE": 147.1766, "PRO": 97.1167, "SER": 87.0782, "THR": 101.1051, "TRP": 186.2132, "TYR": 163.1760, "VAL": 99.1326}
        self.knowledge['atom_vdw'] = {'H': 1.20, 'N': 1.55, 'NA': 2.27, 'CU': 1.40, 'CL': 1.75, 'C': 1.70, 'O': 1.52, 'I': 1.98, 'P': 1.80, 'B': 1.85, 'BR': 1.85, 'S': 1.80, 'SE': 1.90,
                                      'F': 1.47, 'FE': 1.80, 'K': 2.75, 'MN': 1.73, 'MG': 1.73, 'ZN': 1.39, 'HG': 1.8, 'XE': 1.8, 'AU': 1.8, 'LI': 1.8, '.': 1.8}
        self.knowledge['atom_ccs'] = {'H': 1.2, 'C': 1.91, 'N': 1.91, 'O': 1.91, 'P': 1.91, 'S': 1.91, '.': 1.91}
        self.knowledge['atom_mass'] = {"H": 1.00794, "D": 2.01410178, "HE": 4.00, "LI": 6.941, "BE": 9.01, "B": 10.811, "C": 12.0107, "N": 14.0067, "O": 15.9994, "F": 18.998403, "NE": 20.18, "NA": 22.989769,
                                       "MG": 24.305, "AL": 26.98, "SI": 28.09, "P": 30.973762, "S": 32.065, "CL": 35.453, "AR": 39.95, "K": 39.0983, "CA": 40.078, "SC": 44.96, "TI": 47.87, "V": 50.94,
                                       "CR": 51.9961, "MN": 54.938045, "FE": 55.845, "CO": 58.93, "NI": 58.6934, "CU": 63.546, "ZN": 65.409, "GA": 69.72, "GE": 72.64, "AS": 74.9216, "SE": 78.96,
                                       "BR": 79.90, "KR": 83.80, "RB": 85.47, "SR": 87.62, "Y": 88.91, "ZR": 91.22, "NB": 92.91, "MO": 95.94, "TC": 98.0, "RU": 101.07, "RH": 102.91, "PD": 106.42,
                                       "AG": 107.8682, "CD": 112.411, "IN": 114.82, "SN": 118.71, "SB": 121.76, "TE": 127.60, "I": 126.90447, "XE": 131.29, "CS": 132.91, "BA": 137.33, "PR": 140.91,
                                       "EU": 151.96, "GD": 157.25, "TB": 158.93, "W": 183.84, "IR": 192.22, "PT": 195.084, "AU": 196.96657, "HG": 200.59, "PB": 207.2, "U": 238.03}
        self.knowledge['atomtype'] = {"C": "C", "CA": "C", "CB": "C", "CG": "C", "CG1": "C", "CG2": "C", "CZ": "C", "CD1": "C", "CD2": "C",
                                      "CD": "C", "CE": "C", "CE1": "C", "CE2": "C", "CE3": "C", "CZ2": "C", "CZ3": "C", "CH2": "C",
                                      "N": "N", "NH1": "N", "NH2": "N", "NZ": "N", "NE": "N", "NE1": "N", "NE2": "N", "ND1": "N", "ND2": "N",
                                      "O": "O", "OG": "O", "OG1": "O", "OG2": "O", "OD1": "O", "OD2": "O", "OE1": "O", "OE2": "O", "OH": "O", "OXT": "O",
                                      "SD": "S", "SG": "S", "H": "H", "HA": "H", "HB1": "H", "HB2": "H", "HE1": "H", "HE2": "H", "HD1": "H", "HD2": "H",
                                      "H1": "H", "H2": "H", "H3": "H", "HH11": "H", "HH12": "H", "HH21": "H", "HH22": "H", "HG1": "H", "HG2": "H", "HE21": "H",
                                      "HE22": "H", "HD11": "H", "HD12": "H", "HD13": "H", "HD21": "H", "HD22": "H", "HG11": "H", "HG12": "H", "HG13": "H",
                                      "HG21": "H", "HG22": "H", "HG23": "H", "HZ2": "H", "HZ3": "H", "HZ": "H", "HA1": "H", "HA2": "H", "HB": "H", "HD3": "H",
                                      "HG": "H", "HZ1": "H", "HE3": "H", "HB3": "H", "HH1": "H", "HH2": "H", "HD23": "H", "HD13": "H", "HE": "H", "HH": "H",
                                      "OC1": "O", "OC2": "O", "OW": "O", "HW1": "H", "HW2": "H", "CH3" : "C", "HH31" : "H", "HH32" : "H", "HH33" : "H",
                                      "C00" : "C", "C01" : "C", "C02" : "C", "C04" : "C", "C06" : "C", "C08" : "C", "H03" : "H", "H05" : "H", "H07" : "H",
                                      "H09" : "H", "H0A" : "H", "H0B" : "H", }

    def __add__(self, other):
        from biobox.classes.multimer import Multimer
        M = Multimer()
        M.load_list([self, other], ["A", "B"])
        M2 = M.make_molecule()
        return M2


[docs]    def know(self, prop):
        '''
        return information from knowledge base

        :param prop: desired property to extract from knowledge base
        :returns: value associated to requested property, or nan if failed
        '''
        if str(prop) in self.knowledge:
            return self.knowledge[str(prop)]
        else:
            raise Exception("entry %s not found in knowledge base!" % prop)

[docs]    def import_pdb(self, pdb, include_hetatm=False):
        '''
        read a pdb (possibly containing containing multiple models).

        Models are split according to ENDMDL and END statement.
        All alternative coordinates are expected to have the same atoms.
        After loading, the first model (M.current_model=0) will be set as active.

        :param pdb: PDB filename
        :param include_hetatm: if True, HETATM will be included (they get skipped if False)
        '''

        try:
            f_in = open(pdb, "r")
        except Exception as ex:
            raise Exception('ERROR: file %s not found!' % pdb)

        # store filename
        self.properties["filename"] = pdb

        data_in = []
        p = []
        r = []
        e = []
        alternative = []
        biomt = []
        symm = []
        for line in f_in:
            record = line[0:6].strip()

            # load biomatrix, if any is present
            if "REMARK 350   BIOMT" in line:
                try:
                    biomt.append(line.split()[4:8])
                except Exception as ex:
                    raise Exception("ERROR: biomatrix format seems corrupted")

            # load symmetry matrix, if any is present
            if "REMARK 290   SMTRY" in line:
                try:
                    symm.append(line.split()[4:8])
                except Exception as ex:
                    raise Exception("ERROR: symmetry matrix format seems corrupted")

            # if a complete model was parsed store all the saved data into
            # self.data entries (if needed) and temporary alternative
            # coordinates list
            if record == "ENDMDL" or record == "END":

                if len(alternative) == 0:

                    # load all the parsed data in superclass data (Dataframe)
                    # and points data structures
                    try:
                        #building dataframe
                        data = np.array(data_in).astype(str)
                        cols = ["atom", "index", "name", "resname", "chain", "resid", "beta", "occupancy", "atomtype"]
                        idx = np.arange(len(data))
                        self.data = pd.DataFrame(data, index=idx, columns=cols)
                        # Set the index numbers to the idx values to avoid hexadecimal counts
                        self.data["index"] = idx

                    except Exception as ex:
                        raise Exception('ERROR: something went wrong when loading the structure %s!\nERROR: are all the columns separated?' %pdb)

                    # saving vdw radii
                    try:
                        self.data['radius'] = np.array(r)
                    except Exception as ex:
                        raise Exception('ERROR: something went wrong when loading the structure %s!\nERROR: are all the columns separated?' %pdb)

                    # save default charge state
                    self.data['charge'] = np.array(e)

                # save 3D coordinates of every atom and restart the accumulator
                try:
                    if len(p) > 0:
                        alternative.append(np.array(p))
                    p = []
                except Exception as ex:
                    raise Exception('ERROR: something went wrong when loading the structure %s!\nERROR: are all the columns separated?' % pdb)

            if record == 'ATOM' or (include_hetatm and record == 'HETATM'):

                # extract xyz coordinates (save in list of point coordinates)
                p.append([float(line[30:38]), float(line[38:46]), float(line[46:54])])

                # if no complete model has been yet parsed, load also
                # information about atoms(resid, resname, ...)
                if len(alternative) == 0:
                    w = []
                    # extract ATOM/HETATM statement
                    w.append(line[0:6].strip())
                    w.append(line[6:12].strip())  # extract atom index
                    w.append(line[12:17].strip())  # extract atomname
                    w.append(line[17:21].strip())  # extract resname
                    w.append(line[21].strip())  # extract chain name
                    w.append(line[22:26].strip())  # extract residue id

                    # extract occupancy
                    try:
                        w.append(float(line[54:60]))
                    except Exception as ex:
                        w.append(1.0)

                    # extract beta factor
                    try:
                        # w.append("{0.2f}".format(float(line[60:66])))
                        w.append(float(line[60:66]))
                    except Exception as ex:
                        w.append(0.0)

                    # extract atomtype
                    try:
                        w.append(line[76:78].strip())
                    except Exception as ex:
                        w.append("")

                    # use atomtype to extract vdw radius
                    try:
                        r.append(self.know('atom_vdw')[line[76:78].strip()])
                    except Exception as ex:
                        r.append(self.know('atom_vdw')['.'])

                    # assign default charge state of 0
                    e.append(0.0)

                    data_in.append(w)

        f_in.close()

        # if p list is not empty, that means that the PDB file does not finish with an END statement (like the ones generated by SBT, for instance).
        # In this case, dump all the remaining stuff into alternate coordinates
        # array and (if needed) into properties dictionary.
        if len(p) > 0:

            # if no model has been yet loaded, save also information in
            # properties dictionary.
            if len(alternative) == 0:

                # load all the parsed data in superclass properties['data'] and
                # points data structures
                try:
                    #building dataframe
                    data = np.array(data_in).astype(str)
                    cols = ["atom", "index", "name", "resname", "chain", "resid", "beta", "occupancy", "atomtype"]
                    idx = np.arange(len(data))
                    self.data = pd.DataFrame(data, index=idx, columns=cols)
                    # Set the index numbers to the idx values to avoid hexadecimal counts
                    self.data["index"] = idx

                except Exception as ex:
                    raise Exception('ERROR: something went wrong when saving data in %s!\nERROR: are all the columns separated?' %pdb)

                try:
                    self.data['radius'] = np.array(r)
                except Exception as ex:
                    raise Exception('ERROR: something went wrong when saving van der Waals radii in %s!\nERROR: are all the columns separated?' % pdb)

                # save default charge state
                self.properties['charge'] = np.array(e)

            # save 3D coordinates of every atom and restart the accumulator
            try:
                if len(p) > 0:
                    alternative.append(np.array(p))
                p = []
            except Exception as ex:
                raise Exception('ERROR: something went wrong when saving coordinates in %s!\nERROR: are all the columns separated?' %pdb)

        # transform the alternative temporary list into a nice multiple
        # coordinates array
        if len(alternative) > 0:
            try:
                alternative_xyz = np.array(alternative).astype(float)
            except Exception as e:
                alternative_xyz = np.array([alternative[0]]).astype(float)
                print('WARNING: found %s models, but their atom count differs' % len(alternative))
                print('WARNING: treating only the first model in file %s' % pdb)
                #raise Exception('ERROR: models appear not to have the same amount of atoms')

            self.add_xyz(alternative_xyz)
        else:
            raise Exception('ERROR: something went wrong when saving alternative coordinates in %s!\nERROR: no model was loaded... are ENDMDL statements there?' % pdb)

        # if biomatrix information is provided, creat
        if len(biomt) > 0:

            # test whether there are enough lines to create biomatrix
            # statements
            if np.mod(len(biomt), 3):
                raise Exception('ERROR: found %s BIOMT entries. A multiple of 3 is expected'%len(biomt))

            b = np.array(biomt).astype(float).reshape((len(biomt) // 3, 3, 4))
            self.properties["biomatrix"] = b

        # if symmetry information is provided, create entry in properties
        if len(symm) > 0:

            # test whether there are enough lines to create biomatrix
            # statements
            if np.mod(len(symm), 3):
                raise Exception('ERROR: found %s SMTRY entries. A multiple of 3 is expected'%len(symm))

            b = np.array(symm).astype(float).reshape((len(symm) // 3, 3, 4))
            self.properties["symmetry"] = b

        #correctly set types of columns requiring other than string
        self.data["resid"] = self.data["resid"].astype(int)
        self.data["index"] = self.data["index"].astype(int)
        self.data["occupancy"] = self.data["occupancy"].astype(float)
        self.data["beta"] = self.data["beta"].astype(float)


[docs]    def import_pqr(self, pqr, include_hetatm=False):
        '''
        Read a pqr (possibly containing containing multiple models).

        models are split according to ENDMDL and END statement.
        All alternative coordinates are expected to have the same atoms.
        After loading, the first model (M.current_model=0) will be set as active.

        :param pqr: PQR filename
        :param include_hetatm: if True, HETATM will be included (they get skipped if False)
        '''

        try:
            f_in = open(pqr, "r")
        except Exception as ex:
            raise Exception('ERROR: file %s not found!' % pqr)

        # store filename
        self.properties["filename"] = pqr

        data_in = []
        p = []  # collects coordinates for every model
        r = []  # vdW radii
        e = []  # electrostatics
        alternative = []
        for line in f_in:
            record = line[0:6].strip()
            # if a complete model was parsed store all the saved data into
            # self.properties entries (if needed) and temporary alternative
            # coordinates list
            if record == "ENDMDL" or record == "END":
                if len(alternative) == 0:
                    # load all the parsed data in superclass properties['data']
                    # and points data structures
                    try:
                        #building dataframe
                        data = np.array(data_in).astype(str)
                        cols = ["atom", "index", "name", "resname", "chain", "resid", "beta", "occupancy", "atomtype"]
                        idx = np.arange(len(data))
                        self.data = pd.DataFrame(data, index=idx, columns=cols)
                        self.data["index"] = idx # convert to internal numbering system

                    except Exception as ex:
                        raise Exception('ERROR: something went wrong when loading the structure %s!\nERROR: are all the columns separated?' %pqr)

                    # saving vdw radii
                    try:
                        self.data['radius'] = np.array(r)
                    except Exception as ex:
                        raise Exception('ERROR: something went wrong when loading the structure %s!\nERROR: are all the columns separated?' %pqr)

                    # saving electrostatics
                    try:
                        self.data['charge'] = np.array(e)
                    except Exception as ex:
                        raise Exception('ERROR: something went wrong when loading the structure %s!\nERROR: are all the columns separated?' % pqr)

                # save 3D coordinates of every atom and restart the accumulator
                try:
                    if len(p) > 0:
                        alternative.append(np.array(p))
                    p = []
                except Exception as ex:
                    raise Exception('ERROR: something went wrong when loading the structure %s!\nERROR: are all the columns separated?' %pqr)

            if record == 'ATOM' or (include_hetatm and record == 'HETATM'):

                # extract xyz coordinates (save in list of point coordinates)
                p.append([float(line[30:38]), float(line[38:46]), float(line[46:54])])

                # if no complete model has been yet parsed, load also
                # information about atoms(resid, resname, ...)
                if len(alternative) == 0:

                    # extract charge
                    try:
                        # 54 is separator, 55 is plus/minus
                        e.append(float(line[54:62]))
                    except Exception as ex:
                        e.append(0.0)

                    # extract vdW radius
                    try:
                        r.append(float(line[62:69]))
                    except Exception as ex:
                        r.append(self.know('atom_vdw')['.'])

                    # initialize list
                    w = []

                    # extract ATOM/HETATM statement
                    w.append(line[0:6].strip())
                    w.append(line[6:11].strip())  # extract atom index
                    w.append(line[12:17].strip())  # extract atomname
                    w.append(line[17:20].strip())  # extract resname
                    w.append(line[21].strip())  # extract chain name
                    w.append(line[22:26].strip())  # extract residue id

                    # extract occupancy
                    w.append('1')

                    # extract beta factor
                    w.append('0')

                    # extract atomtype from atomname in BMRB notation
                    # http://www.bmrb.wisc.edu/ref_info/atom_nom.tbl
                    w.append(line[12:17].strip()[0])
                    # w.append(line[76:78].strip())

                    data_in.append(w)

        f_in.close()

        # if p list is not empty, that means that the pqr file does not finish with an END statement (like the ones generated by SBT, for instance).
        # In this case, dump all the remaining stuff into alternate coordinates
        # array and (if needed) into properties dictionary.
        if len(p) > 0:

            # if no model has been yet loaded, save also information in
            # properties dictionary.
            if len(alternative) == 0:

                # load all the parsed data in superclass properties['data'] and
                # points data structures
                try:
                    #building dataframe
                    data = np.array(data_in).astype(str)
                    cols = ["atom", "index", "name", "resname", "chain", "resid", "beta", "occupancy", "atomtype"]
                    idx = np.arange(len(data))
                    self.data = pd.DataFrame(data, index=idx, columns=cols)
                    self.data["index"] = idx # convert to internal numbering system

                except Exception as ex:
                    raise Exception('ERROR: something went wrong when saving data in %s!\nERROR: are all the columns separated?' % pqr)

                try:
                    self.data['radius'] = np.array(r)
                except Exception as ex:
                    raise Exception('ERROR: something went wrong when saving van der Waals radii in %s!\nERROR: are all the columns separated?' %pqr)

            # save 3D coordinates of every atom and restart the accumulator
            try:
                if len(p) > 0:
                    alternative.append(np.array(p))
                p = []
            except Exception as ex:
                raise Exception('ERROR: something went wrong when saving coordinates in %s!\nERROR: are all the columns separated?' %pqr)

        # transform the alternative temporary list into a nice multiple
        # coordinates array
        if len(alternative) > 0:
            try:
                alternative_xyz = np.array(alternative).astype(float)
            except Exception as ex:
                alternative_xyz = np.array([alternative[0]]).astype(float)
                print('WARNING: found %s models, but their atom count differs' % len(alternative))
                print('WARNING: treating only the first model in file %s' % pqr)
                #raise Exception('ERROR: models appear not to have the same amount of atoms')

            self.add_xyz(alternative_xyz)
        else:
            raise Exception('ERROR: something went wrong when saving alternative coordinates in %s!\nERROR: no model was loaded... are ENDMDL statements there?' % pqr)

        #correctly set types of columns requiring other than string
        self.data["resid"] = self.data["resid"].astype(int)
        self.data["index"] = self.data["index"].astype(int)
        self.data["occupancy"] = self.data["occupancy"].astype(float)
        self.data["beta"] = self.data["beta"].astype(float)

[docs]    def assign_atomtype(self):
        '''
        guess atomtype from atom names
        '''

        a_type = []
        for i in range(0, len(self.data), 1):
            atom = self.data["name"].values[i]
            try:
                a_type.append(self.knowledge["atomtype"][atom])
            except Exception as ex:
                a_type.append("")

        self.data["atomtype"] = a_type

[docs]    def get_vdw_density(self, buff=3, step=0.5, kernel_half_width=10):
        '''
        generate density map from points based on the van der Waals readius of the atoms

        :param buff: Buffer used to create the boundaries of the density map
        :param step: Stepsize for creating the density object
        :param kernel_half_width: Kernel half width of the gaussian kernel, will be scaled by atom specific sigma
        :returns: :func:`Density <density.Density>` object, containing a density map
        '''
        atomdata = [["C", 1.7, 1.455, 0.51], ["H", 1.2, 0.72, 0.25],
                    ["O", 1.52, 1.15, 0.42], ["S", 1.8, 1.62, 0.54],
                    ["N", 1.55, 1.2, 0.44]]

        dens = []
        for d in atomdata:
            # select atomtype and point only to those coordinates
            pts = self.points[[self.data["atomtype"].values == d[0]]]

            # if there are atoms from a certain type
            if len(pts) > 0:
                # use the standard density calculation with a atom-type
                # specific sigma value
                D = self.get_density(buff=buff, step=step, kernel_half_width=kernel_half_width, sigma=d[2])
                # export the np-array density map for summing
                d_tmp = D.properties["density"]

            # print 'atom %s has a maximum density of %s, occurs %s times in the protein and the density map has a shape of %s'%(d[0], np.max(d_tmp), len(pts), d_tmp.shape)
            # print 'one entry in d_tmp: \n%s'%d_tmp[35][27][20]
            # print np.unravel_index(d_tmp.argmax(), d_tmp.shape)
            # initialize the dens-list if it's empty
            if len(dens) == 0:
                dens = deepcopy(d_tmp)
            # sum up the densities from all atom types
            else:
                dens += d_tmp  # dens is a 3d numpy array with point intensities

        # print 'one entry in dens is: \n%s'%dens[35][27][20]

        from biobox.classes.density import Density
        D = Density()
        D.properties['density'] = dens
        D.properties['size'] = np.array(dens.shape)
        D.properties['origin'] = np.mean(pts, axis=0) - step * np.array(dens.shape) / 2.0
        D.properties['delta'] = np.identity(3) * step
        D.properties['format'] = 'dx'
        D.properties['filename'] = ''
        D.properties["sigma"] = np.std(dens)

        return D

[docs]    def get_electrostatics(self, step=1.0, buff=3, threshold=0.01, vdw_kernel_half_width=5, elect_kernel_half_width=12, chain='*', clear_mass=True):
        '''
        generate electrostatics map from points

        :param step: size of cubic voxels, in Angstrom
        :param buff: padding to add at points cloud boundaries
        :param threshold: Threshold used for removing mass occupied space from the electron density map
        :param vdw_kernel_half_width: kernel half width, in voxels
        :param elect_kernel_half_width: kernel half width, in voxels
        :param chain: select chain to use, default all chains
        :returns: positive :func:`Density <density.Density>` object
        :returns: negative :func:`Density <density.Density>` object
        :returns: mass density object
        '''

        pts, idx = self.atomselect(chain, '*', '*', get_index=True)

        try:
            # numpy array of charges [c1, c2, c3, ...]
            charges = self.data['charge'].values[idx]
        except Exception as e:
            raise Exception('ERROR: No charges associated with %s' % self)

        charges = np.reshape(charges, (len(charges), 1))

        # numpy 2d-array of shape (:, 4): [[x, y, z, charge], [x, y, z,
        # charge], ...]
        c_atoms = np.hstack((pts, charges))

        k = 8.9875517873681764  # Coulomb's constant in nN

        # rectangular box boundaries
        bnds = np.array([[np.min(pts[:, 0]) - buff, np.max(pts[:, 0]) + buff],
                         [np.min(pts[:, 1]) - buff, np.max(pts[:, 1]) + buff],
                         [np.min(pts[:, 2]) - buff, np.max(pts[:, 2]) + buff]])

        xax = np.arange(bnds[0, 0], bnds[0, 1] + step, step)
        yax = np.arange(bnds[1, 0], bnds[1, 1] + step, step)
        zax = np.arange(bnds[2, 0], bnds[2, 1] + step, step)

        # create empty box
        d = np.zeros((len(xax), len(yax), len(zax)))

        # place Kronecker deltas in mesh grid -> discretes point coordinates
        # into mesh grid with an intensity of charge
        for atom in c_atoms:
            xpos = np.argmin(np.abs(xax - atom[0]))
            ypos = np.argmin(np.abs(yax - atom[1]))
            zpos = np.argmin(np.abs(zax - atom[2]))
            d[xpos, ypos, zpos] = atom[3]

        # initialize 3d kernel_box
        # how many steps are needed to reach kernel half width in angstroms
        l_kernel = int(2 * elect_kernel_half_width / step)
        # the kernel is an empty box...
        kernel = np.zeros((l_kernel, l_kernel, l_kernel))
        it = np.nditer(kernel, flags=['multi_index'])
        while not it.finished:
            x_index = it.multi_index[0]  # indices
            y_index = it.multi_index[1]
            z_index = it.multi_index[2]
            x_coord = x_index * step - (l_kernel / 2)  # real space coordinates
            y_coord = y_index * step - (l_kernel / 2)
            z_coord = z_index * step - (l_kernel / 2)
            distance = np.sqrt(x_coord *x_coord + y_coord *y_coord + z_coord *z_coord)
            if distance > 0.9:  # ... where a hyperbola will be created only outside of H-vdW and inside of relevant kernel-half-width distance
                kernel[x_index, y_index, z_index] = k / distance

            it.iternext()

        # convolve point mesh with 3d coulomb hyperbola
        e = scipy.signal.fftconvolve(d, kernel, mode='same')

        # define mass-occupied space
        # mass_density is Density-object, buff=buff makes shure that the
        # density maps have the same dimensions
        mass_density = self.get_vdw_density(
            buff=buff, step=step, kernel_half_width=vdw_kernel_half_width)

        if clear_mass:
            d_map = mass_density.return_density_map()

        # initialize counter to count the amount of removed points
        i = 0
        # initialize the iterator for said function
        it = np.nditer(e, flags=['multi_index'])
        while not it.finished:
            x_index = it.multi_index[0]  # indices
            y_index = it.multi_index[1]
            z_index = it.multi_index[2]
            x_coord = xax[x_index]  # real space coordinates
            y_coord = yax[y_index]
            z_coord = zax[z_index]
            if d_map[x_index, y_index, z_index] > threshold:
                i += 1
                e[x_index, y_index, z_index] = 0

            it.iternext()

        print('removed %s points due to van der Waals clashing' % i)

        # split the density into two maps
        e_pos = deepcopy(e)
        e_neg = deepcopy(e)
        e_pos[np.where(e_pos < 0)] = 0
        e_neg[np.where(e_neg >= 0)] = 0
        # changes sign of negative array for better visualization
        e_neg = np.negative(e_neg)

        # prepare density data structure for both positive and negative maps at
        # once
        from biobox.classes.density import Density
        D_pos = Density()
        D_neg = Density()
        D_pos.properties['density'] = e_pos
        D_neg.properties['density'] = e_neg
        D_pos.properties['size'] = D_neg.properties['size'] = np.array(e.shape)
        D_pos.properties['origin'] = D_neg.properties['origin'] = np.mean(self.points, axis=0) - step * np.array(e.shape) / 2.0
        D_pos.properties['delta'] = D_neg.properties['delta'] = np.identity(3) * step
        D_pos.properties['format'] = D_neg.properties['format'] = 'dx'
        D_pos.properties['filename'] = D_neg.properties['filename'] = ''
        D_pos.properties["sigma"] = np.std(e_pos)
        D_neg.properties["sigma"] = np.std(e_neg)

        return D_pos, D_neg, mass_density

    def _apply_matrices(self, mats):
        '''
        replicate molecule according to list of transformation matrices
        '''

        xyzall = []
        #data = []
        # create new data entry (renumber indices, reassign chain name)
        data = np.empty([0, 9])

        # replicate original coordinates applying transformations
        cnt = 0
        for m in mats:
            xyz2 = self.points.copy() + m[:, 3]  # translate
            xyz2 = np.dot(xyz2, m[:, 0:3])  # rotate
            xyzall.extend(xyz2)  # merge

            #assign a specific name to the new chain
            #newdata = deepcopy(self.data)
            #newdata["chain"] = self.chain_names[cnt]

            #if cnt == 0:
            #    data = [newdata]
            #else:
            #    data.append(newdata)

            data_tmp = self.data[[
                "atom", "index", "name", "resname", "chain",
                "resid", "beta", "occupancy", "atomtype"]].values

            data_tmp[:, 4] = self.chain_names[cnt]
            data = np.concatenate((data, data_tmp))

            cnt += 1

        # temporary vectorized hexadecimal maker, in case there are more than
        # 9999 atoms
        def dohex(number):
            return hex(number).split('x')[1]

        vhex = np.vectorize(dohex)

        indices = np.linspace(1, len(data), len(data)).astype(int)
        idx = vhex(indices)
        cols = ["atom", "index", "name", "resname", "chain", "resid", "beta", "occupancy", "atomtype"]

        M2 = Molecule()
        M2.coordinates = np.array([xyzall])
        M2.data = pd.DataFrame(data, index=idx, columns=cols)
        M2.properties['center'] = M2.get_center()

        return M2

[docs]    def apply_biomatrix(self):
        '''
        if biomatrix information is provided, generate a new molecule with all symmetry operators applied.

        :returns: new Molecule containing several copies of the current Molecule, arranged according to BIOMT statements contained in pdb, or -1 if no transformation matrix is provided
        '''

        # if no biomatrix statement is found, return with error
        if "biomatrix" not in self.properties:
            raise Exception("ERROR: no biomatrix found in pdb %s" %self.properties["filename"])

        return self._apply_matrices(self.properties["biomatrix"])

[docs]    def apply_symmetry(self):
        '''
        if symmetry information is provided, generate a new molecule with all symmetry operators applied.

        :returns: new Molecule containing several copies of the current Molecule, arranged according to SMTRY statements contained in pdb, or -1 if no transformation matrix is provided
        '''

        # if no symmetry statement is found, return with error
        if "symmetry" not in self.properties:
            raise Exception("ERROR: no symmetry matrix found in pdb %s" %self.properties["filename"])

        return self._apply_matrices(self.properties["symmetry"])

[docs]    def import_gro(self, filename):
        '''
        read a gro possibly containing multiple structures.

        :param filename: name of .gro file to import
        '''

        if not os.path.isfile(filename):
            raise Exception("ERROR: %s not found!" % filename)

        self.clear()

        # print "\n> loading %s..."%filename
        fin = open(filename, "r")

        line = fin.readline()

        d_data = []
        b = []
        while line:
            cnt = 0
            d = []
            atoms = int(fin.readline())
            d_data = []
            while cnt < atoms:
                w = fin.readline()
                # Read array as defined by .gro style characters (res int, res, atomtype, int, x_coord, y_coord, z_coord)
                w = [w[0:5].strip(), w[5:10].strip(), w[10:15].strip(), w[15:20].strip(), w[20:28].strip(), w[28:36].strip(), w[36:44].strip()]
                resname = w[1]; resnumber=w[0]

                # read data useful for indexing (guess what is missing)
                d_data.append(["ATOM", w[3], w[2], resname, "A", resnumber, "0.0", "0.0", ""])
                d.append([w[4], w[5], w[6]])
                cnt += 1

            # add one conformation (in Angstrom) and store its box size in
            # temporary list
            self.add_xyz(np.array(d).astype(float) * 10)
            b.append(fin.readline().split())

            line = fin.readline()  # attempt to get header of next frame

        # store data information and box size for every frame
        self.properties['box'] = np.array(b).astype(float) * 10


        #building dataframe
        data = np.array(d_data).astype(str)
        cols = ["atom", "index", "name", "resname", "chain", "resid", "beta", "occupancy", "atomtype"]
        idx = np.arange(len(data))
        self.data = pd.DataFrame(data, index=idx, columns=cols)

        #add additional information about van der waals radius and atoms charge
        self.data['radius'] = np.ones(len(d_data)) * self.know('atom_vdw')['.']
        self.data['charge'] = np.zeros(len(d_data))

        fin.close()


[docs]    def get_atoms_ccs(self):
        '''
        return array with atomic CCS radii of every atom in molecule

        :returns: CCS in Angstrom^2
        '''

        if "atom_ccs" in self.data.columns:
            return self.data["atom_ccs"]

        ccs = np.ones(len(self.points)) * self.know("atom_ccs")["."]
        for e in self.know("atom_ccs").keys():
            if "e" != ".":
                ccs[self.data["atomtype"].values == e] = self.knowledge["atom_ccs"][e]

        self.data["atom_ccs"] = ccs

        return ccs

[docs]    def get_data(self, indices=[], columns=[]):
        '''
        Return information about atoms of interest (i.e., slice the data DataFrame)

        :param indices: list of indices, if not provided all atom data is returned
        :param columns: list of columns (e.g. ["resname", "resid", "chain"]), if not provided all columns are returned
        :returns: numpy array containing a slice of molecule's data
        '''

        if len(indices) == 0 and len(columns) == 0:
            return self.data.values

        elif len(indices) == 0 and len(columns) != 0:
            return self.data[columns].values

        elif len(indices) != 0 and len(columns) == 0:
            return self.data.loc[indices].values

        else:
            return self.data.loc[indices, columns].values


[docs]    def set_data(self, value, indices=[], columns=[]):
        '''
        Return information about atoms of interest (i.e., slice the data DataFrame)

        :param indices: list of indices, if not provided all atom data is returned
        :param columns: list of columns (e.g. ["resname", "resid", "chain"]), if not provided all columns are returned
        :returns: numpy array containing a slice of molecule's data
        '''

        if len(indices) == 0 and len(columns) == 0:
            raise Exception("indices, columns or both should be provided")

        elif len(indices) == 0 and len(columns) != 0:
            self.data[columns] = value

        elif len(indices) != 0 and len(columns) == 0:
            self.data.loc[indices] = value

        else:
            self.data.loc[indices, columns] = value


[docs]    def query(self, query_text, get_index=False):
        '''
        Select specific atoms in a multimer un the basis of a text query.

        :param query_text: string selecting atoms of interest. Uses the pandas query syntax, can access all columns in the dataframe self.data.
        :param get_index: if set to True, returns the indices of selected atoms in self.points array (and self.data)
        :returns: coordinates of the selected points (in a unique array) and, if get_index is set to true, a list of their indices in subunits' self.points array.
        '''

        idx = self.data.query(query_text).index.values

        if get_index:
            return [self.points[idx], idx]
        else:
            return self.points[idx]


[docs]    def atomselect(self, chain, res, atom, get_index=False, use_resname=False):
        '''
        Select specific atoms in the protein providing chain, residue ID and atom name.

        :param chain: selection of a specific chain name (accepts * as wildcard). Can also be a list or numpy array of strings.
        :param res: residue ID of desired atoms (accepts * as wildcard). Can also be a list or numpy array of of int.
        :param atom: name of desired atom (accepts * as wildcard). Can also be a list or numpy array of strings.
        :param get_index: if set to True, returns the indices of selected atoms in self.points array (and self.data)
        :param use_resname: if set to True, consider information in "res" variable as resnames, and not resids
        :returns: coordinates of the selected points and, if get_index is set to true, their indices in self.points array.
        '''

        # chain name boolean selector
        if isinstance(chain, str):
            if chain == '*':
                chain_query = np.array([True] * len(self.points))
            else:
                chain_query = self.data["chain"].values == chain

        elif isinstance(chain, list) or type(chain).__module__ == 'numpy':
            chain_query = self.data["chain"].values == chain[0]
            for c in range(1, len(chain), 1):
                chain_query = np.logical_or(chain_query, self.data["chain"].values == chain[c])
        else:
            raise Exception("ERROR: wrong type for chain selection. Should be str, list, or numpy")

        if isinstance(res, str):
            if res == '*':
                res_query = np.array([True] * len(self.points))
            elif use_resname:
                res_query = self.data["resname"].values == res
            else:
                res_query = self.data["resid"].values == res

        elif isinstance(res, int):
            if use_resname:
                res_query = self.data["resname"].values == str(res)
            else:
                res_query = self.data["resid"].values == res

        elif isinstance(res, list) or type(res).__module__ == 'numpy':
            if use_resname:
                res_query = self.data["resname"].values == str(res[0])
            else:
                res_query = self.data["resid"].values == res[0]

            for r in range(1, len(res), 1):
                if use_resname:
                    res_query = np.logical_or(res_query, self.data["resname"].values == str(res[r]))
                else:
                    res_query = np.logical_or(res_query, self.data["resid"].values == res[r])

        else:
            raise Exception("ERROR: wrong type for resid selection. Should be int, list, or numpy")

        # atom name boolean selector
        if isinstance(atom, str):
            if atom == '*':
                atom_query = np.array([True] * len(self.points))
            else:
                atom_query = self.data["name"].values == atom
        elif isinstance(atom, list) or type(atom).__module__ == 'numpy':
            atom_query = self.data["name"].values == atom[0]
            for a in range(1, len(atom), 1):
                atom_query = np.logical_or(atom_query, self.data["name"].values == atom[a])
        else:
            raise Exception("ERROR: wrong type for atom selection. Should be str, list, or numpy")

        # slice data array and return result (colums 5 to 7 contain xyz coords)
        query = np.logical_and(np.logical_and(chain_query, res_query), atom_query)


        if get_index:
            return [self.points[query], np.where(query == True)[0]]
        else:
            return self.points[query]

[docs]    def atomignore(self, chain, res, atom, get_index=False, use_resname=False):
        '''
        Select specific atoms that do not match a specific query (chain, residue ID and atom name).
        Useful to remove from a molecule atoms unwanted for further analysis, alternative conformations, etc...

        :param chain: chain name (accepts * as wildcard). Can also be a list or numpy array of strings.
        :param res: residue ID (accepts * as wildcard). Can also be a list or numpy array of of int.
        :param atom: atom name (accepts * as wildcard). Can also be a list or numpy array of strings.
        :param get_index: if set to True, returns the indices of atoms in self.points array (and self.data)
        :param use_resname: if set to True, consider information in "res" variable as resnames, and not resids
        :returns: coordinates of the selected points not matching the query, if get_index is set to true, their indices in self.points array.
        '''

        #extract indices of atoms matching the query
        idxs = self.atomselect(chain, res, atom, get_index=True, use_resname=use_resname)[1]

        #invert the selection
        idxs2 = []
        for i in range(len(self.points)):
            if i not in idxs:
                idxs2.append(i)

        if get_index:
            return [self.points[idxs2], np.array(idxs2)]
        else:
            return self.points[idxs2]

[docs]    def same_residue(self, index, get_index=False):
        '''
        Select atoms having the same residue and chain as a given atom (or list of atoms)

        :param index indices: of atoms of choice (integer of list of integers)
        :param get_index: if set to True, returns the indices of selected atoms in self.points array (and self.data)
        :returns: coordinates of the selected points and, if get_index is set to true, their indices in self.points array.
        '''

        D = self.data.values
        l = D[index]

        if len(l.shape) == 1:
            l = l.reshape(1, len(l))

        test = np.logical_and(D[:, 4] == l[:, 4], D[:, 5] == l[:, 5])

        idxs = np.where(test)[0]
        if len(idxs) > 0:
            pts = self.points[idxs]
        else:
            pts = []

        if get_index:
            return pts, idxs
        else:
            return pts

[docs]    def same_residue_unique(self, index, get_index=False):
        '''
        Select atoms having the same residue and chain as a given atom (or list of atoms)

        :param index: indices of atoms of choice (integer of list of integers)
        :param get_index: if set to True, returns the indices of selected atoms in self.points array (and self.data)
        :returns: coordinates of the selected points and, if get_index is set to true, their indices in self.points array.
        '''

        try:
            test = len(index)  # this should fail if index is a number
            idlist = index
        except Exception as e:
            idlist = [index]

        D = self.data.values
        pts = []
        idxs = []
        for i in idlist:
            done = False
            j = 0  # starting from same point
            while not done:

                if i - j < 0:
                    done = True

                elif D[i, 4] == D[i - j, 4] and D[i, 5] == D[i - j, 5]:

                    if len(idxs) != 0 and i - j not in idxs:
                        pts.append(self.points[i - j])
                        idxs.append(i - j)
                    elif i - j not in idxs:
                        pts = [self.points[i - j].copy()]
                        idxs = [i - j]

                    j += 1

                else:
                    done = True

            j = 1
            done = False
            while not done:

                if i + j == len(self.points):
                    done = True

                elif D[i, 4] == D[i + j, 4] and D[i, 5] == D[i + j, 5]:

                    if len(idxs) != 0 and i + j not in idxs:
                        pts.append(self.points[i + j])
                        idxs.append(i + j)
                    elif i + j not in idxs:
                        pts = [self.points[i + j].copy()]
                        idxs = [i + j]

                    j += 1

                else:
                    done = True

        if get_index:
            return np.array(pts), np.array(idxs)
        else:
            return np.array(pts)

[docs]    def get_subset(self, idxs, conformations=[]):
        '''
        Return a :func:`Molecule <molecule.Molecule>` object containing only the selected atoms and frames

        :param ixds: atoms to extract
        :param conformations: frames to extract (by default, all)
        :returns: :func:`Molecule <molecule.Molecule>` object
        '''

        # if a subset of all available frames is requested to be written,
        # select them first
        if len(conformations) == 0:
            frames = range(0, len(self.coordinates), 1)
        else:
            if np.max(conformations) < len(self.coordinates):
                frames = conformations
            else:
                raise Exception("ERROR: requested coordinate index %s, but only %s are available" %(np.max(conformations), len(self.coordinates)))

        idx = np.arange(len(idxs))

        # create molecule, and push created data information
        M = Molecule()
        postmp = self.coordinates[:, idxs]
        M.coordinates = postmp[frames]
        M.data = self.data.loc[idxs]
        M.data = M.data.reset_index(drop=True)
        M.data["index"] = idx
        M.current = 0
        M.points = M.coordinates[M.current]

        M.properties['center'] = M.get_center()

        return M

[docs]    def guess_chain_split(self, distance=3, use_backbone=True):
        '''
        reassign chain name, using distance cutoff (cannot be undone).
        If two consecutive atoms are beyond a cutoff, a new chain is assigned.

        :param distance: distance cutoff distanceR: no atomtype found!

        :param use_backbone: if True, splitting will be performed considering backbone atoms (N and C), all atoms in a sequence otherwise
        '''

        # wipe current chain assignment
        self.data["chain"] = ""

        # identify different chains
        intervals = [0]

        if not use_backbone:
            for i in range(len(self.coordinates[0]) - 1):
                dist = np.sqrt(np.dot(self.points[i] - self.points[i + 1], self.points[i] - self.points[i + 1]))
                if dist > distance:
                    intervals.append(i + 1)

        else:
            #aminoacids start with N. Find where a C is too far from the next N.
            posN, idxN = self.atomselect("*", "*", "N", get_index=True)
            posC = self.atomselect("*", "*", "C")
            for i in range(len(idxN)-1):
                dist = np.sqrt(np.dot(posC[i] - posN[i+1], posC[i] - posN[i+1]))
                if dist > distance:
                    intervals.append(idxN[i+1])

        intervals.append(len(self.coordinates[0]))

        # separate chains
        for i in range(len(intervals) - 1):
            thepos = i % len(self.chain_names)
            self.data.loc[intervals[i]:intervals[i + 1], "chain"] = self.chain_names[thepos]

        return len(intervals) - 1, intervals

[docs]    def get_pdb_data(self, index=[]):
        '''
        aggregate data and point coordinates, and return in a unique data structure

        Returned data is a list containing strings for points data and floats for point coordinates
        in the same order as a pdb file, i.e.
        ATOM/HETATM, index, name, resname, chain name, residue ID, x, y, z, occupancy, beta factor, atomtype.

        :returns: list aggregated data and coordinates for every point, as string.
        '''

        if len(index) == 0:
            index = range(0, len(self.points), 1)

        # create a list containing all infos contained in pdb (point
        # coordinates and properties)
        d = []
        for i in index:
            d.append([self.data["atom"].values[i],
                      self.data["index"].values[i],
                      self.data["name"].values[i],
                      self.data["resname"].values[i],
                      self.data["chain"].values[i],
                      self.data["resid"].values[i],
                      self.points[i, 0],
                      self.points[i, 1],
                      self.points[i, 2],
                      self.data["beta"].values[i],
                      self.data["occupancy"].values[i],
                      self.data["atomtype"].values[i]])

        return d

[docs]    def write_pdb(self, outname, conformations=[], index=[]):
        '''
        overload superclass method for writing (multi)pdb.

        :param outname: name of pdb file to be generated.
        :param index: indices of atoms to write to file. If empty, all atoms are returned. Index values obtaineable with a call like: index=molecule.atomselect("A", [1, 2, 3], "CA", True)[1]
        :param conformations: list of conformation indices to write to file. By default, a multipdb with all conformations will be produced.
        '''

        # store current frame, so it will be reestablished after file output is
        # complete
        currentbkp = self.current

        # if a subset of all available frames is requested to be written,
        # select them first
        if len(conformations) == 0:
            frames = range(0, len(self.coordinates), 1)
        else:
            if np.max(conformations) < len(self.coordinates):
                frames = conformations
            else:
                raise Exception("ERROR: requested coordinate index %s, but only %s are available" %(np.max(conformations), len(self.coordinates)))

        f_out = open(outname, "w")

        for f in frames:
            # get all informations from PDB (for current conformation) in a list
            self.set_current(f)
            d = self.get_pdb_data(index)

            # Build our hexidecimal array if num. of atoms > 99999
            idx_val = np.arange(1, len(d) + 1, 1)
            if len(idx_val) > 99999:
                vhex = np.vectorize(hex)
                idx_val = vhex(idx_val)   # convert index values to hexidecimal
                idx_val = [num[2:] for num in idx_val]  # remove 0x at start of hexidecimal number

            for i in range(0, len(d), 1):
                # create and write PDB line
                if d[i][2][0].isdigit():
                    L = '%-6s%5s %-5s%-4s%1s%4s    %8.3f%8.3f%8.3f%6.2f%6.2f          %2s\n' % (d[i][0], idx_val[i], d[i][2], d[i][3], d[i][4], d[i][5], float(d[i][6]), float(d[i][7]), float(d[i][8]), float(d[i][9]), float(d[i][10]), d[i][11])
                else:
                    L = '%-6s%5s %-4s %-4s%1s%4s    %8.3f%8.3f%8.3f%6.2f%6.2f          %2s\n' % (d[i][0], idx_val[i], d[i][2], d[i][3], d[i][4], d[i][5], float(d[i][6]), float(d[i][7]), float(d[i][8]), float(d[i][9]), float(d[i][10]), d[i][11])
                f_out.write(L)

            f_out.write("END\n")

        f_out.close()

        self.set_current(currentbkp)

        return

[docs]    def write_gro(self, outname, conformations=[], index=""):
        '''
        write structure(s) in .gro format.

        :param outname: name of .gro file to be generated.
        :param index: indices of atoms to write to file. If empty, all atoms are returned. Index values obtaineable with a call like: index=molecule.atomselect("A", [1, 2, 3], "CA", True)[1]
        :param conformations: list of conformation indices to write to file. By default, all conformations will be returned.
        '''

        # store current frame, so it will be reestablished after file output is
        # complete
        currentbkp = self.current

        # if a subset of all available frames is requested to be written,
        # select them first
        if len(conformations) == 0:
            frames = range(0, len(self.coordinates), 1)
        else:
            if np.max(conformations) < len(self.coordinates):
                frames = conformations
            else:
                raise Exception("ERROR: requested coordinate index %s, but only %s are available" %(np.max(conformations), len(self.coordinates)))

        f_out = open(outname, "w")
        for f in frames:
            # get all informations from PDB (for current conformation) in a
            # list
            self.set_current(f)

            # ATOM/HETATM, index, atom name, resname, chain name, residue ID, x,
            # y, z, beta factor, occupancy, atomtype
            d = self.get_pdb_data(index)
            f_out.write("%s\n" % outname.split(".")[0])
            f_out.write("%s\n" % len(d))
            for i in range(0, len(d), 1):
                # create and write .gro line
                L = '%5d%-5s%5s%5d%8.3f%8.3f%8.3f\n' % (int(d[i][5]), d[i][3], d[i][2], int(d[i][1]), float(d[i][6]) / 10.0, float(d[i][7]) / 10.0, float(d[i][8]) / 10.0)
                f_out.write(L)

            if "box" in self.properties:
                b = self.properties["box"][f] / 10.0
            else:
                minpos = np.min(self.points, axis=0) / 10.0
                b = np.max(self.points, axis=0) - minpos / 10.0

            formatting = ""
            for item in b:
                formatting+="%10.5f"
            formatting+="\n"

            f_out.write(formatting%tuple(b))

        f_out.close()
        self.set_current(currentbkp)
        return

[docs]    def beta_factor_from_rmsf(self, indices=-1, step=1):
        '''
        estimate atoms beta factor on the base of their RMSF.

        :param indices: indices of atoms of interest. If not set all atoms will be considered.
        :param step: timestep between two conformations (useful when using conformations extracted from molecular dynamics)
        '''

        try:
            rmsf = self.rmsf(indices, step)
            return 8.0 * (np.pi**2) * (rmsf**2) / 3.0
        except Exception as ex:
            raise Exception('ERROR: could not calculate RMSF!')

[docs]    def rmsf_from_beta_factor(self, indices=[]):
        '''
        calculate RMSF from atoms beta factors.

        :param indices: indices of atoms of interest. If not set all atoms will be considered.
        '''

        try:
            if len(indices) == 0:
                b = self.data["beta"].values
            else:
                b = self.data["beta"].values[indices]

            return np.sqrt(b * 3 / (8 * np.pi * np.pi))

        except Exception as ex:
            raise Exception('ERROR: beta factors missing?')

[docs]    def get_mass_by_residue(self, skip_resname=[]):
        '''
        Compute protein mass using residues (i.e. account also for atoms not present in the structure)

        Sum the average mass all every residue (using a knowledge base of atom masses in Dalton)
        The knowledge base can be expanded or edited by adding entries to the molecule's mass dictionary, e.g. to add the residue "TST" mass in molecule M type: M.knowledge['mass_residue']["TST"]=142.42

        :param skip_resname: list of resnames to skip. Useful to exclude ions water or other ligands from the calculation.
        :returns: mass of molecule in Dalton
        '''
        #@todo mass of N and C termini to add for every chain

        mass = 0
        chains = np.unique(self.data["chain"].values)
        for chainname in chains:
            # for every chain, get a list of all its (unique) resids
            indices = self.atomselect(chainname, "*", "*", True)[1]
            resids = np.unique(self.data['resid'].values[indices])

            for r in resids:
                # for every residue in the chain, get the index of its first
                # atom, and extract its associated resname
                index = self.atomselect(chainname, int(r), "*", True)[1]
                resname = self.data['resname'].values[index[0]]

                if resname not in skip_resname:
                    try:
                        # add mass of residue to total mass
                        mass += self.know('residue_mass')[resname]
                    except Exception as ex:
                        #@todo: if residue is not known, why not summing constituent atoms masses, warning the user that it's an estimation?
                        raise Exception("ERROR: mass for resname %s is unknown!\nInsert a key in protein\'s masses dictionary knowledge['residue_mass'] and retry!\nex.: protein.knowledge['residue_mass'][\"TST\"]=142.42" %resname)

        return mass

[docs]    def get_mass_by_atom(self, skip_resname=[]):
        '''
        compute protein mass using atoms in pdb

        sum the mass of all atoms (using a knowledge base of atom masses in Dalton)
        The knowledge base can be expanded or edited by adding or editing entries to the molecule's mass dictionary, e.g. to add the atom "PI" mass in molecule M type: M.knowledge['atom_mass']["PI"]=3.141592

        :param skip_resname: list of resnames to skip. Useful to exclude ions water or other ligands from the calculation.
        :returns: mass of molecule in Dalton
        '''

        mass = 0
        for i in range(0, len(self.data), 1):
            resname = self.data["resname"].values[i]
            atomtype = self.data["atomtype"].values[i]

            if resname not in skip_resname:
                try:
                    mass += self.know('atom_mass')[atomtype]
                except Exception as e:
                    if atomtype == "":
                        print(self.data.values[i:i+40])
                        raise Exception("ERROR: no atomtype found!")
                    else:
                        raise Exception("ERROR: mass for atom %s is unknown!\nInsert a key in protein\'s masses dictionary knowledge['atom_mass'] and retry!\nex.: protein.knowledge['atom_mass'][\"PI\"]=3.141592" %atomtype)

        return mass

[docs]    def s2(self, atomname1="N", atomname2="H"):
        '''
        compute s2, given two atoms defining the vector of interest.

        :param atomname1: name of the first atom
        :param atomname2: name of the second atom
        :returns: data numpy array containing information about residues for which measuring has been performed (i.e.[chain, resid])
        :returns: s2 s2 of residues for which both provided input atoms have been found
        '''

        Nidx = self.atomselect("*", "*", atomname1, get_index=True)[1]
        Hidx = self.atomselect("*", "*", atomname2, get_index=True)[1]

        if len(Nidx) == 0:
            raise Exception("ERROR: no atom name %s found!"%atomname1)

        if len(Hidx) == 0:
            raise Exception("ERROR: no atom name %s found!"%atomname2)

        Ndata = self.data.loc[Nidx, ["chain", "resid"]].values
        Hdata = self.data.loc[Hidx, ["chain", "resid"]].values

        a1 = []
        a2 = []
        d = []
        for i in range(0, len(Ndata), 1):
            j = np.where(
                np.logical_and(
                    Hdata[:, 0] == Ndata[i, 0],
                    Hdata[:, 1] == Ndata[i, 1]))[0]

            if len(j) == 1:
                idx1 = Nidx[i]
                idx2 = Hidx[j[0]]
                a1.append(self.coordinates[:, idx1])
                a2.append(self.coordinates[:, idx2])
                d.append(Ndata[i])

        atoms1 = np.array(a1)
        atoms2 = np.array(a2)
        data = np.array(d)

        # iterate over every residue
        s2_summary = []
        for j in range(atoms1.shape[0]):

            dx = atoms2[j, :, 0] - atoms1[j, :, 0]
            dy = atoms2[j, :, 1] - atoms1[j, :, 1]
            dz = atoms2[j, :, 2] - atoms1[j, :, 2]

            # create list of unit vectors
            dnorm = np.sqrt(dx**2 + dy**2 + dz**2)
            dx /= dnorm
            dy /= dnorm
            dz /= dnorm

            d = 0
            d += (np.sum(dx * dx) / atoms1.shape[1])**2
            d += (np.sum(dx * dy) / atoms1.shape[1])**2
            d += (np.sum(dx * dz) / atoms1.shape[1])**2
            d += (np.sum(dy * dx) / atoms1.shape[1])**2
            d += (np.sum(dy * dy) / atoms1.shape[1])**2
            d += (np.sum(dy * dz) / atoms1.shape[1])**2
            d += (np.sum(dz * dx) / atoms1.shape[1])**2
            d += (np.sum(dz * dy) / atoms1.shape[1])**2
            d += (np.sum(dz * dz) / atoms1.shape[1])**2

            s2_summary.append(0.5 * (3 * d - 1))

        s2 = np.array(s2_summary)

        return data, s2


[docs]    def get_secondary_structure(self, dssp_path=''):
        '''
        compute the protein's secondary structure, calling DSSP
        :param dssp_path: DSSP executable (path and filename). If not provided, the default behaviour is to seek for this information in the environment variable DSSPPATH
        :returns: numpy array of characters, with one-letter-coded secondary sctructure according to DSSP.
        '''
        #dssp="~/bin/dssp-2.0.4-linux-amd64"
        if dssp_path == '':
            try:
                dssp_path = os.environ['DSSPPATH']
            except KeyError:
                raise Exception("DSSPPATH environment variable undefined")

        # generate temporary PDB and calculate secondary structure using DSSP
        self.write_pdb("tmp.pdb", conformations=[self.current])

        #TMP: assign all atoms to structure
        #subprocess.check_call('~/bin/amber16_tmp/bin/tleap -f build > /dev/null', shell=True)
        try:
            import subprocess
            subprocess.check_call("%s -i tmp.pdb -o result.dssp"%dssp_path, shell=True)
            fin=open("result.dssp","r")
        except Exception as e:
            raise Exception("Could not calculate secondary structure! %s"%e)

        readit=False
        secstruct=[]
        for line in fin:

            if readit:
                try:
                    ss = line[16]
                    if line[16] == " ":
                        ss = "-"

                    secstruct.append(ss)
                except:
                    continue

            if "#" in line:
                readit=True

        fin.close()

        # clean temporary files
        os.remove("result.dssp")
        os.remove("tmp.pdb")

        return np.array(secstruct) #(secstruct[0:210])


[docs]    def get_couples(self, idx, cutoff):
        '''
        given a list of indices, compute the all-vs-all distance and return only couples below a given cutoff distance

        useful for the detection of disulfide bridges or linkable sites via cross-linking (approximation, supposing euclidean distances)'

        :param idx: indices of atoms to check.
        :param cutoff: minimal distance to consider a couple as linkable.
        :returns: nx3 numpy array containing, for every valid connection, id of first atom, id of second atom and distance between the two.
        '''

        import biobox.measures.interaction as I

        points1 = self.get_xyz()[idx]

        dist = I.distance_matrix(points1, points1)
        couples = I.get_neighbors(dist, cutoff)

        res = []
        for c in couples.transpose():
            if c[0] > c[1]:
                res.append([idx[c[0]], idx[c[1]], dist[c[0], c[1]]])

        return np.array(res)

[docs]    def match_residue(self, M2, sec = 5):
        '''
        Compares two bb.Molecule() peptide strands and returns the resids within both peptides when the two are homogenous
        beyond a certain secondary structure threashold. The default is 5 amino acids (given by sec) in a row must be identical

        Useful when aligning PDB structures that have been crystallised separately - so one may be missing the odd residue
        or have a few extra at the end.

        :param M2: The second bb.Molecule() to compare with
        :param sec: Number of consecutive amino acids in a row that must match before resid's are recorded
        '''
        # First run the match residue using the expected inputs
        M1_res, M2_res = self._match_residue_maths(M2, sec = sec)

        # Import a check to see if we've correctly counted the residues (e.g. in homodimer case)
        if np.shape(np.unique(M1_res)) != np.shape(np.unique(M2_res)):
            M2_res, M1_res = M2._match_residue_maths(self, sec = sec)

        return M1_res, M2_res


    def _match_residue_maths(self, M2, sec):
        '''
        Does the maths for match_residue. The reason for this additional step is that sometimes
        if the numbering is a bit off between the different proteins (i.e. a shift in the initial
        starting residues) and the protein is a homodimer, we can end up only adding the second
        monomer unit, and sometimes add it twice. Therefore we can compare two runs of this
        for an answer.

        :param M2: The second bb.Molecule() to compare with
        :param sec: Number of consecutive amino acids in a row that must match before resid's are recorded
        '''

        # Get residue names / unique IDs
        M1_reslist = self.data["resname"][self.data["name"] == 'CA'].values
        M2_reslist = M2.data["resname"][M2.data["name"] == 'CA'].values
        M1_resid = self.data["resid"][self.data["name"] == 'CA'].values
        M2_resid = M2.data["resid"][M2.data["name"] == 'CA'].values

        # Remove C or N prefixes
        for cnt, val in enumerate(M1_reslist):
            if len(val) == 4:
                M1_reslist[cnt] = val[1:]
            else:
                continue
        for cnt, val in enumerate(M2_reslist):
            if len(val) == 4:
                M2_reslist[cnt] = val[1:]
            else:
                continue

        # Rename residues temporararily so they match better
        M1_reslist[np.logical_or(np.logical_or(M1_reslist == 'HIE', M1_reslist == 'HIP'), M1_reslist == 'HID')] = 'HIS'
        M2_reslist[np.logical_or(np.logical_or(M2_reslist == 'HIE', M2_reslist == 'HIP'), M2_reslist == 'HID')] = 'HIS'

        M1_reskeep = []
        M2_reskeep = []
        M2_cnt = 0
        M1_cnt = 0

        while M1_cnt < len(M1_reslist):

            # Initial check to see if we have a run of good matches (more than coincidence)
            if np.all(M1_reslist[M1_cnt:(M1_cnt + sec)] == M2_reslist[M2_cnt:(M2_cnt + sec)]):

                while M1_reslist[M1_cnt] == M2_reslist[M2_cnt]:

                    M1_reskeep.append(M1_resid[M1_cnt])
                    M2_reskeep.append(M2_resid[M2_cnt])

                    M2_cnt += 1
                    M1_cnt += 1

                    # Break if we reach the maximum array length limit
                    if M1_cnt == len(M1_reslist) or M2_cnt == len(M2_reslist):
                        break

                    # Check if we conicidently had the correct corresponding resnames
                    if len(M1_reskeep) > 2:
                        if M1_reskeep[-1] - M1_reskeep[-2] != 1 and M2_reskeep[-1] - M2_reskeep[-2] == 1:
                            M1_reskeep = M1_reskeep[:-1]
                            M2_reskeep = M2_reskeep[:-1]
                            break
                        elif M1_reskeep[-1] - M1_reskeep[-2] == 1 and M2_reskeep[-1] - M2_reskeep[-2] != 1:
                            M1_reskeep = M1_reskeep[:-1]
                            M2_reskeep = M2_reskeep[:-1]
                            break
                        else:
                            continue

            # Elsewise move forward in count on second structure
            else:
                M2_cnt += 1

            # Break if M1 and M2 have reached their ends, restart if only M2 has
            if M1_cnt == len(M1_reslist): #and M2_cnt == len(M2_reslist):
                break
            # break if the length of residues in M2 we are counting to now are longer than
            # the possible max number of saved residues in M2
            elif len(M2_reskeep) >= len(M2_reslist):
                break
            # Need case so we don't recount a chain in the event of a homodimer
            elif M2_cnt >= len(M2_reslist):
                M1_cnt += 1
                M2_cnt = len(M1_reskeep)
            else:
                continue

        return M1_reskeep, M2_reskeep

[docs]    def pdb2pqr(self, ff=""):
        '''
        Parses data from the pdb input into a pqr format. This uses the panda dataframe with the information
        regarding atom indexes, types etc. in the self.data files.
        It outputs a panda dataframe with the pqr equivilent information. It requires a datafile forcefield input.
        The default is the amber14sb forcefield file held within the classes/ folder.

        :param ff: name of forcefield text file input that needs to be read to read charges / vdw radii.
        '''

        _, intervals = self.guess_chain_split()
        # patch naming of C-termini
        for i in intervals[1:]:
            idxs = self.same_residue(i-1, get_index=True)[1]
            names = self.data.loc[idxs, ["name"]].values
            if np.any(names == "OC1") or np.any(names == "OXT"):
                resname = self.data.loc[idxs[0], ["resname"]].values[0]
                newresnames = np.array(["C"+resname]*len(idxs))
                self.data.loc[idxs, ["resname"]] = newresnames

        # patch naming of N-termini
        for i in intervals[0:-1]:
            idxs = self.same_residue(i, get_index=True)[1]
            names = self.data.loc[idxs, ["name"]].values
            if np.any(names == "H1") and np.any(names == "H2"):
                resname = self.data.loc[idxs[0], ["resname"]].values[0]
                newresnames = np.array(["N"+resname]*len(idxs))
                self.data.loc[idxs, ["resname"]] = newresnames



        HIP = np.array(["HIP"] * 18)    # create numpy array structures to possibly reassign later
        HIE = np.array(["HIE"] * 17)    # create numpy array structures to possibly reassign later
        HID = np.array(["HID"] * 17)    # create numpy array structures to possibly reassign later
        NHIP = np.array(["NHIP"] * 20)
        NHIE = np.array(["NHIE"] * 19)
        NHID = np.array(["NHID"] * 19)
        CHIP = np.array(["CHIP"] * 20)
        CHIE = np.array(["CHIE"] * 18)
        CHID = np.array(["CHID"] * 19)

        start_chain = self.data["resid"].iloc[0]   # This is in case we get 1 or 2 as the first chain ID start
        end_chain = self.data["resid"].iloc[-1]    #  We don't know the end chain number so we find it here
        start_res = self.data["resname"].iloc[0]
        end_res = self.data["resname"].iloc[-1]

        # Need to check if first residue is actually an N-termini residue, and if so, reassign resnames if necessary
        if (self.data["name"].iloc[0:27] == 'H1').any() and (self.data["name"].iloc[0:27] == 'H2').any() and (self.data["name"].iloc[0:27] == 'H3').any() and self.data["resname"][0][0] != 'N':
            print('Found N-Termini, reassigning first resname to match the forcefield')
            start_index = self.data.index[self.data["resid"] == start_chain]
            for N in start_index:
                self.data["resname"].iloc[N] = 'N' + start_res   # First chain needs to be prefixed with N-termini resname

        if len(ff) == 0:
            #"amber14sb.dat"
            folder = os.path.dirname(os.path.realpath(__file__))
            ff = "%s/amber14sb.dat" % folder

        if os.path.isfile(ff) != 1:
            raise Exception("ERROR: %s not found!" % ff)

        ff = np.loadtxt(ff, usecols=(0,1,2,3,4), dtype=str)

        cols = ['resname', 'name', 'charge', 'radius', 'atomtype'] # where radius is the VdW radius in the amber file
        idx = np.arange(len(ff))
        pqr_data = pd.DataFrame(ff, index=idx, columns=cols)

        charges = []
        radius = []
        atomtypes = []

        # Need to check whether it matches HIE, HID or HIP depending on what protons are present and where
        his_check = self.data["resname"] == 'HIS'  # Check if we need to do following calculation
        nhis_check = self.data["resname"] == 'NHIS' # Check for N termini HIS
        chis_check = self.data["resname"] == 'CHIS'
        if np.sum(his_check) != 0 or np.sum(nhis_check) != 0 or np.sum(chis_check) != 0:
            print("WARNING: found residue with name HIS, checking to see what protonation state it is in and reassigning to HIP, HIE or HID.\nYou should check HIS in your pdb file is right to be sure!")
            for ix in range(len(self.data["resname"])):
                H_length = 17 # Set this as it is more common, and also covers the basis to capture HD1 or HE2 later if necessary (as C and O tend to be last a
                # N is always the first atom (use that as basis)

                if self.data["name"][ix] == 'N' and self.data["resname"][ix] == 'HIS':

                    if (self.data["name"][ix:(ix+H_length)] == 'HE2').any() and (self.data["name"][ix:(ix+H_length)] == 'HD1').any(): # If the residue contains HE2 and HD1, it is a HIP residue
                        H_length = 18     #   number of atoms in histdine (HIP)
                        self.data.loc[ix:(ix+H_length-1), "resname"] = HIP

                    elif (self.data["name"][ix:(ix+H_length)] == 'HE2').any():
                        self.data.loc[ix:(ix+H_length-1), "resname"] = HIE

                    elif (self.data["name"][ix:(ix+H_length)] == 'HD1').any():
                        self.data.loc[ix:(ix+H_length-1), "resname"] = HID

                elif self.data["name"][ix] == 'N' and self.data["resname"][ix] == 'NHIS':
                    H_length = 19

                    if (self.data["name"][ix:(ix+H_length)] == 'HE2').any() and (self.data["name"][ix:(ix+H_length)] == 'HD1').any(): # If the residue contains HE2 and HD1, it is a HIP residue
                        H_length = 20     #   number of atoms in histdine (HIP)
                        self.data.loc[ix:(ix+H_length-1), "resname"] = NHIP

                    elif (self.data["name"][ix:(ix+H_length)] == 'HE2').any():
                        self.data.loc[ix:(ix+H_length-1), "resname"] = NHIE

                    elif (self.data["name"][ix:(ix+H_length)] == 'HD1').any():
                        self.data.loc[ix:(ix+H_length-1), "resname"] = NHID

                elif self.data["name"][ix] == 'N' and self.data["resname"][ix] == 'CHIS':
                    H_length = 19

                    if (self.data["name"][ix:(ix+H_length)] == 'HE2').any() and (self.data["name"][ix:(ix+H_length)] == 'HD1').any(): # If the residue contains HE2 and HD1, it is a HIP residue
                        H_length = 20     #   number of atoms in histdine (HIP)
                        self.data.loc[ix:(ix+H_length-1), "resname"] = CHIP

                    elif (self.data["name"][ix:(ix+H_length)] == 'HE2').any():
                        H_length = 18
                        self.data.loc[ix:(ix+H_length-1), "resname"] = CHIE

                    elif (self.data["name"][ix:(ix+H_length)] == 'HD1').any():
                        self.data.loc[ix:(ix+H_length-1), "resname"] = CHID

        # Move through each line in the pdb.data file and find the corresponding charge / vdw radius as supplied by the forcefield
        for i, resnames in enumerate(self.data["resname"]):
            values_res = pqr_data["resname"] == resnames
            values_name = pqr_data["name"] == self.data["name"][i]
            values = np.logical_and(values_res, values_name)
            value_loc = pqr_data[values]

            if len(value_loc) == 0:
                print(value_loc, resnames, self.data["name"][i], self.data["resname"][i], self.data["resid"].iloc[i], self.data["index"].iloc[i])
                raise Exception("ERROR: The atom names in your PDB file do not match the PQR file")
            else:
                charges.append(float(value_loc.iloc[0]["charge"]))
                radius.append(float(value_loc.iloc[0]["radius"]))
                atomtypes.append(value_loc.iloc[0]["atomtype"])

        # Drop the beta factor / occupancy data to be replaced with charge / vdw radius numbers
        pqr = self.data.drop(['atomtype', 'radius', 'charge'], axis=1) #  remove obselete data
        pqr['atomtype'] = atomtypes  # Replace with Amber derived data for each atom
        pqr['radius'] = radius
        pqr['charge'] = charges

        print("Conversion Complete")

        return pqr

[docs]    def write_pqr(self, outname, conformations=[], index=[]):
        '''
        overload superclass method for writing (multi)pqr.

        :param outname: name of pqr file to be generated.
        :param index: indices of atoms to write to file. If empty, all atoms are returned. Index values obtaineable with a call like: index=molecule.atomselect("A", [1, 2, 3], "CA", True)[1]
        :param conformations: list of conformation indices to write to file. By default, a multipdb with all conformations will be produced.
        '''

        # store current frame, so it will be reestablished after file output is
        # complete
        currentbkp = self.current

        # if a subset of all available frames is requested to be written,
        # select them first
        if len(conformations) == 0:
            frames = range(0, len(self.coordinates), 1)
        else:
            if np.max(conformations) < len(self.coordinates):
                frames = conformations
            else:
                raise Exception("ERROR: requested coordinate index %s, but only %s are available" %(np.max(conformations), len(self.coordinates)))

        # Get our PQR database style
        pqr = self.pdb2pqr()

        f_out = open(outname, "w")

        for f in frames:
            # get all informations from PDB (for current conformation) in a list
            self.set_current(f)
            d = self.get_pdb_data(index)

            # Get our


            # Build our hexidecimal array if num. of atoms > 99999
            idx_val = np.arange(1, len(d) + 1, 1)
            if len(idx_val) > 99999:
                vhex = np.vectorize(hex)
                idx_val = vhex(idx_val)   # convert index values to hexidecimal
                idx_val = [num[2:] for num in idx_val]  # remove 0x at start of hexidecimal number

            for i in range(0, len(d), 1):
                # create and write PDB line
                if d[i][2][0].isdigit():
                    L = '%-6s%5s %-5s%-4s%1s%4s    %8.3f%8.3f%8.3f%7.4f%7.4f        %2s\n' % (d[i][0], idx_val[i], d[i][2], d[i][3], d[i][4], d[i][5], float(d[i][6]), float(d[i][7]), float(d[i][8]), float(pqr.iloc[i]["charge"]), float(pqr.iloc[i]["radius"]), d[i][11])
                else:
                    L = '%-6s%5s  %-4s%-4s%1s%4s    %8.3f%8.3f%8.3f%7.4f%7.4f        %2s\n' % (d[i][0], idx_val[i], d[i][2], d[i][3], d[i][4], d[i][5], float(d[i][6]), float(d[i][7]), float(d[i][8]), float(pqr.iloc[i]["charge"]), float(pqr.iloc[i]["radius"]), d[i][11])
                f_out.write(L)

            f_out.write("END\n")

        f_out.close()

        self.set_current(currentbkp)

        return

[docs]    def get_dipole_map(self, orig, pqr, time_start = 0, time_end = 2,resolution = 1., vox_in_window = 3., write_dipole_map = True, fname = "dipole_map.tcl"):
        '''
        Method for generating dipole maps to be used for electron density map generation. Also prints a dipole map as a result (and if desired). It calls a cython code in lib.

        :param orig: Origin points for voxel grid
        :param pqr: PQR file for self. Can be generated by calling pdb2pqr above
        :param time_start: First frame to parse in multipdb
        :param time_end: Last frame to parse in multipdb
        :param resolution: Desired resolution of voxel
        :param vox_in_window: Amount of surrounding space to contribute to local dipole. vox_in_window * resolution gives window size (in Ang.)
        :param write_dipole_map: Write a dipole map in TCL format to be read in via VMD.
        :param fname: Name of desired dipole map to be written
        '''

        charges = pqr["charge"].values[:]

        crd = self.coordinates[time_start:time_end] # cut out coordinates we're interested in

        time_end -= time_start # shift to compensate for cutting the coordinates earlier
        time_start = 0

        dipole_map = e_density.c_get_dipole_map(crd = crd, orig = orig, charges = charges, time_start = time_start, time_end = time_end,resolution = resolution, vox_in_window = vox_in_window, write_dipole_map = write_dipole_map, fname = fname)

        return dipole_map

[docs]    def get_dipole_density(self, dipole_map, orig, min_val, V, outname, vox_in_window = 3., eqn = 'gauss', T = 310.15, P = 101. * 10**3, epsilonE = 54., resolution = 1.):
        '''
        Method to generate an electron density map based on a voxel grid of dipole vectors

        :param dipole_map: The dipole map input. Can be generated with get_dipole_map above
        :param orig: Origin points for voxel grid
        :param min_val: Minimum coordinates of edge points for the voxel grid (i.e. a single x, y, z point defining the start point of the grid to match with the multipdb)
        :param V: Volume of a voxel (can be found by resolution**3, but left blank in case later version institute a sphere)
        :param outname: Name of electron density map file produced
        :param vox_in_window: Amount of surrounding space to contribute to local dipole. vox_in_window * resolution gives window size (in Ang.)
        :param eqn: Equation mode to model the electron density
        :param T: Temperature of MD
        :param P: Pressure of MD
        :param epsilonE: Continuum dielectric surrounding the protein
        :param resolution: Desired resolution of voxel
        '''

        dummy = e_density.c_get_dipole_density(dipole_map = dipole_map, orig = orig, min_val = min_val, V = V, outname = outname, vox_in_window = vox_in_window, eqn = eqn, T = T, P = P, epsilonE = epsilonE, resolution = resolution)