Source code for biobox.measures.calculators

# Copyright (c) 2014-2017 Matteo Degiacomi
#
# BiobOx is free software ;
# you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation ;
# either version 2 of the License, or (at your option) any later version.
# BiobOx is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY ;
# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with BiobOx ;
# if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
#
# Author : Matteo Degiacomi, matteothomas.degiacomi@gmail.com

'''
Functions to measure characteristics of any BiobOx object
'''

import subprocess
import os
import sys
import random
import string

import numpy as np
from ctypes import cdll, c_int, c_float, byref

import biobox.lib.fastmath as FM  # cython routines


def sasa_c(M, targets=[], probe=1.4, n_sphere_point=960, threshold=0.05):
    '''
    compute the accessible surface area using the Shrake-Rupley algorithm ("rolling ball method")

    :param M: any biobox object
    :param targets: indices to be used for surface estimation. By default, all indices are kept into account.
    :param probe: radius of the "rolling ball"
    :param n_sphere_point: number of mesh points per atom
    :param threshold: fraction of points in sphere, above which structure points are considered as exposed
    :returns: accessible surface area in A^2
    :returns: mesh numpy array containing the found points forming the accessible surface mesh
    :returns: IDs of surface points
    '''

    #make sure that everything is collected as a Structure object, and radii are available
    this_inst = type(M).__name__
    if this_inst == "Multimer":
        M = M.make_molecule()

    elif this_inst in ["Assembly", "Polyhedra"]:
        M = M.make_structure()

    # getting radii associated to every atom
    radii = M.data['radius'].values

    if threshold < 0.0 or threshold > 1.0:
        raise Exception("ERROR: threshold should be a floating point between 0 and 1!")

    if len(targets) == 0:
        return FM.c_get_surface(M.points, radii, probe, n_sphere_point, threshold)
    else:
        return FM.c_get_surface(M.points[targets], radii, probe, n_sphere_point, threshold)

[docs]def sasa(M, targets=[], probe=1.4, n_sphere_point=960, threshold=0.05): ''' compute the accessible surface area using the Shrake-Rupley algorithm ("rolling ball method") :param M: any biobox object :param targets: indices to be used for surface estimation. By default, all indices are kept into account. :param probe: radius of the "rolling ball" :param n_sphere_point: number of mesh points per atom :param threshold: fraction of points in sphere, above which structure points are considered as exposed :returns: accessible surface area in A^2 :returns: mesh numpy array containing the found points forming the accessible surface mesh :returns: IDs of surface points ''' import biobox.measures.interaction as I #make sure that everything is collected as a Structure object, and radii are available this_inst = type(M).__name__ if this_inst == "Multimer": M = M.make_molecule() elif this_inst in ["Assembly", "Polyhedra"]: M = M.make_structure() if len(targets) == 0: targets = range(0, len(M.points), 1) # getting radii associated to every atom radii = M.data['radius'].values if threshold < 0.0 or threshold > 1.0: raise Exception("ERROR: threshold should be a floating point between 0 and 1!") # create unit sphere points cloud (using golden spiral) pts = [] inc = np.pi * (3 - np.sqrt(5)) offset = 2 / float(n_sphere_point) for k in range(int(n_sphere_point)): y = k * offset - 1 + (offset / 2) r = np.sqrt(1 - y * y) phi = k * inc pts.append([np.cos(phi) * r, y, np.sin(phi) * r]) sphere_points = np.array(pts) const = 4.0 * np.pi / len(sphere_points) contact_map = I.distance_matrix(M.points, M.points) asa = 0.0 surface_atoms = [] mesh_pts = [] # compute accessible surface for every atom for i in targets: # place mesh points around atom of choice mesh = sphere_points * (radii[i] + probe) + M.points[i] # compute distance matrix between mesh points and neighboring atoms test = np.where(contact_map[i, :] < radii.max() + probe * 2)[0] neigh = M.points[test] dist = I.distance_matrix(neigh, mesh) - radii[test][:, np.newaxis] # lines=atoms, columns=mesh points. Count columns containing values greater than probe*2 # i.e. allowing sufficient space for a probe to fit completely cnt = 0 for m in range(dist.shape[1]): if not np.any(dist[:, m] < probe): cnt += 1 mesh_pts.append(mesh[m]) # calculate asa for current atom, if a sufficient amount of mesh # points is exposed (NOTE: to verify) if cnt > n_sphere_point * threshold: surface_atoms.append(i) asa += const * cnt * (radii[i] + probe)**2 return asa, np.array(mesh_pts), np.array(surface_atoms)
[docs]def rgyr(M): ''' compute radius of gyration. :param M: any biobox object :returns: radius of gyration ''' #make sure that everything is collected as a Structure object, and radii are available this_inst = type(M).__name__ if this_inst == "Multimer": M = M.make_molecule() elif this_inst in ["Assembly", "Polyhedra"]: M = M.make_structure() d_square = np.sum((M.points - M.get_center())**2, axis=1) return np.sqrt(np.sum(d_square) / d_square.shape[0])
[docs]def saxs(M, crysol_path='', crysol_options="-lm 20 -ns 500", pdbname=""): ''' compute SAXS curve using crysol (from ATSAS suite) :param M: any biobox object :param crysol_path: path to crysol executable. If not provided, the environment variable ATSASPATH is sought instead. This allows redirecting to a specific ATSAS bin folder. :param crysol_options: flags to be passes to impact executable :param pdbname: if a file has been already written, crysol can be asked to analyze it :returns: SAXS curve (nx2 numpy array) ''' if crysol_path == '': try: crysol_path = os.environ['ATSASPATH'] except KeyError: raise Exception("ATSASPATH environment variable undefined") if pdbname == "": # write temporary pdb file of current structure on which to launch # SAXS calculation pdbname = "%s.pdb" % random_string(32) while os.path.exists(pdbname): pdbname = "%s.pdb" % random_string(32) M.write_pdb(pdbname, [M.current]) else: # if file was already provided, verify its existence first! if os.path.isfile(pdbname) != 1: raise Exception("ERROR: %s not found!" % pdbname) # get basename for output outfile = os.path.basename(pdbname).split('.')[0] call_line = os.path.join(crysol_path, "crysol") #try: subprocess.check_call('%s %s %s > /dev/null' %(call_line, crysol_options, pdbname), shell=True) #except Exception as e: # raise Exception("ERROR: crysol calculation failed!") data = np.loadtxt("%s00.int" % outfile, skiprows=1) try: os.remove("%s00.alm" % outfile) os.remove("%s00.int" % outfile) os.remove("%s00.log" % outfile) os.remove("%s.pdb" % outfile) except Exception as ex: pass return data[:, 0:2]
[docs]def ccs(M, use_lib=True, impact_path='', impact_options="-Octree -nRuns 32 -cMode sem -convergence 0.01", pdbname="", tjm_scale=False, proberad=1.0): ''' compute CCS calling either impact. :param M: any biobox object :param use_lib: if true, impact library will be used, if false a system call to impact executable will be performed instead :param impact_path: by default, the environment variable IMPACTPATH is sought. This allows redirecting to a specific impact root folder. :param impact_options: flags to be passes to impact executable :param pdbname: if a file has been already written, impact can be asked to analyze it :param tjm_scale: if True, CCS value calculated with PA method is scaled to better match trajectory method. :param proberad: radius of probe. Do find out if your impact library already adds this value by default or not (old ones do)! :returns: CCS value in A^2. Error return: -1 = input filename not found, -2 = unknown code for CCS calculation\n -3 CCS calculator failed, -4 = parsing of CCS calculation results failed ''' #make sure that everything is collected as a Structure object, and radii are available this_inst = type(M).__name__ if this_inst == "Multimer": M = M.make_molecule() M.assign_atomtype() M.get_atoms_ccs() elif this_inst in ["Assembly", "Polyhedra"]: M = M.make_structure() elif this_inst == "Molecule" and "atoms_ccs" not in M.data.columns: M.assign_atomtype() M.get_atoms_ccs() if use_lib and pdbname == "": #if True: from biobox.measures.calculators import CCS try: if impact_path == '': try: impact_path = os.path.join(os.environ['IMPACTPATH'], "lib") except KeyError: raise Exception("IMPACTPATH environment variable undefined") if "win" in sys.platform: libfile = os.path.join(impact_path, "libimpact.dll") else: libfile = os.path.join(impact_path, "libimpact.so") C = CCS(libfile=libfile) except Exception as e: raise Exception(str(e)) if "atom_ccs" in M.data.columns: radii = M.data['atom_ccs'].values + proberad else: radii = M.data['radius'].values + proberad if tjm_scale: return C.get_ccs(M.points, radii)[0] else: return C.get_ccs(M.points, radii, a=1.0, b=1.0)[0] # generate random file name to capture CCS software terminal output tmp_outfile = random_string(32) while os.path.exists(tmp_outfile): tmp_outfile = "%s.pdb" % random_string(32) if pdbname == "": # write temporary pdb file of current structure on which to launch # CCS calculation filename = "%s.pdb" % random_string(32) while os.path.exists(filename): filename = "%s.pdb" % random_string(32) M.write_pdb(filename, [M.current]) else: filename = pdbname # if file was already provided, verify its existence first! if os.path.isfile(pdbname) != 1: raise Exception("ERROR: %s not found!" % pdbname) try: if impact_path == '': try: impact_path = os.path.join(os.environ['IMPACTPATH'], "bin") except KeyError: raise Exception("IMPACTPATH environment variable undefined") # if using impact, create parameterization file containing a # description for Z atoms (pseudoatom name used in this code) f = open('params', 'w') f.write('[ defaults ]\n H 2.2\n C 2.91\n N 2.91\n O 2.91\n P 2.91\n S 2.91\n') #@fix this for the general case of multiple atoms with different radius f.write(' Z %s' % (np.unique(M.data['radius'])[0] + proberad)) impact_options += " -param params" f.close() if "win" in sys.platform: impact_name = os.path.join(impact_path, "impact.exe") else: impact_name = os.path.join(impact_path, "impact") subprocess.check_call('%s %s -rProbe 0 %s > %s' % (impact_name, impact_options, filename, tmp_outfile), shell=True) except Exception as e: raise Exception(str(e)) #parse output generated by IMPACT and written into a file try: f = open(tmp_outfile, 'r') for line in f: w = line.split() if len(w) > 0 and w[0] == "CCS": if tjm_scale: v = float(w[-2]) else: v = float(w[3]) break f.close() # clean temp files if needed #(if a filename is provided, don't delete it!) os.remove(tmp_outfile) if pdbname == "": os.remove(filename) return v except: # clean temp files os.remove(tmp_outfile) if pdbname == "": os.remove(filename) return -4
class CCS(object): ''' CCS calculator (wrapper for C library) ''' def __init__(self, libfile): ''' initialize by loading IMPACT library :param libfile: library path ''' try: self.libs = cdll.LoadLibrary(libfile) self.libs.pa2tjm.restype = c_float except: raise Exception("loading library %s failed!" % libfile) # declare output variables self.ccs = c_float() self.sem = c_float() self.niter = c_int() def get_ccs(self, points, radii, a=0.842611, b=1.051280): ''' compute CCS using the PA method as implemented in IMPACT library. :param points: xyz coordinates of atoms, Angstrom (nx3 numpy array) :param radii: van der Waals radii associated to every point (numpy array with n elements) :param a: power-law factor for calibration with TJM :param b: power-law exponent for calibration with TJM :returns: TJM CCS :returns: standard error :returns: number of iterations ''' # create ctypes for intput data unravel = np.ravel(points) cpoints = (c_float * len(unravel))(*unravel) cradii = (c_float * len(radii))(*radii) natoms = (c_int)(len(radii)) # call library, and rescale obtained value using exponential law self.libs.ccs_from_atoms_defaults(natoms, byref(cpoints), byref(cradii), byref(self.ccs), byref(self.sem), byref(self.niter)) ccs_tjm = self.libs.pa2tjm(c_float(a), c_float(b), self.ccs) return ccs_tjm, self.sem.value, self.niter.value def random_string(length=32): ''' generate a random string of arbitrary characters. Useful to generate temporary file names. :param length: length of random string ''' return ''.join([random.choice(string.ascii_letters) for n in range(length)])