# Copyright (c) 2014-2017 Matteo Degiacomi
#
# BiobOx is free software ;
# you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation ;
# either version 2 of the License, or (at your option) any later version.
# BiobOx is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY ;
# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with BiobOx ;
# if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
#
# Author : Matteo Degiacomi, matteothomas.degiacomi@gmail.com
from copy import deepcopy
import numpy as np
import scipy.signal
import pandas as pd
[docs]class Structure(object):
'''
A Structure consists of an ensemble of points in 3D space, and metadata associated to each of them.
'''
def __init__(self, p=np.array([[], []]), r=1.0):
'''
Point coordinates and properties data structures are first initialized.
properties is a dictionary initially containing an entry for 'center' (center of geometry) and 'radius' (average radius of points).
:param p: coordinates data structure as a mxnx3 numpy array (alternative conformation x atom x 3D coordinate). nx3 numpy array can be supplied, in case a single conformation is present.
:param r: average radius of every point in dataset (float), or radius of every point (numpy array)
'''
if p.ndim == 3:
self.coordinates = p
'''numpy array containing an ensemble of alternative coordinates in 3D space'''
elif p.ndim == 2:
self.coordinates = np.array([p])
else:
raise Exception("ERROR: expected numpy array with 2 or three dimensions, but %s dimensions were found" %p.ndim)
self.current = 0
'''index of currently selected conformation'''
self.points = self.coordinates[self.current]
'''pointer to currently selected conformation'''
self.properties = {}
'''collection of properties. By default, 'center' (geometric center of the Structure) is defined'''
self.properties['center'] = self.get_center()
idx = np.arange(len(self.points))
if isinstance(r, list) or type(r).__module__ == 'numpy':
if len(r) > 0:
self.data = pd.DataFrame(r, index=idx, columns=["radius"])
else:
rad = r*np.ones(len(self.points))
self.data = pd.DataFrame(rad, index=idx, columns=["radius"])
''' metadata about each atom (pandas Dataframe)'''
def __len__(self, dim="atoms"):
if dim == "atoms":
return len(self.points)
def __getitem__(self, key):
return self.coordinates[key]
[docs] def set_current(self, pos):
'''
select current frame (place frame pointer at desired position)
:param pos: number of alternative conformation (starting from 0)
'''
if pos < self.coordinates.shape[0]:
self.current = pos
self.points = self.coordinates[self.current]
self.properties['center'] = self.get_center()
else:
raise Exception("ERROR: position %s requested, but only %s conformations available" %(pos, self.coordinates.shape[0]))
[docs] def get_xyz(self, indices=[]):
'''
get points coordinates.
:param indices: indices of points to select. If none is provided, all points coordinates are returned.
:returns: coordinates of all points indexed by the provided indices list, or all of them if no list is provided.
'''
if indices == []:
return self.points
else:
return self.points[indices]
[docs] def set_xyz(self, coords):
'''
set point coordinates.
:param coords: array of 3D points
'''
self.coordinates[self.current] = deepcopy(coords)
self.points = self.coordinates[self.current]
[docs] def add_xyz(self, coords):
'''
add a new alternative conformation to the database
:param coords: array of 3D points, or array of arrays of 3D points (in case multiple alternative coordinates must be added at the same time)
'''
# self.coordinates numpy array containing an ensemble of alternative
# coordinates in 3D space
if self.coordinates.size == 0 and coords.ndim == 3:
self.coordinates = deepcopy(coords)
self.set_current(0)
elif self.coordinates.size == 0 and coords.ndim == 2:
self.coordinates = deepcopy(np.array([coords]))
self.set_current(0)
elif self.coordinates.size > 0 and coords.ndim == 3:
self.coordinates = np.concatenate((self.coordinates, coords))
# set new frame to the first of the newly inserted ones
self.set_current(self.current + 1)
elif self.coordinates.size > 0 and coords.ndim == 2:
self.coordinates = np.concatenate((self.coordinates, np.array([coords])))
# set new frame to the first of the newly inserted ones
self.set_current(self.current + 1)
else:
raise Exception("ERROR: expected numpy array with 2 or three dimensions, but %s dimensions were found" %np.ndim)
[docs] def delete_xyz(self, index):
'''
remove one conformation from the conformations database.
the new current conformation will be the previous one.
:param index: alternative coordinates set to remove
'''
self.coordinates = np.delete(self.coordinates, index, axis=0)
if index > 0:
self.set_current(index - 1)
else:
self.set_current(0)
[docs] def clear(self):
'''
remove all the coordinates and empty metadata
'''
self.coordinates = np.array([[[], []], [[], []]])
self.points = self.coordinates[0]
self.data = pd.DataFrame(index=[], columns=[])
[docs] def translate(self, x, y, z):
'''
translate the whole structure by a given amount.
:param x: translation around x axis
:param y: translation around y axis
:param z: translation around z axis
'''
# if center has not been defined yet (may happen when using
# subclasses), compute it
if 'center' not in self.properties:
self.get_center()
# translate all points
self.properties['center'][0] += x
self.properties['center'][1] += y
self.properties['center'][2] += z
self.points[:, 0] += x
self.points[:, 1] += y
self.points[:, 2] += z
[docs] def rotate(self, x, y, z):
'''
rotate the structure provided angles of rotation around x, y and z axes (in degrees).
This is a rotation with respect of the origin.
Make sure that the center of your structure is at the origin, if you don't want to get a translation as well!
rotating an object being not centered requires to first translate the ellipsoid at the origin, rotate it, and bringing it back.
:param x: rotation around x axis
:param y: rotation around y axis
:param z: rotation around z axis
'''
alpha = np.radians(x)
beta = np.radians(y)
gamma = np.radians(z)
Rx = np.array([[1, 0, 0],
[0, np.cos(alpha), - np.sin(alpha)],
[0, np.sin(alpha), np.cos(alpha)]])
Ry = np.array([[np.cos(beta), 0, np.sin(beta)],
[0, 1, 0],
[-np.sin(beta), 0, np.cos(beta)]])
Rz = np.array([[np.cos(gamma), -np.sin(gamma), 0],
[np.sin(gamma), np.cos(gamma), 0],
[0, 0, 1]])
rotation = np.dot(Rx, np.dot(Ry, Rz))
# multiply rotation matrix with each point of the ellipsoid
self.apply_transformation(rotation.T)
self.get_center()
[docs] def get_center(self):
'''
compute protein center of geometry (also assigns it to self.properties["center"] key).
'''
if len(self.points) > 0:
self.properties['center'] = np.mean(self.points, axis=0)
else:
self.properties['center'] = np.array([0.0, 0.0, 0.0])
return self.properties['center']
[docs] def center_to_origin(self):
'''
move the structure so that its center of geometry is at the origin.
'''
c = self.get_center()
self.translate(-c[0], -c[1], -c[2])
[docs] def get_size(self):
'''
compute the dimensions of the object along x, y and z.
.. note: points radii are not kept into account.
'''
x = np.max(self.points[:, 0]) - np.min(self.points[:, 0])
# +self.properties['radius']*2
y = np.max(self.points[:, 1]) - np.min(self.points[:, 1])
# +self.properties['radius']*2
z = np.max(self.points[:, 2]) - np.min(self.points[:, 2])
return np.array([x, y, z])
[docs] def rotation_matrix(self, axis, theta):
'''
compute matrix needed to rotate the system around an arbitrary axis (using Euler-Rodrigues formula).
:param axis: 3d vector (numpy array), representing the axis around which to rotate
:param theta: desired rotation angle
:returns: 3x3 rotation matrix
'''
# if rotation angle is equal to zero, no rotation is needed
if theta == 0:
return np.identity(3)
# method taken from
# http://stackoverflow.com/questions/6802577/python-rotation-of-3d-vector
axis = axis / np.sqrt(np.dot(axis, axis))
a = np.cos(theta / 2)
b, c, d = -axis * np.sin(theta / 2)
return np.array([[a * a + b * b - c * c - d * d, 2 * (b * c - a * d), 2 * (b * d + a * c)],
[2 * (b * c + a * d), a * a + c * c - b * b - d * d, 2 * (c * d - a * b)],
[2 * (b * d - a * c), 2 * (c * d + a * b), a * a + d * d - b * b - c * c]])
[docs] def get_principal_axes(self):
'''
compute Structure's principal axes.
:returns: 3x3 numpy array, containing the 3 principal axes ranked from smallest to biggest.
'''
# method taken from chempy source code, geometry.py, method
# getMomentOfInertiaTensor()
# compute moment of inertia tensor
I0 = np.zeros((3, 3), np.float64)
for i in range(0, len(self.points), 1):
mass = 1 # self.mass[atom] / constants.Na
I0[0, 0] += mass * (self.points[i, 1] * self.points[i, 1] + self.points[i, 2] * self.points[i, 2])
I0[1, 1] += mass * (self.points[i, 0] * self.points[i, 0] + self.points[i, 2] * self.points[i, 2])
I0[2, 2] += mass * (self.points[i, 0] * self.points[i, 0] + self.points[i, 1] * self.points[i, 1])
I0[0, 1] -= mass * self.points[i, 0] * self.points[i, 1]
I0[0, 2] -= mass * self.points[i, 0] * self.points[i, 2]
I0[1, 2] -= mass * self.points[i, 1] * self.points[i, 2]
I0[1, 0] = I0[0, 1]
I0[2, 0] = I0[0, 2]
I0[2, 1] = I0[1, 2]
# Calculate and return the principal moments of inertia and corresponding
# principal axes for the current geometry.
e_values, e_vectors = np.linalg.eig(I0)
indices = np.argsort(e_values)
e_values = e_values[indices]
e_vectors = e_vectors.T[indices]
return e_vectors
[docs] def align_axes(self):
'''
Align structure on its principal axes.
First principal axis aligned along x, second along y and third along z.
'''
# this method is inspired from the procedure followed in in VMD's orient package:
# set I [draw principalaxes $sel] <--- show/calc the principal axes
# set A [orient $sel [lindex $I 2] {0 0 1}] <--- rotate axis 2 to match Z
# $sel move $A
# set I [draw principalaxes $sel] <--- recalc principal axes to check
# set A [orient $sel [lindex $I 1] {0 1 0}] <--- rotate axis 1 to match Y
# $sel move $A
# set I [draw principalaxes $sel] <--- recalc principal axes
# to check
# center the Structure
self.center_to_origin()
# get principal axes (ranked from smallest to biggest)
axes = self.get_principal_axes()
# align smallest principal axis against z axis
rotvec = np.cross(axes[0], np.array([1, 0, 0])) # rotation axis
sine = np.linalg.norm(rotvec)
cosine = np.dot(axes[0], np.array([1, 0, 0]))
angle = np.arctan2(sine, cosine) # angle to rotate around axis
rotmatrix = self.rotation_matrix(rotvec, angle)
self.apply_transformation(rotmatrix)
# compute new principal axes (after previous rotation)
axes = self.get_principal_axes()
# align second principal axis against y axis
rotvec = np.cross(axes[1], np.array([0, 1, 0])) # rotation axis
sine = np.linalg.norm(rotvec)
cosine = np.dot(axes[1], np.array([0, 1, 0]))
angle = np.arctan2(sine, cosine) # angle to rotate around axis
rotmatrix = self.rotation_matrix(rotvec, angle)
self.apply_transformation(rotmatrix)
[docs] def write_pdb(self, filename, index=[]):
'''
write a multi PDB file where every point is a sphere. VdW radius is written into beta factor.
:param filename: name of file to output
:param index: list of frame indices to write to file. By default, a multipdb with all frames will be produced.
'''
# if a subset of all available frames is requested to be written,
# select them first
if len(index) == 0:
frames = range(0, len(self.coordinates), 1)
else:
if np.max(index) < len(self.coordinates):
frames = index
else:
raise Exception("ERROR: requested coordinate index %s, but only %s are available" %(np.max(index), len(self.coordinates)))
fout = open(filename, "w")
for f in frames:
# Build our hexidecimal array if num. of atoms > 99999
idx_val = np.arange(1, self.coordinates.shape[1] + 1, 1)
if len(idx_val) > 99999:
vhex = np.vectorize(hex)
idx_val = vhex(idx_val) # convert index values to hexidecimal
idx_val = [num[2:] for num in idx_val] # remove 0x at start of hexidecimal number
for i in range(0, len(self.coordinates[0]), 1):
#if i > 99999:
# nb = hex(i).split('x')[1]
#else:
# nb = str(i)
l = (idx_val[i], "SPH", "SPH", "A", np.mod(i, 9999),
self.coordinates[f, i, 0],
self.coordinates[f, i, 1],
self.coordinates[f, i, 2],
self.data['radius'].values[i],
1.0, "Z")
L = 'ATOM %5s %-4s%-4s%1s%4i %8.3f%8.3f%8.3f%6.2f%6.2f %2s\n' % l
fout.write(L)
fout.write("END\n")
fout.close()
[docs] def convex_hull(self):
'''
compute Structure's convex Hull using QuickHull algorithm.
.. note:: Qhull available only on scipy >=0.12
:returns: :func:`Structure <structure.Structure>` object, containing the coordinates of vertices composing the convex hull
'''
try:
from scipy.spatial import ConvexHull
verts = ConvexHull(self.points)
return Structure(verts)
except Exception as e:
raise Exception("Quick Hull algorithm available in scipy >=0.12!")
[docs] def get_density(self, step=1.0, sigma=1.0, kernel_half_width=5, buff=3):
'''
generate density map from points
:param step: size of cubic voxels, in Angstrom
:param sigma: gaussian kernel sigma
:param kernel_half_width: kernel half width, in voxels
:param buff: padding to add at points cloud boundaries
:returns: :func:`Density <density.Density>` object, containing a simulated density map
'''
pts = self.points
# rectangular box boundaries
bnds = np.array([[np.min(pts[:, 0]) - buff, np.max(pts[:, 0]) + buff],
[np.min(pts[:, 1]) - buff, np.max(pts[:, 1]) + buff],
[np.min(pts[:, 2]) - buff, np.max(pts[:, 2]) + buff]])
xax = np.arange(bnds[0, 0], bnds[0, 1] + step, step)
yax = np.arange(bnds[1, 0], bnds[1, 1] + step, step)
zax = np.arange(bnds[2, 0], bnds[2, 1] + step, step)
# create empty box
d = np.zeros((len(xax), len(yax), len(zax)))
# place Kronecker deltas in mesh grid
for p in pts:
xpos = np.argmin(np.abs(xax - p[0]))
ypos = np.argmin(np.abs(yax - p[1]))
zpos = np.argmin(np.abs(zax - p[2]))
d[xpos, ypos, zpos] = 1
# create 3d gaussian kernel
window = kernel_half_width * 2 + 1
shape = (window, window, window)
m, n, k = [(ss - 1.) / 2. for ss in shape]
x_ = np.arange(-m, m + 1, 1).astype(int)
y_ = np.arange(-n, n + 1, 1).astype(int)
z_ = np.arange(-k, k + 1, 1).astype(int)
x, y, z = np.meshgrid(x_, y_, z_)
h = np.exp(-(x * x + y * y + z * z) / (2. * sigma * sigma))
h[h < np.finfo(h.dtype).eps * h.max()] = 0
sumh = h.sum()
if sumh != 0:
h /= sumh
# convolve point mesh with 3d gaussian kernel
b = scipy.signal.fftconvolve(d, h, mode='same')
b /= np.max(b)
# prepare density data structure
from biobox.classes.density import Density
D = Density()
D.properties['density'] = b
D.properties['size'] = np.array(b.shape)
D.properties['origin'] = np.mean(self.points, axis=0) - step * np.array(b.shape) / 2.0
D.properties['delta'] = np.identity(3) * step
D.properties['format'] = 'dx'
D.properties['filename'] = ''
D.properties["sigma"] = np.std(b)
return D
[docs] def rmsf(self, indices=-1, step=1):
'''
compute Root Mean Square Fluctuation (RMSF) of selected atoms.
:param indices: indices of points for which RMSF will be calculated. If no indices list is provided, RMSF of all points will be calculated.
:param step: timestep between two conformations (useful when using conformations extracted from molecular dynamics)
:returns: numpy aray with RMSF of all provided indices, in the same order
'''
if self.coordinates.shape[0] < 2:
raise Exception("ERROR: to compute RMSF several conformations must be available!")
# if no index is provided, compute RMSF of all points
if indices == -1:
indices = np.linspace(0, len(self.coordinates[0, :, 0]) - 1, len(self.coordinates[0, :, 0])).astype(int)
means = np.mean(self.coordinates[:, indices], axis=0)
# cumulate all squared distances with respect of mean
d = []
for i in range(0, self.coordinates.shape[0], 1):
d.append(np.sum((self.coordinates[i, indices] - means)**2, axis=1))
# compute square root of sum of mean squared distances
dist = np.array(d)
return np.sqrt(np.sum(dist, axis=0) / (float(self.coordinates.shape[0]) * step))
[docs] def pca(self, components, indices=[]):
'''
compute Principal Components Analysis (PCA) on specific points within all the alternative coordinates.
:param components: eigenspace dimensions
:param indices: points indices to be considered for PCA
:returns: numpy array of projection of each conformation into the n-dimensional eigenspace
:returns: sklearn PCA object
'''
from sklearn.decomposition import PCA
# define conformational space (flatten coordinates of desired atoms
if len(indices) != 0:
X = self.coordinates[:, indices].reshape(
(len(self.coordinates), len(indices) * 3))
else:
X = self.coordinates.reshape(
(self.coordinates.shape[0], self.coordinates.shape[1]*3))
# calculate system PCA and project conformations into the eigenspace
pca = PCA(n_components=components)
pca.fit(X)
Xproj = pca.transform(X)
return Xproj, pca
[docs] def rmsd_one_vs_all(self, ref_index, points_index=[], align=False):
'''
Calculate the RMSD between all structures with respect of a reference structure.
uses Kabsch alignement algorithm.
:param ref_index: index of reference structure in conformations database
:param points_index: if set, only specific points will be considered for comparison
:param align: if set to true, all conformations will be aligned to reference (note: cannot be undone!)
:returns: RMSD of all structures with respect of reference structure (in a numpy array)
'''
# see: http://www.pymolwiki.org/index.php/Kabsch#The_Code
bkpcurrent = self.current
if ref_index >= len(self.coordinates):
raise Exception("ERROR: index %s requested, but only %s exist in database" %(len(self.coordinates)))
# define reference frame, and center it
if len(points_index) == 0:
m1 = deepcopy(self.coordinates[ref_index])
elif isinstance(points_index, list) or type(points_index).__module__ == 'numpy':
m1 = deepcopy(self.coordinates[ref_index, points_index])
else:
raise Exception("ERROR: please, provide me with a list of indices to compute RMSD (or no index at all)")
L = len(m1)
COM1 = np.sum(m1, axis=0) / float(L)
m1 -= COM1
m1sum = np.sum(np.sum(m1 * m1, axis=0), axis=0)
RMSD = []
for i in range(0, len(self.coordinates), 1):
if i == ref_index:
RMSD.append(0.0)
else:
# define current frame, and center it
if len(points_index) == 0:
m2 = deepcopy(self.coordinates[i])
elif isinstance(points_index, list) or type(points_index).__module__ == 'numpy':
m2 = deepcopy(self.coordinates[i, points_index])
COM2 = np.sum(m2, axis=0) / float(L)
m2 -= COM2
E0 = m1sum + np.sum(np.sum(m2 * m2, axis=0), axis=0)
# This beautiful step provides the answer. V and Wt are the orthonormal
# bases that when multiplied by each other give us the rotation matrix, U.
# S, (Sigma, from SVD) provides us with the error! Isn't SVD
# great!
V, S, Wt = np.linalg.svd(np.dot(np.transpose(m2), m1))
# if alignement is required, move pointer to current frame, and
# apply rotation matrix
if align:
self.set_current(i)
rotation = np.dot(V, Wt)
self.apply_transformation(rotation)
reflect = float(
str(float(np.linalg.det(V) * np.linalg.det(Wt))))
if reflect == -1.0:
S[-1] = -S[-1]
V[:, -1] = -V[:, -1]
rmsdval = E0 - (2.0 * sum(S))
rmsdval = np.sqrt(abs(rmsdval / L))
RMSD.append(rmsdval)
self.set_current(bkpcurrent)
return np.array(RMSD)
[docs] def rmsd(self, i, j, points_index=[], full=False):
'''
Calculate the RMSD between two structures in alternative coordinates ensemble.
uses Kabsch alignement algorithm.
:param i: index of the first structure
:param j: index of the second structure
:param points_index: if set, only specific points will be considered for comparison
:param full: if True, RMSD an rotation matrx are returned, RMSD only otherwise
:returns: RMSD of the two structures. If full is True, the rotation matrix is also returned
'''
# see: http://www.pymolwiki.org/index.php/Kabsch#The_Code
if i >= len(self.coordinates):
raise Exception("ERROR: index %s requested, but only %s exist in database" %(i, len(self.coordinates)))
if j >= len(self.coordinates):
raise Exception("ERROR: index %s requested, but only %s exist in database" %(j, len(self.coordinates)))
# get first structure and center it
if len(points_index) == 0:
m1 = deepcopy(self.coordinates[i])
elif isinstance(points_index, list) or type(points_index).__module__ == 'numpy':
m1 = deepcopy(self.coordinates[i, points_index])
else:
raise Exception("ERROR: give me a list of indices to compute RMSD, or nothing at all, please!")
# get second structure
if len(points_index) == 0:
m2 = deepcopy(self.coordinates[j])
elif isinstance(points_index, list) or type(points_index).__module__ == 'numpy':
m2 = deepcopy(self.coordinates[j, points_index])
else:
raise Exception("ERROR: give me a list of indices to compute RMSD, or nothing at all, please!")
L = len(m1)
COM1 = np.sum(m1, axis=0) / float(L)
m1 -= COM1
m1sum = np.sum(np.sum(m1 * m1, axis=0), axis=0)
COM2 = np.sum(m2, axis=0) / float(L)
m2 -= COM2
E0 = m1sum + np.sum(np.sum(m2 * m2, axis=0), axis=0)
# This beautiful step provides the answer. V and Wt are the orthonormal
# bases that when multiplied by each other give us the rotation matrix, U.
# S, (Sigma, from SVD) provides us with the error! Isn't SVD great!
V, S, Wt = np.linalg.svd(np.dot(np.transpose(m2), m1))
reflect = float(str(float(np.linalg.det(V) * np.linalg.det(Wt))))
if reflect == -1.0:
S[-1] = -S[-1]
V[:, -1] = -V[:, -1]
rmsdval = E0 - (2.0 * sum(S))
if full:
return np.sqrt(abs(rmsdval / L)), np.matmul(V, Wt)
else:
return np.sqrt(abs(rmsdval / L))
[docs] def rmsd_distance_matrix(self, points_index=[], flat=False):
'''
compute distance matrix between structures (using RMSD as metric).
:param points_index: if set, only specific points will be considered for comparison
:param flat: if True, returns flattened distance matrix
:returns: RMSD distance matrix
'''
if flat:
rmsd = []
else:
rmsd = np.zeros((len(self.coordinates), len(self.coordinates)))
for i in range(0, len(self.coordinates) - 1, 1):
for j in range(i + 1, len(self.coordinates), 1):
r = self.rmsd(i, j, points_index)
if flat:
rmsd.append(r)
else:
rmsd[i, j] = r
rmsd[j, i] = r
if flat:
return np.array(rmsd)
else:
return rmsd