Source code for sgdml.utils.desc

#!/usr/bin/python

# MIT License
#
# Copyright (c) 2018-2022 Stefan Chmiela, Luis Galvez
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np
import scipy as sp
from scipy import spatial

import multiprocessing as mp

Pool = mp.get_context('fork').Pool

from functools import partial
import timeit

try:
    import torch
except ImportError:
    _has_torch = False
else:
    _has_torch = True


[docs]def _pbc_diff(diffs, lat_and_inv, use_torch=False):
    """
    Clamp differences of vectors to super cell.

    Parameters
    ----------
        diffs : :obj:`numpy.ndarray`
            N x 3 matrix of N pairwise differences between vectors `u - v`
        lat_and_inv : tuple of :obj:`numpy.ndarray`
            Tuple of 3 x 3 matrix containing lattice vectors as columns and its inverse.
        use_torch : boolean, optional
            Enable, if the inputs are PyTorch objects.

    Returns
    -------
        :obj:`numpy.ndarray`
            N x 3 matrix clamped differences
    """

    lat, lat_inv = lat_and_inv

    if use_torch and not _has_torch:
        raise ImportError(
            'Optional PyTorch dependency not found! Please run \'pip install sgdml[torch]\' to install it or disable the PyTorch option.'
        )

    if use_torch:
        c = lat_inv.mm(diffs.t())
        diffs -= lat.mm(c.round()).t()
    else:
        c = lat_inv.dot(diffs.T)
        diffs -= lat.dot(np.around(c)).T

    return diffs


[docs]def _pdist(r, lat_and_inv=None):
    """
    Compute pairwise Euclidean distance matrix between all atoms.

    Parameters
    ----------
        r : :obj:`numpy.ndarray`
            Array of size 3N containing the Cartesian coordinates of
            each atom.
        lat_and_inv : tuple of :obj:`numpy.ndarray`, optional
            Tuple of 3x3 matrix containing lattice vectors as columns and its inverse.

    Returns
    -------
        :obj:`numpy.ndarray`
            Array of size N(N-1)/2 containing the upper triangle of the pairwise
            distance matrix between atoms.
    """

    r = r.reshape(-1, 3)
    n_atoms = r.shape[0]

    if lat_and_inv is None:
        pdist = sp.spatial.distance.pdist(r, 'euclidean')
    else:
        pdist = sp.spatial.distance.pdist(
            r, lambda u, v: np.linalg.norm(_pbc_diff(u - v, lat_and_inv))
        )

    tril_idxs = np.tril_indices(n_atoms, k=-1)
    return sp.spatial.distance.squareform(pdist, checks=False)[tril_idxs]


def _squareform(vec_or_mat):

    # vector to matrix representation
    if vec_or_mat.ndim == 1:

        n_tril = vec_or_mat.size
        n = int((1 + np.sqrt(8 * n_tril + 1)) / 2)

        i, j = np.tril_indices(n, k=-1)

        mat = np.zeros((n, n))
        mat[i, j] = vec_or_mat
        mat[j, i] = vec_or_mat

        return mat

    else:  # matrix to vector

        assert vec_or_mat.shape[0] == vec_or_mat.shape[1]  # matrix is square

        n = vec_or_mat.shape[0]
        i, j = np.tril_indices(n, k=-1)

        return vec_or_mat[i, j]


[docs]def _r_to_desc(r, pdist):
    """
    Generate descriptor for a set of atom positions in Cartesian
    coordinates.

    Parameters
    ----------
        r : :obj:`numpy.ndarray`
            Array of size 3N containing the Cartesian coordinates of
            each atom.
        pdist : :obj:`numpy.ndarray`
            Array of size N x N containing the Euclidean distance
            (2-norm) for each pair of atoms.

    Returns
    -------
        :obj:`numpy.ndarray`
            Descriptor representation as 1D array of size N(N-1)/2
    """

    # Add singleton dimension if input is (,3N).
    if r.ndim == 1:
        r = r[None, :]

    return 1.0 / pdist


[docs]def _r_to_d_desc(r, pdist, lat_and_inv=None):
    """
    Generate descriptor Jacobian for a set of atom positions in
    Cartesian coordinates.

    This method can apply the minimum-image convention as periodic
    boundary condition for distances between atoms, given the lattice vectors.

    Parameters
    ----------
        r : :obj:`numpy.ndarray`
            Array of size 3N containing the Cartesian coordinates of
            each atom.
        pdist : :obj:`numpy.ndarray`
            Array of size N x N containing the Euclidean distance
            (2-norm) for each pair of atoms.
        lat_and_inv : tuple of :obj:`numpy.ndarray`, optional
            Tuple of 3 x 3 matrix containing lattice vectors as columns and its inverse.

    Returns
    -------
        :obj:`numpy.ndarray`
            Array of size N(N-1)/2 x 3N containing all partial
            derivatives of the descriptor.
    """

    r = r.reshape(-1, 3)
    pdiff = r[:, None] - r[None, :]  # pairwise differences ri - rj

    n_atoms = r.shape[0]
    i, j = np.tril_indices(n_atoms, k=-1)

    pdiff = pdiff[i, j, :]  # lower triangular

    if lat_and_inv is not None:
        pdiff = _pbc_diff(pdiff, lat_and_inv)

    d_desc_elem = pdiff / (pdist**3)[:, None]

    return d_desc_elem


[docs]def _from_r(r, lat_and_inv=None):
    """
    Generate descriptor and its Jacobian for one molecular geometry
    in Cartesian coordinates.

    Parameters
    ----------
        r : :obj:`numpy.ndarray`
            Array of size 3N containing the Cartesian coordinates of
            each atom.
        lat_and_inv : tuple of :obj:`numpy.ndarray`, optional
            Tuple of 3 x 3 matrix containing lattice vectors as columns and its inverse.

    Returns
    -------
        :obj:`numpy.ndarray`
            Descriptor representation as 1D array of size N(N-1)/2
        :obj:`numpy.ndarray`
            Array of size N(N-1)/2 x 3N containing all partial
            derivatives of the descriptor.
    """

    # Add singleton dimension if input is (,3N).
    if r.ndim == 1:
        r = r[None, :]

    pd = _pdist(r, lat_and_inv)

    r_desc = _r_to_desc(r, pd)
    r_d_desc = _r_to_d_desc(r, pd, lat_and_inv)

    return r_desc, r_d_desc


[docs]class Desc(object):
    # def __init__(self, n_atoms, interact_cut_off=None, max_processes=None):
    def __init__(self, n_atoms, max_processes=None):
        """
        Generate descriptors and their Jacobians for molecular geometries,
        including support for periodic boundary conditions.

        Parameters
        ----------
                n_atoms : int
                        Number of atoms in the represented system.
                max_processes : int, optional
                        Limit the max. number of processes. Otherwise
                        all CPU cores are used.
        """

        self.n_atoms = n_atoms
        self.dim_i = 3 * n_atoms

        # Size of the resulting descriptor vector.
        self.dim = (n_atoms * (n_atoms - 1)) // 2

        self.tril_indices = np.tril_indices(n_atoms, k=-1)

        # Precompute indices for nonzero entries in desriptor derivatives.
        self.d_desc_mask = np.zeros((n_atoms, n_atoms - 1), dtype=np.int)
        for a in range(n_atoms):  # for each partial derivative
            rows, cols = self.tril_indices
            self.d_desc_mask[a, :] = np.concatenate(
                [np.where(rows == a)[0], np.where(cols == a)[0]]
            )

        self.dim_range = np.arange(self.dim)  # [0, 1, ..., dim-1]

        # Precompute indices for nonzero entries in desriptor derivatives.

        self.M = np.arange(1, n_atoms)  # indexes matrix row-wise, skipping diagonal
        for a in range(1, n_atoms):
            self.M = np.concatenate((self.M, np.delete(np.arange(n_atoms), a)))

        self.A = np.repeat(
            np.arange(n_atoms), n_atoms - 1
        )  # [0, 0, ..., 1, 1, ..., 2, 2, ...]

        self.max_processes = max_processes

[docs]    def from_R(self, R, lat_and_inv=None, max_processes=None, callback=None):
        """
        Generate descriptor and its Jacobian for multiple molecular geometries
        in Cartesian coordinates.

        Parameters
        ----------
            R : :obj:`numpy.ndarray`
                Array of size M x 3N containing the Cartesian coordinates of
                each atom.
            lat_and_inv : tuple of :obj:`numpy.ndarray`, optional
                Tuple of 3 x 3 matrix containing lattice vectors as columns and its inverse.
            max_processes : int, optional
                Limit the max. number of processes. Otherwise
                all CPU cores are used. This parameter overwrites the global setting as
                set during initialization.
            callback : callable, optional
                Descriptor and descriptor Jacobian generation status.
                    current : int
                        Current progress (number of completed descriptors).
                    total : int
                        Task size (total number of descriptors to create).
                    sec_disp_str : :obj:`str`, optional
                        Once complete, this string contains the
                        time it took complete this task (seconds).

        Returns
        -------
            :obj:`numpy.ndarray`
                Array of size M x N(N-1)/2 containing the descriptor representation
                for each geometry.
            :obj:`numpy.ndarray`
                Array of size M x N(N-1)/2 x 3N containing all partial
                derivatives of the descriptor for each geometry.
        """

        # Add singleton dimension if input is (,3N).
        if R.ndim == 1:
            R = R[None, :]

        M = R.shape[0]
        if M == 1:
            return _from_r(R, lat_and_inv)

        R_desc = np.empty([M, self.dim])
        R_d_desc = np.empty([M, self.dim, 3])

        # Generate descriptor and their Jacobians
        start = timeit.default_timer()

        pool = None
        map_func = map
        max_processes = max_processes or self.max_processes
        if max_processes != 1 and mp.cpu_count() > 1:
            pool = Pool((max_processes or mp.cpu_count()) - 1)  # exclude main process
            map_func = pool.imap

        for i, r_desc_r_d_desc in enumerate(
            map_func(partial(_from_r, lat_and_inv=lat_and_inv), R)
        ):
            R_desc[i, :], R_d_desc[i, :, :] = r_desc_r_d_desc

            if callback is not None and i < M - 1:
                callback(i, M - 1)

        if pool is not None:
            pool.close()
            pool.join()  # Wait for the worker processes to terminate (to measure total runtime correctly).
            pool = None

        stop = timeit.default_timer()

        if callback is not None:
            dur_s = stop - start
            sec_disp_str = 'took {:.1f} s'.format(dur_s) if dur_s >= 0.1 else ''
            callback(M, M, sec_disp_str=sec_disp_str)

        return R_desc, R_d_desc

    # Multiplies descriptor(s) jacobian with 3N-vector(s) from the right side
[docs]    def d_desc_dot_vec(self, R_d_desc, vecs, overwrite_vecs=False):

        if R_d_desc.ndim == 2:
            R_d_desc = R_d_desc[None, ...]

        if vecs.ndim == 1:
            vecs = vecs[None, ...]

        i, j = self.tril_indices

        vecs = vecs.reshape(vecs.shape[0], -1, 3)

        einsum = np.einsum
        if _has_torch and torch.is_tensor(R_d_desc):
            assert torch.is_tensor(vecs)
            einsum = torch.einsum

        return einsum('...ij,...ij->...i', R_d_desc, vecs[:, j, :] - vecs[:, i, :])

    # Multiplies descriptor(s) jacobian with N(N-1)/2-vector(s) from the left side
[docs]    def vec_dot_d_desc(self, R_d_desc, vecs, out=None):

        if R_d_desc.ndim == 2:
            R_d_desc = R_d_desc[None, ...]

        if vecs.ndim == 1:
            vecs = vecs[None, ...]

        assert (
            R_d_desc.shape[0] == 1
            or vecs.shape[0] == 1
            or R_d_desc.shape[0] == vecs.shape[0]
        )  # either multiple descriptors or multiple vectors at once, not both (or the same number of both, than it will must be a multidot)

        n = np.max((R_d_desc.shape[0], vecs.shape[0]))
        i, j = self.tril_indices

        out = np.zeros((n, self.n_atoms, self.n_atoms, 3))
        out[:, i, j, :] = R_d_desc * vecs[..., None]
        out[:, j, i, :] = -out[:, i, j, :]
        return out.sum(axis=1).reshape(n, -1)

        # if out is None or out.shape != (n, self.n_atoms*3):
        #    out = np.zeros((n, self.n_atoms*3))

        # R_d_desc_full = np.zeros((self.n_atoms, self.n_atoms, 3))
        # for a in range(n):

        #   R_d_desc_full[i, j, :] = R_d_desc * vecs[a, :, None]
        #    R_d_desc_full[j, i, :] = -R_d_desc_full[i, j, :]
        #    out[a,:] = R_d_desc_full.sum(axis=0).ravel()

        # return out

[docs]    def d_desc_from_comp(self, R_d_desc, out=None):
        """
        Convert a compressed representation of a descriptor Jacobian back
        to its full representation.

        The compressed representation omits all zeros and scales with N
        instead of N(N-1)/2.

        Parameters
        ----------
            R_d_desc : :obj:`numpy.ndarray` or :obj:`torch.tensor`
                Array of size M x N x N x 3 containing the compressed
                descriptor Jacobian.
            out : :obj:`numpy.ndarray` or :obj:`torch.tensor`, optional
                Output argument. This must have the exact kind that would
                be returned if it was not used.

        Note
        ----
                If used, the output argument must be initialized with zeros!

        Returns
        -------
            :obj:`numpy.ndarray` or :obj:`torch.tensor`
                Array of size M x N(N-1)/2 x 3N containing the full
                representation.
        """

        if R_d_desc.ndim == 2:
            R_d_desc = R_d_desc[None, ...]

        n = R_d_desc.shape[0]
        i, j = self.tril_indices

        if out is None:
            if _has_torch and torch.is_tensor(R_d_desc):
                device = R_d_desc.device
                dtype = R_d_desc.dtype
                out = torch.zeros((n, self.dim, self.n_atoms, 3), device=device).to(
                    dtype
                )
            else:
                out = np.zeros((n, self.dim, self.n_atoms, 3))
        else:
            out = out.reshape(n, self.dim, self.n_atoms, 3)

        out[:, self.dim_range, j, :] = R_d_desc
        out[:, self.dim_range, i, :] = -R_d_desc

        return out.reshape(-1, self.dim, self.dim_i)

[docs]    def d_desc_to_comp(self, R_d_desc):
        """
        Convert a descriptor Jacobian to a compressed representation.

        The compressed representation omits all zeros and scales with N
        instead of N(N-1)/2.

        Parameters
        ----------
            R_d_desc : :obj:`numpy.ndarray`
                Array of size M x N(N-1)/2 x 3N containing the descriptor
                Jacobian.

        Returns
        -------
            :obj:`numpy.ndarray`
                Array of size M x N x N x 3 containing the compressed
                representation.
        """

        # Add singleton dimension for single inputs.
        if R_d_desc.ndim == 2:
            R_d_desc = R_d_desc[None, ...]

        n = R_d_desc.shape[0]
        n_atoms = int(R_d_desc.shape[2] / 3)

        R_d_desc = R_d_desc.reshape(n, -1, n_atoms, 3)

        ret = np.zeros((n, n_atoms, n_atoms, 3))
        ret[:, self.M, self.A, :] = R_d_desc[:, self.d_desc_mask.ravel(), self.A, :]

        # Take the upper triangle.
        i, j = self.tril_indices
        return ret[:, i, j, :]

[docs]    @staticmethod
    def perm(perm):
        """
        Convert atom permutation to descriptor permutation.

        A permutation of N atoms is converted to a permutation that acts on
        the corresponding descriptor representation. Applying the converted
        permutation to a descriptor is equivalent to permuting the atoms
        first and then generating the descriptor.

        Parameters
        ----------
            perm : :obj:`numpy.ndarray`
                Array of size N containing the atom permutation.

        Returns
        -------
            :obj:`numpy.ndarray`
                Array of size N(N-1)/2 containing the corresponding
                descriptor permutation.
        """

        n = len(perm)

        rest = np.zeros((n, n))
        rest[np.tril_indices(n, -1)] = list(range((n**2 - n) // 2))
        rest = rest + rest.T
        rest = rest[perm, :]
        rest = rest[:, perm]

        return rest[np.tril_indices(n, -1)].astype(int)