Source code for structuretoolkit.analyse.neighbors

# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department
# Distributed under the terms of "New BSD License", see the LICENSE file.

import itertools
import warnings
from typing import Any, cast

import numpy as np
from ase.atoms import Atoms
from scipy.sparse import coo_matrix
from scipy.spatial import cKDTree
from scipy.spatial.transform import Rotation
from scipy.special import gamma, sph_harm_y

from structuretoolkit.common.helper import (
    get_average_of_unique_labels,
    get_extended_positions,
)

__author__ = "Joerg Neugebauer, Sam Waseda"
__copyright__ = (
    "Copyright 2021, Max-Planck-Institut für Eisenforschung GmbH - "
    "Computational Materials Design (CM) Department"
)
__version__ = "1.0"
__maintainer__ = "Sam Waseda"
__email__ = "waseda@mpie.de"
__status__ = "production"
__date__ = "Sep 1, 2017"


[docs] class Tree: """ Class to get tree structure for the neighborhood information. Main attributes (do not modify them): - distances (numpy.ndarray/list): Distances to the neighbors of given positions - indices (numpy.ndarray/list): Indices of the neighbors of given positions - vecs (numpy.ndarray/list): Vectors to the neighbors of given positions Auxiliary attributes: - wrap_positions (bool): Whether to wrap back the positions entered by user in get_neighborhood etc. Since the information outside the original box is limited to a few layer, wrap_positions=False might miss some points without issuing an error. Change representation mode via :attribute:`.Neighbors.mode` (cf. its DocString) Furthermore, you can re-employ the original tree structure to get neighborhood information via get_neighborhood. """
[docs] def __init__(self, ref_structure: Atoms): """ Args: ref_structure (ase.atoms.Atoms): Reference structure. """ self._distances: Any = None self._vectors: Any = None self._indices: Any = None self._mode = {"filled": True, "ragged": False, "flattened": False} self._extended_positions: Any = None self._positions: Any = None self._wrapped_indices: Any = None self._extended_indices: Any = None self._ref_structure = ref_structure.copy() self.wrap_positions = False self._tree: Any = None self.num_neighbors: int | None = None self.cutoff_radius = np.inf self._norm_order = 2
@property def mode(self) -> str: """ Change the mode of representing attributes (vecs, distances, indices, shells). The shapes of `filled` and `ragged` differ only if `cutoff_radius` is specified. - 'filled': Fill variables for the missing entries are filled as follows: `np.inf` in `distances`, `numpy.array([np.inf, np.inf, np.inf])` in `vecs`, `n_atoms+1` (or a larger value) in `indices` and -1 in `shells`. - 'ragged': Create lists of different lengths. - 'flattened': Return flattened arrays for distances, vecs and shells. The indices corresponding to the row numbers in 'filled' and 'ragged' are in `atom_numbers` The variables are stored in the `filled` mode. """ for k, v in self._mode.items(): if v: return k raise ValueError("No neighbor representation mode is active") def _set_mode(self, new_mode: str) -> None: """ Set the mode of representing attributes. Args: new_mode (str): The new mode to set. Raises: KeyError: If the new mode is not found in the available modes. """ if new_mode not in self._mode: raise KeyError( f"{new_mode} not found. Available modes: {', '.join(self._mode.keys())}" ) self._mode = dict.fromkeys(self._mode, False) self._mode[new_mode] = True def __repr__(self) -> str: """ Return a string representation of the Tree object. Returns: str: A string representation of the Tree object. """ to_return = ( "Main attributes:\n" + "- distances : Distances to the neighbors of given positions\n" + "- indices : Indices of the neighbors of given positions\n" + "- vecs : Vectors to the neighbors of given positions\n" ) return to_return
[docs] def copy(self) -> "Tree": """ Create a copy of the Tree object. Returns: Tree: A copy of the Tree object. """ new_neigh = Tree(self._ref_structure) new_neigh._distances = self._distances.copy() new_neigh._indices = self._indices.copy() new_neigh._extended_positions = self._extended_positions new_neigh._wrapped_indices = self._wrapped_indices new_neigh._extended_indices = self._extended_indices new_neigh.wrap_positions = self.wrap_positions new_neigh._tree = self._tree new_neigh.num_neighbors = self.num_neighbors new_neigh.cutoff_radius = self.cutoff_radius new_neigh._norm_order = self._norm_order new_neigh._positions = self._positions.copy() return new_neigh
def _reshape( self, value: np.ndarray, key: str | None = None, ref_vector: np.ndarray | None = None, ) -> np.ndarray: """ Reshape the given value based on the specified key and reference vector. Args: value (np.ndarray): The value to reshape. key (Optional[str]): The representation mode key. Defaults to None. ref_vector (Optional[np.ndarray]): The reference vector. Defaults to None. Returns: np.ndarray: The reshaped value. """ if value is None: raise ValueError("Neighbors not initialized yet") if key is None: key = self.mode if key == "filled": return value elif key == "ragged": return self._contract(value, ref_vector=ref_vector) elif key == "flattened": return value[self._distances < np.inf] raise ValueError(f"Unknown neighbor representation mode: {key}") @property def distances(self) -> np.ndarray: """ Get the distances to neighboring atoms. Returns: np.ndarray: The distances to neighboring atoms. """ return self._reshape(self._distances) @property def _vecs(self) -> np.ndarray: """ Get the vectors to neighboring atoms. Returns: np.ndarray: The vectors to neighboring atoms. """ if self._vectors is None: self._vectors = self._get_vectors( positions=self._positions, distances=self.filled.distances, indices=self._extended_indices, ) return self._vectors @property def vecs(self) -> np.ndarray: """ Get the vectors to neighboring atoms. Returns: np.ndarray: The vectors to neighboring atoms. """ return self._reshape(self._vecs) @property def indices(self) -> np.ndarray: """ Get the indices of neighboring atoms. Returns: np.ndarray: The indices of neighboring atoms. """ return self._reshape(self._indices) @property def atom_numbers(self) -> np.ndarray: """ Get the indices of atoms. Returns: np.ndarray: The indices of atoms. """ n = np.zeros_like(self.filled.indices) n.T[:, :] = np.arange(len(n)) return self._reshape(n) @property def norm_order(self) -> int: """ Norm to use for the neighborhood search and shell recognition. The definition follows the conventional Lp norm (cf. https://en.wikipedia.org/wiki/Lp_space). This is still an experimental feature and for anything other than norm_order=2, there is no guarantee that this works flawlessly. """ return self._norm_order @norm_order.setter def norm_order(self, value: int) -> None: """ Set the norm order for the neighborhood search and shell recognition. Args: value (int): The norm order value. Raises: ValueError: If trying to change the norm_order after initialization. """ raise ValueError( "norm_order cannot be changed after initialization. Re-initialize the Neighbor class" + " with the correct norm_order value" ) def _get_max_length(self, ref_vector: np.ndarray | None = None) -> int | None: """ Get the maximum length of the reference vector. Args: ref_vector (Optional[np.ndarray]): The reference vector. Defaults to None. Returns: int: The maximum length of the reference vector. """ if ref_vector is None: ref_vector = self.filled.distances if ( ref_vector is None or len(ref_vector) == 0 or not hasattr(ref_vector[0], "__len__") ): return None return max(len(dd[dd < np.inf]) for dd in ref_vector) def _contract(self, value: np.ndarray, ref_vector: np.ndarray | None = None): """ Contract the given value based on the specified reference vector. Args: value (np.ndarray): The value to contract. ref_vector (Optional[np.ndarray]): The reference vector. Defaults to None. Returns: np.ndarray: The contracted value. """ if self._get_max_length(ref_vector=ref_vector) is None: return value return [ vv[: np.sum(dist < np.inf)] for vv, dist in zip(value, self.filled.distances, strict=True) ] def _allow_ragged_to_mode(self, new_bool: bool | None) -> str: """ Set the representation mode based on the value of new_bool. Args: new_bool (bool): The new value for the representation mode. Returns: str: The updated representation mode. """ if new_bool is None: return self.mode elif new_bool: return "ragged" return "filled" def _get_extended_positions(self) -> np.ndarray: """ Get the extended positions. Returns: np.ndarray: The extended positions. """ if self._extended_positions is None: return self._ref_structure.positions return self._extended_positions def _get_wrapped_indices(self) -> np.ndarray: """ Get the wrapped indices. Returns: np.ndarray: The wrapped indices. """ if self._wrapped_indices is None: return np.arange(len(self._ref_structure.positions)) return self._wrapped_indices def _get_wrapped_positions( self, positions: np.ndarray, distance_buffer: float = 1.0e-12 ) -> np.ndarray: """ Get the wrapped positions based on the given positions. Args: positions (np.ndarray): The positions to wrap. distance_buffer (float): The distance buffer. Defaults to 1.0e-12. Returns: np.ndarray: The wrapped positions. """ if not self.wrap_positions: return np.asarray(positions) x = np.array(positions).copy() cell = self._ref_structure.cell x_scale = np.dot(x, np.linalg.inv(cell)) + distance_buffer x[..., self._ref_structure.pbc] -= np.dot(np.floor(x_scale), cell)[ ..., self._ref_structure.pbc ] return x def _get_distances_and_indices( self, positions: np.ndarray, num_neighbors: int | None = None, cutoff_radius: float = np.inf, width_buffer: float = 1.2, ) -> tuple[np.ndarray, np.ndarray]: """ Get the distances and indices of the neighbors for the given positions. Args: positions (np.ndarray): The positions to get the neighbors for. num_neighbors (Optional[int]): The number of neighbors to consider. Defaults to None. cutoff_radius (float): The cutoff radius for neighbor search. Defaults to np.inf. width_buffer (float): The width buffer for neighbor search. Defaults to 1.2. Returns: Tuple[np.ndarray, np.ndarray]: The distances and indices of the neighbors. """ num_neighbors = self._estimate_num_neighbors( num_neighbors=num_neighbors, cutoff_radius=cutoff_radius, width_buffer=width_buffer, ) if ( len(self._get_extended_positions()) < num_neighbors and cutoff_radius == np.inf ): raise ValueError( "num_neighbors too large - make width_buffer larger and/or make " + "num_neighbors smaller" ) positions = self._get_wrapped_positions(positions) distances, indices = self._tree.query( positions, k=num_neighbors, distance_upper_bound=cutoff_radius, p=self.norm_order, ) shape = positions.shape[:-1] + (num_neighbors,) distances = np.array([distances]).reshape(shape) indices = np.array([indices]).reshape(shape) if cutoff_radius < np.inf and np.any(distances.T[-1] < np.inf): warnings.warn( "Number of neighbors found within the cutoff_radius is equal to (estimated) " + "num_neighbors. Increase num_neighbors (or set it to None) or " + "width_buffer to find all neighbors within cutoff_radius.", stacklevel=2, ) self._extended_indices = indices.copy() indices[distances < np.inf] = self._get_wrapped_indices()[ indices[distances < np.inf] ] indices[distances == np.inf] = np.iinfo(np.int32).max return ( self._reshape(distances, ref_vector=distances), self._reshape(indices, ref_vector=distances), ) @property def numbers_of_neighbors(self) -> int: """ Get the number of neighbors for each atom. Returns: int: The number of neighbors for each atom. Same number is returned if `cutoff_radius` was not given in the initialization. """ return np.sum(self.filled.distances < np.inf, axis=-1) def _get_vectors( self, positions: np.ndarray, num_neighbors: int | None = None, cutoff_radius: float = np.inf, distances: np.ndarray | None = None, indices: np.ndarray | None = None, width_buffer: float = 1.2, ) -> np.ndarray: """ Get the vectors of the neighbors for the given positions. Args: positions (np.ndarray): The positions to get the neighbors for. num_neighbors (Optional[int]): The number of neighbors to consider. Defaults to None. cutoff_radius (float): The cutoff radius for neighbor search. Defaults to np.inf. distances (Optional[np.ndarray]): The distances of the neighbors. Defaults to None. indices (Optional[np.ndarray]): The indices of the neighbors. Defaults to None. width_buffer (float): The width buffer for neighbor search. Defaults to 1.2. Returns: np.ndarray: The vectors of the neighbors. """ if distances is None or indices is None: distances, indices = self._get_distances_and_indices( positions=positions, num_neighbors=num_neighbors, cutoff_radius=cutoff_radius, width_buffer=width_buffer, ) vectors = np.zeros(distances.shape + (3,)) vectors -= self._get_wrapped_positions(positions).reshape( distances.shape[:-1] + (-1, 3) ) vectors[distances < np.inf] += self._get_extended_positions()[ self._extended_indices[distances < np.inf] ] vectors[distances == np.inf] = np.array(3 * [np.inf]) return vectors def _estimate_num_neighbors( self, num_neighbors: int | None = None, cutoff_radius: float = np.inf, width_buffer: float = 1.2, ) -> int: """ Estimate the number of neighbors required for a given cutoff radius. Args: num_neighbors (Optional[int]): Number of neighbors. Defaults to None. cutoff_radius (float): Cutoff radius. Defaults to np.inf. width_buffer (float): Width of the layer to be added to account for PBC. Defaults to 1.2. Returns: int: Number of atoms required for the given cutoff. Raises: ValueError: If num_neighbors or cutoff_radius is not specified. """ if ( num_neighbors is None and cutoff_radius == np.inf and self.num_neighbors is None ): raise ValueError("Specify num_neighbors or cutoff_radius") elif num_neighbors is None and self.num_neighbors is None: volume = self._ref_structure.get_volume() / len(self._ref_structure) width_buffer = 1 + width_buffer width_buffer *= get_volume_of_n_sphere_in_p_norm(3, self.norm_order) num_neighbors = max(14, int(width_buffer * cutoff_radius**3 / volume)) elif num_neighbors is None: num_neighbors = self.num_neighbors if self.num_neighbors is None: self.num_neighbors = num_neighbors self.cutoff_radius = cutoff_radius assert num_neighbors is not None assert self.num_neighbors is not None if num_neighbors > self.num_neighbors: warnings.warn( "Taking a larger search area after initialization has the risk of " + "missing neighborhood atoms", stacklevel=2, ) return num_neighbors def _estimate_width( self, num_neighbors: int | None = None, cutoff_radius: float = np.inf, width_buffer: float = 1.2, ) -> float: """ Estimate the width of the layer required for the given number of atoms. Args: num_neighbors (Optional[int]): Number of neighbors. Defaults to None. cutoff_radius (float): Cutoff radius. Defaults to np.inf. width_buffer (float): Width of the layer to be added to account for PBC. Defaults to 1.2. Returns: float: Width of layer required for the given number of atoms. Raises: ValueError: If num_neighbors or cutoff_radius is not specified. """ if num_neighbors is None and cutoff_radius == np.inf: raise ValueError("Define either num_neighbors or cutoff_radius") if all(self._ref_structure.pbc == False): return 0 elif cutoff_radius != np.inf: return cutoff_radius prefactor = get_volume_of_n_sphere_in_p_norm(3, self.norm_order) width = np.prod( np.linalg.norm(self._ref_structure.cell, axis=-1, ord=self.norm_order) ) width *= prefactor * np.max([num_neighbors, 8]) / len(self._ref_structure) cutoff_radius = width_buffer * width ** (1 / 3) return cutoff_radius
[docs] def get_neighborhood( self, positions: np.ndarray, num_neighbors: int | None = None, cutoff_radius: float = np.inf, width_buffer: float = 1.2, ) -> "Tree": """ Get neighborhood information of `positions`. What it returns is in principle the same as `get_neighborhood` in `Atoms`. The only one difference is the reuse of the same Tree structure, which makes the algorithm more efficient, but will fail if the reference structure changed in the meantime. Args: positions (np.ndarray): Positions in a box whose neighborhood information is analyzed. num_neighbors (Optional[int]): Number of nearest neighbors. Defaults to None. cutoff_radius (float): Upper bound of the distance to which the search is to be done. Defaults to np.inf. width_buffer (float): Width of the layer to be added to account for pbc. Defaults to 1.2. Returns: Tree: Neighbors instance with the neighbor indices, distances, and vectors. """ new_neigh = self.copy() return new_neigh._get_neighborhood( positions=positions, num_neighbors=num_neighbors, cutoff_radius=cutoff_radius, exclude_self=False, width_buffer=width_buffer, )
def _get_neighborhood( self, positions: np.ndarray, num_neighbors: int | None = 12, cutoff_radius: float = np.inf, exclude_self: bool = False, width_buffer: float = 1.2, ) -> "Tree": """ Get the neighborhood information for the given positions. Args: positions (np.ndarray): The positions to get the neighborhood for. num_neighbors (int): The number of neighbors to consider. Defaults to 12. cutoff_radius (float): The cutoff radius for neighbor search. Defaults to np.inf. exclude_self (bool): Whether to exclude the position itself from the neighbors. Defaults to False. width_buffer (float): The width buffer for neighbor search. Defaults to 1.2. Returns: Tree: The Tree instance with the neighbor indices, distances, and vectors. """ start_column = 0 if exclude_self: start_column = 1 if num_neighbors is not None: num_neighbors += 1 distances, indices = self._get_distances_and_indices( positions, num_neighbors=num_neighbors, cutoff_radius=cutoff_radius, width_buffer=width_buffer, ) if num_neighbors is not None and self.num_neighbors is not None: self.num_neighbors -= 1 max_column = np.sum(distances < np.inf, axis=-1).max() self._distances = distances[..., start_column:max_column] self._indices = indices[..., start_column:max_column] self._extended_indices = self._extended_indices[..., start_column:max_column] self._positions = positions return self def _check_width(self, width: float, pbc: np.ndarray) -> bool: """ Check if the width of the layer exceeds the specified value. Args: width (float): The width of the layer. pbc (list[bool, bool, bool]): The periodic boundary conditions. Returns: bool: True if the width exceeds the specified value, False otherwise. """ return bool( any(pbc) and np.prod(self.filled.distances.shape) > 0 and np.linalg.norm( self.flattened.vecs[..., pbc], axis=-1, ord=self.norm_order ).max() > width )
[docs] def get_spherical_harmonics( self, l: np.ndarray, m: np.ndarray, cutoff_radius: float = np.inf, rotation: np.ndarray | None = None, ) -> np.ndarray: """ Args: l (int/numpy.array): Degree of the harmonic (int); must have ``l >= 0``. m (int/numpy.array): Order of the harmonic (int); must have ``|m| <= l``. cutoff_radius (float): maximum neighbor distance to include (default: inf, i.e. all atoms included in the neighbor search). rotation ( (3,3) numpy.array/list): Rotation to make sure phi does not become nan Returns: ( (natoms,) numpy.array) spherical harmonic values Spherical harmonics defined as follows Y^m_l(\theta,\phi) = \sqrt{\frac{2l+1}{4\pi} \frac{(l-m)!}{(l+m)!}} e^{i m \theta} P^m_l(\cos(\phi)) The angles are calculated based on `self.vecs`, where the azimuthal angle is defined on the xy-plane and the polar angle is along the z-axis. See more on: scipy.special.sph_harm_y """ vecs = self.filled.vecs if rotation is not None: vecs = np.einsum("ij,nmj->nmi", rotation, vecs) within_cutoff = self.filled.distances < cutoff_radius if np.any(np.all(~within_cutoff, axis=-1)): raise ValueError("cutoff_radius too small - some atoms have no neighbors") phi = np.zeros_like(self.filled.distances) theta = np.zeros_like(self.filled.distances) theta[within_cutoff] = np.arctan2( vecs[within_cutoff, 1], vecs[within_cutoff, 0] ) phi[within_cutoff] = np.arctan2( np.linalg.norm(vecs[within_cutoff, :2], axis=-1), vecs[within_cutoff, 2] ) return np.sum(sph_harm_y(l, m, phi, theta) * within_cutoff, axis=-1) / np.sum( within_cutoff, axis=-1 )
[docs] def get_steinhardt_parameter( self, l: np.ndarray, cutoff_radius: float = np.inf ) -> np.ndarray: """ Args: l (int/numpy.array): Order of Steinhardt parameter cutoff_radius (float): maximum neighbor distance to include (default: inf, i.e. all atoms included in the neighbor search). Returns: ( (natoms,) numpy.array) Steinhardt parameter values See more on https://pyscal.org/part3/steinhardt.html Note: This function does not have an internal algorithm to calculate a suitable cutoff radius. For automated uses, see Atoms.analyse.pyscal_steinhardt_parameter() """ random_rotation = Rotation.from_mrp(np.random.random(3)).as_matrix() harmonic_values: list[np.ndarray] = [ np.absolute( self.get_spherical_harmonics( l=l, m=m, cutoff_radius=cutoff_radius, rotation=random_rotation, ) ) ** 2 for m in np.arange(-l, l + 1) ] return np.sqrt(4 * np.pi / (2 * l + 1) * np.sum(harmonic_values, axis=0))
@staticmethod def _get_all_possible_pairs(l: int) -> np.ndarray: """ Get all possible pairs of indices for a given number of groups. Args: l (int): Number of groups. Returns: np.ndarray: Array of all possible pairs of indices. Raises: ValueError: If the number of groups is odd. """ if l % 2 != 0: raise ValueError("Pairs cannot be formed for an uneven number of groups.") all_arr = np.array(list(itertools.permutations(np.arange(l), l))) all_arr = all_arr.reshape(len(all_arr), -1, 2) all_arr.sort(axis=-1) all_arr = all_arr[ np.unique(all_arr.reshape(-1, l), axis=0, return_index=True)[1] ] indices = np.indices(all_arr.shape) all_arr = all_arr[ indices[0], all_arr[:, :, 0].argsort(axis=-1)[:, :, np.newaxis], indices[2] ] return all_arr[np.unique(all_arr.reshape(-1, l), axis=0, return_index=True)[1]] @property def centrosymmetry(self) -> np.ndarray: """ Calculate centrosymmetry parameter for the given environment. cf. https://doi.org/10.1103/PhysRevB.58.11085 NB: Currently very memory intensive for a large number of neighbors (works maybe up to 10) """ all_arr = self._get_all_possible_pairs(self.distances.shape[-1]) indices = np.indices((len(self.vecs),) + all_arr.shape[:-1]) v = self.vecs[indices[0], all_arr[np.newaxis, :, :, 0]] v += self.vecs[indices[0], all_arr[np.newaxis, :, :, 1]] return np.sum(v**2, axis=-1).sum(axis=-1).min(axis=-1) def __getattr__(self, name): """Attributes for the mode. Same as setting `neigh.mode`.""" if name not in ["filled", "ragged", "flattened"]: raise AttributeError( self.__class__.__name__ + " object has no attribute " + name ) return Mode(name, self) def __dir__(self): """Attributes for the mode.""" return ["filled", "ragged", "flattened"] + super().__dir__()
[docs] class Mode: """Helper class for mode Attributes: `distances`, `vecs`, `indices`, `shells`, `atom_numbers` and maybe more """
[docs] def __init__(self, mode: str, ref_neigh): """ Args: mode (str): Mode (`filled`, `ragged` or `flattened`) ref_neigh (Neighbors): Reference neighbor class """ self.mode = mode self.ref_neigh = ref_neigh
def __getattr__(self, name): """Return values according to their filling mode.""" if "_" + name in self.ref_neigh.__dir__(): name = "_" + name return self.ref_neigh._reshape( self.ref_neigh.__getattribute__(name), key=self.mode ) def __dir__(self): """Show value names which are available for different filling modes.""" return list( {"distances", "vecs", "indices", "shells", "atom_numbers"}.intersection( self.ref_neigh.__dir__() ) )
[docs] class Neighbors(Tree):
[docs] def __init__(self, ref_structure: Atoms, tolerance: int = 2): """ Neighbors class for analyzing neighboring atoms in a structure. Args: ref_structure (ase.Atoms): Reference structure. tolerance (int): Tolerance for rounding distances (default: 2). """ super().__init__(ref_structure=ref_structure) self._tolerance = tolerance self._cluster_vecs: Any = None self._cluster_dist: Any = None
def __repr__(self): """ Returns a string representation of the Neighbors object. """ to_return = super().__repr__() return to_return.replace("given positions", "each atom") @property def chemical_symbols(self) -> np.ndarray: """ Returns chemical symbols of the neighboring atoms. Undefined neighbors (i.e. if the neighbor distance is beyond the cutoff radius) are considered as vacancies and are marked by 'v'. Returns: np.ndarray: Chemical symbols of neighboring atoms. """ chemical_symbols = np.tile(["v"], self.filled.indices.shape).astype("<U2") cond = self.filled.indices < len(self._ref_structure) chemical_symbols[cond] = np.array(self._ref_structure.get_chemical_symbols())[ self.filled.indices[cond] ] return chemical_symbols @property def shells(self) -> np.ndarray: """ Returns the cell numbers of each atom according to the distances. Returns: np.ndarray: Shell indices. """ return self.get_local_shells(mode=self.mode)
[docs] def get_local_shells( self, mode: str | None = None, tolerance: int | None = None, cluster_by_distances: bool = False, cluster_by_vecs: bool = False, ) -> np.ndarray: """ Set shell indices based on distances available to each atom. Clustering methods can be used at the same time, which will be useful at finite temperature results, but depending on how dispersed the atoms are, the algorithm could take some time. If the clustering method(-s) have already been launched before this function, it will use the results already available and does not execute the clustering method(-s) again. Args: mode (str): Representation of the variable. Choose from 'filled', 'ragged' and 'flattened'. tolerance (int): Decimals in np.round for rounding up distances. cluster_by_distances (bool): If True, `cluster_by_distances` is called first and the distances obtained from the clustered distances are used to calculate the shells. If cluster_by_vecs is True at the same time, `cluster_by_distances` will use the clustered vectors for its clustering algorithm. cluster_by_vecs (bool): If True, `cluster_by_vectors` is called first and the distances obtained from the clustered vectors are used to calculate the shells. Returns: np.ndarray: Shell indices. """ if tolerance is None: tolerance = self._tolerance if mode is None: mode = self.mode if cluster_by_distances: if self._cluster_dist is None: self.cluster_by_distances(use_vecs=cluster_by_vecs) shells = np.array( [ np.unique(np.round(dist, decimals=tolerance), return_inverse=True)[ 1 ] + 1 for dist in self._cluster_dist.cluster_centers_[ self._cluster_dist.labels_ ] ] ) shells[self._cluster_dist.labels_ < 0] = -1 shells = shells.reshape(self.filled.indices.shape) elif cluster_by_vecs: if self._cluster_vecs is None: self.cluster_by_vecs() shells = np.array( [ np.unique(np.round(dist, decimals=tolerance), return_inverse=True)[ 1 ] + 1 for dist in np.linalg.norm( self._cluster_vecs.cluster_centers_[self._cluster_vecs.labels_], axis=-1, ord=self.norm_order, ) ] ) shells[self._cluster_vecs.labels_ < 0] = -1 shells = shells.reshape(self.filled.indices.shape) else: distances = self.filled.distances.copy() distances[distances == np.inf] = np.max(distances[distances < np.inf]) + 1 shells = np.array( [ np.unique(np.round(dist, decimals=tolerance), return_inverse=True)[ 1 ] + 1 for dist in distances ] ) shells[self.filled.distances == np.inf] = -1 return self._reshape(shells, key=mode)
[docs] def get_global_shells( self, mode: str | None = None, tolerance: int | None = None, cluster_by_distances: bool = False, cluster_by_vecs: bool = False, ) -> np.ndarray: """ Set shell indices based on all distances available in the system instead of setting them according to the local distances (in contrast to shells defined as an attribute in this class). Clustering methods can be used at the same time, which will be useful at finite temperature results, but depending on how dispersed the atoms are, the algorithm could take some time. If the clustering method(-s) have already been launched before this function, it will use the results already available and does not execute the clustering method(-s) again. Args: mode (str): Representation of the variable. Choose from 'filled', 'ragged' and 'flattened'. tolerance (int): Decimals in np.round for rounding up distances. cluster_by_distances (bool): If True, `cluster_by_distances` is called first and the distances obtained from the clustered distances are used to calculate the shells. If cluster_by_vecs is True at the same time, `cluster_by_distances` will use the clustered vectors for its clustering algorithm. cluster_by_vecs (bool): If True, `cluster_by_vectors` is called first and the distances obtained from the clustered vectors are used to calculate the shells. Returns:self.cluster_by_distances(use_vecs=cluster_by_vecs) np.ndarray: Shell indices. """ if tolerance is None: tolerance = self._tolerance if mode is None: mode = self.mode distances = self.filled.distances if cluster_by_distances: if self._cluster_dist is None: self.cluster_by_distances(use_vecs=cluster_by_vecs) distances = self._cluster_dist.cluster_centers_[ self._cluster_dist.labels_ ].reshape(self.filled.distances.shape) distances[self._cluster_dist.labels_ < 0] = np.inf elif cluster_by_vecs: if self._cluster_vecs is None: self.cluster_by_vecs() distances = np.linalg.norm( self._cluster_vecs.cluster_centers_[self._cluster_vecs.labels_], axis=-1, ord=self.norm_order, ).reshape(self.filled.distances.shape) distances[self._cluster_vecs.labels_ < 0] = np.inf dist_lst = np.unique(np.round(a=distances, decimals=tolerance)) shells = -np.ones_like(self.filled.indices).astype(int) shells[distances < np.inf] = ( np.absolute( distances[distances < np.inf, np.newaxis] - dist_lst[np.newaxis, dist_lst < np.inf] ).argmin(axis=-1) + 1 ) return self._reshape(shells, key=mode)
[docs] def get_shell_matrix( self, chemical_pair: list[str] | None = None, cluster_by_distances: bool = False, cluster_by_vecs: bool = False, ): """ Shell matrices for pairwise interaction. Note: The matrices are always symmetric, meaning if you use them as bilinear operators, you have to divide the results by 2. Args: chemical_pair (list): pair of chemical symbols (e.g. ['Fe', 'Ni']) Returns: list of sparse matrices for different shells Example: from ase.build import bulk structure = bulk('Fe', 'bcc', 2.83).repeat(2) J = -0.1 # Ising parameter magmoms = 2*np.random.random((len(structure)), 3)-1 # Random magnetic moments between -1 and 1 neigh = structure.get_neighbors(num_neighbors=8) # Iron first shell shell_matrices = neigh.get_shell_matrix() print('Energy =', 0.5*J*magmoms.dot(shell_matrices[0].dot(matmoms))) """ pairs = np.stack( ( self.filled.indices, np.ones_like(self.filled.indices) * np.arange(len(self.filled.indices))[:, np.newaxis], self.get_global_shells( cluster_by_distances=cluster_by_distances, cluster_by_vecs=cluster_by_vecs, ) - 1, ), axis=-1, ).reshape(-1, 3) shell_max = np.max(pairs[:, -1]) + 1 if chemical_pair is not None: c = np.array(self._ref_structure.get_chemical_symbols()) pairs = pairs[ np.all( np.sort(c[pairs[:, :2]], axis=-1) == np.sort(chemical_pair), axis=-1 ) ] shell_matrix = [] for ind in np.arange(shell_max): indices = pairs[ind == pairs[:, -1]] if len(indices) > 0: ind_tmp = np.unique(indices[:, :-1], axis=0, return_counts=True) shell_matrix.append( coo_matrix( (ind_tmp[1], (ind_tmp[0][:, 0], ind_tmp[0][:, 1])), shape=(len(self._ref_structure), len(self._ref_structure)), ) ) else: shell_matrix.append( coo_matrix((len(self._ref_structure), len(self._ref_structure))) ) return shell_matrix
[docs] def find_neighbors_by_vector( self, vector: np.ndarray, return_deviation: bool = False ) -> np.ndarray | tuple[np.ndarray, np.ndarray]: """ Args: vector (list/np.ndarray): vector by which positions are translated (and neighbors are searched) return_deviation (bool): whether to return distance between the expect positions and real positions Returns: np.ndarray: list of id's for the specified translation Example: a_0 = 2.832 structure = pr.create_structure('Fe', 'bcc', a_0) id_list = structure.find_neighbors_by_vector([0, 0, a_0]) # In this example, you get a list of neighbor atom id's at z+=a_0 for each atom. # This is particularly powerful for SSA when the magnetic structure has to be translated # in each direction. """ z = np.zeros(len(self._ref_structure) * 3).reshape(-1, 3) v = np.append(z[:, np.newaxis, :], self.filled.vecs, axis=1) dist = np.linalg.norm(v - np.array(vector), axis=-1, ord=self.norm_order) indices = np.append( np.arange(len(self._ref_structure))[:, np.newaxis], self.filled.indices, axis=1, ) if return_deviation: return indices[np.arange(len(dist)), np.argmin(dist, axis=-1)], np.min( dist, axis=-1 ) return indices[np.arange(len(dist)), np.argmin(dist, axis=-1)]
[docs] def cluster_by_vecs( self, distance_threshold: float | None = None, n_clusters: int | None = None, linkage: str = "complete", metric: str = "euclidean", ): """ Method to group vectors which have similar values. This method should be used as a part of neigh.get_global_shells(cluster_by_vecs=True) or neigh.get_local_shells(cluster_by_vecs=True). However, in order to specify certain arguments (such as n_jobs or max_iter), it might help to have run this function before calling parent functions, as the data obtained with this function will be stored in the variable `_cluster_vecs` Args: distance_threshold (float/None): The linkage distance threshold above which, clusters will not be merged. (cf. sklearn.cluster.AgglomerativeClustering) n_clusters (int/None): The number of clusters to find. (cf. sklearn.cluster.AgglomerativeClustering) linkage (str): Which linkage criterion to use. The linkage criterion determines which distance to use between sets of observation. The algorithm will merge the pairs of cluster that minimize this criterion. (cf. sklearn.cluster.AgglomerativeClustering) metric (str/callable): Metric used to compute the linkage. Can be `euclidean`, `l1`, `l2`, `manhattan`, `cosine`, or `precomputed`. If linkage is `ward`, only `euclidean` is accepted. """ from sklearn.cluster import AgglomerativeClustering if distance_threshold is None and n_clusters is None: distance_threshold = np.min(self.filled.distances) dr = self.flattened.vecs self._cluster_vecs = AgglomerativeClustering( distance_threshold=distance_threshold, n_clusters=n_clusters, linkage=linkage, metric=metric, ).fit(dr) self._cluster_vecs.cluster_centers_ = get_average_of_unique_labels( self._cluster_vecs.labels_, dr ) new_labels = -np.ones_like(self.filled.indices).astype(int) new_labels[self.filled.distances < np.inf] = self._cluster_vecs.labels_ self._cluster_vecs.labels_ = new_labels
[docs] def cluster_by_distances( self, distance_threshold: float | None = None, n_clusters: int | None = None, linkage: str = "complete", metric: str = "euclidean", use_vecs: bool = False, ): """ Method to group vectors which have similar lengths. This method should be used as a part of neigh.get_global_shells(cluster_by_vecs=True) or neigh.get_local_shells(cluster_by_distances=True). However, in order to specify certain arguments (such as n_jobs or max_iter), it might help to have run this function before calling parent functions, as the data obtained with this function will be stored in the variable `_cluster_distances` Args: distance_threshold (float/None): The linkage distance threshold above which, clusters will not be merged. (cf. sklearn.cluster.AgglomerativeClustering) n_clusters (int/None): The number of clusters to find. (cf. sklearn.cluster.AgglomerativeClustering) linkage (str): Which linkage criterion to use. The linkage criterion determines which distance to use between sets of observation. The algorithm will merge the pairs of cluster that minimize this criterion. (cf. sklearn.cluster.AgglomerativeClustering) metric (str/callable): Metric used to compute the linkage. Can be `euclidean`, `l1`, `l2`, `manhattan`, `cosine`, or `precomputed`. If linkage is `ward`, only `euclidean` is accepted. use_vecs (bool): Whether to form clusters for vecs beforehand. If true, the distances obtained from the clustered vectors is used for the distance clustering. Otherwise neigh.distances is used. """ from sklearn.cluster import AgglomerativeClustering if distance_threshold is None: distance_threshold = 0.1 * np.min(self.flattened.distances) dr = self.flattened.distances if use_vecs: if self._cluster_vecs is None: self.cluster_by_vecs() labels_to_consider = self._cluster_vecs.labels_[ self._cluster_vecs.labels_ >= 0 ] dr = np.linalg.norm( self._cluster_vecs.cluster_centers_[labels_to_consider], axis=-1, ord=self.norm_order, ) self._cluster_dist = AgglomerativeClustering( distance_threshold=distance_threshold, n_clusters=n_clusters, linkage=linkage, metric=metric, ).fit(dr.reshape(-1, 1)) self._cluster_dist.cluster_centers_ = get_average_of_unique_labels( self._cluster_dist.labels_, dr ) new_labels = -np.ones_like(self.filled.indices).astype(int) new_labels[self.filled.distances < np.inf] = self._cluster_dist.labels_ self._cluster_dist.labels_ = new_labels
[docs] def reset_clusters(self, vecs: bool = True, distances: bool = True): """ Method to reset clusters. Args: vecs (bool): Reset `_cluster_vecs` (cf. `cluster_by_vecs`) distances (bool): Reset `_cluster_distances` (cf. `cluster_by_distances`) """ if vecs: self._cluster_vecs = None if distances: self._cluster_distances = None
[docs] def cluster_analysis( self, id_list: list, return_cluster_sizes: bool = False ) -> dict[int, list[int]] | tuple[dict[int, list[int]], list[int]]: """ Perform cluster analysis on a list of atom IDs. Args: id_list (list): List of atom IDs to perform cluster analysis on. return_cluster_sizes (bool): Whether to return the sizes of each cluster. Returns: Union[Dict[int, List[int]], Tuple[Dict[int, List[int]], List[int]]]: Dictionary mapping cluster IDs to a list of atom IDs in each cluster. If return_cluster_sizes is True, also returns a list of cluster sizes. """ self._cluster = [0] * len(self._ref_structure) c_count = 1 for ia in id_list: nbrs = self.ragged.indices[ia] if self._cluster[ia] == 0: self._cluster[ia] = c_count self.__probe_cluster(c_count, nbrs, id_list) c_count += 1 cluster = np.array(self._cluster) cluster_dict = { i_c: np.where(cluster == i_c)[0].tolist() for i_c in range(1, c_count) } if return_cluster_sizes: sizes = [self._cluster.count(i_c + 1) for i_c in range(c_count - 1)] return cluster_dict, sizes return cluster_dict # sizes
def __probe_cluster( self, c_count: int, neighbors: list[int], id_list: list[int] ) -> None: """ Recursively probe the cluster and assign cluster IDs to neighbors. Args: c_count (int): Cluster count. neighbors (List[int]): List of neighbor IDs. id_list (List[int]): List of atom IDs. Returns: None """ for nbr_id in neighbors: if ( self._cluster[nbr_id] == 0 and nbr_id in id_list ): # TODO: check also for ordered structures self._cluster[nbr_id] = c_count nbrs = self.ragged.indices[nbr_id] self.__probe_cluster(c_count, nbrs, id_list) # TODO: combine with corresponding routine in plot3d
[docs] def get_bonds( self, radius: float = np.inf, max_shells: int | None = None, prec: float = 0.1, ) -> list[dict[str, list[list[int]]]]: """ Get the bonds in the structure. Args: radius (float): The maximum distance for a bond to be considered. max_shells (int, optional): The maximum number of shells to consider for each atom. prec (float): The minimum distance between any two clusters. If smaller, they are considered as a single cluster. Returns: List[Dict[str, List[List[int]]]]: A list of dictionaries, where each dictionary represents the shells around an atom. The keys of the dictionary are the element symbols, and the values are lists of atom indices for each shell. """ def get_cluster( dist_vec: np.ndarray, ind_vec: np.ndarray, prec: float = prec ) -> list[np.ndarray]: """ Get clusters from a distance vector and index vector. Args: dist_vec (np.ndarray): The distance vector. ind_vec (np.ndarray): The index vector. prec (float): The minimum distance between any two clusters. Returns: List[np.ndarray]: A list of arrays, where each array represents a cluster of indices. """ ind_where = np.where(np.diff(dist_vec) > prec)[0] + 1 ind_vec_cl = [np.sort(group) for group in np.split(ind_vec, ind_where)] return ind_vec_cl dist = self.filled.distances ind = self.ragged.indices el_list = self._ref_structure.get_chemical_symbols() ind_shell = [] for d, i in zip(dist, ind, strict=True): id_list = get_cluster(d[d < radius], i[d < radius]) ia_shells_dict: dict[str, list[list[int]]] = {} for i_shell_list in id_list: ia_shell_dict: dict[str, list[int]] = {} for i_s in i_shell_list: el = el_list[i_s] if el not in ia_shell_dict: ia_shell_dict[el] = [] ia_shell_dict[el].append(i_s) for el, ia_lst in ia_shell_dict.items(): if el not in ia_shells_dict: ia_shells_dict[el] = [] if ( max_shells is not None and len(ia_shells_dict[el]) + 1 > max_shells ): continue ia_shells_dict[el].append(ia_lst) ind_shell.append(ia_shells_dict) return ind_shell
Neighbors.__doc__ = Tree.__doc__
[docs] def get_volume_of_n_sphere_in_p_norm(n: int = 3, p: int = 2) -> float: """ Volume of an n-sphere in p-norm. For more info: https://en.wikipedia.org/wiki/Volume_of_an_n-ball#Balls_in_Lp_norms """ return (2 * gamma(1 + 1 / p)) ** n / gamma(1 + n / p)
[docs] def get_neighbors( structure: Atoms, num_neighbors: int = 12, tolerance: int = 2, id_list: list | None = None, cutoff_radius: float = np.inf, width_buffer: float = 1.2, mode: str = "filled", norm_order: int = 2, ) -> Neighbors: """ Get the neighbors of atoms in a structure. Args: structure (Atoms): The structure to analyze. num_neighbors (int): The number of neighbors to find for each atom. tolerance (int): The tolerance (round decimal points) used for computing neighbor shells. id_list (list): The list of atoms for which neighbors are to be found. cutoff_radius (float): The upper bound of the distance to which the search must be done. width_buffer (float): The width of the layer to be added to account for periodic boundary conditions. mode (str): The representation of per-atom quantities (distances etc.). Choose from 'filled', 'ragged', and 'flattened'. norm_order (int): The norm to use for the neighborhood search and shell recognition. Returns: Neighbors: An instance of the Neighbors class with the neighbor indices, distances, and vectors. """ neigh = cast( Neighbors, _get_neighbors( structure=structure, num_neighbors=num_neighbors, tolerance=tolerance, id_list=id_list, cutoff_radius=cutoff_radius, width_buffer=width_buffer, norm_order=norm_order, ), ) neigh._set_mode(mode) return neigh
def _get_neighbors( structure: Atoms, num_neighbors: int = 12, tolerance: int = 2, id_list: list | None = None, cutoff_radius: float = np.inf, width_buffer: float = 1.2, get_tree: bool = False, norm_order: int = 2, ) -> Neighbors | Tree: """ Get the neighbors of atoms in a structure. Args: structure (Atoms): The structure to analyze. num_neighbors (int): The number of neighbors to find for each atom. tolerance (int): The tolerance (round decimal points) used for computing neighbor shells. id_list (list): The list of atoms for which neighbors are to be found. cutoff_radius (float): The upper bound of the distance to which the search must be done. width_buffer (float): The width of the layer to be added to account for periodic boundary conditions. get_tree (bool): Whether to return a Tree instance instead of Neighbors. norm_order (int): The norm to use for the neighborhood search and shell recognition. Returns: Union[Neighbors, Tree]: An instance of the Neighbors class or Tree class with the neighbor indices, distances, and vectors. """ if num_neighbors is not None and num_neighbors <= 0: raise ValueError("invalid number of neighbors") if width_buffer < 0: raise ValueError("width_buffer must be a positive float") if get_tree: neigh = Tree(ref_structure=structure) else: neigh = Neighbors(ref_structure=structure, tolerance=tolerance) neigh._norm_order = norm_order width = neigh._estimate_width( num_neighbors=num_neighbors, cutoff_radius=cutoff_radius, width_buffer=width_buffer, ) extended_positions, neigh._wrapped_indices = get_extended_positions( structure=structure, width=width, return_indices=True, norm_order=norm_order ) neigh._extended_positions = extended_positions neigh._tree = cKDTree(extended_positions) if get_tree: return neigh positions = structure.positions if id_list is not None: positions = positions[np.array(id_list)] neigh._get_neighborhood( positions=positions, num_neighbors=num_neighbors, cutoff_radius=cutoff_radius, exclude_self=True, width_buffer=width_buffer, ) if neigh._check_width(width=width, pbc=structure.pbc): warnings.warn( "width_buffer may have been too small - " "most likely not all neighbors properly assigned", stacklevel=2, ) return neigh
[docs] def get_neighborhood( structure: Atoms, positions: np.ndarray, num_neighbors: int = 12, cutoff_radius: float = np.inf, width_buffer: float = 1.2, mode: str = "filled", norm_order: int = 2, ): """ Args: structure: position: Position in a box whose neighborhood information is analysed num_neighbors (int): Number of nearest neighbors cutoff_radius (float): Upper bound of the distance to which the search is to be done width_buffer (float): Width of the layer to be added to account for pbc. mode (str): Representation of per-atom quantities (distances etc.). Choose from 'filled', 'ragged' and 'flattened'. norm_order (int): Norm to use for the neighborhood search and shell recognition. The definition follows the conventional Lp norm (cf. https://en.wikipedia.org/wiki/Lp_space). This is an feature and for anything other than norm_order=2, there is no guarantee that this works flawlessly. Returns: structuretoolkit.analyse.neighbors.Tree: Neighbors instances with the neighbor indices, distances and vectors """ neigh = _get_neighbors( structure=structure, num_neighbors=num_neighbors, cutoff_radius=cutoff_radius, width_buffer=width_buffer, get_tree=True, norm_order=norm_order, ) neigh._set_mode(mode) return neigh._get_neighborhood( positions=positions, num_neighbors=num_neighbors, cutoff_radius=cutoff_radius, )