Source code for structuretoolkit.analyse.spatial

# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department
# Distributed under the terms of "New BSD License", see the LICENSE file.

from collections.abc import Callable
from typing import Any

import numpy as np
from ase.atoms import Atoms
from scipy.sparse import coo_matrix
from scipy.spatial import ConvexHull, Delaunay, Voronoi

from structuretoolkit.analyse.neighbors import get_neighborhood, get_neighbors
from structuretoolkit.common.helper import (
    get_average_of_unique_labels,
    get_extended_positions,
    get_vertical_length,
    get_wrapped_coordinates,
)

__author__ = "Joerg Neugebauer, Sam Waseda"
__copyright__ = (
    "Copyright 2021, Max-Planck-Institut für Eisenforschung GmbH - "
    "Computational Materials Design (CM) Department"
)
__version__ = "1.0"
__maintainer__ = "Sam Waseda"
__email__ = "waseda@mpie.de"
__status__ = "production"
__date__ = "Sep 1, 2017"


[docs] def get_mean_positions( positions: np.ndarray, cell: np.ndarray, pbc: np.ndarray, labels: np.ndarray ) -> np.ndarray: """ This function calculates the average position(-s) across periodic boundary conditions according to the labels Args: positions (numpy.ndarray (n, 3)): Coordinates to be averaged cell (numpy.ndarray (3, 3)): Cell dimensions pbc (numpy.ndarray (3,)): Periodic boundary conditions (in boolean) labels (numpy.ndarray (n,)): labels according to which the atoms are grouped Returns: (numpy.ndarray): mean positions """ # Translate labels to integer enumeration (0, 1, 2, ... etc.) and get their counts _, labels, counts = np.unique(labels, return_inverse=True, return_counts=True) # Get reference point for each unique label mean_positions = positions[np.unique(labels, return_index=True)[1]] # Get displacement vectors from reference points to all other points for the same labels all_positions = positions - mean_positions[labels] # Account for pbc all_positions = np.einsum("ji,nj->ni", np.linalg.inv(cell), all_positions) all_positions[:, pbc] -= np.rint(all_positions)[:, pbc] all_positions = np.einsum("ji,nj->ni", cell, all_positions) # Add average displacement vector of each label to the reference point np.add.at(mean_positions, labels, (all_positions.T / counts[labels]).T) return mean_positions
[docs] def create_gridpoints( structure: Atoms, n_gridpoints_per_angstrom: int = 5 ) -> np.ndarray: """ Create grid points within the structure. Args: structure (Atoms): The atomic structure. n_gridpoints_per_angstrom (int): Number of grid points per angstrom. Returns: np.ndarray: The grid points. """ cell = get_vertical_length(structure=structure) n_points = (n_gridpoints_per_angstrom * cell).astype(int) grids = np.meshgrid( *[np.linspace(0, 1, n_points[i], endpoint=False) for i in range(3)] ) positions = np.stack(grids, axis=-1).reshape(-1, 3) return np.einsum("ji,nj->ni", structure.cell, positions)
[docs] def remove_too_close( positions: np.ndarray, structure: Atoms, min_distance: float = 1 ) -> np.ndarray: """ Remove positions that are too close to the neighboring atoms. Args: positions (np.ndarray): The positions to be checked. structure (Atoms): The atomic structure. min_distance (float): The minimum distance allowed. Returns: np.ndarray: The filtered positions. """ neigh = get_neighborhood(structure=structure, positions=positions, num_neighbors=1) return positions[neigh.distances.flatten() > min_distance]
[docs] def set_to_high_symmetry_points( positions: np.ndarray, structure: Atoms, neigh, decimals: int = 4 ) -> np.ndarray: """ Adjusts the positions to the nearest high symmetry points in the structure. Args: positions (np.ndarray): The positions to be adjusted. structure (Atoms): The atomic structure. neigh: The neighborhood information. decimals (int): The number of decimal places to round the positions. Returns: np.ndarray: The adjusted positions. Raises: ValueError: If high symmetry points could not be detected after 10 iterations. """ for _ in range(10): neigh = neigh.get_neighborhood(positions) dx = np.mean(neigh.vecs, axis=-2) if np.allclose(dx, 0): return positions positions += dx positions = get_wrapped_coordinates(structure=structure, positions=positions) unique_indices = np.unique( np.round(positions, decimals=decimals), axis=0, return_index=True )[1] positions = positions[unique_indices] raise ValueError("High symmetry points could not be detected")
[docs] def cluster_by_steinhardt( positions: np.ndarray, neigh, l_values: list[int], q_eps: float, var_ratio: float, min_samples: int, ) -> np.ndarray: """ Clusters candidate positions via Steinhardt parameters and the variance in distances to host atoms. The cluster that has the lowest variance is returned, i.e. those positions that have the most "regular" coordination polyhedra. Args: positions (array): candidate positions neigh (Neighbor): neighborhood information of the candidate positions l_values (list of int): which steinhardt parameters to use for clustering q_eps (float): maximum intercluster distance in steinhardt parameters for DBSCAN clustering var_ratio (float): multiplier to make steinhardt's and distance variance numerically comparable min_samples (int): minimum size of clusters Returns: array: Positions of the most likely interstitial sites """ from sklearn.cluster import DBSCAN if min_samples is None: min_samples = min(len(neigh.distances), 5) neigh = neigh.get_neighborhood(positions) Q_values = np.array([neigh.get_steinhardt_parameter(ll) for ll in l_values]) db = DBSCAN(q_eps, min_samples=min_samples) var = np.std(neigh.distances, axis=-1) descriptors = np.concatenate((Q_values, [var * var_ratio]), axis=0) labels = db.fit_predict(descriptors.T) var_mean = np.array( [np.mean(var[labels == ll]) for ll in np.unique(labels) if ll >= 0] ) return positions[labels == np.argmin(var_mean)]
[docs] class Interstitials: """ Class to search for interstitial sites This class internally does the following steps: 0. Initialize grid points (or Voronoi vertices) which are considered as interstitial site candidates. 1. Eliminate points within a distance from the nearest neighboring atoms as given by `min_distance` 2. Shift interstitial candidates to the nearest symmetric points with respect to the neighboring atom sites/vertices. 3. Cluster interstitial candidate positions to avoid point overlapping. 4. Cluster interstitial candidates by their Steinhardt parameters (cf. `l_values` for the values of the spherical harmonics) and the variance of the distances and take the group with the smallest average distance variance The interstitial sites can be obtained via `positions` In complex structures (i.e. grain boundary, dislocation etc.), the default parameters should be chosen properly. In order to see other quantities, which potentially characterize interstitial sites, see the following class methods: - `get_variances()` - `get_distances()` - `get_steinhardt_parameters()` - `get_volumes()` - `get_areas()` Troubleshooting: Identifying interstitial sites is not a very easy task. The algorithm employed here will probably do a good job, but if it fails, it might be good to look at the following points - The intermediate results can be accessed via `run_workflow` by specifying the step number. - The most vulnerable point is the last step, clustering by Steinhardt parameters. Take a look at the step before and after. If the interstitial sites are present in the step before the clustering by Steinhardt parameters, you might want to change the values of `q_eps` and `var_ratio`. It might make a difference to use `l_values` as well. - It might fail to find sites if the box is very small. In that case it might make sense to set `min_samples` very low (you can take 1) """
[docs] def __init__( self, structure: Atoms, num_neighbors: int, n_gridpoints_per_angstrom: int = 5, min_distance: float = 1, use_voronoi: bool = False, x_eps: float = 0.1, l_values: np.ndarray = np.arange(2, 13), q_eps: float = 0.3, var_ratio: float = 5.0, min_samples: int | None = None, neigh_args: dict | None = None, **kwargs, ): """ Args: num_neighbors (int): Number of neighbors/vertices to consider for the interstitial sites. By definition, tetrahedral sites should have 4 vertices and octahedral sites 6. n_gridpoints_per_angstrom (int): Number of grid points per angstrom for the initialization of the interstitial candidates. The finer the mesh (i.e. the larger the value), the likelier it is to find the correct sites but then also it becomes computationally more expensive. Ignored if `use_voronoi` is set to `True` min_distance (float): Minimum distance from the nearest neighboring atoms to the positions for them to be considered as interstitial site candidates. Set `min_distance` to 0 if no point should be removed. use_voronoi (bool): Use Voronoi vertices for the initial interstitial candidate positions instead of grid points. x_eps (bool): eps value for the clustering of interstitial candidate positions l_values (list): list of values for the Steinhardt parameter values for the classification of the interstitial candidate points q_eps (float): eps value for the clustering of interstitial candidate points based on Steinhardt parameters and distance variances. This might play a crucial role in identifying the correct interstitial sites var_ratio (float): factor to be multiplied to the variance values in order to give a larger weight to the variances. min_samples (int/None): `min_sample` in the point clustering. neigh_args (dict): arguments to be added to `get_neighbors` """ if neigh_args is None: neigh_args = {} if use_voronoi: self.initial_positions = get_voronoi_vertices(structure) else: self.initial_positions = create_gridpoints( structure=structure, n_gridpoints_per_angstrom=n_gridpoints_per_angstrom ) self._neigh = get_neighbors( structure=structure, num_neighbors=num_neighbors, **neigh_args ) self.workflow: list[dict[str, Any]] = [ { "f": remove_too_close, "kwargs": {"structure": structure, "min_distance": min_distance}, }, { "f": set_to_high_symmetry_points, "kwargs": {"structure": structure, "neigh": self.neigh}, }, { "f": lambda **kwargs: get_cluster_positions(structure, **kwargs), "kwargs": {"eps": x_eps}, }, { "f": cluster_by_steinhardt, "kwargs": { "neigh": self.neigh, "l_values": l_values, "q_eps": q_eps, "var_ratio": var_ratio, "min_samples": min_samples, }, }, ] self._positions: np.ndarray | None = None self.structure = structure
[docs] def run_workflow( self, positions: np.ndarray | None = None, steps: int = -1 ) -> np.ndarray: """ Run the workflow to obtain the interstitial positions. Args: positions (numpy.ndarray, optional): Initial positions of the interstitial candidates. If not provided, the initial positions stored in `self.initial_positions` will be used. steps (int, optional): Number of steps to run in the workflow. If set to -1 (default), all steps will be run. Returns: numpy.ndarray: Final positions of the interstitial sites. """ if positions is None: positions = self.initial_positions.copy() for ii, ww in enumerate(self.workflow): f: Callable[..., np.ndarray] = ww["f"] positions = f(positions=positions, **ww["kwargs"]) if ii == steps: return positions return positions
@property def neigh(self): """ Neighborhood information of each interstitial candidate and their surrounding atoms. E.g. `class.neigh.distances[0][0]` gives the distance from the first interstitial candidate to its nearest neighboring atoms. The functionalities of `neigh` follow those of `structuretoolkit.analyse.neighbors`. """ return self._neigh @property def positions(self) -> np.ndarray: """ Get the positions of the interstitial sites. Returns: np.ndarray: Positions of the interstitial sites. """ if self._positions is None: self._positions = self.run_workflow() self._neigh = self.neigh.get_neighborhood(self._positions) assert self._positions is not None return self._positions @property def hull(self) -> list: """ Convex hull of each atom. It is mainly used for the volume and area calculation of each interstitial candidate. For more info, see `get_volumes` and `get_areas`. """ return [ConvexHull(v) for v in self.neigh.vecs]
[docs] def get_variances(self) -> np.ndarray: """ Get variance of neighboring distances. Since interstitial sites are mostly in symmetric sites, the variance values tend to be small. In the case of fcc, both tetrahedral and octahedral sites as well as tetrahedral sites in bcc should have the value of 0. Returns: (numpy.array (n,)) Variance values """ return np.std(self.neigh.distances, axis=-1)
[docs] def get_distances(self, function_to_apply=np.min) -> np.ndarray: """ Get per-position return values of a given function for the neighbors. Args: function_to_apply (function): Function to apply to the distance array. Default is numpy.minimum Returns: (numpy.array (n,)) Function values on the distance array """ return function_to_apply(self.neigh.distances, axis=-1)
[docs] def get_steinhardt_parameters(self, l: int) -> np.ndarray: """ Args: l (int/numpy.array): Order of Steinhardt parameter Returns: (numpy.array (n,)) Steinhardt parameter values """ return self.neigh.get_steinhardt_parameter(l=l)
[docs] def get_volumes(self) -> np.ndarray: """ Returns: (numpy.array (n,)): Convex hull volume of each site. """ return np.array([h.volume for h in self.hull])
[docs] def get_areas(self) -> np.ndarray: """ Returns: (numpy.array (n,)): Convex hull area of each site. """ return np.array([h.area for h in self.hull])
[docs] def get_interstitials( structure: Atoms, num_neighbors: int, n_gridpoints_per_angstrom: int = 5, min_distance: float = 1, use_voronoi: bool = False, x_eps: float = 0.1, l_values: np.ndarray = np.arange(2, 13), q_eps: float = 0.3, var_ratio: float = 5.0, min_samples: int | None = None, neigh_args: dict | None = None, **kwargs, ) -> Interstitials: """ Create an instance of the Interstitials class. Args: structure (Atoms): The atomic structure. num_neighbors (int): The number of neighbors to consider. n_gridpoints_per_angstrom (int, optional): The number of grid points per angstrom. Defaults to 5. min_distance (float, optional): The minimum distance between interstitials. Defaults to 1. use_voronoi (bool, optional): Whether to use Voronoi tessellation. Defaults to False. x_eps (float, optional): The epsilon value for clustering. Defaults to 0.1. l_values (np.ndarray, optional): The array of l values for Steinhardt parameter. Defaults to np.arange(2, 13). q_eps (float, optional): The epsilon value for Steinhardt parameter. Defaults to 0.3. var_ratio (float, optional): The variance ratio for clustering. Defaults to 5.0. min_samples (Optional[int], optional): The minimum number of samples for clustering. Defaults to None. neigh_args (dict, optional): Additional arguments for neighbor calculation. Defaults to {}. **kwargs: Additional keyword arguments. Returns: Interstitials: An instance of the Interstitials class. """ if neigh_args is None: neigh_args = {} return Interstitials( structure=structure, num_neighbors=num_neighbors, n_gridpoints_per_angstrom=n_gridpoints_per_angstrom, min_distance=min_distance, use_voronoi=use_voronoi, x_eps=x_eps, l_values=l_values, q_eps=q_eps, var_ratio=var_ratio, min_samples=min_samples, neigh_args=neigh_args, **kwargs, )
get_interstitials.__doc__ = (Interstitials.__doc__ or "").replace( "Class", "Function" ) + (Interstitials.__init__.__doc__ or "")
[docs] def get_layers( structure: Atoms, distance_threshold: float = 0.01, id_list: list[int] | None = None, wrap_atoms: bool = True, planes: np.ndarray | None = None, cluster_method: Any | None = None, ) -> np.ndarray: """ Get an array of layer numbers. Args: distance_threshold (float): Distance below which two points are considered to belong to the same layer. For detailed description: sklearn.cluster.AgglomerativeClustering id_list (list/numpy.ndarray): List of atoms for which the layers should be considered. wrap_atoms (bool): Whether to consider periodic boundary conditions according to the box definition or not. If set to `False`, atoms lying on opposite box boundaries are considered to belong to different layers, regardless of whether the box itself has the periodic boundary condition in this direction or not. If `planes` is not `None` and `wrap_atoms` is `True`, this tag has the same effect as calling `get_layers()` after calling `center_coordinates_in_unit_cell()` planes (list/numpy.ndarray): Planes along which the layers are calculated. Planes are given in vectors, i.e. [1, 0, 0] gives the layers along the x-axis. Default planes are orthogonal unit vectors: [[1, 0, 0], [0, 1, 0], [0, 0, 1]]. If you have a tilted box and want to calculate the layers along the directions of the cell vectors, use `planes=np.linalg.inv(structure.cell).T`. Whatever values are inserted, they are internally normalized, so whether [1, 0, 0] is entered or [2, 0, 0], the results will be the same. cluster_method (scikit-learn cluster algorithm): if given overrides the clustering method used, must be an instance of a cluster algorithm from scikit-learn (or compatible interface) Returns: Array of layer numbers (same shape as structure.positions) Example I - how to get the number of layers in each direction: >>> structure = Project('.').create_structure('Fe', 'bcc', 2.83).repeat(5) >>> print('Numbers of layers:', np.max(structure.analyse.get_layers(), axis=0)+1) Example II - get layers of only one species: >>> print('Iron layers:', structure.analyse.get_layers( ... id_list=structure.select_index('Fe'))) The clustering algorithm can be changed with the cluster_method argument >>> from sklearn.cluster import DBSCAN >>> layers = structure.analyse.get_layers(cluster_method=DBSCAN()) """ if distance_threshold <= 0: raise ValueError("distance_threshold must be a positive float") if id_list is not None and len(id_list) == 0: raise ValueError("id_list must contain at least one id") if wrap_atoms and planes is None: positions, indices = get_extended_positions( structure=structure, width=distance_threshold, return_indices=True ) if id_list is not None: id_arr = np.arange(len(structure))[np.array(id_list)] id_mask = np.any(id_arr[:, np.newaxis] == indices[np.newaxis, :], axis=0) positions = positions[id_mask] indices = indices[id_mask] else: positions = structure.positions if id_list is not None: positions = positions[id_list] if wrap_atoms: positions = get_wrapped_coordinates( structure=structure, positions=positions ) if planes is not None: mat = np.asarray(planes).reshape(-1, 3) positions = np.einsum( "ij,i,nj->ni", mat, 1 / np.linalg.norm(mat, axis=-1), positions ) if cluster_method is None: from sklearn.cluster import AgglomerativeClustering cluster_method = AgglomerativeClustering( linkage="complete", n_clusters=None, distance_threshold=distance_threshold, ) layers = [] for ii, x in enumerate(positions.T): cluster = cluster_method.fit(x.reshape(-1, 1)) first_occurrences = np.unique(cluster.labels_, return_index=True)[1] permutation = x[first_occurrences].argsort().argsort() labels = permutation[cluster.labels_] if wrap_atoms and planes is None and structure.pbc[ii]: mean_positions = get_average_of_unique_labels(labels, positions) scaled_positions = np.einsum( "ji,nj->ni", np.linalg.inv(structure.cell), mean_positions ) unique_inside_box = np.all( np.absolute(scaled_positions - 0.5 + 1.0e-8) < 0.5, axis=-1 ) arr_inside_box = np.any( labels[:, None] == np.unique(labels)[unique_inside_box][None, :], axis=-1, ) first_occurences = np.unique(indices[arr_inside_box], return_index=True)[1] labels = labels[arr_inside_box] labels -= np.min(labels) labels = labels[first_occurences] layers.append(labels) if planes is not None and len(np.asarray(planes).shape) == 1: return np.asarray(layers).flatten() return np.vstack(layers).T
[docs] def get_voronoi_vertices( structure: Atoms, epsilon: float = 2.5e-4, distance_threshold: float = 0, width_buffer: float = 10.0, ) -> np.ndarray: """ Get voronoi vertices of the box. Args: epsilon (float): displacement to add to avoid wrapping of atoms at borders distance_threshold (float): distance below which two vertices are considered as one. Agglomerative clustering algorithm (sklearn) is employed. Final positions are given as the average positions of clusters. width_buffer (float): width of the layer to be added to account for pbc. Returns: numpy.ndarray: 3d-array of vertices This function detect octahedral and tetrahedral sites in fcc; in bcc it detects tetrahedral sites. In defects (e.g. vacancy, dislocation, grain boundary etc.), it gives a list of positions interstitial atoms might want to occupy. In order for this to be more successful, it might make sense to look at the distance between the voronoi vertices and their nearest neighboring atoms via: >>> voronoi_vertices = structure_of_your_choice.analyse.get_voronoi_vertices() >>> neigh = structure_of_your_choice.get_neighborhood(voronoi_vertices) >>> print(neigh.distances.min(axis=-1)) """ voro = Voronoi( get_extended_positions(structure=structure, width=width_buffer) + epsilon ) xx = voro.vertices if distance_threshold > 0: from sklearn.cluster import AgglomerativeClustering cluster = AgglomerativeClustering( linkage="single", distance_threshold=distance_threshold, n_clusters=None ) cluster.fit(xx) xx = get_average_of_unique_labels(cluster.labels_, xx) xx = xx[ np.linalg.norm( xx - get_wrapped_coordinates(structure=structure, positions=xx, epsilon=0), axis=-1, ) < epsilon ] return xx - epsilon
def _get_neighbors( structure: Atoms, position_interpreter: Callable, data_field: str, width_buffer: float = 10, ) -> np.ndarray: """ Get pairs of atom indices sharing the same Voronoi vertices/areas. Args: structure (Atoms): The atomic structure. position_interpreter (callable): The position interpreter function. data_field (str): The data field to extract from the position interpreter. width_buffer (float): Width of the layer to be added to account for pbc. Returns: np.ndarray: Pair indices """ positions, indices = get_extended_positions( structure=structure, width=width_buffer, return_indices=True ) interpretation = position_interpreter(positions) data = getattr(interpretation, data_field) x = positions[data] return indices[ data[ np.isclose(get_wrapped_coordinates(structure=structure, positions=x), x) .all(axis=-1) .any(axis=-1) ] ]
[docs] def get_voronoi_neighbors(structure: Atoms, width_buffer: float = 10) -> np.ndarray: """ Get pairs of atom indices sharing the same Voronoi vertices/areas. Args: width_buffer (float): Width of the layer to be added to account for pbc. Returns: pairs (ndarray): Pair indices """ return _get_neighbors( structure=structure, position_interpreter=Voronoi, data_field="ridge_points", width_buffer=width_buffer, )
[docs] def get_delaunay_neighbors(structure: Atoms, width_buffer: float = 10.0) -> np.ndarray: """ Get indices of atoms sharing the same Delaunay tetrahedrons (commonly known as Delaunay triangles), i.e. indices of neighboring atoms, which form a tetrahedron, in which no other atom exists. Args: width_buffer (float): Width of the layer to be added to account for pbc. Returns: pairs (ndarray): Delaunay neighbor indices """ return _get_neighbors( structure=structure, position_interpreter=Delaunay, data_field="simplices", width_buffer=width_buffer, )
[docs] def get_cluster_positions( structure: Atoms, positions: np.ndarray | None = None, eps: float = 1.0, buffer_width: float | None = None, return_labels: bool = False, ) -> np.ndarray | tuple[np.ndarray, np.ndarray]: """ Cluster positions according to the distances. Clustering algorithm uses DBSCAN: https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html Example I: ``` analyse = Analyze(some_ase_structure) positions = analyse.cluster_points(eps=2) ``` This example should return the atom positions, if no two atoms lie within a distance of 2. If there are at least two atoms which lie within a distance of 2, their entries will be replaced by their mean position. Example II: ``` analyse = Analyze(some_ase_structure) print(analyse.cluster_positions([3*[0.], 3*[1.]], eps=3)) ``` This returns `[0.5, 0.5, 0.5]` (if the cell is large enough) Args: positions (numpy.ndarray): Positions to consider. Default: atom positions eps (float): The maximum distance between two samples for one to be considered as in the neighborhood of the other. buffer_width (float): Buffer width to consider across the periodic boundary conditions. If too small, it is possible that atoms that are meant to belong together across PBC are missed. Default: Same as eps return_labels (bool): Whether to return the labels given according to the grouping together with the mean positions Returns: positions (numpy.ndarray): Mean positions label (numpy.ndarray): Labels of the positions (returned when `return_labels = True`) """ from sklearn.cluster import DBSCAN positions = structure.positions if positions is None else np.array(positions) if buffer_width is None: buffer_width = eps extended_positions, indices = get_extended_positions( structure=structure, width=buffer_width, return_indices=True, positions=positions, ) labels = DBSCAN(eps=eps, min_samples=1).fit_predict(extended_positions) coo = coo_matrix((labels, (np.arange(len(labels)), indices))) labels = coo.max(axis=0).toarray().flatten() # make labels look nicer labels = np.unique(labels, return_inverse=True)[1] mean_positions = get_mean_positions( positions, structure.cell, structure.pbc, labels ) if return_labels: return mean_positions, labels return mean_positions