# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department
# Distributed under the terms of "New BSD License", see the LICENSE file.
from collections.abc import Callable
from typing import Any
import numpy as np
from ase.atoms import Atoms
from scipy.sparse import coo_matrix
from scipy.spatial import ConvexHull, Delaunay, Voronoi
from structuretoolkit.analyse.neighbors import get_neighborhood, get_neighbors
from structuretoolkit.common.helper import (
get_average_of_unique_labels,
get_extended_positions,
get_vertical_length,
get_wrapped_coordinates,
)
__author__ = "Joerg Neugebauer, Sam Waseda"
__copyright__ = (
"Copyright 2021, Max-Planck-Institut für Eisenforschung GmbH - "
"Computational Materials Design (CM) Department"
)
__version__ = "1.0"
__maintainer__ = "Sam Waseda"
__email__ = "waseda@mpie.de"
__status__ = "production"
__date__ = "Sep 1, 2017"
[docs]
def get_mean_positions(
positions: np.ndarray, cell: np.ndarray, pbc: np.ndarray, labels: np.ndarray
) -> np.ndarray:
"""
This function calculates the average position(-s) across periodic boundary conditions according
to the labels
Args:
positions (numpy.ndarray (n, 3)): Coordinates to be averaged
cell (numpy.ndarray (3, 3)): Cell dimensions
pbc (numpy.ndarray (3,)): Periodic boundary conditions (in boolean)
labels (numpy.ndarray (n,)): labels according to which the atoms are grouped
Returns:
(numpy.ndarray): mean positions
"""
# Translate labels to integer enumeration (0, 1, 2, ... etc.) and get their counts
_, labels, counts = np.unique(labels, return_inverse=True, return_counts=True)
# Get reference point for each unique label
mean_positions = positions[np.unique(labels, return_index=True)[1]]
# Get displacement vectors from reference points to all other points for the same labels
all_positions = positions - mean_positions[labels]
# Account for pbc
all_positions = np.einsum("ji,nj->ni", np.linalg.inv(cell), all_positions)
all_positions[:, pbc] -= np.rint(all_positions)[:, pbc]
all_positions = np.einsum("ji,nj->ni", cell, all_positions)
# Add average displacement vector of each label to the reference point
np.add.at(mean_positions, labels, (all_positions.T / counts[labels]).T)
return mean_positions
[docs]
def create_gridpoints(
structure: Atoms, n_gridpoints_per_angstrom: int = 5
) -> np.ndarray:
"""
Create grid points within the structure.
Args:
structure (Atoms): The atomic structure.
n_gridpoints_per_angstrom (int): Number of grid points per angstrom.
Returns:
np.ndarray: The grid points.
"""
cell = get_vertical_length(structure=structure)
n_points = (n_gridpoints_per_angstrom * cell).astype(int)
grids = np.meshgrid(
*[np.linspace(0, 1, n_points[i], endpoint=False) for i in range(3)]
)
positions = np.stack(grids, axis=-1).reshape(-1, 3)
return np.einsum("ji,nj->ni", structure.cell, positions)
[docs]
def remove_too_close(
positions: np.ndarray, structure: Atoms, min_distance: float = 1
) -> np.ndarray:
"""
Remove positions that are too close to the neighboring atoms.
Args:
positions (np.ndarray): The positions to be checked.
structure (Atoms): The atomic structure.
min_distance (float): The minimum distance allowed.
Returns:
np.ndarray: The filtered positions.
"""
neigh = get_neighborhood(structure=structure, positions=positions, num_neighbors=1)
return positions[neigh.distances.flatten() > min_distance]
[docs]
def set_to_high_symmetry_points(
positions: np.ndarray, structure: Atoms, neigh, decimals: int = 4
) -> np.ndarray:
"""
Adjusts the positions to the nearest high symmetry points in the structure.
Args:
positions (np.ndarray): The positions to be adjusted.
structure (Atoms): The atomic structure.
neigh: The neighborhood information.
decimals (int): The number of decimal places to round the positions.
Returns:
np.ndarray: The adjusted positions.
Raises:
ValueError: If high symmetry points could not be detected after 10 iterations.
"""
for _ in range(10):
neigh = neigh.get_neighborhood(positions)
dx = np.mean(neigh.vecs, axis=-2)
if np.allclose(dx, 0):
return positions
positions += dx
positions = get_wrapped_coordinates(structure=structure, positions=positions)
unique_indices = np.unique(
np.round(positions, decimals=decimals), axis=0, return_index=True
)[1]
positions = positions[unique_indices]
raise ValueError("High symmetry points could not be detected")
[docs]
def cluster_by_steinhardt(
positions: np.ndarray,
neigh,
l_values: list[int],
q_eps: float,
var_ratio: float,
min_samples: int,
) -> np.ndarray:
"""
Clusters candidate positions via Steinhardt parameters and the variance in distances to host atoms.
The cluster that has the lowest variance is returned, i.e. those positions that have the most "regular" coordination polyhedra.
Args:
positions (array): candidate positions
neigh (Neighbor): neighborhood information of the candidate positions
l_values (list of int): which steinhardt parameters to use for clustering
q_eps (float): maximum intercluster distance in steinhardt parameters for DBSCAN clustering
var_ratio (float): multiplier to make steinhardt's and distance variance numerically comparable
min_samples (int): minimum size of clusters
Returns:
array: Positions of the most likely interstitial sites
"""
from sklearn.cluster import DBSCAN
if min_samples is None:
min_samples = min(len(neigh.distances), 5)
neigh = neigh.get_neighborhood(positions)
Q_values = np.array([neigh.get_steinhardt_parameter(ll) for ll in l_values])
db = DBSCAN(q_eps, min_samples=min_samples)
var = np.std(neigh.distances, axis=-1)
descriptors = np.concatenate((Q_values, [var * var_ratio]), axis=0)
labels = db.fit_predict(descriptors.T)
var_mean = np.array(
[np.mean(var[labels == ll]) for ll in np.unique(labels) if ll >= 0]
)
return positions[labels == np.argmin(var_mean)]
[docs]
class Interstitials:
"""
Class to search for interstitial sites
This class internally does the following steps:
0. Initialize grid points (or Voronoi vertices) which are considered as
interstitial site candidates.
1. Eliminate points within a distance from the nearest neighboring atoms as
given by `min_distance`
2. Shift interstitial candidates to the nearest symmetric points with respect to the
neighboring atom sites/vertices.
3. Cluster interstitial candidate positions to avoid point overlapping.
4. Cluster interstitial candidates by their Steinhardt parameters (cf. `l_values` for
the values of the spherical harmonics) and the variance of the distances and
take the group with the smallest average distance variance
The interstitial sites can be obtained via `positions`
In complex structures (i.e. grain boundary, dislocation etc.), the default parameters
should be chosen properly. In order to see other quantities, which potentially
characterize interstitial sites, see the following class methods:
- `get_variances()`
- `get_distances()`
- `get_steinhardt_parameters()`
- `get_volumes()`
- `get_areas()`
Troubleshooting:
Identifying interstitial sites is not a very easy task. The algorithm employed here will
probably do a good job, but if it fails, it might be good to look at the following points
- The intermediate results can be accessed via `run_workflow` by specifying the step number.
- The most vulnerable point is the last step, clustering by Steinhardt parameters. Take a
look at the step before and after. If the interstitial sites are present in the step
before the clustering by Steinhardt parameters, you might want to change the values of
`q_eps` and `var_ratio`. It might make a difference to use `l_values` as well.
- It might fail to find sites if the box is very small. In that case it might make sense to
set `min_samples` very low (you can take 1)
"""
[docs]
def __init__(
self,
structure: Atoms,
num_neighbors: int,
n_gridpoints_per_angstrom: int = 5,
min_distance: float = 1,
use_voronoi: bool = False,
x_eps: float = 0.1,
l_values: np.ndarray = np.arange(2, 13),
q_eps: float = 0.3,
var_ratio: float = 5.0,
min_samples: int | None = None,
neigh_args: dict | None = None,
**kwargs,
):
"""
Args:
num_neighbors (int): Number of neighbors/vertices to consider for the interstitial
sites. By definition, tetrahedral sites should have 4 vertices and octahedral
sites 6.
n_gridpoints_per_angstrom (int): Number of grid points per angstrom for the
initialization of the interstitial candidates. The finer the mesh (i.e. the larger
the value), the likelier it is to find the correct sites but then also it becomes
computationally more expensive. Ignored if `use_voronoi` is set to `True`
min_distance (float): Minimum distance from the nearest neighboring atoms to the
positions for them to be considered as interstitial site candidates. Set
`min_distance` to 0 if no point should be removed.
use_voronoi (bool): Use Voronoi vertices for the initial interstitial candidate
positions instead of grid points.
x_eps (bool): eps value for the clustering of interstitial candidate positions
l_values (list): list of values for the Steinhardt parameter values for the
classification of the interstitial candidate points
q_eps (float): eps value for the clustering of interstitial candidate points based
on Steinhardt parameters and distance variances. This might play a crucial role
in identifying the correct interstitial sites
var_ratio (float): factor to be multiplied to the variance values in order to give
a larger weight to the variances.
min_samples (int/None): `min_sample` in the point clustering.
neigh_args (dict): arguments to be added to `get_neighbors`
"""
if neigh_args is None:
neigh_args = {}
if use_voronoi:
self.initial_positions = get_voronoi_vertices(structure)
else:
self.initial_positions = create_gridpoints(
structure=structure, n_gridpoints_per_angstrom=n_gridpoints_per_angstrom
)
self._neigh = get_neighbors(
structure=structure, num_neighbors=num_neighbors, **neigh_args
)
self.workflow: list[dict[str, Any]] = [
{
"f": remove_too_close,
"kwargs": {"structure": structure, "min_distance": min_distance},
},
{
"f": set_to_high_symmetry_points,
"kwargs": {"structure": structure, "neigh": self.neigh},
},
{
"f": lambda **kwargs: get_cluster_positions(structure, **kwargs),
"kwargs": {"eps": x_eps},
},
{
"f": cluster_by_steinhardt,
"kwargs": {
"neigh": self.neigh,
"l_values": l_values,
"q_eps": q_eps,
"var_ratio": var_ratio,
"min_samples": min_samples,
},
},
]
self._positions: np.ndarray | None = None
self.structure = structure
[docs]
def run_workflow(
self, positions: np.ndarray | None = None, steps: int = -1
) -> np.ndarray:
"""
Run the workflow to obtain the interstitial positions.
Args:
positions (numpy.ndarray, optional): Initial positions of the interstitial candidates.
If not provided, the initial positions stored in `self.initial_positions` will be used.
steps (int, optional): Number of steps to run in the workflow. If set to -1 (default),
all steps will be run.
Returns:
numpy.ndarray: Final positions of the interstitial sites.
"""
if positions is None:
positions = self.initial_positions.copy()
for ii, ww in enumerate(self.workflow):
f: Callable[..., np.ndarray] = ww["f"]
positions = f(positions=positions, **ww["kwargs"])
if ii == steps:
return positions
return positions
@property
def neigh(self):
"""
Neighborhood information of each interstitial candidate and their surrounding atoms. E.g.
`class.neigh.distances[0][0]` gives the distance from the first interstitial candidate to
its nearest neighboring atoms. The functionalities of `neigh` follow those of
`structuretoolkit.analyse.neighbors`.
"""
return self._neigh
@property
def positions(self) -> np.ndarray:
"""
Get the positions of the interstitial sites.
Returns:
np.ndarray: Positions of the interstitial sites.
"""
if self._positions is None:
self._positions = self.run_workflow()
self._neigh = self.neigh.get_neighborhood(self._positions)
assert self._positions is not None
return self._positions
@property
def hull(self) -> list:
"""
Convex hull of each atom. It is mainly used for the volume and area calculation of each
interstitial candidate. For more info, see `get_volumes` and `get_areas`.
"""
return [ConvexHull(v) for v in self.neigh.vecs]
[docs]
def get_variances(self) -> np.ndarray:
"""
Get variance of neighboring distances. Since interstitial sites are mostly in symmetric
sites, the variance values tend to be small. In the case of fcc, both tetrahedral and
octahedral sites as well as tetrahedral sites in bcc should have the value of 0.
Returns:
(numpy.array (n,)) Variance values
"""
return np.std(self.neigh.distances, axis=-1)
[docs]
def get_distances(self, function_to_apply=np.min) -> np.ndarray:
"""
Get per-position return values of a given function for the neighbors.
Args:
function_to_apply (function): Function to apply to the distance array. Default is
numpy.minimum
Returns:
(numpy.array (n,)) Function values on the distance array
"""
return function_to_apply(self.neigh.distances, axis=-1)
[docs]
def get_steinhardt_parameters(self, l: int) -> np.ndarray:
"""
Args:
l (int/numpy.array): Order of Steinhardt parameter
Returns:
(numpy.array (n,)) Steinhardt parameter values
"""
return self.neigh.get_steinhardt_parameter(l=l)
[docs]
def get_volumes(self) -> np.ndarray:
"""
Returns:
(numpy.array (n,)): Convex hull volume of each site.
"""
return np.array([h.volume for h in self.hull])
[docs]
def get_areas(self) -> np.ndarray:
"""
Returns:
(numpy.array (n,)): Convex hull area of each site.
"""
return np.array([h.area for h in self.hull])
[docs]
def get_interstitials(
structure: Atoms,
num_neighbors: int,
n_gridpoints_per_angstrom: int = 5,
min_distance: float = 1,
use_voronoi: bool = False,
x_eps: float = 0.1,
l_values: np.ndarray = np.arange(2, 13),
q_eps: float = 0.3,
var_ratio: float = 5.0,
min_samples: int | None = None,
neigh_args: dict | None = None,
**kwargs,
) -> Interstitials:
"""
Create an instance of the Interstitials class.
Args:
structure (Atoms): The atomic structure.
num_neighbors (int): The number of neighbors to consider.
n_gridpoints_per_angstrom (int, optional): The number of grid points per angstrom. Defaults to 5.
min_distance (float, optional): The minimum distance between interstitials. Defaults to 1.
use_voronoi (bool, optional): Whether to use Voronoi tessellation. Defaults to False.
x_eps (float, optional): The epsilon value for clustering. Defaults to 0.1.
l_values (np.ndarray, optional): The array of l values for Steinhardt parameter. Defaults to np.arange(2, 13).
q_eps (float, optional): The epsilon value for Steinhardt parameter. Defaults to 0.3.
var_ratio (float, optional): The variance ratio for clustering. Defaults to 5.0.
min_samples (Optional[int], optional): The minimum number of samples for clustering. Defaults to None.
neigh_args (dict, optional): Additional arguments for neighbor calculation. Defaults to {}.
**kwargs: Additional keyword arguments.
Returns:
Interstitials: An instance of the Interstitials class.
"""
if neigh_args is None:
neigh_args = {}
return Interstitials(
structure=structure,
num_neighbors=num_neighbors,
n_gridpoints_per_angstrom=n_gridpoints_per_angstrom,
min_distance=min_distance,
use_voronoi=use_voronoi,
x_eps=x_eps,
l_values=l_values,
q_eps=q_eps,
var_ratio=var_ratio,
min_samples=min_samples,
neigh_args=neigh_args,
**kwargs,
)
get_interstitials.__doc__ = (Interstitials.__doc__ or "").replace(
"Class", "Function"
) + (Interstitials.__init__.__doc__ or "")
[docs]
def get_layers(
structure: Atoms,
distance_threshold: float = 0.01,
id_list: list[int] | None = None,
wrap_atoms: bool = True,
planes: np.ndarray | None = None,
cluster_method: Any | None = None,
) -> np.ndarray:
"""
Get an array of layer numbers.
Args:
distance_threshold (float): Distance below which two points are
considered to belong to the same layer. For detailed
description: sklearn.cluster.AgglomerativeClustering
id_list (list/numpy.ndarray): List of atoms for which the layers
should be considered.
wrap_atoms (bool): Whether to consider periodic boundary conditions according to the box definition or not.
If set to `False`, atoms lying on opposite box boundaries are considered to belong to different layers,
regardless of whether the box itself has the periodic boundary condition in this direction or not.
If `planes` is not `None` and `wrap_atoms` is `True`, this tag has the same effect as calling
`get_layers()` after calling `center_coordinates_in_unit_cell()`
planes (list/numpy.ndarray): Planes along which the layers are calculated. Planes are
given in vectors, i.e. [1, 0, 0] gives the layers along the x-axis. Default planes
are orthogonal unit vectors: [[1, 0, 0], [0, 1, 0], [0, 0, 1]]. If you have a
tilted box and want to calculate the layers along the directions of the cell
vectors, use `planes=np.linalg.inv(structure.cell).T`. Whatever values are
inserted, they are internally normalized, so whether [1, 0, 0] is entered or
[2, 0, 0], the results will be the same.
cluster_method (scikit-learn cluster algorithm): if given overrides the clustering method used, must be an
instance of a cluster algorithm from scikit-learn (or compatible interface)
Returns: Array of layer numbers (same shape as structure.positions)
Example I - how to get the number of layers in each direction:
>>> structure = Project('.').create_structure('Fe', 'bcc', 2.83).repeat(5)
>>> print('Numbers of layers:', np.max(structure.analyse.get_layers(), axis=0)+1)
Example II - get layers of only one species:
>>> print('Iron layers:', structure.analyse.get_layers(
... id_list=structure.select_index('Fe')))
The clustering algorithm can be changed with the cluster_method argument
>>> from sklearn.cluster import DBSCAN
>>> layers = structure.analyse.get_layers(cluster_method=DBSCAN())
"""
if distance_threshold <= 0:
raise ValueError("distance_threshold must be a positive float")
if id_list is not None and len(id_list) == 0:
raise ValueError("id_list must contain at least one id")
if wrap_atoms and planes is None:
positions, indices = get_extended_positions(
structure=structure, width=distance_threshold, return_indices=True
)
if id_list is not None:
id_arr = np.arange(len(structure))[np.array(id_list)]
id_mask = np.any(id_arr[:, np.newaxis] == indices[np.newaxis, :], axis=0)
positions = positions[id_mask]
indices = indices[id_mask]
else:
positions = structure.positions
if id_list is not None:
positions = positions[id_list]
if wrap_atoms:
positions = get_wrapped_coordinates(
structure=structure, positions=positions
)
if planes is not None:
mat = np.asarray(planes).reshape(-1, 3)
positions = np.einsum(
"ij,i,nj->ni", mat, 1 / np.linalg.norm(mat, axis=-1), positions
)
if cluster_method is None:
from sklearn.cluster import AgglomerativeClustering
cluster_method = AgglomerativeClustering(
linkage="complete",
n_clusters=None,
distance_threshold=distance_threshold,
)
layers = []
for ii, x in enumerate(positions.T):
cluster = cluster_method.fit(x.reshape(-1, 1))
first_occurrences = np.unique(cluster.labels_, return_index=True)[1]
permutation = x[first_occurrences].argsort().argsort()
labels = permutation[cluster.labels_]
if wrap_atoms and planes is None and structure.pbc[ii]:
mean_positions = get_average_of_unique_labels(labels, positions)
scaled_positions = np.einsum(
"ji,nj->ni", np.linalg.inv(structure.cell), mean_positions
)
unique_inside_box = np.all(
np.absolute(scaled_positions - 0.5 + 1.0e-8) < 0.5, axis=-1
)
arr_inside_box = np.any(
labels[:, None] == np.unique(labels)[unique_inside_box][None, :],
axis=-1,
)
first_occurences = np.unique(indices[arr_inside_box], return_index=True)[1]
labels = labels[arr_inside_box]
labels -= np.min(labels)
labels = labels[first_occurences]
layers.append(labels)
if planes is not None and len(np.asarray(planes).shape) == 1:
return np.asarray(layers).flatten()
return np.vstack(layers).T
[docs]
def get_voronoi_vertices(
structure: Atoms,
epsilon: float = 2.5e-4,
distance_threshold: float = 0,
width_buffer: float = 10.0,
) -> np.ndarray:
"""
Get voronoi vertices of the box.
Args:
epsilon (float): displacement to add to avoid wrapping of atoms at borders
distance_threshold (float): distance below which two vertices are considered as one.
Agglomerative clustering algorithm (sklearn) is employed. Final positions are given
as the average positions of clusters.
width_buffer (float): width of the layer to be added to account for pbc.
Returns:
numpy.ndarray: 3d-array of vertices
This function detect octahedral and tetrahedral sites in fcc; in bcc it detects tetrahedral
sites. In defects (e.g. vacancy, dislocation, grain boundary etc.), it gives a list of
positions interstitial atoms might want to occupy. In order for this to be more successful,
it might make sense to look at the distance between the voronoi vertices and their nearest
neighboring atoms via:
>>> voronoi_vertices = structure_of_your_choice.analyse.get_voronoi_vertices()
>>> neigh = structure_of_your_choice.get_neighborhood(voronoi_vertices)
>>> print(neigh.distances.min(axis=-1))
"""
voro = Voronoi(
get_extended_positions(structure=structure, width=width_buffer) + epsilon
)
xx = voro.vertices
if distance_threshold > 0:
from sklearn.cluster import AgglomerativeClustering
cluster = AgglomerativeClustering(
linkage="single", distance_threshold=distance_threshold, n_clusters=None
)
cluster.fit(xx)
xx = get_average_of_unique_labels(cluster.labels_, xx)
xx = xx[
np.linalg.norm(
xx - get_wrapped_coordinates(structure=structure, positions=xx, epsilon=0),
axis=-1,
)
< epsilon
]
return xx - epsilon
def _get_neighbors(
structure: Atoms,
position_interpreter: Callable,
data_field: str,
width_buffer: float = 10,
) -> np.ndarray:
"""
Get pairs of atom indices sharing the same Voronoi vertices/areas.
Args:
structure (Atoms): The atomic structure.
position_interpreter (callable): The position interpreter function.
data_field (str): The data field to extract from the position interpreter.
width_buffer (float): Width of the layer to be added to account for pbc.
Returns:
np.ndarray: Pair indices
"""
positions, indices = get_extended_positions(
structure=structure, width=width_buffer, return_indices=True
)
interpretation = position_interpreter(positions)
data = getattr(interpretation, data_field)
x = positions[data]
return indices[
data[
np.isclose(get_wrapped_coordinates(structure=structure, positions=x), x)
.all(axis=-1)
.any(axis=-1)
]
]
[docs]
def get_voronoi_neighbors(structure: Atoms, width_buffer: float = 10) -> np.ndarray:
"""
Get pairs of atom indices sharing the same Voronoi vertices/areas.
Args:
width_buffer (float): Width of the layer to be added to account for pbc.
Returns:
pairs (ndarray): Pair indices
"""
return _get_neighbors(
structure=structure,
position_interpreter=Voronoi,
data_field="ridge_points",
width_buffer=width_buffer,
)
[docs]
def get_delaunay_neighbors(structure: Atoms, width_buffer: float = 10.0) -> np.ndarray:
"""
Get indices of atoms sharing the same Delaunay tetrahedrons (commonly known as Delaunay
triangles), i.e. indices of neighboring atoms, which form a tetrahedron, in which no other
atom exists.
Args:
width_buffer (float): Width of the layer to be added to account for pbc.
Returns:
pairs (ndarray): Delaunay neighbor indices
"""
return _get_neighbors(
structure=structure,
position_interpreter=Delaunay,
data_field="simplices",
width_buffer=width_buffer,
)
[docs]
def get_cluster_positions(
structure: Atoms,
positions: np.ndarray | None = None,
eps: float = 1.0,
buffer_width: float | None = None,
return_labels: bool = False,
) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
"""
Cluster positions according to the distances. Clustering algorithm uses DBSCAN:
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html
Example I:
```
analyse = Analyze(some_ase_structure)
positions = analyse.cluster_points(eps=2)
```
This example should return the atom positions, if no two atoms lie within a distance of 2.
If there are at least two atoms which lie within a distance of 2, their entries will be
replaced by their mean position.
Example II:
```
analyse = Analyze(some_ase_structure)
print(analyse.cluster_positions([3*[0.], 3*[1.]], eps=3))
```
This returns `[0.5, 0.5, 0.5]` (if the cell is large enough)
Args:
positions (numpy.ndarray): Positions to consider. Default: atom positions
eps (float): The maximum distance between two samples for one to be considered as in
the neighborhood of the other.
buffer_width (float): Buffer width to consider across the periodic boundary
conditions. If too small, it is possible that atoms that are meant to belong
together across PBC are missed. Default: Same as eps
return_labels (bool): Whether to return the labels given according to the grouping
together with the mean positions
Returns:
positions (numpy.ndarray): Mean positions
label (numpy.ndarray): Labels of the positions (returned when `return_labels = True`)
"""
from sklearn.cluster import DBSCAN
positions = structure.positions if positions is None else np.array(positions)
if buffer_width is None:
buffer_width = eps
extended_positions, indices = get_extended_positions(
structure=structure,
width=buffer_width,
return_indices=True,
positions=positions,
)
labels = DBSCAN(eps=eps, min_samples=1).fit_predict(extended_positions)
coo = coo_matrix((labels, (np.arange(len(labels)), indices)))
labels = coo.max(axis=0).toarray().flatten()
# make labels look nicer
labels = np.unique(labels, return_inverse=True)[1]
mean_positions = get_mean_positions(
positions, structure.cell, structure.pbc, labels
)
if return_labels:
return mean_positions, labels
return mean_positions