"""Bond computation from declarative BondSpec rules."""
from fnmatch import fnmatch
import numpy as np
from hofmann.model import Bond, BondSpec
from hofmann.model._util import _site_species
from hofmann.model.composition import Composition
[docs]
def compute_bonds(
species: tuple[str | Composition, ...],
coords: np.ndarray,
bond_specs: list[BondSpec],
lattice: np.ndarray | None = None,
) -> list[Bond]:
"""Compute bonds for a single frame based on bond specification rules.
For each pair of atoms, checks all bond specs in order to find the
first matching rule where the interatomic distance falls within
``[min_length, max_length]``.
When *lattice* is provided, bonds across periodic boundaries are
found correctly. If all bond lengths are shorter than the
inscribed sphere radius of the cell, the minimum image convention
(MIC) is used for an efficient O(n²) computation. Otherwise, all
27 images in ``{-1, 0, 1}^3`` are checked iteratively to handle
multi-image bonds (e.g. atoms on opposite cell faces at half a
lattice parameter apart).
Args:
species: Species labels, one per atom.
coords: Coordinates array of shape ``(n_atoms, 3)``.
bond_specs: List of BondSpec rules to apply.
lattice: 3x3 matrix of lattice vectors (row vectors).
``None`` for non-periodic scenes.
Returns:
List of Bond objects for all detected bonds.
"""
if len(species) == 0 or len(bond_specs) == 0:
return []
coords = np.asarray(coords, dtype=float)
n_atoms = len(species)
if coords.ndim != 2 or coords.shape[1] != 3:
raise ValueError(
f"coords must have 3 columns, got shape {coords.shape}"
)
if coords.shape[0] != n_atoms:
raise ValueError(
f"species has {n_atoms} entries but coords has "
f"{coords.shape[0]} rows"
)
unique_species_set: set[str] = set()
for site in species:
unique_species_set |= _site_species(site)
unique_species = list(unique_species_set)
# Vectorised pairwise difference vectors: diff[i,j] = coords[i] - coords[j].
diff = coords[:, np.newaxis, :] - coords[np.newaxis, :, :]
if lattice is not None:
return _compute_bonds_periodic(
species, diff, bond_specs, lattice, unique_species, n_atoms,
)
else:
return _compute_bonds_direct(
species, diff, bond_specs, unique_species, n_atoms,
)
def _compute_bonds_direct(
species: tuple[str | Composition, ...],
diff: np.ndarray,
bond_specs: list[BondSpec],
unique_species: list[str],
n_atoms: int,
) -> list[Bond]:
"""Compute bonds for a non-periodic structure."""
dist_matrix = np.linalg.norm(diff, axis=2)
upper = np.triu(np.ones((n_atoms, n_atoms), dtype=bool), k=1)
claimed = np.zeros((n_atoms, n_atoms), dtype=bool)
bonds: list[Bond] = []
for spec in bond_specs:
pair_mask = _species_pair_mask(spec, species, unique_species)
dist_ok = (
(dist_matrix >= spec.min_length)
& (dist_matrix <= spec.max_length)
)
hits = upper & pair_mask & dist_ok & ~claimed
ii, jj = np.nonzero(hits)
if len(ii) > 0:
claimed[ii, jj] = True
for idx in range(len(ii)):
i, j = int(ii[idx]), int(jj[idx])
bonds.append(
Bond(i, j, float(dist_matrix[i, j]), spec)
)
return bonds
def _inscribed_sphere_radius(lattice: np.ndarray) -> float:
"""Radius of the largest sphere fitting inside the unit cell.
For lattice vectors **a**, **b**, **c** (rows of *lattice*), this
is ``min(h_a, h_b, h_c) / 2`` where each ``h_i`` is the
perpendicular distance between the pair of faces normal to the
*i*-th reciprocal lattice direction.
"""
a, b, c = lattice[0], lattice[1], lattice[2]
volume = abs(np.dot(a, np.cross(b, c)))
heights = np.array([
volume / np.linalg.norm(np.cross(b, c)),
volume / np.linalg.norm(np.cross(a, c)),
volume / np.linalg.norm(np.cross(a, b)),
])
return float(heights.min() / 2.0)
def _compute_bonds_periodic(
species: tuple[str | Composition, ...],
diff: np.ndarray,
bond_specs: list[BondSpec],
lattice: np.ndarray,
unique_species: list[str],
n_atoms: int,
) -> list[Bond]:
"""Compute bonds for a periodic structure.
Uses a two-tier approach to minimise memory:
* **MIC fast path** — when the longest bond spec is shorter than
the inscribed sphere radius, the minimum image convention
guarantees at most one image per pair. Memory: O(n²).
* **Multi-image slow path** — when bond lengths are comparable to
cell dimensions, all 27 images in ``{-1, 0, 1}^3`` are checked
by iterating one offset at a time. Memory: O(n²) per
iteration. Self-bonds (atom to its own periodic image) are
found on this path.
"""
lattice = np.asarray(lattice, dtype=float)
lat_inv = np.linalg.inv(lattice)
diff_frac = diff @ lat_inv # (n, n, 3) fractional differences
max_bond = max(s.max_length for s in bond_specs)
r_ins = _inscribed_sphere_radius(lattice)
if max_bond < r_ins:
return _compute_bonds_mic(
species, diff_frac, bond_specs, lattice,
unique_species, n_atoms,
)
return _compute_bonds_multi_image(
species, diff_frac, bond_specs, lattice,
unique_species, n_atoms,
)
def _compute_bonds_mic(
species: tuple[str | Composition, ...],
diff_frac: np.ndarray,
bond_specs: list[BondSpec],
lattice: np.ndarray,
unique_species: list[str],
n_atoms: int,
) -> list[Bond]:
"""MIC fast path: one image per pair, no self-bonds possible."""
images = np.rint(diff_frac).astype(int) # (n, n, 3)
mic_frac = diff_frac - images # (n, n, 3)
mic_cart = mic_frac @ lattice # (n, n, 3)
dist = np.linalg.norm(mic_cart, axis=2) # (n, n)
upper = np.triu(np.ones((n_atoms, n_atoms), dtype=bool), k=1)
claimed = np.zeros((n_atoms, n_atoms), dtype=bool)
bonds: list[Bond] = []
for spec in bond_specs:
pair_mask = _species_pair_mask(spec, species, unique_species)
dist_ok = (dist >= spec.min_length) & (dist <= spec.max_length)
hits = upper & pair_mask & dist_ok & ~claimed
ii, jj = np.nonzero(hits)
if len(ii) > 0:
claimed[ii, jj] = True
for idx in range(len(ii)):
i, j = int(ii[idx]), int(jj[idx])
image = (
int(images[i, j, 0]),
int(images[i, j, 1]),
int(images[i, j, 2]),
)
bonds.append(
Bond(i, j, float(dist[i, j]), spec, image=image)
)
return bonds
def _compute_bonds_multi_image(
species: tuple[str | Composition, ...],
diff_frac: np.ndarray,
bond_specs: list[BondSpec],
lattice: np.ndarray,
unique_species: list[str],
n_atoms: int,
) -> list[Bond]:
"""Multi-image slow path: iterate over 27 offsets one at a time.
Checks all ``{-1, 0, 1}^3`` image offsets so that bonds through
multiple images of the same atom pair are found correctly,
including self-bonds (atom to its own periodic image).
Peak memory is O(n²) per iteration rather than O(27 n²) for the
fully vectorised approach.
"""
img_offsets = np.array([
(n1, n2, n3)
for n1 in (-1, 0, 1) for n2 in (-1, 0, 1) for n3 in (-1, 0, 1)
]) # (27, 3)
# Pair constraints:
# offset (0,0,0): i < j only (no self-bonds, no double-counting).
# non-zero offset: i <= j (self-bonds on diagonal; reverse pair
# at opposite offset is the same physical bond).
upper = np.triu(np.ones((n_atoms, n_atoms), dtype=bool), k=1)
upper_diag = upper | np.eye(n_atoms, dtype=bool)
# Pre-compute pair masks for all specs.
pair_masks = [
_species_pair_mask(spec, species, unique_species)
for spec in bond_specs
]
# Per-image claimed arrays (booleans — negligible vs float64).
claimed = [
np.zeros((n_atoms, n_atoms), dtype=bool)
for _ in range(len(img_offsets))
]
bonds: list[Bond] = []
for k, offset in enumerate(img_offsets):
is_zero = not offset.any()
valid = upper if is_zero else upper_diag
shifted = diff_frac - offset # (n, n, 3)
cart = shifted @ lattice # (n, n, 3)
dist = np.linalg.norm(cart, axis=2) # (n, n)
for s_idx, spec in enumerate(bond_specs):
dist_ok = (dist >= spec.min_length) & (dist <= spec.max_length)
hits = valid & pair_masks[s_idx] & dist_ok & ~claimed[k]
ii, jj = np.nonzero(hits)
if len(ii) > 0:
claimed[k][ii, jj] = True
image = (int(offset[0]), int(offset[1]), int(offset[2]))
for idx in range(len(ii)):
i, j = int(ii[idx]), int(jj[idx])
bonds.append(
Bond(i, j, float(dist[i, j]), spec, image=image)
)
return bonds
def _species_pair_mask(
spec: BondSpec,
species: tuple[str | Composition, ...],
unique_species: list[str],
) -> np.ndarray:
"""Build a boolean (n, n) mask for species pairs matching *spec*.
For a mixed site, the row matches the spec's species if any
constituent species satisfies the (fnmatch-aware) pattern.
"""
sp_a, sp_b = spec.species
match_a = {s for s in unique_species if fnmatch(s, sp_a)}
match_b = {s for s in unique_species if fnmatch(s, sp_b)}
mask_a = np.array([
bool(_site_species(site) & match_a) for site in species
])
mask_b = np.array([
bool(_site_species(site) & match_b) for site in species
])
return (
(mask_a[:, np.newaxis] & mask_b[np.newaxis, :])
| (mask_b[:, np.newaxis] & mask_a[np.newaxis, :])
)