"""Validated container for per-atom metadata arrays."""
from __future__ import annotations
from collections.abc import Iterator, Mapping
from types import MappingProxyType
import numpy as np
from hofmann.model.colour import _is_categorical_missing
def _compute_global_range(
arr: np.ndarray,
) -> tuple[float, float] | None:
"""Compute the global ``(min, max)`` for a 2-D numeric array.
Returns ``None`` for 1-D arrays, categorical (string/object)
dtypes, empty arrays, or arrays where every value is NaN.
"""
if arr.ndim != 2 or arr.dtype.kind in ("U", "O"):
return None
if arr.size == 0:
return None
if np.all(np.isnan(arr)):
return None
lo = float(np.nanmin(arr))
hi = float(np.nanmax(arr))
return (lo, hi)
def _compute_global_labels(arr: np.ndarray) -> tuple[str, ...] | None:
"""Return the unique non-missing labels in a 2-D categorical array.
Returns ``None`` for 1-D arrays or non-categorical (numeric)
dtypes. Missing values (``None``, ``""``, NaN) are excluded.
Labels are returned in first-encountered order across
``arr.ravel()``. This ordering is load-bearing: downstream
colourmap assignment (``_resolve_categorical``) indexes into this
tuple via :func:`enumerate`, so the order determines which label
gets which colour. Preserve the insertion-order semantics;
sorting or otherwise reordering the result will silently change
per-atom colours.
"""
if arr.ndim != 2 or arr.dtype.kind not in ("U", "O"):
return None
seen: dict[str, None] = {}
for v in arr.ravel():
if _is_categorical_missing(v):
continue
s = str(v)
if s not in seen:
seen[s] = None
if not seen:
return None
return tuple(seen)
[docs]
class AtomData(Mapping[str, np.ndarray]):
"""Per-atom metadata container.
The supported way to obtain an ``AtomData`` is via
:attr:`~hofmann.StructureScene.atom_data`; the class is not
re-exported from ``hofmann`` or ``hofmann.model``, and direct
construction is considered an internal implementation detail.
User-facing access goes through
:attr:`~hofmann.StructureScene.atom_data` for reads,
:meth:`~hofmann.StructureScene.set_atom_data`,
:meth:`~hofmann.StructureScene.del_atom_data`, and
:meth:`~hofmann.StructureScene.clear_2d_atom_data` for writes.
Stores named per-atom arrays. Each value is either a 1-D array
of shape ``(n_atoms,)`` (static across the trajectory) or a 2-D
array of shape ``(m, n_atoms)`` where ``m`` is the trajectory
length the caller declares at write time via the
``expected_frames`` kwarg on ``_set``. All stored 2-D entries
in a single container must share the same ``m``; this cross-
entry invariant is enforced at assignment.
The container holds no cached frame count between calls; each
``_set`` is told ``expected_frames`` by the caller, and the
invariant is re-derived from the stored data on every call.
Frame consistency is enforced at two sites:
- At assignment, via ``_set`` calling ``_check_2d_consistency``
with ``pending={key: arr}``, validating the prospective
post-write state against ``expected_frames``. Both the
incoming array and any already-stored 2-D entries not being
overridden are checked.
- At render, via
:meth:`~hofmann.StructureScene.render_mpl` (and friends)
calling the scene's private ``_validate_for_render`` helper,
which in turn calls ``_check_2d_consistency`` with no
``pending``. This validates the current stored state against
``len(scene.frames)`` as a backstop that catches the specific
case where ``scene.frames`` is mutated after the last write
but before the next render.
Inherits from :class:`collections.abc.Mapping` (not
:class:`~collections.abc.MutableMapping`). Mutation goes
through the private ``_set``, ``_del``, and ``_clear_2d``
methods; no ``ad[key] = value`` or ``del ad[key]`` shortcut
exists. Assigned values are always copied via
:func:`numpy.array` -- including existing numpy arrays -- so
the container owns the buffer and the caller's source array is
left untouched.
.. note::
Stored arrays are returned read-only. In-place mutation
(e.g. ``ad["charge"][0] = 99``) raises
``ValueError: assignment destination is read-only``. To
update values, pass a fresh array through
:meth:`~hofmann.StructureScene.set_atom_data`, which
re-validates the shape and recomputes the :attr:`ranges`
and :attr:`labels` entries for the key. Only the array
buffer is frozen -- for ``object``-dtype arrays, any
mutable objects stored inside remain mutable.
Attributes:
n_atoms: The number of atoms the container was built for.
Fixed at construction and not mutable.
ranges: Read-only mapping of keys to ``(min, max)`` tuples
for 2-D numeric arrays, or ``None`` for keys that do
not have a meaningful numeric range (1-D arrays,
categorical arrays, empty arrays, all-NaN numeric
arrays). Entries are added on assignment, replaced on
reassignment, and removed on deletion.
labels: Read-only mapping of keys to tuples of unique
non-missing categorical labels, or ``None`` for keys
without a meaningful label set (1-D arrays, numeric
dtypes, categorical arrays with no non-missing values).
Missing values (``None``, ``""``, NaN) are excluded
from the label set. Entries are added on assignment,
replaced on reassignment, and removed on deletion.
For 2-D arrays, ``ranges`` is populated for numeric dtypes and
``labels`` for categorical dtypes; the other side is always
``None``. Either side may itself be ``None`` for empty arrays
or arrays containing only missing values. For 1-D arrays, both
are ``None``.
Args:
n_atoms: Number of atoms in the scene. Non-negative.
Raises:
ValueError: If *n_atoms* is negative.
"""
[docs]
def __init__(self, *, n_atoms: int) -> None:
if n_atoms < 0:
raise ValueError(f"n_atoms must be non-negative, got {n_atoms}")
self._n_atoms = n_atoms
self._data: dict[str, np.ndarray] = {}
self._ranges: dict[str, tuple[float, float] | None] = {}
self._labels: dict[str, tuple[str, ...] | None] = {}
self._ranges_view: Mapping[str, tuple[float, float] | None] = (
MappingProxyType(self._ranges)
)
self._labels_view: Mapping[str, tuple[str, ...] | None] = (
MappingProxyType(self._labels)
)
@property
def n_atoms(self) -> int:
return self._n_atoms
@property
def ranges(self) -> Mapping[str, tuple[float, float] | None]:
return self._ranges_view
@property
def labels(self) -> Mapping[str, tuple[str, ...] | None]:
return self._labels_view
def _check_2d_consistency(
self,
expected: int,
*,
pending: Mapping[str, np.ndarray] | None = None,
) -> None:
"""Check the 2-D shape invariant against *expected* for the
prospective post-operation state.
The prospective state is the current stored state with any
keys in *pending* treated as overrides: a pending entry
replaces the stored entry of the same key (for reassignment)
or adds a new entry (for a first write). Stored entries
whose keys appear in *pending* are skipped during validation
because they will no longer exist in the post-operation
state.
Called at two sites, both with ``expected=len(scene.frames)``:
- ``_set`` with ``pending={key: arr}`` to validate the state
that would result from storing *arr* under *key*.
- ``StructureScene._validate_for_render`` with no *pending*
(current stored state is the prospective state) at the
start of every public ``render_*`` method.
Pending entries are checked before stored entries so that a
user passing a wrong-shape array gets an actionable
call-site error, not a confusing stale-stored error.
Stored entries form an equivalence class under the cross-
entry invariant (all 2-D share the same ``shape[0]``), so
once the walk finds a non-overridden 2-D entry that passes,
it short-circuits -- any other non-overridden 2-D entries
would report the same shape.
Args:
expected: The ``shape[0]`` value to check against.
pending: Optional mapping of keys to arrays being
written. Pending arrays with ``ndim != 2`` are
ignored for this check.
Raises:
ValueError: If any pending or non-overridden stored 2-D
entry has ``shape[0] != expected``.
"""
pending = pending or {}
# Check pending entries first: errors on these are caused by
# the user's current call, so their messages are the most
# actionable.
for pending_key, arr in pending.items():
if arr.ndim == 2 and arr.shape[0] != expected:
raise ValueError(
f"atom_data[{pending_key!r}] has {arr.shape[0]} "
f"rows but {expected} frames were expected"
)
# Then check stored entries, skipping any overridden by
# pending: those stored entries will be replaced by the
# operation that is about to complete.
for stored_key, arr in self._data.items():
if stored_key in pending:
continue
if arr.ndim == 2:
if arr.shape[0] != expected:
raise ValueError(
f"atom_data has stale 2-D entry "
f"{stored_key!r} sized for {arr.shape[0]} "
f"frames, but {expected} frames were "
f"expected; call "
f"scene.clear_2d_atom_data() to recover "
f"before reassigning"
)
return
def _set(
self,
key: str,
value: object,
*,
expected_frames: int,
) -> None:
"""Store *value* under *key* with full validation.
Private internal write method called from ``StructureScene``
plumbing (``set_atom_data``, ``__init__``), not part of any
public protocol. The container inherits from
:class:`~collections.abc.Mapping` (not
:class:`~collections.abc.MutableMapping`), so there is no
``ad[key] = value`` shortcut for users.
For 2-D values, the post-operation state (the current stored
state with *key* replaced by *value*) must have every 2-D
entry sized for *expected_frames*. This catches both
user-supplied arrays that do not match the scene's
trajectory and stale stored entries left over from an
earlier ``scene.frames`` mutation. Reassigning a single 2-D
entry with a new shape is allowed when it is the only 2-D
entry, because the stored entry for *key* is treated as
overridden by the pending write and skipped during the walk.
Args:
key: Entry key.
value: Array-like input.
expected_frames: The scene's current ``len(frames)``;
required on every call because the container exists
only to back a scene.
Raises:
ValueError: If *value* does not coerce to a 1-D array of
length ``n_atoms`` or a 2-D array of shape
``(expected_frames, n_atoms)``, if a non-overridden
stored 2-D entry is stale relative to
*expected_frames*, or if the value has an
unsupported dtype (only bool, integer, float,
string, and object are accepted).
"""
arr = np.array(value)
# Input validation: ndim and per-atom shape. Pure function of
# *value*; no dependence on stored state.
if arr.ndim == 1:
if len(arr) != self._n_atoms:
raise ValueError(
f"atom_data[{key!r}] must have length "
f"{self._n_atoms}, got {len(arr)}"
)
elif arr.ndim == 2:
if arr.shape[1] != self._n_atoms:
raise ValueError(
f"atom_data[{key!r}] must have {self._n_atoms} "
f"columns (one per atom), got {arr.shape[1]}"
)
else:
raise ValueError(
f"atom_data[{key!r}] must be 1-D or 2-D, "
f"got {arr.ndim}-D"
)
# Input validation: dtype whitelist. Also a pure function of
# *value*. Must fire before any state-dependent check so a
# user passing a bad-dtype array is told about the dtype
# directly, rather than being sent on an unnecessary
# ``clear_2d_atom_data()`` recovery for a write that was
# never going to land.
if arr.dtype.kind not in ("b", "i", "u", "f", "U", "O"):
raise ValueError(
f"atom_data[{key!r}] has unsupported dtype "
f"{arr.dtype}; supported dtypes are bool, integer, "
f"float, string, and object"
)
# State-dependent invariant: pending shape against expected
# frames, and any non-overridden stored 2-D entries. Runs
# after all input checks so input errors take priority.
if arr.ndim == 2:
self._check_2d_consistency(
expected_frames, pending={key: arr}
)
arr.flags.writeable = False
new_range = _compute_global_range(arr)
new_labels = _compute_global_labels(arr)
self._data[key] = arr
self._ranges[key] = new_range
self._labels[key] = new_labels
def __getitem__(self, key: str) -> np.ndarray:
return self._data[key]
def _del(self, key: str) -> None:
"""Remove an entry by key.
Private internal delete method called from ``StructureScene``
plumbing (``del_atom_data`` and ``clear_2d_atom_data``), not
part of any public protocol. The container inherits from
:class:`~collections.abc.Mapping` (not
:class:`~collections.abc.MutableMapping`), so there is no
``del ad[key]`` shortcut for users.
Raises:
KeyError: If *key* is not present.
"""
del self._data[key]
del self._ranges[key]
del self._labels[key]
def _clear_2d(self) -> None:
"""Remove all 2-D entries, leaving 1-D entries untouched.
Private helper called from
:meth:`~hofmann.StructureScene.clear_2d_atom_data`. After this method
runs, the cross-entry 2-D shape constraint is released, so a
subsequent ``_set`` with a 2-D array of any ``shape[0]``
will succeed. 1-D entries and their derived ``ranges`` /
``labels`` metadata are preserved.
Uses ``_del`` internally so all bookkeeping for removed
entries stays in one place.
"""
keys_to_delete = [
k for k, v in self._data.items() if v.ndim == 2
]
for key in keys_to_delete:
self._del(key)
def __iter__(self) -> Iterator[str]:
return iter(self._data)
def __len__(self) -> int:
return len(self._data)
def __repr__(self) -> str:
if not self._data:
return "AtomData()"
items = [
f"{key!r}: {arr.shape}"
for key, arr in self._data.items()
]
return f"AtomData({{{', '.join(items)}}})"