From 525e9dc4ff3fc9bf85f74aec45f5eb4e6a5a62ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Mon, 1 Jul 2024 22:23:16 +0100 Subject: [PATCH] Add interactions iterator (#210) * add interactions iterator * fix test --- CHANGELOG.md | 3 ++ docs/notebooks/advanced.ipynb | 58 ++++++++++++++++++++++++++++++++++- prolif/ifp.py | 24 +++++++++++++++ tests/test_ifp.py | 20 +++++++++--- 4 files changed, 100 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da69aa9..531486d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `IFP.interactions()` iterator that yields all interaction data for a given frame in + a single flat structure. This makes iterating over the `fp.ifp` results a bit + easier / less nested. - `Complex3D` and `fp.plot_3d` now have access to `only_interacting` and `remove_hydrogens` parameters to control which residues and hydrogen atoms are displayed. Non-polar hydrogen atoms that aren't involved in interactions are now diff --git a/docs/notebooks/advanced.ipynb b/docs/notebooks/advanced.ipynb index 7cc34a6..d757e33 100644 --- a/docs/notebooks/advanced.ipynb +++ b/docs/notebooks/advanced.ipynb @@ -381,6 +381,62 @@ "source": [ "You can then prepare your system and run the analysis as you normally would." ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Accessing results\n", + "\n", + "Once the fingerprint analysis has been run, there are multiple ways to access the data. The most convenient one, showcased in the tutorials, is through a pandas DataFrame; however, this only shows the residues involved in each interaction."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fp.to_dataframe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The complete data is stored on the `ifp` attribute of the fingerprint object as a dictionary indexed by residues:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "frame_number = 0\n", + "ligand_residue = \"UNL1\"\n", + "protein_residue = \"VAL200.A\"\n", + "\n", + "fp.ifp[frame_number][(ligand_residue, protein_residue)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To make it easier to work with this deeply nested data structure, the results can also be accessed in a flatter structure like so:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for interaction_data in fp.ifp[frame_number].interactions():\n", + " print(interaction_data)\n", + " break" + ] } ], "metadata": { @@ -399,7 +455,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.11.6" }, "orig_nbformat": 4 }, diff --git a/prolif/ifp.py b/prolif/ifp.py index 8754d13..48ce399 100644 --- a/prolif/ifp.py +++ b/prolif/ifp.py @@ -4,10 +4,18 @@ """ from collections import UserDict +from typing import Iterator, NamedTuple from prolif.residue import ResidueId +class InteractionData(NamedTuple): + ligand: ResidueId + protein: ResidueId + interaction: str + metadata: dict + + class IFP(UserDict): """Mapping between residue pairs and interaction fingerprint. @@ -67,3 +75,19 @@ def __getitem__(self, key): "either ResidueId or residue string. If you need to filter the IFP, a " "single ResidueId or residue string can also be used.", ) + + def interactions(self) -> Iterator[InteractionData]: + """Yields all interactions data as an :class:`InteractionData` namedtuple. + + .. 
versionadded:: 2.1.0 + + """ + for (ligand_resid, protein_resid), ifp_dict in self.data.items(): + for int_name, metadata_tuple in ifp_dict.items(): + for metadata in metadata_tuple: + yield InteractionData( + ligand=ligand_resid, + protein=protein_resid, + interaction=int_name, + metadata=metadata, + ) diff --git a/tests/test_ifp.py b/tests/test_ifp.py index 0e8d87f..150d67f 100644 --- a/tests/test_ifp.py +++ b/tests/test_ifp.py @@ -1,24 +1,25 @@ import pytest from prolif.fingerprint import Fingerprint +from prolif.ifp import IFP, InteractionData from prolif.residue import ResidueId @pytest.fixture(scope="session") -def ifp(u, ligand_ag, protein_ag): +def ifp(u, ligand_ag, protein_ag) -> IFP: fp = Fingerprint(["Hydrophobic", "VdWContact"]) fp.run(u.trajectory[0:1], ligand_ag, protein_ag) return fp.ifp[0] -def test_ifp_indexing(ifp): +def test_ifp_indexing(ifp: IFP) -> None: lig_id, prot_id = "LIG1.G", "LEU126.A" metadata1 = ifp[(ResidueId.from_string(lig_id), ResidueId.from_string(prot_id))] metadata2 = ifp[(lig_id, prot_id)] assert metadata1 is metadata2 -def test_ifp_filtering(ifp): +def test_ifp_filtering(ifp: IFP) -> None: lig_id, prot_id = "LIG1.G", "LEU126.A" assert ifp[lig_id] == ifp assert ( @@ -27,6 +28,17 @@ def test_ifp_filtering(ifp): ) -def test_wrong_key(ifp): +def test_wrong_key(ifp: IFP) -> None: with pytest.raises(KeyError, match="does not correspond to a valid IFP key"): ifp[0] + + +def test_interaction_data_iteration(ifp: IFP) -> None: + data = next(ifp.interactions()) + assert isinstance(data, InteractionData) + assert data.ligand == ResidueId("LIG", 1, "G") + assert data.protein.chain in {"A", "B"} + assert data.interaction in {"Hydrophobic", "VdWContact"} + assert "distance" in data.metadata + for data in ifp.interactions(): + assert isinstance(data, InteractionData)