Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Water bridge from iterable of mols #237

Open
wants to merge 2 commits into
base: water_bridge
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 47 additions & 8 deletions prolif/fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,21 +683,44 @@ def run_from_iterable(
raise ValueError("n_jobs must be > 0 or None")
if residues == "all":
residues = list(prot_mol.residues)
if n_jobs != 1:
return self._run_iter_parallel(
lig_iterable=lig_iterable,
prot_mol=prot_mol,
if n_jobs is None:
n_jobs = int(os.environ.get("PROLIF_N_JOBS", "0")) or None

if self.interactions:
if n_jobs == 1:
ifp = self._run_iter_serial(
lig_iterable=lig_iterable,
prot_mol=prot_mol,
residues=residues,
progress=progress,
)
else:
ifp = self._run_iter_parallel(
lig_iterable=lig_iterable,
prot_mol=prot_mol,
residues=residues,
progress=progress,
n_jobs=n_jobs,
)
self.ifp = ifp

if self.bridged_interactions:
self._run_iter_bridged_analysis(
lig_iterable,
prot_mol,
residues=residues,
progress=progress,
n_jobs=n_jobs,
)

return self

def _run_iter_serial(self, lig_iterable, prot_mol, residues, progress):
iterator = tqdm(lig_iterable) if progress else lig_iterable
ifp = {}
for i, lig_mol in enumerate(iterator):
ifp[i] = self.generate(lig_mol, prot_mol, residues=residues, metadata=True)
self.ifp = ifp
return self
return ifp

def _run_iter_parallel(
self,
Expand Down Expand Up @@ -725,8 +748,7 @@ def _run_iter_parallel(
for i, ifp_data in enumerate(pool.process(lig_iterable)):
ifp[i] = ifp_data

self.ifp = ifp
return self
return ifp

def _run_bridged_analysis(self, traj, lig, prot, **kwargs):
"""Implementation of the WaterBridge analysis for trajectories.
Expand All @@ -747,6 +769,23 @@ def _run_bridged_analysis(self, traj, lig, prot, **kwargs):
interaction.run(traj, lig, prot)
return self

def _run_iter_bridged_analysis(self, lig_iterable, prot_mol, **kwargs):
"""Implementation of the WaterBridge analysis for trajectories.

Parameters
----------
lig_iterable : list or generator
An iterable yielding ligands as :class:`~prolif.molecule.Molecule`
objects
prot_mol : prolif.molecule.Molecule
The protein
"""
self.ifp = getattr(self, "ifp", {})
for interaction in self.bridged_interactions.values():
interaction.setup(ifp_store=self.ifp, **kwargs)
interaction.run_from_iterable(lig_iterable, prot_mol)
return self

def to_dataframe(
self,
*,
Expand Down
23 changes: 19 additions & 4 deletions prolif/interactions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from abc import abstractmethod
from itertools import product
from math import degrees, radians
from typing import TYPE_CHECKING, Any, Iterable, Optional

import numpy as np
from rdkit import Geometry
Expand All @@ -18,6 +19,12 @@
from prolif.interactions.utils import DISTANCE_FUNCTIONS, get_mapindex
from prolif.utils import angle_between_limits, get_centroid, get_ring_normal_vector

if TYPE_CHECKING:
from MDAnalysis.core.groups import AtomGroup

from prolif.molecule import Molecule
from prolif.typeshed import Trajectory

_INTERACTIONS: dict[str, type["Interaction"]] = {}
_BRIDGED_INTERACTIONS: dict[str, type["BridgedInteraction"]] = {}
_BASE_INTERACTIONS: dict[str, type["Interaction"]] = {}
Expand Down Expand Up @@ -117,7 +124,7 @@ def detect(self, ligand, residue):
class BridgedInteraction:
"""Base class for bridged interactions."""

def __init_subclass__(cls):
def __init_subclass__(cls) -> None:
super().__init_subclass__()
name = cls.__name__
register = _BRIDGED_INTERACTIONS
Expand All @@ -129,20 +136,28 @@ def __init_subclass__(cls):
)
register[name] = cls

def __init__(self):
def __init__(self) -> None:
self.ifp = {}
# force empty setup to initialize args with defaults
self.setup()

def setup(self, ifp_store=None, **kwargs) -> None:
def setup(self, ifp_store: Optional[dict[int, IFP]] = None, **kwargs: Any) -> None:
"""Setup additional arguments passed at runtime to the fingerprint generator's
``run`` method.
"""
self.ifp = ifp_store if ifp_store is not None else {}
self.kwargs = kwargs

@abstractmethod
def run(self, traj, lig, prot) -> dict[int, IFP]:
def run(
self, traj: "Trajectory", lig: "AtomGroup", prot: "AtomGroup"
) -> dict[int, IFP]:
raise NotImplementedError()

@abstractmethod
def run_from_iterable(
self, lig_iterable: Iterable["Molecule"], prot_mol: "Molecule"
) -> dict[int, IFP]:
raise NotImplementedError()


Expand Down
66 changes: 59 additions & 7 deletions prolif/interactions/water_bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,28 @@

import itertools as it
from collections import defaultdict
from typing import Iterator, Optional
from typing import TYPE_CHECKING, Iterable, Iterator, Optional, Union

import networkx as nx

from prolif.ifp import IFP, InteractionData
from prolif.interactions.base import BridgedInteraction

if TYPE_CHECKING:
from MDAnalysis.core.groups import AtomGroup

from prolif.molecule import Molecule
from prolif.typeshed import Trajectory


class WaterBridge(BridgedInteraction):
"""Implementation of the WaterBridge analysis.

Parameters
----------
water : MDAnalysis.core.groups.AtomGroup
An MDAnalysis AtomGroup containing the water molecules
water : Union[MDAnalysis.core.groups.AtomGroup, Iterable[prolif.molecule.Molecule]]
An MDAnalysis AtomGroup or iterable of prolif Molecule objects containing the
water molecules
order : int
Maximum number of water molecules that can be involved in a water-bridged
interaction.
Expand All @@ -39,7 +46,7 @@

def __init__(
self,
water,
water: Union["AtomGroup", Iterable["Molecule"]],
order: int = 1,
min_order: int = 1,
hbdonor: Optional[dict] = None,
Expand Down Expand Up @@ -69,12 +76,17 @@
self.residues = kwargs.pop("residues", None)
self.kwargs = kwargs

def run(self, traj, lig, prot) -> dict[int, IFP]:
"""Run the water bridge analysis.
def run(
self,
traj: "Trajectory",
lig: "AtomGroup",
prot: "AtomGroup",
) -> dict[int, IFP]:
"""Run the water bridge analysis for a trajectory.

Parameters
----------
traj : MDAnalysis.coordinates.base.ProtoReader or MDAnalysis.coordinates.base.FrameIteratorSliced
traj : Union[MDAnalysis.coordinates.base.ProtoReader, MDAnalysis.coordinates.base.FrameIteratorSliced]
Iterate over this Universe trajectory or sliced trajectory object
to extract the frames used for the fingerprint extraction
lig : MDAnalysis.core.groups.AtomGroup
Expand Down Expand Up @@ -109,6 +121,46 @@

return self.ifp

def run_from_iterable(
self, lig_iterable: Iterable["Molecule"], prot_mol: "Molecule"
) -> dict[int, IFP]:
"""Run the water-bridge analysis for an iterable of molecules.

Parameters
----------
lig_iterable : list or generator
An iterable yielding ligands as :class:`~prolif.molecule.Molecule`
objects
prot_mol : prolif.molecule.Molecule
The protein
"""
# Run analysis for ligand-water and water-protein interactions
lig_water_ifp: dict[int, IFP] = self.water_fp._run_iter_serial(
lig_iterable, self.water, residues=None, **self.kwargs
)
water_prot_ifp: dict[int, IFP] = self.water_fp._run_iter_serial(
[self.water], prot_mol, residues=self.residues, **self.kwargs
)
ifp_wp = water_prot_ifp[0] # Water → Protein

if self.order >= 2:
# Run water-water interaction analysis
water_ifp: dict[int, IFP] = self.water_fp._run_iter_serial(

Check warning on line 148 in prolif/interactions/water_bridge.py

View check run for this annotation

Codecov / codecov/patch

prolif/interactions/water_bridge.py#L148

Added line #L148 was not covered by tests
[self.water], self.water, residues=None, **self.kwargs
)
ifp_ww = water_ifp[0] # WaterX -> WaterY

Check warning on line 151 in prolif/interactions/water_bridge.py

View check run for this annotation

Codecov / codecov/patch

prolif/interactions/water_bridge.py#L151

Added line #L151 was not covered by tests

for pose in lig_water_ifp:
ifp_lw = lig_water_ifp[pose] # Ligand → Water
self.ifp.setdefault(pose, IFP())

if self.order >= 2:
self._any_order(pose, ifp_lw, ifp_ww, ifp_wp)

Check warning on line 158 in prolif/interactions/water_bridge.py

View check run for this annotation

Codecov / codecov/patch

prolif/interactions/water_bridge.py#L158

Added line #L158 was not covered by tests

else:
self._first_order_only(pose, ifp_lw, ifp_wp)
return self.ifp

def _first_order_only(
self, frame: int, ifp_lw: IFP, ifp_wp: IFP
) -> Iterator[tuple[InteractionData, InteractionData]]:
Expand Down
7 changes: 7 additions & 0 deletions prolif/typeshed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Helper module containing type aliases."""

from typing import TypeAlias, Union

Check warning on line 3 in prolif/typeshed.py

View check run for this annotation

Codecov / codecov/patch

prolif/typeshed.py#L3

Added line #L3 was not covered by tests

from MDAnalysis.coordinates.base import FrameIteratorSliced, ProtoReader

Check warning on line 5 in prolif/typeshed.py

View check run for this annotation

Codecov / codecov/patch

prolif/typeshed.py#L5

Added line #L5 was not covered by tests

Trajectory: TypeAlias = Union[FrameIteratorSliced, ProtoReader]

Check warning on line 7 in prolif/typeshed.py

View check run for this annotation

Codecov / codecov/patch

prolif/typeshed.py#L7

Added line #L7 was not covered by tests
12 changes: 10 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,19 +178,27 @@ def water_u():


@pytest.fixture(scope="session")
def water_params(water_u):
def water_atomgroups(water_u):
ligand = water_u.select_atoms("resname QNB")
protein = water_u.select_atoms(
"protein and byres around 4 group ligand", ligand=ligand
)
water = water_u.select_atoms(
"resname TIP3 and byres around 6 (group ligand or group pocket)",
"resname TIP3 and byres around 4 (group ligand or group pocket)",
ligand=ligand,
pocket=protein,
)
return ligand, protein, water


@pytest.fixture(scope="session")
def water_mols(water_atomgroups):
lig_mol = Molecule.from_mda(water_atomgroups[0])
prot_mol = Molecule.from_mda(water_atomgroups[1])
water_mol = Molecule.from_mda(water_atomgroups[2])
return lig_mol, prot_mol, water_mol


class BaseTestMixinRDKitMol:
def test_init(self, mol):
assert isinstance(mol, Chem.Mol)
Expand Down
20 changes: 16 additions & 4 deletions tests/test_fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,8 @@ def test_water_bridge_instance_without_params_raises_error(self):
):
Fingerprint(["WaterBridge"])

def test_mix_water_bridge_and_other_interactions(self, water_u, water_params):
ligand, protein, water = water_params
def test_mix_water_bridge_and_other_interactions(self, water_u, water_atomgroups):
ligand, protein, water = water_atomgroups
fp = Fingerprint(
["HBDonor", "WaterBridge"], parameters={"WaterBridge": {"water": water}}
)
Expand All @@ -355,8 +355,20 @@ def test_mix_water_bridge_and_other_interactions(self, water_u, water_params):
assert "WaterBridge" in fp.ifp[0]["QNB1.X", "TRP400.X"]
assert "HBDonor" in fp.ifp[0]["QNB1.X", "ASN404.X"]

def test_water_bridge_updates_cache_size(self, water_u, water_params, monkeypatch):
ligand, protein, water = water_params
def test_water_bridge_run_iter(self, water_mols):
ligand, protein, water = water_mols
fp = Fingerprint(
["HBDonor", "WaterBridge"], parameters={"WaterBridge": {"water": water}}
)
fp.run_from_iterable([ligand], protein)

assert "WaterBridge" in fp.ifp[0]["QNB1.X", "TRP400.X"]
assert "HBDonor" in fp.ifp[0]["QNB1.X", "ASN404.X"]

def test_water_bridge_updates_cache_size(
self, water_u, water_atomgroups, monkeypatch
):
ligand, protein, water = water_atomgroups
set_converter_cache_size(2)
mocked = Mock(wraps=set_converter_cache_size)
monkeypatch.setattr(
Expand Down
18 changes: 14 additions & 4 deletions tests/test_interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,16 +357,16 @@ class TestBridgedInteractions:
({"order": 1, "min_order": 2}, "min_order cannot be greater than order"),
],
)
def test_water_bridge_validation(self, water_params, kwargs, match):
*_, water = water_params
def test_water_bridge_validation(self, water_atomgroups, kwargs, match):
*_, water = water_atomgroups
with pytest.raises(ValueError, match=match):
Fingerprint(
["WaterBridge"],
parameters={"WaterBridge": {"water": water, **kwargs}},
)

def test_direct_water_bridge(self, water_u, water_params):
ligand, protein, water = water_params
def test_direct_water_bridge(self, water_u, water_atomgroups):
ligand, protein, water = water_atomgroups
fp = Fingerprint(["WaterBridge"], parameters={"WaterBridge": {"water": water}})
fp.run(water_u.trajectory[:1], ligand, protein)
int_data = next(fp.ifp[0].interactions())
Expand Down Expand Up @@ -395,3 +395,13 @@ def test_higher_order_water_bridge(self, water_u, kwargs, num_expected):
assert len(all_int_data) == num_expected
int_data = all_int_data[-1]
assert "distance_TIP383.X_TIP317.X" in int_data.metadata

def test_run_iter_water_bridge(self, water_mols):
ligand, protein, water = water_mols
fp = Fingerprint(["WaterBridge"], parameters={"WaterBridge": {"water": water}})
# mimick multiple poses
fp.run_from_iterable([ligand, ligand], protein)
int_data = next(fp.ifp[1].interactions())

assert int_data.interaction == "WaterBridge"
assert str(int_data.protein) == "TRP400.X"
Loading