Skip to content

Commit

Permalink
feat: add implementation for any order
Browse files Browse the repository at this point in the history
  • Loading branch information
cbouy committed Dec 28, 2024
1 parent cb88965 commit 8036a40
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 133 deletions.
292 changes: 161 additions & 131 deletions prolif/interactions/water_bridge.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""Module for the WaterBridge interaction implementation."""

import itertools as it
from collections import defaultdict
from typing import Any, Callable, Iterator, Optional
from typing import Any, Iterator, Optional

import networkx as nx

from prolif.fingerprint import Fingerprint
from prolif.ifp import IFP, InteractionData
Expand All @@ -23,6 +26,10 @@ class WaterBridge:
kwargs : Any
Additional arguments passed at runtime to the fingerprint generator's ``run``
method.
Notes
-----
This analysis currently only runs in serial.
"""

def __init__(
Expand Down Expand Up @@ -66,145 +73,168 @@ def run(self, traj, lig, prot, water, order=1) -> dict[int, IFP]:
water_prot_ifp: dict[int, IFP] = self.water_fp._run_serial(
traj, water, prot, residues=self.residues, **self.kwargs
)

# Run water-water interaction analysis if order is 2 or above
if order >= 2:
water_ifp: Optional[dict[int, IFP]] = self.water_fp._run_serial(
# Run water-water interaction analysis
water_ifp: dict[int, IFP] = self.water_fp._run_serial(
traj, water, water, residues=None, **self.kwargs
)
else:
water_ifp = None

# first order
self._merge_metadata(lig_water_ifp, None, water_prot_ifp)
for frame in lig_water_ifp:
ifp_lw = lig_water_ifp[frame] # Ligand → Water
ifp_wp = water_prot_ifp[frame] # Water → Protein
self.ifp.setdefault(frame, IFP())

if order >= 2:
ifp_ww = water_ifp[frame] # WaterX -> WaterY
self._any_order(frame, ifp_lw, ifp_ww, ifp_wp, order=order)

# add metadata for higher order
if water_ifp:
self._merge_metadata(lig_water_ifp, water_ifp, water_prot_ifp)
else:
self._first_order_only(frame, ifp_lw, ifp_wp)

return self.ifp

def _first_order_only(
self, frame: int, ifp_lw: IFP, ifp_wp: IFP
) -> Iterator[tuple[InteractionData, InteractionData]]:
"""Iterates over all relevant combinations of ligand-water-protein"""
# for each ligand-water interaction
for data_lw in ifp_lw.interactions():
# for each water-protein interaction
for data_wp in ifp_wp.interactions():
if data_lw.protein == data_wp.ligand:
self._merge_metadata(frame, data_lw, data_wp)

def _any_order(
self, frame: int, ifp_lw: IFP, ifp_ww: IFP, ifp_wp: IFP, order: int
) -> None:
"""Generate results for any order of water-bridge interactions.
Constructs a graph to represent the water network and iterates over all paths
up to a given length (corresponding to the ``order``).
"""
# MultiGraph to allow the same pair of nodes to interact as both HBA and HBD
# and record it as different paths
graph = nx.MultiGraph()
nodes = defaultdict(set)

# construct graph of water interactions
for ifp, role in [(ifp_lw, "ligand"), (ifp_wp, "protein")]:
for data in ifp.interactions():
graph.add_edge(data.ligand, data.protein, int_data=data)
# assign ligand and protein residue nodes to corresponding role
nodes[role].add(getattr(data, role))

# remove mirror interactions before adding water-water to the graph
# TODO: sort these so that it matches the order going from ligand to protein
deduplicated = {
frozenset((ligand_resid, protein_resid))
for ligand_resid, protein_resid in ifp_ww
if ligand_resid != protein_resid
}
for ligand_resid, protein_resid in deduplicated:
ww_dict = ifp_ww.data[ligand_resid, protein_resid]
for int_name, metadata_tuple in ww_dict.items():
for metadata in metadata_tuple:
data = InteractionData(
ligand=ligand_resid,
protein=protein_resid,
interaction=int_name,
metadata=metadata,
)
graph.add_edge(
data.ligand, data.protein, int_data=data, water_only=True
)

# find all edge paths of length up to `order + 1`
for source in nodes["ligand"]:
targets = (t for t in nodes["protein"] if nx.has_path(graph, source, t))
paths = nx.all_simple_edge_paths(graph, source, targets, cutoff=order + 1)
for path in paths:
# path is a list[tuple[node_id1, node_id2, interaction]]
# first element in path is LIG-WAT1, last is WATn-PROT
data_lw = graph.edges[*path[0]]["int_data"]
data_wp = graph.edges[*path[-1]]["int_data"]
ww_edges = [graph.edges[*p] for p in path[1:-1]]
# only include if strictly passing through water
if all(e.get("water_only") for e in ww_edges):
data_ww_list = [e["int_data"] for e in ww_edges]
self._merge_metadata(frame, data_lw, data_wp, *data_ww_list)

def _merge_metadata(
self,
lig_water_ifp: dict[int, IFP],
water_ifp: Optional[dict[int, IFP]],
water_prot_ifp: dict[int, IFP],
frame: int,
data_lw: InteractionData,
data_wp: InteractionData,
*data_ww_args: InteractionData,
) -> None:
"""Merge results from all fingerprints on matching water residues"""
for frame in lig_water_ifp:
ifp_lw = lig_water_ifp[frame] # Ligand → Water
ifp_wp = water_prot_ifp[frame] # Water → Protein
ifp_ww = water_ifp[frame] if water_ifp else None # Water → Water

# for each ligand-water1 interaction
for data_lw in ifp_lw.interactions():
# for each water1-water2 interaction (optional)
for data_ww in self._filter_interactions(
ifp_ww,
# match with ligand-water1 and exclude interaction with itself
lambda x: x.ligand == data_lw.protein and x.ligand != x.protein,
# if water_ifp is None, simply yields data_lw
default=data_lw,
):
# for each water2-protein interaction where water1 == water2
for data_wp in self._filter_interactions(
ifp_wp, lambda x: x.ligand == data_ww.protein
):
# get indices for individual water molecules
water_indices = defaultdict(set)
for data, role in [
(data_lw, "protein"),
(data_wp, "ligand"),
*(
((data_ww, "ligand"), (data_ww, "protein"))
if ifp_ww
else ()
),
]:
resid = getattr(data, role)
water_indices[f"water_{resid}"].update(
data.metadata["indices"][role]
)
# construct merged metadata
metadata = {
"indices": {
"ligand": data_lw.metadata["indices"]["ligand"],
"protein": data_wp.metadata["indices"]["protein"],
**{
key: tuple(indices)
for key, indices in water_indices.items()
},
},
"parent_indices": {
"ligand": data_lw.metadata["parent_indices"]["ligand"],
"protein": data_wp.metadata["parent_indices"][
"protein"
],
"water": tuple(
set().union(
data_lw.metadata["parent_indices"]["protein"],
data_wp.metadata["parent_indices"]["ligand"],
*(
(
data_ww.metadata["parent_indices"][
"ligand"
],
data_ww.metadata["parent_indices"][
"protein"
],
)
if ifp_ww
else ()
),
)
),
},
"water_residues": tuple(
{
data_lw.protein,
data_ww.ligand,
data_ww.protein,
data_wp.ligand,
}
if ifp_ww
else (data_lw.protein,)
),
"order": 2 if ifp_ww else 1,
"ligand_role": data_lw.interaction,
"protein_role": ( # invert role
"HBDonor"
if data_wp.interaction == "HBAcceptor"
else "HBAcceptor"
),
**{
f"{key}{suffix}": data.metadata[key]
for suffix, data in ([
("_ligand_water", data_lw),
("_water_protein", data_wp),
*(("_water_water", data_ww) if ifp_ww else ()),
])
for key in ["distance", "DHA_angle"]
},
}

# store metadata
ifp = self.ifp.setdefault(frame, IFP())
ifp.setdefault(
(data_lw.ligand, data_wp.protein), {}
).setdefault("WaterBridge", []).append(metadata)

@staticmethod
def _filter_interactions(
ifp: Optional[IFP],
predicate: Callable[[InteractionData], bool],
default: Optional[InteractionData] = None,
) -> Iterator[InteractionData]:
"""Filters interactions to those that satisfy the predicate. If ``ifp==None``,
simply yields the ``default`` value.
"""
if ifp is None:
yield default
else:
for data in ifp.interactions():
if predicate(data):
yield data
# get indices for individual water molecules
water_indices = defaultdict(set)
for data, role in [
(data_lw, "protein"),
(data_wp, "ligand"),
*it.chain.from_iterable([
[(data_ww, "ligand"), (data_ww, "protein")] for data_ww in data_ww_args
]),
]:
resid = getattr(data, role)
water_indices[str(resid)].update(data.metadata["indices"][role])
# construct merged metadata
metadata = {
"indices": {
"ligand": data_lw.metadata["indices"]["ligand"],
"protein": data_wp.metadata["indices"]["protein"],
**{key: tuple(indices) for key, indices in water_indices.items()},
},
"parent_indices": {
"ligand": data_lw.metadata["parent_indices"]["ligand"],
"protein": data_wp.metadata["parent_indices"]["protein"],
"water": tuple(
set().union(
data_lw.metadata["parent_indices"]["protein"],
data_wp.metadata["parent_indices"]["ligand"],
*it.chain.from_iterable([
[
data_ww.metadata["parent_indices"]["ligand"],
data_ww.metadata["parent_indices"]["protein"],
]
for data_ww in data_ww_args
]),
)
),
},
"water_residues": tuple(
dict.fromkeys( # uniquify but keep order
[
data_lw.protein,
*it.chain.from_iterable([
[data_ww.ligand, data_ww.protein]
for data_ww in data_ww_args
]),
data_wp.ligand,
]
)
),
"order": len(data_ww_args) + 1,
"ligand_role": data_lw.interaction,
"protein_role": ( # invert role
"HBDonor" if data_wp.interaction == "HBAcceptor" else "HBAcceptor"
),
**{
f"{key}{suffix}": data.metadata[key]
for suffix, data in [
(f"_ligand_{data_lw.protein}", data_lw),
*it.chain.from_iterable([
[(f"_{data_ww.ligand}_{data_ww.protein}", data_ww)]
for data_ww in data_ww_args
]),
(f"_{data_wp.ligand}_protein", data_wp),
]
for key in ["distance", "DHA_angle"]
},
}

# store metadata
self.ifp[frame].setdefault((data_lw.ligand, data_wp.protein), {}).setdefault(
"WaterBridge", []
).append(metadata)
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ dependencies = [
file = "LICENSE"

[project.optional-dependencies]
water = [
"networkx",
]
plots = [
"py3Dmol",
"matplotlib>=3.5",
Expand All @@ -62,7 +65,7 @@ tests = [
"pytest>=6.1.2",
"pytest-cov",
"ipython",
"prolif[plots]"
"prolif[plots,water]"
]
dev = [
"prolif[build,tests]",
Expand Down
37 changes: 36 additions & 1 deletion tests/test_fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,9 @@ def water_params(self, water_u):
"protein and byres around 4 group ligand", ligand=ligand
)
water = water_u.select_atoms(
"resname TIP3 and byres around 4 group ligand", ligand=ligand
"resname TIP3 and byres around 6 (group ligand or group pocket)",
ligand=ligand,
pocket=protein,
)
return ligand, protein, water

Expand All @@ -370,6 +372,39 @@ def test_direct_water_bridge(self, water_u, water_params):
assert int_data.interaction == "WaterBridge"
assert str(int_data.protein) == "TRP400.X"

def test_higher_order_water_bridge(self, water_u):
ligand = water_u.select_atoms("resname QNB")
pocket = water_u.select_atoms(
"protein and byres around 4 group ligand", ligand=ligand
)
wshell = water_u.select_atoms(
"resname TIP3 and byres around 4 (group ligand or group pocket)",
ligand=ligand,
pocket=pocket,
)
extended_pocket = water_u.select_atoms(
"protein and byres around 4 (group ligand or group wshell)",
ligand=ligand,
wshell=wshell,
)
water = water_u.select_atoms(
"resname TIP3 and byres around 4 (group ligand or group pocket)",
ligand=ligand,
pocket=extended_pocket,
)
fp = Fingerprint(
["WaterBridge"], parameters={"WaterBridge": {"water": water, "order": 2}}
)
fp.run(water_u.trajectory[:1], ligand, extended_pocket)
all_int_data = list(fp.ifp[0].interactions())

assert len(all_int_data) == 3
int_data = all_int_data[2]
# order is not yet stable...
assert ("distance_TIP317.X_TIP383.X" in int_data.metadata) or (
"distance_TIP383.X_TIP317.X" in int_data.metadata
)

def test_mix_water_bridge_and_other_interactions(self, water_u, water_params):
ligand, protein, water = water_params
fp = Fingerprint(
Expand Down

0 comments on commit 8036a40

Please sign in to comment.