From b7a536e4dc4359551f8d343f6a062f7c3a8faea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Sat, 28 Dec 2024 16:55:59 +0100 Subject: [PATCH] fix: stable order of water residues in metadata --- prolif/interactions/water_bridge.py | 34 +++++++++++++++++++++++------ tests/test_fingerprint.py | 27 ++++------------------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/prolif/interactions/water_bridge.py b/prolif/interactions/water_bridge.py index 77852e3..4c56264 100644 --- a/prolif/interactions/water_bridge.py +++ b/prolif/interactions/water_bridge.py @@ -110,10 +110,11 @@ def _any_order( """Generate results for any order of water-bridge interactions. Constructs a graph to represent the water network and iterates over all paths - up to a given length (corresponding to the ``order``). + up to a given length (corresponding to ``order + 1``). """ # MultiGraph to allow the same pair of nodes to interact as both HBA and HBD - # and record it as different paths + # and potentially multiple groups of atoms satisfying the constraints. + # Each of these interaction will have its own edge in the graph. graph = nx.MultiGraph() nodes = defaultdict(set) @@ -125,7 +126,6 @@ def _any_order( nodes[role].add(getattr(data, role)) # remove mirror interactions before adding water-water to the graph - # TODO: sort these so that it matches the order going from ligand to protein deduplicated = { frozenset((ligand_resid, protein_resid)) for ligand_resid, protein_resid in ifp_ww @@ -150,14 +150,34 @@ def _any_order( targets = (t for t in nodes["protein"] if nx.has_path(graph, source, t)) paths = nx.all_simple_edge_paths(graph, source, targets, cutoff=order + 1) for path in paths: - # path is a list[tuple[node_id1, node_id2, interaction]] - # first element in path is LIG-WAT1, last is WATn-PROT + # path is a list[tuple[node_id1, node_id2, deduplication_key]] + # first element in path is lig-water1, last is waterN-prot data_lw = graph.edges[path[0]]["int_data"] data_wp = graph.edges[path[-1]]["int_data"] ww_edges = [graph.edges[p] for p in path[1:-1]] - # only include if strictly passing through water + # only include if strictly passing through water (going back through + # ligand or protein is not a valid higher-order interaction) if all(e.get("water_only") for e in ww_edges): - data_ww_list = [e["int_data"] for e in ww_edges] + # reorder ligand and protein in InteractionData to be contiguous + # i.e. lig-w1, w1-w2, w2-prot instead of lig-w1, w2-w1, w2-prot + data_ww_list = [] + left = data_lw.protein + for e in ww_edges: + d = e["int_data"] + is_sorted = d.ligand == left + data_ww = InteractionData( + ligand=d.ligand if is_sorted else d.protein, + protein=d.protein if is_sorted else d.ligand, + # interaction name is not kept in final metadata + # so no need to invert it + interaction=d.interaction, + # the indices of "ligand" water and "protein" water are + # merged in the final metadata for water mols so no need to + # invert the roles in indices + metadata=d.metadata, + ) + data_ww_list.append(data_ww) + left = data_ww.protein self._merge_metadata(frame, data_lw, data_wp, *data_ww_list) def _merge_metadata( diff --git a/tests/test_fingerprint.py b/tests/test_fingerprint.py index 3e7a4d5..1eaf37d 100644 --- a/tests/test_fingerprint.py +++ b/tests/test_fingerprint.py @@ -374,36 +374,17 @@ def test_direct_water_bridge(self, water_u, water_params): def test_higher_order_water_bridge(self, water_u): ligand = water_u.select_atoms("resname QNB") - pocket = water_u.select_atoms( - "protein and byres around 4 group ligand", ligand=ligand - ) - wshell = water_u.select_atoms( - "resname TIP3 and byres around 4 (group ligand or group pocket)", - ligand=ligand, - pocket=pocket, - ) - extended_pocket = water_u.select_atoms( - "protein and byres around 4 (group ligand or group wshell)", - ligand=ligand, - wshell=wshell, - ) - water = water_u.select_atoms( - "resname TIP3 and byres around 4 (group ligand or group pocket)", - ligand=ligand, - pocket=extended_pocket, - ) + pocket = water_u.select_atoms("protein and resid 398:404") + water = water_u.select_atoms("segid WAT and (resid 17 or resid 83)") fp = Fingerprint( ["WaterBridge"], parameters={"WaterBridge": {"water": water, "order": 2}} ) - fp.run(water_u.trajectory[:1], ligand, extended_pocket) + fp.run(water_u.trajectory[:1], ligand, pocket) all_int_data = list(fp.ifp[0].interactions()) assert len(all_int_data) == 3 int_data = all_int_data[2] - # order is not yet stable... - assert ("distance_TIP317.X_TIP383.X" in int_data.metadata) or ( - "distance_TIP383.X_TIP317.X" in int_data.metadata - ) + assert "distance_TIP383.X_TIP317.X" in int_data.metadata def test_mix_water_bridge_and_other_interactions(self, water_u, water_params): ligand, protein, water = water_params