Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bond deletion #4763

Merged
merged 8 commits into from
Oct 26, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ The rules for this file:
* 2.8.0

Fixes
* Fixes bug where multiple identical connection topologies could be
added to a _Connection TopologyAttr, and where deleting connections
by index would only delete one of them (Issue #4762, PR #4763)
* Changes error to warning on Universe creation if guessing fails
due to missing information (Issue #4750, PR #4754)
* Adds guessed attributes documentation back to each parser page
Expand Down
23 changes: 8 additions & 15 deletions package/MDAnalysis/core/topologyattrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3042,19 +3042,12 @@ class _Connection(AtomAttr, metaclass=_ConnectionTopologyAttrMeta):

@_check_connection_values
def __init__(self, values, types=None, guessed=False, order=None):
self.values = values
if types is None:
types = [None] * len(values)
self.types = types
if guessed in (True, False):
# if single value passed, multiply this across
# all bonds
guessed = [guessed] * len(values)
self._guessed = guessed
if order is None:
order = [None] * len(values)
self.order = order
self.values = []
self.types = []
self._guessed = []
self.order = []
self._cache = dict()
self._add_bonds(values, types, guessed, order)

def copy(self):
"""Return a deepcopy of this attribute"""
Expand Down Expand Up @@ -3118,9 +3111,8 @@ def _add_bonds(self, values, types=None, guessed=True, order=None):
if order is None:
order = itertools.cycle((None,))

existing = set(self.values)
for v, t, g, o in zip(values, types, guessed, order):
if v not in existing:
if v not in self.values:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

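This change is likely the culprit: a `not in` test on a list scans the whole list (O(n) per lookup), whereas on a set it is a hash lookup (O(1) on average), so doing it inside a loop is orders of magnitude slower with a list than with a set.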

Example:

In [1]: import random

In [2]: lister = [random.random() for i in range(10000)]

In [3]: setter = set(lister)

In [4]: len(setter)
Out[4]: 10000

In [5]: len(lister)
Out[5]: 10000

In [6]: def notin(a):
   ...:     for v in range(10000):
   ...:         if v not in a:
   ...:             pass
   ...:     return
   ...: 

In [7]: %timeit notin(lister)
734 ms ± 5.85 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

In [8]: %timeit notin(setter)
129 μs ± 166 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)

Copy link
Member Author

@lilyminium lilyminium Oct 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed this in the commit below. Note to self: changing `self.values` to a dict might be the easiest way to do this performantly while retaining both uniqueness and order.

Alternatively, masking the input values against a set built from `self.values` might be less work while still being more performant than the list lookup.

self.values.append(v)
self.types.append(t)
self._guessed.append(g)
Expand All @@ -3146,7 +3138,8 @@ def _delete_bonds(self, values):
'{attrname} with atom indices:'
'{indices}').format(attrname=self.attrname,
indices=indices))
idx = [self.values.index(v) for v in to_check]
# allow multiple matches
idx = [i for i, x in enumerate(self.values) if x in to_check]
for i in sorted(idx, reverse=True):
del self.values[i]

Expand Down
11 changes: 11 additions & 0 deletions testsuite/MDAnalysisTests/core/test_universe.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
two_water_gro, two_water_gro_nonames,
TRZ, TRZ_psf,
PDB, MMTF, CONECT,
PDB_conect
)

import MDAnalysis as mda
Expand Down Expand Up @@ -1247,6 +1248,16 @@ def test_delete_bonds_refresh_fragments(self, universe):
universe.delete_bonds([universe.atoms[[2, 3]]])
assert len(universe.atoms.fragments) == n_fragments + 1

# Test added with the bond-deletion fix: each parametrized file is loaded,
# its bond count is checked against n_bonds, then every bond is deleted in a
# single call and the Universe must report zero bonds afterwards.
# NOTE(review): presumably these files are the ones that previously produced
# duplicate bonds (Issue #4762) -- confirm against the data files.
@pytest.mark.parametrize("filename, n_bonds", [
(CONECT, 72),
(PDB_conect, 8)
])
def test_delete_all_bonds(self, filename, n_bonds):
u = mda.Universe(filename)
# sanity check on the starting bond count before anything is deleted
assert len(u.bonds) == n_bonds
# pass the Universe's own bonds so that every bond is requested for deletion
u.delete_bonds(u.bonds)
# nothing may be left behind
assert len(u.bonds) == 0

@pytest.mark.parametrize(
'attr,values', existing_atom_indices
)
Expand Down
4 changes: 2 additions & 2 deletions testsuite/MDAnalysisTests/topology/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def parse():
struc = parse()

assert hasattr(struc, 'bonds')
assert len(struc.bonds.values) == 4
assert len(struc.bonds.values) == 2
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The bonds used to be [(0, 1), (0, 1), (1, 2), (1, 2)], i.e. two distinct bonds, each listed twice. With this change each bond is stored only once, so the expected count drops from 4 to 2.



def test_single_conect():
Expand All @@ -158,7 +158,7 @@ def parse():
with pytest.warns(UserWarning):
struc = parse()
assert hasattr(struc, 'bonds')
assert len(struc.bonds.values) == 2
assert len(struc.bonds.values) == 1
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to the comment above: the duplicated bond is now stored only once, so the expected count drops from 2 to 1.



def test_new_chainid_new_res():
Expand Down
Loading