Skip to content

Commit

Permalink
Fixed a bug with disconnected graphs in da!
Browse files Browse the repository at this point in the history
  • Loading branch information
rlaplaza committed Aug 7, 2024
1 parent 8020927 commit 9ff491a
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 17 deletions.
49 changes: 35 additions & 14 deletions navicat_marc/da.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,35 +31,56 @@ def da_matrix(mols, normalize=True, kernel="rbf", mode="dfs"):
if natoms > 4:
n_d = max(natoms // 4, 1)
else:
return M
return M.np.max(M)
DA = np.zeros((n, n_d))
coords = np.array([mol.coordinates for mol in mols])

# All molecules share the same connectivity (at least in principle)
# Lets traverse the graph and collect dihedrals
if mode == "auto":
bc = nx.betweenness_centrality(refgraph, endpoints=True, weight="coulomb_term")
all_indices = sorted(range(len(bc)), key=lambda i: bc[i])[::-1]
for i in range(n):
for d in range(n_d - 1):
k = 4 * d
l = 4 * (d + 1)
a0, a1, a2, a3 = coords[i][all_indices[k:l]]
DA[i, d] = dihedral(a0, a1, a2, a3)
elif mode == "dfs":
dfs_nodes = list(nx.dfs_preorder_nodes(refgraph, source=0))
if nx.is_connected(refgraph):
if mode == "auto":
bc = nx.betweenness_centrality(
refgraph, endpoints=True, weight="coulomb_term"
)
all_indices = sorted(range(len(bc)), key=lambda i: bc[i])[::-1]
for i in range(n):
for d in range(n_d - 1):
k = 4 * d
l = 4 * (d + 1)
a0, a1, a2, a3 = coords[i][all_indices[k:l]]
DA[i, d] = dihedral(a0, a1, a2, a3)
elif mode == "dfs":
dfs_nodes = list(nx.dfs_preorder_nodes(refgraph, source=0))
n_d = max(len(dfs_nodes) // 4, 1)
for i in range(n):
for d in range(n_d - 1):
k = 4 * d
l = 4 * (d + 1)
a0, a1, a2, a3 = coords[i][dfs_nodes[k:l]]
DA[i, d] = dihedral(a0, a1, a2, a3)
else:
return M, np.max(M)
else:
# If graph is disconnected we default to dfs for each component
dfs_nodes = []
for cc in nx.connected_components(refgraph):
dfs_nodes.extend(
list(nx.dfs_preorder_nodes(refgraph.subgraph(cc), source=min(cc)))
)
n_d = max(len(dfs_nodes) // 4, 1)
for i in range(n):
for d in range(n_d - 1):
k = 4 * d
l = 4 * (d + 1)
a0, a1, a2, a3 = coords[i][dfs_nodes[k:l]]
DA[i, d] = dihedral(a0, a1, a2, a3)
else:
return M
# Now generate a kernel based on the dihedrals
if kernel == "rbf":
euclid_0 = np.linalg.norm(DA[:, :] - DA[0, :], axis=0)
# In some cases DA may be ill conditioned leading to euclid_0 of 0
# The second option may be better and might end up replacing the one above
if euclid_0.std() == 0:
gamma_heuristic = 1 / (n_d * DA.var())
gamma_heuristic = 1 / (euclid_0.std())
M -= pairwise_kernels(DA, DA, gamma=gamma_heuristic, metric="rbf")
else:
Expand Down
2 changes: 1 addition & 1 deletion navicat_marc/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def molecules_from_file(filename, scale_factor=1.10, noh=True):
def processargs(arguments):
input_list = sys.argv
input_str = " ".join(input_list)
version_str = "0.2.1"
version_str = "0.2.2"
mbuilder = argparse.ArgumentParser(
prog="navicat_marc",
description="Analyse conformer ensembles to find the most representative structures.",
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "navicat_marc"
version = "0.2.1"
version = "0.2.2"
authors = [
{ name="R. Laplaza", email="[email protected]" },
]
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
setup(
name="marc",
packages=["navicat_marc"],
version="0.2.1",
version="0.2.2",
description="Modular Analysis of Representative Conformers",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit 9ff491a

Please sign in to comment.