galaxyproject · ivelet · Mar 15, 2024 · Mar 19, 2024 · Mar 19, 2024 · Mar 22, 2024
diff --git a/tools/muon/assign_cell_type_labels_mudata.xml b/tools/muon/assign_cell_type_labels_mudata.xml
@@ -0,0 +1,87 @@
+<tool id="assign_cell_type_labels_mudata" name="Assign Cell Type Labels for MuData" version="0.1.0">
-<tool id="assign_cell_type_labels_mudata" name="Assign Cell Type Labels for MuData" version="0.1.0">
+<tool id="assign_cell_type_labels_mudata" name="Assign Cell Type Labels for MuData" version="0.1.0" profile="22.05">
-<tool id="assign_cell_type_labels_mudata" name="Assign Cell Type Labels for MuData" version="0.1.0">
+<tool id="assign_cell_type_labels_mudata" name="Assign Cell Type Labels for MuData" version="0.1.0" profile="22.05">
+    <!-- Conda packages needed by the embedded analysis script. -->
+    <requirements>
+        <requirement version="1.7.0" type="package">scanpy</requirement>
+        <requirement version="0.1.2" type="package">muon</requirement>
+    </requirements>
+    <!-- Runs the rendered configfile script; a non-zero exit code marks the job as failed. -->
+    <command detect_errors="exit_code"><![CDATA[
+        python '$script_file'
+    ]]></command>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[import scanpy as sc
+import muon as mu
+import numpy as np
+import matplotlib
+from matplotlib import pyplot as plt
+
+## Lines starting with two hashes are Cheetah comments and are not part of the rendered script.
+
+
+def split_csv(raw):
+    """Split a comma-separated text parameter and trim whitespace around every item."""
+    return [item.strip() for item in str(raw).strip().split(",")]
+
+
+mdata = mu.read_h5mu('$rna_atac_input')
+
+## Build the cluster id to cell type mapping from the two parallel lists.
+cluster_ids = split_csv('$new_cluster_names_keys')
+cluster_labels = split_csv('$new_cluster_names_values')
+if len(cluster_ids) != len(cluster_labels):
+    ## zip() would silently drop the surplus entries, so fail loudly instead.
+    raise ValueError(
+        "Got %d cluster keys but %d cluster names; provide exactly one name per key."
+        % (len(cluster_ids), len(cluster_labels))
+    )
+new_cluster_names = dict(zip(cluster_ids, cluster_labels))
+
+## Clusters without an entry in the mapping become missing values in 'celltype'.
+mdata.obs['celltype'] = mdata.obs['$key_added_leiden'].astype("str")
+mdata.obs['celltype'] = mdata.obs['celltype'].map(new_cluster_names).astype("category")
+
+## reorder_categories returns a new Series; the inplace keyword is deprecated in pandas.
+reorder_cat_list = split_csv('$reorder_cat_list')
+mdata.obs['celltype'] = mdata.obs['celltype'].cat.reorder_categories(reorder_cat_list)
+
+## One colour per category, sampled evenly from the chosen colormap.
+cmap = plt.get_cmap('$get_cmap_name')
+colors = cmap(np.linspace(0, 1, len(mdata.obs['celltype'].cat.categories)))
+mdata.uns["celltype_colors"] = list(map(matplotlib.colors.to_hex, colors))
+
+umap_plot = mu.pl.umap(
+    mdata,
+    color='$umap_color',
+    legend_loc='$umap_legend_loc',
+    frameon=$umap_frame_on,
+    return_fig=True
+)
+umap_plot.savefig("celltype_umap_plot.png")
+mdata.write("rna_atac_assigned_celltype_labels.h5mu")
+]]></configfile>
+    </configfiles>
+    <!-- List parameters are comma-separated; defaults are written without spaces after the commas so that
+         the mapped labels and the re-order list compare equal item by item. -->
+    <inputs>
+        <param name="rna_atac_input" type="data" format="h5mu" label="RNA ATAC Data Matrix"/>
+        <param name="new_cluster_names_keys" type="text" label="New Cluster Names Keys" help="Add the keys of the new cluster names as numbers separated by commas" value="0,8,22,1,6,17,2,5,20,10,11,19,12,16,15,3,7,9,14,4,13,18,21"/>
+        <param name="new_cluster_names_values" type="text" label="New Cluster Names Values" help="Add the corresponding values to the previously added keys (in the same order and for every key added)" value="CD4+ naïve T,CD4+ naïve T,CD4+ naïve T,CD4+ memory T,CD4+ memory T,CD4+ memory T,CD8+ naïve T,CD8+ naïve T,CD8+ naïve T,CD8+ cytotoxic effector T,CD8+ transitional effector T,MAIT,NK,naïve B,memory B,classical mono,classical mono,classical mono,classical mono,intermediate mono,non-classical mono,mDC,pDC"/>
+        <param name="reorder_cat_list" type="text" label="Categories to reorder" help="Add the names of the categories to reorder, ensuring the same names as previously defined are used." value="CD4+ naïve T,CD4+ memory T,CD8+ naïve T,CD8+ transitional effector T,CD8+ cytotoxic effector T,MAIT,NK,naïve B,memory B,classical mono,intermediate mono,non-classical mono,mDC,pDC"/>
+        <param name="key_added_leiden" type="text" label="Key used in leiden embeddings" help="(key_added used when Leiden clustering was performed, as saved in .obs)" value="leiden_joint"/>
+        <param name="get_cmap_name" type="text" label="Name of the colormap instance" help="(name in matplotlib.cm.get_cmap())" value="rainbow"/>
+        <param name="umap_color" type="text" label="Keys for annotations of observations/cells or variables/genes" help="(color in muon.pl.umap())" value="celltype"/>
+        <param name="umap_legend_loc" type="text" label="Location of legend" help="(legend_loc in muon.pl.umap())" value="on data"/>
+        <param name="umap_frame_on" type="boolean" label="Draw a frame around the scatter plot" help="(frameon in muon.pl.umap())" checked="true" truevalue="True" falsevalue="False"/>
+    </inputs>
+    <!-- Files the script writes into the job working directory, collected as datasets. -->
+    <outputs>
+        <data name="celltype_umap_plot" from_work_dir="celltype_umap_plot.png" format="png" label="RNA ATAC Celltype UMAP Plot"/>
+        <data name="rna_atac_assigned_celltype_labels" from_work_dir="rna_atac_assigned_celltype_labels.h5mu" format="h5mu" label="RNA ATAC with Assigned Celltype Labels"/>
+    </outputs>
+    <tests>
+        <!-- Relabel a subset of the Leiden clusters and plot them with the default colormap. -->
+        <test expect_num_outputs="2">
+            <param name="rna_atac_input" value="assign_celltype_labels_mudata_in.h5mu"/>
+            <param name="new_cluster_names_keys" value="0,8,22,1,17"/>
+            <!-- No spaces after the commas, so labels and the re-order list match item by item. -->
+            <param name="new_cluster_names_values" value="CD4+ naïve T,CD4+ naïve T,CD4+ naïve T,CD4+ memory T,CD4+ memory T"/>
+            <param name="reorder_cat_list" value="CD4+ naïve T,CD4+ memory T"/>
+            <param name="key_added_leiden" value="leiden_joint"/>
+            <param name="get_cmap_name" value="rainbow"/>
+            <param name="umap_color" value="celltype"/>
+            <param name="umap_legend_loc" value="on data"/>
+            <param name="umap_frame_on" value="True"/>
+            <!-- PNG and HDF5 outputs may not be byte-identical between runs, so they are compared by size
+                 instead of by exact diff. -->
+            <output name="rna_atac_assigned_celltype_labels" file="rna_atac_assigned_celltype_labels.h5mu" compare="sim_size"/>
+            <output name="celltype_umap_plot" file="celltype_umap_plot.png" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+    **What it does**
+        This tool takes clustered multimodal data (MuData), assigns new names to its clusters, re-orders the
+        resulting cell type categories and plots them on a UMAP using muon.pl.umap().
+
+        Returns the UMAP plot as a PNG file and the processed MuData object as an h5mu file.
+    ]]></help>
+    <citations>
+        <!-- MUON: multimodal omics analysis framework (Bredikhin, Kats and Stegle, Genome Biology, 2022). -->
+        <citation type="doi">10.1186/s13059-021-02577-8</citation>
+    </citations>
+</tool>
diff --git a/tools/muon/cluster_analyze_embed_muon.xml b/tools/muon/cluster_analyze_embed_muon.xml
@@ -0,0 +1,246 @@
+<tool id="cluster_analyze_embed_muon" name="Cluster, analyze, and embed multimodal data with muon" version="0.1.0">
+    <!-- Conda packages needed by the embedded processing script. -->
+    <requirements>
+        <requirement version="1.7.0" type="package">scanpy</requirement>
+        <requirement version="0.1.2" type="package">muon</requirement>
+    </requirements>
+    <!-- Runs the rendered configfile script; a non-zero exit code marks the job as failed. -->
+    <command detect_errors="exit_code"><![CDATA[
+        python '$script_file'
+    ]]></command>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[import scanpy as sc
+import muon as mu
+
+## Lines starting with two hashes are Cheetah comments and are not part of the rendered script.
+
+
+def none_or(cast, raw):
+    """Return None for an empty or literal 'None' value, otherwise the value converted with cast."""
+    raw = str(raw).strip()
+    return None if raw in ("", "None") else cast(raw)
+
+
+mdata = mu.read_h5mu('$mudata_input')
+
+## Exactly one of the branches below is rendered, depending on the selected method.
+#if $method.method == 'leiden'
+mu.tl.leiden(
+    mdata,
+    key_added='$method.leiden_key_added',
+    resolution=$method.leiden_resolution,
+    random_state=$method.leiden_random_state,
+    mod_weights=$method.leiden_mod_weights,
+    n_iterations=$method.leiden_n_iterations
+)
+
+#else if $method.method == 'louvain'
+mu.tl.louvain(
+    mdata,
+    key_added='$method.louvain_key_added',
+    resolution=$method.louvain_resolution,
+    random_state=$method.louvain_random_state,
+    mod_weights=$method.louvain_mod_weights
+)
+
+## The 'ica' option is currently commented out in the inputs, so this branch is not reachable from the form.
+#else if $method.method == 'ica'
+mu.tl.ica(
+    mdata,
+    basis='$method.ica_basis',
+    n_components=$method.ica_n_components,
+    random_state=$method.ica_random_state,
+    scale=$method.ica_scale
+)
+
+#else if $method.method == 'mofa'
+mu.tl.mofa(
+    mdata,
+    n_factors=$method.mofa_n_factors
+)
+
+#else if $method.method == 'snf'
+## neighbor_keys and key_added are free-text keys and must be rendered as quoted Python strings.
+mu.tl.snf(
+    mdata,
+    n_neighbors=$method.snf_n_neighbors,
+    neighbor_keys='$method.snf_neighbor_keys',
+    key_added='$method.snf_key_added',
+    n_iterations=$method.snf_n_iterations,
+    sigma=$method.snf_sigma
+)
+
+#else if $method.method == 'umap'
+## Optional values arrive as text; none_or() turns an empty or 'None' entry into a real None
+## and converts anything else to the expected Python type.
+mu.tl.umap(
+    mdata,
+    min_dist=$method.umap_min_dist,
+    spread=$method.umap_spread,
+    n_components=$method.umap_n_components,
+    maxiter=none_or(int, '$method.umap_maxiter'),
+    alpha=$method.umap_alpha,
+    gamma=$method.umap_gamma,
+    negative_sample_rate=$method.umap_negative_sample_rate,
+    init_pos='$method.umap_init_pos',
+    random_state=$method.umap_random_state,
+    a=none_or(float, '$method.umap_a'),
+    b=none_or(float, '$method.umap_b'),
+    neighbors_key=none_or(str, '$method.umap_neighbors_key')
+)
+
+#end if
+mdata.write("mudata_with_processing.h5mu")
+]]></configfile>
+    </configfiles>
+    <!-- One conditional selects the muon.tl function to run; each branch exposes that function's parameters. -->
+    <inputs>
+        <param name="mudata_input" type="data" format="h5mu" label="MuData input file to run processing" help="(.h5mu)"/>
+        <conditional name="method">
+            <param argument="method" type="select" label="Method used for processing">
+                    <option value="leiden">Cluster: Cluster cells using the Leiden algorithm, using 'muon.tl.leiden'</option>
+                    <option value="louvain">Cluster: Cluster cells using the Louvain algorithm, using 'muon.tl.louvain'</option>
+                    <option value="mofa">Analyze: Run Multi Omics Factor Analysis, using 'muon.tl.mofa'</option>
+                    <option value="snf">Analyze: Similarity Network Fusion, using 'muon.tl.snf'</option>
+                    <option value="umap">Embed: Embed the multimodal neighborhood graph using UMAP, using 'muon.tl.umap'</option>
+            </param>
+            <when value="leiden">
+                <param name="leiden_resolution" type="float" value="1" label="Coarseness of the clustering" help="Higher values lead to more clusters (resolution)"/>
+                <param name="leiden_random_state" type="integer" value="0" label="Random seed" help="Change the initialization of the optimization (random_state)"/>
+                <param name="leiden_key_added" type="text" value="leiden_joint" label="Key under which to add the cluster labels" help="(key_added)"/>
+                <param name="leiden_mod_weights" type="float" value="1" label="Weight for each modality to control contribution" help="(mod_weights)"/>
+                <param name="leiden_n_iterations" type="integer" value="-1" label="Number of iterations of the Leiden clustering algorithm to perform" help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering. (n_iterations)"/>
+
+            </when>
+            <when value="louvain">
+                <param name="louvain_resolution" type="float" value="1" label="Coarseness of the clustering" help="Higher values lead to more clusters (resolution)"/>
+                <param name="louvain_random_state" type="integer" value="0" label="Random seed" help="Change the initialization of the optimization (random_state)"/>
+                <param name="louvain_key_added" type="text" value="louvain_joint" label="Key under which to add the cluster labels" help="(key_added)"/>
+                <param name="louvain_mod_weights" type="float" value="1" label="Weight for each modality to control contribution" help="(mod_weights)"/>
+            </when>
+<!--            <when value="ica">-->
+<!--                <param name="ica_basis" type="text" value="X_pca" label="Basis for Independent Component Analysis" help="(basis)"/>-->
+<!--                <param name="ica_n_components" type="integer" value="1" label="Number of components" help="(n_components)"/>-->
+<!--                <param name="ica_random_state" type="integer" value="0" label="Random seed" help="(random_state)"/>-->
+<!--                <param name="ica_scale" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Use scaling?" help="(scale)"/>-->
+<!--            </when>-->
+            <when value="mofa">
+                <param name="mofa_n_factors" type="integer" label="Number of factors to train the MOFA model with" value="10" help="(n_factors), ignored if loading trained model"/>
+            </when>
+            <when value="snf">
+                <param name="snf_n_neighbors" type="integer" value="20" label="Number of neighbours to be used in the K-nearest neighbours step" help="(n_neighbors)"/>
+                <param name="snf_neighbor_keys" type="text" value="neighbors" label="Keys in .uns where per-modality neighborhood information is stored" help="(neighbor_keys)"/>
+                <param name="snf_key_added" type="text" value="neighbors" label="Keys where multimodal neighbors data are stored" help="(key_added)"/>
+                <param name="snf_n_iterations" type="integer" value="20" label="Number of iterations for the diffusion process" help="(n_iterations)"/>
+                <param name="snf_sigma" type="float" value="0.5" label="Variance for the local model when calculating affinity matrices" help="(sigma)"/>
+            </when>
+            <when value="umap">
+                <param name="umap_min_dist" type="float" value="0.5" label="The effective minimum distance between embedded points" help="(min_dist)"/>
+                <param name="umap_spread" type="float" value="1.0" label="The effective scale of embedded points" help="Determines how clustered/clumped the embedded points are (spread)"/>
+                <param name="umap_n_components" type="integer" value="2" label="The number of dimensions of the embedding" help="(n_components)"/>
+                <!-- An integer parameter cannot carry the default "None", so this is a text field restricted
+                     by a validator to either None or a non-negative whole number. -->
+                <param name="umap_maxiter" type="text" value="None" label="The number of iterations (epochs) of the optimization" help="A whole number, or None to use the library default (maxiter)">
+                    <validator type="regex" message="Enter a whole number or None">^(None|[0-9]+)$</validator>
+                </param>
+                <param name="umap_alpha" type="float" value="1.0" label="The initial learning rate for the embedding optimization" help="(alpha)"/>
+                <param name="umap_gamma" type="float" value="1.0" label="Weighting applied to negative samples in low dimensional embedding optimization" help="Values higher than one will result in greater weight being given to negative samples (gamma)"/>
+                <param name="umap_negative_sample_rate" type="integer" value="5" label="Negative sample rate" help="The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding (negative_sample_rate)"/>
+                <param name="umap_init_pos" type="text" value="spectral" label="How to initialize the low dimensional embedding" help="(init_pos)"/>
+                <param name="umap_random_state" type="integer" value="42" label="Random seed" help="(random_state)"/>
+                <param name="umap_a" type="text" value="None" label="More specific parameters controlling the embedding" help="(a)"/>
+                <param name="umap_b" type="text" value="None" label="More specific parameters controlling the embedding" help="(b)"/>
+                <param name="umap_neighbors_key" type="text" value="None" label="Neighbors key" help="(neighbors_key)"/>
+            </when>
+        </conditional>
+    </inputs>
+    <!-- The processed MuData object written by the script into the job working directory. -->
+    <outputs>
+        <data name="mudata_with_processing" from_work_dir="mudata_with_processing.h5mu" format="h5mu" label="MuData with Processing"/>
+    </outputs>
+    <tests>
+        <!-- NOTE(review): every test below compares its output against the same expected file although
+             each method adds different results to the object; confirm whether per-method expected
+             files or a looser comparison are intended. -->
+        <test>
+            <!-- Leiden clustering -->
+            <param name="mudata_input" value="mudata_input.h5mu"/>
+            <conditional name="method">
+                <param name="method" value="leiden"/>
+                <param name="leiden_resolution" value="1"/>
+                <param name="leiden_random_state" value="0"/>
+                <param name="leiden_key_added" value="leiden_joint"/>
+                <param name="leiden_mod_weights" value="1"/>
+                <param name="leiden_n_iterations" value="-1"/>
+            </conditional>
+            <output name="mudata_with_processing" file="mudata_with_processing.h5mu"/>
+        </test>
+        <test>
+            <!-- Louvain clustering -->
+            <param name="mudata_input" value="mudata_input.h5mu"/>
+            <conditional name="method">
+                <param name="method" value="louvain"/>
+                <param name="louvain_resolution" value="1"/>
+                <param name="louvain_random_state" value="0"/>
+                <param name="louvain_key_added" value="louvain_joint"/>
+                <param name="louvain_mod_weights" value="1"/>
+            </conditional>
+            <output name="mudata_with_processing" file="mudata_with_processing.h5mu"/>
+        </test>
+        <test>
+            <!-- Multi-Omics Factor Analysis -->
+            <param name="mudata_input" value="mudata_input.h5mu"/>
+            <conditional name="method">
+                <param name="method" value="mofa"/>
+                <param name="mofa_n_factors" value="10"/>
+            </conditional>
+            <output name="mudata_with_processing" file="mudata_with_processing.h5mu"/>
+        </test>
+        <test>
+            <!-- Similarity Network Fusion -->
+            <param name="mudata_input" value="mudata_input.h5mu"/>
+            <conditional name="method">
+                <param name="method" value="snf"/>
+                <param name="snf_n_neighbors" value="20"/>
+                <param name="snf_neighbor_keys" value="neighbors"/>
+                <param name="snf_key_added" value="neighbors"/>
+                <param name="snf_n_iterations" value="20"/>
+                <param name="snf_sigma" value="0.5"/>
+            </conditional>
+            <output name="mudata_with_processing" file="mudata_with_processing.h5mu"/>
+        </test>
+        <test>
+            <!-- UMAP embedding -->
+            <param name="mudata_input" value="mudata_input.h5mu"/>
+            <conditional name="method">
+                <param name="method" value="umap"/>
+                <param name="umap_min_dist" value="0.5"/>
+                <param name="umap_spread" value="1.0"/>
+                <param name="umap_n_components" value="2"/>
+                <param name="umap_maxiter" value="None"/>
+                <param name="umap_alpha" value="1.0"/>
+                <param name="umap_gamma" value="1.0"/>
+                <param name="umap_negative_sample_rate" value="5"/>
+                <param name="umap_init_pos" value="spectral"/>
+                <param name="umap_random_state" value="42"/>
+                <param name="umap_a" value="None"/>
+                <param name="umap_b" value="None"/>
+                <param name="umap_neighbors_key" value="None"/>
+            </conditional>
+            <output name="mudata_with_processing" file="mudata_with_processing.h5mu"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Cluster: Cluster cells using the Leiden algorithm (`muon.tl.leiden`)
+====================================================================
+
+        Cluster cells using the Leiden algorithm. This runs only the multiplex Leiden algorithm on the MuData object
+        using connectivities of individual modalities.
+
+        More details on the `muon documentation
+        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.leiden.html#muon.tl.leiden>`__
+
+Cluster: Cluster cells using the Louvain algorithm (`muon.tl.louvain`)
+======================================================================
+
+        Cluster cells using the Louvain algorithm. This runs only the multiplex Louvain algorithm on the MuData object
+        using connectivities of individual modalities.
+
+        More details on the `muon documentation
+        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.louvain.html#muon.tl.louvain>`__
+
+Analyze: Run Multi Omics Factor Analysis (`muon.tl.mofa`)
+=========================================================
+
+        Run Multi-Omics Factor Analysis
+
+        More details on the `muon documentation
+        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.mofa.html#muon.tl.mofa>`__
+
+Analyze: Similarity Network Fusion (`muon.tl.snf`)
+==================================================
+
+        Similarity network fusion (SNF). See Wang et al., 2014 (DOI: 10.1038/nmeth.2810).
+
+        More details on the `muon documentation
+        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.snf.html#muon.tl.snf>`__
+
+Embed: Embed the multimodal neighborhood graph using UMAP (`muon.tl.umap`)
+==========================================================================
+
+        Embed the multimodal neighborhood graph using UMAP (McInnes et al, 2018). UMAP (Uniform Manifold Approximation
+        and Projection) is a manifold learning technique suitable for visualizing high-dimensional data.
+
+        More details on the `muon documentation
+        <https://muon.readthedocs.io/en/latest/api/generated/muon.tl.umap.html#muon.tl.umap>`__
+
+    ]]></help>
+    <citations>
+        <!-- MUON: multimodal omics analysis framework (Bredikhin, Kats and Stegle, Genome Biology, 2022). -->
+        <citation type="doi">10.1186/s13059-021-02577-8</citation>
+    </citations>
+</tool>