From 951b94ad57b1f5a783ad767e610dd1712a36fdc8 Mon Sep 17 00:00:00 2001
From: Greg Caporaso <greg.caporaso@nau.edu>
Date: Tue, 8 Oct 2024 08:53:09 -0700
Subject: [PATCH 01/13] integrate legend in Visualization

If `export_legend` is enabled, present the legend in the resulting
Visualization.
Note that there was a minor bug in the original version of the
Visualizer: the conditional was evaluated after `export_legend` was
cast to a string, and because any non-empty string (including
`'False'`) is truthy, the check always evaluated to `True`.
---
 gut_to_soil_manuscript_figures/_methods.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py
index 96d131b..eef69ed 100644
--- a/gut_to_soil_manuscript_figures/_methods.py
+++ b/gut_to_soil_manuscript_figures/_methods.py
@@ -44,12 +44,11 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
     swap_axes = str(swap_axes)
     himalaya = str(himalaya)
     pit_toilet = str(pit_toilet)
-    export_legend = str(export_legend)
+    export_legend_str = str(export_legend)
 
     plot_fp = os.path.join(output_dir, 'pcoa_plot.png')
 
-    if export_legend:
-        legend_fp = os.path.join(output_dir, 'legend.png')
+    legend_fp = os.path.join(output_dir, 'legend.png')
 
     command = [
         'python', script_path,
@@ -64,7 +63,7 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
         swap_axes,
         himalaya,
         pit_toilet,
-        export_legend,
+        export_legend_str,
         highlighted_buckets,
         legend_fp
     ]
@@ -80,6 +79,13 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
         <body>
             <h1>2D PCoA Plot</h1>
             <img src="pcoa_plot.png" alt="PCoA Plot">
+        ''')
+        if export_legend:
+            f.write('''
+                    <p>
+                    <img src="legend.png" alt="PCoA Plot legend">
+                    ''')
+        f.write('''
         </body>
         </html>
         ''')

From b9da739701dc4af9d49d3f889d32744bc946cb41 Mon Sep 17 00:00:00 2001
From: Greg Caporaso <greg.caporaso@nau.edu>
Date: Tue, 8 Oct 2024 09:09:12 -0700
Subject: [PATCH 02/13] updates to reflect refactored metadata

---
 README.md                                     |  4 +-
 .../scripts/plot_pcoa_2d.py                   | 61 ++++++++++---------
 2 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/README.md b/README.md
index 0029d6d..fc553c7 100644
--- a/README.md
+++ b/README.md
@@ -78,7 +78,7 @@ Your first step will be filtering the distance matrix you'd like to use for the
 qiime diversity filter-distance-matrix \
 --i-distance-matrix unweighted-unifrac-distance-matrix.qza \
 --m-metadata-file final-analysis-metadata.tsv \
---p-where "[SampleType2] IN ('EMP-Soils', 'Food-Compost', 'Self Sample', 'Compost Post-Roll', 'Bulking Material')" \
+--p-where "[SampleType] IN ('Soil', 'Food Compost', 'Landscape Compost', 'Human Excrement', 'Human Excrement Compost', 'Bulking Material')" \
 --o-filtered-distance-matrix filtered-unweighted-unifrac-distance-matrix.qza
 ```
 
@@ -95,7 +95,7 @@ Now we're ready to generate a pcoa plot!
 qiime gut-to-soil-manuscript-figures pcoa-2d \
 --i-ordination filtered-unweighted-unifrac-2d-pcoa.qza \
 --m-metadata-file final-analysis-metadata.tsv \
---p-measure 'Unweighted Unifrac' \
+--p-measure 'Unweighted UniFrac' \
 --p-average \
 --p-export-legend \
 --p-highlighted-buckets '3, 4' \
diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
index df36685..e32d123 100644
--- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
+++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
@@ -43,11 +43,12 @@ def _bucket_util(highlighted_buckets, md, ord_2d):
         # connecting time series data in order
         md_bucket_sorted = \
             md[(md['Bucket'] == bucket) &
-               (md['SampleType2'] == 'Compost Post-Roll')].sort_values('Week')
+               (md['SampleType'] == 'Human Excrement Compost')].\
+            sort_values('Composting Time Point')
 
         # week 1-52 IDs for selected bucket
-        bucket_ids_sorted = \
-            md_bucket_sorted[md_bucket_sorted['Week'] > 0.0].index.values
+        bucket_ids_sorted = md_bucket_sorted[
+            md_bucket_sorted['Composting Time Point'] > 0.0].index.values
 
         # making sure bucket IDs used are only ones that are present in both
         # the md and ordination results
@@ -62,8 +63,7 @@ def _bucket_util(highlighted_buckets, md, ord_2d):
         # HE
         bucket_ids_HE_week0 = \
             md[(md['Bucket'] == bucket) &
-               (md['SampleType2'] == 'Self Sample') &
-               (md['Week'] == 0.0)].index.values
+               (md['SampleType'] == 'Human Excrement')].index.values
 
         ids_HE_week0 = []
         for i in bucket_ids_HE_week0:
@@ -73,8 +73,7 @@ def _bucket_util(highlighted_buckets, md, ord_2d):
         # bulking
         bucket_ids_bulk_week0 = \
             md[(md['Bucket'] == bucket) &
-               (md['SampleType2'] == 'Bulking Material') &
-               (md['Week'] == 0.0)].index.values
+               (md['SampleType'] == 'Bulking Material')].index.values
 
         ids_bulk_week0 = []
         for i in bucket_ids_bulk_week0:
@@ -82,8 +81,8 @@ def _bucket_util(highlighted_buckets, md, ord_2d):
                 ids_bulk_week0.append(i)
 
         # week 1 i.e. end points for dotted line connecting HE & BM -> HEC
-        bucket_ids_HEC_week1 = \
-            md_bucket_sorted[md_bucket_sorted['Week'] == 1.0].index.values
+        bucket_ids_HEC_week1 = md_bucket_sorted[
+            md_bucket_sorted['Composting Time Point'] == 1.0].index.values
 
         ids_HEC_week1 = []
         for i in bucket_ids_HEC_week1:
@@ -141,8 +140,9 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
         ord_2d[1] = ord_2d[1].multiply(-1)
 
     # allowed sample types to be pulled from the md
-    sample_types = ['EMP-Soils', 'Food-Compost', 'Self Sample',
-                    'Compost Post-Roll', 'Bulking Material']
+    sample_types = ['Soil', 'Food Compost', 'Landscape Compost',
+                    'Human Excrement', 'Human Excrement Compost',
+                    'Bulking Material']
 
     # if using himalaya and/or pit toilet data
     if himalaya == 'True':
@@ -151,21 +151,23 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
         sample_types.append('Pit Toilet')
 
     # sorting the filtered md (by allowed sample types) by week
-    md = metadata[metadata['SampleType2']
-                  .isin(sample_types)].sort_values('Week')
+    md = metadata[metadata['SampleType']
+                  .isin(sample_types)].sort_values('Composting Time Point')
     md['Bucket'] = md['Bucket'].astype(float)
-    md['Week'] = md['Week'].astype(float)
+    md['Composting Time Point'] = md['Composting Time Point'].astype(float)
 
     buckets_md = md[md['Bucket'].between(1, 16)]
 
     # ALL SUBJECT FECAL SAMPLES: IDs -> XY ordination points
-    fecal_ids = list(set(buckets_md[buckets_md['Week'] == 0.0].index.values) &
-                     set(ord_2d.index.values))
+    fecal_ids = list(
+        set(buckets_md[buckets_md['SampleType'] == 'Human Excrement']
+            .index.values) &
+        set(ord_2d.index.values))
     x_fecal, y_fecal = _swap_axis(ord_2d, fecal_ids, swap_axes)
 
     # ALL SUBJECT BULKING MATERIAL: IDs -> XY ordination points
     bulking_ids = \
-        list(set(md[md['SampleType2'] == 'Bulking Material'].index.values) &
+        list(set(md[md['SampleType'] == 'Bulking Material'].index.values) &
              set(ord_2d.index.values))
     x_bulking, y_bulking = _swap_axis(ord_2d, bulking_ids, swap_axes)
 
@@ -176,8 +178,8 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
 
     # (OPTIONAL) WEEKLY MEAN FOR ALL BUCKETS: IDs -> XY ordination points
     if average == 'True':
-        weeks_md = md[md['Week'].between(1, 52)]
-        weeks = list(set(weeks_md['Week'].values))
+        weeks_md = md[md['Composting Time Point'].between(1, 52)]
+        weeks = list(set(weeks_md['Composting Time Point'].values))
 
         # dicts for each week's mean x&y values
         bucket_weekly_avgs_x = {}
@@ -188,9 +190,9 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
             y_list = []
 
             # filtering the md to only include post-roll sample types
-            weekly_bucket_ids = \
-                md[(md['Week'] == week) &
-                    (md['SampleType2'] == 'Compost Post-Roll')].index.values
+            weekly_bucket_ids = md[
+                (md['Composting Time Point'] == week) &
+                (md['SampleType'] == 'Human Excrement Compost')].index.values
 
             # only use IDs that are present both in the md and ordination
             included_ids = []
@@ -233,8 +235,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
         if not highlighted_buckets:
             # HE wk 0 mean
             HE_week0 = \
-                md[(md['SampleType2'] == 'Self Sample') &
-                   (md['Week'] == 0.0)].index.values
+                md[(md['SampleType'] == 'Human Excrement')].index.values
             ids_HE_week0 = []
             for i in HE_week0:
                 if i in ord_2d.index.values:
@@ -245,8 +246,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
 
             # bulk wk 0 mean
             bulk_week0 = \
-                md[(md['SampleType2'] == 'Bulking Material') &
-                   (md['Week'] == 0.0)].index.values
+                md[(md['SampleType'] == 'Bulking Material')].index.values
             ids_bulk_week0 = []
             for i in bulk_week0:
                 if i in ord_2d.index.values:
@@ -268,13 +268,14 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
     x_buckets, y_buckets = _swap_axis(ord_2d, bucket_ids, swap_axes)
 
     # EMP SOILS
-    emp_ids = md.loc[md['Bucket'] == 0.0].index.values
+    emp_ids = md.loc[md['SampleType'] == 'Soil'].index.values
     x_emp, y_emp = _swap_axis(ord_2d, emp_ids, swap_axes)
 
     # FOOD COMPOST
     compost_ids = \
-        list(set(md.loc[md['Bucket'] == 17.0].index.values) &
-             set(ord_2d.index.values))
+        list(set(md.loc[(md['SampleType'] == 'Food Compost') |
+                        (md['SampleType'] == 'Landscape Compost')].index.values)
+             & set(ord_2d.index.values))
     x_compost, y_compost = _swap_axis(ord_2d, compost_ids, swap_axes)
 
     # (OPTIONAL SAMPLE TYPES) HIMALAYA
@@ -424,7 +425,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
 
             # adding week annotations for each highlighted bucket
             if week_annotations == 'True':
-                for week, x, y in zip((md.loc[ids]['Week']),
+                for week, x, y in zip((md.loc[ids]['Composting Time Point']),
                                       x_bucket, y_bucket):
                     week_int = int(week)
                     ax.annotate(str(week_int), weight='bold', color='purple',

From 0331476156d06eb2fa44c6ffa911ba86b5eb1d7b Mon Sep 17 00:00:00 2001
From: Greg Caporaso <192372+gregcaporaso@users.noreply.github.com>
Date: Wed, 9 Oct 2024 06:46:55 -0700
Subject: [PATCH 03/13] Update gut_to_soil_manuscript_figures/_methods.py

Co-authored-by: Liz Gehret <54517601+lizgehret@users.noreply.github.com>
---
 gut_to_soil_manuscript_figures/_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py
index eef69ed..f2a0538 100644
--- a/gut_to_soil_manuscript_figures/_methods.py
+++ b/gut_to_soil_manuscript_figures/_methods.py
@@ -44,7 +44,7 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
     swap_axes = str(swap_axes)
     himalaya = str(himalaya)
     pit_toilet = str(pit_toilet)
-    export_legend_str = str(export_legend)
+    export_legend = str(export_legend)
 
     plot_fp = os.path.join(output_dir, 'pcoa_plot.png')
 

From 04f45c929be15f90f397258babb2ca3720d54c26 Mon Sep 17 00:00:00 2001
From: Greg Caporaso <192372+gregcaporaso@users.noreply.github.com>
Date: Wed, 9 Oct 2024 06:47:08 -0700
Subject: [PATCH 04/13] Update gut_to_soil_manuscript_figures/_methods.py

Co-authored-by: Liz Gehret <54517601+lizgehret@users.noreply.github.com>
---
 gut_to_soil_manuscript_figures/_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py
index f2a0538..8711672 100644
--- a/gut_to_soil_manuscript_figures/_methods.py
+++ b/gut_to_soil_manuscript_figures/_methods.py
@@ -63,7 +63,7 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
         swap_axes,
         himalaya,
         pit_toilet,
-        export_legend_str,
+        export_legend,
         highlighted_buckets,
         legend_fp
     ]

From fffaaecf542f8cd6924bab7a0314de8edce68e50 Mon Sep 17 00:00:00 2001
From: Greg Caporaso <192372+gregcaporaso@users.noreply.github.com>
Date: Wed, 9 Oct 2024 06:47:23 -0700
Subject: [PATCH 05/13] Update gut_to_soil_manuscript_figures/_methods.py

Co-authored-by: Liz Gehret <54517601+lizgehret@users.noreply.github.com>
---
 gut_to_soil_manuscript_figures/_methods.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py
index 8711672..2f1d97e 100644
--- a/gut_to_soil_manuscript_figures/_methods.py
+++ b/gut_to_soil_manuscript_figures/_methods.py
@@ -48,7 +48,6 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
 
     plot_fp = os.path.join(output_dir, 'pcoa_plot.png')
 
-    legend_fp = os.path.join(output_dir, 'legend.png')
 
     command = [
         'python', script_path,

From aabe60ffa2f9a068df2d78d88da2d31a705e6e59 Mon Sep 17 00:00:00 2001
From: Greg Caporaso <192372+gregcaporaso@users.noreply.github.com>
Date: Wed, 9 Oct 2024 06:47:36 -0700
Subject: [PATCH 06/13] Update gut_to_soil_manuscript_figures/_methods.py

Co-authored-by: Liz Gehret <54517601+lizgehret@users.noreply.github.com>
---
 gut_to_soil_manuscript_figures/_methods.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py
index 2f1d97e..80c1cf5 100644
--- a/gut_to_soil_manuscript_figures/_methods.py
+++ b/gut_to_soil_manuscript_figures/_methods.py
@@ -79,7 +79,8 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
             <h1>2D PCoA Plot</h1>
             <img src="pcoa_plot.png" alt="PCoA Plot">
         ''')
-        if export_legend:
+        if export_legend == 'True':
+            legend_fp = os.path.join(output_dir, 'legend.png')
             f.write('''
                     <p>
                     <img src="legend.png" alt="PCoA Plot legend">

From ac3f9921b83d6eb2994cad3782c76922a8fdf1d6 Mon Sep 17 00:00:00 2001
From: Greg Caporaso <greg.caporaso@nau.edu>
Date: Wed, 9 Oct 2024 06:56:37 -0700
Subject: [PATCH 07/13] legend_fp was referenced before assignment

---
 gut_to_soil_manuscript_figures/_methods.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py
index 80c1cf5..67a42d4 100644
--- a/gut_to_soil_manuscript_figures/_methods.py
+++ b/gut_to_soil_manuscript_figures/_methods.py
@@ -47,7 +47,8 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
     export_legend = str(export_legend)
 
     plot_fp = os.path.join(output_dir, 'pcoa_plot.png')
-
+    if export_legend == 'True':
+        legend_fp = os.path.join(output_dir, 'legend.png')
 
     command = [
         'python', script_path,
@@ -80,7 +81,6 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
             <img src="pcoa_plot.png" alt="PCoA Plot">
         ''')
         if export_legend == 'True':
-            legend_fp = os.path.join(output_dir, 'legend.png')
             f.write('''
                     <p>
                     <img src="legend.png" alt="PCoA Plot legend">

From c917fee98dd1f0bdb99ad219ced21279665b7007 Mon Sep 17 00:00:00 2001
From: Greg Caporaso <greg.caporaso@nau.edu>
Date: Wed, 9 Oct 2024 07:10:49 -0700
Subject: [PATCH 08/13] address issue with missing Week 0.0 checks

---
 .../scripts/plot_pcoa_2d.py                          | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
index e32d123..e47de3c 100644
--- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
+++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
@@ -63,7 +63,8 @@ def _bucket_util(highlighted_buckets, md, ord_2d):
         # HE
         bucket_ids_HE_week0 = \
             md[(md['Bucket'] == bucket) &
-               (md['SampleType'] == 'Human Excrement')].index.values
+               (md['SampleType'] == 'Human Excrement') &
+               (md['Composting Time Point']).isna()].index.values
 
         ids_HE_week0 = []
         for i in bucket_ids_HE_week0:
@@ -73,7 +74,8 @@ def _bucket_util(highlighted_buckets, md, ord_2d):
         # bulking
         bucket_ids_bulk_week0 = \
             md[(md['Bucket'] == bucket) &
-               (md['SampleType'] == 'Bulking Material')].index.values
+               (md['SampleType'] == 'Bulking Material') &
+               (md['Composting Time Point']).isna()].index.values
 
         ids_bulk_week0 = []
         for i in bucket_ids_bulk_week0:
@@ -235,7 +237,8 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
         if not highlighted_buckets:
             # HE wk 0 mean
             HE_week0 = \
-                md[(md['SampleType'] == 'Human Excrement')].index.values
+                md[(md['SampleType'] == 'Human Excrement') &
+                   (md['Composting Time Point']).isna()].index.values
             ids_HE_week0 = []
             for i in HE_week0:
                 if i in ord_2d.index.values:
@@ -246,7 +249,8 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
 
             # bulk wk 0 mean
             bulk_week0 = \
-                md[(md['SampleType'] == 'Bulking Material')].index.values
+                md[(md['SampleType'] == 'Bulking Material') &
+                   (md['Composting Time Point']).isna()].index.values
             ids_bulk_week0 = []
             for i in bulk_week0:
                 if i in ord_2d.index.values:

From 1e179a6fc0a1fecaa8452573f1269ededcb47014 Mon Sep 17 00:00:00 2001
From: Greg Caporaso <greg.caporaso@nau.edu>
Date: Tue, 15 Oct 2024 10:45:23 -0600
Subject: [PATCH 09/13] add debugging information

---
 .../scripts/plot_pcoa_2d.py                   | 33 +++++++++++--------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
index e47de3c..101f52c 100644
--- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
+++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
@@ -113,11 +113,13 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
 
     ord_rslts = skbio.OrdinationResults.read(str(ordination_fp))
     ord_2d = ord_rslts.samples.iloc[:, 0:2]
+    print(f"\n***\nSamples in ordination (n={ord_2d.shape[0]})\n***\n")
 
     metadata_in = pd.read_csv(str(metadata_fp),
                               sep='\t').set_index('sample-id')
     # filtering metadata to only include samples w/IDs present in ordination
     metadata = metadata_in.loc[ord_2d.index.values]
+    print(metadata['SampleType'].value_counts())
 
     # setting XY labels based on swap axis &
     # figure aspect based on proportion explained
@@ -306,18 +308,19 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
     fecal_scatter = \
         plt.scatter(x=x_fecal, y=y_fecal, facecolors='none',
                     edgecolors='tab:brown',
-                    label='HE (other buckets)')
+                    label=f'HE (other buckets) (n={len(x_fecal)})')
 
     # Bulking Material - all subjects
     bulking_scatter = \
         plt.scatter(x=x_bulking, y=y_bulking, facecolors='none',
-                    edgecolors='g', label='Bulking Material (other buckets)')
+                    edgecolors='g',
+                    label=f'Bulking Material (other buckets) (n={len(x_bulking)})')
 
     # All buckets (minus highlighted bucket(s))
     all_sample_buckets = \
         plt.scatter(x=x_buckets, y=y_buckets, facecolors='none',
                     edgecolors='#C5C9C7', marker='^',
-                    label='HEC (other buckets)')
+                    label=f'HEC (other buckets) (n={len(x_buckets)})')
 
     # (OPTIONAL) Weekly Mean for all Buckets
     if average == 'True':
@@ -325,7 +328,8 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
             plt.scatter(x=bucket_weekly_avgs_x.values(),
                         y=bucket_weekly_avgs_y.values(),
                         marker='*', facecolors='#1f77b4',
-                        s=100, label='HEC (Weekly Mean)')
+                        s=100,
+                        label=f'HEC (Weekly Mean) (n={len(bucket_weekly_avgs_x)})')
 
         # adding HE mean if only plotting the weekly mean
         # (w/o any highlighted bucket(s))
@@ -335,33 +339,36 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
                 plt.scatter(x=x0_HE_mean, y=y0_HE_mean,
                             marker='*', s=150, zorder=1,
                             facecolors='tab:brown', edgecolors='k',
-                            label='HE (Weekly Mean)')
+                            label=f'HE (Weekly Mean) (n={len(x0_HE_mean)})')
 
             # bulking
             bulk_week0_scatter = \
                 plt.scatter(x=x0_bulk_mean, y=y0_bulk_mean,
                             marker='*', s=150, zorder=1,
                             facecolors='g', edgecolors='k',
-                            label='Bulking Material (Weekly Mean)')
+                            label=f'Bulking Material (Weekly Mean) (n={len(x0_bulk_mean)})')
 
     # EMP Soil
     emp_soil_scatter = plt.scatter(x=x_emp, y=y_emp,
-                                   facecolors='k', label='Soil')
+                                   facecolors='k',
+                                   label=f'Soil (n={len(x_emp)})')
 
     # Food Compost
     food_compost_scatter = plt.scatter(x=x_compost, y=y_compost,
                                        facecolors='r',
-                                       label='FLWC')
+                                       label=f'FLWC (n={len(x_compost)})')
 
     # (OPTIONAL SAMPLE TYPES) Himalaya
     if himalaya == 'True':
         himalaya_scatter = plt.scatter(x=x_hima, y=y_hima,
-                                       facecolors='b', label='Himalaya')
+                                       facecolors='b',
+                                       label=f'Himalaya (n={len(x_hima)})')
 
     # (OPTIONAL SAMPLE TYPES) Pit Toilet
     if pit_toilet == 'True':
         pit_toilet_scatter = plt.scatter(x=x_pt, y=y_pt,
-                                         facecolors='y', label='Pit Toilet')
+                                         facecolors='y',
+                                         label=f'Pit Toilet (n={len(x_pt)})')
 
     # collecting the handle info to add to the legend
     bucket_handles = []
@@ -405,12 +412,12 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
 
             HE_week0_scatter = \
                 plt.scatter(x=x0_HE, y=y0_HE, facecolors='tab:brown',
-                            label=f'HE (Bucket #{bucket})')
+                            label=f'HE (Bucket #{bucket}) (n={len(x0_HE)})')
             bucket_handles.append(HE_week0_scatter)
 
             bulk_week0_scatter = \
                 plt.scatter(x=x0_bulk, y=y0_bulk, facecolors='g',
-                            label=f'Bulking Material (Bucket #{bucket})')
+                            label=f'Bulking Material (Bucket #{bucket}) (n={len(x0_bulk)})')
             bucket_handles.append(bulk_week0_scatter)
 
         for bucket, ids in buckets_dict.items():
@@ -422,7 +429,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
                 plt.scatter(x=x_bucket, y=y_bucket,
                             facecolors=viridis(color_dict[bucket]),
                             edgecolors='k', marker='^',
-                            label=f'HEC (Bucket #{bucket})')
+                            label=f'HEC (Bucket #{bucket}) (n={len(x_bucket)})')
 
             bucket_handles.append(highlighted_bucket_scatter)
             bucket_nums.append(bucket)

From 3d77c78ad0f5ef6b83a6e90d4d75faf6810687e9 Mon Sep 17 00:00:00 2001
From: Greg Caporaso <greg.caporaso@nau.edu>
Date: Thu, 17 Oct 2024 09:23:25 -0600
Subject: [PATCH 10/13] select HEC bucket samples by SampleType; always set legend_fp

---
 gut_to_soil_manuscript_figures/_methods.py           |  3 +--
 .../scripts/plot_pcoa_2d.py                          | 12 ++++++------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/gut_to_soil_manuscript_figures/_methods.py b/gut_to_soil_manuscript_figures/_methods.py
index 67a42d4..0e4f1a1 100644
--- a/gut_to_soil_manuscript_figures/_methods.py
+++ b/gut_to_soil_manuscript_figures/_methods.py
@@ -47,8 +47,7 @@ def pcoa_2d(output_dir: str, metadata: qiime2.Metadata,
     export_legend = str(export_legend)
 
     plot_fp = os.path.join(output_dir, 'pcoa_plot.png')
-    if export_legend == 'True':
-        legend_fp = os.path.join(output_dir, 'legend.png')
+    legend_fp = os.path.join(output_dir, 'legend.png')
 
     command = [
         'python', script_path,
diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
index 101f52c..9a997b3 100644
--- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
+++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
@@ -120,6 +120,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
     # filtering metadata to only include samples w/IDs present in ordination
     metadata = metadata_in.loc[ord_2d.index.values]
     print(metadata['SampleType'].value_counts())
+    xyz = metadata[metadata['SampleType'] == 'Human Excrement Compost'].index
 
     # setting XY labels based on swap axis &
     # figure aspect based on proportion explained
@@ -262,11 +263,10 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
             y0_bulk_mean = np.mean(y0_bulk)
 
     # ALL BUCKETS (minus highlighted bucket(s))
-    all_bucket_ids_w_fecal = \
+    all_bucket_ids = \
         list(set(md[md['Bucket'].between(1, 16)].index.values) &
+             set(md[md['SampleType'] == 'Human Excrement Compost'].index.values) &
              set(ord_2d.index.values))
-    all_bucket_ids = list(set(all_bucket_ids_w_fecal) - set(fecal_ids))
-
     if highlighted_buckets:
         bucket_ids = list(set(all_bucket_ids) - bucket_set)
     else:
@@ -329,7 +329,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
                         y=bucket_weekly_avgs_y.values(),
                         marker='*', facecolors='#1f77b4',
                         s=100,
-                        label=f'HEC (Weekly Mean) (n={len(bucket_weekly_avgs_x)})')
+                        label=f'HEC (Weekly Mean)')
 
         # adding HE mean if only plotting the weekly mean
         # (w/o any highlighted bucket(s))
@@ -339,14 +339,14 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
                 plt.scatter(x=x0_HE_mean, y=y0_HE_mean,
                             marker='*', s=150, zorder=1,
                             facecolors='tab:brown', edgecolors='k',
-                            label=f'HE (Weekly Mean) (n={len(x0_HE_mean)})')
+                            label=f'HE (Weekly Mean)')
 
             # bulking
             bulk_week0_scatter = \
                 plt.scatter(x=x0_bulk_mean, y=y0_bulk_mean,
                             marker='*', s=150, zorder=1,
                             facecolors='g', edgecolors='k',
-                            label=f'Bulking Material (Weekly Mean) (n={len(x0_bulk_mean)})')
+                            label=f'Bulking Material (Weekly Mean)')
 
     # EMP Soil
     emp_soil_scatter = plt.scatter(x=x_emp, y=y_emp,

From 77edf76ab4c2337c24ccf61bab6b5f0319e6f3dd Mon Sep 17 00:00:00 2001
From: Greg Caporaso <greg.caporaso@nau.edu>
Date: Thu, 17 Oct 2024 09:27:23 -0600
Subject: [PATCH 11/13] remove debugging statements

---
 .../scripts/plot_pcoa_2d.py                   | 23 ++++++++-----------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
index 9a997b3..8d5c34a 100644
--- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
+++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
@@ -113,14 +113,11 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
 
     ord_rslts = skbio.OrdinationResults.read(str(ordination_fp))
     ord_2d = ord_rslts.samples.iloc[:, 0:2]
-    print(f"\n***\nSamples in ordination (n={ord_2d.shape[0]})\n***\n")
 
     metadata_in = pd.read_csv(str(metadata_fp),
                               sep='\t').set_index('sample-id')
     # filtering metadata to only include samples w/IDs present in ordination
     metadata = metadata_in.loc[ord_2d.index.values]
-    print(metadata['SampleType'].value_counts())
-    xyz = metadata[metadata['SampleType'] == 'Human Excrement Compost'].index
 
     # setting XY labels based on swap axis &
     # figure aspect based on proportion explained
@@ -308,19 +305,19 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
     fecal_scatter = \
         plt.scatter(x=x_fecal, y=y_fecal, facecolors='none',
                     edgecolors='tab:brown',
-                    label=f'HE (other buckets) (n={len(x_fecal)})')
+                    label=f'HE (other buckets)')
 
     # Bulking Material - all subjects
     bulking_scatter = \
         plt.scatter(x=x_bulking, y=y_bulking, facecolors='none',
                     edgecolors='g',
-                    label=f'Bulking Material (other buckets) (n={len(x_bulking)})')
+                    label=f'Bulking Material (other buckets)')
 
     # All buckets (minus highlighted bucket(s))
     all_sample_buckets = \
         plt.scatter(x=x_buckets, y=y_buckets, facecolors='none',
                     edgecolors='#C5C9C7', marker='^',
-                    label=f'HEC (other buckets) (n={len(x_buckets)})')
+                    label=f'HEC (other buckets)')
 
     # (OPTIONAL) Weekly Mean for all Buckets
     if average == 'True':
@@ -351,24 +348,24 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
     # EMP Soil
     emp_soil_scatter = plt.scatter(x=x_emp, y=y_emp,
                                    facecolors='k',
-                                   label=f'Soil (n={len(x_emp)})')
+                                   label=f'Soil')
 
     # Food Compost
     food_compost_scatter = plt.scatter(x=x_compost, y=y_compost,
                                        facecolors='r',
-                                       label=f'FLWC (n={len(x_compost)})')
+                                       label=f'FLWC')
 
     # (OPTIONAL SAMPLE TYPES) Himalaya
     if himalaya == 'True':
         himalaya_scatter = plt.scatter(x=x_hima, y=y_hima,
                                        facecolors='b',
-                                       label=f'Himalaya (n={len(x_hima)})')
+                                       label=f'Himalaya')
 
     # (OPTIONAL SAMPLE TYPES) Pit Toilet
     if pit_toilet == 'True':
         pit_toilet_scatter = plt.scatter(x=x_pt, y=y_pt,
                                          facecolors='y',
-                                         label=f'Pit Toilet (n={len(x_pt)})')
+                                         label=f'Pit Toilet')
 
     # collecting the handle info to add to the legend
     bucket_handles = []
@@ -412,12 +409,12 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
 
             HE_week0_scatter = \
                 plt.scatter(x=x0_HE, y=y0_HE, facecolors='tab:brown',
-                            label=f'HE (Bucket #{bucket}) (n={len(x0_HE)})')
+                            label=f'HE (Bucket #{bucket})')
             bucket_handles.append(HE_week0_scatter)
 
             bulk_week0_scatter = \
                 plt.scatter(x=x0_bulk, y=y0_bulk, facecolors='g',
-                            label=f'Bulking Material (Bucket #{bucket}) (n={len(x0_bulk)})')
+                            label=f'Bulking Material (Bucket #{bucket})')
             bucket_handles.append(bulk_week0_scatter)
 
         for bucket, ids in buckets_dict.items():
@@ -429,7 +426,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
                 plt.scatter(x=x_bucket, y=y_bucket,
                             facecolors=viridis(color_dict[bucket]),
                             edgecolors='k', marker='^',
-                            label=f'HEC (Bucket #{bucket}) (n={len(x_bucket)})')
+                            label=f'HEC (Bucket #{bucket})')
 
             bucket_handles.append(highlighted_bucket_scatter)
             bucket_nums.append(bucket)

From b86893aabf1f3cdc8f3dcd0e1b42290c266ffa38 Mon Sep 17 00:00:00 2001
From: Greg Caporaso <greg.caporaso@nau.edu>
Date: Thu, 17 Oct 2024 09:31:22 -0600
Subject: [PATCH 12/13] lint

---
 .../scripts/plot_pcoa_2d.py                   | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
index 8d5c34a..1992e79 100644
--- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
+++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
@@ -261,9 +261,9 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
 
     # ALL BUCKETS (minus highlighted bucket(s))
     all_bucket_ids = \
-        list(set(md[md['Bucket'].between(1, 16)].index.values) &
-             set(md[md['SampleType'] == 'Human Excrement Compost'].index.values) &
-             set(ord_2d.index.values))
+        list(set(md[md['Bucket'].between(1, 16)].index) &
+             set(md[md['SampleType'] == 'Human Excrement Compost'].index) &
+             set(ord_2d.index))
     if highlighted_buckets:
         bucket_ids = list(set(all_bucket_ids) - bucket_set)
     else:
@@ -305,19 +305,19 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
     fecal_scatter = \
         plt.scatter(x=x_fecal, y=y_fecal, facecolors='none',
                     edgecolors='tab:brown',
-                    label=f'HE (other buckets)')
+                    label='HE (other buckets)')
 
     # Bulking Material - all subjects
     bulking_scatter = \
         plt.scatter(x=x_bulking, y=y_bulking, facecolors='none',
                     edgecolors='g',
-                    label=f'Bulking Material (other buckets)')
+                    label='Bulking Material (other buckets)')
 
     # All buckets (minus highlighted bucket(s))
     all_sample_buckets = \
         plt.scatter(x=x_buckets, y=y_buckets, facecolors='none',
                     edgecolors='#C5C9C7', marker='^',
-                    label=f'HEC (other buckets)')
+                    label='HEC (other buckets)')
 
     # (OPTIONAL) Weekly Mean for all Buckets
     if average == 'True':
@@ -326,7 +326,7 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
                         y=bucket_weekly_avgs_y.values(),
                         marker='*', facecolors='#1f77b4',
                         s=100,
-                        label=f'HEC (Weekly Mean)')
+                        label='HEC (Weekly Mean)')
 
         # adding HE mean if only plotting the weekly mean
         # (w/o any highlighted bucket(s))
@@ -336,36 +336,36 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
                 plt.scatter(x=x0_HE_mean, y=y0_HE_mean,
                             marker='*', s=150, zorder=1,
                             facecolors='tab:brown', edgecolors='k',
-                            label=f'HE (Weekly Mean)')
+                            label='HE (Weekly Mean)')
 
             # bulking
             bulk_week0_scatter = \
                 plt.scatter(x=x0_bulk_mean, y=y0_bulk_mean,
                             marker='*', s=150, zorder=1,
                             facecolors='g', edgecolors='k',
-                            label=f'Bulking Material (Weekly Mean)')
+                            label='Bulking Material (Weekly Mean)')
 
     # EMP Soil
     emp_soil_scatter = plt.scatter(x=x_emp, y=y_emp,
                                    facecolors='k',
-                                   label=f'Soil')
+                                   label='Soil')
 
     # Food Compost
     food_compost_scatter = plt.scatter(x=x_compost, y=y_compost,
                                        facecolors='r',
-                                       label=f'FLWC')
+                                       label='FLWC')
 
     # (OPTIONAL SAMPLE TYPES) Himalaya
     if himalaya == 'True':
         himalaya_scatter = plt.scatter(x=x_hima, y=y_hima,
                                        facecolors='b',
-                                       label=f'Himalaya')
+                                       label='Himalaya')
 
     # (OPTIONAL SAMPLE TYPES) Pit Toilet
     if pit_toilet == 'True':
         pit_toilet_scatter = plt.scatter(x=x_pt, y=y_pt,
                                          facecolors='y',
-                                         label=f'Pit Toilet')
+                                         label='Pit Toilet')
 
     # collecting the handle info to add to the legend
     bucket_handles = []

From 0dcf1cdb1ce92b71d2e18d7f58111ca375bdba09 Mon Sep 17 00:00:00 2001
From: Liz Gehret <elizabeth.gehret@gmail.com>
Date: Thu, 17 Oct 2024 15:41:48 -0600
Subject: [PATCH 13/13] improve figure title

---
 gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
index 1992e79..3c796cc 100644
--- a/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
+++ b/gut_to_soil_manuscript_figures/scripts/plot_pcoa_2d.py
@@ -484,9 +484,16 @@ def plot_pcoa_2d(metadata_fp, ordination_fp, measure,
             ax.plot([x0_bulk_mean, x1_mean], [y0_bulk_mean, y1_mean],
                     '--', color='#C5C9C7', linewidth=0.75, zorder=2)
 
+    # handling title text for buckets depending on bucket highlighting
+    if len(bucket_nums) == 0:
+        bucket_title_text = f'{measure} (all Buckets)'
+    elif len(bucket_nums) == 1:
+        bucket_title_text = f'{measure} for Bucket {bucket_nums}'
+    elif len(bucket_nums) > 1:
+        bucket_title_text = f'{measure} for Buckets {sorted(bucket_nums)}'
     # Adding title, labels & legend details
     plt.gca().set(xlabel=f'PCoA {x_label}', ylabel=f'PCoA {y_label}',
-                  title=f'{measure} for Bucket(s) {bucket_nums}',
+                  title=f'{bucket_title_text}',
                   label='Bucket#')
 
     # Helper method for exporting legend as a separate figure